Example #1
                        type=int,
                        default=4,
                        metavar='<int>',
                        help='x-fold cross-validation [%(default)s]')
    parser.add_argument('--seed',
                        required=False,
                        type=int,
                        metavar='<int>',
                        help='random seed')
    arg = parser.parse_args()

    if arg.seed is not None: random.seed(arg.seed)
    assert arg.order >= 1

    # read sequences and reformat
    seqs1 = [(1, seq) for name, seq in seqio.read_fasta(arg.file1)]
    seqs0 = [(0, seq) for name, seq in seqio.read_fasta(arg.file0)]
    seqs = seqs1 + seqs0
    random.shuffle(seqs)  # shuffle in case real data is ordered by class

    # cross-validation splitting
    accs = []
    for train, test in seqio.cross_validation(seqs, arg.xvalid):

        # make pwms from seqs
        trues = [seq for label, seq in train if label == 1]
        fakes = [seq for label, seq in train if label == 0]
        twam = make_wam(trues, arg.order)
        fwam = make_wam(fakes, arg.order)

        # score vs. test set
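Both snippets in this listing depend on seqio.read_fasta, a project-specific helper whose source is not shown here. Assuming it behaves like an ordinary FASTA reader that yields (name, sequence) pairs, a minimal stand-in could look like this:

# Hypothetical stand-in for seqio.read_fasta (an assumption, not the project's code):
# stream a FASTA file and yield (name, sequence) tuples.
def read_fasta(path):
    name, parts = None, []
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            if line.startswith('>'):
                if name is not None:
                    yield name, ''.join(parts)
                name, parts = line[1:], []
            elif line:
                parts.append(line)
    if name is not None:
        yield name, ''.join(parts)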
Example #2
    parser.add_argument('--real',
                        required=False,
                        action='store_true',
                        help='set if the data is real (this flag may be removed later)')
    arg = parser.parse_args()

    # probability must be between 0.0 and 1.0
    assert 0.0 <= arg.mins1 <= 1.0

    if arg.seed is not None:
        random.seed(arg.seed)

    # read sequences, slice them to the window, and label them (1 = real, 0 = fake)
    seqs1 = [(1, seq[arg.start:arg.stop])
             for name, seq in seqio.read_fasta(arg.file1)]
    seqs0 = [(0, seq[arg.start:arg.stop])
             for name, seq in seqio.read_fasta(arg.file0)]
    seqs = seqs1 + seqs0
    random.shuffle(seqs)

    accs = {}
    # split the data into training and testing folds
    for train, test in seqio.cross_validation(seqs, arg.xvalid):
        # separate trues and fakes in the training data
        trues_train = [seq for label, seq in train if label == 1]
        fakes_train = [seq for label, seq in train if label == 0]

        # run apriori on the true training set to extract a set of rules
        trues_rules = clust_lib.appr(trues_train, arg.start, arg.stop,
                                     arg.mins1)
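seqio.cross_validation(seqs, arg.xvalid) is likewise project-specific. Judging only from how it is called in both examples, it yields (train, test) pairs of labeled sequences over k folds; a minimal sketch under that assumption:

# Hypothetical k-fold splitter matching how seqio.cross_validation is used above
# (an assumption about its interface, not the real implementation).
def cross_validation(data, k):
    for i in range(k):
        train = [d for j, d in enumerate(data) if j % k != i]
        test = [d for j, d in enumerate(data) if j % k == i]
        yield train, test

Because the sequences are shuffled beforehand, assigning items to folds by index modulo k gives roughly balanced splits.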
Example #3
                        help='unconstrained')
    arg = parser.parse_args()
    # options not used: homopolymer, circular, bedfile
    # reverse, complement, invert
    # sensitivity - for the peak caller
    # residuals, dump
    # top
    # localaverageenergy

    # create working directory if necessary
    os.makedirs(arg.out, exist_ok=True)

    # create fasta file with proper definition line
    names = []
    with open(f'{arg.out}/fasta', 'w') as fp:
        for name, seq in seqio.read_fasta(arg.fasta):
            tok = name.split(' ')
            names.append(tok[0])
            defline = f">{tok[0]} range=z:1:{len(seq)} 5'pad=0 3'pad=0"
            defline += ' strand=+ repeatMasking=none'
            fp.write(defline)
            fp.write('\n')
            fp.write(seq)
            fp.write('\n')

    # run rlooper
    cmd = f'{exe} {arg.out}/fasta {arg.out}.output'
    cmd += f' --N {arg.n}'
    cmd += f' --sigma {arg.s}'
    cmd += f' --a {arg.a}'
    cmd += f' --minlength 2'
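The command above is assembled by string concatenation, presumably for os.system. Passing an argument list to subprocess.run avoids shell-quoting issues; the sketch below reuses exe and arg from the snippet and copies the flag names verbatim (whether rlooper accepts them in exactly this form is assumed from the code above):

import subprocess

def run_rlooper(exe, arg):
    # Build the rlooper command as an argument list rather than a shell string;
    # flag names are copied from the snippet above and assumed valid.
    cmd = [exe, f'{arg.out}/fasta', f'{arg.out}.output',
           '--N', str(arg.n),
           '--sigma', str(arg.s),
           '--a', str(arg.a),
           '--minlength', '2']
    subprocess.run(cmd, check=True)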