type=int, default=4, metavar='<int>', help='x-fold cross-validation [%(default)s]') parser.add_argument('--seed', required=False, type=int, metavar='<int>', help='random seed') arg = parser.parse_args() if arg.seed: random.seed(arg.seed) assert (arg.order >= 1) # read sequences and reformat seqs1 = [(1, seq) for name, seq in seqio.read_fasta(arg.file1)] seqs0 = [(0, seq) for name, seq in seqio.read_fasta(arg.file0)] seqs = seqs1 + seqs0 random.shuffle(seqs) # just in case for real data # cross-validation splitting accs = [] for train, test in seqio.cross_validation(seqs, arg.xvalid): # make pwms from seqs trues = [seq for label, seq in train if label == 1] fakes = [seq for label, seq in train if label == 0] twam = make_wam(trues, arg.order) fwam = make_wam(fakes, arg.order) # score vs. test set
parser.add_argument('--real', required=False, action='store_true',
    help='If the data is real "possibly remove it later"')
arg = parser.parse_args()

# probability should be within [0.0, 1.0]
# Reported as a usage error instead of an assert so the check is still
# performed when Python runs with -O.
# NOTE(review): the original ran this identical check on arg.mins1 twice;
# the second copy may have been meant for a different threshold option --
# confirm against the full argument list.
if not (0.0 <= arg.mins1 <= 1.0):
    parser.error('mins1 must be between 0.0 and 1.0')

# a falsy seed (None or 0) leaves the random generator unseeded
if arg.seed:
    random.seed(arg.seed)

# read sequences, trim each one to [arg.start:arg.stop], and tag it with a
# label: 1 for records from file1, 0 for records from file0
seqs1 = [(1, seq[arg.start:arg.stop]) for _, seq in seqio.read_fasta(arg.file1)]
seqs0 = [(0, seq[arg.start:arg.stop]) for _, seq in seqio.read_fasta(arg.file0)]
seqs = seqs1 + seqs0
random.shuffle(seqs)

accs = {}

# splitting data into training and testing
for train, test in seqio.cross_validation(seqs, arg.xvalid):

    # extracting trues and fakes out of the train data
    trues_train = [seq for label, seq in train if label == 1]
    fakes_train = [seq for label, seq in train if label == 0]

    # apriori on converted train sets and getting a set of rules
    trues_rules = clust_lib.appr(trues_train, arg.start, arg.stop, arg.mins1)
help='unconstrained') arg = parser.parse_args() # options not used: homopolymer, circular, bedfile # reverse, complement, invert # sensitivity - for the peak caller # residuals, dump # top # localaverageenergy # create working directory if necessary if not os.path.exists(arg.out): os.system(f'mkdir {arg.out}') # create fasta file with proper definition line names = [] with open(f'{arg.out}/fasta', 'w') as fp: for name, seq in seqio.read_fasta(arg.fasta): tok = name.split(' ') names.append(tok[0]) defline = f">{tok[0]} range=z:1:{len(seq)} 5'pad=0 3'pad=0" defline += ' strand=+ repeatMasking=none' fp.write(defline) fp.write('\n') fp.write(seq) fp.write('\n') # run rlooper cmd = (f'{exe} {arg.out}/fasta {arg.out}.output') cmd += f' --N {arg.n}' cmd += f' --sigma {arg.s}' cmd += f' --a arg.a' cmd += f' --minlength 2'