def main():
    """Filter a probe table by score and spacing and write the survivors.

    Reads the tab-separated probe file ``args.input`` (six unnamed columns:
    chrom, start, end, sequence, score, strand), keeps rows whose score lies
    strictly between ``args.mink`` and ``args.maxk`` and strictly between the
    ``args.minquantile`` and ``args.maxquantile`` quantiles of all scores,
    then walks each chromosome in order of ``start`` and keeps a probe only
    when its start is more than ``args.dis`` past the previously kept one.
    Kept probes are written tab-separated to ``args.output``.

    When ``args.strand`` is true, every second kept probe on a chromosome is
    written with strand ``'-'`` and its sequence passed through ``revcom``.

    Any failure while reading, filtering or writing prints a fixed message
    and exits with status 1.
    """
    args = check_options(get_options())
    try:
        probe = pd.read_table(
            args.input,
            names=("chrom", "start", "end", "sequence", "score", "strand"),
            dtype={"chrom": str, "start": int, "end": int,
                   "sequence": str, "score": int, "strand": str},
        )

        score = probe.score
        keep = (
            (score > args.mink)
            & (score < args.maxk)
            & (score > score.quantile(args.minquantile))
            & (score < score.quantile(args.maxquantile))
        )
        ftprobe = probe[keep]
        # The unfiltered table is no longer needed; release it before the loop.
        del probe, score, keep

        chromnames = ftprobe.chrom.unique()
        # Kept as an equality test (not truthiness) so non-boolean option
        # values behave exactly as before.
        alternate_strand = args.strand == True  # noqa: E712

        # The context manager closes the output file even if a row fails.
        with open(args.output, 'w') as outio:
            for chrom in chromnames:
                prestart = 0
                strandfr = 1
                chrom_probes = ftprobe[ftprobe.chrom == chrom].sort_values(by='start')
                for _, row in chrom_probes.iterrows():
                    nowpbstart = row.start
                    if nowpbstart <= prestart + args.dis:
                        # Too close to the previously kept probe.
                        continue
                    if alternate_strand:
                        if strandfr % 2 == 0:
                            # Label-based access: integer keys on a
                            # string-labelled Series are deprecated.
                            row['strand'] = '-'
                            row['sequence'] = revcom(row['sequence'])
                        strandfr += 1
                    prestart = nowpbstart
                    print(row.chrom, row.start, row.end, row.sequence,
                          row.score, row.strand, sep='\t', file=outio)
        print("Filtered finished!")
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit propagate.
        print("Probe file format error. Please check your probe file!")
        sys.exit(1)
def getconsensus(bcftoolspath, bcffile, chrom, start, end, seq, sample, strand='+'):
    """Return the consensus sequence of a probe for one sample via bcftools.

    A one-record FASTA (header ``>chrom:start-end``) is sent to
    ``bcftools consensus -s <sample> <bcffile>`` on standard input and the
    command's output is scanned for a run of ``A/T/C/G``.

    Args:
        bcftoolspath: path to the bcftools executable; passed through
            ``subprocesspath.subprocesspath`` before use.
        bcffile: path to the variant file; passed through
            ``subprocesspath.subprocesspath`` before use.
        chrom, start, end: strings used to build the FASTA header.
        seq: probe sequence.
        sample: sample name given to ``-s``.
        strand: when ``'-'``, ``seq`` is passed through ``revcom.revcom``
            before being sent to bcftools.

    Returns:
        str: the last run of ``[ATCG]`` in the output that is at least
        ``len(seq) - 10`` long (or ``len(seq)`` long when that would be
        below 10). If no line matches or the command fails, a string of
        ``'N'`` with the same length as ``seq``.
    """
    bcftoolspath = subprocesspath.subprocesspath(bcftoolspath)
    bcffile = subprocesspath.subprocesspath(bcffile)

    # Minimum accepted run length: tolerate up to 10 bases of shortening,
    # except for short probes where the full length is required.
    minlen = len(seq) - 10
    if minlen < 10:
        minlen = len(seq)
    pat = re.compile('[ATCG]{' + str(minlen) + ',}')

    if strand == '-':
        seq = revcom.revcom(seq)

    # The FASTA record goes to bcftools over stdin rather than through a
    # shell `echo`: whether echo expands "\n" depends on the shell, and this
    # keeps probe-file fields out of the shell command line.
    fasta = '>' + chrom + ':' + start + '-' + end + '\n' + seq + '\n'
    bcfcon_command = ' '.join([bcftoolspath, 'consensus -s', sample, bcffile])

    consensus = 'N' * len(seq)
    try:
        # The context manager closes the pipes and reaps the child process.
        with Popen(bcfcon_command, shell=True, stdin=PIPE, stdout=PIPE) as p:
            output, _ = p.communicate(fasta.encode('utf-8'))
        for raw_line in output.splitlines():
            match = pat.search(raw_line.decode('utf-8'))
            if match:
                consensus = match[0]
    except Exception:
        # Best effort: report the command and fall back to what we have.
        print("warnning: ", bcfcon_command, " ##")

    return str(consensus)
def probestrtoconsensus(bcfconsensusruner):
    """Compute the consensus sequence for one tab-separated probe record.

    Args:
        bcfconsensusruner: object exposing ``probestr`` (a six-field,
            tab-separated probe line: chrom, start, end, sequence, score,
            strand), ``bcftoolspath``, ``bcffile`` and ``sample``.

    Returns:
        dict: ``{'probestr': <the original probestr>,
        'consensusprobe': <result of getconsensus>}``.

    Raises:
        ValueError: if the record does not split into exactly six fields.
    """
    record = bcfconsensusruner.probestr
    chrom, start, end, seq, score, strand = str(record).rstrip().split("\t")

    # Minus-strand probes are passed through revcom first, so getconsensus
    # is always called with its default '+' strand.
    if strand == '-':
        seq = revcom.revcom(seq)

    consensusprobe = getconsensus(
        bcftoolspath=bcfconsensusruner.bcftoolspath,
        bcffile=bcfconsensusruner.bcffile,
        chrom=chrom,
        start=start,
        end=end,
        seq=seq,
        sample=bcfconsensusruner.sample,
    )
    return {'probestr': record, 'consensusprobe': consensusprobe}
def saveProbe(self):
    """Write one BED file of randomly sampled probes per table row.

    For every row of ``self.tableWidget`` the cells are read as:
    column 0 chromosome, 1 start, 2 end, 3 filename prefix, 5 number of
    probes to pick, 8 strand choice (``'+'``, ``'-'`` or ``'Both'``).
    Probes from ``self.probes`` on that chromosome whose ``Kb`` value lies
    strictly between start and end are sampled without replacement, given
    ``Length`` and ``Strand`` columns, and written headerless and
    tab-separated to ``<prefix>_<chr>_<start>_<end>.bed`` in
    ``self.projectdir``.

    If the project-directory label is empty, ``self.setProjetDir()`` is
    called first.
    """
    n_rows = self.tableWidget.rowCount()
    if not self.label_prodir.text():
        self.setProjetDir()

    table = self.tableWidget
    for row_idx in range(n_rows):
        region_chr = table.item(row_idx, 0).text()
        region_start = int(table.item(row_idx, 1).text())
        region_end = int(table.item(row_idx, 2).text())
        prefix = table.item(row_idx, 3).text()
        # Number of probes chosen for this region.
        n_chosen = int(table.item(row_idx, 5).text())
        strand_choice = table.item(row_idx, 8).text()
        print(n_chosen)

        # Restrict to probes on this chromosome strictly inside the region.
        pool = self.probes
        in_region = (
            (pool.Chr == region_chr)
            & (pool.Kb > region_start)
            & (pool.Kb < region_end)
        )
        candidates = pool[in_region]

        picked = sample(list(candidates.index), n_chosen)
        chosen = candidates.loc[picked].drop('Kb', axis=1)
        if self.bedformat == 6:
            chosen = chosen.drop(['Keep', 'Kmerscore'], axis=1)

        out_name = f"{prefix}_{region_chr}_{region_start}_{region_end}" + '.bed'
        out_path = os.path.join(self.projectdir, out_name)

        chosen['Length'] = chosen.Seq.map(len)

        if strand_choice == '-':
            chosen['Seq'] = chosen.Seq.map(revcom)
            chosen['Strand'] = '-'
        elif strand_choice == '+':
            chosen['Strand'] = '+'
        elif strand_choice == 'Both':
            # Alternate strands probe by probe, starting with '+'.
            strands = ['+' if k % 2 == 0 else '-'
                       for k in range(len(chosen.Seq))]
            seqs = [s if sign == '+' else revcom(s)
                    for s, sign in zip(chosen.Seq, strands)]
            chosen['Seq'] = seqs
            chosen['Strand'] = strands

        chosen.to_csv(path_or_buf=out_path, sep='\t', index=False,
                      index_label=False, header=False)