Ejemplo n.º 1
0
def main():
    """Filter a probe table by score and spacing and write the survivors.

    Options come from ``check_options(get_options())``. ``args.input`` is read
    as a header-less, tab-separated table with the columns chrom, start, end,
    sequence, score and strand. A row is kept when its score lies strictly
    between ``args.mink`` and ``args.maxk`` and strictly between the
    ``args.minquantile`` and ``args.maxquantile`` quantiles of all scores.

    For every chromosome the remaining rows are visited in ascending start
    order; a row is written to ``args.output`` only when its start is more
    than ``args.dis`` beyond the start of the previously written row (the
    reference position begins at 0 for each chromosome, so rows starting at or
    before ``args.dis`` are never written). When ``args.strand`` is set, every
    second written row of a chromosome is emitted as the reverse complement
    with strand '-'.

    Any error inside the processing step prints the original user-facing
    message, reports the underlying exception on stderr and exits with
    status 1.
    """
    args = check_options(get_options())

    try:
        probe = pd.read_table(
            args.input,
            names=("chrom", "start", "end", "sequence", "score", "strand"),
            dtype={"chrom": str, "start": int, "end": int,
                   "sequence": str, "score": int, "strand": str})

        ftprobe = probe[((probe.score > args.mink) & (probe.score < args.maxk) & (
                    probe.score > probe.score.quantile(args.minquantile)) & (
                                 probe.score < probe.score.quantile(args.maxquantile)))]

        # The unfiltered table is no longer needed; release it before the loop.
        del probe

        # Compared with ``==`` on purpose so non-bool option values behave
        # exactly as they did before.
        alternate_strand = args.strand == True  # noqa: E712

        # The context manager guarantees the output file is closed even when
        # an error interrupts the loop.
        with open(args.output, 'w') as outio:
            for chrom in ftprobe.chrom.unique():
                prestart = 0
                strandfr = 1
                on_chrom = ftprobe[ftprobe.chrom == chrom].sort_values(by='start')

                for rec in on_chrom.itertuples(index=False):
                    if rec.start <= prestart + args.dis:
                        continue

                    # Work on plain locals instead of writing into the row by
                    # integer position, which pandas no longer guarantees to
                    # address the 'sequence' / 'strand' fields.
                    sequence = rec.sequence
                    strand = rec.strand
                    if alternate_strand and strandfr % 2 == 0:
                        strand = '-'
                        sequence = revcom(sequence)

                    strandfr += 1
                    prestart = rec.start

                    print(rec.chrom, rec.start, rec.end, sequence, rec.score,
                          strand, sep='\t', file=outio)

        print("Filtered finished!")

    except Exception as err:
        # ``Exception`` (not a bare except) lets Ctrl-C and SystemExit through.
        print("Probe file format error. Please check your probe file!")
        print(err, file=sys.stderr)
        sys.exit(1)
Ejemplo n.º 2
0
def getconsensus(bcftoolspath,
                 bcffile,
                 chrom,
                 start,
                 end,
                 seq,
                 sample,
                 strand='+'):
    """
    Get the consensus sequence of one probe by running ``bcftools consensus``.

    A single FASTA record named ``chrom:start-end`` containing ``seq`` is
    written to the standard input of
    ``<bcftoolspath> consensus -s <sample> <bcffile>``. Each output line is
    searched for a run of A/T/C/G of at least ``len(seq) - 10`` characters
    (``len(seq)`` when that would be below 10); the match from the last
    matching line is returned.

    Args:
        bcftoolspath: bcftools executable, passed through
            ``subprocesspath.subprocesspath`` before use.
        bcffile: variant file, passed through
            ``subprocesspath.subprocesspath`` before use.
        chrom: sequence name used in the FASTA header.
        start: region start used in the FASTA header (str or int).
        end: region end used in the FASTA header (str or int).
        seq: probe sequence.
        sample: sample name given to ``-s``.
        strand: when '-', ``seq`` is reverse-complemented first.

    Returns:
        The matched consensus as ``str``, or ``'N' * len(seq)`` when no line
        matches or the command could not be run.
    """
    bcftoolspath = subprocesspath.subprocesspath(bcftoolspath)
    bcffile = subprocesspath.subprocesspath(bcffile)

    mathlen = len(seq) - 10
    if mathlen < 10:
        mathlen = len(seq)
    pat = re.compile('[ATCG]{' + str(mathlen) + ',}')

    if strand == '-':
        seq = revcom.revcom(seq)

    # The record goes to bcftools over stdin instead of through
    # ``echo '...\n...' |``: that relied on the shell's echo expanding ``\n``
    # and placed chrom/seq text on the shell command line.
    fasta = '>' + chrom + ':' + str(start) + '-' + str(end) + '\n' + seq + '\n'

    # NOTE(review): the paths and the sample name are still interpolated into
    # a shell command; they must come from trusted input.
    bcfcon_command = ' '.join([bcftoolspath, 'consensus -s', sample, bcffile])

    consensus = 'N' * len(seq)

    try:
        p = Popen(bcfcon_command, shell=True, stdin=PIPE, stdout=PIPE)
        # communicate() writes the input, closes the pipes and waits for the
        # child, so no process or file descriptor is left behind.
        output, _ = p.communicate(fasta.encode('utf-8'))

        for raw in output.split(b'\n'):
            found = pat.search(raw.decode('utf-8'))
            if found:
                consensus = found[0]
    except Exception:
        # Best effort: keep the all-N placeholder and report the command.
        print("warnning: ", bcfcon_command, " ##")

    return str(consensus)
Ejemplo n.º 3
0
def probestrtoconsensus(bcfconsensusruner):
    """Compute the consensus sequence for the probe line held by the runner.

    ``bcfconsensusruner.probestr`` is converted to ``str``, stripped of
    trailing whitespace and split on tabs into exactly six fields
    (chrom, start, end, seq, score, strand). A '-' strand probe is
    reverse-complemented before ``getconsensus`` is called with the runner's
    ``bcftoolspath``, ``bcffile`` and ``sample``.

    Returns:
        dict with 'probestr' (the runner's original probe string) and
        'consensusprobe' (the value returned by ``getconsensus``).
    """
    fields = str(bcfconsensusruner.probestr).rstrip().split("\t")
    chrom, start, end, seq, _score, strand = fields

    # getconsensus is called with its default '+' strand, so the sequence is
    # flipped here when the probe is on the minus strand.
    if strand == '-':
        seq = revcom.revcom(seq)

    consensus = getconsensus(
        bcftoolspath=bcfconsensusruner.bcftoolspath,
        bcffile=bcfconsensusruner.bcffile,
        chrom=chrom,
        start=start,
        end=end,
        seq=seq,
        sample=bcfconsensusruner.sample,
    )

    return {
        'probestr': bcfconsensusruner.probestr,
        'consensusprobe': consensus,
    }
Ejemplo n.º 4
0
    def saveProbe(self):
        """Write one BED file per table row with a random subset of probes.

        For each row of ``self.tableWidget`` the method reads the chromosome
        (column 0), start (1), end (2), colour (3), number of probes to pick
        (5) and strand choice (8). It selects the rows of ``self.probes`` on
        that chromosome whose ``Kb`` lies strictly between start and end,
        draws the requested number of them with ``sample``, drops helper
        columns, appends ``Length`` and ``Strand`` columns and saves the
        result, tab-separated without header or index, as
        ``<colour>_<chr>_<start>_<end>.bed`` inside ``self.projectdir``.

        Strand choice '-' reverse-complements every sequence, '+' leaves them
        unchanged, and 'Both' alternates '+' / '-' starting with '+'. Any
        other value leaves the table without a ``Strand`` column.
        """
        n_rows = self.tableWidget.rowCount()

        # Ask for a project directory when none has been chosen yet.
        if not self.label_prodir.text():
            self.setProjetDir()

        table = self.tableWidget
        for row_idx in range(n_rows):
            itchr = table.item(row_idx, 0).text()
            itstart = int(table.item(row_idx, 1).text())
            itend = int(table.item(row_idx, 2).text())
            itcolor = table.item(row_idx, 3).text()
            # number of probes chosen for this region
            itsp = int(table.item(row_idx, 5).text())
            itstrand = table.item(row_idx, 8).text()

            print(itsp)

            # Probes on this chromosome strictly inside (itstart, itend).
            all_probes = self.probes
            in_region = ((all_probes.Chr == itchr)
                         & (all_probes.Kb > itstart)
                         & (all_probes.Kb < itend))
            candidates = all_probes[in_region]

            picked_index = sample(list(candidates.index), itsp)
            selected = candidates.loc[picked_index].drop('Kb', axis=1)

            if self.bedformat == 6:
                selected = selected.drop('Keep', axis=1).drop('Kmerscore', axis=1)

            bed_name = '_'.join(
                [itcolor, itchr, str(itstart), str(itend)]) + '.bed'
            bed_path = os.path.join(self.projectdir, bed_name)

            selected['Length'] = selected.Seq.map(len)

            if itstrand == '-':
                selected['Seq'] = selected.Seq.map(revcom)
                selected['Strand'] = ['-'] * len(selected.Seq)
            elif itstrand == '+':
                selected['Strand'] = ['+'] * len(selected.Seq)
            elif itstrand == 'Both':
                # Even positions stay on '+', odd positions are flipped to '-'.
                sequences = list(selected.Seq)
                strands = ['+' if pos % 2 == 0 else '-'
                           for pos in range(len(sequences))]
                selected['Seq'] = [
                    one_seq if mark == '+' else revcom(one_seq)
                    for one_seq, mark in zip(sequences, strands)
                ]
                selected['Strand'] = strands

            selected.to_csv(bed_path,
                            sep='\t',
                            header=False,
                            index=False,
                            index_label=False)