Exemple #1
0
def bed_tofasta(bed,
                ref_fasta,
                min_size=50,
                stranded=True,
                include_name=False,
                out=sys.stdout):
    """
    Write the reference sequence under each region of {bed} to {out} as FASTA.

    bed          -- iterable of regions; each one must expose chrom, start,
                    end and strand (and name when include_name is set)
    ref_fasta    -- path of the reference FASTA file; if '<ref_fasta>.fai'
                    does not exist it is created with pysam.faidx()
    min_size     -- regions with (end - start) below this are skipped
    stranded     -- when set and the region has a non-empty strand, regions
                    on '-' are passed through revcomp() and the strand is
                    added to the header as '[<strand>]'
    include_name -- prefix every header with '<region.name>|'
    out          -- object with a write() method (default: sys.stdout)

    Regions whose chrom is not listed in the '.fai' index are skipped.
    Headers look like '>[name|]chrom:start-end[[strand]]'.
    """
    if not os.path.exists('%s.fai' % ref_fasta):
        pysam.faidx(ref_fasta)

    fasta = pysam.Fastafile(ref_fasta)

    # Everything that uses the open handle lives inside try/finally so that
    # it is closed even when reading the index, fetching or writing fails.
    try:
        # First column of each .fai line is a reference name.
        refs = set()
        with open('%s.fai' % ref_fasta) as f:
            for line in f:
                refs.add(line.split('\t')[0].strip())

        name = ''
        for region in bed:
            if include_name:
                name = '%s|' % (region.name.strip())

            if region.end - region.start >= min_size and region.chrom in refs:
                seq = fasta.fetch(region.chrom, region.start, region.end)
                if stranded and region.strand:
                    if region.strand == '-':
                        seq = revcomp(seq)
                    out.write('>%s%s:%d-%d[%s]\n%s\n' %
                              (name, region.chrom, region.start, region.end,
                               region.strand, seq))
                else:
                    out.write('>%s%s:%d-%d\n%s\n' %
                              (name, region.chrom, region.start, region.end,
                               seq))
    finally:
        fasta.close()
Exemple #2
0
def bed_tofasta(bed, ref_fasta, min_size=50, stranded=True, include_name=False, out=sys.stdout):
    """
    Write the reference sequence under each region of {bed} to {out} as FASTA.

    bed          -- iterable of regions; each one must expose chrom, start,
                    end and strand (and name when include_name is set)
    ref_fasta    -- path of the reference FASTA file; if '<ref_fasta>.fai'
                    does not exist it is created with pysam.faidx()
    min_size     -- regions with (end - start) below this are skipped
    stranded     -- when set and the region has a non-empty strand, regions
                    on '-' are passed through revcomp() and the strand is
                    added to the header as '[<strand>]'
    include_name -- prefix every header with '<region.name>|'
    out          -- object with a write() method (default: sys.stdout)

    Regions whose chrom is not listed in the '.fai' index are skipped.
    Headers look like '>[name|]chrom:start-end[[strand]]'.
    """
    if not os.path.exists('%s.fai' % ref_fasta):
        pysam.faidx(ref_fasta)

    fasta = pysam.Fastafile(ref_fasta)

    # Everything that uses the open handle lives inside try/finally so that
    # it is closed even when reading the index, fetching or writing fails.
    try:
        # First column of each .fai line is a reference name.
        refs = set()
        with open('%s.fai' % ref_fasta) as f:
            for line in f:
                refs.add(line.split('\t')[0].strip())

        name = ''
        for region in bed:
            if include_name:
                name = '%s|' % (region.name.strip())

            if region.end - region.start >= min_size and region.chrom in refs:
                seq = fasta.fetch(region.chrom, region.start, region.end)
                if stranded and region.strand:
                    if region.strand == '-':
                        seq = revcomp(seq)
                    out.write('>%s%s:%d-%d[%s]\n%s\n' % (name, region.chrom, region.start, region.end, region.strand, seq))
                else:
                    # Five placeholders for five values: the unstranded header
                    # carries no strand field, so there is no extra '%s' after
                    # the coordinates.
                    out.write('>%s%s:%d-%d\n%s\n' % (name, region.chrom, region.start, region.end, seq))
    finally:
        fasta.close()
Exemple #3
0
    def alleles(self):
        """
        Return the entries of self.observed (a '/'-separated string) as a
        list, in their original order.

        When self.strand is '-', every entry other than the literal '-' is
        passed through revcomp(); all other entries are returned unchanged.
        """
        return [
            revcomp(allele) if allele != '-' and self.strand == '-' else allele
            for allele in self.observed.split('/')
        ]
Exemple #4
0
    def alleles(self):
        """
        Split self.observed on '/' and return the pieces as a list.

        Pieces are passed through revcomp() when self.strand is '-', except
        for the literal '-' piece, which is always returned as is.
        """
        def oriented(allele):
            # Leave the allele alone unless it is a real sequence on '-'.
            if allele == '-' or self.strand != '-':
                return allele
            return revcomp(allele)

        return [oriented(allele) for allele in self.observed.split('/')]
Exemple #5
0
    def alleles(self):
        """
        Return the "/"-separated entries of self.observed as a list.

        Entries keep their order. If self.strand is "-", each entry that is
        not the literal "-" is replaced by revcomp() of that entry.
        """
        observed = self.observed.split("/")
        result = list(observed)

        for idx, allele in enumerate(observed):
            if allele != "-" and self.strand == "-":
                result[idx] = revcomp(allele)

        return result
Exemple #6
0
def write_fasta(read, out=sys.stdout, colorspace=False):
    """
    Write {read} to {out} as a two-line FASTA record ('>qname' + sequence).

    The sequence is read.seq, or the value of read.opt('CS') when
    {colorspace} is set. If the read is not unmapped and is_reverse is set,
    the sequence is flipped first: plain string reversal for colorspace,
    revcomp() otherwise.
    """
    seq = read.opt('CS') if colorspace else read.seq

    if not read.is_unmapped and read.is_reverse:
        seq = seq[::-1] if colorspace else revcomp(seq)

    out.write('>%s\n%s\n' % (read.qname, seq))
Exemple #7
0
def write_fasta(read, out=sys.stdout, colorspace=False):
    """
    Emit one FASTA record for {read} on {out}: '>' + read.qname, then the
    sequence on the next line.

    With {colorspace} the sequence comes from read.opt('CS'), otherwise from
    read.seq. A read that is not unmapped and has is_reverse set gets its
    sequence reversed (colorspace) or run through revcomp() (otherwise)
    before it is written.
    """
    if colorspace:
        seq = read.opt('CS')
    else:
        seq = read.seq

    needs_flip = not read.is_unmapped and read.is_reverse
    if needs_flip and colorspace:
        seq = seq[::-1]
    elif needs_flip:
        seq = revcomp(seq)

    record = '>%s\n%s\n' % (read.qname, seq)
    out.write(record)
Exemple #8
0
def write_fastq(read, out=sys.stdout, colorspace=False):
    """
    Write {read} to {out} as a four-line FASTQ record
    ('@qname', sequence, '+', qualities).

    Sequence and qualities are read.seq / read.qual, or the values of
    read.opt('CS') / read.opt('CQ') when {colorspace} is set. If the read is
    not unmapped and is_reverse is set, the sequence is flipped (string
    reversal for colorspace, revcomp() otherwise) and the quality string is
    reversed.
    """
    if colorspace:
        seq, qual = read.opt('CS'), read.opt('CQ')
    else:
        seq, qual = read.seq, read.qual

    if not read.is_unmapped and read.is_reverse:
        seq = seq[::-1] if colorspace else revcomp(seq)
        qual = qual[::-1]

    out.write('@%s\n%s\n+\n%s\n' % (read.qname, seq, qual))
Exemple #9
0
def write_fastq(read, out=sys.stdout, colorspace=False):
    """
    Emit one FASTQ record for {read} on {out}: '@' + read.qname, the
    sequence, a '+' line and the quality string.

    With {colorspace} the sequence and qualities come from read.opt('CS')
    and read.opt('CQ'); otherwise from read.seq and read.qual. A read that
    is not unmapped and has is_reverse set is written with its sequence
    reversed (colorspace) or run through revcomp() (otherwise), and with its
    quality string reversed.
    """
    seq = read.opt('CS') if colorspace else read.seq
    qual = read.opt('CQ') if colorspace else read.qual

    needs_flip = not read.is_unmapped and read.is_reverse
    if needs_flip:
        if colorspace:
            seq = seq[::-1]
        else:
            seq = revcomp(seq)
        qual = qual[::-1]

    record = '@%s\n%s\n+\n%s\n' % (read.qname, seq, qual)
    out.write(record)
Exemple #10
0
def check_tags(barcodes, seq, edit, pos, allow_revcomp=False, verbose=False):
    """
    For each barcode, pull out the appropriate 5' or 3' sub sequence from {seq}. Then
    run a local alignment of the barcode to the subseq. If a good match is found, return
    it; otherwise, find the best match and return that.

    {barcodes} maps each tag to a 3-item sequence (barcode sequence, orientation, strip).
    Orientation "5" tests the start of {seq}; anything else tests the end. The strip
    item is not used here. The sub sequence tested is at most len(barcode) + edit + pos
    characters long.

    With {allow_revcomp}, the reverse complement of each barcode is also aligned, against
    the opposite end of {seq}.

    returns a multi-tuple:
        valid, (tag, aln, forward, reason_if_fail)

    where aln is the object returned by sw.align() and forward is True when the barcode
    was aligned as given, False when its reverse complement was aligned. When nothing
    passes _tag_aln_check(), valid is False and the tuple describes the highest scoring
    alignment seen (the tuple is None when {barcodes} is empty).

    For the alignments, the reference is the barcode, the query is the subset of the read
    that is possibly the barcode (5'/3' subseq)
    """

    best = None

    for tag in barcodes:
        barcodeseq, orientation, _strip = barcodes[tag]
        window = len(barcodeseq) + edit + pos

        if orientation == "5":
            testseq = seq[:window]
        else:
            testseq = seq[-1 * window :]

        aln = sw.align(barcodeseq, testseq)
        valid, reason = _tag_aln_check(aln, len(testseq), len(barcodeseq), orientation, edit, pos)
        if verbose:
            # Single-argument, parenthesised prints give the same output under the
            # print statement and the print function.
            print("Testing tag: %s vs %s" % (str(barcodes[tag]), testseq))
            aln.dump()
            print("%s %s" % (valid, reason))
        if valid:
            return True, (tag, aln, True, "")

        if best is None or aln.score > best[1].score:
            best = (tag, aln, True, reason)

        if allow_revcomp:
            # The reverse-complemented barcode is expected at the other end of the read.
            if orientation == "5":
                testseq = seq[-1 * window :]
            else:
                testseq = seq[:window]

            aln = sw.align(revcomp(barcodeseq), testseq)
            valid, reason = _tag_aln_check(
                aln, len(testseq), len(barcodeseq), "5" if orientation == "3" else "3", edit, pos
            )
            if verbose:
                print("Testing tag: %s [rc] vs %s" % (str(barcodes[tag]), testseq))
                aln.dump()
                print("%s %s" % (valid, reason))
            if valid:
                return True, (tag, aln, False, "")

            if best is None or aln.score > best[1].score:
                best = (tag, aln, False, reason)

    if verbose:
        print("BEST:  %s" % (best,))
        # best stays None when there were no barcodes to test; nothing to dump then.
        if best is not None:
            best[1].dump()

    return False, best
Exemple #11
0
def check_tags(barcodes, seq, edit, pos, allow_revcomp=False, verbose=False):
    '''
    For each barcode, pull out the appropriate 5' or 3' sub sequence from {seq}. Then
    run a local alignment of the barcode to the subseq. If a good match is found, return
    it; otherwise, find the best match and return that.

    {barcodes} maps each tag to a 3-item sequence (barcode sequence, orientation, strip).
    Orientation "5" tests the start of {seq}; anything else tests the end. The strip
    item is not used here. The sub sequence tested is at most len(barcode) + edit + pos
    characters long.

    With {allow_revcomp}, the reverse complement of each barcode is also aligned, against
    the opposite end of {seq}.

    returns a multi-tuple:
        valid, (tag, aln, forward, reason_if_fail)

    where aln is the object returned by sw.align() and forward is True when the barcode
    was aligned as given, False when its reverse complement was aligned. When nothing
    passes _tag_aln_check(), valid is False and the tuple describes the highest scoring
    alignment seen (the tuple is None when {barcodes} is empty).

    For the alignments, the reference is the barcode, the query is the subset of the read
    that is possibly the barcode (5'/3' subseq)
    '''

    best = None

    for tag in barcodes:
        barcodeseq, orientation, _strip = barcodes[tag]
        window = len(barcodeseq) + edit + pos

        if orientation == '5':
            testseq = seq[:window]
        else:
            testseq = seq[-1 * window:]

        aln = sw.align(barcodeseq, testseq)
        valid, reason = _tag_aln_check(aln, len(testseq), len(barcodeseq),
                                       orientation, edit, pos)
        if verbose:
            # Single-argument, parenthesised prints give the same output under the
            # print statement and the print function.
            print('Testing tag: %s vs %s' % (str(barcodes[tag]), testseq))
            aln.dump()
            print('%s %s' % (valid, reason))
        if valid:
            return True, (tag, aln, True, '')

        if best is None or aln.score > best[1].score:
            best = (tag, aln, True, reason)

        if allow_revcomp:
            # The reverse-complemented barcode is expected at the other end of the read.
            if orientation == '5':
                testseq = seq[-1 * window:]
            else:
                testseq = seq[:window]

            aln = sw.align(revcomp(barcodeseq), testseq)
            valid, reason = _tag_aln_check(aln, len(testseq), len(barcodeseq),
                                           '5' if orientation == '3' else '3',
                                           edit, pos)
            if verbose:
                print('Testing tag: %s [rc] vs %s' % (str(
                    barcodes[tag]), testseq))
                aln.dump()
                print('%s %s' % (valid, reason))
            if valid:
                return True, (tag, aln, False, '')

            if best is None or aln.score > best[1].score:
                best = (tag, aln, False, reason)

    if verbose:
        print('BEST:  %s' % (best,))
        # best stays None when there were no barcodes to test; nothing to dump then.
        if best is not None:
            best[1].dump()

    return False, best