Esempio n. 1
0
def _merge_records(vcf, cpx_records, cpx_record_ids):
    """
    Merge VCF records and complex-event records into one ordered stream.

    This is a single merge pass: records are compared head-to-head by
    chromosome (via ``svu.is_smaller_chrom``) and then by ``pos``, so both
    inputs are expected to arrive already ordered that way.  When two heads
    share chromosome and position, the VCF record is emitted first.

    Parameters
    ----------
    vcf : iterator
        Source of records exposing ``id``, ``chrom`` and ``pos``
        (presumably pysam.VariantRecord).  Advanced with ``next()``.
    cpx_records : deque-like
        Complex-event records exposing ``chrom`` and ``pos``.  Heads are
        removed with ``popleft()`` while merging; whatever is left once the
        VCF runs out is iterated in place.
    cpx_record_ids : container
        IDs of VCF records that must be dropped from the output.

    Yields
    ------
    record
        The next record in merged order.  ``None`` is never yielded, even
        when both inputs are empty.
    """
    def _next_record():
        try:
            return next(vcf)
        except StopIteration:
            return None

    def _next_cpx():
        try:
            return cpx_records.popleft()
        except IndexError:
            return None

    # Initialize merge
    curr_record = _next_record()
    curr_cpx = _next_cpx()
    while curr_record is not None and curr_cpx is not None:
        # Remove VCF records that were included in complex event
        if curr_record.id in cpx_record_ids:
            curr_record = _next_record()
            continue
        # Merge sort remaining
        if curr_record.chrom == curr_cpx.chrom:
            if curr_record.pos <= curr_cpx.pos:
                yield curr_record
                curr_record = _next_record()
            else:
                yield curr_cpx
                curr_cpx = _next_cpx()
        elif svu.is_smaller_chrom(curr_record.chrom, curr_cpx.chrom):
            yield curr_record
            curr_record = _next_record()
        else:
            yield curr_cpx
            curr_cpx = _next_cpx()

    # At least one side is exhausted here.  Drain whichever side still holds
    # a record; if both are exhausted there is nothing left to emit (the
    # sentinel None must not leak into the output).
    if curr_cpx is not None:
        for cpx in itertools.chain([curr_cpx], cpx_records):
            yield cpx
    elif curr_record is not None:
        for record in itertools.chain([curr_record], vcf):
            if record.id not in cpx_record_ids:
                yield record
Esempio n. 2
0
def _merge_records(vcf, cpx_records, cpx_record_ids):
    """
    Interleave VCF records with complex-event records in a single pass.

    Heads of the two inputs are compared by chromosome (through
    ``svu.is_smaller_chrom``) and then by ``pos``; on an exact tie the VCF
    record goes first.  VCF records whose ``id`` appears in
    ``cpx_record_ids`` are skipped entirely.

    Parameters
    ----------
    vcf : iterator
        Records exposing ``id``, ``chrom`` and ``pos``; advanced with
        ``next()``.
    cpx_records : deque-like
        Records exposing ``chrom`` and ``pos``; emptied from the left with
        ``popleft()``.
    cpx_record_ids : container
        IDs of VCF records to leave out of the output.

    Yields
    ------
    record
        The next record in merged order.
    """
    def _advance_vcf():
        # Pull from the VCF until a record that is not part of a complex
        # event turns up, or the VCF is exhausted (signalled by None).
        while True:
            candidate = next(vcf, None)
            if candidate is None or candidate.id not in cpx_record_ids:
                return candidate

    def _advance_cpx():
        # None signals that no complex records remain.
        try:
            head = cpx_records.popleft()
        except IndexError:
            head = None
        return head

    vcf_head = _advance_vcf()
    cpx_head = _advance_cpx()

    # Both sides still have a record: decide which head goes out first.
    while vcf_head is not None and cpx_head is not None:
        if vcf_head.chrom != cpx_head.chrom:
            vcf_goes_first = svu.is_smaller_chrom(vcf_head.chrom,
                                                  cpx_head.chrom)
        else:
            vcf_goes_first = vcf_head.pos <= cpx_head.pos

        if vcf_goes_first:
            yield vcf_head
            vcf_head = _advance_vcf()
        else:
            yield cpx_head
            cpx_head = _advance_cpx()

    # At most one of the two loops below has anything to do.
    while vcf_head is not None:
        yield vcf_head
        vcf_head = _advance_vcf()
    while cpx_head is not None:
        yield cpx_head
        cpx_head = _advance_cpx()
Esempio n. 3
0
    def standardize_info(self, std_rec, raw_rec):
        """
        Fill in the standardized INFO fields of a Lumpy record.

        Writes SVTYPE, STRANDS, CHR2, SVLEN and ALGORITHMS into
        ``std_rec.info`` and assigns ``std_rec.stop``.  For BND records the
        mate position is parsed from the first ALT; when
        ``is_smaller_chrom(chrom, chr2)`` is false the two breakends are
        swapped, which also rewrites ``std_rec.pos`` / ``std_rec.chrom`` and
        reverses the strand string.

        Parameters
        ----------
        std_rec : record
            Record being standardized; modified in place.
        raw_rec : record
            Source record the values are read from.

        Returns
        -------
        std_rec : record
            The same object that was passed in.
        """

        std_rec.info['SVTYPE'] = raw_rec.info['SVTYPE']

        # Keep only the text before the first ':' of the first STRANDS entry
        # (drops the per-strand count)
        first_strands_entry = raw_rec.info['STRANDS'][0]
        std_rec.info['STRANDS'] = first_strands_entry.partition(':')[0]

        # Work out the second breakend
        if std_rec.info['SVTYPE'] != 'BND':
            chr2, end = raw_rec.chrom, raw_rec.stop
        else:
            chr2, end = parse_bnd_pos(std_rec.alts[0])

            if not is_smaller_chrom(std_rec.chrom, chr2):
                # Exchange pos <-> end
                previous_pos = std_rec.pos
                std_rec.pos = end
                end = previous_pos
                # Exchange chrom <-> chr2
                previous_chrom = std_rec.chrom
                std_rec.chrom = chr2
                chr2 = previous_chrom
                # Strands follow the breakends
                std_rec.info['STRANDS'] = std_rec.info['STRANDS'][::-1]

        std_rec.info['CHR2'] = chr2
        std_rec.stop = end

        # SVLEN is only a real length when both breakends share a chromosome
        same_chrom = std_rec.chrom == std_rec.info['CHR2']
        std_rec.info['SVLEN'] = end - std_rec.pos if same_chrom else -1

        std_rec.info['ALGORITHMS'] = ['smoove']

        return std_rec
Esempio n. 4
0
    def standardize_info(self, std_rec, raw_rec):
        """
        Fill in the standardized fields of a Delly record.

        Steps, in order:

        1) SVTYPE 'TRA' is reported as 'BND'.
        2) ALT becomes '<SVTYPE>' unless it already contains '<'; REF is
           set to 'N'.
        3) Delly's 'CT' orientation is translated into STRANDS.
        4) For BND records, the breakends are swapped when
           ``is_smaller_chrom(chrom, chr2)`` is false.
        5) END, CHR2, SVLEN and ALGORITHMS are written.

        Parameters
        ----------
        std_rec : record
            Record being standardized; modified in place.
        raw_rec : record
            Source record the values are read from.

        Returns
        -------
        std_rec : record
            The same object that was passed in.

        Raises
        ------
        Exception
            If 'CT' is not a recognized orientation ('NtoN' is accepted
            only for INS records).
        """

        # TRA is reported as BND
        raw_type = raw_rec.info['SVTYPE']
        svtype = 'BND' if raw_type == 'TRA' else raw_type
        std_rec.info['SVTYPE'] = svtype

        # The stop is captured before ALT/REF are rewritten and written back
        # afterwards -- presumably because editing them can shift the
        # record's stop (TODO confirm against pysam).
        saved_stop = std_rec.stop
        if "<" not in std_rec.alts[0]:
            std_rec.alts = ('<' + svtype + '>', )
        std_rec.ref = "N"
        std_rec.stop = saved_stop

        # Translate Delly's connection type into strand notation
        ct_to_strands = {
            '5to3': '-+',
            '3to3': '++',
            '5to5': '--',
            '3to5': '+-',
        }
        raw_strands = raw_rec.info['CT']
        if raw_strands in ct_to_strands:
            strands = ct_to_strands[raw_strands]
        elif raw_strands == 'NtoN' and svtype == 'INS':
            strands = '+-'
        else:
            msg = 'Improper strands ({0}) in record {1}'
            raise Exception(msg.format(raw_strands, raw_rec.id))

        std_rec.info['STRANDS'] = strands

        # A zero position is bumped to 1
        pos = raw_rec.pos
        end = raw_rec.stop
        pos = 1 if pos == 0 else pos

        if svtype != 'BND':
            chr2 = raw_rec.chrom
        else:
            chrom = raw_rec.chrom
            chr2 = raw_rec.info['CHR2']

            if not is_smaller_chrom(chrom, chr2):
                end = 1 if end == 0 else end
                # Exchange pos <-> end
                std_rec.pos = end
                end = pos
                # Exchange chrom <-> chr2
                std_rec.chrom = chr2
                chr2 = chrom
                # Strands follow the breakends
                std_rec.info['STRANDS'] = strands[::-1]

        # Record END and CHR2
        std_rec.stop = end
        std_rec.info['CHR2'] = chr2

        # SVLEN is only a real length when both breakends share a chromosome
        same_chrom = std_rec.chrom == std_rec.info['CHR2']
        std_rec.info['SVLEN'] = (std_rec.stop - std_rec.pos) if same_chrom else -1

        std_rec.info['ALGORITHMS'] = ['delly']

        return std_rec
Esempio n. 5
0
 def __lt__(self, other):
     """
     Order two objects by their ``tup`` attribute.

     ``tup[0]`` is compared first: when the two differ, the result is
     whatever ``is_smaller_chrom`` returns for them (so ``tup[0]`` is
     presumably a chromosome name).  When they are equal, ``tup[1]`` is
     converted to ``int`` and compared numerically.
     """
     own_key, other_key = self.tup[0], other.tup[0]
     if own_key != other_key:
         return is_smaller_chrom(own_key, other_key)
     # Same first element: fall back to a numeric comparison
     return int(self.tup[1]) < int(other.tup[1])
Esempio n. 6
0
 def __lt__(self, other):
     """
     Order two objects by the ``chrom`` and ``pos`` of their ``record``.

     Records on different chromosomes are ordered by
     ``svu.is_smaller_chrom``; records on the same chromosome are ordered
     by ``pos``.
     """
     mine, theirs = self.record, other.record
     if mine.chrom != theirs.chrom:
         return svu.is_smaller_chrom(mine.chrom, theirs.chrom)
     # Same chromosome: position decides
     return mine.pos < theirs.pos
Esempio n. 7
0
    def standardize_info(self, std_rec, raw_rec):
        """
        Standardize Manta record.

        1) Replace colons in ID with underscores (otherwise breaks VCF parsing)
        2) Define CHR2 and END
        3) Add strandedness
        4) Add SVLEN
        5) Add ALGORITHMS

        Parameters
        ----------
        std_rec : record
            Record being standardized; modified in place.
        raw_rec : record
            Source record the values are read from.

        Returns
        -------
        std_rec : record
            The same object that was passed in.

        Raises
        ------
        ValueError
            If SVTYPE is not one of INV, BND, DEL, DUP or INS, since no
            strandedness is defined for it.  The ID, SVTYPE, CHR2 and END
            of ``std_rec`` have already been written at that point.
        """

        # Colons in the ID can break parsing
        std_rec.id = std_rec.id.replace(':', '_')

        svtype = raw_rec.info['SVTYPE']
        std_rec.info['SVTYPE'] = svtype

        # Define CHR2 and END
        if svtype == 'BND':
            chr2, end = parse_bnd_pos(raw_rec.alts[0])
            chrom, pos = raw_rec.chrom, raw_rec.pos
            # Swap the two breakends when is_smaller_chrom rejects the
            # current order
            if not is_smaller_chrom(chrom, chr2):
                pos, end = end, pos
                chrom, chr2 = chr2, chrom
                std_rec.pos = pos
                std_rec.chrom = chrom
        elif svtype == 'INS':
            # Insertions get an END one past POS
            chr2 = raw_rec.chrom
            end = raw_rec.pos + 1
        else:
            chr2 = raw_rec.chrom
            end = raw_rec.stop

        std_rec.info['CHR2'] = chr2
        std_rec.stop = end

        # Strand parsing
        if svtype == 'INV':
            # Orientation is chosen by the presence of the INV3 INFO key
            if 'INV3' in raw_rec.info.keys():
                strands = '++'
            else:
                strands = '--'
        elif svtype == 'BND':
            strands = parse_bnd_strands(raw_rec.alts[0])
        elif svtype == 'DEL':
            strands = '+-'
        elif svtype == 'DUP':
            strands = '-+'
        elif svtype == 'INS':
            strands = '+-'
        else:
            # Without this branch `strands` would be unbound below and the
            # failure would surface as an opaque UnboundLocalError.
            msg = 'Unsupported SVTYPE ({0}) in record {1}'
            raise ValueError(msg.format(svtype, raw_rec.id))

        # NOTE(review): for BND records the breakends were already reordered
        # above, so this check runs on the reordered pair -- confirm that
        # strands are reversed in every case where that is intended.
        if not is_smaller_chrom(std_rec.chrom, std_rec.info['CHR2']):
            strands = strands[::-1]
        std_rec.info['STRANDS'] = strands

        # SVLEN: -1 for inter-chromosomal BNDs, the reported value (or -1)
        # for insertions, otherwise the span of the record
        if svtype == 'BND' and std_rec.chrom != std_rec.info['CHR2']:
            std_rec.info['SVLEN'] = -1
        elif svtype == 'INS':
            std_rec.info['SVLEN'] = raw_rec.info.get('SVLEN', -1)
        else:
            std_rec.info['SVLEN'] = std_rec.stop - std_rec.pos

        std_rec.info['ALGORITHMS'] = ['manta']

        return std_rec