예제 #1
0
def sv_end(pos, alt, svend=None, svlen=None):
    """Work out the end coordinate of a structural variant

    Starts from the END value (svend). For breakend style alt fields (those
    containing ':') the coordinate captured by BND_ALT_PATTERN is used instead.
    Finally, when svend equals pos and a non-zero svlen is available, the end
    is computed as pos + svlen; this last rule wins over the two above.

    Args:
        pos(int): start position of the variant
        alt(str): the alternative allele
        svend(int): END value, None if missing
        svlen(int): SVLEN value, None if missing

    Returns:
        coordinate(int): the end coordinate, None if nothing could be derived
    """
    coordinate = svend

    # Breakends carry the position of the other end inside the alt field
    bnd_match = BND_ALT_PATTERN.match(alt) if ":" in alt else None
    if bnd_match:
        coordinate = int(bnd_match.group(2))

    # Some callers set END to the same value as POS (e.g. insertions),
    # fall back on the length in those cases
    if svend == pos and svlen:
        coordinate = pos + svlen

    return coordinate
예제 #2
0
def get_end_chrom(alt, chrom):
    """Return the end chromosome for a translocation

    If alt does not contain ':' or does not match BND_ALT_PATTERN the variant
    chromosome is returned unchanged. Otherwise the chromosome captured from
    the alt field is run through CHR_PATTERN and its second group is returned.

    Args:
        alt(str): the alternative allele
        chrom(str): chromosome of the variant itself

    Returns:
        end_chrom(str)
    """
    if ":" in alt:
        bnd_match = BND_ALT_PATTERN.match(alt)
        # BND will often be translocations between different chromosomes
        if bnd_match:
            mate_chrom = bnd_match.group(1)
            return CHR_PATTERN.match(mate_chrom).group(2)

    return chrom
예제 #3
0
def get_end(pos, alt, category, snvend=None, svend=None, svlen=None):
    """Return the end coordinate for a variant

    For categories 'snv', 'indel' and 'cancer' snvend is returned as is. For
    category 'sv' the end is derived from svend, svlen and the alt field. Any
    other category gets pos as end.

    Args:
        pos(int): start position of the variant
        alt(str): the alternative allele
        category(str): variant category
        snvend: end used for snv, indel and cancer variants
        svend(int): END value, used for structural variants
        svlen(int): SVLEN value, used for structural variants

    Returns:
        end(int)
    """
    # For snvs and indels the supplied end can be trusted
    if category in ('snv', 'indel', 'cancer'):
        return snvend

    # If nothing is known the end is the same as the start
    if category != 'sv':
        return pos

    # SVs need more care, the END value is the starting point
    end = svend

    # Some callers set END equal to POS (e.g. insertions), use the length then
    if svend == pos and svlen:
        end = pos + svlen

    # Breakends have ':' in alt, the other end is described there
    if ':' in alt:
        bnd_match = BND_ALT_PATTERN.match(alt)
        if bnd_match:
            end = int(bnd_match.group(2))

    return end
예제 #4
0
def get_end(pos, alt, category, snvend=None, svend=None, svlen=None):
    """Figure out where a variant ends

    The category decides which information is used: 'snv', 'indel' and
    'cancer' variants return snvend untouched, 'sv' variants combine svend,
    svlen and the alt field, every other category falls back on pos.

    Args:
        pos(int): start position of the variant
        alt(str): the alternative allele
        category(str): variant category
        snvend: end used for snv, indel and cancer variants
        svend(int): END value, used for structural variants
        svlen(int): SVLEN value, used for structural variants

    Returns:
        end(int)
    """
    is_small_variant = category in ('snv', 'indel', 'cancer')
    if is_small_variant:
        # The supplied end is reliable for these categories
        return snvend

    if category == 'sv':
        # END usually works, unless the caller set it to the same value as
        # POS; in that case a known length gives a better end
        sv_coordinate = pos + svlen if (svend == pos and svlen) else svend

        # A ':' in alt marks a breakend, its alt field holds the other end
        if ':' in alt:
            found = BND_ALT_PATTERN.match(alt)
            if found:
                sv_coordinate = int(found.group(2))
        return sv_coordinate

    # Nothing is known, the end is the same as the start
    return pos
예제 #5
0
def parse_coordinates(variant, category):
    """Find out the coordinates for a variant

    Collects position, end, length and sub category for the first alternative
    allele of the variant and looks up the cytoband for both ends.

    Args:
        variant(cyvcf2.Variant)
        category(str): variant category, forwarded to get_sub_category,
                       get_end and get_length

    Returns:
        coordinates(dict): A dictionary on the form:
        {
            'position':<int>,
            'end':<int>,
            'end_chrom':<str>,
            'length':<int>,
            'sub_category':<str>,
            'mate_id':<str>,
            'cytoband_start':<str>,
            'cytoband_end':<str>,
        }
    """
    ref = variant.REF
    alt = variant.ALT[0]
    chrom_match = CHR_PATTERN.match(variant.CHROM)
    chrom = chrom_match.group(2)

    svtype = variant.INFO.get('SVTYPE')
    if svtype:
        svtype = svtype.lower()

    mate_id = variant.INFO.get('MATEID')
    svlen = variant.INFO.get('SVLEN')
    svend = variant.INFO.get('END')
    snvend = int(variant.end)
    position = int(variant.POS)

    ref_len = len(ref)
    alt_len = len(alt)

    sub_category = get_sub_category(alt_len, ref_len, category, svtype)
    # svlen has to be passed along, otherwise get_end can never fall back on
    # pos + svlen for variants where END is set to the same value as POS
    end = get_end(position, alt, category, snvend, svend, svlen)

    length = get_length(alt_len, ref_len, category, position, end, svtype,
                        svlen)

    end_chrom = chrom
    if sub_category == 'bnd' and ':' in alt:
        match = BND_ALT_PATTERN.match(alt)
        # BND will often be translocations between different chromosomes
        if match:
            other_chrom = match.group(1)
            match = CHR_PATTERN.match(other_chrom)
            end_chrom = match.group(2)

    cytoband_start = get_cytoband_coordinates(chrom, position)
    cytoband_end = get_cytoband_coordinates(end_chrom, end)

    coordinates = {
        'position': position,
        'end': end,
        'length': length,
        'sub_category': sub_category,
        'mate_id': mate_id,
        'cytoband_start': cytoband_start,
        'cytoband_end': cytoband_end,
        'end_chrom': end_chrom,
    }

    return coordinates
예제 #6
0
def parse_coordinates(variant, category):
    """Find out the coordinates for a variant

    Collects position, end, length and sub category for the first alternative
    allele of the variant and looks up the cytoband for both ends.

    Args:
        variant(cyvcf2.Variant)
        category(str): variant category, forwarded to get_sub_category,
                       get_end and get_length

    Returns:
        coordinates(dict): A dictionary on the form:
        {
            'position':<int>,
            'end':<int>,
            'end_chrom':<str>,
            'length':<int>,
            'sub_category':<str>,
            'mate_id':<str>,
            'cytoband_start':<str>,
            'cytoband_end':<str>,
        }
    """
    ref = variant.REF

    # NOTE(review): when ALT is empty and category is not "str", alt is never
    # bound and len(alt) below raises UnboundLocalError - confirm whether a
    # fallback is wanted for that case
    if variant.ALT:
        alt = variant.ALT[0]
    if category == "str" and not variant.ALT:
        # Records of category "str" without ALT get a placeholder allele
        alt = "."

    chrom_match = CHR_PATTERN.match(variant.CHROM)
    chrom = chrom_match.group(2)

    svtype = variant.INFO.get("SVTYPE")
    if svtype:
        svtype = svtype.lower()

    mate_id = variant.INFO.get("MATEID")
    svlen = variant.INFO.get("SVLEN")
    svend = variant.INFO.get("END")
    snvend = int(variant.end)
    position = int(variant.POS)

    ref_len = len(ref)
    alt_len = len(alt)

    sub_category = get_sub_category(alt_len, ref_len, category, svtype)
    # svlen has to be passed along, otherwise get_end can never fall back on
    # pos + svlen for variants where END is set to the same value as POS
    end = get_end(position, alt, category, snvend, svend, svlen)

    length = get_length(alt_len, ref_len, category, position, end, svtype,
                        svlen)

    end_chrom = chrom
    if sub_category == "bnd" and ":" in alt:
        match = BND_ALT_PATTERN.match(alt)
        # BND will often be translocations between different chromosomes
        if match:
            other_chrom = match.group(1)
            match = CHR_PATTERN.match(other_chrom)
            end_chrom = match.group(2)

    cytoband_start = get_cytoband_coordinates(chrom, position)
    cytoband_end = get_cytoband_coordinates(end_chrom, end)

    coordinates = {
        "position": position,
        "end": end,
        "length": length,
        "sub_category": sub_category,
        "mate_id": mate_id,
        "cytoband_start": cytoband_start,
        "cytoband_end": cytoband_end,
        "end_chrom": end_chrom,
    }

    return coordinates
예제 #7
0
def parse_coordinates(variant, category):
    """Find out the coordinates for a variant

    Collects position, end, length and sub category for the first alternative
    allele of the variant and looks up the cytoband for both ends.

    Args:
        variant(cyvcf2.Variant)
        category(str): variant category, forwarded to get_sub_category,
                       get_end and get_length

    Returns:
        coordinates(dict): A dictionary on the form:
        {
            'position':<int>,
            'end':<int>,
            'end_chrom':<str>,
            'length':<int>,
            'sub_category':<str>,
            'mate_id':<str>,
            'cytoband_start':<str>,
            'cytoband_end':<str>,
        }
    """
    ref = variant.REF

    # NOTE(review): when ALT is empty and category is not "str", alt is never
    # bound and len(alt) below raises UnboundLocalError - confirm whether a
    # fallback is wanted for that case
    if variant.ALT:
        alt = variant.ALT[0]
    if category == "str" and not variant.ALT:
        # Records of category "str" without ALT get a placeholder allele
        alt = '.'

    chrom_match = CHR_PATTERN.match(variant.CHROM)
    chrom = chrom_match.group(2)

    svtype = variant.INFO.get('SVTYPE')
    if svtype:
        svtype = svtype.lower()

    mate_id = variant.INFO.get('MATEID')
    svlen = variant.INFO.get('SVLEN')
    svend = variant.INFO.get('END')
    snvend = int(variant.end)
    position = int(variant.POS)

    ref_len = len(ref)
    alt_len = len(alt)

    sub_category = get_sub_category(alt_len, ref_len, category, svtype)
    # svlen has to be passed along, otherwise get_end can never fall back on
    # pos + svlen for variants where END is set to the same value as POS
    end = get_end(position, alt, category, snvend, svend, svlen)

    length = get_length(alt_len, ref_len, category, position, end, svtype, svlen)

    end_chrom = chrom
    if sub_category == 'bnd' and ':' in alt:
        match = BND_ALT_PATTERN.match(alt)
        # BND will often be translocations between different chromosomes
        if match:
            other_chrom = match.group(1)
            match = CHR_PATTERN.match(other_chrom)
            end_chrom = match.group(2)

    cytoband_start = get_cytoband_coordinates(chrom, position)
    cytoband_end = get_cytoband_coordinates(end_chrom, end)

    coordinates = {
        'position': position,
        'end': end,
        'length': length,
        'sub_category': sub_category,
        'mate_id': mate_id,
        'cytoband_start': cytoband_start,
        'cytoband_end': cytoband_end,
        'end_chrom': end_chrom,
    }

    return coordinates