コード例 #1
0
def classify_2_cnv(FF, RR, cnvs, min_frac=0.5):
    """
    Classify the cxSV class of a pair of inv bkpts and two associated CNVs.

    The CNV with the smaller ``pos`` is treated as the 5' candidate and the
    other as the 3' candidate. Each is compared against the interval defined
    by the corresponding breakpoint coordinates (``pos`` of FF/RR for 5',
    ``stop`` of FF/RR for 3'); the interval endpoints are ordered FF-then-RR
    for a DEL and RR-then-FF for any other SVTYPE.

    Parameters
    ----------
    FF : pysam.VariantRecord
    RR : pysam.VariantRecord
    cnvs : [pysam.VariantRecord, pysam.VariantRecord]
        Exactly two records; unpacking fails otherwise.
    min_frac : float, optional
        Minimum reciprocal overlap of each cnv with a candidate CNV interval
        defined by the breakpoint coordinates. The same threshold is
        forwarded to ``classify_1_cnv`` when only one CNV matches.

    Returns
    -------
    svtype : str
        ``<cnv5>INV<cnv3>`` (CNV types lowercased) when both CNVs match, or
        ``'CNV_2_FAIL'`` when neither does.
    cnvs : list of pysam.VariantRecord
        The input ``cnvs``, returned unchanged in those two cases.

    When exactly one CNV matches, the result of ``classify_1_cnv`` for that
    CNV is returned as-is instead.
    """

    # Assign CNVs to 5' or 3' based on ordering
    cnv5, cnv3 = sorted(cnvs, key=lambda r: r.pos)

    # Check if 5' CNV matches breakpoints
    if cnv5.info['SVTYPE'] == 'DEL':
        interval5 = (FF.pos, RR.pos)
    else:
        interval5 = (RR.pos, FF.pos)
    frac5 = svu.reciprocal_overlap(cnv5.pos, cnv5.stop, *interval5)

    # Check if 3' CNV matches breakpoints
    if cnv3.info['SVTYPE'] == 'DEL':
        interval3 = (FF.stop, RR.stop)
    else:
        interval3 = (RR.stop, FF.stop)
    frac3 = svu.reciprocal_overlap(cnv3.pos, cnv3.stop, *interval3)

    # Report cxSV class based on whether CNVs matched intervals
    if frac5 >= min_frac and frac3 >= min_frac:
        svtype = (cnv5.info['SVTYPE'].lower() + 'INV' +
                  cnv3.info['SVTYPE'].lower())
    elif frac5 >= min_frac and frac3 < min_frac:
        # Only the 5' CNV matched: reclassify as a single-CNV event, keeping
        # the caller's overlap threshold rather than the callee's default.
        return classify_1_cnv(FF, RR, cnv5, min_frac=min_frac)
    elif frac5 < min_frac and frac3 >= min_frac:
        # Only the 3' CNV matched: same fallback, same threshold.
        return classify_1_cnv(FF, RR, cnv3, min_frac=min_frac)
    else:
        svtype = 'CNV_2_FAIL'

    return svtype, cnvs
コード例 #2
0
    def _test_overlap(cnv):
        """
        Label which candidate interval(s) a CNV matches.

        Uses ``del5``/``del3`` for a DEL record and ``dup5``/``dup3`` for any
        other SVTYPE; these intervals, ``min_frac`` and ``svu`` come from the
        enclosing scope.

        Returns
        -------
        str
            The record's SVTYPE followed by ``'_53'``, ``'_5'`` or ``'_3'``
            depending on which intervals reach ``min_frac`` reciprocal
            overlap, or ``'no_hit'`` when neither does.
        """
        kind = cnv.info['SVTYPE']

        # Pick the 5'/3' candidate intervals appropriate for this CNV type
        candidates = (del5, del3) if kind == 'DEL' else (dup5, dup3)

        # Evaluate 5' first, then 3', against the overlap threshold
        hit5, hit3 = [
            svu.reciprocal_overlap(cnv.pos, cnv.stop, *interval) >= min_frac
            for interval in candidates
        ]

        suffix_by_hits = {
            (True, True): '_53',
            (True, False): '_5',
            (False, True): '_3',
        }
        suffix = suffix_by_hits.get((bool(hit5), bool(hit3)))
        if suffix is None:
            return 'no_hit'
        return kind + suffix
コード例 #3
0
def classify_1_cnv(FF,
                   RR,
                   cnv,
                   min_frac=0.5,
                   min_bkpt_cnv_size=500,
                   max_bkpt_cnv_size=4000):
    """
    Classify the cxSV class of a pair of inv bkpts and one associated CNV.

    The CNV is first tested for spanning the whole event; if it does, it is
    reported as the same CNV type on both sides of the inversion. Otherwise
    it is matched to the 5' or the 3' breakpoint interval, and the distance
    between the breakpoints at the opposite end decides whether a second,
    small flanking CNV is added to the label.

    Parameters
    ----------
    FF : pysam.VariantRecord
    RR : pysam.VariantRecord
    cnv : pysam.VariantRecord
    min_frac : float, optional
        Minimum reciprocal overlap of the cnv with a candidate CNV interval
        defined by the breakpoint coordinates.
    min_bkpt_cnv_size : int, optional
        Minimum distance between breakpoints to call flanking CNV
        (inclusive).
    max_bkpt_cnv_size : int, optional
        Maximum distance between breakpoints to call flanking CNV
        (exclusive).

    Returns
    -------
    svtype : str
    cnvs : list of pysam.VariantRecord
        ``[cnv]`` whenever the CNV was matched. If it matches neither end
        alone, the result of ``classify_0_cnv(FF, RR)`` is returned instead.
    """

    def _flank_label(gap):
        # Positive gap in range -> deletion, negative gap in range ->
        # duplication, anything else -> no flanking CNV.
        if min_bkpt_cnv_size <= gap < max_bkpt_cnv_size:
            return 'del'
        if min_bkpt_cnv_size <= -gap < max_bkpt_cnv_size:
            return 'dup'
        return ''

    # Lowercased so it can be concatenated around 'INV'
    kind = cnv.info['SVTYPE'].lower()

    # Candidate intervals run FF -> RR for a deletion, RR -> FF otherwise
    first, second = (FF, RR) if kind == 'del' else (RR, FF)
    window5 = (first.pos, second.pos)
    window3 = (first.stop, second.stop)

    # Compare the CNV against the full extent of the inversion event
    span_lo = min(FF.pos, RR.pos)
    span_hi = max(FF.stop, RR.stop)
    whole = svu.reciprocal_overlap(cnv.pos, cnv.stop, span_lo, span_hi)
    cover5 = svu.overlap_frac(*window5, cnv.pos, cnv.stop)
    cover3 = svu.overlap_frac(*window3, cnv.pos, cnv.stop)

    # A single CNV spanning everything is likely two CNVs that were merged
    # during preprocessing or clustering
    if whole > 0.9 and cover5 > 0.95 and cover3 > 0.95:
        return kind + 'INV' + kind, [cnv]

    # Not spanning: decide between 5' and 3' by reciprocal overlap
    recip5 = svu.reciprocal_overlap(cnv.pos, cnv.stop, *window5)
    recip3 = svu.reciprocal_overlap(cnv.pos, cnv.stop, *window3)
    only5 = recip5 >= min_frac and recip3 < min_frac
    only3 = recip5 < min_frac and recip3 >= min_frac

    if only5:
        # 5' CNV; inspect the 3' breakpoints for a small flanking CNV
        label = kind + 'INV' + _flank_label(RR.stop - FF.stop)
        return label, [cnv]

    if only3:
        # 3' CNV; inspect the 5' breakpoints for a small flanking CNV
        label = _flank_label(RR.pos - FF.pos) + 'INV' + kind
        return label, [cnv]

    # The CNV could not be assigned to exactly one end
    return classify_0_cnv(FF, RR)