예제 #1
0
def ProcessCC(indivAttr, thresh):
    """ Decide whether one individual clears every filter.

    Parameters
    ----------
    indivAttr: the individual's [GT:AD:DP:GQ:PL] attributes
    thresh: hash table of thresholds; the 'GQ_CC_Thresh' and 'PL_Thresh'
        entries are read here

    Returns
    -------
    True when both filters pass, False as soon as one of them fails.
    """

    # The quality filters run first; the PL filter is only consulted
    # (and 'PL_Thresh' only looked up) once they have passed.
    quality_ok = filters.passFilters(indivAttr, thresh, thresh['GQ_CC_Thresh'])
    if quality_ok:
        pl_ok = filters.PhredScaleFilter(indivAttr, thresh['PL_Thresh'])
        if pl_ok:
            return True

    return False
예제 #2
0
파일: TDT_CC.py 프로젝트: JKosmicki/TDT
def ProcessCC(indivAttr, thresh):
    """ Decide whether one individual clears every filter.

    Parameters
    ----------
    indivAttr: the individual's [GT:AD:DP:GQ:PL] attributes
    thresh: hash table of thresholds; the 'GQ_CC_Thresh' and 'PL_Thresh'
        entries are read here

    Returns
    -------
    True when both filters pass, False as soon as one of them fails.
    """

    # The quality filters run first; the PL filter is only consulted
    # (and 'PL_Thresh' only looked up) once they have passed.
    quality_ok = filters.passFilters(indivAttr, thresh, thresh['GQ_CC_Thresh'])
    if quality_ok:
        pl_ok = filters.PhredScaleFilter(indivAttr, thresh['PL_Thresh'])
        if pl_ok:
            return True

    return False
예제 #3
0
def doTDT(v, family, thresh):
    """ Perform the Transmission Disequilibrium Test (TDT) on one variant.

    Parameters
    ----------
    v: line of the VCF; indexed by field name ('CHROM', 'POS', 'ID', ...)
        and by individual id (value: that individual's GT:AD:DP:GQ:PL
        stats, or None)
    family is a hash table with:
        Key: individual id      Value: [father id, mother id, sex]
    thresh: hash table of thresholds

    Returns
    -------
    None when v is empty/falsy or when no transmission and no
    untransmission was counted.  Otherwise a 3-tuple
    (stats, indivs_T, indivs_U):
        stats: list of the variant fields, annotation, counts and the
            mendelian error ratio
        indivs_T: variant key followed by the trios that were
            transmitted the variant
        indivs_U: variant key followed by the trios that were not
    """

    # If filters on the line failed move on.
    if not v:
        return None

    TU = [0, 0]  # Array of [transmissions, untransmissions]
    TU_m = [0, 0]  # same as TU but for males
    TU_f = [0, 0]  # same as TU but for females
    mErr = 0  # Count mendelian error: Parents: ref, child: het
    mErr_o = 0  # Count other mendelian errors
    Nproband_alt = 0  # Number of homozygous alt probands that passed all thresholds
    AN = 0  # Number of families that passed all thresholds

    # Both per-individual arrays start with the same variant key.
    variant_key = [v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT']]
    indivs_T = list(variant_key)  # individuals who were transmitted the variant
    indivs_U = list(variant_key)  # individuals who did not receive the variant

    for indiv_id, relatives in family.items():  # loop through all the probands
        # indiv_data is their GT:AD:DP:GQ:PL stats
        indiv_data = v[indiv_id]
        if indiv_data is None:
            continue

        # Apply quality control filters on proband.
        if not filters.passFilters(indiv_data, thresh,
                                   thresh['GQ_Kid_Thresh']):
            continue

        # Apply PL filter to child.
        if not filters.PhredScaleFilter(indiv_data, thresh['PL_Thresh']):
            continue

        father_id = relatives[0]
        mother_id = relatives[1]
        father = v[father_id]
        mother = v[mother_id]

        # Check if the parents have the alternate allele
        # so they can pass it on AND apply quality control filters.
        if not filters.TDT_Parent_Filters(father, mother, thresh):
            continue

        AN += 1  # all individuals in the nuclear family passed the filters
        sex = relatives[2]

        # TDT operates differently in the hemizygous chromosomes
        # PAR regions defined from
        # http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/
        # numberTransmissions receives False when the check below passes
        # (per the original notes: PAR region, transmission is normal) and
        # True otherwise -- confirm against filters.check_Hemizgyous.
        special_case = not filters.check_Hemizgyous(
            v['CHROM'], sex, filters.inPar(v['POS']))
        TU, TU_m, TU_f, mErr, mErr_o, transFlag = numberTransmissions(
            indiv_data['GT'], father['GT'], mother['GT'], TU, TU_m, TU_f,
            sex, special_case, mErr, mErr_o)

        if indiv_data['GT'] == 'homoAlt':
            Nproband_alt += 1

        trio_record = (indiv_id, sex, indiv_data, father_id, father,
                       mother_id, mother)
        # Compared by equality on purpose: a transFlag that equals neither
        # True nor False (presumably the mendelian-error case -- confirm in
        # numberTransmissions) is recorded in neither array.
        if transFlag == True:  # if the variant was transmitted
            indivs_T.extend(trio_record)
        elif transFlag == False:
            indivs_U.extend(trio_record)

    # Ignore the cases in which we have 0 transmissions and 0 untransmissions.
    informative = TU[0] + TU[1]
    if informative == 0:
        return None

    # Fraction of mendelian errors among all counted events.  float() keeps
    # this a true division even where `/` on two ints would truncate
    # (Python 2 without `from __future__ import division`).
    n_errors = mErr + mErr_o
    mendErrorPercent = float(n_errors) / (informative + n_errors)

    # vepFieldNames, args and vepA are module-level names defined outside
    # this function.
    if vepFieldNames:
        gene, anno, pph2, sift, lof = vepA.findVariantAnnotation(
            v, args, vepFieldNames)
    else:
        gene, anno, pph2, sift, lof = ('', '', '', '', '')

    return [
        v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT'], v['FILTER'],
        v['VQSLOD'], gene, anno, pph2, sift, lof, v['AF'], v['AC'], AN,
        Nproband_alt, TU[0], TU[1], TU_m[0], TU_m[1], TU_f[0], TU_f[1], mErr,
        mErr_o, mendErrorPercent
    ], indivs_T, indivs_U
예제 #4
0
파일: TDT_CC.py 프로젝트: JKosmicki/TDT
def doTDT(v, family, thresh):
    """ Perform the Transmission Disequilibrium Test (TDT) on one variant.

    Parameters
    ----------
    v: line of the VCF; indexed by field name ('CHROM', 'POS', 'ID', ...)
        and by individual id (value: that individual's GT:AD:DP:GQ:PL
        stats, or None)
    family is a hash table with:
        Key: individual id      Value: [father id, mother id, sex]
    thresh: hash table of thresholds

    Returns
    -------
    None when v is empty/falsy or when no transmission and no
    untransmission was counted.  Otherwise a 3-tuple
    (stats, indivs_T, indivs_U):
        stats: list of the variant fields, annotation, averaged AB/DP
            values, counts and the mendelian error ratio
        indivs_T: variant key followed by the trios that were
            transmitted the variant
        indivs_U: variant key followed by the trios that were not
    """

    # If filters on the line failed move on.
    if not v:
        return None

    TU = [0, 0]         # Array of [transmissions, untransmissions]
    TU_m = [0, 0]       # same as TU but for males
    TU_f = [0, 0]       # same as TU but for females
    mErr = 0            # Count mendelian error: Parents: ref, child: het
    mErr_o = 0          # Count other mendelian errors
    N_het = 0           # Number of heterozygous individuals that passed all thresholds
    Nproband_alt = 0    # Number of homozygous alt probands that passed all thresholds
    AN = 0              # Number of families that passed all thresholds
    DP_het = []         # Array of depth of all het individuals who passed filters
    DP = []             # Array of depth of all non-het individuals who passed filters
    AB = []             # Array of the allelic balance

    # Both per-individual arrays start with the same variant key.
    variant_key = [v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT']]
    indivs_T = list(variant_key)    # individuals who were transmitted the variant
    indivs_U = list(variant_key)    # individuals who did not receive the variant

    for indiv_id, relatives in family.items():    # loop through all the probands
        # indiv_data is their GT:AD:DP:GQ:PL stats
        indiv_data = v[indiv_id]
        if indiv_data is None:
            continue

        # Apply quality control filters on proband.
        if not filters.passFilters(indiv_data, thresh, thresh['GQ_Kid_Thresh']):
            continue

        # Apply PL filter to child.
        if not filters.PhredScaleFilter(indiv_data, thresh['PL_Thresh']):
            continue

        father_id = relatives[0]
        mother_id = relatives[1]
        father = v[father_id]
        mother = v[mother_id]

        # Check if the parents have the alternate allele
        # so they can pass it on AND apply quality control filters.
        if not filters.TDT_Parent_Filters(father, mother, thresh):
            continue

        AN += 1         # all individuals in the nuclear family passed the filters
        sex = relatives[2]

        # TDT operates differently in the hemizygous chromosomes
        # PAR regions defined from
        # http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/
        # numberTransmissions receives False when the check below passes
        # (per the original notes: PAR region, transmission is normal) and
        # True otherwise -- confirm against filters.check_Hemizgyous.
        special_case = not filters.check_Hemizgyous(
            v['CHROM'], sex, filters.inPar(v['POS']))
        TU, TU_m, TU_f, mErr, mErr_o, transFlag = numberTransmissions(
            indiv_data['GT'], father['GT'], mother['GT'], TU, TU_m, TU_f,
            sex, special_case, mErr, mErr_o)

        # Update totals
        AB, N_het, Nproband_alt, DP, DP_het = updateTotals(
            AB, N_het, Nproband_alt, DP, DP_het, indiv_data, father, mother)

        trio_record = (indiv_id, indiv_data, father_id, father,
                       mother_id, mother)
        # Compared by equality on purpose: a transFlag that equals neither
        # True nor False (presumably the mendelian-error case -- confirm in
        # numberTransmissions) is recorded in neither array.
        if transFlag == True:               # if the variant was transmitted
            indivs_T.extend(trio_record)
        elif transFlag == False:
            indivs_U.extend(trio_record)

    # Ignore the cases in which we have 0 transmissions and 0 untransmissions.
    informative = TU[0] + TU[1]
    if informative == 0:
        return None

    # Fraction of mendelian errors among all counted events.  float() keeps
    # this a true division even where `/` on two ints would truncate
    # (Python 2 without `from __future__ import division`).
    n_errors = mErr + mErr_o
    mendErrorPercent = float(n_errors) / (informative + n_errors)

    # Calculate averages for allelic balance (AB), depth (DP), and depth of
    # hets (DP_het).  The overall depth pools the het and non-het arrays.
    # The accumulators keep their names; the averages get their own.
    mean_AB = np.average(np.array(AB))
    mean_DP = np.average(np.concatenate((np.array(DP), np.array(DP_het))))
    mean_DP_het = np.average(np.array(DP_het))

    # args, vepFieldNames and vepA are module-level names defined outside
    # this function.
    gene, anno, pph2, sift, lof = vepA.findVariantAnnotation(v, args, vepFieldNames)

    return [v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT'], gene, anno, pph2,
            sift, lof, v['AF'], v['AC'], AN, mean_AB, mean_DP, mean_DP_het,
            Nproband_alt, TU[0], TU[1], TU_m[0], TU_m[1], TU_f[0], TU_f[1],
            mErr, mErr_o, mendErrorPercent], indivs_T, indivs_U