def doTDT(v, family, thresh):
    """ Perform the Transmission Disequilibrium Test (TDT) on one VCF line.

    Parameters
    ----------
    v: line of the VCF, indexable by column name ('CHROM', 'POS', 'ID',
        'REF', 'ALT', 'FILTER', 'VQSLOD', 'AF', 'AC') and by individual id.
        A falsy value means the line-level filters failed.
    family is a hash table with:
        Key: individual id      Value: [father id, mother id, sex]
    thresh: hash table of thresholds ('GQ_Kid_Thresh' and 'PL_Thresh' are
        read here; the whole table is forwarded to the filter helpers)

    Returns
    -------
    None if `v` is falsy or if no transmission/untransmission was counted.
    Otherwise a 3-tuple (row, indivs_T, indivs_U) where
        row:      [CHROM, POS, ID, REF, ALT, FILTER, VQSLOD, gene, anno,
                   pph2, sift, lof, AF, AC, AN, Nproband_alt, T, U, T_m, U_m,
                   T_f, U_f, mErr, mErr_o, mendErrorPercent]
        indivs_T: [CHROM, POS, ID, REF, ALT] followed by, per recorded
                  proband, (id, sex, data, father id, father data,
                  mother id, mother data) for transmitted variants
        indivs_U: same layout for untransmitted variants
    """

    # If filters on the line failed move on.
    if not v:
        return None

    TU = [0, 0]  # Array of [transmissions, untransmissions]
    TU_m = [0, 0]  # same as TU but for males
    TU_f = [0, 0]  # same as TU but for females
    mErr = 0  # Count mendelian error: Parents: ref, child: het
    mErr_o = 0  # Count other mendelian errors
    Nproband_alt = 0  # Number of homozygous alt probands that passed all thresholds
    AN = 0  # Number of families that passed all thresholds

    # Both per-individual arrays start with the same site description.
    site = [v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT']]
    indivs_T = list(site)  # individuals who were transmitted the variant
    indivs_U = list(site)  # individuals who did not receive the variant

    for indiv_id in family:  # loop through all the probands
        # indiv_data is their GT:AD:DP:GQ:PL stats
        indiv_data = v[indiv_id]
        if indiv_data is None:
            continue

        # Apply quality control filters on proband.
        if not filters.passFilters(indiv_data, thresh,
                                   thresh['GQ_Kid_Thresh']):
            continue

        # Apply PL filter to child.
        if not filters.PhredScaleFilter(indiv_data, thresh['PL_Thresh']):
            continue

        trio = family[indiv_id]
        father_id, mother_id = trio[0], trio[1]
        father = v[father_id]
        mother = v[mother_id]

        # Check if the parents have the alternate allele
        # so they can pass it on AND apply quality control filters.
        if not filters.TDT_Parent_Filters(father, mother, thresh):
            continue

        AN += 1  # all individuals in the nuclear family passed the filters
        sex = trio[2]

        # TDT operates differently in the hemizygous chromosomes
        # PAR regions defined from
        # http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/
        # A truthy check means transmission is normal, so the flag handed to
        # numberTransmissions is the negation of that check.
        hemizygous = not filters.check_Hemizgyous(v['CHROM'], sex,
                                                  filters.inPar(v['POS']))
        TU, TU_m, TU_f, mErr, mErr_o, transFlag = numberTransmissions(
            indiv_data['GT'], father['GT'], mother['GT'], TU, TU_m, TU_f,
            sex, hemizygous, mErr, mErr_o)

        if indiv_data['GT'] == 'homoAlt':
            Nproband_alt += 1

        # transFlag is deliberately compared by equality: a value that is
        # neither True nor False leaves the trio out of both arrays
        # (presumably the Mendelian-error case -- confirm against
        # numberTransmissions).
        record = (indiv_id, sex, indiv_data, father_id, father, mother_id,
                  mother)
        if transFlag == True:  # if the variant was transmitted
            indivs_T.extend(record)
        elif transFlag == False:
            indivs_U.extend(record)

    # Ignore the cases in which we have 0 transmissions and 0 untransmissions.
    informative = TU[0] + TU[1]
    if informative == 0:
        return None

    # Fraction of mendelian errors; float() forces true division even when
    # integer division is in effect. The denominator is > 0 at this point.
    errors = mErr + mErr_o
    mendErrorPercent = float(errors) / (informative + errors)

    if vepFieldNames:
        gene, anno, pph2, sift, lof = vepA.findVariantAnnotation(
            v, args, vepFieldNames)
    else:
        gene, anno, pph2, sift, lof = ('', '', '', '', '')

    row = [
        v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT'], v['FILTER'],
        v['VQSLOD'], gene, anno, pph2, sift, lof, v['AF'], v['AC'], AN,
        Nproband_alt, TU[0], TU[1], TU_m[0], TU_m[1], TU_f[0], TU_f[1], mErr,
        mErr_o, mendErrorPercent
    ]
    return row, indivs_T, indivs_U
# Example #2
# 0
def doTDT(v, family, thresh):
    """ Perform the Transmission Disequilibrium Test (TDT) on one VCF line.

    Parameters
    ----------
    v: line of the VCF, indexable by column name ('CHROM', 'POS', 'ID',
        'REF', 'ALT', 'AF', 'AC') and by individual id.
        A falsy value means the line-level filters failed.
    family is a hash table with:
        Key: individual id      Value: [father id, mother id, sex]
    thresh: hash table of thresholds ('GQ_Kid_Thresh' and 'PL_Thresh' are
        read here; the whole table is forwarded to the filter helpers)

    Returns
    -------
    None if `v` is falsy or if no transmission/untransmission was counted.
    Otherwise a 3-tuple (row, indivs_T, indivs_U) where
        row:      [CHROM, POS, ID, REF, ALT, gene, anno, pph2, sift, lof,
                   AF, AC, AN, mean AB, mean DP, mean DP of hets,
                   Nproband_alt, T, U, T_m, U_m, T_f, U_f, mErr, mErr_o,
                   mendErrorPercent]
        indivs_T: [CHROM, POS, ID, REF, ALT] followed by, per recorded
                  proband, (id, data, father id, father data, mother id,
                  mother data) for transmitted variants
        indivs_U: same layout for untransmitted variants
    """

    # If filters on the line failed move on.
    if not v:
        return None

    TU = [0, 0]         # Array of [transmissions, untransmissions]
    TU_m = [0, 0]       # same as TU but for males
    TU_f = [0, 0]       # same as TU but for females
    mErr = 0            # Count mendelian error: Parents: ref, child: het
    mErr_o = 0          # Count other mendelian errors
    N_het = 0           # Number of heterozygous individuals that passed all thresholds
    Nproband_alt = 0    # Number of homozygous alt probands that passed all thresholds
    AN = 0              # Number of families that passed all thresholds
    DP_het = []         # Array of depth of all het individuals who passed filters
    DP = []             # Array of depth of all non-het individuals who passed filters
    AB = []             # Array of the allelic balance

    # Both per-individual arrays start with the same site description.
    site = [v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT']]
    indivs_T = list(site)   # individuals who were transmitted the variant
    indivs_U = list(site)   # individuals who did not receive the variant

    for indiv_id in family:     # loop through all the probands
        # indiv_data is their GT:AD:DP:GQ:PL stats
        indiv_data = v[indiv_id]
        if indiv_data is None:
            continue

        # Apply quality control filters on proband.
        if not filters.passFilters(indiv_data, thresh, thresh['GQ_Kid_Thresh']):
            continue

        # Apply PL filter to child.
        if not filters.PhredScaleFilter(indiv_data, thresh['PL_Thresh']):
            continue

        trio = family[indiv_id]
        father_id, mother_id = trio[0], trio[1]
        father = v[father_id]
        mother = v[mother_id]

        # Check if the parents have the alternate allele
        # so they can pass it on AND apply quality control filters.
        if not filters.TDT_Parent_Filters(father, mother, thresh):
            continue

        AN += 1         # all individuals in the nuclear family passed the filters
        sex = trio[2]

        # TDT operates differently in the hemizygous chromosomes
        # PAR regions defined from
        # http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/
        # A truthy check means transmission is normal, so the flag handed to
        # numberTransmissions is the negation of that check.
        hemizygous = not filters.check_Hemizgyous(v['CHROM'], sex,
                                                  filters.inPar(v['POS']))
        TU, TU_m, TU_f, mErr, mErr_o, transFlag = numberTransmissions(
            indiv_data['GT'], father['GT'], mother['GT'], TU, TU_m, TU_f,
            sex, hemizygous, mErr, mErr_o)

        # Update totals
        AB, N_het, Nproband_alt, DP, DP_het = updateTotals(
            AB, N_het, Nproband_alt, DP, DP_het, indiv_data, father, mother)

        # transFlag is deliberately compared by equality: a value that is
        # neither True nor False leaves the trio out of both arrays
        # (presumably the Mendelian-error case -- confirm against
        # numberTransmissions).
        record = (indiv_id, indiv_data, father_id, father, mother_id, mother)
        if transFlag == True:       # if the variant was transmitted
            indivs_T.extend(record)
        elif transFlag == False:
            indivs_U.extend(record)

    # Ignore the cases in which we have 0 transmissions and 0 untransmissions.
    informative = TU[0] + TU[1]
    if informative == 0:
        return None

    # Fraction of mendelian errors; float() forces true division even when
    # integer division is in effect. The denominator is > 0 at this point.
    errors = mErr + mErr_o
    mendErrorPercent = float(errors) / (informative + errors)

    # Calculate averages for allelic balance (AB), depth (DP), and depth of
    # hets (DP_het). The overall depth pools het and non-het individuals.
    # Separate names keep the accumulator lists from being rebound to scalars.
    mean_AB = np.average(np.array(AB))
    mean_DP = np.average(np.concatenate((np.array(DP), np.array(DP_het))))
    mean_DP_het = np.average(np.array(DP_het))

    gene, anno, pph2, sift, lof = vepA.findVariantAnnotation(v, args, vepFieldNames)

    row = [v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT'], gene, anno, pph2,
           sift, lof, v['AF'], v['AC'], AN, mean_AB, mean_DP, mean_DP_het,
           Nproband_alt, TU[0], TU[1], TU_m[0], TU_m[1], TU_f[0], TU_f[1],
           mErr, mErr_o, mendErrorPercent]
    return row, indivs_T, indivs_U
def doCaseControl(v, cases, controls, thresh):
    """ Analyzes case/control data, counting the
        number of reference and alternate alleles, split by sex.

    Parameters
    ----------
    v: line of the VCF (dict-like; looked up by column name and by
        individual id). A falsy value means the line-level filters failed.
    case and control are hash tables with:
        Key: individual id      Value: gender ('male' or 'female')
    thresh is a hash table with:
        Key: name of threshold  Value: threshold

    Returns
    -------
    None if `v` is falsy, or if no alternate allele or no reference allele
    was counted over cases and controls together. Otherwise the list
        [CHROM, POS, ID, REF, ALT, FILTER, VQSLOD, gene, anno, pph2, sift,
         lof, AF, AC, AN, caseRefs, caseAlts, controlRefs, controlAlts,
         caseRefs_m, caseAlts_m, controlRefs_m, controlAlts_m,
         caseRefs_f, caseAlts_f, controlRefs_f, controlAlts_f]
    """

    # If filters on the line failed move on.
    if not v:
        return None

    # Running [refs, alts] allele counts per cohort and sex.
    tallies = {
        'case': {'male': [0, 0], 'female': [0, 0]},
        'control': {'male': [0, 0], 'female': [0, 0]},
    }

    # Cases and controls are processed identically, cases first.
    for cohort, members in (('case', cases), ('control', controls)):
        for indiv_id in members:
            # indiv_data is their GT:AD:DP:GQ:PL stats. Individuals missing
            # from the line are skipped, exactly like those with no call.
            indiv_data = v.get(indiv_id)
            if indiv_data is None:
                continue

            # Apply filters and update counts.
            if not ProcessCC(indiv_data, thresh):
                continue

            sex = members[indiv_id]
            parFlag = filters.check_Hemizgyous(v['CHROM'], sex,
                                               filters.inPar(v['POS']))

            # Only 'male' and 'female' are counted; any other value is
            # ignored.
            tally = tallies[cohort].get(sex)
            if tally is None:
                continue
            tally[0], tally[1] = Counts(indiv_data, tally[0], tally[1],
                                        parFlag)

    caseRefs_m, caseAlts_m = tallies['case']['male']
    caseRefs_f, caseAlts_f = tallies['case']['female']
    controlRefs_m, controlAlts_m = tallies['control']['male']
    controlRefs_f, controlAlts_f = tallies['control']['female']

    caseAlts = caseAlts_f + caseAlts_m
    caseRefs = caseRefs_f + caseRefs_m
    controlAlts = controlAlts_f + controlAlts_m
    controlRefs = controlRefs_f + controlRefs_m

    # A site with only one kind of allele counted is not reported.
    if caseAlts + controlAlts == 0 or caseRefs + controlRefs == 0:
        return None

    AC = caseAlts + controlAlts
    AN = AC + caseRefs + controlRefs
    # float() forces true division even when integer division is in effect;
    # AN is > 0 here because both allele kinds were counted.
    AF = float(AC) / AN

    if vepFieldNames:
        gene, anno, pph2, sift, lof = vepA.findVariantAnnotation(
            v, args, vepFieldNames)
    else:
        gene, anno, pph2, sift, lof = ('', '', '', '', '')

    return [
        v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT'], v['FILTER'],
        v['VQSLOD'], gene, anno, pph2, sift, lof, AF, AC, AN, caseRefs,
        caseAlts, controlRefs, controlAlts, caseRefs_m, caseAlts_m,
        controlRefs_m, controlAlts_m, caseRefs_f, caseAlts_f, controlRefs_f,
        controlAlts_f
    ]
# Example #4
# 0
def doCaseControl(v, cases, controls, thresh):
    """ Analyzes case/control data, counting the
        number of reference and alternate alleles.

    Parameters
    ----------
    v: line of the VCF (dict-like; looked up by column name and by
        individual id). A falsy value means the line-level filters failed.
    case and control are hash tables with:
        Key: individual id      Value: gender
    thresh is a hash table with:
        Key: name of threshold  Value: threshold

    Returns
    -------
    None if `v` is falsy, or if no alternate allele or no reference allele
    was counted over cases and controls together. Otherwise the list
        [CHROM, POS, ID, REF, ALT, gene, anno, pph2, sift, lof, AF, AC, AN,
         caseRefs, caseAlts, controlRefs, controlAlts]
    """

    # If filters on the line failed move on.
    if not v:
        return None

    def tally(members):
        """Return (refs, alts) allele counts over one cohort."""
        refs, alts = 0, 0
        for indiv_id in members:
            # indiv_data is their GT:AD:DP:GQ:PL stats. Individuals missing
            # from the line are skipped, exactly like those with no call.
            indiv_data = v.get(indiv_id)
            if indiv_data is None:
                continue

            # Apply filters and update counts   Note: members[indiv_id] is gender
            if ProcessCC(indiv_data, thresh):
                parFlag = filters.check_Hemizgyous(v['CHROM'], members[indiv_id],
                                                   filters.inPar(v['POS']))
                refs, alts = Counts(indiv_data, refs, alts, parFlag)
        return refs, alts

    # Count the number of ref and alt alleles in cases, then in controls.
    caseRefs, caseAlts = tally(cases)
    controlRefs, controlAlts = tally(controls)

    # A site with only one kind of allele counted is not reported.
    if caseAlts + controlAlts == 0 or caseRefs + controlRefs == 0:
        return None

    AC = caseAlts + controlAlts
    AN = AC + caseRefs + controlRefs
    # float() forces true division even when integer division is in effect;
    # AN is > 0 here because both allele kinds were counted.
    AF = float(AC) / AN

    gene, anno, pph2, sift, lof = vepA.findVariantAnnotation(v, args, vepFieldNames)

    return [v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT'], gene, anno, pph2, sift,
            lof, AF, AC, AN, caseRefs, caseAlts, controlRefs, controlAlts]