def ProcessCC(indivAttr, thresh):
    """
    Decide whether one individual passes the filters.

    Parameters
    ----------
    indivAttr: [GT:AD:DP:GQ:PL]
    thresh: hash table of thresholds; 'GQ_CC_Thresh' and 'PL_Thresh' are
        read here and the whole table is handed to filters.passFilters

    Returns
    -------
    True = pass, False = fail
    """
    # Quality filters run first; the PL filter is only consulted when they pass.
    quality_ok = filters.passFilters(indivAttr, thresh, thresh['GQ_CC_Thresh'])
    if quality_ok:
        # PL filter on the individual decides the final outcome.
        return bool(filters.PhredScaleFilter(indivAttr, thresh['PL_Thresh']))
    return False
def doTDT(v, family, thresh):
    """
    Perform the Transmission Disequilibrium Test (TDT).

    Parameters
    ----------
    v: line of the VCF, indexed by field name ('CHROM', 'POS', 'ID', 'REF',
        'ALT', 'FILTER', 'VQSLOD', 'AF', 'AC') and by individual id
    family is a hash table with:
        Key: individual id
        Value: [father id, mother id, sex]
    thresh: hash table of thresholds

    Returns
    -------
    None when v is empty/falsy, or when no transmission and no
    untransmission was counted.  Otherwise a tuple
    (summary_row, indivs_T, indivs_U) where summary_row is the list of
    per-variant statistics and indivs_T / indivs_U start with the site
    fields followed by one record per trio.

    NOTE(review): a second function named doTDT is visible in this source;
    if both live at module level the later definition replaces the earlier
    one -- confirm which is intended to be live.
    """
    # If filters on the line failed move on.
    if not v:
        return None

    TU = [0, 0]       # Array of [transmissions, untransmissions]
    TU_m = [0, 0]     # same as TU but for males
    TU_f = [0, 0]     # same as TU but for females
    mErr = 0          # Count mendelian error: Parents: ref, child: het
    mErr_o = 0        # Count other mendelian errors
    Nproband_alt = 0  # Number of homozygous alt probands that passed all thresholds
    AN = 0            # Number of families that passed all thresholds

    site = [v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT']]
    indivs_T = list(site)  # Individuals who were transmitted the variant
    indivs_U = list(site)  # Individuals who did not receive the variant

    # Loop through all the probands.
    for indiv_id, relatives in family.items():
        # indiv_data is their GT:AD:DP:GQ:PL stats
        indiv_data = v[indiv_id]
        if indiv_data is None:
            continue

        # Apply quality control filters on proband.
        if not filters.passFilters(indiv_data, thresh, thresh['GQ_Kid_Thresh']):
            continue

        # Apply PL filter to child.
        if not filters.PhredScaleFilter(indiv_data, thresh['PL_Thresh']):
            continue

        father_id = relatives[0]
        mother_id = relatives[1]
        father = v[father_id]
        mother = v[mother_id]

        # Check if the parents have the alternate allele
        # so they can pass it on AND apply quality control filters.
        if not filters.TDT_Parent_Filters(father, mother, thresh):
            continue

        AN += 1  # all individuals in the nuclear family passed the filters
        sex = relatives[2]

        # TDT operates differently in the hemizygous chromosomes.
        # PAR regions defined from
        # http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/
        # When check_Hemizgyous is truthy, transmission is treated as normal
        # and numberTransmissions receives False; otherwise it receives True.
        # (presumably that argument switches on hemizygous handling --
        # confirm against numberTransmissions)
        hemizygous_flag = not filters.check_Hemizgyous(
            v['CHROM'], sex, filters.inPar(v['POS']))
        TU, TU_m, TU_f, mErr, mErr_o, transFlag = numberTransmissions(
            indiv_data['GT'], father['GT'], mother['GT'],
            TU, TU_m, TU_f, sex, hemizygous_flag, mErr, mErr_o)

        if indiv_data['GT'] == 'homoAlt':
            Nproband_alt += 1

        trio_record = (indiv_id, sex, indiv_data,
                       father_id, father, mother_id, mother)
        # Equality (not identity) is kept on purpose so any value comparing
        # equal to True/False behaves as before; any other transFlag value
        # leaves the trio out of both lists.
        if transFlag == True:  # noqa: E712 -- the variant was transmitted
            indivs_T.extend(trio_record)
        elif transFlag == False:  # noqa: E712
            indivs_U.extend(trio_record)

    # Ignore the cases in which we have 0 transmissions and 0 untransmissions.
    informative = TU[0] + TU[1]
    if informative == 0:
        return None

    # Fraction of mendelian errors among all counted events.  float() makes
    # this a true division even where `/` on two ints would truncate.
    errors = mErr + mErr_o
    mendErrorPercent = float(errors) / (informative + errors)

    # vepFieldNames, vepA and args are module-level names defined outside
    # this function.
    if vepFieldNames:
        gene, anno, pph2, sift, lof = vepA.findVariantAnnotation(
            v, args, vepFieldNames)
    else:
        gene, anno, pph2, sift, lof = ('', '', '', '', '')

    return [
        v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT'],
        v['FILTER'], v['VQSLOD'],
        gene, anno, pph2, sift, lof,
        v['AF'], v['AC'], AN, Nproband_alt,
        TU[0], TU[1], TU_m[0], TU_m[1], TU_f[0], TU_f[1],
        mErr, mErr_o, mendErrorPercent
    ], indivs_T, indivs_U
def doTDT(v, family, thresh):
    """
    Perform the Transmission Disequilibrium Test (TDT).

    Parameters
    ----------
    v: line of the VCF, indexed by field name ('CHROM', 'POS', 'ID', 'REF',
        'ALT', 'AF', 'AC') and by individual id
    family is a hash table with:
        Key: individual id
        Value: [father id, mother id, sex]
    thresh: hash table of thresholds

    Returns
    -------
    None when v is empty/falsy, or when no transmission and no
    untransmission was counted.  Otherwise a tuple
    (summary_row, indivs_T, indivs_U) where summary_row is the list of
    per-variant statistics (including the averaged AB, DP and DP_het) and
    indivs_T / indivs_U start with the site fields followed by one record
    per trio.

    NOTE(review): another function named doTDT is visible earlier in this
    source; if both live at module level this later definition is the one
    that stays bound -- confirm that is intended.
    """
    # If filters on the line failed move on.
    if not v:
        return None

    TU = [0, 0]       # Array of [transmissions, untransmissions]
    TU_m = [0, 0]     # same as TU but for males
    TU_f = [0, 0]     # same as TU but for females
    mErr = 0          # Count mendelian error: Parents: ref, child: het
    mErr_o = 0        # Count other mendelian errors
    N_het = 0         # Number of heterozygous individuals that passed all thresholds
    Nproband_alt = 0  # Number of homozygous alt probands that passed all thresholds
    AN = 0            # Number of families that passed all thresholds
    DP_het = []       # Array of depth of all het individuals who passed filters
    DP = []           # Array of depth of all non-het individuals who passed filters
    AB = []           # Array of the allelic balance

    site = [v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT']]
    indivs_T = list(site)  # Individuals who were transmitted the variant
    indivs_U = list(site)  # Individuals who did not receive the variant

    # Loop through all the probands.
    for indiv_id, relatives in family.items():
        # indiv_data is their GT:AD:DP:GQ:PL stats
        indiv_data = v[indiv_id]
        if indiv_data is None:
            continue

        # Apply quality control filters on proband.
        if not filters.passFilters(indiv_data, thresh, thresh['GQ_Kid_Thresh']):
            continue

        # Apply PL filter to child.
        if not filters.PhredScaleFilter(indiv_data, thresh['PL_Thresh']):
            continue

        father_id = relatives[0]
        mother_id = relatives[1]
        father = v[father_id]
        mother = v[mother_id]

        # Check if the parents have the alternate allele
        # so they can pass it on AND apply quality control filters.
        if not filters.TDT_Parent_Filters(father, mother, thresh):
            continue

        AN += 1  # all individuals in the nuclear family passed the filters
        sex = relatives[2]

        # TDT operates differently in the hemizygous chromosomes.
        # PAR regions defined from
        # http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/
        # When check_Hemizgyous is truthy, transmission is treated as normal
        # and numberTransmissions receives False; otherwise it receives True.
        # (presumably that argument switches on hemizygous handling --
        # confirm against numberTransmissions)
        hemizygous_flag = not filters.check_Hemizgyous(
            v['CHROM'], sex, filters.inPar(v['POS']))
        TU, TU_m, TU_f, mErr, mErr_o, transFlag = numberTransmissions(
            indiv_data['GT'], father['GT'], mother['GT'],
            TU, TU_m, TU_f, sex, hemizygous_flag, mErr, mErr_o)

        # Update totals
        AB, N_het, Nproband_alt, DP, DP_het = updateTotals(
            AB, N_het, Nproband_alt, DP, DP_het, indiv_data, father, mother)

        trio_record = (indiv_id, indiv_data,
                       father_id, father, mother_id, mother)
        # Equality (not identity) is kept on purpose so any value comparing
        # equal to True/False behaves as before; any other transFlag value
        # leaves the trio out of both lists.
        if transFlag == True:  # noqa: E712 -- the variant was transmitted
            indivs_T.extend(trio_record)
        elif transFlag == False:  # noqa: E712
            indivs_U.extend(trio_record)

    # Ignore the cases in which we have 0 transmissions and 0 untransmissions.
    informative = TU[0] + TU[1]
    if informative == 0:
        return None

    # Fraction of mendelian errors among all counted events.  float() makes
    # this a true division even where `/` on two ints would truncate.
    errors = mErr + mErr_o
    mendErrorPercent = float(errors) / (informative + errors)

    # Calculate averages for allelic balance (AB), depth (DP), and depth of
    # hets (DP_het).  The overall DP average pools non-het and het depths,
    # so it must be computed before DP_het is replaced by its own average.
    AB = np.average(np.array(AB))
    DP = np.average(np.concatenate((np.array(DP), np.array(DP_het))))
    DP_het = np.average(np.array(DP_het))

    # vepA, args and vepFieldNames are module-level names defined outside
    # this function.
    gene, anno, pph2, sift, lof = vepA.findVariantAnnotation(
        v, args, vepFieldNames)

    return [
        v['CHROM'], v['POS'], v['ID'], v['REF'], v['ALT'],
        gene, anno, pph2, sift, lof,
        v['AF'], v['AC'], AN, AB, DP, DP_het, Nproband_alt,
        TU[0], TU[1], TU_m[0], TU_m[1], TU_f[0], TU_f[1],
        mErr, mErr_o, mendErrorPercent
    ], indivs_T, indivs_U