def append_exac_allele_frequencies(record, new_record=None, i=None):
    """Store an ``AF_<subpopulation>`` INFO value for every ExAC subpopulation.

    For each entry of ``EXAC_SUBPOPULATIONS`` the frequency is the allele
    count (``AC_<subpopulation>``) divided by the allele number
    (``AN_<subpopulation>``), passed through ``utilities.round_sigfigs(..., 3)``
    and stored as a string. ``"-"`` is stored when no frequency is computed.

    Two modes:

    * ``new_record`` is ``None``: the first entry of each allele-count list is
      used and the result is written back onto ``record``, which is returned.
      ``"-"`` is stored when the count list is empty or the allele number is 0.
    * ``new_record`` is given: entry ``i`` of ``record.INFO['AF']`` and of each
      allele-count list is used, the results are written onto ``new_record``,
      and ``new_record`` is returned. ``"-"`` is stored when the allele number
      is 0. (Presumably ``i`` is the index of one alternate allele -- confirm
      against the caller.)
    """
    if new_record is not None:
        # Per-index mode: results go onto new_record, record is only read.
        new_record.INFO['AF'] = record.INFO['AF'][i]
        for population in EXAC_SUBPOPULATIONS:
            count = record.INFO["AC_" + population][i]
            number = record.INFO["AN_" + population]
            if number != 0:
                ratio = float(count) / float(number)
                frequency = str(utilities.round_sigfigs(ratio, 3))
            else:
                frequency = "-"
            new_record.INFO["AF_" + population] = frequency
        return new_record

    # Whole-record mode: results are written back onto record itself.
    for population in EXAC_SUBPOPULATIONS:
        counts = record.INFO["AC_" + population]
        number = record.INFO["AN_" + population]
        if len(counts) > 0 and number != 0:
            ratio = float(counts[0]) / float(number)
            frequency = str(utilities.round_sigfigs(ratio, 3))
        else:
            frequency = "-"
        record.INFO["AF_" + population] = frequency
    return record
def determineGnomADAlleleFrequency(row):
    """Return the combined genome + exome gnomAD allele frequency for ``row``.

    ``EMPTY`` is returned when both the genome and the exome allele-frequency
    fields are empty (per ``isEmpty``), or when the summed allele number is 0.
    Otherwise the summed allele count is divided by the summed allele number
    and the quotient is passed through ``round_sigfigs(..., 4)``.
    """
    # Nothing to combine when neither data set reports a frequency.
    if isEmpty(row['Allele_frequency_genome_GnomAD']) and isEmpty(row['Allele_frequency_exome_GnomAD']):
        return EMPTY

    genome_count = getNumericAFValue(row['Allele_count_genome_GnomAD'])
    genome_number = getNumericAFValue(row['Allele_number_genome_GnomAD'])
    exome_count = getNumericAFValue(row['Allele_count_exome_GnomAD'])
    exome_number = getNumericAFValue(row['Allele_number_exome_GnomAD'])

    total_number = genome_number + exome_number
    # Guard against dividing by zero.
    if total_number == 0:
        return EMPTY

    total_count = genome_count + exome_count
    return round_sigfigs(total_count / total_number, 4)
def test_append_exac_allele_frequencies_rounds_to_three_sig_figs(self):
    """Check every computed ExAC ``AF_<subpopulation>`` value.

    Each record of ``test_files/ExAC_AF.vcf`` is run through
    ``append_exac_allele_frequencies``. Every resulting value must either be
    the placeholder ``'-'`` or parse as a float that is unchanged by
    ``round_sigfigs(..., 3)``.
    """
    EXAC_VCF_FILENAME = os.path.join(os.path.dirname(__file__), 'test_files/ExAC_AF.vcf')
    # Keep the file open only while the reader is being iterated, so the
    # handle is closed even when an assertion fails.
    with open(EXAC_VCF_FILENAME, 'r') as vcf_file:
        for record in vcf.Reader(vcf_file):
            record = append_exac_allele_frequencies(record, new_record=None, i=None)
            for subpopulation in EXAC_SUBPOPULATIONS:
                val = record.INFO["AF_" + subpopulation]
                try:
                    # Only the conversion is guarded: a ValueError here means
                    # the value is not numeric and must be the placeholder.
                    float_val = float(val)
                except ValueError:
                    self.assertEqual(val, '-')
                else:
                    self.assertEqual(float_val, round_sigfigs(float_val, 3))
def append_exac_allele_frequencies(record, new_record=None, i=None):
    """Store an ``AF_<subpopulation>`` INFO value for every ExAC subpopulation.

    For each entry of ``EXAC_SUBPOPULATIONS`` the frequency is the allele
    count (``AC_<subpopulation>``) divided by the allele number
    (``AN_<subpopulation>``), passed through ``utilities.round_sigfigs(..., 3)``
    and stored as a string. ``"-"`` is stored when no frequency is computed.

    Two modes:

    * ``new_record`` is ``None``: the first entry of each allele-count list is
      used and the result is written back onto ``record``, which is returned.
      ``"-"`` is stored when the count list is empty or the allele number is 0.
    * ``new_record`` is given: entry ``i`` of ``record.INFO['AF']`` and of each
      allele-count list is used, the results are written onto ``new_record``,
      and ``new_record`` is returned. ``"-"`` is stored when the allele number
      is 0. (Presumably ``i`` is the index of one alternate allele -- confirm
      against the caller.)
    """
    if new_record is not None:
        # Per-index mode: results go onto new_record, record is only read.
        new_record.INFO['AF'] = record.INFO['AF'][i]
        for population in EXAC_SUBPOPULATIONS:
            count = record.INFO["AC_" + population][i]
            number = record.INFO["AN_" + population]
            if number != 0:
                ratio = float(count) / float(number)
                frequency = str(utilities.round_sigfigs(ratio, 3))
            else:
                frequency = "-"
            new_record.INFO["AF_" + population] = frequency
        return new_record

    # Whole-record mode: results are written back onto record itself.
    for population in EXAC_SUBPOPULATIONS:
        counts = record.INFO["AC_" + population]
        number = record.INFO["AN_" + population]
        if len(counts) > 0 and number != 0:
            ratio = float(counts[0]) / float(number)
            frequency = str(utilities.round_sigfigs(ratio, 3))
        else:
            frequency = "-"
        record.INFO["AF_" + population] = frequency
    return record
def selectMaxAlleleFrequency(newRow):
    """Describe the largest allele frequency found across the known AF fields.

    Every listed ESP, 1000 Genomes and ExAC field of ``newRow`` that is
    neither ``EMPTY`` nor ``None`` is parsed as a comma-separated list of
    floats. The largest value strictly greater than 0 wins; on ties the
    earliest field in the list is kept.

    Returns a string of the form ``"<frequency> (<subpopulation> from
    <source>)"``, where the subpopulation and source come from
    ``determineSubpopulationForMAF`` and ``determineSourceForMAF``. ``EMPTY``
    is returned when no field yields a frequency above 0.
    """
    maxFreq = 0
    maxFreqString = EMPTY
    allele_frequency_fields = [
        "EA_Allele_Frequency_ESP",
        "AA_Allele_Frequency_ESP",
        "Allele_Frequency_ESP",
        "EUR_Allele_frequency_1000_Genomes",
        "AFR_Allele_frequency_1000_Genomes",
        "AMR_Allele_frequency_1000_Genomes",
        "EAS_Allele_frequency_1000_Genomes",
        "SAS_Allele_frequency_1000_Genomes",
        "Allele_frequency_AFR_ExAC",
        "Allele_frequency_AMR_ExAC",
        "Allele_frequency_EAS_ExAC",
        "Allele_frequency_FIN_ExAC",
        "Allele_frequency_NFE_ExAC",
        "Allele_frequency_OTH_ExAC",
        "Allele_frequency_SAS_ExAC",
    ]
    for field in allele_frequency_fields:
        value = newRow[field]
        # Skip fields that carry no data.
        if value is None or value == EMPTY:
            continue

        # A field may hold several comma-separated frequencies.
        max_in_field = max(float(part) for part in value.split(','))
        if max_in_field <= maxFreq:
            continue

        source = determineSourceForMAF(field)
        subpopulation = determineSubpopulationForMAF(field)
        maxFreq = max_in_field
        if "ExAC" in source:
            # Ensure exac values maintain 3 sigfigs
            maxFreqStringPrefix = str(utilities.round_sigfigs(max_in_field, 3))
            maxFreqStringSuffix = " (%s from %s)" % (subpopulation, source)
            maxFreqString = maxFreqStringPrefix + maxFreqStringSuffix
        else:
            maxFreqString = "%f (%s from %s)" % (max_in_field, subpopulation, source)
    return maxFreqString