Esempio n. 1
0
def append_exac_allele_frequencies(record, new_record=None, i=None):
    """Add an ``AF_<subpopulation>`` INFO entry for every ExAC subpopulation.

    Each frequency is ``AC_<subpopulation>`` divided by
    ``AN_<subpopulation>``, passed through ``utilities.round_sigfigs(..., 3)``
    and stored as a string.  ``"-"`` is stored when no frequency is computed.

    The function has two modes, selected by ``new_record``:

    * ``new_record is None``: the first entry of each ``AC_*`` value is
      used, results are written back onto ``record`` and ``record`` is
      returned.  ``i`` is not used.  ``"-"`` is stored when the ``AC_*``
      value is empty or ``AN_*`` equals 0.
    * otherwise: entry ``i`` of ``record.INFO['AF']`` and of each ``AC_*``
      value is used, results are written onto ``new_record`` and
      ``new_record`` is returned.  ``"-"`` is stored when ``AN_*`` equals 0.
    """
    if new_record is not None:
        # Per-allele mode: copy the i-th overall AF, then derive the i-th
        # frequency for each subpopulation.
        new_record.INFO['AF'] = record.INFO['AF'][i]
        for pop in EXAC_SUBPOPULATIONS:
            count = record.INFO["AC_" + pop][i]
            number = record.INFO["AN_" + pop]
            new_record.INFO["AF_" + pop] = (
                str(utilities.round_sigfigs(float(count) / float(number), 3))
                if number != 0
                else "-"
            )
        return new_record

    # In-place mode: only the first allele count is considered.
    for pop in EXAC_SUBPOPULATIONS:
        counts = record.INFO["AC_" + pop]
        number = record.INFO["AN_" + pop]
        record.INFO["AF_" + pop] = (
            str(utilities.round_sigfigs(float(counts[0]) / float(number), 3))
            if len(counts) > 0 and number != 0
            else "-"
        )
    return record
def determineGnomADAlleleFrequency(row):
    """Combine gnomAD genome and exome counts into one allele frequency.

    Returns ``EMPTY`` when both the genome and exome allele-frequency
    fields of ``row`` are empty (as judged by ``isEmpty``), or when the
    summed allele number is 0.  Otherwise returns
    ``round_sigfigs((AC_genome + AC_exome) / (AN_genome + AN_exome), 4)``,
    with each count and number read through ``getNumericAFValue``.
    """
    genome_af_missing = isEmpty(row['Allele_frequency_genome_GnomAD'])
    if genome_af_missing and isEmpty(row['Allele_frequency_exome_GnomAD']):
        return EMPTY

    genome_count = getNumericAFValue(row['Allele_count_genome_GnomAD'])
    genome_number = getNumericAFValue(row['Allele_number_genome_GnomAD'])
    exome_count = getNumericAFValue(row['Allele_count_exome_GnomAD'])
    exome_number = getNumericAFValue(row['Allele_number_exome_GnomAD'])

    # Guard against dividing by zero when neither dataset has called alleles.
    if (genome_number + exome_number) == 0:
        return EMPTY

    combined = (genome_count + exome_count) / (genome_number + exome_number)
    return round_sigfigs(combined, 4)
 def test_append_exac_allele_frequencies_rounds_to_three_sig_figs(self):
     """Every AF_<subpopulation> value is '-' or already rounded to 3 sig figs.

     Reads ``test_files/ExAC_AF.vcf`` (relative to this test module), runs
     each record through ``append_exac_allele_frequencies`` in its in-place
     mode, and checks every generated ``AF_*`` INFO value.
     """
     exac_vcf_filename = os.path.join(os.path.dirname(__file__), 'test_files/ExAC_AF.vcf')
     # The reader pulls records lazily, so the loop must stay inside the
     # ``with`` block; the handle is closed as soon as iteration finishes.
     with open(exac_vcf_filename, 'r') as vcf_file:
         for record in vcf.Reader(vcf_file):
             record = append_exac_allele_frequencies(record, new_record=None, i=None)
             for subpopulation in EXAC_SUBPOPULATIONS:
                 val = record.INFO["AF_" + subpopulation]
                 # Only the float conversion is guarded, so a ValueError raised
                 # elsewhere is not mistaken for a non-numeric placeholder.
                 try:
                     float_val = float(val)
                 except ValueError:
                     self.assertEqual(val, '-')
                 else:
                     self.assertEqual(float_val, round_sigfigs(float_val, 3))
Esempio n. 4
0
 def test_append_exac_allele_frequencies_rounds_to_three_sig_figs(self):
     """Check that generated ExAC AF_* values are '-' or 3-sig-fig numbers.

     Each record of ``test_files/ExAC_AF.vcf`` (located next to this test
     module) is passed to ``append_exac_allele_frequencies`` with
     ``new_record=None``; every resulting ``AF_<subpopulation>`` value must
     either be the ``'-'`` placeholder or be unchanged by
     ``round_sigfigs(value, 3)``.
     """
     exac_vcf_filename = os.path.join(os.path.dirname(__file__),
                                      'test_files/ExAC_AF.vcf')
     # Use a context manager so the VCF file handle is always closed, even
     # when an assertion fails part-way through the file.
     with open(exac_vcf_filename, 'r') as vcf_file:
         for record in vcf.Reader(vcf_file):
             record = append_exac_allele_frequencies(record,
                                                     new_record=None,
                                                     i=None)
             for subpopulation in EXAC_SUBPOPULATIONS:
                 val = record.INFO["AF_" + subpopulation]
                 try:
                     float_val = float(val)
                 except ValueError:
                     # Non-numeric values must be the placeholder.
                     self.assertEqual(val, '-')
                 else:
                     self.assertEqual(float_val,
                                      round_sigfigs(float_val, 3))
def append_exac_allele_frequencies(record, new_record=None, i=None):
    """Compute ExAC subpopulation allele frequencies and store them in INFO.

    For every entry of ``EXAC_SUBPOPULATIONS`` an ``AF_<subpopulation>``
    value is written: ``AC / AN`` passed through
    ``utilities.round_sigfigs(..., 3)`` and converted to ``str``, or ``"-"``
    when it is not computed.

    * With ``new_record`` left as ``None`` the values are written onto
      ``record`` itself, using the first element of each ``AC_*`` value
      (``"-"`` if that value is empty or ``AN_*`` is 0).  ``record`` is
      returned and ``i`` is unused.
    * With a ``new_record`` the values are written onto it, using element
      ``i`` of each ``AC_*`` value (``"-"`` if ``AN_*`` is 0), and
      ``record.INFO['AF'][i]`` is copied to ``new_record.INFO['AF']``.
      ``new_record`` is returned.
    """
    per_allele = new_record is not None
    if per_allele:
        new_record.INFO['AF'] = record.INFO['AF'][i]
    destination = new_record if per_allele else record

    for population in EXAC_SUBPOPULATIONS:
        ac_key = "AC_" + population
        an_key = "AN_" + population
        if per_allele:
            count = record.INFO[ac_key][i]
            number = record.INFO[an_key]
            computable = number != 0
        else:
            counts = record.INFO[ac_key]
            number = record.INFO[an_key]
            computable = len(counts) > 0 and number != 0
            if computable:
                count = counts[0]

        if computable:
            ratio = float(count) / float(number)
            frequency = str(utilities.round_sigfigs(ratio, 3))
        else:
            frequency = "-"
        destination.INFO["AF_" + population] = frequency

    return destination
def selectMaxAlleleFrequency(newRow):
    """Return a display string for the largest population allele frequency.

    Scans a fixed list of ESP, 1000 Genomes and ExAC frequency fields in
    ``newRow``.  Each non-empty field is split on commas, its values are
    converted to ``float`` and the largest is taken.  The overall largest
    value is reported as ``"<frequency> (<subpopulation> from <source>)"``,
    where source and subpopulation come from ``determineSourceForMAF`` and
    ``determineSubpopulationForMAF`` for the winning field name.

    When the source contains ``"ExAC"`` the frequency is formatted via
    ``utilities.round_sigfigs(value, 3)``; otherwise ``"%f"`` is used.

    Args:
        newRow: mapping that contains every field listed below; values are
            comma-separated numeric strings, ``EMPTY`` or ``None``.

    Returns:
        The formatted string, or ``EMPTY`` when no field holds a value
        strictly greater than 0.  On ties the earliest field in the list
        wins, because only a strictly larger value replaces the current one.

    Raises:
        KeyError: if ``newRow`` lacks one of the fields.
        ValueError: if a field holds text that ``float`` cannot parse.
    """
    allele_frequency_fields = (
        "EA_Allele_Frequency_ESP",
        "AA_Allele_Frequency_ESP",
        "Allele_Frequency_ESP",
        "EUR_Allele_frequency_1000_Genomes",
        "AFR_Allele_frequency_1000_Genomes",
        "AMR_Allele_frequency_1000_Genomes",
        "EAS_Allele_frequency_1000_Genomes",
        "SAS_Allele_frequency_1000_Genomes",
        "Allele_frequency_AFR_ExAC",
        "Allele_frequency_AMR_ExAC",
        "Allele_frequency_EAS_ExAC",
        "Allele_frequency_FIN_ExAC",
        "Allele_frequency_NFE_ExAC",
        "Allele_frequency_OTH_ExAC",
        "Allele_frequency_SAS_ExAC",
    )
    maxFreq = 0
    maxFreqString = EMPTY
    for field in allele_frequency_fields:
        # Look the value up once instead of once per comparison.
        value = newRow[field]
        if value is None or value == EMPTY:
            continue
        max_in_field = max(float(freq) for freq in value.split(','))
        if max_in_field > maxFreq:
            source = determineSourceForMAF(field)
            subpopulation = determineSubpopulationForMAF(field)
            maxFreq = max_in_field
            if "ExAC" in source:
                # Ensure ExAC values maintain 3 significant figures.
                maxFreqStringPrefix = str(utilities.round_sigfigs(max_in_field, 3))
                maxFreqStringSuffix = " (%s from %s)" % (subpopulation, source)
                maxFreqString = maxFreqStringPrefix + maxFreqStringSuffix
            else:
                maxFreqString = "%f (%s from %s)" % (max_in_field, subpopulation, source)
    return maxFreqString