def test_ignores_unused_column_changes_in_compare_row(self):
    """Expect compareRow to return None for this pair of rows.

    The old row gets a 'pyhgvs_Protein' value and the new row gets an
    'HGVS_Protein' value before the comparison is made.
    """
    releaseDiff.added_data = self.added_data
    old_row, new_row = self.oldRow, self.newRow
    old_row['pyhgvs_Protein'] = "NP_009225.1:p.?"
    new_row['HGVS_Protein'] = "NM_000059:p.His1085Arg"
    comparer = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    outcome = comparer.compareRow(old_row, new_row)
    self.assertIsNone(outcome)
def test_builds_diff_with_adjusted_column_names_to_match_db(self):
    """Expect the recorded diff entry to name the field 'HGVS_Protein'.

    Both rows are given different 'pyhgvs_Protein' values; after
    compareRow, the first diff_json entry for the variant is checked.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.oldRow['pyhgvs_Protein'] = "NP_009225.1:p.?"
    self.newRow['pyhgvs_Protein'] = "NM_000059:p.His1085Arg"
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    # Only the side effect on diff_json matters here; the return value is unused.
    v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json[variant]
    # Compare by value: string identity depends on interning and is not reliable.
    self.assertEqual(diff[0]['field'], 'HGVS_Protein')
def test_properly_classifies_variants_with_removed_columns_of_empty_data(self):
    """Expect one diff entry and an "added_information" change type.

    The old row has "-" in 'Functional_analysis_result_LOVD' and the new
    row has ",ENIGMA" appended to 'Source'.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.oldRow["Functional_analysis_result_LOVD"] = "-"
    self.newRow['Source'] += ",ENIGMA"
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json[variant]
    self.assertEqual(len(diff), 1)
    # Compare by value: string identity depends on interning and is not reliable.
    self.assertEqual(change_type, "added_information")
def test_catches_reordered_source_urls(self):
    """Expect no diff and no change type when 'Source_URL' entries are only reordered.

    The old and new rows hold the same three comma-separated URLs with
    the first two swapped.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.oldRow["Source_URL"] = (
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000075538, "
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000144133, "
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000109288"
    )
    self.newRow["Source_URL"] = (
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000144133, "
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000075538, "
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000109288"
    )
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_ignore_added_or_removed_white_space(self):
    """Expect no diff and no change type when 'Synonyms' differs only by a space.

    compareRow is called with False as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    old_row, new_row = self.oldRow, self.newRow
    old_row['Synonyms'] = '-2498del2'
    new_row['Synonyms'] = '-2498 del2'
    comparer = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    outcome = comparer.compareRow(old_row, new_row, False)
    self.assertEqual(releaseDiff.diff_json, {})
    self.assertIsNone(outcome)
def test_ignore_date_format_changes(self):
    """Expect no diff and no change type for the two date spellings used below.

    'Date_last_evaluated_ENIGMA' is '1 / 12 / 2015' in the old row and
    '2015 - 01 - 12' in the new row; compareRow is called with False as
    its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    old_row, new_row = self.oldRow, self.newRow
    old_row['Date_last_evaluated_ENIGMA'] = '1 / 12 / 2015'
    new_row['Date_last_evaluated_ENIGMA'] = '2015 - 01 - 12'
    comparer = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    outcome = comparer.compareRow(old_row, new_row, False)
    self.assertEqual(releaseDiff.diff_json, {})
    self.assertIsNone(outcome)
def test_ignore_white_spaces(self):
    """Expect no diff and no change type when 'BIC_Nomenclature' differs only by a space.

    compareRow is called with False as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    old_row, new_row = self.oldRow, self.newRow
    old_row['BIC_Nomenclature'] = 'IVS7-680del2'
    new_row['BIC_Nomenclature'] = 'IVS 7-680del2'
    comparer = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    outcome = comparer.compareRow(old_row, new_row, False)
    self.assertEqual(releaseDiff.diff_json, {})
    self.assertIsNone(outcome)
def test_ignores_max_allele_frequency_field(self):
    """Expect no diff and no change type when only 'Max_Allele_Frequency' is set.

    The new row gets "-" in 'Max_Allele_Frequency'; compareRow is called
    with False as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.newRow["Max_Allele_Frequency"] = "-"
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_catches_new_column_changes_in_compare_row(self):
    """Expect a "changed_information" result with the old and new protein values recorded.

    Both rows are given different 'pyhgvs_Protein' values; the first
    diff_json entry for the variant must list the old value as removed
    and the new value as added.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.oldRow['pyhgvs_Protein'] = "NP_009225.1:p.?"
    self.newRow['pyhgvs_Protein'] = "NM_000059:p.His1085Arg"
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json[variant]
    self.assertEqual(change_type, "changed_information")
    # Compare by value: string identity depends on interning and is not reliable.
    self.assertEqual(diff[0]['removed'], 'NP_009225.1:p.?')
    self.assertEqual(diff[0]['added'], 'NM_000059:p.His1085Arg')
def test_catches_pathogenicity_changes(self):
    """Expect two diff entries and a "changed_classification" change type.

    'Pathogenicity_all' and 'Pathogenicity_expert' both differ between
    the old and new rows.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.oldRow['Pathogenicity_all'] = "Pathogenic,not_provided (ClinVar); Class 5 (BIC)"
    self.oldRow['Pathogenicity_expert'] = "Not Yet Classified"
    self.newRow['Pathogenicity_all'] = "Pathogenic(ENIGMA); Pathogenic,not_provided (ClinVar); Class 5 (BIC)"
    self.newRow['Pathogenicity_expert'] = "Pathogenic"
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json[variant]
    self.assertEqual(len(diff), 2)
    # Compare by value: string identity depends on interning and is not reliable.
    self.assertEqual(change_type, "changed_classification")
def test_catches_pathogenicity_changes(self):
    """Expect two diff entries and a "changed_classification" change type.

    'Pathogenicity_all' and 'Pathogenicity_expert' both differ between
    the old and new rows; compareRow is called with False as its third
    positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.oldRow['Pathogenicity_all'] = "Pathogenic,not_provided (ClinVar); Class 5 (BIC)"
    self.oldRow['Pathogenicity_expert'] = "Not Yet Classified"
    self.newRow['Pathogenicity_all'] = "Pathogenic(ENIGMA); Pathogenic,not_provided (ClinVar); Class 5 (BIC)"
    self.newRow['Pathogenicity_expert'] = "Pathogenic"
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json[variant]
    self.assertEqual(len(diff), 2)
    # Compare by value: string identity depends on interning and is not reliable.
    self.assertEqual(change_type, "changed_classification")
def test_change_type_ignores_reorders(self):
    """Expect an empty diff and no change type when list items are only reordered.

    'Pathogenicity_all' differs only in the order of "not_provided" and
    "Pathogenic" inside the ClinVar group; 'Pathogenicity_expert' is the
    same in both rows. compareRow is called with False as its third
    positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    self.oldRow['Pathogenicity_all'] = "Pathogenic(ENIGMA); not_provided,Pathogenic (ClinVar); Class 5 (BIC)"
    self.oldRow['Pathogenicity_expert'] = "Pathogenic"
    self.newRow['Pathogenicity_all'] = "Pathogenic(ENIGMA); Pathogenic,not_provided (ClinVar); Class 5 (BIC)"
    self.newRow['Pathogenicity_expert'] = "Pathogenic"
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    self.assertEqual(len(diff), 0)
    self.assertIsNone(change_type)
def test_properly_ignores_exac_minus_tcga_rounding_changes_for_generic_allele_frequency(self):
    """Expect no diff and no change type for the two 'Allele_Frequency' values below.

    The old row holds '9.419e-06 (ExAC minus TCGA)' and the new row
    holds '9.42e-06 (ExAC minus TCGA)'.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    self.oldRow['Allele_Frequency'] = '9.419e-06 (ExAC minus TCGA)'
    self.newRow['Allele_Frequency'] = '9.42e-06 (ExAC minus TCGA)'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_ignored_exac_minus_tcga_instead_of_exac(self):
    """Expect no diff and no change type when only the source label of 'Allele_Frequency' differs.

    The old row holds '9.42e-06 (ExAC)' and the new row holds
    '9.42e-06 (ExAC minus TCGA)'.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    self.oldRow['Allele_Frequency'] = '9.42e-06 (ExAC)'
    self.newRow['Allele_Frequency'] = '9.42e-06 (ExAC minus TCGA)'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_catches_reordered_source_urls(self):
    """Expect no diff and no change type when 'Source_URL' entries are only reordered.

    The old and new rows hold the same three comma-separated URLs with
    the first two swapped.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.oldRow["Source_URL"] = (
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000075538, "
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000144133, "
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000109288"
    )
    self.newRow["Source_URL"] = (
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000144133, "
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000075538, "
        "http://www.ncbi.nlm.nih.gov/clinvar/?term=SCV000109288"
    )
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_handles_null_data_in_new_columns_in_v2_data_correctly(self):
    """Expect no diff and no change type when new columns hold "", None or "-".

    Three extra LOVD field names are added on the new side only, and the
    new row fills them with an empty string, None and "-". compareRow is
    called with False as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    self.updated_fieldnames = self.fieldnames + ['Genetic_origin_LOVD', 'RNA_LOVD', 'Submitters_LOVD']
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    self.newRow["Genetic_origin_LOVD"] = ""
    self.newRow["RNA_LOVD"] = None
    self.newRow["Submitters_LOVD"] = "-"
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_ignores_rounding_for_exac_af_fields(self):
    """Expect no diff and no change type for the two 'Allele_frequency_FIN_ExAC' values below.

    The old row holds '9.4181237657' and the new row holds '9.42'.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    self.oldRow['Allele_frequency_FIN_ExAC'] = '9.4181237657'
    self.newRow['Allele_frequency_FIN_ExAC'] = '9.42'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_handles_new_data_in_new_columns_in_v2_data_correctly(self):
    """Expect "added_information" and three diff entries when new columns carry data.

    Three extra LOVD field names are added on the new side only, and the
    new row fills each with "lorem ipsum". compareRow is called with
    False as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.updated_fieldnames = self.fieldnames + ['Genetic_origin_LOVD', 'RNA_LOVD', 'Submitters_LOVD']
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    self.newRow["Genetic_origin_LOVD"] = "lorem ipsum"
    self.newRow["RNA_LOVD"] = "lorem ipsum"
    self.newRow["Submitters_LOVD"] = "lorem ipsum"
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    self.assertEqual(change_type, "added_information")
    # assertEqual reports the actual length on failure, unlike assertTrue(... == 3).
    self.assertEqual(len(diff[variant]), 3)
def test_catches_changed_numeric_values_after_normalization(self):
    """Expect one diff_json key and "changed_information" for differing numeric values.

    'Allele_frequency_ExAC' is '9.841e-06' in the old row and
    '9.841e-07' in the new row. compareRow is called with False as its
    third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.oldRow['Allele_frequency_ExAC'] = '9.841e-06'
    self.newRow['Allele_frequency_ExAC'] = '9.841e-07'
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    self.assertEqual(len(diff), 1)
    # Compare by value: string identity depends on interning and is not reliable.
    self.assertEqual(change_type, "changed_information")
def test_change_type_ignores_reorders(self):
    """Expect an empty diff and no change type when list items are only reordered.

    'Pathogenicity_all' differs only in the order of "not_provided" and
    "Pathogenic" inside the ClinVar group; 'Pathogenicity_expert' is the
    same in both rows.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    self.oldRow['Pathogenicity_all'] = "Pathogenic(ENIGMA); not_provided,Pathogenic (ClinVar); Class 5 (BIC)"
    self.oldRow['Pathogenicity_expert'] = "Pathogenic"
    self.newRow['Pathogenicity_all'] = "Pathogenic(ENIGMA); Pathogenic,not_provided (ClinVar); Class 5 (BIC)"
    self.newRow['Pathogenicity_expert'] = "Pathogenic"
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(len(diff), 0)
    self.assertIsNone(change_type)
def test_ignores_change_from_none_to_empty_string(self):
    """Expect no diff and no change type when 'Submitter_ClinVar' gains a leading comma.

    The new value is the old value prefixed with ",".
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.oldRow["Submitter_ClinVar"] = "Quest_Diagnostics_Nichols_Institute_San_Juan_Capistrano"
    self.newRow["Submitter_ClinVar"] = ",Quest_Diagnostics_Nichols_Institute_San_Juan_Capistrano"
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_properly_ignores_exac_minus_tcga_rounding_changes_for_generic_allele_frequency(self):
    """Expect no diff and no change type for the two 'Allele_Frequency' values below.

    The old row holds '9.419e-06 (ExAC minus TCGA)' and the new row
    holds '9.42e-06 (ExAC minus TCGA)'. compareRow is called with False
    as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    self.oldRow['Allele_Frequency'] = '9.419e-06 (ExAC minus TCGA)'
    self.newRow['Allele_Frequency'] = '9.42e-06 (ExAC minus TCGA)'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_ignores_change_from_none_to_empty_string(self):
    """Expect no diff and no change type when 'Submitter_ClinVar' gains a leading comma.

    The new value is the old value prefixed with ","; compareRow is
    called with False as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.oldRow["Submitter_ClinVar"] = "Quest_Diagnostics_Nichols_Institute_San_Juan_Capistrano"
    self.newRow["Submitter_ClinVar"] = ",Quest_Diagnostics_Nichols_Institute_San_Juan_Capistrano"
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_ignores_polyphen_fields(self):
    """Expect no diff and no change type when only Polyphen fields are set on the new row.

    The new row gets values for 'Polyphen_Score' and
    'Polyphen_Prediction'.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.newRow["Polyphen_Score"] = "0.283"
    self.newRow["Polyphen_Prediction"] = "probably_damaging"
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_handles_null_data_in_new_columns_in_v2_data_correctly(self):
    """Expect no diff and no change type when new columns hold "", None or "-".

    Three extra LOVD field names are added on the new side only, and the
    new row fills them with an empty string, None and "-".
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    self.updated_fieldnames = self.fieldnames + ['Genetic_origin_LOVD', 'RNA_LOVD', 'Submitters_LOVD']
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    self.newRow["Genetic_origin_LOVD"] = ""
    self.newRow["RNA_LOVD"] = None
    self.newRow["Submitters_LOVD"] = "-"
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_handles_new_data_in_new_columns_in_v2_data_correctly(self):
    """Expect "added_information" and three diff entries when new columns carry data.

    Three extra LOVD field names are added on the new side only, and the
    new row fills each with "lorem ipsum".
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.updated_fieldnames = self.fieldnames + ['Genetic_origin_LOVD', 'RNA_LOVD', 'Submitters_LOVD']
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    self.newRow["Genetic_origin_LOVD"] = "lorem ipsum"
    self.newRow["RNA_LOVD"] = "lorem ipsum"
    self.newRow["Submitters_LOVD"] = "lorem ipsum"
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(change_type, "added_information")
    # assertEqual reports the actual length on failure, unlike assertTrue(... == 3).
    self.assertEqual(len(diff[variant]), 3)
def test_catches_changed_numeric_values_after_normalization(self):
    """Expect one diff_json key and "changed_information" for differing numeric values.

    'Allele_frequency_ExAC' is '9.841e-06' in the old row and
    '9.841e-07' in the new row.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.oldRow['Allele_frequency_ExAC'] = '9.841e-06'
    self.newRow['Allele_frequency_ExAC'] = '9.841e-07'
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(len(diff), 1)
    # Compare by value: string identity depends on interning and is not reliable.
    self.assertEqual(change_type, "changed_information")
def test_add_gs_to_genomic_coordinate(self):
    """Expect no change type once both rows have passed through addGsIfNecessary.

    The old row's three pyhgvs genomic coordinate fields lack the "g."
    prefix that the new row's fields carry. Each row is passed through
    releaseDiff.addGsIfNecessary before being compared.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    self.oldRow['pyhgvs_Genomic_Coordinate_38'] = "chr17:43049067:C>T"
    self.newRow['pyhgvs_Genomic_Coordinate_38'] = "chr17:g.43049067:C>T"
    self.oldRow['pyhgvs_Genomic_Coordinate_37'] = "chr17:43049067:C>T"
    self.newRow['pyhgvs_Genomic_Coordinate_37'] = "chr17:g.43049067:C>T"
    self.oldRow['pyhgvs_Genomic_Coordinate_36'] = "chr17:43049067:C>T"
    self.newRow['pyhgvs_Genomic_Coordinate_36'] = "chr17:g.43049067:C>T"
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.oldRow = releaseDiff.addGsIfNecessary(self.oldRow)
    # The result for the new row must be stored in newRow. Storing it in
    # oldRow would overwrite the normalized old row, so the comparison
    # below would no longer involve the old row's data at all.
    self.newRow = releaseDiff.addGsIfNecessary(self.newRow)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    self.assertIsNone(change_type)
def test_handle_repeat_data_correctly(self):
    """Expect no diff and no change type when values differ only by repeated list items.

    'Pathogenicity_all' and 'Clinical_Significance_ClinVar' lose a
    duplicated "Pathogenic" entry, and 'Submitter_ClinVar' gains a
    duplicated "PreventionGenetics" entry.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    self.oldRow["Pathogenicity_all"] = "Pathogenic,Pathogenic,not_provided"
    self.newRow["Pathogenicity_all"] = "Pathogenic,not_provided"
    self.oldRow["Clinical_Significance_ClinVar"] = "Pathogenic,Pathogenic,not_provided"
    self.newRow["Clinical_Significance_ClinVar"] = "Pathogenic,not_provided"
    self.oldRow["Submitter_ClinVar"] = "PreventionGenetics"
    self.newRow["Submitter_ClinVar"] = "PreventionGenetics,PreventionGenetics"
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_displays_integers_as_integers_not_floats(self):
    """Expect the diff to record '567' (not a float form) as the added value.

    'Allele_count_AFR' goes from '-' in the old row to '567' in the new
    row; the single diff entry must carry those exact strings.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.oldRow['Allele_count_AFR'] = '-'
    self.newRow['Allele_count_AFR'] = '567'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    # Only the side effect on diff_json matters here; the return value is unused.
    v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    # Check the size first so a missing entry fails as an assertion, not a KeyError.
    self.assertEqual(len(diff), 1)
    v_diff = diff[variant][0]
    self.assertEqual(v_diff['field'], 'Allele_count_AFR')
    self.assertEqual(v_diff['added'], '567')
    self.assertEqual(v_diff['removed'], '-')
def test_properly_handles_field_name_changes_same_data(self):
    """Expect no diff and no change type when a renamed field carries the same value.

    The old side has 'Minor_allele_frequency_ESP' and the new side has
    'Minor_allele_frequency_percent_ESP', both holding '2.5'.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    # Build the new-side list first: it must derive from the unmodified fieldnames.
    self.updated_fieldnames = self.fieldnames + ['Minor_allele_frequency_percent_ESP']
    self.fieldnames = self.fieldnames + ['Minor_allele_frequency_ESP']
    self.oldRow['Minor_allele_frequency_ESP'] = '2.5'
    self.newRow['Minor_allele_frequency_percent_ESP'] = '2.5'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_properly_handles_field_name_changes_same_data(self):
    """Expect no diff and no change type when a renamed field carries the same value.

    The old side has 'Minor_allele_frequency_ESP' and the new side has
    'Minor_allele_frequency_percent_ESP', both holding '2.5'. compareRow
    is called with False as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    # Build the new-side list first: it must derive from the unmodified fieldnames.
    self.updated_fieldnames = self.fieldnames + ['Minor_allele_frequency_percent_ESP']
    self.fieldnames = self.fieldnames + ['Minor_allele_frequency_ESP']
    self.oldRow['Minor_allele_frequency_ESP'] = '2.5'
    self.newRow['Minor_allele_frequency_percent_ESP'] = '2.5'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_different_values_for_exac_af_fields(self):
    """Expect "changed_information" with '9.42' removed and '9.41' added.

    'Allele_frequency_FIN_ExAC' is '9.4181237657' in the old row and
    '9.41' in the new row; the diff entry is expected to report the old
    value as '9.42'.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.oldRow['Allele_frequency_FIN_ExAC'] = '9.4181237657'
    self.newRow['Allele_frequency_FIN_ExAC'] = '9.41'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    # Check the size first so a missing entry fails as an assertion, not a KeyError.
    self.assertEqual(len(diff), 1)
    v_diff = diff[variant][0]
    self.assertEqual(v_diff['field'], 'Allele_frequency_FIN_ExAC')
    self.assertEqual(v_diff['added'], '9.41')
    self.assertEqual(v_diff['removed'], '9.42')
    self.assertEqual(change_type, "changed_information")
def test_catches_value_change_for_exac_allele_frequency(self):
    """Expect "changed_information" when the numeric part of 'Allele_Frequency' changes.

    The old row holds '9.42e-06 (ExAC)' and the new row holds
    '9.99e-06 (ExAC minus TCGA)'; the diff entry is expected to report
    the old value as '9.42e-06 (ExAC minus TCGA)'.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.oldRow['Allele_Frequency'] = '9.42e-06 (ExAC)'
    self.newRow['Allele_Frequency'] = '9.99e-06 (ExAC minus TCGA)'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    # Check the size first so a missing entry fails as an assertion, not a KeyError.
    self.assertEqual(len(diff), 1)
    v_diff = diff[variant][0]
    self.assertEqual(v_diff['field'], 'Allele_Frequency')
    self.assertEqual(v_diff['added'], '9.99e-06 (ExAC minus TCGA)')
    self.assertEqual(v_diff['removed'], '9.42e-06 (ExAC minus TCGA)')
    self.assertEqual(change_type, "changed_information")
def test_ignores_cosmetic_changes_in_diff(self):
    """Expect no diff and no change type across four cosmetic-only variations.

    Checked in sequence on the same rows:
    1. 'Submitter_ClinVar' loses a leading "The_" and gains "_" after a comma.
    2. The same two spellings in the opposite direction.
    3. The new row additionally gets '0.0' and '9.841e-06' in two
       allele-frequency fields.
    4. Those fields are '0.0' / '9.841e-06' on the old row and
       '0' / '9.841E-06' on the new row.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    with_article = "The_Consortium_of_Investigators_of_Modifiers_of_BRCA1/2_(CIMBA),c/o_University_of_Cambridge"
    without_article = "Consortium_of_Investigators_of_Modifiers_of_BRCA1/2_(CIMBA),_c/o_University_of_Cambridge"

    self.oldRow["Submitter_ClinVar"] = with_article
    self.newRow["Submitter_ClinVar"] = without_article
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)

    self.oldRow["Submitter_ClinVar"] = without_article
    self.newRow["Submitter_ClinVar"] = with_article
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)

    self.newRow['EAS_Allele_frequency_1000_Genomes'] = '0.0'
    self.newRow['Allele_frequency_ExAC'] = '9.841e-06'
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)

    self.oldRow['EAS_Allele_frequency_1000_Genomes'] = '0.0'
    self.oldRow['Allele_frequency_ExAC'] = '9.841e-06'
    self.newRow['EAS_Allele_frequency_1000_Genomes'] = '0'
    self.newRow['Allele_frequency_ExAC'] = '9.841E-06'
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_handles_fields_with_commas_in_parentheses_correctly(self):
    """Expect 'Submitters_LOVD' to be split into three added items.

    The new value lists three submitters separated by ", ", two of which
    contain a comma inside parentheses; each full submitter string must
    appear in the diff entry's 'added' collection.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.updated_fieldnames = self.fieldnames + ['Genetic_origin_LOVD', 'RNA_LOVD', 'Submitters_LOVD']
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    self.newRow["Submitters_LOVD"] = "Ans M.W. van den Ouweland (Rotterdam,NL), Genevieve Michils (Leuven,BE), Rien Blok (Maastricht NL)"
    # Only the side effect on diff_json matters here; the return value is unused.
    v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(len(diff), 1)
    v_diff = diff[variant][0]
    self.assertEqual(v_diff['field'], 'Submitters_LOVD')
    self.assertEqual(len(v_diff['added']), 3)
    self.assertIn("Ans M.W. van den Ouweland (Rotterdam,NL)", v_diff['added'])
    self.assertIn("Genevieve Michils (Leuven,BE)", v_diff['added'])
    self.assertIn("Rien Blok (Maastricht NL)", v_diff['added'])
def test_handles_fields_with_commas_in_parentheses_correctly(self):
    """Expect 'Submitters_LOVD' to be split into three added items.

    The new value lists three submitters separated by ", ", two of which
    contain a comma inside parentheses; each full submitter string must
    appear in the diff entry's 'added' collection. compareRow is called
    with False as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    self.updated_fieldnames = self.fieldnames + ['Genetic_origin_LOVD', 'RNA_LOVD', 'Submitters_LOVD']
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    self.newRow["Submitters_LOVD"] = "Ans M.W. van den Ouweland (Rotterdam,NL), Genevieve Michils (Leuven,BE), Rien Blok (Maastricht NL)"
    # Only the side effect on diff_json matters here; the return value is unused.
    v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    self.assertEqual(len(diff), 1)
    v_diff = diff[variant][0]
    self.assertEqual(v_diff['field'], 'Submitters_LOVD')
    self.assertEqual(len(v_diff['added']), 3)
    self.assertIn("Ans M.W. van den Ouweland (Rotterdam,NL)", v_diff['added'])
    self.assertIn("Genevieve Michils (Leuven,BE)", v_diff['added'])
    self.assertIn("Rien Blok (Maastricht NL)", v_diff['added'])
def test_properly_handles_field_name_changes_added_data(self):
    """Expect "added_information" when a renamed field goes from '-' to a value.

    The old side has 'Minor_allele_frequency_ESP' set to '-' and the new
    side has 'Minor_allele_frequency_percent_ESP' set to '2.5'; the diff
    entry must be reported under the new field name.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    # Build the new-side list first: it must derive from the unmodified fieldnames.
    self.updated_fieldnames = self.fieldnames + ['Minor_allele_frequency_percent_ESP']
    self.fieldnames = self.fieldnames + ['Minor_allele_frequency_ESP']
    self.oldRow['Minor_allele_frequency_ESP'] = '-'
    self.newRow['Minor_allele_frequency_percent_ESP'] = '2.5'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    # Check the size first so a missing entry fails as an assertion, not a KeyError.
    self.assertEqual(len(diff), 1)
    v_diff = diff[variant][0]
    self.assertEqual(v_diff['field'], 'Minor_allele_frequency_percent_ESP')
    self.assertEqual(v_diff['added'], '2.5')
    self.assertEqual(v_diff['removed'], '-')
    self.assertEqual(change_type, "added_information")
def test_properly_handles_field_name_changes_added_data(self):
    """Expect "added_information" when a renamed field goes from '-' to a value.

    The old side has 'Minor_allele_frequency_ESP' set to '-' and the new
    side has 'Minor_allele_frequency_percent_ESP' set to '2.5'; the diff
    entry must be reported under the new field name. compareRow is
    called with False as its third positional argument.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    variant = 'chr17:g.43049067:C>T'
    # Build the new-side list first: it must derive from the unmodified fieldnames.
    self.updated_fieldnames = self.fieldnames + ['Minor_allele_frequency_percent_ESP']
    self.fieldnames = self.fieldnames + ['Minor_allele_frequency_ESP']
    self.oldRow['Minor_allele_frequency_ESP'] = '-'
    self.newRow['Minor_allele_frequency_percent_ESP'] = '2.5'
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.updated_fieldnames)
    change_type = v1v2.compareRow(self.oldRow, self.newRow, False)
    diff = releaseDiff.diff_json
    # Check the size first so a missing entry fails as an assertion, not a KeyError.
    self.assertEqual(len(diff), 1)
    v_diff = diff[variant][0]
    self.assertEqual(v_diff['field'], 'Minor_allele_frequency_percent_ESP')
    self.assertEqual(v_diff['added'], '2.5')
    self.assertEqual(v_diff['removed'], '-')
    self.assertEqual(change_type, "added_information")
def test_ignores_cosmetic_changes_in_diff(self):
    """Expect no diff and no change type across four cosmetic-only variations.

    Checked in sequence on the same rows:
    1. 'Submitter_ClinVar' loses a leading "The_" and gains "_" after a comma.
    2. The same two spellings in the opposite direction.
    3. The new row additionally gets '0.0' and '9.841e-06' in two
       allele-frequency fields.
    4. Those fields are '0.0' / '9.841e-06' on the old row and
       '0' / '9.841E-06' on the new row.
    """
    releaseDiff.added_data = self.added_data
    releaseDiff.diff = self.diff
    releaseDiff.diff_json = self.diff_json
    v1v2 = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    with_article = "The_Consortium_of_Investigators_of_Modifiers_of_BRCA1/2_(CIMBA),c/o_University_of_Cambridge"
    without_article = "Consortium_of_Investigators_of_Modifiers_of_BRCA1/2_(CIMBA),_c/o_University_of_Cambridge"

    self.oldRow["Submitter_ClinVar"] = with_article
    self.newRow["Submitter_ClinVar"] = without_article
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)

    self.oldRow["Submitter_ClinVar"] = without_article
    self.newRow["Submitter_ClinVar"] = with_article
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)

    self.newRow['EAS_Allele_frequency_1000_Genomes'] = '0.0'
    self.newRow['Allele_frequency_ExAC'] = '9.841e-06'
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)

    self.oldRow['EAS_Allele_frequency_1000_Genomes'] = '0.0'
    self.oldRow['Allele_frequency_ExAC'] = '9.841e-06'
    self.newRow['EAS_Allele_frequency_1000_Genomes'] = '0'
    self.newRow['Allele_frequency_ExAC'] = '9.841E-06'
    change_type = v1v2.compareRow(self.oldRow, self.newRow)
    diff = releaseDiff.diff_json
    self.assertEqual(diff, {})
    self.assertIsNone(change_type)
def test_compare_row_added_data(self):
    """Expect "added_information" when ",ENIGMA" is appended to the new row's 'Source'."""
    releaseDiff.added_data = self.added_data
    new_row = self.newRow
    new_row['Source'] += ",ENIGMA"
    comparer = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    outcome = comparer.compareRow(self.oldRow, new_row)
    self.assertEqual(outcome, "added_information")
def test_compare_row_equal_rows(self):
    """Expect compareRow to return None when the fixture rows are compared unmodified."""
    comparer = releaseDiff.v1ToV2(self.fieldnames, self.fieldnames)
    outcome = comparer.compareRow(self.oldRow, self.newRow)
    self.assertIsNone(outcome)