def test_parse_clnsig_():
    """Exercise parse_clnsig with keyword arguments on two input shapes.

    The first scenario passes six pipe-separated accessions with numeric
    significance values; the second passes a single accession whose
    significance combines two slash-separated terms.
    """
    ## GIVEN clnsig information in the pipe-separated representation
    classic_info = {
        "CLNACC": "RCV000014440.17|RCV000014441.25|RCV000014442.25|RCV000014443.17|RCV000184011.1|RCV000188658.1",
        "CLNSIG": "5|5|5|5|5|5",
        "CLNREVSTAT": "conf|single|single|single|conf|conf",
    }

    ## WHEN parsing the clinical significance
    classic_annotations = parse_clnsig(
        acc=classic_info["CLNACC"],
        sig=classic_info["CLNSIG"],
        revstat=classic_info["CLNREVSTAT"],
        transcripts=[],
    )

    ## THEN assert that one annotation per accession was produced
    assert len(classic_annotations) == 6
    ## THEN assert that the first accession kept its value and review status
    for annotation in classic_annotations:
        # Only the first accession is checked; the others are skipped.
        if annotation["accession"] != "RCV000014440.17":
            continue
        assert annotation["value"] == 5
        assert annotation["revstat"] == "conf"

    ## GIVEN a significance combining values from different submitters
    combined_info = {
        "CLNACC": "265359",
        "CLNSIG": "Pathogenic/Likely pathogenic",
        "CLNREVSTAT": "criteria_provided,_multiple_submitters,_no_conflicts",
    }
    # Expected review status: comma-separated parts, leading underscores
    # stripped, re-joined with ", ".
    expected_revstat = ", ".join(
        part.lstrip("_") for part in combined_info["CLNREVSTAT"].split(",")
    )
    expected_accession = int(combined_info["CLNACC"])

    ## WHEN parsing the clinical significance
    combined_annotations = parse_clnsig(
        acc=combined_info["CLNACC"],
        sig=combined_info["CLNSIG"],
        revstat=combined_info["CLNREVSTAT"],
        transcripts=[],
    )

    ## THEN assert that both significance terms were parsed correctly
    assert len(combined_annotations) == 2
    for annotation in combined_annotations:
        assert annotation["accession"] == expected_accession
        assert annotation["value"] in ("Pathogenic", "Likely pathogenic")
        assert annotation["revstat"] == expected_revstat
def test_parse_clnsig_():
    """Run parse_clnsig on a pipe-separated and on a slash-separated CLNSIG.

    Both scenarios call parse_clnsig with explicit ``acc``, ``sig``,
    ``revstat`` and an empty ``transcripts`` list.
    """
    ## GIVEN six pipe-separated accessions with numeric significance
    accessions = "RCV000014440.17|RCV000014441.25|RCV000014442.25|RCV000014443.17|RCV000184011.1|RCV000188658.1"
    significances = "5|5|5|5|5|5"
    review_statuses = "conf|single|single|single|conf|conf"

    ## WHEN parsing the clinical significance
    parsed = parse_clnsig(
        acc=accessions,
        sig=significances,
        revstat=review_statuses,
        transcripts=[],
    )

    ## THEN assert that six annotations were returned
    assert len(parsed) == 6
    ## THEN assert that the entry for the first accession is correct
    first_accession_hits = [
        item for item in parsed if item["accession"] == "RCV000014440.17"
    ]
    for item in first_accession_hits:
        assert item["value"] == 5
        assert item["revstat"] == "conf"

    ## GIVEN one accession with a combined, slash-separated significance
    accession = "265359"
    significance = "Pathogenic/Likely pathogenic"
    review_status = "criteria_provided,_multiple_submitters,_no_conflicts"
    # Build the review status string the assertions compare against:
    # split on commas, drop leading underscores, join with ", ".
    stripped_parts = []
    for part in review_status.split(","):
        stripped_parts.append(part.lstrip("_"))
    wanted_revstat = ", ".join(stripped_parts)

    ## WHEN parsing the clinical significance
    parsed = parse_clnsig(
        acc=accession,
        sig=significance,
        revstat=review_status,
        transcripts=[],
    )

    ## THEN assert that two annotations with the expected content came back
    assert len(parsed) == 2
    allowed_values = ["Pathogenic", "Likely pathogenic"]
    for item in parsed:
        assert item["accession"] == int(accession)
        assert item["value"] in allowed_values
        assert item["revstat"] == wanted_revstat
def test_parse_clnsig():
    """Parse six pipe-separated clnsig entries passed as keyword arguments."""
    ## GIVEN some clnsig information
    info = {
        "CLNACC": "RCV000014440.17|RCV000014441.25|RCV000014442.25|RCV000014443.17|RCV000184011.1|RCV000188658.1",
        "CLNSIG": "5|5|5|5|5|5",
        "CLNREVSTAT": "conf|single|single|single|conf|conf",
    }

    ## WHEN parsing the clinical significance
    annotations = parse_clnsig(
        acc=info["CLNACC"],
        sig=info["CLNSIG"],
        revstat=info["CLNREVSTAT"],
        transcripts=[],
    )

    ## THEN assert that one annotation exists per accession
    assert len(annotations) == 6
    ## THEN assert that the first accession was parsed correctly
    for annotation in annotations:
        # Entries for the other accessions are intentionally not inspected.
        if annotation["accession"] != "RCV000014440.17":
            continue
        assert annotation["value"] == 5
        assert annotation["revstat"] == "conf"
def test_parse_clnsig_transcripts(cyvcf2_variant):
    """Parse a clnsig term that is supplied through the transcripts argument."""
    ## GIVEN a single transcript carrying one clnsig term
    expected_term = "likely_benign"
    transcript_data = [{"clnsig": [expected_term]}]

    ## WHEN parsing the annotations
    parsed = parse_clnsig(cyvcf2_variant, transcripts=transcript_data)

    ## THEN assert that exactly that term was returned
    assert len(parsed) == 1
    first_annotation = parsed[0]
    assert first_annotation["value"] == expected_term
def test_parse_complex_clnsig(cyvcf2_variant):
    """Parse a CLNSIG value that mixes slash- and comma-separated terms."""
    ## GIVEN a variant whose INFO carries a three-term significance string
    info_fields = {
        "CLNACC": "265359",
        "CLNSIG": "Benign/Likely_benign,_other",
        "CLNREVSTAT": "criteria_provided,_multiple_submitters,_no_conflicts",
    }
    # Written in insertion order: CLNACC, CLNSIG, then CLNREVSTAT.
    for field_name, field_value in info_fields.items():
        cyvcf2_variant.INFO[field_name] = field_value

    ## WHEN parsing the annotations
    parsed = parse_clnsig(cyvcf2_variant)

    ## THEN assert that three annotations were produced
    assert len(parsed) == 3
def test_parse_clnsig_transcripts(cyvcf2_variant):
    """Parse transcript clnsig terms containing slashes and leading underscores."""
    ## GIVEN slash-separated values and a value that starts with an underscore
    raw_terms = [
        "pathogenic/likely_pathogenic",
        "likely_pathogenic",
        "pathogenic",
        "_other",
    ]
    transcript_data = [{"clnsig": raw_terms}]

    ## WHEN parsing the annotations
    parsed = parse_clnsig(cyvcf2_variant, transcripts=transcript_data)

    ## THEN assert that three annotations were returned
    assert len(parsed) == 3
    ## THEN assert that each expected term appears as a value-only annotation
    for expected_value in ("pathogenic", "likely_pathogenic", "other"):
        assert {"value": expected_value} in parsed
def test_parse_classic_clnsig(cyvcf2_variant):
    """Parse pipe-separated accessions, numeric significances and statuses."""
    ## GIVEN a variant with classic clinvar annotations
    acc_nr = "RCV000014440.17|RCV000014441.25|RCV000014442.25|RCV000014443.17|RCV000184011.1|RCV000188658.1"
    clnsig = "5|4|3|2|1|0"
    revstat = "conf|single|single|single|conf|conf"
    for info_key, info_value in (
        ("CLNACC", acc_nr),
        ("CLNSIG", clnsig),
        ("CLNREVSTAT", revstat),
    ):
        cyvcf2_variant.INFO[info_key] = info_value

    # Expected (value, revstat) pair for every accession in the input.
    expected_by_accession = {
        "RCV000014440.17": (5, "conf"),
        "RCV000014441.25": (4, "single"),
        "RCV000014442.25": (3, "single"),
        "RCV000014443.17": (2, "single"),
        "RCV000184011.1": (1, "conf"),
        "RCV000188658.1": (0, "conf"),
    }

    ## WHEN parsing the annotations
    parsed = parse_clnsig(cyvcf2_variant)

    ## THEN assert that there is one annotation per significance value
    assert len(parsed) == len(clnsig.split("|"))

    ## THEN assert that all accessions are there
    parsed_accessions = {annotation["accession"] for annotation in parsed}
    assert parsed_accessions == set(acc_nr.split("|"))

    ## THEN assert that all have been parsed as expected
    for annotation in parsed:
        expectation = expected_by_accession.get(annotation["accession"])
        if expectation is None:
            # Unknown accessions are not checked here.
            continue
        expected_value, expected_status = expectation
        assert annotation["value"] == expected_value
        assert annotation["revstat"] == expected_status
def test_parse_modern_clnsig_clnvid(cyvcf2_variant):
    """Parse '&'-separated significance terms when the id is stored as CLNVID."""
    ## GIVEN a variant whose accession is stored under CLNVID
    info_fields = {
        "CLNVID": "10",
        "CLNSIG": "conflicting_interpretations_of_pathogenicity&_other",
        "CLNREVSTAT": "criteria_provided&_conflicting_interpretations",
    }
    for field_name, field_value in info_fields.items():
        cyvcf2_variant.INFO[field_name] = field_value

    ## WHEN parsing the annotations
    parsed = parse_clnsig(cyvcf2_variant)

    ## THEN assert that the correct terms are parsed
    parsed_values = {annotation["value"] for annotation in parsed}
    assert {"conflicting_interpretations_of_pathogenicity", "other"} == parsed_values

    ## THEN assert that no additional annotations were produced
    assert len(parsed) == 2
def test_parse_modern_clnsig(cyvcf2_variant):
    """Parse a slash-separated, underscore-joined CLNSIG value."""
    ## GIVEN a variant with a single accession and two significance terms
    clnsig = "Pathogenic/Likely_pathogenic"
    info_fields = {
        "CLNACC": "265359",
        "CLNSIG": clnsig,
        "CLNREVSTAT": "criteria_provided,_multiple_submitters,_no_conflicts",
    }
    for field_name, field_value in info_fields.items():
        cyvcf2_variant.INFO[field_name] = field_value

    ## WHEN parsing the annotations
    parsed = parse_clnsig(cyvcf2_variant)

    ## THEN assert that the correct terms are parsed
    parsed_values = {annotation["value"] for annotation in parsed}
    assert {"pathogenic", "likely_pathogenic"} == parsed_values

    ## THEN assert that there is one annotation per slash-separated term
    expected_count = len(clnsig.split("/"))
    assert len(parsed) == expected_count
def test_parse_clnsig_all(cyvcf2_variant):
    """Check accession, value and revstat of every parsed annotation."""
    ## GIVEN a single accession with a slash-separated significance
    acc_nr = "265359"
    revstat = "criteria_provided,_multiple_submitters,_no_conflicts"
    cyvcf2_variant.INFO["CLNACC"] = acc_nr
    cyvcf2_variant.INFO["CLNSIG"] = "Pathogenic/Likely pathogenic"
    cyvcf2_variant.INFO["CLNREVSTAT"] = revstat

    # Expected review status: comma-separated parts without their leading
    # underscores, re-joined with plain commas.
    expected_revstat = ",".join(part.lstrip("_") for part in revstat.split(","))
    expected_accession = int(acc_nr)

    ## WHEN parsing the annotations
    parsed = parse_clnsig(cyvcf2_variant)

    ## THEN assert that both terms were parsed with the expected fields
    assert len(parsed) == 2
    for annotation in parsed:
        assert annotation["accession"] == expected_accession
        assert annotation["value"] in ("pathogenic", "likely_pathogenic")
        assert annotation["revstat"] == expected_revstat
def test_parse_semi_modern_clnsig(cyvcf2_variant):
    """Parse a CLNSIG value in which words are separated by spaces."""
    ## GIVEN a variant with semi modern clinvar annotations
    # "Semi modern" here means that a term may contain a space between words.
    acc_nr = "265359"
    clnsig = "Pathogenic/Likely pathogenic"
    info_fields = {
        "CLNACC": acc_nr,
        "CLNSIG": clnsig,
        "CLNREVSTAT": "criteria_provided,_multiple_submitters,_no_conflicts",
    }
    for field_name, field_value in info_fields.items():
        cyvcf2_variant.INFO[field_name] = field_value

    ## WHEN parsing the annotations
    parsed = parse_clnsig(cyvcf2_variant)

    ## THEN assert that the correct terms are parsed
    parsed_values = {annotation["value"] for annotation in parsed}
    assert {"pathogenic", "likely_pathogenic"} == parsed_values

    ## THEN assert that there is one annotation per slash-separated term
    assert len(parsed) == len(clnsig.split("/"))

    ## THEN assert that every annotation has the accession and revstat parts
    expected_revstat_parts = {
        "criteria_provided",
        "multiple_submitters",
        "no_conflicts",
    }
    expected_accession = int(acc_nr)
    for annotation in parsed:
        assert annotation["accession"] == expected_accession
        assert set(annotation["revstat"].split(",")) == expected_revstat_parts