def test_mondo_synonymization_2(rosetta):
    """Synonymize MONDO:0005737 and check its DOID and MESH synonyms.

    Expects more than one synonym overall, exactly one DOID synonym, at
    least one MESH synonym, and Text.get_curie of the node id to be 'MONDO'.
    """
    disease = KNode('MONDO:0005737', type=node_types.DISEASE)
    found = synonymize(disease, rosetta.core)
    assert len(found) > 1

    # Attach the synonyms to the node before querying them by prefix.
    disease.add_synonyms(found)

    doid_synonyms = disease.get_synonyms_by_prefix('DOID')
    assert len(doid_synonyms) == 1

    mesh_synonyms = disease.get_synonyms_by_prefix('MESH')
    assert len(mesh_synonyms) > 0

    assert Text.get_curie(disease.id) == 'MONDO'
def test_neuron(rosetta):
    """Synonymize the CL:0000540 cell node and check its MESH / FMA synonyms.

    Expects more than five synonyms on the node, no MESH synonym, and
    'FMA:54527' as the first FMA synonym.
    """
    node = KNode("CL:0000540", type=node_types.CELL)
    synonymize(node, rosetta.core)
    assert len(node.synonyms) > 5

    # We're no longer so pathological about trying to get MeSH ids, so in
    # this case we don't get one.
    mesh_synonyms = node.get_synonyms_by_prefix("MESH")
    assert len(mesh_synonyms) == 0

    # But we should get an FMA?
    # We used to get a UMLS, but OXO isn't giving us that for some reason...
    fma_synonyms = node.get_synonyms_by_prefix("FMA")
    # Fail with a readable assertion rather than an IndexError when no FMA
    # synonym comes back.
    assert len(fma_synonyms) > 0, 'expected at least one FMA synonym'
    first_fma = list(fma_synonyms)[0]
    assert first_fma == 'FMA:54527'
def test_mondo_synonymization(rosetta):
    """Synonymize MONDO:0001982 and check the exact DOID and MeSH synonyms.

    The node is Niemann Pick Disease (not type C). Expects more than ten
    synonyms, the single DOID synonym 'DOID:14504', exactly two MESH-prefixed
    synonyms, and Text.get_curie of the node id to be 'MONDO'.
    """
    disease = KNode('MONDO:0001982', type=node_types.DISEASE)
    found = synonymize(disease, rosetta.core)
    assert len(found) > 10

    # Attach the synonyms to the node before querying them by prefix.
    disease.add_synonyms(found)

    doid_synonyms = disease.get_synonyms_by_prefix('DOID')
    assert len(doid_synonyms) == 1
    assert doid_synonyms.pop() == 'DOID:14504'

    mesh_synonyms = disease.get_synonyms_by_prefix('MESH')
    assert len(mesh_synonyms) == 2
    for expected_mesh in ('MeSH:D009542', 'MeSH:D052556'):
        assert expected_mesh in mesh_synonyms

    assert Text.get_curie(disease.id) == 'MONDO'
def test_hgnc_label(rosetta):
    """Synonymizing HGNC:18729 should leave the node with a non-empty name."""
    gene = KNode('HGNC:18729', type=node_types.GENE)
    rosetta.synonymizer.synonymize(gene)

    # The HGNC synonyms are looked up, but this test does not inspect them.
    gene.get_synonyms_by_prefix('HGNC')

    assert gene.name is not None
    assert gene.name != ''
def test_phenotype(rosetta):
    """Synonymize MEDDRA:10014408 and check that it gains HP synonyms.

    Expects more than ten synonyms on the node and at least one with the
    HP prefix; the HP synonyms are printed for inspection.
    """
    phenotype = KNode(
        "MEDDRA:10014408",
        type=node_types.PHENOTYPIC_FEATURE,
    )
    synonymize(phenotype, rosetta.core)
    assert len(phenotype.synonyms) > 10

    hp_synonyms = phenotype.get_synonyms_by_prefix("HP")
    assert len(hp_synonyms) > 0
    print(hp_synonyms)
def future_test_disease_normalization(rosetta):
    """Check that DOID:4325 gains a MONDO synonym and a MONDO node id.

    NOTE(review): the name lacks the ``test_`` prefix, so default pytest
    discovery presumably does not collect this function -- confirm before
    relying on it.
    """
    disease = KNode('DOID:4325', type=node_types.DISEASE)
    found = synonymize(disease, rosetta.core)
    print(found)

    # Attach the synonyms to the node before querying them by prefix.
    disease.add_synonyms(found)

    mondo_synonyms = disease.get_synonyms_by_prefix('MONDO')
    assert len(mondo_synonyms) > 0
    assert Text.get_curie(disease.id) == 'MONDO'
def xxtest_go(rosetta):
    """Synonymize HGNC:10593 (SCN5A) and ask biolink for its processes/functions.

    NOTE(review): the name lacks the ``test_`` prefix, so default pytest
    discovery presumably does not collect this function -- confirm before
    relying on it.
    """
    gene = KNode("HGNC:10593", label="SCN5A", type=node_types.GENE)

    # The cached synonymization entry is fetched, but its value is not used
    # by this test.
    rosetta.cache.get('synonymize(HGNC:10593)')

    rosetta.synonymizer.synonymize(gene)
    print(gene.get_synonyms_by_prefix('UNIPROTKB'))

    results = rosetta.core.biolink.gene_get_process_or_function(gene)
    assert len(results) > 0
def test_uniprot(rosetta):
    """A gene node built from only UniProtKB:O75381 should become HGNC:8856.

    After synonymization the node must carry exactly one HGNC synonym, use
    it as its id, and be named 'PEX14'.
    """
    gene = KNode('UniProtKB:O75381', type=node_types.GENE)
    rosetta.synonymizer.synonymize(gene)

    hgnc_synonyms = gene.get_synonyms_by_prefix('HGNC')
    assert len(hgnc_synonyms) == 1
    assert hgnc_synonyms.pop() == 'HGNC:8856'

    assert gene.id == 'HGNC:8856'
    assert gene.name == 'PEX14'
def test_failing_uniprot_2(rosetta):
    """A gene node built from UniProtKB:P14416 with an empty name should become HGNC:3023.

    After synonymization the node must carry exactly one HGNC synonym, use
    it as its id, and be named 'DRD2'.
    """
    gene = KNode('UniProtKB:P14416', type=node_types.GENE, name='')
    rosetta.synonymizer.synonymize(gene)

    hgnc_synonyms = gene.get_synonyms_by_prefix('HGNC')
    assert len(hgnc_synonyms) == 1
    assert hgnc_synonyms.pop() == 'HGNC:3023'

    assert gene.id == 'HGNC:3023'
    assert gene.name == 'DRD2'
def test_crappy_uniprot(rosetta):
    """A gene node built from only UniProtKB:A0A024QZH5 should become HGNC:18859.

    After synonymization the node must carry exactly one HGNC synonym, use
    it as its id, and be named 'SPHK2'.
    """
    gene = KNode('UniProtKB:A0A024QZH5', type=node_types.GENE)
    rosetta.synonymizer.synonymize(gene)

    hgnc_synonyms = gene.get_synonyms_by_prefix('HGNC')
    assert len(hgnc_synonyms) == 1
    assert hgnc_synonyms.pop() == 'HGNC:18859'

    assert gene.id == 'HGNC:18859'
    assert gene.name == 'SPHK2'
def test_biolink(rosetta, biolink):
    """Look up phenotypes for a synonymized sequence variant through biolink.

    The HGVS variant must first gain the CLINVARVARIANT:94623 synonym; the
    phenotype lookup must then return HP:0000750 and HP:0003236 with at least
    one relation whose standard predicate is labelled 'has_phenotype'.
    """
    variant = KNode(
        'HGVS:NC_000023.9:g.32317682G>A',
        type=node_types.SEQUENCE_VARIANT,
    )
    rosetta.synonymizer.synonymize(variant)
    clinvar_synonyms = variant.get_synonyms_by_prefix('CLINVARVARIANT')
    assert 'CLINVARVARIANT:94623' in clinvar_synonyms

    # Each result is unpacked as a (relation, node) pair.
    results = biolink.sequence_variant_get_phenotype(variant)

    phenotype_ids = [phenotype.id for _, phenotype in results]
    assert 'HP:0000750' in phenotype_ids
    assert 'HP:0003236' in phenotype_ids

    standard_predicates = [edge.standard_predicate for edge, _ in results]
    predicate_labels = {predicate.label for predicate in standard_predicates}
    assert 'has_phenotype' in predicate_labels
def test_synonymization(rosetta, clingen):
    """Check sequence-variant synonymization from several identifier types.

    Variants are created from CAID, DBSNP, HGVS and CLINVARVARIANT ids and
    synonymized through ``rosetta.synonymizer``; each must gain the expected
    cross-references. Finally ``clingen.get_batch_of_synonyms`` is called
    with a list of HGVS ids and two of its entries are checked.
    """
    # Starting from a CAID.
    from_caid = KNode('CAID:CA128085', type=node_types.SEQUENCE_VARIANT)
    rosetta.synonymizer.synonymize(from_caid)
    assert 'HGVS:NC_000012.12:g.111803962G>A' in from_caid.get_synonyms_by_prefix('HGVS')
    assert 'CLINVARVARIANT:18390' in from_caid.get_synonyms_by_prefix('CLINVARVARIANT')
    assert 'DBSNP:rs671' in from_caid.get_synonyms_by_prefix('DBSNP')

    # Starting from a DBSNP id.
    from_dbsnp = KNode('DBSNP:rs369602258', type=node_types.SEQUENCE_VARIANT)
    rosetta.synonymizer.synonymize(from_dbsnp)
    assert 'CAID:CA321211' in from_dbsnp.get_synonyms_by_prefix('CAID')
    assert 'MYVARIANT_HG38:chr11:g.68032291C>T' in from_dbsnp.get_synonyms_by_prefix('MYVARIANT_HG38')
    # TODO: it should have these as well -
    # assert 'CAID:CA6146346' in variant_node.get_synonyms_by_prefix('CAID')
    # assert 'MYVARIANT_HG38:chr11:g.68032291C>G' in variant_node.get_synonyms_by_prefix('MYVARIANT_HG38')

    # Starting from an HGVS id.
    from_hgvs = KNode(
        'HGVS:NC_000023.9:g.32317682G>A',
        type=node_types.SEQUENCE_VARIANT,
    )
    rosetta.synonymizer.synonymize(from_hgvs)
    assert 'CAID:CA267021' in from_hgvs.get_synonyms_by_prefix('CAID')
    assert 'MYVARIANT_HG38:chrX:g.32389644G>A' in from_hgvs.get_synonyms_by_prefix('MYVARIANT_HG38')
    assert 'CLINVARVARIANT:94623' in from_hgvs.get_synonyms_by_prefix('CLINVARVARIANT')

    # Starting from a ClinVar variant id.
    from_clinvar = KNode(
        'CLINVARVARIANT:18390',
        type=node_types.SEQUENCE_VARIANT,
    )
    rosetta.synonymizer.synonymize(from_clinvar)
    assert 'CAID:CA128085' in from_clinvar.get_synonyms_by_prefix('CAID')

    # Disabled checks starting from MYVARIANT ids, kept for reference:
    #variant_node = KNode('MYVARIANT_HG19:chr11:g.67799758C>G', type=node_types.SEQUENCE_VARIANT)
    #rosetta.synonymizer.synonymize(variant_node)
    #assert 'CAID:CA6146346' in variant_node.get_synonyms_by_prefix('CAID')
    #assert 'DBSNP:rs369602258' in variant_node.get_synonyms_by_prefix('DBSNP')
    #assert 'HGVS:NC_000011.10:g.68032291C>G' in variant_node.get_synonyms_by_prefix('HGVS')
    #assert 'HGVS:CM000673.2:g.68032291C>G' in variant_node.get_synonyms_by_prefix('HGVS')

    #variant_node = KNode('MYVARIANT_HG38:chr11:g.68032291C>G', type=node_types.SEQUENCE_VARIANT)
    #rosetta.synonymizer.synonymize(variant_node)
    #assert 'CAID:CA6146346' in variant_node.get_synonyms_by_prefix('CAID')
    #assert 'DBSNP:rs369602258' in variant_node.get_synonyms_by_prefix('DBSNP')

    # Batch lookup: the ids are passed without the 'HGVS:' prefix, while the
    # result is indexed below with the prefixed form.
    hgvs_batch = [
        'NC_000011.10:g.68032291C>G',
        'NC_000023.9:g.32317682G>A',
        'NC_000017.10:g.43009069G>C',
        'NC_000017.10:g.43009127delG',
    ]
    synonyms_by_hgvs = clingen.get_batch_of_synonyms(hgvs_batch)

    # Each entry is unpacked as (identifier, label) pairs; only the
    # identifiers are checked.
    first_ids = [
        identifier
        for identifier, _ in synonyms_by_hgvs['HGVS:NC_000023.9:g.32317682G>A']
    ]
    assert 'CAID:CA267021' in first_ids

    second_ids = [
        identifier
        for identifier, _ in synonyms_by_hgvs['HGVS:NC_000011.10:g.68032291C>G']
    ]
    assert 'DBSNP:rs369602258' in second_ids
def test_hgnc(rosetta):
    """Synonymizing HGNC:8599 should name the node 'PANX1'.

    An error was once observed for this id; this test checks whether it
    was transient.
    """
    gene = KNode('HGNC:8599', type=node_types.GENE)
    rosetta.synonymizer.synonymize(gene)

    # The HGNC synonyms are looked up, but this test does not inspect them.
    gene.get_synonyms_by_prefix('HGNC')

    assert gene.name == 'PANX1'