def test_mondo_synonymization_2(rosetta):
    """Synonymize MONDO:0005737 and check the DOID and MESH synonyms found.

    Expects more than one synonym overall, exactly one DOID synonym, at
    least one MESH synonym, and ``Text.get_curie`` of the node id to be
    ``'MONDO'``.
    """
    disease = KNode('MONDO:0005737', type=node_types.DISEASE)
    found = synonymize(disease, rosetta.core)
    assert len(found) > 1
    disease.add_synonyms(found)

    doid_synonyms = disease.get_synonyms_by_prefix('DOID')
    assert len(doid_synonyms) == 1

    mesh_synonyms = disease.get_synonyms_by_prefix('MESH')
    assert len(mesh_synonyms) >= 1

    assert Text.get_curie(disease.id) == 'MONDO'
def test_neuron(rosetta):
    """Synonymize the cell node CL:0000540 and check which prefixes come back.

    Expects more than five synonyms on the node, no MESH synonyms, and an
    FMA synonym equal to ``'FMA:54527'``.
    """
    node = KNode("CL:0000540", type=node_types.CELL)
    synonymize(node, rosetta.core)
    assert len(node.synonyms) > 5
    # We're no longer so pathological about trying to get MeSH IDs, so in
    # this case we don't get one.
    mesh_ids = node.get_synonyms_by_prefix("MESH")
    assert len(mesh_ids) == 0
    # But we should get an FMA identifier. We used to get a UMLS one, but
    # OXO isn't giving us that for some reason.
    fma_ids = node.get_synonyms_by_prefix("FMA")
    # Fail with a clear assertion instead of an IndexError when nothing
    # with the FMA prefix was found.
    assert len(fma_ids) > 0, "expected at least one FMA synonym"
    first_fma = list(fma_ids)[0]
    assert first_fma == 'FMA:54527'
def test_mondo_synonymization(rosetta):
    """Synonymize MONDO:0001982 and verify its DOID and MeSH synonyms.

    Expects more than ten synonyms, a single DOID synonym (DOID:14504),
    exactly two MESH synonyms, and ``Text.get_curie`` of the node id to be
    ``'MONDO'``.
    """
    # Niemann Pick Disease (not type C)
    disease = KNode('MONDO:0001982', type=node_types.DISEASE)
    found = synonymize(disease, rosetta.core)
    assert len(found) > 10
    disease.add_synonyms(found)

    doid_synonyms = disease.get_synonyms_by_prefix('DOID')
    assert len(doid_synonyms) == 1
    assert doid_synonyms.pop() == 'DOID:14504'

    mesh_synonyms = disease.get_synonyms_by_prefix('MESH')
    assert len(mesh_synonyms) == 2
    for expected_mesh in ('MeSH:D009542', 'MeSH:D052556'):
        assert expected_mesh in mesh_synonyms

    assert Text.get_curie(disease.id) == 'MONDO'
def test_hgnc_label(rosetta):
    """Check that synonymizing HGNC:18729 leaves the node with a non-empty name."""
    gene = KNode('HGNC:18729', type=node_types.GENE)
    rosetta.synonymizer.synonymize(gene)
    # The HGNC lookup result is not asserted on; the call is kept as it was.
    hgnc_synonyms = gene.get_synonyms_by_prefix('HGNC')
    assert gene.name is not None
    assert gene.name != ''
def test_phenotype(rosetta):
    """Synonymize MEDDRA:10014408 and expect HP synonyms among the results.

    Expects more than ten synonyms on the node and at least one with the
    HP prefix; the HP synonyms are printed for inspection.
    """
    phenotype = KNode("MEDDRA:10014408", type=node_types.PHENOTYPIC_FEATURE)
    synonymize(phenotype, rosetta.core)
    assert len(phenotype.synonyms) > 10

    hp_synonyms = phenotype.get_synonyms_by_prefix("HP")
    assert len(hp_synonyms) >= 1
    print(hp_synonyms)
def future_test_disease_normalization(rosetta):
    """Synonymize DOID:4325 and expect the node to end up with a MONDO id.

    NOTE(review): the ``future_`` prefix presumably keeps pytest from
    collecting this function under its default naming rules - confirm.
    """
    disease = KNode('DOID:4325', type=node_types.DISEASE)
    found = synonymize(disease, rosetta.core)
    print(found)
    disease.add_synonyms(found)

    mondo_synonyms = disease.get_synonyms_by_prefix('MONDO')
    assert len(mondo_synonyms) >= 1
    assert Text.get_curie(disease.id) == 'MONDO'
def xxtest_go(rosetta):
    """Synonymize HGNC:10593 and expect biolink to return processes/functions.

    NOTE(review): the ``xx`` prefix presumably keeps pytest from collecting
    this function under its default naming rules - confirm.
    """
    gene = KNode("HGNC:10593", label="SCN5A", type=node_types.GENE)
    # The cached value is not used below; the lookup call is kept as it was.
    cached = rosetta.cache.get('synonymize(HGNC:10593)')
    rosetta.synonymizer.synonymize(gene)
    print(gene.get_synonyms_by_prefix('UNIPROTKB'))

    results = rosetta.core.biolink.gene_get_process_or_function(gene)
    assert len(results) >= 1
def test_uniprot(rosetta):
    """Synonymize a gene node built from UniProtKB:O75381 only.

    Expects a single HGNC synonym, HGNC:8856, which also becomes the node
    id, and the node name PEX14.
    """
    expected_id = 'HGNC:8856'
    gene = KNode('UniProtKB:O75381', type=node_types.GENE)
    rosetta.synonymizer.synonymize(gene)

    hgnc_ids = gene.get_synonyms_by_prefix('HGNC')
    assert len(hgnc_ids) == 1
    assert hgnc_ids.pop() == expected_id
    assert gene.id == expected_id
    assert gene.name == 'PEX14'
def test_failing_uniprot_2(rosetta):
    """Synonymize a gene node built from UniProtKB:P14416 with an empty name.

    Expects a single HGNC synonym, HGNC:3023, which also becomes the node
    id, and the node name DRD2.
    """
    expected_id = 'HGNC:3023'
    gene = KNode('UniProtKB:P14416', type=node_types.GENE, name='')
    rosetta.synonymizer.synonymize(gene)

    hgnc_ids = gene.get_synonyms_by_prefix('HGNC')
    assert len(hgnc_ids) == 1
    assert hgnc_ids.pop() == expected_id
    assert gene.id == expected_id
    assert gene.name == 'DRD2'
def test_crappy_uniprot(rosetta):
    """Synonymize a gene node built from UniProtKB:A0A024QZH5 only.

    Expects a single HGNC synonym, HGNC:18859, which also becomes the node
    id, and the node name SPHK2.
    """
    expected_id = 'HGNC:18859'
    gene = KNode('UniProtKB:A0A024QZH5', type=node_types.GENE)
    rosetta.synonymizer.synonymize(gene)

    hgnc_ids = gene.get_synonyms_by_prefix('HGNC')
    assert len(hgnc_ids) == 1
    assert hgnc_ids.pop() == expected_id
    assert gene.id == expected_id
    assert gene.name == 'SPHK2'
def test_biolink(rosetta, biolink):
    """Synonymize an HGVS variant and check the phenotypes biolink returns.

    Expects the CLINVARVARIANT:94623 synonym, the phenotype ids HP:0000750
    and HP:0003236 among the results, and a ``has_phenotype`` label among
    the standard predicates of the returned relations.
    """
    variant = KNode('HGVS:NC_000023.9:g.32317682G>A', type=node_types.SEQUENCE_VARIANT)
    rosetta.synonymizer.synonymize(variant)
    assert 'CLINVARVARIANT:94623' in variant.get_synonyms_by_prefix('CLINVARVARIANT')

    phenotype_edges = biolink.sequence_variant_get_phenotype(variant)
    phenotype_ids = [target.id for _, target in phenotype_edges]
    for expected_id in ('HP:0000750', 'HP:0003236'):
        assert expected_id in phenotype_ids

    predicate_labels = {edge.standard_predicate.label for edge, _ in phenotype_edges}
    assert 'has_phenotype' in predicate_labels
def test_synonymization(rosetta, clingen):
    """Check sequence-variant synonymization from several identifier types.

    Starts from CAID, DBSNP, HGVS and CLINVARVARIANT identifiers in turn and
    checks the cross-references found after synonymization, then checks the
    batch lookup offered by ``clingen.get_batch_of_synonyms``.
    """
    def synonymized_variant(curie):
        # Build a sequence-variant node for `curie` and run the synonymizer on it.
        variant = KNode(curie, type=node_types.SEQUENCE_VARIANT)
        rosetta.synonymizer.synonymize(variant)
        return variant

    from_caid = synonymized_variant('CAID:CA128085')
    assert 'HGVS:NC_000012.12:g.111803962G>A' in from_caid.get_synonyms_by_prefix('HGVS')
    assert 'CLINVARVARIANT:18390' in from_caid.get_synonyms_by_prefix('CLINVARVARIANT')
    assert 'DBSNP:rs671' in from_caid.get_synonyms_by_prefix('DBSNP')

    from_dbsnp = synonymized_variant('DBSNP:rs369602258')
    assert 'CAID:CA321211' in from_dbsnp.get_synonyms_by_prefix('CAID')
    assert 'MYVARIANT_HG38:chr11:g.68032291C>T' in from_dbsnp.get_synonyms_by_prefix('MYVARIANT_HG38')
    # TODO: it should have these as well -
    # assert 'CAID:CA6146346' in from_dbsnp.get_synonyms_by_prefix('CAID')
    # assert 'MYVARIANT_HG38:chr11:g.68032291C>G' in from_dbsnp.get_synonyms_by_prefix('MYVARIANT_HG38')

    from_hgvs = synonymized_variant('HGVS:NC_000023.9:g.32317682G>A')
    assert 'CAID:CA267021' in from_hgvs.get_synonyms_by_prefix('CAID')
    assert 'MYVARIANT_HG38:chrX:g.32389644G>A' in from_hgvs.get_synonyms_by_prefix('MYVARIANT_HG38')
    assert 'CLINVARVARIANT:94623' in from_hgvs.get_synonyms_by_prefix('CLINVARVARIANT')

    from_clinvar = synonymized_variant('CLINVARVARIANT:18390')
    assert 'CAID:CA128085' in from_clinvar.get_synonyms_by_prefix('CAID')

    # Disabled checks starting from MYVARIANT_HG19:
    # variant_node = KNode('MYVARIANT_HG19:chr11:g.67799758C>G', type=node_types.SEQUENCE_VARIANT)
    # rosetta.synonymizer.synonymize(variant_node)
    # assert 'CAID:CA6146346' in variant_node.get_synonyms_by_prefix('CAID')
    # assert 'DBSNP:rs369602258' in variant_node.get_synonyms_by_prefix('DBSNP')
    # assert 'HGVS:NC_000011.10:g.68032291C>G' in variant_node.get_synonyms_by_prefix('HGVS')
    # assert 'HGVS:CM000673.2:g.68032291C>G' in variant_node.get_synonyms_by_prefix('HGVS')

    # Disabled checks starting from MYVARIANT_HG38:
    # variant_node = KNode('MYVARIANT_HG38:chr11:g.68032291C>G', type=node_types.SEQUENCE_VARIANT)
    # rosetta.synonymizer.synonymize(variant_node)
    # assert 'CAID:CA6146346' in variant_node.get_synonyms_by_prefix('CAID')
    # assert 'DBSNP:rs369602258' in variant_node.get_synonyms_by_prefix('DBSNP')

    # Batch lookup: the queries are passed without the 'HGVS:' prefix, while
    # the result is indexed below with prefixed keys.
    batch_queries = [
        'NC_000011.10:g.68032291C>G',
        'NC_000023.9:g.32317682G>A',
        'NC_000017.10:g.43009069G>C',
        'NC_000017.10:g.43009127delG',
    ]
    batch_synonyms = clingen.get_batch_of_synonyms(batch_queries)

    chr_x_ids = [identifier for identifier, _ in batch_synonyms['HGVS:NC_000023.9:g.32317682G>A']]
    assert 'CAID:CA267021' in chr_x_ids

    chr_11_ids = [identifier for identifier, _ in batch_synonyms['HGVS:NC_000011.10:g.68032291C>G']]
    assert 'DBSNP:rs369602258' in chr_11_ids
def test_hgnc(rosetta):
    """Check that HGNC:8599 synonymizes to the name PANX1.

    An error was once observed for this id; this test checks whether it
    was transient.
    """
    gene = KNode('HGNC:8599', type=node_types.GENE)
    rosetta.synonymizer.synonymize(gene)
    # The HGNC lookup result is not asserted on; the call is kept as it was.
    hgnc_synonyms = gene.get_synonyms_by_prefix('HGNC')
    assert gene.name == 'PANX1'