예제 #1
0
    def start_build(self) -> list:
        """Feed every sequence variant from the graph through gene-relationship processing.

        Variants are fetched with ``get_all_variants_and_synonymns``; each
        usable entry is turned into a ``KNode`` of type SEQUENCE_VARIANT and
        handed to ``process_variant_to_gene_relationships`` in batches of
        1000, with a final call for any partial batch.

        NOTE(review): the signature is annotated ``-> list`` but the visible
        body has no ``return`` statement.
        """
        all_variants = self.get_all_variants_and_synonymns()
        if not all_variants:
            logger.info('No Sequence variant nodes found from graph.')

        batch = []
        with self.writerDelegator as writer:
            for entry in all_variants:
                # An entry is only usable as an (id, synonym list) pair.
                if len(entry) != 2:
                    continue

                curie = entry[0]
                synonyms = set(entry[1])

                node = KNode(curie, type=node_types.SEQUENCE_VARIANT)
                node.add_synonyms(synonyms)
                node.add_export_labels([node_types.SEQUENCE_VARIANT])
                batch.append(node)

                # Flush a full batch and start a brand-new list for the next one.
                if len(batch) == 1000:
                    self.process_variant_to_gene_relationships(variant_nodes=batch, writer=writer)
                    batch = []

            # Whatever is left over did not fill a complete batch.
            if batch:
                self.process_variant_to_gene_relationships(variant_nodes=batch, writer=writer)
def create_node(session):
    """Delete any node stored under TEST_ID, then export a fresh disease node.

    The exported node has type ``node_types.DISEASE`` and carries
    ORIGINAL_SYNONYMS as its synonyms.
    """
    # Start from a clean slate so earlier runs cannot influence this one.
    cleanup_query = "MATCH (a {id:{id}}) DETACH DELETE a"
    session.run(cleanup_query, {"id": TEST_ID})
    assert get_node(TEST_ID, session) is None

    fresh = KNode(TEST_ID, node_types.DISEASE)
    fresh.add_synonyms(ORIGINAL_SYNONYMS)
    export_node(fresh, session)
def future_test_disease_normalization(rosetta):
    """Synonymize DOID:4325 and expect MONDO synonyms plus a MONDO node id.

    NOTE(review): the ``future_`` prefix presumably keeps this out of test
    collection until normalization behaves this way - confirm.
    """
    disease = KNode('DOID:4325', type=node_types.DISEASE)
    found = synonymize(disease, rosetta.core)
    print(found)
    disease.add_synonyms(found)

    mondo_synonyms = disease.get_synonyms_by_prefix('MONDO')
    assert len(mondo_synonyms) > 0
    # The node's own identifier is expected to carry the MONDO prefix.
    assert Text.get_curie(disease.id) == 'MONDO'
예제 #4
0
def x_test_event_to_drug(mychem):
    """Look up drugs for the adverse event 'Depression' via a MedDRA synonym.

    NOTE(review): a second function with this exact name follows in this
    listing; if both live in one module the later definition replaces this one.
    """
    meddra_id = LabeledID(identifier='MedDRA:10002855', label='Depression')
    disease = KNode('MONDO:0002050',
                    type=node_types.DISEASE,
                    name='Mental Depression')
    disease.add_synonyms({meddra_id})

    drugs = mychem.get_drug_from_adverse_events(disease)
    assert len(drugs) > 0
def x_test_event_to_drug(mychem):
    """Look up drugs for the adverse event 'Nausea' via a MedDRA synonym.

    NOTE(review): an earlier function in this listing has the same name; if
    both live in one module this definition replaces the earlier one.
    """
    meddra_id = LabeledID(identifier='MedDRA:10028813', label='Nausea')
    phenotype = KNode('HP:0002018',
                      type=node_types.PHENOTYPIC_FEATURE,
                      name='Nausea')
    phenotype.add_synonyms({meddra_id})

    drugs = mychem.get_drug_from_adverse_events(phenotype)
    assert len(drugs) > 0
예제 #6
0
def test_complicated(rosetta):
    """Check that a deeply nested caster expression resolves and can be called."""
    op_name = 'caster.output_filter(input_filter(upcast(hetio~disease_to_phenotype,disease_or_phenotypic_feature),disease,typecheck~is_disease),disease,typecheck~is_disease)'
    operation = rosetta.get_ops(op_name)
    assert operation is not None

    # A phenotype node that also carries a disease (DOID) synonym.
    phenotype = KNode('HP:0007354', type=node_types.PHENOTYPIC_FEATURE)
    phenotype.add_synonyms({LabeledID(identifier='DOID:332', label='ALS')})

    outcome = operation(phenotype)
    assert outcome is not None
예제 #7
0
def test_gene_to_drug_synonym(ctd):
    """CTD should resolve a gene through its synonyms when the main id is unusable.

    The node's primary identifier is a fake ``DB:`` curie; the real
    NCBIGene identifier is only present as a synonym.
    """
    gene = KNode("DB:FakeID", type=node_types.GENE)
    gene.add_synonyms({"NCBIGene:5743"})

    hits = ctd.gene_to_drug(gene)
    assert all(target.type == node_types.DRUG for _, target in hits)

    drug_ids = [target.id for _, target in hits]
    assert 'MESH:D000068579' in drug_ids  # Cox2 for a cox2 inhibitor
def test_smdb_id_normalizer(hmdb):
    """Every SMPDB pathway id returned for the metabolite has a 10-character local part."""
    metabolite = KNode('HMDB:HMDB0112245', type=node_types.CHEMICAL_SUBSTANCE)
    metabolite.add_synonyms(['HMDB:HMDB0112245'])

    for hit in hmdb.metabolite_to_pathway(metabolite):
        pathway = hit[1]
        # Only SMPDB identifiers are subject to the length check.
        if not pathway.id.startswith('SMPDB'):
            continue
        local_part = Text.un_curie(pathway.id)
        assert len(local_part) == 10
def test_mondo_synonymization_2(rosetta):
    """Synonymizing MONDO:0005737 yields one DOID, some MESH ids, and keeps a MONDO id."""
    disease = KNode('MONDO:0005737', type=node_types.DISEASE)
    found = synonymize(disease, rosetta.core)
    assert len(found) > 1
    disease.add_synonyms(found)

    doid_synonyms = disease.get_synonyms_by_prefix('DOID')
    assert len(doid_synonyms) == 1

    mesh_synonyms = disease.get_synonyms_by_prefix('MESH')
    assert len(mesh_synonyms) > 0

    assert Text.get_curie(disease.id) == 'MONDO'
예제 #10
0
def test_drug_to_gene_synonym(ctd):
    """CTD should resolve a drug through its synonyms when the main id is unusable.

    The node's primary identifier is a fake ``DB:`` curie; the MESH
    identifier CTD can use is only present as a synonym.
    """
    mesh_synonym = LabeledID(identifier="MESH:D000068579", label="blah")
    drug = KNode("DB:FakeID", type=node_types.DRUG)
    drug.add_synonyms({mesh_synonym})

    hits = ctd.drug_to_gene(drug)
    assert all(target.type == node_types.GENE for _, target in hits)

    gene_ids = [target.id for _, target in hits]
    assert 'NCBIGENE:5743' in gene_ids  # Cox2 for a cox2 inhibitor
 def parse_dict_to_knode(nn_dict: dict) -> KNode:
     """Build a KNode from a dictionary description of a node.

     Keys read from ``nn_dict``:
       * ``id`` - mapping with ``identifier`` and ``label`` (both default to '').
       * ``type`` - list of type names; the first becomes the node type and
         the whole list becomes the export labels (default ``['named_thing']``).
       * ``equivalent_identifiers`` - list of keyword mappings, each expanded
         into a ``LabeledID`` and added as a synonym (default empty).
     """
     identity = nn_dict.get('id', {})
     type_names = nn_dict.get('type', ['named_thing'])

     node = KNode(
         id=identity.get('identifier', ''),
         name=identity.get('label', ''),
         type=type_names[0],
     )

     equivalents = {
         LabeledID(**fields)
         for fields in nn_dict.get('equivalent_identifiers', [])
     }
     node.add_synonyms(equivalents)
     node.add_export_labels(type_names)
     return node
예제 #12
0
def precache_variant_batch_data(rosetta: object, force_all: bool=False) -> object:
    """Pre-populate the myvariant annotation cache for sequence variants.

    Variants are read from the graph, and those with no cached
    ``myvariant.sequence_variant_to_gene(<id>)`` entry are sent to
    ``prepopulate_variant_annotation_cache`` in batches of 1000, followed by
    one call for any remaining partial batch.

    Args:
        rosetta: object exposing ``cache`` and ``core.myvariant``.
        force_all: when True, consider every variant in the graph; otherwise
            only variants that have no existing gene relationships.

    Returns:
        None on success, or the exception instance if anything raised. The
        exception is logged and returned rather than propagated.
    """
    # init the return value
    ret_val = None

    try:
        cache = rosetta.cache
        myvariant = rosetta.core.myvariant

        # get the list of variants
        if force_all:
            var_list = get_all_variants_and_synonymns(rosetta)
        else:
            # grab only variants with no existing gene relationships
            var_list = get_variants_and_synonyms_without_genes_from_graph(rosetta)

        # buffer of variant nodes not yet in the cache, awaiting batch processing
        uncached_variant_annotation_nodes = []

        for var in var_list:
            # element [0] is the ID, element [1] is the synonym list; skip anything else
            if len(var) != 2:
                continue

            variant_node = KNode(var[0], name=var[0], type=node_types.SEQUENCE_VARIANT)
            variant_node.add_synonyms(set(var[1]))

            cache_results = cache.get(f'myvariant.sequence_variant_to_gene({variant_node.id})')

            # Identity check: only a true cache miss (None) queues the node. An
            # equality check could be fooled by cached values that define __eq__.
            if cache_results is not None:
                continue

            uncached_variant_annotation_nodes.append(variant_node)

            # once a full batch has accumulated, process it and start a new buffer
            if len(uncached_variant_annotation_nodes) == 1000:
                prepopulate_variant_annotation_cache(cache, myvariant, uncached_variant_annotation_nodes)
                uncached_variant_annotation_nodes = []

        # process any remaining variant nodes that did not fill a whole batch
        if uncached_variant_annotation_nodes:
            prepopulate_variant_annotation_cache(cache, myvariant, uncached_variant_annotation_nodes)

    except Exception as e:
        logger.error(f'Exception caught. Exception: {e}')
        ret_val = e

    # return to the caller
    return ret_val
def test_mondo_synonymization(rosetta):
    """Synonymize MONDO:0001982 (Niemann Pick Disease, not type C) and check its ids.

    Expects more than ten synonyms, exactly one DOID (DOID:14504), exactly
    two MESH synonyms, and a node id that still has the MONDO prefix.
    """
    disease = KNode('MONDO:0001982', type=node_types.DISEASE)
    found = synonymize(disease, rosetta.core)
    assert len(found) > 10
    disease.add_synonyms(found)

    doid_synonyms = disease.get_synonyms_by_prefix('DOID')
    assert len(doid_synonyms) == 1
    assert doid_synonyms.pop() == 'DOID:14504'

    mesh_synonyms = disease.get_synonyms_by_prefix('MESH')
    assert len(mesh_synonyms) == 2
    assert 'MeSH:D009542' in mesh_synonyms
    assert 'MeSH:D052556' in mesh_synonyms

    assert Text.get_curie(disease.id) == 'MONDO'
def test_just_overwrite_name(session):
    """Re-exporting a node with the same synonyms but a new label updates its name.

    The stored node is expected to end up with both the original DISEASE
    label and the new GENETIC_CONDITION label.
    """
    # Seed the graph with the disease-typed node carrying ORIGINAL_SYNONYMS.
    create_node(session)

    # Same identifier and synonyms, different type and a new label.
    updated = KNode(TEST_ID,
                    node_types.GENETIC_CONDITION,
                    label="Sweet new label")
    updated.add_synonyms(ORIGINAL_SYNONYMS)
    export_node(updated, session)

    stored = get_node(TEST_ID, session)
    assert len(stored.labels) == 2
    assert node_types.DISEASE in stored.labels
    assert node_types.GENETIC_CONDITION in stored.labels
    assert stored.properties['name'] == 'Sweet new label'
def test_also_overwrite_synonyms(session):
    """Re-exporting a node with a different synonym set replaces the stored synonyms.

    After the second export the stored equivalent identifiers are expected
    to be exactly TEST_ID and the new synonym.
    """
    # Seed the graph with the disease-typed node carrying ORIGINAL_SYNONYMS.
    create_node(session)

    # Same identifier, different type, and a brand-new synonym set.
    final_syn = "FINAL_SYN"
    updated = KNode(TEST_ID, node_types.GENETIC_CONDITION)
    updated.add_synonyms({final_syn})
    export_node(updated, session)

    stored = get_node(TEST_ID, session)
    assert len(stored.labels) == 2
    assert node_types.DISEASE in stored.labels
    assert node_types.GENETIC_CONDITION in stored.labels

    equivalents = stored.properties['equivalent_identifiers']
    assert len(equivalents) == 2
    assert TEST_ID in equivalents
    assert final_syn in equivalents
def test_drug_get_gene_other_table(pharos):
    """Pharos should find genes for a drug using the CHEMBL id in its synonyms."""
    drug = KNode('DB:FakeyName', type=node_types.CHEMICAL_SUBSTANCE)
    drug.add_synonyms([LabeledID(identifier='CHEMBL:CHEMBL3658657', label='blahbalh')])

    hits = pharos.drug_get_gene(drug)
    assert len(hits) > 0

    # Every returned node must be a gene.
    found_types = {target.type for _, target in hits}
    assert node_types.GENE in found_types
    assert len(found_types) == 1

    # Every returned identifier must use the HGNC prefix.
    gene_ids = [target.id for _, target in hits]
    found_prefixes = {Text.get_curie(gene_id) for gene_id in gene_ids}
    assert 'HGNC' in found_prefixes
    assert len(found_prefixes) == 1

    # NOTE(review): the original comment named PTGS2 (COX2, HGNC:9605) here,
    # but the assertion checks HGNC:6871 - confirm which gene is intended.
    assert 'HGNC:6871' in gene_ids
예제 #17
0
def test_combined_gene_annotation(gene_annotator):
    """Annotate three genes and check properties sourced from Ensembl and HGNC."""

    def annotated(curie, ensembl_curie, symbol):
        # Build a gene node with one Ensembl synonym and run it through the annotator.
        gene = KNode(curie, type=node_types.GENE)
        gene.add_synonyms({LabeledID(identifier=ensembl_curie, label=symbol)})
        gene_annotator.annotate(gene)
        return gene

    ptgs1 = annotated('HGNC:9604', 'ENSEMBL:ENSG00000095303', 'PTGS1')
    # from ensembl
    assert ptgs1.properties['ensembl_name'] == 'PTGS1'
    assert ptgs1.properties['chromosome'] == '9'
    # from hgnc
    assert ptgs1.properties['location'] == '9q33.2'

    znf3 = annotated('HGNC:13089', 'ENSEMBL:ENSG00000166526', 'ZNF3')
    # from ensembl
    assert znf3.properties['ensembl_name'] == 'ZNF3'
    assert znf3.properties['chromosome'] == '7'
    # from hgnc
    assert 'Zinc fingers C2H2-type' in znf3.properties['gene_family']
    assert 28 in znf3.properties['gene_family_id']

    acp1 = annotated('HGNC:122', 'ENSEMBL:ENSG00000143727', 'ACP1')
    # from ensembl
    assert acp1.properties['ensembl_name'] == 'ACP1'
    assert acp1.properties['chromosome'] == '2'
    # from hgnc
    assert 1071 in acp1.properties['gene_family_id']