Esempio n. 1
0
def test():
    """Show what Biolink returns for HBB (HGNC:4827).

    Every pair returned by ``Biolink.gene_get_disease`` is unpacked; the
    second element is handed to ``Mondo.is_genetic_disease`` and its ``'id'``
    entry is printed together with the two values Mondo returns.
    """
    biolink = Biolink(ServiceContext.create_context())
    gene_disease_pairs = biolink.gene_get_disease(('HGNC:4827', ))
    mondo = Mondo(ServiceContext.create_context())
    for _first, disease in gene_disease_pairs:
        verdict = mondo.is_genetic_disease(disease)
        flag, ident = verdict
        print(disease['id'], flag, ident)
Esempio n. 2
0
def test_one(infname, outfname, fieldnum):
    """Map the disease names in one column of a TSV file to Mondo ids and xrefs.

    The first line of ``infname`` is skipped as a header, as is every line
    starting with ``#``. Each distinct value found in column ``fieldnum`` is
    looked up once with ``Mondo.search`` and a tab-separated row
    ``name, Mondo ids, DOIDs, UMLS ids, EFO ids`` is written to ``outfname``
    (multiple values are joined with ``;``, all id columns are empty when the
    search finds nothing). A running ``Good``/``Bad`` tally is printed after
    every distinct name.

    Args:
        infname: path of the tab-separated input file.
        outfname: path of the report file; it is overwritten.
        fieldnum: zero-based index of the column holding the disease name.
    """
    from itertools import chain

    m = Mondo(ServiceContext.create_context())

    def joined_xrefs(lookup, mondo_ids):
        # Each lookup yields a collection of ids per Mondo id; flatten them
        # in order and join into a single ';'-separated field.
        return ';'.join(chain.from_iterable(lookup(mid) for mid in mondo_ids))

    n_good = 0
    n_bad = 0
    diseases = set()
    with open(infname, 'r') as inf, open(outfname, 'w') as outf:
        inf.readline()  # skip the header line
        for line in inf:
            if line.startswith('#'):
                continue
            x = line.strip().split('\t')[fieldnum]
            if x in diseases:
                continue
            diseases.add(x)
            result = m.search(x)
            if not result:
                mondos = doids = umlss = efos = ''
                n_bad += 1
            else:
                n_good += 1
                mondos = ';'.join(result)
                doids = joined_xrefs(m.mondo_get_doid, result)
                umlss = joined_xrefs(m.mondo_get_umls, result)
                efos = joined_xrefs(m.mondo_get_efo, result)
            outf.write('{}\t{}\t{}\t{}\t{}\n'.format(x, mondos, doids, umlss,
                                                     efos))
            print('Good: {}   Bad: {}'.format(n_good, n_bad))
Esempio n. 3
0
def test2():
    """Report how many drug names resolve to a PubChem node and then to genes.

    Reads the first tab-separated column of ``q2-drugandcondition-list.txt``
    (the first line is skipped as a header) and de-duplicates the names. For
    each name, ``ChemBioKS.graph_drugname_to_pubchem`` is queried and the node
    of the first hit is passed to ``graph_pubchem_to_ncbigene``. One line
    ``name, identifier, gene count`` is printed per drug, followed by summary
    counts.

    Any exception raised during either lookup is counted under
    "without pubchem id"; the drug is then reported with an empty identifier
    and zero genes.
    """
    from greent.service import ServiceContext
    cb = ChemBioKS(ServiceContext.create_context())
    with open('q2-drugandcondition-list.txt', 'r') as inf:
        inf.readline()  # skip the header line
        uniq = {line.split('\t')[0] for line in inf}
    n_no_pub = 0
    n_no_ncbi = 0
    for name in uniq:
        input_node = KNode("DRUG_NAME:{}".format(name), node_types.DRUG_NAME)
        try:
            drug_node = cb.graph_drugname_to_pubchem(input_node)[0][1]
            ident = drug_node.identifier
            ncbi_nodes = cb.graph_pubchem_to_ncbigene(drug_node)
            if len(ncbi_nodes) == 0:
                n_no_ncbi += 1
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except`` so
            # Ctrl-C and SystemExit still stop the run.
            n_no_pub += 1
            ident = ''
            ncbi_nodes = []
        print('{}\t{}\t{}'.format(name, ident, len(ncbi_nodes)))
    print('{} drugs'.format(len(uniq)))
    print('{} without pubchem id'.format(n_no_pub))
    print('{} without genes'.format(n_no_ncbi))
    ngood = len(uniq) - n_no_pub - n_no_ncbi
    # Guard the ratio so an empty input list does not raise ZeroDivisionError.
    ratio = ngood / len(uniq) if uniq else 0.0
    print('{} good ({})'.format(ngood, ratio))
Esempio n. 4
0
def test_q2_diseases():
    """Write a DOID lookup report for the names in ``q1-disease-list.txt``.

    The first line of the input is skipped as a header. For each distinct
    value of the first tab-separated column, the aggregator's ``name_to_doid``
    is queried. Hits are written as ``;``-joined identifiers; when there are
    none, the result of ``name_to_anything`` is written in the third column
    instead. A running ``Good``/``Bad`` tally is printed per distinct name.
    """
    aggregator = TranslatorKnowledgeBeaconAggregator (ServiceContext.create_context ())
    found = 0
    missing = 0
    seen = set()
    # A previous variant of this report read column 1 of
    # 'q2-drugandcondition-list.txt' and wrote 'q2_disease_report.txt'.
    with open('q1-disease-list.txt','r') as source, open('q1_disease_report.txt','w') as report:
        source.readline()  # skip the header line
        report.write('OriginalName\tDOIDs\tOthers\n')
        for raw_line in source:
            name = raw_line.strip().split('\t')[0]
            if name in seen:
                continue
            seen.add(name)
            node = KNode("NAME.DISEASE:{}".format(name), node_types.DISEASE)
            hits = aggregator.name_to_doid (node)
            if len(hits) != 0:
                found += 1
                doids = ';'.join( [hit[1].identifier for hit in hits] )
                aliases = ''
            else:
                doids = ''
                aliases = aggregator.name_to_anything(node)
                missing += 1
            report.write('{}\t{}\t{}\n'.format(name, doids, aliases))
            print( 'Good: {}   Bad: {}'.format(found, missing) )
Esempio n. 5
0
    def __init__(self, config=None, override=None):
        """Create the service context and one client per knowledge source.

        Args:
            config: forwarded unchanged to ``ServiceContext.create_context``.
            override: not read by this constructor. The default is ``None``
                rather than ``{}`` so that no mutable object is shared
                between calls.
        """
        self.service_context = ServiceContext.create_context(config)
        context = self.service_context

        self.clinical = Clinical(context)
        # Temporarily taken out because of HTTP errors:
        #self.exposures = CMAQ (service_context)
        self.endotype = Endotype(context)

        self.chembio = ChemBioKS(context)
        self.chemotext = Chemotext(context)
        self.disease_ontology = DiseaseOntology(context)
        self.pharos = Pharos(context)
        self.oxo = OXO(context)
        self.hpo = HPO(context)
        self.hetio = HetIO(context)
        self.biolink = Biolink(context)
        self.mondo = Mondo(context)
        self.go = GO(context)
        self.tkba = TranslatorKnowledgeBeaconAggregator(context)
        self.translator_registry = TranslatorRegistry(context)
        self.quickgo = QuickGo(context)
        # Translator receives this object itself rather than the context.
        self.translator = Translator(core=self)
        self.hgnc = HGNC(context)
        self.uberongraph = UberonGraphKS(context)
        self.ctd = CTD(context)
Esempio n. 6
0
def test_all_drugs_ctd():
    """Report how many drug names resolve through CTD to a node and to genes.

    Reads the first tab-separated column of ``q2-drugandcondition-list.txt``
    (the first line is skipped as a header) and de-duplicates the names. For
    each name, ``CTD.drugname_to_ctd`` is queried; when the first hit yields
    a non-empty identifier, ``CTD.drug_to_gene`` is called on its node. One
    line ``name, identifier, hit count, gene count`` is printed per drug,
    followed by summary counts.
    """
    from greent.service import ServiceContext
    ctd = CTD(ServiceContext.create_context())
    with open('q2-drugandcondition-list.txt', 'r') as inf:
        inf.readline()  # skip the header line
        uniq = {line.split('\t')[0] for line in inf}
    n_no_ctd = 0
    n_no_gene = 0
    for name in uniq:
        input_node = KNode("DRUG_NAME:{}".format(name),
                           type=node_types.CHEMICAL_SUBSTANCE_NAME)
        results = ctd.drugname_to_ctd(input_node)
        # Always start from an empty gene list so the report line below never
        # sees an unbound name or a value left over from the previous drug.
        gene_nodes = []
        try:
            drug_node = results[0][1]
            ident = drug_node.identifier
        except (LookupError, TypeError, AttributeError):
            # No usable first hit for this name.
            n_no_ctd += 1
            ident = ''
        if ident != '':
            gene_nodes = ctd.drug_to_gene(drug_node)
            if len(gene_nodes) == 0:
                n_no_gene += 1
        print('{}\t{}\t{}\t{}'.format(name, ident, len(results),
                                      len(gene_nodes)))
    print('{} drugs'.format(len(uniq)))
    print('{} without pubchem id'.format(n_no_ctd))
    print('{} without genes'.format(n_no_gene))
    ngood = len(uniq) - n_no_ctd - n_no_gene
    # Guard the ratio so an empty input list does not raise ZeroDivisionError.
    ratio = ngood / len(uniq) if uniq else 0.0
    print('{} good ({})'.format(ngood, ratio))
Esempio n. 7
0
def test_all_drugs_pharos():
    """Report how many drug names resolve through Pharos to a node and to genes.

    Reads the first tab-separated column of ``q2-drugandcondition-list.txt``
    (the first line is skipped as a header) and de-duplicates the names. For
    each name, ``Pharos.drugname_to_pharos`` is queried and the node of the
    first hit is passed to ``Pharos.drug_get_gene``. One line
    ``name, identifier, hit count, gene count`` is printed per drug, followed
    by summary counts.

    Any exception raised during the lookups is counted under
    "without pubchem id"; the drug is then reported with an empty identifier
    and zero genes.
    """
    from greent.service import ServiceContext
    pharos = Pharos(ServiceContext.create_context())
    with open('q2-drugandcondition-list.txt', 'r') as inf:
        inf.readline()  # skip the header line
        uniq = {line.split('\t')[0] for line in inf}
    n_no_pharos = 0
    n_no_hgnc = 0
    for name in uniq:
        input_node = KNode("DRUG_NAME:{}".format(name),
                           type=node_types.CHEMICAL_SUBSTANCE_NAME)
        # Reset per drug: if the name lookup itself raises, the report line
        # must not hit an unbound ``results`` or reuse the previous drug's.
        results = []
        try:
            results = pharos.drugname_to_pharos(input_node)
            drug_node = results[0][1]
            ident = drug_node.identifier
            hgnc_nodes = pharos.drug_get_gene(drug_node)
            if len(hgnc_nodes) == 0:
                n_no_hgnc += 1
        except Exception:
            # Deliberately best-effort, but no longer a bare ``except`` so
            # Ctrl-C and SystemExit still stop the run.
            n_no_pharos += 1
            ident = ''
            hgnc_nodes = []
        print('{}\t{}\t{}\t{}'.format(name, ident, len(results),
                                      len(hgnc_nodes)))
    print('{} drugs'.format(len(uniq)))
    print('{} without pubchem id'.format(n_no_pharos))
    print('{} without genes'.format(n_no_hgnc))
    ngood = len(uniq) - n_no_pharos - n_no_hgnc
    # Guard the ratio so an empty input list does not raise ZeroDivisionError.
    ratio = ngood / len(uniq) if uniq else 0.0
    print('{} good ({})'.format(ngood, ratio))
Esempio n. 8
0
def test_gc():
    """Print Biolink's disease and genetic-condition results for HGNC:4851.

    The second element of each ``gene_get_disease`` pair is printed on its own
    line; the ``gene_get_genetic_condition`` result is printed as a whole.
    """
    biolink = Biolink(ServiceContext.create_context())
    gene_node = KNode('HGNC:4851', node_type=node_types.GENE)
    disease_pairs = biolink.gene_get_disease(gene_node)
    for _edge, disease in disease_pairs:
        print(disease)
    conditions = biolink.gene_get_genetic_condition(gene_node)
    print(conditions)
Esempio n. 9
0
class TestHetIO(unittest.TestCase):
    """Smoke tests that pretty-print HetIO lookups for two gene nodes."""

    # Shared client, created once when the class body is executed.
    h = HetIO(ServiceContext.create_context())

    def test_anatomy(self):
        """Pretty-print the gene_to_anatomy result for 'HGNC:TP53'."""
        gene = KNode('HGNC:TP53', node_types.GENE)
        anatomy = self.h.gene_to_anatomy(gene)
        pprint(anatomy)

    def test_cell(self):
        """Pretty-print the gene_to_cell result for 'HGNC:7121'."""
        gene = KNode('HGNC:7121', node_types.GENE)
        cells = self.h.gene_to_cell(gene)
        pprint(cells)
Esempio n. 10
0
def basic_test():
    """Print three numbered name-to-ontology lookups from the beacon aggregator.

    Runs ``name_to_doid`` for Osteoporosis and HIV infection, then
    ``name_to_efo`` for HIV infection, printing a ``N.`` label before each
    result.
    """
    aggregator = TranslatorKnowledgeBeaconAggregator (ServiceContext.create_context ())
    # Earlier experiments (disabled): name_to_mesh_disease and name_to_doid
    # for "NAME.DISEASE:asthma".
    lookups = (
        ('1.', 'name_to_doid', "NAME.DISEASE:Osteoporosis"),
        ('2.', 'name_to_doid', "NAME.DISEASE:HIV infection"),
        ('3.', 'name_to_efo', "NAME.DISEASE:HIV infection"),
    )
    for label, method_name, curie in lookups:
        print (label)
        lookup = getattr(aggregator, method_name)
        print (lookup (KNode(curie, node_types.DISEASE)))
Esempio n. 11
0
def test():
    """Print the anatomy results UberonGraph returns for phenotype HP:0011675."""
    uberon = UberonGraphKS(ServiceContext.create_context())
    # Disabled cell->anatomy check:
    #    uberon.get_anatomy_by_cell_graph(KNode('CL:0000097', node_types.CELL))
    # Active check: phenotype->anatomy.
    phenotype = KNode('HP:0011675', node_types.PHENOTYPE)
    anatomy = uberon.get_anatomy_by_phenotype_graph(phenotype)
    print(anatomy)
Esempio n. 12
0
def test():
    """Pretty-print three QuickGo lookups on PROCESS nodes.

    Two ``go_term_xontology_relationships`` calls are followed by one
    ``go_term_annotation_extensions`` call, each on a different GO identifier.
    """
    quickgo = QuickGo(ServiceContext.create_context())
    calls = (
        ('go_term_xontology_relationships', "GO:0002551"),
        ('go_term_xontology_relationships', "GO.BIOLOGICAL_PROCESS:0042626"),
        ('go_term_annotation_extensions', "GO.BIOLOGICAL_PROCESS:0007269"),
    )
    for method_name, curie in calls:
        lookup = getattr(quickgo, method_name)
        node = KNode(curie, node_types.PROCESS)
        pprint.pprint(lookup(node))
Esempio n. 13
0
 def __init__(self,greent):
     """Attach a Chemotext2 client to ``greent`` and set up the bad-word set.

     A new ``Chemotext2`` built on a fresh service context is stored on
     ``greent.chemotext2`` and mirrored on ``self.chemotext2``.
     ``self.badwords`` is a fixed set of lower-case words.
     """
     context = ServiceContext.create_context()
     greent.chemotext2 = chemotext2.Chemotext2(context)
     self.chemotext2 = greent.chemotext2
     self.badwords = {
         'disease', 'virus', 'infection', 'fever', 'syndrome', 'hemorrhagic',
         'disorder', 'gene', 'cell', 'system', 'tissue', 'non',
         'positive', 'negative', 'receptor', 'type', 'severe', 'perinatal',
         'form', 'adult', 'onset', 'nonsyndromic', 'syndromic', 'infantile',
         'juvenile', 'early', 'late', 'chronic', 'rare', 'autosomal',
         'dominant', 'recessive', 'congenital', 'hereditary', 'familial', 'male',
         'female', 'with', 'without', 'single', 'mutation', 'isolated',
         'absence', 'group', 'susceptibility', 'plus', 'essential', 'distal',
         'and', 'during', 'continuous', 'due', 'deficiency', 'extensive',
         'large', 'small', 'pro', 'partial', 'complete', 'morbid',
         'central', 'middle', 'deficit', 'defect', 'status', 'rhythm',
         'like',
     }
Esempio n. 14
0
def test_pathways():
    """Print Biolink pathway results for HGNC:5013 and one reverse lookup.

    Prints the second element of every ``gene_get_kegg_pathway`` pair, then of
    every ``gene_get_react_pathway`` pair, and finally of every pair returned
    by ``pathway_get_gene`` for the first ``gene_get_react_pathway`` hit.
    """
    biolink = Biolink(ServiceContext.create_context())
    gene_node = KNode('HGNC:5013', node_type=node_types.GENE)

    kegg_hits = biolink.gene_get_kegg_pathway(gene_node)
    for _edge, pathway in kegg_hits:
        print(pathway)

    react_hits = biolink.gene_get_react_pathway(gene_node)
    for _edge, pathway in react_hits:
        print(pathway)

    # Take the node of the first hit and look up its genes.
    first_pathway = react_hits[0][1]
    gene_hits = biolink.pathway_get_gene(first_pathway)
    for _edge, member in gene_hits:
        print(member)
Esempio n. 15
0
def build_synonym_cache(ctext=None):
    """Write a tab-separated synonym lookup table to ``ctext.cache``.

    Runs the query ``MATCH (d:Term) RETURN d`` through ``ctext.query`` and,
    for every row in the first result set, writes one ``QUERY<TAB>KEY`` line
    mapping the upper-cased term name to the name itself, plus one line per
    entry in the row's optional ``synonyms`` list mapping the upper-cased
    synonym to the name. The file starts with a ``QUERY\\tKEY`` header and is
    overwritten on each call.

    Args:
        ctext: object providing ``query(query=...)`` and a ``cache`` file
            path. When ``None``, a ``Chemotext`` client on a fresh service
            context is created.
    """
    if ctext is None:
        from greent.service import ServiceContext
        ctext = Chemotext(ServiceContext.create_context())
    response = ctext.query(query="MATCH (d:Term) RETURN d")
    with open(ctext.cache, 'w') as outfile:
        outfile.write('QUERY\tKEY\n')
        for datum in response['results'][0]['data']:
            for row in datum['row']:
                meshname = row['name']
                outfile.write('{}\t{}\n'.format(meshname.upper(), meshname))
                # Rows without a 'synonyms' entry contribute only their name.
                for syn in row.get('synonyms', []):
                    outfile.write('{}\t{}\n'.format(syn.upper(), meshname))
Esempio n. 16
0
def test():
    """Print the HPO search results for two thyroid-neoplasm phrasings."""
    hpo = HPO (ServiceContext.create_context ())
    # Disabled query: 'Arrhythmias, Cardiac'
    for phrase in ('Thyroid Neoplasms', 'Neoplasm of the thyroid gland'):
        print( hpo.search(phrase) )
Esempio n. 17
0
                                        node_types.PHENOTYPE)))
        return edge_node

        #return [ ( self.get_edge ({ 'res' : r }, predicate='affects'), KNode("MESH:{0}".format (r['identifier']), 'PH') ) for r in result ]


class TestHetIO(unittest.TestCase):
    """Smoke tests that pretty-print HetIO lookups for two gene nodes."""

    # Shared client, created once when the class body is executed.
    h = HetIO(ServiceContext.create_context())

    def test_anatomy(self):
        """Pretty-print the gene_to_anatomy result for 'HGNC:TP53'."""
        gene = KNode('HGNC:TP53', node_types.GENE)
        anatomy = self.h.gene_to_anatomy(gene)
        pprint(anatomy)

    def test_cell(self):
        """Pretty-print the gene_to_cell result for 'HGNC:7121'."""
        gene = KNode('HGNC:7121', node_types.GENE)
        cells = self.h.gene_to_cell(gene)
        pprint(cells)


if __name__ == '__main__':
    # Ad-hoc run: print the phenotypes HetIO reports for disease DOID:2841.
    het = HetIO(ServiceContext.create_context())
    disease_node = KNode('DOID:2841', node_types.DISEASE)
    print(het.disease_to_phenotype(disease_node))
    # Disabled experiment: query anatomy for every gene in 'hgnc-entrez'.
    #
    # with open('hgnc-entrez', 'r') as stream:
    #     for line in stream:
    #         h, e, u = line.split ('\t')
    #         het.gene_to_anatomy (KNode('SOMETHING:{}'.format (e), node_types.GENE))
    #
    # The unit tests are not run from here:
    #unittest.main ()

#MATCH (g:Gene)-[r]-(c:CellularComponent) WHERE g.name='HGNC:3263' RETURN g, r, c LIMIT 200
Esempio n. 18
0
def test():
    """Print ChemBioKS PubChem lookups for imatinib, by name and by node."""
    from greent.service import ServiceContext
    chembio = ChemBioKS(ServiceContext.create_context())
    by_name = chembio.drugname_to_pubchem('imatinib')
    print(by_name)
    drug_name_node = KNode("DRUG_NAME:imatinib", node_types.DRUG_NAME)
    by_node = chembio.graph_drugname_to_pubchem(drug_name_node)
    print(by_node)
Esempio n. 19
0
def test():
    """Print the HGNC ncbigene_to_uniprotkb result for NCBIGENE:3815."""
    from greent.service import ServiceContext
    client = HGNC(ServiceContext.create_context())
    gene_node = KNode('NCBIGENE:3815', node_type=node_types.GENE)
    uniprot = client.ncbigene_to_uniprotkb(gene_node)
    print(uniprot)
Esempio n. 20
0
def test_name():
    """Print what UberonGraph's cell_get_cellname returns for CL:0000097."""
    uberon = UberonGraphKS(ServiceContext.create_context())
    # Cell -> name lookup.
    cell_id = 'CL:0000097'
    print(uberon.cell_get_cellname(cell_id))
Esempio n. 21
0
        return {
            "exposures"  : exposures,
            "icd_codes"  : icd_codes,
            "lat"        : lat,
            "lon"        : lon,
            "time"       : time,
            "visit_type" : visit_type
        }
    def get_endotype (self, request):
        """Post ``request`` to the endotypes endpoint and return its output.

        The request is printed as JSON before the call and the response is
        printed afterwards.

        Returns:
            The response's ``'output'`` entry, or ``None`` when the response
            has no such entry.
        """
        print (json.dumps (request))
        pending = self.client.endotypes.endotypes_post (input=request)
        response = pending.result()
        print (response)
        if 'output' in response:
            return response['output']
        return None
    
if __name__ == "__main__":
    # Build one exposure and one visit, wrap them in a request and print the
    # endotype service's answer as indented JSON.
    e = Endotype (ServiceContext.create_context ())
    exposure_specs = [{
        "exposure_type": "pm25",
        "units"        : "",
        "value"        : 2
    }]
    exposures = [e.create_exposure (**spec) for spec in exposure_specs]
    visit_specs = [{
        "icd_codes"  : "ICD9:V12,ICD9:E002",
        "lat"        : "20",
        "lon"        : "20",
        "time"       : "2017-10-12 21:12:29",
        "visit_type" : "INPATIENT",
        "exposures"  : exposures
    }]
    visits = [e.create_visit (**spec) for spec in visit_specs]
    request = e.create_request (
        dob="2017-10-04",
        model_type="M0",
        race="1",
        sex="M",
        visits=visits)
    endotype = e.get_endotype (request)
    print (json.dumps (endotype, indent=2))
Esempio n. 22
0
        rosetta_config = Resource.get_resource_obj("rosetta.yml",
                                                   format='yaml')
        semantics = rosetta_config['@translator-semantics']
        subscriptions = []
        for api in self.op_map:
            for in_type, in_params in self.op_map[api].items():
                for out_type, out_vals in self.op_map[api][in_type].items():
                    predicate = semantics.get(api,
                                              {}).get(in_type,
                                                      {}).get(out_type, None)
                    #if not predicate:
                    #    predicate = '*-missing-*'
                    subscriptions.append((in_type, out_type, {
                        "link":
                        predicate,
                        "op":
                        self.add_method(TranslatorRegistry, api, in_type,
                                        out_type)
                    }))
        return subscriptions

if __name__ == "__main__":
    # Load the registry.
    treg = TranslatorRegistry(ServiceContext.create_context())
    # Generate subscriptions.
    subscriptions = treg.get_subscriptions()

    # Exercise one registry method and pretty-print its result.
    gene_node = KNode('UNIPROT:AKT1', node_types.GENE)
    r = treg.myvariantinfo__uniprot_to_hgvs(gene_node)
    pprint(r)
Esempio n. 23
0
 def __init__(self, context):
     """Register as the "biolink" service and create Mondo and GO helpers.

     ``context`` is passed to the base-class constructor only; the Mondo and
     GO helpers each get a freshly created service context.
     """
     super(Biolink, self).__init__("biolink", context)
     # TODO: can we just use the Mondo that's in the core already?
     mondo_context = ServiceContext.create_context()
     self.checker = Mondo(mondo_context)
     go_context = ServiceContext.create_context()
     self.go = GO(go_context)
Esempio n. 24
0
def test_go():
    """Print each pair Biolink's gene_get_process returns for UniProtKB:P10721."""
    kit_node = KNode('UniProtKB:P10721', node_types.GENE)
    biolink = Biolink(ServiceContext.create_context())
    for edge, node in biolink.gene_get_process(kit_node):
        print(edge, node)
Esempio n. 25
0
 def __init__(self):
     """Build a GreenT ServiceContext and a Chemotext service that uses it."""
     context = ServiceContext()
     self.service_context = context
     self.chemotext = Chemotext(context)
Esempio n. 26
0
def test_phenotypes():
    """Print each pair Biolink's disease_get_phenotype returns for DOID:2841."""
    disease_node = KNode('DOID:2841', node_types.DISEASE)
    biolink = Biolink(ServiceContext.create_context())
    for edge, node in biolink.disease_get_phenotype(disease_node):
        print(edge, node)
Esempio n. 27
0
def test_parts():
    """Print what UberonGraph's get_anatomy_parts returns for UBERON:0004535."""
    uberon = UberonGraphKS(ServiceContext.create_context())
    parts = uberon.get_anatomy_parts('UBERON:0004535')
    print(parts)
Esempio n. 28
0
            raise ValueError ("We don't have a word embedding model for {0} word phrases".format (term_a.count(' ') + 1))
            model = self.bigram_model
            term_a = term_a.replace (' ', '_')
            term_b = term_b.replace (' ', '_')
        elif term_a.count (' ') == 0:
            model = self.model
        else:
            raise ValueError ("We don't have a word embedding model for {0} word phrases".format (term_a.count(' ') + 1))
            
        return model.similarity (term_a, term_b) if term_a in model.vocab and term_b in model.vocab else -1.0
            
        #return self.model.similarity (term_a, term_b) if term_a in self.model.vocab and term_b in self.model.vocab else -1.0


if __name__ == "__main__":
    # Ad-hoc exploration of the Chemotext2 similarity model.
    ct2 = Chemotext2 (ServiceContext.create_context ())

    # Pairwise similarity of a few hand-picked term pairs.
    term_pairs = [
        ("lung cancer", "p53"),
        ("cell line", "disease"),
        ("cellular component", "nucleus"),
        ("cell cycle", "krebbs"),
        ("MAPK2", "P53"),
    ]
    for left, right in term_pairs:
        print (ct2.get_semantic_similarity (left, right))

    # Similarity between each drug name and the one that follows it.
    drugs = [ "albuterol", "imatinib", "aspirin", "atrovent", "decadron", "medrol", "rayos" , "abemaciclib", "abraxane"]
    for current, following in zip (drugs, drugs[1:]):
        similarity = ct2.get_semantic_similarity (current, following)
        print ((" k %s -> v %s : sim: %s" % (current, following, similarity)))

    # Nearest neighbours of single terms in the model.
    for term in ('aspirin', 'p53', 'kit', 'asthma'):
        print (ct2.model.most_similar (positive=[term]))
Esempio n. 29
0
def test():
    """Print Mondo's is_genetic_disease result for OMIM:143100, then a rule."""
    mondo = Mondo(ServiceContext.create_context())
    disease_node = KNode('OMIM:143100', node_types.DISEASE)
    verdict = mondo.is_genetic_disease(disease_node)
    print(verdict)
    print('------')