Esempio n. 1
0
def run(args):
    """Dispatch one crawl/load task based on the parsed CLI flags.

    Exactly one branch runs; flag precedence follows the order of the
    checks below. Falls through to a source->target pool crawl when no
    flag is set.
    """
    rosetta = Rosetta()
    if args.all:
        print('all')
        crawl_all(rosetta)
        return
    if args.synonyms:
        print('synonyms')
        load_synonyms(rosetta)
        return
    if args.load_genetics:
        print('load genetic variation')
        load_genetic_variants(rosetta)
        return
    if args.crawl_genetics:
        print('crawl genetic variation')
        crawl_genetic_variants(rosetta)
        return
    if args.omnicache:
        # omnicache construction does not need the graph connection
        rosetta = Rosetta(use_graph=False)
        print('omnicache')
        create_omnicache(rosetta)
        return
    if args.annotate:
        print('annotate')
        load_annotations(rosetta)
        return
    if args.service:
        # only run if the named service is known to the lazy loader
        if args.service in rosetta.core.lazy_loader:
            print(f"Trying to get everything from {args.service}")
            run_per_service(args.service, rosetta)
        return
    print(f'crawl from {args.source} to {args.target}')
    poolrun(args.source, args.target, rosetta)
 def __init__(self):
     """Set up the CORD loader: env-configured data dir, Rosetta, writer, and line counts."""
     # directory holding the CORD data files — TODO confirm behavior when CORD_DIR is unset (None)
     self.cord_dir = os.environ.get('CORD_DIR')
     self.rosetta = Rosetta()
     self.writer = WriterDelegator(rosetta=self.rosetta)
     # pre-count edges/nodes files for progress reporting; presumably resolved
     # relative to cord_dir by count_lines_in_file — verify against that helper
     self.num_edges = self.count_lines_in_file('edges.txt')
     self.num_nodes = self.count_lines_in_file('nodes.txt')
def setup(config):
    """Configure debug logging and return the module-level Rosetta singleton.

    Args:
        config: path to the greent configuration file passed to Rosetta.

    Returns:
        The shared Rosetta instance, created on first call.
    """
    logger = logging.getLogger('application')
    logger.setLevel(level=logging.DEBUG)
    global rosetta_global
    # NOTE(review): assumes rosetta_global is initialized to None at module
    # level elsewhere; a missing initializer would raise NameError here.
    # Fixed: compare to None with `is`, not `==` (PEP 8).
    if rosetta_global is None:
        rosetta_global = Rosetta(greentConf=config, debug=True)
    return rosetta_global
def test2():
    """Build two chemical KNodes and a ChemotextSupport over a fresh Rosetta core."""
    from greent.rosetta import Rosetta
    from greent.graph_components import KNode
    rosetta = Rosetta()
    core = rosetta.core
    support = ChemotextSupport(core)
    first = KNode('CTD:1,2-linoleoylphosphatidylcholine', type=node_types.CHEMICAL_SUBSTANCE, name='1,2-linoleoylphosphatidylcholine')
    second = KNode('CTD:Hydrogen Peroxide', type=node_types.CHEMICAL_SUBSTANCE, name='Hydrogen Peroxide')
def process_queue(pool_id=0, errors=None):
    """Flush queued writes and start the neo4j queue consumer.

    Args:
        pool_id: worker pool identifier (currently unused in this body).
        errors: optional dict for collecting errors. Fixed: the original
            used a mutable default (`errors={}`) shared across calls; a
            fresh dict is now created per call.
    """
    if errors is None:
        errors = {}
    rosetta = Rosetta()
    wdg = WriterDelegator(rosetta, push_to_queue=True)
    print('starting consumer')
    # send a 'close' message to stop the consumer at the end, assuming it
    # lands after all node and edge messages in the queue.
    wdg.flush()
    wdg.close()
    # retry forever (-1) while consuming
    start_consuming(max_retries=-1)
Esempio n. 6
0
 def __init__(self, sv_neo4j_credentials, crawl_for_service, recreate_sv_node):
     """Set up the sequence-variant crawler: Rosetta, writer, genetics services, and run options.

     Args:
         sv_neo4j_credentials: credentials for the sequence-variant neo4j instance.
         crawl_for_service: which genetics service to crawl for — TODO confirm expected type.
         recreate_sv_node: whether to recreate sequence-variant nodes — presumably a bool; verify against callers.
     """
     self.rosetta = Rosetta()
     self.writerDelegator = WriterDelegator(rosetta=self.rosetta)
     self.sv_neo4j_credentials = sv_neo4j_credentials
     self.crawl_for_service = crawl_for_service
     self.genetics_services = GeneticsServices()
     self.recreate_sv_node = recreate_sv_node
     # tracks gene ids already written so they are not emitted twice
     self.written_genes = set()
def test():
    """Run chemotext term annotation over a single phenotype node."""
    from greent.rosetta import Rosetta
    from greent.graph_components import KNode
    rosetta = Rosetta()
    core = rosetta.core
    support = ChemotextSupport(core)
    eczema = KNode('HP:0000964', type=node_types.PHENOTYPIC_FEATURE, name='Eczema')
    # mesh identifiers could also be seeded manually, e.g.
    # {'curie': 'MeSH:D004485', 'label': 'Eczema'}
    support.add_chemotext_terms([eczema])
def check_queue(size):
    """Return True when the neo4j queue holds exactly `size` messages."""
    import time
    rosetta = Rosetta()
    delegator = WriterDelegator(rosetta, push_to_queue=True)
    # give the broker a moment to settle before inspecting the queue
    time.sleep(1)
    declared = delegator.channel.queue_declare(queue="neo4j", passive=True)
    return size == declared.method.message_count
Esempio n. 9
0
def test():
    """Run CDW preparation over a single disease node."""
    from greent.rosetta import Rosetta
    from greent.graph_components import KNode
    from greent import node_types
    rosetta = Rosetta()
    core = rosetta.core
    cdw = CDWSupport(core)
    # alternative fixture: KNode('MESH:D008175', node_type=node_types.GENETIC_CONDITION)
    disease = KNode( 'DOID:9352', node_type=node_types.DISEASE )
    cdw.prepare( [disease] )
Esempio n. 10
0
 def __init__(self, config="greent.conf", debug=False):
     """Build a Rosetta instance and, if ~/.ndex credentials exist, an NDEx client.

     Args:
         config: path to the greent configuration file.
         debug: passed through to Rosetta.
     """
     self.rosetta = Rosetta(debug=debug, greentConf=config)
     self.ndex = None
     creds_path = os.path.expanduser("~/.ndex")
     # only connect to NDEx when a credentials file is present
     if os.path.exists(creds_path):
         with open(creds_path, "r") as stream:
             ndex_creds_obj = json.load(stream)
             print(f"connecting to ndex as {ndex_creds_obj['username']}")
             self.ndex = NDEx(ndex_creds_obj['username'],
                              ndex_creds_obj['password'])
Esempio n. 11
0
def test():
    """Annotate one phenotype node with chemotext terms and print its mesh identifiers."""
    import json
    from greent.rosetta import Rosetta
    from greent.graph_components import KNode
    rosetta = Rosetta()
    core = rosetta.core
    support = ChemotextSupport(core)
    eczema = KNode('HP:0000964', node_type=node_types.PHENOTYPE, label='Eczema')
    eczema.mesh_identifiers.append({'curie': 'MeSH:D004485', 'label': 'Eczema'})
    support.add_chemotext_terms([eczema])
    print(json.dumps(eczema.mesh_identifiers[0], indent=4))
Esempio n. 12
0
def test_edge():
    """Prepare two disease nodes and print the CDW term-to-term edge between them."""
    from greent.rosetta import Rosetta
    from greent.graph_components import KNode
    from greent import node_types
    rosetta = Rosetta()
    core = rosetta.core
    cdw = CDWSupport(core)
    # alternative fixture: KNode('MESH:D008175', node_type=node_types.GENETIC_CONDITION)
    disease_a = KNode( 'DOID:11476', node_type=node_types.DISEASE )
    disease_b = KNode( 'Orphanet:90318', node_type=node_types.DISEASE )
    cdw.prepare( [disease_a, disease_b] )
    edge = cdw.term_to_term( disease_a, disease_b)
    print (edge)
    def run(self, nodes_file_name, edges_file_name, provided_by, delimiter):
        """Load nodes and edges from delimited files and write them via the delegator.

        Args:
            nodes_file_name: path to the delimited nodes file.
            edges_file_name: path to the delimited edges file.
            provided_by: provenance tag attached to each edge.
            delimiter: field delimiter used by both files.
        """
        self.rosetta = Rosetta()
        # Fixed: original passed bare `rosetta`, which is undefined in this
        # scope (NameError unless a module-level global happened to exist).
        self.wdg = WriterDelegator(self.rosetta)
        self.wdg.normalized = True

        # nodes are written as-is; annotation is skipped for bulk loads
        for node in self.get_nodes_from_file(nodes_file_name, delimiter):
            self.wdg.write_node(node, annotate=False)

        for edge in self.get_edges_from_file(edges_file_name,
                                             provided_by=provided_by,
                                             delimiter=delimiter):
            self.wdg.write_edge(edge)
        # push any buffered writes
        self.wdg.flush()
Esempio n. 14
0
 def __init__(self):
     """Set up the ubergraph SPARQL crawler: endpoint, type prefixes, root terms, and query template.

     Fixed: the CELLULAR_COMPONENT root URI had a typo'd host
     (`purl.orolibrary.org`); it now points at purl.obolibrary.org like
     every other root term.
     """
     self.url = "https://stars-app.renci.org/uberongraph/sparql"
     self.triplestore = TripleStore(self.url)
     # CURIE prefixes accepted for each node type
     self.prefix_set = {
         node_types.DISEASE_OR_PHENOTYPIC_FEATURE: ['HP', 'MONDO'],
         node_types.CELLULAR_COMPONENT: ['CL'],
         node_types.BIOLOGICAL_PROCESS_OR_ACTIVITY: ['GO'],
         node_types.ANATOMICAL_ENTITY: ['UBERON'],
         node_types.CHEMICAL_SUBSTANCE: ['CHEBI']
     }
     # ontology root term for each node type; subclass traversal starts here
     self.root_uris = {
         node_types.ANATOMICAL_ENTITY:
         "<http://purl.obolibrary.org/obo/UBERON_0001062>",
         node_types.DISEASE:
         "<http://purl.obolibrary.org/obo/MONDO_0000001>",
         node_types.MOLECULAR_ACTIVITY:
         "<http://purl.obolibrary.org/obo/GO_0003674>",
         node_types.BIOLOGICAL_PROCESS:
         "<http://purl.obolibrary.org/obo/GO_0008150>",
         node_types.CHEMICAL_SUBSTANCE:
         "<http://purl.obolibrary.org/obo/CHEBI_24431>",
         node_types.PHENOTYPIC_FEATURE:
         "<http://purl.obolibrary.org/obo/HP_0000118>",
         node_types.CELL:
         "<http://purl.obolibrary.org/obo/CL_0000000>",
         node_types.CELLULAR_COMPONENT:
         "<http://purl.obolibrary.org/obo/GO_0005575>"
     }
     # one PREFIX line per distinct CURIE prefix used above
     obo_prefixes = '\n'.join([
         f'PREFIX {pref}: <http://purl.obolibrary.org/obo/{pref}_>'
         for pref in set(
             reduce(lambda x, y: x + y, self.prefix_set.values(), []))
     ])
     # $root_uri is substituted per-type before the query is issued
     self.query = f"""
                 {obo_prefixes}
                 PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
                 select  ?parent_id ?parent_label ?child_id ?child_label
                 where {{
                     ?parent_id rdfs:subClassOf $root_uri .
                     ?child_id rdfs:subClassOf ?parent_id.
                OPTIONAL {{
                 ?parent_id rdfs:label ?parent_label.
                 ?child_id rdfs:label ?child_label.
                 }}
                 }}
                     """
     rosetta = Rosetta()
     self.wdg = WriterDelegator(rosetta)
Esempio n. 15
0
def load_genetic_variants(rosetta=None):
    """Load GWAS Catalog and GTEx knowledge into the graph.

    Args:
        rosetta: an existing Rosetta instance; a new one is created when None.
    """
    if rosetta is None:
        rosetta = Rosetta()

    # seed the graph with the initial variant set
    print('loading the GWAS Catalog...')
    load_gwas_knowledge(rosetta)
    # smaller smoke-test alternative: load_gwas_knowledge(rosetta, limit=25)
    print('GWAS Catalog loading complete...')

    # then layer on the default GTEx knowledge
    print('loading GTEx Data...')
    load_gtex_knowledge(rosetta)
    # or target specific files:
    # load_gtex_knowledge(rosetta, ['test_signif_Adipose_Subcutaneous_100.csv'])
    print('finished loading GTEx Data...')
Esempio n. 16
0
def run(id_list, service):
    """Run a mini-crawl for each type triplet a service supports.

    Args:
        id_list: mapping of grouped-type keys (e.g. 'gene:gene_or_gene_product:...')
            to lists of identifiers.
        service: name of the service whose supported triplets drive the crawl.
    """
    rosy = Rosetta()
    triplets = get_supported_types(service_name=service, rosetta=rosy)

    for triplet in triplets:
        # triplet[0] is a concept-graph name like 'gene' or 'disease'; find
        # the first grouped-type key that mentions it.
        match = next((k for k in id_list.keys() if triplet[0] in k), None)
        if match is None:
            # nothing to crawl for this triplet
            continue
        identifiers = [LabeledID(identifier=y) for y in id_list[match]]
        print(f'running {triplet[0]} --> {triplet[2]}')
        bake_programs(triplet, rosy, identifier_list=identifiers)
Esempio n. 17
0
def create_metamap():
    """Write a TSV of all uberongraph edge relations between pairs of node types."""
    rosetta = Rosetta()
    uberon = rosetta.core.uberongraph
    types = [
        node_types.DISEASE, node_types.MOLECULAR_ACTIVITY,
        node_types.BIOLOGICAL_PROCESS, node_types.PHENOTYPIC_FEATURE,
        node_types.CELL, node_types.ANATOMICAL_ENTITY,
        node_types.CHEMICAL_SUBSTANCE
    ]
    with open('ubergraph_metamap.txt', 'w') as outf:
        outf.write('sourcetype\tobjecttype\trelation_id\trelation_label\n')
        # iterate unordered pairs (including self-pairs); for distinct
        # types also query the reverse direction
        for i, source_type in enumerate(types):
            for j, object_type in enumerate(types[i:]):
                write_results(outf, source_type, object_type,
                              uberon.get_edges(source_type, object_type))
                if j != 0:
                    write_results(outf, object_type, source_type,
                                  uberon.get_edges(object_type, source_type))
def start(args):
    """Annotate and synonymize all entities of the type named by args.annotate.

    Raises:
        Exception: when no annotate argument was given, or no annotator
            exists for the requested type.
    """
    if not args.annotate:
        raise Exception('No argument passed.')
    if args.annotate not in annotator_class_list:
        raise Exception(f'No annotator found for {args.annotate}')
    rosetta = Rosetta()
    print('starting annotation and synonmization')
    results = grab_all(args.annotate, rosetta)
    lids = [LabeledID(x['id'], x['label']) for x in results]
    # split the work into 2 chunks per pool worker
    pool_size = 10
    chunks = pool_size * 2
    chunksize = int(len(lids)/chunks)
    single_run_size = chunksize if chunksize > 0 else 1
    lids_chunks = [lids[i: i + single_run_size]
                   for i in range(0, len(lids), single_run_size)]
    partial_run = partial(run_wrapper, f'{args.annotate}')
    print('starting processes')
    pool = Pool(processes=pool_size)
    # fire-and-report: errors are printed, not re-raised
    pool.map_async(partial_run, lids_chunks,
                   error_callback=lambda error: print(error))
    pool.close()
    pool.join()
    print('done.')
Esempio n. 19
0
def load_synonyms(rosetta=None, refresh_chemicals=False):
    """Load chemical synonyms (gene loading is currently disabled).

    Args:
        rosetta: an existing Rosetta instance; a new one is created when None.
        refresh_chemicals: passed through to load_chemicals as `refresh`.
    """
    if rosetta is None:
        rosetta = Rosetta()
    # load_genes(rosetta) is intentionally disabled here
    load_chemicals(rosetta, refresh=refresh_chemicals)
def rosetta(conf):
    """Rosetta fixture: build a debug Rosetta from the fixture config."""
    config = conf.get("config", "greent.conf")
    print(f"CONFIG: *** > {config}")
    return Rosetta(debug=True, greentConf=config)
Esempio n. 21
0
def crawl():
    """Build the omnicache (gene/chemical/disease loading steps are disabled)."""
    rosetta = Rosetta()
    # disabled steps, kept for reference:
    #   load_genes(rosetta)
    #   load_chemicals(rosetta, refresh=False)
    #   load_diseases_and_phenotypes(rosetta)
    create_omnicache(rosetta)
Esempio n. 22
0
def setup():
    """Enable debug logging and return a fresh Rosetta instance."""
    logging.getLogger('application').setLevel(level=logging.DEBUG)
    return Rosetta()
Esempio n. 23
0
def setup():
    """Return the neo4j driver backing the Rosetta type graph."""
    rosetta = Rosetta()
    return rosetta.type_graph.driver
Esempio n. 24
0
def runBuilderQuery(database_file, board_id):
    """Given a board id, create a knowledge graph though querying external data sources.
       Export the graph to Neo4j.

       board_id may be a comma-separated list of board ids.
       e.g. asdfly,sdhjdhl,sdflch

       NOTE(review): on a failure before `sgraph` is assigned, the
       post-except INSERT below would raise NameError on `sgraph`;
       likewise `condition` is reused across the try/except boundary.
       Table/condition names are spliced via str.format — safe only
       because they are internal constants, not user input."""

    # initialize rosetta
    rosetta = Rosetta()

    board_ids = board_id.split(',')

    for board_id in board_ids:
        # lookup this board's row in the 'building' table
        condition = "id='{}'".format(board_id)
        rows = fetch_table_entries(database_file, "building", condition)

        # row layout: (id, name, description, query_json, ...) — TODO confirm schema
        board_name = rows[0][1]
        board_description = rows[0][2]
        board_query = json.loads(rows[0][3])
        
        try:
            # convert query to the required form
            query = boardQueryToRenciQuery(board_query, rosetta)

            # build knowledge graph
            kgraph = KnowledgeGraph(query, rosetta)

            # get construction/source graph
            sgraph = getSourceGraph(kgraph)

            # export graph to Neo4j
            supports = ['chemotext']
            # supports = ['chemotext', 'chemotext2'] # chemotext2 is really slow
            exportBioGraph(kgraph, board_id, supports=supports)
            
        except Exception as err:
            print(err)
            # Set flag in building table to indicated finsihed
            table_name = 'building'
            database = sqlite3.connect(database_file)
            cursor = database.cursor()
            # insert blackboard information into database
            cursor.execute('''UPDATE {}
                SET finished = ?
                WHERE {}'''.format(table_name, condition), ("Failed",))
            database.commit()
            database.close()
        
        # insert blackboard information into blackboards (indicating that it is finished)
        table_name = 'blackboards'
        database = sqlite3.connect(database_file)
        cursor = database.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS {}
                (id text, name text, description text, query_json text, con_graph text)'''\
                .format(table_name))
        # insert blackboard information into database
        cursor.execute("INSERT INTO {} VALUES (?,?,?,?,?)".format(table_name),\
            (board_id, board_name, board_description, json.dumps(board_query), json.dumps(sgraph)))
        database.commit()
        database.close()

        # Set flag in building table to indicated finsihed
        table_name = 'building'
        database = sqlite3.connect(database_file)
        cursor = database.cursor()
        # insert blackboard information into database
        cursor.execute('''UPDATE {}
            SET finished = ?
            WHERE {}'''.format(table_name, condition), ("True",))
        database.commit()
        database.close()
Esempio n. 25
0
def setup(config):
    """Enable debug logging and return a debug Rosetta built from `config`."""
    logging.getLogger('application').setLevel(level=logging.DEBUG)
    return Rosetta(greentConf=config, debug=True)
def test():
    """Print drug lookups for a few known drug names."""
    from greent.rosetta import Rosetta
    rosetta = Rosetta()
    for name in ('BUTYLSCOPOLAMINE', 'ADAPALENE', 'NADIFLOXACIN', 'TAZAROTENE'):
        print(name, lookup_drug_by_name(name, rosetta.core))
            logger.error(f'Exception caught: Exception: {e}')
            ret_val = e

        # output some final feedback for the user
        logger.info(f'Building complete. Processed {line_counter} variants.')

        # return to the caller
        return ret_val


#######
# Main - Stand alone entry point for testing
#######
if __name__ == '__main__':
    # build a GTEx builder against a fresh Rosetta
    gtb = GTExBuilder(Rosetta())

    # GTEx working data lives in the current directory
    working_data_directory = '.'

    # run the load with default settings for the eqtl GTEx data
    rv = gtb.load(working_data_directory)

    # optional-parameter variant:
    #   out_file_name  — name of the combined/processed gtex csv (eqtl_signif_pairs.csv)
    #   process_raw_data — set False and pass an existing file name to skip creation
    # rv = gtb.load(working_data_directory,
    #              out_file_name='example_eqtl_output.csv',
    #              process_raw_data=True,
    #              process_for_graph=True,
    #              gtex_version=8)
def rosetta():
    """Fixture: construct and return a default Rosetta instance."""
    from greent.rosetta import Rosetta
    instance = Rosetta()
    return instance
Esempio n. 29
0
    def create_variant_to_phenotype_components(self,
                                               variant_node,
                                               phenotype_id,
                                               phenotype_label,
                                               pubmed_id=None,
                                               properties=None):
        """Build a has_phenotype edge from a variant to a new phenotype node.

        Args:
            variant_node: the sequence-variant KNode that is the edge subject.
            phenotype_id: curie for the phenotype/disease node to create.
            phenotype_label: display name for the phenotype node.
            pubmed_id: optional PubMed id recorded as a PMID publication.
            properties: optional extra edge properties. Fixed: the original
                used a mutable default (`properties={}`) shared across calls.

        Returns:
            (edge, phenotype_node) tuple.
        """
        if properties is None:
            properties = {}
        phenotype_node = KNode(phenotype_id,
                               name=phenotype_label,
                               type=node_types.DISEASE_OR_PHENOTYPIC_FEATURE)
        pubs = []
        if pubmed_id:
            pubs.append(f'PMID:{pubmed_id}')

        # RO:0002200 is the 'has_phenotype' relation (f-prefixes removed:
        # the originals had no placeholders)
        predicate = LabeledID(identifier='RO:0002200', label='has_phenotype')
        edge = self.create_edge(
            variant_node,
            phenotype_node,
            'gwascatalog.sequence_variant_to_disease_or_phenotypic_feature',
            variant_node.id,
            predicate,
            url=self.query_url,
            properties=properties,
            publications=pubs)
        return (edge, phenotype_node)


if __name__ == "__main__":
    # standalone entry point: process the full GWAS catalog
    builder = GWASCatalog(Rosetta())
    builder.process_gwas()
Esempio n. 30
0
    mcfname = os.path.join (os.path.dirname (__file__), 'meshcas.pickle')
    mufname = os.path.join (os.path.dirname (__file__), 'meshunii.pickle')
    ecfname = os.path.join (os.path.dirname (__file__), 'meschec.pickle')
    with open(umfname,'wb') as um, open(mcfname,'wb') as mc, open(mufname,'wb') as mu, open(ecfname,'wb') as mec:
        pickle.dump(unmapped_mesh,um)
        pickle.dump(term_to_cas,mc)
        pickle.dump(term_to_unii,mu)
        pickle.dump(term_to_EC,mec)
    with open(umfname,'rb') as um, open(mcname,'rb') as mc, open(mufname,'rb') as mu, open(ecfname,'rb') as mec:
        unmapped_mesh=pickle.load(um)
        term_to_cas=pickle.load(mc)
        term_to_unii=pickle.load(mu)
        term_to_EC=pickle.load(mec)
    '''
    muni_name = os.path.join(os.path.dirname(__file__), 'mesh_to_unii.txt')
    mec_name = os.path.join(os.path.dirname(__file__), 'mesh_to_EC.txt')
    dump(term_to_unii, muni_name)
    dump(term_to_EC, mec_name)
    context = rosetta.service_context
    api_key = context.config['EUTILS_API_KEY']
    term_to_pubchem_by_mesh = lookup_by_mesh(unmapped_mesh, api_key)
    term_to_pubchem_by_cas = lookup_by_cas(term_to_cas, api_key)
    term_to_pubchem = {**term_to_pubchem_by_cas, **term_to_pubchem_by_mesh}
    mpc_name = os.path.join(os.path.dirname(__file__), 'mesh_to_pubchem.txt')
    dump(term_to_pubchem, mpc_name)


if __name__ == '__main__':
    # standalone entry point: refresh the mesh->pubchem mapping with a fresh Rosetta
    from greent.rosetta import Rosetta
    refresh_mesh_pubchem(Rosetta())