def query_data(self):
    """
    Execute the cypher queries configured for this dataset type.

    Loads ``queries/datasets_cypher.yml`` relative to this module, applies the
    placeholder substitutions in ``replace`` (default: ``PROJECTID`` ->
    ``self.identifier``; overridable via ``self.configuration['replace']``),
    runs queries whose ``query_type`` is ``"pre"`` immediately, and registers
    all other queries through ``update_analysis_queries`` for later execution.

    :return: dict mapping a human-readable query title to its result, for
        the "pre" queries only.
    """
    data = {}
    replace = [("PROJECTID", self.identifier)]
    try:
        module_dir = os.path.abspath(os.path.dirname(__file__))
        queries_path = "queries/datasets_cypher.yml"
        datasets_cypher = ckg_utils.get_queries(os.path.join(module_dir, queries_path))
        # The configuration may provide its own placeholder/value pairs.
        if "replace" in self.configuration:
            replace = self.configuration["replace"]
        for query_name in datasets_cypher[self.dataset_type]:
            label = query_name.lower().replace('_', ' ')
            entry = datasets_cypher[self.dataset_type][query_name]
            query_type = entry['query_type']
            query = entry['query']
            for placeholder, value in replace:
                query = query.replace(placeholder, value)
            if query_type == "pre":
                data[label] = self.send_query(query)
            else:
                self.update_analysis_queries({query_name.lower(): query})
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading queries from file {}: {}, file: {},line: {}, err: {}".
            format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return data
def get_user_creation_queries():
    """
    Reads the YAML file containing the queries relevant to user creation, parses the given stream and \
    returns a Python object (dict[dict]).

    :return: Nested dictionary of parsed queries, or None if the queries file
        could not be read (the error is logged).
    """
    # Pre-initialize both names: previously a failed read left them unbound,
    # so the `return` raised NameError and the except handler could crash on
    # an undefined `queries_path`, masking the original error.
    user_creation_cypher = None
    queries_path = None
    try:
        queries_path = config['cypher_queries_file']
        user_creation_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading queries from file {}: {}, file: {},line: {}, error: {}".format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return user_creation_cypher
def get_data_upload_queries():
    """
    Reads the YAML file containing the queries relevant to parsing of clinical data and \
    returns a Python object (dict[dict]).

    :return: Nested dictionary of parsed queries, or None if the queries file
        could not be read (the error is logged).
    """
    # Pre-initialize so a failed read returns None instead of raising
    # NameError at the return statement.
    data_upload_cypher = None
    queries_path = "../queries/data_upload_cypher.yml"
    try:
        data_upload_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Reading queries from file {}: {}, file: {},line: {}".format(err, queries_path, sys.exc_info(), fname, exc_tb.tb_lineno))
    return data_upload_cypher
def get_query():
    """
    Reads the YAML file containing the queries relevant for graph database stats, parses the given stream and \
    returns a Python object (dict[dict]).

    :return: Nested dictionary.
    :raises Exception: if the queries file cannot be read or parsed.
    """
    queries_path = "../queries/dbstats_cypher.yml"
    try:
        # Renamed from the copy-pasted `data_upload_cypher` to reflect what
        # this function actually loads.
        dbstats_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        # Fixed "Erro:" typo; chain the original exception for full context.
        raise Exception(
            "Error: {}. Reading queries from file {}: {}, file: {},line: {}".
            format(err, queries_path, sys.exc_info(), fname, exc_tb.tb_lineno)) from err
    return dbstats_cypher
def remove_samples_nodes_db(driver, projectId):
    """
    Remove the graph nodes associated with a given project.

    Loads the 'remove_project' cypher template, substitutes the project
    identifier and executes each resulting statement in sequence.

    :param driver: neo4j driver providing the database connection.
    :param str projectId: external project identifier substituted for the
        PROJECTID placeholder in the template.
    :return: cursor data returned by the last executed statement, or None
        when nothing ran or an error occurred (the error is logged).
    """
    result = None
    query_name = 'remove_project'
    query = ''
    try:
        queries_path = "../queries/project_cypher.yml"
        project_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
        template = project_cypher[query_name]['query']
        # NOTE(review): [:-2] drops the LAST TWO ';'-separated segments, not
        # just the trailing empty one — preserved as-is, but worth confirming
        # against the template's intent.
        query = template.replace('PROJECTID', projectId).split(';')[:-2]
        for statement in query:
            result = connector.getCursorData(driver, statement + ';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Removing nodes associated to project: Query name ({}) - Query ({}), error info: {}, file: {},line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))
    return result
def get_project_creation_queries():
    """
    Reads the YAML file containing the queries relevant to user creation, parses the given stream and \
    returns a Python object (dict[dict]).

    :return: Nested dictionary of parsed queries, or None if the queries file
        could not be read (the error is logged).
    """
    # Pre-initialize so a failed read returns None instead of raising
    # NameError at the return statement (the except block only logs).
    project_creation_cypher = None
    queries_path = "../queries/project_creation_cypher.yml"
    try:
        cwd = os.path.abspath(os.path.dirname(__file__))
        project_creation_cypher = ckg_utils.get_queries(
            os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading queries from file {}: {}, file: {},line: {}, err: {}".
            format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return project_creation_cypher
def query_data(self, replace):
    """
    Run every 'knowledge_report' query defined in this report's queries file.

    Each query has the given (placeholder, value) pairs substituted before
    being sent to the database.

    :param list replace: list of (placeholder, value) tuples applied to each
        query string.
    :return: dict mapping query names to their query results.
    """
    query_data = {}
    try:
        module_dir = os.path.abspath(os.path.dirname(__file__))
        cypher_queries = ckg_utils.get_queries(os.path.join(module_dir, self.queries_file))
        if cypher_queries is not None:
            for query_name in cypher_queries:
                entry = cypher_queries[query_name]
                # Skip entries without a type or of a different report type.
                if 'query_type' not in entry:
                    continue
                if entry['query_type'] != 'knowledge_report':
                    continue
                query = entry['query']
                for placeholder, value in replace:
                    query = query.replace(placeholder, value)
                query_data[query_name] = self.send_query(query)
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading queries from file {}: {}, file: {},line: {}, err: {}".format(self.queries_file, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return query_data
def modifyEntityProperty(parameters):
    '''
    Modify a property of an existing entity node in the graph database.

    :param tuple parameters: tuple with entity name, entity id, property name
        to modify, and value.
    '''
    driver = getGraphDatabaseConnectionConfiguration()
    entity, entityid, attribute, value = parameters
    # Defined before the try block so the error handler can always reference
    # it (previously a failure before its assignment raised NameError in the
    # handler, masking the real error).
    queries_path = "./queries.yml"
    try:
        cwd = os.path.abspath(os.path.dirname(__file__))
        project_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
        for query_name in project_cypher:
            title = query_name.lower().replace('_', ' ')
            if title == 'modify':
                # WARNING: %-interpolation splices raw values straight into
                # the cypher statement; untrusted input could inject arbitrary
                # cypher. Prefer driver query parameters where possible.
                query = project_cypher[query_name]['query'] % (
                    entity, entityid, attribute, value)
                sendQuery(driver, query)
                print("Property successfully modified")
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Error: {}. Reading queries from file {}: {}, file: {},line: {}".
            format(err, queries_path, sys.exc_info(), fname, exc_tb.tb_lineno))
def updateDB(driver, imports=None, specific=None):
    """
    Populates the graph database with information for each Database, Ontology or Experiment \
    specified in imports. If imports is not defined, the function populates the entire graph \
    database based on the graph variable defined in the grapher_config.py module. \
    This function also updates the graph stats object with numbers from the loaded entities and \
    relationships.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param list imports: a list of entities to be loaded into the graph.
    :param list specific: restrict loading to these specific entities/projects
        (default: no restriction; replaces the old mutable default ``[]``).
    """
    # Avoid the shared-mutable-default pitfall; behavior with no argument is
    # unchanged.
    if specific is None:
        specific = []
    if imports is None:
        imports = config["graph"]
    try:
        cypher_queries = ckg_utils.get_queries(
            os.path.join(cwd, config['cypher_queries_file']))
    except Exception as err:
        # Without the queries file nothing can be loaded; previously execution
        # fell through and every import failed with a NameError on
        # `cypher_queries`, burying the real cause.
        logger.error("Reading queries file > {}.".format(err))
        return
    for i in imports:
        queries = []
        logger.info("Loading {} into the database".format(i))
        try:
            # Default import directory; several branches override it below.
            import_dir = os.path.join(
                cwd, directories["databasesDirectory"]).replace('\\', '/')
            if i == "ontologies":
                entities = [e.lower() for e in config["ontology_entities"]]
                if len(specific) > 0:
                    entities = list(
                        set(entities).intersection(
                            [s.lower() for s in specific]))
                import_dir = os.path.join(
                    cwd, directories["ontologiesDirectory"]).replace('\\', '/')
                ontologyDataImportCode = cypher_queries[
                    'IMPORT_ONTOLOGY_DATA']['query']
                for entity in entities:
                    queries.extend(
                        ontologyDataImportCode.replace(
                            "ENTITY", entity.capitalize()).replace(
                                "IMPORTDIR", import_dir).split(';')[0:-1])
                mappings = config['ontology_mappings']
                mappingImportCode = cypher_queries[
                    'IMPORT_ONTOLOGY_MAPPING_DATA']['query']
                for m in mappings:
                    if m.lower() in entities:
                        for r in mappings[m]:
                            queries.extend(
                                mappingImportCode.replace(
                                    "ENTITY1", m).replace("ENTITY2", r).replace(
                                        "IMPORTDIR", import_dir).split(';')[0:-1])
                print('Done Loading ontologies')
            elif i == "biomarkers":
                code = cypher_queries['IMPORT_BIOMARKERS']['query']
                import_dir = os.path.join(
                    cwd, directories["curatedDirectory"]).replace('\\', '/')
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading biomarkers')
            elif i == "qcmarkers":
                code = cypher_queries['IMPORT_QCMARKERS']['query']
                import_dir = os.path.join(
                    cwd, directories["curatedDirectory"]).replace('\\', '/')
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading qcmarkers')
            elif i == "chromosomes":
                code = cypher_queries['IMPORT_CHROMOSOME_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading chromosomes')
            elif i == "genes":
                code = cypher_queries['IMPORT_GENE_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading genes')
            elif i == "transcripts":
                code = cypher_queries['IMPORT_TRANSCRIPT_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                # Fixed user-facing typo ('transcritps').
                print('Done Loading transcripts')
            elif i == "proteins":
                code = cypher_queries['IMPORT_PROTEIN_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading proteins')
            elif i == "functional_regions":
                code = cypher_queries["IMPORT_FUNCTIONAL_REGIONS"]['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading functional_regions')
            elif i == "annotations":
                code = cypher_queries['IMPORT_PROTEIN_ANNOTATIONS']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading annotations')
            elif i == "complexes":
                code = cypher_queries['IMPORT_COMPLEXES']['query']
                for resource in config["complexes_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading complexes')
            elif i == "modified_proteins":
                code = cypher_queries['IMPORT_MODIFIED_PROTEINS']['query']
                for resource in config["modified_proteins_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_MODIFIED_PROTEIN_ANNOTATIONS'][
                    'query']
                for resource in config[
                        "modified_proteins_annotation_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading modified_proteins')
            elif i == "pathology_expression":
                code = cypher_queries['IMPORT_PATHOLOGY_EXPRESSION']['query']
                for resource in config["pathology_expression_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading pathology_expression')
            elif i == "ppi":
                code = cypher_queries['IMPORT_CURATED_PPI_DATA']['query']
                for resource in config["curated_PPI_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_COMPILED_PPI_DATA']['query']
                for resource in config["compiled_PPI_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_PPI_ACTION']['query']
                for resource in config["PPI_action_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading ppi')
            elif i == "protein_structure":
                code = cypher_queries['IMPORT_PROTEIN_STRUCTURES']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading protein_structure')
            elif i == "diseases":
                code = cypher_queries['IMPORT_DISEASE_DATA']['query']
                for entity, resource in config["disease_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "ENTITY", entity).replace("RESOURCE",
                                                      resource.lower()).split(';')[0:-1])
                print('Done Loading diseases')
            elif i == "drugs":
                code = cypher_queries['IMPORT_DRUG_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                code = cypher_queries['IMPORT_DRUG_INTERACTION_DATA']['query']
                for resource in config['drug_drug_interaction_resources']:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_CURATED_DRUG_DATA']['query']
                for resource in config["curated_drug_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_COMPILED_DRUG_DATA']['query']
                for resource in config["compiled_drug_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_DRUG_ACTS_ON']['query']
                for resource in config["drug_action_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading drugs')
            elif i == "side_effects":
                code = cypher_queries['IMPORT_DRUG_SIDE_EFFECTS']['query']
                for resource in config["side_effects_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading side_effects')
            elif i == 'pathway':
                code = cypher_queries['IMPORT_PATHWAY_DATA']['query']
                for resource in config["pathway_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading pathway')
            elif i == 'metabolite':
                code = cypher_queries['IMPORT_METABOLITE_DATA']['query']
                for resource in config["metabolite_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading metabolite')
            elif i == 'food':
                code = cypher_queries['IMPORT_FOOD_DATA']['query']
                for resource in config["food_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading food')
            elif i == "gwas":
                code = cypher_queries['IMPORT_GWAS']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                code = cypher_queries['IMPORT_VARIANT_FOUND_IN_GWAS']['query']
                queries.extend(
                    code.replace("IMPORTDIR", import_dir).split(';')[0:-1])
                code = cypher_queries['IMPORT_GWAS_STUDIES_TRAIT']['query']
                queries.extend(
                    code.replace("IMPORTDIR", import_dir).split(';')[0:-1])
                print('Done Loading gwas')
            elif i == "known_variants":
                code = cypher_queries['IMPORT_KNOWN_VARIANT_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading known_variants')
            elif i == "clinical_variants":
                code = cypher_queries[
                    'IMPORT_CLINICALLY_RELEVANT_VARIANT_DATA']['query']
                for resource in config["clinical_variant_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading clinical_variants')
            elif i == "jensenlab":
                code = cypher_queries['IMPORT_JENSENLAB_DATA']['query']
                for (entity1, entity2) in config["jensenlabEntities"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "ENTITY1", entity1).replace("ENTITY2",
                                                        entity2).split(';')[0:-1])
                print('Done Loading jensenlab')
            elif i == "mentions":
                code = cypher_queries['CREATE_PUBLICATIONS']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                code = cypher_queries['IMPORT_MENTIONS']['query']
                for entity in config["mentionEntities"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "ENTITY", entity).split(';')[0:-1])
                print('Done Loading mentions')
            elif i == "published":
                code = cypher_queries['IMPORT_PUBLISHED_IN']['query']
                for entity in config["publicationEntities"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "ENTITY", entity).split(';')[0:-1])
                print('Done Loading published')
            elif i == "user":
                usersDir = os.path.join(
                    cwd, directories["usersImportDirectory"]).replace('\\', '/')
                user_cypher = cypher_queries['CREATE_USER_NODE']
                code = user_cypher['query']
                queries.extend(
                    code.replace("IMPORTDIR", usersDir).split(';')[0:-1])
                print('Done Loading user')
            elif i == "project":
                import_dir = os.path.join(
                    cwd, directories["experimentsDirectory"]).replace('\\', '/')
                projects = builder_utils.listDirectoryFolders(import_dir)
                if len(specific) > 0:
                    projects = list(set(projects).intersection(specific))
                project_cypher = cypher_queries['IMPORT_PROJECT']
                for project in projects:
                    projectDir = os.path.join(import_dir, project)
                    projectDir = os.path.join(projectDir,
                                              'project').replace('\\', '/')
                    for project_section in project_cypher:
                        code = project_section['query']
                        queries.extend(
                            code.replace("IMPORTDIR", projectDir).replace(
                                'PROJECTID', project).split(';')[0:-1])
                print('Done Loading project')
            elif i == "experiment":
                import_dir = os.path.join(
                    cwd, directories["experimentsDirectory"]).replace('\\', '/')
                datasets_cypher = cypher_queries['IMPORT_DATASETS']
                projects = builder_utils.listDirectoryFolders(import_dir)
                if len(specific) > 0:
                    projects = list(set(projects).intersection(specific))
                for project in projects:
                    projectDir = os.path.join(import_dir,
                                              project).replace('\\', '/')
                    datasetTypes = builder_utils.listDirectoryFolders(
                        projectDir)
                    for dtype in datasetTypes:
                        datasetDir = os.path.join(projectDir,
                                                  dtype).replace('\\', '/')
                        if dtype in datasets_cypher:
                            dataset = datasets_cypher[dtype]
                            code = dataset['query']
                            queries.extend(
                                code.replace("IMPORTDIR", datasetDir).replace(
                                    'PROJECTID', project).split(';')[0:-1])
                print('Done Loading experiment')
            else:
                logger.error(
                    "Non-existing dataset. The dataset you are trying to load does not exist: {}."
                    .format(i))
            load_into_database(driver, queries, i)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Loading: {}: {}, file: {}, line: {}".format(
                i, err, fname, exc_tb.tb_lineno))
def annotate_list(self, query_list, entity_type, attribute='name', queries_file=None, diseases=[], entities=None):
    # Annotate a list of identifiers with knowledge-graph context: run the
    # cypher queries configured for entity_type, collect the results, and turn
    # them into nodes/relationships on this knowledge object.
    #
    # :param query_list: identifiers substituted into the queries via
    #     ``query.format(query_list=...)``.
    # :param str entity_type: key (capitalized) into the queries YAML.
    # :param str attribute: node attribute substituted for the ATTRIBUTE
    #     placeholder (default 'name').
    # :param queries_file: path to the queries YAML, relative to this module;
    #     defaults to 'queries/knowledge_annotation.yml'.
    # :param list diseases: disease names used in the DISEASE_COND filter; if
    #     empty, it is filled from the 'disease' query's results.
    #     NOTE(review): mutable default, but it is only rebound (never
    #     mutated in place), so calls do not leak state into each other.
    # :param entities: node labels used to select applicable queries;
    #     defaults to self.entities.
    self.empty_graph()
    if queries_file is None:
        queries_file = 'queries/knowledge_annotation.yml'
    if entities is None:
        entities = self.entities
    # With no diseases given, the DISEASE_COND placeholder is simply removed;
    # otherwise it becomes an extra OR filter on disease name and score.
    if diseases is None or len(diseases) < 1:
        replace_by = ('DISEASE_COND', '')
    else:
        replace_by = (
            'DISEASE_COND',
            'OR (d.name IN {} AND r.score > 1.5)'.format(diseases))
    query_data = []
    drugs = []
    # q holds the last attempted statement so the error handler can log it.
    q = 'NA'
    try:
        cwd = os.path.abspath(os.path.dirname(__file__))
        cypher_queries = ckg_utils.get_queries(
            os.path.join(cwd, queries_file))
        if cypher_queries is not None:
            if entity_type.capitalize() in cypher_queries:
                queries = cypher_queries[entity_type.capitalize()]
                for query_name in queries:
                    involved_nodes = queries[query_name]['involves_nodes']
                    # Run the query only if it touches one of the requested
                    # entity labels, or if it is the query for entity_type
                    # itself.
                    if len(set(involved_nodes).intersection(
                            entities)) > 0 or query_name.capitalize(
                            ) == entity_type.capitalize():
                        query = queries[query_name]['query']
                        q = 'NA'
                        # A query entry may contain several ';'-terminated
                        # statements; the trailing split element is empty and
                        # is skipped.
                        for q in query.split(';')[:-1]:
                            q = q.format(query_list=query_list).replace(
                                "ATTRIBUTE", attribute).replace(
                                    replace_by[0], replace_by[1]).replace(
                                        'DISEASES', str(diseases)).replace(
                                            'DRUGS', str(drugs))
                            data = self.send_query(q)
                            if not data.empty:
                                # Results of earlier queries feed back into
                                # later ones: discovered diseases and drugs
                                # are substituted into subsequent statements,
                                # so iteration order matters here.
                                if query_name == 'disease' and len(
                                        diseases) < 1:
                                    diseases = data['target'].unique(
                                    ).tolist()
                                if query_name == 'drug':
                                    drugs = data['target'].unique().tolist(
                                    )
                                query_data.append(data)
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Error annotating list. Query: {} from file {}: {}, file: {},line: {}, err: {}"
            .format(q, queries_file, sys.exc_info(), fname, exc_tb.tb_lineno,
                    err))
        print(
            "Error annotating list. Query: {} from file {}: {}, file: {},line: {}, err: {}"
            .format(q, queries_file, sys.exc_info(), fname, exc_tb.tb_lineno,
                    err))
    if len(query_data) > 0:
        # NOTE(review): DataFrame.append was removed in pandas 2.0;
        # pd.concat(query_data) is the modern equivalent — confirm the
        # project's pandas version pin before upgrading.
        self.data = pd.DataFrame().append(query_data)
        for df in query_data:
            # source_type/target_type cells appear to hold list-like values
            # ([0][0] takes the first label) — TODO confirm against query
            # output schema.
            entity1 = df['source_type'][0][0]
            entity2 = df['target_type'][0][0]
            assoc_type = df['rel_type'][0]
            # Edges without a score get a neutral default weight.
            df['weight'] = df['weight'].fillna(0.5)
            nodes, relationships = self.generate_knowledge_from_edgelist(
                df, entity1, entity2, source='source', target='target',
                rtype=assoc_type, weight='weight')
            self.nodes.update(nodes)
            self.relationships.update(relationships)
def read_queries(queries_file):
    """Load and return the parsed cypher queries from *queries_file*."""
    return ckg_utils.get_queries(queries_file)