def query_data(self):
    """
    Reads the queries defined for this dataset type in queries/datasets_cypher.yml, \
    applies the configured placeholder replacements (by default PROJECTID is replaced \
    with the project identifier) and either runs the query ('pre' queries) or stores \
    it for the analysis step.

    :return: Dictionary with the results of the 'pre' queries (title -> result).
    """
    data = {}
    replace = [("PROJECTID", self.identifier)]
    try:
        queries_path = "queries/datasets_cypher.yml"
        datasets_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
        if "replace" in self.configuration:
            replace = self.configuration["replace"]
        for query_name in datasets_cypher[self.dataset_type]:
            title = query_name.lower().replace('_', ' ')
            query_type = datasets_cypher[self.dataset_type][query_name]['query_type']
            query = datasets_cypher[self.dataset_type][query_name]['query']
            for r, by in replace:
                query = query.replace(r, by)
            if query_type == "pre":
                data[title] = self.send_query(query)
            else:
                self.update_analysis_queries({query_name.lower(): query})
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading queries from file {}: {}, file: {}, line: {}, err: {}".format(
            queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return data
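# A minimal, self-contained sketch of the placeholder substitution performed in
# query_data() above: each (placeholder, value) pair in `replace` is applied to the
# raw Cypher text before the query is either run or stored for the analysis step.
# The Cypher template below is hypothetical; only the PROJECTID placeholder comes
# from the default replace pair in the code.
def _example_apply_replace_pairs():
    query = "MATCH (project:Project {id: 'PROJECTID'}) RETURN project"
    replace = [("PROJECTID", "P0000001")]
    for r, by in replace:
        query = query.replace(r, by)
    return query  # "MATCH (project:Project {id: 'P0000001'}) RETURN project"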
def get_user_creation_queries():
    """
    Reads the YAML file containing the queries relevant to user creation, parses the given stream and \
    returns a Python object (dict[dict]).
    """
    user_creation_cypher = None  # ensures a defined return value if reading the queries file fails
    try:
        queries_path = config['cypher_queries_file']
        user_creation_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading queries from file {}: {}, file: {}, line: {}, error: {}".format(
            queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return user_creation_cypher
def get_query():
    """
    Reads the YAML file containing the queries relevant for graph database stats, parses the given stream and \
    returns a Python object (dict[dict]).

    :return: Nested dictionary.
    """
    try:
        queries_path = "../queries/dbstats_cypher.yml"
        directory = os.path.dirname(os.path.abspath(__file__))
        data_upload_cypher = ckg_utils.get_queries(os.path.join(directory, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        raise Exception("Error: {}. Reading queries from file {}: {}, file: {}, line: {}".format(
            err, queries_path, sys.exc_info(), fname, exc_tb.tb_lineno))

    return data_upload_cypher
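# Illustration of the nested dict[dict] structure that get_query() and the other
# loaders in this module assume ckg_utils.get_queries() returns from a queries YAML
# file: query name -> metadata, with the Cypher text under 'query' (and, for dataset
# queries, a 'query_type'). The query name, fields and Cypher below are hypothetical,
# not taken from dbstats_cypher.yml.
def _example_queries_yaml_structure():
    import yaml  # PyYAML

    example_yaml = (
        "COUNT_NODES:\n"
        "    name: count nodes\n"
        "    description: counts all nodes in the database\n"
        "    query: 'MATCH (n) RETURN count(n) AS count;'\n"
    )
    queries = yaml.safe_load(example_yaml)
    return queries['COUNT_NODES']['query']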
def remove_samples_nodes_db(driver, projectId):
    """
    Removes the nodes and relationships associated with a given project by running the \
    'remove_project' queries defined in project_cypher.yml, statement by statement.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param str projectId: external project identifier whose associated nodes are removed.
    :return: Result of the last committed query, or None if nothing was committed.
    """
    result = None
    query_name = 'remove_project'
    query = ''
    try:
        queries_path = "../queries/project_cypher.yml"
        directory = os.path.dirname(os.path.abspath(__file__))
        project_cypher = ckg_utils.get_queries(os.path.join(directory, queries_path))
        query = project_cypher[query_name]['query'].replace('PROJECTID', projectId).split(';')[:-2]
        for q in query:
            result = connector.commitQuery(driver, q + ';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Removing nodes associated to project: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(
            err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return result
def modifyEntityProperty(parameters):
    """
    Modifies a property of an existing entity node using the 'modify' query from queries.yml.

    :param tuple parameters: tuple with entity name, entity id, property name to modify, and value.
    """
    driver = getGraphDatabaseConnectionConfiguration()
    entity, entityid, attribute, value = parameters
    try:
        queries_path = "./queries.yml"
        project_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
        for query_name in project_cypher:
            title = query_name.lower().replace('_', ' ')
            if title == 'modify':
                query = project_cypher[query_name]['query'] % (entity, entityid, attribute, value)
                sendQuery(driver, query)
                print("Property successfully modified")
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Reading queries from file {}: {}, file: {}, line: {}".format(
            err, queries_path, sys.exc_info(), fname, exc_tb.tb_lineno))
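# Hypothetical illustration of how modifyEntityProperty() builds its Cypher: the
# 'modify' query loaded from queries.yml is assumed to contain old-style %s
# placeholders that are filled with the entity label, identifier, attribute and
# value from the parameters tuple. The template text and example values below are
# made up for illustration only.
def _example_modify_query_formatting():
    template = "MATCH (n:%s) WHERE n.ID = '%s' SET n.%s = '%s' RETURN n"
    parameters = ('User', 'U1', 'name', 'John Doe')
    entity, entityid, attribute, value = parameters
    return template % (entity, entityid, attribute, value)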
def get_project_creation_queries():
    """
    Reads the YAML file containing the queries relevant to project creation, parses the given stream and \
    returns a Python object (dict[dict]).

    :return: Nested dictionary.
    """
    project_creation_cypher = None  # ensures a defined return value if reading the queries file fails
    try:
        directory = os.path.dirname(os.path.abspath(__file__))
        queries_path = "../queries/project_creation_cypher.yml"
        project_creation_cypher = ckg_utils.get_queries(os.path.join(directory, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading queries from file {}: {}, file: {}, line: {}, err: {}".format(
            queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return project_creation_cypher
def updateDB(driver, imports=None, specific=[]):
    """
    Populates the graph database with information for each Database, Ontology or Experiment \
    specified in imports. If imports is not defined, the function populates the entire graph \
    database based on the graph variable defined in the grapher_config.py module. \
    This function also updates the graph stats object with numbers from the loaded entities and \
    relationships.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param list imports: a list of entities to be loaded into the graph.
    :param list specific: if provided, restricts the load to these specific ontology entities or projects.
    """
    if imports is None:
        imports = config["graph"]
    try:
        cypher_queries = ckg_utils.get_queries(os.path.join(cwd, config['cypher_queries_file']))
    except Exception as err:
        logger.error("Reading queries file > {}.".format(err))

    for i in imports:
        queries = []
        logger.info("Loading {} into the database".format(i))
        try:
            import_dir = ckg_config['imports_databases_directory']
            if i == "ontologies":
                entities = [e.lower() for e in config["ontology_entities"]]
                if len(specific) > 0:
                    entities = list(set(entities).intersection([s.lower() for s in specific]))
                import_dir = ckg_config['imports_ontologies_directory']
                ontologyDataImportCode = cypher_queries['IMPORT_ONTOLOGY_DATA']['query']
                for entity in entities:
                    queries.extend(ontologyDataImportCode.replace("ENTITY", entity.capitalize()).replace("IMPORTDIR", import_dir).split(';')[0:-1])
                mappings = config['ontology_mappings']
                mappingImportCode = cypher_queries['IMPORT_ONTOLOGY_MAPPING_DATA']['query']
                for m in mappings:
                    if m.lower() in entities:
                        for r in mappings[m]:
                            queries.extend(mappingImportCode.replace("ENTITY1", m).replace("ENTITY2", r).replace("IMPORTDIR", import_dir).split(';')[0:-1])
                print('Done Loading ontologies')
            elif i == "biomarkers":
                code = cypher_queries['IMPORT_BIOMARKERS']['query']
                import_dir = ckg_config['imports_curated_directory']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading biomarkers')
            elif i == "qcmarkers":
                code = cypher_queries['IMPORT_QCMARKERS']['query']
                import_dir = ckg_config['imports_curated_directory']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading qcmarkers')
            elif i == "chromosomes":
                code = cypher_queries['IMPORT_CHROMOSOME_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading chromosomes')
            elif i == "genes":
                code = cypher_queries['IMPORT_GENE_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading genes')
            elif i == "transcripts":
                code = cypher_queries['IMPORT_TRANSCRIPT_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading transcripts')
            elif i == "proteins":
                code = cypher_queries['IMPORT_PROTEIN_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading proteins')
            elif i == "functional_regions":
                code = cypher_queries["IMPORT_FUNCTIONAL_REGIONS"]['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading functional_regions')
            elif i == "annotations":
                code = cypher_queries['IMPORT_PROTEIN_ANNOTATIONS']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading annotations')
            elif i == "complexes":
                code = cypher_queries['IMPORT_COMPLEXES']['query']
                for resource in config["complexes_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading complexes')
            elif i == "modified_proteins":
                code = cypher_queries['IMPORT_MODIFIED_PROTEINS']['query']
                for resource in config["modified_proteins_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_MODIFIED_PROTEIN_ANNOTATIONS']['query']
                for resource in config["modified_proteins_annotation_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading modified_proteins')
            elif i == "pathology_expression":
                code = cypher_queries['IMPORT_PATHOLOGY_EXPRESSION']['query']
                for resource in config["pathology_expression_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading pathology_expression')
            elif i == "ppi":
                code = cypher_queries['IMPORT_CURATED_PPI_DATA']['query']
                for resource in config["curated_PPI_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_COMPILED_PPI_DATA']['query']
                for resource in config["compiled_PPI_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_PPI_ACTION']['query']
                for resource in config["PPI_action_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading ppi')
            elif i == "protein_structure":
                code = cypher_queries['IMPORT_PROTEIN_STRUCTURES']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                print('Done Loading protein_structure')
            elif i == "diseases":
                code = cypher_queries['IMPORT_DISEASE_DATA']['query']
                for entity, resource in config["disease_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("ENTITY", entity).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading diseases')
            elif i == "drugs":
                code = cypher_queries['IMPORT_DRUG_DATA']['query']
                queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1]
                code = cypher_queries['IMPORT_DRUG_INTERACTION_DATA']['query']
                for resource in config['drug_drug_interaction_resources']:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_CURATED_DRUG_DATA']['query']
                for resource in config["curated_drug_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_COMPILED_DRUG_DATA']['query']
                for resource in config["compiled_drug_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_DRUG_ACTS_ON']['query']
                for resource in config["drug_action_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading drugs')
            elif i == "side_effects":
                code = cypher_queries['IMPORT_DRUG_SIDE_EFFECTS']['query']
                for resource in config["side_effects_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading side_effects')
            elif i == 'pathway':
                code = cypher_queries['IMPORT_PATHWAY_DATA']['query']
                for resource in config["pathway_resources"]:
                    queries.extend(code.replace("IMPORTDIR", import_dir).replace("RESOURCE", resource.lower()).split(';')[0:-1])
code.replace("IMPORTDIR", import_dir).replace( "RESOURCE", resource.lower()).split(';')[0:-1]) print('Done Loading pathway') elif i == 'metabolite': code = cypher_queries['IMPORT_METABOLITE_DATA']['query'] for resource in config["metabolite_resources"]: queries.extend( code.replace("IMPORTDIR", import_dir).replace( "RESOURCE", resource.lower()).split(';')[0:-1]) print('Done Loading metabolite') elif i == 'food': code = cypher_queries['IMPORT_FOOD_DATA']['query'] for resource in config["food_resources"]: queries.extend( code.replace("IMPORTDIR", import_dir).replace( "RESOURCE", resource.lower()).split(';')[0:-1]) print('Done Loading food') elif i == "gwas": code = cypher_queries['IMPORT_GWAS']['query'] queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1] code = cypher_queries['IMPORT_VARIANT_FOUND_IN_GWAS']['query'] queries.extend( code.replace("IMPORTDIR", import_dir).split(';')[0:-1]) code = cypher_queries['IMPORT_GWAS_STUDIES_TRAIT']['query'] queries.extend( code.replace("IMPORTDIR", import_dir).split(';')[0:-1]) print('Done Loading gwas') elif i == "known_variants": code = cypher_queries['IMPORT_KNOWN_VARIANT_DATA']['query'] queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1] print('Done Loading known_variants') elif i == "clinical_variants": code = cypher_queries[ 'IMPORT_CLINICALLY_RELEVANT_VARIANT_DATA']['query'] for resource in config["clinical_variant_resources"]: queries.extend( code.replace("IMPORTDIR", import_dir).replace( "RESOURCE", resource.lower()).split(';')[0:-1]) print('Done Loading clinical_variants') elif i == "jensenlab": code = cypher_queries['IMPORT_JENSENLAB_DATA']['query'] for (entity1, entity2) in config["jensenlabEntities"]: queries.extend( code.replace("IMPORTDIR", import_dir).replace( "ENTITY1", entity1).replace("ENTITY2", entity2).split(';')[0:-1]) print('Done Loading jensenlab') elif i == "mentions": code = cypher_queries['CREATE_PUBLICATIONS']['query'] queries = code.replace("IMPORTDIR", import_dir).split(';')[0:-1] code = cypher_queries['IMPORT_MENTIONS']['query'] for entity in config["mentionEntities"]: queries.extend( code.replace("IMPORTDIR", import_dir).replace( "ENTITY", entity).split(';')[0:-1]) print('Done Loading mentions') elif i == "published": code = cypher_queries['IMPORT_PUBLISHED_IN']['query'] for entity in config["publicationEntities"]: queries.extend( code.replace("IMPORTDIR", import_dir).replace( "ENTITY", entity).split(';')[0:-1]) print('Done Loading published') elif i == "user": usersDir = ckg_config['imports_users_directory'] user_cypher = cypher_queries['CREATE_USER_NODE'] code = user_cypher['query'] queries.extend( code.replace("IMPORTDIR", usersDir).split(';')[0:-1]) print('Done Loading user') elif i == "project": import_dir = ckg_config['imports_experiments_directory'] projects = builder_utils.listDirectoryFolders(import_dir) if len(specific) > 0: projects = list(set(projects).intersection(specific)) project_cypher = cypher_queries['IMPORT_PROJECT'] for project in projects: projectDir = os.path.join(import_dir, project) projectDir = os.path.join(projectDir, 'project').replace('\\', '/') for project_section in project_cypher: code = project_section['query'] queries.extend( code.replace("IMPORTDIR", projectDir).replace( 'PROJECTID', project).split(';')[0:-1]) print('Done Loading project') elif i == "experiment": import_dir = ckg_config['imports_experiments_directory'] datasets_cypher = cypher_queries['IMPORT_DATASETS'] projects = builder_utils.listDirectoryFolders(import_dir) if len(specific) > 0: projects = 
                for project in projects:
                    projectDir = os.path.join(import_dir, project).replace('\\', '/')
                    datasetTypes = builder_utils.listDirectoryFolders(projectDir)
                    for dtype in datasetTypes:
                        datasetDir = os.path.join(projectDir, dtype).replace('\\', '/')
                        if dtype in datasets_cypher:
                            dataset = datasets_cypher[dtype]
                            code = dataset['query']
                            queries.extend(code.replace("IMPORTDIR", datasetDir).replace('PROJECTID', project).split(';')[0:-1])
                print('Done Loading experiment')
            else:
                logger.error("Non-existing dataset. The dataset you are trying to load does not exist: {}.".format(i))
            load_into_database(driver, queries, i)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Loading: {}: {}, file: {}, line: {}".format(i, err, fname, exc_tb.tb_lineno))
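# Hypothetical illustration of the statement-building idiom repeated throughout
# updateDB(): a multi-statement Cypher template is specialised by replacing the
# IMPORTDIR and RESOURCE placeholders and then split on ';', where [0:-1] drops the
# empty fragment left after the final ';'. The template text and path below are made
# up; only the placeholders and the split idiom come from the code above.
def _example_build_import_statements():
    template = (
        "CREATE CONSTRAINT ON (p:Protein) ASSERT p.id IS UNIQUE;"
        "LOAD CSV WITH HEADERS FROM 'file://IMPORTDIR/RESOURCE_example.tsv' AS row "
        "MERGE (p:Protein {id: row.ID});"
    )
    statements = template.replace("IMPORTDIR", "/data/imports/databases").replace(
        "RESOURCE", "intact").split(';')[0:-1]
    # Each fragment can then be committed individually, re-appending the ';' as
    # remove_samples_nodes_db() does above.
    return statements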
def read_queries(queries_file):
    queries = ckg_utils.get_queries(queries_file)
    return queries