Beispiel #1
0
    def query_data(self):
        """
        Prepare the dataset queries defined for ``self.dataset_type``.

        The queries are read from ``queries/datasets_cypher.yml`` (joined to ``cwd``).
        Placeholders are substituted in every query text: either the pairs found under
        ``self.configuration["replace"]`` or, by default, ``PROJECTID`` replaced by
        ``self.identifier``.

        Queries whose ``query_type`` is ``"pre"`` are sent right away with
        ``self.send_query`` and their result is stored under the query name lower-cased
        with underscores turned into spaces. Every other query is handed to
        ``self.update_analysis_queries`` keyed by its lower-cased name.

        Any exception is logged and swallowed, so the returned dictionary may be
        partially filled or empty.

        :return: dictionary mapping query titles to the results of the "pre" queries.
        """
        results = {}
        substitutions = [("PROJECTID", self.identifier)]
        queries_path = "queries/datasets_cypher.yml"
        try:
            all_queries = ckg_utils.get_queries(os.path.join(cwd, queries_path))
            if "replace" in self.configuration:
                substitutions = self.configuration["replace"]
            dataset_queries = all_queries[self.dataset_type]
            for name in dataset_queries:
                title = name.lower().replace('_', ' ')
                definition = dataset_queries[name]
                kind = definition['query_type']
                text = definition['query']
                for placeholder, value in substitutions:
                    text = text.replace(placeholder, value)
                if query_type_is_deferred := (kind != "pre"):
                    # Not a "pre" query: only register it for later use.
                    self.update_analysis_queries({name.lower(): text})
                    continue
                results[title] = self.send_query(text)
        except Exception as err:
            tb = sys.exc_info()[2]
            fname = os.path.split(tb.tb_frame.f_code.co_filename)[1]
            message = "Reading queries from file {}: {}, file: {},line: {}, err: {}".format(
                queries_path, sys.exc_info(), fname, tb.tb_lineno, err)
            logger.error(message)

        return results
Beispiel #2
0
def get_user_creation_queries():
    """
    Reads the YAML file containing the queries relevant to user creation.

    The file name is taken from ``config['cypher_queries_file']`` and joined to ``cwd``;
    the file is parsed by ``ckg_utils.get_queries``.

    Failures are logged, not raised. Both local names are initialised up front so that
    a failure at any point (including the ``config`` lookup) can still be reported and
    the function can still return, instead of dying with an UnboundLocalError.

    :return: the object produced by ``ckg_utils.get_queries`` (described by the \
    original documentation as dict[dict]), or None if the queries could not be read.
    """
    queries_path = None
    user_creation_cypher = None
    try:
        queries_path = config['cypher_queries_file']
        user_creation_cypher = ckg_utils.get_queries(
            os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading queries from file {}: {}, file: {},line: {}, error: {}".
            format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return user_creation_cypher
Beispiel #3
0
def get_query():
    """
    Load the queries used to compute graph database statistics.

    The YAML file ``../queries/dbstats_cypher.yml`` is resolved relative to the
    directory of this module and parsed with ``ckg_utils.get_queries``.

    :return: the parsed queries (a nested dictionary according to the original docs).
    :raises Exception: if anything goes wrong while locating or reading the file; the \
    message carries the original error, the file path and the failing line.
    """
    queries_path = "../queries/dbstats_cypher.yml"
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        yaml_file = os.path.join(module_dir, queries_path)
        data_upload_cypher = ckg_utils.get_queries(yaml_file)
    except Exception as err:
        tb = sys.exc_info()[2]
        fname = os.path.split(tb.tb_frame.f_code.co_filename)[1]
        message = "Erro: {}. Reading queries from file {}: {}, file: {},line: {}".format(
            err, queries_path, sys.exc_info(), fname, tb.tb_lineno)
        raise Exception(message)
    else:
        return data_upload_cypher
Beispiel #4
0
def remove_samples_nodes_db(driver, projectId):
    """
    Run the statements of the ``remove_project`` query for a given project.

    The query text is read from ``../queries/project_cypher.yml`` (relative to this
    module), ``PROJECTID`` is replaced by ``projectId`` and the text is split on ``;``.
    Each resulting statement is committed separately through ``connector.commitQuery``.

    Errors are logged and swallowed.

    :param driver: connection object passed on to ``connector.commitQuery``.
    :param projectId: text substituted for the ``PROJECTID`` placeholder.
    :return: result of the last committed statement, or None if nothing was committed.
    """
    result = None
    query_name = 'remove_project'
    query = ''
    queries_path = "../queries/project_cypher.yml"
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        project_cypher = ckg_utils.get_queries(os.path.join(module_dir, queries_path))
        template = project_cypher[query_name]['query']
        # The last two pieces of the split are skipped.
        # NOTE(review): presumably the trailing empty piece plus the final statement
        # (the one that would remove the project itself) - confirm against the YAML.
        query = template.replace('PROJECTID', projectId).split(';')[:-2]
        for statement in query:
            result = connector.commitQuery(driver, statement + ';')
    except Exception as err:
        tb = sys.exc_info()[2]
        fname = os.path.split(tb.tb_frame.f_code.co_filename)[1]
        message = "Error: {}. Removing nodes associated to project: Query name ({}) - Query ({}), error info: {}, file: {},line: {}".format(
            err, query_name, query, sys.exc_info(), fname, tb.tb_lineno)
        logger.error(message)

    return result
Beispiel #5
0
def modifyEntityProperty(parameters):
    '''
    Modify one property of an entity in the graph database.

    The connection comes from ``getGraphDatabaseConnectionConfiguration()``. Queries are
    read from ``./queries.yml`` (joined to ``cwd``); every entry whose name, lower-cased
    and with underscores replaced by spaces, equals ``modify`` is filled in with the four
    values and sent with ``sendQuery``. Errors while reading or sending are logged.

    NOTE(review): the values are interpolated into the query text with ``%``; confirm
    that callers never pass untrusted input.

    :param parameters: tuple with entity name, entity id, property name to modify, and value.
    '''
    driver = getGraphDatabaseConnectionConfiguration()
    entity, entityid, attribute, value = parameters
    queries_path = "./queries.yml"

    try:
        project_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
        for query_name in project_cypher:
            if query_name.lower().replace('_', ' ') != 'modify':
                continue
            template = project_cypher[query_name]['query']
            sendQuery(driver, template % (entity, entityid, attribute, value))
            print("Property successfully modified")
    except Exception as err:
        tb = sys.exc_info()[2]
        fname = os.path.split(tb.tb_frame.f_code.co_filename)[1]
        message = "Error: {}. Reading queries from file {}: {}, file: {},line: {}".format(
            err, queries_path, sys.exc_info(), fname, tb.tb_lineno)
        logger.error(message)
Beispiel #6
0
def get_project_creation_queries():
    """
    Reads the YAML file containing the queries relevant to project creation.

    The file ``../queries/project_creation_cypher.yml`` is resolved relative to the
    directory of this module and parsed by ``ckg_utils.get_queries``.

    Failures are logged, not raised. The result is initialised up front so that the
    function returns None after a logged failure instead of raising UnboundLocalError.

    :return: Nested dictionary with the queries, or None if they could not be read.
    """
    # Assigned before the try block so the error handler can always report it.
    queries_path = "../queries/project_creation_cypher.yml"
    project_creation_cypher = None
    try:
        directory = os.path.dirname(os.path.abspath(__file__))
        project_creation_cypher = ckg_utils.get_queries(
            os.path.join(directory, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading queries from file {}: {}, file: {},line: {}, err: {}".
            format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return project_creation_cypher
Beispiel #7
0
# Datasets whose files are NOT read from ckg_config['imports_databases_directory']:
# dataset name -> ckg_config key holding the directory to use instead.
_IMPORT_DIRECTORY_KEYS = {
    "biomarkers": 'imports_curated_directory',
    "qcmarkers": 'imports_curated_directory',
    "user": 'imports_users_directory',
}

# Dataset name -> ordered list of steps (query key, config key, placeholders).
#   * query key:    entry of the cypher queries file whose 'query' text is used.
#   * config key:   key in ``config`` listing the values to expand the query with,
#                   or None when the query is used once with only IMPORTDIR replaced.
#   * placeholders: names replaced (in this order, after IMPORTDIR) by each listed
#                   value; with two placeholders every listed value must be a pair.
_IMPORT_STEPS = {
    "biomarkers": [("IMPORT_BIOMARKERS", None, ())],
    "qcmarkers": [("IMPORT_QCMARKERS", None, ())],
    "chromosomes": [("IMPORT_CHROMOSOME_DATA", None, ())],
    "genes": [("IMPORT_GENE_DATA", None, ())],
    "transcripts": [("IMPORT_TRANSCRIPT_DATA", None, ())],
    "proteins": [("IMPORT_PROTEIN_DATA", None, ())],
    "functional_regions": [("IMPORT_FUNCTIONAL_REGIONS", None, ())],
    "annotations": [("IMPORT_PROTEIN_ANNOTATIONS", None, ())],
    "complexes": [
        ("IMPORT_COMPLEXES", "complexes_resources", ("RESOURCE",)),
    ],
    "modified_proteins": [
        ("IMPORT_MODIFIED_PROTEINS", "modified_proteins_resources",
         ("RESOURCE",)),
        ("IMPORT_MODIFIED_PROTEIN_ANNOTATIONS",
         "modified_proteins_annotation_resources", ("RESOURCE",)),
    ],
    "pathology_expression": [
        ("IMPORT_PATHOLOGY_EXPRESSION", "pathology_expression_resources",
         ("RESOURCE",)),
    ],
    "ppi": [
        ("IMPORT_CURATED_PPI_DATA", "curated_PPI_resources", ("RESOURCE",)),
        ("IMPORT_COMPILED_PPI_DATA", "compiled_PPI_resources", ("RESOURCE",)),
        ("IMPORT_PPI_ACTION", "PPI_action_resources", ("RESOURCE",)),
    ],
    "protein_structure": [("IMPORT_PROTEIN_STRUCTURES", None, ())],
    "diseases": [
        ("IMPORT_DISEASE_DATA", "disease_resources", ("ENTITY", "RESOURCE")),
    ],
    "drugs": [
        ("IMPORT_DRUG_DATA", None, ()),
        ("IMPORT_DRUG_INTERACTION_DATA", "drug_drug_interaction_resources",
         ("RESOURCE",)),
        ("IMPORT_CURATED_DRUG_DATA", "curated_drug_resources", ("RESOURCE",)),
        ("IMPORT_COMPILED_DRUG_DATA", "compiled_drug_resources",
         ("RESOURCE",)),
        ("IMPORT_DRUG_ACTS_ON", "drug_action_resources", ("RESOURCE",)),
    ],
    "side_effects": [
        ("IMPORT_DRUG_SIDE_EFFECTS", "side_effects_resources", ("RESOURCE",)),
    ],
    "pathway": [
        ("IMPORT_PATHWAY_DATA", "pathway_resources", ("RESOURCE",)),
    ],
    "metabolite": [
        ("IMPORT_METABOLITE_DATA", "metabolite_resources", ("RESOURCE",)),
    ],
    "food": [
        ("IMPORT_FOOD_DATA", "food_resources", ("RESOURCE",)),
    ],
    "gwas": [
        ("IMPORT_GWAS", None, ()),
        ("IMPORT_VARIANT_FOUND_IN_GWAS", None, ()),
        ("IMPORT_GWAS_STUDIES_TRAIT", None, ()),
    ],
    "known_variants": [("IMPORT_KNOWN_VARIANT_DATA", None, ())],
    "clinical_variants": [
        ("IMPORT_CLINICALLY_RELEVANT_VARIANT_DATA",
         "clinical_variant_resources", ("RESOURCE",)),
    ],
    "jensenlab": [
        ("IMPORT_JENSENLAB_DATA", "jensenlabEntities",
         ("ENTITY1", "ENTITY2")),
    ],
    "mentions": [
        ("CREATE_PUBLICATIONS", None, ()),
        ("IMPORT_MENTIONS", "mentionEntities", ("ENTITY",)),
    ],
    "published": [
        ("IMPORT_PUBLISHED_IN", "publicationEntities", ("ENTITY",)),
    ],
    "user": [("CREATE_USER_NODE", None, ())],
}


def _split_statements(code, replacements):
    """Apply the ordered (placeholder, value) replacements to ``code`` and split it on ';'."""
    for placeholder, value in replacements:
        code = code.replace(placeholder, value)
    # Whatever follows the last ';' is dropped.
    return code.split(';')[0:-1]


def _select_projects(import_dir, specific):
    """List the project folders under ``import_dir``, restricted to ``specific`` when given."""
    projects = builder_utils.listDirectoryFolders(import_dir)
    if specific:
        projects = list(set(projects).intersection(specific))
    return projects


def _standard_import_queries(cypher_queries, dataset, import_dir):
    """Build the statements of a dataset described in ``_IMPORT_STEPS``."""
    queries = []
    for query_key, resources_key, placeholders in _IMPORT_STEPS[dataset]:
        code = cypher_queries[query_key]['query']
        if resources_key is None:
            queries.extend(
                _split_statements(code, [("IMPORTDIR", import_dir)]))
            continue
        for item in config[resources_key]:
            values = (item,) if len(placeholders) == 1 else tuple(item)
            if len(values) != len(placeholders):
                raise ValueError(
                    "Expected {} values in '{}' entries, got: {}".format(
                        len(placeholders), resources_key, item))
            replacements = [("IMPORTDIR", import_dir)]
            for placeholder, value in zip(placeholders, values):
                # Resource names are lower-cased; entity names are used as given.
                if placeholder == "RESOURCE":
                    value = value.lower()
                replacements.append((placeholder, value))
            queries.extend(_split_statements(code, replacements))
    return queries


def _ontology_queries(cypher_queries, specific):
    """Build the statements loading ontology entities and the mappings between them."""
    entities = [e.lower() for e in config["ontology_entities"]]
    if specific:
        entities = list(
            set(entities).intersection([s.lower() for s in specific]))
    import_dir = ckg_config['imports_ontologies_directory']
    queries = []
    data_code = cypher_queries['IMPORT_ONTOLOGY_DATA']['query']
    for entity in entities:
        queries.extend(
            _split_statements(data_code, [("ENTITY", entity.capitalize()),
                                          ("IMPORTDIR", import_dir)]))
    mappings = config['ontology_mappings']
    mapping_code = cypher_queries['IMPORT_ONTOLOGY_MAPPING_DATA']['query']
    for source in mappings:
        if source.lower() in entities:
            for target in mappings[source]:
                queries.extend(
                    _split_statements(mapping_code,
                                      [("ENTITY1", source),
                                       ("ENTITY2", target),
                                       ("IMPORTDIR", import_dir)]))
    return queries


def _project_queries(cypher_queries, specific):
    """Build the statements loading the 'project' folder of every selected project."""
    import_dir = ckg_config['imports_experiments_directory']
    projects = _select_projects(import_dir, specific)
    project_cypher = cypher_queries['IMPORT_PROJECT']
    queries = []
    for project in projects:
        project_dir = os.path.join(import_dir, project)
        project_dir = os.path.join(project_dir, 'project').replace('\\', '/')
        for project_section in project_cypher:
            queries.extend(
                _split_statements(project_section['query'],
                                  [("IMPORTDIR", project_dir),
                                   ('PROJECTID', project)]))
    return queries


def _experiment_queries(cypher_queries, specific):
    """Build the statements loading every known dataset type of every selected project."""
    import_dir = ckg_config['imports_experiments_directory']
    datasets_cypher = cypher_queries['IMPORT_DATASETS']
    projects = _select_projects(import_dir, specific)
    queries = []
    for project in projects:
        project_dir = os.path.join(import_dir, project).replace('\\', '/')
        for dtype in builder_utils.listDirectoryFolders(project_dir):
            dataset_dir = os.path.join(project_dir, dtype).replace('\\', '/')
            # Folders without a matching entry in the queries file are ignored.
            if dtype in datasets_cypher:
                queries.extend(
                    _split_statements(datasets_cypher[dtype]['query'],
                                      [("IMPORTDIR", dataset_dir),
                                       ('PROJECTID', project)]))
    return queries


def updateDB(driver, imports=None, specific=None):
    """
    Populates the graph database with information for each Database, Ontology or Experiment \
    specified in imports. If imports is not defined, the function populates the entire graph \
    database based on the ``graph`` entry of the builder configuration.

    For every requested dataset the Cypher statements are built from the queries file \
    (``config['cypher_queries_file']``) by replacing placeholders such as IMPORTDIR, RESOURCE, \
    ENTITY or PROJECTID, and then passed to ``load_into_database``.

    Error handling:

    * if the queries file cannot be read, the error is logged and nothing is loaded;
    * an unknown dataset name is logged and skipped;
    * a failure while building or loading one dataset is logged and the remaining \
      datasets are still processed.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param list imports: a list of entities to be loaded into the graph.
    :param list specific: optional restriction: ontology entity names (compared \
    case-insensitively) for "ontologies", project folder names for "project" and \
    "experiment". None or an empty list means no restriction.
    """
    if imports is None:
        imports = config["graph"]
    # None instead of a mutable [] default; behaves the same for existing callers.
    specific = [] if specific is None else specific
    try:
        cypher_queries = ckg_utils.get_queries(
            os.path.join(cwd, config['cypher_queries_file']))
    except Exception as err:
        logger.error("Reading queries file > {}.".format(err))
        # Without the queries nothing can be built; stop here instead of failing
        # once per dataset on an unbound variable.
        return

    for i in imports:
        logger.info("Loading {} into the database".format(i))
        try:
            import_dir = ckg_config['imports_databases_directory']
            if i == "ontologies":
                queries = _ontology_queries(cypher_queries, specific)
            elif i == "project":
                queries = _project_queries(cypher_queries, specific)
            elif i == "experiment":
                queries = _experiment_queries(cypher_queries, specific)
            elif i in _IMPORT_STEPS:
                if i in _IMPORT_DIRECTORY_KEYS:
                    import_dir = ckg_config[_IMPORT_DIRECTORY_KEYS[i]]
                queries = _standard_import_queries(cypher_queries, i,
                                                   import_dir)
            else:
                logger.error(
                    "Non-existing dataset. The dataset you are trying to load does not exist: {}."
                    .format(i))
                # Nothing to load for an unknown dataset.
                continue
            load_into_database(driver, queries, i)
            # Reported only once the statements have actually been handed over.
            print('Done Loading {}'.format(i))
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Loading: {}: {}, file: {}, line: {}".format(
                i, err, fname, exc_tb.tb_lineno))
Beispiel #8
0
def read_queries(queries_file):
    """
    Read the queries stored in ``queries_file``.

    :param queries_file: path handed unchanged to ``ckg_utils.get_queries``.
    :return: whatever ``ckg_utils.get_queries`` returns for that file.
    """
    return ckg_utils.get_queries(queries_file)