예제 #1
0
    def query_data(self):
        """Run the pre-analysis cypher queries configured for this dataset type.

        Loads ``queries/datasets_cypher.yml`` (relative to this module),
        substitutes placeholders (``PROJECTID`` -> ``self.identifier`` by
        default, or the pairs listed in ``self.configuration['replace']``),
        then executes each query of type ``'pre'`` immediately and registers
        every other query as an analysis query.

        :return: dict mapping a human-readable query title to its result.
        """
        data = {}
        substitutions = [("PROJECTID", self.identifier)]
        try:
            module_dir = os.path.abspath(os.path.dirname(__file__))
            queries_path = "queries/datasets_cypher.yml"
            datasets_cypher = ckg_utils.get_queries(os.path.join(module_dir, queries_path))
            if "replace" in self.configuration:
                substitutions = self.configuration["replace"]
            for query_name, query_info in datasets_cypher[self.dataset_type].items():
                title = query_name.lower().replace('_', ' ')
                query = query_info['query']
                for placeholder, value in substitutions:
                    query = query.replace(placeholder, value)
                if query_info['query_type'] == "pre":
                    data[title] = self.send_query(query)
                else:
                    self.update_analysis_queries({query_name.lower(): query})
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error(
                "Reading queries from file {}: {}, file: {},line: {}, err: {}".
                format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno,
                       err))

        return data
예제 #2
0
def get_user_creation_queries():
    """
    Reads the YAML file containing the queries relevant to user creation, parses the given stream and \
    returns a Python object (dict[dict]).

    :return: Nested dictionary of cypher queries, or None if the file could not be read.
    """
    # Initialize both names up front: without this, a failure inside the try
    # block left `user_creation_cypher` unbound (NameError at return) and, if
    # the config lookup itself raised, `queries_path` unbound in the handler.
    user_creation_cypher = None
    queries_path = None
    try:
        queries_path = config['cypher_queries_file']
        user_creation_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading queries from file {}: {}, file: {},line: {}, error: {}".format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return user_creation_cypher
예제 #3
0
def get_data_upload_queries():
    """
    Reads the YAML file containing the queries relevant to parsing of clinical data and \
    returns a Python object (dict[dict]).

    :return: Nested dictionary, or None if the file could not be read.
    """
    # Initialize before the try: previously a failure in get_queries left
    # `data_upload_cypher` unbound and the return line raised NameError.
    data_upload_cypher = None
    try:
        queries_path = "../queries/data_upload_cypher.yml"
        data_upload_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Reading queries from file {}: {}, file: {},line: {}".format(err, queries_path, sys.exc_info(), fname, exc_tb.tb_lineno))

    return data_upload_cypher
예제 #4
0
def get_query():
    """
       Reads the YAML file containing the queries relevant for graph database stats, parses the given stream and \
       returns a Python object (dict[dict]).

    :return: Nested dictionary.
    :raises Exception: if the queries file cannot be read or parsed.
    """
    try:
        queries_path = "../queries/dbstats_cypher.yml"
        # Renamed local from the copy-pasted `data_upload_cypher`:
        # this function loads the dbstats queries, not the data-upload ones.
        dbstats_cypher = ckg_utils.get_queries(
            os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        # Fixed the "Erro:" typo and chained the original exception so the
        # full traceback is preserved for callers.
        raise Exception(
            "Error: {}. Reading queries from file {}: {}, file: {},line: {}".
            format(err, queries_path, sys.exc_info(), fname,
                   exc_tb.tb_lineno)) from err
    return dbstats_cypher
예제 #5
0
def remove_samples_nodes_db(driver, projectId):
    """
    Remove the nodes associated with a project from the graph database.

    Loads the 'remove_project' cypher query from project_cypher.yml,
    substitutes the PROJECTID placeholder with *projectId* and runs each
    resulting ';'-separated statement against the database.

    :param driver: neo4j driver, which provides the connection to the graph database.
    :param str projectId: identifier substituted for PROJECTID in the query.
    :return: cursor data of the last executed statement, or None if nothing ran
        (e.g. the queries file could not be read).
    """
    result = None
    query_name = 'remove_project'
    query = ''
    try:
        queries_path = "../queries/project_cypher.yml"
        project_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
        # NOTE(review): [:-2] drops the last TWO ';'-separated fragments — the
        # empty string after the trailing ';' plus the final statement.
        # Presumably the last statement is deliberately skipped here; confirm
        # against the 'remove_project' query in project_cypher.yml.
        query = project_cypher[query_name]['query'].replace('PROJECTID', projectId).split(';')[:-2]
        for q in query:
            # Re-append the ';' removed by split before sending each statement.
            result = connector.getCursorData(driver, q+';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Removing nodes associated to project: Query name ({}) - Query ({}), error info: {}, file: {},line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return result
예제 #6
0
def get_project_creation_queries():
    """
    Reads the YAML file containing the queries relevant to project creation, parses the given stream and \
    returns a Python object (dict[dict]).

    :return: Nested dictionary, or None if the file could not be read.
    """
    # Initialize before the try: previously a failure left
    # `project_creation_cypher` unbound (NameError at return) and, if the
    # cwd computation raised, `queries_path` unbound inside the handler.
    project_creation_cypher = None
    queries_path = None
    try:
        cwd = os.path.abspath(os.path.dirname(__file__))
        queries_path = "../queries/project_creation_cypher.yml"
        project_creation_cypher = ckg_utils.get_queries(
            os.path.join(cwd, queries_path))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading queries from file {}: {}, file: {},line: {}, err: {}".
            format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return project_creation_cypher
예제 #7
0
파일: knowledge.py 프로젝트: DotaArtist/CKG
    def query_data(self, replace):
        """Execute every 'knowledge_report' query defined in the queries file.

        Loads ``self.queries_file`` (relative to this module), applies the
        placeholder substitutions in *replace* to each query of type
        ``'knowledge_report'`` and runs it.

        :param list replace: (placeholder, value) pairs substituted into each query.
        :return: dict mapping query name to the result of running that query.
        """
        results = {}
        try:
            module_dir = os.path.abspath(os.path.dirname(__file__))
            cypher_queries = ckg_utils.get_queries(os.path.join(module_dir, self.queries_file))
            if cypher_queries is not None:
                for name, definition in cypher_queries.items():
                    if 'query_type' not in definition:
                        continue
                    if definition['query_type'] != 'knowledge_report':
                        continue
                    query = definition['query']
                    for placeholder, value in replace:
                        query = query.replace(placeholder, value)
                    results[name] = self.send_query(query)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Reading queries from file {}: {}, file: {},line: {}, err: {}".format(self.queries_file, sys.exc_info(), fname, exc_tb.tb_lineno, err))

        return results
예제 #8
0
def modifyEntityProperty(parameters):
    """Modify a property of an existing entity in the graph database.

    Looks up the 'modify' query in ./queries.yml, interpolates the entity
    name, entity id, attribute and value into it and sends it to the database.

    :param tuple parameters: (entity name, entity id, property name to modify, value).
    """
    driver = getGraphDatabaseConnectionConfiguration()
    entity, entityid, attribute, value = parameters

    try:
        module_dir = os.path.abspath(os.path.dirname(__file__))
        queries_path = "./queries.yml"
        project_cypher = ckg_utils.get_queries(os.path.join(module_dir, queries_path))
        for query_name in project_cypher:
            if query_name.lower().replace('_', ' ') != 'modify':
                continue
            # %-interpolation matches the placeholder style used in queries.yml.
            query = project_cypher[query_name]['query'] % (
                entity, entityid, attribute, value)
            sendQuery(driver, query)
            print("Property successfully modified")
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Error: {}. Reading queries from file {}: {}, file: {},line: {}".
            format(err, queries_path, sys.exc_info(), fname, exc_tb.tb_lineno))
예제 #9
0
파일: loader.py 프로젝트: sailor723/CKG
def updateDB(driver, imports=None, specific=[]):
    """
    Populates the graph database with information for each Database, Ontology or Experiment \
    specified in imports. If imports is not defined, the function populates the entire graph \
    database based on the graph variable defined in the grapher_config.py module. \
    This function also updates the graph stats object with numbers from the loaded entities and \
    relationships.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param list imports: a list of entities to be loaded into the graph.
    :param list specific: optional filter applied by the 'ontologies', 'project' and \
        'experiment' branches to load only matching entities/projects. \
        NOTE(review): mutable default argument — harmless here only because it is never mutated.
    """
    if imports is None:
        imports = config["graph"]
    try:
        # Load all import cypher queries once; each branch below picks the ones it needs.
        cypher_queries = ckg_utils.get_queries(
            os.path.join(cwd, config['cypher_queries_file']))
    except Exception as err:
        # NOTE(review): if this fails, `cypher_queries` stays unbound and every
        # branch below raises NameError, which is caught and logged per import.
        logger.error("Reading queries file > {}.".format(err))

    for i in imports:
        queries = []
        logger.info("Loading {} into the database".format(i))
        try:
            # Default import directory; several branches override it below.
            # Backslashes are normalized so the paths work in cypher on Windows.
            import_dir = os.path.join(
                cwd, directories["databasesDirectory"]).replace('\\', '/')
            if i == "ontologies":
                entities = [e.lower() for e in config["ontology_entities"]]
                if len(specific) > 0:
                    entities = list(
                        set(entities).intersection(
                            [s.lower() for s in specific]))
                import_dir = os.path.join(
                    cwd,
                    directories["ontologiesDirectory"]).replace('\\', '/')
                ontologyDataImportCode = cypher_queries[
                    'IMPORT_ONTOLOGY_DATA']['query']
                # split(';')[0:-1] drops the empty fragment after the final ';'.
                for entity in entities:
                    queries.extend(
                        ontologyDataImportCode.replace(
                            "ENTITY", entity.capitalize()).replace(
                                "IMPORTDIR", import_dir).split(';')[0:-1])
                mappings = config['ontology_mappings']
                mappingImportCode = cypher_queries[
                    'IMPORT_ONTOLOGY_MAPPING_DATA']['query']
                for m in mappings:
                    if m.lower() in entities:
                        for r in mappings[m]:
                            queries.extend(
                                mappingImportCode.replace(
                                    "ENTITY1",
                                    m).replace("ENTITY2", r).replace(
                                        "IMPORTDIR",
                                        import_dir).split(';')[0:-1])
                print('Done Loading ontologies')
            elif i == "biomarkers":
                code = cypher_queries['IMPORT_BIOMARKERS']['query']
                import_dir = os.path.join(
                    cwd, directories["curatedDirectory"]).replace('\\', '/')
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading biomarkers')
            elif i == "qcmarkers":
                code = cypher_queries['IMPORT_QCMARKERS']['query']
                import_dir = os.path.join(
                    cwd, directories["curatedDirectory"]).replace('\\', '/')
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading qcmarkers')
            elif i == "chromosomes":
                code = cypher_queries['IMPORT_CHROMOSOME_DATA']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading chromosomes')
            elif i == "genes":
                code = cypher_queries['IMPORT_GENE_DATA']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading genes')
            elif i == "transcripts":
                code = cypher_queries['IMPORT_TRANSCRIPT_DATA']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading transcritps')
            elif i == "proteins":
                code = cypher_queries['IMPORT_PROTEIN_DATA']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading proteins')
            elif i == "functional_regions":
                code = cypher_queries["IMPORT_FUNCTIONAL_REGIONS"]['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading functional_regions')
            elif i == "annotations":
                code = cypher_queries['IMPORT_PROTEIN_ANNOTATIONS']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading annotations')
            elif i == "complexes":
                code = cypher_queries['IMPORT_COMPLEXES']['query']
                for resource in config["complexes_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading complexes')
            elif i == "modified_proteins":
                code = cypher_queries['IMPORT_MODIFIED_PROTEINS']['query']
                for resource in config["modified_proteins_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_MODIFIED_PROTEIN_ANNOTATIONS'][
                    'query']
                for resource in config[
                        "modified_proteins_annotation_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading modified_proteins')
            elif i == "pathology_expression":
                code = cypher_queries['IMPORT_PATHOLOGY_EXPRESSION']['query']
                for resource in config["pathology_expression_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading pathology_expression')
            elif i == "ppi":
                code = cypher_queries['IMPORT_CURATED_PPI_DATA']['query']
                for resource in config["curated_PPI_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_COMPILED_PPI_DATA']['query']
                for resource in config["compiled_PPI_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_PPI_ACTION']['query']
                for resource in config["PPI_action_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading ppi')
            elif i == "protein_structure":
                code = cypher_queries['IMPORT_PROTEIN_STRUCTURES']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading protein_structure')
            elif i == "diseases":
                code = cypher_queries['IMPORT_DISEASE_DATA']['query']
                for entity, resource in config["disease_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "ENTITY",
                            entity).replace("RESOURCE",
                                            resource.lower()).split(';')[0:-1])
                print('Done Loading diseases')
            elif i == "drugs":
                code = cypher_queries['IMPORT_DRUG_DATA']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                code = cypher_queries['IMPORT_DRUG_INTERACTION_DATA']['query']
                for resource in config['drug_drug_interaction_resources']:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_CURATED_DRUG_DATA']['query']
                for resource in config["curated_drug_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_COMPILED_DRUG_DATA']['query']
                for resource in config["compiled_drug_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                code = cypher_queries['IMPORT_DRUG_ACTS_ON']['query']
                for resource in config["drug_action_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading drugs')
            elif i == "side_effects":
                code = cypher_queries['IMPORT_DRUG_SIDE_EFFECTS']['query']
                for resource in config["side_effects_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading side_effects')
            elif i == 'pathway':
                code = cypher_queries['IMPORT_PATHWAY_DATA']['query']
                for resource in config["pathway_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading pathway')
            elif i == 'metabolite':
                code = cypher_queries['IMPORT_METABOLITE_DATA']['query']
                for resource in config["metabolite_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading metabolite')
            elif i == 'food':
                code = cypher_queries['IMPORT_FOOD_DATA']['query']
                for resource in config["food_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading food')
            elif i == "gwas":
                code = cypher_queries['IMPORT_GWAS']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                code = cypher_queries['IMPORT_VARIANT_FOUND_IN_GWAS']['query']
                queries.extend(
                    code.replace("IMPORTDIR", import_dir).split(';')[0:-1])
                code = cypher_queries['IMPORT_GWAS_STUDIES_TRAIT']['query']
                queries.extend(
                    code.replace("IMPORTDIR", import_dir).split(';')[0:-1])
                print('Done Loading gwas')
            elif i == "known_variants":
                code = cypher_queries['IMPORT_KNOWN_VARIANT_DATA']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                print('Done Loading known_variants')
            elif i == "clinical_variants":
                code = cypher_queries[
                    'IMPORT_CLINICALLY_RELEVANT_VARIANT_DATA']['query']
                for resource in config["clinical_variant_resources"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "RESOURCE", resource.lower()).split(';')[0:-1])
                print('Done Loading clinical_variants')
            elif i == "jensenlab":
                code = cypher_queries['IMPORT_JENSENLAB_DATA']['query']
                for (entity1, entity2) in config["jensenlabEntities"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "ENTITY1",
                            entity1).replace("ENTITY2",
                                             entity2).split(';')[0:-1])
                print('Done Loading jensenlab')
            elif i == "mentions":
                code = cypher_queries['CREATE_PUBLICATIONS']['query']
                queries = code.replace("IMPORTDIR",
                                       import_dir).split(';')[0:-1]
                code = cypher_queries['IMPORT_MENTIONS']['query']
                for entity in config["mentionEntities"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "ENTITY", entity).split(';')[0:-1])
                print('Done Loading mentions')
            elif i == "published":
                code = cypher_queries['IMPORT_PUBLISHED_IN']['query']
                for entity in config["publicationEntities"]:
                    queries.extend(
                        code.replace("IMPORTDIR", import_dir).replace(
                            "ENTITY", entity).split(';')[0:-1])
                print('Done Loading published')
            elif i == "user":
                usersDir = os.path.join(
                    cwd,
                    directories["usersImportDirectory"]).replace('\\', '/')
                user_cypher = cypher_queries['CREATE_USER_NODE']
                code = user_cypher['query']
                queries.extend(
                    code.replace("IMPORTDIR", usersDir).split(';')[0:-1])
                print('Done Loading user')
            elif i == "project":
                # One query set per project folder under the experiments directory,
                # optionally restricted to the projects listed in `specific`.
                import_dir = os.path.join(
                    cwd,
                    directories["experimentsDirectory"]).replace('\\', '/')
                projects = builder_utils.listDirectoryFolders(import_dir)
                if len(specific) > 0:
                    projects = list(set(projects).intersection(specific))
                project_cypher = cypher_queries['IMPORT_PROJECT']
                for project in projects:
                    projectDir = os.path.join(import_dir, project)
                    projectDir = os.path.join(projectDir,
                                              'project').replace('\\', '/')
                    for project_section in project_cypher:
                        code = project_section['query']
                        queries.extend(
                            code.replace("IMPORTDIR", projectDir).replace(
                                'PROJECTID', project).split(';')[0:-1])
                print('Done Loading project')
            elif i == "experiment":
                # For each (project, dataset-type) folder, run the matching
                # dataset import query with IMPORTDIR/PROJECTID substituted.
                import_dir = os.path.join(
                    cwd,
                    directories["experimentsDirectory"]).replace('\\', '/')
                datasets_cypher = cypher_queries['IMPORT_DATASETS']
                projects = builder_utils.listDirectoryFolders(import_dir)
                if len(specific) > 0:
                    projects = list(set(projects).intersection(specific))
                for project in projects:
                    projectDir = os.path.join(import_dir,
                                              project).replace('\\', '/')
                    datasetTypes = builder_utils.listDirectoryFolders(
                        projectDir)
                    for dtype in datasetTypes:
                        datasetDir = os.path.join(projectDir,
                                                  dtype).replace('\\', '/')
                        if dtype in datasets_cypher:
                            dataset = datasets_cypher[dtype]
                            code = dataset['query']
                            queries.extend(
                                code.replace("IMPORTDIR", datasetDir).replace(
                                    'PROJECTID', project).split(';')[0:-1])
                print('Done Loading experiment')
            else:
                logger.error(
                    "Non-existing dataset. The dataset you are trying to load does not exist: {}."
                    .format(i))
            # NOTE(review): the 'Done Loading ...' messages above are printed
            # before this call, i.e. before the queries actually run.
            load_into_database(driver, queries, i)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Loading: {}: {}, file: {}, line: {}".format(
                i, err, fname, exc_tb.tb_lineno))
예제 #10
0
파일: knowledge.py 프로젝트: sailor723/CKG
    def annotate_list(self,
                      query_list,
                      entity_type,
                      attribute='name',
                      queries_file=None,
                      diseases=None,
                      entities=None):
        """Annotate a list of identifiers with related knowledge from the graph.

        Runs the annotation queries defined for *entity_type* in
        *queries_file* (default ``queries/knowledge_annotation.yml``),
        substituting the query list, attribute, disease condition and any
        diseases/drugs discovered along the way, and builds the knowledge
        graph (nodes/relationships) from the resulting edge lists.

        :param list query_list: identifiers to annotate.
        :param str entity_type: section of the queries file to use (capitalized for lookup).
        :param str attribute: node attribute matched against query_list (replaces ATTRIBUTE).
        :param str queries_file: optional path to an alternative queries YAML file.
        :param list diseases: disease names used to extend the DISEASE_COND filter;
            None or empty disables the filter. (Default changed from a mutable
            ``[]`` to ``None`` — behavior is unchanged since both were treated alike.)
        :param entities: node types considered relevant; defaults to ``self.entities``.
        """
        self.empty_graph()
        if queries_file is None:
            queries_file = 'queries/knowledge_annotation.yml'

        if entities is None:
            entities = self.entities

        if diseases is None:
            diseases = []

        if len(diseases) < 1:
            replace_by = ('DISEASE_COND', '')
        else:
            replace_by = (
                'DISEASE_COND',
                'OR (d.name IN {} AND r.score > 1.5)'.format(diseases))

        query_data = []
        drugs = []
        q = 'NA'  # keeps the error message meaningful if we fail before any query runs
        try:
            cwd = os.path.abspath(os.path.dirname(__file__))
            cypher_queries = ckg_utils.get_queries(
                os.path.join(cwd, queries_file))
            if cypher_queries is not None:
                if entity_type.capitalize() in cypher_queries:
                    queries = cypher_queries[entity_type.capitalize()]
                    for query_name in queries:
                        involved_nodes = queries[query_name]['involves_nodes']
                        # Run the query when it touches a relevant node type or
                        # when it is the query for the entity type itself.
                        if len(set(involved_nodes).intersection(
                                entities)) > 0 or query_name.capitalize(
                                ) == entity_type.capitalize():
                            query = queries[query_name]['query']
                            q = 'NA'
                            for q in query.split(';')[:-1]:
                                q = q.format(query_list=query_list).replace(
                                    "ATTRIBUTE", attribute).replace(
                                        replace_by[0], replace_by[1]).replace(
                                            'DISEASES', str(diseases)).replace(
                                                'DRUGS', str(drugs))
                                data = self.send_query(q)
                                if not data.empty:
                                    # Diseases/drugs found here feed the
                                    # substitutions of subsequent queries.
                                    if query_name == 'disease' and len(
                                            diseases) < 1:
                                        diseases = data['target'].unique(
                                        ).tolist()
                                    if query_name == 'drug':
                                        drugs = data['target'].unique().tolist(
                                        )
                                    query_data.append(data)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error(
                "Error annotating list. Query: {} from file {}: {}, file: {},line: {}, err: {}"
                .format(q, queries_file, sys.exc_info(), fname,
                        exc_tb.tb_lineno, err))
            print(
                "Error annotating list. Query: {} from file {}: {}, file: {},line: {}, err: {}"
                .format(q, queries_file, sys.exc_info(), fname,
                        exc_tb.tb_lineno, err))

        if len(query_data) > 0:
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported equivalent of appending a list of frames.
            self.data = pd.concat(query_data)
            for df in query_data:
                entity1 = df['source_type'][0][0]
                entity2 = df['target_type'][0][0]
                assoc_type = df['rel_type'][0]
                df['weight'] = df['weight'].fillna(0.5)
                nodes, relationships = self.generate_knowledge_from_edgelist(
                    df,
                    entity1,
                    entity2,
                    source='source',
                    target='target',
                    rtype=assoc_type,
                    weight='weight')
                self.nodes.update(nodes)
                self.relationships.update(relationships)
예제 #11
0
def read_queries(queries_file):
    """Parse *queries_file* and return the cypher queries it defines."""
    return ckg_utils.get_queries(queries_file)