Beispiel #1
0
def get_subjects_enrolled_in_project(driver, projectId):
    """
    Extracts the subjects enrolled in a given project.

    Runs every ';'-terminated statement of the 'extract_enrolled_subjects' data upload
    query in order and returns the ``values`` of the last result obtained.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param str projectId: external project identifier (from the graph database).
    :return: ``values`` of the result of the last executed statement, or None when no
        statement could be executed (the error is logged).
    :rtype: Numpy ndarray or None
    """
    query_name = 'extract_enrolled_subjects'
    query = ''
    # Pre-bind so a failure before the first statement cannot raise UnboundLocalError below
    result = None
    try:
        data_upload_cypher = get_data_upload_queries()
        query = data_upload_cypher[query_name]['query']
        for q in query.split(';')[0:-1]:
            # Only statements that contain a '$' placeholder receive the parameter
            if '$' in q:
                result = connector.getCursorData(
                    driver,
                    q + ';',
                    parameters={'external_id': str(projectId)})
            else:
                result = connector.getCursorData(driver, q + ';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Error: {}. Getting new subjects enrolled in project: Query name ({}) - Query ({}), error info: {}, file: {},line: {}"
            .format(err, query_name, query, sys.exc_info(), fname,
                    exc_tb.tb_lineno))
    if result is None:
        return None
    return result.values
Beispiel #2
0
def create_db_user(driver, data):
    """
    Creates and assigns role to new graph database user, if user not in list of local users.

    The 'create_db_user' and 'add_role_to_db_user' queries are concatenated and run
    statement by statement, each with ``data.to_dict()`` as parameters. Any error is
    logged and not re-raised.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param Series data: pandas Series with required user information (see set_arguments()).
        It must provide a 'username' entry.
    """
    query_name_add = 'create_db_user'
    query_name_role = 'add_role_to_db_user'
    query_list_db_users = 'list_db_users'

    try:
        cypher = get_user_creation_queries()
        db_query = cypher[query_name_add]['query'] + cypher[query_name_role][
            'query']
        db_users = connector.getCursorData(
            driver, cypher[query_list_db_users]['query'], {})
        # Test emptiness first: an empty result may not have a 'username' column to look up
        if db_users.empty or data['username'] not in db_users[
                'username'].to_list():
            for q in db_query.split(';')[0:-1]:
                result = connector.getCursorData(driver,
                                                 q + ';',
                                                 parameters=data.to_dict())
            logger.info("New user created: {}. Result: {}".format(
                data['username'], result))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading query {}: {}, file: {},line: {}, error: {}".format(
                query_name_add, sys.exc_info(), fname, exc_tb.tb_lineno, err))
Beispiel #3
0
def check_if_node_exists(driver, node, node_property, value):
    """
    Queries the graph database and checks if a node with a specific property and property value already exists.

    The placeholders 'NODE' and 'PROPERTY' in the 'check_node' query are replaced by
    ``node`` and ``node_property``; ``value`` is passed as the query parameter 'value'.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :param str node: node to be matched in the database.
    :param str node_property: property of the node.
    :param value: property value.
    :type value: str, int, float or bool
    :return: Result of the last executed statement (presumably a pandas dataframe, empty
        when no node matches), or None when no statement could be executed
        (the error is logged).
    """
    query_name = 'check_node'
    # Pre-bind so a failed query returns None instead of raising UnboundLocalError
    result = None
    try:
        cypher = get_project_creation_queries()
        query = cypher[query_name]['query'].replace('NODE', node).replace(
            'PROPERTY', node_property)
        for q in query.split(';')[0:-1]:
            # Only statements that contain a '$' placeholder receive the parameter
            if '$' in q:
                result = connector.getCursorData(driver,
                                                 q + ';',
                                                 parameters={'value': value})
            else:
                result = connector.getCursorData(driver, q + ';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Reading query {}: {}, file: {},line: {}, error: {}".format(
                query_name, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return result
Beispiel #4
0
def get_new_subject_identifier(driver):
    """
    Runs the 'increment_subject_id' data upload query and returns the identifier it yields.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :return: First value of the first row of the query result, or None if the query
        could not be read or executed (the error is logged).
    """
    query_name = 'increment_subject_id'
    query = ''
    subject_identifier = None
    try:
        query = get_data_upload_queries()[query_name]['query']
        cursor_data = connector.getCursorData(driver, query)
        subject_identifier = cursor_data.values[0][0]
    except Exception as err:
        exc_tb = sys.exc_info()[2]
        fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
        message = "Error: {}. Getting new subject identifiers: Query name ({}) - Query ({}), error info: {}, file: {},line: {}"
        logger.error(
            message.format(err, query_name, query, sys.exc_info(), fname,
                           exc_tb.tb_lineno))
    return subject_identifier
Beispiel #5
0
def get_db_stats_data():
    """
    Runs the database statistics queries and collects each result as a JSON string.

    NOTE(review): ``driver`` is not defined in this function; it is presumably a
    module-level connection object -- confirm.

    :return: Dictionary mapping each stats name ('projects', 'meta_stats', 'store_size',
        'transactions', 'kernel_monitor') to the query result serialised with
        ``to_json(orient='records')``.
    """
    # (name in the returned dictionary, name of the query that fills it)
    stats_queries = [
        ('projects', 'unique_projects'),
        ('meta_stats', 'get_db_stats'),
        ('store_size', 'get_db_store_size'),
        ('transactions', 'get_db_transactions'),
        ('kernel_monitor', 'get_db_kernel'),
    ]

    dfs = {}
    cypher = get_query()
    for df_name, query_name in stats_queries:
        data = connector.getCursorData(driver, cypher[query_name]['query'])
        if df_name == 'store_size':
            # Store sizes are transposed and the values of column 0 are converted
            data = data.T
            data['size'] = [size_converter(value) for value in data[0]]
        dfs[df_name] = data.to_json(orient='records')
    return dfs
Beispiel #6
0
def get_subject_number_in_project(driver, projectId):
    """
    Extracts the number of subjects included in a given project.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :param str projectId: external project identifier (from the graph database).
    :return: First value of the first row returned by the 'subject_number' query
        (the number of subjects), or None if the query failed (the error is logged).
    """
    query_name = 'subject_number'
    # Pre-bind so a failed query returns None instead of raising UnboundLocalError
    result = None
    try:
        cypher = get_project_creation_queries()
        query = cypher[query_name]['query']
        result = connector.getCursorData(driver,
                                         query,
                                         parameters={
                                             'external_id': projectId
                                         }).values[0][0]
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error(
            "Error: {}. Reading query {}: {}, file: {},line: {}".format(
                err, query_name, sys.exc_info(), fname, exc_tb.tb_lineno))
    return result
Beispiel #7
0
def create_new_ansamples(driver, data):
    """
    Creates analytical samples in the graph database and adds their new internal identifiers to the data.

    One 'AS<number>' identifier is generated per unique 'analytical_sample external_id',
    starting at the number returned by get_new_analytical_sample_identifier() (1 if it
    returns None). For each of them the 'create_asamples_biosamples' query is run with the
    parameters 'external_id', 'biosample_id' and 'asample_id'. Errors are logged per sample
    and do not stop the remaining samples.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :param data: pandas Dataframe with clinical data as columns and samples as rows. It must
        contain the columns 'analytical_sample external_id' and 'biological_sample id'.

    :return: Pandas DataFrame where new analytical sample internal identifiers have been added
        (column 'analytical_sample id'; the input dataframe is modified in place).
    """
    sample_column = 'analytical_sample external_id'
    # One row per external id (first occurrence), so each external id stays paired with the
    # biological sample of its own row even when external ids are repeated in the data
    unique_samples = data.drop_duplicates(subset=sample_column)
    external_ids = unique_samples[sample_column]
    biosample_ids = unique_samples['biological_sample id']
    ansample_id = get_new_analytical_sample_identifier(driver)
    if ansample_id is None:
        ansample_id = '1'

    ansample_ids = ['AS'+str(i) for i in np.arange(int(ansample_id), int(ansample_id)+len(external_ids))]
    ansample_dict = dict(zip(external_ids, ansample_ids))
    asample_biosample_dict = dict(zip(external_ids, biosample_ids))
    query_name = 'create_asamples_biosamples'
    for external_id, asample_id in ansample_dict.items():
        biosample_id = asample_biosample_dict[external_id]
        parameters = {'external_id': str(external_id), 'biosample_id': biosample_id, 'asample_id': asample_id}
        try:
            query = ''
            data_upload_cypher = get_data_upload_queries()
            queries = data_upload_cypher[query_name]['query'].split(';')[:-1]
            for query in queries:
                connector.getCursorData(driver, query+';', parameters=parameters)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Error: {}. Creating analytical samples: Query name ({}) - Query ({}), error info: {}, file: {},line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    data['analytical_sample id'] = data[sample_column].map(ansample_dict)

    return data
Beispiel #8
0
def get_new_project_identifier(driver, projectId):
    """
    Builds the next sequential project external identifier from the 'increment_project_id' query.

    The query is expected to yield a pair (last project identifier, new number). The new
    number is left-padded with zeros to the width of the digits that follow the last 'P'
    in the last identifier. When both values are None, 'P0000001' is returned.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :param str projectId: internal project identifier (CPxxxxxxxxxxxx). Not used by this function.
    :return: Project external identifier, or None if anything fails (the error is logged).
    :rtype: str
    """
    query_name = 'increment_project_id'
    external_identifier = None
    try:
        query = get_project_creation_queries()[query_name]['query']
        last_project, new_id = connector.getCursorData(driver, query).values[0]
        if last_project is not None or new_id is not None:
            width = len(last_project.split('P')[-1])
            new_number = str(new_id)
            padding = '0' * (width - len(new_number))
            external_identifier = 'P' + padding + new_number
        else:
            external_identifier = 'P0000001'
    except Exception as err:
        external_identifier = None
        exc_tb = sys.exc_info()[2]
        fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
        message = "Reading query {}: {}, file: {},line: {}, err: {}"
        logger.error(message.format(
            query_name, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return external_identifier
Beispiel #9
0
    def query_data(self):
        """
        Runs every query of type "pre" found in this object's queries file.

        The queries file (``self.queries_file``, relative to this module's directory) is
        read, the placeholder 'PROJECTID' is replaced by ``self.identifier`` in each query
        and the queries whose 'query_type' is "pre" are executed. Errors are logged and
        not re-raised.

        :return: Dictionary mapping each query title (query name in lower case, with
            underscores replaced by spaces) to its query result. Results gathered before
            an error are kept.
        """
        data = {}
        # Pre-bind: the except handler logs this path even if building it is what failed
        queries_path = None
        try:
            cwd = os.path.abspath(os.path.dirname(__file__))
            queries_path = os.path.join(cwd, self.queries_file)
            project_cypher = query_utils.read_queries(queries_path)

            driver = connector.getGraphDatabaseConnectionConfiguration()
            replace = [("PROJECTID", self.identifier)]
            for query_name in project_cypher:
                title = query_name.lower().replace('_', ' ')
                query = project_cypher[query_name]['query']
                query_type = project_cypher[query_name]['query_type']
                for r, by in replace:
                    query = query.replace(r, by)
                if query_type == "pre":
                    data[title] = connector.getCursorData(driver, query)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error(
                "Reading queries from file {}: {}, file: {},line: {}, error: {}"
                .format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno,
                        err))

        return data
Beispiel #10
0
def get_project_information(driver, project_id):
    """
    Retrieves the data uploaded for a project and renders it as a table.

    Runs every ';'-terminated statement of the 'project_graph' data upload query with the
    parameter 'project_id' and keeps the result of the last one. Errors are logged and
    not re-raised.

    :param driver: driver, which provides the connection to the neo4j graph database.
    :param project_id: project identifier passed to the query as 'project_id'.
    :return: Output of ``viz.get_table`` for the query result, or an empty pandas
        DataFrame when the query returned nothing or failed.
    """
    query_name = 'project_graph'
    res = pd.DataFrame()
    try:
        query = ''
        parameters = {'project_id': project_id}
        data_upload_cypher = get_data_upload_queries()
        queries = data_upload_cypher[query_name]['query'].split(';')[:-1]
        for query in queries:
            res = connector.getCursorData(driver,
                                          query + ';',
                                          parameters=parameters)
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        # Message names this operation (it previously said "Creating analytical samples")
        logger.error(
            "Error: {}. Getting project information: Query name ({}) - Query ({}), error info: {}, file: {},line: {}"
            .format(err, query_name, query, sys.exc_info(), fname,
                    exc_tb.tb_lineno))

    if not res.empty:
        res = viz.get_table(
            res,
            identifier='new_project',
            title='Data Uploaded for Project {}'.format(project_id))

    return res
Beispiel #11
0
def create_user_from_dict(driver, data):
    """
    Creates the graph database node for a new user from a dictionary of properties.

    A new user identifier is requested first; it is stored under 'ID' in ``data`` when
    that key is missing or None (``data`` is modified in place). Success and error
    messages are both logged and printed.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param dict data: dictionary with the user information; passed as the query parameters.
    :return: Result of the last executed statement of the 'create_user_node' query, or
        None if none was executed.
    """
    query_name_node = 'create_user_node'
    result = None
    try:
        user_id = get_new_user_id(driver)
        # Covers both a missing 'ID' key and an 'ID' explicitly set to None
        if data.get('ID') is None:
            data['ID'] = user_id

        statements = uh.get_user_creation_queries()[query_name_node]['query'].split(';')[0:-1]
        for statement in statements:
            result = connector.getCursorData(driver, statement + ';', parameters=data)
        success_message = "New user node created: {}. Result: {}".format(data['username'], result)
        logger.info(success_message)
        print(success_message)
    except Exception as err:
        exc_tb = sys.exc_info()[2]
        fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
        error_message = "Reading query {}: {}, file: {},line: {}, error: {}".format(
            query_name_node, sys.exc_info(), fname, exc_tb.tb_lineno, err)
        logger.error(error_message)
        print(error_message)
    return result
Beispiel #12
0
def getMappingFromDatabase(id_list, node, attribute_from='id', attribute_to='name'):
    """
    Maps values of one attribute to another attribute of the same nodes in the graph database.

    :param id_list: iterable with the ``attribute_from`` values to look up; every value is
        matched as a string.
    :param str node: label of the nodes to match. It is inserted into the query text, so it
        must not come from untrusted input.
    :param str attribute_from: node attribute matched against ``id_list`` (inserted into the
        query text as well).
    :param str attribute_to: node attribute returned for each match (inserted into the query
        text as well).
    :return: Dictionary {attribute_from value: attribute_to value} when the query returns
        rows; otherwise the empty query result is returned as is.
    """
    driver = connector.getGraphDatabaseConnectionConfiguration()
    # Labels and property names cannot be Cypher parameters, so only those are formatted in;
    # the looked-up values travel as a parameter, which keeps quotes in them from breaking the query
    mapping_query = "MATCH (n:{}) WHERE n.{} IN $ids RETURN n.{} AS from, n.{} AS to".format(
        node, attribute_from, attribute_from, attribute_to)
    mapping = connector.getCursorData(driver, mapping_query, parameters={'ids': [str(i) for i in id_list]})
    if not mapping.empty:
        mapping = dict(zip(mapping['from'], mapping['to']))

    return mapping
Beispiel #13
0
def get_mapping_analytical_samples(project_id):
    """
    Maps the external identifiers of a project's analytical samples to their internal identifiers.

    :param project_id: identifier of the project (matched as a string against ``p.id``).
    :return: Dictionary {analytical sample external_id: analytical sample id} when the
        query returns rows; otherwise the empty query result is returned as is.
    """
    from graphdb_connector import connector
    driver = connector.getGraphDatabaseConnectionConfiguration()

    # The project id is passed as a query parameter instead of being formatted into the text
    query = "MATCH (p:Project)-[:HAS_ENROLLED]-(:Subject)-[:BELONGS_TO_SUBJECT]-()-[:SPLITTED_INTO]-(a:Analytical_sample) WHERE p.id=$project_id RETURN a.external_id, a.id"
    mapping = connector.getCursorData(driver, query, parameters={'project_id': str(project_id)})
    if not mapping.empty:
        mapping = mapping.set_index("a.external_id").to_dict(orient='dict')["a.id"]

    return mapping
Beispiel #14
0
def create_new_ansamples(driver, data):
    """
    Creates analytical samples in the graph database and adds their new internal identifiers to the data.

    Works on a renamed copy of ``data``: every row gets an 'AS<number>' identifier,
    numbered from the value returned by get_new_analytical_sample_identifier() (1 if it
    returns None), and the 'create_asamples_biosamples' query is run once per row with the
    whole row as parameters. A 'grouping2' column filled with None is added when missing.
    Errors are logged per row and do not stop the remaining rows.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param data: pandas Dataframe with clinical data as columns and samples as rows. It must
        contain the columns 'analytical_sample external_id' and 'biological_sample id'.

    :return: Pandas DataFrame where new analytical sample internal identifiers have been added
        (column 'analytical_sample id').
    """
    data = data.rename(
        columns={
            'analytical_sample external_id': 'external_id',
            'biological_sample id': 'biosample_id'
        })
    data['external_id'] = data['external_id'].astype(str)
    num_samples = data['external_id'].shape[0]
    if 'grouping2' not in data:
        data['grouping2'] = None
    ansample_id = get_new_analytical_sample_identifier(driver)
    if ansample_id is None:
        ansample_id = '1'

    ansample_ids = [
        'AS' + str(i) for i in np.arange(int(ansample_id),
                                         int(ansample_id) + num_samples)
    ]
    data['asample_id'] = ansample_ids
    query_name = 'create_asamples_biosamples'
    # Each row is sent as the parameters of the query (no longer echoed to stdout)
    for parameters in data.to_dict('records'):
        try:
            query = ''
            data_upload_cypher = get_data_upload_queries()
            queries = data_upload_cypher[query_name]['query'].split(';')[:-1]
            for query in queries:
                connector.getCursorData(driver,
                                        query + ';',
                                        parameters=parameters)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error(
                "Error: {}. Creating analytical samples: Query name ({}) - Query ({}), error info: {}, file: {},line: {}"
                .format(err, query_name, query, sys.exc_info(), fname,
                        exc_tb.tb_lineno))

    # Restore the original column names for the caller
    data = data.rename(
        columns={
            'asample_id': 'analytical_sample id',
            'external_id': 'analytical_sample external_id',
            'biosample_id': 'biological_sample id'
        })

    return data
Beispiel #15
0
def map_node_name_to_id(driver, node, value):
    """
    Looks up the identifier of a node from its name.

    Reads the 'map_node_name' query from 'queries.yml' (next to this module), replaces the
    'NODE' placeholder with ``node`` and runs it with the lower-cased string form of
    ``value`` as the 'name' parameter.

    :param driver: driver, which provides the connection to the graph database.
    :param str node: text that replaces 'NODE' in the query.
    :param value: name to look up.
    :return: First value of the first row of the query result, or None when the result
        is None or empty.
    """
    module_dir = os.path.abspath(os.path.dirname(__file__))
    cypher = read_queries(os.path.join(module_dir, "queries.yml"))
    query = cypher['map_node_name']['query'].replace('NODE', node)
    parameters = {'name': str(value).lower()}
    result = connector.getCursorData(driver, query, parameters=parameters)

    if result is None or result.empty:
        return None
    return result.values[0][0]
Beispiel #16
0
def check_external_ids_in_db(driver, projectId):
    """
    Runs the 'check_external_ids' data upload query for a project.

    :param driver: driver, which provides the connection to the graph database.
    :param projectId: project identifier; its string form is passed to the query as 'external_id'.
    :return: Result of the query, or an empty pandas DataFrame if the query could not be
        read or executed (the error is logged).
    """
    query_name = 'check_external_ids'
    query = ''
    result = pd.DataFrame()
    try:
        query = get_data_upload_queries()[query_name]['query']
        parameters = {'external_id': str(projectId)}
        result = connector.getCursorData(driver, query, parameters=parameters)
    except Exception as err:
        exc_tb = sys.exc_info()[2]
        fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
        message = "Error: {}. Checking if external identifiers exist in the database: Query name ({}) - Query ({}), error info: {}, file: {},line: {}"
        logger.error(message.format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return result
Beispiel #17
0
def remove_samples_nodes_db(driver, projectId):
    """
    Runs the statements of the 'remove_project' query for a given project.

    The 'PROJECTID' placeholder of the query is replaced by ``projectId`` and the query is
    split on ';'. The last two fragments of the split are not executed.
    NOTE(review): skipping the last real statement looks deliberate -- confirm.
    NOTE(review): ``cwd`` is not defined in this function; presumably a module-level
    path -- confirm.

    :param driver: driver, which provides the connection to the graph database.
    :param str projectId: project identifier that replaces 'PROJECTID' in the query.
    :return: Result of the last executed statement, or None if none was executed
        (errors are logged).
    """
    query_name = 'remove_project'
    query = ''
    result = None
    try:
        project_cypher = ckg_utils.get_queries(os.path.join(cwd, "../queries/project_cypher.yml"))
        project_query = project_cypher[query_name]['query'].replace('PROJECTID', projectId)
        # From here on ``query`` holds the list of statements (also what gets logged on error)
        query = project_query.split(';')[:-2]
        for statement in query:
            result = connector.getCursorData(driver, statement + ';')
    except Exception as err:
        exc_tb = sys.exc_info()[2]
        fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
        message = "Error: {}. Removing nodes associated to project: Query name ({}) - Query ({}), error info: {}, file: {},line: {}"
        logger.error(message.format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return result
Beispiel #18
0
def get_new_user_identifier(driver):
    """
    Queries the database for the last user identifier and returns a new sequential identifier.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :return: First value of the first row returned by the 'increment_user_id' query, or
        None if the query could not be read or executed (the error is logged).
    :rtype: str
    """
    query_name = 'increment_user_id'
    # Pre-bind both names: the except handler logs ``query`` and the function returns
    # ``user_identifier`` even when the failure happens before either is assigned
    query = ''
    user_identifier = None
    try:
        user_creation_cypher = get_user_creation_queries()
        query = user_creation_cypher[query_name]['query']
        user_identifier = connector.getCursorData(driver, query).values[0][0]
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading query {}: {}, file: {},line: {}, error: {}".format(query, sys.exc_info(), fname, exc_tb.tb_lineno, err))
    return user_identifier
Beispiel #19
0
def create_new_subjects(driver, data, projectId):
    """
    Creates the subjects of a project in the graph database and adds their new identifiers to the data.

    One 'S<number>' identifier is generated per unique 'subject external_id', numbered
    from the value returned by get_new_subject_identifier() (1 if it returns None). For
    each subject the 'create_project_subject' query is run with the parameters
    'external_id', 'project_id' and 'subject_id'. Errors are logged per subject and do not
    stop the remaining subjects.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param data: pandas Dataframe with clinical data as columns and samples as rows.
    :param string projectId: project identifier.
    :return: Pandas DataFrame where the new subject internal identifiers have been added
        (column 'subject id'; the input dataframe is modified in place).
    """
    external_ids = data['subject external_id'].unique()
    first_id = get_new_subject_identifier(driver)
    if first_id is None:
        first_id = '1'
    start = int(first_id)
    subject_ids = ['S' + str(number)
                   for number in range(start, start + len(external_ids))]
    subject_dict = dict(zip(external_ids, subject_ids))
    query_name = 'create_project_subject'
    for external_id, new_subject_id in subject_dict.items():
        parameters = {
            'external_id': str(external_id),
            'project_id': projectId,
            'subject_id': new_subject_id
        }
        # Reset per subject so the error log shows the statement that was running
        query = ''
        try:
            statements = get_data_upload_queries()[query_name]['query'].split(';')[:-1]
            for query in statements:
                connector.getCursorData(driver,
                                        query + ';',
                                        parameters=parameters)
        except Exception as err:
            exc_tb = sys.exc_info()[2]
            fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
            message = "Error: {}. Creating new subjects: Query name ({}) - Query ({}), error info: {}, file: {},line: {}"
            logger.error(
                message.format(err, query_name, query, sys.exc_info(), fname,
                               exc_tb.tb_lineno))

    data['subject id'] = data['subject external_id'].map(subject_dict)

    return data
Beispiel #20
0
def get_new_analytical_sample_identifier(driver):
    """
    Runs the 'increment_analytical_sample_id' data upload query and returns the identifier it yields.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.

    :return: First value of the first row of the query result, or None if the query
        could not be read or executed (the error is logged).
    """
    query_name = 'increment_analytical_sample_id'
    query = ''
    identifier = None
    try:
        query = get_data_upload_queries()[query_name]['query']
        cursor_data = connector.getCursorData(driver, query)
        identifier = cursor_data.values[0][0]
    except Exception as err:
        exc_tb = sys.exc_info()[2]
        fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
        message = "Error: {}. Getting new analytical sample identifiers: Query name ({}) - Query ({}), error info: {}, file: {},line: {}"
        logger.error(message.format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return identifier
Beispiel #21
0
def create_user_node(driver, data):
    """
    Creates the graph database node for a new user with its properties.

    Every ';'-terminated statement of the 'create_user_node' query is run with
    ``data.to_dict()`` as parameters. Errors are logged and not re-raised; nothing is
    returned.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :param Series data: pandas Series with new user identifier and required user information (see set_arguments()).
    """
    query_name_node = 'create_user_node'
    try:
        user_queries = uh.get_user_creation_queries()
        statements = user_queries[query_name_node]['query'].split(';')[0:-1]
        for statement in statements:
            result = connector.getCursorData(driver,
                                             statement + ';',
                                             parameters=data.to_dict())
        success_message = "New user node created: {}. Result: {}"
        logger.info(success_message.format(data['username'], result))
    except Exception as err:
        exc_tb = sys.exc_info()[2]
        fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
        error_message = "Reading query {}: {}, file: {},line: {}, error: {}"
        logger.error(
            error_message.format(query_name_node, sys.exc_info(), fname,
                                 exc_tb.tb_lineno, err))
Beispiel #22
0
    def send_query(self, query):
        """
        Runs a query against the graph database using a newly obtained connection.

        :param query: query passed unchanged to ``connector.getCursorData``.
        :return: Whatever ``connector.getCursorData`` returns for the query.
        """
        return connector.getCursorData(
            connector.getGraphDatabaseConnectionConfiguration(), query)