def get_subjects_enrolled_in_project(driver, projectId):
    """
    Extracts the subjects enrolled in a given project.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param str projectId: external project identifier (from the graph database).
    :return: Subjects enrolled in the project.
    :rtype: Numpy ndarray
    """
    query_name = 'extract_enrolled_subjects'
    query = ''
    result = pd.DataFrame()  # ensures result is defined if the queries fail
    try:
        data_upload_cypher = get_data_upload_queries()
        query = data_upload_cypher[query_name]['query']
        for q in query.split(';')[0:-1]:
            if '$' in q:
                result = connector.getCursorData(driver, q + ';', parameters={'external_id': str(projectId)})
            else:
                result = connector.getCursorData(driver, q + ';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Getting subjects enrolled in project: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return result.values

def create_db_user(driver, data):
    """
    Creates a new graph database user and assigns a role to it, if the user is not already in the list of local users.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param Series data: pandas Series with the required user information (see set_arguments()).
    """
    query_name_add = 'create_db_user'
    query_name_role = 'add_role_to_db_user'
    query_list_db_users = 'list_db_users'
    try:
        cypher = get_user_creation_queries()
        db_query = cypher[query_name_add]['query'] + cypher[query_name_role]['query']
        db_users = connector.getCursorData(driver, cypher[query_list_db_users]['query'], {})
        # Check for emptiness first so an empty result (without a 'username' column) does not raise a KeyError
        if db_users.empty or data['username'] not in db_users['username'].to_list():
            for q in db_query.split(';')[0:-1]:
                result = connector.getCursorData(driver, q + ';', parameters=data.to_dict())
            logger.info("New user created: {}. Result: {}".format(data['username'], result))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading query {}: {}, file: {}, line: {}, error: {}".format(query_name_add, sys.exc_info(), fname, exc_tb.tb_lineno, err))

def check_if_node_exists(driver, node, node_property, value):
    """
    Queries the graph database and checks if a node with a specific property and property value already exists.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :param str node: node to be matched in the database.
    :param str node_property: property of the node.
    :param value: property value.
    :type value: str, int, float or bool
    :return: Pandas dataframe with the node identifier if a node with node_property and value already exists; \
        if the node does not exist, returns an empty dataframe.
    """
    query_name = 'check_node'
    result = pd.DataFrame()  # ensures result is defined if the query fails
    try:
        cypher = get_project_creation_queries()
        query = cypher[query_name]['query'].replace('NODE', node).replace('PROPERTY', node_property)
        for q in query.split(';')[0:-1]:
            if '$' in q:
                result = connector.getCursorData(driver, q + ';', parameters={'value': value})
            else:
                result = connector.getCursorData(driver, q + ';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading query {}: {}, file: {}, line: {}, error: {}".format(query_name, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return result

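# Illustrative sketch, not part of the original module: shows how check_if_node_exists can
# gate creation logic. The 'User' label, the 'username' property and the helper name are
# assumptions for the example; since the NODE and PROPERTY placeholders in the 'check_node'
# template are replaced before the query runs, the same call works for any label/property pair.
def example_user_exists(driver, username):
    # True if a User node with the given username is already in the database.
    matches = check_if_node_exists(driver, 'User', 'username', username)
    return not matches.empty
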
def get_new_subject_identifier(driver):
    """
    Queries the database for the last subject identifier and returns a new sequential identifier.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :return: Subject identifier.
    :rtype: str
    """
    query_name = 'increment_subject_id'
    query = ''
    try:
        cypher = get_data_upload_queries()
        query = cypher[query_name]['query']
        subject_identifier = connector.getCursorData(driver, query).values[0][0]
    except Exception as err:
        subject_identifier = None
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Getting new subject identifiers: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return subject_identifier

def get_db_stats_data():
    """
    Retrieves all the stats data from the graph database and returns them as a dictionary.

    :return: Dictionary of dataframes serialized as JSON records.
    """
    query_names = ['unique_projects', 'get_db_stats', 'get_db_store_size', 'get_db_transactions', 'get_db_kernel']
    df_names = ['projects', 'meta_stats', 'store_size', 'transactions', 'kernel_monitor']

    dfs = {}
    cypher = get_query()
    # Get a connection to the graph database
    driver = connector.getGraphDatabaseConnectionConfiguration()
    for name, query_name in zip(df_names, query_names):
        query = cypher[query_name]['query']
        data = connector.getCursorData(driver, query)
        if name == 'store_size':
            data = data.T
            data['size'] = [size_converter(value) for value in data[0]]
        dfs[name] = data.to_json(orient='records')

    return dfs

def get_subject_number_in_project(driver, projectId):
    """
    Extracts the number of subjects included in a given project.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :param str projectId: external project identifier (from the graph database).
    :return: Integer with the number of subjects.
    """
    query_name = 'subject_number'
    result = None  # ensures result is defined if the query fails
    try:
        cypher = get_project_creation_queries()
        query = cypher[query_name]['query']
        result = connector.getCursorData(driver, query, parameters={'external_id': projectId}).values[0][0]
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Reading query {}: {}, file: {}, line: {}".format(err, query_name, sys.exc_info(), fname, exc_tb.tb_lineno))

    return result

def create_new_ansamples(driver, data):
    """
    Creates new analytical sample nodes in the graph database and relates them to their biological samples.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :param data: pandas Dataframe with clinical data as columns and samples as rows.
    :return: Pandas DataFrame where new analytical sample internal identifiers have been added.
    """
    external_ids = data['analytical_sample external_id'].unique()
    biosample_ids = data['biological_sample id']
    ansample_id = get_new_analytical_sample_identifier(driver)
    if ansample_id is None:
        ansample_id = '1'

    ansample_ids = ['AS' + str(i) for i in np.arange(int(ansample_id), int(ansample_id) + len(external_ids))]
    ansample_dict = dict(zip(external_ids, ansample_ids))
    asample_biosample_dict = dict(zip(external_ids, biosample_ids))
    query_name = 'create_asamples_biosamples'
    for external_id, asample_id in ansample_dict.items():
        biosample_id = asample_biosample_dict[external_id]
        parameters = {'external_id': str(external_id), 'biosample_id': biosample_id, 'asample_id': asample_id}
        try:
            query = ''
            data_upload_cypher = get_data_upload_queries()
            queries = data_upload_cypher[query_name]['query'].split(';')[:-1]
            for query in queries:
                res = connector.getCursorData(driver, query + ';', parameters=parameters)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Error: {}. Creating analytical samples: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    data['analytical_sample id'] = data['analytical_sample external_id'].map(ansample_dict)

    return data

def get_new_project_identifier(driver, projectId):
    """
    Queries the database for the last project external identifier and returns a new sequential identifier.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :param str projectId: internal project identifier (CPxxxxxxxxxxxx).
    :return: Project external identifier.
    :rtype: str
    """
    query_name = 'increment_project_id'
    try:
        project_creation_cypher = get_project_creation_queries()
        query = project_creation_cypher[query_name]['query']
        last_project, new_id = connector.getCursorData(driver, query).values[0]
        if last_project is None and new_id is None:
            external_identifier = 'P0000001'
        else:
            length = len(last_project.split('P')[-1])
            new_length = len(str(new_id))
            external_identifier = 'P' + '0' * (length - new_length) + str(new_id)
    except Exception as err:
        external_identifier = None
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading query {}: {}, file: {}, line: {}, err: {}".format(query_name, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return external_identifier

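# Illustrative sketch, not part of the original module: reproduces the zero-padding rule
# used in get_new_project_identifier, where the new external identifier keeps the digit
# width of the previous one (e.g. last_project 'P0000012' and new_id 13 give 'P0000013').
# The helper name is an assumption for the example.
def example_pad_project_identifier(last_project, new_id):
    digits = len(last_project.split('P')[-1])
    return 'P' + str(new_id).zfill(digits)
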
def query_data(self):
    """
    Runs the report queries defined in self.queries_file, replacing the PROJECTID placeholder
    with the project identifier, and returns the results of the 'pre' queries as a dictionary
    of dataframes keyed by query title.
    """
    data = {}
    try:
        cwd = os.path.abspath(os.path.dirname(__file__))
        queries_path = os.path.join(cwd, self.queries_file)
        project_cypher = query_utils.read_queries(queries_path)
        driver = connector.getGraphDatabaseConnectionConfiguration()
        replace = [("PROJECTID", self.identifier)]
        for query_name in project_cypher:
            title = query_name.lower().replace('_', ' ')
            query = project_cypher[query_name]['query']
            query_type = project_cypher[query_name]['query_type']
            for r, by in replace:
                query = query.replace(r, by)
            if query_type == "pre":
                data[title] = connector.getCursorData(driver, query)
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading queries from file {}: {}, file: {}, line: {}, error: {}".format(queries_path, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return data

def get_project_information(driver, project_id):
    """
    Retrieves the data uploaded for a given project and returns it as a table.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param str project_id: external project identifier.
    :return: Table with the data uploaded for the project, or an empty dataframe if nothing is found.
    """
    query_name = 'project_graph'
    res = pd.DataFrame()
    try:
        query = ''
        parameters = {'project_id': project_id}
        data_upload_cypher = get_data_upload_queries()
        queries = data_upload_cypher[query_name]['query'].split(';')[:-1]
        for query in queries:
            res = connector.getCursorData(driver, query + ';', parameters=parameters)
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Getting project information: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    if not res.empty:
        res = viz.get_table(res, identifier='new_project', title='Data Uploaded for Project {}'.format(project_id))

    return res

def create_user_from_dict(driver, data):
    """
    Creates a graph database node for the new user and adds properties to the node.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param dict data: dictionary with the user information.
    """
    query_name_node = 'create_user_node'
    result = None
    try:
        user_id = get_new_user_id(driver)
        if 'ID' in data and data['ID'] is None:
            data['ID'] = user_id
        elif 'ID' not in data:
            data['ID'] = user_id
        cypher = uh.get_user_creation_queries()
        query = cypher[query_name_node]['query']
        for q in query.split(';')[0:-1]:
            result = connector.getCursorData(driver, q + ';', parameters=data)
        logger.info("New user node created: {}. Result: {}".format(data['username'], result))
        print("New user node created: {}. Result: {}".format(data['username'], result))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading query {}: {}, file: {}, line: {}, error: {}".format(query_name_node, sys.exc_info(), fname, exc_tb.tb_lineno, err))
        print("Reading query {}: {}, file: {}, line: {}, error: {}".format(query_name_node, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return result

def getMappingFromDatabase(id_list, node, attribute_from='id', attribute_to='name'):
    """
    Maps the identifiers in id_list from one node property to another using the graph database.

    :param list id_list: list of identifiers to map.
    :param str node: node label to match in the database.
    :param str attribute_from: node property the identifiers refer to.
    :param str attribute_to: node property to map to.
    :return: Dictionary with attribute_from values as keys and attribute_to values as values.
    """
    id_list = ["'{}'".format(i) for i in id_list]
    driver = connector.getGraphDatabaseConnectionConfiguration()
    mapping_query = "MATCH (n:{}) WHERE n.{} IN [{}] RETURN n.{} AS from, n.{} AS to"
    mapping = connector.getCursorData(driver, mapping_query.format(node, attribute_from, ','.join(id_list), attribute_from, attribute_to))
    if not mapping.empty:
        mapping = dict(zip(mapping['from'], mapping['to']))

    return mapping

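# Illustrative sketch, not part of the original module: maps a list of node identifiers to
# their names with getMappingFromDatabase. The 'Disease' label and the id/name properties
# used here are assumptions for the example.
def example_map_disease_ids_to_names(disease_ids):
    # Returns a dict of {id: name} for the identifiers found in the database.
    return getMappingFromDatabase(disease_ids, 'Disease', attribute_from='id', attribute_to='name')
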
def get_mapping_analytical_samples(project_id):
    """
    Maps analytical sample external identifiers to their internal identifiers for a given project.

    :param str project_id: external project identifier.
    :return: Dictionary with external identifiers as keys and internal identifiers as values \
        (empty result if no analytical samples are found).
    """
    from graphdb_connector import connector
    driver = connector.getGraphDatabaseConnectionConfiguration()

    mapping = {}
    query = "MATCH (p:Project)-[:HAS_ENROLLED]-(:Subject)-[:BELONGS_TO_SUBJECT]-()-[:SPLITTED_INTO]-(a:Analytical_sample) WHERE p.id='{}' RETURN a.external_id, a.id".format(project_id)
    mapping = connector.getCursorData(driver, query)
    if not mapping.empty:
        mapping = mapping.set_index("a.external_id").to_dict(orient='dict')["a.id"]

    return mapping

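# Illustrative sketch, not part of the original module: uses get_mapping_analytical_samples
# to rename the columns of a measurement matrix from analytical sample external identifiers
# to the internal identifiers stored in the graph. The helper name and the default project
# identifier are assumptions for the example.
def example_rename_sample_columns(measurements, project_id='P0000001'):
    # measurements: pandas DataFrame with analytical sample external identifiers as columns.
    mapping = get_mapping_analytical_samples(project_id)
    if isinstance(mapping, dict) and mapping:
        return measurements.rename(columns=mapping)
    return measurements
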
def create_new_ansamples(driver, data):
    """
    Creates new analytical sample nodes in the graph database and relates them to their biological samples.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param data: pandas Dataframe with clinical data as columns and samples as rows.
    :return: Pandas DataFrame where new analytical sample internal identifiers have been added.
    """
    data = data.rename(columns={'analytical_sample external_id': 'external_id', 'biological_sample id': 'biosample_id'})
    data['external_id'] = data['external_id'].astype(str)
    num_samples = data['external_id'].shape[0]
    if 'grouping2' not in data:
        data['grouping2'] = None
    ansample_id = get_new_analytical_sample_identifier(driver)
    if ansample_id is None:
        ansample_id = '1'

    ansample_ids = ['AS' + str(i) for i in np.arange(int(ansample_id), int(ansample_id) + num_samples)]
    data['asample_id'] = ansample_ids
    query_name = 'create_asamples_biosamples'
    for parameters in data.to_dict('records'):
        print(parameters)
        try:
            query = ''
            data_upload_cypher = get_data_upload_queries()
            queries = data_upload_cypher[query_name]['query'].split(';')[:-1]
            for query in queries:
                res = connector.getCursorData(driver, query + ';', parameters=parameters)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Error: {}. Creating analytical samples: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    data = data.rename(columns={'asample_id': 'analytical_sample id', 'external_id': 'analytical_sample external_id', 'biosample_id': 'biological_sample id'})

    return data

def map_node_name_to_id(driver, node, value):
    """
    Maps a node name to its internal identifier in the graph database.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param str node: node label to be matched in the database.
    :param value: node name to look up (matched in lowercase).
    :return: Node identifier if a match is found, otherwise None.
    """
    identifier = None
    query_name = 'map_node_name'
    cwd = os.path.abspath(os.path.dirname(__file__))
    queries_path = "queries.yml"
    cypher = read_queries(os.path.join(cwd, queries_path))
    query = cypher[query_name]['query'].replace('NODE', node)
    result = connector.getCursorData(driver, query, parameters={'name': str(value).lower()})
    if result is not None and not result.empty:
        identifier = result.values[0][0]

    return identifier

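# Illustrative sketch, not part of the original module: batch wrapper around
# map_node_name_to_id that returns a name -> identifier dictionary and skips names
# without a match. The helper name and the arguments used are assumptions.
def example_map_names_to_ids(driver, node, names):
    mapping = {}
    for name in names:
        identifier = map_node_name_to_id(driver, node, name)
        if identifier is not None:
            mapping[name] = identifier
    return mapping
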
def check_external_ids_in_db(driver, projectId):
    """
    Checks which external identifiers are already present in the graph database for a given project.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param str projectId: external project identifier (from the graph database).
    :return: Pandas DataFrame with the external identifiers found in the database.
    """
    query_name = 'check_external_ids'
    query = ''
    result = pd.DataFrame()
    try:
        data_upload_cypher = get_data_upload_queries()
        query = data_upload_cypher[query_name]['query']
        result = connector.getCursorData(driver, query, parameters={'external_id': str(projectId)})
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Checking if external identifiers exist in the database: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return result

def remove_samples_nodes_db(driver, projectId):
    """
    Removes the nodes associated to a given project from the graph database.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param str projectId: external project identifier (from the graph database).
    :return: Result of the last query executed, or None if the queries could not be run.
    """
    result = None
    query_name = 'remove_project'
    query = ''
    try:
        # Resolve the queries file relative to this module
        cwd = os.path.abspath(os.path.dirname(__file__))
        queries_path = "../queries/project_cypher.yml"
        project_cypher = ckg_utils.get_queries(os.path.join(cwd, queries_path))
        query = project_cypher[query_name]['query'].replace('PROJECTID', projectId).split(';')[:-2]
        for q in query:
            result = connector.getCursorData(driver, q + ';')
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Removing nodes associated to project: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return result

def get_new_user_identifier(driver):
    """
    Queries the database for the last user identifier and returns a new sequential identifier.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :return: User identifier.
    :rtype: str
    """
    query_name = 'increment_user_id'
    user_identifier = None  # ensures the identifier is defined if the query fails
    try:
        user_creation_cypher = get_user_creation_queries()
        query = user_creation_cypher[query_name]['query']
        user_identifier = connector.getCursorData(driver, query).values[0][0]
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading query {}: {}, file: {}, line: {}, error: {}".format(query_name, sys.exc_info(), fname, exc_tb.tb_lineno, err))

    return user_identifier

def create_new_subjects(driver, data, projectId):
    """
    Creates new subject nodes in the graph database and relates them to the given project.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :param data: pandas Dataframe with clinical data as columns and samples as rows.
    :param str projectId: project identifier.
    :return: Pandas DataFrame where new subject internal identifiers have been added.
    """
    external_ids = data['subject external_id'].unique()
    subject_id = get_new_subject_identifier(driver)
    if subject_id is None:
        subject_id = '1'

    subject_ids = ['S' + str(i) for i in np.arange(int(subject_id), int(subject_id) + len(external_ids))]
    subject_dict = dict(zip(external_ids, subject_ids))
    query_name = 'create_project_subject'
    for external_id, subject_id in subject_dict.items():
        parameters = {'external_id': str(external_id), 'project_id': projectId, 'subject_id': subject_id}
        try:
            query = ''
            data_upload_cypher = get_data_upload_queries()
            queries = data_upload_cypher[query_name]['query'].split(';')[:-1]
            for query in queries:
                res = connector.getCursorData(driver, query + ';', parameters=parameters)
        except Exception as err:
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logger.error("Error: {}. Creating new subjects: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    data['subject id'] = data['subject external_id'].map(subject_dict)

    return data

def get_new_analytical_sample_identifier(driver):
    """
    Queries the database for the last analytical sample internal identifier and returns a new sequential identifier.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :return: Analytical sample identifier.
    """
    query_name = 'increment_analytical_sample_id'
    query = ''
    try:
        cypher = get_data_upload_queries()
        query = cypher[query_name]['query']
        identifier = connector.getCursorData(driver, query).values[0][0]
    except Exception as err:
        identifier = None
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Error: {}. Getting new analytical sample identifiers: Query name ({}) - Query ({}), error info: {}, file: {}, line: {}".format(err, query_name, query, sys.exc_info(), fname, exc_tb.tb_lineno))

    return identifier

def create_user_node(driver, data):
    """
    Creates graph database node for new user and adds respective properties to node.

    :param driver: py2neo driver, which provides the connection to the neo4j graph database.
    :type driver: py2neo driver
    :param Series data: pandas Series with new user identifier and required user information (see set_arguments()).
    """
    query_name_node = 'create_user_node'
    try:
        cypher = uh.get_user_creation_queries()
        query = cypher[query_name_node]['query']
        for q in query.split(';')[0:-1]:
            result = connector.getCursorData(driver, q + ';', parameters=data.to_dict())
        logger.info("New user node created: {}. Result: {}".format(data['username'], result))
    except Exception as err:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        logger.error("Reading query {}: {}, file: {}, line: {}, error: {}".format(query_name_node, sys.exc_info(), fname, exc_tb.tb_lineno, err))

def send_query(self, query):
    """
    Sends a Cypher query to the graph database and returns the result as a Pandas DataFrame.

    :param str query: Cypher query to be executed.
    :return: Pandas DataFrame with the query result.
    """
    driver = connector.getGraphDatabaseConnectionConfiguration()
    data = connector.getCursorData(driver, query)

    return data