def map_experiment_files(project_id, datasetPath, mapping):
    """
    Apply an identifier mapping to every dataset file in a directory, in place.

    Each file listed by ``builder_utils.listDirectoryFiles`` is read, passed
    through ``map_experimental_data`` and exported back to ``datasetPath``
    under its original file name.

    :param project_id: project identifier (not used by this function).
    :param str datasetPath: directory containing the dataset files.
    :param mapping: mapping forwarded unchanged to ``map_experimental_data``.
    """
    for name in builder_utils.listDirectoryFiles(datasetPath):
        source_path = os.path.join(datasetPath, name)
        mapped = map_experimental_data(
            builder_utils.readDataset(source_path), mapping)
        # Export to the same directory and name the file was read from.
        builder_utils.export_contents(mapped, datasetPath, name)
def create_experiment_internal_identifiers(driver, projectId, data, directory, filename):
    """
    Run the subject -> biological sample -> analytical sample creation chain
    and export the resulting table.

    The output of each step is the input of the next one; the final result is
    written with ``builder_utils.export_contents``.

    :param driver: database driver forwarded to each creation step.
    :param projectId: project identifier forwarded to ``create_new_subjects``.
    :param data: input table forwarded to ``create_new_subjects``.
    :param directory: directory the result is exported to.
    :param filename: name of the exported file.
    :return: ``1`` once the export call has returned.
    """
    with_subjects = create_new_subjects(driver, data, projectId)
    with_biosamples = create_new_biosamples(driver, with_subjects)
    with_ansamples = create_new_ansamples(driver, with_biosamples)
    builder_utils.export_contents(with_ansamples, directory, filename)
    return 1
def _extract_intervention_id(intervention):
    """
    Return the identifier encoded in a single intervention entry.

    An entry made of several whitespace-separated tokens is searched for a
    parenthesised code in its last token (``'Name (123)'`` -> ``'123'``).
    Single-token entries, and multi-token entries without such a code, are
    returned with surrounding whitespace stripped.
    """
    tokens = intervention.split()
    if len(tokens) > 1:
        match = re.search(r'\(([^)]+)', tokens[-1])
        # re.search returns None when there is no '(' group; fall back to the
        # raw text instead of failing on ``None.group``.
        if match is not None:
            return match.group(1)
    return intervention.strip()


def create_mapping_cols_clinical(driver, data, directory, filename, separator='|'):
    """
    Add identifier columns to a clinical data table and export it.

    For each of the columns ``disease``, ``tissue`` and
    ``studies_intervention`` that is present, a companion column
    (``disease id``, ``tissue id``, ``intervention id``) is added to ``data``
    in place. Disease and tissue names are resolved with
    ``query_utils.map_node_name_to_id``; intervention identifiers are parsed
    from the cell text. Cells holding several entries yield the identifiers
    joined with ``'|'``.

    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param data: pandas Dataframe with clinical data as columns and samples as rows \
                 (modified in place).
    :param directory: directory the resulting table is exported to.
    :param filename: name of the exported file.
    :param str separator: character used to separate multiple entries in an attribute.
    :return: None. The updated table is written with ``builder_utils.export_contents``.
    """
    tissue_dict = {}
    disease_dict = {}
    intervention_dict = {}

    if 'disease' in data:
        for disease in data['disease'].dropna().unique():
            names = disease.split(separator)
            if len(names) > 1:
                ids = [
                    query_utils.map_node_name_to_id(
                        driver, 'Disease', str(name.strip()))
                    for name in names
                ]
                # Entries that could not be resolved are left out of the list.
                disease_dict[disease] = '|'.join(
                    i for i in ids if i is not None)
            else:
                disease_dict[disease] = query_utils.map_node_name_to_id(
                    driver, 'Disease', str(disease.strip()))
        data['disease id'] = data['disease'].map(disease_dict)

    if 'tissue' in data:
        for tissue in data['tissue'].dropna().unique():
            tissue_dict[tissue] = query_utils.map_node_name_to_id(
                driver, 'Tissue', str(tissue.strip()))
        data['tissue id'] = data['tissue'].map(tissue_dict)

    if 'studies_intervention' in data:
        for interventions in data['studies_intervention'].dropna().unique():
            ids = [
                _extract_intervention_id(entry)
                for entry in str(interventions).split(separator)
                if entry.strip()
            ]
            # Key on the whole cell value: Series.map looks up complete cell
            # values, so keying on the individual entries would leave every
            # multi-entry cell unmapped (NaN).
            intervention_dict[interventions] = '|'.join(ids)
        data['intervention id'] = data['studies_intervention'].map(
            intervention_dict)

    builder_utils.export_contents(data, directory, filename)
def save_files_in_tmp(content, dataset, prot_tool, prot_file, projectid, uploaded_file):
    """
    Store an uploaded file in the per-session temporary upload directory.

    The target directory is ``<tmpDirectory>/<session cookie>upload/<dataset>``
    (plus a ``<prot_tool>`` sub-directory for proteomics-like datasets). The
    target file name is taken from ``config`` or derived from the dataset,
    tool, file type and project identifier, keeping the extension of
    ``uploaded_file``.

    :param content: uploaded content, forwarded to ``builder_utils.parse_contents``.
    :param str dataset: dataset type; ``None`` means nothing is selected.
    :param str prot_tool: processing tool, used for proteomics-like datasets.
    :param str prot_file: file type, used for proteomics-like datasets.
    :param str projectid: project identifier used in design/clinical file names.
    :param str uploaded_file: name of the uploaded file.
    :return: ``(uploaded_file, None, '', '')`` after the file has been saved, \
             or ``('', None, '', '')`` when ``dataset`` is None.
    :raises PreventUpdate: when there is no uploaded file or content, or when no \
             target file name can be determined for the given selection.
    """
    if dataset is None:
        return '', None, '', ''

    # NOTE(review): the cookie value comes from the client and is used to
    # build a filesystem path -- confirm it is validated before this point.
    session_cookie = flask.request.cookies.get('custom-auth-session')
    temporaryDirectory = os.path.join(tmpDirectory, session_cookie + "upload")
    # makedirs creates missing parents too, so the session directory exists
    # even when tmpDirectory itself had to be created first.
    os.makedirs(temporaryDirectory, exist_ok=True)

    directory = os.path.join(temporaryDirectory, dataset)
    if (uploaded_file is not None and os.path.exists(directory)
            and os.path.exists(os.path.join(directory, uploaded_file))):
        # A file with the same name was uploaded before: reset the dataset
        # directory.
        shutil.rmtree(directory)
    builder_utils.checkDirectory(directory)

    # Without an uploaded file there is nothing to save. Checking here avoids
    # calling ``.split`` on None below and leaves existing files untouched.
    if uploaded_file is None:
        raise PreventUpdate

    extension = uploaded_file.split('.')[-1]
    filename = None
    if (dataset in ('proteomics', 'interactomics', 'phosphoproteomics')
            and prot_tool and prot_file):
        tool = prot_tool.lower()
        selected_file = tool + "-" + prot_file.lower()
        if selected_file in config['file_proteomics']:
            filename = config['file_proteomics'][selected_file]
        else:
            filename = (dataset + '_' + tool + '_'
                        + prot_file.replace(' ', '').lower() + '.' + extension)
        directory = os.path.join(directory, tool)
        target = os.path.join(directory, filename)
        if os.path.exists(directory) and os.path.exists(target):
            # Replace a previously saved file of the same kind.
            os.remove(target)
        builder_utils.checkDirectory(directory)
    elif dataset == 'experimental_design':
        filename = (config['file_design'].split('_')[0] + '_' + projectid
                    + '.' + extension)
    elif dataset == 'clinical':
        filename = (config['file_clinical'].split('_')[0] + '_' + projectid
                    + '.' + extension)

    # No branch matched (e.g. tool or file type not chosen yet), or there is
    # no content to write: skip the update instead of failing on an unbound
    # file name.
    if filename is None or content is None:
        raise PreventUpdate

    data = builder_utils.parse_contents(content, filename)
    builder_utils.export_contents(data, directory, filename)
    return uploaded_file, None, '', ''