Example #1
def map_experiment_files(project_id, datasetPath, mapping):
    # List every file in the dataset directory, apply the identifier mapping
    # to its contents and overwrite the file with the mapped data.
    files = builder_utils.listDirectoryFiles(datasetPath)

    for file in files:
        outputfile = os.path.join(datasetPath, file)
        data = builder_utils.readDataset(outputfile)
        data = map_experimental_data(data, mapping)
        builder_utils.export_contents(data, datasetPath, file)
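A minimal usage sketch for the function above; the project identifier, dataset path and mapping dictionary are made-up placeholders, not values from the project:

# Hypothetical call: remap external subject identifiers in every file of a dataset folder.
mapping = {'patient_1': 'S0000001', 'patient_2': 'S0000002'}
map_experiment_files('P0000001', '/data/imports/P0000001/clinical', mapping)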
Example #2
def create_experiment_internal_identifiers(driver, projectId, data, directory, filename):
    # Create graph-internal identifiers for subjects, biological samples and
    # analytical samples, export the resulting dataframe and report completion.
    done = 0
    df = create_new_subjects(driver, data, projectId)
    df1 = create_new_biosamples(driver, df)
    df2 = create_new_ansamples(driver, df1)
    builder_utils.export_contents(df2, directory, filename)
    done += 1

    return done
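A hedged sketch of how this function might be invoked, assuming a pandas dataframe of experimental-design data and a connection created with the official neo4j Python driver (the actual project may wrap the connection differently); the URI, credentials, identifiers and paths are placeholders:

import pandas as pd
from neo4j import GraphDatabase

# Assumed connection details; replace with the project's own configuration.
driver = GraphDatabase.driver('bolt://localhost:7687', auth=('neo4j', 'password'))
design = pd.read_csv('/data/imports/P0000001/experimental_design.tsv', sep='\t')
done = create_experiment_internal_identifiers(driver, 'P0000001', design,
                                              '/data/imports/P0000001',
                                              'experimental_design.tsv')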
Example #3
def create_mapping_cols_clinical(driver,
                                 data,
                                 directory,
                                 filename,
                                 separator='|'):
    """
    :param driver: neo4j driver, which provides the connection to the neo4j graph database.
    :type driver: neo4j driver
    :param data: pandas Dataframe with clinical data as columns and samples as rows.
    :param str separator: character used to separate multiple entries in an attribute.

    :return: Pandas Dataframe with all clinical data and graph database internal identifiers.
    """
    tissue_dict = {}
    disease_dict = {}
    intervention_dict = {}
    if 'disease' in data:
        for disease in data['disease'].dropna().unique():
            if len(disease.split(separator)) > 1:
                ids = []
                for i in disease.split(separator):
                    disease_id = query_utils.map_node_name_to_id(
                        driver, 'Disease', str(i.strip()))
                    if disease_id is not None:
                        ids.append(disease_id)
                disease_dict[disease] = '|'.join(ids)
            else:
                disease_id = query_utils.map_node_name_to_id(
                    driver, 'Disease', str(disease.strip()))
                disease_dict[disease] = disease_id
        data['disease id'] = data['disease'].map(disease_dict)

    if 'tissue' in data:
        for tissue in data['tissue'].dropna().unique():
            tissue_id = query_utils.map_node_name_to_id(
                driver, 'Tissue', str(tissue.strip()))
            tissue_dict[tissue] = tissue_id

        data['tissue id'] = data['tissue'].map(tissue_dict)

    if 'studies_intervention' in data:
        for interventions in data['studies_intervention'].dropna().unique():
            for intervention in str(interventions).split('|'):
                if len(intervention.split()) > 1:
                    intervention_dict[intervention] = re.search(
                        r'\(([^)]+)',
                        intervention.split()[-1]).group(1)
                else:
                    intervention_dict[intervention] = intervention

        data['intervention id'] = data['studies_intervention'].map(
            intervention_dict)

    builder_utils.export_contents(data, directory, filename)
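The studies_intervention branch assumes entries of the form "<name> (<identifier>)" and keeps the text inside the parentheses of the last whitespace-separated token. A standalone check of that parsing step, with a made-up entry:

import re

intervention = 'metformin treatment (C61612)'
token = intervention.split()[-1]                 # '(C61612)'
print(re.search(r'\(([^)]+)', token).group(1))   # prints 'C61612'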
Example #4
def save_files_in_tmp(content, dataset, prot_tool, prot_file, projectid,
                      uploaded_file):
    # Store an uploaded dataset file in a per-session temporary directory and
    # derive the output filename from the dataset type and project identifier.
    if dataset is not None:
        session_cookie = flask.request.cookies.get('custom-auth-session')
        temporaryDirectory = os.path.join(tmpDirectory,
                                          session_cookie + "upload")
        if not os.path.exists(tmpDirectory):
            os.makedirs(tmpDirectory)
        elif not os.path.exists(temporaryDirectory):
            os.makedirs(temporaryDirectory)

        directory = os.path.join(temporaryDirectory, dataset)
        if os.path.exists(directory) and uploaded_file is not None:
            if os.path.exists(os.path.join(directory, uploaded_file)):
                shutil.rmtree(directory)

        builder_utils.checkDirectory(directory)
        if (dataset in ['proteomics', 'interactomics', 'phosphoproteomics']
                and prot_tool != '' and prot_file != ''):
            selected_file = prot_tool.lower() + "-" + prot_file.lower()
            if selected_file in config['file_proteomics']:
                filename = config['file_proteomics'][selected_file]
            else:
                # Fallback: build the filename from dataset, tool, file type and
                # the extension of the uploaded file.
                filename = (dataset + '_' + prot_tool.lower() + '_'
                            + prot_file.replace(' ', '').lower()
                            + '.' + uploaded_file.split('.')[-1])
            directory = os.path.join(directory, prot_tool.lower())
            if os.path.exists(directory):
                if os.path.exists(os.path.join(directory, filename)):
                    os.remove(os.path.join(directory, filename))
            builder_utils.checkDirectory(directory)
        elif dataset == 'experimental_design':
            filename = (config['file_design'].split('_')[0] + '_' + projectid
                        + '.' + uploaded_file.split('.')[-1])
        elif dataset == 'clinical':
            filename = (config['file_clinical'].split('_')[0] + '_' + projectid
                        + '.' + uploaded_file.split('.')[-1])

        if uploaded_file is None:
            content = None
        if content is not None:
            data = builder_utils.parse_contents(content, filename)
            builder_utils.export_contents(data, directory, filename)

            uploaded = uploaded_file
            uploaded_file = None
            return uploaded, uploaded_file, '', ''
        else:
            raise PreventUpdate

    return '', None, '', ''
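For proteomics-like uploads whose tool/file combination is not listed in config['file_proteomics'], the fallback filename is assembled from the dataset, tool, file type and the original extension. A standalone trace of that branch, with made-up values:

dataset, prot_tool, prot_file = 'proteomics', 'MaxQuant', 'Protein groups'
uploaded_file = 'proteinGroups.txt'
filename = (dataset + '_' + prot_tool.lower() + '_'
            + prot_file.replace(' ', '').lower()
            + '.' + uploaded_file.split('.')[-1])
print(filename)   # proteomics_maxquant_proteingroups.txt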