Exemple #1
0
def test_reading(files_source):
    """Read everything from ``files_source`` and check the ids that come back.

    ``files_source`` is supplied by the test harness (fixture or
    parametrization defined outside this snippet).
    """
    # Materialize the generator once so both id sets see the same objects.
    parsed = list(mwtab.read_files(files_source))

    study_ids = {entry.study_id for entry in parsed}
    analysis_ids = {entry.analysis_id for entry in parsed}

    assert study_ids == {"ST000001", "ST000002"}
    assert analysis_ids == {"AN000001", "AN000002"}
Exemple #2
0
def test_from_local_file():
    """Read two local example files and check their ids in the order given."""
    example_paths = (
        "tests/example_data/mwtab_files/ST000001_AN000001.txt",
        "tests/example_data/mwtab_files/ST000002_AN000002.txt",
    )
    parsed = mwtab.read_files(*example_paths)

    # The test relies on files being yielded in the order they were passed.
    first = next(parsed)
    second = next(parsed)

    assert first.study_id == "ST000001"
    assert second.study_id == "ST000002"
    assert first.analysis_id == "AN000001"
    assert second.analysis_id == "AN000002"
Exemple #3
0
def test_converter_module(from_path, to_path, from_format, to_format):
    """Convert ``from_path`` to ``to_path`` and check the converted output.

    The converted files are read back from ``to_path``; every study and
    analysis id found must be one of the known example ids.
    """
    Converter(
        from_path=from_path,
        to_path=to_path,
        from_format=from_format,
        to_format=to_format,
    ).convert()

    converted = list(mwtab.read_files(to_path))
    study_ids = {entry.study_id for entry in converted}
    analysis_ids = {entry.analysis_id for entry in converted}

    # Subset, not equality: a conversion may produce only some of the files.
    assert study_ids <= {"ST000001", "ST000002"}
    assert analysis_ids <= {"AN000001", "AN000002"}
def mwtab_to_df(path, id_mapping='pubchem_id'):
    '''
    Parse the first mwtab file found at ``path`` into a labelled DataFrame.

    :param path: source handed to ``mwtab.read_files``; only the first file
        it yields is used.
    :param id_mapping: key of the per-metabolite records in the METABOLITES
        section whose value replaces the metabolite name as column label
        (default ``'pubchem_id'``). Metabolites without a non-empty value for
        that key are left out. Pass a falsy value (e.g. ``None``) to keep the
        original metabolite names. Which other keys exist depends on the
        columns of the file's METABOLITES table -- verify against the data.
    :return: DataFrame with a ``'labels'`` column first, followed by one
        column per kept metabolite. Only rows whose index appears as a
        ``local_sample_id`` in SUBJECT_SAMPLE_FACTORS are kept, the index is
        replaced by a fresh positional one, and every column containing an
        empty string or NaN is dropped.
    '''
    mwfile = next(mwtab.read_files(path))

    # Sample id -> class label. Method-blank samples are labelled 'healthy';
    # for all others the label is the text after the first '-' in 'factors'.
    sample_labels = {}
    for sample in mwfile['SUBJECT_SAMPLE_FACTORS']['SUBJECT_SAMPLE_FACTORS']:
        sample_id = sample['local_sample_id']
        factors = sample['factors']
        if factors.startswith('Source:Method Blanks'):
            sample_labels[sample_id] = 'healthy'
        else:
            sample_labels[sample_id] = factors.split('-')[1].strip()

    # Metabolite name -> external identifier, skipping missing/empty ids.
    name_to_id = {}
    for record in mwfile['METABOLITES']['METABOLITES_START']['DATA']:
        if id_mapping in record and record[id_mapping]:
            name_to_id[record['metabolite_name']] = record[id_mapping]

    # Column label -> {row key: measurement}. The name entry is removed so
    # that only the measurements remain in each record.
    measurements = {}
    for record in mwfile['MS_METABOLITE_DATA']['MS_METABOLITE_DATA_START']['DATA']:
        name = record.pop('metabolite_name')
        if not id_mapping:
            measurements[name] = record
        elif name in name_to_id:
            measurements[name_to_id[name]] = record

    df = pd.DataFrame(measurements, dtype=float)

    # Keep only the rows for which a label is known.
    has_label = [row_key in sample_labels for row_key in df.index]
    df = df[has_label]

    # Normalize a misspelling present in the source data.
    labels = [
        'Adenocarcinoma' if sample_labels[row_key] == 'Adenocarcnoma'
        else sample_labels[row_key]
        for row_key in df.index
    ]
    df.insert(0, 'labels', labels)

    # drop=True discards the old index directly; the previous
    # reset_index() + del df['index'] would delete a metabolite column that
    # happened to be called 'index' and leave a stray 'level_0' column.
    df = df.reset_index(drop=True)

    return df.replace('', np.nan).dropna(axis=1, how='any')
Exemple #5
0
def test_from_analysis_id():
    """Read the source ``"5"`` and check the ids of the first file returned.

    NOTE(review): presumably ``"5"`` is resolved as a remote analysis id,
    which would make this test network-dependent -- confirm.
    """
    fetched = next(mwtab.read_files("5"))

    assert fetched.study_id == "ST000004"
    assert fetched.analysis_id == "AN000005"