def tcell_read_metabolomics_frames():
    """Read the cached T-cell metabolomics table and derive a phenotype frame.

    Raw column headers encode the activation time point, dish and replicate
    (e.g. "dish-... act 3h -rep"); they are parsed and renamed into canonical
    "dish_time_rep" names shared by both returned frames.

    Returns:
        (phenotype_df, metabolomics_df): a one-row "Time" phenotype frame and
        the metabolomics frame, both with the renamed columns.

    Raises:
        ValueError: if a column name matches no known time-point pattern.
    """
    proc_tcell_t = cache.TsvFileTracker(
        os.path.join(cache.get_cache_path(), metabolite_expression_name + ".tsv.gz"),
        tcell_read_metabolomics_data)
    metabolomics_df = proc_tcell_t.read_file()
    # Ordered substring -> activation time (hours). Order matters: the first
    # matching pattern wins, mirroring the original elif chain.
    # "non act" (resting) is encoded as -1, "ON" as 0.
    time_patterns = [
        ("non act", -1.),
        ("ON", 0.),
        ("act 3h", 3.),
        ("act 12h", 12.),
        ("act 14h", 14.),
        ("act 24h", 24.),
        ("act 2d", 48.),
        ("act 3d", 72.),
        ("act 4d", 96.),
    ]
    values, cols = [], []
    for coln in metabolomics_df.columns:
        for pattern, time in time_patterns:
            if pattern in coln:
                break
        else:
            # Previously this case only printed the column name and then
            # silently reused the previous iteration's `time` (NameError on
            # the very first column). Fail loudly instead.
            raise ValueError("Unrecognized metabolomics column: " + coln)
        dish = coln.split('-')[0]
        rep = coln.split('-')[2].replace(" ", "")
        values.append(time)
        cols.append('_'.join([dish, str(int(time)), rep]))
    phenotype_df = pd.DataFrame(columns=cols, data=[values], index=["Time"])
    metabolomics_df.columns = cols
    return phenotype_df, metabolomics_df
# Example #2
def generate_reactome_sunburst(values_df, sb_configuration = conf_human):
    """Generate a Reactome sunburst visualization and serve it locally.

    Writes the sunburst JSON ("results.json") plus the bundled static assets
    (index.html, breadcrumb.js) into the cache directory, then starts the
    local web server rooted there.

    Args:
        values_df: data frame with the values to visualize.
        sb_configuration: sunburst configuration (defaults to conf_human).
    """
    base = cache.get_cache_path()
    # Renamed from `json`, which shadowed the stdlib json module name.
    sunburst_json = generate_reactome_sunburst_json(values_df, sb_configuration)
    write_json(sunburst_json, os.path.join(base, 'results.json'))
    # The static assets ship alongside the run_server module.
    resource_path = os.path.dirname(inspect.getsourcefile(run_server))
    shutil.copy(os.path.join(resource_path, "index.html"), base)
    shutil.copy(os.path.join(resource_path, "breadcrumb.js"), base)
    run_server.run_sunburst(path=base)
def tcell_read_proteomics_data():
    """This function is quite convoluted as it downloads an excelfile from a publication and extracts a dataframe. The function also caches intermediate files"""

    # Download (or reuse the cached copy of) the publication's Excel file.
    tcell_prot_xls = cache.UrlFileCache(os.path.join(cache.get_cache_path(),  protein_expression_name + ".xlsx"),proteomics_data_url)
    # Column A holds the protein IDs (used as index); D:U are the intensity columns.
    proteomics_df = pd.read_excel(tcell_prot_xls.get_file_name(), sheet_name = "Data", index_col=0, usecols="A,D:U")
#    proteomics_df = pd.read_excel(tcell_prot_xls.get_file_name(), sheet_name = "Data", index_col=0, usecols="A,V:AM")
    proteomics_df = proteomics_df - proteomics_df.mean()    # Normalize by subtracting column mean
    proteomics_df = proteomics_df.apply(np.exp2)    # The excel data is in log2 space, return it to normal
    # Expand grouped "Protein IDs" rows into one row per proteoform.
    # NOTE(review): grouping is on "Protein IDs" but the final index is
    # "ProteinID" -- presumably a column produced by one_row_per_proteoform;
    # verify against that helper.
    proteomics_df = proteomics_df.groupby("Protein IDs", group_keys=False).apply(one_row_per_proteoform).reset_index(drop=True)
    proteomics_df.set_index("ProteinID", inplace=True)
    return proteomics_df
def get_reactome_df(organism = "HSA", gene_anot = "Ensembl"):
    """Download the Reactome gene-to-pathway mapping and filter it by organism.

    Args:
        organism: Reactome organism code, e.g. "HSA" for human.
        gene_anot: gene annotation source prefixing the mapping file name.

    Returns:
        DataFrame with columns "gene", "reactome_id" and "reactome_name",
        restricted to identifiers of the requested organism.
    """
    file_name = gene_anot + reactome_fn
    local_path = os.path.join(cache.get_cache_path(), file_name)
    remote_url = reactome_url + file_name
    mapping = pd.read_csv(
        cache.download_file(local_path, remote_url),
        sep='\t',
        header=None,
        usecols=[0, 1, 3],
        names=["gene", "reactome_id", "reactome_name"],
    )
    # Reactome stable identifiers carry an organism prefix, e.g. "R-HSA-...".
    prefix = "R-" + organism
    return mapping[mapping["reactome_id"].str.startswith(prefix)]
def tcell_read_metabolomics_data():
    """This function is quite convoluted as it downloads an excelfile from a publication and extracts a dataframe, idexed by chebi. The function also caches intermediate files"""
    # Download (or reuse the cached copy of) the publication's Excel file.
    tcell_metabol_xls = cache.UrlFileCache(os.path.join(cache.get_cache_path(),  metabolite_expression_name + ".xlsx"), metabolomics_data_url)
    # Column A is the KEGG compound ID (index); C:HN are the sample columns.
    metabolomics_df = pd.read_excel(tcell_metabol_xls.get_file_name(), sheet_name = "normalized by sample mean", index_col=0, usecols="A,C:HN", skiprows = [0])
    #metabolomics_df = pd.read_excel(tcell_metabol_xls.get_file_name(), sheet_name = "normalized by sample mean", index_col=0, usecols="A,C:HN", skiprows = [0])
    for col in metabolomics_df.columns:
        # Average all technical replicates (Named by trailing ".1")
        # NOTE(review): columns are dropped inplace while iterating the
        # original column Index; each ".1" column is visited once, so this
        # works, but modifying while iterating is fragile.
        if len(col.split('.'))>1 and col.split('.')[1] == "1":
            remcol = col.split('.')[0]
            # Geometric mean of the base column and its ".1" replicate.
            metabolomics_df[remcol] = scipy.stats.gmean(metabolomics_df[[remcol,col]],axis=1)
            metabolomics_df.drop(col, axis=1, inplace=True)
    metabolomics_df.index.name = "KEGG_ID"
    metabolomics_df = metabolomics_df.apply(np.exp2)    # The excel data is in log2 space, return it to normal
    # Map KEGG compound IDs to ChEBI via the KEGG REST conversion service.
    k = KEGG(verbose=False)
    map_kegg_chebi = k.conv("chebi", "compound")
    # Expand each KEGG_ID group into one row per (converted) compound.
    # NOTE(review): the final index "MetaboliteID" is presumably created by
    # one_row_per_compound_convert; verify against that helper.
    metabolomics_df = metabolomics_df.groupby("KEGG_ID", group_keys=False).apply(lambda x: one_row_per_compound_convert(x, map_kegg_chebi)).reset_index(drop=True)
    metabolomics_df.set_index("MetaboliteID", inplace=True)
    return metabolomics_df
def tcell_read_proteomics_frames():
    """Read the cached T-cell proteomics table and derive a phenotype frame.

    Raw column headers encode the activation time point and a running sample
    number; both are parsed and the columns renamed into canonical
    "dish_time_rep" names shared by both returned frames.

    Returns:
        (phenotype_df, proteomics_df): a one-row "Time" phenotype frame and
        the proteomics frame, both with the renamed columns.

    Raises:
        ValueError: if a column name matches no known time-point pattern.
    """
    proc_tcell_t = cache.TsvFileTracker(
        os.path.join(cache.get_cache_path(), protein_expression_name + ".tsv.gz"),
        tcell_read_proteomics_data)
    proteomics_df = proc_tcell_t.read_file()
    # Ordered substring -> activation time (hours); first match wins,
    # mirroring the original elif chain.
    time_patterns = [
        ("notact", 0.),
        ("act12h", 12.),
        ("act24h", 24.),
        ("act48h", 48.),
        ("act72h", 72.),
        ("act96h", 96.),
    ]
    values, cols = [], []
    for coln in proteomics_df.columns:
        for pattern, time in time_patterns:
            if pattern in coln:
                break
        else:
            # Previously this case only printed the column name and then
            # silently reused the previous iteration's `time` (NameError on
            # the very first column). Fail loudly instead.
            raise ValueError("Unrecognized proteomics column: " + coln)
        rep = int(coln.split('_')[3].replace(" ", ""))
        # Re-map the running sample number into (dish, replicate) pairs; the
        # breakpoints presumably follow the experiment's plate layout -- TODO
        # confirm against the publication. (An unused local parsing field 0
        # of the column name was removed.)
        if rep < 18:
            dish = rep // 5 + 2
            rep = rep % 5 + 1
        elif rep < 26:
            dish = rep % 3 + 2
            rep = 3
        elif rep < 31:
            dish = (rep - 1) % 3 + 2
            rep = 3
        else:
            dish = (rep - 2) % 3 + 2
            rep = 3
        values.append(time)
        cols.append('_'.join([str(dish), str(int(time)), str(rep)]))
    proteomics_df.columns = cols
    phenotype_df = pd.DataFrame(columns=cols, data=[values], index=["Time"])
    return phenotype_df, proteomics_df
# Example #7
def generate_reactome_tree(sb_configuration = None):
    """Download the reactome pathway relation file and generate a tree structure.

    Args:
        sb_configuration: sunburst configuration; when None (the default),
            the human configuration from get_conf_human() is used.

    Returns:
        Root node of the generated Reactome pathway tree.
    """
    # The default used to be `sb_configuration=get_conf_human()`, which is
    # evaluated exactly once at import time and shared across calls; resolve
    # it per call instead.
    if sb_configuration is None:
        sb_configuration = get_conf_human()
    # Derive a filename from the url
    fn = os.path.basename(urlparse(url_to_reactome_relation_file).path)
    path = os.path.join(cache.get_cache_path(), fn)
    return generate_root_node(cache.download_file(path, url_to_reactome_relation_file), sb_configuration)