def summarize(pathway):
    """Print observation statistics for one pathway and return the observed sets.

    The METLIN, HMDB and MetFrag (zero scores dropped) hits are read from each
    of the three observation files (``hil_neg``, ``hil_pos``, ``syn_neg``),
    restricted to the model's features, and stripped of cofactors. The
    per-file sets are then unioned per database.

    Three sections are printed after a header line holding the pathway and
    its number of non-cofactor compounds. Every section prints one line per
    selected compound: the compound followed by the length of its entry in
    the reversed pathway dictionary.

    * "Unobserved:" -- the pathway's own non-cofactor compounds are passed in
      place of observations.
    * "Full Stats:" -- METLIN, HMDB and MetFrag observations.
    * "Hard Stats:" -- METLIN and HMDB observations only.

    Args:
        pathway: key into the model returned by ``read.get_model``.

    Returns:
        list: ``[metlin, hmdb, metfrag]``, each the union over the three
        observation files of that database's filtered hits.
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    features = read.get_metabolites(path_dict)
    cofactors = read.get_cofactors(data_path + 'cofactors')
    observation_files = (hil_neg, hil_pos, syn_neg)

    def keep_features(found):
        # Drop anything that is not a model feature, then drop cofactors.
        return (found & features) - cofactors

    metlin = []
    hmdb = []
    metfrag = []
    for path in observation_files:
        metlin.append(keep_features(read.metlin(path)))
        hmdb.append(keep_features(read.hmdb(path)))
        metfrag.append(keep_features(set(
            read.metfrag_with_scores(path, keep_zero_scores=False).keys())))

    # set().union(*sets) merges the per-file sets without relying on
    # ``reduce``, which is not a builtin on Python 3.
    observed_by_db = [set().union(*per_file)
                      for per_file in (metlin, hmdb, metfrag)]
    all_metlin, all_hmdb, all_metfrag = observed_by_db

    # Built once and shared by every report below.
    reverse_path_dict = read.reverse_dict(path_dict)

    def stats(pathway, metlin, hmdb, metfrag=None):
        """Print each observed compound of ``pathway`` with its entry length."""
        if metfrag is None:
            metfrag = set()
        cge = path_dict[pathway]
        observed = ((metlin | hmdb | metfrag) & features) - cofactors
        for compound in observed & cge:
            print(compound, len(reverse_path_dict[compound]))

    compounds = path_dict[pathway]
    print(pathway, len(compounds - cofactors))
    print("Unobserved:")
    stats(pathway, compounds - cofactors, set())
    print("Full Stats:")
    stats(pathway, all_metlin, all_hmdb, all_metfrag)
    print("Hard Stats:")
    stats(pathway, all_metlin, all_hmdb, set())
    return observed_by_db
def get_metabolite_sets():
    """ Gets the sets of observed metabolites

    The METLIN, HMDB and MetFrag (zero scores dropped) hits are read from each
    of the three observation files (``hil_neg``, ``hil_pos``, ``syn_neg``),
    restricted to the model's features, and stripped of cofactors.

    Returns:
        list: ``[metlin, hmdb, metfrag]``, each the union over the three
        observation files of that database's filtered hits.
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    features = read.get_metabolites(path_dict)
    cofactors = read.get_cofactors(data_path + 'cofactors')
    observation_files = (hil_neg, hil_pos, syn_neg)

    def keep_features(found):
        # Drop anything that is not a model feature, then drop cofactors.
        return (found & features) - cofactors

    metlin = []
    hmdb = []
    metfrag = []
    for path in observation_files:
        metlin.append(keep_features(read.metlin(path)))
        hmdb.append(keep_features(read.hmdb(path)))
        metfrag.append(keep_features(set(
            read.metfrag_with_scores(path, keep_zero_scores=False).keys())))

    # set().union(*sets) merges the per-file sets without relying on
    # ``reduce``, which is not a builtin on Python 3.
    return [set().union(*per_file) for per_file in (metlin, hmdb, metfrag)]
def get_path_dict():
    """Load and return the model stored in ``model2.csv`` under ``../data/``.

    The result is whatever ``read.get_model`` returns for that file.
    """
    model_dir = '../data/'
    model_file = model_dir + 'model2.csv'
    return read.get_model(model_file)
meta.py Produces venn diagram plots of the metadata. This is useful in visualizing the structure of the data. """ from matplotlib import pyplot as plt from matplotlib_venn import venn3 import read data_path = '../data/' hil_neg = data_path + 'HilNeg 0324 -- Data.csv' hil_pos = data_path + 'HilPos 0324 -- Data.csv' syn_neg = data_path + 'SynNeg 0324 -- Data.csv' path_dict = read.get_model(data_path + 'model2.csv') pathways = path_dict.keys() features = read.get_metabolites(path_dict) cofactors = read.get_cofactors(data_path + 'cofactors') def plot_hil_neg(): observation_file = hil_neg metlin = read.metlin(observation_file) hmdb = read.hmdb(observation_file) metfrag = set(read.metfrag_with_scores(observation_file).keys()) metfrag_no_zeros = set( read.metfrag_with_scores(observation_file, keep_zero_scores=False).keys()) feature_compounds = set([f for f in features if str.startswith(f, 'C')]) """
from pymc import Bernoulli, Gamma, Poisson, InverseGamma import pymc import numpy as np import gen import read import math ONE, ZERO = (0.99, 0.01) data_path = '../data/' observation_file = data_path + 'HilNeg 0324 -- Data.csv' cofactors = read.get_cofactors(data_path + 'cofactors') path_dict = read.get_model(data_path + 'model2.csv', cofactors=cofactors) pathways = path_dict.keys() features = read.get_metabolites(path_dict) evidence = read.metlin(observation_file) evidence |= read.hmdb(observation_file) evidence -= cofactors features -= cofactors evidence &= features reverse_path_dict = read.reverse_dict(path_dict) metfrag = read.metfrag(observation_file) metfrag_evidence = read.dict_of_set( read.metfrag_with_scores(observation_file, keep_zero_scores=False), metfrag & features - cofactors - evidence) evidence = {e: 1 for e in evidence} rate_prior = 0.5 ap = {p: Gamma('p_' + p, rate_prior, 1) for p in pathways} bmp = {
def summarize_compound(compound):
    """Print the reversed-model entry for ``compound`` on a single line.

    The model is loaded from ``model2.csv`` and inverted with
    ``read.reverse_dict``; the entry for ``compound`` is printed after the
    text "Compound <compound> has pathways:".

    Args:
        compound: key into the reversed model dictionary. A ``KeyError``
            propagates if the key is absent from a plain dict (NOTE(review):
            confirm the container type returned by ``read.reverse_dict``).
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    reverse_path_dict = read.reverse_dict(path_dict)
    # print() call instead of the Python 2 print statement, which is a
    # SyntaxError on Python 3; the emitted text is unchanged.
    print("Compound", compound, "has pathways:", reverse_path_dict[compound])
def summarize_compound(compound):
    """Print a header for ``compound`` followed by its entries, one per line.

    The model is loaded from ``model2.csv`` and inverted with
    ``read.reverse_dict``; the entries stored under ``compound`` are printed
    in sorted order below the header line.
    """
    model = read.get_model(data_path + 'model2.csv')
    entries_by_compound = read.reverse_dict(model)
    print("Compound", compound, "has pathways:")
    # The lookup happens after the header is printed, as in the original.
    for entry in sorted(entries_by_compound[compound]):
        print(entry)