def plot_all():
    """Show Venn diagrams comparing identifications across all three runs.

    For each observation file (``hil_neg``, ``hil_pos``, ``syn_neg``) the
    Metlin, HMDB and MetFrag (``keep_zero_scores=False``) identifications are
    read, intersected with the module-level ``features`` and stripped of the
    module-level ``cofactors``.  A 2x3 grid of diagrams is then drawn:

    * ``axes[0][0]`` ('All'): per-database unions over the three files;
    * ``axes[0][1]`` (untitled): Metlin|HMDB ("observed") vs. the MetFrag
      union vs. ``features``;
    * ``axes[1][0..2]``: one diagram per database, split by observation file.

    ``axes[0][2]`` is not drawn on.  The function ends with ``plt.show()``
    and returns nothing.
    """
    paths = (hil_neg, hil_pos, syn_neg)
    run_labels = ('hilneg', 'hilpos', 'synneg')

    def _strip(compounds):
        # remove non-features & cofactors
        return (compounds & features) - cofactors

    # One entry per observation file, in the same order as ``paths``.
    metlin = []
    hmdb = []
    metfrag = []
    for path in paths:
        metlin.append(_strip(read.metlin(path)))
        hmdb.append(_strip(read.hmdb(path)))
        scores = read.metfrag_with_scores(path, keep_zero_scores=False)
        metfrag.append(_strip(set(scores.keys())))

    # Union over the three files for each database.  Named ``per_db`` rather
    # than ``all`` so the builtin is not shadowed.
    per_db = [
        reduce(lambda acc, found: acc | found, db)
        for db in (metlin, hmdb, metfrag)
    ]

    _, axes = plt.subplots(2, 3)

    venn3(per_db, ('metlin', 'hmdb', 'metfrag'), ax=axes[0][0])
    venn3((per_db[0] | per_db[1], per_db[2], features),
          ('observed', 'metfrag', 'features'),
          ax=axes[0][1])
    venn3(metlin, run_labels, ax=axes[1][0])
    venn3(hmdb, run_labels, ax=axes[1][1])
    venn3(metfrag, run_labels, ax=axes[1][2])

    axes[0][0].set_title('All')
    axes[1][0].set_title('Metlin')
    axes[1][1].set_title('HMDB')
    axes[1][2].set_title('MetFrag')

    plt.show()
def plot_hil_neg():
    """Show a 2x3 grid of Venn diagrams for the ``hil_neg`` observations.

    Reads the Metlin and HMDB identifications plus the MetFrag candidates
    (once with and once without zero-score entries) for ``hil_neg`` and
    compares them with the module-level ``features`` and ``cofactors``.

    Top row: Metlin/HMDB ("hard evidence") against features and cofactors.
    Bottom row: MetFrag against cofactors and features, then MetFrag with
    and without zero scores once cofactors are removed from every set.

    The function ends with ``plt.show()`` and returns nothing.
    """
    observation_file = hil_neg

    metlin = read.metlin(observation_file)
    hmdb = read.hmdb(observation_file)
    metfrag = set(read.metfrag_with_scores(observation_file).keys())
    metfrag_no_zeros = set(
        read.metfrag_with_scores(observation_file,
                                 keep_zero_scores=False).keys())

    # Derived sets that several panels share.
    observed = metlin | hmdb
    observed_no_cofactors = observed - cofactors
    features_no_cofactors = features - cofactors

    # (row, column, sets, set labels, panel title)
    panels = [
        (0, 0, [features, metlin, hmdb],
         ('features', 'metlin', 'hmdb'),
         'hard_evidence'),
        (0, 1, [features, cofactors, observed],
         ('features', 'cofactors', 'observed'),
         'hard evidence cofactor analysis vs features'),
        (0, 2, [cofactors, metlin, hmdb],
         ('cofactors', 'metlin', 'hmdb'),
         'hard evidence cofactor analysis'),
        (1, 0, [metfrag_no_zeros, cofactors, features],
         ('metfrag_no_zeros', 'cofactors', 'features'),
         'metfrag cofactor analysis'),
        (1, 1, [metfrag - cofactors, observed_no_cofactors,
                features_no_cofactors],
         ('metfrag_with_zeros', 'observed', 'features'),
         'metfrag with zero scores (exclude cofactors)'),
        (1, 2, [metfrag_no_zeros - cofactors, observed_no_cofactors,
                features_no_cofactors],
         ('metfrag', 'observed', 'features'),
         'metfrag without zero scores (exclude cofactors)'),
    ]

    # PLOTTING
    _, axes = plt.subplots(2, 3)
    for row, col, sets, labels, title in panels:
        venn3(sets, labels, ax=axes[row][col])
        axes[row][col].set_title(title)

    plt.show()
def summarize(pathway):
    """Print observation statistics for one pathway of the model.

    Loads the model (``model2.csv``) and cofactor list from ``data_path``,
    reads the Metlin, HMDB and MetFrag (``keep_zero_scores=False``)
    identifications of the three observation files, restricts them to the
    model's metabolites minus cofactors, and prints:

    * the MetFrag score looked up for the hard-coded compound ``C05381``;
    * the pathway name and the number of its non-cofactor compounds;
    * under "Unobserved:", "Full Stats:" and "Hard Stats:", one line per
      matching compound with ``len(reverse_path_dict[compound])``.

    Args:
        pathway: key into the model dictionary returned by
            ``read.get_model``.

    Returns:
        None.  All output goes to stdout.

    NOTE(review): ``path_dict[pathway]`` and the ``['C05381']`` lookup are
    plain subscripts, so a missing key presumably raises ``KeyError`` --
    confirm against the types returned by ``read``.
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    features = read.get_metabolites(path_dict)
    cofactors = read.get_cofactors(data_path + 'cofactors')

    paths = (hil_neg, hil_pos, syn_neg)

    def _strip(compounds):
        # remove non-features & cofactors
        return (compounds & features) - cofactors

    # One entry per observation file, in the same order as ``paths``.
    metlin = []
    hmdb = []
    metfrag = []
    for path in paths:
        metlin.append(_strip(read.metlin(path)))
        hmdb.append(_strip(read.hmdb(path)))
        scores = read.metfrag_with_scores(path, keep_zero_scores=False)
        metfrag.append(_strip(set(scores.keys())))

    # Union over the three files for each database, ordered
    # [metlin, hmdb, metfrag].  Not called ``all`` so the builtin stays
    # usable.
    per_db = [
        reduce(lambda acc, found: acc | found, db)
        for db in (metlin, hmdb, metfrag)
    ]

    def stats(name, metlin_hits, hmdb_hits, metfrag_hits=None):
        """Print every observed compound of pathway *name* with its count."""
        # ``None`` sentinel instead of a shared mutable ``set()`` default.
        if metfrag_hits is None:
            metfrag_hits = set()
        members = path_dict[name]
        observed = ((metlin_hits | hmdb_hits | metfrag_hits)
                    & features) - cofactors
        for compound in observed & members:
            # Presumably the number of pathways containing the compound --
            # verify against read.reverse_dict.
            print("%s %s" % (compound, len(reverse_path_dict[compound])))

    compounds = path_dict[pathway]

    # Single-argument print(...) is valid in both Python 2 and 3 and emits
    # the same text as the former ``print a, b`` statements.
    # NOTE(review): every other call passes one path; here a list of all
    # three is passed -- confirm read.metfrag_with_scores accepts that.
    print("Prob(C05381) %s" %
          (read.metfrag_with_scores(list(paths))['C05381'],))
    print("%s %s" % (pathway, len(compounds - cofactors)))

    # Invariant across the three stats() calls below, so built only once.
    reverse_path_dict = read.reverse_dict(path_dict)

    print("Unobserved:")
    stats(pathway, compounds - cofactors, set())
    print("Full Stats:")
    stats(pathway, per_db[0], per_db[1], per_db[2])
    print("Hard Stats:")
    stats(pathway, per_db[0], per_db[1], set())
def get_metabolite_sets():
    """Gets the sets of observed metabolites.

    Loads the model (``model2.csv``) and cofactor list from ``data_path``,
    then reads the Metlin, HMDB and MetFrag (``keep_zero_scores=False``)
    identifications of ``hil_neg``, ``hil_pos`` and ``syn_neg``.  Each
    per-file result is intersected with the model's metabolites and has the
    cofactors removed before the three files are unioned per database.

    Returns:
        A list of three collections ordered ``[metlin, hmdb, metfrag]``,
        each the union over the three observation files.
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    features = read.get_metabolites(path_dict)
    cofactors = read.get_cofactors(data_path + 'cofactors')

    paths = (hil_neg, hil_pos, syn_neg)

    def _strip(compounds):
        # remove non-features & cofactors
        return (compounds & features) - cofactors

    # One entry per observation file, in the same order as ``paths``.
    metlin = []
    hmdb = []
    metfrag = []
    for path in paths:
        metlin.append(_strip(read.metlin(path)))
        hmdb.append(_strip(read.hmdb(path)))
        scores = read.metfrag_with_scores(path, keep_zero_scores=False)
        metfrag.append(_strip(set(scores.keys())))

    # Built directly as the return value; the former local name ``all``
    # shadowed the builtin.
    return [
        reduce(lambda acc, found: acc | found, db)
        for db in (metlin, hmdb, metfrag)
    ]
# ---------------------------------------------------------------------------
# Module-level setup: load the model and the HilNeg observations, then create
# the Gamma variables for the pathways and their features.
# Statement order matters here: several sets are modified in place.
# ---------------------------------------------------------------------------
data_path = '../data/'
observation_file = data_path + 'HilNeg 0324 -- Data.csv'

# The cofactor list is loaded first because the model reader receives it.
cofactors = read.get_cofactors(data_path + 'cofactors')
path_dict = read.get_model(data_path + 'model2.csv', cofactors=cofactors)
pathways = path_dict.keys()
features = read.get_metabolites(path_dict)

# Evidence = Metlin hits united with HMDB hits, minus cofactors, restricted
# to the model features.  ``features`` itself is stripped of cofactors in
# place before the intersection.
evidence = read.metlin(observation_file)
evidence |= read.hmdb(observation_file)
evidence -= cofactors
features -= cofactors
evidence &= features

reverse_path_dict = read.reverse_dict(path_dict)

# MetFrag scores for the candidates that are features but neither cofactors
# nor already part of ``evidence``.
# ``-`` binds tighter than ``&``, so the second argument parses as
# metfrag & ((features - cofactors) - evidence).
# Presumably read.dict_of_set restricts the score dict to that key set --
# TODO confirm against the read module.
metfrag = read.metfrag(observation_file)
metfrag_evidence = read.dict_of_set(
    read.metfrag_with_scores(observation_file, keep_zero_scores=False),
    metfrag & features - cofactors - evidence)

# From here on ``evidence`` is a dict mapping every evidence compound to 1
# rather than a set.
evidence = {e: 1 for e in evidence}

# One Gamma variable per pathway, named 'p_<pathway>'.
# NOTE(review): Gamma is presumably a PyMC-style stochastic taking
# (name, alpha, beta) -- confirm against the import at the top of the file.
rate_prior = 0.5
ap = {p: Gamma('p_' + p, rate_prior, 1) for p in pathways}

# One Gamma variable per (pathway, feature) pair, named
# 'b_{<pathway>,<feature>}', taking the pathway's variable ap[p] as its
# first parameter.
bmp = {
    p: {
        feat: Gamma('b_{' + p + ',' + feat + '}', ap[p], 1)
        for feat in path_dict[p]
    }
    for p in pathways
}

# Initialised empty here; not populated within the visible part of the file.
y_bmp = {}
virtual = {}