def summarize(pathway):
    """Print observation statistics for one pathway and return the observed sets.

    The model, feature set and cofactor set are loaded through ``read``.
    The MetLin, HMDB and MetFrag compounds of every observation file
    (``hil_neg``, ``hil_pos``, ``syn_neg``) are restricted to model features,
    stripped of cofactors, and merged per database.

    The report printed to stdout has a header line (pathway and number of
    non-cofactor compounds) followed by three sections.  Each section line is
    a compound of the pathway and ``len(reverse_path_dict[compound])``
    (presumably the number of pathways containing it -- confirm against
    ``read.reverse_dict``).  Lines within a section are sorted so the report
    is reproducible between runs.

    Args:
        pathway: key of the pathway in the model returned by
            ``read.get_model``.

    Returns:
        A list ``[metlin, hmdb, metfrag]`` holding, for each database, the
        union of its filtered compound sets over all observation files.

    Raises:
        KeyError: if ``pathway`` is not a key of the model (assuming the
            model is dict-like).
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    features = read.get_metabolites(path_dict)
    cofactors = read.get_cofactors(data_path + 'cofactors')
    # Built once here; it does not depend on the arguments of stats().
    reverse_path_dict = read.reverse_dict(path_dict)
    paths = (hil_neg, hil_pos, syn_neg)

    def only_features(compounds):
        # Keep model features and drop cofactors.
        return (compounds & features) - cofactors

    metlin = []
    hmdb = []
    metfrag = []
    for path in paths:
        metlin.append(only_features(read.metlin(path)))
        hmdb.append(only_features(read.hmdb(path)))
        metfrag.append(only_features(
            set(read.metfrag_with_scores(path, keep_zero_scores=False).keys())))

    # set().union(*sets) replaces reduce(), which is not a builtin on
    # Python 3, and avoids shadowing the builtin all().
    observed_sets = [set().union(*db) for db in (metlin, hmdb, metfrag)]

    def stats(pathway, metlin_obs, hmdb_obs, metfrag_obs=None):
        # Print every observed compound of `pathway` with its reverse-dict
        # entry count.  None stands in for "no MetFrag observations".
        if metfrag_obs is None:
            metfrag_obs = set()
        cge = path_dict[pathway]
        observed = ((metlin_obs | hmdb_obs | metfrag_obs) & features) - cofactors
        for compound in sorted(observed & cge):
            print(compound, len(reverse_path_dict[compound]))

    compounds = path_dict[pathway]
    print(pathway, len(compounds - cofactors))
    # NOTE(review): despite the label, this section is fed every non-cofactor
    # compound of the pathway, not only the unobserved ones -- confirm intent.
    print("Unobserved:")
    stats(pathway, compounds - cofactors, set())
    print("Full Stats:")
    stats(pathway, observed_sets[0], observed_sets[1], observed_sets[2])
    # "Hard" statistics leave the MetFrag observations out.
    print("Hard Stats:")
    stats(pathway, observed_sets[0], observed_sets[1], set())
    return observed_sets
def get_metabolite_sets():
    """Get the sets of observed metabolites.

    The MetLin, HMDB and MetFrag compounds of every observation file
    (``hil_neg``, ``hil_pos``, ``syn_neg``) are restricted to the model's
    features, stripped of cofactors, and merged per database.

    Returns:
        A list ``[metlin, hmdb, metfrag]`` holding, for each database, the
        union of its filtered compound sets over all observation files.
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    features = read.get_metabolites(path_dict)
    cofactors = read.get_cofactors(data_path + 'cofactors')
    paths = (hil_neg, hil_pos, syn_neg)

    def only_features(compounds):
        # Keep model features and drop cofactors.
        return (compounds & features) - cofactors

    metlin = []
    hmdb = []
    metfrag = []
    for path in paths:
        metlin.append(only_features(read.metlin(path)))
        hmdb.append(only_features(read.hmdb(path)))
        metfrag.append(only_features(
            set(read.metfrag_with_scores(path, keep_zero_scores=False).keys())))

    # set().union(*sets) replaces reduce(), which is not a builtin on
    # Python 3, and avoids shadowing the builtin all().
    return [set().union(*db) for db in (metlin, hmdb, metfrag)]
Esempio n. 3
0
def get_path_dict():
    """Load the model stored in ``../data/model2.csv`` via ``read.get_model``."""
    model_dir = '../data/'
    model_file = model_dir + 'model2.csv'
    return read.get_model(model_file)
meta.py

Produces Venn diagram plots of the metadata, which is useful for visualizing
the structure of the data.

"""

from matplotlib import pyplot as plt
from matplotlib_venn import venn3
import read

# Directory holding the input files (a relative path).
data_path = '../data/'
# Observation files, one per dataset.
hil_neg = data_path + 'HilNeg 0324 -- Data.csv'
hil_pos = data_path + 'HilPos 0324 -- Data.csv'
syn_neg = data_path + 'SynNeg 0324 -- Data.csv'
# Everything below is loaded at import time through the `read` helpers.
# Model read from model2.csv; its keys are exposed as `pathways`.
path_dict = read.get_model(data_path + 'model2.csv')
pathways = path_dict.keys()
# Metabolites derived from the model.
features = read.get_metabolites(path_dict)
# Cofactors read from the 'cofactors' file in the data directory.
cofactors = read.get_cofactors(data_path + 'cofactors')


def plot_hil_neg():
    """Gather the compound sets of the HilNeg observation file.

    NOTE(review): only the data-gathering part of this function is visible
    here; the lines shown neither plot nor return anything -- confirm against
    the complete source.
    """
    observation_file = hil_neg
    # Compounds read for this file by the MetLin and HMDB readers.
    metlin = read.metlin(observation_file)
    hmdb = read.hmdb(observation_file)
    # MetFrag compounds with the reader's default keep_zero_scores setting
    # (the default is not visible here).
    metfrag = set(read.metfrag_with_scores(observation_file).keys())
    # Same call with keep_zero_scores=False -- presumably drops compounds
    # whose MetFrag score is zero; verify in read.metfrag_with_scores.
    metfrag_no_zeros = set(
        read.metfrag_with_scores(observation_file,
                                 keep_zero_scores=False).keys())
    # Features whose identifier starts with 'C'.
    feature_compounds = set([f for f in features if str.startswith(f, 'C')])
    """
from pymc import Bernoulli, Gamma, Poisson, InverseGamma
import pymc
import numpy as np
import gen
import read
import math

# NOTE(review): presumably stand-ins for "almost certain" / "almost
# impossible" probabilities; they are not used in the lines shown here.
ONE, ZERO = (0.99, 0.01)

data_path = '../data/'
observation_file = data_path + 'HilNeg 0324 -- Data.csv'
cofactors = read.get_cofactors(data_path + 'cofactors')
# Unlike a plain get_model call, the cofactors are handed to the model loader.
path_dict = read.get_model(data_path + 'model2.csv', cofactors=cofactors)
pathways = path_dict.keys()
features = read.get_metabolites(path_dict)
# Evidence: MetLin and HMDB compounds of the observation file combined, then
# narrowed to non-cofactor model features.  The order of the in-place
# operations below matters.
evidence = read.metlin(observation_file)
evidence |= read.hmdb(observation_file)
evidence -= cofactors
# In-place: this also changes the object returned by read.get_metabolites.
features -= cofactors
evidence &= features
reverse_path_dict = read.reverse_dict(path_dict)
metfrag = read.metfrag(observation_file)
# `-` binds tighter than `&`, so the second argument is
# metfrag & (features - cofactors - evidence).  `evidence` is still a set at
# this point, which is why this must run before the re-binding below.
metfrag_evidence = read.dict_of_set(
    read.metfrag_with_scores(observation_file, keep_zero_scores=False),
    metfrag & features - cofactors - evidence)
# Re-bind evidence from a set to a dict mapping every compound to 1.
evidence = {e: 1 for e in evidence}

# Passed as the first distribution parameter of every pathway Gamma below.
rate_prior = 0.5

# One pymc Gamma variable per pathway, named 'p_<pathway>'.
ap = {p: Gamma('p_' + p, rate_prior, 1) for p in pathways}
bmp = {
Esempio n. 6
0
def summarize_compound(compound):
    """Print the pathways recorded for ``compound`` on a single line.

    The model is loaded from ``model2.csv`` and inverted with
    ``read.reverse_dict``; the entry stored for ``compound`` is printed as is.

    Args:
        compound: key to look up in the reversed model.

    Raises:
        KeyError: if ``compound`` is missing from the reversed model
            (assuming ``read.reverse_dict`` returns a plain dict).
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    reverse_path_dict = read.reverse_dict(path_dict)
    # print() as a function: the Python 2 print statement is a SyntaxError on
    # Python 3, and the space-separated output is unchanged.
    print("Compound", compound, "has pathways:", reverse_path_dict[compound])
def summarize_compound(compound):
    """Print a header for ``compound``, then its pathways one per line, sorted.

    The model is loaded from ``model2.csv`` and inverted with
    ``read.reverse_dict`` before the lookup.
    """
    model = read.get_model(data_path + 'model2.csv')
    pathways_of = read.reverse_dict(model)
    print("Compound", compound, "has pathways:")
    ordered = sorted(pathways_of[compound])
    for name in ordered:
        print(name)