def plot_all():
    """Show venn3 overlap diagrams of the database hits over all three runs.

    For every observation file (``hil_neg``, ``hil_pos``, ``syn_neg``) the
    Metlin hits, the HMDB hits and the keys of the MetFrag score dict (read
    with ``keep_zero_scores=False``) are loaded, intersected with the
    module-level ``features`` and stripped of the module-level ``cofactors``.

    Figure layout (2 x 3 grid):
      * top row, first two cells: databases compared after merging the runs,
        and Metlin|HMDB ("observed") against MetFrag and ``features``;
      * bottom row: the three runs compared within Metlin, HMDB and MetFrag.

    The function blocks in ``plt.show()`` and returns nothing.
    """
    run_files = (hil_neg, hil_pos, syn_neg)
    run_labels = ('hilneg', 'hilpos', 'synneg')

    def keep_relevant(compounds):
        # Drop everything that is not a feature, then drop the cofactors.
        return (compounds & features) - cofactors

    metlin, hmdb, metfrag = [], [], []
    for run_file in run_files:
        metlin.append(keep_relevant(read.metlin(run_file)))
        hmdb.append(keep_relevant(read.hmdb(run_file)))
        scored = read.metfrag_with_scores(run_file, keep_zero_scores=False)
        metfrag.append(keep_relevant(set(scored.keys())))

    # One merged set per database, ordered metlin, hmdb, metfrag.
    merged = []
    for per_run in (metlin, hmdb, metfrag):
        merged.append(per_run[0] | per_run[1] | per_run[2])

    _figure, axes = plt.subplots(2, 3)
    top_row, bottom_row = axes[0], axes[1]

    venn3(merged, ('metlin', 'hmdb', 'metfrag'), ax=top_row[0])
    top_row[0].set_title('All')

    observed = merged[0] | merged[1]
    venn3((observed, merged[2], features),
          ('observed', 'metfrag', 'features'),
          ax=top_row[1])

    per_database = ((metlin, 'Metlin'), (hmdb, 'HMDB'), (metfrag, 'MetFrag'))
    for axis, (per_run, title) in zip(bottom_row, per_database):
        venn3(per_run, run_labels, ax=axis)
        axis.set_title(title)

    plt.show()
def plot_hil_neg():
    """Show six venn3 diagrams analysing the ``hil_neg`` observation file.

    Reads the Metlin hits, the HMDB hits and the keys of the MetFrag score
    dict (once with the reader's default and once with
    ``keep_zero_scores=False``) and compares them with the module-level
    ``features`` and ``cofactors`` sets.

    Figure layout (2 x 3 grid):
      * top row: Metlin/HMDB against features, against cofactors, and the
        union of both ("observed") against features and cofactors;
      * bottom row: MetFrag against cofactors and features, then MetFrag
        (with and without zero scores) against observed hits and features
        with cofactors removed from every operand.

    The function blocks in ``plt.show()`` and returns nothing.
    """
    observation_file = hil_neg
    metlin = read.metlin(observation_file)
    hmdb = read.hmdb(observation_file)
    metfrag = set(read.metfrag_with_scores(observation_file).keys())
    metfrag_no_zeros = set(
        read.metfrag_with_scores(observation_file,
                                 keep_zero_scores=False).keys())

    # Operands shared by several diagrams; computed once instead of per call.
    observed = metlin | hmdb
    observed_no_cofactors = observed - cofactors
    features_no_cofactors = features - cofactors

    # PLOTTING
    figure, axes = plt.subplots(2, 3)
    venn3([features, metlin, hmdb], ('features', 'metlin', 'hmdb'),
          ax=axes[0][0])
    venn3([features, cofactors, observed],
          ('features', 'cofactors', 'observed'),
          ax=axes[0][1])
    venn3([cofactors, metlin, hmdb], ('cofactors', 'metlin', 'hmdb'),
          ax=axes[0][2])
    venn3([metfrag_no_zeros, cofactors, features],
          ('metfrag_no_zeros', 'cofactors', 'features'),
          ax=axes[1][0])
    venn3([metfrag - cofactors, observed_no_cofactors, features_no_cofactors],
          ('metfrag_with_zeros', 'observed', 'features'),
          ax=axes[1][1])
    venn3([metfrag_no_zeros - cofactors, observed_no_cofactors,
           features_no_cofactors],
          ('metfrag', 'observed', 'features'),
          ax=axes[1][2])

    axes[0][0].set_title('hard_evidence')
    axes[0][1].set_title('hard evidence cofactor analysis vs features')
    axes[0][2].set_title('hard evidence cofactor analysis')
    axes[1][0].set_title('metfrag cofactor analysis')
    axes[1][1].set_title('metfrag with zero scores (exclude cofactors)')
    axes[1][2].set_title('metfrag without zero scores (exclude cofactors)')

    plt.show()
# Example no. 3
# 0
def summarize(pathway):
    """Print which compounds of ``pathway`` are backed by the observations.

    Loads the model and cofactors from ``data_path``, reads the Metlin, HMDB
    and MetFrag (``keep_zero_scores=False``) results of the three observation
    files, restricts each to the model's metabolites minus cofactors, and
    merges the three runs per database.  It then prints, in order:

      * the MetFrag score entry for compound ``C05381``;
      * the pathway name and its number of non-cofactor compounds;
      * three compound listings ("Unobserved:", "Full Stats:",
        "Hard Stats:"), one line per compound with the compound followed by
        ``len(reverse_path_dict[compound])`` (presumably the number of
        pathways containing it -- depends on ``read.reverse_dict``).

    Args:
        pathway: key of the dict returned by ``read.get_model``.

    Raises:
        KeyError: if ``pathway`` is not in the model, or if ``C05381`` is
            missing from the MetFrag scores.
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    # Look the pathway up first so an unknown name fails before the
    # observation files are read.
    compounds = path_dict[pathway]
    features = read.get_metabolites(path_dict)
    cofactors = read.get_cofactors(data_path + 'cofactors')
    # Built once here; it does not depend on the arguments of stats().
    reverse_path_dict = read.reverse_dict(path_dict)
    paths = (hil_neg, hil_pos, syn_neg)

    def relevant(hits):
        # remove non-features & cofactors
        return (hits & features) - cofactors

    metlin, hmdb, metfrag = [], [], []
    for path in paths:
        metlin.append(relevant(read.metlin(path)))
        hmdb.append(relevant(read.hmdb(path)))
        scored = read.metfrag_with_scores(path, keep_zero_scores=False)
        metfrag.append(relevant(set(scored.keys())))

    # Merge the runs: one set per database (metlin, hmdb, metfrag).
    merged = []
    for runs in (metlin, hmdb, metfrag):
        union = runs[0]
        for later in runs[1:]:
            union = union | later
        merged.append(union)

    def stats(metlin_hits, hmdb_hits, metfrag_hits=frozenset()):
        # Print every compound of the pathway found in the given hit sets.
        observed = relevant(metlin_hits | hmdb_hits | metfrag_hits)
        for compound in observed & compounds:
            print("%s %s" % (compound, len(reverse_path_dict[compound])))

    # NOTE(review): every other call passes a single path; here a list of
    # all three paths is passed -- confirm metfrag_with_scores accepts that.
    print("Prob(C05381) %s" %
          (read.metfrag_with_scores(list(paths))['C05381'],))
    print("%s %s" % (pathway, len(compounds - cofactors)))
    # NOTE(review): this listing is fed the pathway's own non-cofactor
    # compounds, so it prints all of them that are features, not only the
    # ones missing from the observations -- confirm the label is intended.
    print("Unobserved:")
    stats(compounds - cofactors, set())
    print("Full Stats:")
    stats(merged[0], merged[1], merged[2])
    print("Hard Stats:")
    stats(merged[0], merged[1], set())
def get_metabolite_sets():
    """Return the observed metabolites per database, merged over all runs.

    Loads the model and cofactors from ``data_path``, reads the Metlin hits,
    the HMDB hits and the keys of the MetFrag score dict (read with
    ``keep_zero_scores=False``) for each of ``hil_neg``, ``hil_pos`` and
    ``syn_neg``, keeps only the model's metabolites, removes cofactors, and
    unions the three runs.

    Returns:
        A list of three sets in the order ``[metlin, hmdb, metfrag]``.
    """
    path_dict = read.get_model(data_path + 'model2.csv')
    features = read.get_metabolites(path_dict)
    cofactors = read.get_cofactors(data_path + 'cofactors')
    paths = (hil_neg, hil_pos, syn_neg)

    def relevant(hits):
        # remove non-features & cofactors
        return (hits & features) - cofactors

    metlin, hmdb, metfrag = [], [], []
    for path in paths:
        metlin.append(relevant(read.metlin(path)))
        hmdb.append(relevant(read.hmdb(path)))
        scored = read.metfrag_with_scores(path, keep_zero_scores=False)
        metfrag.append(relevant(set(scored.keys())))

    # Left-to-right union of the runs of each database.
    merged = []
    for runs in (metlin, hmdb, metfrag):
        union = runs[0]
        for later in runs[1:]:
            union = union | later
        merged.append(union)
    return merged
# Module-level data loading for a single observation file.
# Relative directory holding the input files.
data_path = '../data/'
observation_file = data_path + 'HilNeg 0324 -- Data.csv'
cofactors = read.get_cofactors(data_path + 'cofactors')
# Unlike the helper functions in this file, the model is loaded here with the
# cofactors passed in.
path_dict = read.get_model(data_path + 'model2.csv', cofactors=cofactors)
pathways = path_dict.keys()
features = read.get_metabolites(path_dict)
# Evidence = union of the Metlin and HMDB hits.  The |=, -= and &= below
# update the sets in place, so their order matters.
evidence = read.metlin(observation_file)
evidence |= read.hmdb(observation_file)
evidence -= cofactors
# From here on `features` no longer contains cofactors.
features -= cofactors
evidence &= features
reverse_path_dict = read.reverse_dict(path_dict)
metfrag = read.metfrag(observation_file)
# MetFrag scores (read with keep_zero_scores=False) combined with the set of
# MetFrag compounds that are features but not cofactors and not already in
# `evidence`.  `-` binds tighter than `&`, so the set expression parses as
# metfrag & ((features - cofactors) - evidence); `evidence` is still a set at
# this point.
# NOTE(review): dict_of_set presumably restricts the score dict to the keys
# in that set -- confirm against read.dict_of_set.
metfrag_evidence = read.dict_of_set(
    read.metfrag_with_scores(observation_file, keep_zero_scores=False),
    metfrag & features - cofactors - evidence)
# Rebind `evidence` from a set to a dict mapping every compound to 1.
evidence = {e: 1 for e in evidence}

# Value passed as the second argument of every pathway-level Gamma below.
# NOTE(review): whether Gamma's arguments after the name are (shape, rate) or
# (shape, scale) depends on the Gamma in scope -- confirm.
rate_prior = 0.5

# One Gamma per pathway, named 'p_<pathway>'.
ap = {p: Gamma('p_' + p, rate_prior, 1) for p in pathways}
# For every pathway, one Gamma per entry of path_dict[p], named
# 'b_{<pathway>,<feature>}' and taking the pathway's own Gamma (ap[p]) as its
# second argument.
bmp = {
    p: {
        feat: Gamma('b_{' + p + ',' + feat + '}', ap[p], 1)
        for feat in path_dict[p]
    }
    for p in pathways
}
# Created empty here; presumably filled in by code that follows.
y_bmp = {}
virtual = {}