Example #1
0
def hmdb_disease_analysis():
    """Run FVA-based analysis on the HMDB disease dataset and dump the
    results as JSON.

    Reads the HMDB disease data (mapping disease label -> measurements),
    fits the 'fva' preprocessing pipeline, and writes the transformed
    values keyed by disease label to 'hmdb_disease_analysis'.
    """
    # Labels are the dict keys, measurements the values.
    y, X = list(zip(*DataReader().read_hmdb_diseases().items()))

    dyn_pre = DynamicPreprocessing(['fva'])

    X_t = dyn_pre.fit_transform(X, y)
    DataWriter('hmdb_disease_analysis').write_json(dict(zip(y, X_t)))
Example #2
0
def hmdb_disease_analysis_pathway_level():
    """Aggregate the saved HMDB disease analysis up to pathway level.

    Loads the reaction-level solution, applies the persisted API model's
    'flux-diff' step, then scores pathways and writes the result as JSON.
    """
    X, y = DataReader().read_solution('hmdb_disease_analysis')

    # Reuse the fitted reaction-level scaler persisted by the API model.
    with open('../models/api_model.p', 'rb') as f:
        reaction_scaler = pickle.load(f)

    pathway_pipeline = DynamicPreprocessing(
        ['pathway-scoring', 'transport-elimination'])

    diffed = reaction_scaler._model.named_steps['flux-diff'].transform(X)
    scored = pathway_pipeline.fit_transform(diffed, y)

    DataWriter('hmdb_disease_analysis_pathway_level').write_json(
        dict(zip(y, scored)))
Example #3
0
def pathifier(disease_name):
    """Run the PATHIFIER algorithm (via rpy2) on a disease dataset and
    save per-pathway deregulation scores as CSV.

    Parameters
    ----------
    disease_name : str
        Name of the disease dataset to read; also used in the output
        CSV filename.
    """
    model = DataReader().read_network_model()
    X, y = DataReader().read_data(disease_name)
    pre = DynamicPreprocessing(['metabolic-standard'])

    X = pre.fit_transform(X, y)

    df = pd.DataFrame(X)
    # Build the metabolite-by-sample matrix for R.  `DataFrame.as_matrix`
    # was removed from pandas; `.values` is the compatible equivalent.
    metabolite_fold_changes = robj.r.matrix(robj.FloatVector(
        df.values.T.ravel().tolist()),
                                            nrow=df.shape[1])
    all_metabolite_ids = robj.StrVector(list(df))

    # Map each non-transport, non-exchange subsystem to the metabolites
    # it touches that are actually present in the dataset.
    subsystem_metabolite = defaultdict(set)
    for r in model.reactions:
        if r.subsystem and not (r.subsystem.startswith('Transport')
                                or r.subsystem.startswith('Exchange')):
            subsystem_metabolite[r.subsystem] \
                .update(m.id for m in r.metabolites if m.id in df)

    # Drop subsystems with no measured metabolites.
    pathway_names, pathway_metabolites = zip(
        *filter(lambda x: x[1], subsystem_metabolite.items()))

    pathway_metabolites = robj.r['list'](
        *map(lambda x: robj.StrVector(list(x)), pathway_metabolites))

    pathway_names = robj.StrVector(list(pathway_names))
    # 'h' labels mark healthy samples.
    is_healthy = robj.BoolVector(list(map(lambda x: x == 'h', y)))

    pathifier = importr("pathifier")

    result = pathifier.quantify_pathways_deregulation(metabolite_fold_changes,
                                                      all_metabolite_ids,
                                                      pathway_metabolites,
                                                      pathway_names,
                                                      is_healthy,
                                                      attempts=100,
                                                      min_exp=0,
                                                      min_std=0)

    # Extract deregulation scores from the R result object.
    regScores = dict()
    for pathway, scores in dict(result.items())['scores'].items():
        regScores[pathway] = list(scores[:])

    df = pd.DataFrame(regScores)
    df.insert(0, 'stage', y)
    df.to_csv('../dataset/disease/%s_regulization.csv' % disease_name,
              index=False)
Example #4
0
def lasting_anaylsis():
    """Debug helper: run the FVA pipeline on the 'lasting' sample and
    drop into pdb to inspect the transformed result interactively.

    NOTE: the misspelled name is kept for backward compatibility with
    any existing callers.
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original `json.load(open(...))` leaked the handle).
    with open('../dataset/lasting.json') as f:
        sample = json.load(f)

    x = DynamicPreprocessing(['fva']).fit_transform(sample, ['bc'])

    import pdb
    pdb.set_trace()
Example #5
0
def save_analysis(analysis_id, concentration_changes):
    """Score an analysis at reaction and pathway level and persist it.

    Loads the pickled reaction-level model, transforms the given
    concentration changes, stores both result levels on the Analysis
    row, marks it finished, and commits the session.
    """
    with open('../models/api_model.p', 'rb') as f:
        reaction_scaler = pickle.load(f)

    pathway_scaler = DynamicPreprocessing(
        ['pathway-scoring', 'transport-elimination'])

    reaction_results = reaction_scaler.transform(concentration_changes)
    pathway_results = pathway_scaler.transform(reaction_results)

    analysis = Analysis.query.get(analysis_id)
    analysis.results_reaction = analysis.clean_name_tag(reaction_results)
    analysis.results_pathway = analysis.clean_name_tag(pathway_results)
    analysis.status = True
    analysis.end_time = datetime.datetime.now()
    db.session.commit()
Example #6
0
def healties_model():
    """Train the FVA/flux-diff model on healthy BC samples and pickle it
    to '../outputs/api_model.p'."""
    X, y = DataReader().read_healthy('BC')

    # Normalize metabolite names and scale fold changes before training.
    preprocessed = DynamicPreprocessing(
        ['naming', 'basic-fold-change-scaler']).fit_transform(list(X), y)

    model = DynamicPreprocessing(['fva', 'flux-diff'])
    model.fit(preprocessed, y)

    with open('../outputs/api_model.p', 'wb') as f:
        pickle.dump(model, f)
Example #7
0
def eliminate_best_k():
    """For k = 1, 11, 21, ... eliminate the k best-scoring features and
    write the FVA results of each elimination level as a JSON dataset."""
    (X, y) = DataReader().read_data('BC')

    feature_count = len(X[0].keys())
    for k in range(1, feature_count + 1, 10):

        vectorizer = DictVectorizer(sparse=False)
        eliminator = SelectNotKBest(k=k)

        pipe = Pipeline([
            # pipe for compare model with eliminating some features
            ('metabolic',
             DynamicPreprocessing(['naming', 'metabolic-standard'])),
            ('vect', vectorizer),
            ('selector', eliminator),
            ('inv_vect', InverseDictVectorizer(vectorizer, eliminator)),
            ('fva', DynamicPreprocessing(['fva']))
        ])

        transformed = pipe.fit_transform(X, y)

        DataWriter('bc_disease_analysis#k=%s' % k) \
            .write_json_dataset(transformed, y)
Example #8
0
def elimination_tabular():
    """Compare metabolite-level and reaction-level classifiers while
    eliminating an increasing number of features, printing a score table.

    For each k (1, 11, 21, ...) a metabolite pipeline is cross-validated
    against the precomputed reaction-level dataset for the same k; when
    no more precomputed solutions exist the collected scores are printed
    and the function returns.
    """
    (X, y) = DataReader().read_data('BC')

    datasets = {'metabolite': DataReader().read_data('BC')}
    scores = list()

    for i in range(1, len(X[0].keys()) + 1, 10):

        vect = DictVectorizer(sparse=False)
        selector = SelectNotKBest(k=i)

        clfs = dict()

        clfs['metabolite'] = Pipeline([
            # pipe for compare model with eliminating some features
            ('metabolic',
             DynamicPreprocessing(['naming', 'metabolic-standard'])),
            ('vect', vect),
            ('selector', selector),
            ('pca', PCA()),
            ('clf', LogisticRegression(C=0.01, random_state=43))
        ])

        path = '../dataset/solutions/bc_disease_analysis#k=%d.json' % i
        try:
            # Each solution file holds JSON lines; the first line is the
            # (X, y) pair list.  Close the file deterministically instead
            # of leaking the handle inside the comprehension.
            with open(path) as f:
                datasets['reaction'] = list(
                    zip(*[json.loads(line) for line in f][0]))
        except (OSError, ValueError, IndexError):
            # Missing or malformed solution for this k: report what we
            # have and stop (narrowed from the original bare `except:`,
            # which also swallowed KeyboardInterrupt/SystemExit).
            print(pd.DataFrame(scores))
            return

        clfs['reaction'] = FVADiseaseClassifier()

        # Modern scikit-learn rejects random_state unless shuffle=True.
        kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=43)

        score = {
            name: np.mean(
                cross_val_score(clf,
                                datasets[name][0],
                                datasets[name][1],
                                cv=kf,
                                n_jobs=-1,
                                scoring='f1_micro'))
            for name, clf in clfs.items()
        }
        score['iteration'] = i
        scores.append(score)

    print(pd.DataFrame(scores))
Example #9
0
def fva_range_with_basic_analysis_save():
    """Fit the naming/fold-change/FVA pipeline on the BC dataset and
    stream the transformed solutions to JSON.

    Measurements are rounded to 3 decimals first to reduce numerical
    noise before scaling.
    """
    X, y = DataReader().read_data('BC')

    # Round each measurement in place; keys are untouched, so mutating
    # the dict while iterating items() is safe.
    for sample in X:
        for name, value in sample.items():
            sample[name] = round(value, 3)

    preproc = DynamicPreprocessing(
        ['naming', 'basic-fold-change-scaler', 'fva']).fit(X, y)

    print('model trained...')

    DataWriter('fva_solution_with_basic_fold_change') \
        .write_json_stream(preproc.transform, X)