def hmdb_disease_analysis():
    """Run the 'fva' preprocessing step on the HMDB disease data and save it.

    The value returned by ``DataReader().read_hmdb_diseases()`` is used as a
    mapping: its keys become the labels and its values become the samples.
    The transformed samples are written, keyed by label, under the
    ``'hmdb_disease_analysis'`` writer name.
    """
    # Bound but never read afterwards; the call is kept as-is in case
    # constructing the service has side effects.
    _naming = NamingService('recon')

    diseases = DataReader().read_hmdb_diseases()
    labels, samples = list(zip(*diseases.items()))

    preprocessing = DynamicPreprocessing(['fva'])
    transformed = preprocessing.fit_transform(samples, labels)

    writer = DataWriter('hmdb_disease_analysis')
    writer.write_json(dict(zip(labels, transformed)))
def hmdb_disease_analysis_pathway_level():
    """Turn the saved HMDB disease solution into pathway-level results.

    Reads the ``'hmdb_disease_analysis'`` solution, applies the 'flux-diff'
    step of the pickled API model to it, then fits and applies the
    'pathway-scoring' and 'transport-elimination' steps. The result is
    written, keyed by label, under
    ``'hmdb_disease_analysis_pathway_level'``.
    """
    samples, labels = DataReader().read_solution('hmdb_disease_analysis')

    # NOTE: the pickle is loaded from a local model file; only load files
    # from a trusted source.
    with open('../models/api_model.p', 'rb') as model_file:
        api_model = pickle.load(model_file)

    pathway_steps = ['pathway-scoring', 'transport-elimination']
    pathway_preprocessing = DynamicPreprocessing(pathway_steps)

    # Only the 'flux-diff' step of the stored model is applied here.
    flux_diff = api_model._model.named_steps['flux-diff']
    flux_differences = flux_diff.transform(samples)
    pathway_scores = pathway_preprocessing.fit_transform(
        flux_differences, labels)

    writer = DataWriter('hmdb_disease_analysis_pathway_level')
    writer.write_json(dict(zip(labels, pathway_scores)))
def pathifier(disease_name):
    """Score pathway deregulation for a disease with the R 'pathifier' package.

    Reads the network model and the dataset named ``disease_name``, applies
    the 'metabolic-standard' preprocessing, groups the measured metabolites
    by reaction subsystem (skipping subsystems whose name starts with
    'Transport' or 'Exchange'), and calls
    ``quantify_pathways_deregulation`` through rpy2. The per-pathway scores
    are written, with a leading 'stage' column holding the labels, to
    ``../dataset/disease/<disease_name>_regulization.csv``.

    Args:
        disease_name: Name of the dataset passed to ``DataReader.read_data``;
            also used in the output file name.

    Raises:
        ValueError: If no subsystem contains any metabolite present in the
            data, so there is nothing to score.
    """
    model = DataReader().read_network_model()
    X, y = DataReader().read_data(disease_name)
    X = DynamicPreprocessing(['metabolic-standard']).fit_transform(X, y)

    # One row per sample, one column per metabolite.
    df = pd.DataFrame(X)

    # The flattened values are ordered metabolite by metabolite (row-major
    # over the transposed frame). R's matrix() fills column by column unless
    # byrow is set, so byrow=True is required for row i of the R matrix to
    # correspond to all_metabolite_ids[i].
    # `.values` replaces DataFrame.as_matrix(), which newer pandas removed.
    metabolite_fold_changes = robj.r.matrix(
        robj.FloatVector(df.values.T.ravel().tolist()),
        nrow=df.shape[1], byrow=True)
    all_metabolite_ids = robj.StrVector(list(df))

    subsystem_metabolite = defaultdict(set)
    for reaction in model.reactions:
        subsystem = reaction.subsystem
        if not subsystem or subsystem.startswith(('Transport', 'Exchange')):
            continue
        # `m.id in df` tests membership among the frame's column labels.
        subsystem_metabolite[subsystem].update(
            m.id for m in reaction.metabolites if m.id in df)

    # Keep only subsystems that have at least one measured metabolite.
    measured = [(name, metabolites)
                for name, metabolites in subsystem_metabolite.items()
                if metabolites]
    if not measured:
        raise ValueError(
            'no pathway contains a measured metabolite for %r' % disease_name)
    pathway_names, pathway_metabolites = zip(*measured)

    pathway_metabolites = robj.r['list'](
        *[robj.StrVector(list(metabolites))
          for metabolites in pathway_metabolites])
    pathway_names = robj.StrVector(list(pathway_names))
    is_healthy = robj.BoolVector([label == 'h' for label in y])

    # Named differently from this function to avoid shadowing it locally.
    pathifier_pkg = importr("pathifier")
    result = pathifier_pkg.quantify_pathways_deregulation(
        metabolite_fold_changes,
        all_metabolite_ids,
        pathway_metabolites,
        pathway_names,
        is_healthy,
        attempts=100,
        min_exp=0,
        min_std=0)

    reg_scores = {
        pathway: list(scores[:])
        for pathway, scores in dict(result.items())['scores'].items()
    }

    scores_df = pd.DataFrame(reg_scores)
    scores_df.insert(0, 'stage', y)
    scores_df.to_csv(
        '../dataset/disease/%s_regulization.csv' % disease_name, index=False)
def lasting_anaylsis():
    """Run the 'fva' preprocessing on the sample in '../dataset/lasting.json'.

    The sample is fitted and transformed with the single label ``'bc'``.

    Returns:
        The output of ``DynamicPreprocessing(['fva']).fit_transform``. The
        result is returned so callers can inspect it, instead of the function
        stopping in an interactive debugger.
    """
    # Context manager so the file handle is closed deterministically.
    with open('../dataset/lasting.json') as f:
        sample = json.load(f)
    return DynamicPreprocessing(['fva']).fit_transform(sample, ['bc'])
def save_analysis(analysis_id, concentration_changes):
    """Compute reaction and pathway results and store them on an Analysis.

    The pickled API model transforms ``concentration_changes`` into
    reaction-level results; the 'pathway-scoring' and 'transport-elimination'
    steps then turn those into pathway-level results. Both are passed through
    ``clean_name_tag`` and saved on the ``Analysis`` row, which is also marked
    finished with the current time.

    Args:
        analysis_id: Primary key handed to ``Analysis.query.get``.
        concentration_changes: Input passed to the reaction model's
            ``transform``.
    """
    # NOTE: the pickle is loaded from a local model file; only load files
    # from a trusted source.
    with open('../models/api_model.p', 'rb') as model_file:
        reaction_model = pickle.load(model_file)

    pathway_steps = ['pathway-scoring', 'transport-elimination']
    pathway_model = DynamicPreprocessing(pathway_steps)

    reaction_results = reaction_model.transform(concentration_changes)
    pathway_results = pathway_model.transform(reaction_results)

    record = Analysis.query.get(analysis_id)
    record.results_reaction = record.clean_name_tag(reaction_results)
    record.results_pathway = record.clean_name_tag(pathway_results)
    record.status = True
    record.end_time = datetime.datetime.now()

    db.session.commit()
def healties_model():
    """Fit the 'fva' + 'flux-diff' model on healthy 'BC' data and pickle it.

    The healthy samples are first passed through the 'naming' and
    'basic-fold-change-scaler' steps; the fitted model is dumped to
    ``../outputs/api_model.p``.
    """
    samples, labels = DataReader().read_healthy('BC')

    scaling = DynamicPreprocessing(['naming', 'basic-fold-change-scaler'])
    scaled = scaling.fit_transform(list(samples), labels)

    flux_model = DynamicPreprocessing(['fva', 'flux-diff'])
    flux_model.fit(scaled, labels)

    with open('../outputs/api_model.p', 'wb') as model_file:
        pickle.dump(flux_model, model_file)
def eliminate_best_k():
    """Write one FVA dataset per feature-elimination level for 'BC'.

    For k = 1, 11, 21, ... up to the number of keys in the first sample, the
    data is run through a pipeline that standardises it, vectorises it,
    applies ``SelectNotKBest(k=k)``, converts back to dicts and runs the
    'fva' step. Each result is written under ``'bc_disease_analysis#k=<k>'``.
    """
    samples, labels = DataReader().read_data('BC')
    feature_count = len(samples[0].keys())

    for k in range(1, feature_count + 1, 10):
        vectorizer = DictVectorizer(sparse=False)
        not_k_best = SelectNotKBest(k=k)

        # Pipeline used to compare the model after eliminating some features.
        steps = [
            ('metabolic',
             DynamicPreprocessing(['naming', 'metabolic-standard'])),
            ('vect', vectorizer),
            ('selector', not_k_best),
            ('inv_vect', InverseDictVectorizer(vectorizer, not_k_best)),
            ('fva', DynamicPreprocessing(['fva'])),
        ]
        transformed = Pipeline(steps).fit_transform(samples, labels)

        writer = DataWriter('bc_disease_analysis#k=%s' % k)
        writer.write_json_dataset(transformed, labels)
def elimination_tabular():
    """Print metabolite- vs reaction-level F1 scores per elimination level.

    For k = 1, 11, 21, ... up to the number of keys in the first 'BC'
    sample, two classifiers are cross-validated (10 stratified folds,
    'f1_micro'):

    * 'metabolite': a pipeline over the raw 'BC' data with
      ``SelectNotKBest(k=k)``, PCA and logistic regression;
    * 'reaction': ``FVADiseaseClassifier`` over the dataset stored in
      ``../dataset/solutions/bc_disease_analysis#k=<k>.json``.

    Iteration stops at the first k whose solution file cannot be read or
    parsed. The scores collected so far are printed as a DataFrame.
    """
    (X, y) = DataReader().read_data('BC')
    datasets = {'metabolite': DataReader().read_data('BC')}
    scores = list()

    # random_state is omitted on purpose: the folds are not shuffled, so it
    # had no effect, and recent scikit-learn versions reject a random_state
    # when shuffle is False.
    kf = StratifiedKFold(n_splits=10)

    for k in range(1, len(X[0].keys()) + 1, 10):
        vect = DictVectorizer(sparse=False)
        selector = SelectNotKBest(k=k)

        clfs = dict()
        # Pipeline used to compare the model after eliminating some features.
        clfs['metabolite'] = Pipeline([
            ('metabolic',
             DynamicPreprocessing(['naming', 'metabolic-standard'])),
            ('vect', vect),
            ('selector', selector),
            ('pca', PCA()),
            ('clf', LogisticRegression(C=0.01, random_state=43)),
        ])

        path = '../dataset/solutions/bc_disease_analysis#k=%d.json' % k
        try:
            # Only the first line of the file is used; the context manager
            # closes the handle.
            with open(path) as f:
                first_line = f.readline()
            datasets['reaction'] = list(zip(*json.loads(first_line)))
        except (OSError, ValueError):
            # Missing/unreadable file (OSError) or empty/invalid JSON
            # (ValueError): report what has been scored so far and stop.
            break

        clfs['reaction'] = FVADiseaseClassifier()

        score = {
            name: np.mean(
                cross_val_score(clf, datasets[name][0], datasets[name][1],
                                cv=kf, n_jobs=-1, scoring='f1_micro'))
            for name, clf in clfs.items()
        }
        score['iteration'] = k
        scores.append(score)

    print(pd.DataFrame(scores))
def fva_range_with_basic_analysis_save():
    """Fit the fold-change + FVA preprocessing on 'BC' and stream its output.

    Every value of every sample is rounded to three decimals in place, the
    'naming', 'basic-fold-change-scaler' and 'fva' steps are fitted on the
    rounded data, and the fitted ``transform`` is handed together with the
    samples to ``write_json_stream`` under
    ``'fva_solution_with_basic_fold_change'``.
    """
    samples, labels = DataReader().read_data('BC')

    # Update the existing dicts rather than replacing them, so the objects
    # returned by the reader are the ones that get rounded.
    for sample in samples:
        sample.update(
            {name: round(value, 3) for name, value in sample.items()})

    steps = ['naming', 'basic-fold-change-scaler', 'fva']
    fitted = DynamicPreprocessing(steps).fit(samples, labels)
    print('model trained...')

    writer = DataWriter('fva_solution_with_basic_fold_change')
    writer.write_json_stream(fitted.transform, samples)