예제 #1
0
def main_test():
    """Train on the configured training cell and evaluate on the test cell.

    Uses the module-level ``df``, ``sig`` and ``dict_params``. One
    feature/label pair is loaded for ``dict_params['train_cell']`` and one
    for ``dict_params['test_cell']``; both are then passed, together with
    ``dict_params``, to ``le.test_model``.

    The two values returned by ``le.test_model`` are unpacked into locals
    and are not returned to the caller.
    """

    def _load(cell_key):
        # Same loader settings for both splits: single cell, dmso=True.
        return l.features_labels(df,
                                 sig,
                                 dict_params[cell_key],
                                 all_cells=False,
                                 dmso=True)

    train_x, train_y = _load('train_cell')
    eval_x, eval_y = _load('test_cell')
    stats, cm = le.test_model(train_x, train_y, eval_x, eval_y, dict_params)
예제 #2
0
def test_best_estimators(df_max, test_cells):
    """Evaluate every parameter set in ``df_max`` against each test cell.

    Args:
        df_max: Table of parameter sets, one per row (presumably a pandas
            DataFrame -- it must support ``.T.to_dict()``). Each row is
            turned into a ``dict_params`` mapping and must provide a
            ``'train_cell'`` entry.
        test_cells: Iterable of cell identifiers to evaluate on.

    Returns:
        A list of dicts, one per evaluated (parameter set, test cell)
        pair, each merging the parameter dict with the first value
        returned by ``le.test_model``. Pairs whose test split has more
        distinct labels than the training split are skipped.
    """
    test_experiments = []
    for dict_params in df_max.T.to_dict().values():
        dict_params['holdout_off'] = True
        for test_cell in test_cells:
            dict_params['test_cell'] = test_cell
            # NOTE(review): the training split depends only on
            # 'train_cell' yet is reloaded for every test cell; it is left
            # inside the loop because it is not visible here whether
            # le.test_model modifies its inputs.
            features, labels = l.features_labels(df,
                                                 sig,
                                                 dict_params['train_cell'],
                                                 all_cells=False,
                                                 dmso=True)
            test_features, test_labels = l.features_labels(
                df, sig, dict_params['test_cell'], all_cells=False, dmso=True)
            # Only evaluate when the training split has at least as many
            # distinct labels as the test split.
            if len(set(labels)) < len(set(test_labels)):
                continue
            ts, cm = le.test_model(features, labels, test_features,
                                   test_labels, dict_params)
            # Merge into a fresh dict so later mutations of dict_params
            # ('test_cell' changes each iteration) do not alter stored rows.
            test_experiments.append({**dict_params, **ts})
    return test_experiments
예제 #3
0
def main_cv():
    """Run 10-fold cross-validation for the module-level ``dict_params``.

    When ``dict_params['train_cell']`` equals ``'artificial'`` a synthetic
    dataset is generated with ``make_classification`` (1000 samples,
    111 classes, 978 features, 100 informative); otherwise the features
    and labels for that cell are loaded through ``l.features_labels``.

    The cross-validation result is merged with ``dict_params`` into a
    single-element local list; nothing is returned.
    """
    use_synthetic = dict_params['train_cell'] == 'artificial'
    if use_synthetic:
        data_x, data_y = make_classification(n_samples=1000,
                                             n_classes=111,
                                             n_features=978,
                                             n_informative=100)
    else:
        data_x, data_y = l.features_labels(df,
                                           sig,
                                           dict_params['train_cell'],
                                           all_cells=False,
                                           dmso=True)
    fold_result = le.crossval(data_x, data_y, dict_params, n_folds=10)
    cv_experiments = [{**dict_params, **fold_result}]
예제 #4
0
def experiments_cv(dict_experiments, randomized=0):
    """Cross-validate a grid of experiment settings and log each result.

    Args:
        dict_experiments: Experiment specification passed to
            ``le.grid_search``, which yields the parameter dicts to run.
            Each parameter dict must contain ``'train_cell'`` and
            ``'all_cells'``.
        randomized: If truthy, evaluate only a random sample of this many
            parameter dicts (``random.sample``) instead of the full list.

    Returns:
        A ``pandas.DataFrame`` with one row per evaluated parameter dict,
        combining the parameters with the output of ``le.crossval``.

    Side effects:
        Prints each parameter dict as a one-row DataFrame and appends each
        result row to ``libsvm_experiments.csv``. A header line is written
        once per call, before the first row.
    """
    list_dict_params = le.grid_search(dict_experiments)
    if randomized:
        list_dict_params = random.sample(list_dict_params, randomized)

    cv_experiments = []
    write_header = True
    for dict_params in list_dict_params:
        print(pd.DataFrame(dict_params, index=[0]))
        features, labels = l.features_labels(
            df,
            sig,
            dict_params['train_cell'],
            all_cells=dict_params['all_cells'],
            dmso=True)
        exp = le.crossval(features, labels, dict_params, n_folds=10)
        results = {**dict_params, **exp}
        # newline='' is what pandas asks for when to_csv receives an open
        # file object; it prevents doubled line endings on Windows.
        with open('libsvm_experiments.csv', 'a', newline='') as f:
            # Write each row exactly once; the previous flag logic emitted
            # the first row twice (with and then without the header).
            pd.DataFrame(results, index=[0]).to_csv(f, header=write_header)
        write_header = False
        cv_experiments.append(results)
    return pd.DataFrame(cv_experiments)
예제 #5
0
import l1k as l
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder

# Exploratory script, organised in `#%%` cells: load the data, build the
# feature/label set for one cell, reduce it to two dimensions with PCA and
# run a 'liblinear' model on the reduced features.

# Load the dataset through the l1k helper module; three objects come back.
df, sig, genes = l.load_data()

#%%
# Features and labels restricted to the 'A375' cell (all_cells=False).
# NOTE(review): dmso=True presumably keeps the DMSO control samples --
# confirm against l1k.features_labels.
features, labels = l.features_labels(df,
                                     sig,
                                     cell='A375',
                                     all_cells=False,
                                     dmso=True)
# Integer-encode the labels with scikit-learn's LabelEncoder.
# NOTE(review): labels_b is not used in the visible code; run_model below
# receives the raw `labels` -- confirm that is intended.
le = LabelEncoder()
labels_b = le.fit_transform(labels)

#%%
#labels = l.cluster_kmeans(features,100)
# Replace `features` with its 2-component PCA projection; everything below
# therefore operates on the reduced representation, not the raw features.
pca = PCA(n_components=2)
features = pca.fit_transform(features)
#%%
# Run the 'liblinear' model on the PCA-reduced features; four values are
# returned (model, preds, cm, met).
# NOTE(review): n_splits=5 presumably sets the number of cross-validation
# splits -- confirm against l1k.run_model.
model, preds, cm, met = l.run_model('liblinear',
                                    features,
                                    labels,
                                    n_splits=5,
                                    class_weight=None)
#%%