def main_test():
    """Train on one cell line and evaluate on another.

    Reads the module-level ``df``, ``sig`` and ``dict_params`` globals
    (``dict_params`` must hold 'train_cell' and 'test_cell' keys —
    presumably set elsewhere in this script; TODO confirm).

    Returns:
        tuple: ``(stats, cm)`` as produced by ``le.test_model`` — the test
        statistics and the confusion matrix.
    """
    # Training data from the configured training cell line.
    features, labels = l.features_labels(
        df, sig, dict_params['train_cell'], all_cells=False, dmso=True)
    # Held-out data from the (different) test cell line.
    test_features, test_labels = l.features_labels(
        df, sig, dict_params['test_cell'], all_cells=False, dmso=True)
    stats, cm = le.test_model(features, labels, test_features, test_labels,
                              dict_params)
    # Fix: the original computed stats/cm and then discarded them.
    return stats, cm
def test_best_estimators(df_max, test_cells):
    """Re-evaluate the best estimators from ``df_max`` on other cell lines.

    Args:
        df_max: DataFrame whose transposed rows (``df_max.T.to_dict()``)
            are hyper-parameter dicts, each with a 'train_cell' entry.
        test_cells: iterable of cell-line names to test each estimator on.

    Returns:
        list[dict]: one record per (estimator, test cell) pair that was
        evaluated, merging the hyper-parameters with the test statistics
        returned by ``le.test_model``.
    """
    test_experiments = []
    # Fix: removed the unused `list_dict_params = list(df_max.T.to_dict())`
    # computation from the original.
    for dict_params in df_max.T.to_dict().values():
        dict_params['holdout_off'] = True
        for test_cell in test_cells:
            dict_params['test_cell'] = test_cell
            features, labels = l.features_labels(
                df, sig, dict_params['train_cell'], all_cells=False,
                dmso=True)
            test_features, test_labels = l.features_labels(
                df, sig, dict_params['test_cell'], all_cells=False,
                dmso=True)
            # Only evaluate when the training set covers at least as many
            # classes as the test set; otherwise some test labels could
            # never be predicted.
            if len(set(labels)) >= len(set(test_labels)):
                ts, cm = le.test_model(features, labels, test_features,
                                       test_labels, dict_params)
                test_experiments.append({**dict_params, **ts})
    return test_experiments
def main_cv():
    """Cross-validate a model configured by the global ``dict_params``.

    When ``dict_params['train_cell']`` is 'artificial', a synthetic
    classification problem is generated instead of loading L1000 data.

    Returns:
        list[dict]: the cross-validation records (params merged with the
        metrics returned by ``le.crossval``).
    """
    # Not imported at the top of this file (sklearn is already a file
    # dependency) — import locally so the function is self-contained.
    from sklearn.datasets import make_classification

    cv_experiments = []
    if dict_params['train_cell'] == 'artificial':
        # Synthetic stand-in sized like L1000 data (978 landmark genes,
        # many classes).
        features, labels = make_classification(
            n_samples=1000, n_classes=111, n_features=978,
            n_informative=100)
    else:
        features, labels = l.features_labels(
            df, sig, dict_params['train_cell'], all_cells=False, dmso=True)
    exp = le.crossval(features, labels, dict_params, n_folds=10)
    cv_experiments.append({**dict_params, **exp})
    # Fix: the original built cv_experiments but never returned it.
    return cv_experiments
def experiments_cv(dict_experiments, randomized=0):
    """Run a (possibly randomized) grid search with 10-fold cross-validation.

    Args:
        dict_experiments: parameter grid expanded by ``le.grid_search``
            into a list of parameter-combination dicts.
        randomized: if non-zero, evaluate only that many randomly sampled
            combinations instead of the full grid.

    Returns:
        pandas.DataFrame: one row per experiment, merging parameters and
        cross-validation metrics. Each result is also appended to
        'libsvm_experiments.csv' as it is produced.
    """
    # `random` and `pd` are used below but not imported in the visible
    # top-of-file imports — import locally so the function is
    # self-contained. TODO confirm they are not imported in another chunk.
    import random
    import pandas as pd

    cv_experiments = []
    write_header = True
    list_dict_params = le.grid_search(dict_experiments)
    if randomized:
        list_dict_params = random.sample(list_dict_params, randomized)
    for dict_params in list_dict_params:
        print(pd.DataFrame(dict_params, index=[0]))
        features, labels = l.features_labels(
            df, sig, dict_params['train_cell'],
            all_cells=dict_params['all_cells'], dmso=True)
        exp = le.crossval(features, labels, dict_params, n_folds=10)
        results = {**dict_params, **exp}
        # Append incrementally so partial results survive a crash.
        with open('libsvm_experiments.csv', 'a') as f:
            # Fix: the original wrote the first result twice — once with
            # the header (inside the `ctrl` branch) and once without.
            # Write each row exactly once, header only on the first.
            pd.DataFrame(results, index=[0]).to_csv(f, header=write_header)
            write_header = False
        cv_experiments.append(results)
    return pd.DataFrame(cv_experiments)
import l1k as l
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from collections import Counter
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder

# Load the L1000 dataset once at module level; the functions above read
# these globals.
df, sig, genes = l.load_data()
#%%
features, labels = l.features_labels(df, sig, cell='A375', all_cells=False,
                                     dmso=True)
# Fix: the original bound this LabelEncoder to the name `le`, shadowing the
# experiments module `le` that the functions above call (le.test_model,
# le.crossval, le.grid_search). Renamed so those functions keep working
# after this line runs.
label_encoder = LabelEncoder()
labels_b = label_encoder.fit_transform(labels)
#%%
#labels = l.cluster_kmeans(features,100)
# Project the features down to 2D for modeling/plotting.
pca = PCA(n_components=2)
features = pca.fit_transform(features)
#%%
model, preds, cm, met = l.run_model('liblinear', features, labels,
                                    n_splits=5, class_weight=None)
#%%