def predict(self, X, x_features, x_features_genecode, x_sample_names=None, outdir=None, modelnames=None, get_preds_dict=False, return_plotdf=False):
    """Run each selected ensemble member on ``X`` and combine the outputs.

    For every model key, a ``cancerscope.scopemodel`` is built from
    ``self.downloaded_models_dict``, fitted, and asked for a predictions
    dict on a copy of ``X`` re-ordered to the model's own features. The
    per-model results are stored in ``self.predict_dict`` (reset on every
    call).

    Args:
        X: Input data handed to ``map_train_test_features_x``
            (presumably samples x features -- confirm against caller).
        x_features: Feature names describing ``X``.
        x_features_genecode: Gene naming scheme of ``x_features``; the
            model features are mapped from "SCOPE" into this scheme.
        x_sample_names: Optional indexable of sample labels, looked up by
            the ``sample_ix`` column of the ensemble frame.
        outdir: When not ``None``, the ensemble frame and the plotting
            background frame are passed to ``scope_plots.plot_cases``.
        modelnames: Optional model keys to use instead of
            ``self.model_names``.
        get_preds_dict: When exactly ``True``, return the raw per-model
            dict and skip all aggregation.
        return_plotdf: When exactly ``True``, return the plotting frame
            instead of the ensemble score frame.

    Returns:
        ``self.predict_dict``, the plotting dataframe, or the ensemble
        score dataframe, depending on the two flags above.
    """
    self.predict_dict = {}
    default_models = self.model_names
    selected_models = default_models if modelnames is None else modelnames

    # Evaluate one ensemble member (class scopemodel) at a time.
    for model_key in selected_models:
        member = cancerscope.scopemodel(self.downloaded_models_dict[model_key])
        member.fit()

        # Express the model's training features in the input's gene code,
        # then line the input columns up with them (missing ones become 0.0).
        member_features = map_gene_names(member.features, genecode_out = x_features_genecode, genecode_in = "SCOPE")
        aligned_x = map_train_test_features_x(X, training_features = member_features, test_features = x_features, fillzero=True, defaultvalue=0.0)
        self.predict_dict[model_key] = member.predict(aligned_x, get_predictions_dict=True)

        # Drop the model reference and collect repeatedly to free memory
        # before the next member is loaded.
        member = None
        for _ in range(3):
            gc.collect()

    if get_preds_dict is True:
        return self.predict_dict

    if return_plotdf is True:
        return cancerscope.scope_plots.get_plotting_df(self.predict_dict, x_sample_names = x_sample_names)

    # Collapse the per-model predictions into the ensemble score frame.
    ens_df = get_ensemble_score(self.predict_dict)
    if x_sample_names is not None:
        sample_indices = ens_df['sample_ix'].tolist()
        ens_df['sample_name'] = [x_sample_names[ix] for ix in sample_indices]

    # NOTE(review): file output happens only on the ensemble-score path, so
    # outdir has no effect when return_plotdf is True -- confirm intended.
    if outdir is not None:
        sys.stdout.write("\nOutdir provided, so writing prediction dataframe and plotting background data to files\n\tAlso generating per-sample plots at {0}".format(outdir))
        plot_bg_df = cancerscope.scope_plots.get_plotting_df(self.predict_dict, x_sample_names = x_sample_names)
        cancerscope.scope_plots.plot_cases(ens_df, plot_bg_df, outdir=outdir, save_txt=True)
    return ens_df
def test_predict(self):
    """Test prediction, input normalization and the Jacobian for v1_rm500.

    The model directory is taken from the local install when ``findmodel``
    finds it, otherwise it is downloaded. Floating-point outputs are
    compared with ``assertAlmostEqual`` rather than exact equality.
    """
    MY_TEST_MODEL = "v1_rm500"
    N_GENES = 17688   # number of input features the model expects
    N_CLASSES = 66    # number of output classes

    # NOTE: this path is relative to the current working directory.
    x_test = np.genfromtxt("tests/data/ensg_input.txt", delimiter="\t")

    query_localdirs = cancerscope.get_models.findmodel(
        os.path.dirname(cancerscope.__file__), MY_TEST_MODEL)
    if query_localdirs is not None:
        model_in = query_localdirs[MY_TEST_MODEL]
    else:
        model_in = cancerscope.get_models.downloadmodel(
            model_label=MY_TEST_MODEL)

    # Compare results with what you get from 'getmodel()'
    modeldir_v1_rm500 = cancerscope.get_models.getmodel(
        model_label=MY_TEST_MODEL)
    self.assertEqual(modeldir_v1_rm500[MY_TEST_MODEL][MY_TEST_MODEL], model_in)

    # Test prediction
    lmodel = cancerscope.scopemodel(model_in)
    lmodel.fit()
    # Second column of the input file, reshaped to a single-sample row;
    # NaNs are replaced by zeros.
    random_sample = np.nan_to_num(x_test[0:N_GENES, 1].reshape(1, N_GENES))
    pred_testX = lmodel.predict(random_sample)[0][0][0]
    self.assertEqual(pred_testX, "ESCA_TS")

    allpreds_testX = lmodel.predict(random_sample,
                                    get_all_predictions=True,
                                    get_numeric=False,
                                    get_predictions_dict=False)[0]
    allpredsNumeric_testX = lmodel.predict(random_sample,
                                           get_all_predictions=True,
                                           get_numeric=True,
                                           get_predictions_dict=False)[0]
    self.assertEqual(len(allpreds_testX), N_CLASSES)
    self.assertEqual(len(allpredsNumeric_testX), N_CLASSES)
    self.assertEqual(allpreds_testX[0], "BRCA_TS")
    self.assertIsInstance(allpredsNumeric_testX[0], Number)

    # Test if normalization works. Compared with a tolerance instead of
    # bit-exact equality so harmless floating-point differences between
    # platforms do not fail the test.
    normalized_testX = lmodel.get_normalized_input(random_sample)[0]
    self.assertAlmostEqual(normalized_testX[0], 0.60640558591378269,
                           places=12)

    # Test if Jacobian is evaluated correctly
    jacobian_test = lmodel.get_jacobian(random_sample)
    class0_highestjacobian = np.amax(jacobian_test[0, :])
    self.assertEqual(jacobian_test.shape[0], N_CLASSES)  # rows = classes
    self.assertEqual(jacobian_test.shape[1], N_GENES)    # columns = genes
    # Same 17-decimal tolerance as before, without manual rounding.
    self.assertAlmostEqual(class0_highestjacobian, 0.00012377805544766,
                           places=17)
def test_downloadModel(self):
    """Locate or download v1_rm500, then check its files and feature count."""
    package_dir = os.path.dirname(cancerscope.__file__)
    local_hits = cancerscope.get_models.findmodel(package_dir, "v1_rm500")
    if local_hits is None:
        # Not available locally, so fetch it.
        model_in = cancerscope.get_models.downloadmodel(
            model_label="v1_rm500")
    else:
        model_in = local_hits["v1_rm500"]

    # The model directory and its parameter file must both exist.
    self.assertTrue(os.path.isdir(model_in))
    params_path = model_in + "/lasagne_bestparams.npz"
    self.assertTrue(os.path.exists(params_path))

    # Test if model can be setup correctly
    lmodel = cancerscope.scopemodel(model_in)
    lmodel.fit()
    feature_count = len(lmodel.features)
    self.assertEqual(feature_count, 17688)
def test_rm500dropout(self):
    """Test that v1_rm500dropout loads and predicts the expected values.

    Loads the bundled TCGA test file, locates (or downloads) the model,
    and checks predicted labels and scores for the first two samples.
    Scores are compared with ``assertAlmostEqual``; comparing two
    independently rounded floats can fail for near-identical values that
    fall on opposite sides of a rounding boundary.
    """
    model_name = "v1_rm500dropout"
    my_test_file = "/".join([
        os.path.dirname(sys.modules["cancerscope"].__file__),
        "../tests/data/test_tcga.txt"
    ])
    scope_ensemble_obj = cancerscope.scope()
    # Indexed below as: 0 = X, 1 = samples, 2 = features_test, 3 = in_genecode
    test_X = scope_ensemble_obj.load_data(my_test_file)

    # Get the model of interest
    query_localdirs = cancerscope.get_models.findmodel(
        os.path.dirname(cancerscope.__file__), model_name)
    if query_localdirs is not None:
        model_in = query_localdirs[model_name]
    else:
        model_in = cancerscope.get_models.downloadmodel(
            model_label=model_name)
    self.assertTrue(os.path.isdir(model_in))
    self.assertTrue(
        os.path.exists("".join([model_in, "/lasagne_bestparams.npz"])))

    # Test if model can be setup correctly
    lmodel = cancerscope.scopemodel(model_in)
    lmodel.fit()
    self.assertEqual(len(lmodel.features), 17688)
    x_input = lmodel.prepare_input_featorders(
        X=test_X[0], x_features_genecode=test_X[3], x_features=test_X[2])

    # Test if it predicts properly
    allpreds_names = lmodel.predict(x_input,
                                    get_all_predictions=True,
                                    get_numeric=False,
                                    get_predictions_dict=False)
    allpreds_values = lmodel.predict(x_input,
                                     get_all_predictions=True,
                                     get_numeric=True,
                                     get_predictions_dict=False)
    toppreds_names = lmodel.predict(x_input,
                                    get_all_predictions=False,
                                    get_numeric=False,
                                    get_predictions_dict=False)
    toppreds_values = lmodel.predict(x_input,
                                     get_all_predictions=False,
                                     get_numeric=True,
                                     get_predictions_dict=False)
    toppreds_df = lmodel.predict(x_input,
                                 get_all_predictions=True,
                                 get_numeric=False,
                                 get_predictions_dict=True)

    self.assertEqual(len(allpreds_names[0]), 66)
    self.assertEqual(len(allpreds_names[1]), 66)
    self.assertEqual(allpreds_values.shape[1], 66)
    # Reference values are given to 12 decimals, so places=12 keeps the
    # original tolerance.
    self.assertAlmostEqual(allpreds_values[0][1], 0.001936952939, places=12)
    self.assertEqual(toppreds_names[0], "TFRI_GBM_NCL_TS")
    self.assertEqual(toppreds_names[1], "TFRI_GBM_NCL_TS")
    self.assertAlmostEqual(toppreds_values[0], 0.576824542222, places=12)
    self.assertAlmostEqual(toppreds_values[1], 0.516448831323, places=12)

    # The predictions dict must agree with the top-prediction outputs.
    self.assertEqual(toppreds_df[0][0][0], toppreds_names[0])
    self.assertAlmostEqual(float(toppreds_df[0][0][1]), toppreds_values[0])
    self.assertEqual(toppreds_df[1][0][0], toppreds_names[1])
    self.assertAlmostEqual(float(toppreds_df[1][0][1]), toppreds_values[1])
def test_rm500(self):
    """Test that v1_rm500 loads and predicts the expected values.

    Loads the bundled TCGA test file, locates (or downloads) the model,
    and checks predicted labels and scores for the first two samples.
    """
    model_name = "v1_rm500"
    my_test_file = "/".join([
        os.path.dirname(sys.modules["cancerscope"].__file__),
        "../tests/data/test_tcga.txt"
    ])
    scope_ensemble_obj = cancerscope.scope()
    # Indexed below as: 0 = X, 1 = samples, 2 = features_test, 3 = in_genecode
    test_X = scope_ensemble_obj.load_data(my_test_file)

    # Get the model of interest
    query_localdirs = cancerscope.get_models.findmodel(
        os.path.dirname(cancerscope.__file__), model_name)
    if query_localdirs is not None:
        model_in = query_localdirs[model_name]
    else:
        model_in = cancerscope.get_models.downloadmodel(
            model_label=model_name)
    self.assertTrue(os.path.isdir(model_in))
    self.assertTrue(
        os.path.exists("".join([model_in, "/lasagne_bestparams.npz"])))

    # Test if model can be setup correctly
    lmodel = cancerscope.scopemodel(model_in)
    lmodel.fit()
    self.assertEqual(len(lmodel.features), 17688)
    x_input = lmodel.prepare_input_featorders(
        X=test_X[0], x_features_genecode=test_X[3], x_features=test_X[2])

    # Test if it predicts properly
    allpreds_names = lmodel.predict(x_input,
                                    get_all_predictions=True,
                                    get_numeric=False,
                                    get_predictions_dict=False)
    allpreds_values = lmodel.predict(x_input,
                                     get_all_predictions=True,
                                     get_numeric=True,
                                     get_predictions_dict=False)
    toppreds_names = lmodel.predict(x_input,
                                    get_all_predictions=False,
                                    get_numeric=False,
                                    get_predictions_dict=False)
    toppreds_values = lmodel.predict(x_input,
                                     get_all_predictions=False,
                                     get_numeric=True,
                                     get_predictions_dict=False)
    toppreds_df = lmodel.predict(x_input,
                                 get_all_predictions=True,
                                 get_numeric=False,
                                 get_predictions_dict=True)

    self.assertEqual(len(allpreds_names[0]), 66)
    self.assertEqual(len(allpreds_names[1]), 66)
    self.assertEqual(allpreds_values.shape[1], 66)
    self.assertAlmostEqual(allpreds_values[0][1], 0.003065253372039)
    self.assertEqual(toppreds_names[0], "PAAD_TS")
    self.assertEqual(toppreds_names[1], "HNSC_TS")
    # The tolerance is given as an explicit keyword: the 4th positional
    # argument of assertAlmostEqual is `msg`, not a tolerance.
    self.assertAlmostEqual(toppreds_values[0], 0.20889836023919614, places=6)
    self.assertAlmostEqual(toppreds_values[1], 0.44416348623870444, places=6)

    # The predictions dict must agree with the top-prediction outputs.
    self.assertEqual(toppreds_df[0][0][0], toppreds_names[0])
    self.assertAlmostEqual(float(toppreds_df[0][0][1]), toppreds_values[0])
    self.assertEqual(toppreds_df[1][0][0], toppreds_names[1])
    self.assertAlmostEqual(float(toppreds_df[1][0][1]), toppreds_values[1])