예제 #1
0
	def predict(self, X, x_features, x_features_genecode, x_sample_names=None, outdir=None, modelnames=None, get_preds_dict=False, return_plotdf=False):
		"""Run the selected ensemble models on X and return their predictions.

		Every model named in ``modelnames`` (or in ``self.model_names`` when
		``modelnames`` is None) is built as a ``cancerscope.scopemodel`` from
		its entry in ``self.downloaded_models_dict``, fitted, and asked for a
		predictions dict, which is stored in ``self.predict_dict`` under the
		model name. ``self.predict_dict`` is reset at the start of every call.

		Return value, in order of precedence:
		  * ``self.predict_dict`` when ``get_preds_dict is True``;
		  * the output of ``scope_plots.get_plotting_df`` when
		    ``return_plotdf is True`` (``outdir`` is not used on this path);
		  * otherwise the output of ``get_ensemble_score``, with a
		    'sample_name' column added when ``x_sample_names`` is given. If
		    ``outdir`` is given, ``scope_plots.plot_cases`` is also called
		    with ``save_txt=True``.

		Both flags are compared with ``is True``, so only the literal ``True``
		enables them; other truthy values are ignored.
		"""
		self.predict_dict = {}
		default_models = self.model_names
		selected_models = default_models if modelnames is None else modelnames

		for model_key in selected_models:
			# One scopemodel per ensemble member
			single_model = cancerscope.scopemodel(self.downloaded_models_dict[model_key])
			single_model.fit()
			# Express the model's features (gene code "SCOPE") in the gene code of the input
			features_in_input_code = map_gene_names(single_model.features, genecode_out = x_features_genecode, genecode_in = "SCOPE")
			# NOTE(review): presumably aligns X's columns to the model features and
			# fills missing ones with 0.0 -- confirm against map_train_test_features_x
			aligned_x = map_train_test_features_x(X, training_features = features_in_input_code, test_features = x_features, fillzero=True, defaultvalue=0.0)
			self.predict_dict[model_key] = single_model.predict(aligned_x, get_predictions_dict=True)
			# Release the model before the next one is loaded, to free up memory
			del single_model
			for _ in range(3):
				gc.collect()

		if get_preds_dict is True:
			return self.predict_dict

		if return_plotdf is True:
			return cancerscope.scope_plots.get_plotting_df(self.predict_dict, x_sample_names = x_sample_names)

		# Combine the per-model predictions into the ensemble score table
		ensemble_df = get_ensemble_score(self.predict_dict)
		if x_sample_names is not None:
			sample_indices = ensemble_df['sample_ix'].tolist()
			ensemble_df['sample_name'] = [x_sample_names[ix] for ix in sample_indices]
		if outdir is not None:
			sys.stdout.write("\nOutdir provided, so writing prediction dataframe and plotting background data to files\n\tAlso generating per-sample plots at {0}".format(outdir))
			background_df = cancerscope.scope_plots.get_plotting_df(self.predict_dict, x_sample_names = x_sample_names)
			cancerscope.scope_plots.plot_cases(ensemble_df, background_df, outdir=outdir, save_txt=True)
		return ensemble_df
예제 #2
0
    def test_predict(self):
        """Check prediction, input normalization and Jacobian of the v1_rm500 model."""
        MY_TEST_MODEL = "v1_rm500"
        n_genes = 17688   # number of input features used below
        n_classes = 66    # number of output classes expected below
        x_test = np.genfromtxt("tests/data/ensg_input.txt", delimiter="\t")

        # Use the local copy when findmodel() reports one, download otherwise.
        query_localdirs = cancerscope.get_models.findmodel(
            os.path.dirname(cancerscope.__file__), MY_TEST_MODEL)
        if query_localdirs is not None:
            model_in = query_localdirs[MY_TEST_MODEL]
        else:
            model_in = cancerscope.get_models.downloadmodel(
                model_label=MY_TEST_MODEL)

        # getmodel() must report the same value as the lookup above.
        modeldir_v1_rm500 = cancerscope.get_models.getmodel(
            model_label=MY_TEST_MODEL)
        self.assertEqual(modeldir_v1_rm500[MY_TEST_MODEL][MY_TEST_MODEL],
                         model_in)

        # Prediction on a single sample: column 1 of the first n_genes rows,
        # with NaNs replaced by np.nan_to_num.
        lmodel = cancerscope.scopemodel(model_in)
        lmodel.fit()
        random_sample = np.nan_to_num(x_test[0:n_genes, 1].reshape(1, n_genes))
        pred_testX = lmodel.predict(random_sample)[0][0][0]
        self.assertEqual(pred_testX, "ESCA_TS")

        allpreds_testX = lmodel.predict(random_sample,
                                        get_all_predictions=True,
                                        get_numeric=False,
                                        get_predictions_dict=False)[0]
        allpredsNumeric_testX = lmodel.predict(random_sample,
                                               get_all_predictions=True,
                                               get_numeric=True,
                                               get_predictions_dict=False)[0]

        self.assertEqual(len(allpreds_testX), n_classes)
        self.assertEqual(len(allpredsNumeric_testX), n_classes)
        self.assertEqual(allpreds_testX[0], "BRCA_TS")
        self.assertIsInstance(allpredsNumeric_testX[0], Number)

        # Normalization. Computed floats are compared with a tolerance rather
        # than exact equality, which breaks on last-digit differences.
        normalized_testX = lmodel.get_normalized_input(random_sample)[0]
        self.assertAlmostEqual(normalized_testX[0], 0.60640558591378269,
                               places=12)

        # Jacobian: one row per class, one column per gene.
        jacobian_test = lmodel.get_jacobian(random_sample)
        class0_highestjacobian = np.amax(jacobian_test[0, :])
        self.assertEqual(jacobian_test.shape[0], n_classes)
        self.assertEqual(jacobian_test.shape[1], n_genes)
        self.assertAlmostEqual(class0_highestjacobian, 0.00012377805544766,
                               places=12)
예제 #3
0
    def test_downloadModel(self):
        """Verify that the v1_rm500 model is present on disk and can be set up."""
        local_hits = cancerscope.get_models.findmodel(
            os.path.dirname(cancerscope.__file__), "v1_rm500")
        if local_hits is None:
            # Nothing found locally: fetch the model.
            model_in = cancerscope.get_models.downloadmodel(
                model_label="v1_rm500")
        else:
            model_in = local_hits["v1_rm500"]

        # The model directory and its parameter file must both exist.
        self.assertTrue(os.path.isdir(model_in))
        params_file = model_in + "/lasagne_bestparams.npz"
        self.assertTrue(os.path.exists(params_file))

        # The model must initialise and expose the expected feature count.
        lmodel = cancerscope.scopemodel(model_in)
        lmodel.fit()
        n_features = len(lmodel.features)
        self.assertEqual(n_features, 17688)
예제 #4
0
 def test_rm500dropout(self):
     """Check setup and predictions of the v1_rm500dropout model on test_tcga.txt."""
     my_test_file = "/".join([
         os.path.dirname(sys.modules["cancerscope"].__file__),
         "../tests/data/test_tcga.txt"
     ])
     scope_ensemble_obj = cancerscope.scope()
     test_X = scope_ensemble_obj.load_data(
         my_test_file)  # X, samples, features_test, in_genecode

     # Use the local copy when findmodel() reports one, download otherwise.
     model_name = "v1_rm500dropout"
     query_localdirs = cancerscope.get_models.findmodel(
         os.path.dirname(cancerscope.__file__), model_name)
     if query_localdirs is not None:
         model_in = query_localdirs[model_name]
     else:
         model_in = cancerscope.get_models.downloadmodel(
             model_label=model_name)
     self.assertTrue(os.path.isdir(model_in))
     self.assertTrue(
         os.path.exists("".join([model_in, "/lasagne_bestparams.npz"])))

     # The model must initialise and expose the expected feature count.
     lmodel = cancerscope.scopemodel(model_in)
     lmodel.fit()
     self.assertEqual(len(lmodel.features), 17688)
     x_input = lmodel.prepare_input_featorders(
         X=test_X[0], x_features_genecode=test_X[3], x_features=test_X[2])

     # Request every output format predict() offers for the same input.
     allpreds_names = lmodel.predict(x_input,
                                     get_all_predictions=True,
                                     get_numeric=False,
                                     get_predictions_dict=False)
     allpreds_values = lmodel.predict(x_input,
                                      get_all_predictions=True,
                                      get_numeric=True,
                                      get_predictions_dict=False)
     toppreds_names = lmodel.predict(x_input,
                                     get_all_predictions=False,
                                     get_numeric=False,
                                     get_predictions_dict=False)
     toppreds_values = lmodel.predict(x_input,
                                      get_all_predictions=False,
                                      get_numeric=True,
                                      get_predictions_dict=False)
     toppreds_df = lmodel.predict(x_input,
                                  get_all_predictions=True,
                                  get_numeric=False,
                                  get_predictions_dict=True)

     self.assertEqual(len(allpreds_names[0]), 66)
     self.assertEqual(len(allpreds_names[1]), 66)
     self.assertEqual(allpreds_values.shape[1], 66)
     self.assertEqual(toppreds_names[0], "TFRI_GBM_NCL_TS")
     self.assertEqual(toppreds_names[1], "TFRI_GBM_NCL_TS")

     # Reference values are given to 12 decimals. assertAlmostEqual compares
     # the difference, so two nearly identical values cannot fail by landing
     # on opposite sides of a rounding boundary as round(a) == round(b) can.
     self.assertAlmostEqual(allpreds_values[0][1], 0.001936952939,
                            places=12)
     self.assertAlmostEqual(toppreds_values[0], 0.576824542222, places=12)
     self.assertAlmostEqual(toppreds_values[1], 0.516448831323, places=12)

     # The dict-style output must agree with the plain top predictions.
     self.assertEqual(toppreds_df[0][0][0], toppreds_names[0])
     self.assertAlmostEqual(float(toppreds_df[0][0][1]), toppreds_values[0])
     self.assertEqual(toppreds_df[1][0][0], toppreds_names[1])
     self.assertAlmostEqual(float(toppreds_df[1][0][1]), toppreds_values[1])
예제 #5
0
 def test_rm500(self):
     """Check setup and predictions of the v1_rm500 model on test_tcga.txt."""
     my_test_file = "/".join([
         os.path.dirname(sys.modules["cancerscope"].__file__),
         "../tests/data/test_tcga.txt"
     ])
     scope_ensemble_obj = cancerscope.scope()
     test_X = scope_ensemble_obj.load_data(
         my_test_file)  # X, samples, features_test, in_genecode

     # Use the local copy when findmodel() reports one, download otherwise.
     model_name = "v1_rm500"
     query_localdirs = cancerscope.get_models.findmodel(
         os.path.dirname(cancerscope.__file__), model_name)
     if query_localdirs is not None:
         model_in = query_localdirs[model_name]
     else:
         model_in = cancerscope.get_models.downloadmodel(
             model_label=model_name)
     self.assertTrue(os.path.isdir(model_in))
     self.assertTrue(
         os.path.exists("".join([model_in, "/lasagne_bestparams.npz"])))

     # The model must initialise and expose the expected feature count.
     lmodel = cancerscope.scopemodel(model_in)
     lmodel.fit()
     self.assertEqual(len(lmodel.features), 17688)
     x_input = lmodel.prepare_input_featorders(
         X=test_X[0], x_features_genecode=test_X[3], x_features=test_X[2])

     # Request every output format predict() offers for the same input.
     allpreds_names = lmodel.predict(x_input,
                                     get_all_predictions=True,
                                     get_numeric=False,
                                     get_predictions_dict=False)
     allpreds_values = lmodel.predict(x_input,
                                      get_all_predictions=True,
                                      get_numeric=True,
                                      get_predictions_dict=False)
     toppreds_names = lmodel.predict(x_input,
                                     get_all_predictions=False,
                                     get_numeric=False,
                                     get_predictions_dict=False)
     toppreds_values = lmodel.predict(x_input,
                                      get_all_predictions=False,
                                      get_numeric=True,
                                      get_predictions_dict=False)
     toppreds_df = lmodel.predict(x_input,
                                  get_all_predictions=True,
                                  get_numeric=False,
                                  get_predictions_dict=True)

     self.assertEqual(len(allpreds_names[0]), 66)
     self.assertEqual(len(allpreds_names[1]), 66)
     self.assertEqual(allpreds_values.shape[1], 66)
     self.assertAlmostEqual(allpreds_values[0][1], 0.003065253372039)
     self.assertEqual(toppreds_names[0], "PAAD_TS")
     self.assertEqual(toppreds_names[1], "HNSC_TS")

     # Top scores are checked to 6 decimal places. The tolerance is passed
     # by keyword: the 4th positional argument of assertAlmostEqual is
     # `msg`, so a positional 0.000001 there only replaces the failure text.
     self.assertAlmostEqual(toppreds_values[0], 0.20889836023919614,
                            places=6)
     self.assertAlmostEqual(toppreds_values[1], 0.44416348623870444,
                            places=6)

     # The dict-style output must agree with the plain top predictions.
     self.assertEqual(toppreds_df[0][0][0], toppreds_names[0])
     self.assertAlmostEqual(float(toppreds_df[0][0][1]), toppreds_values[0])
     self.assertEqual(toppreds_df[1][0][0], toppreds_names[1])
     self.assertAlmostEqual(float(toppreds_df[1][0][1]), toppreds_values[1])