def test_pipeline(self): if self.submodule is None: return d = sim.LinearSimulation(num_sample=1000, problem='classification', num_feature=5) d.reset() x_train, y_train, x_test, y_test = d.get() _, metrics = self.submodule.pipeline(x_train, y_train, x_test, y_test, problem='classification') self.assertGreater(metrics['test_acc'], 0.5) self.assertGreater(metrics['train_acc'], 0.5) self.assertGreater(metrics['micro_auc'], 0.5) self.assertGreater(metrics['macro_auc'], 0.5) d = sim.LinearSimulation(num_sample=1000, problem='regression', num_feature=5) d.reset() x_train, y_train, x_test, y_test = d.get() _, metrics = self.submodule.pipeline(x_train, y_train, x_test, y_test, problem='regression') self.assertLess(metrics['test_mse'], 1e2) self.assertLess(metrics['train_mse'], 1e2)
def test_predict(self):
    """Checks predict() of FunctionalKerasClassifier on simulated data.

    Fits the classifier on a 500-sample linear classification simulation and
    verifies that predict() returns one value per test row, that the values
    are integer-valued, and that they match the argmax of predict_proba().
    """
    simulation = sim.LinearSimulation(num_sample=500, problem='classification')
    simulation.reset()
    x_train, y_train, x_test, _ = simulation.get()

    # Model dimensions are derived from the training split.
    label_count = len(set(y_train))
    feature_count = x_train.shape[1]
    sparse_input = sparse.issparse(x_train)

    classifier = dnn.FunctionalKerasClassifier(
        build_fn=dnn.keras_build_fn,
        num_feature=feature_count,
        num_output=label_count,
        is_sparse=sparse_input,
        verbose=False)
    classifier.fit(x_train, y_train)

    predicted = classifier.predict(x_test)
    probabilities = classifier.predict_proba(x_test)

    # One prediction per test row, as a flat vector.
    expected_shape = (np.size(x_test, 0),)
    self.assertEqual(predicted.shape, expected_shape)

    # Predictions should be integer labels (up to a small relative tolerance).
    self.assertTrue(
        np.allclose(predicted, predicted.astype(int), rtol=0.0001))

    # The predicted label is the most probable class.
    most_probable = np.argmax(probabilities, axis=1)
    self.assertTrue(np.array_equal(predicted, most_probable))
def get_sim(dataset, problem='classification', which_features='all',
            alternate=False, **kwargs):
    """Builds the simulated dataset object registered under `dataset`.

    Args:
      dataset: str name of the simulation; one of 'sim_sparsity',
        'sim_cardinality', 'sim_linear', 'sim_multiplicative', 'sim_xor'.
      problem: str type of learning problem; values = 'classification',
        'regression'.
      which_features: str type of features to use; values = 'all', 'inform',
        'uninform'. Only forwarded for 'sim_sparsity' and 'sim_cardinality'.
      alternate: bool whether the alternate experiment is used. Only forwarded
        for 'sim_sparsity' and 'sim_cardinality'.
      **kwargs: Additional keyword args passed through to the simulation
        constructor.

    Returns:
      The constructed simulation object from the `sim` module.

    Raises:
      ValueError: if dataset is unknown.
    """
    feature_args = {'which_features': which_features, 'alternate': alternate}
    no_extra_args = {}

    # Pick the simulation class and the extra arguments it receives. Classes
    # are looked up only on the matching branch.
    if dataset == 'sim_sparsity':
        simulation_cls, extra_args = sim.SparsitySimulation, feature_args
    elif dataset == 'sim_cardinality':
        simulation_cls, extra_args = sim.CardinalitySimulation, feature_args
    elif dataset == 'sim_linear':
        simulation_cls, extra_args = sim.LinearSimulation, no_extra_args
    elif dataset == 'sim_multiplicative':
        simulation_cls, extra_args = sim.MultiplicativeSimulation, no_extra_args
    elif dataset == 'sim_xor':
        simulation_cls, extra_args = sim.XORSimulation, no_extra_args
    else:
        raise ValueError('Unknown dataset: {}'.format(dataset))

    call_args = {'problem': problem}
    call_args.update(extra_args)
    call_args.update(kwargs)
    return simulation_cls(**call_args)