# Note: these snippets come from a larger test suite and assume `numpy as np`,
# `scipy.sparse as sparse`, and the project's own `sim` and `dnn` modules are
# already imported in the enclosing modules.
def test_pipeline(self):
        """Tests pipeline() end to end on simulated classification and regression data."""
        if self.submodule is None:
            return

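        # Classification: the pipeline should beat chance (0.5) on every metric.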
        d = sim.LinearSimulation(num_sample=1000,
                                 problem='classification',
                                 num_feature=5)
        d.reset()
        x_train, y_train, x_test, y_test = d.get()

        _, metrics = self.submodule.pipeline(x_train,
                                             y_train,
                                             x_test,
                                             y_test,
                                             problem='classification')

        self.assertGreater(metrics['test_acc'], 0.5)
        self.assertGreater(metrics['train_acc'], 0.5)
        self.assertGreater(metrics['micro_auc'], 0.5)
        self.assertGreater(metrics['macro_auc'], 0.5)

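        # Regression: train and test MSE should stay under a loose bound (1e2).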
        d = sim.LinearSimulation(num_sample=1000,
                                 problem='regression',
                                 num_feature=5)
        d.reset()
        x_train, y_train, x_test, y_test = d.get()

        _, metrics = self.submodule.pipeline(x_train,
                                             y_train,
                                             x_test,
                                             y_test,
                                             problem='regression')

        self.assertLess(metrics['test_mse'], 1e2)
        self.assertLess(metrics['train_mse'], 1e2)
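
The assertions above pin down pipeline()'s contract: it returns a (model, metrics) pair, with 'train_acc'/'test_acc'/'micro_auc'/'macro_auc' keys for classification and 'train_mse'/'test_mse' keys for regression. A minimal sketch of a function meeting that contract, using scikit-learn stand-ins (LogisticRegression, Ridge) rather than the repo's actual models:

import numpy as np
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.metrics import accuracy_score, mean_squared_error, roc_auc_score
from sklearn.preprocessing import label_binarize

def pipeline(x_train, y_train, x_test, y_test, problem='classification'):
    # Hypothetical stand-in for the pipeline() under test; the real
    # implementation may use different models and report extra metrics.
    if problem == 'classification':
        model = LogisticRegression(max_iter=1000).fit(x_train, y_train)
        classes = np.unique(y_train)
        y_bin = label_binarize(y_test, classes=classes)
        if y_bin.shape[1] == 1:  # binary case: binarize yields one column
            y_bin = np.hstack([1 - y_bin, y_bin])
        proba = model.predict_proba(x_test)
        metrics = {
            'train_acc': accuracy_score(y_train, model.predict(x_train)),
            'test_acc': accuracy_score(y_test, model.predict(x_test)),
            'micro_auc': roc_auc_score(y_bin, proba, average='micro'),
            'macro_auc': roc_auc_score(y_bin, proba, average='macro'),
        }
    else:
        model = Ridge().fit(x_train, y_train)
        metrics = {
            'train_mse': mean_squared_error(y_train, model.predict(x_train)),
            'test_mse': mean_squared_error(y_test, model.predict(x_test)),
        }
    return model, metrics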
Example #2
  def test_predict(self):
    """Tests the predict() function that this class adds to its parent."""
    d = sim.LinearSimulation(num_sample=500, problem='classification')
    d.reset()
    x_train, y_train, x_test, _ = d.get()

    num_class = len(set(y_train))
    num_feature = x_train.shape[1]
    is_sparse = sparse.issparse(x_train)

    clf = dnn.FunctionalKerasClassifier(
        build_fn=dnn.keras_build_fn,
        num_feature=num_feature,
        num_output=num_class,
        is_sparse=is_sparse,
        verbose=False)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    y_proba = clf.predict_proba(x_test)

    # check shape
    self.assertEqual(y_pred.shape, (np.size(x_test, 0),))
    # check predicted values (should be integer labels)
    self.assertTrue(np.all(np.isclose(y_pred, y_pred.astype(int), 0.0001)))
    self.assertTrue(np.array_equal(y_pred, np.argmax(y_proba, axis=1)))
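
The final assertion fixes what the added predict() must do: return the argmax over predict_proba()'s class-probability matrix as integer labels. A minimal sketch of such an override (the mixin name is hypothetical; only a predict_proba() method is assumed, as in the test):

import numpy as np

class ArgmaxPredictMixin:
    """Hypothetical mixin: predict() as the argmax of predict_proba()."""

    def predict(self, x):
        proba = self.predict_proba(x)    # shape: (num_sample, num_class)
        return np.argmax(proba, axis=1)  # integer labels, shape: (num_sample,)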
Example #3
def get_sim(dataset,
            problem='classification',
            which_features='all',
            alternate=False,
            **kwargs):
    """Get simulated dataset.

  Args:
    dataset: str dataset name
    problem: str type of learning problem; values = 'classification',
      'regression'
    which_features: str type of features to use; values = 'all', 'inform',
      'uninform'
    alternate: bool whether alternate experiment is used
    **kwargs: Additional args.

  Returns:
    d: dataset object
      API defined in TOOD(jisungkim)

  Raises:
    ValueError: if dataset is unknown
  """
    if dataset == 'sim_sparsity':
        return sim.SparsitySimulation(problem=problem,
                                      which_features=which_features,
                                      alternate=alternate,
                                      **kwargs)
    if dataset == 'sim_cardinality':
        return sim.CardinalitySimulation(problem=problem,
                                         which_features=which_features,
                                         alternate=alternate,
                                         **kwargs)
    if dataset == 'sim_linear':
        return sim.LinearSimulation(problem=problem, **kwargs)
    if dataset == 'sim_multiplicative':
        return sim.MultiplicativeSimulation(problem=problem, **kwargs)
    if dataset == 'sim_xor':
        return sim.XORSimulation(problem=problem, **kwargs)
    raise ValueError('Unknown dataset: {}'.format(dataset))
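
A short usage sketch, with dataset names taken from the branches above and the reset()/get() API from the earlier tests; unknown names fail fast with ValueError:

d = get_sim('sim_linear', problem='regression', num_sample=1000, num_feature=5)
d.reset()
x_train, y_train, x_test, y_test = d.get()

try:
    get_sim('sim_unknown')
except ValueError as e:
    print(e)  # Unknown dataset: sim_unknown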