Example #1
  def _get_eval_ops(self, features, targets, metrics):
    """Build the evaluation ops for the given features, targets, and metrics."""
    features, _, spec = data_ops.ParseDataTensorOrDict(features)
    labels = data_ops.ParseLabelTensorOrDict(targets)
    _assert_float32(features)
    _assert_float32(labels)

    graph_builder = self.graph_builder_class(
        self.params, device_assigner=self.device_assigner, training=False,
        **self.construction_args)

    probabilities = graph_builder.inference_graph(features, data_spec=spec)

    # One-hot the labels.
    if not self.params.regression:
      labels = math_ops.to_int64(array_ops.one_hot(math_ops.to_int64(
          array_ops.squeeze(labels)), self.params.num_classes, 1, 0))

    if metrics is None:
      metrics = {self.accuracy_metric:
                 eval_metrics.get_metric(self.accuracy_metric)}

    result = {}
    for name, metric in six.iteritems(metrics):
      result[name] = metric(probabilities, labels)

    return result
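
All of the snippets on this page come from the TensorFlow 1.x contrib stack but omit their import sections. A minimal sketch of the imports they appear to assume, based on the tf.contrib 1.x module layout (treat the exact paths as a best guess rather than part of the original code):

from tensorflow.contrib.learn.python.learn import metric_spec            # MetricSpec
from tensorflow.contrib.learn.python.learn.estimators import estimator   # SKCompat wrapper
from tensorflow.contrib.tensor_forest.client import eval_metrics         # get_metric / get_prediction_key
from tensorflow.contrib.tensor_forest.client import random_forest        # TensorForestEstimator, TensorForestLossHook
from tensorflow.contrib.tensor_forest.python import tensor_forest        # ForestHParams, graph builders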
Example #2
def rf_train(x_train, y_train, x_test, y_test):
    """Train a TensorForest classifier and print its test-set metrics."""
    params = tensor_forest.ForestHParams(num_classes=10,
                                         num_features=784,
                                         num_trees=100,
                                         max_nodes=10000)

    graph_builder_class = tensor_forest.TrainingLossForest

    est = estimator.SKCompat(
        random_forest.TensorForestEstimator(
            params,
            graph_builder_class=graph_builder_class,
            model_dir="./models"))

    est.fit(x=x_train, y=y_train, batch_size=128)

    metric_name = "accuracy"

    metric = {
        metric_name:
        metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    results = est.score(x=x_test, y=y_test, batch_size=128, metrics=metric)

    for key in sorted(results):
        print("%s: %s" % (key, results[key]))
Example #3
def train_and_eval(config):
    """Train and evaluate the model."""
    print('model directory = %s' % config.model_output)

    num_features = 1000
    model = train_rf(num_features, config)

    # Early stopping if the forest is no longer growing.
    monitor = random_forest.TensorForestLossHook(config.early_stopping_rounds)

    # TFLearn doesn't support tfrecords; extract them by hand for now
    img, label, feat = get_records(
        os.path.join(config.tfrecord_dir, 'train.tfrecords'))
    model.fit(
        x=feat, y=label,
        batch_size=config.batch_size, monitors=[monitor])

    metric_name = 'accuracy'
    metric = {metric_name: metric_spec.MetricSpec(
                eval_metrics.get_metric(metric_name),
                prediction_key=eval_metrics.get_prediction_key(metric_name))}

    test_img, test_label, test_feat = get_records(
        os.path.join(config.tfrecord_dir, 'val.tfrecords'))
    results = model.evaluate(
        x=test_img, y=test_label,
        batch_size=config.batch_size, metrics=metric)
    return results
Example #4
def train_and_eval():
    """Train and evaluate the model."""

    model_dir = 'data/model'
    print('model directory = %s' % model_dir)

    est = build_estimator(model_dir)

    mnist = input_data.read_data_sets('MNIST_data', one_hot=False)

    with tf.device('/gpu:0'):
        est.fit(x=mnist.train.images,
                y=mnist.train.labels,
                batch_size=100,
                steps=10)

        # results2=est.predict(x=mnist.test.images, y=mnist.test.labels, batch_size=100)
        # print(results2)

        metric_name = 'accuracy'
        metric = {
            metric_name:
            metric_spec.MetricSpec(
                eval_metrics.get_metric(metric_name),
                prediction_key=eval_metrics.get_prediction_key(metric_name))
        }

        results = est.score(x=mnist.test.images,
                            y=mnist.test.labels,
                            batch_size=100,
                            metrics=metric)
        for key in sorted(results):
            print('%s: %s' % (key, results[key]))
Example #5
def train_and_eval():
    """Train and evaluate the model."""
    model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
    print('model directory = %s' % model_dir)

    est = build_estimator(model_dir)

    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

    est.fit(x=mnist.train.images,
            y=mnist.train.labels,
            batch_size=FLAGS.batch_size)

    metric_name = 'accuracy'
    metric = {
        metric_name:
        metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    results = est.score(x=mnist.test.images,
                        y=mnist.test.labels,
                        batch_size=FLAGS.batch_size,
                        metrics=metric)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))
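
Most of the train_and_eval variants above and below call a build_estimator helper that is not shown here. A minimal sketch of such a helper, using the same tf.contrib API as the surrounding snippets; the hyperparameter values are placeholders rather than the original settings:

def build_estimator(model_dir):
    """Sketch of a TensorForestEstimator factory; hyperparameters are illustrative."""
    params = tensor_forest.ForestHParams(
        num_classes=10,     # e.g. ten MNIST digit classes
        num_features=784,   # e.g. 28x28 flattened pixels
        num_trees=100,
        max_nodes=10000)
    return random_forest.TensorForestEstimator(params, model_dir=model_dir)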
Example #6
def train_and_eval():
  """Train and evaluate the model."""
  model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
  print('model directory = %s' % model_dir)

  estimator = build_estimator(model_dir)

  # TensorForest's loss hook allows training to terminate early if the
  # forest is no longer growing.
  early_stopping_rounds = 100
  monitor = random_forest.TensorForestLossHook(early_stopping_rounds)

  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

  estimator.fit(x=mnist.train.images, y=mnist.train.labels,
                batch_size=FLAGS.batch_size, monitors=[monitor])

  metric_name = 'accuracy'
  metric = {metric_name:
            metric_spec.MetricSpec(
                eval_metrics.get_metric(metric_name),
                prediction_key=eval_metrics.get_prediction_key(metric_name))}

  results = estimator.evaluate(x=mnist.test.images, y=mnist.test.labels,
                               batch_size=FLAGS.batch_size,
                               metrics=metric)
  for key in sorted(results):
    print('%s: %s' % (key, results[key]))
Example #7
def train_and_eval():
    """Train and evaluate the model."""
    model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
    print('model directory = %s' % model_dir)

    est = build_estimator(model_dir)

    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

    train_input_fn = numpy_io.numpy_input_fn(x={'images': mnist.train.images},
                                             y=mnist.train.labels.astype(
                                                 numpy.int32),
                                             batch_size=FLAGS.batch_size,
                                             num_epochs=None,
                                             shuffle=True)
    est.fit(input_fn=train_input_fn, steps=None)

    metric_name = 'accuracy'
    metric = {
        metric_name:
        metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    test_input_fn = numpy_io.numpy_input_fn(x={'images': mnist.test.images},
                                            y=mnist.test.labels.astype(
                                                numpy.int32),
                                            num_epochs=1,
                                            batch_size=FLAGS.batch_size,
                                            shuffle=False)

    results = est.evaluate(input_fn=test_input_fn, metrics=metric)
    for key in sorted(results):
        print('%s: %s' % (key, results[key]))
Example #8
def train_and_eval():
  """Train and evaluate the model."""
  model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
  print('model directory = %s' % model_dir)

  est = build_estimator(model_dir)

  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

  train_input_fn = numpy_io.numpy_input_fn(
      x={'images': mnist.train.images},
      y=mnist.train.labels.astype(numpy.int32),
      batch_size=FLAGS.batch_size,
      num_epochs=None,
      shuffle=True)
  est.fit(input_fn=train_input_fn, steps=None)

  metric_name = 'accuracy'
  metric = {
      metric_name:
          metric_spec.MetricSpec(
              eval_metrics.get_metric(metric_name),
              prediction_key=eval_metrics.get_prediction_key(metric_name))
  }

  test_input_fn = numpy_io.numpy_input_fn(
      x={'images': mnist.test.images},
      y=mnist.test.labels.astype(numpy.int32),
      num_epochs=1,
      batch_size=FLAGS.batch_size,
      shuffle=False)

  results = est.evaluate(input_fn=test_input_fn, metrics=metric)
  for key in sorted(results):
    print('%s: %s' % (key, results[key]))
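
Examples #7 and #8 build their inputs with numpy_io.numpy_input_fn rather than passing arrays directly. The originals do not show where numpy_io comes from; in TensorFlow 1.x a plausible import (the same helper is also exposed publicly as tf.estimator.inputs.numpy_input_fn) is:

import numpy
from tensorflow.python.estimator.inputs import numpy_io  # provides numpy_input_fn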
Example #9
def eval(est):
  """Evaluate the model."""
  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)
  metric_name = 'accuracy'
  metric = {
      metric_name:
          metric_spec.MetricSpec(
              eval_metrics.get_metric(metric_name),
              prediction_key=eval_metrics.get_prediction_key(metric_name))
  }

  test_input_fn = numpy_io.numpy_input_fn(
      x={'images': mnist.test.images},
      y=mnist.test.labels.astype(numpy.int32),
      num_epochs=1,
      batch_size=FLAGS.batch_size,
      shuffle=False)

  results = est.evaluate(input_fn=test_input_fn, metrics=metric)
  for key in sorted(results):
    print('%s: %s' % (key, results[key]))
Example #10
    def evaluate(self, data: np.ndarray, labels: np.ndarray):
        """Predicts and directly evaluates the results.

        Examples:
            To evaluate the prediction of the decision forest use:

            >>> results = forest.evaluate(data, labels)
            >>> for key in sorted(results):
            ...     print('%s: %s' % (key, results[key]))

        Args:
            data (np.ndarray): The data to predict. ``data.shape`` is ``(n, f)`` with ``n`` observations and
                ``f`` features per observation.
            labels (np.ndarray): The labels of the ``data``. ``labels[i]`` is the label of observation ``i``;
                ``labels.shape`` is ``(n, 1)`` with ``n`` observations.
        Returns:
            dict: A dict of evaluation metrics.
        """

        if self.estimator is None:
            raise ValueError('Estimator not set')

        metrics = {
            'accuracy': metric_spec.MetricSpec(
                eval_metrics.get_metric('accuracy'),
                prediction_key=eval_metrics.get_prediction_key('accuracy')
            )
        }

        if self.report_feature_importances:
            metrics['feature_importance'] = metric_spec.MetricSpec(
                lambda x: x,
                prediction_key=eval_metrics.FEATURE_IMPORTANCE_NAME
            )

        results = self.estimator.score(x=data, y=labels, batch_size=self.batch_size,
                                       metrics=metrics)

        return results
Example #11
def train_and_eval():
  """Train and evaluate the model."""
  model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
  print('model directory = %s' % model_dir)

  est = build_estimator(model_dir)

  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)

  est.fit(x=mnist.train.images, y=mnist.train.labels,
          batch_size=FLAGS.batch_size)

  metric_name = 'accuracy'
  metric = {metric_name:
            metric_spec.MetricSpec(
                eval_metrics.get_metric(metric_name),
                prediction_key=eval_metrics.get_prediction_key(metric_name))}

  results = est.score(x=mnist.test.images, y=mnist.test.labels,
                      batch_size=FLAGS.batch_size,
                      metrics=metric)
  for key in sorted(results):
    print('%s: %s' % (key, results[key]))
Example #12
def train_and_eval(conf=None):
    global config

    # If a config is provided, use it (this supports calling the function from
    # another module); otherwise build the config from the command-line
    # arguments interpreted by the parser.
    if conf:
        config = conf
    else:
        config = {
            'train_data': train_data,
            'train_labels': train_labels,
            'test_data': test_data,
            'test_labels': test_labels,
            'num_classes': num_classes,
            'num_features': num_features,
            'num_trees': num_trees,
            'max_nodes': max_nodes,
            'train_steps': train_steps,
            'batch_size': batch_size,
            'bagging_fraction': bagging_fraction,
            'feature_bagging_fraction': feature_bagging_fraction,
            'model_dir': model_dir,
            'delete_models': delete_models,
            'data_dir': data_dir,
            'use_training_loss': use_training_loss
        }

    # convert config dict into an object, for acceptance in the following lines
    config = objectview(config)

    # if a specific directory to store the generated model is specified in the arguments, use that
    # otherwise, use a temporary directory
    model_dir = tempfile.mkdtemp() if not config.model_dir else config.model_dir

    # load the training data and cast it to float32
    if not config.train_data:
        sys.exit('Usage: --train_data <csv file>')
    train_data = loc_genfromtxt(config.train_data)
    train_data = train_data.astype(np.float32)

    if not config.train_labels:
        sys.exit('Usage: --train_labels <csv file>')
    train_labels = loc_genfromtxt(config.train_labels)
    train_labels = train_labels.astype(np.float32)

    # auto-detect number of features in training data
    # print('train_data has number of features/columns = ' + str(train_data.shape[1]))
    config.num_features = train_data.shape[1]

    # get a random forest estimator object
    est = build_estimator(model_dir)

    # fit the random forest model using the training data
    est.fit(x=train_data, y=train_labels, batch_size=config.batch_size)

    # load the test data and cast it to float32
    if not config.test_data:
        sys.exit('Usage: --test_data <csv file>')
    test_data = loc_genfromtxt(config.test_data)
    test_data = test_data.astype(np.float32)

    if not config.test_labels:
        sys.exit('Usage: --test_labels <csv file>')
    test_labels = loc_genfromtxt(config.test_labels)
    test_labels = test_labels.astype(np.float32)

    # define the metric to be 'accuracy'
    metric_name = 'accuracy'
    metric = {
        metric_name:
        metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))
    }

    # calculate the score using the test
    results = est.score(x=test_data,
                        y=test_labels,
                        batch_size=config.batch_size,
                        metrics=metric)

    # Print the metric values comma-separated, with a newline only after the last one.
    i = 1
    length = len(sorted(results))
    for key in sorted(results):
        if i == length:
            print(str(results[key]))
        else:
            print(str(results[key]) + ',', end="")
        i = i + 1

    # if flag set, delete model dir in order to free up space / avoid out of memory
    if config.delete_models:
        call(['rm', '-r', model_dir])
Example #13
def train_and_eval(wisdmFilename='../data/wisdm.txt'):
    """Train and evaluate a model on the WISDM dataset."""

    wisdm = read_data_sets(csv=wisdmFilename)

    all_data = wisdm.data  # all_data is a Datasplit tuple in wisdm.py
    all_labels = wisdm.labels  # all_labels is a Datasplit tuple in wisdm.py
    nclasses = wisdm.n_classes
    nfeatures = wisdm.n_features

    print(nclasses, ' classes from ', nfeatures, 'features')

    if FLAGS.estimator == 'tensorflow':
        """Train and evaluate the model."""
        model_dir = FLAGS.model_dir or tempfile.mkdtemp()
        print('model directory = %s' % model_dir)

        tf_start = time.time()
        est = build_estimator(model_dir, nclasses, nfeatures)

        est.fit(x=all_data.train,
                y=all_labels.train,
                batch_size=FLAGS.batch_size)

        print('Done Fitting\n')

        metric_name = 'accuracy'
        mspec = metric_spec.MetricSpec(
            eval_metrics.get_metric(metric_name),
            prediction_key=eval_metrics.get_prediction_key(metric_name))

        metric = {metric_name: mspec}

        results = est.score(
            x=all_data.test,
            y=all_labels.test,
            # batch_size=FLAGS.batch_size,
            metrics=metric)

        tf_end = time.time()

        for key in sorted(results):
            print('%s: %s' % (key, results[key]))

        print('tf time:', tf_end - tf_start)

    elif FLAGS.estimator == 'sklearn':
        print('---------  Next: sklearn RandomForestClassifier ---------')

        skrf_start = time.time()

        param_grid = [{
            'n_estimators': [10, 30, 90],
            'max_features': [15, 25, 35, 43]
        }, {
            'bootstrap': [False],
            'n_estimators': [10, 30, 40],
            'max_features': [16, 24, 43]
        }]

        fc = RandomForestClassifier()
        grid_search = GridSearchCV(fc, param_grid, cv=10, scoring='accuracy')

        grid_search.fit(
            np.concatenate([all_data.train, all_data.validation]),
            np.concatenate([all_labels.train, all_labels.validation]))
        skrf_end = time.time()

        print('Best params', grid_search.best_params_)
        print('skRF time:', skrf_end - skrf_start)

        for params, mean, std in grid_search.grid_scores_:
            print(mean, std, params)

        s = grid_search.score(X=all_data.test, y=all_labels.test)
        print('Test score:', s)
Example #14
num_features = 59
num_trees = 4
max_nodes = 1000

# Random forest parameters
hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()

classifier = random_forest.TensorForestEstimator(hparams)

classifier.fit(input_fn=train_input_fn, steps=None)

# Verify results
metric_name = 'accuracy'
metric = {
    metric_name:
    metric_spec.MetricSpec(
        eval_metrics.get_metric(metric_name),
        prediction_key=eval_metrics.get_prediction_key(metric_name))
}

# Evaluate on a single, unshuffled pass so that evaluate() terminates.
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': features[0:10]},
    y=labels[0:10],
    num_epochs=1,
    shuffle=False)
results = classifier.evaluate(input_fn=test_input_fn, metrics=metric)

for key in sorted(results):
    print('%s: %s' % (key, results[key]))
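
Example #14 references features, labels, num_classes, and train_input_fn without defining them. A sketch of how the training input might be wired up from in-memory arrays with the 59 features declared above; the array shapes and the num_classes value here are purely illustrative:

import numpy as np
import tensorflow as tf

num_classes = 7                                               # illustrative
features = np.random.rand(1000, 59).astype(np.float32)       # placeholder data
labels = np.random.randint(num_classes, size=1000).astype(np.int32)

train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': features}, y=labels,
    batch_size=128, num_epochs=None, shuffle=True)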