def testInferenceConstructionSparse(self):
    """Check that a sparse input yields a ``tf.Tensor`` inference graph.

    A 4x10 ``tf.SparseTensor`` with six explicit entries is fed to
    ``RandomForestGraphs.inference_graph`` built from filled hyper-parameters
    (4 classes, 10 features, 10 trees).
    """
    entry_positions = [[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]]
    entry_values = [-1.0, 0.0, -1., 2., 1., -2.0]
    sparse_input = tf.SparseTensor(
        indices=entry_positions,
        values=entry_values,
        shape=[4, 10])

    hparams = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=10,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25)
    forest = tensor_forest.RandomForestGraphs(hparams.fill())

    inference_output = forest.inference_graph(sparse_input)
    self.assertTrue(isinstance(inference_output, tf.Tensor))
def figShowAndWrite(dataSet, label):
    """Fit a one-tree forest on the data set and plot its decision regions.

    :param dataSet: samples; sliced as ``dataSet[:, 0]`` / ``dataSet[:, 1]``,
        so a 2-D NumPy array is assumed. Only the first two columns are
        plotted.
    :param label: class label per sample; ``label + 3`` is used as the
        scatter colour, so a NumPy integer array is presumably expected.
    :return: None. The figure is displayed with ``plt.show()``.
    """
    # Number of features and number of distinct classes.
    feature_count = dataSet.shape[1]
    class_count = len(set(label))

    # Build the hyper-parameters for a single tree capped at 20 nodes and
    # hand them to the high-level estimator.
    params = tensor_forest.ForestHParams(num_classes=class_count,
                                         num_features=feature_count,
                                         num_trees=1,
                                         max_nodes=20)
    classifier = random_forest.TensorForestEstimator(params)

    # Fit the tree on the training set.
    classifier.fit(dataSet, label)

    # Grid covering the data range (padded by 1 on each side, step 0.1).
    x_min, x_max = dataSet[:, 0].min() - 1, dataSet[:, 0].max() + 1
    y_min, y_max = dataSet[:, 1].min() - 1, dataSet[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))

    # Classify every grid point. Each prediction is indexed by 'classes';
    # collecting those values directly yields a plain numeric array instead
    # of an object array of dicts that is then overwritten element by element.
    grid_points = np.c_[xx.ravel(), yy.ravel()].astype(np.float32)
    Z = np.array([prediction['classes']
                  for prediction in classifier.predict(grid_points)])
    Z = Z.reshape(xx.shape)

    # Draw the decision regions and overlay the training samples.
    plt.contourf(xx, yy, Z, alpha=0.2)
    plt.scatter(dataSet[:, 0], dataSet[:, 1], c=label + 3, alpha=1)
    plt.show()
def _build_estimator(self, X=None, Y=None):
    """Build the forest hyper-parameters and graph builder exactly once.

    Returns immediately when ``self._estimator_built`` is already set.
    Otherwise missing ``num_features`` / ``num_classes`` are derived from
    ``X`` / ``Y`` via ``data_util`` (or read from ``self._to_be_restored``
    when still missing), the parameters are validated, and
    ``self.params`` / ``self.forest_graph`` are created.

    Args:
        X: input data used to infer the feature count when it is unset.
        Y: targets used to infer the class count when it is unset.

    Raises:
        ValueError: if ``num_classes`` or ``num_features`` is still None.
    """
    if self._estimator_built:
        return

    # Derive missing dimensions from the supplied data.
    if self.num_features is None:
        self.num_features = data_util.get_num_features(X)
    if self.num_classes is None:
        if self.regression:
            self.num_classes = data_util.get_num_features(Y)
        else:
            self.num_classes = data_util.get_num_classes(Y)

    # Reload params from checkpoint if available
    if self._to_be_restored and self.num_features is None:
        self.num_features = misc.read_tensor_in_checkpoint(
            'num_features', self._to_be_restored)
    if self._to_be_restored and self.num_classes is None:
        self.num_classes = misc.read_tensor_in_checkpoint(
            'num_classes', self._to_be_restored)

    # Both dimensions are mandatory from here on.
    if self.num_classes is None:
        raise ValueError("'num_classes' cannot be None.")
    if self.num_features is None:
        raise ValueError("'num_features' cannot be None.")

    # Persistent parameters: variables named after the two dimensions.
    tf.Variable(self.num_classes, dtype=tf.int32, name='num_classes')
    tf.Variable(self.num_features, dtype=tf.int32, name='num_features')

    # Random forest hyper-parameters.
    hparams = tensor_forest.ForestHParams(
        num_classes=self.num_classes,
        num_features=self.num_features,
        num_trees=self.num_estimators,
        max_nodes=self.max_nodes,
        split_after_samples=self.split_after_samples,
        min_split_samples=self.min_samples_split,
        regression=self.regression,
        bagging_fraction=self.bagging_fraction,
        num_splits_to_consider=self.num_splits_to_consider,
        feature_bagging_fraction=self.feature_bagging_fraction,
        max_fertile_nodes=self.max_fertile_nodes,
        valid_leaf_threshold=self.valid_leaf_threshold,
        dominate_method=self.dominate_method,
        dominate_fraction=self.dominate_fraction)
    self.params = hparams.fill()

    self.forest_graph = tensor_forest.RandomForestGraphs(self.params)
    self._estimator_built = True
    self._init_graph()
def testTrainingConstructionClassificationSparse(self):
    """Check that sparse input yields a ``tf.Operation`` training graph.

    Uses a 4x10 ``tf.SparseTensor`` with six explicit entries and one
    integer label per row.
    """
    sparse_input = tf.SparseTensor(
        indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
        values=[-1.0, 0.0, -1., 2., 1., -2.0],
        shape=[4, 10])
    class_ids = [0, 1, 2, 3]

    hparams = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=10,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25)
    forest = tensor_forest.RandomForestGraphs(hparams.fill())

    train_op = forest.training_graph(sparse_input, class_ids)
    self.assertTrue(isinstance(train_op, tf.Operation))
def testInferenceConstruction(self):
    """Check that dense input yields a ``tf.Tensor`` inference graph."""
    node_1_rows = [[-1., 0.], [-1., 2.]]
    node_2_rows = [[1., 0.], [1., -2.]]
    samples = node_1_rows + node_2_rows

    hparams = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000)
    forest = tensor_forest.RandomForestGraphs(hparams.fill())

    inference_output = forest.inference_graph(samples)
    self.assertTrue(isinstance(inference_output, tf.Tensor))
def testForestHParams(self):
    """Verify the values derived by ``ForestHParams.fill()``.

    Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and was
    removed from ``unittest`` in Python 3.12.
    """
    hparams = tensor_forest.ForestHParams(num_classes=2,
                                          num_trees=100,
                                          max_nodes=1000,
                                          split_after_samples=25,
                                          num_features=60).fill()
    self.assertEqual(2, hparams.num_classes)
    self.assertEqual(3, hparams.num_output_columns)
    self.assertEqual(60, hparams.num_splits_to_consider)
    # Don't have more fertile nodes than the maximum number of leaves,
    # which is 500.
    self.assertEqual(500, hparams.max_fertile_nodes)
    # Default value of valid_leaf_threshold
    self.assertEqual(1, hparams.valid_leaf_threshold)
    # floor(60 / 25) = 2
    self.assertEqual(2, hparams.split_initializations_per_input)
    self.assertEqual(0, hparams.base_random_seed)
def build_estimator(model_dir):
    """Create an SKCompat-wrapped TensorForestEstimator.

    Hyper-parameters use 10 classes and 784 features; tree count and node
    limit come from ``FLAGS``. ``FLAGS.use_training_loss`` selects the
    graph-builder class.
    """
    hparams = tensor_forest.ForestHParams(
        num_classes=10,
        num_features=784,
        num_trees=FLAGS.num_trees,
        max_nodes=FLAGS.max_nodes)

    if FLAGS.use_training_loss:
        builder_cls = tensor_forest.TrainingLossForest
    else:
        builder_cls = tensor_forest.RandomForestGraphs

    forest_estimator = random_forest.TensorForestEstimator(
        hparams,
        graph_builder_class=builder_cls,
        model_dir=model_dir)
    # Use the SKCompat wrapper, which gives us a convenient way to split
    # in-memory data like MNIST into batches.
    return estimator.SKCompat(forest_estimator)
def testClassification(self):
    """Fit and evaluate a 3-tree classifier on the iris matrix data."""
    forest_params = tensor_forest.ForestHParams(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    classifier = random_forest.TensorForestEstimator(forest_params.fill())

    iris = base.load_iris()
    features = iris.data.astype(np.float32)
    targets = iris.target.astype(np.int32)

    classifier.fit(x=features, y=targets, steps=100, batch_size=50)
    classifier.evaluate(x=features, y=targets, steps=10)
def testInferenceConstruction(self):
    """Check that ``inference_graph`` returns three ``ops.Tensor`` values."""
    samples = [
        [-1., 0.], [-1., 2.],  # node 1
        [1., 0.], [1., -2.],   # node 2
    ]
    hparams = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25)
    forest = tensor_forest.RandomForestGraphs(hparams.fill())

    probs, paths, var = forest.inference_graph(samples)
    # Same check, in the same order, for each of the three outputs.
    for output in (probs, paths, var):
        self.assertTrue(isinstance(output, ops.Tensor))
def testTrainingConstructionRegression(self):
    """Check that regression mode yields an ``ops.Operation`` training graph."""
    samples = [
        [-1., 0.], [-1., 2.],  # node 1
        [1., 0.], [1., -2.],   # node 2
    ]
    targets = [0, 1, 2, 3]

    hparams = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25,
        regression=True)
    forest = tensor_forest.RandomForestGraphs(hparams.fill())

    train_op = forest.training_graph(samples, targets)
    self.assertTrue(isinstance(train_op, ops.Operation))
def testClassificationTrainingLoss(self):
    """Fit and evaluate on iris using the ``TrainingLossForest`` builder.

    Training runs with a ``TensorForestLossHook(10)`` monitor attached.
    """
    forest_params = tensor_forest.ForestHParams(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4)
    classifier = random_forest.TensorForestEstimator(
        forest_params,
        graph_builder_class=tensor_forest.TrainingLossForest)

    iris = base.load_iris()
    features = iris.data.astype(np.float32)
    targets = iris.target.astype(np.float32)

    loss_monitors = [random_forest.TensorForestLossHook(10)]
    classifier.fit(x=features, y=targets, steps=100, monitors=loss_monitors)
    classifier.evaluate(x=features, y=targets, steps=10)
def setUp(self):
    """Create hybrid-model hyper-parameters shared by the tests.

    ``num_nodes`` and ``num_leaves`` are derived from ``hybrid_tree_depth``
    and stored on the params object together with ``regression = False``.
    """
    params = tensor_forest.ForestHParams(
        num_classes=2,
        num_features=31,
        layer_size=11,
        num_layers=13,
        num_trees=17,
        connection_probability=0.1,
        hybrid_tree_depth=4,
        regularization_strength=0.01,
        learning_rate=0.01,
        regularization="",
        weight_init_mean=0.0,
        weight_init_std=0.1)
    self.params = params

    params.regression = False
    # Node and leaf counts for a tree of the configured depth.
    params.num_nodes = 2**params.hybrid_tree_depth - 1
    params.num_leaves = 2**(params.hybrid_tree_depth - 1)
def testAutofillsClassificationHead(self):
    """Train a core estimator without an explicit head and check metrics.

    Expects accuracy 1.0 and a loss close to 0.55144483 after 100 steps.
    """
    forest_params = tensor_forest.ForestHParams(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    est = random_forest.CoreTensorForestEstimator(forest_params.fill())

    train_input_fn, _ = _get_classification_input_fns()
    est.train(input_fn=train_input_fn, steps=100)
    metrics = est.evaluate(input_fn=train_input_fn, steps=1)

    self.assertEqual(1.0, metrics['accuracy'])
    self.assertAllClose(0.55144483, metrics['loss'])
def testForestHParams(self):
    """Verify the values derived by ``ForestHParams.fill()``.

    Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and was
    removed from ``unittest`` in Python 3.12.
    """
    hparams = tensor_forest.ForestHParams(
        num_classes=2,
        num_trees=100,
        max_nodes=1000,
        split_after_samples=25,
        num_features=60).fill()
    self.assertEqual(2, hparams.num_classes)
    self.assertEqual(3, hparams.num_output_columns)
    # 2 * ceil(log_2(1000)) = 20
    self.assertEqual(20, hparams.max_depth)
    # sqrt(num_features) < 10, so num_splits_to_consider should be 10.
    self.assertEqual(10, hparams.num_splits_to_consider)
    # Don't have more fertile nodes than the maximum number of leaves,
    # which is 500.
    self.assertEqual(500, hparams.max_fertile_nodes)
    # Default value of valid_leaf_threshold
    self.assertEqual(1, hparams.valid_leaf_threshold)
    # split_after_samples is larger than 10
    self.assertEqual(1, hparams.split_initializations_per_input)
    self.assertEqual(0, hparams.base_random_seed)
def build_estimator(model_dir):
    """Create an SKCompat-wrapped TensorForestEstimator from ``config``.

    All hyper-parameters are read from the module-level ``config`` object;
    ``config.use_training_loss`` selects the graph-builder class.
    """
    hparams = tensor_forest.ForestHParams(
        num_classes=config.num_classes,
        num_features=config.num_features,
        num_trees=config.num_trees,
        max_nodes=config.max_nodes,
        bagging_fraction=config.bagging_fraction,
        feature_bagging_fraction=config.feature_bagging_fraction)

    if config.use_training_loss:
        builder_cls = tensor_forest.TrainingLossForest
    else:
        builder_cls = tensor_forest.RandomForestGraphs

    forest_estimator = random_forest.TensorForestEstimator(
        hparams,
        graph_builder_class=builder_cls,
        model_dir=model_dir)
    # Use the SKCompat wrapper, which gives us a convenient way to split
    # in-memory data like MNIST into batches.
    return estimator.SKCompat(forest_estimator)
def testRegression(self):
    """Fit and evaluate a 3-tree regressor on the Boston matrix data."""
    forest_params = tensor_forest.ForestHParams(
        num_trees=3,
        max_nodes=1000,
        num_classes=1,
        num_features=13,
        regression=True,
        split_after_samples=20)
    regressor = random_forest.TensorForestEstimator(forest_params.fill())

    boston = base.load_boston()
    features = boston.data.astype(np.float32)
    # NOTE(review): targets are cast to int32 here, as in the original.
    targets = boston.target.astype(np.int32)

    regressor.fit(x=features, y=targets, steps=100, batch_size=50)
    regressor.evaluate(x=features, y=targets, steps=10)
def set_parameters(self, parameters: DecisionForestParameters):
    """Store the decision forest configuration on this object.

    The forest-specific fields are converted to filled ``ForestHParams``;
    the remaining fields are copied over unchanged.

    Args:
        parameters (DecisionForestParameters): The parameters.
    """
    forest_hparams = tensor_forest.ForestHParams(
        num_classes=parameters.num_classes,
        num_features=parameters.num_features,
        num_trees=parameters.num_trees,
        max_nodes=parameters.max_nodes,
        inference_tree_paths=parameters.inference_tree_paths)
    self.parameters = forest_hparams.fill()

    # Settings that are kept outside the hyper-parameter object.
    self.batch_size = parameters.batch_size
    self.use_training_loss = parameters.use_training_loss
    self.report_feature_importances = parameters.report_feature_importances
    self.model_dir = parameters.model_dir
def testEarlyStopping(self):
    """Check that training stops early under a loose loss threshold.

    Fits with ``steps=100`` and asserts the checkpoint sits at global
    step 5.
    """
    forest_params = tensor_forest.ForestHParams(
        num_trees=100,
        max_nodes=10000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    classifier = random_forest.TensorForestEstimator(
        forest_params.fill(),
        # Set a crazy threshold - 30% loss change.
        early_stopping_loss_threshold=0.3,
        early_stopping_rounds=2)

    train_input_fn, _ = _get_classification_input_fns()
    classifier.fit(input_fn=train_input_fn, steps=100)

    # We stopped early.
    self._assert_checkpoint(classifier.model_dir, global_step=5)
def testClassification(self):
    """Train a classifier via input functions and check metrics and output.

    Expects accuracy 1.0, a loss close to 0.55144483, and a single
    prediction whose probabilities are close to the pinned values.
    """
    forest_params = tensor_forest.ForestHParams(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    classifier = random_forest.TensorForestEstimator(forest_params.fill())

    train_input_fn, predict_input_fn = _get_classification_input_fns()
    classifier.fit(input_fn=train_input_fn, steps=100)
    metrics = classifier.evaluate(input_fn=train_input_fn, steps=10)

    self.assertEqual(1.0, metrics['accuracy'])
    self.assertAllClose(0.55144483, metrics['loss'])

    predictions = list(classifier.predict(input_fn=predict_input_fn))
    predicted_probabilities = [pred['probabilities'] for pred in predictions]
    self.assertAllClose([[0.576117, 0.211942, 0.211942]],
                        predicted_probabilities)
def testAutofillsRegressionHead(self):
    """Train a core regressor without an explicit head and check output.

    Expects a loss of at most 0.1 and a prediction within 1 of 24.
    """
    forest_params = tensor_forest.ForestHParams(
        num_trees=5,
        max_nodes=1000,
        num_classes=1,
        num_features=13,
        regression=True,
        split_after_samples=20)
    regressor = random_forest.CoreTensorForestEstimator(forest_params.fill())

    train_input_fn, predict_input_fn = _get_regression_input_fns()
    regressor.train(input_fn=train_input_fn, steps=100)
    metrics = regressor.evaluate(input_fn=train_input_fn, steps=10)
    self.assertGreaterEqual(0.1, metrics['loss'])

    predictions = list(regressor.predict(input_fn=predict_input_fn))
    predicted_values = [pred['predictions'] for pred in predictions]
    self.assertAllClose([[24.]], predicted_values, atol=1)
def testRegression(self):
    """Train a regressor via input functions and check loss and output.

    Expects a loss of at most 0.1 and a score within 1 of 24.
    """
    forest_params = tensor_forest.ForestHParams(
        num_trees=5,
        max_nodes=1000,
        num_classes=1,
        num_features=13,
        regression=True,
        split_after_samples=20)
    regressor = random_forest.TensorForestEstimator(forest_params.fill())

    train_input_fn, predict_input_fn = _get_regression_input_fns()
    regressor.fit(input_fn=train_input_fn, steps=100)
    metrics = regressor.evaluate(input_fn=train_input_fn, steps=10)
    self.assertGreaterEqual(0.1, metrics['loss'])

    predictions = list(regressor.predict(input_fn=predict_input_fn))
    predicted_scores = [pred['scores'] for pred in predictions]
    self.assertAllClose([24.], predicted_scores, atol=1)
def setUp(self):
    """Create hybrid-model hyper-parameters and a random input constant.

    ``self.input_data`` is a constant of 100 rows with
    ``params.num_features`` values each, drawn from ``random.uniform(-1, 1)``.
    """
    params = tensor_forest.ForestHParams(
        num_classes=2,
        num_features=31,
        layer_size=11,
        num_layers=13,
        num_trees=17,
        connection_probability=0.1,
        hybrid_tree_depth=4,
        regularization_strength=0.01,
        regularization="",
        learning_rate=0.01,
        weight_init_mean=0.0,
        weight_init_std=0.1)
    self.params = params

    params.regression = False
    # Node and leaf counts for a tree of the configured depth.
    params.num_nodes = 2**params.hybrid_tree_depth - 1
    params.num_leaves = 2**(params.hybrid_tree_depth - 1)

    # pylint: disable=W0612
    random_rows = []
    for _ in range(100):
        random_rows.append(
            [random.uniform(-1, 1) for i in range(params.num_features)])
    self.input_data = constant_op.constant(random_rows)
def testInfrenceFromRestoredModel(self):
    """Run inference on a forest restored from a serialized tree proto.

    A one-split tree whose two leaves both hold the vector ``[0.0, 1.0]`` is
    serialized and handed to ``RandomForestGraphs``; every input row must
    therefore produce the prediction ``[0.0, 1.0]``.

    Uses ``assertEqual`` (the ``assertEquals`` alias was removed from
    ``unittest`` in Python 3.12) and evaluates ``probs`` only once.
    """
    input_data = [
        [-1., 0.], [-1., 2.],  # node 1
        [1., 0.], [1., -2.],   # node 2
    ]
    expected_prediction = [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]
    hparams = tensor_forest.ForestHParams(
        num_classes=2,
        num_features=2,
        num_trees=1,
        max_nodes=1000,
        split_after_samples=25).fill()
    tree_weight = {
        'decisionTree': {
            'nodes': [
                {
                    'binaryNode': {
                        'rightChildId': 2,
                        'leftChildId': 1,
                        'inequalityLeftChildTest': {
                            'featureId': {'id': '0'},
                            'threshold': {'floatValue': 0}
                        }
                    }
                },
                {
                    'leaf': {
                        'vector': {
                            'value': [{'floatValue': 0.0},
                                      {'floatValue': 1.0}]
                        }
                    },
                    'nodeId': 1
                },
                {
                    'leaf': {
                        'vector': {
                            'value': [{'floatValue': 0.0},
                                      {'floatValue': 1.0}]
                        }
                    },
                    'nodeId': 2
                }
            ]
        }
    }
    restored_tree_param = ParseDict(tree_weight,
                                    _tree_proto.Model()).SerializeToString()
    graph_builder = tensor_forest.RandomForestGraphs(hparams,
                                                     [restored_tree_param])
    probs, paths, var = graph_builder.inference_graph(input_data)
    self.assertTrue(isinstance(probs, ops.Tensor))
    self.assertTrue(isinstance(paths, ops.Tensor))
    self.assertTrue(isinstance(var, ops.Tensor))
    with self.test_session():
        variables.global_variables_initializer().run()
        resources.initialize_resources(resources.shared_resources()).run()
        # Evaluate once and reuse the result for both checks.
        probabilities = probs.eval()
        self.assertEqual(probabilities.shape, (4, 2))
        self.assertEqual(probabilities.tolist(), expected_prediction)
def randomforest(x, y, features_dim, class_num, tree_num):
    """Build training, loss and accuracy ops for a random forest.

    Args:
        x: input features passed to the forest graphs.
        y: labels; cast to int64 for the accuracy comparison.
        features_dim: value used for ``num_features``.
        class_num: value used for ``num_classes``.
        tree_num: value used for ``num_trees``.

    Returns:
        Tuple ``(train_step, loss, accuracy, output)``, where ``output`` is
        the first value returned by ``inference_graph``.
    """
    with tf.name_scope('random_forest'):
        forest_params = tensor_forest.ForestHParams(
            num_classes=class_num,
            num_features=features_dim,
            num_trees=tree_num)
        forest = tensor_forest.RandomForestGraphs(forest_params.fill())
        train_step = forest.training_graph(x, y)

    with tf.name_scope('random_forest_loss'):
        loss = forest.training_loss(x, y)
        # NOTE(review): the summary tag says "svm_loss" although this is the
        # forest training loss; kept unchanged so existing dashboards match.
        tf.summary.scalar("svm_loss", loss)

    with tf.name_scope('accuracy'):
        output, _, _ = forest.inference_graph(x)
        predicted_class = tf.argmax(output, 1)
        expected_class = tf.cast(y, tf.int64)
        hits = tf.equal(predicted_class, expected_class)
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    return train_step, loss, accuracy, output
def _build_model(self):
    """Create placeholders, forest ops and summaries for this model.

    Sets ``input_x``, ``input_y``, ``train_op``, ``loss``, ``pred``, ``acc``
    and ``summary`` on ``self``.
    """
    self.input_x = tf.placeholder(
        tf.int32, [None, self.seqlen], name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None], name="input_y")

    forest_params = tensor_forest.ForestHParams(
        num_classes=self.total_class,
        num_trees=100,
        max_nodes=10000000,
        num_features=10000,
    )
    forest = tensor_forest.RandomForestGraphs(forest_params.fill())

    self.train_op = forest.training_graph(self.input_x, self.input_y)
    # NOTE(review): other call sites in this file use `training_loss`;
    # confirm `loss_graph` exists on the builder in use.
    self.loss = forest.loss_graph(self.input_x, self.input_y)
    self.pred = forest.inference_graph(self.input_x)

    predicted_class = tf.argmax(self.pred, 1)
    expected_class = tf.cast(self.input_y, tf.int64)
    hits = tf.equal(predicted_class, expected_class)
    self.acc = tf.reduce_mean(tf.cast(hits, tf.float32))

    # NOTE(review): `summary` is not defined in this method; presumably a
    # list defined elsewhere in the module -- confirm.
    summary.append(tf.summary.scalar("loss", self.loss))
    summary.append(tf.summary.scalar("acc", self.acc))
    self.summary = tf.summary.merge(summary, name="merge_summary")
def setUp(self):
    """Create the fixed inputs, tree description and params for the tests."""
    self.input_data = [
        [-1., 0.],
        [-1., 2.],
        [1., 0.],
        [1., -2.],
    ]
    self.input_labels = [0., 1., 2., 3.]

    # Three-entry tree description with matching weights and thresholds.
    self.tree = [[1, 0], [-1, 0], [-1, 0]]
    self.tree_weights = [[1.0, 0.0], [1.0, 0.0], [1.0, 0.0]]
    self.tree_thresholds = [0., 0., 0.]

    self.ops = training_ops.Load()

    params = tensor_forest.ForestHParams(
        num_features=2,
        hybrid_tree_depth=2,
        base_random_seed=10,
        feature_bagging_fraction=1.0,
        regularization_strength=0.01,
        regularization="",
        weight_init_mean=0.0,
        weight_init_std=0.1)
    self.params = params

    # Values derived from the constructor arguments above.
    params.num_nodes = 2**params.hybrid_tree_depth - 1
    params.num_leaves = 2**(params.hybrid_tree_depth - 1)
    params.num_features_per_node = (
        params.feature_bagging_fraction * params.num_features)
    params.regression = False
def build_model(self):
    """Create placeholders and the forest train/loss/accuracy ops.

    Sets ``is_training``, ``x``, ``y``, ``increment_global_step_op``,
    ``train_step``, ``loss`` and ``accuracy`` on ``self``.
    """
    self.is_training = tf.placeholder(tf.bool, name="is_training")

    class_count = 10
    feature_count = 784
    tree_count = 10
    node_limit = 1000

    # Input and target data.
    self.x = tf.placeholder(
        tf.float32, shape=[None, feature_count], name="image")
    # For random forest, labels must be integers (the class id)
    self.y = tf.placeholder(tf.int32, shape=[None], name="labels")

    # Random forest parameters and graph builder.
    forest_params = tensor_forest.ForestHParams(
        num_classes=class_count,
        num_features=feature_count,
        num_trees=tree_count,
        max_nodes=node_limit)
    forest = tensor_forest.RandomForestGraphs(forest_params.fill())
    output, _, _ = forest.inference_graph(self.x)

    self.increment_global_step_op = tf.assign(
        self.global_step_tensor, self.global_step_tensor + 1)

    with tf.name_scope("loss"):
        # Training op and loss come from the forest builder.
        self.train_step = forest.training_graph(self.x, self.y)
        self.loss = forest.training_loss(self.x, self.y)

        predicted_class = tf.argmax(output, 1)
        expected_class = tf.cast(self.y, tf.int64)
        hits = tf.equal(predicted_class, expected_class)
        self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
def testEarlyStopping(self):
    """Check early stopping on a core estimator with an explicit head.

    Trains with ``steps=100`` and asserts the checkpoint sits at global
    step 8.
    """
    softmax_head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
        n_classes=3,
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    forest_params = tensor_forest.ForestHParams(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    est = random_forest.CoreTensorForestEstimator(
        forest_params.fill(),
        head=softmax_head,
        # Set a crazy threshold - 30% loss change.
        early_stopping_loss_threshold=0.3,
        early_stopping_rounds=2)

    train_input_fn, _ = _get_classification_input_fns()
    est.train(input_fn=train_input_fn, steps=100)

    # We stopped early.
    self._assert_checkpoint(est.model_dir, global_step=8)
def set_parameter(self, param):
    """Apply defaults to ``param`` and build the forest training graph.

    Missing keys in ``param`` are filled in place from
    ``self.default_param``. The method then calls ``self.build_model()`` and
    creates the hyper-parameters, train/loss/inference ops, accuracy and
    the initializer op.

    Args:
        param: mapping of settings; reads 'num_trees', 'max_nodes',
            'batch_size' and 'num_epochs'.
    """
    # Back-fill every option the caller left out.
    for key, fallback in self.default_param.items():
        if key not in param:
            param[key] = fallback

    self.build_model()

    # Random forest parameters and graph builder.
    forest_params = tensor_forest.ForestHParams(
        num_classes=self.class_num,
        num_features=self.feature_num,
        num_trees=param['num_trees'],
        max_nodes=param['max_nodes'])
    self.hparams = forest_params.fill()
    self.forest_graph = tensor_forest.RandomForestGraphs(self.hparams)

    # Training op and loss.
    self.train_op = self.forest_graph.training_graph(
        self.inputs, self.labels)
    self.loss = self.forest_graph.training_loss(self.inputs, self.labels)

    # Accuracy: compare the arg-max class with the labels.
    self.infer_op, _, _ = self.forest_graph.inference_graph(self.inputs)
    predicted_class = tf.argmax(self.infer_op, 1)
    expected_class = tf.cast(self.labels, tf.int64)
    self.correct_prediction = tf.equal(predicted_class, expected_class)
    self.accuracy = tf.reduce_mean(
        tf.cast(self.correct_prediction, tf.float32))

    #metrics = [self.get_metric(metric) for metric in param["metrics"]]
    #self.metrics = [metric_fun(self.output, self.ground_truth) for metric_fun in metrics]

    # One op that initializes both variables and shared resources.
    self.init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))

    self.batch_size = param["batch_size"]
    self.num_epochs = param["num_epochs"]
# Script-level configuration for the forest built below.
num_steps = 500
batch_size = 1024
num_classes = 4
num_features = 59
num_trees = 4
max_nodes = 1000

# Input and Target data
X = tf.placeholder(tf.float32, shape=[None, num_features])
# Labels must be integers in random forest
Y = tf.placeholder(tf.int32, shape=[None])

# Random forest parameters
hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()

# Build the Random Forest
forest_graph = tensor_forest.RandomForestGraphs(hparams)

# Get training graph and loss
train_op = forest_graph.training_graph(X, Y)
loss_op = forest_graph.training_loss(X, Y)

# Measure the accuracy
# NOTE(review): this unpacks two values from inference_graph, while other
# call sites in this file unpack three -- confirm against the TensorFlow
# version in use.
infer_op, _ = forest_graph.inference_graph(X)
correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(
    Y, tf.int64))  # maybe switch this to check if in bucket
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))