def _get_tfbt(output_dir):
    """Build a 10-class TF Boosted Trees classifier configured from FLAGS.

    Args:
        output_dir: Directory handed to the estimator as ``model_dir``.

    Returns:
        A ``GradientBoostedDecisionTreeClassifier`` whose learner settings
        come from ``FLAGS.learning_rate``, ``FLAGS.l2``, ``FLAGS.depth``,
        ``FLAGS.examples_per_layer`` and ``FLAGS.num_trees``.
    """
    class_count = 10

    config_proto = learner_pb2.LearnerConfig()
    config_proto.num_classes = class_count
    config_proto.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate

    # L1 is switched off; the L2 flag is divided by the examples-per-layer flag.
    config_proto.regularization.l1 = 0.0
    config_proto.regularization.l2 = FLAGS.l2 / FLAGS.examples_per_layer

    config_proto.constraints.max_tree_depth = FLAGS.depth
    config_proto.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
    config_proto.multi_class_strategy = (
        learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

    # Checkpoints are written every 300 seconds.
    checkpoint_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

    return GradientBoostedDecisionTreeClassifier(
        learner_config=config_proto,
        n_classes=class_count,
        examples_per_layer=FLAGS.examples_per_layer,
        model_dir=output_dir,
        num_trees=FLAGS.num_trees,
        center_bias=False,
        config=checkpoint_config)
def _get_tfbt(output_dir, feature_cols):
    """Build a two-class TF Boosted Trees classifier configured from FLAGS.

    Args:
        output_dir: Directory handed to the estimator as ``model_dir``.
        feature_cols: Feature columns handed to the estimator as
            ``feature_columns``.

    Returns:
        A ``GradientBoostedDecisionTreeClassifier`` with ``n_classes=2``.
    """
    config_proto = learner_pb2.LearnerConfig()
    config_proto.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
    config_proto.constraints.max_tree_depth = FLAGS.depth
    config_proto.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER

    # L1 is switched off. The L2 flag is scaled down by the batch size so that
    # the value applied per instance adds up to the flag over a batch.
    config_proto.regularization.l1 = 0.0
    config_proto.regularization.l2 = FLAGS.l2 / FLAGS.batch_size

    # Checkpoints are written every 30 seconds.
    checkpoint_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=30)

    # Note: this is a classifier, not a regressor.
    return GradientBoostedDecisionTreeClassifier(
        learner_config=config_proto,
        examples_per_layer=FLAGS.examples_per_layer,
        n_classes=2,
        num_trees=FLAGS.num_trees,
        feature_columns=feature_cols,
        model_dir=output_dir,
        config=checkpoint_config,
        center_bias=False)
def set_parameter(self, param):
    """Apply hyper-parameters and, for multiclass objectives, build the estimator.

    Keys missing from ``param`` are filled in from ``self.default_param``;
    ``param`` is updated in place. ``self.build_model()`` is then called
    before the learner configuration is assembled.

    Args:
        param: Mapping of settings. Keys read here: ``learning_rate``, ``L2``,
            ``examples_per_layer``, ``depth``, ``model_path``, ``class_num``,
            ``objective``, ``num_trees``, ``batch_size``,
            ``eval_batch_size`` and ``num_epochs``.

    Side effects:
        Sets ``learner_config``, ``growing_mode``, ``run_config``,
        ``model_path``, ``class_num``, ``batch_size``, ``eval_batch_size`` and
        ``num_epochs`` on ``self``. ``self.estimator`` is assigned only when
        ``param['objective'] == "multiclass"``.
    """
    for name in self.default_param:
        if name not in param:
            param[name] = self.default_param[name]
    self.build_model()

    examples_per_layer = int(param['examples_per_layer'])

    self.learner_config = learner_pb2.LearnerConfig()
    self.learner_config.learning_rate_tuner.fixed.learning_rate = float(
        param['learning_rate'])
    self.learner_config.regularization.l1 = 0.0
    self.learner_config.regularization.l2 = (
        float(param['L2']) / examples_per_layer)
    self.learner_config.constraints.max_tree_depth = int(param['depth'])
    self.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
    self.learner_config.growing_mode = self.growing_mode
    self.run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
    self.model_path = param['model_path']
    self.class_num = int(param['class_num'])

    # Compare by value: `is` tests object identity and is False for an equal
    # string that is a different object (e.g. one parsed from a config file).
    if param['objective'] == "multiclass":
        # Use the int-converted class count so string inputs also work.
        self.learner_config.num_classes = self.class_num
        self.learner_config.multi_class_strategy = (
            learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)
        self.estimator = GradientBoostedDecisionTreeClassifier(
            learner_config=self.learner_config,
            n_classes=self.class_num,
            examples_per_layer=examples_per_layer,
            model_dir=self.model_path,
            num_trees=int(param['num_trees']),
            center_bias=False,
            config=self.run_config)
    # For any other objective no estimator is built here.

    self.batch_size = int(param["batch_size"])
    self.eval_batch_size = int(param['eval_batch_size'])
    self.num_epochs = param["num_epochs"]
# Learner settings for the boosted-trees model.
learner_config = gbdt_learner.LearnerConfig()
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
learner_config.growing_mode = growing_mode = (
    gbdt_learner.LearnerConfig.LAYER_BY_LAYER)
learner_config.multi_class_strategy = gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN

# Checkpoints are written every 300 seconds.
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

# Build the GBDT estimator; model_dir=None means no save directory is given.
gbdt_model = GradientBoostedDecisionTreeClassifier(
    model_dir=None,
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    config=run_config,
)

# Show INFO-level TensorFlow logs from here on.
tf.logging.set_verbosity(tf.logging.INFO)

# Input function over the MNIST training split; num_epochs=None is passed,
# and the step limit is given to fit() below.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images},
    y=mnist.train.labels,
    batch_size=batch_size,
    num_epochs=None,
    shuffle=True,
)

# Train for at most max_steps steps.
gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)
def test2():
    """Train a TF boosted-trees classifier on MNIST and print its test accuracy.

    MNIST is read via ``input_data.read_data_sets`` into ``MNIST_data``, the
    model is fitted for up to 10000 steps, and the ``accuracy`` metric from
    evaluation on the test split is printed.
    """
    import tensorflow as tf
    from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
    from tensorflow.contrib.boosted_trees.proto import learner_pb2 as gbdt_learner

    # Ignore all GPUs (current TF GBDT does not support GPU).
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    # Log level: errors only while loading data and building the model.
    tf.logging.set_verbosity(tf.logging.ERROR)

    from tensorflow.examples.tutorials.mnist import input_data
    digits = input_data.read_data_sets(
        "MNIST_data",
        one_hot=False,
        source_url='http://yann.lecun.com/exdb/mnist/',
    )

    # General parameters.
    batch_size = 4096    # examples per batch
    num_classes = 10     # number of labels
    num_features = 784   # number of features (not referenced below)
    max_steps = 10000    # maximum number of training steps

    # GBDT parameters.
    learning_rate = 0.1
    l1_regul = 0.
    l2_regul = 1.
    examples_per_layer = 1000
    num_trees = 10
    max_depth = 16

    # Fill in the learner configuration.
    config_proto = gbdt_learner.LearnerConfig()
    config_proto.learning_rate_tuner.fixed.learning_rate = learning_rate
    config_proto.regularization.l1 = l1_regul
    config_proto.regularization.l2 = l2_regul / examples_per_layer
    config_proto.constraints.max_tree_depth = max_depth
    config_proto.growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
    config_proto.multi_class_strategy = (
        gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)
    checkpoint_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

    # Create the model; model_dir=None means no save directory is given.
    classifier = GradientBoostedDecisionTreeClassifier(
        model_dir=None,
        learner_config=config_proto,
        n_classes=num_classes,
        examples_per_layer=examples_per_layer,
        num_trees=num_trees,
        center_bias=False,
        config=checkpoint_config,
    )

    # Display TF info logs.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Training.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': digits.train.images},
        y=digits.train.labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True,
    )
    classifier.fit(input_fn=train_input_fn, max_steps=max_steps)

    # Evaluation on the test split.
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': digits.test.images},
        y=digits.test.labels,
        batch_size=batch_size,
        shuffle=False,
    )
    metrics = classifier.evaluate(input_fn=test_input_fn)
    print("Testing Accuracy:", metrics['accuracy'])
# Learner settings for the boosted-trees model.
learner_config = gbdt_learner.LearnerConfig()
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
learner_config.growing_mode = growing_mode = (
    gbdt_learner.LearnerConfig.LAYER_BY_LAYER)
learner_config.multi_class_strategy = gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN

# Checkpoints are written every 300 seconds.
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

# Create a TensorFlow GBDT estimator; model_dir=None means no save directory.
gbdt_model = GradientBoostedDecisionTreeClassifier(
    model_dir=None,
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    config=run_config,
)

# Display TF info logs.
tf.logging.set_verbosity(tf.logging.INFO)

# Input function over the MNIST training split.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'images': mnist.train.images},
    y=mnist.train.labels,
    batch_size=batch_size,
    num_epochs=None,
    shuffle=True,
)

# Train the model for at most max_steps steps.
gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)

# Evaluate the Model
# -*- coding: utf-8 -*- # @Time : 2019/8/7 17:28 # @Author : skydm # @Email : [email protected] # @File : gbdt.py # @Software: PyCharm import os os.environ["CUDA_VISIBLE_DEVICES"] = "5" os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' import numpy as np import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier import tensorflow.contrib.boosted_trees.proto.learner_pb2 as gbdt_learner # Set verbosity to display errors only (Remove this line for showing warnings) tf.logging.set_verbosity(tf.logging.ERROR) mnist = input_data.read_data_sets("./data", one_hot=False) # params learner_config = gbdt_learner.LearnerConfig() learner_config.learn gbdt_model = GradientBoostedDecisionTreeClassifier()
def run():
    """Train a TF boosted-trees classifier on MNIST and print its test accuracy.

    MNIST is read via ``input_data.read_data_sets`` into ``/tmp/data/``, the
    model is fitted for up to 10000 steps, and the ``accuracy`` metric from
    evaluation on the test split is printed.
    """
    # Ignore all GPUs; the current TF GBDT does not support GPU.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    # Restrict TensorFlow logging to errors.
    tf.logging.set_verbosity(tf.logging.ERROR)

    # Load the MNIST data set.
    digits = input_data.read_data_sets(
        "/tmp/data/",
        one_hot=False,
        source_url='http://yann.lecun.com/exdb/mnist/',
    )

    # General hyper-parameters.
    batch_size = 4096    # examples per training batch
    num_classes = 10     # number of target classes
    num_features = 784   # input features: each image is 28 * 28 pixels
    max_steps = 10000

    # GBDT hyper-parameters.
    learning_rate = 0.1        # learning rate
    l1_regul = 0.              # L1 regularization
    l2_regul = 1.              # L2 regularization
    examples_per_layer = 1000
    num_trees = 10             # number of trees
    max_depth = 16             # maximum tree depth

    # Apply the GBDT hyper-parameters to the learner configuration.
    config_proto = gbdt_learner.LearnerConfig()
    config_proto.learning_rate_tuner.fixed.learning_rate = learning_rate
    config_proto.regularization.l1 = l1_regul
    config_proto.regularization.l2 = l2_regul / examples_per_layer
    config_proto.constraints.max_tree_depth = max_depth
    config_proto.growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
    config_proto.multi_class_strategy = (
        gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)
    checkpoint_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

    # Create the TensorFlow GBDT model; model_dir=None gives no save location.
    classifier = GradientBoostedDecisionTreeClassifier(
        model_dir=None,
        learner_config=config_proto,
        n_classes=num_classes,
        examples_per_layer=examples_per_layer,
        num_trees=num_trees,
        center_bias=False,
        config=checkpoint_config,
    )

    # Show INFO-level logs from here on.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Input for training, then fit the model.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': digits.train.images},
        y=digits.train.labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True,
    )
    classifier.fit(input_fn=train_input_fn, max_steps=max_steps)

    # Input for evaluation, then evaluate the fitted model.
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={'images': digits.test.images},
        y=digits.test.labels,
        batch_size=batch_size,
        shuffle=False,
    )
    metrics = classifier.evaluate(input_fn=test_input_fn)
    print("测试准确度为:", metrics['accuracy'])
learner_config = gbdt_learner.LearnerConfig() learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate learner_config.regularization.l1 = l1_regul learner_config.regularization.l2 = l2_regul / examples_per_layer learner_config.constraints.max_tree_depth = max_depth growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER learner_config.growing_mode = growing_mode run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300) learner_config.multi_class_strategy = ( gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN) #定义分类器 gbdt_model = GradientBoostedDecisionTreeClassifier( learner_config=learner_config, n_classes=num_classes, examples_per_layer=examples_per_layer, num_trees=num_trees, center_bias=False, config=run_config) #mini-batch输入, 训练模型 input_fn = tf.estimator.inputs.numpy_input_fn(x={'images': mnist.train.images}, y=mnist.train.labels, batch_size=batch_size, num_epochs=None, shuffle=True) gbdt_model.fit(input_fn=input_fn, max_steps=max_steps) #测试 input_fn = tf.estimator.inputs.numpy_input_fn(x={'images': mnist.test.images}, y=mnist.test.labels,
# Remaining learner settings (learner_config itself is created earlier).
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
learner_config.growing_mode = growing_mode = (
    gbdt_learner.LearnerConfig.LAYER_BY_LAYER)

# Checkpoints are written every 300 seconds.
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
learner_config.multi_class_strategy = (
    gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)

# Debug output: print the dtypes of the training images and labels.
print("-0-----------------------------------------------------------")
print(mnist.train.images.dtype, mnist.train.labels.dtype)

# Create a TensorFlow GBDT estimator; model_dir=None means no save directory.
gbdt_model = GradientBoostedDecisionTreeClassifier(
    model_dir=None,
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    config=run_config,
)

# Input function for training: features are passed under the key 'im' and
# shuffling is off.
input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'im': np.asarray(mnist.train.images)},
    y=np.asarray(mnist.train.labels),
    batch_size=batch_size,
    num_epochs=None,
    shuffle=False,
)

# Train the Model (currently disabled)
# gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)

# Evaluate the Model