def tree_models(X, y, num_feat, num_class):
    """Train a TensorForest classifier on a 75/25 split of the data.

    Progress (loss and training accuracy) is printed at step 1 and at every
    50th step; accuracy on the held-out split is printed at the end.

    Args:
        X: Feature matrix accepted by ``train_test_split``; its rows are fed
            to a float32 placeholder with ``num_feat`` columns.
        y: Class labels, one per row of ``X``; fed to an int64 placeholder.
        num_feat: Number of feature columns.
        num_class: Number of classes.

    Returns:
        None. Results are only printed.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=1)

    num_steps = 400
    num_trees = 10
    max_nodes = 1000

    # Distinct names so the placeholders do not shadow the data arguments.
    features_ph = tf.placeholder(tf.float32, shape=[None, num_feat])
    labels_ph = tf.placeholder(tf.int64, shape=[None])

    hparams = tensor_forest.ForestHParams(
        num_classes=num_class,
        num_features=num_feat,
        num_trees=num_trees,
        max_nodes=max_nodes).fill()
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    train_op = forest_graph.training_graph(features_ph, labels_ph)
    loss_op = forest_graph.training_loss(features_ph, labels_ph)

    infer_op, _, _ = forest_graph.inference_graph(features_ph)
    correct_prediction = tf.equal(tf.argmax(infer_op, 1),
                                  tf.cast(labels_ph, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))

    train_feed = {features_ph: X_train, labels_ph: y_train}
    test_feed = {features_ph: X_test, labels_ph: y_test}

    # The context manager closes the session (and frees its resources) even
    # if a training step raises.
    with tf.Session() as rf_sess:
        rf_sess.run(init_vars)
        for i in range(1, num_steps + 1):
            _, l = rf_sess.run([train_op, loss_op], feed_dict=train_feed)
            if i % 50 == 0 or i == 1:
                acc = rf_sess.run(accuracy_op, feed_dict=train_feed)
                print("Step %i, Loss: %f, Acc: %f" % (i, l, acc))
        print("Test Accuracy:",
              rf_sess.run(accuracy_op, feed_dict=test_feed))
def run_rf():
    """Train a TensorForest model on the data returned by ``get_data()``.

    Reads ``num_classes``, ``num_features``, ``num_trees``, ``max_nodes`` and
    ``num_steps`` from the enclosing (module) scope. Prints the loss and the
    training accuracy at step 1 and at every 50th step. Returns nothing.
    """
    x = tf.placeholder(tf.float32, shape=[None, num_features])
    y = tf.placeholder(tf.int32, shape=[None])
    batch_x, batch_y = get_data()
    print(num_classes, num_features, num_trees, max_nodes)

    hparams = tensor_forest.ForestHParams(
        num_classes=num_classes,
        num_features=num_features,
        num_trees=num_trees,
        max_nodes=max_nodes).fill()
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    train_op = forest_graph.training_graph(x, y)
    loss_op = forest_graph.training_loss(x, y)

    # NOTE(review): the result is used as a single tensor here; some
    # tensor_forest versions return a tuple from inference_graph - confirm
    # against the installed TensorFlow version.
    infer_op = forest_graph.inference_graph(x)
    correct_prediction = tf.equal(tf.argmax(infer_op, 1),
                                  tf.cast(y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init_vars = tf.global_variables_initializer()
    feed = {x: batch_x, y: batch_y}

    # Use the session as a context manager so it is always closed.
    with tf.Session() as sess:
        sess.run(init_vars)
        for i in range(1, num_steps + 1):
            _, l = sess.run([train_op, loss_op], feed_dict=feed)
            if i % 50 == 0 or i == 1:
                acc = sess.run(accuracy_op, feed_dict=feed)
                print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))
def random_forest(num_classes=2, num_features=46, num_trees=100, max_nodes=10000):
    """Assemble a TensorForest graph and hand back its tensors and ops.

    Args:
        num_classes: Number of classes passed to ``ForestHParams``.
        num_features: Width of the float32 feature placeholder.
        num_trees: Number of trees passed to ``ForestHParams``.
        max_nodes: Node limit passed to ``ForestHParams``.

    Returns:
        Tuple ``(infer_op, accuracy_op, train_op, loss_op, X, Y)`` where ``X``
        and ``Y`` are the feature and label placeholders. No session is
        created and nothing is initialized here.
    """
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    # Labels are class ids, so the label placeholder is integer typed.
    Y = tf.placeholder(tf.int32, shape=[None])

    forest_settings = dict(
        num_classes=num_classes,
        num_features=num_features,
        num_trees=num_trees,
        max_nodes=max_nodes,
    )
    hparams = tensor_forest.ForestHParams(**forest_settings).fill()
    forest_graph = tensor_forest.RandomForestGraphs(hparams)

    train_op = forest_graph.training_graph(X, Y)
    loss_op = forest_graph.training_loss(X, Y)

    # Accuracy: share of rows whose arg-max prediction equals the label.
    infer_op, _, _ = forest_graph.inference_graph(X)
    predicted_class = tf.argmax(infer_op, 1)
    hits = tf.equal(predicted_class, tf.cast(Y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(hits, tf.float32))

    # The initializer op is still added to the graph, but it is neither run
    # nor returned from this function.
    variables_init = tf.global_variables_initializer()
    resources_init = resources.initialize_resources(
        resources.shared_resources())
    init_vars = tf.group(variables_init, resources_init)

    return infer_op, accuracy_op, train_op, loss_op, X, Y
def __init__(self, num_features, num_classes, num_trees, max_nodes):
    """Reset the default graph and build a TensorForest graph on this object.

    Args:
        num_features: Width of the float32 feature placeholder ``self.X``.
        num_classes: Number of classes passed to ``ForestHParams``.
        num_trees: Number of trees passed to ``ForestHParams``.
        max_nodes: Node limit passed to ``ForestHParams``.

    Sets the placeholders, hyper-parameters, forest graph, train/loss/infer
    ops, accuracy op and the initializer op as attributes; ``self.sess`` is
    left as ``None``.
    """
    tf.reset_default_graph()

    self.X = tf.placeholder(tf.float32, shape=[None, num_features])
    self.Y = tf.placeholder(tf.int32, shape=[None])

    forest_settings = {
        "num_classes": num_classes,
        "num_features": num_features,
        "num_trees": num_trees,
        "max_nodes": max_nodes,
    }
    self.hparams = tensor_forest.ForestHParams(**forest_settings).fill()
    print("test 1")

    # Graph construction.
    self.forest_graph = tensor_forest.RandomForestGraphs(self.hparams)
    self.train_op = self.forest_graph.training_graph(self.X, self.Y)
    self.loss_op = self.forest_graph.training_loss(self.X, self.Y)
    self.infer_op, _, _ = self.forest_graph.inference_graph(self.X)
    print("test 2")

    predicted_class = tf.argmax(self.infer_op, 1)
    self.correct_pred = tf.equal(predicted_class,
                                 tf.cast(self.Y, tf.int64))
    self.accuracy_op = tf.reduce_mean(
        tf.cast(self.correct_pred, tf.float32))

    variables_init = tf.global_variables_initializer()
    resources_init = resources.initialize_resources(
        resources.shared_resources())
    self.init = tf.group(variables_init, resources_init)

    self.sess = None
def testInferenceConstructionSparse(self):
    """The inference graph over a sparse input yields three tensors."""
    input_data = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
        values=[-1.0, 0.0, -1., 2., 1., -2.0],
        dense_shape=[4, 10])

    params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=10,
        num_trees=10,
        max_nodes=1000,
        regression=True,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    probs, paths, var = graph_builder.inference_graph(input_data)

    # assertIsInstance reports the offending type when the check fails,
    # unlike assertTrue(isinstance(...)).
    self.assertIsInstance(probs, ops.Tensor)
    self.assertIsInstance(paths, ops.Tensor)
    self.assertIsInstance(var, ops.Tensor)
def infer_sts(infer_x):
    """Classify samples with the forest restored from ``checkpoint_merge``.

    Args:
        infer_x: Array-like of samples fed to a float32 placeholder of shape
            ``[None, 13]``.

    Returns:
        ``(result, confidence, importances)``: the arg-max class per sample,
        the maximum of the inference output per sample, and the value of
        ``forest_graph.feature_importances()``. If the input contains any
        NaN, ``(0, None, None)`` is returned without touching TensorFlow.
    """
    # `np.nan in infer_x` cannot find NaNs inside an array because NaN never
    # compares equal to itself; np.isnan checks every element instead.
    if np.isnan(np.asarray(infer_x, dtype=np.float64)).any():
        return 0, None, None

    # Parameters
    num_classes = 2
    num_features = 13
    num_trees = 20
    max_nodes = 200

    # Random Forest Parameters; fill() sets the parameters not given here.
    hparams = tensor_forest.ForestHParams(
        num_classes=num_classes,
        num_features=num_features,
        regression=False,
        num_trees=num_trees,
        max_nodes=max_nodes).fill()

    # Input data
    X = tf.placeholder(tf.float32, shape=[None, num_features])

    # Build the Random Forest
    forest_graph = tensor_forest.RandomForestGraphs(hparams)

    # NOTE(review): the result is used as a single tensor here; some
    # tensor_forest versions return a tuple from inference_graph - confirm
    # against the installed TensorFlow version.
    infer_op = forest_graph.inference_graph(X)
    infer_result = tf.argmax(infer_op, 1)
    # Maximum confidence per sample
    infer_op = tf.reduce_max(infer_op, axis=1)
    feature_importance = forest_graph.feature_importances()

    init_vars = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # The context manager guarantees the session is closed once the results
    # have been fetched.
    with tf.Session() as sess:
        sess.run(init_vars)
        saver.restore(sess, tf.train.latest_checkpoint("checkpoint_merge"))
        result, confidence, importances = sess.run(
            [infer_result, infer_op, feature_importance],
            feed_dict={X: infer_x})

    return result, confidence, importances
def testImpurityConstruction(self):
    """average_impurity() on a freshly built forest returns a tensor."""
    params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    graph = graph_builder.average_impurity()

    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(graph, tf.Tensor)
def main():
    """Train a TensorForest classifier on MNIST and print its test accuracy.

    The data set is read from the ``data`` directory next to this file.
    Loss and batch accuracy are printed every 50 steps.
    """
    current_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'data')
    mnist = input_data.read_data_sets(current_path, one_hot=False)

    # Parameters
    num_steps = 500  # Total steps to train
    batch_size = 1024  # The number of samples per batch
    num_classes = 10  # The 10 digits
    num_features = 784  # Each image is 28x28 pixels
    num_trees = 10
    max_nodes = 1000

    X = tf.placeholder(tf.float32, shape=[None, num_features])
    # For random forest, labels must be integers (the class id)
    Y = tf.placeholder(tf.int32, shape=[None])

    # Random Forest Parameters
    hparams = tensor_forest.ForestHParams(
        num_classes=num_classes,
        num_features=num_features,
        num_trees=num_trees,
        max_nodes=max_nodes).fill()

    # Build the Random Forest, its training op and its loss
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    forest_graph_train = forest_graph.training_graph(X, Y)
    forest_graph_loss = forest_graph.training_loss(X, Y)

    # Measure the accuracy
    infer_op_, _, _ = forest_graph.inference_graph(X)
    correct_prediction = tf.equal(tf.argmax(infer_op_, 1),
                                  tf.cast(Y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))

    with tf.Session() as sess:
        sess.run(init_vars)
        for i in range(num_steps):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            batch_feed = {X: batch_x, Y: batch_y}
            _, l = sess.run([forest_graph_train, forest_graph_loss],
                            feed_dict=batch_feed)
            # The operands were reversed (`50 % i`), which raises
            # ZeroDivisionError on the first iteration (i == 0).
            if i % 50 == 0:
                acc = sess.run(accuracy_op, feed_dict=batch_feed)
                print('step {0} loss {1} accuracy {2}'.format(i, l, acc))

        # Test model
        test_x, test_y = mnist.test.images, mnist.test.labels
        print('test accuracy {0}'.format(
            sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y})))
def train_model(self, X_train, Y_train):
    """Train a TensorForest model and write checkpoints to disk.

    Runs 100 training iterations on batches drawn with ``self.next_batch``.
    A checkpoint is written under ``model_checkpoints/`` after every
    iteration and the final model under ``model_final_checkpoints/``.

    Args:
        X_train: Object whose ``.values`` holds the input features; fed to a
            float32 placeholder of shape ``[None, 13]``.
        Y_train: Object whose ``.values`` holds the labels; fed to an int8
            placeholder of shape ``[None]``.
    """
    x_train = X_train.values
    y_train = Y_train.values

    # Placeholders for input and target data
    X = tf.placeholder(tf.float32, shape=[None, 13])
    Y = tf.placeholder(tf.int8, shape=[None])

    feature_names = config.FEATURE_NAMES
    features = self.getFeature(feature_names)

    # NOTE(review): `feature_colums` looks like a misspelling of
    # `feature_columns`; it is left unchanged because it is not visible here
    # whether anything reads the attribute under this name - confirm.
    params = tensor_forest.ForestHParams(
        feature_colums=features,
        num_trees=10,
        max_nodes=1000,
        num_classes=6,
        num_features=13,
    ).fill()

    # Build the Random Forest with its training op and loss
    forest_graph = tensor_forest.RandomForestGraphs(params)
    train_op = forest_graph.training_graph(X, Y)
    loss_op = forest_graph.training_loss(X, Y)

    saver = tf.train.Saver()
    init = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()),
    )
    batch_size = 1000

    # The session is local to this method, so close it when training ends
    # (also when an iteration raises).
    with tf.Session() as sess:
        sess.run(init)
        for i in range(100):
            Xtr, Ytr = self.next_batch(batch_size, x_train, y_train)
            # Feed actual data to the train operation
            sess.run([loss_op, train_op], feed_dict={X: Xtr, Y: Ytr})
            # Create a checkpoint in every iteration
            saver.save(sess, "model_checkpoints/model_iter", global_step=i)
        # Save the final model
        saver.save(sess, "model_final_checkpoints/model_final")
def add_graph(self):
    """Create ``self.forest_graph`` from the hyper-parameters in ``self.config``.

    Reads ``num_classes``, ``num_features``, ``num_trees`` and ``max_nodes``
    from the config object.
    """
    cfg = self.config
    forest_settings = {
        "num_classes": cfg.num_classes,
        "num_features": cfg.num_features,
        "num_trees": cfg.num_trees,
        "max_nodes": cfg.max_nodes,
    }
    filled_params = tensor_forest.ForestHParams(**forest_settings).fill()
    self.forest_graph = tensor_forest.RandomForestGraphs(filled_params)
def build_graph(self):
    """Create placeholders plus the train, loss, inference and accuracy ops.

    Uses ``self.n_features`` for the feature placeholder width and
    ``self.hparams`` for the forest. Results are stored on ``self`` as ``X``,
    ``y``, ``train_op``, ``loss_op``, ``infer_op`` and ``acc_op``.
    """
    self.X = tf.placeholder(tf.float32, shape=[None, self.n_features])
    self.y = tf.placeholder(tf.int32, shape=[None])

    forest = tensor_forest.RandomForestGraphs(self.hparams)
    self.train_op = forest.training_graph(self.X, self.y)
    self.loss_op = forest.training_loss(self.X, self.y)
    self.infer_op = forest.inference_graph(self.X)

    # Accuracy is the mean of the per-row "prediction equals label" flags.
    predicted_class = tf.argmax(self.infer_op, 1)
    true_class = tf.cast(self.y, tf.int64)
    matches = tf.equal(predicted_class, true_class)
    self.acc_op = tf.reduce_mean(tf.cast(matches, tf.float32))
def testInferenceConstruction(self):
    """inference_graph() over a dense input returns a tensor."""
    input_data = [[-1., 0.], [-1., 2.],  # node 1
                  [1., 0.], [1., -2.]]  # node 2

    params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    graph = graph_builder.inference_graph(input_data)

    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(graph, tf.Tensor)
def __init__(self, loss_type, num_trees, num_classes, num_features):
    """Build a TensorForest graph inside the current variable scope.

    Args:
        loss_type: Stored unchanged as ``self.loss_type``.
        num_trees: Number of trees; also stored as ``self.num_trees``.
        num_classes: Number of classes passed to ``ForestHParams``.
        num_features: Number of features passed to ``ForestHParams``.
    """
    current_scope = tf.get_variable_scope()
    with tf.variable_scope(current_scope, reuse=tf.AUTO_REUSE):
        # max_nodes is not set here, so fill() decides its value.
        forest_settings = dict(num_classes=num_classes,
                               num_features=num_features,
                               num_trees=num_trees)
        filled_params = tensor_forest.ForestHParams(**forest_settings).fill()
        # Build the Random Forest
        self.forest_graph = tensor_forest.RandomForestGraphs(filled_params)
        self.loss_type = loss_type
        self.num_trees = num_trees
def testTrainingConstructionRegression(self):
    """training_graph() in regression mode returns an operation."""
    input_data = [[-1., 0.], [-1., 2.],  # node 1
                  [1., 0.], [1., -2.]]  # node 2
    input_labels = [0, 1, 2, 3]

    params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25,
        regression=True).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    graph = graph_builder.training_graph(input_data, input_labels)

    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(graph, tf.Operation)
def _build_estimator(self, X=None, Y=None):
    """Build the forest graph once, inferring missing dimensions first.

    Does nothing when ``self._estimator_built`` is already true.

    Args:
        X: Data used to infer ``num_features`` when it is unset.
        Y: Targets used to infer ``num_classes`` when it is unset.

    Raises:
        ValueError: If ``num_classes`` or ``num_features`` is still ``None``
            after inference and checkpoint lookup.
    """
    if not self._estimator_built:
        if self.num_features is None:
            self.num_features = data_util.get_num_features(X)
        if self.num_classes is None:
            if not self.regression:
                self.num_classes = data_util.get_num_classes(Y)
            else:
                # In regression mode the target width is used as the
                # "class" count.
                self.num_classes = data_util.get_num_features(Y)

        # Reload params from checkpoint if available
        if self._to_be_restored and self.num_features is None:
            self.num_features = misc.read_tensor_in_checkpoint(
                'num_features', self._to_be_restored)
        if self._to_be_restored and self.num_classes is None:
            self.num_classes = misc.read_tensor_in_checkpoint(
                'num_classes', self._to_be_restored)

        # Purity checks
        if self.num_classes is None:
            raise ValueError("'num_classes' cannot be None.")
        if self.num_features is None:
            raise ValueError("'num_features' cannot be None.")

        # Persistent Parameters: stored as graph variables under the same
        # names that the checkpoint lookup above reads.
        tf.Variable(self.num_classes, dtype=tf.int32, name='num_classes')
        tf.Variable(self.num_features, dtype=tf.int32, name='num_features')

        # Random Forest Parameters
        self.params = tensor_forest.ForestHParams(
            num_classes=self.num_classes,
            num_features=self.num_features,
            num_trees=self.num_estimators,
            max_nodes=self.max_nodes,
            split_after_samples=self.split_after_samples,
            min_split_samples=self.min_samples_split,
            regression=self.regression,
            bagging_fraction=self.bagging_fraction,
            num_splits_to_consider=self.num_splits_to_consider,
            feature_bagging_fraction=self.feature_bagging_fraction,
            max_fertile_nodes=self.max_fertile_nodes,
            valid_leaf_threshold=self.valid_leaf_threshold,
            dominate_method=self.dominate_method,
            dominate_fraction=self.dominate_fraction).fill()
        self.forest_graph = tensor_forest.RandomForestGraphs(self.params)
        self._estimator_built = True
        # NOTE(review): assumed to belong to the "not yet built" branch -
        # confirm against the original indentation.
        self._init_graph()
def testTrainingConstructionClassificationSparse(self):
    """training_graph() over a sparse input returns an operation."""
    # NOTE(review): the `shape=` keyword is kept as written; other
    # TensorFlow versions name this argument `dense_shape` - confirm against
    # the installed version.
    input_data = tf.SparseTensor(
        indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
        values=[-1.0, 0.0, -1., 2., 1., -2.0],
        shape=[4, 10])
    input_labels = [0, 1, 2, 3]

    params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=10,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    graph = graph_builder.training_graph(input_data, input_labels)

    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(graph, tf.Operation)
def testInfrenceFromRestoredModel(self):
    """A forest restored from a serialized tree predicts the stored leaves.

    The single restored tree splits on feature 0 at threshold 0 and both of
    its leaves hold the vector ``[0.0, 1.0]``, so every input row must be
    predicted as ``[0.0, 1.0]``.
    """
    input_data = [[-1., 0.], [-1., 2.],  # node 1
                  [1., 0.], [1., -2.]]  # node 2
    expected_prediction = [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]

    hparams = tensor_forest.ForestHParams(
        num_classes=2,
        num_features=2,
        num_trees=1,
        max_nodes=1000,
        split_after_samples=25).fill()

    tree_weight = {
        'decisionTree': {
            'nodes': [
                {
                    'binaryNode': {
                        'rightChildId': 2,
                        'leftChildId': 1,
                        'inequalityLeftChildTest': {
                            'featureId': {'id': '0'},
                            'threshold': {'floatValue': 0},
                        },
                    },
                },
                {
                    'leaf': {
                        'vector': {
                            'value': [{'floatValue': 0.0},
                                      {'floatValue': 1.0}],
                        },
                    },
                    'nodeId': 1,
                },
                {
                    'leaf': {
                        'vector': {
                            'value': [{'floatValue': 0.0},
                                      {'floatValue': 1.0}],
                        },
                    },
                    'nodeId': 2,
                },
            ],
        },
    }
    restored_tree_param = ParseDict(
        tree_weight, _tree_proto.Model()).SerializeToString()

    graph_builder = tensor_forest.RandomForestGraphs(
        hparams, [restored_tree_param])
    probs, paths, var = graph_builder.inference_graph(input_data)

    self.assertIsInstance(probs, ops.Tensor)
    self.assertIsInstance(paths, ops.Tensor)
    self.assertIsInstance(var, ops.Tensor)

    with self.test_session():
        variables.global_variables_initializer().run()
        resources.initialize_resources(resources.shared_resources()).run()
        # Evaluate once and reuse the result for both checks.
        predictions = probs.eval()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(predictions.shape, (4, 2))
        self.assertEqual(predictions.tolist(), expected_prediction)
def _build_model(self):
    """Create placeholders, the forest graph and the loss/accuracy summaries.

    Stores ``input_x``, ``input_y``, ``train_op``, ``loss``, ``pred``,
    ``acc`` and the merged ``summary`` op on ``self``.
    """
    self.input_x = tf.placeholder(tf.int32, [None, self.seqlen], name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None], name="input_y")
    params = tensor_forest.ForestHParams(
        num_classes=self.total_class,
        num_trees=100,
        max_nodes=10000000,
        num_features=10000,
    ).fill()
    graph = tensor_forest.RandomForestGraphs(params)
    self.train_op = graph.training_graph(self.input_x, self.input_y)
    # NOTE(review): sibling code in this file obtains the loss through
    # `training_loss`; confirm that `loss_graph` exists in the installed
    # tensor_forest version.
    self.loss = graph.loss_graph(self.input_x, self.input_y)
    self.pred = graph.inference_graph(self.input_x)
    # Accuracy: mean of the "arg-max prediction equals label" flags.
    self.acc = tf.reduce_mean(tf.cast(
        tf.equal(tf.argmax(self.pred, 1), tf.cast(self.input_y, tf.int64)),
        tf.float32))
    # NOTE(review): `summary` is not defined inside this method; presumably
    # a list defined elsewhere - confirm.
    summary.append(tf.summary.scalar("loss", self.loss))
    summary.append(tf.summary.scalar("acc", self.acc))
    self.summary = tf.summary.merge(summary, name="merge_summary")
def randomforest(x, y, features_dim, class_num, tree_num):
    """Build a TensorForest graph with loss and accuracy summaries.

    Args:
        x: Feature tensor passed to the forest graph.
        y: Label tensor; cast to int64 for the accuracy comparison.
        features_dim: Number of features passed to ``ForestHParams``.
        class_num: Number of classes passed to ``ForestHParams``.
        tree_num: Number of trees passed to ``ForestHParams``.

    Returns:
        Tuple ``(train_step, loss, accuracy, output)``.
    """
    # NOTE(review): the three name scopes are laid out as siblings here; the
    # original indentation was lost, so confirm they were not nested.
    with tf.name_scope('random_forest'):
        Hparams = tensor_forest.ForestHParams(num_classes=class_num, num_features=features_dim, num_trees=tree_num).fill()
        forest_graph = tensor_forest.RandomForestGraphs(Hparams)
        train_step = forest_graph.training_graph(x, y)
    with tf.name_scope('random_forest_loss'):
        loss = forest_graph.training_loss(x, y)
        # The summary tag is "svm_loss" although the value is the forest's
        # training loss.
        tf.summary.scalar("svm_loss", loss)
    with tf.name_scope('accuracy'):
        output, _, _ = forest_graph.inference_graph(x)
        correct_prediction = tf.equal(tf.argmax(output, 1), tf.cast(y, tf.int64))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar("accuracy", accuracy)
    return train_step, loss, accuracy, output
def build_model(self):
    """Build placeholders, the forest graph and the train/loss/accuracy ops.

    Stores ``is_training``, ``x``, ``y``, ``increment_global_step_op``,
    ``train_step``, ``loss`` and ``accuracy`` on ``self``. Reads
    ``self.global_step_tensor``, which must already exist.
    """
    self.is_training = tf.placeholder(tf.bool, name="is_training")
    num_classes = 10
    num_features = 784
    num_trees = 10
    max_nodes = 1000
    # Input and Target data
    self.x = tf.placeholder(tf.float32, shape=[None, num_features], name="image")
    # For random forest, labels must be integers (the class id)
    self.y = tf.placeholder(tf.int32, shape=[None], name="labels")
    # Random Forest Parameters
    hparams = tensor_forest.ForestHParams(num_classes=num_classes, num_features=num_features, num_trees=num_trees, max_nodes=max_nodes).fill()
    # Build the Random Forest
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    output, _, _ = forest_graph.inference_graph(self.x)
    self.increment_global_step_op = tf.assign(self.global_step_tensor, self.global_step_tensor + 1)
    # NOTE(review): all four statements below are placed inside the "loss"
    # name scope; the original indentation was lost - confirm.
    with tf.name_scope("loss"):
        # Get training graph and loss
        self.train_step = forest_graph.training_graph(self.x, self.y)
        self.loss = forest_graph.training_loss(self.x, self.y)
        correct_prediction = tf.equal(tf.argmax(output, 1), tf.cast(self.y, tf.int64))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))
def set_parameter(self, param):
    """Fill in default parameters and build the forest training graph.

    Keys missing from ``param`` are copied into it from
    ``self.default_param`` (the dict is modified in place), then
    ``self.build_model()`` is called and the forest, its ops and the
    initializer are stored on ``self``.

    Args:
        param: Dict of settings; after defaults are applied it must provide
            ``num_trees``, ``max_nodes``, ``batch_size`` and ``num_epochs``.
    """
    for key in self.default_param:
        # setdefault only writes when the key is absent.
        param.setdefault(key, self.default_param[key])

    self.build_model()

    # Random Forest Parameters
    forest_settings = {
        "num_classes": self.class_num,
        "num_features": self.feature_num,
        "num_trees": param['num_trees'],
        "max_nodes": param['max_nodes'],
    }
    self.hparams = tensor_forest.ForestHParams(**forest_settings).fill()

    # Forest graph with its training op and loss
    self.forest_graph = tensor_forest.RandomForestGraphs(self.hparams)
    self.train_op = self.forest_graph.training_graph(
        self.inputs, self.labels)
    self.loss = self.forest_graph.training_loss(self.inputs, self.labels)

    # Accuracy: mean of the "arg-max prediction equals label" flags
    self.infer_op, _, _ = self.forest_graph.inference_graph(self.inputs)
    predicted_class = tf.argmax(self.infer_op, 1)
    self.correct_prediction = tf.equal(predicted_class,
                                       tf.cast(self.labels, tf.int64))
    self.accuracy = tf.reduce_mean(
        tf.cast(self.correct_prediction, tf.float32))

    variables_init = tf.global_variables_initializer()
    resources_init = resources.initialize_resources(
        resources.shared_resources())
    self.init_vars = tf.group(variables_init, resources_init)

    self.batch_size = param["batch_size"]
    self.num_epochs = param["num_epochs"]
max_nodes = 1000 # Input and Target data X = tf.placeholder(tf.float32, shape=[None, num_features]) # Labels must be integers in random forest Y = tf.placeholder(tf.int32, shape=[None]) # Random forest parameters hparams = tensor_forest.ForestHParams(num_classes=num_classes, num_features=num_features, num_trees=num_trees, max_nodes=max_nodes).fill() # Build the Random Forest forest_graph = tensor_forest.RandomForestGraphs(hparams) # Get training graph and loss train_op = forest_graph.training_graph(X, Y) loss_op = forest_graph.training_loss(X, Y) # Measure the accuracy infer_op, _ = forest_graph.inference_graph(X) correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast( Y, tf.int64)) # maybe switch this to check if in bucket accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) # Initialize the variables and forest resources init_vars = tf.group(tf.global_variables_initializer()) # init_vars = tf.group(tf.global_variables_initializer(), resources.initialize_resources(resources.shared_resources()))
def main():
    """Train a TensorForest model from CSV files and export a SavedModel.

    Reads the input and checkpoint paths from ``FLAGS``, restores an existing
    checkpoint when the checkpoint path exists, trains, saves a checkpoint
    and exports a SavedModel with a ``predict`` signature to the path
    returned by ``get_model_path()``.
    """
    input_path = FLAGS.input
    checkpoint_path = FLAGS.checkpoint
    model_path = get_model_path()

    # Parameters
    num_steps = 50
    num_classes = 100
    num_features = 8
    num_trees = 10
    max_nodes = 1000

    features = tf.placeholder(tf.float32, shape=[None, num_features])
    label = tf.placeholder(tf.int32, shape=[None])

    # Random Forest Parameters
    hparams = tensor_forest.ForestHParams(num_classes=num_classes, num_features=num_features, num_trees=num_trees, max_nodes=max_nodes).fill()

    # Build the Random Forest
    forest_graph = tensor_forest.RandomForestGraphs(hparams)

    # Get training graph and loss
    train_op = forest_graph.training_graph(features, label)
    loss_op = forest_graph.training_loss(features, label)

    # Measure the accuracy
    infer_op, _, _ = forest_graph.inference_graph(features)

    # Initialize the variables (i.e. assign their default value) and forest resources
    init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))
    saver = tf.train.Saver()

    # Start TensorFlow session
    sess = tf.Session()

    # Run the initializer
    sess.run(init_vars)
    if os.path.exists(checkpoint_path):
        saver.restore(sess, checkpoint_path)

    # Training
    for i in range(1, num_steps + 1):
        # Prepare Data: all columns but the last are features, the last
        # column is the label.
        for file in get_files(input_path, ext='csv'):
            data = pd.read_csv(file)
            input_x = data.iloc[:, 0:-1].values
            input_y = data.iloc[:, -1].values
            _, l = sess.run([train_op, loss_op], feed_dict={
                features: input_x,
                label: input_y
            })
            # NOTE(review): placed so that only the first file is used in
            # each step; the original indentation was lost - confirm the
            # loop this `break` belongs to.
            break
    print('saved path: ', saver.save(sess, checkpoint_path))

    # export SavedModel
    signature = tf.saved_model.signature_def_utils.predict_signature_def(
        inputs={'x': features}, outputs={'y': infer_op})
    builder = tf.saved_model.builder.SavedModelBuilder(model_path)
    builder.add_meta_graph_and_variables(
        sess=sess, tags=[tf.saved_model.tag_constants.SERVING],
        signature_def_map={"predict": signature})
    builder.save()
def run_LAmbDA2(gamma, delta, tau, prc_cut, bs_prc, num_trees, max_nodes):
    """Train a TensorForest model for one hyper-parameter setting.

    Works on the module-level globals ``X``, ``Y``, ``Gnp``, ``Dnp``,
    ``train``, ``test``, ``prt`` and ``cv``. Runs 100 training iterations
    and resets the default graph before returning.

    Args:
        gamma: Passed to ``resample``.
        delta: Passed to ``get_yn``.
        tau: Passed to ``get_yn``.
        prc_cut: Rounded up to an int and passed to ``resample``.
        bs_prc: Fraction of the resampled training indices used as a batch.
        num_trees: Number of trees (converted to int).
        max_nodes: Node limit (converted to int).

    Returns:
        The value of ``accuracy_op`` on the test feed: the mean over samples
        of the summed squared difference between the one-hot labels and the
        predictions (printed as ``loss1``).
    """
    global X, Y, Gnp, Dnp, train, test, prt, cv
    D = tf.cast(Dnp, tf.float32)
    G = tf.cast(Gnp, tf.float32)
    #optunity_it = optunity_it+1;
    num_trees = int(num_trees)
    max_nodes = int(max_nodes)
    prc_cut = int(np.ceil(prc_cut))
    print(
        "gamma=%.4f, delta=%.4f, tau=%.4f, prc_cut=%i, bs_prc=%.4f, num_trees=%i, max_nodes=%i"
        % (gamma, delta, tau, prc_cut, bs_prc, num_trees, max_nodes))
    input_feats = X.shape[1]
    # num_labls first holds the full shape of G, then only its first entry.
    num_labls = G.shape.as_list()
    output_feats = num_labls[1]
    #print(output_feats)
    num_labls = num_labls[0]
    rowsums = np.sum(Gnp, axis=1)
    train2 = resample(prc_cut, Y, Gnp, train, gamma)  # Bug??
    bs = int(np.ceil(bs_prc * train2.size))
    xs = tf.placeholder(tf.float32, [None, input_feats])
    #ys = tf.placeholder(tf.float32, [None,num_labls])
    yin = tf.placeholder(tf.int32, [None])
    print("Vars loaded xs and ys created")
    hparams = tensor_forest.ForestHParams(num_classes=output_feats, num_features=input_feats, num_trees=num_trees, max_nodes=max_nodes).fill()
    print("Tensor forest hparams created")
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    print("Tensor forest graph created")
    train_op = forest_graph.training_graph(xs, yin)
    loss_op = forest_graph.training_loss(xs, yin)
    print("Loss and train ops created")
    predict, _, _ = forest_graph.inference_graph(xs)
    print("Tensor forest variables created through predict")
    # Despite its name, this op is an error measure (lower is better).
    accuracy_op = tf.reduce_mean(
        tf.reduce_sum(tf.square(tf.one_hot(yin, output_feats) - predict),
                      reduction_indices=[1]))
    print(
        tf.reduce_sum(tf.square(tf.one_hot(yin, output_feats) - predict),
                      reduction_indices=[1]))
    #predict = tf.one_hot(pred);
    print("Lambda specific variables created")
    # Creating training and testing steps
    # Zero the rows of G whose row sum exceeds 1, then keep only the samples
    # whose projected label row sums to exactly 1.
    G2 = np.copy(Gnp)
    G2[rowsums > 1, :] = 0
    YI = np.matmul(Y, G2)
    YIrs = np.sum(YI, axis=1)
    trainI = train2[np.in1d(train2, np.where(YIrs == 1))]
    print("data type trainI,", trainI.dtype)
    testI = test[np.in1d(test, np.where(YIrs == 1))]
    print("trainI testI created")
    #init_vars=tf.global_variables_initializer()
    init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))
    sess = tf.Session()
    sess.run(init_vars)
    print("Session started")
    #beep = sess.run(predict,feed_dict={xs:X[1:100,:]});
    #beep = sess.run(predict,feed_dict={xs:X[train2[0:bs],:]});
    # Each feed dict is materialized once here by running the label
    # computation in the session.
    tensor_trainI = {
        xs: X[trainI, :],
        yin: sess.run(tf.argmax(get_yi(rowsums, G2, Y[trainI, :]), axis=1))
    }
    print("tensor_trainI made")
    tensor_testI = {
        xs: X[testI, :],
        yin: sess.run(tf.argmax(get_yi(rowsums, G2, Y[testI, :]), axis=1))
    }
    print("tensor_testI made")
    tensor_train = {
        xs: X[train2[0:bs], :],
        yin: sess.run(
            tf.argmax(get_yn(
                sess.run(predict, feed_dict={xs: X[train2[0:bs], :]}),
                Y[train2[0:bs], :], delta, tau, output_feats), axis=1))
    }
    print("tensor_train made")
    tensor_test = {
        xs: X[test, :],
        yin: sess.run(
            tf.argmax(get_yn(sess.run(predict, feed_dict={xs: X[test, :]}),
                             Y[test, :], delta, tau, output_feats), axis=1))
    }
    print("tensor_test made")
    #**********************************
    #print("Loss and training steps created with sample tensors")
    # Setting params and initializing
    print("Beginning iterations")
    # Starting training iterations
    print(X.shape)
    for i in range(1, 101):
        if i < 50:
            sess.run(train_op, feed_dict=tensor_trainI)
            #print("ran train op")
            if i % 10 == 0:
                print(
                    str(sess.run(accuracy_op, feed_dict=tensor_trainI)) + ' ' +
                    str(sess.run(accuracy_op, feed_dict=tensor_testI)))
        else:
            sess.run(train_op, feed_dict=tensor_train)
            if i % 10 == 0:
                print(
                    str(sess.run(accuracy_op, feed_dict=tensor_train)) + ' ' +
                    str(sess.run(accuracy_op, feed_dict=tensor_test)))
            # NOTE(review): this branch repeats the condition of the `if`
            # above, so it can never run; `np.random_shuffle` is also not a
            # NumPy function (`np.random.shuffle` is) - confirm the intent.
            elif i % 10 == 0:
                np.random_shuffle(train2)
                tensor_train = {
                    xs: X[train2[0:bs], :],
                    yin: sess.run(
                        get_yn(
                            sess.run(predict, feed_dict={xs: X[train2[0:bs], :]}),
                            Y[train2[0:bs], :], delta, tau, output_feats))
                }
    if prt:
        # Dump predictions and true labels for this cross-validation fold.
        blah = sess.run(predict, feed_dict=tensor_test)
        sio.savemat('preds_cv' + str(cv) + '.mat', {'preds': blah})
        sio.savemat('truth_cv' + str(cv) + '.mat', {'labels': Y[test, :]})
    acc = sess.run(accuracy_op, feed_dict=tensor_test)
    print(
        "loss1=%.4f, gamma=%.4f, delta=%.4f, tau=%.4f, prc_cut=%i, bs_prc=%.4f, num_trees=%i, max_nodes=%i"
        % (acc, gamma, delta, tau, prc_cut, bs_prc, num_trees, max_nodes))
    tf.reset_default_graph()
    return (acc)
def classifyByTFRandomForest(self, noOfEpochs, n_estimators, maxNoOfNodes):
    """Train a TensorForest classifier on ``self.trainX`` / ``self.trainY``.

    Prints the loss and training accuracy at step 1 and at every 50th step,
    then the training accuracy, the validation accuracy (when
    ``self.validX`` is set) and a final "Test accuracy" line.

    Args:
        noOfEpochs: Number of training steps to run.
        n_estimators: Number of trees in the forest.
        maxNoOfNodes: Node limit passed to ``ForestHParams``.

    Returns:
        List with the loss of every training step, or ``None`` (after
        printing a warning) when ``self.noOfClasses`` is not set.
    """
    import numpy as np  # local import: feed values must be arrays

    if self.noOfClasses is None:
        print("Warning: No of classes must be defined in constructor")
        return None

    with tf.name_scope("placeholders"):
        X = tf.placeholder(tf.float32, shape=[None, self.noOfFeatures])
        Y = tf.placeholder(tf.int32, shape=[None])

    with tf.name_scope("forest"):
        hParams = tensor_forest.ForestHParams(
            num_classes=self.noOfClasses,
            num_features=self.noOfFeatures,
            num_trees=n_estimators,
            max_nodes=maxNoOfNodes).fill()
        forestGraph = tensor_forest.RandomForestGraphs(hParams)

    with tf.name_scope("optimisers"):
        trainOp = forestGraph.training_graph(X, Y)
        lossOp = forestGraph.training_loss(X, Y)

    with tf.name_scope("accuracy"):
        inferOp, _, _ = forestGraph.inference_graph(X)
        # tf.argmax replaces the deprecated tf.arg_max alias.
        correctPrediction = tf.equal(tf.argmax(inferOp, 1),
                                     tf.cast(Y, tf.int64))
        accuracyOp = tf.reduce_mean(
            tf.cast(correctPrediction, tf.float32))

    # The initializer must be *called*, and shared resources need an
    # explicit initialize op; grouping the function object and the raw
    # resource list does not produce a runnable init op.
    initVars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))

    # feed_dict values must not be tf.Tensor objects, so the labels are
    # converted with NumPy instead of tf.cast.
    trainFeed = {X: self.trainX,
                 Y: np.asarray(self.trainY, dtype=np.int32)}

    losses = []
    with tf.Session() as sess:
        sess.run(initVars)
        for i in range(1, noOfEpochs + 1):
            _, l = sess.run([trainOp, lossOp], feed_dict=trainFeed)
            if i % 50 == 0 or i == 1:
                acc = sess.run(accuracyOp, feed_dict=trainFeed)
                print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))
            losses.append(l)

        trainAccs = sess.run(accuracyOp, feed_dict=trainFeed)
        print("Training accuracy:", trainAccs)

        if self.validX is not None:
            validFeed = {X: self.validX,
                         Y: np.asarray(self.validY, dtype=np.int32)}
            validAccs = sess.run(accuracyOp, feed_dict=validFeed)
            print("Validation accuracy:", validAccs)

        # NOTE(review): this "test" accuracy is computed on the training
        # data, as in the original; no test-set attributes are visible
        # here - confirm which data should be used.
        testAccs = sess.run(accuracyOp, feed_dict=trainFeed)
        print("Test accuracy:", testAccs)

    return losses
def main():
    """Train a TensorForest classifier on the iris data and print accuracy.

    The shuffled data is split into a training part (six tenths, computed
    with integer division) and a disjoint test part made of the remaining
    samples.
    """
    irisData = fu.importData()
    random.shuffle(irisData)

    trainSplit = (len(irisData) // 10) * 6
    irisTrain = irisData[:trainSplit]
    # The former slice `irisData[:-testSplit]` selected the training samples
    # again, so the model was evaluated on data it had been trained on.
    irisTest = irisData[trainSplit:]

    # Forest parameters
    num_epocs = 500  # epochs to train
    batch_size = 50  # samples per batch
    num_classes = 3  # 3 iris classes
    num_features = 4  # 4 features
    num_trees = 10  # number of trees
    max_nodes = 100  # maximum number of nodes

    # Input and target data
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    # For random forest, labels must be integers (the class id)
    Y = tf.placeholder(tf.int32, shape=[None])

    hparams = tensor_forest.ForestHParams(
        num_classes=num_classes,
        num_features=num_features,
        num_trees=num_trees,
        max_nodes=max_nodes).fill()

    # Build the forest with its training op and loss
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    train_op = forest_graph.training_graph(X, Y)
    loss_op = forest_graph.training_loss(X, Y)

    # Measure accuracy
    # NOTE(review): the result is used as a single tensor here; some
    # tensor_forest versions return a tuple from inference_graph - confirm
    # against the installed TensorFlow version.
    infer_op = forest_graph.inference_graph(X)
    correct_prediction = tf.equal(tf.argmax(infer_op, 1),
                                  tf.cast(Y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Initialize the variables
    init_vars = tf.global_variables_initializer()

    # The context manager closes the session when training is done.
    with tf.Session() as sess:
        sess.run(init_vars)

        # Training
        for i in range(1, num_epocs + 1):
            # Get the next batch of training samples and labels
            batch_x, batch_y = fu.getBatch(irisTrain, batch_size)
            batch_feed = {X: batch_x, Y: batch_y}
            _, l = sess.run([train_op, loss_op], feed_dict=batch_feed)
            if i % 50 == 0 or i == 1:
                acc = sess.run(accuracy_op, feed_dict=batch_feed)
                print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))

        # Test model
        test_x, test_y = fu.getBatch(irisTest, len(irisTest))
        print("Test Accuracy:",
              sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))
def main(args):
    """Train a TensorForest classifier on the vocabulary features.

    Builds the feature table from ``build_vocab(args.data_path)``, trains on
    a 70/30 split, saves a checkpoint at step 1 and at every 50th step under
    ``models/`` and prints the accuracy on the held-out part.

    Args:
        args: Object with a ``data_path`` attribute.
    """
    vocab = build_vocab(args.data_path)
    data = pd.DataFrame({
        'label': vocab.labels,
        'lprox': vocab.lprox,
        'rprox': vocab.rprox,
        'x': vocab.x,
        'y': vocab.y,
        'z': vocab.z,
    })
    y = data['label']

    # Each source column holds a sequence per row; expand every one into
    # its own block of columns and concatenate the blocks side by side.
    feature_frames = [
        pd.DataFrame(data[column].values.tolist())
        for column in ('lprox', 'rprox', 'x', 'y', 'z')
    ]
    features_df = pd.concat(feature_frames, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
        features_df, y, test_size=0.3)

    num_steps = 100  # Total steps to train
    num_classes = 2
    num_features = 585
    num_trees = 10
    max_nodes = 1000

    X = tf.placeholder(tf.float32, shape=[None, num_features])
    Y = tf.placeholder(tf.int64, shape=[None])

    hparams = tensor_forest.ForestHParams(
        num_classes=num_classes,
        num_features=num_features,
        num_trees=num_trees,
        max_nodes=max_nodes).fill()
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    train_op = forest_graph.training_graph(X, Y)
    loss_op = forest_graph.training_loss(X, Y)

    infer_op, _, _ = forest_graph.inference_graph(X)
    correct_prediction = tf.equal(tf.argmax(infer_op, 1),
                                  tf.cast(Y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))

    # One Saver for the whole run: constructing it inside the loop added a
    # fresh set of save/restore ops to the graph on every step.
    saver = tf.train.Saver()
    train_feed = {X: X_train, Y: y_train}

    # The context manager closes the session when training is done.
    with tf.Session() as sess:
        sess.run(init_vars)
        for i in range(1, num_steps + 1):
            _, l = sess.run([train_op, loss_op], feed_dict=train_feed)
            if i % 50 == 0 or i == 1:
                acc = sess.run(accuracy_op, feed_dict=train_feed)
                saver.save(sess, 'models/model%i.ckpt' % (i))
                print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))
        print("Test Accuracy:",
              sess.run(accuracy_op, feed_dict={X: X_test, Y: y_test}))
def random_forest_sts(batch_x, batch_y):
    """Evaluate a checkpointed random forest on a validation set.

    Rebuilds the forest graph (2 classes, 13 features, 20 trees), restores
    the latest checkpoint found in ``checkpoint_merge`` and prints accuracy,
    precision, recall and F1 for the given samples. The indices of the false
    positives and false negatives are written to ``pre_list.txt`` (false
    positives first, then a blank separator, then false negatives).

    Args:
        batch_x: indexable collection of feature rows, fed to a
            ``[None, 13]`` float32 placeholder.
        batch_y: indexable collection of class ids, fed to an int32
            placeholder. The labels may be integers or numeric strings;
            both are handled when sorting errors into FP/FN.
    """
    # Function-scope import so this block stands on its own.
    from tensorflow.python.ops import resources

    # Parameters.
    num_classes = 2
    num_features = 13
    num_trees = 20
    max_nodes = 200

    # fill() sets every parameter that is not given explicitly.
    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          regression=False,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()

    # Input features and integer class-id labels (not one-hot).
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    Y = tf.placeholder(tf.int32, shape=[None])

    forest_graph = tensor_forest.RandomForestGraphs(hparams)

    # These two ops are not run here; they are built as in the original so
    # the graph is unchanged.
    train_op = forest_graph.training_graph(X, Y)
    loss_op = forest_graph.training_loss(X, Y)

    # Depending on the TensorFlow release, inference_graph returns either the
    # class probabilities alone or a tuple whose first item is the
    # probabilities; accept both.
    inference = forest_graph.inference_graph(X)
    if isinstance(inference, (tuple, list)):
        inference = inference[0]
    predicted = tf.argmax(inference, 1)

    correct_prediction = tf.equal(predicted, tf.cast(Y, tf.int64))
    # Mean over all fed samples.
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Streaming metrics; each returns (value, update_op).
    acc, acc_op = tf.metrics.accuracy(labels=Y, predictions=predicted)
    pre, pre_op = tf.metrics.precision(labels=Y, predictions=predicted)
    rec, rec_op = tf.metrics.recall(labels=Y, predictions=predicted)

    # tf.metrics keeps its counters in local variables, so those must be
    # initialised as well. Shared resources are initialised like in the other
    # forest builders of this file (a no-op when there are none).
    init_vars = [
        tf.global_variables_initializer(),
        tf.local_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()),
    ]

    saver = tf.train.Saver()
    feed = {X: batch_x, Y: batch_y}

    # The context manager releases the session even if evaluation fails.
    with tf.Session() as sess:
        sess.run(init_vars)
        saver.restore(sess, tf.train.latest_checkpoint("checkpoint_merge"))

        print("Validation Accuracy:", sess.run(accuracy_op, feed_dict=feed))
        print("Validation Accuracy:",
              sess.run([acc, acc_op], feed_dict=feed))
        _, p = sess.run([pre, pre_op], feed_dict=feed)
        print("Validation Precision:", p)
        _, r = sess.run([rec, rec_op], feed_dict=feed)
        print("Validation Recall:", r)
        # F1 is undefined when precision and recall are both zero; report 0
        # instead of dividing by zero.
        f1 = 2 * p * r / (p + r) if (p + r) > 0 else 0.0
        print("Validation F1 score:", f1)

        # Compare the predictions with the labels.
        prediction_list = sess.run(correct_prediction, feed_dict=feed)

    wrong = [idx for idx, ok in enumerate(prediction_list) if not ok]
    # A misclassified positive is a false negative and a misclassified
    # negative is a false positive. int() makes the comparison work for both
    # integer labels and numeric-string labels; comparing against the strings
    # '1'/'0' silently produced empty lists for integer labels.
    FN = [idx for idx in wrong if int(batch_y[idx]) == 1]
    FP = [idx for idx in wrong if int(batch_y[idx]) == 0]

    # Debug output of one sample; skipped when there are too few samples.
    if len(batch_x) > 13:
        print(batch_x[13])
    print(len(FP))
    print(len(FN))

    with open("pre_list.txt", "w") as out:
        for idx in FP:
            print(str(idx), file=out)
        print("\n", file=out)
        for idx in FN:
            print(str(idx), file=out)
def model_func(features, labels, mode, params):
    """Estimator ``model_fn`` backed by a tensor_forest random forest.

    The forest is a 1000-class classifier; the prediction for a sample is
    the class id with the highest probability.

    Args:
        features: the input tensor, or a dict holding it under the key
            ``'X'``.
        labels: target tensor used for training and evaluation. Not used in
            PREDICT mode.
        mode: one of ``tf.estimator.ModeKeys``.
        params: dict that must contain ``'n_dim'``, the number of feature
            columns.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    X = features['X'] if isinstance(features, dict) else features

    # Only used for the debug log line below; falls back to the configured
    # dimension when the static shape cannot be unpacked into two values.
    try:
        n_samples, n_dim = X.shape
    except ValueError:
        n_samples = None
        n_dim = params['n_dim']
    logging.debug('n_dim: {} | n_smaples: {}'.format(n_dim, n_samples))

    num_classes = 1000
    num_features = params['n_dim']
    num_trees = 10
    max_nodes = 1000

    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()
    forest_graph = tensor_forest.RandomForestGraphs(hparams)

    # Predictions come from the forest itself. Previously ``y_pred`` was an
    # alias of ``labels``, which is None in PREDICT mode and made every
    # evaluation metric compare the labels with themselves.
    probabilities, _, _ = forest_graph.inference_graph(X)
    y_pred = tf.argmax(probabilities, 1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'predict_output':
            tf.estimator.export.PredictOutput({"pred_output": y_pred})
        }
        # In PREDICT mode only the predictions are needed.
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions={"y_pred": y_pred},
                                          export_outputs=export_outputs)

    train_op = forest_graph.training_graph(X, labels)
    loss = forest_graph.training_loss(X, labels)

    if mode == tf.estimator.ModeKeys.TRAIN:
        print('in mode TRAIN')
        # Return the real training op (the inference op was returned before,
        # so the forest was never trained). The forest's training op does not
        # advance the global step by itself, so the increment is grouped in;
        # step-based stopping needs it.
        train_op = tf.group(
            train_op, tf.assign_add(tf.train.get_global_step(), 1))
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    assert mode == tf.estimator.ModeKeys.EVAL

    # The metric ops subtract predictions from labels, so both must share a
    # dtype.
    y_pred = tf.cast(y_pred, labels.dtype)

    rmse = tf.metrics.root_mean_squared_error(labels, y_pred)

    def r_squared(labels, y_pred):
        """Return (R^2 of the current batch, dummy update op)."""
        # Flatten and cast so that the arithmetic below matches the float64
        # constant regardless of the label dtype/shape.
        labels = tf.cast(tf.reshape(labels, [-1]), tf.float64)
        y_pred = tf.cast(tf.reshape(y_pred, [-1]), tf.float64)
        unexplained_error = tf.reduce_sum(tf.square(labels - y_pred))
        total_error = tf.reduce_sum(
            tf.square(labels - tf.reduce_mean(labels)))
        r2 = tf.subtract(tf.constant(1., dtype='float64'),
                         tf.div(unexplained_error, total_error))
        # NOTE(review): this is not a streaming metric; the constant merely
        # fills the update-op slot that eval_metric_ops requires.
        return r2, tf.constant(1.)

    # NOTE(review): ``rmse`` is a (value, update_op) pair and is passed as a
    # whole to percentage_below, exactly as before - confirm this is the
    # intended input for the 'rmse_below_*' metrics.
    metrics = {
        'rmse': rmse,
        'mae': tf.metrics.mean_absolute_error(labels, y_pred),
        'rmse_below_10': tf.metrics.percentage_below(rmse, 10),
        'rmse_below_5': tf.metrics.percentage_below(rmse, 5),
        'rmse_below_3': tf.metrics.percentage_below(rmse, 3),
        'rmse_below_1': tf.metrics.percentage_below(rmse, 1),
        'y_pred_below_10': tf.metrics.percentage_below(y_pred, 10),
        'y_pred_below_5': tf.metrics.percentage_below(y_pred, 5),
        'y_pred_below_3': tf.metrics.percentage_below(y_pred, 3),
        'y_pred_below_1': tf.metrics.percentage_below(y_pred, 1),
        'r2': r_squared(labels, y_pred)
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=metrics)
def random_forest_sts(batch_x, batch_y, test_x, test_y):
    """Train a sample-weighted random forest on the CPU and checkpoint it.

    Builds a 2-class forest over 13 features (20 trees), trains it for 200
    full-batch epochs on ``batch_x``/``batch_y`` and, on the first epoch and
    every 50th epoch, prints loss and accuracy, writes an accuracy summary to
    ``./graph``, prints the feature importances and saves a checkpoint to
    ``checkpoint_merge/variable``.

    Args:
        batch_x: training feature rows, fed to a ``[None, 13]`` float32
            placeholder.
        batch_y: training class ids, fed to an int32 placeholder.
        test_x: not used by this function.
        test_y: not used by this function.
    """
    # Function-scope import so this block stands on its own.
    from tensorflow.python.ops import resources

    with tf.device('/cpu:0'):
        # Parameters.
        num_epochs = 200
        num_classes = 2
        num_features = 13
        num_trees = 20
        max_nodes = 200

        # fill() sets every parameter that is not given explicitly.
        hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                              num_features=num_features,
                                              regression=False,
                                              num_trees=num_trees,
                                              max_nodes=max_nodes).fill()

        # Input features and integer class-id labels (not one-hot).
        X = tf.placeholder(tf.float32, shape=[None, num_features])
        Y = tf.placeholder(tf.int32, shape=[None])

        forest_graph = tensor_forest.RandomForestGraphs(hparams)

        # Per-sample training weights.
        # NOTE(review): the counts are hard-coded, so the fed training set
        # presumably has to contain exactly 15683 + 709329 rows - confirm.
        weights = [1] * 15683 + [0.1] * 709329
        print("weight len:", len(weights))

        train_op = forest_graph.training_graph(
            X, Y, input_weights=tf.constant(weights))
        loss_op = forest_graph.training_loss(X, Y)

        # Depending on the TensorFlow release, inference_graph returns either
        # the class probabilities alone or a tuple whose first item is the
        # probabilities; accept both.
        inference = forest_graph.inference_graph(X)
        if isinstance(inference, (tuple, list)):
            inference = inference[0]
        correct_prediction = tf.equal(tf.argmax(inference, 1),
                                      tf.cast(Y, tf.int64))
        # Mean over all fed samples.
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('Accuracy', accuracy_op)

        feature_importances = forest_graph.feature_importances()

        # Initialise shared resources together with the variables, as the
        # other forest builders in this file do; with no shared resources
        # this is a no-op.
        init_vars = tf.group(
            tf.global_variables_initializer(),
            resources.initialize_resources(resources.shared_resources()))
        merged = tf.summary.merge_all()

        model_path = "checkpoint_merge/variable"
        saver = tf.train.Saver()

        def run_train(sess, train_x, train_y):
            """Train for ``num_epochs`` full-batch steps on the given data."""
            train_feed = {X: train_x, Y: train_y}
            for i in range(1, num_epochs + 1):
                _, loss = sess.run([train_op, loss_op], feed_dict=train_feed)
                if i % 50 == 0 or i == 1:
                    # Report on the data this call trains on.
                    summary, acc = sess.run([merged, accuracy_op],
                                            feed_dict=train_feed)
                    print('Epoch %i, Loss: %f, Acc: %f' % (i, loss, acc))
                    writer.add_summary(summary, i)
                    importances = sess.run(feature_importances,
                                           feed_dict=train_feed)
                    print("impotances of feature= ", importances)
                    saver.save(sess, model_path)

        def cross_validate(session, split_size=5):
            """Return the validation accuracy of each stratified fold."""
            # Not called at the moment; kept for manual experiments.
            results = []
            kf = StratifiedKFold(n_splits=split_size, shuffle=True)
            all_x = np.array(batch_x)
            all_y = np.array(batch_y)
            for train_idx, val_idx in kf.split(batch_x, batch_y):
                print("Strat a new fold training...")
                run_train(session, all_x[train_idx], all_y[train_idx])
                results.append(
                    session.run(accuracy_op,
                                feed_dict={
                                    X: all_x[val_idx],
                                    Y: all_y[val_idx]
                                }))
            return results

        # Session and summary writer are released even if training fails.
        with tf.Session() as sess:
            writer = tf.summary.FileWriter('./graph', sess.graph)
            try:
                sess.run(init_vars)
                run_train(sess, batch_x, batch_y)
            finally:
                writer.close()