def tree_models(X, y, num_feat, num_class):
    """Train a TensorForest classifier on a 75/25 split of ``(X, y)`` and print its accuracy.

    Args:
        X: Feature matrix handed to ``train_test_split``; each row must have
            ``num_feat`` columns to match the input placeholder.
        y: Integer class labels, one per row of ``X``.
        num_feat: Number of feature columns.
        num_class: Number of distinct classes.

    Prints loss and training accuracy on step 1 and every 50th step, then the
    accuracy on the held-out 25%. Returns nothing.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)
    num_features = num_feat
    num_steps = 400
    num_classes = num_class
    num_trees = 10
    max_nodes = 1000

    # From here on X and Y name the graph placeholders, not the raw arrays
    # passed in (those were already split above).
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    Y = tf.placeholder(tf.int64, shape=[None])

    hparams = tensor_forest.ForestHParams(num_classes=num_classes, num_features=num_features, num_trees=num_trees, max_nodes=max_nodes).fill()

    forest_graph = tensor_forest.RandomForestGraphs(hparams)

    train_op = forest_graph.training_graph(X, Y)
    loss_op = forest_graph.training_loss(X, Y)

    # Accuracy: fraction of rows whose arg-max prediction equals the label.
    infer_op, _, _ = forest_graph.inference_graph(X)
    correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Forest state lives in shared resources, so those are initialized
    # together with the ordinary global variables.
    init_vars = tf.group(tf.global_variables_initializer(), resources.initialize_resources(resources.shared_resources()))

    # The context manager releases the session even if a training step raises;
    # previously the session was never closed.
    with tf.Session() as rf_sess:
        rf_sess.run(init_vars)

        for i in range(1, num_steps + 1):
            _, l = rf_sess.run([train_op, loss_op], feed_dict={X: X_train, Y: y_train})
            if i % 50 == 0 or i == 1:
                acc = rf_sess.run(accuracy_op, feed_dict={X: X_train, Y: y_train})
                print("Step %i, Loss: %f, Acc: %f" % (i, l, acc))

        print("Test Accuracy:", rf_sess.run(accuracy_op, feed_dict={X: X_test, Y: y_test}))
Esempio n. 2
0
def run_rf():
    """Train a TensorForest classifier on the data from ``get_data()`` and print progress.

    Reads ``num_features``, ``num_classes``, ``num_trees``, ``max_nodes`` and
    ``num_steps`` from the enclosing scope. Every step feeds the same
    ``(batch_x, batch_y)`` pair; loss and training accuracy are printed on
    step 1 and every 50th step. Returns nothing.
    """
    x = tf.placeholder(tf.float32, shape=[None, num_features])
    y = tf.placeholder(tf.int32, shape=[None])
    batch_x, batch_y = get_data()

    print(num_classes, num_features, num_trees, max_nodes)
    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()

    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    train_op = forest_graph.training_graph(x, y)
    loss_op = forest_graph.training_loss(x, y)

    # NOTE(review): this treats the result of inference_graph() as a single
    # tensor and initializes only global variables; other snippets unpack
    # three values and also initialize shared resources -- confirm which
    # TensorFlow version this targets.
    infer_op = forest_graph.inference_graph(x)
    correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init_vars = tf.global_variables_initializer()

    # Use the session as a context manager so it is closed when training
    # finishes or fails; it used to be left open.
    with tf.Session() as sess:
        sess.run(init_vars)

        for i in range(1, num_steps + 1):
            _, l = sess.run([train_op, loss_op], feed_dict={x: batch_x, y: batch_y})
            if i % 50 == 0 or i == 1:
                acc = sess.run(accuracy_op, feed_dict={x: batch_x, y: batch_y})
                print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))
Esempio n. 3
0
def random_forest(num_classes=2,
                  num_features=46,
                  num_trees=100,
                  max_nodes=10000):
    """Assemble a TensorForest classification graph and hand back its main ops.

    Args:
        num_classes: Number of target classes.
        num_features: Width of each input row.
        num_trees: Number of trees in the forest.
        max_nodes: Node limit passed to the forest hyper-parameters.

    Returns:
        ``(infer_op, accuracy_op, train_op, loss_op, X, Y)`` where ``X`` is the
        float32 feature placeholder and ``Y`` the int32 label placeholder.
        No session is created here.
    """
    features_ph = tf.placeholder(tf.float32, shape=[None, num_features])
    # Labels are integer class ids.
    labels_ph = tf.placeholder(tf.int32, shape=[None])

    forest_params = tensor_forest.ForestHParams(
        num_classes=num_classes,
        num_features=num_features,
        num_trees=num_trees,
        max_nodes=max_nodes,
    ).fill()
    forest = tensor_forest.RandomForestGraphs(forest_params)

    train_op = forest.training_graph(features_ph, labels_ph)
    loss_op = forest.training_loss(features_ph, labels_ph)

    # Accuracy = share of rows whose arg-max prediction matches the label.
    probabilities, _, _ = forest.inference_graph(features_ph)
    predicted_ids = tf.argmax(probabilities, 1)
    expected_ids = tf.cast(labels_ph, tf.int64)
    accuracy_op = tf.reduce_mean(
        tf.cast(tf.equal(predicted_ids, expected_ids), tf.float32))

    # The combined initializer is still added to the graph, but it is neither
    # run nor returned from this function.
    tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()),
    )

    return probabilities, accuracy_op, train_op, loss_op, features_ph, labels_ph
Esempio n. 4
0
    def __init__(self, num_features, num_classes, num_trees, max_nodes):
        """Reset the default graph and build a TensorForest classifier in it.

        Stores the placeholders (``X``, ``Y``), hyper-parameters, graph
        builder, train/loss/inference/accuracy ops and the combined
        initializer on ``self``. ``self.sess`` starts out as ``None``.
        """
        tf.reset_default_graph()

        self.X = tf.placeholder(tf.float32, shape=[None, num_features])
        self.Y = tf.placeholder(tf.int32, shape=[None])

        forest_settings = dict(num_classes=num_classes,
                               num_features=num_features,
                               num_trees=num_trees,
                               max_nodes=max_nodes)
        self.hparams = tensor_forest.ForestHParams(**forest_settings).fill()
        print("test 1")

        # Graph builder plus the ops derived from it.
        self.forest_graph = tensor_forest.RandomForestGraphs(self.hparams)
        self.train_op = self.forest_graph.training_graph(self.X, self.Y)
        self.loss_op = self.forest_graph.training_loss(self.X, self.Y)
        self.infer_op, _, _ = self.forest_graph.inference_graph(self.X)

        print("test 2")
        predicted_class = tf.argmax(self.infer_op, 1)
        true_class = tf.cast(self.Y, tf.int64)
        self.correct_pred = tf.equal(predicted_class, true_class)
        self.accuracy_op = tf.reduce_mean(
            tf.cast(self.correct_pred, tf.float32))

        # Global variables and shared resources are initialized together.
        variables_init = tf.global_variables_initializer()
        resources_init = resources.initialize_resources(
            resources.shared_resources())
        self.init = tf.group(variables_init, resources_init)

        self.sess = None
  def testInferenceConstructionSparse(self):
    """Inference over a sparse 4x10 input produces three graph tensors."""
    nonzero_cells = [[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]]
    cell_values = [-1.0, 0.0, -1., 2., 1., -2.0]
    input_data = sparse_tensor.SparseTensor(
        indices=nonzero_cells, values=cell_values, dense_shape=[4, 10])

    params = tensor_forest.ForestHParams(
        num_classes=4, num_features=10, num_trees=10, max_nodes=1000,
        regression=True, split_after_samples=25).fill()

    builder = tensor_forest.RandomForestGraphs(params)
    probs, paths, var = builder.inference_graph(input_data)
    for produced in (probs, paths, var):
      self.assertIsInstance(produced, ops.Tensor)
Esempio n. 6
0
def infer_sts(infer_x):
    """Classify ``infer_x`` with a TensorForest restored from ``checkpoint_merge``.

    Args:
        infer_x: Rows of 13 numeric features (anything ``np.asarray`` can turn
            into a float array).

    Returns:
        ``(result, confidence, importances)``: the arg-max class per row, the
        largest predicted value per row, and the forest's feature importances.
        If any input value is NaN the model is not run and
        ``(0, None, None)`` is returned instead.
    """
    # `np.nan in infer_x` cannot find NaNs stored in an array (NaN != NaN),
    # and the old early return referenced an undefined `_`. Test explicitly
    # and return None for the values that cannot be computed.
    if np.isnan(np.asarray(infer_x, dtype=float)).any():
        return 0, None, None

    # Model parameters
    num_classes = 2
    num_features = 13
    num_trees = 20
    max_nodes = 200

    # fill() sets any hyper-parameters not given explicitly.
    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          regression=False,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()

    # Input data
    X = tf.placeholder(tf.float32, shape=[None, num_features])

    # Build the random forest
    forest_graph = tensor_forest.RandomForestGraphs(hparams)

    # NOTE(review): the inference_graph() result is used as a single tensor
    # here, while other call sites unpack three values -- confirm against the
    # TensorFlow version in use.
    infer_op = forest_graph.inference_graph(X)
    infer_result = tf.argmax(infer_op, 1)
    # Confidence = largest predicted value in each row.
    infer_op = tf.reduce_max(infer_op, axis=1)
    feature_importance = forest_graph.feature_importances()

    init_vars = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # The context manager closes the session once the prediction has been
    # fetched; the session used to be leaked on every call.
    with tf.Session() as sess:
        sess.run(init_vars)
        # Overwrite the freshly initialized values with the trained ones.
        saver.restore(sess, tf.train.latest_checkpoint("checkpoint_merge"))

        result, confidence, importances = sess.run(
            [infer_result, infer_op, feature_importance], feed_dict={X: infer_x})

    # Former debugging aids, kept for reference only:
    #   * map result == 0 to "this point is not a lesion", anything else to
    #     "this point is a lesion";
    #   * dump the names of the trainable variables to variables.txt.
    return result, confidence, importances
  def testImpurityConstruction(self):
    """average_impurity() on a freshly built forest yields a tf.Tensor."""
    forest_params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25).fill()

    impurity = tensor_forest.RandomForestGraphs(
        forest_params).average_impurity()
    self.assertIsInstance(impurity, tf.Tensor)
Esempio n. 8
0
def main():
    """Train a TensorForest on MNIST batches and print batch and test accuracy.

    The dataset is read from the ``data`` directory next to this file.
    Progress is printed every 50th training step, and the accuracy on the
    test split is printed once training finishes.
    """
    current_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                'data')
    mnist = input_data.read_data_sets(current_path, one_hot=False)
    # Parameters
    num_steps = 500  # Total steps to train
    batch_size = 1024  # The number of samples per batch
    num_classes = 10  # The 10 digits
    num_features = 784  # Each image is 28x28 pixels
    num_trees = 10
    max_nodes = 1000
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    # For random forest, labels must be integers (the class id)
    Y = tf.placeholder(tf.int32, shape=[None])

    # Random Forest Parameters
    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()
    # Build the random forest, then its training and loss ops
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    forest_graph_train = forest_graph.training_graph(X, Y)
    forest_graph_loss = forest_graph.training_loss(X, Y)

    # Measure the accuracy
    infer_op_, _, _ = forest_graph.inference_graph(X)
    correct_prediction = tf.equal(tf.argmax(infer_op_, 1),
                                  tf.cast(Y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))

    with tf.Session() as sess:
        sess.run(init_vars)
        for i in range(num_steps):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            _, l = sess.run([forest_graph_train, forest_graph_loss],
                            feed_dict={
                                X: batch_x,
                                Y: batch_y
                            })
            # Report every 50th step. The operands used to be swapped
            # (`50 % i`), which raised ZeroDivisionError on the first
            # iteration (i == 0).
            if i % 50 == 0:
                acc = sess.run(accuracy_op, feed_dict={X: batch_x, Y: batch_y})
                print('step {0} loss {1} accuracy {2}'.format(i, l, acc))

        # Evaluate on the test split while the session is still open.
        test_x, test_y = mnist.test.images, mnist.test.labels
        print('test accuracy {0}'.format(
            sess.run(accuracy_op, feed_dict={
                X: test_x,
                Y: test_y
            })))
Esempio n. 9
0
    def train_model(self, X_train, Y_train):
        """Train a TensorForest for 100 batches, checkpointing after every batch.

        Args:
            X_train: Object whose ``.values`` holds the input features, fed to
                a ``(None, 13)`` float32 placeholder.
            Y_train: Object whose ``.values`` holds the labels, fed to a
                ``(None,)`` int8 placeholder.

        A checkpoint is written under ``model_checkpoints/`` after every
        batch, and the final model under ``model_final_checkpoints/``.
        Returns nothing.
        """
        # input features of size (None, 13)
        x_train = X_train.values

        # output label of size (None)
        y_train = Y_train.values

        # Placeholders for input and target data
        X = tf.placeholder(tf.float32, shape=[None, 13])
        Y = tf.placeholder(tf.int8, shape=[None])

        feature_names = config.FEATURE_NAMES
        features = self.getFeature(feature_names)

        # NOTE(review): the keyword is spelled `feature_colums`; confirm
        # whether `feature_columns` was intended and whether ForestHParams
        # uses it at all.
        params = tensor_forest.ForestHParams(
            feature_colums=features,
            num_trees=10,
            max_nodes=1000,
            num_classes=6,
            num_features=13,
        ).fill()

        # Build the Random Forest
        forest_graph = tensor_forest.RandomForestGraphs(params)

        # Get training graph and loss
        train_op = forest_graph.training_graph(X, Y)
        loss_op = forest_graph.training_loss(X, Y)

        saver = tf.train.Saver()

        init = tf.group(
            tf.global_variables_initializer(),
            resources.initialize_resources(resources.shared_resources()),
        )

        batch_size = 1000

        # The context manager closes the session when training is done or
        # fails; it used to be left open.
        with tf.Session() as sess:
            sess.run(init)
            for i in range(100):
                Xtr, Ytr = self.next_batch(batch_size, x_train, y_train)

                # Feed actual data to the train operation
                sess.run([loss_op, train_op], feed_dict={X: Xtr, Y: Ytr})

                # Create a checkpoint in every iteration
                saver.save(sess, "model_checkpoints/model_iter", global_step=i)

            # Save the final model
            saver.save(sess, "model_final_checkpoints/model_final")
Esempio n. 10
0
 def add_graph(self):
     """Create the TensorForest graph builder from the values in ``self.config``.

     The builder is stored on ``self.forest_graph``; nothing is returned.
     """
     cfg = self.config
     forest_params = tensor_forest.ForestHParams(
         num_classes=cfg.num_classes,
         num_features=cfg.num_features,
         num_trees=cfg.num_trees,
         max_nodes=cfg.max_nodes,
     ).fill()
     self.forest_graph = tensor_forest.RandomForestGraphs(forest_params)
Esempio n. 11
0
 def build_graph(self):
     """Create placeholders and the train/loss/inference/accuracy ops on ``self``.

     Uses ``self.n_features`` for the input width and ``self.hparams`` for the
     forest configuration.
     """
     self.X = tf.placeholder(tf.float32, shape=[None, self.n_features])
     self.y = tf.placeholder(tf.int32, shape=[None])

     forest = tensor_forest.RandomForestGraphs(self.hparams)
     self.train_op = forest.training_graph(self.X, self.y)
     self.loss_op = forest.training_loss(self.X, self.y)
     self.infer_op = forest.inference_graph(self.X)

     # Accuracy: mean of the element-wise "prediction == label" indicator.
     predicted = tf.argmax(self.infer_op, 1)
     expected = tf.cast(self.y, tf.int64)
     hits = tf.equal(predicted, expected)
     self.acc_op = tf.reduce_mean(tf.cast(hits, tf.float32))
  def testInferenceConstruction(self):
    """inference_graph() on dense 4x2 input returns a tf.Tensor."""
    input_data = [
        [-1., 0.], [-1., 2.],  # node 1
        [1., 0.], [1., -2.],  # node 2
    ]

    forest_params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25).fill()

    builder = tensor_forest.RandomForestGraphs(forest_params)
    inference = builder.inference_graph(input_data)
    self.assertIsInstance(inference, tf.Tensor)
Esempio n. 13
0
    def __init__(self, loss_type, num_trees, num_classes, num_features):
        """Build the forest graph builder and remember the loss type and tree count.

        The builder is created inside the current variable scope with
        ``reuse=tf.AUTO_REUSE``. No ``max_nodes`` is passed, so ``fill()``
        supplies it.
        """
        current_scope = tf.get_variable_scope()
        with tf.variable_scope(current_scope, reuse=tf.AUTO_REUSE):
            forest_params = tensor_forest.ForestHParams(
                num_classes=num_classes,
                num_features=num_features,
                num_trees=num_trees,
            ).fill()
            self.forest_graph = tensor_forest.RandomForestGraphs(forest_params)

        self.loss_type = loss_type
        self.num_trees = num_trees
  def testTrainingConstructionRegression(self):
    """training_graph() in regression mode returns a tf.Operation."""
    input_data = [
        [-1., 0.], [-1., 2.],  # node 1
        [1., 0.], [1., -2.],  # node 2
    ]
    input_labels = [0, 1, 2, 3]

    forest_params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25,
        regression=True).fill()

    builder = tensor_forest.RandomForestGraphs(forest_params)
    training = builder.training_graph(input_data, input_labels)
    self.assertIsInstance(training, tf.Operation)
Esempio n. 15
0
    def _build_estimator(self, X=None, Y=None):
        """Build the forest graph once, resolving feature and class counts first.

        Missing ``num_features`` / ``num_classes`` are derived from ``X`` / ``Y``
        and, if still unset, read from the checkpoint in
        ``self._to_be_restored``. Later calls return immediately.

        Raises:
            ValueError: If either count is still ``None`` after both lookups.
        """
        if self._estimator_built:
            return

        # First choice: infer the sizes from the supplied data.
        if self.num_features is None:
            self.num_features = data_util.get_num_features(X)
        if self.num_classes is None:
            size_of = (data_util.get_num_features if self.regression
                       else data_util.get_num_classes)
            self.num_classes = size_of(Y)

        # Second choice: reload them from a checkpoint, if one is pending.
        checkpoint = self._to_be_restored
        if checkpoint and self.num_features is None:
            self.num_features = misc.read_tensor_in_checkpoint(
                'num_features', checkpoint)
        if checkpoint and self.num_classes is None:
            self.num_classes = misc.read_tensor_in_checkpoint(
                'num_classes', checkpoint)

        # Both sizes are mandatory from here on.
        if self.num_classes is None:
            raise ValueError("'num_classes' cannot be None.")
        if self.num_features is None:
            raise ValueError("'num_features' cannot be None.")

        # Record both sizes as graph variables under fixed names.
        tf.Variable(self.num_classes, dtype=tf.int32, name='num_classes')
        tf.Variable(self.num_features, dtype=tf.int32, name='num_features')

        self.params = tensor_forest.ForestHParams(
            num_classes=self.num_classes,
            num_features=self.num_features,
            num_trees=self.num_estimators,
            max_nodes=self.max_nodes,
            split_after_samples=self.split_after_samples,
            min_split_samples=self.min_samples_split,
            regression=self.regression,
            bagging_fraction=self.bagging_fraction,
            num_splits_to_consider=self.num_splits_to_consider,
            feature_bagging_fraction=self.feature_bagging_fraction,
            max_fertile_nodes=self.max_fertile_nodes,
            valid_leaf_threshold=self.valid_leaf_threshold,
            dominate_method=self.dominate_method,
            dominate_fraction=self.dominate_fraction,
        ).fill()
        self.forest_graph = tensor_forest.RandomForestGraphs(self.params)
        self._estimator_built = True
        self._init_graph()
Esempio n. 16
0
    def testTrainingConstructionClassificationSparse(self):
        """training_graph() on a sparse 4x10 input returns a tf.Operation."""
        nonzero_cells = [[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]]
        cell_values = [-1.0, 0.0, -1., 2., 1., -2.0]
        input_data = tf.SparseTensor(indices=nonzero_cells,
                                     values=cell_values,
                                     shape=[4, 10])
        input_labels = [0, 1, 2, 3]

        forest_params = tensor_forest.ForestHParams(
            num_classes=4,
            num_features=10,
            num_trees=10,
            max_nodes=1000,
            split_after_samples=25,
        ).fill()

        builder = tensor_forest.RandomForestGraphs(forest_params)
        training = builder.training_graph(input_data, input_labels)
        self.assertIsInstance(training, tf.Operation)
 def testInfrenceFromRestoredModel(self):
   """A forest built from a serialized one-split tree predicts the stored leaf values.

   Both leaves of the tree hold the vector [0.0, 1.0], so every input row is
   expected to produce that prediction.
   """
   input_data = [[-1., 0.], [-1., 2.],  # node 1
                 [1., 0.], [1., -2.]]  # node 2
   expected_prediction = [[0.0, 1.0], [0.0, 1.0],
                          [0.0, 1.0], [0.0, 1.0]]
   hparams = tensor_forest.ForestHParams(
       num_classes=2,
       num_features=2,
       num_trees=1,
       max_nodes=1000,
       split_after_samples=25).fill()
   # One binary split on feature 0 at threshold 0, with two identical leaves.
   tree_weight = {'decisionTree':
                      {'nodes':
                       [{'binaryNode':
                         {'rightChildId': 2,
                          'leftChildId': 1,
                          'inequalityLeftChildTest':
                          {'featureId': {'id': '0'},
                           'threshold': {'floatValue': 0}}}},
                        {'leaf': {'vector':
                                  {'value': [{'floatValue': 0.0},
                                             {'floatValue': 1.0}]}},
                         'nodeId': 1},
                        {'leaf': {'vector':
                                  {'value': [{'floatValue': 0.0},
                                             {'floatValue': 1.0}]}},
                         'nodeId': 2}]}}
   restored_tree_param = ParseDict(tree_weight,
                                   _tree_proto.Model()).SerializeToString()
   graph_builder = tensor_forest.RandomForestGraphs(hparams,
                                                    [restored_tree_param])
   probs, paths, var = graph_builder.inference_graph(input_data)
   self.assertIsInstance(probs, ops.Tensor)
   self.assertIsInstance(paths, ops.Tensor)
   self.assertIsInstance(var, ops.Tensor)
   with self.test_session():
     variables.global_variables_initializer().run()
     resources.initialize_resources(resources.shared_resources()).run()
     # Evaluate once and check both shape and contents of the same result.
     # assertEqual replaces the deprecated assertEquals alias.
     predicted = probs.eval()
     self.assertEqual(predicted.shape, (4, 2))
     self.assertEqual(predicted.tolist(), expected_prediction)
Esempio n. 18
0
    def _build_model(self):
        """Create placeholders, forest ops, accuracy and merged summaries on ``self``.

        Reads ``self.seqlen`` (input width) and ``self.total_class`` (number of
        classes). Sets ``input_x``, ``input_y``, ``train_op``, ``loss``,
        ``pred``, ``acc`` and ``summary``.
        """
        self.input_x = tf.placeholder(tf.int32, [None, self.seqlen], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None], name="input_y")

        # NOTE(review): num_features is fixed at 10000 while the input
        # placeholder is self.seqlen wide -- confirm these are meant to differ.
        params = tensor_forest.ForestHParams(
            num_classes=self.total_class,
            num_trees=100,
            max_nodes=10000000,
            num_features=10000,
            ).fill()
        graph = tensor_forest.RandomForestGraphs(params)
        self.train_op = graph.training_graph(self.input_x, self.input_y)
        # NOTE(review): other call sites use `training_loss`; verify that
        # `loss_graph` exists on RandomForestGraphs in the targeted version.
        self.loss = graph.loss_graph(self.input_x, self.input_y)
        # NOTE(review): the inference_graph() result is used as one tensor
        # here; other snippets unpack three values -- confirm.
        self.pred = graph.inference_graph(self.input_x)
        # Accuracy: mean of "arg-max prediction == label cast to int64".
        self.acc = tf.reduce_mean(tf.cast(
            tf.equal(tf.argmax(self.pred, 1),
                tf.cast(self.input_y, tf.int64)), tf.float32))

        # NOTE(review): `summary` is not defined in this method; presumably a
        # list defined at module level -- verify, otherwise this is a NameError.
        summary.append(tf.summary.scalar("loss", self.loss))
        summary.append(tf.summary.scalar("acc", self.acc))

        self.summary = tf.summary.merge(summary, name="merge_summary")
def randomforest(x, y, features_dim, class_num, tree_num):
    """Build a TensorForest on ``(x, y)`` and register loss/accuracy summaries.

    Args:
        x: Input feature tensor.
        y: Label tensor; cast to int64 for the accuracy comparison.
        features_dim: Number of input features.
        class_num: Number of classes.
        tree_num: Number of trees.

    Returns:
        ``(train_step, loss, accuracy, output)`` where ``output`` is the first
        value returned by ``inference_graph``.
    """
    with tf.name_scope('random_forest'):
        forest_params = tensor_forest.ForestHParams(
            num_classes=class_num,
            num_features=features_dim,
            num_trees=tree_num,
        ).fill()
        forest = tensor_forest.RandomForestGraphs(forest_params)

    train_step = forest.training_graph(x, y)

    with tf.name_scope('random_forest_loss'):
        loss = forest.training_loss(x, y)
        tf.summary.scalar("svm_loss", loss)

    with tf.name_scope('accuracy'):
        output, _, _ = forest.inference_graph(x)
        predicted = tf.argmax(output, 1)
        target = tf.cast(y, tf.int64)
        hits = tf.equal(predicted, target)
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    return train_step, loss, accuracy, output
    def build_model(self):
        """Create the placeholders and TensorForest ops used for training.

        Sets ``is_training``, ``x``, ``y``, ``increment_global_step_op``,
        ``train_step``, ``loss`` and ``accuracy`` on ``self``. Relies on
        ``self.global_step_tensor`` already being set.
        """
        self.is_training = tf.placeholder(tf.bool, name="is_training")

        n_classes, n_inputs = 10, 784
        n_trees, node_cap = 10, 1000

        # Input features, and integer class-id labels.
        self.x = tf.placeholder(
            tf.float32, shape=[None, n_inputs], name="image")
        self.y = tf.placeholder(tf.int32, shape=[None], name="labels")

        forest_params = tensor_forest.ForestHParams(
            num_classes=n_classes,
            num_features=n_inputs,
            num_trees=n_trees,
            max_nodes=node_cap,
        ).fill()
        forest = tensor_forest.RandomForestGraphs(forest_params)

        class_probs, _, _ = forest.inference_graph(self.x)

        step = self.global_step_tensor
        self.increment_global_step_op = tf.assign(step, step + 1)

        with tf.name_scope("loss"):
            self.train_step = forest.training_graph(self.x, self.y)
            self.loss = forest.training_loss(self.x, self.y)

            predicted = tf.argmax(class_probs, 1)
            actual = tf.cast(self.y, tf.int64)
            hits = tf.equal(predicted, actual)
            self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))
Esempio n. 21
0
    def set_parameter(self, param):
        """Apply ``param`` (completed with defaults) and build the forest ops.

        Keys missing from ``param`` are filled in place from
        ``self.default_param``. ``self.build_model()`` runs before the
        forest is created from ``param['num_trees']`` and
        ``param['max_nodes']``. ``param['batch_size']`` and
        ``param['num_epochs']`` are stored on ``self``.
        """
        for name, fallback in self.default_param.items():
            param.setdefault(name, fallback)

        self.build_model()

        self.hparams = tensor_forest.ForestHParams(
            num_classes=self.class_num,
            num_features=self.feature_num,
            num_trees=param['num_trees'],
            max_nodes=param['max_nodes'],
        ).fill()
        self.forest_graph = tensor_forest.RandomForestGraphs(self.hparams)

        # Training op and loss.
        features, targets = self.inputs, self.labels
        self.train_op = self.forest_graph.training_graph(features, targets)
        self.loss = self.forest_graph.training_loss(features, targets)

        # Accuracy of the arg-max prediction against the labels.
        self.infer_op, _, _ = self.forest_graph.inference_graph(features)
        predicted = tf.argmax(self.infer_op, 1)
        expected = tf.cast(targets, tf.int64)
        self.correct_prediction = tf.equal(predicted, expected)
        self.accuracy = tf.reduce_mean(
            tf.cast(self.correct_prediction, tf.float32))

        variables_init = tf.global_variables_initializer()
        resources_init = resources.initialize_resources(
            resources.shared_resources())
        self.init_vars = tf.group(variables_init, resources_init)

        self.batch_size = param["batch_size"]
        self.num_epochs = param["num_epochs"]
Esempio n. 22
0
# Script fragment: builds a TensorForest classification graph at module level.
# num_features, num_classes and num_trees are expected to be defined earlier
# in the script (not visible here).
max_nodes = 1000

# Input and Target data
X = tf.placeholder(tf.float32, shape=[None, num_features])

# Labels must be integers in random forest
Y = tf.placeholder(tf.int32, shape=[None])

# Random forest parameters; fill() completes the values not given here
hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()

# Build the Random Forest
forest_graph = tensor_forest.RandomForestGraphs(hparams)

# Get training graph and loss
train_op = forest_graph.training_graph(X, Y)
loss_op = forest_graph.training_loss(X, Y)

# Measure the accuracy
# NOTE(review): this unpacks exactly two values from inference_graph(); other
# snippets unpack three -- confirm against the TensorFlow version in use.
infer_op, _ = forest_graph.inference_graph(X)
correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(
    Y, tf.int64))  # maybe switch this to check if in bucket
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialize the variables only; the variant that also initializes the shared
# forest resources is kept commented out below.
init_vars = tf.group(tf.global_variables_initializer())
# init_vars = tf.group(tf.global_variables_initializer(), resources.initialize_resources(resources.shared_resources()))
Esempio n. 23
0
def main():
    """Train a TensorForest from CSV input, checkpoint it and export a SavedModel.

    Reads the input directory and checkpoint path from ``FLAGS`` and the
    export directory from ``get_model_path()``. In each CSV the last column
    is used as the label and the other columns as features. After training,
    a checkpoint is saved and the model is exported with a ``predict``
    signature mapping ``x`` to ``y``.
    """
    input_path = FLAGS.input
    checkpoint_path = FLAGS.checkpoint
    model_path = get_model_path()

    # Parameters
    num_steps = 50
    num_classes = 100
    num_features = 8
    num_trees = 10
    max_nodes = 1000

    features = tf.placeholder(tf.float32, shape=[None, num_features])
    label = tf.placeholder(tf.int32, shape=[None])

    # Random Forest Parameters
    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()

    # Build the Random Forest
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    # Get training graph and loss
    train_op = forest_graph.training_graph(features, label)
    loss_op = forest_graph.training_loss(features, label)
    # Inference op, exported below as the model output
    infer_op, _, _ = forest_graph.inference_graph(features)

    # Initialize the variables (i.e. assign their default value) and forest resources
    init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))

    saver = tf.train.Saver()

    # The context manager closes the session after the export or on failure;
    # the session used to be left open.
    with tf.Session() as sess:
        sess.run(init_vars)

        # NOTE(review): checkpoint_path is tested as a plain file path;
        # confirm this matches how the Saver names its checkpoint files,
        # otherwise a previous checkpoint is never restored.
        if os.path.exists(checkpoint_path):
            saver.restore(sess, checkpoint_path)

        # Training
        for _step in range(1, num_steps + 1):
            for csv_path in get_files(input_path, ext='csv'):
                data = pd.read_csv(csv_path)
                input_x = data.iloc[:, 0:-1].values
                input_y = data.iloc[:, -1].values
                sess.run([train_op, loss_op],
                         feed_dict={
                             features: input_x,
                             label: input_y
                         })
                # Only the first file returned is used in each step.
                break

        print('saved path: ', saver.save(sess, checkpoint_path))

        # Export SavedModel
        signature = tf.saved_model.signature_def_utils.predict_signature_def(
            inputs={'x': features}, outputs={'y': infer_op})
        builder = tf.saved_model.builder.SavedModelBuilder(model_path)
        builder.add_meta_graph_and_variables(
            sess=sess,
            tags=[tf.saved_model.tag_constants.SERVING],
            signature_def_map={"predict": signature})
        builder.save()
Esempio n. 24
0
def run_LAmbDA2(gamma, delta, tau, prc_cut, bs_prc, num_trees, max_nodes):
    """Train a TensorForest model on resampled data and return its test loss.

    Reads the module-level globals ``X``, ``Y``, ``Gnp``, ``train``, ``test``,
    ``prt`` and ``cv``.  Iterations 1-49 train on the rows selected into
    ``trainI`` with targets from ``get_yi``; iterations 50-100 train on the
    first ``bs`` resampled rows with targets produced by ``get_yn`` from the
    model's current predictions.  That batch is reshuffled and its targets
    recomputed after every tenth iteration of the second phase.

    Args:
        gamma: Forwarded to ``resample``.
        delta: Forwarded to ``get_yn``.
        tau: Forwarded to ``get_yn``.
        prc_cut: Rounded up to an int and forwarded to ``resample``.
        bs_prc: Fraction of the resampled training indices used as the batch.
        num_trees: Number of trees in the forest (cast to int).
        max_nodes: Maximum number of nodes per tree (cast to int).

    Returns:
        The mean, over the test feed, of the summed squared difference between
        the one-hot targets and the predictions.  Despite the ``accuracy_op``
        name this is an error measure and is printed as ``loss1``.
    """
    global X, Y, Gnp, Dnp, train, test, prt, cv
    G = tf.cast(Gnp, tf.float32)
    num_trees = int(num_trees)
    max_nodes = int(max_nodes)
    prc_cut = int(np.ceil(prc_cut))
    print(
        "gamma=%.4f, delta=%.4f, tau=%.4f, prc_cut=%i, bs_prc=%.4f, num_trees=%i, max_nodes=%i"
        % (gamma, delta, tau, prc_cut, bs_prc, num_trees, max_nodes))
    input_feats = X.shape[1]
    output_feats = G.shape.as_list()[1]
    rowsums = np.sum(Gnp, axis=1)
    train2 = resample(prc_cut, Y, Gnp, train, gamma)
    # NOTE(review): the original author marked this batch-size computation
    # with "Bug??" -- confirm that a fraction of train2.size is intended.
    bs = int(np.ceil(bs_prc * train2.size))
    xs = tf.placeholder(tf.float32, [None, input_feats])
    yin = tf.placeholder(tf.int32, [None])
    print("Vars loaded xs and ys created")
    hparams = tensor_forest.ForestHParams(num_classes=output_feats,
                                          num_features=input_feats,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()
    print("Tensor forest hparams created")
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    print("Tensor forest graph created")
    train_op = forest_graph.training_graph(xs, yin)
    loss_op = forest_graph.training_loss(xs, yin)
    print("Loss and train ops created")
    predict, _, _ = forest_graph.inference_graph(xs)
    print("Tensor forest variables created through predict")
    # Per-sample squared error between one-hot targets and predictions.
    squared_error = tf.reduce_sum(
        tf.square(tf.one_hot(yin, output_feats) - predict),
        reduction_indices=[1])
    accuracy_op = tf.reduce_mean(squared_error)
    print(squared_error)
    print("Lambda specific variables created")
    # Creating training and testing steps
    G2 = np.copy(Gnp)
    G2[rowsums > 1, :] = 0
    YI = np.matmul(Y, G2)
    YIrs = np.sum(YI, axis=1)
    trainI = train2[np.in1d(train2, np.where(YIrs == 1))]
    print("data type trainI,", trainI.dtype)
    testI = test[np.in1d(test, np.where(YIrs == 1))]
    print("trainI testI created")
    init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))
    sess = tf.Session()

    def _yi_feed(idx):
        """Build a feed dict for rows ``idx`` with targets from ``get_yi``."""
        return {
            xs: X[idx, :],
            yin: sess.run(tf.argmax(get_yi(rowsums, G2, Y[idx, :]), axis=1))
        }

    def _yn_feed(idx):
        """Build a feed dict for rows ``idx`` with targets from ``get_yn``."""
        current = sess.run(predict, feed_dict={xs: X[idx, :]})
        targets = tf.argmax(
            get_yn(current, Y[idx, :], delta, tau, output_feats), axis=1)
        return {xs: X[idx, :], yin: sess.run(targets)}

    try:
        sess.run(init_vars)
        print("Session started")
        tensor_trainI = _yi_feed(trainI)
        print("tensor_trainI made")
        tensor_testI = _yi_feed(testI)
        print("tensor_testI made")
        tensor_train = _yn_feed(train2[0:bs])
        print("tensor_train made")
        tensor_test = _yn_feed(test)
        print("tensor_test made")
        print("Beginning iterations")
        # Starting training iterations
        print(X.shape)
        for i in range(1, 101):
            if i < 50:
                sess.run(train_op, feed_dict=tensor_trainI)
                if i % 10 == 0:
                    print(
                        str(sess.run(accuracy_op, feed_dict=tensor_trainI)) +
                        ' ' +
                        str(sess.run(accuracy_op, feed_dict=tensor_testI)))
            else:
                sess.run(train_op, feed_dict=tensor_train)
                if i % 10 == 0:
                    print(
                        str(sess.run(accuracy_op, feed_dict=tensor_train)) +
                        ' ' +
                        str(sess.run(accuracy_op, feed_dict=tensor_test)))
                    # The reshuffle used to sit in an `elif` with the same
                    # condition as this `if`, so it never ran; it also called
                    # the non-existent `np.random_shuffle` and skipped the
                    # argmax that the int32 `yin` placeholder requires.
                    # NOTE(review): refreshing on the same 10-step cadence is
                    # the presumed intent -- confirm.
                    np.random.shuffle(train2)
                    tensor_train = _yn_feed(train2[0:bs])
        if prt:
            blah = sess.run(predict, feed_dict=tensor_test)
            sio.savemat('preds_cv' + str(cv) + '.mat', {'preds': blah})
            sio.savemat('truth_cv' + str(cv) + '.mat', {'labels': Y[test, :]})
        acc = sess.run(accuracy_op, feed_dict=tensor_test)
    finally:
        # Release the session before the default graph is reset below.
        sess.close()
    print(
        "loss1=%.4f, gamma=%.4f, delta=%.4f, tau=%.4f, prc_cut=%i, bs_prc=%.4f, num_trees=%i, max_nodes=%i"
        % (acc, gamma, delta, tau, prc_cut, bs_prc, num_trees, max_nodes))
    tf.reset_default_graph()
    return (acc)
    def classifyByTFRandomForest(self, noOfEpochs, n_estimators, maxNoOfNodes):
        """Train a TensorForest classifier and report its accuracy.

        Reads ``self.noOfClasses``, ``self.noOfFeatures``, ``self.trainX``,
        ``self.trainY`` and, when ``self.validX`` is not None,
        ``self.validX`` / ``self.validY``.

        Args:
            noOfEpochs: Number of full-batch training steps to run.
            n_estimators: Number of trees in the forest.
            maxNoOfNodes: Maximum number of nodes per tree.

        Returns:
            The list of per-step training losses, or None (after printing a
            warning) when ``self.noOfClasses`` is not set.
        """
        if self.noOfClasses is None:
            print("Warning: No of classes must be defined in constructor")
            return None

        # Imported locally so this method does not depend on a module-level
        # numpy alias.
        import numpy as np

        with tf.name_scope("placeholders"):
            X = tf.placeholder(tf.float32, shape=[None, self.noOfFeatures])
            Y = tf.placeholder(tf.int32, shape=[None])

        with tf.name_scope("forest"):
            hParams = tensor_forest.ForestHParams(
                num_classes=self.noOfClasses,
                num_features=self.noOfFeatures,
                num_trees=n_estimators,
                max_nodes=maxNoOfNodes).fill()
            forestGraph = tensor_forest.RandomForestGraphs(hParams)

        with tf.name_scope("optimisers"):
            trainOp = forestGraph.training_graph(X, Y)
            lossOp = forestGraph.training_loss(X, Y)

        with tf.name_scope("accuracy"):
            inferOp, _, _ = forestGraph.inference_graph(X)
            correctPrediction = tf.equal(tf.argmax(inferOp, 1),
                                         tf.cast(Y, tf.int64))
            accuracyOp = tf.reduce_mean(
                tf.cast(correctPrediction, tf.float32))

        # The initializer must be *called*, and the shared forest resources
        # must be wrapped in initialize_resources to obtain a runnable op.
        initVars = tf.group(
            tf.global_variables_initializer(),
            resources.initialize_resources(resources.shared_resources()))

        # feed_dict values must be concrete arrays; Session.run rejects
        # tf.Tensor objects, so the labels are cast with numpy instead.
        trainFeed = {
            X: self.trainX,
            Y: np.asarray(self.trainY, dtype=np.int32)
        }

        losses = []

        with tf.Session() as sess:
            sess.run(initVars)
            for i in range(1, noOfEpochs + 1):
                _, l = sess.run([trainOp, lossOp], feed_dict=trainFeed)
                if i % 50 == 0 or i == 1:
                    acc = sess.run(accuracyOp, feed_dict=trainFeed)
                    print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))

                losses.append(l)

            trainAccs = sess.run(accuracyOp, feed_dict=trainFeed)
            print("Training accuracy:", trainAccs)
            if self.validX is not None:
                validAccs = sess.run(accuracyOp,
                                     feed_dict={
                                         X: self.validX,
                                         Y: np.asarray(self.validY,
                                                       dtype=np.int32)
                                     })
                print("Validation accuracy:", validAccs)
            # NOTE(review): "Test accuracy" is evaluated on the training data,
            # exactly as before; no test attributes are visible from here --
            # confirm which held-out set should be used.
            testAccs = sess.run(accuracyOp, feed_dict=trainFeed)
            print("Test accuracy:", testAccs)

        return losses
def main():
    """Train and evaluate a TensorForest classifier on the iris data.

    Loads the samples with ``fu.importData``, shuffles them, trains on the
    first ``(len // 10) * 6`` samples in mini-batches drawn by ``fu.getBatch``
    and prints the accuracy on the remaining samples.
    """
    irisData = fu.importData()
    random.shuffle(irisData)
    trainSplit = (len(irisData) // 10) * 6
    irisTrain = irisData[:trainSplit]
    # Hold out everything after the training slice.  The previous
    # `irisData[:-testSplit]` selected the same rows as `irisTrain`, so the
    # reported test accuracy was really training accuracy.
    irisTest = irisData[trainSplit:]

    # Forest parameters
    num_epocs = 500  # epochs to train
    batch_size = 50  # samples per batch
    num_classes = 3  # 3 iris classes
    num_features = 4  # 4 features
    num_trees = 10  # number of trees
    max_nodes = 100  # maximum number of nodes

    # Input and target data
    X = tf.placeholder(tf.float32, shape=[None, num_features])

    # For random forest, labels must be integers (the class id)
    Y = tf.placeholder(tf.int32, shape=[None])

    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()

    # Build the forest
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    # Get training graph and loss
    train_op = forest_graph.training_graph(X, Y)
    loss_op = forest_graph.training_loss(X, Y)

    # Measure accuracy
    # NOTE(review): other snippets in this file unpack three values from
    # inference_graph and also initialise the shared forest resources; this
    # one presumably targets an older tensor_forest API -- confirm against the
    # installed TensorFlow version.
    infer_op = forest_graph.inference_graph(X)
    correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Initialize the variables
    init_vars = tf.global_variables_initializer()

    # The context manager closes the session once evaluation is done.
    with tf.Session() as sess:
        # Run the initializer
        sess.run(init_vars)

        # Training
        for i in range(1, num_epocs + 1):
            # Draw the next mini-batch of iris samples
            batch_x, batch_y = fu.getBatch(irisTrain, batch_size)
            _, l = sess.run([train_op, loss_op],
                            feed_dict={
                                X: batch_x,
                                Y: batch_y
                            })
            if i % 50 == 0 or i == 1:
                acc = sess.run(accuracy_op, feed_dict={X: batch_x, Y: batch_y})
                print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))

        # Test Model
        test_x, test_y = fu.getBatch(irisTest, len(irisTest))
        print("Test Accuracy:",
              sess.run(accuracy_op, feed_dict={
                  X: test_x,
                  Y: test_y
              }))
Esempio n. 27
0
def main(args):
    """Train a TensorForest classifier on the vocabulary at ``args.data_path``.

    Builds the feature matrix by concatenating the ``lprox``, ``rprox``,
    ``x``, ``y`` and ``z`` columns returned by ``build_vocab``, holds out 30%
    of the rows for testing, trains for 100 full-batch steps and writes a
    checkpoint to ``models/model<step>.ckpt`` at steps 1, 50 and 100.

    Args:
        args: Object exposing a ``data_path`` attribute.
    """
    vocab = build_vocab(args.data_path)
    data = pd.DataFrame({
        'label': vocab.labels,
        'lprox': vocab.lprox,
        'rprox': vocab.rprox,
        'x': vocab.x,
        'y': vocab.y,
        'z': vocab.z,
    })
    y = data['label']
    # Each source column holds a list per row; expand every one into its own
    # frame and join them side by side.
    feature_frames = [
        pd.DataFrame(data[column].values.tolist())
        for column in ('lprox', 'rprox', 'x', 'y', 'z')
    ]
    features_df = pd.concat(feature_frames, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(features_df,
                                                        y,
                                                        test_size=0.3)

    num_steps = 100  # Total steps to train
    num_classes = 2
    # NOTE(review): presumably the column count of the concatenated frame --
    # confirm it matches the data.
    num_features = 585
    num_trees = 10
    max_nodes = 1000

    X = tf.placeholder(tf.float32, shape=[None, num_features])
    Y = tf.placeholder(tf.int64, shape=[None])

    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    train_op = forest_graph.training_graph(X, Y)

    loss_op = forest_graph.training_loss(X, Y)
    infer_op, _, _ = forest_graph.inference_graph(X)
    correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    init_vars = tf.group(
        tf.global_variables_initializer(),
        resources.initialize_resources(resources.shared_resources()))

    # Build the Saver once.  Constructing it inside the training loop added a
    # fresh set of save/restore ops to the graph on every step.
    saver = tf.train.Saver()
    train_feed = {X: X_train, Y: y_train}

    with tf.Session() as sess:
        sess.run(init_vars)

        for i in range(1, num_steps + 1):
            _, l = sess.run([train_op, loss_op], feed_dict=train_feed)
            if i % 50 == 0 or i == 1:
                acc = sess.run(accuracy_op, feed_dict=train_feed)
                saver.save(sess, 'models/model%i.ckpt' % (i))
                print('Step %i, Loss: %f, Acc: %f' % (i, l, acc))

        print("Test Accuracy:",
              sess.run(accuracy_op, feed_dict={
                  X: X_test,
                  Y: y_test
              }))
Esempio n. 28
0
def random_forest_sts(batch_x, batch_y):
    """Restore a trained TensorForest model and report validation metrics.

    Rebuilds the forest graph, restores the latest checkpoint found in the
    ``checkpoint_merge`` directory and prints accuracy, precision, recall and
    F1 for the given samples.  The indices of misclassified samples are
    written to ``pre_list.txt``: first those whose label equals ``'0'``, then
    a blank separator, then those whose label equals ``'1'``.

    Args:
        batch_x: Feature rows fed to the ``[None, 13]`` float placeholder.
        batch_y: Labels fed to the int32 placeholder.
    """
    # Parameters
    num_classes = 2  # non 0  labeled 1
    num_features = 13  # max min mean std self
    num_trees = 20
    max_nodes = 200

    # Random Forest Parameters
    # fill(): intelligently sets any non-specific parameters
    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          regression=False,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()

    # Input and Target data
    X = tf.placeholder(tf.float32, shape=[None, num_features])
    # For random forest, labels must be integers (the class id);
    # shape(Y) = [None] because the labels are not one-hot encoded.
    Y = tf.placeholder(tf.int32, shape=[None])

    # Build the Random Forest
    forest_graph = tensor_forest.RandomForestGraphs(hparams)
    # Get training graph and loss (built so the graph matches the checkpoint)
    train_op = forest_graph.training_graph(X, Y)
    loss_op = forest_graph.training_loss(X, Y)

    # Measure the accuracy
    # NOTE(review): other snippets in this file unpack three values from
    # inference_graph; confirm which tensor_forest API version applies here.
    infer_op = forest_graph.inference_graph(X)
    predicted_class = tf.argmax(infer_op, 1)
    correct_prediction = tf.equal(predicted_class, tf.cast(Y, tf.int64))
    # X holds all instances, so this is the average over every instance.
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Streaming metrics from tf.metrics
    acc, acc_op = tf.metrics.accuracy(labels=Y,
                                      predictions=tf.argmax(infer_op, 1))
    pre, pre_op = tf.metrics.precision(labels=Y,
                                       predictions=tf.argmax(infer_op, 1))
    rec, rec_op = tf.metrics.recall(labels=Y,
                                    predictions=tf.argmax(infer_op, 1))

    # Initialize the variables (tf.metrics keeps its counters in local ones)
    init_vars = [
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    ]
    eval_feed = {X: batch_x, Y: batch_y}

    # The context manager closes the session once evaluation is done.
    with tf.Session() as sess:
        # Run the initializer
        sess.run(init_vars)

        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint("checkpoint_merge"))

        # Test Model
        print("Validation Accuracy:", sess.run(accuracy_op,
                                               feed_dict=eval_feed))
        print("Validation Accuracy:",
              sess.run([acc, acc_op], feed_dict=eval_feed))
        _, p = sess.run([pre, pre_op], feed_dict=eval_feed)
        print("Validation Precision:", p)
        _, r = sess.run([rec, rec_op], feed_dict=eval_feed)
        print("Validation Recall:", r)
        print("Validation F1 score:", 2 * p * r / (p + r))
        # Compare the predictions with the ground truth
        prediction_list = sess.run(correct_prediction, feed_dict=eval_feed)

    # NOTE(review): labels are compared against the strings '1' and '0';
    # this only matches if batch_y holds string labels -- confirm.
    FN = [
        i for i, is_correct in enumerate(prediction_list)
        if batch_y[i] == '1' and not is_correct
    ]
    FP = [
        i for i, is_correct in enumerate(prediction_list)
        if batch_y[i] == '0' and not is_correct
    ]
    # Debug output; requires at least 14 samples in batch_x.
    print(batch_x[13])
    print(len(FP))
    print(len(FN))
    # `with` guarantees the file is closed even if a write fails.
    with open("pre_list.txt", "w") as f:
        for sample_idx in FP:
            print(str(sample_idx), file=f)
        print("\n", file=f)
        for sample_idx in FN:
            print(str(sample_idx), file=f)
    '''
Esempio n. 29
0
def model_func(features, labels, mode, params):
    """Estimator model function built on a TensorForest random forest.

    Args:
        features: Input tensor, or a dict holding it under the key ``'X'``.
        labels: Target tensor passed in by the estimator.
        mode: One of the ``tf.estimator.ModeKeys`` values.
        params: Dict; ``params['n_dim']`` is the number of input features.

    Returns:
        A ``tf.estimator.EstimatorSpec`` for the requested mode.
    """
    if isinstance(features, dict):
        X = features['X']
    else:
        X = features
    # NOTE(review): `y_pred` is only an alias of `labels`; no model output is
    # used as a prediction anywhere below.  In PREDICT mode the estimator
    # presumably passes labels=None, and the EVAL metrics compare `labels`
    # with itself.  Predictions should presumably come from
    # `forest_graph.inference_graph(X)` -- confirm the intended output shape.
    y_pred = labels

    try:
        n_samples, n_dim = X.shape
    except ValueError:
        n_samples = None
        n_dim = params['n_dim']

    logging.debug('n_dim: {} | n_smaples: {}'.format(n_dim, n_samples))

    num_classes = 1000
    num_features = params['n_dim']
    num_trees = 10
    max_nodes = 1000

    hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                          num_features=num_features,
                                          num_trees=num_trees,
                                          max_nodes=max_nodes).fill()

    forest_graph = tensor_forest.RandomForestGraphs(hparams)

    if mode == tf.estimator.ModeKeys.PREDICT:
        export_outputs = {
            'predict_output':
            tf.estimator.export.PredictOutput({"pred_output": y_pred})
        }
        # In `PREDICT` mode we only need to return predictions.
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions={"y_pred": y_pred},
                                          export_outputs=export_outputs)

    # Training op and loss of the forest
    train_op = forest_graph.training_graph(X, y_pred)
    loss = forest_graph.training_loss(X, y_pred)

    if mode == tf.estimator.ModeKeys.TRAIN:
        print('in mode TRAIN')
        # Hand the estimator the forest's training op.  The inference op was
        # passed here before, so running a step never updated the trees.
        # NOTE(review): an estimator train_op is normally expected to also
        # increment the global step -- confirm whether that is needed here.
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    assert mode == tf.estimator.ModeKeys.EVAL

    # Metrics
    rmse = tf.metrics.root_mean_squared_error(labels, y_pred)

    def r_squared(labels, y_pred):
        """Return ``(r2, update_op)`` where r2 = 1 - SS_res / SS_tot."""
        unexplained_error = tf.reduce_sum(tf.square((labels - y_pred)))
        total_error = tf.reduce_sum(
            tf.square((labels - tf.reduce_mean(labels))))
        r2 = tf.subtract(tf.constant(1., dtype='float64'),
                         tf.div(unexplained_error, total_error))
        # The constant stands in for the update op of the metric pair.
        return r2, constant_op.constant(1.)

    # NOTE(review): `rmse` is the (value, update_op) pair returned by
    # tf.metrics, and that pair is what gets passed to percentage_below --
    # confirm this is intended.
    metrics = {
        'rmse': rmse,
        'mae': tf.metrics.mean_absolute_error(labels, y_pred),
        'rmse_below_10': tf.metrics.percentage_below(rmse, 10),
        'rmse_below_5': tf.metrics.percentage_below(rmse, 5),
        'rmse_below_3': tf.metrics.percentage_below(rmse, 3),
        'rmse_below_1': tf.metrics.percentage_below(rmse, 1),
        'y_pred_below_10': tf.metrics.percentage_below(y_pred, 10),
        'y_pred_below_5': tf.metrics.percentage_below(y_pred, 5),
        'y_pred_below_3': tf.metrics.percentage_below(y_pred, 3),
        'y_pred_below_1': tf.metrics.percentage_below(y_pred, 1),
        'r2': r_squared(labels, y_pred)
    }

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=metrics)
Esempio n. 30
0
def random_forest_sts(batch_x, batch_y, test_x, test_y):
    """Train a weighted TensorForest classifier and checkpoint it.

    Builds the forest graph on the CPU, trains it for ``num_epochs``
    full-batch steps on ``batch_x`` / ``batch_y``, writes accuracy summaries
    to ``./graph``, prints the feature importances and saves the model to
    ``checkpoint_merge/variable``.

    Args:
        batch_x: Training feature rows fed to the ``[None, 13]`` placeholder.
        batch_y: Training labels fed to the int32 placeholder.
        test_x: Not used in the visible part of this function.
        test_y: Not used in the visible part of this function.
    """
    with tf.device('/cpu:0'):
        # Parameters
        num_epochs = 200  # Total epochs to train
        num_classes = 2  # non 0  labeled 1
        num_features = 13  # max min mean std self
        num_trees = 20
        max_nodes = 200

        # Random Forest Parameters
        # fill(): intelligently sets any non-specific parameters
        hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                              num_features=num_features,
                                              regression=False,
                                              num_trees=num_trees,
                                              max_nodes=max_nodes).fill()

        # Input and Target data
        X = tf.placeholder(tf.float32, shape=[None, num_features])
        # For random forest, labels must be integers (the class id);
        # shape(Y) = [None] because the labels are not one-hot encoded.
        Y = tf.placeholder(tf.int32, shape=[None])

        # Build the Random Forest
        forest_graph = tensor_forest.RandomForestGraphs(hparams)

        # Per-sample input weights: 1 for the first 15683 rows, 0.1 for the
        # following 709329 rows.
        # NOTE(review): the weight vector has a fixed length of 725012, so it
        # presumably has to match the number of rows fed as X -- confirm.
        weights = [1] * 15683 + [0.1] * 709329
        print("weight len:", len(weights))
        # Get training graph and loss
        train_op = forest_graph.training_graph(
            X, Y, input_weights=tf.constant(weights))
        loss_op = forest_graph.training_loss(X, Y)

        # Measure the accuracy
        # NOTE(review): other snippets in this file unpack three values from
        # inference_graph; confirm which tensor_forest API version applies.
        infer_op = forest_graph.inference_graph(X)
        correct_prediction = tf.equal(tf.argmax(infer_op, 1),
                                      tf.cast(Y, tf.int64))
        # X holds all instances, so this is the average over every instance.
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('Accuracy', accuracy_op)

        # Feature importances
        feature_importances = forest_graph.feature_importances()

        # Initialize the variables (i.e. assign their default value)
        init_vars = tf.global_variables_initializer()
        merged = tf.summary.merge_all()

    # Start TensorFlow session
    sess = tf.Session()
    # Summaries (and the graph itself) are written to ./graph.
    writer = tf.summary.FileWriter('./graph', sess.graph)

    # Run the initializer
    sess.run(init_vars)

    # Checkpoint prefix used by run_train below.
    model_path = "checkpoint_merge/variable"
    saver = tf.train.Saver()

    def cross_validate(session, split_size=5):
        """Train on each stratified fold and return the validation accuracies.

        Not called in the visible part of this function (its call is
        commented out below).
        """
        results = []
        kf = StratifiedKFold(n_splits=split_size, shuffle=True)
        for train_idx, val_idx in kf.split(batch_x, batch_y):
            train_x = np.array(batch_x)[train_idx]
            train_y = np.array(batch_y)[train_idx]
            val_x = np.array(batch_x)[val_idx]
            val_y = np.array(batch_y)[val_idx]
            print("Strat a new fold training...")
            # The same session is reused for every fold; no variables are
            # re-initialised here between folds.
            run_train(session, train_x, train_y)
            results.append(
                session.run(accuracy_op, feed_dict={
                    X: val_x,
                    Y: val_y
                }))
        return results

    def run_train(sess, train_x, train_y):
        """Run ``num_epochs`` full-batch training steps, then save the model."""
        # Training
        for i in range(1, num_epochs + 1):  # steps 1..num_epochs, no batching
            _, l = sess.run([train_op, loss_op],
                            feed_dict={
                                X: train_x,
                                Y: train_y
                            })
            if i % 50 == 0 or i == 1:
                # Progress is measured on the outer batch_x / batch_y, not on
                # the train_x / train_y passed to this function.
                summary, acc = sess.run([merged, accuracy_op],
                                        feed_dict={
                                            X: batch_x,
                                            Y: batch_y
                                        })
                print('Epoch %i, Loss: %f, Acc: %f' % (i, l, acc))
                writer.add_summary(summary, i)

        importances = sess.run(feature_importances,
                               feed_dict={
                                   X: batch_x,
                                   Y: batch_y
                               })
        print("impotances of feature= ", importances)
        save_path = saver.save(sess, model_path)

#   result = cross_validate(sess)
#   print("Cross-validation result: %s" % result)
#   print("Mean of Cross-validation result: %s" % np.mean(result))

    # Train once on the full data set.
    run_train(sess, batch_x, batch_y)

    writer.close()
    sess.close()