    def test_with_train_policy(self):
        data = self.mnist_data
        input = InputLayer(self.MNIST_INPUT_NODES)
        d_1 = DuelStateReluLayer(input, 1, width_regularizer_constant=1e-7, width_binarizer_constant=1e-9,
                                 session=self.session)
        bn_1 = BatchNormLayer(d_1)
        d_2 = DuelStateReluLayer(bn_1, 1, width_regularizer_constant=1e-7, width_binarizer_constant=1e-9)
        bn_2 = BatchNormLayer(d_2)
        output = Layer(bn_2, self.MNIST_OUTPUT_NODES)
        trainer = CategoricalTrainer(output, 0.1)
        trainer = DuelStateReluTrainPolicy(trainer, data, 100, max_iterations=300, stop_accuracy=90.,
                                           grow_after_turns_without_improvement=1)
        trainer.run_full(True)
    def test_accuracy_bug(self):
        import tensor_dynamic.data.input_data as mnist
        data = mnist.read_data_sets("../data/MNIST_data", one_hot=True)

        inputs = tf.placeholder(tf.float32, shape=(None, 784))
        input_layer = InputLayer(inputs)
        outputs = Layer(input_layer, 10, self.session, non_liniarity=tf.sigmoid)

        trainer = CategoricalTrainer(outputs, 0.1)

        trainer.train(data.validation.images, data.validation.labels)

        # this was throwing an exception
        accuracy = trainer.accuracy(data.validation.images, data.validation.labels)
        self.assertLessEqual(accuracy, 100.)
        self.assertGreaterEqual(accuracy, 0.)
def create_network(sess, hidden_layers):
    inputs = tf.placeholder(tf.float32, shape=(None, 784))
    bactivate = True
    noise_std = 0.3
    non_lin = tf.nn.relu
    input_layer = InputLayer(inputs)
    last = BatchNormLayer(input_layer, sess)
    for hidden_nodes in hidden_layers:
        last = HiddenLayer(last,
                           hidden_nodes,
                           sess,
                           bactivate=bactivate,
                           non_liniarity=non_lin,
                           unsupervised_cost=.1,
                           noise_std=noise_std)
        last = BatchNormLayer(last, sess)

    outputs = HiddenLayer(last,
                          10,
                          sess,
                          non_liniarity=tf.sigmoid,
                          bactivate=False,
                          supervised_cost=1.)

    # initail_learning_rate is expected to be defined as a module-level constant
    trainer = CategoricalTrainer(outputs, initail_learning_rate)

    return outputs, trainer
def train_until_no_improvement_for_epochs(data_set, net, max_epochs_without_improvement):
    trainer = CategoricalTrainer(net, 0.1)
    best_error = sys.float_info.max
    epochs_since_best_error = 0

    for x in range(MAX_EPOCHS):
        error = trainer.train_one_epoch(data_set, 100)
        print("iteration {0} error {1}".format(x, error))
        trainer.learn_rate *= 0.995

        if error < best_error:
            best_error = error
            epochs_since_best_error = 0
        else:
            if epochs_since_best_error > max_epochs_without_improvement:
                break
            epochs_since_best_error += 1

    return best_error
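
# Hedged usage sketch (not part of the original examples): wiring the two helpers
# above together. It assumes the module-level constants they reference
# (MAX_EPOCHS, initail_learning_rate) are defined and that MNIST data is available
# at ../data/MNIST_data; the variable names below are illustrative only.
import tensorflow as tf
import tensor_dynamic.data.input_data as mnist

data = mnist.read_data_sets("../data/MNIST_data", one_hot=True)
sess = tf.Session()
net, _ = create_network(sess, hidden_layers=[200, 100])
best_error = train_until_no_improvement_for_epochs(data, net, max_epochs_without_improvement=3)
print("best error ", best_error)
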
    def test_mnist(self):
        data = self.mnist_data
        input = InputLayer(self.MNIST_INPUT_NODES)
        d_1 = DuelStateReluLayer(input, 3, width_regularizer_constant=1e-7, width_binarizer_constant=1e-10,
                                 session=self.session)

        d_2 = DuelStateReluLayer(d_1, 3, width_regularizer_constant=1e-7, width_binarizer_constant=1e-10, )
        output = HiddenLayer(d_2, self.MNIST_OUTPUT_NODES)

        trainer = CategoricalTrainer(output, 0.1)
        end_epoch = data.train.epochs_completed + 20

        while data.train.epochs_completed <= end_epoch:
            train_x, train_y = data.train.next_batch(100)
            trainer.train(train_x, train_y)

        accuracy, cost = trainer.accuracy(data.test.features, data.test.labels)
        print(accuracy, cost)
        print("active nodes ", d_1.active_nodes())
        self.assertGreater(accuracy, 70.)
    def test_mnist(self):
        data = self.mnist_data
        input = InputLayer(self.MNIST_INPUT_NODES)
        d_1 = DuelStateReluLayer(input, 3, width_regularizer_constant=1e-7, width_binarizer_constant=1e-10,
                                 session=self.session)
        # When batch norm layers are added here, no active nodes are created; width always stays below 0.5 (why?)
        # bn_1 = BatchNormLayer(d_1)
        d_2 = DuelStateReluLayer(d_1, 3, width_regularizer_constant=1e-7, width_binarizer_constant=1e-10, )
        # bn_2 = BatchNormLayer(d_2)
        output = Layer(d_2, self.MNIST_OUTPUT_NODES)

        trainer = CategoricalTrainer(output, 0.1)
        end_epoch = data.train.epochs_completed + 20

        while data.train.epochs_completed <= end_epoch:
            train_x, train_y = data.train.next_batch(100)
            trainer.train(train_x, train_y)

        accuracy, cost = trainer.accuracy(data.test.images, data.test.labels)
        print(accuracy, cost)
        print("active nodes ", d_1.active_nodes())
        self.assertGreater(accuracy, 70.)
    def test_mnist_start_large(self):
        data = self.mnist_data

        input_layer = InputLayer(784)
        hidden_1 = DuelStateReluLayer(input_layer, 200, session=self.session, inactive_nodes_to_leave=200)
        output = Layer(hidden_1, self.MNIST_OUTPUT_NODES, session=self.session)
        trainer = CategoricalTrainer(output, 0.1)

        end_epoch = data.train.epochs_completed + 5

        print(trainer.accuracy(data.test.images, data.test.labels))

        while data.train.epochs_completed <= end_epoch:
            train_x, train_y = data.train.next_batch(100)
            trainer.train(train_x, train_y)

        accuracy, cost = trainer.accuracy(data.test.images, data.test.labels)
        print(accuracy, cost)
        # print(output.active_nodes())
        print(hidden_1.active_nodes())

        self.assertGreater(accuracy, 90.)
        # self.assertEqual(output.active_nodes(), self.MNIST_OUTPUT_NODES, msg='expect all output nodes to be active')
        self.assertLess(hidden_1.active_nodes(), hidden_1.output_nodes, msg='expect not all hidden nodes to be active')
    def test_prune_layer(self):
        # create a layer with weights set so that all but 1 output node is useless
        self.INPUT_NODES = 3
        self.OUTPUT_NODES = 1
        x = np.zeros((self.INPUT_NODES, self.OUTPUT_NODES), np.float32)
        for i in range(self.OUTPUT_NODES):
            x[0, i - 1] = 1.0
        y = np.zeros((self.OUTPUT_NODES, self.OUTPUT_NODES), np.float32)
        np.fill_diagonal(y, 1.)
        layer_1 = DuelStateReluLayer(InputLayer(self.INPUT_NODES), self.INPUT_NODES, session=self.session, weights=x,
                                     width_regularizer_constant=1e-2)
        layer_2 = HiddenLayer(layer_1, self.OUTPUT_NODES, weights=y, freeze=True)
        trainer = CategoricalTrainer(layer_2, 0.1)

        data_1 = [1.0] * self.INPUT_NODES
        data_2 = [0.0] * self.INPUT_NODES
        label_1 = [1.0] + [0.0] * (self.OUTPUT_NODES - 1)  # only the first node is correlated with the input
        label_2 = [0.0] * self.OUTPUT_NODES

        inputs = [data_1, data_2]
        labels = [label_1, label_2]

        for i in range(500):
            self.session.run([trainer._train],
                             feed_dict={layer_2.input_placeholder: inputs[:1],
                                        trainer._target_placeholder: labels[:1],
                                        trainer._learn_rate_placeholder: 0.05})
            self.session.run([trainer._train],
                             feed_dict={layer_2.input_placeholder: inputs[1:],
                                        trainer._target_placeholder: labels[1:],
                                        trainer._learn_rate_placeholder: 0.05})

        # layer should only have 1 active node
        self.assertGreater(layer_1.width()[0], DuelStateReluLayer.ACTIVE_THRESHOLD)
        self.assertEqual(layer_1.active_nodes(), 1)

        activation_pre_prune = self.session.run([layer_2.activation_predict],
                                                feed_dict={layer_1.input_placeholder: inputs})

        # after pruning, the layer should have 2 nodes (1 active + 1 inactive left)
        layer_1.prune(inactive_nodes_to_leave=1)

        self.assertEqual(layer_1.output_nodes, 2)

        activation_post_prune = self.session.run([layer_2.activation_predict],
                                                 feed_dict={layer_1.input_placeholder: inputs})

        np.testing.assert_array_almost_equal(activation_pre_prune, activation_post_prune, decimal=2)
    def test_mnist_start_large(self):
        data = self.mnist_data

        input_layer = InputLayer(784)
        hidden_1 = DuelStateReluLayer(input_layer, 200, session=self.session, inactive_nodes_to_leave=200)
        output = HiddenLayer(hidden_1, self.MNIST_OUTPUT_NODES, session=self.session)
        trainer = CategoricalTrainer(output, 0.1)

        end_epoch = data.train.epochs_completed + 5

        print(trainer.accuracy(data.test.features, data.test.labels))

        while data.train.epochs_completed <= end_epoch:
            train_x, train_y = data.train.next_batch(100)
            trainer.train(train_x, train_y)

        accuracy, cost = trainer.accuracy(data.test.features, data.test.labels)
        print(accuracy, cost)
        # print(output.active_nodes())
        print(hidden_1.active_nodes())

        self.assertGreater(accuracy, 90.)
        # self.assertEqual(output.active_nodes(), self.MNIST_OUTPUT_NODES, msg='expect all output nodes to be active')
        self.assertLess(hidden_1.active_nodes(), hidden_1.output_nodes, msg='expect not all hidden nodes to be active')
    bn4 = BatchNormLayer(net3, sess, beta=beta, gamma=gamma)
    net4 = HiddenLayer(bn4,
                       1,
                       sess,
                       non_liniarity=non_lin,
                       bactivate=bactivate,
                       unsupervised_cost=.001,
                       noise_std=noise_std)
    bn5 = BatchNormLayer(net4, sess, beta=beta, gamma=gamma)
    outputNet = HiddenLayer(bn5,
                            10,
                            sess,
                            non_liniarity=tf.sigmoid,
                            bactivate=False,
                            supervised_cost=1.)

    trainer = CategoricalTrainer(outputNet, 0.15)
    trainPolicy = TrainPolicy(trainer,
                              data,
                              batch_size,
                              max_iterations=3000,
                              grow_after_turns_without_improvement=2,
                              start_grow_epoch=1,
                              learn_rate_decay=0.99,
                              learn_rate_boost=0.01,
                              back_loss_on_misclassified_only=True)

    trainPolicy.run_full()

    print(trainer.accuracy(data.test.features, data.test.labels))
    inputs = tf.placeholder(tf.float32, shape=(None, 784))

    bactivate = True
    noise_std = 0.3
    beta = 0.5
    gamma = 0.5
    non_lin = tf.nn.sigmoid
    input_layer = InputLayer(inputs)
    bn1 = BatchNormLayer(input_layer, sess, beta=beta, gamma=gamma)
    net1 = Layer(bn1, 1, sess, non_liniarity=non_lin, bactivate=bactivate, unsupervised_cost=.001, noise_std=noise_std)
    bn2 = BatchNormLayer(net1, sess, beta=beta, gamma=gamma)
    net2 = Layer(bn2, 1, sess, non_liniarity=non_lin, bactivate=bactivate, unsupervised_cost=.001, noise_std=noise_std)
    bn3 = BatchNormLayer(net2, sess, beta=beta, gamma=gamma)
    net3 = Layer(bn3, 1, sess, non_liniarity=non_lin, bactivate=bactivate, unsupervised_cost=.001, noise_std=noise_std)
    bn4 = BatchNormLayer(net3, sess, beta=beta, gamma=gamma)
    net4 = Layer(bn4, 1, sess, non_liniarity=non_lin, bactivate=bactivate, unsupervised_cost=.001, noise_std=noise_std)
    bn5 = BatchNormLayer(net4, sess, beta=beta, gamma=gamma)
    outputNet = Layer(bn5, 10, sess, non_liniarity=tf.sigmoid, bactivate=False, supervised_cost=1.)

    trainer = CategoricalTrainer(outputNet, 0.15)
    trainPolicy = TrainPolicy(trainer, data, batch_size, max_iterations=3000,
                              grow_after_turns_without_improvement=2,
                              start_grow_epoch=1,
                              learn_rate_decay=0.99,
                              learn_rate_boost=0.01,
                              back_loss_on_misclassified_only=True)

    trainPolicy.run_full()

    print(trainer.accuracy(data.test.images, data.test.labels))
    # get reconstruction errors
    print(trainer.back_losses_per_layer(data.train.features))

    # get error just on misclassifications
    print(trainer.back_losses_per_layer(data.train.features,
                                        misclassification_only=True,
                                        labels=data.train.labels))

    results = {}

    # try each different resize, see how it does
    for x in range(len(hidden_layers)):
        print("resizing layer ", x)
        cloned = net.clone()
        hidden_layers = [
            layer for layer in cloned.all_connected_layers
            if type(layer) == HiddenLayer
        ]
        hidden_layers[x].resize()  # add 1 node
        # train the resized clone rather than the original network
        new_trainer = CategoricalTrainer(cloned, resize_learning_rate)
        new_tp = TrainPolicy(new_trainer,
                             data,
                             batch_size,
                             learn_rate_decay=learn_rate_decay)
        new_tp.train_till_convergence()
        acc, cost = new_trainer.accuracy(data.validation.features,
                                         data.validation.labels)
        print("validation accuracy ", acc, " cost ", cost)
        results[x] = (acc, cost)

    print(results)
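    # Hedged follow-up sketch (assumption, not part of the original example): with every
    # candidate resize scored, pick the layer whose resized clone gave the lowest
    # validation cost as the one to actually grow.
    best_layer_index = min(results, key=lambda idx: results[idx][1])
    print("layer with lowest validation cost after resize:", best_layer_index)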