def test_with_train_policy(self):
    """Drive a DuelStateReluTrainPolicy over a small batch-normalised network.

    The network is input -> DuelStateRelu(1) -> BatchNorm -> DuelStateRelu(1)
    -> BatchNorm -> output. The test makes no assertions; it passes as long
    as ``run_full`` returns without raising.
    """
    mnist = self.mnist_data
    # Both dual-state layers share the same width penalties.
    width_penalties = dict(width_regularizer_constant=1e-7,
                           width_binarizer_constant=1e-9)

    input_layer = InputLayer(self.MNIST_INPUT_NODES)
    first_block = BatchNormLayer(
        DuelStateReluLayer(input_layer, 1, session=self.session,
                           **width_penalties))
    second_block = BatchNormLayer(
        DuelStateReluLayer(first_block, 1, **width_penalties))
    output_layer = Layer(second_block, self.MNIST_OUTPUT_NODES)

    policy = DuelStateReluTrainPolicy(
        CategoricalTrainer(output_layer, 0.1),
        mnist,
        100,
        max_iterations=300,
        stop_accuracy=90.,
        grow_after_turns_without_improvement=1,
    )
    policy.run_full(True)
def test_accuracy_bug(self):
    """Regression test: ``trainer.accuracy`` used to raise after one train step.

    Builds a single sigmoid layer on a 784-wide placeholder, trains once on
    the validation split, then checks the reported accuracy lies in [0, 100].

    NOTE(review): other tests in this file unpack ``trainer.accuracy(...)``
    into ``(accuracy, cost)``; here the result is compared directly as a
    number - confirm which trainer API version this test targets.
    """
    import tensor_dynamic.data.input_data as mnist_input

    validation = mnist_input.read_data_sets("../data/MNIST_data",
                                            one_hot=True).validation

    placeholder = tf.placeholder(tf.float32, shape=(None, 784))
    output_layer = Layer(InputLayer(placeholder), 10, self.session,
                         non_liniarity=tf.sigmoid)
    trainer = CategoricalTrainer(output_layer, 0.1)

    trainer.train(validation.images, validation.labels)

    # This call is the one that was throwing an exception.
    accuracy = trainer.accuracy(validation.images, validation.labels)

    self.assertLessEqual(accuracy, 100.)
    self.assertGreaterEqual(accuracy, 0.)
def create_network(sess, hidden_layers):
    """Build a batch-normalised stack of hidden layers plus its trainer.

    Args:
        sess: session handed to every layer that is created.
        hidden_layers: iterable of node counts, one per hidden layer.

    Returns:
        ``(outputs, trainer)`` - the 10-node sigmoid output layer and a
        CategoricalTrainer wrapping it.
    """
    placeholder = tf.placeholder(tf.float32, shape=(None, 784))

    # Settings shared by every hidden layer in the stack.
    hidden_settings = dict(bactivate=True,
                           non_liniarity=tf.nn.relu,
                           unsupervised_cost=.1,
                           noise_std=0.3)

    top = BatchNormLayer(InputLayer(placeholder), sess)
    for node_count in hidden_layers:
        # Each hidden layer is followed by its own batch-norm layer.
        hidden = HiddenLayer(top, node_count, sess, **hidden_settings)
        top = BatchNormLayer(hidden, sess)

    outputs = HiddenLayer(top, 10, sess,
                          non_liniarity=tf.sigmoid,
                          bactivate=False,
                          supervised_cost=1.)

    # `initail_learning_rate` (sic) is defined outside this function.
    return outputs, CategoricalTrainer(outputs, initail_learning_rate)
def train_until_no_improvement_for_epochs(data_set, net, max_epochs_without_improvement):
    """Train ``net`` epoch by epoch until the error stops improving.

    A fresh CategoricalTrainer (learning rate 0.1, decayed by 0.995 after
    every epoch) is run for at most ``MAX_EPOCHS`` epochs with batch size 100.

    Args:
        data_set: data passed to ``trainer.train_one_epoch``.
        net: network to train.
        max_epochs_without_improvement: stale-epoch budget (see note).

    Returns:
        The lowest error seen (``sys.float_info.max`` if no epoch ran).

    NOTE(review): the loop stops at a non-improving epoch only when the
    number of *earlier* non-improving epochs since the best error already
    exceeds ``max_epochs_without_improvement``, so more stale epochs are
    tolerated than the parameter name suggests - confirm this is intended.
    """
    trainer = CategoricalTrainer(net, 0.1)
    best_error = sys.float_info.max
    stale_epochs = 0

    for epoch in range(MAX_EPOCHS):
        error = trainer.train_one_epoch(data_set, 100)
        print("iteration {0} error {1}".format(epoch, error))
        trainer.learn_rate *= 0.995

        if error < best_error:
            # New best: remember it and restart the stale counter.
            best_error, stale_epochs = error, 0
            continue

        if stale_epochs > max_epochs_without_improvement:
            break
        stale_epochs += 1

    return best_error
def test_mnist(self):
    """Train two 3-node dual-state ReLU layers on MNIST; expect > 70% accuracy.

    Training runs in batches of 100 until the data set reports more than
    20 additional completed epochs, then accuracy is measured on the test
    split (``data.test.features``).
    """
    mnist = self.mnist_data
    # Both dual-state layers share the same width penalties.
    width_penalties = dict(width_regularizer_constant=1e-7,
                           width_binarizer_constant=1e-10)

    input_layer = InputLayer(self.MNIST_INPUT_NODES)
    first_hidden = DuelStateReluLayer(input_layer, 3, session=self.session,
                                      **width_penalties)
    second_hidden = DuelStateReluLayer(first_hidden, 3, **width_penalties)
    output_layer = HiddenLayer(second_hidden, self.MNIST_OUTPUT_NODES)
    trainer = CategoricalTrainer(output_layer, 0.1)

    final_epoch = mnist.train.epochs_completed + 20
    while mnist.train.epochs_completed <= final_epoch:
        batch_x, batch_y = mnist.train.next_batch(100)
        trainer.train(batch_x, batch_y)

    accuracy, cost = trainer.accuracy(mnist.test.features, mnist.test.labels)
    print(accuracy, cost)
    print("active nodes ", first_hidden.active_nodes())

    self.assertGreater(accuracy, 70.)
def test_mnist(self):
    """Train two 3-node dual-state ReLU layers on MNIST; expect > 70% accuracy.

    Training runs in batches of 100 until the data set reports more than
    20 additional completed epochs, then accuracy is measured on the test
    split (``data.test.images``).

    NOTE: batch-norm layers are deliberately not inserted after the
    dual-state layers. When they were added, no active nodes were created
    (width always stayed below 0.5?).
    """
    mnist = self.mnist_data
    # Both dual-state layers share the same width penalties.
    width_penalties = dict(width_regularizer_constant=1e-7,
                           width_binarizer_constant=1e-10)

    input_layer = InputLayer(self.MNIST_INPUT_NODES)
    first_hidden = DuelStateReluLayer(input_layer, 3, session=self.session,
                                      **width_penalties)
    second_hidden = DuelStateReluLayer(first_hidden, 3, **width_penalties)
    output_layer = Layer(second_hidden, self.MNIST_OUTPUT_NODES)
    trainer = CategoricalTrainer(output_layer, 0.1)

    final_epoch = mnist.train.epochs_completed + 20
    while mnist.train.epochs_completed <= final_epoch:
        batch_x, batch_y = mnist.train.next_batch(100)
        trainer.train(batch_x, batch_y)

    accuracy, cost = trainer.accuracy(mnist.test.images, mnist.test.labels)
    print(accuracy, cost)
    print("active nodes ", first_hidden.active_nodes())

    self.assertGreater(accuracy, 70.)
def test_mnist_start_large(self):
    """Start with a 200-node dual-state layer and check some nodes go inactive.

    Prints the untrained accuracy, trains in batches of 100 until more than
    5 additional epochs are reported complete, then expects test accuracy
    above 90% and fewer active hidden nodes than the layer's output width.
    """
    mnist = self.mnist_data

    hidden = DuelStateReluLayer(InputLayer(784), 200,
                                session=self.session,
                                inactive_nodes_to_leave=200)
    output_layer = Layer(hidden, self.MNIST_OUTPUT_NODES,
                         session=self.session)
    trainer = CategoricalTrainer(output_layer, 0.1)

    final_epoch = mnist.train.epochs_completed + 5

    # Baseline accuracy before any training.
    print(trainer.accuracy(mnist.test.images, mnist.test.labels))

    while mnist.train.epochs_completed <= final_epoch:
        batch_x, batch_y = mnist.train.next_batch(100)
        trainer.train(batch_x, batch_y)

    accuracy, cost = trainer.accuracy(mnist.test.images, mnist.test.labels)
    print(accuracy, cost)
    print(hidden.active_nodes())

    self.assertGreater(accuracy, 90.)
    # The output layer's active-node count is intentionally not asserted.
    self.assertLess(hidden.active_nodes(), hidden.output_nodes,
                    msg='expect not all hidden nodes to be active')
def test_prune_layer(self):
    """Check that pruning a dual-state layer keeps the network's predictions.

    The layers are initialised so that only one node of the first layer
    matters, trained on two single-example batches, and then pruned while
    leaving one inactive node. Predictions before and after pruning must
    agree to 2 decimal places.
    """
    # Create the layers so that all but 1 output node is useless.
    self.INPUT_NODES = 3
    self.OUTPUT_NODES = 1

    first_weights = np.zeros((self.INPUT_NODES, self.OUTPUT_NODES), np.float32)
    for column in range(self.OUTPUT_NODES):
        first_weights[0, column - 1] = 1.0

    second_weights = np.zeros((self.OUTPUT_NODES, self.OUTPUT_NODES), np.float32)
    np.fill_diagonal(second_weights, 1.)

    layer_1 = DuelStateReluLayer(InputLayer(self.INPUT_NODES),
                                 self.INPUT_NODES,
                                 session=self.session,
                                 weights=first_weights,
                                 width_regularizer_constant=1e-2)
    layer_2 = HiddenLayer(layer_1, self.OUTPUT_NODES,
                          weights=second_weights, freeze=True)
    trainer = CategoricalTrainer(layer_2, 0.1)

    # Two examples: all-ones input with a positive label (only the first
    # node is correlated with the input) and all-zeros input with no label.
    inputs = [[1.0] * self.INPUT_NODES,
              [0.0] * self.INPUT_NODES]
    labels = [[1.0] + [0.0] * (self.OUTPUT_NODES - 1),
              [0.0] * self.OUTPUT_NODES]

    for _ in range(500):
        # Alternate between the first and the second example each step.
        for batch_inputs, batch_labels in ((inputs[:1], labels[:1]),
                                           (inputs[1:], labels[1:])):
            self.session.run(
                [trainer._train],
                feed_dict={layer_2.input_placeholder: batch_inputs,
                           trainer._target_placeholder: batch_labels,
                           trainer._learn_rate_placeholder: 0.05})

    # The layer should only have 1 active node.
    self.assertGreater(layer_1.width()[0], DuelStateReluLayer.ACTIVE_THRESHOLD)
    self.assertEqual(layer_1.active_nodes(), 1)

    activation_pre_prune = self.session.run(
        [layer_2.activation_predict],
        feed_dict={layer_1.input_placeholder: inputs})

    # After pruning the layer should have 2 nodes.
    layer_1.prune(inactive_nodes_to_leave=1)
    self.assertEqual(layer_1.output_nodes, 2)

    activation_post_prune = self.session.run(
        [layer_2.activation_predict],
        feed_dict={layer_1.input_placeholder: inputs})

    np.testing.assert_array_almost_equal(activation_pre_prune,
                                         activation_post_prune,
                                         decimal=2)
def test_mnist_start_large(self):
    """Start with a 200-node dual-state layer and check some nodes go inactive.

    Prints the untrained accuracy, trains in batches of 100 until more than
    5 additional epochs are reported complete, then expects test accuracy
    above 90% and fewer active hidden nodes than the layer's output width.
    """
    mnist = self.mnist_data

    hidden = DuelStateReluLayer(InputLayer(784), 200,
                                session=self.session,
                                inactive_nodes_to_leave=200)
    output_layer = HiddenLayer(hidden, self.MNIST_OUTPUT_NODES,
                               session=self.session)
    trainer = CategoricalTrainer(output_layer, 0.1)

    final_epoch = mnist.train.epochs_completed + 5

    # Baseline accuracy before any training.
    print(trainer.accuracy(mnist.test.features, mnist.test.labels))

    while mnist.train.epochs_completed <= final_epoch:
        batch_x, batch_y = mnist.train.next_batch(100)
        trainer.train(batch_x, batch_y)

    accuracy, cost = trainer.accuracy(mnist.test.features, mnist.test.labels)
    print(accuracy, cost)
    print(hidden.active_nodes())

    self.assertGreater(accuracy, 90.)
    # The output layer's active-node count is intentionally not asserted.
    self.assertLess(hidden.active_nodes(), hidden.output_nodes,
                    msg='expect not all hidden nodes to be active')
# Final two stages of the stack: one more single-node hidden layer and the
# 10-node sigmoid output layer, each fed through its own batch-norm layer.
bn4 = BatchNormLayer(net3, sess, beta=beta, gamma=gamma)
net4 = HiddenLayer(bn4, 1, sess,
                   non_liniarity=non_lin,
                   bactivate=bactivate,
                   unsupervised_cost=.001,
                   noise_std=noise_std)
bn5 = BatchNormLayer(net4, sess, beta=beta, gamma=gamma)
outputNet = HiddenLayer(bn5, 10, sess,
                        non_liniarity=tf.sigmoid,
                        bactivate=False,
                        supervised_cost=1.)

# Train with a policy that is configured to grow the network when progress
# stalls (grow_after_turns_without_improvement=2, from epoch 1 onwards).
trainer = CategoricalTrainer(outputNet, 0.15)
trainPolicy = TrainPolicy(trainer, data, batch_size,
                          max_iterations=3000,
                          grow_after_turns_without_improvement=2,
                          start_grow_epoch=1,
                          learn_rate_decay=0.99,
                          learn_rate_boost=0.01,
                          back_loss_on_misclassified_only=True)
trainPolicy.run_full()

# Call form of print: same output on Python 2 for a single argument, and
# valid syntax on Python 3 (the bare print statement is not).
print(trainer.accuracy(data.test.features, data.test.labels))
inputs = tf.placeholder(tf.float32, shape=(None, 784))

# Settings shared by the layers below.
bactivate = True
noise_std = 0.3
beta = 0.5
gamma = 0.5
non_lin = tf.nn.sigmoid

# Keyword arguments common to every batch-norm layer / single-node layer.
bn_kwargs = dict(beta=beta, gamma=gamma)
narrow_kwargs = dict(non_liniarity=non_lin,
                     bactivate=bactivate,
                     unsupervised_cost=.001,
                     noise_std=noise_std)

# Four single-node layers, each preceded by its own batch-norm layer.
input_layer = InputLayer(inputs)
bn1 = BatchNormLayer(input_layer, sess, **bn_kwargs)
net1 = Layer(bn1, 1, sess, **narrow_kwargs)
bn2 = BatchNormLayer(net1, sess, **bn_kwargs)
net2 = Layer(bn2, 1, sess, **narrow_kwargs)
bn3 = BatchNormLayer(net2, sess, **bn_kwargs)
net3 = Layer(bn3, 1, sess, **narrow_kwargs)
bn4 = BatchNormLayer(net3, sess, **bn_kwargs)
net4 = Layer(bn4, 1, sess, **narrow_kwargs)

# 10-node sigmoid output layer carrying the supervised cost.
bn5 = BatchNormLayer(net4, sess, **bn_kwargs)
outputNet = Layer(bn5, 10, sess,
                  non_liniarity=tf.sigmoid,
                  bactivate=False,
                  supervised_cost=1.)

# Train with a policy that is configured to grow the network when progress
# stalls (grow_after_turns_without_improvement=2, from epoch 1 onwards).
trainer = CategoricalTrainer(outputNet, 0.15)
trainPolicy = TrainPolicy(trainer, data, batch_size,
                          max_iterations=3000,
                          grow_after_turns_without_improvement=2,
                          start_grow_epoch=1,
                          learn_rate_decay=0.99,
                          learn_rate_boost=0.01,
                          back_loss_on_misclassified_only=True)
trainPolicy.run_full()

# Call form of print: same output on Python 2 for a single argument, and
# valid syntax on Python 3 (the bare print statement is not).
print(trainer.accuracy(data.test.images, data.test.labels))
# Get reconstruction errors per layer for the current network.
print(trainer.back_losses_per_layer(data.train.features))

# Get the same errors restricted to the misclassified examples.
print(trainer.back_losses_per_layer(data.train.features,
                                    misclassification_only=True,
                                    labels=data.train.labels))

results = {}

# Try each different resize on its own clone and see how it does.
for x in range(len(hidden_layers)):
    print("resizing layer ", x)
    cloned = net.clone()

    # Use a separate name so the outer `hidden_layers` list is not rebound
    # to a clone's layers. The exact type match means subclasses of
    # HiddenLayer are not selected.
    cloned_hidden_layers = [layer for layer in cloned.all_connected_layers
                            if type(layer) is HiddenLayer]
    cloned_hidden_layers[x].resize()  # add 1 node

    # Train the resized clone. The original code built this trainer on
    # `net`, so the resize never took part in training.
    new_trainer = CategoricalTrainer(cloned, resize_learning_rate)
    new_tp = TrainPolicy(new_trainer, data, batch_size,
                         learn_rate_decay=learn_rate_decay)
    new_tp.train_till_convergence()

    # Score the resized clone via its own trainer. The original code asked
    # the old `trainer`, which reports on the un-resized network.
    acc, cost = new_trainer.accuracy(data.validation.features,
                                     data.validation.labels)
    print("train error ", acc, cost)
    results[x] = (acc, cost)

print(results)