def test_use_state_to_remove_layer(self):
    """A state snapshot taken before a topology change must restore exactly."""
    input_layer = InputLayer(self.mnist_data.features_shape)
    hidden = HiddenLayer(input_layer, 10, session=self.session,
                         node_importance_func=node_importance_optimal_brain_damage)
    output = CategoricalOutputLayer(hidden, self.mnist_data.labels_shape,
                                    regularizer_weighting=0.0001)

    feed = {output.input_placeholder: self.mnist_data.train.features[:1]}
    baseline = self.session.run(output.activation_predict, feed_dict=feed)

    # Snapshot, then mutate the topology by inserting a cloned layer.
    snapshot = output.get_network_state()
    hidden.add_intermediate_cloned_layer()
    mutated = self.session.run(output.activation_predict, feed_dict=feed)
    self.assertNotEqual(tuple(mutated[0]), tuple(baseline[0]))

    # Restoring the snapshot should reproduce the original predictions.
    output.set_network_state(snapshot)
    restored = self.session.run(output.activation_predict, feed_dict=feed)
    np.testing.assert_almost_equal(restored, baseline)
def test_save_load_network(self): net1 = InputLayer(784) net2 = HiddenLayer(net1, 20, self.session) output_net = CategoricalOutputLayer(net2, 10, self.session) data = output_net.get_network_pickle() new_net = BaseLayer.load_network_from_pickle(data, self.session) print new_net
def test_find_best_layer_size(self):
    """find_best_size should grow the layer beyond its starting 10 nodes.

    The scoring lambda rewards lower evaluation loss penalised by the log
    of the total parameter count, so a larger layer must win for this data.
    """
    data = self.mnist_data
    input_layer = InputLayer(data.features_shape)
    layer = HiddenLayer(input_layer, 10, session=self.session,
                        layer_noise_std=1.0, bactivate=False)
    output = CategoricalOutputLayer(layer, data.labels_shape)

    layer.find_best_size(
        data.train, data.test,
        lambda m, d: output.evaluation_stats(d)[0] - log(output.get_parameters_all_layers()),
        initial_learning_rate=0.1, tuning_learning_rate=0.1)

    # Fixed: was a bare `assert`, which is stripped under `python -O` and
    # gives no diagnostic; the unittest assertion does both properly.
    self.assertGreater(layer.get_resizable_dimension_size(), 10)
def test_bug_issue_1(self):
    """Regression test: resize up then down between convergence runs."""
    net = InputLayer(self.mnist_data.features_shape,
                     # drop_out_prob=.5,
                     layer_noise_std=1.)
    net = HiddenLayer(net, 100, self.session,
                      non_liniarity=tf.nn.relu,
                      batch_normalize_input=True)
    output = CategoricalOutputLayer(net, self.mnist_data.labels_shape, self.session,
                                    batch_normalize_input=True,
                                    regularizer_weighting=0.01)

    train, validation = self.mnist_data.train, self.mnist_data.validation
    output.train_till_convergence(train, validation, learning_rate=.1)
    net.resize(110)  # grow
    output.train_till_convergence(train, validation, learning_rate=.1)
    net.resize(90)   # shrink
    output.train_till_convergence(train, validation, learning_rate=.1)
def test_save_load_network_to_disk(self): net1 = InputLayer(784) net2 = HiddenLayer(net1, 20, self.session) output_net = CategoricalOutputLayer(net2, 10, self.session) data = output_net.get_network_pickle() with open("temp", "w") as f: f.write(data) new_data = pickle.load(open("temp", "r")) new_net = BaseLayer.load_network_from_state(new_data, self.session) print new_net
def test_get_and_set_state(self):
    """Shrinking a layer then restoring saved state must recover every tensor."""
    input_layer = InputLayer(self.mnist_data.features_shape)
    hidden = HiddenLayer(input_layer, 50, session=self.session,
                         node_importance_func=node_importance_optimal_brain_damage)
    output = CategoricalOutputLayer(hidden, self.mnist_data.labels_shape,
                                    regularizer_weighting=0.0001)

    feed = {output.input_placeholder: self.mnist_data.train.features[:1]}
    activation_before = self.session.run(output.activation_predict, feed_dict=feed)

    # Record every weight/bias tensor so the restore can be checked exactly.
    before = (hidden._weights.eval(), hidden._bias.eval(),
              output._weights.eval(), output._bias.eval())

    snapshot = hidden.get_network_state()
    hidden.resize(10)  # destructive change
    hidden.set_network_state(snapshot)

    activation_after = self.session.run(output.activation_predict, feed_dict=feed)
    after = (hidden._weights.eval(), hidden._bias.eval(),
             output._weights.eval(), output._bias.eval())

    for restored, original in zip(after, before):
        np.testing.assert_almost_equal(restored, original)
    np.testing.assert_almost_equal(activation_after, activation_before)
def test_accuracy_bug(self):
    """accuracy() previously raised an exception; it must return a percentage."""
    import tensor_dynamic.data.mnist_data as mnist
    import tensor_dynamic.data.data_set as ds
    import os

    data_dir = os.path.dirname(ds.__file__) + "/MNIST_data"
    data = mnist.get_mnist_data_set_collection(data_dir, one_hot=True)

    input_layer = InputLayer(data.features_shape)
    outputs = CategoricalOutputLayer(input_layer, data.labels_shape, self.session)
    outputs.train_till_convergence(data.test, learning_rate=0.2, continue_epochs=1)

    # This call used to throw; now just sanity-check its range.
    accuracy = outputs.accuracy(data.test)
    self.assertLessEqual(accuracy, 100.)
    self.assertGreaterEqual(accuracy, 0.)
def test_adding_hidden_layer_with_resize(self):
    """Cloning an intermediate layer then resizing must keep the graph runnable."""
    net = InputLayer(self.mnist_data.features_shape)
    net = HiddenLayer(net, 100, self.session,
                      non_liniarity=tf.nn.relu,
                      batch_normalize_input=False)
    output = CategoricalOutputLayer(net, self.mnist_data.labels_shape, self.session,
                                    batch_normalize_input=True,
                                    regularizer_weighting=None)

    output.train_till_convergence(self.mnist_data.train, self.mnist_data.validation,
                                  learning_rate=.1)
    net.add_intermediate_cloned_layer()
    net.resize(110)

    # A forward pass on a small batch proves the rebuilt graph is consistent.
    self.session.run(output.activation_predict,
                     feed_dict={output.input_placeholder: self.mnist_data.train.features[:3],
                                output.target_placeholder: self.mnist_data.train.labels[:3]})
def test_hessian(self): layer = InputLayer(self.mnist_data.features_shape, layer_noise_std=1.) layer = HiddenLayer(layer, 6, self.session, batch_normalize_input=True) output = CategoricalOutputLayer(layer, self.mnist_data.labels_shape, self.session, batch_normalize_input=True) hession_op = layer.hessien_with_respect_to_error_op result = self.session.run(hession_op, feed_dict={output.input_placeholder:self.mnist_data.train.features, output.target_placeholder: self.mnist_data.train.labels}) print result
def test_remove_layer_from_network(self):
    """Deleting the hidden layer leaves a runnable input->output network."""
    input_layer = InputLayer(self.mnist_data.features_shape)
    hidden = HiddenLayer(input_layer, 10, session=self.session,
                         node_importance_func=node_importance_optimal_brain_damage)
    output = CategoricalOutputLayer(hidden, self.mnist_data.labels_shape,
                                    regularizer_weighting=0.0001)

    feed = {output.input_placeholder: self.mnist_data.train.features[:1]}
    self.session.run(output.activation_predict, feed_dict=feed)  # sanity run before removal
    hidden.remove_layer_from_network()
    self.session.run(output.activation_predict, feed_dict=feed)  # must still run afterwards

    self.assertEqual(output.layer_number, 1)
    self.assertEqual(output.input_nodes, (784,))
def test_resize_with_batch_norm_and_2_layers_resize_2(self):
    """Resizing the second of two batch-normalised layers, then retraining."""
    net = InputLayer(self.mnist_data.features_shape)
    first = HiddenLayer(net, 2, session=self.session, batch_normalize_input=True)
    second = HiddenLayer(first, 2, session=self.session, batch_normalize_input=True)
    output = CategoricalOutputLayer(second, self.mnist_data.labels_shape,
                                    batch_normalize_input=False)

    output.train_till_convergence(self.mnist_data.train, learning_rate=0.1)
    second.resize(3)
    output.train_till_convergence(self.mnist_data.train, learning_rate=0.1)
def test_growing(self):
    """Growing a 1-node layer to 2 must halve the outgoing weight row.

    When a node is split, the downstream weight row it fed is divided
    between the clones so the layer's output is (approximately) preserved.

    Fix: removed four unused locals (hidden weights/biases before and
    after the resize) that were evaluated but never asserted on;
    `.eval()` has no side effects, so dropping them changes nothing.
    """
    input_layer = InputLayer(self.mnist_data.features_shape)
    layer = HiddenLayer(input_layer, 1, session=self.session,
                        node_importance_func=node_importance_optimal_brain_damage)
    output = CategoricalOutputLayer(layer, self.mnist_data.labels_shape,
                                    regularizer_weighting=0.0001)

    weights_output = output._weights.eval()

    layer.resize(2)

    new_weights_output = output._weights.eval()
    # Row 0 fed the single original node; after the split each clone
    # carries half of it.
    np.testing.assert_almost_equal(new_weights_output[0], weights_output[0] / 2)
def test_bug_issue_with_state(self):
    """Regression: restoring pre-resize state and retraining must not crash."""
    net = InputLayer(self.mnist_data.features_shape, layer_noise_std=1.)
    net = HiddenLayer(net, 6, self.session, non_liniarity=tf.nn.relu,
                      batch_normalize_input=True)
    output = CategoricalOutputLayer(net, self.mnist_data.labels_shape, self.session,
                                    batch_normalize_input=True,
                                    regularizer_weighting=0.01)

    snapshot = output.get_network_state()
    net.resize(10)
    output.train_till_convergence(self.mnist_data.train, self.mnist_data.validation,
                                  learning_rate=.1)

    # Roll back to the 6-node snapshot and train again.
    output.set_network_state(snapshot)
    output.train_till_convergence(self.mnist_data.train, self.mnist_data.validation,
                                  learning_rate=.1)
def test_bug_issue_2(self): last_layer = InputLayer((1,)) last_layer = HiddenLayer(last_layer, 2, self.session, batch_normalize_input=True) output = CategoricalOutputLayer(last_layer, (1,), self.session, batch_normalize_input=True) print output.activate_predict([[0.]]) last_layer.resize(4) print output.activate_predict([[0.]])
def test_resize_with_batch_norm_resize(self):
    """An Adam-trained batch-norm layer can be resized and trained again."""
    net = InputLayer(self.mnist_data.features_shape)
    layer = HiddenLayer(net, 2, session=self.session, batch_normalize_input=True)
    output = CategoricalOutputLayer(layer, self.mnist_data.labels_shape,
                                    batch_normalize_input=False)

    feed = {output.input_placeholder: self.mnist_data.train.features[:3],
            output.target_placeholder: self.mnist_data.train.labels[:3]}

    def run_one_adam_step():
        # A fresh optimizer each time: resize invalidates the slot
        # variables the previous optimizer created, so they are rebuilt
        # and initialised before the step.
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(output.activation_predict)
        self.session.run(tf.initialize_variables(list(get_tf_optimizer_variables(optimizer))))
        self.session.run(train_op, feed_dict=feed)

    run_one_adam_step()
    layer.resize(3)
    run_one_adam_step()
def test_remove_unimportant_nodes_does_not_affect_test_error(self):
    """Pruning 5 of 800 nodes by OBD importance should barely move test loss."""
    data = self.mnist_data

    input_layer = InputLayer(data.features_shape, drop_out_prob=None)
    layer = HiddenLayer(input_layer, 800, session=self.session,
                        batch_normalize_input=False,  # D.S TODO TEST
                        node_importance_func=node_importance_optimal_brain_damage)
    output = CategoricalOutputLayer(layer, data.labels_shape,
                                    batch_normalize_input=False)

    output.train_till_convergence(data.train, data.test, learning_rate=0.001)

    # Should this be on test or train?
    _, _, loss_before = output.evaluation_stats(data.test)
    print(loss_before)

    layer.resize(795, data_set_validation=data.test)

    _, _, loss_after = output.evaluation_stats(data.test)
    print(loss_after)

    self.assertAlmostEqual(loss_before, loss_after, delta=10.0)
for _ in range(1): last_layer = HiddenLayer( last_layer, 10, session, non_liniarity=non_liniarity, node_importance_func= node_importance_by_real_activation_from_input_layer_variance, layer_noise_std=noise, batch_normalize_input=True) output = CategoricalOutputLayer( last_layer, data_set_collection.labels_shape, session, batch_normalize_input=True, loss_cross_entropy_or_log_prob=False, layer_noise_std=noise, regularizer_weighting=regularizer_coeff) def get_file_root(): return data_set_collection.name + "_flat_noise_" + str(noise) def loss_comparison_evaluation(model, data_set): """Use bayesian model comparison to evaluate a trained model Args: model (OutputLayer): Trained model to evaluate data_set (DataSet): data set this model was trained on, tends to be test set, but can be train if set up so Returns:
def main(file_name_all="pruning_tests_noise_%s-%s-%s.csv" % (LAYER_NOISE_STD, start, end),
         file_name_avg="pruning_tests_avg_noise_%s-%s-%s.csv" % (LAYER_NOISE_STD, start, end)):
    """Benchmark every node-importance method on MNIST and CIFAR-100.

    For each data set and each of NUM_TRIES runs, a network is trained to
    convergence and its state snapshotted; then every pruning method is
    applied from that same snapshot: resize from `start` to `end` nodes,
    retrain, and record train/validation/test losses at each stage.

    Per-run rows are appended to `file_name_all`; per-method averages
    (plus an extra difference column) are written to `file_name_avg`.
    """
    data_set_collections = [get_mnist_data_set_collection(validation_ratio=.15),
                            get_cifar_100_data_set_collection(validation_ratio=.15)]
    methods = [node_importance_by_dummy_activation_from_input_layer,
               node_importance_by_real_activation_from_input_layer,
               node_importance_by_square_sum,
               node_importance_by_removal,
               node_importance_random,
               node_importance_optimal_brain_damage,
               node_importance_full_taylor_series,
               node_importance_by_real_activation_from_input_layer_variance,
               node_importance_error_derrivative,
               dummy_random_weights]

    final_dict = defaultdict(lambda: [])

    with open(file_name_all, 'w') as result_file:
        result_file.write(
            'method, data_set, before_prune_train, before_prune_validation, before_prune_trest, after_prune_train, after_prune_validataion, after_prune_test, after_converge_train, after_converge_validataion, after_converge_test, converge_iterations\n')
        for data in data_set_collections:
            for _ in range(NUM_TRIES):
                tf.reset_default_graph()
                with tf.Session() as session:
                    input_layer = InputLayer(data.features_shape)
                    if len(data.features_shape) > 1:
                        input_layer = FlattenLayer(input_layer)
                    layer = HiddenLayer(input_layer, start, session=session,
                                        layer_noise_std=LAYER_NOISE_STD,
                                        node_importance_func=None,
                                        non_liniarity=tf.nn.relu,
                                        batch_normalize_input=True)
                    output = CategoricalOutputLayer(layer, data.labels_shape,
                                                    batch_normalize_input=True,
                                                    regularizer_weighting=0.01,
                                                    layer_noise_std=LAYER_NOISE_STD)

                    output.train_till_convergence(data.train, data.validation,
                                                  learning_rate=0.0001)

                    # Every method starts from the same converged snapshot.
                    state = output.get_network_state()

                    for method in methods:
                        output.set_network_state(state)
                        layer._node_importance_func = method

                        _, _, before_test = output.evaluation_stats(data.test)
                        _, _, before_validation = output.evaluation_stats(data.validation)
                        _, _, before_train = output.evaluation_stats(data.train)

                        # The dummy baseline skips pruning/splitting entirely.
                        no_splitting_or_pruning = method == dummy_random_weights

                        layer.resize(end, data_set_train=data.train,
                                     data_set_validation=data.validation,
                                     no_splitting_or_pruning=no_splitting_or_pruning)

                        _, _, after_test = output.evaluation_stats(data.test)
                        _, _, after_validation = output.evaluation_stats(data.validation)
                        _, _, after_train = output.evaluation_stats(data.train)

                        _error, iterations = output.train_till_convergence(
                            data.train, data.validation, learning_rate=0.0001)

                        _, _, converged_test = output.evaluation_stats(data.test)
                        _, _, converged_validation = output.evaluation_stats(data.validation)
                        _, _, converged_train = output.evaluation_stats(data.train)

                        final_dict[method.__name__].append(
                            (before_train, before_validation, before_test,
                             after_train, after_validation, after_test,
                             converged_train, converged_validation, converged_test))

                        result_file.write('%s,%s,%s,%s,%s,%s,%s,%s, %s, %s, %s, %s\n' % (
                            method.__name__, data.name,
                            before_train, before_validation, before_test,
                            after_train, after_validation, after_test,
                            converged_train, converged_validation, converged_test,
                            iterations))
                        result_file.flush()

    # NOTE(review): the averages file is written once, after all runs; the
    # original collapsed formatting made the exact nesting ambiguous — confirm.
    with open(file_name_avg, "w") as file_avg:
        file_avg.write(
            'method, before_prune_train, before_prune_validataion, before_prune_trest, after_prune_train, after_prune_validataion, after_prune_test, after_converge_train, after_converge_validataion, after_convert_test, test_diff\n')
        for name, values in final_dict.iteritems():
            v_len = float(len(values))
            averages = tuple(sum(x[i] for x in values) / v_len
                             for i in range(len(values[0])))
            # Extra summary column: averages[2] - averages[-2]
            # (the header labels it test_diff).
            averages = averages + (averages[2] - averages[-2],)
            file_avg.write('%s,%s,%s,%s,%s,%s,%s,%s, %s, %s, %s\n' % ((name,) + averages))
regularizer_coeff = 0.01 last_layer = InputLayer(data_set_collection.features_shape, # drop_out_prob=.5, # layer_noise_std=1. ) last_layer = FlattenLayer(last_layer, session) for _ in range(3): last_layer = HiddenLayer(last_layer, nodes_per_layer, session, non_liniarity=non_liniarity, layer_noise_std=NOISE, batch_normalize_input=True) output = CategoricalOutputLayer(last_layer, data_set_collection.labels_shape, session, batch_normalize_input=True, loss_cross_entropy_or_log_prob=True, layer_noise_std=NOISE, regularizer_weighting=regularizer_coeff) output.train_till_convergence(data_set_collection.train, data_set_collection.validation, learning_rate=0.0001) state = output.get_network_state() output.save_checkpoints('cifar-100-layers') print_stats(data_set_collection, output, -1) for i in range(3): try_intermediate_layer(4 - i)