# Hyper-parameters for the fully-connected network experiments.
num_hidden_nodes = 1024              # hidden-layer size -- presumably for a 1-hidden-layer net; confirm against training.models
num_batches = 10
dropout_prob = 0.3                   # dropout probability -- NOTE(review): confirm whether this is keep- or drop-probability
nn_layers = [1024, 300, 50]          # layer sizes for the deeper network variant
beta_nn = 0.0000005                  # l2-regularisation weight for the deep net
# Learning-rate decay schedule; keys match tf.train.exponential_decay's
# decay_steps / decay_rate / staircase arguments -- TODO confirm consumer.
exp_decay = {'decay_steps': 20000, 'decay_rate': 0.5, 'staircase': False}

# Creating the dataset (or retrieving it).
# train_size / valid_size / data_folder are defined earlier, outside this chunk.
train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels = \
    not_mnist.prepare_dataset(train_size, valid_size, data_folder)

# Formatting the data by flattening the images and converting
# labels to one-hot encoding.
train_dataset = utils.flatten_batch(train_dataset)
train_labels = utils.idx_to_onehot(train_labels, num_labels)
valid_dataset = utils.flatten_batch(valid_dataset)
valid_labels = utils.idx_to_onehot(valid_labels, num_labels)
test_dataset = utils.flatten_batch(test_dataset)
test_labels = utils.idx_to_onehot(test_labels, num_labels)

# Logistic regression with l2 regularisation.
# An empty hidden-layer list ([]) makes the fully-connected model degenerate
# to plain logistic regression.
print('Logistic regression with l2 regularisation...')
tf_graph, optimizer, loss, tf_predictions = training.models.fully_connected_model(
    input_size, num_labels, [], valid_dataset, test_dataset, batch_size,
    # NOTE(review): this call is truncated here -- the remaining arguments
    # (learning_rate, beta=...) continue beyond this chunk.
# Saving sanitised data in a separate file. print('Saving...') not_mnist.save_to_pickle(san_train_dataset, san_train_labels, san_valid_dataset, san_valid_labels, san_test_dataset, san_test_labels, pickle_sanitised_file) print('') # Finally, near duplicates could be found by quantising the samples, # so that each pixel can take a limited number of values. We can then # look for exact matches in the quantised images. # Training a simple logistic regression on the data. num_samples_list = [50, 100, 1000, 5000, len(train_dataset)] flat_test_dataset = utils.flatten_batch(test_dataset) flat_valid_dataset = utils.flatten_batch(valid_dataset) flat_train_dataset = utils.flatten_batch(train_dataset) for num_samples in num_samples_list: if num_samples > 10000: train = input( 'Are you sure you want to train a logistic regression on {0} samples? [y/n]' .format(num_samples)) if train != 'y': continue # Multinomial model is only available when using the L-BFGS solver. logistic_model = LogisticRegression(multi_class='multinomial', solver='lbfgs') logistic_model.fit(flat_train_dataset[:num_samples, :],
print('Number of validation samples in the sanitised set: {0}'.format(len(san_valid_dataset))) # Saving sanitised data in a separate file. print('Saving...') not_mnist.save_to_pickle(san_train_dataset, san_train_labels, san_valid_dataset, san_valid_labels, san_test_dataset, san_test_labels, pickle_sanitised_file) print('') # Finally, near duplicates could be found by quantising the samples, # so that each pixel can take a limited number of values. We can then # look for exact matches in the quantised images. # Training a simple logistic regression on the data. num_samples_list = [50, 100, 1000, 5000, len(train_dataset)] flat_test_dataset = utils.flatten_batch(test_dataset) flat_valid_dataset = utils.flatten_batch(valid_dataset) flat_train_dataset = utils.flatten_batch(train_dataset) for num_samples in num_samples_list: if num_samples > 10000: train = input( 'Are you sure you want to train a logistic regression on {0} samples? [y/n]'.format(num_samples)) if train != 'y': continue # Multinomial model is only available when using the L-BFGS solver. logistic_model = LogisticRegression(multi_class = 'multinomial', solver = 'lbfgs') logistic_model.fit(flat_train_dataset[:num_samples, :], train_labels[:num_samples]) valid_score = logistic_model.score(flat_valid_dataset, valid_labels)
# Hyper-parameters for the fully-connected network experiments.
nn_layers = [1024, 300, 50]          # layer sizes for the deeper network variant
beta_nn = 0.0000005                  # l2-regularisation weight for the deep net
# Learning-rate decay schedule; keys match tf.train.exponential_decay's
# decay_steps / decay_rate / staircase arguments -- TODO confirm consumer.
exp_decay = {
    'decay_steps': 20000,
    'decay_rate': 0.5,
    'staircase': False
}

# Creating the dataset (or retrieving it).
# train_size / valid_size / data_folder are defined earlier, outside this chunk.
train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels = \
    not_mnist.prepare_dataset(train_size, valid_size, data_folder)

# Formatting the data by flattening the images and converting
# labels to one-hot encoding.
train_dataset = utils.flatten_batch(train_dataset)
train_labels = utils.idx_to_onehot(train_labels, num_labels)
valid_dataset = utils.flatten_batch(valid_dataset)
valid_labels = utils.idx_to_onehot(valid_labels, num_labels)
test_dataset = utils.flatten_batch(test_dataset)
test_labels = utils.idx_to_onehot(test_labels, num_labels)

# Logistic regression with l2 regularisation.
# An empty hidden-layer list ([]) makes the fully-connected model degenerate
# to plain logistic regression; beta_logreg is defined outside this chunk.
print('Logistic regression with l2 regularisation...')
tf_graph, optimizer, loss, tf_predictions = training.models.fully_connected_model(
    input_size, num_labels, [], valid_dataset, test_dataset, batch_size,
    learning_rate, beta = beta_logreg)
training.graph_optimisation.run(tf_graph, optimizer, loss, tf_predictions,
                                # NOTE(review): this call is truncated here --
                                # its remaining arguments continue beyond
                                # this chunk.