num_hidden_nodes = 1024

num_batches = 10
dropout_prob = 0.3

nn_layers = [1024, 300, 50]
beta_nn = 0.0000005
exp_decay = {'decay_steps': 20000, 'decay_rate': 0.5, 'staircase': False}
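
# The exp_decay settings mirror tf.train.exponential_decay's parameters; a
# minimal sketch of the schedule they describe, assuming the dict is forwarded
# to that function together with some base learning rate:
def _decayed_lr(base_lr, step, decay_steps=20000, decay_rate=0.5, staircase=False):
    # lr(step) = base_lr * decay_rate ** (step / decay_steps); staircase=True
    # floors the exponent so the rate drops in discrete jumps.
    exponent = step // decay_steps if staircase else step / decay_steps
    return base_lr * decay_rate ** exponent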

# Creating the dataset (or retrieving it).
train_dataset, train_labels, valid_dataset, valid_labels, test_dataset, test_labels = \
    not_mnist.prepare_dataset(train_size, valid_size, data_folder)

# Formatting the data by flattening the images and converting
# labels to one-hot encoding.
train_dataset = utils.flatten_batch(train_dataset)
train_labels = utils.idx_to_onehot(train_labels, num_labels)
valid_dataset = utils.flatten_batch(valid_dataset)
valid_labels = utils.idx_to_onehot(valid_labels, num_labels)
test_dataset = utils.flatten_batch(test_dataset)
test_labels = utils.idx_to_onehot(test_labels, num_labels)
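
# For reference, plausible NumPy equivalents of the two utils helpers used
# above (assumptions; the real implementations may differ):
import numpy as np

def _flatten_batch(images):
    # (num_samples, height, width) -> (num_samples, height * width)
    return images.reshape(len(images), -1)

def _idx_to_onehot(labels, num_labels):
    # (num_samples,) integer labels -> (num_samples, num_labels) one-hot floats
    return (np.arange(num_labels) == labels[:, None]).astype(np.float32)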

# Logistic regression with l2 regularisation.
print('Logistic regression with l2 regularisation...')
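# Passing an empty hidden-layer list (the [] below) presumably reduces the
# network to a single softmax layer, i.e. multinomial logistic regression.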

tf_graph, optimizer, loss, tf_predictions = training.models.fully_connected_model(
    input_size, num_labels, [],
    valid_dataset, test_dataset, batch_size,
    learning_rate, beta=beta_logreg)

# The arguments after tf_predictions are assumed here (training data and an
# iteration budget).
training.graph_optimisation.run(tf_graph, optimizer, loss, tf_predictions,
                                train_dataset, train_labels, num_batches)
Example #2
print('Number of validation samples in the sanitised set: {0}'.format(len(san_valid_dataset)))

# Saving sanitised data in a separate file.
print('Saving...')
not_mnist.save_to_pickle(san_train_dataset, san_train_labels,
                         san_valid_dataset, san_valid_labels, san_test_dataset,
                         san_test_labels, pickle_sanitised_file)
print('')

# Finally, near duplicates could be found by quantising the samples,
# so that each pixel can take a limited number of values. We can then
# look for exact matches in the quantised images.
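
# A hypothetical sketch of that quantisation approach (the function name and
# num_levels are assumptions, not part of this project): rescale pixels into a
# few integer buckets, then hash the quantised bytes so near-identical images
# collide exactly.
import numpy as np

def _find_near_duplicates(images, num_levels=4):
    span = images.max() - images.min()
    quantised = ((images - images.min()) / span * (num_levels - 1)).round().astype(np.uint8)
    seen, duplicates = {}, []
    for idx, img in enumerate(quantised):
        key = img.tobytes()
        if key in seen:
            duplicates.append((seen[key], idx))
        else:
            seen[key] = idx
    return duplicates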

# Training a simple logistic regression on the data.
num_samples_list = [50, 100, 1000, 5000, len(train_dataset)]

flat_test_dataset = utils.flatten_batch(test_dataset)
flat_valid_dataset = utils.flatten_batch(valid_dataset)
flat_train_dataset = utils.flatten_batch(train_dataset)

for num_samples in num_samples_list:
    if num_samples > 10000:
        train = input(
            'Are you sure you want to train a logistic regression on {0} samples? [y/n]'
            .format(num_samples))
        if train != 'y':
            continue

    # The multinomial objective requires a solver that supports it, such as L-BFGS.
    logistic_model = LogisticRegression(multi_class='multinomial',
                                        solver='lbfgs')
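    # Note that scikit-learn's LogisticRegression applies l2 regularisation by
    # default, with strength controlled by C (default 1.0).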
    logistic_model.fit(flat_train_dataset[:num_samples, :],
                       train_labels[:num_samples])

    valid_score = logistic_model.score(flat_valid_dataset, valid_labels)
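    # Hypothetical reporting of the held-out accuracy (an assumption):
    print('Validation accuracy on {0} samples: {1:.3f}'.format(num_samples, valid_score))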