def run_research():
    # An experiment to determine which feature is best to ignore for the
    # highest accuracy.
    knn_fac = classifier.knn_factory(1)
    folds = [
        hw3_utils.load_data('ecg_fold_' + str(i + 1) + '.pickle')
        for i in range(2)
    ]
    features_num = len(folds[0][0][0])
    max_accuracy_feature = None
    for run_num in range(1, 8):
        # After the first round, permanently drop the best feature found so
        # far before searching for the next one
        if max_accuracy_feature is not None:
            folds = [(np.delete(data, max_accuracy_feature, 1), labels, test)
                     for data, labels, test in folds]
            features_num = len(folds[0][0][0])
        results = [
            evaluate_comp(knn_fac, folds, feature)
            for feature in range(features_num)
        ]
        max_accuracy_feature = max(results, key=lambda item: item[1])[0]
        with open('my_experiments' + str(run_num) + '.csv', 'w+') as result_file:
            for feature, accuracy, error in results:
                line = str(feature) + ',' + str(accuracy) + ',' + str(error) + '\n'
                result_file.write(line)
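# run_research() relies on an evaluate_comp() helper that is not shown in this
# section. The following is a minimal sketch of what it plausibly does,
# inferred from the call site: drop one candidate feature from every fold,
# cross-validate the factory on the reduced folds, and return a
# (feature, accuracy, error) tuple so that max(results, key=lambda item:
# item[1]) picks the feature whose removal yields the best accuracy. The body
# below is an assumption, not the original code.
import numpy as np

def evaluate_comp(factory, folds, feature):
    reduced = [(np.delete(data, feature, 1), labels) for data, labels, _ in folds]
    accuracies = []
    for i, (test_data, test_labels) in enumerate(reduced):
        # all folds except the i'th form the training set
        train_data = np.concatenate([d for j, (d, _) in enumerate(reduced) if j != i])
        train_labels = np.concatenate([l for j, (_, l) in enumerate(reduced) if j != i])
        clf = factory.train(train_data, train_labels)
        hits = sum(clf.classify(s) == t for s, t in zip(test_data, test_labels))
        accuracies.append(hits / len(test_labels))
    accuracy = float(np.mean(accuracies))
    return feature, accuracy, 1 - accuracy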
def main():
    # Import data
    train_set_full, train_tags, test_set_full = load_data('data/Data.pickle')

    # ### Pre-processing ###
    # Trim data to the [0, 1] range
    train_set_full[train_set_full < 0] = 0
    train_set_full[train_set_full > 1] = 1
    test_set_full[test_set_full < 0] = 0
    test_set_full[test_set_full > 1] = 1

    # Select the 70 best features
    feature_sel_1 = SelectKBest(f_classif, k=70)
    feature_sel_1.fit(train_set_full, train_tags)
    train_set_1 = feature_sel_1.transform(train_set_full)
    test_set_1 = feature_sel_1.transform(test_set_full)

    # ### Train classifiers ###
    clf_1 = neighbors.KNeighborsClassifier(n_neighbors=1, weights='uniform',
                                           p=2).fit(train_set_1, train_tags)
    clf_2 = neighbors.KNeighborsClassifier(n_neighbors=3, weights='distance',
                                           p=2).fit(train_set_1, train_tags)
    clf_3 = neighbors.KNeighborsClassifier(n_neighbors=5, weights='distance',
                                           p=1).fit(train_set_1, train_tags)
    clf_4 = svm.SVC(kernel='poly', C=0.78, degree=11, coef0=2,
                    gamma='auto').fit(train_set_1, train_tags)
    clf_5 = RandomForestClassifier(n_estimators=200, criterion='entropy',
                                   max_depth=None).fit(train_set_1, train_tags)

    # Create a voting classifier over the five models
    final_clf = VotingClassifier(estimators=[('knn1', clf_1), ('knn3', clf_2),
                                             ('knn5', clf_3), ('svm', clf_4),
                                             ('rf', clf_5)],
                                 voting='hard')
    final_clf.fit(train_set_1, train_tags)
    write_prediction(final_clf.predict(test_set_1).astype(int))
def main():
    train_features, train_labels, test_features = load_data()
    x = (train_features, train_labels)
    # split_crosscheck_groups(x, 2)
    # KNN_test()
    # Additional_tests()
    compete()
def train_model_and_classify_test():
    training_set, labels, test_set = utils.load_data(
        r'Shuffled_scaled_data.data')
    training_set_pca, labels_pca, test_set_pca = utils.load_data(
        r'Shuffled_scaled_PCA_data.data')

    # Create the classifiers and train them; the SVM uses the raw features,
    # the rest use the PCA-reduced features
    svm = Svm_factory()
    svm = svm.train(training_set, labels)
    tree_classifier = DecisionTree_factory()
    tree_classifier = tree_classifier.train(training_set_pca, labels_pca)
    knn_7 = knn_factory(7)
    knn_7 = knn_7.train(training_set_pca, labels_pca)
    knn_9 = knn_factory(9)
    knn_9 = knn_9.train(training_set_pca, labels_pca)
    knn_11 = knn_factory(11)
    knn_11 = knn_11.train(training_set_pca, labels_pca)

    # Predictions for the test set: vote over the five classifiers and
    # predict True only when at least 4 of the 5 agree
    predictions = []
    for sample, sample_with_pca in zip(test_set, test_set_pca):
        counter = 0
        counter += 1 if svm.classify(sample) else 0
        counter += 1 if tree_classifier.classify(sample_with_pca) else 0
        counter += 1 if knn_7.classify(sample_with_pca) else 0
        counter += 1 if knn_9.classify(sample_with_pca) else 0
        counter += 1 if knn_11.classify(sample_with_pca) else 0
        predictions.append(counter > 3)
    print(np.where(np.array(predictions) == False)[0].shape)
def evaluate(classifier_factory, k):
    # Load all folds
    folds = [load_data('ecg_fold_' + str(i + 1) + '.pickle') for i in range(k)]
    accuracies = []
    errors = []
    for i in range(k):
        # Choose one fold to be the test group; all others are train groups
        test_data = folds[i][0]
        test_labels = folds[i][1]
        train_folds = [folds[j][0] for j in range(k) if j != i]
        train_data = []
        for train_fold in train_folds:
            for features in train_fold:
                train_data.append(features)
        train_data = np.array(train_data)  # conversion to an np array
        train_labels = []
        for j in range(k):
            if j != i:
                for train_label in folds[j][1]:
                    train_labels.append(train_label)

        # Train on the merged folds and classify the held-out fold
        classifier = classifier_factory.train(train_data, train_labels)
        res_list = [classifier.classify(features) for features in test_data]

        # Tally each result: a label of True means the subject is actually
        # sick, and a result of 1 means the sample was classified as sick.
        test_false_positive = 0
        test_false_negative = 0
        test_true_positive = 0
        test_true_negative = 0
        N = len(res_list)
        for j in range(N):
            if res_list[j] == 1 and test_labels[j] == True:
                test_true_positive += 1
            elif res_list[j] == 1 and test_labels[j] == False:
                test_false_positive += 1
            elif res_list[j] == 0 and test_labels[j] == True:
                test_false_negative += 1
            elif res_list[j] == 0 and test_labels[j] == False:
                test_true_negative += 1
        accuracies.append((test_true_positive + test_true_negative) / N)
        errors.append((test_false_positive + test_false_negative) / N)
    return np.average(accuracies), np.average(errors)
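# evaluate() assumes the hw3 factory/classifier protocol: a factory object
# whose train(data, labels) returns a classifier exposing classify(features).
# A minimal k-NN sketch of that protocol, assuming Euclidean distance and
# majority voting; the real knn_factory in this codebase may differ.
import numpy as np

class knn_classifier:
    def __init__(self, k, data, labels):
        self.k, self.data, self.labels = k, np.asarray(data), list(labels)

    def classify(self, features):
        # distances from the query to every stored training sample
        dists = np.linalg.norm(self.data - np.asarray(features), axis=1)
        nearest = np.argsort(dists)[:self.k]
        votes = [self.labels[i] for i in nearest]
        return max(set(votes), key=votes.count)

class knn_factory:
    def __init__(self, k):
        self.k = k

    def train(self, data, labels):
        return knn_classifier(self.k, data, labels)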
def run_my_classify():
    # Predict the test data using the specific features found earlier
    data, labels, tests = hw3_utils.load_data()
    # Feature indices to ignore, taken from the preceding research run. The
    # deletions are applied sequentially, so each index refers to the array
    # after the previous deletions (hence 90 appearing twice).
    features_to_ignore = [90, 23, 90, 103, 36]
    for feature in features_to_ignore:
        data = np.delete(data, feature, 1)
        tests = np.delete(tests, feature, 1)
    clf = classifier.knn_factory(1).train(data, labels)
    results = [clf.classify(test) for test in tests]
    hw3_utils.write_prediction(results)
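# run_my_classify() ends with hw3_utils.write_prediction(), a course utility
# whose implementation is not part of this section. A hypothetical stand-in,
# assuming it serializes one label per line into a results file; the file
# name 'results.data' and the exact format are guesses, not the course's
# specification.
def write_prediction(results, path='results.data'):
    with open(path, 'w') as f:
        for label in results:
            f.write(str(label) + '\n')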
def compete():
    train_features, train_labels, test_features = load_data()
    # Execute feature selection only once to save time: cache the selected
    # feature subsets on disk and reuse them on later runs
    path = "updated_features.data"
    my_file = Path(path)
    if not my_file.is_file():
        X_train_subset, X_test_subset = feature_selection(
            train_features, train_labels, test_features)
        with open(path, 'wb') as f:
            tuple_to_store = (X_train_subset, X_test_subset)
            pickle.dump(tuple_to_store, f, protocol=pickle.HIGHEST_PROTOCOL)
    else:
        with open(path, 'rb') as f:
            X_train_subset, X_test_subset = pickle.load(f)
    competition_test(X_train_subset, train_labels, X_test_subset)
def main():
    train_set, train_tags, test_set = load_data()
    split_crosscheck_groups((train_set, train_tags), 2)
    with open('experiment6.csv', 'w') as f:
        k_vals, acc_list, err_list = [1, 3, 5, 7, 13], [], []
        for k in k_vals:
            knn = knn_factory(k)
            acc, err = evaluate(knn, 2)
            f.write(', '.join([str(k), str(acc), str(err)]) + '\n')
            acc_list.append(acc)
            err_list.append(err)
    tf = tree_factory()
    tree_acc, tree_err = evaluate(tf, 2)
    pf = perceptron_factory()
    percp_acc, percp_err = evaluate(pf, 2)
    with open('experiment12.csv', 'w') as f:
        f.write(', '.join([str(1), str(tree_acc), str(tree_err)]) + '\n')
        f.write(', '.join([str(2), str(percp_acc), str(percp_err)]) + '\n')
def evaluate(classifier_factory, k):
    accuracy = 0
    full_train_data, full_train_tags, _ = load_data()
    full_train_set = [list(l) for l in full_train_data]
    for fold in range(k):
        # create training and test sets for the i'th fold
        fold_test_set = load_k_fold_data(fold + 1)
        fold_train_set = [
            smpl for smpl in full_train_set if smpl not in fold_test_set[0]
        ]
        fold_train_tags = [
            full_train_tags[full_train_set.index(smpl)]
            for smpl in fold_train_set
        ]
        classifier = classifier_factory.train(fold_train_set, fold_train_tags)
        # accumulate the accuracy over the folds
        accuracy += classifier.score(fold_test_set[0], fold_test_set[1])
    accuracy /= k
    error = 1 - accuracy
    return accuracy, error
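# evaluate() above depends on a load_k_fold_data() helper that is not shown.
# A plausible sketch, assuming the folds were written by
# split_crosscheck_groups() as 'ecg_fold_<i>.pickle' files, each holding a
# (samples, labels) pair; the file naming follows the other evaluate()
# variant in this section and is an assumption.
import pickle

def load_k_fold_data(i):
    with open('ecg_fold_' + str(i) + '.pickle', 'rb') as f:
        return pickle.load(f)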
def test_CDNN(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
              batch_size=200, verbose=False, filter_size=5):
    """
    Wrapper function for testing a CNN in cascade with a DNN.
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, filter_size,
                                              filter_size),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer; each conv+pool stage
    # shrinks the feature maps from s to (s - filter_size + 1) // 2
    new_shape = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], new_shape,
                                             new_shape),
                                filter_shape=(nkerns[1], nkerns[0],
                                              filter_size, filter_size),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # Construct the fully-connected sigmoidal layers
    new_factors = (new_shape - filter_size + 1) // 2
    layer2 = HiddenLayer(rng,
                         input=layer2_input,
                         n_in=nkerns[1] * new_factors * new_factors,
                         n_out=500,
                         activation=T.tanh)
    layer3 = HiddenLayer(rng,
                         input=layer2.output,
                         n_in=500,
                         n_out=500,
                         activation=T.tanh)

    # Classify the values of the fully-connected sigmoidal layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = (layer4.params + layer3.params + layer2.params + layer1.params +
              layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    return train_nn(train_model, validate_model, test_model, n_train_batches,
                    n_valid_batches, n_test_batches, n_epochs, verbose)
def test_noise_injection_at_weight(learning_rate=0.1, L1_reg=0.00,
                                   L2_reg=0.0001, n_epochs=100, batch_size=128,
                                   n_hidden=500, n_hiddenLayers=3, verbose=True,
                                   noise_level=0.001, noise_dist='uniform'):
    """
    Wrapper function for the experiment of noise injection at the weights.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type noise_level: float
    :param noise_level: scale of the noise injected into the weights.

    :type noise_dist: string
    :param noise_dist: distribution of the injected noise.
    """
    # Load the down-sampled dataset in raw format (numpy.ndarray, not
    # theano.shared). train_set, valid_set, test_set format: tuple(input,
    # target). input is a 2D numpy.ndarray (a matrix) where each row
    # corresponds to an example; target is a 1D numpy.ndarray (a vector) of
    # the same length as the number of rows in the input.
    datasets = load_data(ds_rate=5)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(rng=rng, input=x, n_in=32 * 32 * 3, n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs. classifier.params alternates
    # W, b, W, b, ... (one weight matrix and one bias vector per layer), so
    # the even-indexed entries are the weights and the odd-indexed entries
    # the biases. Noise is injected into the weight updates only; the biases
    # follow plain SGD.
    updates = ([(param, param - learning_rate * gparam
                 + noise_injection(param.get_value(), noise_level, noise_dist))
                for param, gparam in zip(classifier.params[0::2], gparams[0::2])]
               + [(param, param - learning_rate * gparam)
                  for param, gparam in zip(classifier.params[1::2], gparams[1::2])])

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)
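# test_noise_injection_at_weight() calls a noise_injection() helper that is
# not defined in this section. A minimal numpy sketch consistent with the
# call site noise_injection(param.get_value(), noise_level, noise_dist): draw
# a noise tensor of the same shape as the weight matrix, either uniform in
# [-noise_level, noise_level] or Gaussian with std noise_level. The exact
# distributions are assumptions. Note that, used as above inside the symbolic
# update expression, the numpy array is baked into the compiled graph as a
# constant, so the same noise sample is re-applied every update; drawing from
# a Theano RandomStreams instead would resample per step.
import numpy

def noise_injection(weights, noise_level, noise_dist='uniform'):
    if noise_dist == 'uniform':
        return numpy.random.uniform(-noise_level, noise_level,
                                    size=weights.shape).astype(weights.dtype)
    elif noise_dist == 'normal':
        return numpy.random.normal(0.0, noise_level,
                                   size=weights.shape).astype(weights.dtype)
    raise ValueError('unknown noise distribution: %s' % noise_dist)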
def test_data_augmentation(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                           n_epochs=100, batch_size=128, n_hidden=500,
                           n_hiddenLayers=3, verbose=False):
    """
    Wrapper function for the experiment of data augmentation.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    # Load the smaller dataset in raw format (numpy.ndarray, not
    # theano.shared), since we need to preprocess it. train_set, valid_set,
    # test_set format: tuple(input, target). input is a 2D numpy.ndarray
    # where each row corresponds to an example; target is a 1D numpy.ndarray
    # of the same length as the number of rows in the input.
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # Repeat the training labels 5 times: once for the original images and
    # once for each of the four translated copies below
    train_set[1] = numpy.tile(train_set[1], 5)

    # Translate the dataset one pixel up, down, right and left
    train_set_x_u = translate_image(train_set[0], "w")
    train_set_x_d = translate_image(train_set[0], "s")
    train_set_x_r = translate_image(train_set[0], "d")
    train_set_x_l = translate_image(train_set[0], "a")

    # Stack the original dataset and the synthesized datasets
    train_set[0] = numpy.vstack((train_set[0], train_set_x_u, train_set_x_d,
                                 train_set_x_r, train_set_x_l))

    # Convert the raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(rng=rng, input=x, n_in=32 * 32 * 3, n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    output = train_nn(train_model, validate_model, test_model,
                      n_train_batches, n_valid_batches, n_test_batches,
                      n_epochs, verbose)
    return output
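# test_data_augmentation() calls translate_image(), which is not shown here
# (a later variant in this section passes numeric directions 1-4 instead of
# 'w'/'s'/'d'/'a'). A sketch under these assumptions: images arrive
# rasterized as rows of length 3*32*32, the direction argument selects
# up/down/right/left, and translation is a one-pixel shift with wrap-around
# via numpy.roll.
import numpy

def translate_image(flat_images, direction):
    imgs = flat_images.reshape(-1, 3, 32, 32)
    # axis 2 is image rows (vertical shift), axis 3 is columns (horizontal)
    shifts = {'w': (-1, 2), 's': (1, 2), 'd': (1, 3), 'a': (-1, 3),
              1: (-1, 2), 2: (1, 2), 3: (1, 3), 4: (-1, 3)}
    amount, axis = shifts[direction]
    return numpy.roll(imgs, amount, axis=axis).reshape(flat_images.shape)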
from id3 import Id3Estimator
from id3 import export_graphviz
from hw3_utils import load_data
import classifier

examples, labels, test = load_data()
data = [examples, labels]

# Hold out the 9th tenth of the data as a test set; train on the rest
examples_len = len(examples)
training_set_examples = list(examples[0:int(examples_len * (8 / 10))])
training_set_examples.extend(examples[int(examples_len * (9 / 10)):])
training_set_labels = list(labels[0:int(examples_len * (8 / 10))])
training_set_labels.extend(labels[int(examples_len * (9 / 10)):])
test_set_examples = examples[int(examples_len * (8 / 10)):int(examples_len * (9 / 10))]
test_set_labels = labels[int(examples_len * (8 / 10)):int(examples_len * (9 / 10))]

classifier.split_crosscheck_groups(data, 2)

estimator = Id3Estimator()
estimator.fit(training_set_examples, training_set_labels)
# predicted_labels = estimator.predict(test_set_examples)
print(estimator.predict_proba(test_set_examples))


def get_predictions(test_set):
    return
import sys

import hw3_utils
import cross_validation

folds = 2
if len(sys.argv) > 1:
    folds = int(sys.argv[1])

dataset = hw3_utils.load_data()
print("Splitting dataset to {} folds".format(folds))
cross_validation.split_crosscheck_groups((dataset[0], dataset[1]), folds)
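# Several snippets in this section call split_crosscheck_groups() without
# defining it. A minimal sketch of the assumed behavior: shuffle the dataset,
# split it into `folds` roughly equal groups, and pickle each group as
# 'ecg_fold_<i>.pickle' (the file name the evaluate() functions above load).
# Stratification and the exact file layout are assumptions.
import pickle
import random

def split_crosscheck_groups(dataset, folds):
    samples, labels = dataset
    indices = list(range(len(labels)))
    random.shuffle(indices)
    for i in range(folds):
        part = indices[i::folds]
        fold = ([samples[j] for j in part], [labels[j] for j in part])
        with open('ecg_fold_' + str(i + 1) + '.pickle', 'wb') as f:
            pickle.dump(fold, f)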
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size]
            # y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    test_model = theano.function(
        # layer5.errors(y)
        [index],
        output,
        givens={
            x: test_set_x[0 + index:batch_size + index]
        })

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose=True)


from hw3_utils import load_data

# Downsample the training and validation dataset if needed; ds_rate should be
# larger than 1.
ds_rate = None
datasets = []
gc.collect()
datasets = load_data(ds_rate=ds_rate, theano_shared=True)

train_set_x, train_set_y = datasets[0]
print(train_set_x.get_value().shape)
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

evaluate_lenet5(train_set_x, train_set_y, valid_set_x, valid_set_y,
                test_set_x, test_set_y)
def test_dropout(learning_rate=0.1, n_epochs=1000, nkerns=[64, 128],
                 batch_size=120, verbose=False):
    """
    Wrapper function for testing LeNet with dropout on the SVHN dataset.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    # flag passed into the DropOut layer to switch between its stochastic
    # (training) and deterministic (testing) paths
    testing = T.iscalar('testing')
    testValue = testing
    getTestValue = theano.function([testing], testValue)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, 5, 5),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 14, 14),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # Construct a fully-connected layer with dropout
    layer2 = DropOut(rng,
                     input=layer2_input,
                     n_in=nkerns[1] * 5 * 5,
                     n_out=batch_size,
                     testing=testing)

    # Classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=batch_size, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore',
        allow_input_downcast=True)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)
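# test_dropout() above uses a DropOut layer class that is not defined in this
# section. A minimal sketch of such a layer, assuming the constructor
# signature from the call site (rng, input, n_in, n_out, testing): a tanh
# hidden layer whose activations are multiplied by a Bernoulli(p) mask during
# training and rescaled by p at test time. The initialization scheme and
# p=0.5 are assumptions.
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

class DropOut(object):
    def __init__(self, rng, input, n_in, n_out, testing, p=0.5):
        srng = RandomStreams(rng.randint(999999))
        bound = numpy.sqrt(6. / (n_in + n_out))
        W_values = numpy.asarray(rng.uniform(low=-bound, high=bound,
                                             size=(n_in, n_out)),
                                 dtype=theano.config.floatX)
        self.W = theano.shared(value=W_values, name='W', borrow=True)
        self.b = theano.shared(value=numpy.zeros((n_out,),
                                                 dtype=theano.config.floatX),
                               name='b', borrow=True)
        activation = T.tanh(T.dot(input, self.W) + self.b)
        mask = srng.binomial(size=activation.shape, n=1, p=p,
                             dtype=theano.config.floatX)
        # random mask while training; expected value (scaling by p) when testing
        self.output = T.switch(T.neq(testing, 1), activation * mask,
                               activation * p)
        self.params = [self.W, self.b]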
def test_adversarial_example(learning_rate=0.03, L1_reg=0.0001, L2_reg=0.0001,
                             n_epochs=1, batch_size=128, n_hidden=400,
                             n_hiddenLayers=12, verbose=False, noise_mean=0.0,
                             noise_var=1.0):
    """
    Wrapper function for testing adversarial examples.
    """
    # Load the down-sampled dataset in raw format (numpy.ndarray, not
    # theano.shared). train_set, valid_set, test_set format: tuple(input,
    # target). input is a 2D numpy.ndarray where each row corresponds to an
    # example; target is a 1D numpy.ndarray of the same length as the number
    # of rows in the input.
    datasets = load_data(ds_rate=5)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)
    srng = RandomStreams(seed=234)

    classifier = myMLP(rng=rng, input=x, n_in=32 * 32 * 3, n_hidden=n_hidden,
                       n_hiddenLayers=n_hiddenLayers, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    get_preds = theano.function(
        inputs=[index],
        outputs=[classifier.y_pred, classifier.p_y_given_x],
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs, injecting Gaussian noise into
    # every parameter update
    updates = [(param, param - learning_rate * gparam
                + srng.normal(size=gparam.shape, avg=noise_mean, std=noise_var))
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # This function takes the gradient of the cost with respect to the input
    gparamx = T.grad(cost, classifier.input)
    calc_gradx = theano.function(
        [index],
        gparamx,
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # Intermediate step to get the original data
    get_x = theano.function(
        [index], test_set_x[index * batch_size:(index + 1) * batch_size])
    get_y = theano.function(
        [index], test_set_y[index * batch_size:(index + 1) * batch_size])

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)

    # Get the gradient for a batch of inputs and build adversarial examples
    # from its sign
    x_adv = get_x(1)
    gx_adv = numpy.sign(calc_gradx(1)[0])
    ad_example = x_adv + gx_adv * numpy.random.random(gx_adv.shape) * 1e-10
    shared_adv_x = theano.shared(numpy.asarray(ad_example,
                                               dtype=theano.config.floatX),
                                 borrow=True)

    get_predsadv = theano.function(
        inputs=[index],
        outputs=[classifier.y_pred, classifier.p_y_given_x],
        givens={
            x: shared_adv_x[(index * 0):]  # the whole adversarial batch
        })

    ap = get_predsadv(1)
    op = get_preds(1)
    ys = get_y(1)
    indexes = [i for i in range(128) if ys[i] == op[0][i]]
    # Select the example at index 3 among those whose original prediction
    # matched the true class
    indx = indexes[3]
    return x_adv, op, ap, ad_example, ys, indx
def test_adversarial_example(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                             n_epochs=100, batch_size=128, n_hidden=500,
                             n_hiddenLayers=3, verbose=False, smaller_set=True):
    """
    Wrapper function for testing adversarial examples.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # Construct the neural network (an MLP here)
    classifier = myMLP(rng=rng, input=x, n_in=32 * 32 * 3, n_hidden=n_hidden,
                       n_out=10, n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)

    y_pred_model = theano.function(
        inputs=[index],
        outputs=classifier.y_pred,
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
        })

    p_y_given_x_model = theano.function(
        inputs=[index],
        outputs=classifier.p_y_given_x,
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
        })

    y_pred = numpy.array([])
    y_actual = numpy.array([])
    for i in range(n_test_batches):
        y_pred = numpy.append(y_pred, y_pred_model(i))
        y_actual = numpy.append(
            y_actual, test_set_y.eval()[i * batch_size:(i + 1) * batch_size])
    print('y_pred', y_pred)
    print('y_actual', y_actual)

    # Build adversarial inputs by adding the sign of the input gradient
    grad_input = T.grad(cost, classifier.input)
    f1 = theano.function(inputs=[x, y], outputs=T.add(x, T.sgn(grad_input)))
    new_x = f1(test_set_x.eval(), test_set_y.eval())
    new_x = theano.shared(numpy.asarray(new_x, dtype=theano.config.floatX),
                          borrow=True)

    y_pred_model_adverse = theano.function(
        inputs=[index],
        outputs=classifier.y_pred,
        givens={
            x: new_x[index * batch_size:(index + 1) * batch_size],
        })

    p_y_given_x_model_adverse = theano.function(
        inputs=[index],
        outputs=classifier.p_y_given_x,
        givens={
            x: new_x[index * batch_size:(index + 1) * batch_size],
        })

    p_y_given_x_adverse = numpy.array([])
    p_y_given_x_original = numpy.array([])
    y_pred_adverse = numpy.array([])
    for i in range(n_test_batches):
        y_pred_adverse = numpy.append(y_pred_adverse, y_pred_model_adverse(i))
        if i == 0:
            p_y_given_x_adverse = p_y_given_x_model_adverse(i)
            p_y_given_x_original = p_y_given_x_model(i)
        elif i > 0:
            p_y_given_x_adverse = numpy.vstack((p_y_given_x_adverse,
                                                p_y_given_x_model_adverse(i)))
            p_y_given_x_original = numpy.vstack((p_y_given_x_original,
                                                 p_y_given_x_model(i)))

    # Plot five correctly classified examples, their adversarial versions,
    # and the class probabilities for both
    f, ax = plt.subplots(5, 4, figsize=(15, 15))
    for i in range(5):
        pred = y_pred[y_actual == y_pred][i]
        pred_adv = y_pred_adverse[y_actual == y_pred][i]
        pyx = p_y_given_x_original[y_actual == y_pred][i]
        pyx_adverse = p_y_given_x_adverse[y_actual == y_pred][i]
        img = numpy.array(test_set_x.eval())[y_actual == y_pred, :][i, :].reshape(3, 32, 32)
        img_adverse = numpy.array(new_x.eval())[y_actual == y_pred, :][i, :].reshape(3, 32, 32)
        ax[i, 0].imshow(numpy.transpose(img, (1, 2, 0)))
        ax[i, 0].axis('off')
        ax[i, 0].set_title('Example %s:\nCorrectly predicted value: %s'
                           % (i + 1, int(pred)))
        ax[i, 1].imshow(numpy.transpose(img_adverse, (1, 2, 0)))
        ax[i, 1].axis('off')
        ax[i, 1].set_title('Example %s:\nAdversarial example\nPredicted value: %s'
                           % (i + 1, int(pred_adv)))
        ax[i, 2].bar(numpy.arange(0, 10) - 0.5, pyx)
        ax[i, 2].set_xticks(numpy.arange(0, 10))
        ax[i, 2].set_title('Example %s: Class specific\nprobabilities for original data'
                           % (i + 1))
        ax[i, 2].set_ylabel('p(y|x)')
        ax[i, 3].bar(numpy.arange(0, 10) - 0.5, pyx_adverse)
        ax[i, 3].set_xticks(numpy.arange(0, 10))
        ax[i, 3].set_title('Example %s: Class specific\nprobabilities for adversarial data'
                           % (i + 1))
        ax[i, 3].set_ylabel('p(y|x)')
    plt.tight_layout()
    return p_y_given_x_adverse
def test_mlp_bonus(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                   n_epochs=100, batch_size=128, n_hidden=500,
                   n_hiddenLayers=3, verbose=False, smaller_set=True):
    """
    Wrapper function for training and testing an MLP.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.

    :type smaller_set: boolean
    :param smaller_set: to use the smaller dataset or not to.
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # Construct the neural network (an MLP here)
    classifier = myMLP(rng=rng, input=x, n_in=32 * 32 * 3, n_hidden=n_hidden,
                       n_out=10, n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model, n_train_batches,
             n_valid_batches, n_test_batches, n_epochs, verbose)

    # Return the weight matrices of all hidden layers and of the logistic
    # regression layer
    return ([layer.params[0].get_value() for layer in classifier.hiddenLayers]
            + [classifier.logRegressionLayer.params[0].get_value()])
def main():
    # Question b.3
    train_features, train_labels, test_features = hw3_utils.load_data()
    classifier.split_crosscheck_groups((train_features, train_labels), 8)
def main():
    train_set_full, train_tags, _ = load_data('data/Data.pickle')
    clf_type = ['SVM', 'Tree', 'KNN', 'Perceptron', 'Bayes', 'RF', 'Voting']
    kernel_type = ['Linear', 'Polynomial', 'Gaussian', 'Sigmoid']
    selectors = ['f_classif', 'mutual_info_classif', 'chi2']
    path = 'Classifiers_comparison.csv'
    with open(path, 'w', newline='') as csv_f:
        writer = csv.writer(csv_f)
        writer.writerow(['Selection', 'Features', 'Classifier', 'Param1',
                         'Param2', 'Param3', 'Param4', 'Param5', 'Accuracy',
                         'Var'])
        for num_of_features in [50, 70, 85, 98, 115, 140, 187]:
            best_svm_score = 0
            for selection_method in selectors:
                if selection_method == selectors[0]:
                    train_set = SelectKBest(f_classif, k=num_of_features).fit_transform(train_set_full, train_tags)
                elif selection_method == selectors[1]:
                    train_set = SelectKBest(mutual_info_classif, k=num_of_features).fit_transform(train_set_full, train_tags)
                else:
                    # chi2 requires non-negative features
                    train_set = SelectKBest(chi2, k=num_of_features).fit_transform(abs(train_set_full), train_tags)

                # Testing the SVM classifier
                C_param = np.linspace(0.78, 0.9, 12)
                kernel_param = ['linear', 'poly', 'rbf', 'sigmoid']
                degree_param = np.linspace(7, 11, 5, dtype=int)
                coef0_param = np.linspace(-3, 3, 10)

                # Testing the linear kernel
                for penal in C_param:
                    clf = svm.SVC(kernel=kernel_param[0], C=penal, gamma='auto')
                    scores = cross_val_score(clf, train_set, train_tags, cv=3)
                    avg_score = np.mean(scores)
                    var = np.var(scores)
                    writer.writerow([selection_method, num_of_features,
                                     clf_type[0], kernel_type[0], str(penal),
                                     '', '', '', avg_score, var])
                    if avg_score > best_svm_score:
                        best_svm_score = avg_score
                        svm_clf = svm.SVC(kernel=kernel_param[0], C=penal,
                                          gamma='auto')

                # Testing the polynomial kernel
                for penal in C_param:
                    for deg in degree_param:
                        for C0 in coef0_param:
                            clf = svm.SVC(kernel=kernel_param[1], C=penal,
                                          degree=deg, coef0=C0, gamma='auto')
                            scores = cross_val_score(clf, train_set, train_tags, cv=3)
                            avg_score = np.mean(scores)
                            var = np.var(scores)
                            writer.writerow([selection_method, num_of_features,
                                             clf_type[0], kernel_type[1],
                                             str(penal), str(deg), str(C0), '',
                                             avg_score, var])
                            if avg_score > best_svm_score:
                                best_svm_score = avg_score
                                svm_clf = svm.SVC(kernel=kernel_param[1],
                                                  C=penal, degree=deg,
                                                  coef0=C0, gamma='auto')

                # Testing the Gaussian (RBF) kernel
                for penal in C_param:
                    clf = svm.SVC(kernel=kernel_param[2], C=penal, gamma='auto')
                    scores = cross_val_score(clf, train_set, train_tags, cv=3)
                    avg_score = np.mean(scores)
                    var = np.var(scores)
                    writer.writerow([selection_method, num_of_features,
                                     clf_type[0], kernel_type[2], str(penal),
                                     '', '', '', avg_score, var])
                    if avg_score > best_svm_score:
                        best_svm_score = avg_score
                        svm_clf = svm.SVC(kernel=kernel_param[2], C=penal,
                                          gamma='auto')

                # Testing the sigmoid kernel
                for penal in C_param:
                    for C0 in coef0_param:
                        clf = svm.SVC(kernel=kernel_param[3], C=penal,
                                      coef0=C0, gamma='auto')
                        scores = cross_val_score(clf, train_set, train_tags, cv=3)
                        avg_score = np.mean(scores)
                        var = np.var(scores)
                        writer.writerow([selection_method, num_of_features,
                                         clf_type[0], kernel_type[3],
                                         str(penal), '', str(C0), '',
                                         avg_score, var])
                        if avg_score > best_svm_score:
                            best_svm_score = avg_score
                            svm_clf = svm.SVC(kernel=kernel_param[3], C=penal,
                                              coef0=C0, gamma='auto')
                print("SVM Done")

                # Testing the decision-tree classifier
                criterion_param = ['gini', 'entropy']
                splitter_param = ['best', 'random']
                weight_param = [None, 'balanced']
                best_tree_score = 0
                for crit in criterion_param:
                    for split in splitter_param:
                        for weight in weight_param:
                            clf = tree.DecisionTreeClassifier(
                                criterion=crit, splitter=split,
                                class_weight=weight)
                            scores = cross_val_score(clf, train_set, train_tags, cv=3)
                            avg_score = np.mean(scores)
                            var = np.var(scores)
                            writer.writerow([selection_method, num_of_features,
                                             clf_type[1], crit, split,
                                             str(weight), '', '', avg_score,
                                             var])
                            if avg_score > best_tree_score:
                                best_tree_score = avg_score
                                tree_clf = tree.DecisionTreeClassifier(
                                    criterion=crit, splitter=split,
                                    class_weight=weight)
                print("Decision Tree Done")

                # Testing the KNN classifier
                neighbors_param = [1, 3, 5, 9]
                weight_param = ['uniform', 'distance']
                dist_metric_param = [1, 2, 3]
                dst = ['manhattan', 'euclidean', 'minkowski']
                best_knn1_score, best_knn3_score = 0, 0
                for n in neighbors_param:
                    for weight in weight_param:
                        for dm in dist_metric_param:
                            clf = neighbors.KNeighborsClassifier(
                                n_neighbors=n, weights=weight, p=dm)
                            scores = cross_val_score(clf, train_set, train_tags, cv=3)
                            avg_score = np.mean(scores)
                            var = np.var(scores)
                            writer.writerow([selection_method, num_of_features,
                                             clf_type[2], str(n), weight, '',
                                             '', dst[dm - 1], avg_score, var])
                            if n == 1:
                                if avg_score > best_knn1_score:
                                    best_knn1_score = avg_score
                                    knn1_clf = neighbors.KNeighborsClassifier(
                                        n_neighbors=n, weights=weight, p=dm)
                            elif n == 3:
                                if avg_score > best_knn3_score:
                                    best_knn3_score = avg_score
                                    knn3_clf = neighbors.KNeighborsClassifier(
                                        n_neighbors=n, weights=weight, p=dm)
                print("KNN Done")

                # Testing the Perceptron classifier
                penalty_param = [None, 'l1', 'l2']
                alpha_param = np.logspace(-7, -2, 6, dtype=float)
                intercept_param = [True, False]
                tol_param = np.logspace(-7, -2, 6, dtype=float)
                weight_param = [None, 'balanced']
                for penal in penalty_param:
                    for a in alpha_param:
                        for fip in intercept_param:
                            for tl in tol_param:
                                for weight in weight_param:
                                    clf = linear_model.Perceptron(
                                        penalty=penal, alpha=a,
                                        fit_intercept=fip, tol=tl,
                                        class_weight=weight)
                                    scores = cross_val_score(clf, train_set,
                                                             train_tags, cv=3)
                                    avg_score = np.mean(scores)
                                    var = np.var(scores)
                                    writer.writerow([selection_method,
                                                     num_of_features,
                                                     clf_type[3], str(penal),
                                                     str(a), str(fip), str(tl),
                                                     weight, avg_score, var])
                print("Perceptron Done")

                # Testing the Naive Bayes classifiers; the multinomial and
                # Bernoulli variants require non-negative features
                NB_type = ['Gaussian', 'Multinomial', 'Bernoulli']
                alpha_param = np.linspace(1e-5, 1, 6, dtype=float)
                prio_param = [True, False]
                clf = naive_bayes.GaussianNB()
                scores = cross_val_score(clf, train_set, train_tags, cv=3)
                avg_score = np.mean(scores)
                var = np.var(scores)
                writer.writerow([selection_method, num_of_features, clf_type[4],
                                 NB_type[0], '', '', '', '', avg_score, var])
                for a in alpha_param:
                    for prio in prio_param:
                        clf = naive_bayes.MultinomialNB(alpha=a, fit_prior=prio)
                        scores = cross_val_score(clf, abs(train_set), train_tags, cv=3)
                        avg_score = np.mean(scores)
                        var = np.var(scores)
                        writer.writerow([selection_method, num_of_features,
                                         clf_type[4], NB_type[1], str(a),
                                         str(prio), '', '', avg_score, var])
                for a in alpha_param:
                    for prio in prio_param:
                        clf = naive_bayes.BernoulliNB(alpha=a, fit_prior=prio)
                        scores = cross_val_score(clf, abs(train_set), train_tags, cv=3)
                        avg_score = np.mean(scores)
                        var = np.var(scores)
                        writer.writerow([selection_method, num_of_features,
                                         clf_type[4], NB_type[2], str(a),
                                         str(prio), '', '', avg_score, var])
                print("Naive Bayes Done")

                # Testing the Random Forest classifier
                n_param = [100, 200, 300]
                criterion_param = ['gini', 'entropy']
                depth_param = [5, 50, None]
                best_rf_score = 0
                for n in n_param:
                    for crit in criterion_param:
                        for d in depth_param:
                            clf = RandomForestClassifier(n_estimators=n,
                                                         criterion=crit,
                                                         max_depth=d)
                            scores = cross_val_score(clf, train_set, train_tags, cv=3)
                            avg_score = np.mean(scores)
                            var = np.var(scores)
                            writer.writerow([selection_method, num_of_features,
                                             clf_type[5], n, crit, str(d), '',
                                             '', avg_score, var])
                            if avg_score > best_rf_score:
                                best_rf_score = avg_score
                                rf_clf = RandomForestClassifier(
                                    n_estimators=n, criterion=crit, max_depth=d)
                print("Random forest Done")

                # Testing the Voting classifier built from the best models
                vote_clf = VotingClassifier(estimators=[('svm', svm_clf),
                                                        ('tree', tree_clf),
                                                        ('knn1', knn1_clf),
                                                        ('knn3', knn3_clf),
                                                        ('rf', rf_clf)],
                                            voting='hard')
                scores = cross_val_score(vote_clf, train_set, train_tags, cv=3)
                avg_score = np.mean(scores)
                var = np.var(scores)
                writer.writerow([selection_method, num_of_features, clf_type[6],
                                 'Hard', '', '', '', '', avg_score, var])
                print("Voting Done")
    print("\n ***** ALL Done *****")
def test_adversarial_example(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100, batch_size=128, n_hidden=500, n_hiddenLayers=3, verbose=False): """ Wrapper function for testing adversarial examples """ # First, train a network using the small dataset. rng = numpy.random.RandomState(23455) # Load the smaller dataset train_set, valid_set, test_set = load_data(ds_rate=5) test_set_x, test_set_y = test_set valid_set_x, valid_set_y = valid_set train_set_x, train_set_y = train_set # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels rng = numpy.random.RandomState(1234) classifier = myMLP( rng=rng, input=x, n_in=32*32*3, n_hidden=n_hidden, n_hiddenLayers=n_hiddenLayers, n_out=10 ) # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = ( classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr ) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] } ) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] } ) probability = theano.function( inputs=[], outputs=[classifier.logRegressionLayer.p_y_given_x, y], givens={ x: test_set_x, y: test_set_y } ) gradient = theano.function( inputs=[], outputs=classifier.input + 0.007 * T.sgn(T.grad(cost, classifier.input)), givens={ x: test_set_x, y: test_set_y } ) # compute the gradient of cost with respect to theta (sorted in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [ (param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams) ] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size: 
(index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose) ori_prob, ori_y = probability() # I use MATLAB to compare the predicted classification and y in test_32x32.mat # the 14th test data is correctly classified thus using idx = 13 idx = 13 new_test_x = gradient() adversarial = theano.function( inputs=[], outputs=[classifier.logRegressionLayer.p_y_given_x, classifier.logRegressionLayer.y_pred, y], givens={ x: new_test_x, y: test_set_y } ) adver_prob, adver_y, _ = adversarial() return ori_prob[idx], ori_y[idx], adver_prob[idx], adver_y[idx], test_set_x.get_value(borrow=True), new_test_x
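# The `gradient` function above is the fast gradient sign method (FGSM):
# x_adv = x + eps * sign(dJ/dx), with eps = 0.007. A framework-agnostic NumPy
# sketch, assuming a hypothetical loss_grad(x, y) helper that returns the
# gradient of the cost with respect to the inputs:
import numpy as np

def fgsm(x, y, loss_grad, eps=0.007):
    """Perturb inputs x in the direction that increases the loss."""
    return x + eps * np.sign(loss_grad(x, y))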
def test_data_augmentation(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001,
                           n_epochs=100, batch_size=128, n_hidden=500,
                           n_hiddenLayers=3, verbose=False):
    """
    Wrapper function for the data-augmentation experiment

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient).

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization).

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization).

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer.

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    rng = numpy.random.RandomState(23455)

    # Load the down-sampled dataset in raw format (numpy.ndarray, not
    # theano.shared), since we need to preprocess it.
    # train_set, valid_set, test_set format: tuple(input, target).
    # input is a numpy.ndarray of 2 dimensions (a matrix), where each row
    # corresponds to an example. target is a numpy.ndarray of 1 dimension
    # (a vector) that has the same length as the number of rows in the input.
    train_set, valid_set, test_set = load_data(ds_rate=5, theano_shared=False)

    # load_data returns tuples, which do not support item assignment, so
    # convert the training pair to a list before modifying it in place.
    train_set = list(train_set)

    # Repeat the training labels 5 times, once per copy of the inputs below.
    train_set[1] = numpy.tile(train_set[1], 5)

    # Translate the dataset by one pixel in each of the four directions.
    train_set_x_u = translate_image(train_set[0], 1)
    train_set_x_d = translate_image(train_set[0], 2)
    train_set_x_r = translate_image(train_set[0], 3)
    train_set_x_l = translate_image(train_set[0], 4)

    # Stack the original dataset and the synthesized datasets.
    train_set[0] = numpy.vstack((train_set[0],
                                 train_set_x_u,
                                 train_set_x_d,
                                 train_set_x_r,
                                 train_set_x_l))

    # Convert the raw dataset to Theano shared variables.
    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    rng = numpy.random.RandomState(1234)

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32 * 32 * 3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs; zip pairs each parameter with
    # its gradient
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and,
    # at the same time, updates the parameters of the model based on the
    # rules defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
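# `translate_image` is defined elsewhere in the codebase. A plausible sketch
# using numpy.roll on rasterized 32x32x3 rows; the meaning of `direction`
# (1..4 = up/down/right/left) is an assumption, not the original helper's
# documented convention.
import numpy as np

def translate_image_sketch(images, direction, pixels=1):
    """Shift each rasterized 32x32x3 image by `pixels` in one direction."""
    imgs = images.reshape(-1, 3, 32, 32)
    axis, shift = {1: (2, -pixels), 2: (2, pixels),
                   3: (3, pixels), 4: (3, -pixels)}[direction]
    return np.roll(imgs, shift, axis=axis).reshape(images.shape)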
def my_lenet(batch_size=250, n_epochs=2000, learning_rate=0.01, L2_reg=0.0001, activation=T.tanh): # load data ds_rate = None datasets = load_data(ds_rate=ds_rate, theano_shared=True) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size rng = np.random.RandomState(23455) # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels training_enabled = T.iscalar( 'training_enabled' ) # pseudo boolean for switching between training and prediction ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') layer0_input = x.reshape((batch_size, 3, 32, 32)) # 4D output tensor is thus of shape (batch_size, 32, 16, 16) layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=( 32, 3, 3, 3 ), # (number of output feature maps, number of input feature maps, height, width) poolsize=(2, 2), activation=activation) # 4D output tensor is thus of shape (batch_size, 64, 8, 8) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, 32, 16, 16), filter_shape=(64, 32, 3, 3), poolsize=(2, 2), activation=activation) layer2_input = layer1.output.flatten(2) layer2 = DropoutHiddenLayer(rng=rng, is_train=training_enabled, input=layer2_input, n_in=64 * 8 * 8, n_out=4096, W=None, b=None, activation=activation, p=0.7) layer3 = DropoutHiddenLayer(rng=rng, is_train=training_enabled, input=layer2.output, n_in=4096, n_out=512, W=None, b=None, activation=activation, p=0.7) layer4 = LogisticRegression(input=layer3.output, n_in=512, n_out=10) # the cost we minimize during training is the NLL of the model L2_sqr = (layer4.W**2).sum() + (layer3.W**2).sum() + ( layer2.W**2).sum() + (layer1.W**2).sum() + (layer0.W**2).sum() cost = (layer4.negative_log_likelihood(y) + L2_reg * L2_sqr) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer4.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size], training_enabled: numpy.cast['int32'](0) }) validate_model = theano.function( [index], layer4.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size], training_enabled: numpy.cast['int32'](0) }) # create a list of all model parameters to be fit by gradient descent params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params # specify how to update the parameters of the model as a list of # (variable, update expression) pairs momentum = theano.shared(numpy.cast[theano.config.floatX](0.5), name='momentum') updates = [] for param in params: param_update = theano.shared(param.get_value() * numpy.cast[theano.config.floatX](0.)) updates.append((param, param - learning_rate * param_update)) updates.append((param_update, momentum * param_update + (numpy.cast[theano.config.floatX](1.) 
- momentum) * T.grad(cost, param))) ###################### # TRAIN ACTUAL MODEL # ###################### # early-stopping parameters patience = 20000 # look as this many examples regardless patience_increase = 2 # wait this much longer when a new best is # found improvement_threshold = 0.85 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = timeit.default_timer() epoch = 0 done_looping = False verbose = True while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 ##### implement flip train_set_x, train_set_y = datasets[0] flip_image(train_set_x, 1) ##### redefine train_model train_model_FLIP = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], training_enabled: numpy.cast['int32'](1) }) # train with augmentation data_set print('-----Training with augmented data (flip)-----') for minibatch_index in range(n_train_batches): cost_ij = train_model_FLIP(minibatch_index) print('-----Training over-----') # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) if verbose: print( 'epoch %i, train augmented data (flip), validation error %f %%' % (epoch, this_validation_loss * 100.)) ''' ##### add noise train_set_x, train_set_y = datasets[0] ran = int(random.uniform(0,2)) noise_injection(train_set_x, ran) ##### redefine train_model train_model_NOISE = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size], training_enabled: numpy.cast['int32'](1) } ) # train with augmentation data_set print('-----Training with augmented data (noise)-----') for minibatch_index in range(n_train_batches): cost_ij = train_model_NOISE(minibatch_index) print('-----Training over-----') # compute zero-one loss on validation set validation_losses = [validate_model(i) for i in range(n_valid_batches)] this_validation_loss = numpy.mean(validation_losses) if verbose: print('epoch %i, train augmented data (noise), validation error %f %%' % (epoch, this_validation_loss * 100.)) ''' ##### get original data train_set_x, train_set_y = datasets[0] ##### redefine train_model train_model_1 = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size], training_enabled: numpy.cast['int32'](1) }) # train with original data_set for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if (iter % 100 == 0) and verbose: print('training @ iter = ', iter) cost_ij = train_model_1(minibatch_index) if (iter + 1) % validation_frequency == 0: # compute zero-one loss on validation set validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) if verbose: print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: 
#improve patience if loss improvement is good enough if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) if verbose: print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() # Retrieve the name of function who invokes train_nn() (caller's name) curframe = inspect.currentframe() calframe = inspect.getouterframes(curframe, 2) # Print out summary print('Optimization complete.') print('Best validation error of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print('Best validation accuracy: %f%%.' % ((1.0 - best_validation_loss) * 100.)) print('Best test accuracy: %f%%.' % ((1.0 - test_score) * 100.)) print(('The training process for function ' + calframe[1][3] + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
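# The update pairs built in my_lenet implement classical momentum: Theano
# applies all updates simultaneously, so each parameter steps along the
# *previous* velocity while the velocity is refreshed from the current
# gradient. One step of the same rule, as a plain-NumPy reference:
def momentum_step(p, v, grad, lr=0.01, mu=0.5):
    """Return (new_param, new_velocity) matching the simultaneous updates."""
    new_p = p - lr * v
    new_v = mu * v + (1.0 - mu) * grad
    return new_p, new_v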
def test_lenet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
               batch_size=200, filter_size=5, dnn_layers=1, n_hidden=500,
               gabor=False, lmbda=None, verbose=False):
    """
    Wrapper function for testing LeNet on the SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    print(test_lenet.__name__, nkerns, filter_size, gabor, lmbda)

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    if gabor is True:
        # Generate Gabor filters and use them as the first layer's weights,
        # replicating each kernel across the 3 input channels.
        filters = build_gabor(filter_size, nkerns[0], lmbda)
        filters = numpy.array([filters[i] for i in range(len(filters))])
        filter_weights = numpy.tile(filters, (1, 3, 1)).reshape(
            nkerns[0], 3, filter_size, filter_size)
        layer0 = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=(nkerns[0], 3, filter_size, filter_size),
            poolsize=(2, 2),
            weights=filter_weights
        )
        print('gabor filter weights are working')
    else:
        # Construct the first convolutional pooling layer
        layer0 = LeNetConvPoolLayer(
            rng,
            input=layer0_input,
            image_shape=(batch_size, 3, 32, 32),
            filter_shape=(nkerns[0], 3, filter_size, filter_size),
            poolsize=(2, 2)
        )

    # Construct the second convolutional pooling layer; integer division
    # keeps the spatial sizes ints in Python 3.
    i_s_1 = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], i_s_1, i_s_1),
        filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # construct the stack of fully-connected tanh layers
    i_s_2 = (i_s_1 - filter_size + 1) // 2
    if hasattr(n_hidden, '__iter__'):
        assert len(n_hidden) == dnn_layers
    else:
        n_hidden = (n_hidden,) * dnn_layers

    DNN_Layers = []
    for i in range(dnn_layers):
        h_input = layer2_input if i == 0 else DNN_Layers[i - 1].output
        h_in = nkerns[1] * i_s_2 * i_s_2 if i == 0 else n_hidden[i - 1]
        DNN_Layers.append(HiddenLayer(
            rng=rng,
            input=h_input,
            n_in=h_in,
            n_out=n_hidden[i],
            activation=T.tanh
        ))

    # classify the values of the last fully-connected layer
    LR_Layer = LogisticRegression(
        input=DNN_Layers[-1].output,
        n_in=n_hidden[-1],
        n_out=10
    )

    # the cost we minimize during training is the NLL of the model
    cost = LR_Layer.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent;
    # when the first layer uses the Gabor filters, its parameters are
    # excluded so the filters stay fixed
    params = LR_Layer.params
    for layer in DNN_Layers:
        params += layer.params
    if gabor is True:
        print('gabor params are frozen; layer0 is excluded')
        params += layer1.params
    else:
        params += layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
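# `build_gabor(filter_size, n, lmbda)` is defined elsewhere. A hedged sketch
# of one common parametrization: n kernels at evenly spaced orientations,
# where the sigma/psi/gamma values are illustrative assumptions and odd
# kernel sizes are assumed.
import numpy as np

def build_gabor_sketch(size, n, lmbda, sigma=2.0, psi=0.0, gamma=0.5):
    """Return a list of n (size x size) Gabor kernels at rotated angles."""
    half = size // 2
    ys, xs = np.mgrid[-half:half + 1, -half:half + 1]
    kernels = []
    for theta in np.linspace(0, np.pi, n, endpoint=False):
        xr = xs * np.cos(theta) + ys * np.sin(theta)
        yr = -xs * np.sin(theta) + ys * np.cos(theta)
        envelope = np.exp(-(xr ** 2 + (gamma * yr) ** 2) / (2 * sigma ** 2))
        kernels.append(envelope * np.cos(2 * np.pi * xr / lmbda + psi))
    return kernels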
def test_adversarial_example(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100, batch_size=128, n_hidden=500, n_hiddenLayers=3, verbose=False, smaller_set=False): """ Wrapper function for testing adversarial examples """ # load the dataset; download the dataset if it is not present if smaller_set: datasets = load_data(ds_rate=5) else: datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels rng = numpy.random.RandomState(1234) # TODO: construct a neural network, either MLP or CNN. classifier = myMLP(rng=rng, input=x, n_in=32 * 32 * 3, n_hidden=n_hidden, n_out=10, n_hiddenLayers=n_hiddenLayers) # the cost we minimize during training is the negative log likelihood of # the model plus the regularization terms (L1 and L2); cost is expressed # here symbolically cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr) # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) # compute the gradient of cost with respect to theta (sotred in params) # the resulting gradients will be stored in a list gparams gparams = [T.grad(cost, param) for param in classifier.params] # specify how to update the parameters of the model as a list of # (variable, update expression) pairs # given two lists of the same length, A = [a1, a2, a3, a4] and # B = [b1, b2, b3, b4], zip generates a list C of same size, where each # element is a pair formed from the two lists : # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)] updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)] # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) ############### # TRAIN MODEL # ############### print('... 
training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose) filter_model = theano.function( inputs=[index], outputs=[ x, classifier.logRegressionLayer.y_pred, y, classifier.logRegressionLayer.p_y_given_x ], givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) filter_output = [filter_model(i) for i in range(n_test_batches)] sample_x = None sample_y = None test_output = None expected_distribution = None for i in filter_output: if numpy.array_equal(i[1], i[2]): sample_x = i[0] sample_y = i[1] expected_distribution = i[3] print("successfully classified sample ", sample_y) t_sample_x, t_sample_y = shared_dataset((sample_x, sample_y)) grad_input = classifier.input + 0.1 * T.sgn( T.grad(cost, classifier.input)) grad_input_fn = theano.function(inputs=[], outputs=grad_input, givens={ x: t_sample_x, y: t_sample_y }) gradient = grad_input_fn() new_t_sample_x, t_sample_y = shared_dataset((gradient, sample_y)) testing_gradient = theano.function( inputs=[], outputs=[ y, classifier.logRegressionLayer.y_pred, classifier.logRegressionLayer.p_y_given_x ], givens={ x: new_t_sample_x, y: t_sample_y }) test_output = testing_gradient() if not numpy.array_equal(test_output[0], test_output[1]): break return test_output, expected_distribution
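# `shared_dataset` follows the standard Theano tutorial pattern: store the
# arrays in shared variables (as floatX, so they can live on the GPU) and
# cast the labels back to int32 for indexing. A sketch of that pattern:
import numpy
import theano
import theano.tensor as T

def shared_dataset_sketch(data_xy, borrow=True):
    """Wrap an (inputs, targets) pair in Theano shared variables."""
    data_x, data_y = data_xy
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, T.cast(shared_y, 'int32')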
def main():
    # Load the data
    train_set, train_tags, test_set = load_data('data/Data.pickle')

    # Range of the data
    print("max(train_set) = " + '{:.4f}'.format(np.max(train_set)))
    print("min(train_set) = " + '{:.4f}'.format(np.min(train_set)))
    print("mean(train_set) = " + '{:.4f}'.format(np.mean(train_set)))
    print("var(train_set) = " + '{:.4f}'.format(np.var(train_set)))
    print("")
    print("max(test_set) = " + '{:.4f}'.format(np.max(test_set)))
    print("min(test_set) = " + '{:.4f}'.format(np.min(test_set)))
    print("mean(test_set) = " + '{:.4f}'.format(np.mean(test_set)))
    print("var(test_set) = " + '{:.4f}'.format(np.var(test_set)))

    # Dynamic range per feature; the dotted band is mean +- 2 standard
    # deviations (np.std, not np.var, so the curves match the Sigma labels).
    plt.figure(figsize=(10, 6))
    plt.subplot(211)
    plt.title('Dynamic range vs feature (Train set)')
    plt.plot(range(train_set.shape[1]), np.min(train_set, axis=0), 'r')
    plt.plot(range(train_set.shape[1]), np.max(train_set, axis=0), 'g')
    plt.plot(range(train_set.shape[1]), np.mean(train_set, axis=0), 'b')
    plt.plot(range(train_set.shape[1]),
             np.mean(train_set, axis=0) + 2 * np.std(train_set, axis=0),
             linestyle=':', color='k')
    plt.plot(range(train_set.shape[1]),
             np.mean(train_set, axis=0) - 2 * np.std(train_set, axis=0),
             linestyle=':', color='k')
    plt.legend(['min', 'max', 'avg', '+2 Sigma', '-2 Sigma'])
    plt.grid()

    plt.subplot(212)
    plt.title('Dynamic range vs feature (Test set)')
    plt.plot(range(test_set.shape[1]), np.min(test_set, axis=0), 'r')
    plt.plot(range(test_set.shape[1]), np.max(test_set, axis=0), 'g')
    plt.plot(range(test_set.shape[1]), np.mean(test_set, axis=0), 'b')
    plt.plot(range(test_set.shape[1]),
             np.mean(test_set, axis=0) + 2 * np.std(test_set, axis=0),
             linestyle=':', color='k')
    plt.plot(range(test_set.shape[1]),
             np.mean(test_set, axis=0) - 2 * np.std(test_set, axis=0),
             linestyle=':', color='k')
    plt.legend(['min', 'max', 'avg', '+2 Sigma', '-2 Sigma'])
    plt.grid()
    plt.show()

    # Divide the examples into true/false directly with the boolean tags
    # (testing membership with `in`, as before, is both slow and unreliable
    # for float-valued rows).
    tags = np.asarray(train_tags, dtype=bool)
    good = train_set[tags]
    bad = train_set[~tags]

    plt.figure(figsize=(10, 6))
    plt.subplot(211)
    plt.title('Dynamic range vs feature (True examples)')
    plt.plot(range(good.shape[1]), np.min(good, axis=0), 'r')
    plt.plot(range(good.shape[1]), np.max(good, axis=0), 'g')
    plt.plot(range(good.shape[1]), np.mean(good, axis=0), 'b')
    plt.plot(range(good.shape[1]),
             np.mean(good, axis=0) + 2 * np.std(good, axis=0),
             linestyle=':', color='k')
    plt.plot(range(good.shape[1]),
             np.mean(good, axis=0) - 2 * np.std(good, axis=0),
             linestyle=':', color='k')
    plt.legend(['min', 'max', 'avg', '+2 Sigma', '-2 Sigma'])
    plt.grid()

    plt.subplot(212)
    plt.title('Dynamic range vs feature (False examples)')
    plt.plot(range(bad.shape[1]), np.min(bad, axis=0), 'r')
    plt.plot(range(bad.shape[1]), np.max(bad, axis=0), 'g')
    plt.plot(range(bad.shape[1]), np.mean(bad, axis=0), 'b')
    plt.plot(range(bad.shape[1]),
             np.mean(bad, axis=0) + 2 * np.std(bad, axis=0),
             linestyle=':', color='k')
    plt.plot(range(bad.shape[1]),
             np.mean(bad, axis=0) - 2 * np.std(bad, axis=0),
             linestyle=':', color='k')
    plt.legend(['min', 'max', 'avg', '+2 Sigma', '-2 Sigma'])
    plt.grid()
    plt.show()
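# The four plotting blocks above are identical up to the array being shown;
# a small refactoring sketch (not in the original) removes the repetition:
import numpy as np
import matplotlib.pyplot as plt

def plot_dynamic_range(data, title):
    """Plot per-feature min/max/mean with a +-2-sigma dotted band."""
    idx = range(data.shape[1])
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    plt.plot(idx, np.min(data, axis=0), 'r')
    plt.plot(idx, np.max(data, axis=0), 'g')
    plt.plot(idx, mean, 'b')
    plt.plot(idx, mean + 2 * std, linestyle=':', color='k')
    plt.plot(idx, mean - 2 * std, linestyle=':', color='k')
    plt.legend(['min', 'max', 'avg', '+2 Sigma', '-2 Sigma'])
    plt.title(title)
    plt.grid()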
def test_filter(learning_rate=0.1, n_epochs=1000, nkerns=[3, 512], batch_size=200, verbose=True): """ Wrapper function for testing LeNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer layer0 = LeNetConvPoolLayer( rng, input=layer0_input, # (batch size, num input feature maps,image height, image width) image_shape=(batch_size,3,32,32), # number of filters, num input feature maps,filter height, filter width) filter_shape=(nkerns[0],3,5,5), poolsize=(2,2) ) # TODO: Construct the second convolutional pooling layer layer1 = LeNetConvPoolLayer( rng, input=layer0.output, # (32-5+1)/2 image_shape=(batch_size,nkerns[0],14,14), filter_shape=(nkerns[1],nkerns[0],5,5), poolsize=(2,2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). 
layer2_input = layer1.output.flatten(2) # TODO: construct a fully-connected sigmoidal layer layer2 = HiddenLayer( rng, input=layer2_input, # (14-5+1)/2 n_in=nkerns[1] * 5 * 5, n_out=500, activation=T.nnet.sigmoid ) # TODO: classify the values of the fully-connected sigmoidal layer layer3 = LogisticRegression( input=layer2.output, n_in=500, n_out=10) # the cost we minimize during training is the NLL of the model cost = layer3.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer3.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer3.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # TODO: create a list of all model parameters to be fit by gradient descent params = layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... 
training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)

    mean_w_0 = layer0.W.get_value().mean()

    plt.figure()
    for knkerns0 in range(nkerns[0]):
        for kch in range(3):
            plt.subplot(3, 3, knkerns0 * 3 + kch + 1)
            plt.imshow(layer0.W.get_value()[knkerns0, kch, :, :])
            plt.title('trained filter')

    ###########################################################################
    # Rebuild the network with pre-defined (Gaussian) first-layer filters.
    ###########################################################################
    filter_shape_input = (nkerns[0], 3, 5, 5)

    # A unit impulse at the centre of the filter; integer (floor) division is
    # required here, since array indices cannot be floats.
    pt_input = numpy.zeros((filter_shape_input[2], filter_shape_input[3]))
    pt_input[(filter_shape_input[2] - 1) // 2,
             (filter_shape_input[3] - 1) // 2] = 1.0

    W = numpy.zeros(filter_shape_input)
    from scipy.ndimage.filters import gaussian_filter as gf
    for knkerns0 in range(nkerns[0]):
        for kch in range(3):
            # blur the impulse to get a Gaussian kernel, one sigma per map
            W[knkerns0, kch, :, :] = gf(pt_input, (knkerns0 + 1.0))
            # rescale so each kernel has the same mean as the trained layer
            W[knkerns0, kch, :, :] = (W[knkerns0, kch, :, :]
                                      / W[knkerns0, kch, :, :].mean()
                                      * mean_w_0)
    W = theano.shared(W, borrow=True)

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps, image height, image width)
        image_shape=(batch_size, 3, 32, 32),
        # (number of filters, num input feature maps, filter height, width)
        filter_shape=filter_shape_input,
        poolsize=(2, 2)
    )
    # note: layer0.output was already built with the randomly initialized
    # weights; ideally the pre-defined W would be passed to the constructor
    # (cf. the `weights` argument used in test_lenet)
    layer0.W = W

    # Construct the second convolutional pooling layer
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        # (32-5+1)/2
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        # (14-5+1)/2
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.nnet.sigmoid
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent;
    # layer0's parameters are excluded so the pre-defined filters stay fixed
    params = layer3.params + layer2.params + layer1.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose) plt.figure() for knkerns0 in range(nkerns[0]): for kch in range(3): plt.subplot(3,3,knkerns0*3+kch+1) plt.imshow(layer0.W.get_value()[knkerns0,kch,:,:]) plt.title('pre-defined filter')
def my_cnn(batch_size, n_epochs, learning_rate=0.01, patience=12000): # load data ds_rate = None datasets = load_data(ds_rate=ds_rate, theano_shared=True) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size rng = np.random.RandomState(23455) # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch # start-snippet-1 x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') layerX_input = x.reshape((batch_size, 3, 32, 32)) layerX = DropLayer(input=layerX_input) layer0 = LeNetConvPoolLayer( rng, input=layerX.output, image_shape=(batch_size, 3, 32, 32), filter_shape=( 64, 3, 3, 3 ), # (number of output feature maps, number of input feature maps, height, width) poolsize=(1, 1)) # 4D output tensor is thus of shape (batch_size, 64, 32, 32) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, 64, 32, 32), filter_shape=(64, 64, 3, 3), poolsize=(2, 2)) # 4D output tensor is thus of shape (batch_size, 64, 16, 16) layer2 = LeNetConvPoolLayer(rng, input=layer1.output, image_shape=(batch_size, 64, 16, 16), filter_shape=(128, 64, 3, 3), poolsize=(1, 1)) # 4D output tensor is thus of shape (batch_size, 128, 16, 16) layer3 = LeNetConvPoolLayer(rng, input=layer2.output, image_shape=(batch_size, 128, 16, 16), filter_shape=(128, 128, 3, 3), poolsize=(2, 2)) # 4D output tensor is thus of shape (batch_size, 128, 8, 8) layer4 = LeNetConvPoolLayer(rng, input=layer3.output, image_shape=(batch_size, 128, 8, 8), filter_shape=(256, 128, 3, 3), poolsize=(1, 1)) # 4D output tensor is thus of shape (batch_size, 256, 8, 8) layer5 = UpSampleLayer(input=layer4.output) # 4D output tensor is thus of shape (batch_size, 256, 16, 16) layer6 = LeNetConvPoolLayer(rng, input=layer5.output, image_shape=(batch_size, 256, 16, 16), filter_shape=(128, 256, 3, 3), poolsize=(1, 1)) # 4D output tensor is thus of shape (batch_size, 128, 16, 16) layer7 = LeNetConvPoolLayer(rng, input=layer6.output, image_shape=(batch_size, 128, 16, 16), filter_shape=(128, 128, 3, 3), poolsize=(1, 1)) # 4D output tensor is thus of shape (batch_size, 128, 16, 16) layer8 = UpSampleLayer(input=layer7.output + layer3.output_x) # 4D output tensor is thus of shape (batch_size, 128, 32, 32) layer9 = LeNetConvPoolLayer(rng, input=layer8.output, image_shape=(batch_size, 128, 32, 32), filter_shape=(64, 128, 3, 3), poolsize=(1, 1)) # 4D output tensor is thus of shape (batch_size, 64, 32, 32) layer10 = LeNetConvPoolLayer(rng, input=layer9.output, image_shape=(batch_size, 64, 32, 32), filter_shape=(64, 64, 3, 3), poolsize=(1, 1)) # 4D output tensor is thus of shape (batch_size, 64, 32, 32) layer11 = LeNetConvPoolLayer(rng, input=layer10.output + layer1.output_x, image_shape=(batch_size, 64, 32, 32), filter_shape=(3, 64, 3, 3), poolsize=(1, 1)) # 4D output tensor is thus of shape (batch_size, 3, 32, 32) cost = layer11.ob_func(layerX_input) # create a function to compute the mistakes that are made by the model test_model = 
theano.function(
        [],
        [layerX_input, layerX.output, layer11.output, cost],
        givens={x: test_set_x[0:100]})

    validate_model = theano.function(
        [index],
        cost,
        givens={x: valid_set_x[index * batch_size:(index + 1) * batch_size]})

    # create a list of all model parameters to be fit by gradient descent
    params = (layer11.params + layer10.params + layer9.params
              + layer7.params + layer6.params + layer4.params
              + layer3.params + layer2.params + layer1.params
              + layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    print('... training the model')

    # early-stopping parameters
    patience_increase = 2         # wait this much longer when a new best is
                                  # found
    improvement_threshold = 0.85  # a relative improvement of this much is
                                  # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_cost = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    verbose = True

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter % 100 == 0) and verbose:
                print('training @ iter = ', iter)
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:
                # compute the reconstruction cost on the validation set
                validation_cost = [validate_model(i)
                                   for i in range(n_valid_batches)]
                this_validation_cost = numpy.mean(validation_cost)

                if verbose:
                    print('epoch %i, minibatch %i/%i, validation cost %f' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_cost))

                # if we got the best validation cost until now, save it
                # along with the iteration number
                if this_validation_cost < best_validation_cost:
                    best_validation_cost = this_validation_cost
                    best_iter = iter

            if patience <= iter:
                done_looping = True
                break

    TEST_MODEL_RESULT = test_model()
    GT_Images_T = TEST_MODEL_RESULT[0]
    Drop_Images_T = TEST_MODEL_RESULT[1]
    Reconstructed_Images_T = TEST_MODEL_RESULT[2]
    cost_list = TEST_MODEL_RESULT[3]

    # plot 8x3 images
    print("Ground Truth, Corrupted Images, and Reconstructed Images:")
    f, axarr = plt.subplots(8, 3, figsize=(20, 20))
    for i in range(8):
        plt.axes(axarr[i, 0])
        plt.imshow(np.transpose(GT_Images_T[i], (1, 2, 0)))
        plt.axes(axarr[i, 1])
        plt.imshow(np.transpose(Drop_Images_T[i], (1, 2, 0)))
        plt.axes(axarr[i, 2])
        plt.imshow(np.transpose(Reconstructed_Images_T[i], (1, 2, 0)))

    end_time = timeit.default_timer()

    # Retrieve the name of the function that invoked train_nn() (caller's name)
    curframe = inspect.currentframe()
    calframe = inspect.getouterframes(curframe, 2)

    # Print out summary
    print('Optimization complete.')
    print('Best validation cost %f obtained at iteration %i, ' %
          (best_validation_cost, best_iter + 1))
    print(('The training process for function ' + calframe[1][3] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
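# `UpSampleLayer` is defined elsewhere; judging by the shape comments in
# my_cnn it doubles the spatial resolution. A nearest-neighbour 2x upsampling
# on a (batch, channels, height, width) array, as a NumPy reference sketch:
import numpy as np

def upsample2x(feature_maps):
    """Repeat each pixel twice along the height and width axes."""
    return np.repeat(np.repeat(feature_maps, 2, axis=2), 2, axis=3)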
def test_gaussian(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512], batch_size=200, verbose=False): """ Wrapper function for testing LeNet on SVHN dataset :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type nkerns: list of ints :param nkerns: number of kernels on each layer :type batch_size: int :param batch_szie: number of examples in minibatch. :type verbose: boolean :param verbose: to print out epoch summary or not to. """ rng = numpy.random.RandomState(23455) datasets = load_data() train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images y = T.ivector('y') # the labels are presented as 1D vector of # [int] labels ###################### # BUILD ACTUAL MODEL # ###################### print('... building the model') # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32) # to a 4D tensor, compatible with our LeNetConvPoolLayer layer0_input = x.reshape((batch_size, 3, 32, 32)) # TODO: Construct the first convolutional pooling layer # Construct the first convolutional pooling layer: # filtering reduces the image size to (32-5+1 , 32-5+1) = (28, 28) # maxpooling reduces this further to (28/2, 28/2) = (14, 14) # 4D output tensor is thus of shape (batch_size, nkerns[0], 14, 14) layer0 = LeNetConvPoolLayer( rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, 5, 5), poolsize=(2, 2) ) # TODO: Construct the second convolutional pooling layer # Construct the second convolutional pooling layer # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10) # maxpooling reduces this further to (10/2, 10/2) = (5, 5) # 4D output tensor is thus of shape (batch_size, nkerns[1], 5, 5) layer1 = LeNetConvPoolLayer( rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 14, 14), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2) ) # the HiddenLayer being fully-connected, it operates on 2D matrices of # shape (batch_size, num_pixels) (i.e matrix of rasterized images). 
layer2_input = layer1.output.flatten(2)

    # construct a fully-connected tanh layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent;
    # layer0 is deliberately excluded so its Gaussian filters stay fixed
    params = layer3.params + layer2.params + layer1.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by SGD.
    # Since this model has many parameters, it would be tedious to manually
    # create an update rule for each model parameter. We thus create the
    # updates list by automatically looping over all (params[i], grads[i])
    # pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    # Load the pre-defined Gaussian kernels into the first layer. They must
    # be written into the existing shared variables with set_value (rather
    # than rebinding layer0.W to a Python list, as before), otherwise the
    # already-built graph keeps its random initial weights. One 5x5 kernel
    # is replicated across all 3 input channels and nkerns[0] feature maps.
    gaussian = make_Gaussian(size=5)
    fixed_W = numpy.tile(gaussian, (nkerns[0], 3, 1, 1)).astype(
        theano.config.floatX)
    layer0.W.set_value(fixed_W)
    layer0.b.set_value(numpy.zeros((nkerns[0],), dtype=theano.config.floatX))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches,
             n_epochs, verbose)
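# `make_Gaussian` is defined elsewhere; a common implementation returns a
# normalized 2-D Gaussian kernel, which is what the code above assumes:
import numpy as np

def make_gaussian_sketch(size=5, sigma=1.0):
    """Return a (size x size) Gaussian kernel that sums to 1."""
    half = (size - 1) / 2.0
    ys, xs = np.mgrid[0:size, 0:size]
    kernel = np.exp(-((xs - half) ** 2 + (ys - half) ** 2) / (2 * sigma ** 2))
    return kernel / kernel.sum()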
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             batch_size=20, n_hidden=500, verbose=True,
             fileName='predictionsMLP'):
    """
    Wrapper function for testing an MLP with a DropConnect layer on the
    SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    learning_rate = theano.shared(learning_rate)

    # pseudo-boolean flag for switching between training and prediction
    testing = T.lscalar('testing')
    testValue = testing
    getTestValue = theano.function([testing], testValue)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # The images arrive rasterized as (batch_size, 3 * 32 * 32) rows.
    layer0_input = x.reshape((batch_size, 3, 32, 32))
    layer0_input = layer0_input.flatten(2)

    # two fully-connected tanh layers
    layer0 = HiddenLayer(rng,
                         input=layer0_input,
                         n_in=32 * 32 * 3,
                         n_out=n_hidden,
                         activation=T.tanh)

    layer1 = HiddenLayer(rng,
                         input=layer0.output,
                         n_in=n_hidden,
                         n_out=n_hidden,
                         activation=T.tanh)

    # a DropConnect layer: its weights are randomly masked during training
    layer2 = DropConnect(rng,
                         input=layer1.output,
                         n_in=n_hidden,
                         n_out=batch_size,
                         testing=testing)

    # classify the values of the fully-connected layers
    layer3 = LogisticRegression(input=layer2.output,
                                n_in=batch_size,
                                n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    print("Model building complete")

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    getPredictedValue = theano.function(
        [index],
        layer3.predictedValue(),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore')

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # momentum updates; the DropConnect masks are applied to the
    # corresponding weight and bias updates
    updates = []
    momentum = 0.9
    for param in params:
        param_update = theano.shared(param.get_value() * 0.,
                                     broadcastable=param.broadcastable)
        if param.name == 'WDrop':
            updates.append((param, param - learning_rate.get_value().item()
                            * layer2.maskW.get_value() * param_update))
        elif param.name == 'bDrop':
            updates.append((param, param - learning_rate.get_value().item()
                            * layer2.maskb.get_value() * param_update))
        else:
            updates.append((param, param - learning_rate.get_value().item()
                            * param_update))
        updates.append((param_update, momentum * param_update
                        + (1. - momentum) * T.grad(cost, param)))

    print("Compiling the train model function")
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size],
            testing: getTestValue(0)
        },
        on_unused_input='ignore',
        allow_input_downcast=True)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    predictions = train_nn(train_model, validate_model, test_model,
                           getPredictedValue, n_train_batches,
                           n_valid_batches, n_test_batches, n_epochs,
                           learning_rate, verbose)

    with open(fileName, 'wb') as f:
        cPickle.dump(predictions, f, protocol=cPickle.HIGHEST_PROTOCOL)
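# `DropConnect` is defined elsewhere. The idea, sketched in NumPy under the
# usual formulation: individual weights (rather than whole units, as in
# dropout) are zeroed with probability p during training, and the full
# weight matrix is scaled by the keep probability at test time:
import numpy as np

def dropconnect_forward(x, W, b, p=0.5, training=True, rng=np.random):
    """Affine layer whose weights are randomly masked while training."""
    if training:
        mask = rng.binomial(1, 1.0 - p, size=W.shape)
        return x.dot(W * mask) + b
    return x.dot(W * (1.0 - p)) + b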
def test_convnet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512, 20],
                 filter_shape=[9, 5], batch_size=200, verbose=True):
    """
    Wrapper function for testing a Multi-Stage ConvNet on the SVHN dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch.

    :type verbose: boolean
    :param verbose: to print out epoch summary or not to.
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of
                         # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps, image height, image width)
        image_shape=(batch_size, 3, 32, 32),
        # (number of filters, num input feature maps, filter height, width)
        filter_shape=(nkerns[0], 3, filter_shape[0], filter_shape[0]),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer. Integer division
    # keeps the shapes ints in Python 3, e.g. (32 - 9 + 1) // 2 = 12.
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0],
                     (33 - filter_shape[0]) // 2,
                     (33 - filter_shape[0]) // 2),
        filter_shape=(nkerns[1], nkerns[0], filter_shape[1], filter_shape[1]),
        poolsize=(2, 2)
    )

    # Combine the Layer 0 output and the Layer 1 output:
    # downsample the first layer output to match the size of the second
    # layer output (nkerns[0] maps of 12x12 down to 4x4).
    layer0_output_ds = downsample.max_pool_2d(
        input=layer0.output,
        ds=(3, 3),
        ignore_border=False
    )

    # concatenate along the feature-map axis
    layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1)

    filter_shape_2 = ((33 - filter_shape[0]) // 2 - filter_shape[1] + 1) // 2

    # Construct the third convolutional pooling layer; the filter covers the
    # whole map, so the output is nkerns[2] maps of size 1x1.
    layer2 = LeNetConvPoolLayer(
        rng,
        input=layer2_input,
        image_shape=(batch_size, nkerns[1] + nkerns[0],
                     filter_shape_2, filter_shape_2),
        filter_shape=(nkerns[2], nkerns[1] + nkerns[0],
                      filter_shape_2, filter_shape_2),
        poolsize=(1, 1)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[2] * 1 * 1).
layer3_input = layer2.output.flatten(2) # construct a fully-connected sigmoidal layer layer3 = HiddenLayer( rng, input=layer3_input, n_in=nkerns[2] * 1 * 1, n_out= 10,#TODO, activation=T.nnet.sigmoid ) # classify the values of the fully-connected sigmoidal layer layer4 = LogisticRegression(input=layer3.output, n_in= 10,#TODO n_out=10) # the cost we minimize during training is the NLL of the model cost = layer4.negative_log_likelihood(y) # create a function to compute the mistakes that are made by the model test_model = theano.function( [index], layer4.errors(y), givens={ x: test_set_x[index * batch_size: (index + 1) * batch_size], y: test_set_y[index * batch_size: (index + 1) * batch_size] } ) validate_model = theano.function( [index], layer4.errors(y), givens={ x: valid_set_x[index * batch_size: (index + 1) * batch_size], y: valid_set_y[index * batch_size: (index + 1) * batch_size] } ) # TODO: create a list of all model parameters to be fit by gradient descent params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params # create a list of gradients for all model parameters grads = T.grad(cost, params) # train_model is a function that updates the model parameters by # SGD Since this model has many parameters, it would be tedious to # manually create an update rule for each model parameter. We thus # create the updates list by automatically looping over all # (params[i], grads[i]) pairs. updates = [ (param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads) ] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] } ) ############### # TRAIN MODEL # ############### print('... training') train_nn(train_model, validate_model, test_model, n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
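# The image_shape/filter_shape arithmetic in test_convnet is easy to get
# wrong; this tiny helper (illustrative only, not in the original) computes
# the spatial size after a 'valid' convolution followed by non-overlapping
# pooling, matching expressions like (33 - filter_shape[0]) // 2 above:
def conv_pool_out_size(in_size, filter_size, pool=2):
    """Spatial size after a 'valid' conv and non-overlapping max pooling."""
    return (in_size - filter_size + 1) // pool

# e.g. conv_pool_out_size(32, 9) == 12 and conv_pool_out_size(12, 5) == 4,
# the sizes assumed by the layer1/layer2 shapes in test_convnet.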
import pickle
import random

import sklearn.preprocessing
from sklearn.decomposition import PCA

import hw3_utils

data, labels, test = hw3_utils.load_data()

# Shuffle the training data (features and labels together)
combined = list(zip(data, labels))
random.shuffle(combined)
data[:], labels[:] = zip(*combined)

# Scale: fit the scaler on the training data only, then apply it to both sets
scaler = sklearn.preprocessing.StandardScaler().fit(data)
data = scaler.transform(data)
test = scaler.transform(test)

# PCA: likewise, fit on the training data and reuse the fitted projection for
# the test set (refitting on the test set would project it onto different
# components)
pca = PCA(n_components=5)
data = pca.fit_transform(data)
test = pca.transform(test)

new_dataset = (data, labels, test)
with open("Shuffled_scaled_PCA_data.data", "wb") as f:
    pickle.dump(new_dataset, f)
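# An illustrative check (the feature matrix here is synthetic; the real
# training data would replace it) of how much variance a 5-component PCA
# keeps: printing the cumulative explained-variance ratio is a quick way to
# justify the n_components choice above.
import numpy
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

rng = numpy.random.RandomState(0)
fake_train = StandardScaler().fit_transform(rng.randn(100, 20))
probe = PCA().fit(fake_train)
print(numpy.cumsum(probe.explained_variance_ratio_)[:5])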
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             batch_size=20, n_hidden=500, verbose=True,
             fileName='predictionsMLP'):
    """
    Wrapper function for testing an MLP with a DropConnect layer on the SVHN
    dataset.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    learning_rate = theano.shared(learning_rate)

    # `testing` is a flag fed to the DropConnect layer: 1 at test time (use
    # the averaged weights), 0 at training time (sample a mask)
    testing = T.lscalar('testing')
    getTestValue = theano.function([testing], testing)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32),
    # then flatten it back to 2D for the fully-connected layers
    layer0_input = x.reshape((batch_size, 3, 32, 32))
    layer0_input = layer0_input.flatten(2)

    # first fully-connected hidden layer
    layer0 = HiddenLayer(
        rng,
        input=layer0_input,
        n_in=32 * 32 * 3,
        n_out=n_hidden,
        activation=T.tanh
    )

    layer1 = HiddenLayer(
        rng,
        input=layer0.output,
        n_in=n_hidden,
        n_out=n_hidden,
        activation=T.tanh
    )

    # DropConnect layer: individual weights (rather than whole units) are
    # dropped during training
    layer2 = DropConnect(
        rng,
        input=layer1.output,
        n_in=n_hidden,
        n_out=batch_size,
        testing=testing
    )

    # classify the values of the fully-connected layers
    layer3 = LogisticRegression(input=layer2.output, n_in=batch_size, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)
    print("Model building complete")

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore'
    )

    getPredictedValue = theano.function(
        [index],
        layer3.predictedValue(),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore'
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(1)
        },
        on_unused_input='ignore'
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # SGD with momentum. The DropConnect weights ('WDrop'/'bDrop') are only
    # updated where their mask is active; all other parameters get the plain
    # momentum update. Note the learning rate is read once, at compile time.
    updates = []
    momentum = 0.9
    for param in params:
        param_update = theano.shared(param.get_value() * 0.,
                                     broadcastable=param.broadcastable)
        if param.name == 'WDrop':
            updates.append((param, param - learning_rate.get_value().item()
                            * layer2.maskW.get_value() * param_update))
        elif param.name == 'bDrop':
            updates.append((param, param - learning_rate.get_value().item()
                            * layer2.maskb.get_value() * param_update))
        else:
            updates.append((param, param - learning_rate.get_value().item()
                            * param_update))
        updates.append((param_update,
                        momentum * param_update
                        + (1. - momentum) * T.grad(cost, param)))

    print("Compiling the train model function")
    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            testing: getTestValue(0)
        },
        on_unused_input='ignore',
        allow_input_downcast=True
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    predictions = train_nn(train_model, validate_model, test_model,
                           getPredictedValue, n_train_batches, n_valid_batches,
                           n_test_batches, n_epochs, learning_rate, verbose)

    with open(fileName, 'wb') as f:
        cPickle.dump(predictions, f, protocol=cPickle.HIGHEST_PROTOCOL)
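# A minimal numpy sketch (independent of Theano) of the momentum rule the loop
# above implements. Theano applies its updates simultaneously, so the
# parameter steps along the *previous* velocity while the velocity absorbs the
# new gradient.
import numpy as np

def momentum_step(param, velocity, grad, lr=0.01, momentum=0.9):
    new_param = param - lr * velocity
    new_velocity = momentum * velocity + (1. - momentum) * grad
    return new_param, new_velocity

w, v = np.zeros(3), np.zeros(3)
for g in [np.array([1., 2., 3.])] * 5:  # stand-in gradient stream
    w, v = momentum_step(w, v, g)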
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100,
             batch_size=128, n_hidden=500, n_hiddenLayers=3,
             verbose=False, smaller_set=True):
    """
    Wrapper function for training and testing an MLP.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type n_hidden: int or list of ints
    :param n_hidden: number of hidden units. If a list, it specifies the
    number of units in each hidden layer, and its length should equal
    n_hiddenLayers.

    :type n_hiddenLayers: int
    :param n_hiddenLayers: number of hidden layers

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary

    :type smaller_set: boolean
    :param smaller_set: whether to use the smaller dataset
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP
    classifier = myMLP(rng=rng, input=x, n_in=32 * 32 * 3, n_hidden=n_hidden,
                       n_out=10, n_hiddenLayers=n_hiddenLayers)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    # Given two lists of the same length, A = [a1, a2, a3, a4] and
    # B = [b1, b2, b3, b4], zip generates a list C of the same size, where
    # each element is a pair formed from the two lists:
    # C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    return train_nn(train_model, validate_model, test_model,
                    n_train_batches, n_valid_batches, n_test_batches,
                    n_epochs, verbose)
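# A hedged usage sketch for the wrapper above: sweeping the depth of the MLP
# on the smaller dataset. test_mlp returns whatever train_nn returns (assumed
# here to be the final test performance), so results can be collected per
# configuration.
results = {}
for depth in [1, 2, 3]:
    results[depth] = test_mlp(n_epochs=10, n_hiddenLayers=depth,
                              verbose=False, smaller_set=True)
print(results)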
def test_adversarial_example(learning_rate=0.1, L1_reg=0.00, L2_reg=0.0001,
                             n_epochs=100, batch_size=128, n_hidden=500,
                             n_hiddenLayers=3, verbose=True, smaller_set=True):
    """
    Wrapper function for testing adversarial examples.
    """
    # load the dataset; download the dataset if it is not present
    if smaller_set:
        datasets = load_data(ds_rate=5)
    else:
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP
    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32 * 32 * 3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # same as test_model, but for a single example
    test_model_single = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index:index + 1],
            y: test_set_y[index:index + 1]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params);
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # gradient of the cost with respect to the *input*, used to build the
    # adversarial perturbation
    gx = T.grad(cost, x)

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    # gradient of the cost for a single test example
    f = theano.function(
        inputs=[index],
        outputs=gx,
        givens={
            x: test_set_x[index: (index + 1)],
            y: test_set_y[index: (index + 1)]
        }
    )

    ind_oi = 3
    from matplotlib import pyplot as plt
    plt.figure()
    plt.imshow(test_set_x.get_value()[ind_oi, :].reshape(3, 32, 32).transpose((1, 2, 0)))

    # predicted class for a single test example
    h = theano.function(
        inputs=[index],
        outputs=classifier.logRegressionLayer.y_pred,
        givens={
            x: test_set_x[index: (index + 1)]
        }
    )
    print('predicted number original: %i' % h(ind_oi))

    # perturb the stored test example in place by the sign of the gradient
    Y = T.matrix()
    X_update = (test_set_x,
                T.inc_subtensor(test_set_x[ind_oi:(ind_oi + 1)], Y))
    g = theano.function([Y], updates=[X_update])
    g(0.01 * numpy.sign(f(ind_oi)))

    print('predicted number adversarial: %i' % h(ind_oi))
    plt.figure()
    plt.imshow(test_set_x.get_value()[ind_oi, :].reshape(3, 32, 32).transpose((1, 2, 0)))
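# The perturbation applied above is the fast gradient sign method (FGSM):
# x_adv = x + eps * sign(grad_x L(x, y)). A minimal numpy sketch, assuming
# grad_wrt_input is the loss gradient for a single image:
import numpy as np

def fgsm(image, grad_wrt_input, eps=0.01):
    """Return an adversarially perturbed copy of `image`."""
    return image + eps * np.sign(grad_wrt_input)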
import matplotlib.pyplot as plt
import hw3_utils as utils
import part_c_classifiers
from classifier import id3_factory, perceptron_factory
from classifier import split_crosscheck_groups, knn_factory, evaluate
from sklearn.feature_selection import SelectKBest, f_classif

# question 3.2
patients, labels, test = utils.load_data()
split_crosscheck_groups([patients, labels], 2)

# question 5.1
k_list = [1, 3, 5, 7, 13]
accuracy_list = []
file_name = 'experiments6.csv'
with open(file_name, 'w') as result_file:
    for k in k_list:
        knn_f = knn_factory(k)
        accuracy, error = evaluate(knn_f, 2)
        result_file.write(str(k) + "," + str(accuracy) + "," + str(error) + "\n")
        accuracy_list.append(accuracy)

# question 5.2
plt.plot(k_list, accuracy_list)
plt.xlabel('K value')
plt.ylabel('Average accuracy')
plt.show()
import hw3_utils as utils
import classifier

# TEST for question 1
list1 = [1, 2, 3, 4, 5, 6, 7]
list2 = [7, 6, 5, 4, 3, 2, 1]
print(classifier.euclidean_distance(list1, list2))

# TEST for question 3.2
data = utils.load_data()
classifier.split_crosscheck_groups(data, 2)
print(classifier.load_k_fold_data(1)[1][0])
def test_para_num(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
                  L1_reg=0.00, L2_reg=0.0001, batch_size=128,
                  n_hiddenLayers=2, verbose=True):
    """
    Wrapper function comparing a two-stage ConvNet against an MLP whose
    hidden-layer widths match the ConvNet's feature-map sizes, on the SVHN
    dataset.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    ###########################################################################
    ################################## CNN ####################################
    ###########################################################################

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps, image height, image width)
        image_shape=(batch_size, 3, 32, 32),
        # (number of filters, num input feature maps, filter height, filter width)
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer: (32 - 5 + 1) / 2 = 14
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer: (14 - 5 + 1) / 2 = 5
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 5 * 5,
        n_out=500,
        activation=T.nnet.sigmoid
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # SGD updates, built by looping over all (params[i], grads[i]) pairs
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)

    ###########################################################################
    ################################## MLP ####################################
    ###########################################################################

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # match the MLP hidden-layer widths to the CNN feature-map sizes
    n_hidden = [nkerns[0] * 14 * 14, nkerns[1] * 5 * 5]

    classifier = myMLP(
        rng=rng,
        input=x,
        n_in=32 * 32 * 3,
        n_hidden=n_hidden,
        n_hiddenLayers=n_hiddenLayers,
        n_out=10
    )

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta (stored in params)
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost and, at
    # the same time, updates the parameters of the model
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
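# A back-of-the-envelope sketch of what test_para_num compares: the number of
# trainable weights in each model (plain Python; biases are ignored and the
# layer sizes are the defaults used above).
def conv_params(n_out_maps, n_in_maps, f):
    # each output map has one f x f filter per input map
    return n_out_maps * n_in_maps * f * f

def dense_params(n_in, n_out):
    return n_in * n_out

nkerns = [16, 512]
cnn_total = (conv_params(nkerns[0], 3, 5) + conv_params(nkerns[1], nkerns[0], 5)
             + dense_params(nkerns[1] * 5 * 5, 500) + dense_params(500, 10))
n_hidden = [nkerns[0] * 14 * 14, nkerns[1] * 5 * 5]
mlp_total = (dense_params(32 * 32 * 3, n_hidden[0])
             + dense_params(n_hidden[0], n_hidden[1])
             + dense_params(n_hidden[1], 10))
print(cnn_total, mlp_total)  # the MLP needs far more weights at the same widths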
def main():
    # Flags used for debugging: skip experiments that have already been run
    skip_knn = True
    skip_tree = True
    skip_perc = True

    train_features, train_labels, test_features = load_data('data/Data.pickle')

    # Split the dataset once into two folds.
    folds = 2
    # split_crosscheck_groups(train_features, train_labels, folds)

    if not skip_knn:
        # Evaluate KNN with different k values
        k_list = [1, 3, 5, 7, 13]
        acc_list = []
        err_list = []
        with open('experiments6.csv', mode='w', newline='') as csv_file:
            exp_writer = csv.writer(csv_file)
            for k in k_list:
                knn_fac = knn_factory(k)
                err, acc = evaluate(knn_fac, folds)
                print("k=", k, " acc=", acc, " err=", err)
                exp_writer.writerow([k, acc, err])
                acc_list.append(acc)
                err_list.append(err)

        # Plot KNN results
        plt.subplot(2, 1, 1)
        plt.plot(k_list, acc_list, '--', color='g')
        plt.plot(k_list, acc_list, 'bo')
        plt.ylabel("Accuracy")
        plt.xlabel("k")
        plt.xticks(k_list)

        plt.subplot(2, 1, 2)
        plt.plot(k_list, err_list, '--', color='r')
        plt.plot(k_list, err_list, 'bo')
        plt.ylabel("Error")
        plt.xlabel("k")
        plt.xticks(k_list)

        plt.tight_layout()
        plt.show()

    # Perform classification for Perceptron and Tree and write to files.
    with open('experiments12.csv', mode='w', newline='') as csv_file:
        exp_writer = csv.writer(csv_file)
        if not skip_tree:
            # Decision Tree experiment
            myTree = tree.DecisionTreeClassifier(criterion="entropy")
            err, acc = evaluate(myTree, folds)
            print("tree acc=", acc, " tree err=", err)
            exp_writer.writerow([1, acc, err])
        if not skip_perc:
            # Perceptron experiment
            myPerc = Perceptron(tol=1e-3, random_state=0)
            err, acc = evaluate(myPerc, folds)
            print("perceptron acc=", acc, " perceptron err=", err)
            exp_writer.writerow([2, acc, err])

    # Competition: classify test_features
    print("Triple model")
    my_model = triple_model()
    my_model.fit(train_features, train_labels)
    res = my_model.final_predict(preprocessing.scale(test_features))
    write_prediction(res)
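# triple_model is project-specific and not shown here; purely as an
# illustration (a hypothetical class, not the actual implementation), a
# minimal majority-vote wrapper over three sklearn-style estimators could
# look like this:
import numpy as np

class MajorityVote3:
    def __init__(self, clf_a, clf_b, clf_c):
        self.clfs = [clf_a, clf_b, clf_c]

    def fit(self, X, y):
        for clf in self.clfs:
            clf.fit(X, y)
        return self

    def predict(self, X):
        votes = np.array([clf.predict(X) for clf in self.clfs])
        # a sample is positive when at least two of the three classifiers agree
        return (votes.sum(axis=0) >= 2).astype(int)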
def MY_CNN(learning_rate=0.1, n_epochs=0, batch_size=100):
    """
    Convolutional encoder-decoder that restores images corrupted by random
    pixel dropout; trained to reconstruct the clean input.
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    test_im = test_set_x.get_value(borrow=True)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    Input = x.reshape((batch_size, 3, 32, 32))
    # corrupt the input by randomly dropping pixels
    ConvLayer1_input = drop(Input, p=0.7)

    # --- encoder ---
    ConvLayer1 = ConvLayer(rng, input=ConvLayer1_input,
                           filter_shape=(64, 3, 3, 3),
                           image_shape=(batch_size, 3, 32, 32),
                           padding='half')
    ConvLayer2 = ConvLayer(rng, input=ConvLayer1.output,
                           filter_shape=(64, 64, 3, 3),
                           image_shape=(batch_size, 64, 32, 32),
                           padding='half')
    MaxPoolLayer1 = MaxPooling(input=ConvLayer2.output, poolsize=(2, 2),
                               ignore_border=False)
    ConvLayer3 = ConvLayer(rng, input=MaxPoolLayer1.output,
                           filter_shape=(128, 64, 3, 3),
                           image_shape=(batch_size, 64, 16, 16),
                           padding='half')
    ConvLayer4 = ConvLayer(rng, input=ConvLayer3.output,
                           filter_shape=(128, 128, 3, 3),
                           image_shape=(batch_size, 128, 16, 16),
                           padding='half')
    MaxPoolLayer2 = MaxPooling(input=ConvLayer4.output, poolsize=(2, 2),
                               ignore_border=False)
    ConvLayer5 = ConvLayer(rng, input=MaxPoolLayer2.output,
                           filter_shape=(256, 128, 3, 3),
                           image_shape=(batch_size, 128, 8, 8),
                           padding='half')

    # --- decoder ---
    UpPoolLayer2 = Unpooling2D(input=ConvLayer5.output, poolsize=(2, 2))
    DeconvLayer5 = ConvLayer(rng, input=UpPoolLayer2.output,
                             filter_shape=(128, 256, 3, 3),
                             image_shape=(batch_size, 256, 16, 16),
                             padding='half')
    DeconvLayer4 = ConvLayer(rng, input=DeconvLayer5.output,
                             filter_shape=(128, 128, 3, 3),
                             image_shape=(batch_size, 128, 16, 16),
                             padding='half')

    # skip connection: add the matching encoder output
    UpPoolLayer1_input = ConvLayer4.output + DeconvLayer4.output
    UpPoolLayer1 = Unpooling2D(input=UpPoolLayer1_input, poolsize=(2, 2))
    DeconvLayer3 = ConvLayer(rng, input=UpPoolLayer1.output,
                             filter_shape=(64, 128, 3, 3),
                             image_shape=(batch_size, 128, 32, 32),
                             padding='half')
    DeconvLayer2 = ConvLayer(rng, input=DeconvLayer3.output,
                             filter_shape=(64, 64, 3, 3),
                             image_shape=(batch_size, 64, 32, 32),
                             padding='half')

    # skip connection: add the matching encoder output
    DeconvLayer1_input = ConvLayer2.output + DeconvLayer2.output
    DeconvLayer1 = ConvLayer(rng, input=DeconvLayer1_input,
                             filter_shape=(3, 64, 3, 3),
                             image_shape=(batch_size, 64, 32, 32),
                             padding='half')

    Output = DeconvLayer1.output

    # create a list of all model parameters to be fit by gradient descent
    params = (ConvLayer1.params + ConvLayer2.params + ConvLayer3.params
              + ConvLayer4.params + ConvLayer5.params
              + DeconvLayer1.params + DeconvLayer2.params + DeconvLayer3.params
              + DeconvLayer4.params + DeconvLayer5.params)

    # reconstruction loss against the *uncorrupted* input
    cost = T.mean(T.sqr(Output - Input))

    # test_model returns the restored images for a minibatch
    test_model = theano.function(
        [index],
        Output,
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        cost,
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    # SGD with momentum
    momentum = theano.shared(numpy.cast[theano.config.floatX](0.5),
                             name='momentum')
    updates = []
    for param in params:
        param_update = theano.shared(
            param.get_value() * numpy.cast[theano.config.floatX](0.))
        updates.append((param, param - learning_rate * param_update))
        updates.append((param_update,
                        momentum * param_update
                        + (numpy.cast[theano.config.floatX](1.) - momentum)
                        * T.grad(cost, param)))

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    train_nn_restore(train_model, validate_model, test_model,
                     n_train_batches, n_valid_batches, n_test_batches,
                     n_epochs, verbose=True)

    # visualize original / corrupted / restored images for 8 test examples
    plt.figure(figsize=(16, 6))
    imdrop = theano.function([x], drop(x, p=0.7))
    drop_image = imdrop(test_im[0:8])
    restored = test_model(0)[0:8, :, :, :]
    for i in range(8):
        plt.subplot(3, 8, i + 1)
        img_original = numpy.reshape(test_im[i], (3, 32, 32)).transpose(1, 2, 0)
        plt.imshow(img_original)
        plt.xticks([])
        plt.yticks([])
        plt.xlabel('Original Image')

        plt.subplot(3, 8, i + 9)
        img_drop = numpy.reshape(drop_image[i], (3, 32, 32)).transpose(1, 2, 0)
        plt.imshow(img_drop)
        plt.xticks([])
        plt.yticks([])
        plt.xlabel('Corrupted Image')

        plt.subplot(3, 8, i + 17)
        img_restored = restored[i, :, :, :].transpose(1, 2, 0)
        plt.imshow(img_restored)
        plt.xticks([])
        plt.yticks([])
        plt.xlabel('Restored Image')
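# drop is defined elsewhere in the project; a plausible minimal version (an
# assumption: p is the probability of *keeping* a pixel, matching the p=0.7
# calls above) samples a binomial mask and applies it elementwise:
from theano.tensor.shared_randomstreams import RandomStreams

def drop_sketch(input, p=0.7, rng=numpy.random.RandomState(1234)):
    """Zero out each entry of `input` independently with probability 1 - p."""
    srng = RandomStreams(rng.randint(999999))
    mask = srng.binomial(n=1, p=p, size=input.shape,
                         dtype=theano.config.floatX)
    return input * mask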
def test_lenet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
               batch_size=200, filter_size=5, dnn_layers=1, n_hidden=500,
               gabor=False, lmbda=None, verbose=False):
    """
    Wrapper function for testing LeNet on the SVHN dataset.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary
    """
    print(test_lenet.__name__, nkerns, filter_size, gabor, lmbda)

    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    if gabor is True:
        # Initialize the first layer with fixed Gabor filters, replicated
        # across the three input channels
        filters = build_gabor(filter_size, nkerns[0], lmbda)
        filters = numpy.array([filters[i] for i in range(len(filters))])
        filter_weights = numpy.tile(filters, (1, 3, 1)).reshape(
            nkerns[0], 3, filter_size, filter_size)
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(batch_size, 3, 32, 32),
                                    filter_shape=(nkerns[0], 3, filter_size, filter_size),
                                    poolsize=(2, 2),
                                    weights=filter_weights)
        print('using fixed Gabor filter weights for layer 0')
    else:
        # Construct the first convolutional pooling layer
        layer0 = LeNetConvPoolLayer(rng,
                                    input=layer0_input,
                                    image_shape=(batch_size, 3, 32, 32),
                                    filter_shape=(nkerns[0], 3, filter_size, filter_size),
                                    poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    i_s_1 = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], i_s_1, i_s_1),
                                filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # construct the fully-connected hidden layers
    i_s_2 = (i_s_1 - filter_size + 1) // 2
    if hasattr(n_hidden, '__iter__'):
        assert len(n_hidden) == dnn_layers
    else:
        n_hidden = (n_hidden,) * dnn_layers

    DNN_Layers = []
    for i in range(dnn_layers):
        h_input = layer2_input if i == 0 else DNN_Layers[i - 1].output
        h_in = nkerns[1] * i_s_2 * i_s_2 if i == 0 else n_hidden[i - 1]
        DNN_Layers.append(
            HiddenLayer(rng=rng,
                        input=h_input,
                        n_in=h_in,
                        n_out=n_hidden[i],
                        activation=T.tanh))

    # classify the values of the last fully-connected layer
    LR_Layer = LogisticRegression(input=DNN_Layers[-1].output,
                                  n_in=n_hidden[-1],
                                  n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = LR_Layer.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        LR_Layer.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent;
    # when Gabor filters are used, layer 0 is fixed and therefore excluded
    params = LR_Layer.params
    for layer in DNN_Layers:
        params += layer.params
    if gabor is True:
        params += layer1.params
    else:
        params += layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # SGD updates, built by looping over all (params[i], grads[i]) pairs
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
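# build_gabor is defined elsewhere in the project; for reference, a
# hypothetical generator of n Gabor kernels (standard Gabor formulation with
# evenly spaced orientations; sigma, gamma and psi here are illustrative
# assumptions, not the project's actual constants):
import numpy as np

def build_gabor_sketch(size, n, lmbda, sigma=2.0, gamma=0.5, psi=0.0):
    coords = np.arange(size) - (size - 1) / 2.0
    xs, ys = np.meshgrid(coords, coords)
    kernels = []
    for theta in np.linspace(0, np.pi, n, endpoint=False):
        xr = xs * np.cos(theta) + ys * np.sin(theta)
        yr = -xs * np.sin(theta) + ys * np.cos(theta)
        k = (np.exp(-(xr ** 2 + gamma ** 2 * yr ** 2) / (2 * sigma ** 2))
             * np.cos(2 * np.pi * xr / lmbda + psi))
        kernels.append(k)
    return np.array(kernels)  # shape: (n, size, size)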
def test_CDNN(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512],
              batch_size=200, n_hidden=[200, 200, 200], verbose=True):
    """
    Wrapper function for testing a CNN in cascade with a DNN.
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        # (batch size, num input feature maps, image height, image width)
        image_shape=(batch_size, 3, 32, 32),
        # (number of filters, num input feature maps, filter height, filter width)
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer: (32 - 5 + 1) / 2 = 14
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 14, 14),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    layer2_input = layer1.output.flatten(2)

    # cascade of three fully-connected sigmoidal layers
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 5 * 5,
        n_out=n_hidden[0],
        activation=T.nnet.sigmoid
    )

    layer3 = HiddenLayer(
        rng,
        input=layer2.output,
        n_in=n_hidden[0],
        n_out=n_hidden[1],
        activation=T.nnet.sigmoid
    )

    layer4 = HiddenLayer(
        rng,
        input=layer3.output,
        n_in=n_hidden[1],
        n_out=n_hidden[2],
        activation=T.nnet.sigmoid
    )

    layer5 = LogisticRegression(
        input=layer4.output,
        n_in=n_hidden[2],
        n_out=10
    )

    # the cost we minimize during training is the NLL of the model
    cost = layer5.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer5.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (layer5.params + layer4.params + layer3.params
              + layer2.params + layer1.params + layer0.params)

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # SGD updates, built by looping over all (params[i], grads[i]) pairs
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    train_nn(train_model, validate_model, test_model,
             n_train_batches, n_valid_batches, n_test_batches, n_epochs, verbose)
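# A hedged usage sketch for test_CDNN: the three hidden widths can be varied
# independently, e.g. a narrower cascade for a quicker run (the values below
# are illustrative, not tuned).
test_CDNN(learning_rate=0.1, n_epochs=100, nkerns=[16, 512],
          batch_size=200, n_hidden=[100, 100, 100], verbose=True)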
def test_convnet(learning_rate=0.1, n_epochs=1000, nkerns=[16, 512, 20],
                 batch_size=200, verbose=False, filter_size=2):
    """
    Wrapper function for testing a Multi-Stage ConvNet on the SVHN dataset.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples in a minibatch

    :type verbose: boolean
    :param verbose: whether to print out an epoch summary
    """
    rng = numpy.random.RandomState(23455)

    datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as a 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer
    layer0 = LeNetConvPoolLayer(rng,
                                input=layer0_input,
                                image_shape=(batch_size, 3, 32, 32),
                                filter_shape=(nkerns[0], 3, filter_size, filter_size),
                                poolsize=(2, 2))

    # Construct the second convolutional pooling layer
    new_shape = (32 - filter_size + 1) // 2
    layer1 = LeNetConvPoolLayer(rng,
                                input=layer0.output,
                                image_shape=(batch_size, nkerns[0], new_shape, new_shape),
                                filter_shape=(nkerns[1], nkerns[0], filter_size, filter_size),
                                poolsize=(2, 2))

    # Combine Layer 0 output and Layer 1 output: downsample the first layer
    # output to match the size of the second layer output
    layer0_output_ds = downsample.max_pool_2d(input=layer0.output,
                                              ds=(2, 2),
                                              ignore_border=True)
    # concatenate along the feature-map axis
    layer2_input = T.concatenate([layer1.output, layer0_output_ds], axis=1)

    # Construct the third convolutional pooling layer
    new_shape = (new_shape - filter_size + 1) // 2
    layer2 = LeNetConvPoolLayer(rng,
                                input=layer2_input,
                                image_shape=(batch_size, nkerns[0] + nkerns[1], new_shape, new_shape),
                                filter_shape=(nkerns[2], nkerns[0] + nkerns[1], filter_size, filter_size),
                                poolsize=(2, 2))

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e. a matrix of rasterized images).
    layer3_input = layer2.output.flatten(2)

    # construct a fully-connected hidden layer (tanh)
    new_shape = (new_shape - filter_size + 1) // 2
    layer3 = HiddenLayer(rng,
                         input=layer3_input,
                         n_in=nkerns[2] * new_shape * new_shape,
                         n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected hidden layer
    layer4 = LogisticRegression(input=layer3.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer4.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        [index],
        layer4.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # create a list of all model parameters to be fit by gradient descent
    params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # SGD updates, built by looping over all (params[i], grads[i]) pairs
    updates = [(param_i, param_i - learning_rate * grad_i)
               for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    return train_nn(train_model, validate_model, test_model,
                    n_train_batches, n_valid_batches, n_test_batches,
                    n_epochs, verbose)
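# A hedged usage sketch: comparing filter sizes on the multi-stage net; the
# return value is whatever train_nn reports (assumed to be test performance).
for fs in [2, 3, 5]:
    print(fs, test_convnet(n_epochs=10, filter_size=fs, verbose=False))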