def load_test_pictures(data, mode=0, data_labels=None):
    """Load test images (and labels) from one of three kinds of source.

    Args:
        data: Source of the images. Passed to ``np.load`` (mode 1),
            ``unpickle`` (mode 2) or ``wczytaj_obrazek`` (mode 3).
        mode: 1 for ``.npy`` data, 2 for pickled data, 3 for a single image
            source. Every other value is rejected.
        data_labels: Source of the labels. Required in modes 1 and 2,
            ignored in mode 3.

    Returns:
        A ``(testX, testY)`` tuple. In modes 1 and 2 ``testX`` is reshaped to
        ``(-1, 1, 28, 28)`` and divided by 255. ``(None, None)`` is returned,
        after printing a message, when the mode is not recognised or when
        mode 1/2 is requested without ``data_labels``.
    """
    if mode in (1, 2) and data_labels is not None:
        if mode == 1:
            testX = np.load(data)
            testY = np.load(data_labels)
        else:
            testX = unpickle(data, 28 * 28)
            testY = unpickle(data_labels, 7)
        # Both array formats get the same layout and [0, 1] scaling.
        testX = testX.reshape(-1, 1, 28, 28) / 255.
        return testX, testY

    if mode == 3:
        # The loader also reports width and height; they are not needed here.
        img, _width, _height = wczytaj_obrazek(data, 28)
        img[0] = img[0] / 255.
        # No labels exist for a raw image, so hand back zero placeholders.
        # NOTE(review): one placeholder per entry of ``img`` is assumed;
        # confirm the label shape expected by callers.
        testY = np.zeros(len(img))
        return img, testY

    print('Wrong open mode!')
    return None, None
def mnist(ntrain=60000, ntest=10000, onehot=True):
    """Load, shuffle, scale and truncate the character training/validation sets.

    The four ``.npy`` files are read through ``unpickle`` and converted to
    ``uint8`` arrays. Training and validation samples are shuffled
    independently (labels follow their images), the images are divided by
    255, and only the first ``ntrain`` / ``ntest`` samples are kept.

    Args:
        ntrain: Maximum number of training samples to return.
        ntest: Maximum number of validation samples to return.
        onehot: Accepted for interface compatibility; not used.

    Returns:
        ``(trX, teX, trY, teY)``: training images, validation images,
        training labels, validation labels.
    """
    train_images = np.asarray(unpickle('baza_uczaca_znaki.npy', 28*28), np.uint8)
    train_labels = np.asarray(unpickle('baza_uczaca_znaki_labels.npy', 36), np.uint8)
    valid_images = np.asarray(unpickle('baza_walidujaca_znaki.npy', 28*28), np.uint8)
    valid_labels = np.asarray(unpickle('baza_walidujaca_znaki_labels.npy', 36), np.uint8)

    train_order = np.arange(len(train_images))
    valid_order = np.arange(len(valid_images))
    # The validation permutation is drawn before the training one; the order
    # matters for reproducibility under a fixed random seed.
    np.random.shuffle(valid_order)
    np.random.shuffle(train_order)

    trX = (train_images[train_order] / 255.)[:ntrain]
    trY = train_labels[train_order][:ntrain]
    teX = (valid_images[valid_order] / 255.)[:ntest]
    teY = valid_labels[valid_order][:ntest]
    return trX, teX, trY, teY
def load_data(dataset, mode='valid', amount='full'):
    """Load the validation or test images as Theano shared variables.

    :type dataset: string
    :param dataset: not used; the pickle paths under ``data/`` are fixed

    :type mode: string
    :param mode: ``'valid'`` loads a pickled ``(inputs, targets)`` set;
                 ``'test'`` loads the numbered test batches and pairs them
                 with dummy all-zero targets

    :type amount: string
    :param amount: ``'full'`` or ``'min'``.  In ``'test'`` mode ``'full'``
                   reads batches 1..300 and any other value reads batches
                   1..6

    :returns: a one-element list ``[(x, y)]`` where ``x`` is a shared
              variable of dtype ``floatX`` and ``y`` is the shared targets
              cast to ``int32``

    :raises NotImplementedError: if ``mode`` is neither ``'valid'`` nor
                                 ``'test'``, or if ``amount`` is not
                                 recognised in ``'valid'`` mode
    """
    print('... loading data')

    if mode == 'valid':
        if amount == 'full':
            print('loading full valid set')
            train_set = unpickle('data/valid_set_gray.pkl')
        elif amount == 'min':
            print('loading min valid set')
            train_set = unpickle('data/min_valid_set_gray.pkl')
        else:
            print('amount shoule be either full or min')
            raise NotImplementedError()
    elif mode == 'test':
        full = amount == 'full'
        if full:
            print('loading full test data...')
            n_batches = 300  # TBF: hard code
        else:
            print('loading min test data...')
            n_batches = 6  # TBF: hard code
        images = []
        for i in range(1, n_batches + 1):
            if full:
                # Progress is only reported for the long, full load.
                print('%d / %d' % (i, n_batches))
            images.extend(unpickle('data/test_set_gray_' + str(i) + '.pkl'))
        # Test data has no targets; use zeros so the pair has the usual shape.
        train_set = (images, [0] * len(images))
    else:
        # Fail here with a clear error instead of an unbound local below.
        raise NotImplementedError('unknown mode: %r' % (mode,))

    print('done!')

    def shared_dataset(data_xy, borrow=True):
        """Wrap an ``(inputs, targets)`` pair in Theano shared variables."""
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # Targets are stored as floatX and cast back to int32 for use.
        return shared_x, T.cast(shared_y, 'int32')

    train_set_x, train_set_y = shared_dataset(train_set)
    return [(train_set_x, train_set_y)]
def loan_prediction():
    """Score the JSON payload of the current request with the saved model.

    The payload is turned into a feature row by
    ``arrange_values.arrangemet``, scored by the model unpickled from
    ``save.p``, and the first prediction is returned as a JSON response
    under the key ``'result'``.  The raw prediction is also printed.
    """
    payload = request.get_json()
    features = arrange_values.arrangemet(payload)
    classifier = un.unpickle('save.p')
    # predict() expects a batch, so wrap the single row in a list.
    prediction = classifier.predict([features])
    print(prediction)
    response_body = {'result': prediction[0]}
    return jsonify(response_body)
def main():
    """Load the search resources and prompt the user for a query.

    Loads the GloVe vectors, the ``unpickle.unpickle()`` data, the pickled
    IDF / caption / COCO tables and the pickled image embeddings, then asks
    for a search string on standard input.
    """
    def read_pickle(filename):
        # Every pickle is opened in binary mode and closed right after use.
        with open(filename, mode="rb") as handle:
            return pickle.load(handle)

    path = r"glove.6B.50d.txt.w2v"
    glove = KeyedVectors.load_word2vec_format(path, binary=False)
    resnet = unpickle.unpickle()

    # Call make_database.make_database() here only if the pickle files
    # below need to be regenerated.
    idfs = read_pickle("idfs1.pkl")
    img_to_caption = read_pickle("img_to_caption1.pkl")
    img_to_coco = read_pickle("img_to_coco1.pkl")

    # To regenerate img_embeddings.pkl: load weight.npy and bias.npy, compute
    # image * weights + bias for every image in ``resnet``, and pickle the
    # resulting dict to img_embeddings.pkl.
    img_embeddings = read_pickle("img_embeddings.pkl")

    # NOTE(review): every key is mapped to the ``sim.sim`` callable itself,
    # not to a computed similarity; confirm this is intended.
    cos_sims = {key: sim.sim for key in img_embeddings}

    query = input("Welcome to Image Search! What would you like to search?\t")
def bank_request():
    """Score an uploaded data file and total the balances predicted ``'yes'``.

    The file uploaded under the form key ``'file'`` is turned into a data
    frame by ``dp.process_data`` and scored with the model unpickled from
    ``gradBoost.p``.

    Returns:
        A JSON string with ``'result'`` (the integer sum of the ``balance``
        values whose prediction equals ``'yes'``) and ``'plot_result'``
        (the output of ``dp.final_data`` on the frame with predictions
        appended).
    """
    # Only the uploaded file is read; the request body is not parsed as
    # JSON because nothing in this handler uses a JSON payload.
    uploaded = request.files['file']
    df = dp.process_data(uploaded)
    balances = df['balance']

    model = un.unpickle('gradBoost.p')
    predictions = model.predict(df)

    annotated = dp.append_dataframe_prediction(df, predictions)
    plot_result = dp.final_data(annotated)

    total_amount = sum(amount
                       for amount, prediction in zip(balances, predictions)
                       if prediction == 'yes')
    return json.dumps({
        'result': int(total_amount),
        'plot_result': plot_result
    })
def load_dataset():
    """Load CIFAR-10 from ``cifar-10-batches-py`` as standardised splits.

    The five training batches are concatenated; the first 7,500 rows become
    the validation split and rows 7,500..49,999 the training split.  All
    three splits are standardised with the per-feature mean and standard
    deviation of the *training* split, so no test-set statistics are used.

    Returns:
        ``(Xtr_rows, Ytr, Xval_rows, Yval, Xte_rows, Yte)``.
    """
    root = 'cifar-10-batches-py/'
    train_batches = [unpickle(root + 'data_batch_%d' % i) for i in range(1, 6)]
    test = unpickle(root + 'test_batch')

    X_train = numpy.concatenate([batch["data"] for batch in train_batches],
                                axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in train_batches])

    # Use the test set's own row count for the reshape, not a training
    # batch's.
    test_set = test["data"]
    Xte_rows = test_set.reshape(test_set.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    n_valid = 7500
    Xval_rows = X_train[:n_valid, :]       # first 7,500 rows for validation
    Yval = y_train[:n_valid]
    Xtr_rows = X_train[n_valid:50000, :]   # remaining rows for training
    Ytr = y_train[n_valid:50000]

    # Both statistics come from the training split only.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)

    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    return (Xtr_rows, Ytr, Xval_rows, Yval, Xte_rows, Yte)
def evaluate_lenet5(learning_rate=0.1, learning_rate2=0.05,
                    learning_rate3=0.01, n_epochs=200,
                    dataset='cifar-10-batches-py', nkerns=(6, 16),
                    batch_size=20, mode='train', amount='full'):
    """ Train a LeNet-style network on 32x32 single-channel images, or use
    saved parameters to label the test set.

    In ``'train'`` mode the network is trained with plain SGD, the
    parameters are saved after every minibatch and the validation error is
    reported periodically.  In ``'test'`` mode the parameters are read from
    ``params/convolutional_mlp_gray.pkl`` and the predicted labels are
    written to ``result/convolutional_mlp_gray.csv``.

    :type learning_rate: float
    :param learning_rate: step size used during the first epoch and while
                          the validation error is 0.45 or higher

    :type learning_rate2: float
    :param learning_rate2: step size used while the validation error is
                           strictly between 0.35 and 0.45

    :type learning_rate3: float
    :param learning_rate3: step size used once the validation error is 0.35
                           or lower

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: passed through to ``load_data``

    :type nkerns: sequence of ints
    :param nkerns: number of kernels on each convolutional layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type mode: string
    :param mode: ``'train'`` or ``'test'``

    :type amount: string
    :param amount: passed through to ``load_data``

    :raises ValueError: if ``mode`` is neither ``'train'`` nor ``'test'``
    """
    import timeit

    if mode not in ('train', 'test'):
        raise ValueError("mode must be 'train' or 'test', got %r" % (mode,))
    training = mode == 'train'

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset, mode=mode, amount=amount)
    if training:
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        # Floor division: a trailing partial minibatch is dropped.
        n_train_batches = (train_set_x.get_value(borrow=True).shape[0]
                           // batch_size)
        n_valid_batches = (valid_set_x.get_value(borrow=True).shape[0]
                           // batch_size)
    else:
        test_set_x, test_set_y = datasets[0]
        n_test_batches = (test_set_x.get_value(borrow=True).shape[0]
                          // batch_size)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of ints

    def minibatch(shared):
        """Symbolic slice selecting minibatch ``index`` of ``shared``."""
        return shared[index * batch_size: (index + 1) * batch_size]

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 32*32)
    # to a 4D tensor, compatible with LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 1, 32, 32))

    # First convolutional pooling layer:
    # filtering reduces the image size to (32-5+1, 32-5+1) = (28, 28)
    # maxpooling reduces this further to (28/2, 28/2) = (14, 14)
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 32, 32),
                                filter_shape=(nkerns[0], 1, 5, 5),
                                poolsize=(2, 2))

    # Second convolutional pooling layer:
    # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10)
    # maxpooling reduces this further to (10/2, 10/2) = (5, 5)
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 14, 14),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5),
                                poolsize=(2, 2))

    # The hidden layer is fully connected, so it works on a 2D matrix of
    # shape (batch_size, nkerns[1] * 5 * 5).
    layer2 = HiddenLayer(rng, input=layer1.output.flatten(2),
                         n_in=nkerns[1] * 5 * 5, n_out=500,
                         activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # Order in which parameters are saved and restored.
    saved_vars = [layer0.W, layer0.b, layer1.W, layer1.b,
                  layer2.W, layer2.b, layer3.W, layer3.b]

    if training:
        # the cost we minimize during training is the NLL of the model
        cost = layer3.negative_log_likelihood(y)

        validate_model = theano.function(
            [index], layer3.errors(y),
            givens={x: minibatch(valid_set_x), y: minibatch(valid_set_y)})

        params = layer3.params + layer2.params + layer1.params + layer0.params
        grads = T.grad(cost, params)

        def compile_trainer(rate):
            """Compile one SGD step over a training minibatch at ``rate``."""
            updates = [(param_i, param_i - rate * grad_i)
                       for param_i, grad_i in zip(params, grads)]
            return theano.function(
                [index], cost, updates=updates,
                givens={x: minibatch(train_set_x),
                        y: minibatch(train_set_y)})

        train_model = compile_trainer(learning_rate)
        train_model2 = compile_trainer(learning_rate2)
        train_model3 = compile_trainer(learning_rate3)

        # Compiled once here; it is called after every minibatch below.
        get_params = theano.function(inputs=[], outputs=saved_vars)
    else:
        ## load the saved parameters and substitute them into the graph
        learned_params = unpickle('params/convolutional_mlp_gray.pkl')
        givens = {x: minibatch(test_set_x)}
        for position, shared_var in enumerate(saved_vars):
            givens[shared_var] = learned_params[position]
        get_test_labels = theano.function([index], layer3.y_pred,
                                          givens=givens)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    start_time = timeit.default_timer()

    if training:
        # early-stopping parameters
        patience = 10000  # look as this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.999  # a relative improvement of this much
                                       # is considered significant
        # Validate at least once per epoch.
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        best_iter = 0
        test_score = 0.
        this_validation_loss = numpy.inf

        for epoch in range(1, n_epochs + 1):
            for minibatch_index in range(n_train_batches):
                iteration = (epoch - 1) * n_train_batches + minibatch_index
                if iteration % 100 == 0:
                    print('training @ iter =  %s' % iteration)

                # Choose the step size from the latest validation error;
                # the first epoch always uses the largest one.
                if epoch == 1:
                    trainer = train_model
                elif 0.35 < this_validation_loss < 0.45:
                    trainer = train_model2
                elif this_validation_loss <= 0.35:
                    trainer = train_model3
                else:
                    trainer = train_model
                trainer(minibatch_index)

                ## save the parameters after every minibatch
                save_parameters(get_params(), 'convolutional_mlp_gray')

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i)
                                         for i in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%'
                          % (epoch, minibatch_index + 1, n_train_batches,
                             this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * \
                                improvement_threshold:
                            patience = max(patience,
                                           iteration * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iteration
                # NOTE: early stopping on ``patience`` is disabled, so all
                # ``n_epochs`` epochs are always run.
    else:
        print('predicting the labels...')
        pred_labels = []
        for i in range(n_test_batches):
            print('%d / %d' % (i + 1, n_test_batches))
            pred_labels.append(get_test_labels(i))

        # The context manager flushes and closes the CSV file on exit.
        with open('result/convolutional_mlp_gray.csv', 'w') as csv_file:
            writer = csv.writer(csv_file)
            row = 1
            print('output test labels...')
            for i, batch_labels in enumerate(pred_labels):
                print('%d / %d' % (i + 1, len(pred_labels)))
                for label in batch_labels:
                    writer.writerow([row, label])
                    row += 1

    end_time = timeit.default_timer()
    if training:
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i,'
              'with test performance %f %%' %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
def load_data(dataset):
    ''' Loads the minimal train/valid sets and the first test batch

    :type dataset: string
    :param dataset: not used; the pickle paths under ``data/`` are fixed

    Returns ``[(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]``
    where every ``x`` is a Theano shared variable of dtype ``floatX`` and
    every ``y`` is the shared targets cast to ``int32``.  The test targets
    are all zeros.
    '''
    print('... loading data')

    print('min training...')
    train_set = unpickle('data/min_train_set_gray.pkl')
    valid_set = unpickle('data/min_valid_set_gray.pkl')

    print('loading test data...')
    test_inputs = unpickle('data/test_set_gray_1.pkl')
    # The test batch has no targets; pair it with one zero per example.
    test_set = (test_inputs, [0] * len(test_inputs))
    print('done!')

    def shared_dataset(data_xy, borrow=True):
        """ Wrap an ``(input, target)`` pair in Theano shared variables

        Shared variables let Theano keep the whole dataset in GPU memory
        (when running on a GPU) instead of copying each minibatch over,
        which would be much slower.
        """
        inputs, targets = data_xy
        as_floatx = lambda values: theano.shared(
            numpy.asarray(values, dtype=theano.config.floatX),
            borrow=borrow)
        shared_x = as_floatx(inputs)
        shared_y = as_floatx(targets)
        # Data on the GPU has to be stored as floats, so the targets are
        # kept as ``floatX`` too; they are used as indices in computations,
        # hence the cast back to int.
        return shared_x, T.cast(shared_y, 'int32')

    return [shared_dataset(split)
            for split in (train_set, valid_set, test_set)]
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='cifar-10-batches-py', batch_size=1000,
                           mode='train', amount='full'):
    """ Train a logistic-regression classifier on 32x32 rasterized images
    with stochastic gradient descent, or use saved parameters to label the
    test set.

    In ``'train'`` mode the model is trained with early stopping and the
    learned parameters are saved as ``'logistic_sgd_gray'``.  In ``'test'``
    mode the parameters are read from ``params/logistic_sgd_gray.pkl`` and
    the predicted labels are written to ``result/logistic_sgd_gray.csv``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: passed through to ``load_data``

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type mode: string
    :param mode: ``'train'`` or ``'test'``

    :type amount: string
    :param amount: passed through to ``load_data``

    :raises ValueError: if ``mode`` is neither ``'train'`` nor ``'test'``
    """
    import timeit

    if mode not in ('train', 'test'):
        raise ValueError("mode must be 'train' or 'test', got %r" % (mode,))
    training = mode == 'train'

    datasets = load_data(dataset, mode=mode, amount=amount)
    if training:
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        # Floor division: a trailing partial minibatch is dropped.
        n_train_batches = (train_set_x.get_value(borrow=True).shape[0]
                           // batch_size)
        n_valid_batches = (valid_set_x.get_value(borrow=True).shape[0]
                           // batch_size)
    else:
        test_set_x, test_set_y = datasets[0]
        n_test_batches = (test_set_x.get_value(borrow=True).shape[0]
                          // batch_size)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of ints

    def minibatch(shared):
        """Symbolic slice selecting minibatch ``index`` of ``shared``."""
        return shared[index * batch_size: (index + 1) * batch_size]

    # construct the logistic regression class; each input row has 32*32
    # values and there are 10 output classes
    classifier = LogisticRegression(input=x, n_in=32 * 32, n_out=10)

    if training:
        # the cost we minimize during training is the negative log
        # likelihood of the model in symbolic format
        cost = classifier.negative_log_likelihood(y)

        # mistakes made by the model on a validation minibatch
        validate_model = theano.function(
            inputs=[index], outputs=classifier.errors(y),
            givens={x: minibatch(valid_set_x), y: minibatch(valid_set_y)})

        # gradient of cost with respect to theta = (W, b)
        g_W = T.grad(cost=cost, wrt=classifier.W)
        g_b = T.grad(cost=cost, wrt=classifier.b)
        updates = [(classifier.W, classifier.W - learning_rate * g_W),
                   (classifier.b, classifier.b - learning_rate * g_b)]

        # returns the cost and applies ``updates`` in the same call
        train_model = theano.function(
            inputs=[index], outputs=cost, updates=updates,
            givens={x: minibatch(train_set_x), y: minibatch(train_set_y)})
    else:
        ## load the saved parameters and substitute them into the graph
        learned_params = unpickle('params/logistic_sgd_gray.pkl')
        get_test_labels = theano.function(
            [index], classifier.y_pred,
            givens={x: minibatch(test_set_x),
                    classifier.W: learned_params[0],
                    classifier.b: learned_params[1]})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    start_time = timeit.default_timer()
    epoch = 0

    if training:
        # early-stopping parameters
        patience = 5000  # look as this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.995  # a relative improvement of this much
                                       # is considered significant
        # Validate at least once per epoch.
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        test_score = 0.
        done_looping = False

        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in range(n_train_batches):
                train_model(minibatch_index)
                # iteration number
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    validation_losses = [validate_model(i)
                                         for i in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    print('epoch %i, minibatch %i/%i, validation error %f %%'
                          % (epoch, minibatch_index + 1, n_train_batches,
                             this_validation_loss * 100.))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * \
                                improvement_threshold:
                            patience = max(patience,
                                           iteration * patience_increase)
                        best_validation_loss = this_validation_loss

                if patience <= iteration:
                    done_looping = True
                    break
    else:
        # output test labels
        print('predicting the labels...')
        pred_labels = []
        for i in range(n_test_batches):
            print('%d / %d' % (i + 1, n_test_batches))
            pred_labels.append(get_test_labels(i))

        # The context manager flushes and closes the CSV file on exit.
        with open('result/logistic_sgd_gray.csv', 'w') as csv_file:
            writer = csv.writer(csv_file)
            row = 1
            print('output test labels...')
            for i, batch_labels in enumerate(pred_labels):
                print('%d / %d' % (i + 1, len(pred_labels)))
                for label in batch_labels:
                    writer.writerow([row, label])
                    row += 1

    end_time = timeit.default_timer()
    if training:
        print(('Optimization complete with best validation score of %f %%,'
               'with test performance %f %%') %
              (best_validation_loss * 100., test_score * 100.))
    print('The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.1fs' % ((end_time - start_time)) +
                     '\n')

    if training:
        print('saving the parameters learned...')
        get_params = theano.function(inputs=[],
                                     outputs=[classifier.W, classifier.b])
        save_parameters(get_params(), 'logistic_sgd_gray')
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100000,
             dataset='cifar-10-batches-py', batch_size=32, test_batch_size=32,
             n_hidden_1=500, n_hidden_2=500, mode='train', amount='full',
             valid_num=10000):
    """ Train a multilayer perceptron with SGD, or use saved parameters to
    label the test set.

    In ``'train'`` mode the model is trained with early stopping; whenever
    the validation error improves the parameters are saved as ``'mlp'``.
    In ``'test'`` mode the parameters are read from ``params/mlp.pkl`` and
    the predictions are written to ``result/mlp.csv``.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: weight of the L1 regularization term in the cost

    :type L2_reg: float
    :param L2_reg: weight of the squared-L2 regularization term in the cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: passed through to ``load_data``

    :type batch_size: int
    :param batch_size: minibatch size for training and validation

    :type test_batch_size: int
    :param test_batch_size: minibatch size used in ``'test'`` mode

    :type n_hidden_1: int
    :param n_hidden_1: passed to ``MLP`` as ``n_hidden_1``

    :type n_hidden_2: int
    :param n_hidden_2: passed to ``MLP`` as ``n_hidden_2``

    :type mode: string
    :param mode: ``'train'`` or ``'test'``

    :type amount: string
    :param amount: passed through to ``load_data``

    :type valid_num: int
    :param valid_num: passed through to ``load_data``

    :raises ValueError: if ``mode`` is neither ``'train'`` nor ``'test'``
    """
    import timeit

    if mode not in ('train', 'test'):
        raise ValueError("mode must be 'train' or 'test', got %r" % (mode,))
    training = mode == 'train'

    datasets = load_data(dataset, mode, amount, valid_num)
    if training:
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        # Floor division: a trailing partial minibatch is dropped.
        n_train_batches = (train_set_x.get_value(borrow=True).shape[0]
                           // batch_size)
        n_valid_batches = (valid_set_x.get_value(borrow=True).shape[0]
                           // batch_size)
    else:
        test_set_x, test_set_y = datasets[0]
        n_test_batches = (test_set_x.get_value(borrow=True).shape[0]
                          // test_batch_size)

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of ints

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=769, n_hidden_1=n_hidden_1,
                     n_hidden_2=n_hidden_2, n_out=2)

    # Order in which parameters are saved and restored.
    # NOTE(review): only hidden_layer_1 and the output layer are persisted
    # although the MLP is also given ``n_hidden_2``; confirm whether a second
    # hidden layer's parameters need to be saved and restored as well.
    saved_vars = [classifier.hidden_layer_1.W,
                  classifier.hidden_layer_1.b,
                  classifier.log_regression_layer.W,
                  classifier.log_regression_layer.b]

    if training:
        def minibatch(shared):
            """Symbolic slice selecting minibatch ``index`` of ``shared``."""
            return shared[index * batch_size:(index + 1) * batch_size]

        # the cost we minimize during training is the negative log
        # likelihood of the model plus the regularization terms (L1 and L2)
        cost = classifier.negative_log_likelihood(y) \
            + L1_reg * classifier.L1 \
            + L2_reg * classifier.L2_sqr

        validate_model = theano.function(
            inputs=[index], outputs=classifier.errors(y),
            givens={x: minibatch(valid_set_x), y: minibatch(valid_set_y)})
        train_error_model = theano.function(
            inputs=[index], outputs=classifier.errors(y),
            givens={x: minibatch(train_set_x), y: minibatch(train_set_y)})
        get_train_labels = theano.function(
            [index], classifier.log_regression_layer.ex_y,
            givens={x: minibatch(train_set_x)})

        # one plain SGD update per model parameter
        gparams = [T.grad(cost, param) for param in classifier.params]
        updates = [(param, param - learning_rate * gparam)
                   for param, gparam in zip(classifier.params, gparams)]

        # returns the cost and applies ``updates`` in the same call
        train_model = theano.function(
            inputs=[index], outputs=cost, updates=updates,
            givens={x: minibatch(train_set_x), y: minibatch(train_set_y)})

        # Compiled once here; it is called on every improvement below.
        get_params = theano.function(inputs=[], outputs=saved_vars)
    else:
        ## load the saved parameters and substitute them into the graph
        learned_params = unpickle('params/mlp.pkl')
        givens = {x: test_set_x[index * test_batch_size:
                                (index + 1) * test_batch_size]}
        for position, shared_var in enumerate(saved_vars):
            givens[shared_var] = learned_params[position]
        get_test_labels = theano.function(
            [index], classifier.log_regression_layer.y_pred, givens=givens)

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    best_validation_loss = numpy.inf
    best_iter = 0

    start_time = timeit.default_timer()

    if training:
        # early-stopping parameters
        patience = 1000000  # look as this many examples regardless
        patience_increase = 2  # wait this much longer when a new best is
                               # found
        improvement_threshold = 0.999  # a relative improvement of this much
                                       # is considered significant
        # Validate at least once per epoch.
        validation_frequency = min(n_train_batches, patience // 2)

        epoch = 0
        done_looping = False
        while (epoch < n_epochs) and (not done_looping):
            epoch += 1
            for minibatch_index in range(n_train_batches):
                train_model(minibatch_index)
                # iteration number
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation and training sets
                    validation_losses = [validate_model(i)
                                         for i in range(n_valid_batches)]
                    this_validation_loss = numpy.mean(validation_losses)
                    train_losses = [train_error_model(i)
                                    for i in range(n_train_batches)]
                    this_train_loss = numpy.mean(train_losses)

                    print('epoch %i, minibatch %i/%i, validation error (MAE) %f' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss))
                    print('epoch %i, minibatch %i/%i, training error (MAE) %f' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_train_loss))

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        ## save the parameters
                        save_parameters(get_params(), 'mlp')

                        # improve patience if loss improvement is good enough
                        if this_validation_loss < best_validation_loss * \
                                improvement_threshold:
                            patience = max(patience,
                                           iteration * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                if patience <= iteration:
                    done_looping = True
                    break

        # Training-set outputs are only needed for this final summary, so
        # they are computed once, with the final parameters.
        pred_labels = [get_train_labels(i) for i in range(n_train_batches)]
        print(' '.join(['max predicted labels:'] +
                       [str(max(batch_labels))
                        for batch_labels in pred_labels]))
    else:
        print('predicting the labels...')
        pred_labels = []
        for i in range(n_test_batches):
            print('%d / %d' % (i + 1, n_test_batches))
            pred_labels.append(get_test_labels(i))

        # The context manager flushes and closes the CSV file on exit.
        with open('result/mlp.csv', 'w') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(['id', 'loss'])
            row = 105472  # first ID of test data
            print('output test labels...')
            for i, batch_labels in enumerate(pred_labels):
                print('%d / %d' % (i + 1, len(pred_labels)))
                for label in batch_labels:
                    writer.writerow([row, label])
                    row += 1

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f '
           'obtained at iteration %i') %
          (best_validation_loss, best_iter + 1))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
def _collect_compressed_rows(get_comp_data, first_batch, n_batches,
                             batch_size, n_rows, row_width=28 * 28):
    """Run ``get_comp_data`` over consecutive minibatches and stack the rows.

    :type get_comp_data: callable
    :param get_comp_data: maps a minibatch index to the compressed rows of
        that minibatch (indexable by row)

    :type first_batch: int
    :param first_batch: index of the first minibatch to evaluate

    :type n_batches: int
    :param n_batches: number of consecutive minibatches to evaluate

    :type batch_size: int
    :param batch_size: number of rows taken from every minibatch

    :type n_rows: int
    :param n_rows: number of rows of the result; rows beyond
        ``n_batches * batch_size`` are never written and stay zero-filled

    :type row_width: int
    :param row_width: length of the zero-filled placeholder rows

    :return: ``numpy.asarray`` of the collected rows
    """
    rows = [[0] * row_width for _ in range(n_rows)]
    for offset in range(n_batches):
        comp_x = get_comp_data(first_batch + offset)
        for i in range(batch_size):
            # NOTE(review): if comp_x is a numpy array, assigning back into
            # it casts to comp_x's own dtype, so this line does not by itself
            # make the stored rows float64 -- confirm the intended dtype.
            comp_x[i] = numpy.asarray(comp_x[i], dtype=numpy.float64)
            rows[offset * batch_size + i] = comp_x[i]
    return numpy.asarray(rows)


def test_dA(learning_rate=0.1, training_epochs=20, dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_data', mode='test',
            amount='full'):
    """Compress a dataset with previously trained denoising autoencoders.

    For every noise level in ``[0, 10, 20, 30, 40, 50]`` a ``dA`` with 32*32
    visible and 784 hidden units is built, its ``W`` and ``b`` are replaced by
    the parameters stored in ``params/dA_<noise>.pkl``, and the values
    returned by ``da.get_comp_values()`` are computed minibatch by minibatch
    and written below ``output_folder``.

    :type learning_rate: float
    :param learning_rate: not used by this function

    :type training_epochs: int
    :param training_epochs: not used by this function

    :type dataset: string
    :param dataset: forwarded to ``load_data``

    :type batch_size: int
    :param batch_size: number of rows per minibatch

    :type output_folder: string
    :param output_folder: directory that receives the pickled results

    :type mode: string
    :param mode: ``'valid'`` writes one file per noise level; any other value
        splits the data into several files per noise level

    :type amount: string
    :param amount: forwarded to ``load_data``; outside ``'valid'`` mode
        ``'full'`` selects 300 output files per noise level, anything else 6
    """
    datasets = load_data(dataset, mode, amount)
    train_set_x, train_set_y = datasets[0]

    # Floor division: a trailing partial minibatch is not processed.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    for noize in [0, 10, 20, 30, 40, 50]:
        print('noize: ' + str(noize) + ' %')

        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 30))

        da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
                n_visible=32 * 32, n_hidden=784)

        # Substitute the saved parameters for the freshly initialised ones.
        learned_params = unpickle('params/dA_' + str(noize) + '.pkl')

        comp_data = da.get_comp_values()
        get_comp_data = theano.function(
            [index],
            comp_data,
            givens={
                x: train_set_x[index * batch_size:(index + 1) * batch_size],
                da.W: learned_params[0],
                da.b: learned_params[1]})

        print('creating compressed data...')
        # `pickle` here is the project's two-argument helper (object, path),
        # not the standard-library module.
        out_prefix = output_folder + '/' + mode + '_data_da_' + str(noize)

        if mode == 'valid':
            data_da = _collect_compressed_rows(
                get_comp_data, 0, n_train_batches, batch_size,
                n_train_batches * batch_size)
            pickle(data_da, out_prefix + '.pkl')
        else:
            step_size = 300 if amount == 'full' else 6
            batches_per_step = n_train_batches // step_size
            # NOTE(review): when n_train_batches is not a multiple of
            # step_size this can exceed batches_per_step * batch_size, which
            # leaves trailing all-zero rows in every file, and the remaining
            # minibatches are never written -- confirm this is acceptable.
            rows_per_step = n_train_batches * batch_size // step_size
            for step in range(1, step_size + 1):
                print(str(step) + ' / ' + str(step_size))
                data_da = _collect_compressed_rows(
                    get_comp_data, batches_per_step * (step - 1),
                    batches_per_step, batch_size, rows_per_step)
                pickle(data_da, out_prefix + '_' + str(step) + '.pkl')
def generate(rootdir):
    """Compute a range-binned spectrogram for every ship record.

    The ship records are read with ``up.unpickle(rootdir)``. For each ship
    the matching wav file is turned into a PSD spectrogram, converted to
    decibels, offset by the transfer function returned by ``tf.get_tf``, and
    re-binned by distance with ``range_spect``. The result is stored on
    ``ship.spect``; finally all ships are written back with ``up.store``.

    :param rootdir: passed unchanged to ``up.unpickle``
    """
    # Samples per FFT frame. Kept as an int because it is used both as a
    # window length and as ``nfft``.
    window_length = 10000

    ships = up.unpickle(rootdir)
    for ship in ships:
        wavfilepath = ship.filepath + ship.id + '.wav'  # the original wav file
        # destination for the spectrogram image
        destination = destination_folder + ship.year_month + '\\' + ship.id + '.png'
        print(wavfilepath)

        # convert all times and find the file start time and cpa time
        converted_times, cpa_time, start, cpa_index = convert_time(ship)

        # Distances are sliced with the cpa index returned by convert_time
        # before it is re-based onto converted_times below.
        pre_cpa = ship.distance[start:cpa_index]
        post_cpa = ship.distance[cpa_index:]
        cpa_index = converted_times.index(cpa_time)
        pre_times = converted_times[:cpa_index]
        post_times = converted_times[cpa_index:]

        # Distance as a function of time, separately before and after cpa.
        approach_inter = interpolate.interp1d(
            pre_times, pre_cpa, axis=0, fill_value="extrapolate")
        depart_inter = interpolate.interp1d(
            post_times, post_cpa, axis=0, fill_value="extrapolate")

        # original wav file samples at the original sample rate
        sample_rate, samples = wavfile.read(wavfilepath)
        sound_length = len(samples) // sample_rate
        approach_times = np.arange(0, cpa_time)
        depart_times = np.arange(cpa_time, sound_length)

        frequencies, _times, spectrogram = signal.spectrogram(
            samples, sample_rate, window=np.hanning(window_length),
            noverlap=0, nfft=window_length, mode='psd')
        uppc = tf.get_tf(ship.harp, frequencies)  # transfer function results
        spectrogram = 10 * np.log10(spectrogram)  # power to decibels
        # Repeat the transfer function once per spectrogram column so that it
        # can be added element-wise after transposing.
        uppc = npmb.repmat(uppc, np.size(spectrogram, 1), 1)
        spectrogram = spectrogram + np.transpose(uppc)

        # NOTE(review): the bin width is in the units of ship.distance; the
        # previous comment said "1m" -- confirm the unit.
        range_step = .01
        closest_range = np.min(np.abs(ship.distance))  # closest point of approach
        # distance bins from the first range down to cpa ...
        range_approach = np.arange(pre_cpa[0], closest_range, -range_step)
        # ... and from cpa out to the last range
        range_depart = np.arange(
            closest_range, post_cpa[len(post_cpa) - 1], range_step)

        spect_dis_approach = approach_inter(approach_times)
        spect_dis_depart = depart_inter(depart_times)
        approach_bins = np.digitize(spect_dis_approach, range_approach)
        depart_bins = np.digitize(spect_dis_depart, range_depart)
        approach_spect = range_spect(approach_bins, spectrogram)
        depart_spect = range_spect(depart_bins, spectrogram)

        range_spectrogram = np.concatenate((approach_spect, depart_spect), axis=1)
        ship.spect = range_spectrogram
        print(range_spectrogram)

        # NOTE(review): nothing is drawn before this call in this function,
        # so the saved figure presumably has no plot content -- confirm.
        plt.savefig(destination)  # save spectrogram at destination
        plt.close()
    up.store(ships)
def load_data(dataset, mode="train", amount="full"):
    """Load the pickled grayscale splits as Theano shared variables.

    :type dataset: string
    :param dataset: dataset path; accepted but not read here, the files are
        always taken from fixed paths below ``data/``

    :type mode: string
    :param mode: ``"train"`` loads the training and validation sets, any
        other value loads the test data

    :type amount: string
    :param amount: ``"full"`` or ``"min"`` in train mode; in test mode
        ``"full"`` reads the 300 test files and any other value reads only
        the first one

    :return: ``[(train_x, train_y), (valid_x, valid_y)]`` in train mode,
        ``[(test_x, test_y)]`` otherwise
    :raises NotImplementedError: in train mode when ``amount`` is neither
        ``"full"`` nor ``"min"``
    """

    def shared_dataset(data_xy, borrow=True):
        """Wrap an ``(inputs, targets)`` pair in shared variables.

        Both parts are stored as ``floatX``; the targets are returned
        through a cast to ``int32`` so that they can be used as labels.
        """
        features, targets = data_xy
        float_type = theano.config.floatX
        shared_x = theano.shared(numpy.asarray(features, dtype=float_type),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(targets, dtype=float_type),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, "int32")

    training = mode == "train"

    print("... loading data")

    if training:
        if amount == "full":
            print("full training...")
            paths = ("data/train_set_gray.pkl", "data/valid_set_gray.pkl")
        elif amount == "min":
            print("min training...")
            paths = ("data/min_train_set_gray.pkl",
                     "data/min_valid_set_gray.pkl")
        else:
            print("amount shoule be either full or min")
            raise NotImplementedError()
        train_set = unpickle(paths[0])
        valid_set = unpickle(paths[1])
    else:
        print("loading test data...")
        if amount == "full":
            test_rows = []
            for part in range(1, 301):  # files 1..300, hard-coded
                print(" ".join((str(part), "/", str(300))))
                test_rows.extend(
                    unpickle("data/test_set_gray_" + str(part) + ".pkl"))
        else:
            test_rows = unpickle("data/test_set_gray_1.pkl")
        # The test data comes without targets; pair it with zeros.
        test_set = (test_rows, [0] * len(test_rows))

    print("done!")

    # Every set is a tuple (input, target): one row of input per example and
    # one target entry per row.
    if training:
        train_pair = shared_dataset(train_set)
        valid_pair = shared_dataset(valid_set)
        return [train_pair, valid_pair]
    return [shared_dataset(test_set)]
def load_data(file_name):
    """Return the ``"data"`` entry of the object unpickled from *file_name*."""
    features = unpickle.unpickle(file_name)["data"]
    # NOTE(review): writes the literal 1 to stdout on every call; looks like
    # leftover debug output -- confirm before removing.
    print(1)
    return features
def evaluate_lenet5(
    learning_rate=0.15, n_epochs=200, dataset="mnist.pkl.gz", nkerns=[60, 80, 150, 150, 80], batch_size=200
):
    """Train a five-stage convolutional network on CIFAR-10.

    The five training batches and the test batch are read from
    ``cifar-10-batches-py/``. The first 7500 training rows form the
    validation set and rows 7500 up to 50000 the training set. All three
    sets are standardised with the mean and standard deviation of the
    training rows. Training uses minibatch SGD with early stopping; the
    per-iteration cost and the validation error are written to CSV files
    and plotted at the end.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: not used by this function

    :type nkerns: list of ints
    :param nkerns: number of kernels on each of the five convolutional
                   layers; it is only read, never modified

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):
        """Wrap an ``(inputs, targets)`` pair in shared variables.

        Both parts are stored as ``floatX`` so that Theano can keep them on
        the GPU; the targets are handed back through a cast to ``int32``
        because they are used as class indices.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        return shared_x, T.cast(shared_y, "int32")

    def as_rows(entries):
        """Return the collected ``[iter, epoch, value]`` entries as an (n, 3) array."""
        return numpy.reshape(entries, (len(entries), 3))

    train_batches = [unpickle("cifar-10-batches-py/data_batch_%d" % i) for i in range(1, 6)]
    test = unpickle("cifar-10-batches-py/test_batch")

    X_train = numpy.concatenate([batch["data"] for batch in train_batches], axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in train_batches])

    # Let numpy infer the row count instead of borrowing it from a training
    # batch.
    Xte_rows = test["data"].reshape(-1, 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # first 7500 rows for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # remaining rows for training
    Ytr = y_train[7500:50000]

    # Standardise with statistics of the training rows only, so that no
    # information about the test set enters the preprocessing.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)

    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    # Shared so that it could be changed between epochs; created as floatX so
    # that the parameter updates keep the dtype of the parameters.
    learning_rate = theano.shared(numpy.asarray(learning_rate, dtype=theano.config.floatX))

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of
    # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # Reshape the matrix of rasterized images of shape
    # (batch_size, 3 * 32 * 32) to the 4D tensor expected by
    # LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # NOTE(review): the image_shape given to every following layer encodes
    # the output size expected from the previous one (17, 10, 6, 4 and
    # finally 3). These sizes depend on the padding applied inside
    # LeNetConvPoolLayer, which is not visible here -- confirm.

    # First convolutional pooling layer: 3x3 filters on the 32x32 input.
    layer0 = LeNetConvPoolLayer(
        rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, 3, 3), poolsize=(2, 2)
    )

    # Second convolutional pooling layer: 2x2 filters on 17x17 maps.
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 17, 17),
        filter_shape=(nkerns[1], nkerns[0], 2, 2),
        poolsize=(2, 2),
    )

    # Third convolutional pooling layer: 3x3 filters on 10x10 maps.
    layer2conv = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], 10, 10),
        filter_shape=(nkerns[2], nkerns[1], 3, 3),
        poolsize=(2, 2),
    )

    # Fourth convolutional pooling layer: 3x3 filters on 6x6 maps.
    layer3conv = LeNetConvPoolLayer(
        rng,
        input=layer2conv.output,
        image_shape=(batch_size, nkerns[2], 6, 6),
        filter_shape=(nkerns[3], nkerns[2], 3, 3),
        poolsize=(2, 2),
    )

    # Fifth convolutional pooling layer: 3x3 filters on 4x4 maps.
    layer4conv = LeNetConvPoolLayer(
        rng,
        input=layer3conv.output,
        image_shape=(batch_size, nkerns[3], 4, 4),
        filter_shape=(nkerns[4], nkerns[3], 3, 3),
        poolsize=(2, 2),
    )

    # The fully-connected part operates on 2D matrices, so the feature maps
    # are flattened to (batch_size, nkerns[4] * 3 * 3).
    fc_input = layer4conv.output.flatten(2)

    # construct the fully-connected layers
    Fully_conected_layers = TLMLP(rng, fc_input, nkerns[4] * 3 * 3, 600, 600, 200, 10)

    # The cost we minimize during training is the NLL of the model plus an
    # L2 penalty over the fully-connected and the convolutional weights.
    L2_reg = 0.0008
    W_layers = (
        (layer0.W ** 2).sum()
        + (layer1.W ** 2).sum()
        + (layer2conv.W ** 2).sum()
        + (layer3conv.W ** 2).sum()
        + (layer4conv.W ** 2).sum()
    )
    cost = Fully_conected_layers.negative_log_likelihood(y) + L2_reg * (Fully_conected_layers.L2_sqr + W_layers)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        Fully_conected_layers.errors(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        [index],
        Fully_conected_layers.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (
        Fully_conected_layers.params
        + layer4conv.params
        + layer3conv.params
        + layer2conv.params
        + layer1.params
        + layer0.params
    )

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model updates the model parameters by SGD; the update list is
    # built by looping over all (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # go through this many minibatches before checking the network on the
    # validation set
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    epoch_loss_list = []  # [iteration, epoch, training cost]
    epoch_val_list = []  # [iteration, epoch, validation error]

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1

        if epoch > 3:
            # Write out the curves collected so far, so that they are on
            # disk even if the run is interrupted.
            numpy.savetxt(fname="epoc_cost_pad.csv", X=as_rows(epoch_loss_list), fmt="%1.3f")
            numpy.savetxt(fname="epoc_val_error_padd.csv", X=as_rows(epoch_val_list), fmt="%1.3f")

        for minibatch_index in range(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print("training @ iter = ", iteration)
            cost_ij = train_model(minibatch_index)
            epoch_loss_list.append([iteration, epoch, float(cost_ij)])

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
                )
                epoch_val_list.append([iteration, epoch, this_validation_loss])

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(
                        (" epoch %i, minibatch %i/%i, test error of " "best model %f %%")
                        % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.0)
                    )

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print(
        "Best validation score of %f %% obtained at iteration %i, "
        "with test performance %f %%" % (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0)
    )
    print(
        ("The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )

    # Average the recorded values per epoch and plot both curves.
    epoch_loss_np = as_rows(epoch_loss_list)
    epoch_val_np = as_rows(epoch_val_list)
    epoch_loss = pandas.DataFrame(
        {"iter": epoch_loss_np[:, 0], "epoch": epoch_loss_np[:, 1], "cost": epoch_loss_np[:, 2]}
    )
    epoch_vall = pandas.DataFrame(
        {"iter": epoch_val_np[:, 0], "epoch": epoch_val_np[:, 1], "val_error": epoch_val_np[:, 2]}
    )
    epoc_avg_loss = pandas.DataFrame(epoch_loss.groupby(["epoch"]).mean()["cost"])
    epoc_avg_val = pandas.DataFrame(epoch_vall.groupby(["epoch"]).mean()["val_error"])
    epoc_avg_loss = pandas.DataFrame({"epoch": epoc_avg_loss.index.values, "cost": epoc_avg_loss["cost"]})
    epoc_avg_loss_val = pandas.DataFrame({"epoch": epoc_avg_val.index.values, "val_error": epoc_avg_val["val_error"]})
    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind="line", x="epoch", y="val_error")
    plt.show()
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import os
from unpickle import unpickle

# The first training batch, the batch metadata and the test batch are read
# as soon as this code is executed, i.e. at import time.
data1 = unpickle("./cifar-10-batches-py/data_batch_1")
meta = unpickle('./cifar-10-batches-py/batches.meta')
test = unpickle("./cifar-10-batches-py/test_batch")

tf.logging.set_verbosity(tf.logging.INFO)

# Disabled experiment: rebuild every sample as a 32x32x3 image from its
# three consecutive 1024-value channel planes.
# trainData = []
# trainLabel = []
# for i in range(0,10000):
#     print(i)
#     samp = np.array(data1[b'data'][i])
#     sampr = np.reshape(samp[0:1024],(32,32))
#     sampg = np.reshape(samp[1024:2*1024],(32,32))
#     sampb = np.reshape(samp[1024*2:1024*3],(32,32))
#     trainData.append(np.dstack((sampr,sampg,sampb)))
# trainLabel = tf.constant(data1[b'labels'])
# with tf.Session() as sess:
#     print(trainData1.eval())


# NOTE(review): only the docstring is present here, so as written this
# function returns None -- confirm whether the body is still to be added.
def cnn_model_fn(features, labels, mode):
    """Model function for CNN."""
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.008, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden1=500,
             n_hidden2=50):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron on CIFAR-10.

    The five training batches and the test batch are read from
    ``cifar-10-batches-py/``. The first 7500 training rows form the
    validation set and rows 7500 up to 50000 the training set. All three
    sets are standardised with the mean and standard deviation of the
    training rows.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient); it stays constant for the whole run

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: not used by this function

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden1: int
    :type n_hidden2: int
    :param n_hidden1: see ``n_hidden2``
    :param n_hidden2: NOTE(review): neither value is read; the classifier
    below is always built with 500 units in both hidden layers. Wiring the
    arguments through would change the default model (``n_hidden2=50``), so
    confirm the intended sizes first.
    """

    def shared_dataset(data_xy, borrow=True):
        """Wrap an ``(inputs, targets)`` pair in shared variables.

        Both parts are stored as ``floatX`` so that Theano can keep them on
        the GPU; the targets are handed back through a cast to ``int32``
        because they are used as class indices.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    train_batches = [unpickle('cifar-10-batches-py/data_batch_%d' % i)
                     for i in range(1, 6)]
    test = unpickle('cifar-10-batches-py/test_batch')

    X_train = numpy.concatenate([batch["data"] for batch in train_batches],
                                axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in train_batches])

    # Let numpy infer the row count instead of borrowing it from a training
    # batch.
    Xte_rows = test["data"].reshape(-1, 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # first 7500 rows for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # remaining rows for training
    Ytr = y_train[7500:50000]

    # Standardise with statistics of the training rows only, so that no
    # information about the test set enters the preprocessing.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)

    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class (3072 = 32 * 32 * 3 inputs, 10 classes)
    classifier = MLP(rng=rng, input=x, n_in=3072, n_hidden1=500,
                     n_hidden2=500, n_out=10)

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y)
            + L1_reg * classifier.L1
            + L2_reg * classifier.L2_sqr)
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        })

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        })

    # start-snippet-5
    # compute the gradient of cost with respect to every parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # one (variable, update expression) pair per parameter
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        })
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 1000000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # check the network on the validation set once per epoch
    validation_frequency = n_train_batches

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)
def test_SdA(finetune_lr=0.1, pretraining_epochs=20,  # originally 15
             pretrain_lr=0.001, training_epochs=1000,
             dataset='cifar-10-batches-py', batch_size=1,
             mode='train', amount='full'):
    """
    Train or apply a stacked denoising autoencoder (SdA).

    With ``mode='train'`` the model is pre-trained layer by layer, then
    fine-tuned with early stopping; the parameters are written out with
    ``save_parameters(..., 'SdA')`` after every minibatch.  With
    ``mode='test'`` previously saved parameters are read from
    ``params/SdA.pkl``, labels are predicted for the test set and written to
    ``result/SdA.csv`` as ``row_number, label`` rows.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epochs of pre-training per layer

    :type pretrain_lr: float
    :param pretrain_lr: learning rate used during pre-training

    :type training_epochs: int
    :param training_epochs: maximal number of fine-tuning epochs

    :type dataset: string
    :param dataset: forwarded unchanged to ``load_data``

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type mode: string
    :param mode: ``'train'`` to train; ``'test'`` to predict.  Any value
                 other than ``'train'`` loads the test data.

    :type amount: string
    :param amount: forwarded unchanged to ``load_data``
    """
    # Local import: the timers below replace time.clock(), which no longer
    # exists on Python >= 3.8.  Note default_timer measures wall-clock time.
    import timeit

    training = (mode == 'train')
    testing = (mode == 'test')

    datasets = load_data(dataset, mode=mode, amount=amount)

    # compute number of minibatches (floor division keeps the count an int
    # on both Python 2 and Python 3)
    if training:
        train_set_x, _ = datasets[0]
        n_train_batches = (
            train_set_x.get_value(borrow=True).shape[0] // batch_size)
    else:
        test_set_x, _ = datasets[0]
        n_test_batches = (
            test_set_x.get_value(borrow=True).shape[0] // batch_size)

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images

    print('... building the model')
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng, n_ins=32 * 32,
              hidden_layers_sizes=[1300, 1300, 1300],
              n_outs=10)

    # The eight parameters that are saved after training and substituted
    # back in at test time: (W, b) of the three sigmoid layers, then the
    # logistic layer.
    model_params = []
    for layer in sda.sigmoid_layers[:3]:
        model_params.extend([layer.W, layer.b])
    model_params.extend([sda.logLayer.W, sda.logLayer.b])

    # load the saved parameters
    if testing:
        learned_params = unpickle('params/SdA.pkl')

    print('... getting the pretraining functions')
    if training:
        pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                    batch_size=batch_size)

        #########################
        # PRETRAINING THE MODEL #
        #########################
        print('... pre-training the model')
        start_time = timeit.default_timer()
        # Pre-train layer-wise
        corruption_levels = [.1, .2, .3]
        for i in range(sda.n_layers):
            # go through pretraining epochs
            for epoch in range(pretraining_epochs):
                # go through the training set
                costs = [
                    pretraining_fns[i](index=batch_index,
                                       corruption=corruption_levels[i],
                                       lr=pretrain_lr)
                    for batch_index in range(n_train_batches)
                ]
                print('Pre-training layer %i, epoch %d / %d, cost  %s'
                      % (i, epoch + 1, pretraining_epochs, numpy.mean(costs)))
        end_time = timeit.default_timer()

        sys.stderr.write('The pretraining code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                         '\n')

    ########################
    # FINETUNING THE MODEL #
    ########################
    # get the training and validation functions for the model
    print('... getting the finetuning functions')
    if training:
        train_fn, validate_model = sda.build_finetune_functions(
            datasets=datasets, batch_size=batch_size,
            learning_rate=finetune_lr)
        # Compiled once here; it reads the current values of the shared
        # parameters every time it is called.
        get_params = theano.function(inputs=[], outputs=model_params)

    print('... finetunning the model')
    if training:
        # early-stopping parameters
        patience = 10 * n_train_batches  # look as this many examples regardless
        patience_increase = 2.  # wait this much longer when a new best is
                                # found
        improvement_threshold = 0.995  # a relative improvement of this much
                                       # is considered significant
        # go through this many minibatches before checking the network on
        # the validation set; in this case we check every epoch
        validation_frequency = min(n_train_batches, patience // 2)

    # create a function to get the labels predicted by the model
    if testing:
        # NOTE(review): `x` is created in this function and is not passed to
        # SdA(...) above; confirm that it is the input variable the SdA graph
        # was actually built on.
        givens = {x: test_set_x[index * batch_size:(index + 1) * batch_size]}
        givens.update(zip(model_params, learned_params))
        get_test_labels = theano.function([index], sda.logLayer.y_pred,
                                          givens=givens)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    # Outside of training mode the fine-tuning loop is skipped entirely.
    done_looping = not training
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):
            train_fn(minibatch_index)

            # save the parameters
            save_parameters(get_params(), 'SdA')

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score
                    best_validation_loss = this_validation_loss

            if patience <= iteration:
                done_looping = True
                break

    if testing:
        print('predicting the labels...')
        pred_labels = []
        for i in range(n_test_batches):
            print('%d / %d' % (i + 1, n_test_batches))
            pred_labels.append(get_test_labels(i))

        print('output test labels...')
        # The context manager guarantees the CSV is flushed and closed.
        with open('result/SdA.csv', 'w') as out_file:
            writer = csv.writer(out_file)
            row = 1
            for i, batch_labels in enumerate(pred_labels):
                print('%d / %d' % (i + 1, len(pred_labels)))
                for label in batch_labels:
                    writer.writerow([row, label])
                    row += 1

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f %%,'
           'with test performance %f %%')
          % (best_validation_loss * 100., test_score * 100.))
    sys.stderr.write('The training code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
def load_data(dataset, mode='train', amount='full', valid_num=10000):
    '''
    Load the pickled data set and wrap it in Theano shared variables.

    :type dataset: string
    :param dataset: not used by this function; the file paths are fixed below

    :type mode: string
    :param mode: ``'train'`` loads training data and splits off a validation
                 set; any other value loads the test data

    :type amount: string
    :param amount: ``'full'`` or ``'min'``.  In training mode any other value
                   raises ``NotImplementedError``; in test mode any value
                   other than ``'full'`` loads the small test file.

    :type valid_num: int
    :param valid_num: number of trailing training examples held out for
                      validation (forced to 200 when ``amount='min'``)

    :return: ``[(train_x, train_y), (valid_x, valid_y)]`` in training mode,
             ``[(test_x, test_y)]`` otherwise, where every ``x`` is a shared
             variable and every ``y`` is a shared variable cast to int32.
             The test labels are all zero.
    '''
    print('... loading data')

    def split_tail(rows, n_tail):
        """Return (rows without the last n_tail items, the last n_tail items)."""
        # An explicit cut index avoids the `[-0:]` / `[:-0]` trap, where
        # n_tail == 0 would return everything as the tail.
        cut = max(len(rows) - n_tail, 0)
        return rows[:cut], rows[cut:]

    if mode == 'train':
        # load training and validation data
        if amount == 'full':
            print('full training...')
            train_set = unpickle('data/train_f528_f274.pkl')
        elif amount == 'min':
            print('min training...')
            train_set = unpickle('data/min_train_simple.pkl')
            valid_num = 200  # train_num: 1000 - 200 = 800
        else:
            print('amount shoule be either full or min')
            raise NotImplementedError("amount should be either 'full' or 'min'")

        # TBF: sampling of validation set should be randomized
        train_set_x, valid_set_x = split_tail(train_set[0], valid_num)
        train_set_y, valid_set_y = split_tail(train_set[1], valid_num)
        train_set = (train_set_x, train_set_y)
        valid_set = (valid_set_x, valid_set_y)
    else:
        # load test data
        print('loading test data...')
        if amount == 'full':
            test_inputs = unpickle('data/test_simple.pkl')
        else:
            test_inputs = unpickle('data/min_test_simple.pkl')
        # No labels are available for the test data: use zero placeholders.
        test_set = (test_inputs, [0] * len(test_inputs))

    print('done!')

    # train_set, valid_set, test_set format: tuple(input, target).
    # The code above pairs inputs and targets index by index.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats,
        # therefore the labels are stored as ``floatX`` as well.  During the
        # computations they are needed as ints (labels are used as indices),
        # so the shared labels are returned cast to int32.
        return shared_x, T.cast(shared_y, 'int32')

    if mode == 'train':
        return [shared_dataset(train_set), shared_dataset(valid_set)]
    return [shared_dataset(test_set)]
def predict(self, X):
    """
    Predict a label for every row of X with a 1-nearest-neighbour search.

    X is N x D where each row is an example we wish to predict label for.
    For each row the L1 distance to every row of ``self.Xtr`` is computed and
    the label in ``self.ytr`` of the closest row is returned.
    """
    num_test = X.shape[0]
    # make sure the output type matches the type of the training labels
    # (a bare np.zeros would silently turn the labels into float64)
    Ypred = np.zeros(num_test, dtype=np.asarray(self.ytr).dtype)

    # loop over all test rows
    for i in range(num_test):
        # NOTE(review): if Xtr / X hold unsigned integers the subtraction
        # below wraps around; confirm the caller passes signed or float data.
        distances = np.sum(np.abs(self.Xtr - X[i, :]), axis=1)
        min_index = np.argmin(distances)  # index of the closest training row
        Ypred[i] = self.ytr[min_index]
        print("iteration number" + str(i))

    return Ypred


data_batch_1 = unpickle('cifar-10-batches-py/data_batch_1')
test = unpickle('cifar-10-batches-py/test_batch')


def L_i(x, y, W):
    """
    unvectorized version. Compute the multiclass svm loss for a single
    example (x,y)
    - x is a column vector representing an image (e.g. 3073 x 1 in CIFAR-10)
      with an appended bias dimension in the 3073-rd position (i.e. bias trick)
    - y is an integer giving index of correct class (e.g. between 0 and 9 in
      CIFAR-10)
    - W is the weight matrix (e.g. 10 x 3073 in CIFAR-10)
    """
    # NOTE(review): the body visible here stops after extracting the score of
    # the correct class; no loss is accumulated or returned yet.
    delta = 1.0  # see notes about delta later in this section
    scores = W.dot(x)  # scores becomes of size 10 x 1, the scores for each class
    correct_class_score = scores[y]
def test_mlp(
    learning_rate=0.01,
    L1_reg=0.00,
    L2_reg=0.008,
    n_epochs=1000,
    dataset="mnist.pkl.gz",
    batch_size=20,
    n_hidden1=500,
    n_hidden2=50,
):
    """
    Train a multilayer perceptron with minibatch stochastic gradient descent.

    The data is read from the CIFAR-10 python batches under
    ``cifar-10-batches-py/``: the first 7500 training rows are used for
    validation, the remaining rows for training, and ``test_batch`` for
    testing.  All three sets are standardised with the mean and standard
    deviation of the training rows.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: not used by this function; the data paths are fixed below

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden1: int
    :type n_hidden2: int
    :param n_hidden1: see the review note at the MLP construction
    :param n_hidden2: see the review note at the MLP construction
    """

    def shared_dataset(data_xy, borrow=True):
        """Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        # Labels are stored as floatX alongside the data, but they are used
        # as indices during the computation, so return them cast to int32.
        return shared_x, T.cast(shared_y, "int32")

    #############
    # LOAD DATA #
    #############
    batches = [unpickle("cifar-10-batches-py/data_batch_%d" % number) for number in range(1, 6)]
    test = unpickle("cifar-10-batches-py/test_batch")

    X_train = numpy.concatenate([batch["data"] for batch in batches], axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in batches])

    # One flattened 32x32x3 image per row; -1 lets numpy infer the row count
    # instead of borrowing it from a training batch.
    Xte_rows = test["data"].reshape(-1, 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]
    Ytr = y_train[7500:50000]

    # Standardise with statistics of the TRAINING rows only, so nothing about
    # the test set leaks into preprocessing.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    # NOTE(review): the hidden sizes are hard-coded here, so the n_hidden1 /
    # n_hidden2 arguments are ignored.  Left as-is because wiring them in
    # would change the default architecture (n_hidden2 defaults to 50).
    classifier = MLP(rng=rng, input=x, n_in=3072, n_hidden1=500, n_hidden2=500, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    def batch_givens(set_x, set_y):
        """Map x and y to the `index`-th minibatch of the given shared set."""
        return {
            x: set_x[index * batch_size : (index + 1) * batch_size],
            y: set_y[index * batch_size : (index + 1) * batch_size],
        }

    # Theano functions that compute the mistakes made by the model on a
    # minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=batch_givens(test_set_x, test_set_y),
    )
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=batch_givens(valid_set_x, valid_set_y),
    )

    # gradient of the cost with respect to every parameter, and the plain
    # SGD update rule built from it
    gparams = [T.grad(cost, param) for param in classifier.params]
    updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)]

    # `train_model` returns the cost and, at the same time, updates the
    # parameters of the model according to `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=batch_givens(train_set_x, train_set_y),
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    # early-stopping parameters
    patience = 1000000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = n_train_batches  # validate once per epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        # NOTE(review): a call `learning_rate.set_value()` used to sit here
        # for epochs <= 3.  `learning_rate` is a plain float and the call had
        # no argument, so it raised before the first minibatch; it has been
        # removed.  A learning-rate schedule needs a theano.shared value.
        for minibatch_index in range(n_train_batches):
            train_model(minibatch_index)
            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        ("     epoch %i, minibatch %i/%i, test error of " "best model %f %%")
                        % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.0)
                    )

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            "Optimization complete. Best validation score of %f %% "
            "obtained at iteration %i, with test performance %f %%"
        )
        % (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0)
    )
    print(
        ("The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )
from os import strerror
import time
import tensorflow as tf
from tensorflow import keras
from progress.bar import IncrementalBar
import unpickle
import testingMPL

# Directory holding the extracted CIFAR-10 python batches.
CIFAR_DIR = '/Users/joeylee/downloads/cifar-10-batches-py/'
CIFAR10_files = [
    'batches.meta',
    'data_batch_1',
    'data_batch_2',
    'data_batch_3',
    'data_batch_4',
    'data_batch_5',
    'test_batch',
]

# Unpickle every file, in the order listed in CIFAR10_files.  Built in one
# pass instead of overwriting a placeholder list while iterating over it.
all_data = [unpickle.unpickle(CIFAR_DIR + file_name)
            for file_name in CIFAR10_files]

# Named handles for the individual files (same order as CIFAR10_files).
batch_meta, db_1, db_2, db_3, db_4, db_5, tb = all_data

testingMPL.testingMPL(CIFAR10_files, db_1)
def predict(self, X):
    """
    Predict a label for every row of X with a 1-nearest-neighbour search.

    X is N x D where each row is an example we wish to predict label for.
    For each row the L1 distance to every row of ``self.Xtr`` is computed and
    the label in ``self.ytr`` of the closest row is returned.
    """
    num_test = X.shape[0]
    # make sure the output type matches the type of the training labels
    # (a bare np.zeros would silently turn the labels into float64)
    Ypred = np.zeros(num_test, dtype=np.asarray(self.ytr).dtype)

    # loop over all test rows
    for i in range(num_test):
        # NOTE(review): if Xtr / X hold unsigned integers the subtraction
        # below wraps around; confirm the caller passes signed or float data.
        distances = np.sum(np.abs(self.Xtr - X[i, :]), axis=1)
        min_index = np.argmin(distances)  # index of the closest training row
        Ypred[i] = self.ytr[min_index]
        print("iteration number" + str(i))

    return Ypred


data_batch_1 = unpickle('cifar-10-batches-py/data_batch_1')
test = unpickle('cifar-10-batches-py/test_batch')


def L_i(x, y, W):
    """
    unvectorized version. Compute the multiclass svm loss for a single
    example (x,y)
    - x is a column vector representing an image (e.g. 3073 x 1 in CIFAR-10)
      with an appended bias dimension in the 3073-rd position (i.e. bias trick)
    - y is an integer giving index of correct class (e.g. between 0 and 9 in
      CIFAR-10)
    - W is the weight matrix (e.g. 10 x 3073 in CIFAR-10)
    """
    # NOTE(review): the body visible here stops after reading the number of
    # classes; no loss is accumulated or returned yet.
    delta = 1.0  # see notes about delta later in this section
    scores = W.dot(x)  # scores becomes of size 10 x 1, the scores for each class
    correct_class_score = scores[y]
    D = W.shape[0]  # number of classes, e.g. 10
def load_data(dataset, mode='train', amount='full', noize='30',
              n_test_batches=300):
    """
    Load the denoising-autoencoder data from ``dA_data/`` as shared variables.

    :type dataset: string
    :param dataset: not used by this function; the file paths are fixed below

    :type mode: string
    :param mode: ``'train'`` loads training and validation data; any other
                 value loads the test data

    :type amount: string
    :param amount: ``'full'`` or ``'min'`` (the ``_min`` files).  ``'min'`` is
                   not supported for test data.

    :type noize: string
    :param noize: noise tag inserted into the input file names

    :type n_test_batches: int
    :param n_test_batches: number of ``test_set_gray_<i>.pkl`` files to read,
                           numbered from 1

    :return: ``[(train_x, train_y), (valid_x, valid_y)]`` in training mode,
             ``[(test_x, test_y)]`` otherwise; the test labels are all zero.

    :raises NotImplementedError: for an unsupported ``amount``
    """
    print('... loading data')

    if mode == 'train':
        # load training and validation data; the 'min' files differ from the
        # full ones only by a '_min' suffix
        if amount == 'full':
            suffix = ''
        elif amount == 'min':
            suffix = '_min'
        else:
            print('amount shoule be either full or min')
            raise NotImplementedError("amount should be either 'full' or 'min'")

        train_set_x = unpickle('dA_data/train_data_da_' + noize + suffix + '.pkl')
        train_set_y = unpickle('dA_data/train_set_y' + suffix + '.pkl')
        valid_set_x = unpickle('dA_data/valid_data_da_' + noize + suffix + '.pkl')
        valid_set_y = unpickle('dA_data/valid_set_y' + suffix + '.pkl')
    else:
        # load test data
        print('loading test data...')
        if amount != 'full':
            print('not compatible with min yet...')
            raise NotImplementedError('test data is only available in full')

        test_set_x = []
        for i in range(1, n_test_batches + 1):
            print('%d / %d' % (i, n_test_batches))
            test_set_x.extend(
                unpickle('dA_data/test_set_gray_' + str(i) + '.pkl'))
        # No labels are available for the test data: use zero placeholders.
        test_set_y = [0] * len(test_set_x)

    print('done!')

    def shared_dataset_x(data_x, borrow=True):
        """Wrap the inputs in a floatX shared variable."""
        return theano.shared(numpy.asarray(data_x,
                                           dtype=theano.config.floatX),
                             borrow=borrow)

    def shared_dataset_y(data_y, borrow=True):
        """Wrap the labels in a floatX shared variable, exposed as int32."""
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return T.cast(shared_y, 'int32')

    if mode == 'train':
        return [(shared_dataset_x(train_set_x), shared_dataset_y(train_set_y)),
                (shared_dataset_x(valid_set_x), shared_dataset_y(valid_set_y))]
    return [(shared_dataset_x(test_set_x), shared_dataset_y(test_set_y))]
def evaluate_lenet5(learning_rate=0.15, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=(20, 20), batch_size=500):
    """ Demonstrates lenet on CIFAR-10 dataset

    The data is read from the CIFAR-10 python batches under
    ``cifar-10-batches-py/``: the first 7500 training rows are used for
    validation, the remaining rows for training, and ``test_batch`` for
    testing.  Per-iteration cost and per-validation error are logged to
    ``epoc_cost.csv`` / ``epoc_val_error.csv`` and plotted at the end.

    :type learning_rate: float
    :param learning_rate: initial learning rate (factor for the stochastic
                          gradient); it is set to 0.1 at epoch 10

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: not used by this function; the data paths are fixed below

    :type nkerns: sequence of ints
    :param nkerns: number of kernels on each of the two convolution layers

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # Labels are stored as floatX alongside the data, but they are used
        # as indices during the computation, so return them cast to int32.
        return shared_x, T.cast(shared_y, 'int32')

    def as_table(rows):
        """Turn a list of 3-item log entries into an (n, 3) array."""
        return numpy.reshape(rows, (len(rows), 3))

    #############
    # LOAD DATA #
    #############
    batches = [unpickle('cifar-10-batches-py/data_batch_%d' % number)
               for number in range(1, 6)]
    test = unpickle('cifar-10-batches-py/test_batch')

    X_train = numpy.concatenate([batch["data"] for batch in batches], axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in batches])

    # One flattened 32x32x3 image per row; -1 lets numpy infer the row count
    # instead of borrowing it from a training batch.
    Xte_rows = test["data"].reshape(-1, 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # first 7500 rows for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # remaining rows for training
    Ytr = y_train[7500:50000]

    # Standardise with statistics of the TRAINING rows only, so nothing about
    # the test set leaks into preprocessing.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    # Shared so the learning rate can be changed while training runs.
    lr_shared = theano.shared(learning_rate)

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape the matrix of rasterized images of shape
    # (batch_size, 3 * 32 * 32) to a 4D tensor compatible with
    # LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # First convolutional pooling layer.  The image_shape handed to the next
    # layer is (16, 16), i.e. the 5x5 filtering is taken to preserve the
    # 32x32 size before 2x2 max-pooling halves it
    # (presumably padding inside LeNetConvPoolLayer; confirm).
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # Second convolutional pooling layer: (16, 16) in, and the hidden layer
    # below expects (8, 8) out.
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 16, 16),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # The HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels); flatten to
    # (batch_size, nkerns[1] * 8 * 8).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer (activation: relu)
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 8 * 8,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model plus an
    # L2 penalty on the weights of the two dense layers
    L2_reg = 0.001
    L2_sqr = (
        (layer2.W ** 2).sum()
        + (layer3.W ** 2).sum()
    )
    cost = layer3.negative_log_likelihood(y) + L2_reg * L2_sqr

    def batch_givens(set_x, set_y):
        """Map x and y to the `index`-th minibatch of the given shared set."""
        return {
            x: set_x[index * batch_size: (index + 1) * batch_size],
            y: set_y[index * batch_size: (index + 1) * batch_size]
        }

    # functions that compute the mistakes made by the model on a minibatch
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens=batch_givens(test_set_x, test_set_y)
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens=batch_givens(valid_set_x, valid_set_y)
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # plain SGD: one (parameter, updated value) pair per model parameter
    updates = [
        (param_i, param_i - lr_shared * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens=batch_givens(train_set_x, train_set_y)
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False
    epoch_loss_list = []  # [iteration, epoch, training cost]
    epoch_val_list = []   # [iteration, epoch, validation error]

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        if epoch == 10:
            lr_shared.set_value(0.1)

        if epoch > 3:
            # Checkpoint the logs collected so far at the start of the epoch.
            numpy.savetxt(fname='epoc_cost.csv',
                          X=as_table(epoch_loss_list), fmt='%1.3f')
            numpy.savetxt(fname='epoc_val_error.csv',
                          X=as_table(epoch_val_list), fmt='%1.3f')

        for minibatch_index in range(n_train_batches):

            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            cost_ij = train_model(minibatch_index)
            epoch_loss_list.append([iteration, epoch, float(cost_ij)])

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                epoch_val_list.append([iteration, epoch,
                                       this_validation_loss])

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < (best_validation_loss *
                                               improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)),
          file=sys.stderr)

    ####################
    # PLOT THE HISTORY #
    ####################
    epoch_loss_np = as_table(epoch_loss_list)
    epoch_val_np = as_table(epoch_val_list)

    epoch_loss = pandas.DataFrame({"iter": epoch_loss_np[:, 0],
                                   "epoch": epoch_loss_np[:, 1],
                                   "cost": epoch_loss_np[:, 2]})
    epoch_vall = pandas.DataFrame({"iter": epoch_val_np[:, 0],
                                   "epoch": epoch_val_np[:, 1],
                                   "val_error": epoch_val_np[:, 2]})

    # average the logged values per epoch
    epoc_avg_loss = pandas.DataFrame(
        epoch_loss.groupby(['epoch']).mean()["cost"])
    epoc_avg_val = pandas.DataFrame(
        epoch_vall.groupby(['epoch']).mean()["val_error"])

    epoc_avg_loss = pandas.DataFrame({"epoch": epoc_avg_loss.index.values,
                                      "cost": epoc_avg_loss["cost"]})
    epoc_avg_loss_val = pandas.DataFrame(
        {"epoch": epoc_avg_val.index.values,
         "val_error": epoc_avg_val["val_error"]})

    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind='line', x="epoch", y="val_error")
    plt.show()
from __future__ import print_function
import os
import sys
import timeit
import numpy
#import matplotlib.pyplot as plt
from LogisticRegression import LogisticRegression
import theano
import theano.tensor as T
import theano.tensor.nnet as nnet
import numpy as np
from unpickle import unpickle

# Read the five training batch files in order, then the test batch.
# Generator expressions are used so no loop variable is left behind at
# module level.
(data_batch_1,
 data_batch_2,
 data_batch_3,
 data_batch_4,
 data_batch_5) = (
    unpickle(batch_path)
    for batch_path in (
        'cifar-10-batches-py/data_batch_1',
        'cifar-10-batches-py/data_batch_2',
        'cifar-10-batches-py/data_batch_3',
        'cifar-10-batches-py/data_batch_4',
        'cifar-10-batches-py/data_batch_5',
    )
)
test = unpickle('cifar-10-batches-py/test_batch')

# Pull the "data" entry out of every training batch.
(train_set_1,
 train_set_2,
 train_set_3,
 train_set_4,
 train_set_5) = (
    batch["data"]
    for batch in (data_batch_1, data_batch_2, data_batch_3,
                  data_batch_4, data_batch_5)
)

# Join the per-batch arrays along the first axis into one training set,
# with the labels concatenated in the same batch order.
X_train = np.concatenate(
    [train_set_1, train_set_2, train_set_3, train_set_4, train_set_5],
    axis=0)
y_train = np.concatenate(tuple(
    batch["labels"]
    for batch in (data_batch_1, data_batch_2, data_batch_3,
                  data_batch_4, data_batch_5)
))
from unpickle import unpickle
from json import dumps
import pickle

# Export script: combines the result of unpickle() with the contents of
# shapecolour.p and writes them to processed.json as pretty-printed JSON.

# Evaluated first, matching the order in which the values were originally
# computed.
most_common = unpickle()

# Read the pickle through a context manager so the handle is closed right
# after loading instead of being left open.
# NOTE: pickle.load can execute arbitrary code; only use it on a trusted file.
with open("shapecolour.p", "rb") as raw_file:
    raw_data = pickle.load(raw_file)

content = dumps(
    {
        'mostCommon': most_common,
        'rawData': raw_data,
    },
    sort_keys=True,
    indent=4)

# The handle is no longer named `file`, which shadows a builtin on Python 2.
with open('processed.json', 'w') as out_file:
    out_file.write(content)
def main():
    """Train ``Model`` with a margin ranking loss and save ``dense1``'s parameters.

    Steps performed:

    * load the GloVe vectors from ``glove.6B.50d.txt.w2v``, the mapping
      returned by ``unpickle.unpickle()`` (indexed by the same ids as
      ``img_to_caption``; presumably image id -> image descriptor, confirm
      against ``unpickle``), the IDF table from ``idfs1.pkl`` and the
      captions from ``img_to_caption1.pkl``;
    * restore ``model.dense1`` from ``weight.npy`` and ``bias.npy``;
    * for 100 epochs, shuffle the ids and walk over consecutive batches of
      100: batch ``k`` supplies the matching images and captions, batch
      ``k + 1`` supplies the non-matching images;
    * write ``weight.npy`` and ``bias.npy`` back at the end of every epoch.

    The function takes no arguments and returns ``None``; all file names
    are relative to the current working directory.
    """
    path = r"glove.6B.50d.txt.w2v"
    glove = KeyedVectors.load_word2vec_format(path, binary=False)

    # NOTE: captions_train2014.json was previously parsed here, but the
    # parsed result was never read, so the load has been dropped.
    resnet = unpickle.unpickle()
    with open("idfs1.pkl", mode="rb") as idf:
        idfs = pickle.load(idf)
    with open("img_to_caption1.pkl", mode="rb") as cap:
        img_to_caption = pickle.load(cap)

    model = Model()
    model.dense1.weight = mg.Tensor(np.load('weight.npy'))
    model.dense1.bias = mg.Tensor(np.load('bias.npy'))
    optim = Adam(model.parameters)

    batch_size = 100
    for epoch_cnt in range(100):
        idxs = list(resnet.keys())
        np.random.shuffle(idxs)

        # The last full batch is only ever used as the "bad" partner of the
        # one before it, hence the "- 1" on the batch count.
        for batch_cnt in range(0, len(idxs) // batch_size - 1):
            start = batch_cnt * batch_size
            good_ids = idxs[start:start + batch_size]
            bad_ids = idxs[start + batch_size:start + 2 * batch_size]

            # Stack each batch once; growing the arrays with one vstack per
            # item re-copies everything accumulated so far on every step.
            good_image = np.vstack([resnet[i] for i in good_ids])
            text = np.vstack([
                embed_text.se_text(img_to_caption[i][0], glove, idfs)
                for i in good_ids
            ])
            bad_image = np.vstack([resnet[i] for i in bad_ids])

            sim_to_good = cos_sim.cos_sim(model(good_image), text)
            sim_to_bad = cos_sim.cos_sim(model(bad_image), text)

            # margin ranking loss between the two similarity sets
            # (target 1, margin 0.1)
            loss = margin_ranking_loss(sim_to_good, sim_to_bad, 1, 0.1)

            # back-propagate through the computational graph
            loss.backward()

            # NOTE(review): the accuracy is computed but not reported
            # anywhere; the call is kept in case accuracy() is relied on
            # elsewhere.
            acc = accuracy(sim_to_good.data, sim_to_bad.data)

            # execute gradient descent by calling step() of optim
            optim.step()

            # null the gradients before the next batch
            loss.null_gradients()

        # Save at the end of each epoch: the files left after the final
        # epoch hold the fully trained parameters, and an interrupted run
        # keeps the last completed epoch.
        np.save('weight', model.dense1.parameters[0].data)
        np.save('bias', model.dense1.parameters[1].data)