Beispiel #1
0
def load_test_pictures(data, mode=0, data_labels=None):
    """Load test pictures (and labels when available), scaled by 1/255.

    Args:
        data: Location of the picture data; how it is read depends on
            ``mode``.
        mode: ``1`` reads ``data`` and ``data_labels`` with ``np.load``;
            ``2`` reads them with ``unpickle``; ``3`` reads a single picture
            through ``wczytaj_obrazek``.  Every other value is rejected.
        data_labels: Location of the labels.  Required for modes 1 and 2 and
            ignored for mode 3.

    Returns:
        A ``(testX, testY)`` tuple.  In modes 1 and 2 ``testX`` is reshaped
        to ``(-1, 1, 28, 28)`` and divided by 255, and ``testY`` is returned
        as loaded.  In mode 3 ``testX`` is the object returned by
        ``wczytaj_obrazek`` and ``testY`` is an all-zero placeholder array
        with one entry per element of ``testX``.  ``(None, None)`` is
        returned (after printing a message) when the mode is unknown or the
        labels required by mode 1/2 are missing.
    """
    if mode in (1, 2) and data_labels is not None:
        if mode == 1:
            images = np.load(data)
            labels = np.load(data_labels)
        else:
            images = unpickle(data, 28 * 28)
            labels = unpickle(data_labels, 7)
        # Both sources share the same post-processing.
        return images.reshape(-1, 1, 28, 28) / 255., labels

    if mode == 3:
        img, _width, _height = wczytaj_obrazek(data, 28)
        # NOTE(review): this writes the scaled values back into ``img``; if
        # ``img`` is an integer array the division result would be truncated
        # on assignment -- confirm the type returned by wczytaj_obrazek.
        img[0] = img[0] / 255.
        # No labels exist for a single picture.  The original code returned
        # the *function* ``np.zeros`` here; return an actual zero array.
        return img, np.zeros(len(img))

    print('Wrong open mode!')
    return None, None
def mnist(ntrain=60000, ntest=10000, onehot=True):
    """Load, shuffle, scale and truncate the training and validation sets.

    The four ``.npy`` files are read through ``unpickle`` and converted to
    ``uint8`` arrays.  Each set is shuffled with its own random permutation
    (images and labels share the permutation), the images are divided by
    255, and the first ``ntrain`` / ``ntest`` rows are kept.

    Args:
        ntrain: Maximum number of training rows to return.
        ntest: Maximum number of validation rows to return.
        onehot: Accepted but not used by this function.

    Returns:
        ``(trX, teX, trY, teY)``: training images, validation images,
        training labels, validation labels.
    """
    def _read(fname, width):
        return np.asarray(unpickle(fname, width), np.uint8)

    train_images = _read('baza_uczaca_znaki.npy', 28 * 28)
    train_labels = _read('baza_uczaca_znaki_labels.npy', 36)
    test_images = _read('baza_walidujaca_znaki.npy', 28 * 28)
    test_labels = _read('baza_walidujaca_znaki_labels.npy', 36)

    train_order = np.arange(len(train_images))
    test_order = np.arange(len(test_images))
    # The test permutation is drawn first so the sequence of draws from the
    # global NumPy random state is unchanged.
    np.random.shuffle(test_order)
    np.random.shuffle(train_order)

    trX = (train_images[train_order] / 255.)[:ntrain]
    trY = train_labels[train_order][:ntrain]
    teX = (test_images[test_order] / 255.)[:ntest]
    teY = test_labels[test_order][:ntest]

    return trX, teX, trY, teY
def load_data(dataset, mode='valid', amount='full'):
    """Load a grey-scale data set and wrap it in Theano shared variables.

    Args:
        dataset: Accepted for interface compatibility; not used here (all
            file paths are fixed below).
        mode: ``'valid'`` loads a pickled validation set; ``'test'`` loads
            the numbered test batches and pairs them with all-zero
            placeholder labels.
        amount: ``'full'`` or ``'min'``.  In ``'valid'`` mode any other value
            raises.  In ``'test'`` mode every value other than ``'full'``
            selects the small set (batches 1-6 instead of 1-300).

    Returns:
        A one-element list ``[(x, y)]`` where ``x`` is a shared variable of
        dtype ``theano.config.floatX`` and ``y`` is the shared label vector
        cast to ``int32``.

    Raises:
        NotImplementedError: for an unsupported ``amount`` in ``'valid'``
            mode, or for a ``mode`` other than ``'valid'`` / ``'test'``
            (previously the latter surfaced as an unbound-variable error).
    """
    print('... loading data')

    if mode == 'valid':
        if amount == 'full':
            print('loading full valid set')
            train_set = unpickle('data/valid_set_gray.pkl')
        elif amount == 'min':
            print('loading min valid set')
            train_set = unpickle('data/min_valid_set_gray.pkl')
        else:
            print('amount shoule be either full or min')
            raise NotImplementedError()
    elif mode == 'test':
        show_progress = amount == 'full'
        if show_progress:
            print('loading full test data...')
            n_batches = 300  # TBF: hard coded
        else:
            print('loading min test data...')
            n_batches = 6  # TBF: hard coded
        samples = []
        for i in range(1, n_batches + 1):
            if show_progress:
                print('%d / %d' % (i, n_batches))
            samples.extend(unpickle('data/test_set_gray_' + str(i) + '.pkl'))
        # Test data carries no labels; use zeros so the pair has the same
        # layout as a labelled set.
        train_set = (samples, [0] * len(samples))
        print('done!')
    else:
        raise NotImplementedError('unsupported mode: %r' % (mode,))

    def shared_dataset(data_xy, borrow=True):
        """Return ``data_xy`` as (shared floatX inputs, int32 labels)."""
        data_x, data_y = data_xy
        shared_x = theano.shared(
            numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(
            numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        # Labels are stored as floatX and cast back to integers for use.
        return shared_x, T.cast(shared_y, 'int32')

    train_set_x, train_set_y = shared_dataset(train_set)
    return [(train_set_x, train_set_y)]
def loan_prediction():
    """Predict a loan outcome for the JSON payload of the current request.

    The payload is converted by ``arrange_values.arrangemet``, fed as a
    single sample to the model loaded from ``save.p``, and the first
    prediction is returned as ``{'result': ...}`` via ``jsonify``.  The raw
    prediction is also printed.
    """
    payload = request.get_json()
    features = arrange_values.arrangemet(payload)
    # NOTE(review): presumably un.unpickle deserialises a pickle file; only
    # trusted model files should be loaded this way -- confirm.
    prediction = un.unpickle('save.p').predict([features])
    print(prediction)
    return jsonify({'result': prediction[0]})
Beispiel #5
0
def main():
    """Load the image-search resources and prompt the user for a query.

    Loads the GloVe vectors, the object returned by ``unpickle.unpickle()``
    and four pickle files, builds a dict keyed like the image embeddings,
    then reads a query from standard input.  The visible body ends after the
    prompt: nothing is returned and the loaded objects are not used further.
    """
    def _load_pickle(filename):
        with open(filename, mode="rb") as handle:
            return pickle.load(handle)

    glove = KeyedVectors.load_word2vec_format(r"glove.6B.50d.txt.w2v", binary=False)

    resnet = unpickle.unpickle()
    # Call make_database.make_database() here only to re-pickle the files.

    idfs = _load_pickle("idfs1.pkl")
    img_to_caption = _load_pickle("img_to_caption1.pkl")
    img_to_coco = _load_pickle("img_to_coco1.pkl")

    # To regenerate img_embeddings.pkl: load weight.npy and bias.npy, compute
    # image * weights + bias for every image in ``resnet`` and pickle the
    # resulting dict.
    img_embeddings = _load_pickle("img_embeddings.pkl")

    # NOTE(review): every key is mapped to the ``sim.sim`` attribute itself,
    # not to the result of calling it -- confirm this is intended.
    cos_sims = {key: sim.sim for key in img_embeddings}

    query = input("Welcome to Image Search! What would you like to search?\t")
def bank_request():
    """Score the uploaded file and total the balances predicted as 'yes'.

    The uploaded ``file`` is turned into a data frame by ``dp.process_data``
    and scored with the model loaded from ``gradBoost.p``.  The response is a
    JSON string with ``result`` (sum of the ``balance`` values whose
    prediction equals ``'yes'``, as an int) and ``plot_result`` (the output
    of ``dp.final_data`` on the frame with the predictions appended).
    """
    request.get_json()  # parsed as before; the value is not used below
    upload = request.files['file']
    df = dp.process_data(upload)
    balances = df['balance']
    predictions = un.unpickle('gradBoost.p').predict(df)
    plot_result = dp.final_data(dp.append_dataframe_prediction(df, predictions))

    total_amount = sum(
        amount
        for amount, prediction in zip(balances, predictions)
        if prediction == 'yes'
    )
    return json.dumps({
        'result': int(total_amount),
        'plot_result': plot_result
    })
def load_dataset():
    """Load the CIFAR-10 python batches and return standardised splits.

    The five ``data_batch_*`` files are concatenated.  Rows ``[0, 7500)``
    become the validation split and rows ``[7500, 50000)`` the training
    split; ``test_batch`` is the test split.  Every split is standardised
    with the per-column mean and standard deviation of the *training* rows.

    Returns:
        ``(Xtr_rows, Ytr, Xval_rows, Yval, Xte_rows, Yte)``.
    """
    root = 'cifar-10-batches-py/'
    train_batches = [unpickle(root + 'data_batch_%d' % i) for i in range(1, 6)]
    test = unpickle(root + 'test_batch')

    X_train = numpy.concatenate([batch["data"] for batch in train_batches], axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in train_batches])

    # Let NumPy infer the row count from the test data itself; the original
    # borrowed it from the first training batch.
    Xte_rows = test["data"].reshape(-1, 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    n_valid = 7500
    Xval_rows = X_train[:n_valid, :]       # first 7,500 rows for validation
    Yval = y_train[:n_valid]
    Xtr_rows = X_train[n_valid:50000, :]   # remaining rows up to 50,000 for training
    Ytr = y_train[n_valid:50000]

    # Both statistics come from the training rows only.  The original took
    # the standard deviation from the test rows, which leaks test-set
    # information into the preprocessing.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    return (Xtr_rows, Ytr, Xval_rows, Yval, Xte_rows, Yte)
def evaluate_lenet5(learning_rate=0.1, learning_rate2=0.05, learning_rate3=0.01, n_epochs=200,
                    dataset='cifar-10-batches-py',
                    nkerns=[6, 16], batch_size=20, mode='train', amount='full'):
    """Train a LeNet-style network on 32x32 grey images, or predict with it.

    In ``'train'`` mode the network is fitted by minibatch SGD, the
    parameters are saved after every minibatch under the name
    ``convolutional_mlp_gray`` and the validation error is reported
    periodically.  In ``'test'`` mode the parameters are read from
    ``params/convolutional_mlp_gray.pkl`` and the predicted labels are
    written to ``result/convolutional_mlp_gray.csv`` as ``row, label`` lines.

    :type learning_rate: float
    :param learning_rate: step size used during the first epoch and whenever
                          the latest validation error is at least 0.45

    :type learning_rate2: float
    :param learning_rate2: step size used when the latest validation error
                           is in [0.35, 0.45)

    :type learning_rate3: float
    :param learning_rate3: step size used when the latest validation error
                           is below 0.35

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: forwarded to ``load_data``

    :type nkerns: list of ints
    :param nkerns: number of kernels on each convolutional layer (the
                   original author noted [10, 50] could also work); the list
                   is only read, never modified

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type mode: string
    :param mode: ``'train'`` or ``'test'``

    :type amount: string
    :param amount: forwarded to ``load_data``

    :raises ValueError: if ``mode`` is neither ``'train'`` nor ``'test'``
                        (such values previously failed later with a
                        NameError)
    """
    if mode not in ('train', 'test'):
        raise ValueError("mode must be 'train' or 'test', got %r" % (mode,))
    training = mode == 'train'

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset, mode=mode, amount=amount)

    # Unpack the data and compute the number of whole minibatches.
    if training:
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    else:
        test_set_x, _ = datasets[0]
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 32*32)
    # to a 4D tensor, compatible with LeNetConvPoolLayer.
    layer0_input = x.reshape((batch_size, 1, 32, 32))

    # First convolutional pooling layer:
    # filtering reduces the image size to (32-5+1, 32-5+1) = (28, 28),
    # maxpooling reduces this further to (28/2, 28/2) = (14, 14).
    layer0 = LeNetConvPoolLayer(rng, input=layer0_input,
                                image_shape=(batch_size, 1, 32, 32),
                                filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2))

    # Second convolutional pooling layer:
    # filtering reduces the image size to (14-5+1, 14-5+1) = (10, 10),
    # maxpooling reduces this further to (10/2, 10/2) = (5, 5).
    layer1 = LeNetConvPoolLayer(rng, input=layer0.output,
                                image_shape=(batch_size, nkerns[0], 14, 14),
                                filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2))

    # The fully-connected hidden layer works on 2D input, so flatten every
    # example to a row of nkerns[1] * 5 * 5 values.
    layer2_input = layer1.output.flatten(2)
    layer2 = HiddenLayer(rng, input=layer2_input, n_in=nkerns[1] * 5 * 5,
                         n_out=500, activation=T.tanh)

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # Rows of the shared data sets that belong to minibatch ``index``.
    batch = slice(index * batch_size, (index + 1) * batch_size)

    if training:
        # the cost we minimize during training is the NLL of the model
        cost = layer3.negative_log_likelihood(y)

        # mistakes made by the model on one validation minibatch
        validate_model = theano.function(
            [index], layer3.errors(y),
            givens={x: valid_set_x[batch], y: valid_set_y[batch]})

        # all model parameters fitted by gradient descent, and their gradients
        params = layer3.params + layer2.params + layer1.params + layer0.params
        grads = T.grad(cost, params)

        def make_trainer(rate):
            """Compile an SGD step that uses the given learning rate."""
            updates = [(param_i, param_i - rate * grad_i)
                       for param_i, grad_i in zip(params, grads)]
            return theano.function(
                [index], cost, updates=updates,
                givens={x: train_set_x[batch], y: train_set_y[batch]})

        train_model = make_trainer(learning_rate)
        train_model2 = make_trainer(learning_rate2)
        train_model3 = make_trainer(learning_rate3)

        # Compiled once here; the original recompiled this function on every
        # minibatch inside the training loop.
        get_params = theano.function(
            inputs=[],
            outputs=[layer0.W, layer0.b, layer1.W, layer1.b,
                     layer2.W, layer2.b, layer3.W, layer3.b])
    else:
        # load the saved parameters and substitute them into the graph
        learned_params = unpickle('params/convolutional_mlp_gray.pkl')
        get_test_labels = theano.function(
            [index], layer3.y_pred,
            givens={
                x: test_set_x[batch],
                layer0.W: learned_params[0],
                layer0.b: learned_params[1],
                layer1.W: learned_params[2],
                layer1.b: learned_params[3],
                layer2.W: learned_params[4],
                layer2.b: learned_params[5],
                layer3.W: learned_params[6],
                layer3.b: learned_params[7]})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')  # printed in both modes, as before

    start_time = time.clock()

    if training:
        patience = 10000               # initial patience, in iterations
        patience_increase = 2          # factor applied on a significant improvement
        improvement_threshold = 0.999  # relative improvement considered significant
        # Number of minibatches between two validation passes; with these
        # settings validation runs at least once per epoch.
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        best_iter = 0
        test_score = 0.
        this_validation_loss = numpy.inf

        for epoch in range(1, n_epochs + 1):
            for minibatch_index in range(n_train_batches):
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if iteration % 100 == 0:
                    print('training @ iter =  %s' % iteration)

                # Pick the step size from the latest validation error.  An
                # error of exactly 0.35 now selects learning_rate2; it used
                # to fall through to the largest rate.
                if epoch == 1 or this_validation_loss >= 0.45:
                    step = train_model
                elif this_validation_loss >= 0.35:
                    step = train_model2
                else:
                    step = train_model3
                step(minibatch_index)

                # save the current parameters after every minibatch
                save_parameters(get_params(), 'convolutional_mlp_gray')

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    this_validation_loss = numpy.mean(
                        [validate_model(i) for i in range(n_valid_batches)])
                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    if this_validation_loss < best_validation_loss:
                        # Early stopping is disabled in this function;
                        # patience is still tracked so it can be re-enabled.
                        if this_validation_loss < (best_validation_loss *
                                                   improvement_threshold):
                            patience = max(patience, iteration * patience_increase)

                        best_validation_loss = this_validation_loss
                        best_iter = iteration
    else:
        print('predicting the labels...')
        pred_labels = []
        for i in range(n_test_batches):
            print('%s / %s' % (i + 1, n_test_batches))
            pred_labels.append(get_test_labels(i))

        # The context manager closes (and flushes) the result file; the
        # original never closed the handle.
        with open('result/convolutional_mlp_gray.csv', 'w') as out:
            writer = csv.writer(out)
            row = 1

            print('output test labels...')
            for i, labels in enumerate(pred_labels):
                print('%s / %s' % (i + 1, len(pred_labels)))
                for label in labels:
                    writer.writerow([row, label])
                    row += 1

    end_time = time.clock()
    if training:
        print('Optimization complete.')
        print('Best validation score of %f %% obtained at iteration %i,'
              'with test performance %f %%' %
              (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) + '\n')
def load_data(dataset):
    """Load the small grey-scale train/valid sets and the first test batch.

    :type dataset: string
    :param dataset: accepted for interface compatibility; the file paths
                    below are fixed and this argument is not used

    Returns a list of three ``(inputs, labels)`` pairs -- train, valid,
    test -- where each input is a Theano shared variable of dtype
    ``theano.config.floatX`` and each label vector is cast to ``int32``.
    The test labels are all-zero placeholders.
    """
    print('... loading data')

    print('min training...')
    train_set = unpickle('data/min_train_set_gray.pkl')
    valid_set = unpickle('data/min_valid_set_gray.pkl')
    print('loading test data...')

    test_inputs = unpickle('data/test_set_gray_1.pkl')
    test_set = (test_inputs, [0] * len(test_inputs))
    print('done!')

    # Each set is a tuple (input, target): one input row per example and one
    # target entry per input row.

    def shared_dataset(data_xy, borrow=True):
        """Put one (inputs, labels) pair into Theano shared variables.

        Shared variables let Theano keep the whole data set on the GPU
        instead of copying every minibatch separately.
        """
        inputs, labels = data_xy
        as_floatx = lambda values: theano.shared(
            numpy.asarray(values, dtype=theano.config.floatX), borrow=borrow)
        shared_inputs = as_floatx(inputs)
        shared_labels = as_floatx(labels)
        # Labels are stored as floatX like the inputs, but they are used as
        # indices during computation, so hand out an int32 view of them.
        return shared_inputs, T.cast(shared_labels, 'int32')

    return [shared_dataset(part) for part in (train_set, valid_set, test_set)]
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='cifar-10-batches-py',
                           batch_size=1000, mode='train', amount='full'):
    """Fit a logistic-regression classifier by minibatch SGD, or predict.

    In ``'train'`` mode the model (``n_in=32 * 32``, ``n_out=10``) is trained
    with early stopping on the validation error and the learned parameters
    are saved under the name ``logistic_sgd_gray``.  In ``'test'`` mode the
    parameters are read from ``params/logistic_sgd_gray.pkl`` and the
    predicted labels are written to ``result/logistic_sgd_gray.csv`` as
    ``row, label`` lines.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: forwarded to ``load_data``

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type mode: string
    :param mode: ``'train'`` or ``'test'``

    :type amount: string
    :param amount: forwarded to ``load_data``

    :raises ValueError: if ``mode`` is neither ``'train'`` nor ``'test'``
                        (such values previously failed later with a
                        NameError)
    """
    if mode not in ('train', 'test'):
        raise ValueError("mode must be 'train' or 'test', got %r" % (mode,))
    training = mode == 'train'

    datasets = load_data(dataset, mode=mode, amount=amount)

    # Unpack the data and compute the number of whole minibatches.
    if training:
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    else:
        test_set_x, _ = datasets[0]
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')    # the data is presented as rasterized images
    y = T.ivector('y')   # the labels are presented as 1D vector of [int] labels

    # construct the logistic regression class
    classifier = LogisticRegression(input=x, n_in=32 * 32, n_out=10)

    # Rows of the shared data sets that belong to minibatch ``index``.
    batch = slice(index * batch_size, (index + 1) * batch_size)

    if training:
        # negative log likelihood of the model, in symbolic form
        cost = classifier.negative_log_likelihood(y)

        # mistakes made by the model on one validation minibatch
        validate_model = theano.function(
            inputs=[index],
            outputs=classifier.errors(y),
            givens={x: valid_set_x[batch], y: valid_set_y[batch]})

        # gradient of the cost with respect to theta = (W, b)
        g_W = T.grad(cost=cost, wrt=classifier.W)
        g_b = T.grad(cost=cost, wrt=classifier.b)

        # (variable, update expression) pairs for one SGD step
        updates = [(classifier.W, classifier.W - learning_rate * g_W),
                   (classifier.b, classifier.b - learning_rate * g_b)]

        # returns the cost and, at the same time, applies ``updates``
        train_model = theano.function(
            inputs=[index],
            outputs=cost,
            updates=updates,
            givens={x: train_set_x[batch], y: train_set_y[batch]})
    else:
        # load the saved parameters and substitute them into the graph
        learned_params = unpickle('params/logistic_sgd_gray.pkl')
        get_test_labels = theano.function(
            [index], classifier.y_pred,
            givens={
                x: test_set_x[batch],
                classifier.W: learned_params[0],
                classifier.b: learned_params[1]})

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')  # printed in both modes, as before

    start_time = time.clock()

    epoch = 0
    if training:
        # early-stopping parameters
        patience = 5000                # run at least this many iterations
        patience_increase = 2          # factor applied on a significant improvement
        improvement_threshold = 0.995  # relative improvement considered significant
        # Number of minibatches between two validation passes; with these
        # settings validation runs at least once per epoch.
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        test_score = 0.
        done_looping = False

        while epoch < n_epochs and not done_looping:
            epoch += 1
            for minibatch_index in range(n_train_batches):
                train_model(minibatch_index)

                # iteration number
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    # compute zero-one loss on validation set
                    this_validation_loss = numpy.mean(
                        [validate_model(i) for i in range(n_valid_batches)])

                    print('epoch %i, minibatch %i/%i, validation error %f %%' %
                          (epoch, minibatch_index + 1, n_train_batches,
                           this_validation_loss * 100.))

                    if this_validation_loss < best_validation_loss:
                        # extend patience if the improvement is good enough
                        if this_validation_loss < (best_validation_loss *
                                                   improvement_threshold):
                            patience = max(patience, iteration * patience_increase)

                        best_validation_loss = this_validation_loss

                if patience <= iteration:
                    done_looping = True
                    break
    else:
        print('predicting the labels...')
        pred_labels = []
        for i in range(n_test_batches):
            print('%s / %s' % (i + 1, n_test_batches))
            pred_labels.append(get_test_labels(i))

        # The context manager closes (and flushes) the result file; the
        # original never closed the handle.
        with open('result/logistic_sgd_gray.csv', 'w') as out:
            writer = csv.writer(out)
            row = 1

            print('output test labels...')
            for i, labels in enumerate(pred_labels):
                print('%s / %s' % (i + 1, len(pred_labels)))
                for label in labels:
                    writer.writerow([row, label])
                    row += 1

    end_time = time.clock()
    if training:
        print(('Optimization complete with best validation score of %f %%,'
               'with test performance %f %%') %
              (best_validation_loss * 100., test_score * 100.))
        print('The code run for %d epochs, with %f epochs/sec' % (
            epoch, 1. * epoch / (end_time - start_time)))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.1fs' % ((end_time - start_time)) + '\n')
    if training:
        print('saving the parameters learned...')
        get_params = theano.function(inputs=[], outputs=[classifier.W, classifier.b])
        save_parameters(get_params(), 'logistic_sgd_gray')
Beispiel #11
0
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=100000,
             dataset='cifar-10-batches-py', batch_size=32, test_batch_size=32, n_hidden_1=500, n_hidden_2=500, mode='train',
             amount='full', valid_num=10000):  #batch_size: 32

    datasets = load_data(dataset, mode, amount, valid_num)

    if mode == 'train':
        train_set_x, train_set_y = datasets[0]
        valid_set_x, valid_set_y = datasets[1]
    else:
        test_set_x, test_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    if mode == 'train':
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    else:
        n_test_batches = test_set_x.get_value(borrow=True).shape[0] / test_batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(rng=rng, input=x, n_in=769,
                     n_hidden_1=n_hidden_1, n_hidden_2=n_hidden_2, n_out=2)

    ## load the saved parameters
    if mode == 'test':
        learned_params = unpickle('params/mlp.pkl')


    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) \
           + L1_reg * classifier.L1 \
           + L2_reg * classifier.L2_sqr

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    if mode == 'test':
        test_model = theano.function(inputs=[index],
                                     outputs=classifier.errors(y),
                                     givens={
                                     x: test_set_x[index * test_batch_size: (index + 1) * test_batch_size],
                                     y: test_set_y[index * test_batch_size: (index + 1) * test_batch_size]})
    else:
        validate_model = theano.function(inputs=[index],
                                         outputs=classifier.errors(y),
                                         givens={
                                         x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                         y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

        train_error_model = theano.function(inputs=[index],
                                            outputs=classifier.errors(y),
                                            givens={
                                            x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                            y: train_set_y[index * batch_size:(index + 1) * batch_size]})

        get_train_labels = theano.function([index], classifier.log_regression_layer.ex_y,
                                           givens={
                                           x: train_set_x[index * batch_size: (index + 1) * batch_size]})

    if mode == 'test':
        get_test_labels = theano.function([index], classifier.log_regression_layer.y_pred,
                                          givens={
                                          x: test_set_x[index * test_batch_size: (index + 1) * test_batch_size],
                                          classifier.hidden_layer_1.W: learned_params[0],
                                          classifier.hidden_layer_1.b: learned_params[1],
                                          classifier.log_regression_layer.W: learned_params[2],
                                          classifier.log_regression_layer.b: learned_params[3]})

    # compute the gradient of cost with respect to theta (sotred in params)
    # the resulting gradients will be stored in a list gparams
    if mode == 'train':
        gparams = []
        for param in classifier.params:
            gparam = T.grad(cost, param)
            gparams.append(gparam)

        # specify how to update the parameters of the model as a list of
        # (variable, update expression) pairs
        updates = []
        # given two list the zip A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4] of
        # same length, zip generates a list C of same size, where each element
        # is a pair formed from the two lists :
        #	C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
        for param, gparam in zip(classifier.params, gparams):
            updates.append((param, param - learning_rate * gparam))

        # compiling a Theano function `train_model` that returns the cost, but
        # in the same time updates the parameter of the model based on the rules
        # defined in `updates`
        train_model = theano.function(inputs=[index], outputs=cost,
                                      updates=updates,
                                      givens={
                                      x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                      y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    #init_bias = [-1. for i in xrange(101)]
    ##init_bias = numpy.asarray(init_bias, dtype=numpy.float64)
    #init_bias[0] = 100.
    #initialize_bias = theano.function(inputs=[], outputs=classifier.logRegressionLayer.b,
    #		updates={classifier.logRegressionLayer.b: init_bias},
    #		givens={classifier.logRegressionLayer.b: init_bias})

    #bias = initialize_bias()
    #print bias


    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 1000000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.999  # a relative improvement of this much is
    # considered significant
    if mode == 'train':
        validation_frequency = min(n_train_batches, patience / 2)
        # go through this many
        # minibatche before checking the network
        # on the validation set; in this case we
        # check every epoch

    best_params = None
    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    if mode == 'train':
        done_looping = False
    else:
        done_looping = True

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                train_losses = [train_error_model(i)
                                for i in xrange(n_train_batches)]
                this_train_loss = numpy.mean(train_losses)

                try:
                    pred_labels = pred_labels
                except NameError:
                    pred_labels = [[0 for j in xrange(batch_size)] for i in xrange(n_train_batches)]

                #params = get_params()
                #print 'W[0:10]:', params[0][0:10], 'b[0:10]:', params[1][0:10]

                if mode == 'train':
                    for i in xrange(n_train_batches):
                        pred_labels[i] = get_train_labels(i)

                    #print 'max predicted labels:',
                    #for i in xrange(len(pred_labels)):
                    #	print max(pred_labels[i]),
                    #print

                print('epoch %i, minibatch %i/%i, validation error (MAE) %f' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss))

                print('epoch %i, minibatch %i/%i, training error (MAE) %f' % \
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_train_loss))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    ## save the parameters
                    get_params = theano.function(inputs=[], outputs=[classifier.hidden_layer_1.W, classifier.hidden_layer_1.b,
                                                                     classifier.log_regression_layer.W,
                                                                     classifier.log_regression_layer.b])
                    save_parameters(get_params(), 'mlp')

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

            if patience <= iter:
                done_looping = True
                break

    if mode == 'train':
        for i in xrange(n_train_batches):
            pred_labels[i] = get_train_labels(i)

        print 'max predicted labels:',
        for i in xrange(len(pred_labels)):
            print max(pred_labels[i]),
        print

    if mode == 'test':
        print 'predicting the labels...'
        pred_labels = [[0 for j in xrange(batch_size)] for i in xrange(n_test_batches)]
        for i in xrange(n_test_batches):
            print str(i + 1), '/', str(n_test_batches)
            pred_labels[i] = get_test_labels(i)

        writer = csv.writer(file('result/mlp.csv', 'w'))
        writer.writerow(['id', 'loss'])
        row = 105472  # first ID of test data

        print 'output test labels...'
        for i in xrange(len(pred_labels)):
            print str(i + 1), '/', str(len(pred_labels))
            for j in xrange(len(pred_labels[i])):
                writer.writerow([row, pred_labels[i][j]])
                row += 1

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f '
           'obtained at iteration %i') %
          (best_validation_loss, best_iter + 1))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
def test_dA(learning_rate=0.1, training_epochs=20,
			dataset='mnist.pkl.gz',
			batch_size=20, output_folder='dA_data', mode='test', amount='full'):

	"""
	This demo is tested on MNIST

	:type learning_rate: float
	:param learning_rate: learning rate used for training the DeNosing
						  AutoEncoder

	:type training_epochs: int
	:param training_epochs: number of epochs used for training

	:type dataset: string
	:param dataset: path to the picked dataset

	"""
	datasets = load_data(dataset, mode, amount)
	train_set_x, train_set_y = datasets[0]

	# compute number of minibatches for training, validation and testing
	n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

	# allocate symbolic variables for the data
	index = T.lscalar()	# index to a [mini]batch
	x = T.matrix('x')  # the data is presented as rasterized images

	######################
	# BUILDING THE MODEL #
	######################

	for noize in [0, 10, 20, 30, 40, 50]:
		print 'noize:', str(noize), '%'

		rng = numpy.random.RandomState(123)
		theano_rng = RandomStreams(rng.randint(2 ** 30))

		da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x,
				n_visible=32 * 32, n_hidden=784) # same as MNIST (28*28=784)

		## load the saved parameters
		learned_params = unpickle('params/dA_' + str(noize) + '.pkl')

		comp_data = da.get_comp_values()

		get_comp_data = theano.function([index], comp_data,
			 givens={
				 x: train_set_x[index * batch_size: (index + 1) * batch_size],
				 da.W: learned_params[0],
				 da.b: learned_params[1]})


		## save compressed data (no corruption)
		print 'creating compressed data...'
		if mode == 'valid':
			data_da = [[0 for j in xrange(28*28)] for i in xrange(n_train_batches*batch_size)]
			
			for batch_index in xrange(n_train_batches):
				comp_x = get_comp_data(batch_index)
			
				for i in xrange(batch_size):
					comp_x[i] = numpy.asarray(comp_x[i], dtype=numpy.float64)
					data_da[batch_index * batch_size + i] = comp_x[i]
			
			data_da = numpy.asarray(data_da)
			pickle(data_da, 'dA_data/' + mode + '_data_da_' + str(noize) + '.pkl')
		else:
			if amount == 'full':
				step_size = 300
			else:
				step_size = 6
			for step in xrange(1,step_size+1):
				print str(step), '/', str(step_size)
				data_da = [[0 for j in xrange(28*28)] for i in xrange(n_train_batches*batch_size/step_size)]
				
				for batch_index in xrange(n_train_batches/step_size):
					comp_x = get_comp_data(batch_index + (n_train_batches / step_size) * (step - 1))
				
					for i in xrange(batch_size):
						comp_x[i] = numpy.asarray(comp_x[i], dtype=numpy.float64)
						data_da[batch_index * batch_size + i] = comp_x[i]
				
				data_da = numpy.asarray(data_da)
				pickle(data_da, 'dA_data/' + mode + '_data_da_' + str(noize) + '_' + str(step) + '.pkl')
def generate(rootdir):
    """Compute a range-indexed spectrogram for every stored ship record.

    The records are loaded with ``up.unpickle(rootdir)``.  For each record the
    WAV file at ``ship.filepath + ship.id + '.wav'`` is read, a PSD
    spectrogram is computed and offset by the values from
    ``tf.get_tf(ship.harp, ...)``, and its columns are regrouped by distance
    bin through ``range_spect``.  The result is assigned to ``ship.spect``.
    After the loop all records are written back with ``up.store``.

    ``plt.savefig`` is still called once per record although no plotting call
    is made in this function, so the image holds whatever the current
    matplotlib figure contains.
    """
    ships = up.unpickle(rootdir)
    for ship in ships:
        # input recording and output image locations
        wav_path = ship.filepath + ship.id + '.wav'
        image_path = destination_folder + ship.year_month +'\\' + ship.id + '.png'
        print(wav_path)

        # converted time stamps plus the file start and the closest point of
        # approach (CPA)
        converted_times, cpa_time, start, cpa_index = convert_time(ship)

        # distances on either side of the CPA, cut with the index that
        # convert_time returned
        pre_cpa = ship.distance[start:cpa_index]
        post_cpa = ship.distance[cpa_index:]

        # from here on the CPA index refers to a position in converted_times
        cpa_index = converted_times.index(cpa_time)
        pre_times = converted_times[:cpa_index]
        post_times = converted_times[cpa_index:]

        # time -> distance interpolators for the approach and the departure
        approach_inter = interpolate.interp1d(
            pre_times, pre_cpa, axis=0, fill_value="extrapolate")
        depart_inter = interpolate.interp1d(
            post_times, post_cpa, axis=0, fill_value="extrapolate")

        # raw samples at the file's own sample rate
        sample_rate, samples = wavfile.read(wav_path)
        sound_length = len(samples)//sample_rate

        # sample instants before and after the CPA, in unit steps
        approach_times = np.arange(0,cpa_time)
        depart_times = np.arange(cpa_time,sound_length)

        # non-overlapping Hann windows of 10e3 samples
        frequencies, _times, spectrogram = signal.spectrogram(
            samples,
            sample_rate,
            window = np.hanning(10e3),
            noverlap = 0,
            nfft = 10e3,
            mode='psd')

        # log-scale the PSD, then add the transfer-function values, repeated
        # once per spectrogram column
        uppc = tf.get_tf(ship.harp,frequencies)
        spectrogram = 10*np.log10(spectrogram)
        uppc = npmb.repmat(uppc,np.size(spectrogram,1),1)
        spectrogram = spectrogram + np.transpose(uppc)

        # width of one distance bin, in the units of ship.distance
        # NOTE(review): the original comment called this "1m" -- confirm units
        range_step = .01
        closest_range = np.min(np.abs(ship.distance))

        # bin edges from the first distance down to the CPA and from the CPA
        # out to the last distance
        range_approach = np.arange(pre_cpa[0], closest_range, -range_step)
        range_depart = np.arange(closest_range, post_cpa[len(post_cpa)-1], range_step)
        # combined edges and their count; neither is used further below
        range_desired = np.append(range_approach,range_depart)
        number_range_samples = len(range_desired)

        # interpolated distance at every sample instant
        spect_dis_approach = approach_inter(approach_times)
        spect_dis_depart = depart_inter(depart_times)

        # distance bin of every sample instant
        approach_bins = np.digitize(spect_dis_approach,range_approach)
        depart_bins = np.digitize(spect_dis_depart,range_depart)

        # regroup the spectrogram by distance bin and join both halves
        approach_spect = range_spect(approach_bins,spectrogram)
        depart_spect = range_spect(depart_bins,spectrogram)
        range_spectrogram = np.concatenate((approach_spect,depart_spect),axis=1)

        ship.spect = range_spectrogram
        print(range_spectrogram)

        # write out the current figure, then release it
        plt.savefig(image_path)
        plt.close()
    up.store(ships)
def load_data(dataset, mode="train", amount="full"):
    """ Loads the dataset

	:type dataset: string
	:param dataset: the path to the dataset (here MNIST)
	"""

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present

    print "... loading data"

    ## Load the dataset
    if mode == "train":
        # load training and validation data
        if amount == "full":
            print "full training..."
            train_set = unpickle("data/train_set_gray.pkl")
            valid_set = unpickle("data/valid_set_gray.pkl")
        elif amount == "min":
            print "min training..."
            train_set = unpickle("data/min_train_set_gray.pkl")
            valid_set = unpickle("data/min_valid_set_gray.pkl")
        else:
            print "amount shoule be either full or min"
            raise NotImplementedError()
    else:
        # load test data
        # test_set = unpickle('data/test_set_gray.pkl')
        print "loading test data..."
        if amount == "full":
            test_set = []
            for i in xrange(1, 301):  # from 1 to 300 TBF: hard code
                print str(i), "/", str(300)
                test_set_batch = unpickle("data/test_set_gray_" + str(i) + ".pkl")
                test_set.extend(test_set_batch)
            test_set = (test_set, [0 for i in xrange(0, len(test_set))])
        else:
            test_set = unpickle("data/test_set_gray_1.pkl")
            test_set = (test_set, [0 for i in xrange(0, len(test_set))])
        print "done!"

        # train_set, valid_set, test_set format: tuple(input, target)
        # input is an numpy.ndarray of 2 dimensions (a matrix)
        # witch row's correspond to an example. target is a
        # numpy.ndarray of 1 dimensions (vector)) that have the same length as
        # the number of rows in the input. It should give the target
        # target to the example with the same index in the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

		The reason we store our dataset in shared variables is to allow
		Theano to copy it into the GPU memory (when code is run on GPU).
		Since copying data into the GPU is slow, copying a minibatch everytime
		is needed (the default behaviour if the data is not in a shared
		variable) would lead to a large decrease in performance.
		"""
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets ous get around this issue
        return shared_x, T.cast(shared_y, "int32")

    if mode == "train":
        train_set_x, train_set_y = shared_dataset(train_set)
        valid_set_x, valid_set_y = shared_dataset(valid_set)
    else:
        test_set_x, test_set_y = shared_dataset(test_set)

    if mode == "train":
        rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y)]
    else:
        rval = [(test_set_x, test_set_y)]

    return rval
def load_data(file_name):
    data = unpickle.unpickle(file_name)
    X = data["data"]
    print 1
    return X
def evaluate_lenet5(
    learning_rate=0.15, n_epochs=200, dataset="mnist.pkl.gz", nkerns=[60, 80, 150, 150, 80], batch_size=200
):
    """ Demonstrates lenet on CIFAR-10 dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):

        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets ous get around this issue
        return shared_x, T.cast(shared_y, "int32")

    data_batch_1 = unpickle("cifar-10-batches-py/data_batch_1")
    data_batch_2 = unpickle("cifar-10-batches-py/data_batch_2")
    data_batch_3 = unpickle("cifar-10-batches-py/data_batch_3")
    data_batch_4 = unpickle("cifar-10-batches-py/data_batch_4")
    data_batch_5 = unpickle("cifar-10-batches-py/data_batch_5")
    test = unpickle("cifar-10-batches-py/test_batch")

    train_set_1 = data_batch_1["data"]
    train_set_2 = data_batch_2["data"]
    train_set_3 = data_batch_3["data"]
    train_set_4 = data_batch_4["data"]
    train_set_5 = data_batch_5["data"]
    X_train = numpy.concatenate((train_set_1, train_set_2, train_set_3, train_set_4, train_set_5), axis=0)

    y_train = numpy.concatenate(
        (
            data_batch_1["labels"],
            data_batch_2["labels"],
            data_batch_3["labels"],
            data_batch_4["labels"],
            data_batch_5["labels"],
        )
    )

    test_set = test["data"]
    Xte_rows = test_set.reshape(train_set_1.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # take first 1000 for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # keep last 49,000 for train
    Ytr = y_train[7500:50000]

    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xte_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train
    learning_rate = theano.shared(learning_rate)

    """whitening"""

    """
    Xtr_rows -= numpy.mean(Xtr_rows, axis=0) # zero-center the data (important)
    cov = numpy.dot(Xtr_rows.T, Xtr_rows) / Xtr_rows.shape[0]
    U,S,V = numpy.linalg.svd(cov)

    Xrot = numpy.dot(Xtr_rows, U)# decorrelate the data
    Xrot_reduced = numpy.dot(Xtr_rows, U[:,:100])

    # whiten the data:
    # divide by the eigenvalues (which are square roots of the singular values)
    Xwhite = Xrot / numpy.sqrt(S + 1e-5)"""

    """whitening"""

    # Xtr_rows = whiten(Xtr_rows)
    # zero-center the data (important)
    """cov = numpy.dot(Xtr_rows.T, Xtr_rows) / Xtr_rows.shape[0]
    U,S,V = numpy.linalg.svd(cov)

    Xrot = numpy.dot(Xtr_rows, U)

    Xtr_rows = Xrot / numpy.sqrt(S + 1e-5)

    Xval_rot = numpy.dot(Xval_rows,U)
    Xval_rows = Xval_rot / numpy.sqrt(S + 1e-5)

    Xte_rot = numpy.dot(Xte_rows,U)
    Xte_rows = Xte_rot / numpy.sqrt(S + 1e-5)
    """

    train_set = (Xtr_rows, Ytr)
    valid_set = (Xval_rows, Yval)
    test_set = (Xte_rows, Yte)

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)
    datasets = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size
    n_valid_batches //= batch_size
    n_test_batches //= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32+4-3+1 , 32+4-3+1) = (34, 34)
    # maxpooling reduces this further to (32/2, 32/2) = (17, 17)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 17, 17)
    layer0 = LeNetConvPoolLayer(
        rng, input=layer0_input, image_shape=(batch_size, 3, 32, 32), filter_shape=(nkerns[0], 3, 3, 3), poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (17+4-2+1, 17+4-2+1) = (20, 20)
    # maxpooling reduces this further to (20/2, 20/2) = (10, 10)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 10, 10)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 17, 17),
        filter_shape=(nkerns[1], nkerns[0], 2, 2),
        poolsize=(2, 2),
    )

    # Construct the third convolutional pooling layer
    # filtering reduces the image size to (10+4-3+1, 10+4-3+1) = (12, 12)
    # maxpooling reduces this further to (12/2, 12/2) = (6, 6)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 6, 6)

    layer2conv = LeNetConvPoolLayer(
        rng,
        input=layer1.output,
        image_shape=(batch_size, nkerns[1], 10, 10),
        filter_shape=(nkerns[2], nkerns[1], 3, 3),
        poolsize=(2, 2),
    )

    # Construct the fourth convolutional pooling layer
    # filtering reduces the image size to (6+4-3+1, 6+4-3+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 4, 4)

    layer3conv = LeNetConvPoolLayer(
        rng,
        input=layer2conv.output,
        image_shape=(batch_size, nkerns[2], 6, 6),
        filter_shape=(nkerns[3], nkerns[2], 3, 3),
        poolsize=(2, 2),
    )

    # Construct the fifth convolutional pooling layer
    # filtering reduces the image size to (4+4-3+1, 4+4-3+1) = (6, 6)
    # maxpooling reduces this further to (6/2, 6/2) = (3, 3)
    # 4D output tensor is thus of shape (batch_size, nkerns[2], 3, 3)

    layer4conv = LeNetConvPoolLayer(
        rng,
        input=layer3conv.output,
        image_shape=(batch_size, nkerns[3], 4, 4),
        filter_shape=(nkerns[4], nkerns[3], 3, 3),
        poolsize=(2, 2),
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    fc_input = layer4conv.output.flatten(2)

    # construct a fully-connected sigmoidal layer

    Fully_conected_layers = TLMLP(rng, fc_input, nkerns[4] * 3 * 3, 600, 600, 200, 10)

    # the cost we minimize during training is the NLL of the model
    L2_reg = 0.0008
    W_layers = (
        (layer0.W ** 2).sum()
        + (layer1.W ** 2).sum()
        + (layer2conv.W ** 2).sum()
        + (layer3conv.W ** 2).sum()
        + (layer4conv.W ** 2).sum()
    )

    fc_cost = Fully_conected_layers.negative_log_likelihood(y) + L2_reg * (Fully_conected_layers.L2_sqr + W_layers)
    cost = fc_cost

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        Fully_conected_layers.errors(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        [index],
        Fully_conected_layers.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # create a list of all model parameters to be fit by gradient descent
    params = (
        Fully_conected_layers.params
        + layer4conv.params
        + layer3conv.params
        + layer2conv.params
        + layer1.params
        + layer0.params
    )

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
    # go through this many
    # minibatche before checking the network
    # on the validation set; in this case we
    # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    epoch_loss_list = []
    epoch_val_list = []

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        # if epoch == 10:
        #    learning_rate.set_value(0.1)
        # if epoch > 18:
        #   learning_rate.set_value(learning_rate.get_value()*0.9995)
        if epoch > 3:
            epoch_loss_np = numpy.reshape(epoch_loss_list, newshape=(len(epoch_loss_list), 3))
            epoch_val_np = numpy.reshape(epoch_val_list, newshape=(len(epoch_val_list), 3))
            numpy.savetxt(fname="epoc_cost_pad.csv", X=epoch_loss_np, fmt="%1.3f")
            numpy.savetxt(fname="epoc_val_error_padd.csv", X=epoch_val_np, fmt="%1.3f")

        for minibatch_index in range(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print("training @ iter = ", iter)
            cost_ij = train_model(minibatch_index)

            epoch_loss_entry = [iter, epoch, float(cost_ij)]
            epoch_loss_list.append(epoch_loss_entry)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
                )
                epoch_val_entry = [iter, epoch, this_validation_loss]
                epoch_val_list.append(epoch_val_entry)

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)
                    print(
                        ("     epoch %i, minibatch %i/%i, test error of " "best model %f %%")
                        % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.0)
                    )

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print("Optimization complete.")
    print(
        "Best validation score of %f %% obtained at iteration %i, "
        "with test performance %f %%" % (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0)
    )
    print(
        ("The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )

    epoch_loss_np = numpy.reshape(epoch_loss_list, newshape=(len(epoch_loss_list), 3))
    epoch_val_np = numpy.reshape(epoch_val_list, newshape=(len(epoch_val_list), 3))

    epoch_loss = pandas.DataFrame(
        {"iter": epoch_loss_np[:, 0], "epoch": epoch_loss_np[:, 1], "cost": epoch_loss_np[:, 2]}
    )
    epoch_vall = pandas.DataFrame(
        {"iter": epoch_val_np[:, 0], "epoch": epoch_val_np[:, 1], "val_error": epoch_val_np[:, 2]}
    )
    epoc_avg_loss = pandas.DataFrame(epoch_loss.groupby(["epoch"]).mean()["cost"])
    epoc_avg_val = pandas.DataFrame(epoch_vall.groupby(["epoch"]).mean()["val_error"])
    epoc_avg_loss = pandas.DataFrame({"epoch": epoc_avg_loss.index.values, "cost": epoc_avg_loss["cost"]})
    epoc_avg_loss_val = pandas.DataFrame({"epoch": epoc_avg_val.index.values, "val_error": epoc_avg_val["val_error"]})
    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind="line", x="epoch", y="val_error")
    plt.show()
Beispiel #17
0
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.image as mpimg
import os
from unpickle import unpickle

# Load, at import time, the first CIFAR-10 training batch, the dataset
# metadata file and the test batch (paths are relative to the working
# directory).
data1 = unpickle("./cifar-10-batches-py/data_batch_1")
meta = unpickle('./cifar-10-batches-py/batches.meta')
test = unpickle("./cifar-10-batches-py/test_batch")

# Set TensorFlow's logging verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO)


# trainData = []
# trainLabel = []
# for i in range(0,10000):
#     print(i)
#     samp = np.array(data1[b'data'][i])
#     sampr = np.reshape(samp[0:1024],(32,32))
#     sampg = np.reshape(samp[1024:2*1024],(32,32))
#     sampb = np.reshape(samp[1024*2:1024*3],(32,32))
#     trainData.append(np.dstack((sampr,sampg,sampb)))
# trainLabel = tf.constant(data1[b'labels'])
# with tf.Session() as sess:
# print(trainData1.eval())
def cnn_model_fn(features, labels, mode):
    """Model function for CNN.

    NOTE(review): only this docstring is present -- the function contains no
    other statements, so calling it returns ``None`` and none of the
    arguments are used.
    """
def test_mlp(learning_rate=0.01,
             L1_reg=0.00,
             L2_reg=0.008,
             n_epochs=1000,
             dataset='mnist.pkl.gz',
             batch_size=20,
             n_hidden1=500,
             n_hidden2=50):
    """
    Train a multilayer perceptron with minibatch stochastic gradient descent
    and patience-based early stopping.

    The data is read from the CIFAR-10 python batches
    (``cifar-10-batches-py/data_batch_1`` .. ``data_batch_5`` and
    ``test_batch``). Rows 0..7499 of the concatenated training batches form
    the validation set and rows 7500..49999 form the training set. All three
    splits are standardized with the per-feature mean and standard deviation
    of the training rows.

    Progress is printed to stdout and the total run time to stderr; the
    function returns nothing.

    :type learning_rate: float
    :param learning_rate: step size of the gradient descent updates

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: unused; kept so existing callers keep working (the
                 CIFAR-10 batch paths are hard-coded in the body)

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden1: int
    :param n_hidden1: currently unused, see the note at the MLP construction

    :type n_hidden2: int
    :param n_hidden2: currently unused, see the note at the MLP construction
    """
    def shared_dataset(data_xy, borrow=True):
        """Wrap an ``(inputs, labels)`` pair in Theano shared variables.

        Both arrays are stored as ``floatX`` shared variables; the labels
        are returned cast to ``int32`` because they are used as indices
        during the computations.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    #############
    # LOAD DATA #
    #############
    train_batches = [
        unpickle('cifar-10-batches-py/data_batch_%d' % batch_no)
        for batch_no in range(1, 6)
    ]
    test = unpickle('cifar-10-batches-py/test_batch')

    X_train = numpy.concatenate(
        [batch["data"] for batch in train_batches], axis=0)
    y_train = numpy.concatenate(
        [batch["labels"] for batch in train_batches])

    # Flatten the test images using the test set's own row count (the
    # original borrowed the row count of a training batch).
    test_data = test["data"]
    Xte_rows = test_data.reshape(test_data.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    # The first 7500 rows are held out for validation.
    Xval_rows = X_train[:7500, :]
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]
    Ytr = y_train[7500:50000]

    # Standardize every split with statistics of the TRAINING rows only, so
    # that no information from the test set leaks into preprocessing.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    # NOTE(review): the hidden layer sizes are hard-coded to 500/500 and the
    # ``n_hidden1``/``n_hidden2`` arguments are ignored. Left unchanged so
    # the default architecture stays the same -- confirm the intended sizes.
    classifier = MLP(rng=rng,
                     input=x,
                     n_in=3072,
                     n_hidden1=500,
                     n_hidden2=500,
                     n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 +
            L2_reg * classifier.L2_sqr)

    def batch_givens(set_x, set_y):
        """Map ``x``/``y`` onto the ``index``-th minibatch of a data split."""
        return {
            x: set_x[index * batch_size:(index + 1) * batch_size],
            y: set_y[index * batch_size:(index + 1) * batch_size]
        }

    # Theano functions that compute the mistakes made by the model on one
    # minibatch of the test / validation split
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=batch_givens(test_set_x, test_set_y))

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens=batch_givens(valid_set_x, valid_set_y))

    # gradient of the cost with respect to every model parameter
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD: one (variable, update expression) pair per parameter
    updates = [(param, param - learning_rate * gparam)
               for param, gparam in zip(classifier.params, gparams)]

    # `train_model` returns the cost and, at the same time, updates the
    # parameters of the model according to `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens=batch_givens(train_set_x, train_set_y))

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')

    # early-stopping parameters
    patience = 1000000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # check the network on the validation set once per epoch
    validation_frequency = n_train_batches

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        # The original called ``learning_rate.set_value()`` here for the
        # first three epochs. ``learning_rate`` is a plain float that is
        # baked into the update expressions, so that call raised
        # AttributeError on the very first epoch; it has been removed.
        for minibatch_index in range(n_train_batches):

            train_model(minibatch_index)
            # iteration number
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [
                    validate_model(i) for i in range(n_valid_batches)
                ]
                this_validation_loss = numpy.mean(validation_losses)

                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (this_validation_loss <
                            best_validation_loss * improvement_threshold):
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i) for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(
        ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' %
         ((end_time - start_time) / 60.)),
        file=sys.stderr)
Beispiel #19
0
def test_SdA(finetune_lr=0.1, pretraining_epochs=20, ## originally 15
			 pretrain_lr=0.001, training_epochs=1000,
			 dataset='cifar-10-batches-py', batch_size=1, mode='train', amount='full'):
	"""
	Demonstrates how to train and test a stochastic denoising autoencoder.

	This is demonstrated on MNIST.

	:type learning_rate: float
	:param learning_rate: learning rate used in the finetune stage
	(factor for the stochastic gradient)

	:type pretraining_epochs: int
	:param pretraining_epochs: number of epoch to do pretraining

	:type pretrain_lr: float
	:param pretrain_lr: learning rate to be used during pre-training

	:type n_iter: int
	:param n_iter: maximal number of iterations ot run the optimizer

	:type dataset: string
	:param dataset: path the the pickled dataset

	"""

	datasets = load_data(dataset, mode=mode, amount=amount)

	if mode == 'train':
		train_set_x, train_set_y = datasets[0]
		valid_set_x, valid_set_y = datasets[1]
	else:
		test_set_x, test_set_y = datasets[0]

	# compute number of minibatches for training, validation and testing
	if mode == 'train':
		n_train_batches = train_set_x.get_value(borrow=True).shape[0]
		n_train_batches /= batch_size
	else:
		n_test_batches = test_set_x.get_value(borrow=True).shape[0]
		n_test_batches /= batch_size

	# numpy random generator
	numpy_rng = numpy.random.RandomState(89677)


	# allocate symbolic variables for the data
	index = T.lscalar()  # index to a [mini]batch
	x = T.matrix('x')   # the data is presented as rasterized images
	y = T.ivector('y')  # the labels are presented as 1D vector of

	print '... building the model'
	# construct the stacked denoising autoencoder class
	sda = SdA(numpy_rng=numpy_rng, n_ins=32 * 32,
			  hidden_layers_sizes=[1300, 1300, 1300],
			  n_outs=10)

	## load the saved parameters
	if mode == 'test':
		learned_params = unpickle('params/SdA.pkl')


	print '... getting the pretraining functions'
	if mode == 'train':
		pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
													batch_size=batch_size)


	#########################
	# PRETRAINING THE MODEL #
	#########################

	if mode == 'train':
		print '... pre-training the model'
		start_time = time.clock()
		## Pre-train layer-wise
		corruption_levels = [.1, .2, .3]
		for i in xrange(sda.n_layers):
			# go through pretraining epochs
			for epoch in xrange(pretraining_epochs):
				# go through the training set
				c = []
				for batch_index in xrange(n_train_batches):
					c.append(pretraining_fns[i](index=batch_index,
							 corruption=corruption_levels[i],
							 lr=pretrain_lr))
				print 'Pre-training layer %i, epoch %d / %d, cost ' % (i, epoch + 1, pretraining_epochs),
				print numpy.mean(c)

		end_time = time.clock()

		print >> sys.stderr, ('The pretraining code for file ' +
							  os.path.split(__file__)[1] +
							  ' ran for %.2fm' % ((end_time - start_time) / 60.))

	########################
	# FINETUNING THE MODEL #
	########################

	# get the training, validation and testing function for the model
	print '... getting the finetuning functions'
	if mode == 'train':
		train_fn, validate_model = sda.build_finetune_functions(
					datasets=datasets, batch_size=batch_size,
					learning_rate=finetune_lr)

	print '... finetunning the model'
	# early-stopping parameters
	if mode == 'train':
		patience = 10 * n_train_batches  # look as this many examples regardless
		patience_increase = 2.  # wait this much longer when a new best is
								# found
		improvement_threshold = 0.995  # a relative improvement of this much is
									   # considered significant
		validation_frequency = min(n_train_batches, patience / 2)
									  # go through this many
									  # minibatche before checking the network
									  # on the validation set; in this case we
									  # check every epoch


	# create a function to get the labels predicted by the model
	if mode == 'test':
		get_test_labels = theano.function([index], sda.logLayer.y_pred,
				givens={
					x: test_set_x[index * batch_size: (index + 1) * batch_size],
					sda.sigmoid_layers[0].W: learned_params[0],
					sda.sigmoid_layers[0].b: learned_params[1],
					sda.sigmoid_layers[1].W: learned_params[2],
					sda.sigmoid_layers[1].b: learned_params[3],
					sda.sigmoid_layers[2].W: learned_params[4],
					sda.sigmoid_layers[2].b: learned_params[5],
					sda.logLayer.W: learned_params[6],
					sda.logLayer.b: learned_params[7]})


	best_params = None
	best_validation_loss = numpy.inf
	test_score = 0.
	start_time = time.clock()

	if mode == 'train':
		done_looping = False
	else:
		done_looping = True

	epoch = 0

	while (epoch < training_epochs) and (not done_looping):
		epoch = epoch + 1
		for minibatch_index in xrange(n_train_batches):
			minibatch_avg_cost = train_fn(minibatch_index)

			## save the parameters
			if mode == 'train':
				get_params = theano.function(inputs=[],
						outputs=[sda.sigmoid_layers[0].W, sda.sigmoid_layers[0].b,
							sda.sigmoid_layers[1].W, sda.sigmoid_layers[1].b,
							sda.sigmoid_layers[2].W, sda.sigmoid_layers[2].b,
							sda.logLayer.W, sda.logLayer.b])


				save_parameters(get_params(), 'SdA')

			iter = (epoch - 1) * n_train_batches + minibatch_index

			if (iter + 1) % validation_frequency == 0:
				validation_losses = validate_model()
				this_validation_loss = numpy.mean(validation_losses)
				print('epoch %i, minibatch %i/%i, validation error %f %%' %
					  (epoch, minibatch_index + 1, n_train_batches,
					   this_validation_loss * 100.))

				# if we got the best validation score until now
				if this_validation_loss < best_validation_loss:

					#improve patience if loss improvement is good enough
					if (this_validation_loss < best_validation_loss *
						improvement_threshold):
						patience = max(patience, iter * patience_increase)

					# save best validation score and iteration number
					best_validation_loss = this_validation_loss
					best_iter = iter


			if patience <= iter:
				done_looping = True
				break


	if mode == 'test':
		print 'predicting the labels...'
		pred_labels = [[0 for j in xrange(batch_size)] for i in xrange(n_test_batches)]
		for i in xrange(n_test_batches):
			print str(i+1), '/', str(n_test_batches)
			pred_labels[i] = get_test_labels(i)

		writer = csv.writer(file('result/SdA.csv', 'w'))
		row = 1

		print 'output test labels...'
		for i in xrange(len(pred_labels)):
			print str(i+1), '/', str(len(pred_labels))
			for j in xrange(len(pred_labels[i])):
				writer.writerow([row, pred_labels[i][j]])
				row += 1


	end_time = time.clock()
	print(('Optimization complete with best validation score of %f %%,'
		   'with test performance %f %%') %
				 (best_validation_loss * 100., test_score * 100.))
	print >> sys.stderr, ('The training code for file ' +
						  os.path.split(__file__)[1] +
						  ' ran for %.2fm' % ((end_time - start_time) / 60.))
def load_data(dataset, mode='train', amount='full', valid_num=10000):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    '''

    print '... loading data'


    ## Load the dataset
    if mode == 'train':
        # load training and validation data
        if amount == 'full':
            print 'full training...'
            train_set = unpickle('data/train_f528_f274.pkl')

            # TBF: sampling of validation set should be randomized
            valid_set_x = train_set[0][-valid_num:]
            valid_set_y = train_set[1][-valid_num:]
            valid_set = (valid_set_x, valid_set_y)

            train_set_x = train_set[0][:-valid_num]
            train_set_y = train_set[1][:-valid_num]
            train_set = (train_set_x, train_set_y)

        elif amount == 'min':
            print 'min training...'
            train_set = unpickle('data/min_train_simple.pkl')

            valid_num = 200  # train_num: 1000 - 200 = 800
            valid_set_x = train_set[0][-valid_num:]
            valid_set_y = train_set[1][-valid_num:]
            valid_set = (valid_set_x, valid_set_y)

            train_set_x = train_set[0][:-valid_num]
            train_set_y = train_set[1][:-valid_num]
            train_set = (train_set_x, train_set_y)

        else:
            print 'amount shoule be either full or min'
            raise NotImplementedError()
    else:
        # load test data
        #test_set = unpickle('data/test_set.pkl')
        print 'loading test data...'
        if amount == 'full':
            #test_set = []
            #for i in xrange(1, 301): # from 1 to 300 TBF: hard code
            #	print str(i), '/', str(300)
            #	test_set_batch = unpickle('data/test_set_' + str(i) + '.pkl')
            #	test_set.extend(test_set_batch)
            test_set = unpickle('data/test_simple.pkl')
            test_set = (test_set, [0 for i in xrange(0,len(test_set))])
        else:
            test_set = unpickle('data/min_test_simple.pkl')
            test_set = (test_set, [0 for i in xrange(0,len(test_set))])
        print 'done!'

    #train_set, valid_set, test_set format: tuple(input, target)
    #input is an numpy.ndarray of 2 dimensions (a matrix)
    #witch row's correspond to an example. target is a
    #numpy.ndarray of 1 dimensions (vector)) that have the same length as
    #the number of rows in the input. It should give the target
    #target to the example with the same index in the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)

        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets ous get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    if mode == 'train':
        train_set_x, train_set_y = shared_dataset(train_set)
        valid_set_x, valid_set_y = shared_dataset(valid_set)
    else:
        test_set_x, test_set_y = shared_dataset(test_set)

    if mode == 'train':
        rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y)]
    else:
        rval = [(test_set_x, test_set_y)]

    return rval
Beispiel #21
0
    def predict(self, X):
        """Predict a label for every row of X with the nearest stored example.

        X is N x D where each row is an example we wish to predict label for.
        For each row the L1 (sum of absolute differences) distance to every
        row of ``self.Xtr`` is computed and the label of the closest one is
        taken from ``self.ytr``.

        Returns an array of N labels with the same dtype as ``self.ytr``.
        """
        num_test = X.shape[0]
        # make sure the output type matches the stored label type (a bare
        # np.zeros would silently turn integer labels into floats)
        Ypred = np.zeros(num_test, dtype=np.asarray(self.ytr).dtype)

        # loop over all test rows
        for i in range(num_test):
            # L1 distance from the i-th test row to every training row
            distances = np.sum(np.abs(self.Xtr - X[i, :]), axis=1)
            min_index = np.argmin(distances)
            Ypred[i] = self.ytr[min_index]
            print("iteration number" + str(i))
        return Ypred


# Load, at import time, the first CIFAR-10 training batch and the test batch
# (paths are relative to the working directory).
data_batch_1 = unpickle('cifar-10-batches-py/data_batch_1')
test = unpickle('cifar-10-batches-py/test_batch')


def L_i(x, y, W):
    """
  unvectorized version. Compute the multiclass svm loss for a single example (x,y)
  - x is a column vector representing an image (e.g. 3073 x 1 in CIFAR-10)
    with an appended bias dimension in the 3073-rd position (i.e. bias trick)
  - y is an integer giving index of correct class (e.g. between 0 and 9 in CIFAR-10)
  - W is the weight matrix (e.g. 10 x 3073 in CIFAR-10)
  """
    delta = 1.0  # see notes about delta later in this section
    scores = W.dot(
        x)  # scores becomes of size 10 x 1, the scores for each class
    correct_class_score = scores[y]
def test_mlp(
    learning_rate=0.01,
    L1_reg=0.00,
    L2_reg=0.008,
    n_epochs=1000,
    dataset="mnist.pkl.gz",
    batch_size=20,
    n_hidden1=500,
    n_hidden2=50,
):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron.

    Despite the ``dataset`` default, the data is read from the CIFAR-10
    python batches under ``cifar-10-batches-py/``. The first 7500 training
    rows are held out for validation, rows 7500..50000 are used for training,
    and all three splits are standardised with the per-feature mean and
    standard deviation of the training rows.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient); it stays constant for the whole run

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: unused; kept so existing callers keep working

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden1: int
    :param n_hidden1: currently unused, see the NOTE where the MLP is built

    :type n_hidden2: int
    :param n_hidden2: currently unused, see the NOTE where the MLP is built

    The function returns nothing; progress and the final scores are printed.
    """

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, "int32")

    #############
    # LOAD DATA #
    #############
    batch_dir = "cifar-10-batches-py/"
    train_batches = [unpickle(batch_dir + "data_batch_%d" % k) for k in range(1, 6)]
    test = unpickle(batch_dir + "test_batch")

    X_train = numpy.concatenate([batch["data"] for batch in train_batches], axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in train_batches])

    test_set = test["data"]
    # size the reshape from the test data itself, not from a training batch
    Xte_rows = test_set.reshape(test_set.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    # first 7500 rows for validation, the rest (up to 50000) for training
    Xval_rows = X_train[:7500, :]
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]
    Ytr = y_train[7500:50000]

    # Standardise every split with statistics of the TRAINING rows only, so
    # that no information from the test set leaks into the preprocessing.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print("... building the model")

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix("x")  # the data is presented as rasterized images
    y = T.ivector("y")  # the labels are presented as 1D vector of
    # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    # NOTE(review): the hidden sizes are hard-coded to 500/500 and the
    # ``n_hidden1`` / ``n_hidden2`` arguments are ignored. Left untouched
    # because wiring them in would change the default network
    # (n_hidden2 defaults to 50) -- confirm which one is intended.
    classifier = MLP(rng=rng, input=x, n_in=3072, n_hidden1=500, n_hidden2=500, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size : (index + 1) * batch_size],
            y: test_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size : (index + 1) * batch_size],
            y: valid_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # plain SGD: one (variable, update expression) pair per parameter
    updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size : (index + 1) * batch_size],
            y: train_set_y[index * batch_size : (index + 1) * batch_size],
        },
    )

    ###############
    # TRAIN MODEL #
    ###############
    print("... training")

    # early-stopping parameters
    patience = 1000000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
    # found
    improvement_threshold = 0.995  # a relative improvement of this much is
    # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; here we check once per epoch
    validation_frequency = n_train_batches

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.0
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # The learning rate is a plain float baked into the compiled update rule,
    # so there is nothing to adjust between epochs.
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            train_model(minibatch_index)
            # global iteration number across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    "epoch %i, minibatch %i/%i, validation error %f %%"
                    % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.0)
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iteration * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [test_model(i) for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        ("     epoch %i, minibatch %i/%i, test error of " "best model %f %%")
                        % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.0)
                    )

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            "Optimization complete. Best validation score of %f %% "
            "obtained at iteration %i, with test performance %f %%"
        )
        % (best_validation_loss * 100.0, best_iter + 1, test_score * 100.0)
    )
    print(
        ("The code for file " + os.path.split(__file__)[1] + " ran for %.2fm" % ((end_time - start_time) / 60.0)),
        file=sys.stderr,
    )
Beispiel #23
0
from os import strerror
import time
import tensorflow as tf
from tensorflow import keras
from progress.bar import IncrementalBar

import unpickle
import testingMPL

# Directory holding the extracted CIFAR-10 python batches (machine-specific;
# the trailing slash matters because file names are appended directly).
CIFAR_DIR = '/Users/joeylee/downloads/cifar-10-batches-py/'

CIFAR10_files = [
    'batches.meta', 'data_batch_1', 'data_batch_2', 'data_batch_3',
    'data_batch_4', 'data_batch_5', 'test_batch',
]

# Unpickle every file once; results are kept in the same order as
# CIFAR10_files instead of overwriting a list of placeholder indices while
# iterating over it.
all_data = [unpickle.unpickle(CIFAR_DIR + name) for name in CIFAR10_files]

# Name the individual entries: metadata, five training batches, test batch.
batch_meta, db_1, db_2, db_3, db_4, db_5, tb = all_data

testingMPL.testingMPL(CIFAR10_files, db_1)
  def predict(self, X):
    """Predict a label for every row of X with a 1-nearest-neighbour search.

    X is N x D where each row is an example we wish to predict label for.
    Every row of X is compared with every row of ``self.Xtr`` using the
    L1 distance (sum of absolute differences); the entry of ``self.ytr``
    belonging to the closest training row becomes the prediction.

    Returns a 1-D array with N predicted labels that has the same dtype
    as the training labels.
    """
    num_test = X.shape[0]
    train_labels = np.asarray(self.ytr)
    # make sure that the output type matches the training label type
    # (a plain np.zeros would silently turn integer labels into floats)
    Ypred = np.zeros(num_test, dtype=train_labels.dtype)

    # NOTE(review): if Xtr / X hold unsigned integers the subtraction
    # below wraps around instead of going negative -- confirm the caller
    # passes float or signed data.
    # loop over all test rows (``range`` works on Python 2 and 3)
    for i in range(num_test):
        distances = np.sum(np.abs(self.Xtr - X[i, :]), axis=1)
        min_index = np.argmin(distances)
        Ypred[i] = train_labels[min_index]
        print("iteration number"+str(i))
    return Ypred


# Module-level data loading: read the first CIFAR-10 training batch and the
# test batch with ``unpickle`` (defined elsewhere). The paths are relative.
data_batch_1 = unpickle('cifar-10-batches-py/data_batch_1')
test = unpickle('cifar-10-batches-py/test_batch')


def L_i(x, y, W):
  """
  Unvectorized version. Compute the multiclass svm loss for a single example (x,y)

  - x is a column vector representing an image (e.g. 3073 x 1 in CIFAR-10)
    with an appended bias dimension in the 3073-rd position (i.e. bias trick)
  - y is an integer giving index of correct class (e.g. between 0 and 9 in CIFAR-10)
  - W is the weight matrix (e.g. 10 x 3073 in CIFAR-10)

  NOTE(review): the body visible here only computes the class scores, the
  score of the correct class and the number of classes; it never
  accumulates a loss and falls off the end, so it returns None. The
  snippet looks truncated.
  """
  delta = 1.0 # see notes about delta later in this section
  scores = W.dot(x) # scores becomes of size 10 x 1, the scores for each class
  correct_class_score = scores[y]
  D = W.shape[0] # number of classes, e.g. 10
def load_data(dataset, mode='train', amount='full', noize='30'):
    """Load the pickled denoising-autoencoder data into Theano shared variables.

    :param dataset: unused; kept so existing callers keep working
    :param mode: ``'train'`` loads the training and validation sets; any
        other value loads the test set
    :param amount: ``'full'`` or ``'min'`` (the ``_min`` files) in train
        mode; only ``'full'`` is supported for the test set
    :param noize: string inserted into the ``dA_data/*_data_da_<noize>``
        file names
    :return: ``[(train_x, train_y), (valid_x, valid_y)]`` in train mode,
        otherwise ``[(test_x, test_y)]`` where ``test_y`` is all zeros
    :raises NotImplementedError: for an unsupported ``amount``
    """
    print('... loading data')

    def shared_dataset_x(data_x, borrow=True):
        """Store the inputs as a ``floatX`` shared variable."""
        return theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)

    def shared_dataset_y(data_y, borrow=True):
        """Store the labels as ``floatX`` and hand them back cast to int32."""
        shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
        return T.cast(shared_y, 'int32')

    if mode == 'train':
        # the "min" files only differ from the full ones by a name suffix
        if amount == 'full':
            suffix = ''
        elif amount == 'min':
            suffix = '_min'
        else:
            print('amount shoule be either full or min')
            raise NotImplementedError()

        train_set_x = unpickle('dA_data/train_data_da_' + noize + suffix + '.pkl')
        train_set_y = unpickle('dA_data/train_set_y' + suffix + '.pkl')
        valid_set_x = unpickle('dA_data/valid_data_da_' + noize + suffix + '.pkl')
        valid_set_y = unpickle('dA_data/valid_set_y' + suffix + '.pkl')

        train_set_x = shared_dataset_x(train_set_x)
        train_set_y = shared_dataset_y(train_set_y)
        valid_set_x = shared_dataset_x(valid_set_x)
        valid_set_y = shared_dataset_y(valid_set_y)
        return [(train_set_x, train_set_y), (valid_set_x, valid_set_y)]

    # TBF
    # load test data
    print('loading test data...')
    if amount != 'full':
        print('not compatible with min yet...')
        raise NotImplementedError()

    n_test_files = 300  # TBF: hard code, files are numbered from 1 to 300
    test_set = []
    for i in range(1, n_test_files + 1):
        print(str(i) + ' / ' + str(n_test_files))
        test_set.extend(unpickle('dA_data/test_set_gray_' + str(i) + '.pkl'))
    # the test set has no labels; use zeros as placeholders
    test_set_y = [0] * len(test_set)
    print('done!')

    return [(shared_dataset_x(test_set), shared_dataset_y(test_set_y))]
Beispiel #26
0
def evaluate_lenet5(learning_rate=0.15, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 20], batch_size=500):
    """ Demonstrates lenet on CIFAR-10 dataset

    The data is read from the CIFAR-10 python batches under
    ``cifar-10-batches-py/``. The first 7500 training rows are held out for
    validation, rows 7500..50000 are used for training, and all three splits
    are standardised with the per-feature mean and standard deviation of the
    training rows. After training, the per-epoch average cost and validation
    error are plotted.

    :type learning_rate: float
    :param learning_rate: initial learning rate (factor for the stochastic
                          gradient); it is set to 0.1 at epoch 10

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: unused; kept so existing callers keep working

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    rng = numpy.random.RandomState(23455)

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch everytime
        is needed (the default behaviour if the data is not in a shared
        variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    def history_table(records):
        """Stack ``[iter, epoch, value]`` records into an (n, 3) array."""
        # shape is passed positionally: the ``newshape`` keyword is
        # deprecated in recent NumPy releases
        return numpy.reshape(records, (len(records), 3))

    #############
    # LOAD DATA #
    #############
    batch_dir = 'cifar-10-batches-py/'
    train_batches = [unpickle(batch_dir + 'data_batch_%d' % k)
                     for k in range(1, 6)]
    test = unpickle(batch_dir + 'test_batch')

    X_train = numpy.concatenate([batch["data"] for batch in train_batches],
                                axis=0)
    y_train = numpy.concatenate([batch["labels"] for batch in train_batches])

    test_set = test["data"]
    # size the reshape from the test data itself, not from a training batch
    Xte_rows = test_set.reshape(test_set.shape[0], 32 * 32 * 3)
    Yte = numpy.asarray(test["labels"])

    Xval_rows = X_train[:7500, :]  # take first 7500 for validation
    Yval = y_train[:7500]
    Xtr_rows = X_train[7500:50000, :]  # keep the remaining rows for train
    Ytr = y_train[7500:50000]

    # Standardise every split with statistics of the TRAINING rows only, so
    # that no information from the test set leaks into the preprocessing.
    mean_train = Xtr_rows.mean(axis=0)
    stdv_train = Xtr_rows.std(axis=0)
    Xtr_rows = (Xtr_rows - mean_train) / stdv_train
    Xval_rows = (Xval_rows - mean_train) / stdv_train
    Xte_rows = (Xte_rows - mean_train) / stdv_train

    # shared variable so the rate can be changed while training
    learning_rate = theano.shared(learning_rate)

    test_set_x, test_set_y = shared_dataset((Xte_rows, Yte))
    valid_set_x, valid_set_y = shared_dataset((Xval_rows, Yval))
    train_set_x, train_set_y = shared_dataset((Xtr_rows, Ytr))

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 3 * 32 * 32)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    layer0_input = x.reshape((batch_size, 3, 32, 32))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (32+4-5+1 , 32+4-5+1) = (32, 32)
    # maxpooling reduces this further to (32/2, 32/2) = (16, 16)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 16, 16)
    # NOTE(review): the sizes above assume LeNetConvPoolLayer pads its input
    # by 2 pixels per side -- confirm against its definition.
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 3, 32, 32),
        filter_shape=(nkerns[0], 3, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (16+4-5+1, 16+4-5+1) = (16, 16)
    # maxpooling reduces this further to (16/2, 16/2) = (8, 8)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 8, 8)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 16, 16),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 8 * 8).
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer with relu activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 8 * 8,
        n_out=500,
        activation=relu
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model plus an
    # L2 penalty on the weights of the two fully-connected layers
    L2_reg = 0.001
    L2_sqr = (layer2.W ** 2).sum() + (layer3.W ** 2).sum()

    cost = layer3.negative_log_likelihood(y) + L2_reg * L2_sqr

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD. Since this model has many parameters, the updates list is built
    # by looping over all (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    epoch = 0
    done_looping = False

    # [iteration, epoch, value] records collected while training
    epoch_loss_list = []
    epoch_val_list = []

    while (epoch < n_epochs) and (not done_looping):
        epoch += 1
        if epoch == 10:
            learning_rate.set_value(0.1)
        if epoch > 3:
            # dump the history collected so far at the start of every epoch
            numpy.savetxt(fname='epoc_cost.csv',
                          X=history_table(epoch_loss_list),
                          fmt='%1.3f')
            numpy.savetxt(fname='epoc_val_error.csv',
                          X=history_table(epoch_val_list),
                          fmt='%1.3f')

        for minibatch_index in range(n_train_batches):

            # global iteration number across epochs
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('training @ iter = ', iteration)
            cost_ij = train_model(minibatch_index)

            epoch_loss_list.append([iteration, epoch, float(cost_ij)])

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))
                epoch_val_list.append([iteration, epoch, this_validation_loss])

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in range(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)

    # Average the recorded cost / validation error per epoch and plot both.
    epoch_loss_np = history_table(epoch_loss_list)
    epoch_val_np = history_table(epoch_val_list)

    epoch_loss = pandas.DataFrame({"iter": epoch_loss_np[:, 0], "epoch": epoch_loss_np[:, 1],
                                   "cost": epoch_loss_np[:, 2]})
    epoch_vall = pandas.DataFrame({"iter": epoch_val_np[:, 0], "epoch": epoch_val_np[:, 1],
                                   "val_error": epoch_val_np[:, 2]})
    epoc_avg_loss = pandas.DataFrame(epoch_loss.groupby(['epoch']).mean()["cost"])
    epoc_avg_val = pandas.DataFrame(epoch_vall.groupby(['epoch']).mean()["val_error"])
    epoc_avg_loss = pandas.DataFrame({"epoch": epoc_avg_loss.index.values, "cost": epoc_avg_loss["cost"]})
    epoc_avg_loss_val = pandas.DataFrame({"epoch": epoc_avg_val.index.values, "val_error": epoc_avg_val["val_error"]})
    epoc_avg_loss.plot(kind="line", x="epoch", y="cost")
    plt.show()
    epoc_avg_loss_val.plot(kind='line', x="epoch", y="val_error")
    plt.show()
from __future__ import print_function
import os
import sys
import timeit

import numpy

#import matplotlib.pyplot as plt
from LogisticRegression import LogisticRegression
import theano
import theano.tensor as T
import theano.tensor.nnet as nnet
import numpy as np
from unpickle import unpickle

# Raw CIFAR-10 dictionaries: the five training batches, then the test batch.
data_batch_1, data_batch_2, data_batch_3, data_batch_4, data_batch_5 = (
    unpickle('cifar-10-batches-py/data_batch_1'),
    unpickle('cifar-10-batches-py/data_batch_2'),
    unpickle('cifar-10-batches-py/data_batch_3'),
    unpickle('cifar-10-batches-py/data_batch_4'),
    unpickle('cifar-10-batches-py/data_batch_5'),
)
test = unpickle('cifar-10-batches-py/test_batch')

# The "data" entry of every training batch, stacked row-wise into X_train.
train_set_1, train_set_2, train_set_3, train_set_4, train_set_5 = (
    data_batch_1["data"],
    data_batch_2["data"],
    data_batch_3["data"],
    data_batch_4["data"],
    data_batch_5["data"],
)
X_train = np.concatenate(
    [train_set_1, train_set_2, train_set_3, train_set_4, train_set_5],
    axis=0,
)

# The matching "labels" entries, joined in the same batch order.
y_train = np.concatenate([
    data_batch_1["labels"],
    data_batch_2["labels"],
    data_batch_3["labels"],
    data_batch_4["labels"],
    data_batch_5["labels"],
])
Beispiel #28
0
from unpickle import unpickle
from json import dumps
import pickle

# Combine the value returned by ``unpickle()`` with the pickled raw data and
# write both out as pretty-printed JSON.
most_common = unpickle()

# Read the pickle inside a ``with`` block so the file handle is closed again.
# NOTE: pickle.load must only ever be used on trusted files.
with open("shapecolour.p", "rb") as pickled:
    raw_data = pickle.load(pickled)

content = dumps(
    {
        'mostCommon': most_common,
        'rawData': raw_data,
    },
    sort_keys=True,
    indent=4)

with open('processed.json', 'w') as file:
    file.write(content)
Beispiel #29
0
def main():
    """Train ``Model`` with a margin ranking loss and save ``dense1``'s parameters.

    Steps performed:

    1. Load word vectors from ``glove.6B.50d.txt.w2v`` (word2vec text format),
       the caption JSON, the ``resnet`` mapping returned by
       ``unpickle.unpickle()``, and the pickled ``idfs`` /
       ``img_to_caption`` objects.
    2. Initialise ``model.dense1`` from ``weight.npy`` and ``bias.npy``.
    3. For 100 epochs, shuffle the keys of ``resnet`` and walk over
       consecutive pairs of 100-key batches: the first batch supplies the
       "good" images and the captions, the following batch supplies the
       "bad" images.
    4. Write the trained parameters back to ``weight.npy`` / ``bias.npy``.

    All input and output files are resolved relative to the current working
    directory.
    """
    path = r"glove.6B.50d.txt.w2v"
    glove = KeyedVectors.load_word2vec_format(path, binary=False)

    # NOTE(review): json_data is loaded but never used below; the load is kept
    # so the function still fails early when the captions file is missing.
    path_to_json = "captions_train2014.json"
    with open(path_to_json, "rb") as f:
        json_data = json.load(f)
    resnet = unpickle.unpickle()

    # NOTE: pickle.load can execute arbitrary code; these files must be trusted.
    with open("idfs1.pkl", mode="rb") as idf:
        idfs = pickle.load(idf)
    with open("img_to_caption1.pkl", mode="rb") as cap:
        img_to_caption = pickle.load(cap)
    model = Model()

    # Resume from the previously saved parameters.
    model.dense1.weight = mg.Tensor(np.load('weight.npy'))
    model.dense1.bias = mg.Tensor(np.load('bias.npy'))
    optim = Adam(model.parameters)

    batch_size = 100
    for epoch_cnt in range(100):

        idxs = list(resnet.keys())
        np.random.shuffle(idxs)
        # Stop one batch early: every step also consumes the *next* batch as
        # the source of mismatched ("bad") images.
        for batch_cnt in range(0, len(idxs) // batch_size - 1):
            start = batch_cnt * batch_size
            good_ids = idxs[start:start + batch_size]
            bad_ids = idxs[start + batch_size:start + 2 * batch_size]

            # Stack each batch with a single vstack call. Growing the array
            # one row at a time re-copies the accumulated data on every
            # iteration, which is quadratic in the batch size.
            good_image = np.vstack([resnet[i] for i in good_ids])
            text = np.vstack([
                embed_text.se_text(img_to_caption[i][0], glove, idfs)
                for i in good_ids
            ])
            bad_image = np.vstack([resnet[i] for i in bad_ids])

            sim_to_good = cos_sim.cos_sim(model(good_image), text)
            sim_to_bad = cos_sim.cos_sim(model(bad_image), text)

            # compute the margin ranking loss between the two similarity sets
            # NOTE(review): the trailing arguments 1 and 0.1 are presumably
            # the target sign and the margin - confirm against the loss API.
            loss = margin_ranking_loss(sim_to_good, sim_to_bad, 1, 0.1)
            # back-propagate through the computational graph from the loss
            loss.backward()

            # NOTE(review): acc is computed but never reported or stored; the
            # call is kept in case accuracy() has side effects.
            acc = accuracy(sim_to_good.data, sim_to_bad.data)
            # execute gradient descent by calling step() of optim
            optim.step()
            # null the gradients before the next batch
            loss.null_gradients()

    # np.save appends ".npy", so these overwrite the files loaded above.
    np.save('weight', model.dense1.parameters[0].data)
    np.save('bias', model.dense1.parameters[1].data)