Example #1
def unsupervised_training(learning_rate, decay_rate, epochs, repo, output_dico,
                          database_name):
    dwin = 9
    with closing(open(os.path.join(repo, output_dico), 'rb')) as f:
        dico = pickle.load(f)
    n_mot = [len(dico[i]) for i in dico.keys()]
    vect_size = [20, 10, 5, 5]
    n_hidden = 100
    x = T.itensor3('x')
    xc = T.itensor3('xc')  # declared but not used below
    y = T.ivector('y')
    t_nlp = LookUpTrain(dwin, n_mot, vect_size, n_hidden)
    t_nlp.initialize()
    cost = T.mean(t_nlp.cost(x, y))
    error = T.mean(t_nlp.errors(x, y))

    params = getParams(t_nlp, x)
    for i, p in enumerate(params):
        p.name += '_' + str(i)

    # gradient computation with RMSProp (caches are keyed on the now-unique parameter names)
    updates = []
    caches = {}
    grad_params = T.grad(cost, params)
    for param, grad_param in zip(params, grad_params):

        if param.name not in caches:
            caches[param.name] = shared_floatx(param.get_value() * 0.,
                                               "cache_" + param.name)
        # update rule
        update_cache = decay_rate*caches[param.name]\
           + (1 - decay_rate)*grad_param**2
        update_param = param - learning_rate * grad_param / T.sqrt(
            update_cache + 1e-8)
        updates.append((caches[param.name], update_cache))
        updates.append((param, update_param))

    train_model = theano.function(inputs=[x, y],
                                  outputs=cost,
                                  updates=updates,
                                  allow_input_downcast=True)

    valid_model = theano.function(inputs=[x, y],
                                  outputs=cost,
                                  allow_input_downcast=True)
    test_model = theano.function(inputs=[x, y],
                                 outputs=error,
                                 allow_input_downcast=True)
    data_path = os.path.join(repo, database_name)
    with closing(open(data_path, 'rb')) as f:
        data, data_c = pickle.load(f)
    data = numpy.asarray(data).astype(int)
    data_c = numpy.asarray(data_c).astype(int)

    # debug hook: truncate data and data_c here to train on a reduced set
    # read the data by minibatch
    batch_size = 15
    n_sample = data.shape[0] // batch_size  # number of full minibatches
    # 80% of the minibatches go into the training set
    n_train = int(n_sample * 0.8)

    y_value = numpy.zeros((2 * batch_size), dtype=int)
    y_value[batch_size:] = 1 + y_value[batch_size:]
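    # with the concatenation order [incorrect, correct] used below, the first
    # batch_size rows are labelled 0 (corrupted) and the next batch_size are labelled 1 (correct)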
    index_filename = 0

    saving = "params_savings_bis_v4_"

    #t_nlp.load(repo, (saving+str(95)))
    #index_filename = 96
    #saving = "params_savings_bis"

    for epoch in range(epochs):
        train_cost = []
        valid_cost = []
        index_valid = n_train
        for minibatch_index in range(n_train):

            correct_sentences = data[minibatch_index *
                                     batch_size:(minibatch_index + 1) *
                                     batch_size, :, :]
            incorrect_sentences = data_c[minibatch_index *
                                         batch_size:(minibatch_index + 1) *
                                         batch_size, :, :]
            sentences = numpy.concatenate(
                [incorrect_sentences, correct_sentences], axis=0)
            train_value = train_model(sentences, y_value)
            if minibatch_index % 10 == 0:
                train_cost = []
                for minibatch_train in range(n_train):
                    correct_sentences = data[minibatch_train *
                                             batch_size:(minibatch_train + 1) *
                                             batch_size, :, :]
                    incorrect_sentences = data_c[minibatch_train *
                                                 batch_size:(minibatch_train +
                                                             1) *
                                                 batch_size, :, :]
                    sentences = numpy.concatenate(
                        [incorrect_sentences, correct_sentences], axis=0)
                    train_value = valid_model(sentences, y_value)
                    train_cost.append(train_value)
                print "Train : " + str(numpy.mean(train_cost) * 100)
                valid_cost = []
                for minibatch_valid in range(n_train, n_sample):
                    correct_sentences = data[minibatch_valid *
                                             batch_size:(minibatch_valid + 1) *
                                             batch_size, :, :]
                    incorrect_sentences = data_c[minibatch_valid *
                                                 batch_size:(minibatch_valid +
                                                             1) *
                                                 batch_size, :, :]
                    sentences = numpy.concatenate(
                        [incorrect_sentences, correct_sentences], axis=0)
                    valid_value = test_model(sentences, y_value)
                    valid_cost.append(valid_value)
                print "Valid : " + str(
                    numpy.mean(valid_value) *
                    100) + " in : " + (saving + str(index_filename))
                t_nlp.save(repo, (saving + str(index_filename)))
                index_filename += 1
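
The per-parameter update above is plain RMSProp: keep a running average of the squared
gradients and divide each gradient step by its square root. As a reference, here is a
minimal stand-alone NumPy sketch of the same rule (the function and argument names are
illustrative, not part of the original code):

import numpy as np

def rmsprop_step(param, grad, cache, learning_rate, decay_rate, eps=1e-8):
    # running average of the squared gradients
    cache = decay_rate * cache + (1.0 - decay_rate) * grad ** 2
    # scale the step by the root of that running average
    param = param - learning_rate * grad / np.sqrt(cache + eps)
    return param, cache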
Example #2
def training_Hollande(repo, output_dico, learning_rate, decay_rate, filenames):
	
	#########
	# MODEL #
	#########
	dwin = 20
	with closing(open(os.path.join(repo, output_dico), 'rb')) as f:
		dico = pickle.load(f)
	n_mot = [len(dico[i]) for i in dico.keys()]
	vect_size = [100, 10, 5, 5]
	n_hidden = [100, 50]

	t_nlp = LookUpTrain(dwin, n_mot, vect_size, n_hidden, n_out=2)
	t_nlp.initialize()
	#t_nlp.load(repo, filename_load)

	x = T.itensor3('x')
	y = T.ivector('y')

	cost = T.mean(t_nlp.cost(x, y))
	error = T.mean(t_nlp.errors(x, y))


	params = getParams(t_nlp, x)
	updates, _ = Adam(cost, params, learning_rate)
	"""
	for p, i in zip(params, range(len(params))):
		p.name+='_'+str(i)

	# gradient computation with RMSProp
	updates = []
	caches = {}
	grad_params = T.grad(cost, params)
	for param, grad_param in zip(params, grad_params):

		if not caches.has_key(param.name):
			caches[param.name] = shared_floatx(param.get_value() * 0.,
												"cache_"+param.name)
		# update rule
		update_cache = decay_rate*caches[param.name]\
					+ (1 - decay_rate)*grad_param**2
		update_param = param  - learning_rate*grad_param/T.sqrt(update_cache + 1e-8)
		updates.append((caches[param.name], update_cache))
		updates.append((param, update_param))
	"""

	train_model = theano.function(inputs=[x,y], outputs=cost, updates=updates,
					allow_input_downcast=True)

	valid_model = theano.function(inputs=[x, y], outputs=cost, allow_input_downcast=True)
	test_model = theano.function(inputs=[x, y], outputs=error, allow_input_downcast=True)
	predict = theano.function(inputs=[x], outputs=t_nlp.predict(x), allow_input_downcast=True)
	predict_confidency = theano.function(inputs=[x], outputs=t_nlp.predict_confidency(x)[0], allow_input_downcast=True)
	index = 0
	y_value = []
	x_value = []
	with closing(open(os.path.join(repo, output_dico), 'rb')) as f:
		dico = pickle.load(f)
	for filename in filenames:
		lines, _ = get_input_from_files(repo, [filename], dico)
		for line in lines:
			x_value.append(line)
			y_value.append(index)
		if index == 0:  # the first file gets label 0, every following file label 1
			index += 1
	y_value = np.asarray(y_value, dtype=int)
	# balance the samples
	x_value_0 = [x_value[i] for i in range(np.argmax(y_value))]  # all label-0 samples
	y_value_0 = [y_value[i] for i in range(np.argmax(y_value))]  # all label-0 samples
	indexes = np.random.permutation(y_value.shape[0] - np.argmax(y_value))[:np.argmax(y_value)]
	x_value_1 = [x_value[i + np.argmax(y_value)] for i in indexes]  # subsample label-1 to match the label-0 count
	y_value_1 = [y_value[i + np.argmax(y_value)] for i in indexes]  # subsample label-1 to match the label-0 count

	pos_percentage = int(len(y_value_0) * 0.8)
	neg_percentage = int(len(y_value_1) * 0.8)
	other_pos_percentage = (len(y_value_0) - pos_percentage) // 2
	other_neg_percentage = (len(y_value_1) - neg_percentage) // 2

	pos_permut = np.random.permutation(len(y_value_0))
	neg_permut = np.random.permutation(len(y_value_1))
	x_train = [x_value_0[i] for i in pos_permut[:pos_percentage]] + [x_value_1[i] for i in neg_permut[:neg_percentage]]
	x_valid = [x_value_0[i] for i in pos_permut[pos_percentage:pos_percentage+other_pos_percentage]] + \
		  [x_value_1[i] for i in neg_permut[neg_percentage:neg_percentage+other_neg_percentage]]
	x_test = [x_value_0[i] for i in pos_permut[pos_percentage+other_pos_percentage:]] + \
		  [x_value_1[i] for i in neg_permut[neg_percentage+other_neg_percentage:]]

	y_train = [y_value_0[i] for i in pos_permut[:pos_percentage]] + [y_value_1[i] for i in neg_permut[:neg_percentage]]
	y_valid = [y_value_0[i] for i in pos_permut[pos_percentage:pos_percentage+other_pos_percentage]] + \
		  [y_value_1[i] for i in neg_permut[neg_percentage:neg_percentage+other_neg_percentage]]
	y_test = [y_value_0[i] for i in pos_permut[pos_percentage+other_pos_percentage:]] + \
		  [y_value_1[i] for i in neg_permut[neg_percentage+other_neg_percentage:]]

	index_train = np.random.permutation(len(y_train))
	batch_size = 32
	index_valid = np.random.permutation(len(y_valid))
	index_test = np.random.permutation(len(y_test))
	x_train_ = [x_train[i].astype(int) for i in index_train]
	x_valid_ = [x_valid[i].astype(int) for i in index_valid]
	x_test_ = [x_test[i].astype(int) for i in index_test]
	y_train_ = [y_train[i] for i in index_train]
	y_valid_ = [y_valid[i] for i in index_valid]
	y_test_ = [y_test[i] for i in index_test]

	paddings = [[], [], [], []]
	for _ in range(dwin // 2):
		for j in xrange(4):  # one 'PARSING' padding symbol per channel and per position
			paddings[j].append(dico[j]['PARSING'])
	paddings = np.asarray(paddings)
	#paddings = paddings.reshape((1, paddings.shape[0], paddings.shape[1]))
	x_train_ = [add_padding(elem, paddings) for elem in x_train_]
	x_valid_ = [add_padding(elem, paddings) for elem in x_valid_]
	x_test_ = [add_padding(elem, paddings) for elem in x_test_]
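	# every window of width dwin over a padded sentence becomes a separate
	# training sample below and inherits the label of the whole sentence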

	x_train=[]; x_valid=[]; x_test=[]
	y_train=[]; y_valid=[]; y_test=[]
	for elem, label in zip(x_train_, y_train_):
		for i in range(elem.shape[1] -dwin):
			x_train.append(elem[:,i:i+dwin])
			y_train.append(label)
	for elem, label in zip(x_valid_, y_valid_):
		for i in range(elem.shape[1] -dwin):
			x_valid.append(elem[:,i:i+dwin])
			y_valid.append(label)
	for elem, label in zip(x_test_, y_test_):
		for i in range(elem.shape[1] -dwin):
			x_test.append(elem[:,i:i+dwin])
			y_test.append(label)

	index_train = np.random.permutation(len(y_train))
	index_valid = np.random.permutation(len(y_valid))
	index_test = np.random.permutation(len(y_test))
	x_train = [x_train[i].astype(int) for i in index_train]
	x_valid = [x_valid[i].astype(int) for i in index_valid]
	x_test = [x_test[i].astype(int) for i in index_test]
	y_train = [y_train[i] for i in index_train]
	y_valid = [y_valid[i] for i in index_valid]
	y_test = [y_test[i] for i in index_test]

	n_train = len(y_train) // batch_size
	n_valid = len(y_valid) // batch_size
	n_test = len(y_test) // batch_size
	print (n_train, n_valid, n_test)
	print (1.*sum(y_valid))/len(y_valid)
	print (1.*sum(y_test))/len(y_test)
	print "#############################"
	saving = 'JADT_2_Fev_H_G_'
	index_filename = 0
	epochs = 10  # number of iterations over the corpus
	for epoch in range(epochs):
		index_valid = n_train
		for minibatch_index in range(n_train):

			sentence = x_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
			y_value = y_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
			#before = valid_model(sentence, y_value)
			train_value = train_model(sentence, y_value)
			#after = valid_model(sentence, y_value)
			#print before - after
		if True:  # evaluate on train / valid / test after every epoch
			train_cost=[]
			for minibatch_train in range(n_train):
				sentence = x_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
				y_value = y_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
				train_value = valid_model(sentence, y_value)
				train_cost.append(train_value)
			print "Train : "+str(np.mean(train_cost)*100)
			valid_cost=[]
			predictions=[]
			for minibatch_valid in range(n_valid):
				y_value = y_valid[minibatch_valid*batch_size:(minibatch_valid+1)*batch_size]
				sentence = x_valid[minibatch_valid*batch_size:(minibatch_valid+1)*batch_size]
				valid_value = test_model(sentence, y_value)
				valid_cost.append(valid_value)
			print "Valid : "+str(np.mean(valid_cost)*100)+" in : "+(saving+str(index_filename))
			test_cost=[]
			for minibatch_test in range(n_test):
				sentence = x_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
				y_value = y_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
				test_value = test_model(sentence, y_value)
				test_cost.append(test_value)
			print "Test : "+str(np.mean(test_cost)*100)
			index_filename+=1

	t_nlp.save(repo, saving)
	return
	# NOTE: the early return above makes the remaining block unreachable
	#### go through the test set : take the 10 most confident sentences ###
	#### go through the test set : take the 10 least confident sentences ###
	scores = []
	for index in range(len(y_test)):
		x_value=x_test[index:index+1]
		scores.append(predict_confidency(x_value))
	right = [x_test[i] for i in np.argsort(scores)[::-1][:20]]
	false = [x_test[i] for i in np.argsort(scores)[:20]]
	print scores[:10]

	with closing(open('data/sentence/relevant_sentence_H_G', 'wb')) as f:
		pickle.dump([right, false], f, protocol=pickle.HIGHEST_PROTOCOL)
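
Example #2 turns each variable-length sentence (a 4-channel integer matrix) into fixed-width
training samples by padding it and sliding a window of width dwin over its columns. Here is a
minimal NumPy sketch of that extraction step, assuming the sentence matrix is already padded
(the function name is illustrative, not from the original helpers):

import numpy as np

def sliding_windows(sentence, dwin):
    # sentence: (n_channels, n_tokens) integer matrix, already padded
    # returns one (n_channels, dwin) window per starting position
    return [sentence[:, i:i + dwin] for i in range(sentence.shape[1] - dwin)]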