# Example #1
def training(repo, learning_rate, batch_size, filenames):

    print 'LOAD DATA'
    (x_train,
     y_train), (x_valid,
                y_valid), (x_test,
                           y_test) = load_datasets_mnist(repo, filenames)

    print 'BUILD MODEL'
    train_f, valid_f, test_f, model, fisher, params = build_training()
    x_train = x_train[:1000]
    y_train = y_train[:1000]

    x = T.tensor4()
    y = T.imatrix()
    output = model.apply(x)
    output = output.reshape(
        (x.shape[0],
         model.get_dim('output')))  #TO DO : get_dim('name') for Architecture
    cost = Softmax().categorical_cross_entropy(y.flatten(), output).mean()
    cg = ComputationGraph(cost)

    inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg)
    outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg)
    inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
    outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
    dico = OrderedDict([('conv_output', outputs_conv[0])])
    [grad_s] = T.grad(cost, outputs_conv)
    dico['conv_output'] = grad_s

    f = theano.function([x, y],
                        grad_s,
                        allow_input_downcast=True,
                        on_unused_input='ignore')
    print np.mean(f(x_train[:10], y_train[:10]))
# Example #2
def training(repo, learning_rate, batch_size, filenames, percentage=1):

    momentum = 0.5
    print 'LOAD DATA'
    (x_train,
     y_train), (x_valid,
                y_valid), (x_test,
                           y_test) = load_datasets_mnist(repo, filenames)
    x_train, y_train = permut_data(x_train, y_train)

    print 'BUILD MODEL'
    train_f, valid_f, test_f, model, reinit = build_training()

    n_train = len(y_train) / batch_size
    n_train = (int)(n_train * percentage)
    n_valid = len(y_valid) / batch_size
    n_test = len(y_test) / batch_size
    init_lr_decay = 6
    done = False
    increment = init_lr_decay
    epochs = 2000
    best_valid = np.inf
    best_train = np.inf
    best_test = np.inf
    state_of_train = {}
    state_of_train['TRAIN'] = best_train
    state_of_train['VALID'] = best_valid
    state_of_train['TEST'] = best_test
    print 'TRAINING IN PROGRESS'
    for epoch in range(epochs):
        if epoch + 1 % 5 == 0:
            learning_rate = learning_rate * (1 - 0.2 / epoch)
        for minibatch_index in range(n_train):
            x_value = x_train[minibatch_index *
                              batch_size:(minibatch_index + 1) * batch_size]
            y_value = y_train[minibatch_index *
                              batch_size:(minibatch_index + 1) * batch_size]
            value = train_f(learning_rate, x_value, y_value)
            if np.isnan(value):
                import pdb
                pdb.set_trace()
        valid_cost = []
        for minibatch_index in range(n_valid):
            x_value = x_valid[minibatch_index *
                              batch_size:(minibatch_index + 1) * batch_size]
            y_value = y_valid[minibatch_index *
                              batch_size:(minibatch_index + 1) * batch_size]
            value = test_f(x_value, y_value)
            valid_cost.append(value)

        # deciding when to stop the training on the sub batch
        valid_result = np.mean(valid_cost)
        if valid_result <= best_valid * 0.95:
            increment = init_lr_decay
            best_valid = valid_result
            # compute best_train and best_test
            train_cost = []
            for minibatch_train in range(n_train):
                x_value = x_train[minibatch_train *
                                  batch_size:(minibatch_train + 1) *
                                  batch_size]
                y_value = y_train[minibatch_train *
                                  batch_size:(minibatch_train + 1) *
                                  batch_size]
                train_cost.append(valid_f(x_value, y_value))
            test_cost = []
            for minibatch_test in range(n_test):
                x_value = x_test[minibatch_test *
                                 batch_size:(minibatch_test + 1) * batch_size]
                y_value = y_test[minibatch_test *
                                 batch_size:(minibatch_test + 1) * batch_size]
                test_cost.append(test_f(x_value, y_value))
            best_train = np.mean(train_cost)
            best_test = np.mean(test_cost)
            #print "TRAIN : "+str(best_train)
            #print "VALID : "+str(best_valid*100)
            #print "TEST : "+str(best_test*100)
        else:
            increment -= 1

        if not done and increment == 0:
            learning_rate /= 2.
            #train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate*0.1, model=model)
            increment = init_lr_decay
            done = True
        if increment == 0:
            print 'END OF TRAINING'
            print percentage * 100
            print "TRAIN : " + str(best_train)
            print "VALID : " + str(best_valid * 100)
            print "TEST : " + str(best_test * 100)
            return
def training(repo, learning_rate, batch_size, filenames, option="qbc", record_repo=None, record_filename=None):

	print 'LOAD DATA'
	(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_datasets_mnist(repo, filenames)

	print 'BUILD MODEL'
	train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate)

	n_train = len(y_train)/batch_size
	n_valid = len(y_valid)/batch_size
	n_test = len(y_test)/batch_size

	epochs = 2000
	best_valid = np.inf; best_train = np.inf; best_test=np.inf
	init_increment = 5 # 20 5 8 
	increment = 0
	n_train_batches=int (n_train*1./100.)
	state_of_train = {}
	state_of_train['TRAIN']=best_train; state_of_train['VALID']=best_valid; state_of_train['TEST']=best_test; 
	print 'TRAINING IN PROGRESS'

	for epoch in range(epochs):

		try:
			for minibatch_index in range(n_train_batches):
				x_value = x_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
				y_value = y_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
				value = train_f(x_value, y_value)
				if np.isnan(value):
					import pdb
					pdb.set_trace()
			valid_cost=[]
			for minibatch_index in range(n_valid):
				x_value = x_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
				y_value = y_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
				value = test_f(x_value, y_value)
				valid_cost.append(value)

			# deciding when to stop the training on the sub batch
	    		valid_result = np.mean(valid_cost)
	    		if valid_result <= best_valid*0.95:
				model.save_model() # record the best architecture so to apply active learning on it (overfitting may appear in a few epochs)
	    			best_valid = valid_result
				# compute best_train and best_test
				train_cost=[]
				for minibatch_train in range(n_train_batches):
					x_value = x_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
					y_value = y_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
					train_cost.append(valid_f(x_value, y_value))
				test_cost=[]
				for minibatch_test in range(n_test):
					x_value = x_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
					y_value = y_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
					test_cost.append(test_f(x_value, y_value))
				best_train=np.mean(train_cost)
				best_test=np.mean(test_cost)
	    			increment=init_increment
			else:
				increment-=1
			increment = 0
			if increment==0:
				# keep the best set of params found during training
				model.load_model()
				increment = init_increment
				record_state(n_train_batches, n_train, best_train, best_valid, best_test, record_repo, record_filename)
				# record in a file
				if state_of_train['VALID'] > best_valid :
					state_of_train['TRAIN']=best_train
					state_of_train['VALID']=best_valid
					state_of_train['TEST']=best_test;
				import time
				start = time.clock()
				(x_train, y_train), n_train_batches = active_selection(model, x_train, y_train, n_train_batches, batch_size, valid_f, option)
				end = time.clock()
				print end -start
				return
				model.initialize()
				reinit()
				best_valid=np.inf; best_train=np.inf; best_test=np.inf

		except KeyboardInterrupt:
			# ask confirmation if you want to check state of training or really quit
			print 'BEST STATE OF TRAINING ACHIEVED'
			print "RATIO :"+str(1.*n_train_batches/n_train*100)
			print "TRAIN : "+str(state_of_train['TRAIN']*100)
			print "VALID : "+str(state_of_train['VALID']*100)
			print "TEST : "+str(state_of_train['TEST']*100)
			import pdb
			pdb.set_trace()
def training(repo,
             learning_rate,
             batch_size,
             filenames,
             record_repo=None,
             record_filename=None):
    lr_init = learning_rate
    epoch_tmp = 0
    print 'LOAD DATA'
    (x_train,
     y_train), (x_valid,
                y_valid), (x_test,
                           y_test) = load_datasets_mnist(repo, filenames)
    x_train, y_train = permut_data(x_train, y_train)

    print 'BUILD MODEL'
    train_f, valid_f, test_f, model, reinit = build_training()

    n_train = len(y_train) / batch_size
    n_valid = len(y_valid) / batch_size
    n_test = len(y_test) / batch_size

    epochs = 3000
    best_valid = np.inf
    best_train = np.inf
    best_test = np.inf
    init_increment = 5  # 20 5 8
    done = False
    increment = init_increment
    n_train_batches = int(n_train * 10. / 100.)
    state_of_train = {}
    state_of_train['TRAIN'] = best_train
    state_of_train['VALID'] = best_valid
    state_of_train['TEST'] = best_test
    print 'TRAINING IN PROGRESS'

    for epoch in range(epochs):

        try:
            if epoch_tmp + 1 % 5 == 0:
                learning_rate = learning_rate * (1 - 0.2 / epoch_tmp)
            for minibatch_index in range(n_train_batches):
                x_value = x_train[minibatch_index *
                                  batch_size:(minibatch_index + 1) *
                                  batch_size]
                y_value = y_train[minibatch_index *
                                  batch_size:(minibatch_index + 1) *
                                  batch_size]
                value = train_f(learning_rate, x_value, y_value)
                if np.isnan(value):
                    model.initialize()
                    reinit()
                    best_valid = np.inf
                    increment = init_increment
                    #import pdb
                    #pdb.set_trace()
            valid_cost = []
            for minibatch_index in range(n_valid):
                x_value = x_valid[minibatch_index *
                                  batch_size:(minibatch_index + 1) *
                                  batch_size]
                y_value = y_valid[minibatch_index *
                                  batch_size:(minibatch_index + 1) *
                                  batch_size]
                value = test_f(x_value, y_value)
                valid_cost.append(value)

            # deciding when to stop the training on the sub batch
            valid_result = np.mean(valid_cost)
            if valid_result <= best_valid * 0.95:
                model.save_model(
                )  # record the best architecture so to apply active learning on it (overfitting may appear in a few epochs)
                best_valid = valid_result
                # compute best_train and best_test
                train_cost = []
                for minibatch_train in range(n_train_batches):
                    x_value = x_train[minibatch_train *
                                      batch_size:(minibatch_train + 1) *
                                      batch_size]
                    y_value = y_train[minibatch_train *
                                      batch_size:(minibatch_train + 1) *
                                      batch_size]
                    train_cost.append(valid_f(x_value, y_value))
                test_cost = []
                for minibatch_test in range(n_test):
                    x_value = x_test[minibatch_test *
                                     batch_size:(minibatch_test + 1) *
                                     batch_size]
                    y_value = y_test[minibatch_test *
                                     batch_size:(minibatch_test + 1) *
                                     batch_size]
                    test_cost.append(test_f(x_value, y_value))
                best_train = np.mean(train_cost)
                best_test = np.mean(test_cost)
                increment = init_increment
                state_of_train['TRAIN'] = best_train
                state_of_train['VALID'] = best_valid
                state_of_train['TEST'] = best_test

            else:
                increment -= 1

            if not done and increment == 0:
                increment = init_increment
                done = True
                learning_rate /= 2
                #train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate*0.1, model=model)
            if done and increment == 0:
                # keep the best set of params found during training
                model.load_model()
                increment = init_increment
                done = False
                record_state(n_train_batches, n_train, best_train, best_valid,
                             best_test, record_repo, record_filename)
                # record in a file
                print "RATIO :" + str(1. * n_train_batches / n_train * 100)
                print state_of_train
                (x_train, y_train), n_train_batches = active_selection(
                    model, x_train, y_train, n_train_batches, batch_size,
                    valid_f, None, 'uncertainty')

                #model.initialize()
                model.initial_state()
                reinit()
                best_valid = np.inf
                best_train = np.inf
                best_test = np.inf
                learning_rate = lr_init
                epoch_tmp = 0
                #train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate*0.1, model=model)
                #state_of_train['TRAIN']=best_train; state_of_train['VALID']=best_valid; state_of_train['TEST']=best_test;

        except KeyboardInterrupt:
            # ask confirmation if you want to check state of training or really quit
            print 'BEST STATE OF TRAINING ACHIEVED'
            print "RATIO :" + str(1. * n_train_batches / n_train * 100)
            print "TRAIN : " + str(state_of_train['TRAIN'] * 100)
            print "VALID : " + str(state_of_train['VALID'] * 100)
            print "TEST : " + str(state_of_train['TEST'] * 100)
            import pdb
            pdb.set_trace()
# Example #5
def training(repo, learning_rate, batch_size, filenames):

	print 'LOAD DATA'
	(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_datasets_mnist(repo, filenames)

	print 'BUILD MODEL'
	train_f, valid_f, test_f, model, fisher, params = build_training()

	x_train = x_train[:1000]; y_train = y_train[:1000]
	#kfac_Fisher = kronecker_Fisher(x_train, y_train, model)
	kfac_Fisher = kronecker_Fisher(x_train, y_train, model)
	import pickle as pkl
	from contextlib import closing
	with closing(open('kfac_fisher', 'wb')) as f:
		pkl.dump(kfac_Fisher, f)
	return
	n_train = len(y_train)/batch_size
	#emp_Fisher = np.zeros_like(kfac_Fisher)
	emp_Fisher = None
	#for minibatch_train in range(n_train):
	for i in range(len(y_train)):
		x_value = x_train[i:i+1]
		y_value = y_train[i:i+1]
		tmp = empiricial_Fisher(params, fisher(x_value, y_value))
		if emp_Fisher is None:
			emp_Fisher = np.zeros_like(tmp)
		emp_Fisher += tmp
	emp_Fisher/=(1.*len(y_train))

	import pickle as pkl
	from contextlib import closing
	with closing(open('true_fisher', 'wb')) as f:
		pkl.dump(emp_Fisher, f)
	return

	#build_obs_Fisher(x_train, y_train, model, batch_size=1)

	n_train = len(y_train)/batch_size
	n_valid = len(y_valid)/batch_size
	n_test = len(y_test)/batch_size
        print n_train, n_valid, n_test
	epochs = 30000
	best_valid = np.inf; best_train = np.inf; best_test=np.inf 
	done = False
	n_train_batches=n_train
	#n_train_batches = n_train
	state_of_train = {}
	state_of_train['TRAIN']=best_train; state_of_train['VALID']=best_valid; state_of_train['TEST']=best_test; 
	print 'TRAINING IN PROGRESS'

	init_increment = 10 #n_train_batches # 20 5 8
	increment = init_increment
	lr = learning_rate

	for epoch in range(epochs):

		try:
			#for minibatch_index in range(n_train_batches):
			minibatch_index=0
			while minibatch_index < n_train_batches:
				x_value = x_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
				y_value = y_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
				value = train_f(lr, x_value, y_value)
				#print value
				minibatch_index+=1
				if np.isnan(value):
					import pdb
					pdb.set_trace()
				if minibatch_index %10==0:
					valid_cost=[]
					for minibatch_index in range(n_valid):
						x_value = x_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
						y_value = y_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
						value = test_f(x_value, y_value)
						valid_cost.append(value)

					# deciding when to stop the training on the sub batch
					valid_result = np.mean(valid_cost)
					#print "ONGOIN valid :"+str(valid_result)
					if valid_result <= best_valid*0.995:
						#print 'OBS', obs()
						#print valid_result*100
						model.save_model() # record the best architecture so to apply active learning on it (overfitting may appear in a few epochs)
	    					best_valid = valid_result
						# compute best_train and best_test
						train_cost=[]
						for minibatch_train in range(n_train_batches):
							x_value = x_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
							y_value = y_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
							train_cost.append(valid_f(x_value, y_value))
						test_cost=[]
						for minibatch_test in range(n_test):
							x_value = x_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
							y_value = y_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
							test_cost.append(test_f(x_value, y_value))
						best_train=np.mean(train_cost)
						best_test=np.mean(test_cost)
	    					increment=init_increment
						state_of_train['VALID'] = best_valid
						state_of_train['TEST'] = best_test
						state_of_train['TRAIN'] = best_train
						print 'VALID', best_valid*100
					else:
						increment-=1

					if not done and increment ==0:
						model.load_model()
						increment = init_increment
						if lr <=1e-5:
							done = True
						#train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate*0.1, model=model)
						lr *=0.1
						minibatch_index=0
					if done and increment==0:

						# keep the best set of params found during training
						model.load_model()
						increment = init_increment
						done = False
						print "RATIO :"+str(1.*n_train_batches/n_train*100)
						print state_of_train
						print (best_valid, best_test)
						# build the hessian
						# record it

						
						"""
						# 1) KFAC fisher matrix
						kfac_Fisher = kronecker_Fisher(x_train, y_train, model)
						# 2) empirical fisher matrix
						emp_Fisher = np.zeros_like(kfac_Fisher)
						for minibatch_train in range(n_train_batches):
							x_value = x_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
							y_value = y_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
							tmp = empiricial_Fisher(params, fisher(x_value, y_value))
							emp_Fisher += tmp
						emp_Fisher/=(1.*n_train_batches)
						# 3) diagonal fisher matrix
						diag_Fisher = np.diag([emp_Fisher[i,i] for i in range(len(emp_Fisher))])

						# normalization between 0, 255
						max_value = np.max(emp_Fisher)
						min_value = np.min(emp_Fisher)

						kfac_Fisher = (((kfac_Fisher - min_value)/(max_value - min_value))*255.9).astype(np.uint8)
						kfac_Fisher = np.clip(kfac_Fisher, 0, 255)
						
						emp_Fisher = (((emp_Fisher - min_value)/(max_value - min_value))*255.9).astype(np.uint8)
						emp_Fisher = np.clip(emp_Fisher, 0, 255)

						diag_Fisher = (((diag_Fisher - min_value)/(max_value - min_value))*255.9).astype(np.uint8)
						diag_Fisher = np.clip(diag_Fisher, 0, 255)

						img_kfac = Image.fromarray(kfac_Fisher)#.convert('LA')
						img_kfac.save('kfac_fisher.png')

						img_emp = Image.fromarray(emp_Fisher)#.convert('LA')
						img_emp.save('emp_fisher.png')

						img_diag = Image.fromarray(diag_Fisher)#.convert('LA')
						img_diag.save('diag_fisher.png')
						"""
						return

		except KeyboardInterrupt:
			# ask confirmation if you want to check state of training or really quit
			print 'BEST STATE OF TRAINING ACHIEVED'
			print "RATIO :"+str(1.*n_train_batches/n_train*100)
			print "TRAIN : "+str(state_of_train['TRAIN']*100)
			print "VALID : "+str(state_of_train['VALID']*100)
			print "TEST : "+str(state_of_train['TEST']*100)
			import pdb
			pdb.set_trace()