def training(repo, learning_rate, batch_size, filenames):
    """Debug/inspection variant: build the model, compile a Theano function
    returning the gradient of the softmax cross-entropy cost w.r.t. the
    first convolutional output, and print its mean over 10 training samples.

    No training loop is run in this variant; `learning_rate` and
    `batch_size` are accepted for signature parity but unused here.
    """
    print 'LOAD DATA'
    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_datasets_mnist(repo, filenames)
    print 'BUILD MODEL'
    train_f, valid_f, test_f, model, fisher, params = build_training()
    # Only a small subset is needed to probe gradients.
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    # Symbolic inputs: 4D image batch and integer label matrix.
    x = T.tensor4()
    y = T.imatrix()
    output = model.apply(x)
    # Flatten model output to (batch, n_classes) for the cross-entropy.
    output = output.reshape((x.shape[0], model.get_dim('output')))  #TO DO : get_dim('name') for Architecture
    cost = Softmax().categorical_cross_entropy(y.flatten(), output).mean()
    cg = ComputationGraph(cost)
    # Collect conv/linear layer inputs and outputs from the computation graph.
    # NOTE(review): only outputs_conv is actually used below; the other three
    # filters appear to be exploratory leftovers.
    inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg)
    outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg)
    inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
    outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
    dico = OrderedDict([('conv_output', outputs_conv[0])])
    # Gradient of the cost w.r.t. the convolutional output.
    # NOTE(review): the list-unpacking assumes outputs_conv has exactly one
    # entry — confirm the architecture has a single Convolutional brick output.
    [grad_s] = T.grad(cost, outputs_conv)
    dico['conv_output'] = grad_s
    f = theano.function([x, y], grad_s, allow_input_downcast=True, on_unused_input='ignore')
    print np.mean(f(x_train[:10], y_train[:10]))
def training(repo, learning_rate, batch_size, filenames, percentage=1): momentum = 0.5 print 'LOAD DATA' (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_datasets_mnist(repo, filenames) x_train, y_train = permut_data(x_train, y_train) print 'BUILD MODEL' train_f, valid_f, test_f, model, reinit = build_training() n_train = len(y_train) / batch_size n_train = (int)(n_train * percentage) n_valid = len(y_valid) / batch_size n_test = len(y_test) / batch_size init_lr_decay = 6 done = False increment = init_lr_decay epochs = 2000 best_valid = np.inf best_train = np.inf best_test = np.inf state_of_train = {} state_of_train['TRAIN'] = best_train state_of_train['VALID'] = best_valid state_of_train['TEST'] = best_test print 'TRAINING IN PROGRESS' for epoch in range(epochs): if epoch + 1 % 5 == 0: learning_rate = learning_rate * (1 - 0.2 / epoch) for minibatch_index in range(n_train): x_value = x_train[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] y_value = y_train[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] value = train_f(learning_rate, x_value, y_value) if np.isnan(value): import pdb pdb.set_trace() valid_cost = [] for minibatch_index in range(n_valid): x_value = x_valid[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] y_value = y_valid[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] value = test_f(x_value, y_value) valid_cost.append(value) # deciding when to stop the training on the sub batch valid_result = np.mean(valid_cost) if valid_result <= best_valid * 0.95: increment = init_lr_decay best_valid = valid_result # compute best_train and best_test train_cost = [] for minibatch_train in range(n_train): x_value = x_train[minibatch_train * batch_size:(minibatch_train + 1) * batch_size] y_value = y_train[minibatch_train * batch_size:(minibatch_train + 1) * batch_size] train_cost.append(valid_f(x_value, y_value)) test_cost = [] for minibatch_test in range(n_test): x_value = 
x_test[minibatch_test * batch_size:(minibatch_test + 1) * batch_size] y_value = y_test[minibatch_test * batch_size:(minibatch_test + 1) * batch_size] test_cost.append(test_f(x_value, y_value)) best_train = np.mean(train_cost) best_test = np.mean(test_cost) #print "TRAIN : "+str(best_train) #print "VALID : "+str(best_valid*100) #print "TEST : "+str(best_test*100) else: increment -= 1 if not done and increment == 0: learning_rate /= 2. #train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate*0.1, model=model) increment = init_lr_decay done = True if increment == 0: print 'END OF TRAINING' print percentage * 100 print "TRAIN : " + str(best_train) print "VALID : " + str(best_valid * 100) print "TEST : " + str(best_test * 100) return
def training(repo, learning_rate, batch_size, filenames, option="qbc", record_repo=None, record_filename=None):
    """Active-learning experiment: train on a 1% subset of the training
    minibatches, record progress with `record_state`, and grow the subset
    via `active_selection` (selection criterion forwarded as `option`).

    Ctrl-C prints the best state achieved so far and drops into pdb.

    NOTE(review): indentation was reconstructed from a mangled source; the
    unconditional `increment = 0` override and the early `return` flagged
    below look like debugging leftovers — confirm against version control.
    """
    print 'LOAD DATA'
    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_datasets_mnist(repo, filenames)
    print 'BUILD MODEL'
    train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate)
    n_train = len(y_train)/batch_size
    n_valid = len(y_valid)/batch_size
    n_test = len(y_test)/batch_size
    epochs = 2000
    best_valid = np.inf; best_train = np.inf; best_test=np.inf
    init_increment = 5 # 20 5 8
    increment = 0
    # start from 1% of the training minibatches
    n_train_batches=int (n_train*1./100.)
    state_of_train = {}
    state_of_train['TRAIN']=best_train; state_of_train['VALID']=best_valid; state_of_train['TEST']=best_test;
    print 'TRAINING IN PROGRESS'
    for epoch in range(epochs):
        try:
            # one pass over the current active subset
            for minibatch_index in range(n_train_batches):
                x_value = x_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
                y_value = y_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
                value = train_f(x_value, y_value)
                if np.isnan(value):
                    # drop into the debugger on divergence
                    import pdb
                    pdb.set_trace()
            valid_cost=[]
            for minibatch_index in range(n_valid):
                x_value = x_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
                y_value = y_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
                value = test_f(x_value, y_value)
                valid_cost.append(value)
            # deciding when to stop the training on the sub batch
            valid_result = np.mean(valid_cost)
            if valid_result <= best_valid*0.95:
                model.save_model() # record the best architecture so to apply active learning on it (overfitting may appear in a few epochs)
                best_valid = valid_result
                # compute best_train and best_test
                train_cost=[]
                for minibatch_train in range(n_train_batches):
                    x_value = x_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
                    y_value = y_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
                    train_cost.append(valid_f(x_value, y_value))
                test_cost=[]
                for minibatch_test in range(n_test):
                    x_value = x_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
                    y_value = y_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
                    test_cost.append(test_f(x_value, y_value))
                best_train=np.mean(train_cost)
                best_test=np.mean(test_cost)
                increment=init_increment
            else:
                increment-=1
            # NOTE(review): this override forces the branch below to run every
            # epoch, making the patience counter above irrelevant — presumably
            # a debugging shortcut; verify.
            increment = 0
            if increment==0:
                # keep the best set of params found during training
                model.load_model()
                increment = init_increment
                record_state(n_train_batches, n_train, best_train, best_valid, best_test, record_repo, record_filename) # record in a file
                if state_of_train['VALID'] > best_valid :
                    state_of_train['TRAIN']=best_train
                    state_of_train['VALID']=best_valid
                    state_of_train['TEST']=best_test;
                # time the active-selection step
                import time
                start = time.clock()
                (x_train, y_train), n_train_batches = active_selection(model, x_train, y_train, n_train_batches, batch_size, valid_f, option)
                end = time.clock()
                print end -start
                # NOTE(review): unconditional return — the re-initialization
                # code below it is dead.
                return
                model.initialize()
                reinit()
                best_valid=np.inf; best_train=np.inf; best_test=np.inf
        except KeyboardInterrupt:
            # ask confirmation if you want to check state of training or really quit
            print 'BEST STATE OF TRAINING ACHIEVED'
            print "RATIO :"+str(1.*n_train_batches/n_train*100)
            print "TRAIN : "+str(state_of_train['TRAIN']*100)
            print "VALID : "+str(state_of_train['VALID']*100)
            print "TEST : "+str(state_of_train['TEST']*100)
            import pdb
            pdb.set_trace()
def training(repo, learning_rate, batch_size, filenames, record_repo=None, record_filename=None): lr_init = learning_rate epoch_tmp = 0 print 'LOAD DATA' (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_datasets_mnist(repo, filenames) x_train, y_train = permut_data(x_train, y_train) print 'BUILD MODEL' train_f, valid_f, test_f, model, reinit = build_training() n_train = len(y_train) / batch_size n_valid = len(y_valid) / batch_size n_test = len(y_test) / batch_size epochs = 3000 best_valid = np.inf best_train = np.inf best_test = np.inf init_increment = 5 # 20 5 8 done = False increment = init_increment n_train_batches = int(n_train * 10. / 100.) state_of_train = {} state_of_train['TRAIN'] = best_train state_of_train['VALID'] = best_valid state_of_train['TEST'] = best_test print 'TRAINING IN PROGRESS' for epoch in range(epochs): try: if epoch_tmp + 1 % 5 == 0: learning_rate = learning_rate * (1 - 0.2 / epoch_tmp) for minibatch_index in range(n_train_batches): x_value = x_train[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] y_value = y_train[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] value = train_f(learning_rate, x_value, y_value) if np.isnan(value): model.initialize() reinit() best_valid = np.inf increment = init_increment #import pdb #pdb.set_trace() valid_cost = [] for minibatch_index in range(n_valid): x_value = x_valid[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] y_value = y_valid[minibatch_index * batch_size:(minibatch_index + 1) * batch_size] value = test_f(x_value, y_value) valid_cost.append(value) # deciding when to stop the training on the sub batch valid_result = np.mean(valid_cost) if valid_result <= best_valid * 0.95: model.save_model( ) # record the best architecture so to apply active learning on it (overfitting may appear in a few epochs) best_valid = valid_result # compute best_train and best_test train_cost = [] for minibatch_train in range(n_train_batches): x_value = 
x_train[minibatch_train * batch_size:(minibatch_train + 1) * batch_size] y_value = y_train[minibatch_train * batch_size:(minibatch_train + 1) * batch_size] train_cost.append(valid_f(x_value, y_value)) test_cost = [] for minibatch_test in range(n_test): x_value = x_test[minibatch_test * batch_size:(minibatch_test + 1) * batch_size] y_value = y_test[minibatch_test * batch_size:(minibatch_test + 1) * batch_size] test_cost.append(test_f(x_value, y_value)) best_train = np.mean(train_cost) best_test = np.mean(test_cost) increment = init_increment state_of_train['TRAIN'] = best_train state_of_train['VALID'] = best_valid state_of_train['TEST'] = best_test else: increment -= 1 if not done and increment == 0: increment = init_increment done = True learning_rate /= 2 #train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate*0.1, model=model) if done and increment == 0: # keep the best set of params found during training model.load_model() increment = init_increment done = False record_state(n_train_batches, n_train, best_train, best_valid, best_test, record_repo, record_filename) # record in a file print "RATIO :" + str(1. * n_train_batches / n_train * 100) print state_of_train (x_train, y_train), n_train_batches = active_selection( model, x_train, y_train, n_train_batches, batch_size, valid_f, None, 'uncertainty') #model.initialize() model.initial_state() reinit() best_valid = np.inf best_train = np.inf best_test = np.inf learning_rate = lr_init epoch_tmp = 0 #train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate*0.1, model=model) #state_of_train['TRAIN']=best_train; state_of_train['VALID']=best_valid; state_of_train['TEST']=best_test; except KeyboardInterrupt: # ask confirmation if you want to check state of training or really quit print 'BEST STATE OF TRAINING ACHIEVED' print "RATIO :" + str(1. 
* n_train_batches / n_train * 100) print "TRAIN : " + str(state_of_train['TRAIN'] * 100) print "VALID : " + str(state_of_train['VALID'] * 100) print "TEST : " + str(state_of_train['TEST'] * 100) import pdb pdb.set_trace()
def training(repo, learning_rate, batch_size, filenames):
    """Fisher-matrix experiment variant.

    As currently written, this computes the Kronecker-factored Fisher
    approximation on the first 1000 training examples, pickles it to
    'kfac_fisher', and returns. Everything after the first `return` is
    unreachable scaffolding from earlier experiments (empirical-Fisher dump,
    then a full training loop with lr decay on plateau).

    NOTE(review): indentation was reconstructed from a mangled source —
    confirm the nesting of the plateau/restart branches against version
    control before relying on it.
    """
    print 'LOAD DATA'
    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = load_datasets_mnist(repo, filenames)
    print 'BUILD MODEL'
    train_f, valid_f, test_f, model, fisher, params = build_training()
    # only the first 1000 examples are used for the Fisher estimates
    x_train = x_train[:1000]; y_train = y_train[:1000]
    #kfac_Fisher = kronecker_Fisher(x_train, y_train, model)
    kfac_Fisher = kronecker_Fisher(x_train, y_train, model)
    import pickle as pkl
    from contextlib import closing
    with closing(open('kfac_fisher', 'wb')) as f:
        pkl.dump(kfac_Fisher, f)
    # NOTE(review): everything below this return is dead code.
    return
    n_train = len(y_train)/batch_size
    #emp_Fisher = np.zeros_like(kfac_Fisher)
    emp_Fisher = None
    # per-example empirical Fisher, averaged over the 1000 examples
    #for minibatch_train in range(n_train):
    for i in range(len(y_train)):
        x_value = x_train[i:i+1]
        y_value = y_train[i:i+1]
        tmp = empiricial_Fisher(params, fisher(x_value, y_value))
        if emp_Fisher is None:
            # lazily sized from the first per-example Fisher
            emp_Fisher = np.zeros_like(tmp)
        emp_Fisher += tmp
    emp_Fisher/=(1.*len(y_train))
    import pickle as pkl
    from contextlib import closing
    with closing(open('true_fisher', 'wb')) as f:
        pkl.dump(emp_Fisher, f)
    # NOTE(review): second unconditional return — the training loop below is dead.
    return
    #build_obs_Fisher(x_train, y_train, model, batch_size=1)
    n_train = len(y_train)/batch_size
    n_valid = len(y_valid)/batch_size
    n_test = len(y_test)/batch_size
    print n_train, n_valid, n_test
    epochs = 30000
    best_valid = np.inf; best_train = np.inf; best_test=np.inf
    done = False
    n_train_batches=n_train
    #n_train_batches = n_train
    state_of_train = {}
    state_of_train['TRAIN']=best_train; state_of_train['VALID']=best_valid; state_of_train['TEST']=best_test;
    print 'TRAINING IN PROGRESS'
    init_increment = 10 #n_train_batches # 20 5 8
    increment = init_increment
    lr = learning_rate
    for epoch in range(epochs):
        try:
            #for minibatch_index in range(n_train_batches):
            minibatch_index=0
            while minibatch_index < n_train_batches:
                x_value = x_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
                y_value = y_train[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
                value = train_f(lr, x_value, y_value)
                #print value
                minibatch_index+=1
                if np.isnan(value):
                    import pdb
                    pdb.set_trace()
                # validate every 10 training minibatches
                if minibatch_index %10==0:
                    valid_cost=[]
                    # NOTE(review): this loop reuses and clobbers
                    # `minibatch_index`, the counter of the enclosing while
                    # loop — after validation the while resumes from
                    # n_valid - 1, which looks unintended; verify.
                    for minibatch_index in range(n_valid):
                        x_value = x_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
                        y_value = y_valid[minibatch_index*batch_size:(minibatch_index+1)*batch_size]
                        value = test_f(x_value, y_value)
                        valid_cost.append(value)
                    # deciding when to stop the training on the sub batch
                    valid_result = np.mean(valid_cost)
                    #print "ONGOIN valid :"+str(valid_result)
                    if valid_result <= best_valid*0.995:
                        #print 'OBS', obs()
                        #print valid_result*100
                        model.save_model() # record the best architecture so to apply active learning on it (overfitting may appear in a few epochs)
                        best_valid = valid_result
                        # compute best_train and best_test
                        train_cost=[]
                        for minibatch_train in range(n_train_batches):
                            x_value = x_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
                            y_value = y_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
                            train_cost.append(valid_f(x_value, y_value))
                        test_cost=[]
                        for minibatch_test in range(n_test):
                            x_value = x_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
                            y_value = y_test[minibatch_test*batch_size:(minibatch_test+1)*batch_size]
                            test_cost.append(test_f(x_value, y_value))
                        best_train=np.mean(train_cost)
                        best_test=np.mean(test_cost)
                        increment=init_increment
                        state_of_train['VALID'] = best_valid
                        state_of_train['TEST'] = best_test
                        state_of_train['TRAIN'] = best_train
                        print 'VALID', best_valid*100
                    else:
                        increment-=1
                    if not done and increment ==0:
                        # plateau: restore best params and decay lr by 10x;
                        # once lr falls to <= 1e-5 the next plateau ends training
                        model.load_model()
                        increment = init_increment
                        if lr <=1e-5:
                            done = True
                        #train_f, valid_f, test_f, model, reinit = build_training(lr=learning_rate*0.1, model=model)
                        lr *=0.1
                        minibatch_index=0
            if done and increment==0:
                # keep the best set of params found during training
                model.load_model()
                increment = init_increment
                done = False
                print "RATIO :"+str(1.*n_train_batches/n_train*100)
                print state_of_train
                print (best_valid, best_test)
                # build the hessian
                # record it
                """
                # 1) KFAC fisher matrix
                kfac_Fisher = kronecker_Fisher(x_train, y_train, model)
                # 2) empirical fisher matrix
                emp_Fisher = np.zeros_like(kfac_Fisher)
                for minibatch_train in range(n_train_batches):
                    x_value = x_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
                    y_value = y_train[minibatch_train*batch_size:(minibatch_train+1)*batch_size]
                    tmp = empiricial_Fisher(params, fisher(x_value, y_value))
                    emp_Fisher += tmp
                emp_Fisher/=(1.*n_train_batches)
                # 3) diagonal fisher matrix
                diag_Fisher = np.diag([emp_Fisher[i,i] for i in range(len(emp_Fisher))])
                # normalization between 0, 255
                max_value = np.max(emp_Fisher)
                min_value = np.min(emp_Fisher)
                kfac_Fisher = (((kfac_Fisher - min_value)/(max_value - min_value))*255.9).astype(np.uint8)
                kfac_Fisher = np.clip(kfac_Fisher, 0, 255)
                emp_Fisher = (((emp_Fisher - min_value)/(max_value - min_value))*255.9).astype(np.uint8)
                emp_Fisher = np.clip(emp_Fisher, 0, 255)
                diag_Fisher = (((diag_Fisher - min_value)/(max_value - min_value))*255.9).astype(np.uint8)
                diag_Fisher = np.clip(diag_Fisher, 0, 255)
                img_kfac = Image.fromarray(kfac_Fisher)#.convert('LA')
                img_kfac.save('kfac_fisher.png')
                img_emp = Image.fromarray(emp_Fisher)#.convert('LA')
                img_emp.save('emp_fisher.png')
                img_diag = Image.fromarray(diag_Fisher)#.convert('LA')
                img_diag.save('diag_fisher.png')
                """
                return
        except KeyboardInterrupt:
            # ask confirmation if you want to check state of training or really quit
            print 'BEST STATE OF TRAINING ACHIEVED'
            print "RATIO :"+str(1.*n_train_batches/n_train*100)
            print "TRAIN : "+str(state_of_train['TRAIN']*100)
            print "VALID : "+str(state_of_train['VALID']*100)
            print "TEST : "+str(state_of_train['TEST']*100)
            import pdb
            pdb.set_trace()