def test_AE(data='', validationdata='', param_list=None, n_hidden=288,
            share=False, missing=True, missing_rate=0.2, learning_rate=0.08,
            training_epochs=10, batch_size=100, output_folder='dA_plots',
            order=1):
    """Train a single autoencoder with early stopping, then write
    per-modality RMSE / error-ratio reports and reconstructions to disk.

    NOTE(review): this module defines ``test_AE`` twice; a later duplicate
    definition shadows this one at import time -- confirm which copy is
    intended to survive.

    Parameters
    ----------
    data, validationdata : str
        Inputs handed to ``load_data`` for the training / validation sets.
    param_list : list or None
        Modality parameter list forwarded to ``load_data`` and ``AE``.
    n_hidden : int
        Hidden-layer size of the autoencoder.
    share : bool
        Weight-sharing flag forwarded to ``AE``.
    missing : bool
        Whether missing entries are modeled (forwarded to ``AE`` and the
        reconstruction helpers).
    missing_rate : float
        Fraction of entries treated as missing; also part of output paths.
    learning_rate, training_epochs, batch_size :
        SGD hyper-parameters.
    output_folder : str
        Subdirectory of ``../Result/`` receiving all artifacts.
    order : int
        1 for the first (missing-data) stage, 2 for the second stage.

    Returns
    -------
    tuple
        ``(model_path, h1_path, h_valid_path, indicator matrices...)`` --
        6 items when ``order == 1``, 5 when ``order == 2``, implicitly
        ``None`` otherwise (matching the original control flow).
    """
    newpath = '../Result/' + output_folder + '_' + str(missing_rate)
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    ####################################
    # Initializing training dataset    #
    ####################################
    datasets, indi_matrix, data_test, indi_matrix_test, n_train_batches, \
        numMod, raw, trainstats_list, visible_size_Mod = \
        load_data(param_list, data, batch_size, missing_rate,
                  train=True, order=order)

    ####################################
    # Initializing validation dataset  #
    ####################################
    valid_batch_size = 306
    validationset, indi_matrix_validation, valid_test, \
        indi_matrix_valid_test, n_valid_batches = \
        load_data(param_list, validationdata, valid_batch_size,
                  missing_rate, train=False, order=order)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    y = T.matrix('y')  # indicator matrix fed alongside the data

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    ae = AE(numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            indi_matrix=y,
            bias_matrix=None,
            n_visible=raw.shape[1],
            n_hidden=n_hidden,
            W1=None,
            W2=None,
            bhid=None,
            bvis=None,
            missing=missing,
            param_list=param_list,
            share=share)

    cost, updates = ae.get_cost_updates(learning_rate)

    train_ae = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: datasets[index * batch_size:(index + 1) * batch_size],
            y: indi_matrix[index * batch_size:(index + 1) * batch_size]
        },
        on_unused_input='warn',
    )

    validate_cost = ae.get_cost()
    validate_ae = theano.function(
        [index],
        validate_cost,
        givens={
            x: validationset[index * valid_batch_size:
                             (index + 1) * valid_batch_size],
            y: indi_matrix_validation[index * valid_batch_size:
                                      (index + 1) * valid_batch_size]
        },
        on_unused_input='warn',
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # FIX: integer division -- under Python 3, `patience / 2` is a float.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    best_epoch = 0
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        c = []
        for minibatch_index in range(int(n_train_batches)):
            a = train_ae(minibatch_index)
            c.append(a)
            # iteration number: how many minibatches have been run so far
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute loss on the validation set
                validation_losses = [
                    validate_ae(i) for i in range(int(n_valid_batches))
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation cost %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    print('saving the model for epoch %i' % epoch)
                    best_epoch = epoch
                    save(newpath + '/best_model_epoch_' + str(epoch) +
                         '.pkl', ae)
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f') %
          (best_validation_loss))
    # FIX: Python-2 "print >> sys.stderr" artifact -- the stream object was
    # being printed as a value; route via the file= keyword instead.
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.1fs' % ((end_time - start_time)), file=sys.stderr)

    ####################################
    #  computing RMSE and error ratio  #
    ####################################
    print('Best epoch is %i' % best_epoch)
    print('Now we starting computing the RMSE and error ratio')
    # Drop every checkpoint except the best one.
    for i in range(best_epoch - 1):
        path = newpath + '/best_model_epoch_' + str(i + 1) + '.pkl'
        if os.path.exists(path):
            os.remove(path)
    ae = newpath + '/best_model_epoch_' + str(best_epoch) + '.pkl'
    W1, W2, b1, b2, G = load_numerical_params_ae(ae)
    bias_matrix = None
    y = get_hidden_values(data_test, indi_matrix_test, W1, b1, G,
                          bias_matrix, missing)
    y2 = get_hidden_values(valid_test, indi_matrix_valid_test, W1, b1, G,
                           bias_matrix, missing)
    h1 = newpath + '/h1.npy'
    h_valid = newpath + '/h_valid.npy'
    numpy.save(h1, y)
    numpy.save(h_valid, y2)
    reconstruction = get_reconstructed_input(y, W2, b2, G, missing)
    print(reconstruction)
    numpy.savetxt(newpath + '/output_' + str(best_epoch) + '.txt',
                  reconstruction, delimiter=',')
    f = open(newpath + '/AE_' + str(best_epoch) + '.txt', 'w')
    if order == 1:
        for i in range(int(numMod)):
            # Column range of modality i.
            lo, hi = i * visible_size_Mod, (i + 1) * visible_size_Mod
            numpy.savetxt(newpath + '/Raw_' + str(i) + '.txt',
                          raw[:, lo:hi], delimiter=',')
            # De-normalize the reconstruction back to the raw scale once,
            # instead of recomputing it for every metric (was recomputed
            # five times per modality).
            denorm = denormActiv(reconstruction[:, lo:hi],
                                 trainstats_list[i])
            numpy.savetxt(newpath + '/Recstru_' + str(i) + '_' +
                          str(best_epoch) + '.txt', denorm, delimiter=',')
            rmse_val = RMSE(raw[:, lo:hi], denorm)
            # FIX: Python-2 "print >> f" artifact -- `f` was printed to
            # stdout as a value; the file copy is handled by f.write below.
            print('AE RMSE for Modality', i, str(rmse_val))
            f.write('AE RMSE for Modality' + '\t' + str(i) + '\t' +
                    str(rmse_val) + '\n')
            err_val = error_ratio(raw[:, lo:hi], denorm,
                                  indi_matrix_test[:, lo:hi], missing_rate)
            print('AE error ratio for Modality', i, str(err_val))
            f.write('AE error ratio for Modality' + '\t' + str(i) + '\t' +
                    str(err_val) + '\n')
        print('we are done here')
    else:
        for i in range(int(numMod)):
            lo, hi = i * visible_size_Mod, (i + 1) * visible_size_Mod
            numpy.savetxt(newpath + '/Raw_' + str(i) + '.txt',
                          raw[:, lo:hi], delimiter=',')
            numpy.savetxt(newpath + '/Recstru_' + str(i) + '_' +
                          str(best_epoch) + '.txt',
                          reconstruction[:, lo:hi], delimiter=',')
            rmse_val = RMSE(raw[:, lo:hi], reconstruction[:, lo:hi])
            print('AE RMSE for Modality', i, str(rmse_val))
            f.write('AE RMSE for Modality' + '\t' + str(i) + '\t' +
                    str(rmse_val) + '\n')
            # NOTE: in this branch error_ratio takes no indicator matrix /
            # missing rate, matching the original call.
            err_val = error_ratio(raw[:, lo:hi], reconstruction[:, lo:hi])
            print('AE error ratio for Modality', i, str(err_val))
            f.write('AE error ratio for Modality' + '\t' + str(i) + '\t' +
                    str(err_val) + '\n')
    # FIX: close the report file (was previously leaked; a second open()
    # below re-bound `f` without closing the first handle).
    f.close()
    if order == 1:
        print('...This is the missing case, we predict the reconstruction '
              'error ratio for the test data...')
        raw_test, data_real_test, teststats_list, indi_matrix_final_test = \
            load_test_data(data, numMod, missing_rate)
        y1 = get_hidden_values(data_real_test, indi_matrix_final_test, W1,
                               b1, G, bias_matrix, missing)
        reconstruction_test = get_reconstructed_input(y1, W2, b2, G,
                                                      missing)
        print(reconstruction_test)
        numpy.savetxt(newpath + '/output_test_' + str(best_epoch) + '.txt',
                      reconstruction_test, delimiter=',')
        f = open(newpath + '/AE_test_' + str(best_epoch) + '.txt', 'w')
        for i in range(int(numMod)):
            lo, hi = i * visible_size_Mod, (i + 1) * visible_size_Mod
            numpy.savetxt(newpath + '/Raw_test_' + str(i) + '.txt',
                          raw_test[:, lo:hi], delimiter=',')
            denorm = denormActiv(reconstruction_test[:, lo:hi],
                                 teststats_list[i])
            numpy.savetxt(newpath + '/Recstru_test_' + str(i) + '_' +
                          str(best_epoch) + '.txt', denorm, delimiter=',')
            rmse_val = RMSE(raw_test[:, lo:hi], denorm)
            print('AE RMSE for Modality', i, str(rmse_val))
            f.write('AE RMSE for Modality' + '\t' + str(i) + '\t' +
                    str(rmse_val) + '\n')
            err_val = error_ratio(raw_test[:, lo:hi], denorm,
                                  indi_matrix_final_test[:, lo:hi],
                                  missing_rate)
            print('AE error ratio for Modality', i, str(err_val))
            f.write('AE error ratio for Modality' + '\t' + str(i) + '\t' +
                    str(err_val) + '\n')
        f.close()
        return (ae, h1, h_valid, indi_matrix_test, indi_matrix_valid_test,
                indi_matrix_final_test)
    if order == 2:
        return ae, h1, h_valid, indi_matrix_test, indi_matrix_valid_test
def test_AE(data='', validationdata='', param_list=None, n_hidden=288,
            share=False, missing=True, missing_rate=0.2, learning_rate=0.08,
            training_epochs=10, batch_size=100, output_folder='dA_plots',
            order=1):
    """Train a single autoencoder with early stopping, then write
    per-modality RMSE / error-ratio reports and reconstructions to disk.

    NOTE(review): this is a duplicate definition of ``test_AE``; being
    defined later in the module it shadows the earlier copy, so this is the
    version actually in effect -- confirm the earlier copy can be removed.

    Parameters
    ----------
    data, validationdata : str
        Inputs handed to ``load_data`` for the training / validation sets.
    param_list : list or None
        Modality parameter list forwarded to ``load_data`` and ``AE``.
    n_hidden : int
        Hidden-layer size of the autoencoder.
    share : bool
        Weight-sharing flag forwarded to ``AE``.
    missing : bool
        Whether missing entries are modeled.
    missing_rate : float
        Fraction of entries treated as missing; also part of output paths.
    learning_rate, training_epochs, batch_size :
        SGD hyper-parameters.
    output_folder : str
        Subdirectory of ``../Result/`` receiving all artifacts.
    order : int
        1 for the first (missing-data) stage, 2 for the second stage.

    Returns
    -------
    tuple
        6 items when ``order == 1``, 5 when ``order == 2``, implicitly
        ``None`` otherwise (matching the original control flow).
    """
    newpath = '../Result/' + output_folder + '_' + str(missing_rate)
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    ####################################
    # Initializing training dataset    #
    ####################################
    datasets, indi_matrix, data_test, indi_matrix_test, n_train_batches, \
        numMod, raw, trainstats_list, visible_size_Mod = \
        load_data(param_list, data, batch_size, missing_rate,
                  train=True, order=order)

    ####################################
    # Initializing validation dataset  #
    ####################################
    valid_batch_size = 306
    validationset, indi_matrix_validation, valid_test, \
        indi_matrix_valid_test, n_valid_batches = \
        load_data(param_list, validationdata, valid_batch_size,
                  missing_rate, train=False, order=order)

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    y = T.matrix('y')  # indicator matrix fed alongside the data

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2**30))

    ae = AE(numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            indi_matrix=y,
            bias_matrix=None,
            n_visible=raw.shape[1],
            n_hidden=n_hidden,
            W1=None,
            W2=None,
            bhid=None,
            bvis=None,
            missing=missing,
            param_list=param_list,
            share=share)

    cost, updates = ae.get_cost_updates(learning_rate)

    train_ae = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: datasets[index * batch_size:(index + 1) * batch_size],
            y: indi_matrix[index * batch_size:(index + 1) * batch_size]
        },
        on_unused_input='warn',
    )

    validate_cost = ae.get_cost()
    validate_ae = theano.function(
        [index],
        validate_cost,
        givens={
            x: validationset[index * valid_batch_size:
                             (index + 1) * valid_batch_size],
            y: indi_matrix_validation[index * valid_batch_size:
                                      (index + 1) * valid_batch_size]
        },
        on_unused_input='warn',
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the model')
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # FIX: integer division -- under Python 3, `patience / 2` is a float.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    best_epoch = 0
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        c = []
        for minibatch_index in range(int(n_train_batches)):
            a = train_ae(minibatch_index)
            c.append(a)
            # iteration number: how many minibatches have been run so far
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                # compute loss on the validation set
                validation_losses = [
                    validate_ae(i) for i in range(int(n_valid_batches))
                ]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation cost %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    print('saving the model for epoch %i' % epoch)
                    best_epoch = epoch
                    save(newpath + '/best_model_epoch_' + str(epoch) +
                         '.pkl', ae)
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f') %
          (best_validation_loss))
    # FIX: Python-2 "print >> sys.stderr" artifact -- the stream object was
    # being printed as a value; route via the file= keyword instead.
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.1fs' % ((end_time - start_time)), file=sys.stderr)

    ####################################
    #  computing RMSE and error ratio  #
    ####################################
    print('Best epoch is %i' % best_epoch)
    print('Now we starting computing the RMSE and error ratio')
    # Drop every checkpoint except the best one.
    for i in range(best_epoch - 1):
        path = newpath + '/best_model_epoch_' + str(i + 1) + '.pkl'
        if os.path.exists(path):
            os.remove(path)
    ae = newpath + '/best_model_epoch_' + str(best_epoch) + '.pkl'
    W1, W2, b1, b2, G = load_numerical_params_ae(ae)
    bias_matrix = None
    y = get_hidden_values(data_test, indi_matrix_test, W1, b1, G,
                          bias_matrix, missing)
    y2 = get_hidden_values(valid_test, indi_matrix_valid_test, W1, b1, G,
                           bias_matrix, missing)
    h1 = newpath + '/h1.npy'
    h_valid = newpath + '/h_valid.npy'
    numpy.save(h1, y)
    numpy.save(h_valid, y2)
    reconstruction = get_reconstructed_input(y, W2, b2, G, missing)
    print(reconstruction)
    numpy.savetxt(newpath + '/output_' + str(best_epoch) + '.txt',
                  reconstruction, delimiter=',')
    f = open(newpath + '/AE_' + str(best_epoch) + '.txt', 'w')
    if order == 1:
        for i in range(int(numMod)):
            # Column range of modality i.
            lo, hi = i * visible_size_Mod, (i + 1) * visible_size_Mod
            numpy.savetxt(newpath + '/Raw_' + str(i) + '.txt',
                          raw[:, lo:hi], delimiter=',')
            # De-normalize once per modality instead of per metric.
            denorm = denormActiv(reconstruction[:, lo:hi],
                                 trainstats_list[i])
            numpy.savetxt(newpath + '/Recstru_' + str(i) + '_' +
                          str(best_epoch) + '.txt', denorm, delimiter=',')
            rmse_val = RMSE(raw[:, lo:hi], denorm)
            # FIX: Python-2 "print >> f" artifact -- `f` was printed to
            # stdout as a value; the file copy is handled by f.write below.
            print('AE RMSE for Modality', i, str(rmse_val))
            f.write('AE RMSE for Modality' + '\t' + str(i) + '\t' +
                    str(rmse_val) + '\n')
            err_val = error_ratio(raw[:, lo:hi], denorm,
                                  indi_matrix_test[:, lo:hi], missing_rate)
            print('AE error ratio for Modality', i, str(err_val))
            f.write('AE error ratio for Modality' + '\t' + str(i) + '\t' +
                    str(err_val) + '\n')
        print('we are done here')
    else:
        for i in range(int(numMod)):
            lo, hi = i * visible_size_Mod, (i + 1) * visible_size_Mod
            numpy.savetxt(newpath + '/Raw_' + str(i) + '.txt',
                          raw[:, lo:hi], delimiter=',')
            numpy.savetxt(newpath + '/Recstru_' + str(i) + '_' +
                          str(best_epoch) + '.txt',
                          reconstruction[:, lo:hi], delimiter=',')
            rmse_val = RMSE(raw[:, lo:hi], reconstruction[:, lo:hi])
            print('AE RMSE for Modality', i, str(rmse_val))
            f.write('AE RMSE for Modality' + '\t' + str(i) + '\t' +
                    str(rmse_val) + '\n')
            # NOTE: in this branch error_ratio takes no indicator matrix /
            # missing rate, matching the original call.
            err_val = error_ratio(raw[:, lo:hi], reconstruction[:, lo:hi])
            print('AE error ratio for Modality', i, str(err_val))
            f.write('AE error ratio for Modality' + '\t' + str(i) + '\t' +
                    str(err_val) + '\n')
    # FIX: close the report file (was previously leaked; a second open()
    # below re-bound `f` without closing the first handle).
    f.close()
    if order == 1:
        print('...This is the missing case, we predict the reconstruction '
              'error ratio for the test data...')
        raw_test, data_real_test, teststats_list, indi_matrix_final_test = \
            load_test_data(data, numMod, missing_rate)
        y1 = get_hidden_values(data_real_test, indi_matrix_final_test, W1,
                               b1, G, bias_matrix, missing)
        reconstruction_test = get_reconstructed_input(y1, W2, b2, G,
                                                      missing)
        print(reconstruction_test)
        numpy.savetxt(newpath + '/output_test_' + str(best_epoch) + '.txt',
                      reconstruction_test, delimiter=',')
        f = open(newpath + '/AE_test_' + str(best_epoch) + '.txt', 'w')
        for i in range(int(numMod)):
            lo, hi = i * visible_size_Mod, (i + 1) * visible_size_Mod
            numpy.savetxt(newpath + '/Raw_test_' + str(i) + '.txt',
                          raw_test[:, lo:hi], delimiter=',')
            denorm = denormActiv(reconstruction_test[:, lo:hi],
                                 teststats_list[i])
            numpy.savetxt(newpath + '/Recstru_test_' + str(i) + '_' +
                          str(best_epoch) + '.txt', denorm, delimiter=',')
            rmse_val = RMSE(raw_test[:, lo:hi], denorm)
            print('AE RMSE for Modality', i, str(rmse_val))
            f.write('AE RMSE for Modality' + '\t' + str(i) + '\t' +
                    str(rmse_val) + '\n')
            err_val = error_ratio(raw_test[:, lo:hi], denorm,
                                  indi_matrix_final_test[:, lo:hi],
                                  missing_rate)
            print('AE error ratio for Modality', i, str(err_val))
            f.write('AE error ratio for Modality' + '\t' + str(i) + '\t' +
                    str(err_val) + '\n')
        f.close()
        return (ae, h1, h_valid, indi_matrix_test, indi_matrix_valid_test,
                indi_matrix_final_test)
    if order == 2:
        return ae, h1, h_valid, indi_matrix_test, indi_matrix_valid_test
def test_SAE(data='', validationdata='', param_list=None, n_hidden1=288,
             n_hidden2=100, missing1=False, missing2=False, share1=False,
             share2=True, missingrate1=0, missingrate2=0, learningrate=0.08,
             training_epochs=3, batch_size=3000, output_folder=''):
    """Greedily pre-train two autoencoder blocks, stack them into an SAE,
    fine-tune with early stopping, and report per-modality RMSE / error
    ratios.

    NOTE(review): this module defines ``test_SAE`` twice; a later duplicate
    definition shadows this one at import time -- confirm which copy is
    intended to survive.

    Parameters
    ----------
    data, validationdata : str
        Inputs handed to ``build_block`` / ``load_data_sae``.
    param_list : list or None
        Modality parameter list; ``None`` means an empty list.
    n_hidden1, n_hidden2 : int
        Hidden sizes of the first and second autoencoder blocks.
    missing1, missing2, share1, share2 : bool
        Missing-data / weight-sharing flags for each block.
    missingrate1, missingrate2 : float
        Missing rates for each block.
    learningrate, training_epochs, batch_size :
        Fine-tuning hyper-parameters.
    output_folder : str
        Subdirectory of ``../Result/`` receiving all artifacts.
    """
    # FIX: mutable default argument ([]) replaced by the None sentinel so
    # calls do not share one list object across invocations.
    if param_list is None:
        param_list = []
    newpath = '../Result/' + output_folder
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    # Greedy layer-wise pre-training of the two blocks.
    ae1, h1, h_valid, indi_matrix_test, indi_matrix_valid_test = \
        build_block(data, validationdata, param_list, n_hidden1, share1,
                    missing1, missingrate1, learningrate, training_epochs,
                    batch_size, output_folder='m_HI_1', order=1)
    print('Fininshed training the first auto encoder')

    ae2, h2, h2_valid, indi_matrix_test2, indi_matrix_valid_test2 = \
        build_block(h1, h_valid, param_list, n_hidden2, share2,
                    missing=missing2, missing_rate=missingrate2,
                    learning_rate=learningrate,
                    training_epochs=training_epochs, batch_size=30,
                    output_folder='m_HI_2', order=2)
    print('Fininshed training the second auto encoder')

    datasets, indi_matrix, data_test, n_train_batches, numMod, raw, \
        trainstats_list, visible_size_Mod = \
        load_data_sae(param_list, data, indi_matrix_test, batch_size,
                      train=True)

    valid_batch_size = 306
    validset, valid_indi_matrix, n_valid_batches = \
        load_data_sae(param_list, validationdata, indi_matrix_valid_test,
                      valid_batch_size, train=False)

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    y = T.matrix('y')

    sae = SAE(
        ae1=ae1,
        ae2=ae2,
        input=x,
        indi_matrix=y,
        missing=missing1,
        param_list=param_list,
    )

    cost, updates = sae.finetuning(learningrate)
    train_sae = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: datasets[index * batch_size:(index + 1) * batch_size],
            y: indi_matrix[index * batch_size:(index + 1) * batch_size]
        },
        on_unused_input='warn',
    )

    validate_cost = sae.get_cost()
    validate_sae = theano.function(
        [index],
        validate_cost,
        givens={
            x: validset[index * valid_batch_size:
                        (index + 1) * valid_batch_size],
            y: valid_indi_matrix[index * valid_batch_size:
                                 (index + 1) * valid_batch_size]
        },
        on_unused_input='warn',
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the sae model')
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # FIX: integer division -- under Python 3, `patience / 2` is a float.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    best_epoch = 0
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        c = []
        for minibatch_index in range(int(n_train_batches)):
            a = train_sae(minibatch_index)
            c.append(a)
            print(a)
            # iteration number: how many minibatches have been run so far
            iter = (epoch - 1) * n_train_batches + minibatch_index
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_sae(i) for i in range(int(n_valid_batches))
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation cost %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    print('saving the model for epoch %i' % epoch)
                    best_epoch = epoch
                    save(newpath + '/best_model_epoch_' + str(epoch) +
                         '.pkl', sae)
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f') %
          (best_validation_loss))
    # FIX: Python-2 "print >> sys.stderr" artifact -- the stream object was
    # being printed as a value; route via the file= keyword instead.
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.1fs' % ((end_time - start_time)), file=sys.stderr)

    print('Best epoch is %i' % best_epoch)
    print('Now we starting computing the RMSE and error ratio')
    sae = newpath + '/best_model_epoch_' + str(best_epoch) + '.pkl'
    W1, W2, b1, b2, G, W3, W4, b3, b4, G_share = \
        load_numerical_params_sae(sae)
    # Forward pass through the stacked layers on the test split.
    h1 = get_h1(missing1, data_test, W1, b1, G)
    h2 = get_h2(h1, W2, b2, G_share)
    h3 = get_h3(h2, W3, b3, G_share)
    reconstruction = get_reconstruct(missing1, h3, W4, b4, G)
    print(reconstruction)
    np.savetxt(newpath + '/output_' + str(best_epoch) + '.txt',
               reconstruction, delimiter=',')
    f = open(newpath + '/AE_' + str(best_epoch) + '.txt', 'w')
    for i in range(int(numMod)):
        # Column range of modality i.
        lo, hi = i * visible_size_Mod, (i + 1) * visible_size_Mod
        np.savetxt(newpath + '/Raw_' + str(i) + '.txt', raw[:, lo:hi],
                   delimiter=',')
        # De-normalize once per modality instead of per metric.
        denorm = denormActiv(reconstruction[:, lo:hi], trainstats_list[i])
        np.savetxt(newpath + '/Recstru_' + str(i) + '_' + str(best_epoch) +
                   '.txt', denorm, delimiter=',')
        rmse_val = RMSE(raw[:, lo:hi], denorm)
        # FIX: Python-2 "print >> f" artifact -- `f` was printed to stdout
        # as a value; the file copy is handled by f.write below.
        print('AE RMSE for Modality', i, str(rmse_val))
        f.write('AE RMSE for Modality' + '\t' + str(i) + '\t' +
                str(rmse_val) + '\n')
        # NOTE: here error_ratio takes no indicator matrix / missing rate,
        # matching the original call.
        err_val = error_ratio(raw[:, lo:hi], denorm)
        print('AE error ratio for Modality', i, str(err_val))
        f.write('AE error ratio for Modality' + '\t' + str(i) + '\t' +
                str(err_val) + '\n')
    # FIX: close the report file (was previously leaked).
    f.close()
def test_SAE(data='', validationdata='', param_list=None, n_hidden1=288,
             n_hidden2=100, missing1=False, missing2=False, share1=False,
             share2=True, missingrate1=0, missingrate2=0, learningrate=0.08,
             training_epochs=3, batch_size=3000, output_folder=''):
    """Fine-tune a stacked autoencoder whose two blocks are loaded from
    fixed checkpoint paths, then report per-modality RMSE / error ratios.

    NOTE(review): this is a duplicate definition of ``test_SAE``; being
    defined later in the module it shadows the earlier copy, so this is the
    version actually in effect. The pre-training pipeline that the earlier
    copy ran inline is replaced here by hard-coded checkpoint paths.

    Parameters
    ----------
    data, validationdata : str
        Inputs handed to ``load_data``.
    param_list : list or None
        Modality parameter list; ``None`` means an empty list.
    n_hidden1, n_hidden2, missing2, share1, share2, missingrate2 :
        Accepted for interface compatibility; unused in this variant.
    missing1 : bool
        Missing-data flag used for the evaluation forward pass.
    missingrate1 : float
        Missing rate passed to ``load_data``.
    learningrate, training_epochs, batch_size :
        Fine-tuning hyper-parameters.
    output_folder : str
        Subdirectory of ``../Result/`` receiving all artifacts.
    """
    # FIX: mutable default argument ([]) replaced by the None sentinel so
    # calls do not share one list object across invocations.
    if param_list is None:
        param_list = []
    newpath = '../Result/' + output_folder
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    ####################################
    # Initializing training dataset    #
    ####################################
    datasets, indi_matrix, data_test, indi_matrix_test, n_train_batches, \
        numMod, raw, trainstats_list, visible_size_Mod = \
        load_data(param_list, data, batch_size, missingrate1,
                  train=True, order=1)

    ####################################
    # Initializing validation dataset  #
    ####################################
    valid_batch_size = 306
    validset, valid_indi_matrix, valid_test, indi_matrix_valid_test, \
        n_valid_batches = \
        load_data(param_list, validationdata, valid_batch_size,
                  missingrate1, train=False, order=1)

    # Pre-trained blocks loaded from fixed checkpoints.
    # NOTE(review): ae1 and ae2 point at the SAME checkpoint file; ae2 was
    # presumably meant to come from ../Result/m_HI_2 -- confirm.
    ae1 = '../Result/m_HI_1/best_model_epoch_10.pkl'
    ae2 = '../Result/m_HI_1/best_model_epoch_10.pkl'

    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')
    y = T.matrix('y')

    sae = SAE(
        ae1=ae1,
        ae2=ae2,
        input=x,
        indi_matrix=y,
        missing=True,
        param_list=param_list,
    )

    cost, updates = sae.finetuning(learningrate)
    train_sae = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: datasets[index * batch_size:(index + 1) * batch_size],
            y: indi_matrix[index * batch_size:(index + 1) * batch_size]
        },
        # NOTE(review): DebugMode is orders of magnitude slower than the
        # default compilation mode and looks like a debugging leftover --
        # confirm before removing.
        mode='DebugMode',
        on_unused_input='warn'
    )

    validate_cost = sae.get_cost()
    validate_sae = theano.function(
        [index],
        validate_cost,
        givens={
            x: validset[index * valid_batch_size:
                        (index + 1) * valid_batch_size],
            y: valid_indi_matrix[index * valid_batch_size:
                                 (index + 1) * valid_batch_size]
        },
        on_unused_input='warn'
    )

    ###############
    # TRAIN MODEL #
    ###############
    print('... training the sae model...')
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # FIX: integer division -- under Python 3, `patience / 2` is a float.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = np.inf
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    best_epoch = 0
    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        c = []
        print('...Starting training with SGD and check validation here...')
        print(n_train_batches)
        for minibatch_index in range(int(n_train_batches)):
            a = train_sae(minibatch_index)
            c.append(a)
            print(a)
            # iteration number: how many minibatches have been run so far
            iter = (epoch - 1) * n_train_batches + minibatch_index
            print('...starting validation here...')
            if (iter + 1) % validation_frequency == 0:
                validation_losses = [
                    validate_sae(i) for i in range(int(n_valid_batches))
                ]
                this_validation_loss = np.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation cost %f ' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss))
                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)
                    best_validation_loss = this_validation_loss
                    print('saving the model for epoch %i' % epoch)
                    best_epoch = epoch
                    save(newpath + '/best_model_epoch_' + str(epoch) +
                         '.pkl', sae)
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(('Optimization complete with best validation score of %f') %
          (best_validation_loss))
    # FIX: Python-2 "print >> sys.stderr" artifact -- the stream object was
    # being printed as a value; route via the file= keyword instead.
    print('The code for file ' + os.path.split(__file__)[1] +
          ' ran for %.1fs' % ((end_time - start_time)), file=sys.stderr)

    print('Best epoch is %i' % best_epoch)
    print('Now we starting computing the RMSE and error ratio')
    sae = newpath + '/best_model_epoch_' + str(best_epoch) + '.pkl'
    W1, W2, b1, b2, G, W3, W4, b3, b4, G_share = \
        load_numerical_params_sae(sae)
    # Forward pass through the stacked layers on the test split.
    h1 = get_h1(missing1, data_test, W1, b1, G)
    h2 = get_h2(h1, W2, b2, G_share)
    h3 = get_h3(h2, W3, b3, G_share)
    reconstruction = get_reconstruct(missing1, h3, W4, b4, G)
    print(reconstruction)
    np.savetxt(newpath + '/output_' + str(best_epoch) + '.txt',
               reconstruction, delimiter=',')
    f = open(newpath + '/AE_' + str(best_epoch) + '.txt', 'w')
    for i in range(int(numMod)):
        # Column range of modality i.
        lo, hi = i * visible_size_Mod, (i + 1) * visible_size_Mod
        np.savetxt(newpath + '/Raw_' + str(i) + '.txt', raw[:, lo:hi],
                   delimiter=',')
        # De-normalize once per modality instead of per metric.
        denorm = denormActiv(reconstruction[:, lo:hi], trainstats_list[i])
        np.savetxt(newpath + '/Recstru_' + str(i) + '_' + str(best_epoch) +
                   '.txt', denorm, delimiter=',')
        rmse_val = RMSE(raw[:, lo:hi], denorm)
        # FIX: Python-2 "print >> f" artifact -- `f` was printed to stdout
        # as a value; the file copy is handled by f.write below.
        print('AE RMSE for Modality', i, str(rmse_val))
        f.write('AE RMSE for Modality' + '\t' + str(i) + '\t' +
                str(rmse_val) + '\n')
        # NOTE: here error_ratio takes no indicator matrix / missing rate,
        # matching the original call.
        err_val = error_ratio(raw[:, lo:hi], denorm)
        print('AE error ratio for Modality', i, str(err_val))
        f.write('AE error ratio for Modality' + '\t' + str(i) + '\t' +
                str(err_val) + '\n')
    # FIX: close the report file (was previously leaked).
    f.close()