def test(test_loader, modelID, showAttn=True):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP, FLIP).cuda()
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).cuda()
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda()

    model_file = 'save_weights/seq2seq-' + str(modelID) + '.model'
    pretrain_dict = torch.load(model_file)
    seq2seq_dict = seq2seq.state_dict()
    pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in seq2seq_dict}
    seq2seq_dict.update(pretrain_dict)
    seq2seq.load_state_dict(seq2seq_dict)  # load
    print('Loading ' + model_file)

    seq2seq.eval()
    total_loss_t = 0
    start_t = time.time()
    for num, (test_index, test_in, test_in_len, test_out, test_domain) in enumerate(test_loader):
        lambd = LAMBD
        test_in, test_out = Variable(test_in, volatile=True).cuda(), Variable(test_out, volatile=True).cuda()
        test_domain = Variable(test_domain, volatile=True).cuda()
        output_t, attn_weights_t, out_domain_t = seq2seq(test_in, test_out, test_in_len,
                                                         lambd, teacher_rate=False, train=False)
        batch_count_n = writePredict(modelID, test_index, output_t, 'test')
        test_label = test_out.permute(1, 0)[1:].contiguous().view(-1)
        if LABEL_SMOOTH:
            loss_t = crit(log_softmax(output_t.view(-1, vocab_size)), test_label)
        else:
            loss_t = F.cross_entropy(output_t.view(-1, vocab_size), test_label,
                                     ignore_index=tokens['PAD_TOKEN'])
        total_loss_t += loss_t.data[0]

        if showAttn:
            global_index_t = 0
            for t_idx, t_in in zip(test_index, test_in):
                visualizeAttn(t_in.data[0], test_in_len[0],
                              [j[global_index_t] for j in attn_weights_t], modelID,
                              batch_count_n[global_index_t], 'test_' + t_idx.split(',')[0])
                global_index_t += 1

    total_loss_t /= (num + 1)
    writeLoss(total_loss_t, 'test')
    print(' TEST loss=%.3f, time=%.3f' % (total_loss_t, time.time() - start_t))
def test(test_loader, modelID, showAttn=True):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP, FLIP).to(device)
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).to(device)
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).to(device)

    model_file = 'save_weights/seq2seq-' + str(modelID) + '.model'
    print('Loading ' + model_file)
    seq2seq.load_state_dict(torch.load(model_file))  # load
    seq2seq.eval()

    total_loss_t = 0
    start_t = time.time()
    with torch.no_grad():
        for num, (test_index, test_in, test_in_len, test_out) in enumerate(test_loader):
            #test_in = test_in.unsqueeze(1)
            test_in, test_out = test_in.to(device), test_out.to(device)
            if test_in.requires_grad or test_out.requires_grad:
                print('ERROR! test_in, test_out should have requires_grad=False')
            output_t, attn_weights_t = seq2seq(test_in, test_out, test_in_len,
                                               teacher_rate=False, train=False)
            batch_count_n = writePredict(modelID, test_index, output_t, 'test')
            test_label = test_out.permute(1, 0)[1:].reshape(-1)
            #loss_t = F.cross_entropy(output_t.view(-1, vocab_size),
            #                         test_label, ignore_index=tokens['PAD_TOKEN'])
            #loss_t = loss_label_smoothing(output_t.view(-1, vocab_size), test_label)
            if LABEL_SMOOTH:
                loss_t = crit(log_softmax(output_t.reshape(-1, vocab_size)), test_label)
            else:
                loss_t = F.cross_entropy(output_t.reshape(-1, vocab_size), test_label,
                                         ignore_index=tokens['PAD_TOKEN'])
            total_loss_t += loss_t.item()

            if showAttn:
                global_index_t = 0
                for t_idx, t_in in zip(test_index, test_in):
                    visualizeAttn(t_in.detach()[0], test_in_len[0],
                                  [j[global_index_t] for j in attn_weights_t], modelID,
                                  batch_count_n[global_index_t], 'test_' + t_idx.split(',')[0])
                    global_index_t += 1

    total_loss_t /= (num + 1)
    writeLoss(total_loss_t, 'test')
    print(' TEST loss=%.3f, time=%.3f' % (total_loss_t, time.time() - start_t))
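# The LABEL_SMOOTH branch above relies on `crit` and `log_softmax`, which are defined
# elsewhere in this codebase. As a rough, hypothetical sketch only (the actual smoothing
# value, reduction, and class names may differ), they could be built like this:
import torch
import torch.nn as nn

log_softmax_example = nn.LogSoftmax(dim=-1)


class LabelSmoothingLossExample(nn.Module):
    """KL divergence between smoothed one-hot targets and model log-probabilities."""

    def __init__(self, vocab_size, padding_idx, smoothing=0.1):
        super().__init__()
        self.criterion = nn.KLDivLoss(reduction='sum')
        self.padding_idx = padding_idx
        self.smoothing = smoothing
        self.vocab_size = vocab_size

    def forward(self, log_probs, target):
        # log_probs: (N, vocab_size) log-probabilities; target: (N,) class indices
        true_dist = torch.full_like(log_probs, self.smoothing / (self.vocab_size - 2))
        true_dist.scatter_(1, target.unsqueeze(1), 1.0 - self.smoothing)
        true_dist[:, self.padding_idx] = 0
        pad_mask = (target == self.padding_idx)
        true_dist[pad_mask] = 0  # padded positions contribute nothing to the loss
        return self.criterion(log_probs, true_dist) / max((~pad_mask).sum().item(), 1)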
def main(train_loader, valid_loader, test_loader):
    attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
    enc = Encoder(HEIGHT, WIDTH, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT).cuda()
    dec = Decoder(vocab_size, EMBEDDING_SIZE, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn).cuda()
    model = Seq2Seq(enc, dec, output_max_len, vocab_size).cuda()
    model.apply(init_weights)

    if CurriculumModelID > 0:
        model_file = 'save_weights/seq2seq-' + str(CurriculumModelID) + '.model'
        #model_file = 'save_weights/words/seq2seq-' + str(CurriculumModelID) + '.model'
        print('Loading ' + model_file)
        model.load_state_dict(torch.load(model_file))  # load

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss(ignore_index=tokens['PAD_TOKEN'])
    #criterion = FocalLoss()

    N_EPOCHS = 150
    CLIP = 2
    for epoch in range(N_EPOCHS):
        epoch = epoch + CurriculumModelID
        start_time = time.time()
        teacher_rate = teacher_force_func(epoch) if TEACHER_FORCING else False

        train_loss = train(model, train_loader, optimizer, criterion, CLIP, epoch, teacher_rate)
        writeLoss(train_loss, 'train')
        valid_loss = evaluate(model, valid_loader, criterion, epoch)
        writeLoss(valid_loss, 'valid')

        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        # save the model every MODEL_SAVE_EPOCH epochs
        if epoch % MODEL_SAVE_EPOCH == 0:
            folder_weights = 'save_weights'
            if not os.path.exists(folder_weights):
                os.makedirs(folder_weights)
            torch.save(model.state_dict(), folder_weights + '/seq2seq-%d.model' % epoch)

        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print('train loss=%.4f, valid_loss=%.4f, teacher_rate=%.3f' % (train_loss, valid_loss, teacher_rate))
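# `teacher_force_func(epoch)` is defined elsewhere in the repository. A common choice
# (purely a hypothetical sketch, not necessarily this codebase's schedule) is an
# exponential decay of the teacher-forcing probability over epochs:
import numpy as np


def teacher_force_func_example(epoch, k=0.98):
    # probability of feeding the ground-truth token to the decoder at the next step
    return np.power(k, epoch)

# e.g. teacher_force_func_example(0) == 1.0, teacher_force_func_example(50) ~= 0.364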
P_pred, rho_pred, u_pred, v_pred, Et_pred = model.predict(P_back_test, x_test, y_test)

# Error
error_P = np.linalg.norm(P_test - P_pred, 2) / np.linalg.norm(P_test, 2)
print("Test Error in P: " + str(error_P))
error_rho = np.linalg.norm(rho_test - rho_pred, 2) / np.linalg.norm(rho_test, 2)
print("Test Error in rho: " + str(error_rho))
error_u = np.linalg.norm(u_test - u_pred, 2) / np.linalg.norm(u_test, 2)
print("Test Error in u: " + str(error_u))
error_v = np.linalg.norm(v_test - v_pred, 2) / np.linalg.norm(v_test, 2)
print("Test Error in v: " + str(error_v))
error_Et = np.linalg.norm(Et_test - Et_pred, 2) / np.linalg.norm(Et_test, 2)
print("Test Error in Et: " + str(error_Et))

# undo the normalization before writing predictions to disk
P_pred *= P_norm
rho_pred *= rho_norm
u_pred *= u_norm
v_pred *= v_norm
Et_pred *= Et_norm

path = os.getcwd() + '/predict/%s_bp=%s.csv' % (setting_name, str(int(P_back_test[0] * P_norm)))
utils.writeData(path, x_test, y_test, P_pred, rho_pred, u_pred, v_pred, Et_pred)
path2 = os.getcwd() + '/predict/%s_bp=%s_loss.csv' % (setting_name, str(int(P_back_test[0] * P_norm)))
utils.writeLoss(path2, model.loss_vector, model.step_vector)
def main(train_loader, valid_loader, test_loader):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP, FLIP).cuda()
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).cuda()
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda()

    if CurriculumModelID > 0:
        model_file = 'save_weights/seq2seq-' + str(CurriculumModelID) + '.model'
        #model_file = 'save_weights/words/seq2seq-' + str(CurriculumModelID) + '.model'
        print('Loading ' + model_file)
        seq2seq.load_state_dict(torch.load(model_file))  # load

    opt = optim.Adam(seq2seq.parameters(), lr=learning_rate)
    #opt = optim.SGD(seq2seq.parameters(), lr=learning_rate, momentum=0.9)
    #opt = optim.RMSprop(seq2seq.parameters(), lr=learning_rate, momentum=0.9)
    #scheduler = optim.lr_scheduler.StepLR(opt, step_size=20, gamma=1)
    scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=lr_milestone, gamma=lr_gamma)

    epochs = 5000000
    if EARLY_STOP_EPOCH is not None:
        min_loss = 1e3
        min_loss_index = 0
        min_loss_count = 0

    if CurriculumModelID > 0 and WORD_LEVEL:
        start_epoch = CurriculumModelID + 1
        for i in range(start_epoch):
            scheduler.step()
    else:
        start_epoch = 0

    for epoch in range(start_epoch, epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        teacher_rate = teacher_force_func(epoch) if TEACHER_FORCING else False
        start = time.time()
        loss = train(train_loader, seq2seq, opt, teacher_rate, epoch)
        writeLoss(loss, 'train')
        print('epoch %d/%d, loss=%.3f, lr=%.8f, teacher_rate=%.3f, time=%.3f'
              % (epoch, epochs, loss, lr, teacher_rate, time.time() - start))

        if epoch % MODEL_SAVE_EPOCH == 0:
            folder_weights = 'save_weights'
            if not os.path.exists(folder_weights):
                os.makedirs(folder_weights)
            torch.save(seq2seq.state_dict(), folder_weights + '/seq2seq-%d.model' % epoch)

        start_v = time.time()
        loss_v = valid(valid_loader, seq2seq, epoch)
        writeLoss(loss_v, 'valid')
        print(' Valid loss=%.3f, time=%.3f' % (loss_v, time.time() - start_v))

        if EARLY_STOP_EPOCH is not None:
            gt = 'RWTH_partition/RWTH.iam_word_gt_final.valid.thresh'
            decoded = 'pred_logs/valid_predict_seq.' + str(epoch) + '.log'
            res_cer = sub.Popen(['./tasas_cer.sh', gt, decoded], stdout=sub.PIPE)
            res_cer = res_cer.stdout.read().decode('utf8')
            loss_v = float(res_cer) / 100
            if loss_v < min_loss:
                min_loss = loss_v
                min_loss_index = epoch
                min_loss_count = 0
            else:
                min_loss_count += 1
            if min_loss_count >= EARLY_STOP_EPOCH:
                print('Early Stopping at: %d. Best epoch is: %d' % (epoch, min_loss_index))
                return min_loss_index
def main(all_data_loader_func):
    encoder = Encoder(HIDDEN_SIZE_ENC, HEIGHT, WIDTH, Bi_GRU, CON_STEP, FLIP).cuda()
    decoder = Decoder(HIDDEN_SIZE_DEC, EMBEDDING_SIZE, vocab_size, Attention,
                      TRADEOFF_CONTEXT_EMBED).cuda()
    seq2seq = Seq2Seq(encoder, decoder, output_max_len, vocab_size).cuda()

    if CurriculumModelID > 0:
        model_file = 'save_weights/seq2seq-' + str(CurriculumModelID) + '.model'
        print('Loading ' + model_file)
        pretrain_dict = torch.load(model_file)
        seq2seq_dict = seq2seq.state_dict()
        pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in seq2seq_dict}
        seq2seq_dict.update(pretrain_dict)
        seq2seq.load_state_dict(seq2seq_dict)  # load

    opt = optim.Adam(seq2seq.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=lr_milestone, gamma=lr_gamma)

    epochs = 5000
    if EARLY_STOP_EPOCH is not None:
        min_loss = 1e3
        min_loss_index = 0
        min_loss_count = 0

    if CurriculumModelID > 0:
        start_epoch = CurriculumModelID + 1
        for i in range(start_epoch):
            scheduler.step()
    else:
        start_epoch = 0

    for epoch in range(start_epoch, epochs):
        # each epoch, randomly re-sample the training set so it stays balanced
        # with the unlabeled test set
        train_loader, valid_loader, test_loader = all_data_loader_func()
        scheduler.step()
        lr = scheduler.get_lr()[0]
        teacher_rate = teacher_force_func(epoch) if TEACHER_FORCING else False
        start = time.time()
        lambd = return_lambda(epoch)
        loss, loss_d = train(train_loader, seq2seq, opt, teacher_rate, epoch, lambd)
        writeLoss(loss, 'train')
        writeLoss(loss_d, 'domain_train')
        print('epoch %d/%d, loss=%.3f, domain_loss=%.3f, lr=%.6f, teacher_rate=%.3f, lambda_pau=%.3f, time=%.3f'
              % (epoch, epochs, loss, loss_d, lr, teacher_rate, lambd, time.time() - start))

        if epoch % MODEL_SAVE_EPOCH == 0:
            folder_weights = 'save_weights'
            if not os.path.exists(folder_weights):
                os.makedirs(folder_weights)
            torch.save(seq2seq.state_dict(), folder_weights + '/seq2seq-%d.model' % epoch)

        start_v = time.time()
        loss_v, loss_v_d = valid(valid_loader, seq2seq, epoch)
        writeLoss(loss_v, 'valid')
        writeLoss(loss_v_d, 'domain_valid')
        print(' Valid loss=%.3f, domain_loss=%.3f, time=%.3f' % (loss_v, loss_v_d, time.time() - start_v))

        test(test_loader, epoch, False)

        if EARLY_STOP_EPOCH is not None:
            gt = loadData.GT_TE
            decoded = 'pred_logs/valid_predict_seq.' + str(epoch) + '.log'
            res_cer = sub.Popen(['./tasas_cer.sh', gt, decoded], stdout=sub.PIPE)
            res_cer = res_cer.stdout.read().decode('utf8')
            loss_v = float(res_cer) / 100
            if loss_v < min_loss:
                min_loss = loss_v
                min_loss_index = epoch
                min_loss_count = 0
            else:
                min_loss_count += 1
            if min_loss_count >= EARLY_STOP_EPOCH:
                print('Early Stopping at: %d. Best epoch is: %d' % (epoch, min_loss_index))
                return min_loss_index
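# `return_lambda(epoch)` controls the weight of the domain-adversarial (gradient-reversal)
# branch. A common schedule, taken from Ganin & Lempitsky's DANN paper, ramps lambda from
# 0 to 1 as training progresses; whether this repository uses exactly this formula is an
# assumption, so treat the following as a sketch only:
import numpy as np


def return_lambda_example(epoch, total_epochs=5000):
    p = epoch / total_epochs                      # training progress in [0, 1]
    return 2.0 / (1.0 + np.exp(-10.0 * p)) - 1.0  # smoothly increases from 0 towards 1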
    error_rho = np.linalg.norm(rho_test - rho_pred, 2) / np.linalg.norm(rho_test, 2)
    error_u = np.linalg.norm(u_test - u_pred, 2) / np.linalg.norm(u_test, 2)
    error_v = np.linalg.norm(v_test - v_pred, 2) / np.linalg.norm(v_test, 2)
    error_Et = np.linalg.norm(Et_test - Et_pred, 2) / np.linalg.norm(Et_test, 2)
    print("Average Test Error: %.3f" % ((error_P + error_rho + error_u + error_v + error_Et) / 5))
    print("P\trho\tu\tv\tEt")
    print("%.3f\t%.3f\t%.3f\t%.3f\t%.3f" % (error_P, error_rho, error_u, error_v, error_Et))
    print("******************************************************")

    ## Save ------------------------------------------------------------------
    path = os.getcwd() + '/predict/%s_bp=%s.csv' % (setting_name, str(int(P_b_test_value)))
    utils.writeData(path, x_test, y_test, P_pred, rho_pred, u_pred, v_pred, Et_pred)
    path2 = os.getcwd() + '/predict/%s_bp=%s_gt_loss.csv' % (setting_name, str(int(P_b_test_value)))
    utils.writeLoss(path2, model.sse_loss_vector, model.step_vector)
    path3 = os.getcwd() + '/predict/%s_bp=%s_pinn_loss.csv' % (setting_name, str(int(P_b_test_value)))
    utils.writeLoss(path3, model.pinn_loss_vector, model.step_vector)
else:
    for case in range(P_back_test.shape[0]):
        P_b_test_value = P_back_test[case:(case + 1), :][0]
        P_back_test_ = P_back_test[case:(case + 1), :].transpose()
        x_test_ = x_test[case:(case + 1), :].transpose()
        y_test_ = y_test[case:(case + 1), :].transpose()
        P_test_ = P_test[case:(case + 1), :].transpose()
        rho_test_ = rho_test[case:(case + 1), :].transpose()
def train(self, num_epochs, num_iter, batch_size, learning_rate):
    switch = True
    # for epoch in range(num_epochs):
    for epoch in range(num_epochs):
        start_time = time.time()
        A = np.random.choice(range(self.x.shape[0]), size=(batch_size,), replace=False)
        for it in range(num_iter):
            #for it in range(0, N_nodes, batch_size):
            #node_idx = nodes_perm[np.arange(it, it + batch_size)]
            # slice data
            P_back_batch = self.P_back[A].flatten()[:, None]
            x_batch = self.x[A].flatten()[:, None]
            y_batch = self.y[A].flatten()[:, None]
            P_batch = self.P[A].flatten()[:, None]
            rho_batch = self.rho[A].flatten()[:, None]
            u_batch = self.u[A].flatten()[:, None]
            v_batch = self.v[A].flatten()[:, None]
            Et_batch = self.Et[A].flatten()[:, None]
            tf_dict = {self.P_back_tf: P_back_batch, self.x_tf: x_batch, self.y_tf: y_batch,
                       self.P_tf: P_batch, self.rho_tf: rho_batch, self.u_tf: u_batch,
                       self.v_tf: v_batch, self.Et_tf: Et_batch,
                       self.learning_rate: learning_rate}
            self.sess.run(self.train_op_Adam, tf_dict)

            # Print
            if it % 100 == 0:
                elapsed = time.time() - start_time
                loss_value = self.sess.run([self.loss], tf_dict)
                e_1, e_2, e_3, e_4 = self.sess.run(
                    [self.e_1, self.e_2, self.e_3, self.e_4], tf_dict)
                e_P, e_rho, e_u, e_v, e_Et = self.sess.run(
                    [self.e_P, self.e_rho, self.e_u, self.e_v, self.e_Et], tf_dict)
                # if e_P < 0.05 and switch:
                #     switch = False
                #     self.loss = 1*self.e_P + \
                #                 1*self.e_rho + \
                #                 1*self.e_u + \
                #                 1*self.e_v + \
                #                 1*self.e_T + \
                #                 0*self.e_1 + \
                #                 0*self.e_2 + \
                #                 0*self.e_3 + \
                #                 0*self.e_4
                #     self.train_op_Adam = self.optimizer_Adam.minimize(self.loss)
                # res1 = self.sess.run(self.e1, tf_dict)
                # res2 = self.sess.run(self.e2, tf_dict)
                #res3 = self.sess.run(self.total_res, tf_dict)
                #print(res3)
                print('Epoch: %d, It: %d, Loss: %.3e, Time: %.2f'
                      % (epoch, it, loss_value[0], elapsed))
                print("\tE_1: %.3f, E_2: %.3f, E_3: %.3f, E_4: %.3f, E_5: %.3f"
                      % (e_1, e_2, e_3, e_4, 0.0))
                print("\tE_P: %.3f, E_rho: %.3f, E_u: %.3f, E_v: %.3f, E_Et: %.3f"
                      % (e_P, e_rho, e_u, e_v, e_Et))
                # print('Mass Residual: %f\t\tMomentum Residual: %f\tEnergy Residual: %f'
                #       % (sum(map(lambda a: a*a, res1))/len(res1),
                #          sum(map(lambda a: a*a, res2))/len(res2),
                #          sum(map(lambda a: a*a, res3))/len(res3)))
                start_time = time.time()
                self.saver.save(self.sess, self.ckpt_name, global_step=epoch)
                self.loss_vector.append(loss_value[0])
                self.step_vector.append(1)
                if epoch % 5 == 0 and it == 0:
                    path2 = self.ckpt_name + '_temp_loss.csv'
                    utils.writeLoss(path2, self.loss_vector, self.step_vector)

    self.optimizer.minimize(self.sess, feed_dict=tf_dict,
                            fetches=[self.loss], loss_callback=self.callback)
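# The final `self.optimizer.minimize(...)` call above follows the TF 1.x
# ScipyOptimizerInterface pattern (a second-stage optimizer run after the Adam loop).
# How `self.optimizer` and `self.callback` are constructed is not shown in this section;
# a typical setup is sketched below, with the L-BFGS-B options being illustrative
# assumptions rather than values taken from this repository:
import numpy as np
import tensorflow as tf  # TF 1.x


def build_scipy_optimizer_example(loss):
    # external SciPy optimizer wrapped for a TF 1.x graph
    return tf.contrib.opt.ScipyOptimizerInterface(
        loss,
        method='L-BFGS-B',
        options={'maxiter': 50000,
                 'maxfun': 50000,
                 'maxcor': 50,
                 'maxls': 50,
                 'ftol': 1.0 * np.finfo(float).eps})


def loss_callback_example(loss_value):
    # invoked by ScipyOptimizerInterface after each function evaluation
    print('Loss: %.3e' % loss_value)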