def calc_test_loss(test_x, test_y, display=True):
    # Relies on module-level globals: session, batch_size, the vocab dicts
    # (en2ind_oov, ch2ind_oov, ch2ind), and the graph tensors fed below.
    accs = []
    worksum = int(len(test_x) / batch_size)
    loss_list = []
    predict_list = []
    target_list = []
    source_list = []
    pb = ProgressBar(worksum=worksum, info="validating...", auto_display=display)
    pb.startjob()
    for j in range(0, len(test_x), batch_size):
        batch_x, batch_y = test_x[j:j + batch_size], test_y[j:j + batch_size]
        # Drop the trailing partial batch; the graph expects a fixed batch size.
        if len(batch_x) < batch_size:
            continue
        # Real lengths include the appended <eos> token.
        bx = [len(m) + 1 for m in batch_x]
        by = [len(m) + 1 for m in batch_y]
        lx = [max(bx)] * batch_size
        ly = [max(by)] * batch_size
        batch_x = preprocessing.sequence.pad_sequences(
            batch_x, max(bx), padding='post', value=en2ind_oov['<eos>'])
        batch_y = preprocessing.sequence.pad_sequences(
            batch_y, max(by), padding='post', value=ch2ind_oov['<eos>'])
        tmp_loss, tran = session.run(
            [train_loss, translations],
            feed_dict={
                x: batch_x,
                y: batch_y,
                # Decoder input: <go> prepended, last target token dropped.
                y_in: np.concatenate(
                    (np.ones((batch_y.shape[0], 1), dtype=np.int32) * ch2ind['<go>'],
                     batch_y[:, :-1]),
                    axis=1),
                x_len: lx,
                y_len: ly,
                y_real_len: by,
                x_real_len: bx,
                y_max_len: max(by)
            })
        loss_list.append(tmp_loss)
        # tran is 3-D (batch, time, beam); keep the top hypothesis.
        tmp_acc = cal_acc(tran[:, :, 0], batch_y)
        accs.append(tmp_acc)
        predict_list += [i for i in tran[:, :, 0]]
        target_list += [i for i in batch_y]
        source_list += [i for i in batch_x]
        pb.complete(1)
    return (np.average(loss_list), np.average(accs),
            get_bleu_score(predict_list, target_list),
            predict_list, target_list, source_list)
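# `cal_acc` is defined elsewhere in the repo. A minimal sketch of a compatible
# implementation, assuming it measures token-level accuracy over the target
# positions up to the padding <eos> (this exact definition is an assumption,
# not the original):
def cal_acc(predictions, targets):
    hit, total = 0, 0
    for pred, tgt in zip(predictions, targets):
        for k, tok in enumerate(tgt):
            if tok == ch2ind_oov['<eos>']:  # assumed padding convention
                break
            total += 1
            if k < len(pred) and pred[k] == tok:
                hit += 1
    return hit / max(total, 1)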
import sys
from typing import Callable, Optional, Tuple

import torch.nn as nn
import torch.nn.functional as nnf
from torch import Tensor
from torch.utils.data import DataLoader


def test(model: nn.Module,
         test_loader: DataLoader,
         use_cuda: bool,
         loss_func: Callable[[Tensor, Tensor], Tensor] = nnf.cross_entropy,
         input_ops: Optional[Callable[[Tensor], Tensor]] = None,
         output_ops: Optional[Callable[..., Tensor]] = None) -> Tuple[float, float]:
    model.eval()
    tot_loss = 0.
    correct = 0
    total = 0
    print('\nTest started:')
    sys.stdout.flush()
    pb = ProgressBar()
    for batch_idx, (data, target) in enumerate(test_loader):
        # Optimizer slot is None: evaluation only, no weight updates.
        (loss, correct, total, tot_loss) = step(model, data, target, use_cuda,
                                                correct, total, tot_loss, None,
                                                loss_func, input_ops, output_ops)
        pb.progress(batch_idx / len(test_loader))
    pb.complete()
    avg_loss = tot_loss / total
    accuracy = correct / total
    print('\n{0}: Test result: mean loss = {1:f}, accuracy = {2:f}'.format(
        get_timestamp(), avg_loss, accuracy))
    sys.stdout.flush()
    return avg_loss, accuracy
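# `step`, `ProgressBar`, and `get_timestamp` are project-local helpers not
# shown in this excerpt. A minimal sketch of a `step` compatible with the call
# above, assuming it runs one forward pass and accumulates running loss and
# accuracy (weight updates only happen when an optimizer is passed):
def step(model, data, target, use_cuda, correct, total, tot_loss,
         optimizer, loss_func, input_ops, output_ops):
    if use_cuda:
        data, target = data.cuda(), target.cuda()
    if input_ops is not None:
        data = input_ops(data)
    output = model(data)
    if output_ops is not None:
        output = output_ops(output)
    loss = loss_func(output, target)
    if optimizer is not None:  # training mode
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    pred = output.argmax(dim=1)
    correct += (pred == target).sum().item()
    total += target.size(0)
    tot_loss += loss.item() * target.size(0)
    return loss, correct, total, tot_loss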
def calc_test_loss(test_set=None, display=True):
    # A `Dataset(test_x, test_y)` default argument would be evaluated once at
    # definition time and shared across calls, so build it lazily instead.
    if test_set is None:
        test_set = Dataset(test_x, test_y)
    accs = []
    worksum = int(len(test_x) / batch_size)
    loss_list = []
    predict_list = []
    target_list = []
    source_list = []
    pb = ProgressBar(worksum=worksum, info="validating...", auto_display=display)
    pb.startjob()
    for j in range(worksum):
        batch_x, batch_y = test_set.next_batch(batch_size)
        lx = [seq_max_len] * batch_size
        ly = [seq_max_len] * batch_size
        # Real lengths: number of non-padding (id > 0) tokens per sequence.
        bx = [np.sum(m > 0) for m in batch_x]
        by = [np.sum(m > 0) for m in batch_y]
        tmp_loss, tran = session.run(
            [train_loss, translations],
            feed_dict={
                x: batch_x,
                y: batch_y,
                # Decoder input: <go> prepended, last target token dropped.
                y_in: np.concatenate(
                    (np.ones((batch_y.shape[0], 1), dtype=np.int32) * ch2ind['<go>'],
                     batch_y[:, :-1]),
                    axis=1),
                x_len: lx,
                y_len: ly,
                y_real_len: by,
                x_real_len: bx
            })
        loss_list.append(tmp_loss)
        tmp_acc = cal_acc(tran, batch_y)
        accs.append(tmp_acc)
        predict_list += [i for i in tran]
        target_list += [i for i in batch_y]
        source_list += [i for i in batch_x]
        pb.complete(1)
    return (np.average(loss_list), np.average(accs),
            get_bleu_score(predict_list, target_list),
            predict_list, target_list, source_list)
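# `get_bleu_score` is also external to this excerpt. One possible
# implementation using NLTK's corpus BLEU (nltk and the id > 0 padding
# convention are assumptions, not necessarily what the original used):
from nltk.translate.bleu_score import corpus_bleu

def get_bleu_score(predict_list, target_list):
    def strip_pad(seq):
        return [str(tok) for tok in seq if tok > 0]  # drop assumed padding ids
    references = [[strip_pad(t)] for t in target_list]
    hypotheses = [strip_pad(p) for p in predict_list]
    return corpus_bleu(references, hypotheses)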
def get_most_common(a1, a2):
    # Count word frequencies over a pair of parallel corpora and return the
    # most frequent words, capped at the configured vocabulary sizes.
    temp_dict1 = {}
    temp_dict2 = {}
    pb = ProgressBar(worksum=len(a1), auto_display=False)
    pb.startjob()
    num = 0
    for s1, s2 in zip(a1, a2):
        num += 1
        pb.complete(1)
        # Skip sentence pairs that exceed the length cap (if one is set).
        if args.max_words != -1 and (len(s1) > args.max_words
                                     or len(s2) > args.max_words):
            continue
        for w1 in s1:
            temp_dict1.setdefault(w1, 0)
            temp_dict1[w1] += 1
        for w2 in s2:
            temp_dict2.setdefault(w2, 0)
            temp_dict2[w2] += 1
        if num % 32 == 0:
            pb.display_progress_bar()
    sorted1 = sorted(temp_dict1.items(), key=lambda i: i[1], reverse=True)
    sorted2 = sorted(temp_dict2.items(), key=lambda i: i[1], reverse=True)
    return ([i[0] for i in sorted1[:args.vac_dict_ch]],
            [i[0] for i in sorted2[:args.vac_dict_en]])
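# Typical usage (a sketch; `ch_sentences`/`en_sentences` are hypothetical
# names for the tokenized corpora, and the id layout for the special tokens
# is an assumption based on the <go>/<eos> lookups used elsewhere):
ch_vocab, en_vocab = get_most_common(ch_sentences, en_sentences)
ch2ind = {w: i + 2 for i, w in enumerate(ch_vocab)}
ch2ind['<go>'], ch2ind['<eos>'] = 0, 1
en2ind = {w: i + 1 for i, w in enumerate(en_vocab)}
en2ind['<eos>'] = 0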
pb.startjob()
for one_batch in range(0, len(X_train), batch_size):
    batch_x, batch_y = next(gen)
    batch_x_len = np.asarray([len(x) for x in batch_x])
    batch_lr = beginning_lr
    _, batch_loss = sess.run([optimizer, loss],
                             feed_dict={
                                 X: batch_x,
                                 y: batch_y,
                                 X_len: batch_x_len,
                                 learning_rate: batch_lr
                             })
    pb.info = "EPOCH {} batch {} lr {} loss {}".format(one_epoch, one_batch,
                                                       batch_lr, batch_loss)
    pb.complete(batch_size)
    losses.append(batch_loss)

# In[]
# =============================================================================
# Run Batch Prediction
# =============================================================================
# Threshold the model's probability output at 0.5 to get binary labels.
batch_predict = sess.run(model_pred, feed_dict={
    X: test_X,
    X_len: test_X_len
})[:, 0]
batch_predict = [(1 if x > 0.5 else 0) for x in batch_predict]

pd.DataFrame(losses).plot()
df = pd.DataFrame({'id': testData.id, 'pred': batch_predict})
pred_path = os.path.join(dir_path, 'TF_RNN.csv')
df.to_csv(pred_path, index=False, header=True)
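# `gen` in the training loop above is created elsewhere. A minimal sketch of
# a compatible batch generator (the name `batch_generator` and the
# plain-slicing behavior are assumptions; `y_train` is the assumed label
# counterpart of `X_train`):
def batch_generator(features, labels, batch_size):
    # Yield successive (batch_x, batch_y) slices, cycling over the data.
    while True:
        for start in range(0, len(features), batch_size):
            yield (features[start:start + batch_size],
                   labels[start:start + batch_size])

gen = batch_generator(X_train, y_train, batch_size)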
# Fragment from inside the training loop (i indexes the epoch, j the step);
# the opening of the first with-statement was truncated upstream.
          'w', encoding='utf-8') as whdl:
    for line in train_target_texts:
        whdl.write("{}\n".format(line))
with open('eval/{}/{}_{}_source_train'.format(model_path, i + 1, j),
          'w', encoding='utf-8') as whdl:
    for line in train_source_texts:
        whdl.write("{}\n".format(line))
print(
    "\niter {} step {} train loss {} train acc {} test loss {} test acc {} bleu {} lr {}\n"
    .format(i + 1, j, np.average(train_loss_list[-val_step:]), train_acc,
            test_loss, test_acc, bleu_score, lr))
with open('val/{}/test_loss.txt'.format(model_path), 'a') as whdl:
    whdl.write("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n".format(
        i + 1, j, np.average(train_loss_list[-val_step:]), train_acc,
        test_loss, test_acc, bleu_score, lr))
try:
    saver = tf.train.Saver()
    saver.save(session,
               'middleresult/{}/result_{}_{}'.format(model_path, i + 1, j))
except Exception:  # a bare except would also swallow KeyboardInterrupt
    print('save fail')
# Halve the learning rate twice per epoch (lr_step is about worksum / 2),
# starting from epoch 6.
lr_step = int(worksum / 2) - 1
if j % lr_step == 0 and j != 0:
    if (i + 1) >= 6:
        lr = lr / 2
pb.complete(1)
clip_norm = 400
# When not restoring from a checkpoint, start training from scratch.
if not restore:
    train_epoch = 1
    train_batch = 0
for one_epoch in range(train_epoch, N_EPOCH):
    train_epoch = one_epoch
    pb = ProgressBar(worksum=N_BATCH * BATCH_SIZE,
                     info=" epoch {} batch {}".format(train_epoch, train_batch))
    pb.startjob()
    for one_batch in range(N_BATCH):
        # After a restore, silently fast-forward past already-trained batches.
        if restore and one_batch < train_batch:
            pb.auto_display = False
            pb.complete(BATCH_SIZE)
            pb.auto_display = True
            continue
        else:
            restore = False
        train_batch = one_batch
        batch_x, batch_y_ori, a, t, s = voice_flow_train.next()['data']
        # CTC labels must be fed as a sparse (indices, values, shape) tuple.
        batch_y = sparse_tuple_from(batch_y_ori)
        # Input lengths are halved, presumably matching a stride-2 time
        # reduction inside the acoustic model.
        batch_seq_len = [i // 2 for i in s]
        batch_target_len = [len(i) for i in batch_y_ori]
        # Learning rate decay strategy: divide by 10 every DECAY_EPOCH epochs.
        batch_lr = begining_learning_rate * 10**-(one_epoch // DECAY_EPOCH)
        step_dis, _, step_loss, step_summary, step_value = sess.run(
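# `sparse_tuple_from` is the standard helper for building the sparse labels
# that tf.nn.ctc_loss expects. A common implementation (a sketch; the repo's
# own version is defined elsewhere):
import numpy as np

def sparse_tuple_from(sequences, dtype=np.int32):
    # Flatten a list of int sequences into (indices, values, dense_shape).
    indices, values = [], []
    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), indices.max(axis=0)[1] + 1],
                       dtype=np.int64)
    return indices, values, shape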