import numpy as np
import torch

# NOTE: gen_data_batch, gen_data_batch_longer, prepare_batch, results_converter
# and train_one_step are assumed to be defined elsewhere in this project.


def evaluate(model, maxlen=9, batch_size=1000):
    end_num = 10**maxlen
    datas = gen_data_batch(batch_size=batch_size, start=0, end=end_num)
    Nums1, Nums2, results = prepare_batch(*datas, maxlen=maxlen + 2)
    with torch.no_grad():
        logits = model(torch.tensor(Nums1), torch.tensor(Nums2))
        logits = logits.numpy()
    pred = np.argmax(logits, axis=-1)
    res = results_converter(pred)
    # for o in list(zip(datas[2], res))[:20]:
    #     print(o[0], o[1], o[0] == o[1])
    acc = np.mean([o[0] == o[1] for o in zip(datas[2], res)])  # added
    print('accuracy is: %g' % acc)
    return acc
def train(steps, model, optimizer, maxlen=9):
    loss = 0.0
    accuracy = 0.0
    end_num = 10**maxlen
    for step in range(steps):
        datas = gen_data_batch(batch_size=200, start=0, end=end_num)
        Nums1, Nums2, results = prepare_batch(*datas, maxlen=maxlen + 2)
        loss = train_one_step(model, optimizer, Nums1, Nums2, results)
        model.losses.append(loss)  # added
        if step % 50 == 0:
            print('step', step, ': loss', loss)
            acc = evaluate(model)
            model.acc.append(acc)
    return loss
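# Hypothetical driver for the train()/evaluate() pair above: the model is
# assumed to be the project's seq2seq addition model and must expose `losses`
# and `acc` list attributes, since train() appends to them. The optimizer and
# step count below are illustrative assumptions, not values from the source.
def run_addition_training(model, steps=500):
    model.losses, model.acc = [], []
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    train(steps, model, optimizer, maxlen=9)
    return evaluate(model, maxlen=9)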
def train(steps, model, optimizer, max_len=10, test_len=10, b_size=20):
    loss = 0.0
    accuracy = 0.0
    end_num = 10**(max_len - 1)
    for step in range(steps):
        # a random batch of b_size operand pairs drawn from [0, end_num)
        datas = gen_data_batch(batch_size=b_size, start=0, end=end_num)
        # 2-D arrays of shape (b_size, max_len + 1)
        Nums1, Nums2, results = prepare_batch(*datas, maxlen=max_len + 1)
        loss = train_one_step(model, optimizer, Nums1, Nums2, results)
        model.loss.append(loss)
        # acc = evaluate(model, test_len)
        # model.accuracy.append(acc)
        if step % 50 == 0:
            acc = evaluate(model, test_len)
            model.accuracy.append(acc)
            print('step', step, '\tloss', loss, '\taccuracy', acc)
    return loss
def evaluate(model, device, maxlen):
    # datas = gen_data_batch(batch_size=200, start=int((10 ** (maxlen - 1) - 1) / 9 * 5),
    #                        end=int((10 ** (maxlen - 1) - 1)))
    datas = gen_data_batch_longer(batch_size=200, start=0.5, end=1.0,
                                  digitlen=maxlen - 1)
    Nums1, Nums2, results = prepare_batch(*datas, maxlen + 1)
    with torch.no_grad():
        logits = model(torch.tensor(Nums1).to(device),
                       torch.tensor(Nums2).to(device))
        logits = logits.cpu().numpy()
    pred = np.argmax(logits, axis=-1)
    res = results_converter(pred)
    # for o in list(zip(datas[2], res))[:20]:
    #     print(o[0], o[1], o[0] == o[1])
    # print('accuracy is: %g' % np.mean([o[0] == o[1] for o in zip(datas[2], res)]))
    return np.mean([o[0] == o[1] for o in zip(datas[2], res)])
def train(steps, model, optimizer, scheduler, evaluate_step, device, maxlen):
    loss = 0.0
    accuracy = 0.0
    for step in range(steps):
        # datas = gen_data_batch(batch_size=200, start=0,
        #                        end=int((10 ** (maxlen - 1) - 1) / 9 * 5))
        datas = gen_data_batch_longer(batch_size=200, start=0, end=0.5,
                                      digitlen=maxlen - 1)
        Nums1, Nums2, results = prepare_batch(*datas, maxlen + 1)
        loss = train_one_step(model, optimizer, scheduler, Nums1, Nums2,
                              results, device)
        if step % 50 == 0:
            print('step', step, ': loss', loss)
            model.loss_record.append(loss)
        if step % evaluate_step == 0:
            accuracy = evaluate(model, device, maxlen)
            print('step', step, ': accuracy', accuracy)
            model.accuracy_record.append(accuracy)
    return loss
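# Hypothetical driver for the device-aware train()/evaluate() variants above:
# the model is assumed to expose `loss_record` and `accuracy_record` lists,
# since train() appends to them, and the Adam/StepLR settings are illustrative
# assumptions only.
def run_longer_digit_training(model, steps=2000, maxlen=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    model.loss_record, model.accuracy_record = [], []
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=500, gamma=0.5)
    train(steps, model, optimizer, scheduler, evaluate_step=100,
          device=device, maxlen=maxlen)
    return evaluate(model, device, maxlen)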
from sklearn.preprocessing import LabelBinarizer
from tqdm import trange

# NOTE: shuffle_ids, prepare_batch and random_crop are project helpers assumed
# to be defined elsewhere.


def train(model, data, params, shuffle=True, tblogger=None):
    """Run the actual training loop, logging train/validation cost and accuracy."""
    id_hash = {v: k for k, v in enumerate(data['ids'][:])}
    # only for string labels
    lb = LabelBinarizer().fit(data['y']['tg'][:])
    # params['iter'] = 0
    try:
        if params['verbose']:
            epoch = trange(params['n_epochs'], desc='[Loss : -.--] Epoch', ncols=80)
        else:
            epoch = range(params['n_epochs'])

        for n in epoch:
            if shuffle:
                trn_ids = shuffle_ids(params['split']['train'], id_hash)
                val_ids = shuffle_ids(params['split']['valid'], id_hash)
            else:
                trn_ids = [id_hash[x] for x in params['split']['train'] if x in id_hash]
                val_ids = [id_hash[x] for x in params['split']['valid'] if x in id_hash]

            for i, X_, y_, target in prepare_batch(data, trn_ids, params, lb):
                if params['iter'] % params['report_every'] == 0:
                    # draw validation samples
                    idx_v = sorted(np.random.choice(val_ids, params['batch_sz'], replace=False))
                    if target == 'tg':
                        y_v = lb.transform(data['y'][target][idx_v])
                    else:
                        y_v = data['y'][target][idx_v]
                    X_v, y_v = random_crop(data['X'][idx_v], data['mask'][idx_v], y_v, params['dur'])

                    c = model[target]['cost'](X_, y_).item()
                    cv = model[target]['cost'](X_v, y_v).item()
                    a = model[target]['acc'](X_, y_).item()
                    av = model[target]['acc'](X_v, y_v).item()

                    if tblogger is not None:
                        tblogger.log_value('%s_cost_tr' % target, c, params['iter'])
                        tblogger.log_value('%s_cost_vl' % target, cv, params['iter'])
                        tblogger.log_value('%s_acc_tr' % target, a, params['iter'])
                        tblogger.log_value('%s_acc_vl' % target, av, params['iter'])

                    if params['verbose']:
                        epoch.set_description(
                            '[v_loss : {:.4f} / v_acc: {:.4f}]Epoch'.format(cv, av))

                model[target]['train'](X_, y_)
                params['iter'] += 1
    except KeyboardInterrupt:
        print('User Stopped!')
def training(edit_net, nepochs, args, vocab, print_every=100, check_every=500, test=False):
    # NOTE: assumes module-level imports of os, numpy as np, torch and
    # torch.nn as nn, plus the EditNTS project helpers data, Evaluator,
    # Checkpoint and sort_by_lens.
    if test:
        print(args.data_path + 'test.df.filtered.pos')
        eval_dataset = data.Dataset(args.data_path + 'test.df.filtered.pos')  # load eval dataset
    else:
        print(args.data_path + 'val.df.filtered.pos')
        eval_dataset = data.Dataset(args.data_path + 'val.df.filtered.pos')  # load eval dataset

    evaluator = Evaluator(loss=nn.NLLLoss(ignore_index=vocab.w2i['PAD'], reduction='none'))
    editnet_optimizer = torch.optim.Adam(edit_net.parameters(), lr=1e-3, weight_decay=1e-6)
    # scheduler = MultiStepLR(abstract_optimizer, milestones=[20,30,40], gamma=0.1)
    # abstract_scheduler = ReduceLROnPlateau(abstract_optimizer, mode='max')

    # uncomment this part to re-weight different operations
    # NLL_weight = reweight_global_loss(args.w_add, args.w_keep, args.w_del)
    # NLL_weight_t = torch.from_numpy(NLL_weight).float().cuda()
    # editnet_criterion = nn.NLLLoss(weight=NLL_weight_t, ignore_index=vocab.w2i['PAD'], reduce=False)
    editnet_criterion = nn.NLLLoss(ignore_index=vocab.w2i['PAD'], reduction='none')

    best_eval_loss = float('inf')  # lowest validation loss seen so far

    # init statistics
    print_loss = []  # reset every print_every

    for epoch in range(nepochs):
        # scheduler.step()
        # reload training data for every epoch
        if os.path.isfile(args.data_path + 'train.df.filtered.pos'):
            train_dataset = data.Dataset(args.data_path + 'train.df.filtered.pos')
        else:  # iterate over chunks and vocab_data
            train_dataset = data.Datachunk(args.data_path + 'train.df.filtered.pos')

        for i, batch_df in train_dataset.batch_generator(batch_size=args.batch_size, shuffle=True):
            # time1 = time.time()
            prepared_batch, syn_tokens_list = data.prepare_batch(batch_df, vocab, args.max_seq_len)  # comp, scpn, simp

            # a batch of complex tokens in vocab ids, sorted in descending order
            org_ids = prepared_batch[0]
            org_lens = org_ids.ne(0).sum(1)
            org = sort_by_lens(org_ids, org_lens)  # inp=[inp_sorted, inp_lengths_sorted, inp_sort_order]

            # a batch of pos-tags in pos-tag ids for complex
            org_pos_ids = prepared_batch[1]
            org_pos_lens = org_pos_ids.ne(0).sum(1)
            org_pos = sort_by_lens(org_pos_ids, org_pos_lens)

            out = prepared_batch[2][:, :]
            tar = prepared_batch[2][:, 1:]
            simp_ids = prepared_batch[3]

            editnet_optimizer.zero_grad()
            output = edit_net(org, out, org_ids, org_pos, simp_ids)

            # calculate loss
            tar_lens = tar.ne(0).sum(1).float()
            tar_flat = tar.contiguous().view(-1)
            loss = editnet_criterion(output.contiguous().view(-1, vocab.count), tar_flat).contiguous()
            loss[tar_flat == 1] = 0  # remove loss for UNK
            loss = loss.view(tar.size())
            loss = loss.sum(1).float()
            loss = loss / tar_lens
            loss = loss.mean()
            print_loss.append(loss.item())

            loss.backward()
            torch.nn.utils.clip_grad_norm_(edit_net.parameters(), 1.)
            editnet_optimizer.step()

            if i % print_every == 0:
                log_msg = 'Epoch: %d, Step: %d, Loss: %.4f' % (epoch, i, np.mean(print_loss))
                print_loss = []
                print(log_msg)

            # checkpoint
            if i % check_every == 0:
                edit_net.eval()
                val_loss, bleu_score, sari, sys_out = evaluator.evaluate(eval_dataset, vocab, edit_net, args)
                log_msg = "epoch %d, step %d, Dev loss: %.4f, Bleu score: %.4f, Sari: %.4f \n" % (
                    epoch, i, val_loss, bleu_score, sari)
                print(log_msg)
                if val_loss < best_eval_loss:
                    best_eval_loss = val_loss
                    Checkpoint(model=edit_net, opt=editnet_optimizer,
                               epoch=epoch, step=i).save(args.store_dir)
                print("checked after %d steps" % i)
                edit_net.train()

    print(edit_net)
    return edit_net
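# Hypothetical driver for training() above. Every path and hyper-parameter in
# this Namespace is a placeholder assumption; edit_net and vocab must come from
# the EditNTS project itself, and args only needs the fields that training()
# actually reads (data_path, batch_size, max_seq_len, store_dir).
def run_editnts_training(edit_net, vocab):
    from argparse import Namespace
    args = Namespace(data_path='./data/',        # directory holding *.df.filtered.pos files
                     batch_size=32,
                     max_seq_len=100,
                     store_dir='./checkpoints/')
    return training(edit_net, nepochs=10, args=args, vocab=vocab,
                    print_every=100, check_every=500, test=False)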
def evaluate(self, dataset, vocab, model, args, max_edit_steps=50):
    """
    Evaluate a model on a given dataset and return its performance during training.

    Args:
        dataset: an object of data.Dataset()
        vocab: an object containing data.Vocab()
        model (editNTS model): model to evaluate
        args: args from the main methods

    Returns:
        loss (float): loss of the given model on the given dataset evaluated with teacher forcing
        bleu (float): mean sentence-level BLEU over the dataset
        sari (float): mean SARI computed with the Python SARI script
        sys_out (list): the decoded output sentences
    """
    # NOTE: data.id2edits, edit2sent, cal_bleu_score, SARIsent and sort_by_lens
    # are EditNTS project helpers assumed to be imported elsewhere.
    print_loss, print_loss_tf = [], []
    bleu_list = []
    ter = 0.
    sari_list = []
    sys_out = []
    print('Doing tokenized evaluation')

    for i, batch_df in dataset.batch_generator(batch_size=1, shuffle=False):
        model.eval()
        prepared_batch, syn_tokens_list = data.prepare_batch(batch_df, vocab, args.max_seq_len)  # comp, scpn, simp

        org_ids = prepared_batch[0]
        org_lens = org_ids.ne(0).sum(1)
        org = sort_by_lens(org_ids, org_lens)  # inp=[inp_sorted, inp_lengths_sorted, inp_sort_order]

        org_pos_ids = prepared_batch[1]
        org_pos_lens = org_pos_ids.ne(0).sum(1)
        org_pos = sort_by_lens(org_pos_ids, org_pos_lens)  # inp=[inp_sorted, inp_lengths_sorted, inp_sort_order]

        out = prepared_batch[2][:, :]
        tar = prepared_batch[2][:, 1:]
        simp_ids = prepared_batch[3]

        best_seq_list = model.beamsearch(org, out, simp_ids, org_ids, org_pos, 5)

        # output_without_teacher_forcing = model(org, out, org_ids, org_pos, simp_ids, 0.0)  # can't compute loss for this one, can only do teacher forcing
        # output_teacher_forcing = model(org, out, org_ids, org_pos, simp_ids, 1.0)
        # if True:  # the loss on validation is computed based on teacher forcing
        #     ################## calculate loss
        #     tar_lens = tar.ne(0).sum(1).float()
        #     tar_flat = tar.contiguous().view(-1)
        #     def compute_loss(output, tar_flat):  # computes the loss from model outputs and the flattened target
        #         loss = self.loss(output.contiguous().view(-1, vocab.count), tar_flat).contiguous()
        #         loss[tar_flat == 1] = 0  # remove loss for UNK
        #         loss = loss.view(tar.size())
        #         loss = loss.sum(1).float()
        #         loss = loss / tar_lens
        #         loss = loss.mean()
        #         return loss
        #     loss_tf = compute_loss(output_teacher_forcing, tar_flat)
        #     print_loss_tf.append(loss_tf.item())

        # SARI and BLEU are computed on the beam-search output (no teacher forcing)
        # for j in range(output_without_teacher_forcing.size()[0]):
        # example = batch_df.iloc[j]
        example = batch_df.iloc[0]

        ## GREEDY
        # example_out = output_without_teacher_forcing[j, :, :]
        # pred_action = torch.argmax(example_out, dim=1).view(-1).data.cpu().numpy()
        # edit_list_in_tokens = data.id2edits(pred_action, vocab)

        ### BEST BEAM
        edit_list_in_tokens = data.id2edits(best_seq_list[0][1:], vocab)
        greedy_decoded_tokens = ' '.join(edit2sent(example['comp_tokens'], edit_list_in_tokens))
        greedy_decoded_tokens = greedy_decoded_tokens.split('STOP')[0].split(' ')
        # tgt_tokens_translated = [vocab.i2w[i] for i in example['simp_ids']]
        sys_out.append(' '.join(greedy_decoded_tokens))

        # prt = True if random.random() < 0.01 else False
        # if prt:
        #     print('*' * 30)
        #     # print('tgt_in_tokens_translated', ' '.join(tgt_tokens_translated))
        #     print('ORG', ' '.join(example['comp_tokens']))
        #     print('GEN', ' '.join(greedy_decoded_tokens))
        #     print('TGT', ' '.join(example['simp_tokens']))
        #     print('edit_list_in_tokens', edit_list_in_tokens)
        #     print('gold labels', ' '.join(example['edit_labels']))

        bleu_list.append(cal_bleu_score(greedy_decoded_tokens, example['simp_tokens']))

        # calculate SARI
        comp_string = ' '.join(example['comp_tokens'])
        simp_string = ' '.join(example['simp_tokens'])
        gen_string = ' '.join(greedy_decoded_tokens)
        sari_list.append(SARIsent(comp_string, gen_string, [simp_string]))

    # NOTE: print_loss_tf stays empty while the teacher-forcing block above is
    # commented out, so np.mean(print_loss_tf) evaluates to NaN.
    print('loss_with_teacher_forcing', np.mean(print_loss_tf))
    return np.mean(print_loss_tf), np.mean(bleu_list), np.mean(sari_list), sys_out