def gen(self, event_ids, context_ids, prior, topk):
    """Beam-search generation conditioned on the top-k evidence contexts.

    For each event in the batch, the top-k contexts (ranked by ``prior``)
    are selected, concatenated with the event ids to build the decoder's
    cached past, and a beam search of width ``self.beam_size`` decodes up
    to ``self.max_length`` tokens.

    Args:
        event_ids: batch of event token ids (indexed along dim 0).
        context_ids: candidate context token ids for the whole batch.
        prior: per-example scores over the candidate contexts.
        topk: number of contexts to keep per example.

    Returns:
        Tensor of right-padded predictions, one stack of hypotheses per
        example (padded with token id 0).
    """
    preds = []
    # Token id 0 doubles as the pad id when right-padding hypotheses below.
    zero = torch.cuda.LongTensor(1).fill_(0)
    prob, topk_id = prior.topk(topk, -1)
    context_ids_all = context_ids
    for i in range(event_ids.shape[0]):
        beam = Beam(self.beam_size, self.sos_id, self.eos_id, prob[i])
        context_ids = context_ids_all[i:i + 1].repeat(topk, 1, 1)
        context_ids, _ = self.selecter(context_ids, topk_id[i])
        # Concatenate evidence and event to obtain hidden states for the
        # fixed prefix in a single forward pass.
        inputs_ids = torch.cat(
            (context_ids, event_ids[i:i + 1].repeat(topk, 1)), -1)
        transformer_outputs = self.decoder(inputs_ids)
        # Duplicate the cached states across the beam dimension.
        # NOTE(review): assumes 5-D past entries with batch at dim 1 —
        # confirm against the decoder implementation.
        past_x = [
            x.repeat(1, self.beam_size, 1, 1, 1)
            for x in transformer_outputs[1]
        ]
        # Beam search over the remaining positions.
        input_ids = None
        for _ in range(self.max_length - 1):
            if beam.done():
                break
            if input_ids is None:
                input_ids = beam.getCurrentState()
            else:
                input_ids = torch.cat((input_ids, beam.getCurrentState()),
                                      -1)
            # Interleave each beam hypothesis with every kept context so
            # the flattened batch matches the replicated past.
            target_ids = input_ids.unsqueeze(1).repeat(1, topk, 1).view(
                -1, input_ids.shape[-1])
            transformer_outputs = self.decoder(target_ids, past=past_x)
            hidden_states = transformer_outputs[0]
            out = self.lsm(self.lm_head(hidden_states[:, -1, :])).data
            out = out.view(-1, topk, out.shape[-1])
            beam.advance(out)
            # Reorder hypotheses to follow the surviving beams.
            input_ids.data.copy_(
                input_ids.data.index_select(0, beam.getCurrentOrigin()))
        hyp = beam.getHyp(beam.getFinal())
        # NOTE(review): hard-coded 10 caps the returned hypotheses even if
        # self.beam_size differs — confirm this is intended.
        pred = beam.buildTargetTokens(hyp)[:10]
        # Right-pad every hypothesis to self.max_length with zeros.
        pred = [
            torch.cat([x.view(-1) for x in p] + [zero] *
                      (self.max_length - len(p))).view(1, -1) for p in pred
        ]
        preds.append(torch.cat(pred, 0).unsqueeze(0))
    preds = torch.cat(preds, 0)
    return preds
def eval_bleu(args, model, tokenizer, file_type='test', num=99999999):
    """Beam-search decode a split and report (BLEU, exact-match %).

    Decodes at most `num` examples one at a time (batch_size=1), writes
    predictions to `<output_dir>/<file_type>.output` and references to
    `<output_dir>/<file_type>.gold`, then scores them with `_bleu` and
    whitespace-token exact match.

    Args:
        args: run config; uses device, block_size, beam_size, data_dir,
            output_dir.
        model: language model returning (logits, past) when called.
        tokenizer: tokenizer providing bos/eos token ids and `decode`.
        file_type: dataset split name.
        num: maximum number of examples to evaluate.

    Returns:
        Tuple (bleu_score, EM), both rounded to 2 decimals.
    """
    dataset = CodeChangeDataset(tokenizer, args, logger, file_type=file_type,
                                block_size=args.block_size, mode='test')
    test_sampler = SequentialSampler(dataset)
    test_dataloader = DataLoader(dataset, sampler=test_sampler, batch_size=1)
    model.to(args.device)
    model.zero_grad()
    model.eval()
    preds = []
    for step, (batch, token_labels) in enumerate(
            tqdm(test_dataloader, total=min(num, len(dataset)))):
        if step >= num:
            break
        inputs = batch.to(args.device)
        with torch.no_grad():
            beam_size = args.beam_size
            m = torch.nn.LogSoftmax(dim=-1)
            # Forward pass over the source to build the decoder past cache.
            outputs = model(inputs)[1]
            p = []
            # Token id 0 doubles as the pad id when right-padding below.
            zero = torch.cuda.LongTensor(1).fill_(0)
            for i in range(inputs.shape[0]):
                # Broadcast example i's cached states across the beam.
                # NOTE(review): assumes 5-D past entries with batch at
                # dim 1 — confirm against the model implementation.
                past_hidden = []
                for x in outputs:
                    _p = x[:, i:i + 1]
                    _q = _p.expand(-1, beam_size, -1, -1, -1)
                    past_hidden.append(_q)
                # context_mask=source_mask[i:i+1,:].expand(beam_size,-1)
                beam = Beam(beam_size, tokenizer.bos_token_id,
                            tokenizer.eos_token_id)
                input_ids = None
                # 162 is the hard-coded generation budget (max new tokens).
                for _ in range(162):
                    if beam.done():
                        break
                    input_ids = beam.getCurrentState()
                    transformer_outputs = model(input_ids, past=past_hidden)
                    out = m(transformer_outputs[0][:, -1, :]).data
                    beam.advance(out)
                    # Reorder the cache to follow the surviving beams.
                    past_hidden = [
                        x.data.index_select(1, beam.getCurrentOrigin())
                        for x in transformer_outputs[1]
                    ]
                hyp = beam.getHyp(beam.getFinal())
                pred = beam.buildTargetTokens(hyp)[:beam_size]
                # Right-pad every hypothesis to length 162 with zeros.
                pred = [
                    torch.cat([x.view(-1) for x in p] + [zero] *
                              (162 - len(p))).view(1, -1) for p in pred
                ]
                p.append(torch.cat(pred, 0).unsqueeze(0))
            p = torch.cat(p, 0)
            for pred in p:
                # Keep only the best hypothesis; truncate at the first pad.
                t = pred[0].cpu().numpy()
                t = list(t)
                if 0 in t:
                    t = t[:t.index(0)]
                text = tokenizer.decode(t, clean_up_tokenization_spaces=False)
                preds.append(text)
    golds = []
    datas = read_data(data_dir=args.data_dir, file_type=file_type)
    for (src, tgt) in datas[:num]:
        golds.append(tgt)
    assert len(preds) == len(golds), 'Pred %d\tGold %d' % (len(preds),
                                                           len(golds))
    EM = []
    with open(os.path.join(args.output_dir, f"{file_type}.output"), 'w',
              encoding='utf-8') as f, open(
                  os.path.join(args.output_dir, f"{file_type}.gold"), 'w',
                  encoding='utf-8') as f1:
        for pred, gold in zip(preds, golds):
            f.write(pred + '\n')
            f1.write(gold + '\n')
            # Exact match on whitespace-tokenized sequences.
            EM.append(pred.split() == gold.split())
    bleu_score = round(
        _bleu(os.path.join(args.output_dir, f"{file_type}.gold"),
              os.path.join(args.output_dir, f"{file_type}.output")), 2)
    EM = round(np.mean(EM) * 100, 2)
    return bleu_score, EM
def eval_bleu(args, model, tokenizer, file_type='test', num=20000):
    """Beam-search decode method bodies and report BLEU and edit similarity.

    Decodes at most `num` examples (batch_size=1) with beam width 5,
    post-processes literal placeholders, writes predictions/references to
    `<output_dir>/<file_type>.output` / `.gold`, and scores with `_bleu`
    and `fuzz.ratio`.

    Args:
        args: run config; uses device, block_size, data_dir, output_dir,
            logging_steps.
        model: causal LM supporting `past_key_values` and `return_dict`.
        tokenizer: tokenizer providing bos/eos token ids and `decode`.
        file_type: dataset split name.
        num: maximum number of examples to evaluate.

    Returns:
        Tuple (bleu_score, ES), both rounded to 2 decimals.
    """
    # BUG FIX: file_type was previously hard-coded to 'test' here, so
    # evaluating any other split decoded the wrong data while golds were
    # still read from f"{file_type}.jsonl" below.
    dataset = MethodDataset(tokenizer, args, file_type=file_type,
                            block_size=args.block_size, mode='test')
    test_sampler = SequentialSampler(dataset)
    test_dataloader = DataLoader(dataset, sampler=test_sampler, batch_size=1)
    model.to(args.device)
    model.zero_grad()
    model.eval()
    preds = []
    for step, (batch, token_labels) in enumerate(test_dataloader):
        if step >= num:
            break
        inputs = batch.to(args.device)
        # Leave room in the context window for the generated tokens.
        max_gen_len = min(256, args.block_size - inputs.shape[1] - 1)
        try:
            with torch.no_grad():
                beam_size = 5
                m = torch.nn.LogSoftmax(dim=-1)
                outputs = model(inputs, return_dict=True).past_key_values
                p = []
                # Token id 0 doubles as the pad id when right-padding below.
                zero = torch.cuda.LongTensor(1).fill_(0)
                for i in range(inputs.shape[0]):
                    # Broadcast example i's per-layer (key, value) cache
                    # across the beam; batch is the leading dimension.
                    past_hidden = tuple(
                        tuple(xx[i:i + 1, :].expand(beam_size, -1, -1, -1)
                              for xx in x) for x in outputs)
                    beam = Beam(beam_size, tokenizer.bos_token_id,
                                [tokenizer.eos_token_id])
                    input_ids = None
                    for _ in range(max_gen_len):
                        if beam.done():
                            break
                        input_ids = beam.getCurrentState()
                        transformer_outputs = model(
                            input_ids, past_key_values=past_hidden,
                            return_dict=True)
                        out = m(transformer_outputs.logits[:, -1, :]).data
                        beam.advance(out)
                        # Reorder the cache to follow the surviving beams.
                        past_hidden = tuple(
                            tuple(
                                xx.data.index_select(
                                    0, beam.getCurrentOrigin()) for xx in x)
                            for x in transformer_outputs.past_key_values)
                    hyp = beam.getHyp(beam.getFinal())
                    pred = beam.buildTargetTokens(hyp)[:beam_size]
                    # Right-pad every hypothesis to max_gen_len with zeros.
                    pred = [
                        torch.cat([x.view(-1) for x in p] + [zero] *
                                  (max_gen_len - len(p))).view(1, -1)
                        for p in pred
                    ]
                    p.append(torch.cat(pred, 0).unsqueeze(0))
                p = torch.cat(p, 0)
                for pred in p:
                    # Keep the best hypothesis; truncate at the first pad.
                    t = pred[0].cpu().numpy()
                    t = list(t)
                    if 0 in t:
                        t = t[:t.index(0)]
                    text = tokenizer.decode(
                        t, clean_up_tokenization_spaces=False).rstrip("</s>")
                    preds.append(text)
        except Exception:
            # Best-effort: keep preds aligned with golds (batch_size=1),
            # but record the failure instead of swallowing it silently.
            logger.warning(
                "generation failed at step %d; emitting empty prediction",
                step, exc_info=True)
            preds.append("")
        if step % args.logging_steps == 0:
            logger.info(f"{step} are done!")
    golds = []
    datafile = os.path.join(args.data_dir, f"{file_type}.jsonl")
    datas = open(datafile).readlines()
    for x in datas[:num]:
        x = json.loads(x)
        golds.append(x["body"])

    # assert len(preds) == len(golds)

    def post_process(code):
        """Expand <EOL>/<INDENT>/<DEDENT> and literal placeholders, then
        collapse all whitespace runs to single spaces."""
        code = code.replace("<EOL>", "\n").replace("<INDENT>",
                                                   " ").replace(
                                                       "<DEDENT>", " ")
        code = code.replace("<NUM_LIT>", "0").replace("<STR_LIT>",
                                                      "").replace(
                                                          "<CHAR_LIT>", "")
        pattern = re.compile(r"<(STR|NUM|CHAR)_LIT:(.*?)>", re.S)
        lits = re.findall(pattern, code)
        for lit in lits:
            code = code.replace(f"<{lit[0]}_LIT:{lit[1]}>", lit[1])
        return " ".join(code.split())

    ES = []
    with open(os.path.join(args.output_dir, f"{file_type}.output"),
              'w') as f, open(
                  os.path.join(args.output_dir, f"{file_type}.gold"),
                  'w') as f1:
        for pred, gold in zip(preds, golds):
            pred = post_process(pred)
            gold = post_process(gold)
            f.write(pred + '\n')
            f1.write(gold + '\n')
            ES.append(fuzz.ratio(pred, gold))
    bleu_score = round(
        _bleu(os.path.join(args.output_dir, f"{file_type}.gold"),
              os.path.join(args.output_dir, f"{file_type}.output")), 2)
    ES = round(np.mean(ES), 2)
    print(bleu_score, ES)
    # Consistent with the sibling eval_bleu variants: return the scores
    # (previously this function only printed them and returned None).
    return bleu_score, ES
def eval_line_completion(args, model, tokenizer, file_type='test'):
    """Evaluate line-level code completion on exact match and edit similarity.

    It is recommended to use a single GPU because it could not be batched
    (batch_size=1 throughout). Writes predictions to
    `<output_dir>/predictions_line.txt` and logs edit similarity and exact
    match over the split.
    """

    def DecodeIds(idxs):
        # Reassemble a token-id list into source text, inserting spaces
        # around BPE word-boundary tokens ('\u0120' prefix), special
        # tokens, and <NUM_LIT...> placeholders.
        codes = ""
        for idx in idxs:
            to_add = tokenizer.convert_ids_to_tokens(idx)
            if tokenizer.convert_ids_to_tokens(idx)[0] == '\u0120':
                if not codes.endswith(" "):
                    codes += " " + to_add[1:]
                else:
                    codes += to_add[1:]
            elif (idx in [
                    tokenizer.bos_token_id, tokenizer.eos_token_id,
                    tokenizer.sep_token_id, tokenizer.pad_token_id
            ] or tokenizer.convert_ids_to_tokens(idx).startswith("<NUM_LIT")):
                codes += " " + to_add + " "
            else:
                codes += to_add
        return codes.strip(" ")

    dataset = lineDataset(tokenizer, args, logger, file_type=file_type,
                          block_size=args.block_size - 100)
    test_sampler = SequentialSampler(dataset)
    test_dataloader = DataLoader(dataset, sampler=test_sampler, batch_size=1)
    model.to(args.device)
    # model.zero_grad()
    model.eval()

    def repackage_hidden(h):
        """Wraps hidden states in new Tensors, to detach them from their
        history (prevents autograd from tracking across steps)."""
        if isinstance(h, torch.Tensor):
            return h.detach()
        else:
            return tuple(repackage_hidden(v) for v in h)

    # Token ids that terminate a line for the beam search.
    if args.langs == "python":
        break_ids = [tokenizer.sep_token_id]
    else:
        break_ids = [
            tokenizer.convert_tokens_to_ids('Ġ;'),
            tokenizer.convert_tokens_to_ids('Ġ}'),
            tokenizer.convert_tokens_to_ids('Ġ{')
        ]
    preds = []
    gts = []
    edit_sim = 0.0
    em = 0.0
    for step, (inputs, gt) in enumerate(test_dataloader):
        inputs = inputs.to(args.device)
        with torch.no_grad():
            beam_size = 5
            m = torch.nn.LogSoftmax(dim=-1)
            # Prime the cache on all but the last context token; the last
            # token seeds the beam below.
            outputs = model(inputs[:, :-1])[1]
            p = []
            # Token id 0 doubles as the pad id when right-padding below.
            zero = torch.cuda.LongTensor(1).fill_(0)
            for i in range(inputs.shape[0]):
                if args.model_type == "rnn":
                    # RNN hidden state: broadcast across the beam on dim 1.
                    past_hidden = tuple(
                        x[:, i:i + 1].expand(-1, beam_size, -1).contiguous()
                        for x in outputs)
                else:
                    # Transformer cache: stack (key, value) tuples so both
                    # tuple-style and tensor-style pasts are handled.
                    past = [
                        torch.cat([x[0].unsqueeze(0), x[1].unsqueeze(0)],
                                  dim=0) if type(x) == tuple else x
                        for x in outputs
                    ]
                    past_hidden = [
                        x[:, i:i + 1].expand(-1, beam_size, -1, -1, -1)
                        for x in past
                    ]
                # Seed the beam with the last context token.
                beam = Beam(beam_size, inputs[i][-1].cpu().data, break_ids)
                input_ids = None
                # 100 is the hard-coded generation budget (max new tokens).
                for _ in range(100):
                    if beam.done():
                        break
                    input_ids = beam.getCurrentState()
                    if args.model_type == "rnn":
                        outputs = model(input_ids,
                                        hidden=repackage_hidden(past_hidden))
                    else:
                        outputs = model(input_ids,
                                        past_key_values=past_hidden)
                    out = m(outputs[0][:, -1, :]).data
                    beam.advance(out)
                    # Reorder the cache to follow the surviving beams.
                    if args.model_type == "rnn":
                        past_hidden = tuple(
                            x.data.index_select(
                                1, beam.getCurrentOrigin()).contiguous()
                            for x in outputs[1])
                    else:
                        past = [
                            torch.cat(
                                [x[0].unsqueeze(0), x[1].unsqueeze(0)],
                                dim=0) if type(x) == tuple else x
                            for x in outputs[1]
                        ]
                        past_hidden = [
                            x.data.index_select(1, beam.getCurrentOrigin())
                            for x in past
                        ]
                hyp = beam.getHyp(beam.getFinal())
                pred = beam.buildTargetTokens(hyp)[:beam_size]
                # Right-pad every hypothesis to length 100 with zeros.
                pred = [
                    torch.cat([x.view(-1) for x in p] + [zero] *
                              (100 - len(p))).view(1, -1) for p in pred
                ]
                p.append(torch.cat(pred, 0).unsqueeze(0))
            p = torch.cat(p, 0)
            for pred in p:
                # Keep the best hypothesis; truncate at the first pad.
                t = pred[0].cpu().numpy()
                t = t.tolist()
                if 0 in t:
                    t = t[:t.index(0)]
                if args.langs == "python":
                    text = DecodeIds(t).strip("<EOL>").strip()
                else:
                    text = DecodeIds(t).strip("{").strip()
                # print(text)
                # exit()
                preds.append(text)
                gts.append(gt[0])
                edit_sim += fuzz.ratio(text, gt[0])
                em += 1 if text == gt[0] else 0
        if step % args.logging_steps == 0:
            logger.info(f"{step} are done!")
    saved_file = os.path.join(args.output_dir, "predictions_line.txt")
    with open(saved_file, "w") as f:
        for pred_text in preds:
            f.write(pred_text + "\n")
    logger.info(f"Test {len(preds)} samples")
    logger.info(f"Edit sim: {edit_sim/len(preds)}, EM: {em/len(preds)}")
def eval_bleu(args, model, tokenizer, file_type='test', num=2000):
    """Beam-search decode the concode split and report (BLEU, exact-match %).

    Decodes at most `num` examples (batch_size=1) with beam width 10 and a
    100-token generation budget, writes predictions/references to
    `<output_dir>/<file_type>.output` / `.gold`, then scores with `_bleu`.

    Args:
        args: run config; uses device, block_size, data_dir, output_dir,
            logging_steps.
        model: language model returning (logits, past) when called.
        tokenizer: tokenizer providing bos/eos token ids and `decode`.
        file_type: dataset split name.
        num: maximum number of examples to evaluate.

    Returns:
        Tuple (bleu_score, EM), or (0, 0) when file_type == "test".
    """
    dataset = concodeDataset(tokenizer, args, logger, file_type=file_type,
                             block_size=args.block_size, mode='test')
    test_sampler = SequentialSampler(dataset)
    test_dataloader = DataLoader(dataset, sampler=test_sampler, batch_size=1)
    model.to(args.device)
    model.zero_grad()
    model.eval()
    preds = []
    max_gen_len = 100
    for step, (batch, token_labels) in enumerate(test_dataloader):
        if step >= num:
            break
        inputs = batch.to(args.device)
        # with torch.no_grad():
        #     outputs = model.generate(inputs, max_length=args.block_size, num_beams=10, temperature=0.7, early_stopping=False, top_k=70, \
        #         bos_token_id=tokenizer.bos_token_id, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id)
        #     # outputs = model.generate(inputs, max_length=args.block_size, do_sample=True, temperature=0.7, top_k=70, top_p=0.95, \
        #     #     bos_token_id=tokenizer.bos_token_id, eos_token_id=tokenizer.pad_token_id, pad_token_id=tokenizer.pad_token_id)
        #     # outputs = model.generate(inputs, max_length=args.block_size, num_beams=10, temperature=0.7, early_stopping=False, top_k=70)
        #     # outputs = model.generate(inputs, max_length=args.block_size, do_sample=True, temperature=0.7, top_k=70, top_p=0.95)
        #     generation = tokenizer.decode(outputs[0])[len(tokenizer.decode(inputs[0])):]
        #     preds.append(generation.rstrip("<pad>"))
        with torch.no_grad():
            beam_size = 10
            m = torch.nn.LogSoftmax(dim=-1)
            # Forward pass over the source to build the decoder past cache.
            outputs = model(inputs)[1]
            p = []
            # Token id 0 doubles as the pad id when right-padding below.
            zero = torch.cuda.LongTensor(1).fill_(0)
            for i in range(inputs.shape[0]):
                # Compatible with transformers version 3.3.0 and 4.13.0:
                # stack (key, value) tuples into one tensor when needed.
                past = [
                    torch.cat([x[0].unsqueeze(0), x[1].unsqueeze(0)], dim=0)
                    if type(x) == tuple else x for x in outputs
                ]
                # Broadcast example i's cached states across the beam.
                past_hidden = [
                    x[:, i:i + 1].expand(-1, beam_size, -1, -1, -1)
                    for x in past
                ]
                # context_mask=source_mask[i:i+1,:].expand(beam_size,-1)
                beam = Beam(beam_size, tokenizer.bos_token_id,
                            tokenizer.eos_token_id)
                input_ids = None
                for _ in range(max_gen_len):
                    if beam.done():
                        break
                    input_ids = beam.getCurrentState()
                    # context_mask=torch.cat((context_mask,input_ids*0+1),-1)
                    # mask=context_mask.unsqueeze(0).unsqueeze(-2).unsqueeze(-2).expand(self.config.n_layer, -1, -1, -1, -1)
                    transformer_outputs = model(input_ids, past=past_hidden)
                    out = m(transformer_outputs[0][:, -1, :]).data
                    # out = self.lsm(self.lm_head(transformer_outputs[0][:,-1,:])).data
                    beam.advance(out)
                    # Reorder the cache to follow the surviving beams.
                    past = [
                        torch.cat([x[0].unsqueeze(0), x[1].unsqueeze(0)],
                                  dim=0) if type(x) == tuple else x
                        for x in transformer_outputs[1]
                    ]
                    past_hidden = [
                        x.data.index_select(1, beam.getCurrentOrigin())
                        for x in past
                    ]
                hyp = beam.getHyp(beam.getFinal())
                pred = beam.buildTargetTokens(hyp)[:beam_size]
                # Right-pad every hypothesis to max_gen_len with zeros.
                pred = [
                    torch.cat([x.view(-1) for x in p] + [zero] *
                              (max_gen_len - len(p))).view(1, -1)
                    for p in pred
                ]
                p.append(torch.cat(pred, 0).unsqueeze(0))
            p = torch.cat(p, 0)
            for pred in p:
                # Keep the best hypothesis; truncate at the first pad.
                t = pred[0].cpu().numpy()
                t = list(t)
                if 0 in t:
                    t = t[:t.index(0)]
                text = tokenizer.decode(t, clean_up_tokenization_spaces=False)
                # print(text)
                preds.append(text)
        if step % args.logging_steps == 0:
            logger.info(f"{step} are done!")
    golds = []
    datafile = os.path.join(args.data_dir, f"{file_type}.json")
    datas = open(datafile).readlines()
    for x in datas[:num]:
        x = json.loads(x)
        golds.append(x["code"])
    assert len(preds) == len(golds)
    EM = []
    with open(os.path.join(args.output_dir, f"{file_type}.output"),
              'w') as f, open(
                  os.path.join(args.output_dir, f"{file_type}.gold"),
                  'w') as f1:
        for pred, gold in zip(preds, golds):
            f.write(pred + '\n')
            f1.write(gold + '\n')
            EM.append(pred.split() == gold.split())
    # NOTE(review): scoring is skipped for "test" even though golds were
    # just read and asserted — confirm this early return is intended.
    if file_type == "test":
        return 0, 0
    bleu_score = round(
        _bleu(os.path.join(args.output_dir, f"{file_type}.gold"),
              os.path.join(args.output_dir, f"{file_type}.output")), 2)
    EM = round(np.mean(EM) * 100, 2)
    return bleu_score, EM