def evaluate_ppl(model: RenamingModel, dataset: Dataset, config: Dict,
                 predicate: Any = None):
    if predicate is None:
        def predicate(_):
            return True

    eval_batch_size = config['train']['batch_size']
    num_readers = config['train']['num_readers']
    num_batchers = config['train']['num_batchers']
    data_iter = dataset.batch_iterator(batch_size=eval_batch_size,
                                       train=False, progress=True,
                                       return_examples=False,
                                       return_prediction_target=True,
                                       config=model.config,
                                       num_readers=num_readers,
                                       num_batchers=num_batchers)

    was_training = model.training
    model.eval()
    cum_log_probs = 0.
    cum_num_examples = 0
    with torch.no_grad():
        for batch in data_iter:
            td = batch.tensor_dict
            nn_util.to(td, model.device)
            result = model(td, td['prediction_target'])
            log_probs = result['batch_log_prob'].cpu().tolist()
            for e_id, test_meta in enumerate(td['test_meta']):
                if predicate(test_meta):
                    log_prob = log_probs[e_id]
                    cum_log_probs += log_prob
                    cum_num_examples += 1

    ppl = np.exp(-cum_log_probs / cum_num_examples)

    if was_training:
        model.train()

    return ppl
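# Usage sketch: evaluate_ppl can be restricted to a subset of the dev set via
# its `predicate` argument, mirroring the commented-out call in the training
# loop further below. The helper name and the `dev_set` argument are
# illustrative placeholders, not part of the original module.
def _example_ppl_on_unseen_bodies(model: RenamingModel, dev_set: Dataset,
                                  config: Dict) -> float:
    # Only count examples whose enclosing function body was not seen in training.
    return evaluate_ppl(
        model, dev_set, config,
        predicate=lambda meta: not meta['function_body_in_train'])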
def decode(model: RenamingModel, dataset: Dataset, config: Dict,
           eval_batch_size=None):
    if eval_batch_size is None:
        if 'eval_batch_size' in config['train']:
            eval_batch_size = config['train']['eval_batch_size']
        else:
            eval_batch_size = config['train']['batch_size']
    num_readers = config['train']['num_readers']
    num_batchers = config['train']['num_batchers']
    data_iter = dataset.batch_iterator(batch_size=eval_batch_size,
                                       train=False, progress=True,
                                       return_examples=True,
                                       config=model.config,
                                       num_readers=num_readers,
                                       num_batchers=num_batchers)
    model.eval()
    all_examples = dict()

    with torch.no_grad():
        for batch in data_iter:
            examples = batch.examples
            rename_results = model.predict(examples)

            for example, rename_result in zip(examples, rename_results):
                example_pred_accs = []
                top_rename_result = rename_result[0]
                for old_name, gold_new_name \
                        in example.variable_name_map.items():
                    pred = top_rename_result[old_name]
                    pred_new_name = pred['new_name']
                    var_metric = Evaluator.get_soft_metrics(
                        pred_new_name, gold_new_name)
                    example_pred_accs.append(var_metric)

                file_name = example.binary_file['file_name']
                line_num = example.binary_file['line_num']
                fun_name = example.ast.compilation_unit
                all_examples[f'{file_name}_{line_num}_{fun_name}'] = \
                    (rename_result, Evaluator.average(example_pred_accs))

    return all_examples
def decode(model: RenamingModel, examples, config: Dict):
    model.eval()
    all_examples = dict()

    with torch.no_grad():
        for line_num, example in enumerate(examples):
            rename_result = model.predict([example])[0]

            example_pred_accs = []
            top_rename_result = rename_result[0]
            for old_name, gold_new_name \
                    in example.variable_name_map.items():
                pred = top_rename_result[old_name]
                pred_new_name = pred['new_name']
                var_metric = Evaluator.get_soft_metrics(
                    pred_new_name, gold_new_name)
                example_pred_accs.append(var_metric)

            fun_name = example.ast.compilation_unit
            all_examples[f'{line_num}_{fun_name}'] = \
                (rename_result, Evaluator.average(example_pred_accs))

    return all_examples
def decode_and_evaluate(model: RenamingModel, dataset: Dataset, config: Dict,
                        return_results=False, eval_batch_size=None,
                        approx=False):
    if eval_batch_size is None:
        eval_batch_size = (config['train']['eval_batch_size']
                           if 'eval_batch_size' in config['train']
                           else config['train']['batch_size'])
    data_iter = dataset.batch_iterator(
        batch_size=eval_batch_size,
        train=False,
        progress=True,
        return_examples=True,
        max_seq_len=512,
        config=(model.module.config
                if isinstance(model, torch.nn.DataParallel)
                else model.config),
        num_readers=config['train']['num_readers'],
        num_batchers=config['train']['num_batchers'],
        truncate=approx)

    was_training = model.training
    model.eval()
    example_acc_list = []
    variable_acc_list = []
    need_rename_cases = []
    func_name_in_train_acc_list = []
    func_name_not_in_train_acc_list = []
    func_body_in_train_acc_list = []
    func_body_not_in_train_acc_list = []

    all_examples = dict()
    with torch.no_grad():
        for i, batch in enumerate(data_iter):
            examples = batch.examples
            if isinstance(model, torch.nn.DataParallel):
                rename_results = model.module.predict(examples)
            else:
                rename_results = model.predict(examples)

            for example, rename_result in zip(examples, rename_results):
                example_pred_accs = []
                top_rename_result = rename_result[0]
                for old_name, gold_new_name in \
                        example.variable_name_map.items():
                    pred = top_rename_result[old_name]
                    pred_new_name = pred['new_name']
                    var_metric = Evaluator.get_soft_metrics(
                        pred_new_name, gold_new_name)
                    # is_correct = pred_new_name == gold_new_name
                    example_pred_accs.append(var_metric)

                    if gold_new_name != old_name:  # and gold_new_name in model.vocab.target:
                        need_rename_cases.append(var_metric)

                        if example.test_meta['function_name_in_train']:
                            func_name_in_train_acc_list.append(var_metric)
                        else:
                            func_name_not_in_train_acc_list.append(var_metric)

                        if example.test_meta['function_body_in_train']:
                            func_body_in_train_acc_list.append(var_metric)
                        else:
                            func_body_not_in_train_acc_list.append(var_metric)

                variable_acc_list.extend(example_pred_accs)
                example_acc_list.append(example_pred_accs)

                if return_results:
                    all_examples[example.binary_file['file_name'] + '_' +
                                 str(example.binary_file['line_num'])] = (
                                     rename_result,
                                     Evaluator.average(example_pred_accs))
                    # all_examples.append((example, rename_result, example_pred_accs))

    valid_example_num = len(example_acc_list)
    num_variables = len(variable_acc_list)
    corpus_acc = Evaluator.average(variable_acc_list)

    if was_training:
        model.train()

    eval_results = dict(
        corpus_acc=corpus_acc,
        corpus_need_rename_acc=Evaluator.average(need_rename_cases),
        func_name_in_train_acc=Evaluator.average(
            func_name_in_train_acc_list),
        func_name_not_in_train_acc=Evaluator.average(
            func_name_not_in_train_acc_list),
        func_body_in_train_acc=Evaluator.average(
            func_body_in_train_acc_list),
        func_body_not_in_train_acc=Evaluator.average(
            func_body_not_in_train_acc_list),
        num_variables=num_variables,
        num_valid_examples=valid_example_num)

    if return_results:
        return eval_results, all_examples
    return eval_results
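# Usage sketch: this variant of decode_and_evaluate accepts either a bare
# RenamingModel or a torch.nn.DataParallel wrapper, and `approx=True` enables
# truncated (faster, approximate) evaluation. The helper name and arguments are
# illustrative; it assumes RenamingModel is an nn.Module so DataParallel can
# wrap it.
def _example_approx_eval(model: RenamingModel, dev_set: Dataset, config: Dict,
                         use_data_parallel: bool = False):
    wrapped = torch.nn.DataParallel(model) if use_data_parallel else model
    # Truncated batches (truncate=approx in the iterator) trade accuracy for speed.
    return decode_and_evaluate(wrapped, dev_set, config, approx=True)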
seed_stuff()
model = RenamingModel.load(args.model, use_cuda=False,
                           new_config=extra_config)
decode_results = \
    list(decode(model, examples, model.config).values())
# Get the first function. There should be only one.
if len(decode_results) == 0:
    raise ValueError("The decoder did not return any variable names.")
else:
    print(json.dumps(decode_results[0]))
if cmd_args['--cuda'] is not None:
    torch.cuda.manual_seed(seed)
np.random.seed(seed * 13 // 7)
random.seed(seed * 17 // 7)

sys.setrecursionlimit(7000)

if cmd_args['--extra-config'] is not None:
    extra_config = json.loads(cmd_args['--extra-config'])
else:
    default_config = '{"decoder": {"remove_duplicates_in_prediction": true} }'
    extra_config = json.loads(default_config)

model_path = cmd_args['MODEL_FILE']
print(f'loading model from [{model_path}]', file=sys.stderr)
model = RenamingModel.load(model_path,
                           use_cuda=cmd_args['--cuda'],
                           new_config=extra_config)
model.eval()

test_set_path = cmd_args['TEST_DATA_FILE']
test_set = Dataset(test_set_path)
decode_results = \
    Evaluator.decode(model, test_set, model.config)

pp = pprint.PrettyPrinter(stream=sys.stderr)
pp.pprint(decode_results)

if cmd_args['--save-to'] is not None:
    save_to = cmd_args['--save-to']
else:
    test_name = test_set_path.split("/")[-1]
    save_to = \
def decode_and_evaluate(model: RenamingModel, dataset: Dataset, config: Dict,
                        return_results=False, eval_batch_size=None):
    if eval_batch_size is None:
        if 'eval_batch_size' in config['train']:
            eval_batch_size = config['train']['eval_batch_size']
        else:
            eval_batch_size = config['train']['batch_size']
    num_readers = config['train']['num_readers']
    num_batchers = config['train']['num_batchers']
    data_iter = dataset.batch_iterator(batch_size=eval_batch_size,
                                       train=False, progress=True,
                                       return_examples=True,
                                       config=model.config,
                                       num_readers=num_readers,
                                       num_batchers=num_batchers)

    was_training = model.training
    model.eval()
    example_acc_list = []
    variable_acc_list = []
    need_rename_cases = []
    func_name_in_train_acc = []
    func_name_not_in_train_acc = []
    func_body_in_train_acc = []
    func_body_not_in_train_acc = []

    all_examples = dict()
    with torch.no_grad():
        for batch in data_iter:
            examples = batch.examples
            rename_results = model.predict(examples)

            for example, rename_result in zip(examples, rename_results):
                example_pred_accs = []
                top_rename_result = rename_result[0]
                for old_name, gold_new_name \
                        in example.variable_name_map.items():
                    pred = top_rename_result[old_name]
                    pred_new_name = pred['new_name']
                    var_metric = Evaluator.get_soft_metrics(
                        pred_new_name, gold_new_name)
                    # is_correct = pred_new_name == gold_new_name
                    example_pred_accs.append(var_metric)

                    if gold_new_name != old_name:
                        need_rename_cases.append(var_metric)

                        if example.test_meta['function_name_in_train']:
                            func_name_in_train_acc.append(var_metric)
                        else:
                            func_name_not_in_train_acc.append(var_metric)

                        if example.test_meta['function_body_in_train']:
                            func_body_in_train_acc.append(var_metric)
                        else:
                            func_body_not_in_train_acc.append(var_metric)

                variable_acc_list.extend(example_pred_accs)
                example_acc_list.append(example_pred_accs)

                if return_results:
                    # Key results by file name and line number; use a separate
                    # name so the `example` loop variable is not shadowed.
                    example_key = (
                        f"{example.binary_file['file_name']}_"
                        f"{example.binary_file['line_num']}")
                    all_examples[example_key] = \
                        (rename_result, Evaluator.average(example_pred_accs))

    valid_example_num = len(example_acc_list)
    num_variables = len(variable_acc_list)
    corpus_acc = Evaluator.average(variable_acc_list)

    if was_training:
        model.train()

    need_rename_acc = Evaluator.average(need_rename_cases)
    name_in_train_acc = Evaluator.average(func_name_in_train_acc)
    name_not_in_train_acc = Evaluator.average(func_name_not_in_train_acc)
    body_in_train_acc = Evaluator.average(func_body_in_train_acc)
    body_not_in_train_acc = Evaluator.average(func_body_not_in_train_acc)
    eval_results = dict(corpus_acc=corpus_acc,
                        corpus_need_rename_acc=need_rename_acc,
                        func_name_in_train_acc=name_in_train_acc,
                        func_name_not_in_train_acc=name_not_in_train_acc,
                        func_body_in_train_acc=body_in_train_acc,
                        func_body_not_in_train_acc=body_not_in_train_acc,
                        num_variables=num_variables,
                        num_valid_examples=valid_example_num)

    if return_results:
        return eval_results, all_examples
    return eval_results
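# Usage sketch: run held-out evaluation and read out the accuracy on functions
# whose bodies were not seen during training, as the training loop below does
# for model selection. `dev_set` is an illustrative placeholder for any
# Dataset instance.
def _example_dev_accuracy(model: RenamingModel, dev_set: Dataset,
                          config: Dict) -> float:
    eval_results = decode_and_evaluate(model, dev_set, config)
    # func_body_not_in_train_acc is an averaged soft-metric dict; its
    # 'accuracy' entry is the value used as the dev metric in train().
    return eval_results['func_body_not_in_train_acc']['accuracy']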
def train(args):
    work_dir = args['--work-dir']
    config = json.loads(_jsonnet.evaluate_file(args['CONFIG_FILE']))
    config['work_dir'] = work_dir

    if not os.path.exists(work_dir):
        print(f'creating work dir [{work_dir}]', file=sys.stderr)
        os.makedirs(work_dir)

    if args['--extra-config']:
        extra_config = args['--extra-config']
        extra_config = json.loads(extra_config)
        config = util.update(config, extra_config)

    json.dump(config,
              open(os.path.join(work_dir, 'config.json'), 'w'),
              indent=2)

    model = RenamingModel.build(config)
    config = model.config
    model.train()

    if args['--cuda']:
        model = model.cuda()

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=0.001)
    nn_util.glorot_init(params)

    # set the padding index for embedding layers to zeros
    # model.encoder.var_node_name_embedding.weight[0].fill_(0.)

    train_set = Dataset(config['data']['train_file'])
    dev_set = Dataset(config['data']['dev_file'])
    batch_size = config['train']['batch_size']

    print(f'Training set size {len(train_set)}, dev set size {len(dev_set)}',
          file=sys.stderr)

    # training loop
    train_iter = epoch = cum_examples = 0
    log_every = config['train']['log_every']
    evaluate_every_nepoch = config['train']['evaluate_every_nepoch']
    max_epoch = config['train']['max_epoch']
    max_patience = config['train']['patience']
    cum_loss = 0.
    patience = 0
    t_log = time.time()

    history_accs = []
    while True:
        # load training dataset, which is a collection of ASTs and maps of
        # gold-standard renamings
        train_set_iter = train_set.batch_iterator(
            batch_size=batch_size,
            return_examples=False,
            config=config,
            progress=True,
            train=True,
            num_readers=config['train']['num_readers'],
            num_batchers=config['train']['num_batchers'])
        epoch += 1

        for batch in train_set_iter:
            train_iter += 1
            optimizer.zero_grad()

            # t1 = time.time()
            nn_util.to(batch.tensor_dict, model.device)
            # print(f'[Learner] {time.time() - t1}s took for moving tensors to device', file=sys.stderr)

            # t1 = time.time()
            result = model(batch.tensor_dict,
                           batch.tensor_dict['prediction_target'])
            # print(f'[Learner] batch {train_iter}, {batch.size} examples took {time.time() - t1:4f}s', file=sys.stderr)

            loss = -result['batch_log_prob'].mean()

            cum_loss += loss.item() * batch.size
            cum_examples += batch.size

            loss.backward()

            # clip gradient
            grad_norm = torch.nn.utils.clip_grad_norm_(params, 5.)

            optimizer.step()
            del loss

            if train_iter % log_every == 0:
                print(
                    f'[Learner] train_iter={train_iter} avg. loss={cum_loss / cum_examples}, '
                    f'{cum_examples} examples ({cum_examples / (time.time() - t_log)} examples/s)',
                    file=sys.stderr)

                cum_loss = cum_examples = 0.
                t_log = time.time()

        print(f'[Learner] Epoch {epoch} finished', file=sys.stderr)

        if epoch % evaluate_every_nepoch == 0:
            print(f'[Learner] Perform evaluation', file=sys.stderr)
            t1 = time.time()
            # ppl = Evaluator.evaluate_ppl(model, dev_set, config, predicate=lambda e: not e['function_body_in_train'])
            eval_results = Evaluator.decode_and_evaluate(
                model, dev_set, config)
            # print(f'[Learner] Evaluation result ppl={ppl} (took {time.time() - t1}s)', file=sys.stderr)
            print(
                f'[Learner] Evaluation result {eval_results} (took {time.time() - t1}s)',
                file=sys.stderr)

            dev_metric = eval_results['func_body_not_in_train_acc']['accuracy']
            # dev_metric = -ppl
            if len(history_accs) == 0 or dev_metric > max(history_accs):
                patience = 0
                model_save_path = os.path.join(work_dir, 'model.bin')
                model.save(model_save_path)
                print(
                    f'[Learner] Saved currently the best model to {model_save_path}',
                    file=sys.stderr)
            else:
                patience += 1
                if patience == max_patience:
                    print(
                        f'[Learner] Reached max patience {max_patience}, exiting...',
                        file=sys.stderr)
                    patience = 0
                    exit()

            history_accs.append(dev_metric)

        if epoch == max_epoch:
            print(f'[Learner] Reached max epoch', file=sys.stderr)
            exit()

        t1 = time.time()
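# Invocation sketch for train(): the dictionary keys mirror the docopt-style
# lookups used above. The config path and work dir below are hypothetical
# placeholders, not paths from the original project.
def _example_train_invocation():
    args = {
        'CONFIG_FILE': 'configs/model.example.jsonnet',  # hypothetical config path
        '--work-dir': 'exp_runs/example_run',            # hypothetical output dir
        '--extra-config': None,                          # no config overrides
        '--cuda': False,                                 # train on CPU
    }
    train(args)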
def decode_and_evaluate(model: RenamingModel, dataset: Dataset, config: Dict,
                        return_results=False, eval_batch_size=None):
    if eval_batch_size is None:
        eval_batch_size = (config['train']['eval_batch_size']
                           if 'eval_batch_size' in config['train']
                           else config['train']['batch_size'])
    data_iter = dataset.batch_iterator(
        batch_size=eval_batch_size,
        train=False,
        progress=True,
        return_examples=True,
        config=model.config,
        num_readers=config['train']['num_readers'],
        num_batchers=config['train']['num_batchers'])

    was_training = model.training
    model.eval()
    example_acc_list = []
    variable_acc_list = []
    need_rename_cases = []
    func_name_in_train_acc_list = []
    func_name_not_in_train_acc_list = []
    func_body_in_train_acc_list = []
    func_body_not_in_train_acc_list = []

    all_examples = dict()
    results = {}
    with torch.no_grad():
        for batch in data_iter:
            examples = batch.examples
            rename_results = model.predict(examples)

            for example, rename_result in zip(examples, rename_results):
                example_pred_accs = []
                # The binary identifier is the file-name prefix before the
                # first underscore.
                binary = example.binary_file[
                    'file_name'][:example.binary_file['file_name'].index("_")]
                func_name = example.ast.compilation_unit
                top_rename_result = rename_result[0]
                for old_name, gold_new_name in \
                        example.variable_name_map.items():
                    pred = top_rename_result[old_name]
                    pred_new_name = pred['new_name']
                    # Record the top prediction for this variable, paired with
                    # an empty placeholder string.
                    results.setdefault(binary, {}).setdefault(
                        func_name, {})[old_name] = "", pred_new_name
                    var_metric = Evaluator.get_soft_metrics(
                        pred_new_name, gold_new_name)
                    # is_correct = pred_new_name == gold_new_name
                    example_pred_accs.append(var_metric)

                    if gold_new_name != old_name:  # and gold_new_name in model.vocab.target:
                        need_rename_cases.append(var_metric)

                        if example.test_meta['function_name_in_train']:
                            func_name_in_train_acc_list.append(var_metric)
                        else:
                            func_name_not_in_train_acc_list.append(var_metric)

                        if example.test_meta['function_body_in_train']:
                            func_body_in_train_acc_list.append(var_metric)
                        else:
                            func_body_not_in_train_acc_list.append(var_metric)

                variable_acc_list.extend(example_pred_accs)
                example_acc_list.append(example_pred_accs)

                if return_results:
                    all_examples[example.binary_file['file_name'] + '_' +
                                 str(example.binary_file['line_num'])] = (
                                     rename_result,
                                     Evaluator.average(example_pred_accs))
                    # all_examples.append((example, rename_result, example_pred_accs))

    json.dump(results,
              open(f"pred_dire_{time.strftime('%d%H%M')}.json", "w"))

    valid_example_num = len(example_acc_list)
    num_variables = len(variable_acc_list)
    corpus_acc = Evaluator.average(variable_acc_list)

    if was_training:
        model.train()

    eval_results = dict(
        corpus_acc=corpus_acc,
        corpus_need_rename_acc=Evaluator.average(need_rename_cases),
        func_name_in_train_acc=Evaluator.average(
            func_name_in_train_acc_list),
        func_name_not_in_train_acc=Evaluator.average(
            func_name_not_in_train_acc_list),
        func_body_in_train_acc=Evaluator.average(
            func_body_in_train_acc_list),
        func_body_not_in_train_acc=Evaluator.average(
            func_body_not_in_train_acc_list),
        num_variables=num_variables,
        num_valid_examples=valid_example_num)

    if return_results:
        return eval_results, all_examples
    return eval_results
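# Reading sketch for the predictions file written above: the dumped JSON maps
# binary -> function -> old variable name -> ["", predicted name]. The default
# path is a hypothetical instance of the timestamped pattern used in the dump.
def _example_read_predictions(path: str = "pred_dire_010203.json") -> None:
    with open(path) as f:
        results = json.load(f)
    for binary, funcs in results.items():
        for func_name, renames in funcs.items():
            for old_name, (_, new_name) in renames.items():
                print(f"{binary}/{func_name}: {old_name} -> {new_name}")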