from math import ceil
from multiprocessing import cpu_count

import torch
from pytorch_lightning import Trainer, seed_everything

# Code2Seq, create_dataloader, and SEED are project-local (the model class,
# the data-loading helper, and the global random seed).


def evaluate(checkpoint: str, data: str = None, batch_size: int = None):
    seed_everything(SEED)
    model = Code2Seq.load_from_checkpoint(checkpoint_path=checkpoint)
    # Fall back to the hyperparameters stored in the checkpoint when no
    # explicit batch size or data path is given.
    batch_size = batch_size or model.hyperparams.test_batch_size
    data = data or model.hyperparams.test_data_path
    gpu = 1 if torch.cuda.is_available() else None
    data_loader, n_samples = create_dataloader(
        data, model.hyperparams.max_context, False, False, batch_size, cpu_count(),
    )
    print(f"approximate number of steps for test is {ceil(n_samples / batch_size)}")
    trainer = Trainer(gpus=gpu)
    trainer.test(model, test_dataloaders=data_loader)
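# Minimal usage sketch (not part of the original file): "code2seq.ckpt" and
# the explicit data path below are placeholder names.
if __name__ == "__main__":
    # Use the test set and batch size stored in the checkpoint...
    evaluate("code2seq.ckpt")
    # ...or override both explicitly.
    evaluate("code2seq.ckpt", data="data/java-small.test.c2s", batch_size=512)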
from math import ceil
from multiprocessing import cpu_count
from os.path import join

import torch
from pytorch_lightning import Trainer, seed_everything

# Variant that reads every setting from the config stored in the checkpoint;
# Code2Seq, create_dataloader, DATA_FOLDER, and SEED are project-local.


def evaluate(checkpoint: str, data: str = None):
    seed_everything(SEED)
    model = Code2Seq.load_from_checkpoint(checkpoint_path=checkpoint)
    gpu = 1 if torch.cuda.is_available() else None
    trainer = Trainer(gpus=gpu)
    if data is not None:
        # Evaluate on an explicitly named dataset under DATA_FOLDER.
        data_loader, n_samples = create_dataloader(
            join(DATA_FOLDER, data),
            model.config.max_context,
            False,
            False,
            model.config.test_batch_size,
            cpu_count(),
        )
        print(
            f"approximate number of steps for test is "
            f"{ceil(n_samples / model.config.test_batch_size)}"
        )
        trainer.test(model, test_dataloaders=data_loader)
    else:
        # Fall back to the test set configured inside the checkpoint.
        trainer.test(model)
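# Hedged usage sketch for the config-based variant; both arguments below are
# placeholders, assuming DATA_FOLDER points at the preprocessed datasets.
if __name__ == "__main__":
    evaluate("code2seq.ckpt")                         # checkpoint's own test set
    evaluate("code2seq.ckpt", "java-small.test.c2s")  # explicit file under DATA_FOLDER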
from typing import Tuple

from omegaconf import DictConfig

# Code2Seq, PathContextDataModule, and Vocabulary are project-local.


def load_code2seq(
    checkpoint_path: str, config: DictConfig, vocabulary: Vocabulary
) -> Tuple[Code2Seq, PathContextDataModule]:
    # Restore the trained weights and pair them with a freshly built data module.
    model = Code2Seq.load_from_checkpoint(checkpoint_path=checkpoint_path)
    data_module = PathContextDataModule(config, vocabulary)
    return model, data_module
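# Sketch of wiring load_code2seq() into an evaluation run; the config path,
# checkpoint name, and the Vocabulary constructor call are assumptions.
import torch
from omegaconf import OmegaConf
from pytorch_lightning import Trainer

config = OmegaConf.load("configs/code2seq.yaml")  # hypothetical path
vocabulary = Vocabulary(config)                   # assumed constructor
model, data_module = load_code2seq("code2seq.ckpt", config, vocabulary)
trainer = Trainer(gpus=1 if torch.cuda.is_available() else None)
trainer.test(model, datamodule=data_module)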
    print('Done dumping reduced data set')
    return out_path


if __name__ == "__main__":
    # Excerpt: parse_args, pickle, Code2Seq, and create_dataloader are
    # imported/defined earlier in this file.
    opt = parse_args()
    print(opt)
    print('data path: ', opt.data_path)
    data_split = opt.data_path.split('/')[-2]
    print('data_split', data_split)
    # replace_tokens = ["@R_%d@" % x for x in range(0, opt.num_replacements + 1)]
    replace_tokens = ["@R_%d@" % x for x in range(1000)]
    model = Code2Seq.load_from_checkpoint(checkpoint_path=opt.expt_dir)
    data_loader, n_samples = create_dataloader(
        opt.data_path, model.hyperparams.max_context, False, False, opt.batch_size, 1,
    )
    # Invert the token and label vocabularies so that model output ids can be
    # mapped back to strings.
    with open(opt.vocab, 'rb') as f:
        vocab = pickle.load(f)
    token_to_id = vocab['token_to_id']
    id_to_token = {token_to_id[t]: t for t in token_to_id}
    print('length: ', len(id_to_token))
    label_to_id = vocab['label_to_id']
    id_to_label = {label_to_id[t]: t for t in label_to_id}
    # if data_split == 'test' and opt.exact_matches:
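# Illustration (not in the original script) of what the inverted vocabulary
# is for: code2seq method-name labels are "|"-separated subtokens, so a
# predicted id sequence decodes back to a name. The ids below are made up.
predicted_ids = [17, 4, 256]  # hypothetical model output for one sample
print('|'.join(id_to_label[i] for i in predicted_ids))  # e.g. "get|file|name"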
    args = parser.parse_args()
    return args


def create_datafile(data_path, exact_matches, split):
    # Copy only the samples whose label was an exact match into a smaller
    # .c2s file next to the original one.
    new_data_path = os.path.join(data_path, 'small.{}.c2s'.format(split))
    with open(os.path.join(data_path, 'data.{}.c2s'.format(split)), 'r') as lines, \
            open(new_data_path, 'w') as new_file:
        for line in lines:
            # The first whitespace-separated field of a .c2s line is the label.
            if line.split()[0] in exact_matches:
                new_file.write(line)
    print("Saved exact matches.")


if __name__ == '__main__':
    # Excerpt: parse_args, get_exact_matches, pickle, os, Code2Seq, and
    # create_dataloader are imported/defined earlier in this file.
    args = parse_args()
    model = Code2Seq.load_from_checkpoint(checkpoint_path=args.checkpoint)
    data_loader, n_samples = create_dataloader(
        os.path.join(args.orig_data_path, args.split),
        model.hyperparams.max_context, False, False, args.batch_size, 1)
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    label_to_id = vocab['label_to_id']
    id_to_label = {label_to_id[l]: l for l in label_to_id}
    li_exact_matches = get_exact_matches(data_loader, n_samples, model, id_to_label)
    print(li_exact_matches)
    create_datafile(args.data_path, li_exact_matches, args.split)
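# Hedged usage sketch: with these placeholder paths, the call below would copy
# every line of datasets/java-small/data.test.c2s whose label is in the given
# set into datasets/java-small/small.test.c2s.
create_datafile("datasets/java-small", {"sort", "get|name"}, "test")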