Example #1
from math import ceil
from multiprocessing import cpu_count

import torch
from pytorch_lightning import Trainer, seed_everything

# Code2Seq, create_dataloader, and SEED come from the surrounding project.
def evaluate(checkpoint: str, data: str = None, batch_size: int = None):
    seed_everything(SEED)
    model = Code2Seq.load_from_checkpoint(checkpoint_path=checkpoint)
    # Fall back to the values stored in the checkpoint's hyperparameters.
    batch_size = batch_size or model.hyperparams.test_batch_size
    data = data or model.hyperparams.test_data_path
    gpu = 1 if torch.cuda.is_available() else None
    data_loader, n_samples = create_dataloader(
        data, model.hyperparams.max_context, False, False, batch_size, cpu_count(),
    )
    print(f"approximate number of steps for test is {ceil(n_samples / batch_size)}")
    trainer = Trainer(gpus=gpu)
    trainer.test(model, test_dataloaders=data_loader)
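A minimal command-line wrapper for this helper might look like the following; the flag names are hypothetical, and only the evaluate signature above is assumed:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Evaluate a Code2Seq checkpoint")
    parser.add_argument("checkpoint", type=str, help="path to a .ckpt file")
    parser.add_argument("--data", type=str, default=None, help="override the test data path")
    parser.add_argument("--batch-size", type=int, default=None, help="override the test batch size")
    cli = parser.parse_args()
    evaluate(cli.checkpoint, cli.data, cli.batch_size)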
Example #2
from math import ceil
from multiprocessing import cpu_count
from os.path import join

import torch
from pytorch_lightning import Trainer, seed_everything

# Code2Seq, create_dataloader, SEED, and DATA_FOLDER come from the surrounding project.
def evaluate(checkpoint: str, data: str = None):
    seed_everything(SEED)
    model = Code2Seq.load_from_checkpoint(checkpoint_path=checkpoint)
    gpu = 1 if torch.cuda.is_available() else None
    trainer = Trainer(gpus=gpu)
    if data is not None:
        # Evaluate on an explicitly provided dataset.
        data_loader, n_samples = create_dataloader(
            join(DATA_FOLDER, data), model.config.max_context, False, False,
            model.config.test_batch_size, cpu_count())
        print(
            f"approximate number of steps for test is {ceil(n_samples / model.config.test_batch_size)}"
        )
        trainer.test(model, test_dataloaders=data_loader)
    else:
        # Fall back to the test dataloader configured inside the model.
        trainer.test(model)
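This variant differs from Example #1 in that, when no dataset is given, it defers to the test dataloader already configured inside the checkpointed model. A sketch of both call paths, with hypothetical file names:

# Evaluate on a specific file resolved relative to DATA_FOLDER:
evaluate("checkpoints/epoch9.ckpt", data="java-small.test.c2s")

# Evaluate on the model's own configured test split:
evaluate("checkpoints/epoch9.ckpt")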
Example #3
from typing import Tuple

from omegaconf import DictConfig

# Code2Seq, PathContextDataModule, and Vocabulary come from the code2seq package.
def load_code2seq(
        checkpoint_path: str, config: DictConfig,
        vocabulary: Vocabulary) -> Tuple[Code2Seq, PathContextDataModule]:
    model = Code2Seq.load_from_checkpoint(checkpoint_path=checkpoint_path)
    data_module = PathContextDataModule(config, vocabulary)
    return model, data_module
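The returned pair plugs directly into a Lightning Trainer. A minimal sketch, assuming an OmegaConf config file and an already-built vocabulary:

from omegaconf import OmegaConf
from pytorch_lightning import Trainer

config = OmegaConf.load("config.yaml")  # hypothetical config path
# vocabulary is assumed to come from the project's preprocessing step.
model, data_module = load_code2seq("model.ckpt", config, vocabulary)
Trainer().test(model, datamodule=data_module)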
Example #4
    print('Done dumping reduced data set')
    return out_path


if __name__ == "__main__":
    opt = parse_args()
    print(opt)
    print('data path: ', opt.data_path)
    data_split = opt.data_path.split('/')[-2]
    print('data_split', data_split)

    # replace_tokens = ["@R_%d@" % x for x in range(0, opt.num_replacements + 1)]
    replace_tokens = ["@R_%d@" % x for x in range(1000)]

    model = Code2Seq.load_from_checkpoint(checkpoint_path=opt.expt_dir)

    data_loader, n_samples = create_dataloader(
        opt.data_path, model.hyperparams.max_context, False, False, opt.batch_size, 1,
    )

    # Build reverse lookup tables mapping ids back to tokens and labels.
    with open(opt.vocab, 'rb') as f:
        vocab = pickle.load(f)
    token_to_id = vocab['token_to_id']
    id_to_token = {idx: tok for tok, idx in token_to_id.items()}
    print('length: ', len(id_to_token))
    label_to_id = vocab['label_to_id']
    id_to_label = {idx: lab for lab, idx in label_to_id.items()}

    # if data_split == 'test' and opt.exact_matches:
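The id_to_label table built above inverts the vocabulary so that model output can be read back as sub-tokens. A small decoding sketch; the special-token spellings here are assumptions:

def decode_label(ids, id_to_label):
    # Map ids back to sub-tokens, stopping at the end-of-sequence marker
    # and skipping padding/start markers (the token names are assumptions).
    subtokens = []
    for i in ids:
        token = id_to_label.get(i, '<UNK>')
        if token == '<EOS>':
            break
        if token not in ('<PAD>', '<SOS>'):
            subtokens.append(token)
    return '|'.join(subtokens)

# e.g. decode_label([5, 12, 3], id_to_label) -> "get|file|name"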
Example #5
    args = parser.parse_args()
    return args


def create_datafile(data_path, exact_matches, split):
    # Keep only the lines whose target method name was matched exactly.
    new_data_path = os.path.join(data_path, 'small.{}.c2s'.format(split))
    with open(os.path.join(data_path, 'data.{}.c2s'.format(split)), 'r') as lines, \
            open(new_data_path, 'w') as new_file:
        for line in lines:
            # The first whitespace-separated field of a .c2s line is the target label.
            if line.split()[0] in exact_matches:
                new_file.write(line)
    print("Saved exact matches.")


if __name__ == '__main__':
    args = parse_args()
    model = Code2Seq.load_from_checkpoint(checkpoint_path=args.checkpoint)
    data_loader, n_samples = create_dataloader(
        os.path.join(args.orig_data_path, args.split),
        model.hyperparams.max_context, False, False, args.batch_size, 1)
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)
    label_to_id = vocab['label_to_id']
    id_to_label = {idx: lab for lab, idx in label_to_id.items()}

    li_exact_matches = get_exact_matches(data_loader, n_samples, model,
                                         id_to_label)
    print(li_exact_matches)
    create_datafile(args.data_path, li_exact_matches, args.split)
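get_exact_matches itself is not shown in this example. A plausible sketch, under the assumption that each batch yields contexts plus a (seq_len, batch) tensor of label ids and that the model returns logits of shape (seq_len, batch, vocab); the batch layout and special-token names are assumptions, not the project's actual API:

import torch

def get_exact_matches(data_loader, n_samples, model, id_to_label):
    # Collect the names of methods whose full label sequence is predicted exactly.
    matches = []
    model.eval()
    with torch.no_grad():
        for contexts, labels in data_loader:
            logits = model(contexts)             # (seq_len, batch, vocab) -- assumption
            predictions = logits.argmax(dim=-1)  # (seq_len, batch)
            for b in range(predictions.shape[1]):
                pred = [id_to_label.get(int(i), '<UNK>') for i in predictions[:, b]]
                true = [id_to_label.get(int(i), '<UNK>') for i in labels[:, b]]
                if pred == true:
                    special = ('<PAD>', '<SOS>', '<EOS>')
                    matches.append('|'.join(t for t in true if t not in special))
    return matches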