import json
import pickle
from pathlib import Path

import torch
from torch.utils.data import DataLoader

# project-local helpers used below; the module paths here are assumptions,
# adjust them to the actual repository layout
from model.net import VDCNN
from model.data import Corpus
from model.utils import PadSequence, Tokenizer
from model.split import split_to_jamo
from utils import get_accuracy


def main(json_path):
    cwd = Path.cwd()
    with open(cwd / json_path) as io:
        params = json.loads(io.read())

    # tokenizer
    vocab_path = params['filepath'].get('vocab')
    with open(cwd / vocab_path, mode='rb') as io:
        vocab = pickle.load(io)
    length = params['padder'].get('length')
    padder = PadSequence(length=length, pad_val=vocab.to_indices(vocab.padding_token))
    tokenizer = Tokenizer(vocab=vocab, split_fn=split_to_jamo, pad_fn=padder)

    # model (restore)
    save_path = cwd / params['filepath'].get('ckpt')
    ckpt = torch.load(save_path)
    num_classes = params['model'].get('num_classes')
    embedding_dim = params['model'].get('embedding_dim')
    k_max = params['model'].get('k_max')
    model = VDCNN(num_classes=num_classes, embedding_dim=embedding_dim,
                  k_max=k_max, vocab=tokenizer.vocab)
    model.load_state_dict(ckpt['model_state_dict'])

    # evaluation: build a DataLoader per split and measure accuracy on each
    batch_size = params['training'].get('batch_size')
    tr_path = cwd / params['filepath'].get('tr')
    val_path = cwd / params['filepath'].get('val')
    tst_path = cwd / params['filepath'].get('tst')
    tr_ds = Corpus(tr_path, tokenizer.split_and_transform)
    tr_dl = DataLoader(tr_ds, batch_size=batch_size, num_workers=4)
    val_ds = Corpus(val_path, tokenizer.split_and_transform)
    val_dl = DataLoader(val_ds, batch_size=batch_size, num_workers=4)
    tst_ds = Corpus(tst_path, tokenizer.split_and_transform)
    tst_dl = DataLoader(tst_ds, batch_size=batch_size, num_workers=4)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    tr_acc = get_accuracy(model, tr_dl, device)
    val_acc = get_accuracy(model, val_dl, device)
    tst_acc = get_accuracy(model, tst_dl, device)

    print('tr_acc: {:.2%}, val_acc: {:.2%}, tst_acc: {:.2%}'.format(tr_acc, val_acc, tst_acc))
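# The script above defines main() but no entry point. A minimal sketch of one is
# given below; using argparse and the 'config.json' default are assumptions, not
# part of the original script.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='evaluate a trained VDCNN from a JSON config')
    parser.add_argument('--json_path', default='config.json',
                        help='path to the experiment config, relative to the working directory')
    args = parser.parse_args()
    main(args.json_path)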
# Evaluation code driven by Config objects and a CheckpointManager. This is a
# fragment: `args`, `data_config`, `model_config`, and `model_dir` are defined
# earlier in the script (see the sketch after this fragment).

# tokenizer
with open(data_config.vocab, mode='rb') as io:
    vocab = pickle.load(io)
pad_sequence = PadSequence(length=model_config.length,
                           pad_val=vocab.to_indices(vocab.padding_token))
tokenizer = Tokenizer(vocab=vocab, split_fn=split_to_jamo, pad_fn=pad_sequence)

# model (restore)
checkpoint_manager = CheckpointManager(model_dir)
checkpoint = checkpoint_manager.load_checkpoint('best.tar')
model = VDCNN(num_classes=model_config.num_classes,
              embedding_dim=model_config.embedding_dim,
              k_max=model_config.k_max,
              vocab=tokenizer.vocab)
model.load_state_dict(checkpoint['model_state_dict'])

# evaluation: only the split selected by args.dataset is evaluated
summary_manager = SummaryManager(model_dir)
filepath = getattr(data_config, args.dataset)
ds = Corpus(filepath, tokenizer.split_and_transform)
dl = DataLoader(ds, batch_size=model_config.batch_size, num_workers=4)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

summary = evaluate(model, dl, {'loss': nn.CrossEntropyLoss(), 'acc': acc}, device)
# summary_manager presumably records `summary` afterwards (not shown in this fragment)
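# The fragment above assumes a preamble along the following lines. This is a sketch
# under assumptions: the argument names, defaults, Config constructor, and module
# paths (model.net, model.data, model.utils, model.split, model.metric, utils) are
# guesses and should be adjusted to the actual repository layout.
import argparse
import pickle
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from model.net import VDCNN
from model.data import Corpus
from model.utils import PadSequence, Tokenizer
from model.split import split_to_jamo
from model.metric import evaluate, acc
from utils import Config, CheckpointManager, SummaryManager

parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', default='data',
                    help='directory containing the data config.json')
parser.add_argument('--model_dir', default='experiments/base_model',
                    help='directory containing the model config.json and checkpoints')
parser.add_argument('--dataset', default='tst',
                    help='which split to evaluate (e.g. tr, val, tst)')
args = parser.parse_args()

data_dir = Path(args.data_dir)
model_dir = Path(args.model_dir)
data_config = Config(data_dir / 'config.json')
model_config = Config(model_dir / 'config.json')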