token_vocab = {'$UNK$': 0} label_vocab = {'NM': 0} for t in list(train_token_count.keys()) \ + list(dev_token_count.keys()) + list(test_token_count.keys()): if t not in token_vocab: token_vocab[t] = len(token_vocab) for l in list(train_label_count.keys()) \ + list(dev_label_count.keys()) + list(test_label_count.keys()): if l not in label_vocab: label_vocab[l] = len(label_vocab) train_set.token_vocab = token_vocab dev_set.token_vocab = token_vocab test_set.token_vocab = token_vocab train_set.label_vocab = label_vocab dev_set.label_vocab = label_vocab test_set.label_vocab = label_vocab train_set.numberize_dataset() dev_set.numberize_dataset() test_set.numberize_dataset() else: assert os.path.isdir(model_dir), 'Model directory not found: {}'.format(model_dir) saved_state = torch.load(os.path.join(model_dir, 'checkpoint_{}.mdl'.format(labels[0]))) token_vocab = saved_state['token_vocab'] label_vocab = saved_state['label_vocab'] test_set = Dataset(test_file, labels=labels) test_set.token_vocab = token_vocab test_set.label_vocab = label_vocab test_set.numberize_dataset()