Ejemplo n.º 1
0
    # Build the token and label vocabularies shared by all three splits.
    # Index 0 is reserved up front in each: '$UNK$' for tokens (presumably the
    # unknown-token placeholder -- confirm against numberize_dataset) and 'NM'
    # for labels.
    token_vocab = {'$UNK$': 0}
    label_vocab = {'NM': 0}

    # Ids are handed out in first-seen order, walking train, then dev, then
    # test. setdefault() leaves an already-registered key untouched, so every
    # distinct entry receives the next free index exactly once.
    for split_counts in (train_token_count, dev_token_count, test_token_count):
        for tok in split_counts.keys():
            token_vocab.setdefault(tok, len(token_vocab))
    for split_counts in (train_label_count, dev_label_count, test_label_count):
        for lbl in split_counts.keys():
            label_vocab.setdefault(lbl, len(label_vocab))

    # Every split receives the same two dict objects (not copies).
    for split in (train_set, dev_set, test_set):
        split.token_vocab = token_vocab
    for split in (train_set, dev_set, test_set):
        split.label_vocab = label_vocab

    # Numberize only after both vocabularies are attached to every split.
    for split in (train_set, dev_set, test_set):
        split.numberize_dataset()
else:
    # This branch does not build vocabularies; it restores the ones stored in
    # a previously saved checkpoint and prepares only the test split.

    # Explicit raise instead of `assert`: assert statements are removed when
    # Python runs with -O, which would let a missing directory slip through
    # to torch.load. The exception type and message are unchanged.
    if not os.path.isdir(model_dir):
        raise AssertionError('Model directory not found: {}'.format(model_dir))

    # The checkpoint file is named after the first entry of `labels`.
    # NOTE(review): torch.load unpickles the file, so only checkpoints from a
    # trusted source should be loaded here.
    checkpoint_path = os.path.join(model_dir, 'checkpoint_{}.mdl'.format(labels[0]))
    saved_state = torch.load(checkpoint_path)
    token_vocab = saved_state['token_vocab']
    label_vocab = saved_state['label_vocab']

    # Attach the restored vocabularies before converting the test data.
    test_set = Dataset(test_file, labels=labels)
    test_set.token_vocab = token_vocab
    test_set.label_vocab = label_vocab
    test_set.numberize_dataset()