Example #1
import torch
from torch.autograd import Variable

# NN, EmbeddingsLoader, dataset, test_loader, and args come from the
# surrounding project; vocabulary and embedding sizes are read from the
# pretrained embeddings file.
embLoader = EmbeddingsLoader(args.embeddings_path)
args.vocab_size, args.embedding_size = embLoader.get_dimensions()

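# Rebuild the tagger with the same hyperparameters it was trained with,
# then restore the saved weights.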
model = NN(vocab_size=args.vocab_size,
           number_tags=dataset.get_num_tags(),
           batch_size=args.batch_size,
           gpu=args.gpu,
           window_size=args.window_size)
model.load_state_dict(torch.load(args.trained_model))
model.eval()  # inference mode: freezes dropout/batch-norm behaviour, if the NN has any

if args.gpu and torch.cuda.is_available():
    model.cuda()

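# Token-level counts; 'O' (outside) tags are excluded below so the scores
# measure entity detection rather than the dominant non-entity class.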
tag_level = {'correct': 0, 'total_in_test': 0, 'total_detected': 0}

for words_input, tags in test_loader:
    input_tensor, target_tensor = model.get_train_tensors(
        words_input, tags, embLoader.word_to_index, dataset.tag_to_index)
    output = model(Variable(input_tensor))
    top_n, top_i = output.data.topk(1)
    for i, tag_i in enumerate(top_i):
        tag_i = tag_i[0]
        if (tag_i == target_tensor[i]
                and target_tensor[i] != dataset.tag_to_index('O')):
            tag_level['correct'] += 1
            tag_level['total_detected'] += 1
        elif tag_i != dataset.tag_to_index('O'):
            tag_level['total_detected'] += 1
        if target_tensor[i] != dataset.tag_to_index('O'):
            tag_level['total_in_test'] += 1

# Guard against empty denominators (no detections / no entities in the test set).
tag_level['precision'] = tag_level['correct'] / max(tag_level['total_detected'], 1)
tag_level['recall'] = tag_level['correct'] / max(tag_level['total_in_test'], 1)
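Precision and recall as computed above can be combined into an F1 score via their harmonic mean; a minimal follow-on sketch reusing the tag_level dict populated by the loop (the 'f1' key is an addition for illustration, not part of the original snippet):

p, r = tag_level['precision'], tag_level['recall']
# Harmonic mean of precision and recall; defined as 0.0 when both are zero.
tag_level['f1'] = 2 * p * r / (p + r) if (p + r) > 0 else 0.0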
Example #2
import torch
from torch.autograd import Variable

model = NN(vocab_size=args.vocab_size,
           number_tags=dataset.get_num_tags(),
           batch_size=args.batch_size,
           gpu=args.gpu,
           window_size=args.window_size,
           include_cap=args.capitalization,
           number_suffix=dataset.get_num_suffix(),
           embedding_dim=args.embedding_size,
           activation=args.activation)

model.load_state_dict(torch.load(args.trained_model))
model.eval()  # inference mode: freezes dropout/batch-norm behaviour, if the NN has any

if args.gpu and torch.cuda.is_available():
    model.cuda()

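# One output line per token: the word, its gold tag, and the predicted tag.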
tagged_file = open(args.output_file, "w")

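# Prefer the pretrained-embedding vocabulary when one was supplied;
# otherwise fall back to the vocabulary built from the training data.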
if args.embeddings_path is not None:
    word_to_index = embLoader.word_to_index
else:
    word_to_index = dataset.word_to_index

for words_input, tags in test_loader:
    input_tensor, target_tensor, cap_tensor, suf_tensor = model.get_train_tensors(
        words_input, tags, word_to_index, dataset.tag_to_index,
        suffix2id=dataset.suffix_to_index)
    output = model(Variable(input_tensor), Variable(cap_tensor),
                   Variable(suf_tensor))
    top_n, top_i = output.data.topk(1)
    for i, tag_i in enumerate(top_i):
        word = words_input[args.window_size][i]
        real_tag = dataset.index_to_tag(target_tensor[i])
        predicted_tag = dataset.index_to_tag(tag_i[0])
        tagged_file.write("{} {} {}\n".format(word, real_tag, predicted_tag))

tagged_file.close()
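The three-column word/gold/predicted layout written above is the shape the classic CoNLL conlleval scorer consumes; as a quick self-contained check, the file can also be read back for token-level accuracy. A minimal sketch, reusing args.output_file from above:

correct = total = 0
with open(args.output_file) as f:
    for line in f:
        parts = line.split()
        if len(parts) != 3:
            continue  # skip blank or malformed lines
        _, real_tag, predicted_tag = parts
        correct += int(real_tag == predicted_tag)
        total += 1
print("token accuracy: {:.4f}".format(correct / max(total, 1)))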
Example #3
import torch
from torch.autograd import Variable

model = NN(vocab_size=args.vocab_size,
           number_tags=dataset.get_num_tags(),
           batch_size=args.batch_size,
           gpu=args.gpu,
           window_size=args.window_size,
           number_suffix=dataset.get_num_suffix(),
           include_cap=args.capitalization)
model.load_state_dict(torch.load(args.trained_model))
model.eval()  # inference mode: freezes dropout/batch-norm behaviour, if the NN has any

if args.embeddings_path is not None:
    embLoader = EmbeddingsLoader(args.embeddings_path)
    print("Embeddings loaded, vocab size: {} embedding dims: {}".format(
        embLoader.get_dimensions()[0],
        embLoader.get_dimensions()[1]))
    word_to_index = embLoader.word_to_index
else:
    # Without pretrained embeddings there is no embLoader to index into,
    # so fall back to the training vocabulary, as in Example #2.
    word_to_index = dataset.word_to_index

if args.gpu and torch.cuda.is_available():
    model.cuda()

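# Interactive demo: read a sentence, slide a context window across it, and
# print the predicted tag for each window's centre word. Words within
# window_size of either sentence edge are skipped, since no padding is added.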
while True:
    sentence = input("Enter sentence to process: ")
    if not sentence:
        break  # an empty line exits the demo
    word_list = [[w] for w in sentence.split()]
    for i in range(len(word_list) - (2 * args.window_size)):
        input_tensor, _, cap_tensor, suf_tensor = model.get_train_tensors(
            word_list[i:i + 1 + 2 * args.window_size],
            word2id=word_to_index,
            suffix2id=dataset.suffix_to_index)
        output = model(Variable(input_tensor), Variable(cap_tensor),
                       Variable(suf_tensor))
        top_n, top_i = output.data.topk(1)
        tag_i = top_i[0][0]
        print("{} : {}".format(
            word_list[i:i + 1 + 2 * args.window_size][args.window_size],
            dataset.index_to_tag(tag_i)))
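All three examples use the pre-0.4 Variable API. On PyTorch 0.4 and later, where Variable was merged into Tensor, the same forward pass can be written without it; a minimal sketch, assuming the model and the three input tensors from the loop above:

with torch.no_grad():  # inference only, so skip autograd bookkeeping
    output = model(input_tensor, cap_tensor, suf_tensor)
top_n, top_i = output.topk(1)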