Python BiLSTMTaggerModel.eval Examples

Programming Language: Python

Namespace/Package Name: neural_srl.pytorch.tagger

Method/Function: eval

Examples at hotexamples.com: 3

Python BiLSTMTaggerModel.eval - 3 examples found. These are the top-rated real-world Python examples of neural_srl.pytorch.tagger.BiLSTMTaggerModel.eval, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

BiLSTMTaggerModel(6)

forward(6)

parameters(6)

cuda(5)

eval(3)

load(3)

train(2)

compute_loss(1)

loss(1)

named_parameters(1)

optimizer(1)

to(1)

zero_grad(1)

Example #1

Show file

def get_scores(config, task, model_path, word_dict_path, label_dict_path,
               tpf_dict_path, input_path):
    """Run a saved BiLSTMTaggerModel over the test data and collect its scores.

    The function loads the word, label and TPF dictionaries, reads and batches
    the test sentences, loads the auxiliary inputs (the HDF5 file named by
    ``args.input_elmo`` and the dependency trees named by
    ``args.input_dep_trees``), restores the model from ``model_path`` and runs
    it batch by batch in evaluation mode.

    NOTE(review): ``args`` is not a parameter and is not defined in this
    function; it is read as a free name (``args.input``, ``args.input_elmo``,
    ``args.input_dep_trees``, ``args.gpu``) -- presumably module-level parsed
    command-line arguments; confirm against the caller.

    Args:
        config: Configuration object; ``config.dev_batch_size`` sets the
            test batch size and the object is forwarded to the reader,
            ``TaggerData`` and the model.
        task: ``'srl'`` selects ``reader.get_srl_test_data``; any other value
            selects ``reader.get_postag_test_data``.
        model_path: Path handed to ``model.load``.
        word_dict_path: Path handed to the word dictionary's ``load``.
        label_dict_path: Path handed to the label dictionary's ``load``.
        tpf_dict_path: Path handed to the TPF dictionary's ``load``.
        input_path: Test file path handed to the reader.

    Returns:
        A tuple ``(scores, data, test_sentences, test_data)`` where ``scores``
        is a flat list holding one NumPy entry per leading-axis row of every
        batch's model output.
    """
    with Timer('Data loading'):
        print('Task: {}'.format(task))
        allow_new_words = True
        print('Allow new words in test data: {}'.format(allow_new_words))

        # Build the three vocabularies first, then load them in a fixed order.
        word_dict = Dictionary(padding_token=PADDING_TOKEN,
                               unknown_token=UNKNOWN_TOKEN)
        label_dict = Dictionary()
        tpf_dict = Dictionary()
        for vocab, vocab_path in ((word_dict, word_dict_path),
                                  (label_dict, label_dict_path),
                                  (tpf_dict, tpf_dict_path)):
            vocab.load(vocab_path)

        data = TaggerData(config, [], [], word_dict, label_dict, None, None)
        data.tpf2_dict = tpf_dict

        # Pick the reader that matches the task, then read the test set.
        read_test_data = (reader.get_srl_test_data if task == 'srl'
                          else reader.get_postag_test_data)
        test_sentences, emb_inits, emb_shapes = read_test_data(
            input_path, config, data.word_dict, data.label_dict,
            allow_new_words)

        print('Read {} sentences.'.format(len(test_sentences)))

        # The reader's embedding shapes/initialisers are installed
        # unconditionally (the allow_new_words guard is not applied).
        data.embedding_shapes = emb_shapes
        data.embeddings = emb_inits

        test_data = data.get_test_data(test_sentences,
                                       batch_size=config.dev_batch_size)

    with Timer("Get test sentences dict"):
        # Map each sentence id to its space-joined token string.
        sentence_texts = [' '.join(sen[1])
                          for sen in get_srl_sentences(args.input)]
        test_sentences_ids = [int(sen[0][0]) for sen in test_sentences]
        assert len(sentence_texts) == len(test_sentences_ids)
        test_sentences_w_id = dict(zip(test_sentences_ids, sentence_texts))

    with Timer("Loading ELMO"):
        test_elmo_hdf5 = hdf5_reader()
        test_elmo_hdf5.read_from_file(args.input_elmo, test_sentences_w_id)

    with Timer('Syntactic Information Extracting'):
        # Read the dependency trees that accompany the test sentences.
        test_dep_trees = SyntacticCONLL()
        test_dep_trees.read_from_file(args.input_dep_trees)

    with Timer("TPF2 generating..."):
        # Derive the tree-based position features from the dependency trees;
        # the dictionary is switched to not accept new entries beforehand.
        data.tpf2_dict.accept_new = False
        test_tpf2 = test_dep_trees.get_tpf2_dict(data.test_tensors,
                                                 data.tpf2_dict)
        print("Extract {} test TPF2 features".format(len(test_tpf2)))
        assert len(test_tpf2) == len(data.test_tensors)

    with Timer('Model building and loading'):
        model = BiLSTMTaggerModel(data, config=config, gpu_id=args.gpu)
        model.load(model_path)
        for weight in model.parameters():
            print(weight.size())
        if args.gpu:
            print("Initialize the model with GPU!")
            model = model.cuda()

    with Timer('Running model'):
        scores = []
        model.eval()
        for x, y, lengths, weights in test_data:
            (word_inputs_seqs, predicate_inputs_seqs, tpf_ids, sentences_ids,
             answers, input_lengths, masks,
             padding_answers) = batch_data_variable(
                 test_tpf2, x, y, lengths, weights)

            padded_length = word_inputs_seqs.size()[-1]
            answer_lengths = [len(ans) for ans in answers]
            elmo_representations = test_elmo_hdf5.forward(
                sentences_ids, padded_length, answer_lengths)

            if args.gpu:
                word_inputs_seqs = word_inputs_seqs.cuda()
                predicate_inputs_seqs = predicate_inputs_seqs.cuda()
                tpf_ids = tpf_ids.cuda()
                input_lengths = input_lengths.cuda()
                masks = masks.cuda()
                padding_answers = padding_answers.cuda()
                elmo_representations = elmo_representations.cuda()

            sc = model.forward(word_inputs_seqs, predicate_inputs_seqs,
                               tpf_ids, elmo_representations, input_lengths)
            if args.gpu:
                sc = sc.data.cpu().numpy()
            else:
                sc = sc.data.numpy()
            # Append one entry per row along the leading axis.
            scores.extend(sc[j] for j in range(sc.shape[0]))

    return scores, data, test_sentences, test_data

Example #2

Show file

def get_scores(config, task, model_path, word_dict_path, label_dict_path, syntactic_dict_path, input_path):
    """Run a saved BiLSTMTaggerModel over the test data and collect its scores.

    The function loads the word, label and syntactic-label dictionaries,
    reads and batches the test sentences, reads the dependency trees named by
    ``args.input_dep_trees``, restores the model from ``model_path`` and runs
    it batch by batch in evaluation mode.

    NOTE(review): ``args`` is not a parameter and is not defined in this
    function; it is read as a free name (``args.input_dep_trees``,
    ``args.gpu``) -- presumably module-level parsed command-line arguments;
    confirm against the caller.

    Args:
        config: Configuration object; ``config.dev_batch_size`` sets the
            test batch size and the object is forwarded to the reader,
            ``TaggerData`` and the model.
        task: ``'srl'`` selects ``reader.get_srl_test_data``; any other value
            selects ``reader.get_postag_test_data``.
        model_path: Path handed to ``model.load``.
        word_dict_path: Path handed to the word dictionary's ``load``.
        label_dict_path: Path handed to the label dictionary's ``load``.
        syntactic_dict_path: Path handed to the syntactic dictionary's
            ``load``.
        input_path: Test file path handed to the reader.

    Returns:
        A tuple ``(scores, data, test_sentences, test_data)`` where ``scores``
        is a flat list holding one NumPy entry per leading-axis row of every
        batch's model output.
    """
    with Timer('Data loading'):
        print('Task: {}'.format(task))
        allow_new_words = True
        print('Allow new words in test data: {}'.format(allow_new_words))

        # Load word and tag dictionary
        word_dict = Dictionary(padding_token=PADDING_TOKEN, unknown_token=UNKNOWN_TOKEN)  # word tokens to Dict
        label_dict, syntactic_dict = Dictionary(), Dictionary()
        word_dict.load(word_dict_path)
        label_dict.load(label_dict_path)
        syntactic_dict.load(syntactic_dict_path)
        data = TaggerData(config, [], [], word_dict, label_dict, None, None)
        data.syntactic_dict = syntactic_dict

        # Load test data.
        if task == 'srl':
            test_sentences, emb_inits, emb_shapes = reader.get_srl_test_data(
                input_path,
                config,
                data.word_dict,
                data.label_dict,
                allow_new_words)
        else:
            test_sentences, emb_inits, emb_shapes = reader.get_postag_test_data(
                input_path,
                config,
                data.word_dict,
                data.label_dict,
                allow_new_words)

        print('Read {} sentences.'.format(len(test_sentences)))

        # Install the embedding shapes/initialisers returned by the reader.
        # This is done unconditionally (the allow_new_words guard is not applied).
        data.embedding_shapes = emb_shapes
        data.embeddings = emb_inits
        # Batching.
        test_data = data.get_test_data(test_sentences, batch_size=config.dev_batch_size)

    with Timer('Syntactic Information Extracting'):  # extract the syntactic information from file
        test_dep_trees = SyntacticCONLL()
        test_dep_trees.read_from_file(args.input_dep_trees)
        # Switch the dictionary to not accept new entries before handing it
        # to the dependency trees.
        data.syntactic_dict.accept_new = False
        test_dep_trees.get_syntactic_label_dict(data.syntactic_dict)

    with Timer('Model building and loading'):
        model = BiLSTMTaggerModel(data, config=config, gpu_id=args.gpu)
        model.load(model_path)
        for param in model.parameters():
            # Use the call form of print: the bare ``print x`` statement is a
            # SyntaxError on Python 3, while this form prints the same value
            # on both Python 2 and Python 3.
            print(param.size())
        if args.gpu:
            print("Initialize the model with GPU!")
            model = model.cuda()

    with Timer('Running model'):
        scores = []
        model.eval()
        for batched_tensor in test_data:
            x, y, lengths, weights = batched_tensor
            word_inputs_seqs, predicate_inputs_seqs, syn_label_inputs_seqs, pes, answers, input_lengths, masks, padding_answers = \
                batch_data_variable(test_dep_trees, None, x, y, lengths, weights)

            if args.gpu:
                word_inputs_seqs, predicate_inputs_seqs, syn_label_inputs_seqs, input_lengths, masks, \
                padding_answers = \
                    word_inputs_seqs.cuda(), predicate_inputs_seqs.cuda(), syn_label_inputs_seqs.cuda(), \
                    input_lengths.cuda(), masks.cuda(), padding_answers.cuda()

            sc = model.forward(word_inputs_seqs, predicate_inputs_seqs, syn_label_inputs_seqs, pes, input_lengths)
            sc = sc.data.cpu().numpy() if args.gpu else sc.data.numpy()
            # Append one entry per row along the leading axis.
            sc = [sc[j] for j in range(sc.shape[0])]
            scores.extend(sc)

    return scores, data, test_sentences, test_data

Example #3

Show file

        with Timer('Model building and loading'):
            model = BiLSTMTaggerModel(data, config=config, gpu_id=args.gpu)
            model.load(model_path)
            for param in model.parameters():
                print param.size()
            if args.gpu:
                print("Initialize the model with GPU!")
                model = model.cuda()

        with Timer('Running model'):
            dev_loss = 0.0
            srl_predictions = []

            # with torch.no_grad():  # Eval don't need the grad
            model.eval()
            for i, batched_tensor in enumerate(test_data):
                sent_ids, sent_lengths, \
                word_indexes, head_indexes, char_indexes, \
                predicate_indexes, arg_starts, arg_ends, arg_labels, srl_lens, \
                gold_predicates, num_gold_predicates = batched_tensor

                if args.gpu:
                    word_indexes, head_indexes, char_indexes, \
                    predicate_indexes, arg_starts, arg_ends, arg_labels, srl_lens = \
                        word_indexes.cuda(), head_indexes.cuda(), char_indexes.cuda(), predicate_indexes.cuda(), arg_starts.cuda(), \
                        arg_ends.cuda(), arg_labels.cuda(), srl_lens.cuda()  # , gold_predicates.cuda(), num_gold_predicates.cuda()

                predicated_dict, loss = model.forward(
                    sent_lengths, word_indexes, head_indexes, char_indexes,
                    (predicate_indexes, arg_starts, arg_ends, arg_labels,