예제 #1
0
    # Per-row count of non-zero entries along the last axis, reshaped into a
    # single column. NOTE(review): presumably 0 is the padding id, making
    # these the true sequence lengths -- confirm against the dataset loader.
    train_seq_lens = np.not_equal(x_train, 0).sum(axis=-1).reshape((-1, 1))
    test_seq_lens = np.not_equal(x_test, 0).sum(axis=-1).reshape((-1, 1))

    # The model is fed three inputs: word-level array, char-level array and
    # the length column computed above.
    train_inputs = [x_train, x_char_train, train_seq_lens]
    test_inputs = [x_test, x_char_test, test_seq_lens]

    # Callback built from the test split and the label vocabulary; it is
    # handed to fit() below.
    conll_cb = ConllCallback(
        test_inputs, y_test, dataset.y_labels.vocab, batch_size=args.b
    )

    # The test split doubles as the validation set during training.
    held_out = (test_inputs, y_test)
    ner_model.fit(x=train_inputs, y=y_train,
                  batch_size=args.b, epochs=args.e,
                  callbacks=[conll_cb],
                  validation=held_out)

    # Persist the trained model, then pickle the three vocabularies next to
    # it in a separate info file.
    ner_model.save(args.model_path)
    with open(args.model_info_path, "wb") as fp:
        info = {
            "y_vocab": dataset.y_labels.vocab,
            "word_vocab": dataset.word_vocab.vocab,
            "char_vocab": dataset.char_vocab.vocab,
        }
        pickle.dump(info, fp)

    # running predictions
예제 #2
0
                    dataset.word_vocab,
                    vocabulary_size,
                    char_vocabulary_size,
                    word_embedding_dims=args.word_embedding_dims,
                    char_embedding_dims=args.character_embedding_dims,
                    word_lstm_dims=args.char_features_lstm_dims,
                    tagger_lstm_dims=args.entity_tagger_lstm_dims,
                    dropout=args.dropout,
                    external_embedding_model=args.embedding_model)

    # Callback built from the test inputs, test labels and the label mapping;
    # it is handed to fit() below.
    conll_cb = ConllCallback(
        [x_test, x_char_test],
        y_test,
        dataset.y_labels,
        batch_size=args.b,
    )

    # The test split doubles as the validation set during training. A fresh
    # input list is built here rather than sharing the one given to the
    # callback.
    held_out = ([x_test, x_char_test], y_test)
    ner_model.fit(
        x=[x_train, x_char_train],
        y=y_train,
        batch_size=args.b,
        epochs=args.e,
        callbacks=[conll_cb],
        validation=held_out,
    )

    # Persist the trained model to the path given on the command line.
    ner_model.save(args.model_path)
    with open(args.model_info_path, 'wb') as fp:
        info = {
            'sentence_len': args.sentence_length,
            'word_len': args.word_length,
            'num_of_labels': num_y_labels,
            'labels_id_to_word': {v: k for k, v in dataset.y_labels.items()},
            'word_vocab': dataset.word_vocab,
            'vocab_size': vocabulary_size,
            'char_vocab_size': char_vocabulary_size,
            'char_vocab': dataset.char_vocab,