Example #1
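        # character feature inputs from the test set and character vocabulary size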
        char_test = test_set[3]
        char_vocab_size = len(dataset.char_vocab) + 2

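    # transform POS and chunk label sequences into one-hot encoding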
    pos_train = keras.utils.to_categorical(pos_train, num_classes=pos_labels)
    chunk_train = keras.utils.to_categorical(chunk_train,
                                             num_classes=chunk_labels)
    pos_test = keras.utils.to_categorical(pos_test, num_classes=pos_labels)
    chunk_test = keras.utils.to_categorical(chunk_test,
                                            num_classes=chunk_labels)

    # build model with input parameters
    model = SequenceChunker(use_cudnn=args.use_cudnn)
    model.build(word_vocab_size,
                pos_labels,
                chunk_labels,
                char_vocab_size=char_vocab_size,
                max_word_len=args.max_word_length,
                feature_size=args.feature_size,
                classifier=args.classifier)

    # initialize word embedding if external model selected
    if args.embedding_model is not None:
        embedding_model, _ = load_word_embeddings(args.embedding_model)
        embedding_mat = get_embedding_matrix(embedding_model,
                                             dataset.word_vocab)
        model.load_embedding_weights(embedding_mat)

    # train the model
    if args.char_features is True:
        train_features = [words_train, char_train]
        test_features = [words_test, char_test]
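
Both examples one-hot encode the POS and chunk label indices with keras.utils.to_categorical before training. A minimal standalone sketch of that step is shown below with toy data; it imports keras via TensorFlow here, while the snippets above may use standalone Keras:

    import numpy as np
    from tensorflow import keras

    # Toy chunk label indices for a 4-token sentence (0 reserved for padding).
    chunk_ids = np.array([1, 2, 2, 0])
    chunk_labels = 3  # len(chunk_vocab) + 1, as computed in the examples

    one_hot = keras.utils.to_categorical(chunk_ids, num_classes=chunk_labels)
    print(one_hot.shape)  # (4, 3)
    # [[0. 1. 0.]
    #  [0. 0. 1.]
    #  [0. 0. 1.]
    #  [1. 0. 0.]]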
Example #2
    # get label sizes, transform y's into 1-hot encoding
    chunk_labels = len(dataset.chunk_vocab) + 1
    pos_labels = len(dataset.pos_vocab) + 1
    word_vocab_size = len(dataset.word_vocab) + 2
    pos_train = keras.utils.to_categorical(pos_train, num_classes=pos_labels)
    chunk_train = keras.utils.to_categorical(chunk_train,
                                             num_classes=chunk_labels)
    pos_test = keras.utils.to_categorical(pos_test, num_classes=pos_labels)
    chunk_test = keras.utils.to_categorical(chunk_test,
                                            num_classes=chunk_labels)

    # build model with input parameters
    model = SequenceChunker(use_gpu=args.use_gpu)
    model.build(word_vocab_size,
                pos_labels,
                chunk_labels,
                feature_size=args.feature_size)

    # initialize word embedding if external model selected
    if args.embedding_model is not None:
        embedding_model, _ = load_word_embeddings(args.embedding_model)
        embedding_mat = get_embedding_matrix(embedding_model,
                                             dataset.word_vocab)
        model.load_embedding_weights(embedding_mat)

    # train the model
    chunk_f1_cb = ConllCallback(words_test,
                                chunk_test,
                                dataset.chunk_vocab.vocab,
                                batch_size=64)
    model.fit(words_train, [pos_train, chunk_train],
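
The args object referenced throughout these snippets comes from a command-line parser defined elsewhere in the training script. A hypothetical argparse setup consistent with the attributes used above follows; all flag names, types, and defaults are assumptions, not the script's actual interface:

    import argparse

    # Hypothetical parser covering the args.* fields referenced in the snippets.
    parser = argparse.ArgumentParser(description="Train a SequenceChunker model")
    parser.add_argument("--feature_size", type=int, default=100,
                        help="size of the model's hidden feature layers")
    parser.add_argument("--max_word_length", type=int, default=20,
                        help="maximum characters per word for char features")
    parser.add_argument("--classifier", type=str, default="softmax",
                        help="output classifier type")
    parser.add_argument("--embedding_model", type=str, default=None,
                        help="path to a pre-trained word embedding model")
    parser.add_argument("--char_features", action="store_true",
                        help="use character-level word features")
    parser.add_argument("--use_cudnn", action="store_true",
                        help="use CuDNN-accelerated RNN layers")
    parser.add_argument("--use_gpu", action="store_true",
                        help="run training on GPU")
    args = parser.parse_args()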