예제 #1
0
    # Configure the model for training with categorical cross-entropy loss and
    # accuracy as the reported metric. `opt` is defined outside this excerpt.
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=["accuracy"])

    # Debug aid (disabled): print a summary of model layers and parameters.
    #print(model.summary())

    # Custom metric calculations: this object is handed to fit() as a callback.
    # NOTE(review): `k` presumably identifies the current fold/iteration (see
    # the "K iteration" comment further down) -- confirm against the caller.
    metrics = Metrics_Approx(tag_index=all_labels, k=k)

    # Train the model, unless `trans_learn` is truthy, in which case fitting is
    # skipped entirely.
    # NOTE(review): `history` is only bound inside this branch, so any later use
    # of it would raise NameError when trans_learn is set -- verify.
    if not trans_learn:
        history = model.fit(X_train,
                            np.array(y_train),
                            batch_size=batch_size,
                            epochs=int(args.num_epochs),
                            verbose=1,
                            callbacks=[metrics],
                            validation_data=(X_test, np.array(y_test)))

    # Perform prediction to calculate scores for this K iteration.
    # argmax over the last axis picks the highest-scoring index for both the
    # predictions and the targets.
    # NOTE(review): this assumes y_test holds one-hot / per-class score vectors
    # along its last axis (consistent with the categorical loss) -- confirm.
    y_pred = np.asarray(model.predict(X_test))
    y_pred = np.argmax(y_pred, axis=-1)
    targ = y_test
    targ = np.argmax(targ, axis=-1)
    k_scores = evaluate_approx_match(y_pred, targ, all_labels)
    # Evaluate on both the test and the training split.
    # NOTE(review): fit() above wraps the labels in np.array(...), but these
    # calls pass y_test / y_train as-is -- confirm evaluate() accepts that type.
    k_metrics_val = model.evaluate(X_test, y_test)
    k_metrics_train = model.evaluate(X_train, y_train)

    # Timestamp for the log: string form of the current local (naive) datetime.
    timestamp = str(datetime.datetime.now())
예제 #2
0
# Build the model: fixed pre-trained embeddings -> LSTM -> per-timestep softmax.
# The input layer is named `inputs` so the builtin input() is not shadowed.
inputs = Input(shape=(X_train.shape[1],))

# The embedding layer is initialised from embedding_weight_matrix and frozen
# (trainable=False), so its weights are not updated during training.
embedded = Embedding(input_dim=vocab_size, output_dim=word_embed_size,
                     input_length=seq_maxlen, weights=[embedding_weight_matrix],
                     trainable=False)(inputs)
# return_sequences=True keeps one output vector per timestep, so every token
# position can be classified by the TimeDistributed softmax below.
hidden = LSTM(100, return_sequences=True)(embedded)
output = TimeDistributed(Dense(num_tags, activation="softmax"))(hidden)

model = Model(inputs=inputs, outputs=output)
# summary() prints the layer table itself and returns None; wrapping it in
# print() would emit an extra "None" line.
model.summary()

model.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])

# Checkpoint to model.h5, overwriting it only when val_loss improves.
save_weights = ModelCheckpoint('model.h5', monitor='val_loss', save_best_only=True)

model.fit(x=X_train, y=y_train, batch_size=batch_size,
          epochs=epochs, validation_data=(X_validation, y_validation), callbacks=[save_weights])

# NOTE(review): the predictions below use the in-memory model as left by the
# final epoch, not the best checkpoint written to model.h5 -- confirm that is
# intended, or reload the checkpoint before predicting.

# Load the held-out test set and encode it the same way as the training data.
# NOTE(review): the last getSequences argument is presumably the padding value
# (0 for words, the index of the 'o' tag for labels) -- verify in getSequences.
sentences_test, tags_test = load_data(os.path.join(data_dir, 'test.txt'))
X_test = getSequences(sentences_test, tokenizer_words, seq_maxlen, 0)
y_test = getSequences(tags_test, tokenizer_tags, seq_maxlen, tokenizer_tags.word_index['o'])

y_pred = model.predict(X_test)

# Turn the predicted and reference sequences into tag lists for reporting.
y_pred_tags, y_test_tags = decode_output(X_test, y_pred, y_test, tokenizer_words, tokenizer_tags, seq_maxlen)

report = classification_report(y_pred=y_pred_tags, y_true=y_test_tags)
print(report)