# Example 1 (scraped snippet; original header: "Esempio n. 1")
                       max_document_length=MAX_DOCUMENT_LENGTH,
                       max_vocabulary_size=MAX_VOCABULARY_SIZE,
                       embeddings_size=EMBEDDINGS_SIZE)

check_randomness("After preprocess")

# weigh training examples: everything that's not class 0 (not kp)
# gets a heavier score
train_y_weights = np.argmax(train_y,axis=2) # this removes the one-hot representation
# NOTE(review): the two masked assignments below are order-dependent and
# assume KP_WEIGHT >= 1 (the array is integer-typed after argmax): first
# every keyphrase position (class id > 0) is set to KP_WEIGHT, then the
# remaining zeros (non-keyphrase positions) are set to weight 1.
train_y_weights[train_y_weights > 0] = KP_WEIGHT
train_y_weights[train_y_weights < 1] = 1

logging.info("Data preprocessing complete.")

# Ceiling on recall: score the gold sequence labels themselves against the
# answer set, so any loss here comes from preprocessing, not the model.
predicted_tokens = postprocessing.get_words(
    test_doc, postprocessing.undo_sequential(test_y))
logging.info("Maximum possible recall: %s",
             metrics.recall(test_answer, predicted_tokens))

# Build (and later train) a fresh network unless model saving is enabled
# and a previously saved model already exists on disk.
if not SAVE_MODEL or not os.path.isfile(MODEL_PATH) :

    logging.debug("Building the network...")
    model = Sequential()

    # Frozen (trainable=False) embedding lookup initialized from the
    # pre-computed embedding_matrix; inputs are fixed-length sequences of
    # MAX_DOCUMENT_LENGTH token ids.
    embedding_layer = Embedding(np.shape(embedding_matrix)[0],
                                EMBEDDINGS_SIZE,
                                weights=[embedding_matrix],
                                input_length=MAX_DOCUMENT_LENGTH,
                                trainable=False)

    model.add(embedding_layer)
    # Bidirectional LSTM with return_sequences=True keeps one output per
    # token (sequence labeling), followed by dropout for regularization.
    model.add(Bidirectional(LSTM(150,activation='tanh', recurrent_activation='hard_sigmoid', return_sequences=True)))
    model.add(Dropout(0.25))
# Example 2 (scraped snippet; original header: "Esempio n. 2")
from sklearn.utils import class_weight

# Weigh training examples: everything that's not class 0 (not kp) gets a
# heavier score, delegating the weighting to sklearn's 'balanced' scheme
# (weights inversely proportional to class frequency).
train_y_weights = np.argmax(train_y, axis=2)  # drop the one-hot encoding
train_y_weights = class_weight.compute_sample_weight(
    'balanced', train_y_weights.flatten()).reshape(train_y_weights.shape)

logging.info("Data preprocessing complete.")

# Ceiling on recall: evaluate the gold labels against the answer set, so
# any shortfall is attributable to preprocessing rather than the model.
gold_sequence = postprocessing.undo_sequential(test_y)
predicted_words = postprocessing.get_words(test_doc, gold_sequence)
logging.info("Maximum possible recall: %s",
             metrics.recall(test_answer, predicted_words, STEM_MODE))

if not SAVE_MODEL or not os.path.isfile(MODEL_PATH):

    logging.debug("Building the network...")
    model = Sequential()

    embedding_layer = Embedding(np.shape(embedding_matrix)[0],
                                EMBEDDINGS_SIZE,
                                weights=[embedding_matrix],
                                input_length=MAX_DOCUMENT_LENGTH,
                                trainable=False)

    model.add(embedding_layer)
    model.add(