Example #1
def train_lstm_model():
    """
	Trains and returns an LSTM model
	this is extremely slow especially now that we're using 4000+ examples
	"""
    texts, _ = data.data_util.load_text_with_specific_label(
        DEFAULT_FILE_NAME, data.data_util.FbReaction.LIKE_INDEX)
    sequences, num_words, index_to_word, word_to_index = text_tokenizer.get_text_items(
        texts)
    predictors, labels, max_sequence_len = generate_padded_sequences(
        sequences, num_words, word_to_index)
    model = create_lstm_model(num_words, max_sequence_len)
    history = model.fit(
        predictors,
        labels,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=VALIDATION_SPLIT,
        verbose=1,
    )
    model_store.save_model(
        model,
        "lstm_hidden_neurons_%d" % LSTM_HIDDEN_NEURONS,
        EMBEDDING_SIZE,
        EPOCHS,
        BATCH_SIZE,
        VALIDATION_SPLIT,
    )
    return model
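
The generate_padded_sequences helper is not shown in this listing. A minimal sketch of what it might look like, assuming Keras-style pre-padding and next-word labels built from every n-gram prefix; the body is illustrative, not the original implementation:

# Hypothetical sketch of the generate_padded_sequences helper assumed above.
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

def generate_padded_sequences(sequences, num_words, word_to_index):
    # word_to_index is unused here but kept to match the call site above.
    # Expand each tokenized text into all of its n-gram prefixes so the
    # model learns to predict the next word at every position.
    ngrams = [seq[:i + 1] for seq in sequences for i in range(1, len(seq))]
    max_sequence_len = max(len(ngram) for ngram in ngrams)
    padded = np.array(pad_sequences(ngrams, maxlen=max_sequence_len, padding="pre"))
    # Predictors are every token but the last; the final token is the label.
    predictors, labels = padded[:, :-1], padded[:, -1]
    labels = to_categorical(labels, num_classes=num_words)
    return predictors, labels, max_sequence_len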
Example #2
def load_model(model_path: str, index: int) -> tuple:
    """
    Deprecated: use bucket_classification.py instead.

    Loads a saved classifier and returns
    (model, word_to_index, index_to_word, max_sequence_length).
    """
    raise Exception("Deprecated, please use bucket_classification.py instead")
    texts, like_labels = data.data_util.load_text_with_specific_label(
        DEFAULT_FILE_NAME, index)
    binary_labels = create_binary_labels_for_classification(like_labels, 20)
    sequences, num_words, index_to_word, word_to_index = text_tokenizer.get_text_items(
        texts)
    _, _, _, _, max_sequence_length = \
        text_tokenizer.split_dataset(sequences, binary_labels, word_to_index)
    model = keras.models.load_model(model_path)
    return model, word_to_index, index_to_word, max_sequence_length
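
Examples #2 and #3 both call create_binary_labels_for_classification with a cutoff of 20 likes. Only the call signature appears in this listing; a plausible sketch of the helper, with the thresholding rule assumed:

import numpy as np

def create_binary_labels_for_classification(like_labels, threshold):
    # Hypothetical: posts with at least `threshold` likes form the positive class.
    return np.array([1 if likes >= threshold else 0 for likes in like_labels])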
Example #3
def run_binary_classifier_model(index: int) -> Model:
    """
    Trains and returns a binary classification model
    :param index : int
        -> index for data label, see data.data_util.FbReaction
    """
    raise Exception("Deprecated, please use bucket_classification.py instead")
    texts, like_labels = data.data_util.load_text_with_specific_label(
        DEFAULT_FILE_NAME, index)
    binary_labels = create_binary_labels_for_classification(like_labels, 20)
    sequences, num_words, index_to_word, word_to_index = text_tokenizer.get_text_items(
        texts)
    train_data, train_labels, test_data, test_labels, max_sequence_length = \
        text_tokenizer.split_dataset(sequences, binary_labels, word_to_index)
    model = create_keras_classifier_model(num_words, max_sequence_length)
    history = model.fit(
        train_data,
        train_labels,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=VALIDATION_SPLIT,
        verbose=1,
    )
    print(test_data.shape, test_labels.shape)
    loss, accuracy = model.evaluate(test_data, test_labels)
    print("test set results: loss: %f, accuracy: %f" % (loss, accuracy))
    plot_classification_history(history)
    plot_prediction(model, train_data, train_labels, "train")
    plot_prediction(model, test_data, test_labels, "test")
    model_store.save_model(
        model,
        "binary_classification_index_%d" % index,
        EMBEDDING_SIZE,
        EPOCHS,
        BATCH_SIZE,
        VALIDATION_SPLIT,
    )
    return model
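
create_keras_classifier_model is defined elsewhere in the module. A minimal sketch of a compatible binary classifier, assuming an embedding-plus-pooling architecture; the layer sizes and EMBEDDING_SIZE value here are guesses, not the original design:

from tensorflow import keras

EMBEDDING_SIZE = 64  # assumed; the real value is a module-level constant

def create_keras_classifier_model(num_words, max_sequence_length):
    # Embed word indices, pool over the sequence, and finish with a sigmoid
    # head so model.evaluate() returns (loss, accuracy) as printed above.
    model = keras.Sequential([
        keras.layers.Embedding(num_words, EMBEDDING_SIZE,
                               input_length=max_sequence_length),
        keras.layers.GlobalAveragePooling1D(),
        keras.layers.Dense(16, activation="relu"),
        keras.layers.Dense(1, activation="sigmoid"),
    ])
    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model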
Example #4
def load_model_and_predict(model: Model = None) -> None:
    """
	Load the appropriate model and use it to make predictions
	"""
    texts, _ = data.data_util.load_text_with_specific_label(
        DEFAULT_FILE_NAME, data.data_util.FbReaction.LIKE_INDEX)
    sequences, num_words, index_to_word, word_to_index = text_tokenizer.get_text_items(
        texts)
    _, _, max_sequence_len = generate_padded_sequences(sequences, num_words,
                                                       word_to_index)
    if model is None:
        model = create_lstm_model(num_words, max_sequence_len)
    model_name = model_store.get_model_title(
        "lstm_hidden_neurons_%d" % LSTM_HIDDEN_NEURONS,
        EMBEDDING_SIZE,
        EPOCHS,
        BATCH_SIZE,
        VALIDATION_SPLIT,
    )
    model.load_weights(model_name)
    output = predict_from_example_list(model, word_to_index, max_sequence_len,
                                       EXAMPLES)
    print(output)
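
predict_from_example_list is also external. One way it could work, assuming greedy single-step next-word decoding over the EXAMPLES prompts; this body is an illustration, not the original code:

import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

def predict_from_example_list(model, word_to_index, max_sequence_len, examples):
    index_to_word = {index: word for word, index in word_to_index.items()}
    outputs = []
    for text in examples:
        tokens = [word_to_index[w] for w in text.lower().split()
                  if w in word_to_index]
        # Inputs are one shorter than max_sequence_len because the final
        # token of each padded sequence served as the training label.
        padded = pad_sequences([tokens], maxlen=max_sequence_len - 1,
                               padding="pre")
        # Greedy decode: take the single highest-probability next word.
        next_index = int(np.argmax(model.predict(padded), axis=-1)[0])
        outputs.append("%s %s" % (text, index_to_word.get(next_index, "<unk>")))
    return outputs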
Example #5
def train_regression_model() -> Model:
    """
	Trains and returns a regression model
	"""
    texts, like_labels = data.data_util.load_text_with_specific_label(
        DEFAULT_FILE_NAME, data.data_util.FbReaction.LIKE_INDEX)
    standardized_labels, avg, std = data.data_util.standardize_array(
        like_labels)
    sequences, num_words, index_to_word, word_to_index = text_tokenizer.get_text_items(
        texts)
    train_data, train_labels, test_data, test_labels, max_sequence_length = \
        text_tokenizer.split_dataset(sequences, standardized_labels, word_to_index)
    model = create_regression_model(num_words, max_sequence_length)
    history = model.fit(
        train_data,
        train_labels,
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=VALIDATION_SPLIT,
        verbose=1,
    )
    loss, mae = model.evaluate(test_data, test_labels)
    print("test set results: loss: %f, mean absolute error: %f" % (loss, mae))
    plot_regression_history(history)
    plot_prediction(model, train_data, train_labels, "train")
    plot_prediction(model, test_data, test_labels, "test")
    model_store.save_model(
        model,
        "regression",
        EMBEDDING_SIZE,
        EPOCHS,
        BATCH_SIZE,
        VALIDATION_SPLIT,
    )
    return model
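
For the evaluate() call above to return (loss, mean absolute error), create_regression_model must be compiled with an MAE metric. A sketch under that assumption, with the architecture guessed to mirror the classifier:

from tensorflow import keras

EMBEDDING_SIZE = 64  # assumed module-level constant, as in the earlier sketch

def create_regression_model(num_words, max_sequence_length):
    # A single linear output unit predicts the standardized like count;
    # compiling with metrics=["mae"] makes evaluate() return (loss, mae).
    model = keras.Sequential([
        keras.layers.Embedding(num_words, EMBEDDING_SIZE,
                               input_length=max_sequence_length),
        keras.layers.GlobalAveragePooling1D(),
        keras.layers.Dense(16, activation="relu"),
        keras.layers.Dense(1),
    ])
    model.compile(optimizer="adam", loss="mse", metrics=["mae"])
    return model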