def train_lstm_model():
    """
    Fit the LSTM model on the texts loaded for the LIKE reaction index,
    save it through model_store and return it

    Slow to run: the original author notes the data set has grown to
    4000+ examples.

    :return : the fitted model built by create_lstm_model
    """
    corpus, _ = data.data_util.load_text_with_specific_label(
        DEFAULT_FILE_NAME, data.data_util.FbReaction.LIKE_INDEX)
    token_sequences, word_count, _, word_lookup = text_tokenizer.get_text_items(corpus)
    inputs, targets, longest_sequence = generate_padded_sequences(
        token_sequences, word_count, word_lookup)

    lstm = create_lstm_model(word_count, longest_sequence)

    # Batch size and validation split are taken from the module-level
    # settings; the returned training history is not needed here.
    fit_options = dict(
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=VALIDATION_SPLIT,
        verbose=1,
    )
    lstm.fit(inputs, targets, **fit_options)

    # The same label and hyper-parameters are passed to
    # model_store.get_model_title when the weights are loaded again.
    store_label = "lstm_hidden_neurons_%d" % LSTM_HIDDEN_NEURONS
    model_store.save_model(
        lstm, store_label, EMBEDDING_SIZE, EPOCHS, BATCH_SIZE, VALIDATION_SPLIT)
    return lstm
def load_model(model_path: str, index: int) -> tuple:
    """
    Deprecated: always raises, use bucket_classification.py instead

    The previous implementation (rebuilding the tokenizer mappings and
    calling keras.models.load_model on model_path) sat after the
    unconditional raise and could never run, so it has been removed.
    The signature is kept so existing callers fail with the same
    exception as before.

    :param model_path : str -> path of the saved model (unused)
    :param index : int -> index of the data label (unused)
    :raises Exception: always, pointing callers to bucket_classification.py
    """
    raise Exception("Deprecated, please use bucket_classification.py instead")
def run_binary_classifier_model(index: int) -> Model:
    """
    Deprecated: always raises, use bucket_classification.py instead

    This used to train, evaluate, plot and save a binary classification
    model. All of that code sat after the unconditional raise and could
    never run, so it has been removed. The signature is kept so existing
    callers fail with the same exception as before.

    :param index : int -> index for data label, see data.data_util.FbReaction
                          (unused)
    :raises Exception: always, pointing callers to bucket_classification.py
    """
    raise Exception("Deprecated, please use bucket_classification.py instead")
def load_model_and_predict(model: Model = None) -> None:
    """
    Load the appropriate model and use it to make predictions

    The texts for the LIKE reaction index are loaded and tokenized again so
    that word_to_index and the maximum sequence length are available for
    predict_from_example_list. The prediction output for EXAMPLES is printed;
    nothing is returned.

    :param model : Model -> model to predict with; when None a new LSTM model
                            is created and weights are loaded into it
    """
    texts, _ = data.data_util.load_text_with_specific_label(
        DEFAULT_FILE_NAME, data.data_util.FbReaction.LIKE_INDEX)
    sequences, num_words, index_to_word, word_to_index = text_tokenizer.get_text_items(
        texts)
    # Only the maximum sequence length is needed; predictors and labels are discarded.
    _, _, max_sequence_len = generate_padded_sequences(sequences, num_words, word_to_index)
    if model is None:
        model = create_lstm_model(num_words, max_sequence_len)
        # Same label and hyper-parameters that train_lstm_model passes to
        # model_store.save_model.
        # NOTE(review): weights are loaded only for a freshly created model; a
        # caller-supplied model is used as-is - confirm this is the intended scope.
        model_name = model_store.get_model_title(
            "lstm_hidden_neurons_%d" % LSTM_HIDDEN_NEURONS,
            EMBEDDING_SIZE,
            EPOCHS,
            BATCH_SIZE,
            VALIDATION_SPLIT,
        )
        model.load_weights(model_name)
    output = predict_from_example_list(model, word_to_index, max_sequence_len, EXAMPLES)
    print(output)
def train_regression_model() -> Model:
    """
    Fit a regression model on the standardized labels of the LIKE reaction
    index, evaluate it on the held-out split, plot the results and save it

    :return : Model -> the fitted regression model
    """
    posts, raw_labels = data.data_util.load_text_with_specific_label(
        DEFAULT_FILE_NAME, data.data_util.FbReaction.LIKE_INDEX)
    # The two extra values returned by standardize_array are not used here.
    scaled_labels, _, _ = data.data_util.standardize_array(raw_labels)
    token_sequences, word_count, _, word_lookup = text_tokenizer.get_text_items(posts)
    x_train, y_train, x_test, y_test, longest_sequence = text_tokenizer.split_dataset(
        token_sequences, scaled_labels, word_lookup)

    regressor = create_regression_model(word_count, longest_sequence)
    fit_options = dict(
        epochs=EPOCHS,
        batch_size=BATCH_SIZE,
        validation_split=VALIDATION_SPLIT,
        verbose=1,
    )
    training_history = regressor.fit(x_train, y_train, **fit_options)

    # The second value from evaluate is reported as the mean absolute error.
    test_loss, test_mae = regressor.evaluate(x_test, y_test)
    print("test set results: loss: %f, mean absolute error: %f" % (test_loss, test_mae))

    plot_regression_history(training_history)
    for split_name, features, targets in (
            ("train", x_train, y_train),
            ("test", x_test, y_test)):
        plot_prediction(regressor, features, targets, split_name)

    model_store.save_model(
        regressor, "regression", EMBEDDING_SIZE, EPOCHS, BATCH_SIZE, VALIDATION_SPLIT)
    return regressor