def main_training():
    lexicon_loader = LexiconLoader()
    scored_lexicon: dict = lexicon_loader.load_all_and_merge()

    tr_tweets_loader = LabeledTweetsLoader(TRAINING_INPUT_FILENAME)
    tr_labeled_tweets = tr_tweets_loader.parse_tokens_and_labels(tr_tweets_loader.load_lines())

    token_summarizer = TokenSummarizer(scored_lexicon)
    feature_extractor = FeatureExtractor(scored_lexicon)
    vu = VocabUtil()
    nn_input_preparer = NNInputPreparer(vu)

    tr_feature_vectors = []  # 2D array of feature vectors
    for labeled_tweet in tr_labeled_tweets:
        known_token_sequence = token_summarizer.get_known_tokens(labeled_tweet[0])
        feature_vector = feature_extractor.compute_feature_vector(known_token_sequence)
        tr_feature_vectors.append(feature_vector)

    tr_network_input = np.array(tr_feature_vectors)
    tr_targets = [labeled_tweet[1] for labeled_tweet in tr_labeled_tweets]
    tr_targets_one_hot_encoded = nn_input_preparer.rectangular_targets_to_one_hot(tr_targets)

    dev_tweets_loader = LabeledTweetsLoader(DEV_INPUT_FILENAME)
    dev_labeled_tweets = dev_tweets_loader.parse_tokens_and_labels(dev_tweets_loader.load_lines())

    dev_feature_vectors = []  # 2D array of feature vectors
    for labeled_tweet in dev_labeled_tweets:
        known_token_sequence = token_summarizer.get_known_tokens(labeled_tweet[0])
        feature_vector = feature_extractor.compute_feature_vector(known_token_sequence)
        dev_feature_vectors.append(feature_vector)

    dev_network_input = np.array(dev_feature_vectors)
    dev_targets = [labeled_tweet[1] for labeled_tweet in dev_labeled_tweets]
    dev_targets_one_hot_encoded = nn_input_preparer.rectangular_targets_to_one_hot(dev_targets)

    # Every epoch is cheap (< 1 ms), so we don't need the ability to continue
    # training from a previous model.
    print("Commencing new training run")
    model_creator = ModelCreator(vu)
    model = model_creator.create_two_dense_model(hidden_layer_size=HIDDEN_SIZE)

    cp_filepath = BASE_DIR + 'ep_{epoch}_valacc_{val_accuracy:.5f}.h5'
    checkpoint = ModelCheckpoint(cp_filepath, monitor='val_accuracy', verbose=1,
                                 save_best_only=False)

    model.fit(tr_network_input, tr_targets_one_hot_encoded, batch_size=32, epochs=MAX_EPOCHS,
              validation_data=(dev_network_input, dev_targets_one_hot_encoded),
              callbacks=[checkpoint])
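
# For reference, a minimal sketch of the kind of model create_two_dense_model() above is
# assumed to return: a small softmax classifier with one hidden Dense layer over the
# 3-dimensional lexicon feature vectors (the inference code below reshapes each input to
# (1, 3)). The hidden activation, optimizer, and loss are illustrative assumptions, not
# the actual ModelCreator implementation; it relies on this file's existing tf import.
def sketch_two_dense_model(hidden_layer_size: int, num_classes: int, input_dim: int = 3):
    model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(input_dim,)),                  # one feature vector per tweet
        tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
        tf.keras.layers.Dense(num_classes, activation='softmax'),   # probabilities over labels
    ])
    # categorical_crossentropy matches the one-hot targets produced by NNInputPreparer,
    # and the 'accuracy' metric is what the val_accuracy checkpoint template monitors
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model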
def main_inference():
    print(f'Using TensorFlow version {tf.__version__}')
    print(f'Loading model {TRAINING_MODEL_FILENAME}')
    trained_model = load_model(TRAINING_MODEL_FILENAME)
    trained_model.summary()

    lexicon_loader = LexiconLoader()
    scored_lexicon: dict = lexicon_loader.load_all_and_merge()
    token_summarizer = TokenSummarizer(scored_lexicon)
    feature_extractor = FeatureExtractor(scored_lexicon)
    vu = VocabUtil()
    nn_input_preparer = NNInputPreparer(vu)

    for input_filename in [DEV_INPUT_FILENAME]:
        tweets_loader = LabeledTweetsLoader(input_filename)
        labeled_tweets = tweets_loader.parse_tokens_and_labels(tweets_loader.load_lines())

        feature_vectors = []  # 2D array of feature vectors
        for labeled_tweet in labeled_tweets:
            known_token_sequence = token_summarizer.get_known_tokens(labeled_tweet[0])
            feature_vector = feature_extractor.compute_feature_vector(known_token_sequence)
            feature_vectors.append(feature_vector)

        network_input = np.array(feature_vectors)
        print('network_input.shape:', network_input.shape)

        targets = [labeled_tweet[1] for labeled_tweet in labeled_tweets]
        targets_one_hot_encoded = nn_input_preparer.rectangular_targets_to_one_hot(targets)

        trained_model.evaluate(network_input, targets_one_hot_encoded)

        argmax_confusion_matrix = np.zeros(
            (vu.get_output_vocab_size(), vu.get_output_vocab_size()), dtype=int)
        expected_sampling_confusion_matrix = np.zeros(
            (vu.get_output_vocab_size(), vu.get_output_vocab_size()))
        expected_sampling_accuracy_sum = 0.0
        num_correct_argmax_predictions = 0

        for rectangular_input, target_human in tqdm(zip(network_input, targets),
                                                    total=len(targets)):
            rectangular_input = rectangular_input.reshape(1, -1)  # add a batch dimension
            target_index = vu.nn_rsl_to_int[target_human]
            predicted_probabilities = trained_model(rectangular_input)[0]
            # the predicted index if we take the class with the largest probability
            argmax_index = np.argmax(predicted_probabilities)
            if argmax_index == target_index:
                num_correct_argmax_predictions += 1
            argmax_confusion_matrix[target_index][argmax_index] += 1
            # rhs is the probability of guessing target_index if we sample
            # according to the predicted probabilities
            expected_sampling_accuracy_sum += tf.keras.backend.get_value(
                predicted_probabilities[target_index])
            for i in range(vu.get_output_vocab_size()):
                expected_sampling_confusion_matrix[target_index][i] += predicted_probabilities[i]

        num_tweets_in_dataset = len(targets)
        print(f'Argmax accuracy for {input_filename}:',
              num_correct_argmax_predictions / num_tweets_in_dataset)
        print(f'Expected sampling accuracy for {input_filename}:',
              expected_sampling_accuracy_sum / num_tweets_in_dataset)
        print(f"Argmax confusion matrix of targets vs predicted for {input_filename}:\n"
              f"{vu.raw_sentiment_labels}\n", argmax_confusion_matrix)
        print(f"Expected sampling confusion matrix of targets vs predicted for {input_filename}:\n"
              f"{vu.raw_sentiment_labels}\n", expected_sampling_confusion_matrix)
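
# The "expected sampling accuracy" accumulated in main_inference() above is the accuracy
# we would get in expectation if we sampled the predicted label from the model's output
# distribution instead of taking the argmax: per example, it is simply the probability
# the model assigns to the true class, averaged over the dataset. A standalone sketch of
# that calculation (the function and argument names are illustrative, not project API;
# it relies on this file's existing np import):
def sketch_expected_sampling_accuracy(predicted_probabilities, target_indices):
    # predicted_probabilities: (num_examples, num_classes) array of softmax outputs
    # target_indices: true class index for each example
    per_example = [probs[t] for probs, t in zip(predicted_probabilities, target_indices)]
    return float(np.mean(per_example))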