# Module-level imports assumed by the drivers below (numpy for array handling,
# matplotlib for the per-digit analysis plots in run_part1_digit_1).
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker


def run_part22_audio(k):
    """Part 2.2: audio (MFC) classification with binary features and smoothing k."""
    print("================MFC - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part2_2(train_audio_dataset, {' ': 1, '%': 0}, (30, 13), 5)
    test_dataset = feature_map_part2_2(test_audio_dataset, {' ': 1, '%': 0}, (30, 13), 5)
    run(k, train_dataset, test_dataset)
    print("===================================================\n")
def run_part1_face_1(k):
    """Part 1.1: face classification with binary per-pixel features."""
    print("================FACE - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_raw_face_dataset, {' ': 0, '#': 1}, (70, 60), 2)
    test_dataset = feature_map_part1_1(test_raw_face_dataset, {' ': 0, '#': 1}, (70, 60), 2)
    run(0.1, train_dataset, test_dataset)  # note: smoothing is fixed at 0.1 here; the k argument is unused
    print("=====================================================\n")
def run_part1_digit_extra_1(k):
    """Part 1.1 extra credit: digit classification with ternary per-pixel features."""
    print("================DIGIT - TERNARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_raw_digit_dataset, {' ': 0, '#': 1, '+': 2}, (28, 28), 10)
    test_dataset = feature_map_part1_1(test_raw_digit_dataset, {' ': 0, '#': 1, '+': 2}, (28, 28), 10)
    run(0.1, train_dataset, test_dataset)  # note: smoothing is fixed at 0.1 here; the k argument is unused
    print("=====================================================\n")
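
# The feature_map_part1_1 helper is defined elsewhere in the project. As a rough,
# illustrative sketch only (the name char_map_features and the exact data layout
# below are assumptions, not the project's code), the character maps passed above
# turn each ASCII-art example into a flat vector of discrete per-pixel features:
def char_map_features(raw_rows, char_map, shape):
    height, width = shape
    features = []
    for r in range(height):
        for c in range(width):
            # e.g. {' ': 0, '#': 1, '+': 2} maps blank/edge/interior characters to 0/1/2
            features.append(char_map[raw_rows[r][c]])
    return features
# e.g. char_map_features(["# ", "+#"], {' ': 0, '#': 1, '+': 2}, (2, 2)) -> [1, 0, 2, 1]
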
def run_part1_face_2(k, h, w, overlap):
    """Part 1.2: face classification with (h, w) pixel-group features, optionally overlapping."""
    if overlap:
        print("===========FACE - PIXEL GROUP (%d * %d) OVERLAP==========" % (h, w))
    else:
        print("===============FACE - PIXEL GROUP (%d * %d) =============" % (h, w))
    print("Importing data...")
    train_dataset = feature_map_part1_2(train_raw_face_dataset, {' ': 0, '#': 1},
                                        (70, 60), 2, (h, w), overlap)
    test_dataset = feature_map_part1_2(test_raw_face_dataset, {' ': 0, '#': 1},
                                       (70, 60), 2, (h, w), overlap)
    run(0.1, train_dataset, test_dataset)  # note: smoothing is fixed at 0.1 here; the k argument is unused
    print("=====================================================\n")
def run_part2_extra3(k):
    """Part 2 extra credit 3: audio classification with averaged features and smoothing k."""
    print("================AUDIO - AVERAGE FEATURE================")
    print("Importing data...")
    train_audio_dataset = (np.concatenate((train_yes_data, train_no_data)),
                           np.concatenate((train_yes_label, train_no_label)))
    test_audio_dataset = (np.concatenate((test_yes_data, test_no_data)),
                          np.concatenate((test_yes_label, test_no_label)))
    train_dataset = feature_map_part2_extra3(train_audio_dataset, {' ': 1, '%': 0}, (25, 10), 2)
    test_dataset = feature_map_part2_extra3(test_audio_dataset, {' ': 1, '%': 0}, (25, 10), 2)
    run(k, train_dataset, test_dataset)
    print("====================================================\n")
def run_part1_digit_2(k, h, w, overlap):
    """Part 1.2: digit classification with (h, w) pixel-group features, optionally overlapping."""
    if overlap:
        print("===========DIGIT - PIXEL GROUP (%d * %d) OVERLAP==========" % (h, w))
    else:
        print("===============DIGIT - PIXEL GROUP (%d * %d) =============" % (h, w))
    print("Importing data...")
    train_dataset = feature_map_part1_2(train_raw_digit_dataset, {' ': 0, '#': 1, '+': 1},
                                        (28, 28), 10, (h, w), overlap)
    test_dataset = feature_map_part1_2(test_raw_digit_dataset, {' ': 0, '#': 1, '+': 1},
                                       (28, 28), 10, (h, w), overlap)
    run(0.1, train_dataset, test_dataset)  # note: smoothing is fixed at 0.1 here; the k argument is unused
    print("=====================================================\n")
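
# feature_map_part1_2 (used above) groups pixels into (h, w) blocks. A minimal,
# illustrative sketch of that idea (the name pixel_group_features and the encoding
# below are assumptions, not the project's implementation): with overlap the window
# slides one pixel at a time, without overlap it tiles the image in disjoint blocks,
# and each block's contents become a single discrete feature.
def pixel_group_features(image, group_shape, overlap):
    h, w = group_shape
    rows, cols = image.shape
    step_r, step_c = (1, 1) if overlap else (h, w)
    features = []
    for r in range(0, rows - h + 1, step_r):
        for c in range(0, cols - w + 1, step_c):
            features.append(tuple(image[r:r + h, c:c + w].ravel().tolist()))
    return features
# e.g. pixel_group_features(np.zeros((28, 28), dtype=int), (2, 2), overlap=False) -> 196 features
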
def run_extra_credit_1_audio(k):
    """Extra credit 1: audio (yes/no) classification with binary features and smoothing k."""
    print("================AUDIO - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_segmented_dataset, {' ': 1, '%': 0}, (25, 10), 2)
    test_dataset = feature_map_part1_1(test_segmented_dataset, {' ': 1, '%': 0}, (25, 10), 2)
    (model, _, examples, confusion_matrix) = run(k, train_dataset, test_dataset)
    print("=====================================================\n")
def run_part1_digit_1(k):
    """Part 1.1: digit classification with binary per-pixel features, plus analysis plots.

    Figure 1 shows each digit class's lowest- and highest-posterior test example;
    Figure 2 shows log-likelihood maps and odds ratios for the four most-confused class pairs.
    """
    print("================DIGIT - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_raw_digit_dataset, {' ': 0, '#': 1, '+': 1}, (28, 28), 10)
    test_dataset = feature_map_part1_1(test_raw_digit_dataset, {' ': 0, '#': 1, '+': 1}, (28, 28), 10)
    (model, _, examples, confusion_matrix) = run(0.1, train_dataset, test_dataset)  # smoothing fixed at 0.1; k unused

    # Find the four most-confused class pairs (largest off-diagonal confusion counts).
    confusion_matrix_ndig = np.array(confusion_matrix)
    np.fill_diagonal(confusion_matrix_ndig, 0)
    confusion_pairs = largest_indices(confusion_matrix_ndig, 4)
    confusion_pairs = list(zip(confusion_pairs[0], confusion_pairs[1]))

    (priors, distributions) = model

    # Figure 1: two classes per row, each showing its lowest- and highest-posterior example.
    fig1, axes1 = plt.subplots(nrows=5, ncols=4, figsize=(6, 7.5))
    fig1.subplots_adjust(left=0.07, right=0.92, top=0.93, bottom=0.05, wspace=0.05, hspace=0.05)
    for i in np.arange(0, 5):
        axs = axes1[i]
        ims = [axs[0].imshow(np.reshape(1 - examples[2 * i][0], (28, 28)), interpolation='nearest', cmap="Greys"),
               axs[1].imshow(np.reshape(1 - examples[2 * i][1], (28, 28)), interpolation='nearest', cmap="Greys"),
               axs[2].imshow(np.reshape(1 - examples[2 * i + 1][0], (28, 28)), interpolation='nearest', cmap="Greys"),
               axs[3].imshow(np.reshape(1 - examples[2 * i + 1][1], (28, 28)), interpolation='nearest', cmap="Greys")]
        for j in np.arange(0, 4):
            axs[j].set_axis_off()
    fig1.suptitle('Example Pairs with Lowest (left) and Highest (right) posterior probability', fontsize=12)

    # Figure 2: per confused pair, the two classes' log-likelihood maps and their difference (log odds ratio).
    fig2, axes2 = plt.subplots(nrows=4, ncols=3, figsize=(6, 8))
    fig2.subplots_adjust(left=0.05, right=0.92, top=0.95, bottom=0.05, wspace=0.35, hspace=0.01)
    for pairi in np.arange(0, 4):
        axs = axes2[pairi]
        logp1 = np.log(np.array([d[1] for d in distributions[confusion_pairs[pairi][0]]]))
        logp2 = np.log(np.array([d[1] for d in distributions[confusion_pairs[pairi][1]]]))
        ims = [axs[0].imshow(np.reshape(logp1, (28, 28)), interpolation='nearest', cmap='jet'),
               axs[1].imshow(np.reshape(logp2, (28, 28)), interpolation='nearest', cmap='jet'),
               axs[2].imshow(np.reshape(logp1 - logp2, (28, 28)), interpolation='nearest', cmap='jet')]
        for j in np.arange(0, 3):
            axs[j].set_axis_off()
            cbar = plt.colorbar(ims[j], ax=axs[j], fraction=0.046, pad=0.04)
            cbar.locator = ticker.MaxNLocator(nbins=5)
            cbar.update_ticks()
    fig2.suptitle('Odds ratios', fontsize=16)

    plt.show()
    print("=====================================================\n")
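
# largest_indices (used in run_part1_digit_1) is not defined in this file. A minimal
# sketch of the behaviour the call above relies on (an assumption, not the original
# helper): return the row/column indices of the n largest entries of a 2-D array,
# in descending order of value, in the same tuple-of-arrays form as np.unravel_index.
def largest_indices(arr, n):
    flat = arr.ravel()
    idx = np.argpartition(flat, -n)[-n:]   # indices of the n largest values (unordered)
    idx = idx[np.argsort(-flat[idx])]      # sort them by descending value
    return np.unravel_index(idx, arr.shape)
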
# Bag-of-n-grams cross-validation driver. KFold comes from scikit-learn; the
# project-local modules (bag_of_ngrams, knn, decision_tree, random_forest,
# naive_bayes, linear_svm, metric_storage) are assumed to be imported elsewhere
# in this module.
from sklearn.model_selection import KFold


def data_split_bow_run(algorithm, emotion_str, modifier, n_folds, df, n_grams):
    """Run n_folds-fold cross-validation of the chosen algorithm on bag-of-n-grams
    features for one emotion, store the averaged metrics, and return them."""
    kf = KFold(n_splits=n_folds, shuffle=True, random_state=12)

    # Per-class and averaged metrics, accumulated over the folds.
    metrics_dict = {
        label: {
            "precision": [], "recall": [], "f1-score": [], "support": [],
            "avg": {"precision": 0, "recall": 0, "f1-score": 0, "support": 0},
        }
        for label in (emotion_str, "no_" + emotion_str, "macro avg", "weighted avg")
    }
    metrics_dict["accuracy"] = {"list": [], "avg": 0}

    for training_index, test_index in kf.split(df.index.tolist()):
        training_ids, training_texts, training_emotion_scores = [], [], []
        test_ids, test_texts, test_emotion_scores = [], [], []
        for index, row in df.iterrows():
            if index in training_index:
                training_ids.append(index)
                training_texts.append(str(row.preprocessed_tweet_text))
                training_emotion_scores.append(str(row[emotion_str + "_str"]))
            elif index in test_index:
                test_ids.append(index)
                test_texts.append(str(row.preprocessed_tweet_text))
                test_emotion_scores.append(str(row[emotion_str + "_str"]))

        # Build the bag-of-n-grams representation requested by the caller.
        if n_grams == "unigrams":
            training_instances_bow, test_instances_bow = bag_of_ngrams.unigrams(
                training_texts, test_texts)
        elif n_grams == "bigrams":
            training_instances_bow, test_instances_bow = bag_of_ngrams.bigrams(
                training_texts, test_texts)
        elif n_grams == "trigrams":
            training_instances_bow, test_instances_bow = bag_of_ngrams.trigrams(
                training_texts, test_texts)
        elif n_grams == "unigrams_bigrams":
            training_instances_bow, test_instances_bow = bag_of_ngrams.unigrams_and_bigrams(
                training_texts, test_texts)
        elif n_grams == "unigrams_bigrams_trigrams":
            training_instances_bow, test_instances_bow = bag_of_ngrams.unigrams_bigrams_and_trigrams(
                training_texts, test_texts)
        else:
            return

        # Train and evaluate the requested algorithm on this fold.
        if algorithm == "knn":
            metrics = knn.run(modifier, training_instances_bow, training_emotion_scores,
                              test_instances_bow, test_emotion_scores)
        elif algorithm == "decision_tree":
            metrics = decision_tree.run(training_instances_bow, training_emotion_scores,
                                        test_instances_bow, test_emotion_scores)
        elif algorithm == "random_forest":
            metrics = random_forest.run(modifier, training_instances_bow, training_emotion_scores,
                                        test_instances_bow, test_emotion_scores)
        elif algorithm == "naive_bayes":
            metrics = naive_bayes.run(modifier, training_instances_bow, training_emotion_scores,
                                      test_instances_bow, test_emotion_scores)
        elif algorithm == "linear_svm":
            metrics = linear_svm.run(modifier, training_instances_bow, training_emotion_scores,
                                     test_instances_bow, test_emotion_scores)
        else:
            return

        # Accumulate this fold's metrics.
        for key in metrics:
            if key in metrics_dict:
                if key == "accuracy":
                    metrics_dict[key]["list"].append(metrics[key])
                    continue
                metrics_dict[key]["precision"].append(metrics[key]["precision"])
                metrics_dict[key]["recall"].append(metrics[key]["recall"])
                metrics_dict[key]["f1-score"].append(metrics[key]["f1-score"])
                metrics_dict[key]["support"].append(metrics[key]["support"])

    # Average every metric over the folds.
    for key in metrics_dict:
        if key == "accuracy":
            metrics_dict[key]["avg"] = average(metrics_dict[key]["list"])
            continue
        metrics_dict[key]["avg"]["precision"] = average(metrics_dict[key]["precision"])
        metrics_dict[key]["avg"]["recall"] = average(metrics_dict[key]["recall"])
        metrics_dict[key]["avg"]["f1-score"] = average(metrics_dict[key]["f1-score"])
        metrics_dict[key]["avg"]["support"] = average(metrics_dict[key]["support"])

    metric_id = metric_storage.store_metrics(metrics_dict, emotion_str, algorithm, modifier, n_grams)

    emotion = [
        metrics_dict[emotion_str]["avg"]["precision"],
        metrics_dict[emotion_str]["avg"]["recall"],
        metrics_dict[emotion_str]["avg"]["f1-score"]
    ]
    no_emotion = [
        metrics_dict["no_" + emotion_str]["avg"]["precision"],
        metrics_dict["no_" + emotion_str]["avg"]["recall"],
        metrics_dict["no_" + emotion_str]["avg"]["f1-score"]
    ]
    weighted_avg = [
        metrics_dict["weighted avg"]["avg"]["precision"],
        metrics_dict["weighted avg"]["avg"]["recall"],
        metrics_dict["weighted avg"]["avg"]["f1-score"]
    ]
    macro_avg = [
        metrics_dict["macro avg"]["avg"]["precision"],
        metrics_dict["macro avg"]["avg"]["recall"],
        metrics_dict["macro avg"]["avg"]["f1-score"]
    ]
    accuracy = metrics_dict["accuracy"]["avg"]

    return metric_id, emotion, no_emotion, weighted_avg, macro_avg, accuracy
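
# The average helper used above is not defined in this section. A minimal sketch
# of what the averaging loop relies on (an assumption, not the project's code):
# the arithmetic mean of the per-fold values, with 0 for an empty list.
def average(values):
    return sum(values) / len(values) if values else 0

# Hypothetical invocation, for illustration only (the DataFrame column names
# preprocessed_tweet_text and <emotion>_str are taken from the loop above):
#   metric_id, emo, no_emo, w_avg, m_avg, acc = data_split_bow_run(
#       "naive_bayes", "anger", 1.0, 10, tweets_df, "unigrams")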