예제 #1
0
def run_part22_audio(k):
    """Build the MFC feature sets and evaluate them with ``run``.

    The module-level ``train_audio_dataset`` and ``test_audio_dataset`` are
    each converted by ``feature_map_part2_2`` using the character mapping
    ``' ' -> 1`` / ``'%' -> 0``, the tuple ``(30, 13)`` and the value ``5``.

    Args:
        k: Forwarded unchanged as the first argument of ``run``.
    """
    print("================MFC - BINARY FEATURE================")
    print("Importing data...")

    char_to_value = {' ': 1, '%': 0}
    shape = (30, 13)  # presumably the per-sample grid size -- TODO confirm

    # Each call receives its own copy of the mapping, as in the literal form.
    train_dataset = feature_map_part2_2(
        train_audio_dataset, dict(char_to_value), shape, 5)
    test_dataset = feature_map_part2_2(
        test_audio_dataset, dict(char_to_value), shape, 5)

    run(k, train_dataset, test_dataset)
    print("===================================================\n")
예제 #2
0
def run_part1_face_1(k):
    """Evaluate the face data using ``feature_map_part1_1`` features.

    The module-level ``train_raw_face_dataset`` and ``test_raw_face_dataset``
    are converted with the character mapping ``' ' -> 0`` / ``'#' -> 1``, the
    tuple ``(70, 60)`` and the value ``2``, then handed to ``run``.

    Args:
        k: Not used by this function; ``run`` is always called with ``0.1``
            as its first argument.
    """
    print("================FACE - BINARY FEATURE================")
    print("Importing data...")

    char_to_value = {' ': 0, '#': 1}
    shape = (70, 60)  # presumably image height x width -- TODO confirm

    # Each call receives its own copy of the mapping, as in the literal form.
    train_dataset = feature_map_part1_1(
        train_raw_face_dataset, dict(char_to_value), shape, 2)
    test_dataset = feature_map_part1_1(
        test_raw_face_dataset, dict(char_to_value), shape, 2)

    run(0.1, train_dataset, test_dataset)

    print("=====================================================\n")
예제 #3
0
def run_part1_digit_extra_1(k):
    """Evaluate the digit data with three-valued per-character features.

    The module-level ``train_raw_digit_dataset`` and
    ``test_raw_digit_dataset`` are converted by ``feature_map_part1_1`` with
    the mapping ``' ' -> 0``, ``'#' -> 1``, ``'+' -> 2``, the tuple
    ``(28, 28)`` and the value ``10``, then handed to ``run``.

    Args:
        k: Not used by this function; ``run`` is always called with ``0.1``
            as its first argument.
    """
    print("================DIGIT - TERNARY FEATURE================")
    print("Importing data...")

    char_to_value = {' ': 0, '#': 1, '+': 2}
    shape = (28, 28)  # presumably image height x width -- TODO confirm

    # Each call receives its own copy of the mapping, as in the literal form.
    train_dataset = feature_map_part1_1(
        train_raw_digit_dataset, dict(char_to_value), shape, 10)
    test_dataset = feature_map_part1_1(
        test_raw_digit_dataset, dict(char_to_value), shape, 10)

    run(0.1, train_dataset, test_dataset)

    print("=====================================================\n")
예제 #4
0
def run_part1_face_2(k, h, w, overlap):
    """Evaluate the face data using ``feature_map_part1_2`` group features.

    Prints a banner that depends on ``overlap``, converts the module-level
    raw face datasets with ``feature_map_part1_2`` (mapping ``' ' -> 0`` /
    ``'#' -> 1``, tuple ``(70, 60)``, value ``2``, group ``(h, w)`` and the
    ``overlap`` flag), then hands both results to ``run``.

    Args:
        k: Not used by this function; ``run`` is always called with ``0.1``
            as its first argument.
        h: First component of the group tuple; also shown in the banner.
        w: Second component of the group tuple; also shown in the banner.
        overlap: Passed through to ``feature_map_part1_2``; its truthiness
            selects which banner is printed.
    """
    banner = (
        "===========FACE - PIXEL GROUP (%d * %d) OVERLAP=========="
        if overlap else
        "===============FACE - PIXEL GROUP (%d * %d) ============="
    )
    print(banner % (h, w))
    print("Importing data...")

    char_to_value = {' ': 0, '#': 1}
    shape = (70, 60)  # presumably image height x width -- TODO confirm
    group = (h, w)

    # Each call receives its own copy of the mapping, as in the literal form.
    train_dataset = feature_map_part1_2(
        train_raw_face_dataset, dict(char_to_value), shape, 2, group, overlap)
    test_dataset = feature_map_part1_2(
        test_raw_face_dataset, dict(char_to_value), shape, 2, group, overlap)

    run(0.1, train_dataset, test_dataset)
    print("=====================================================\n")
예제 #5
0
def run_part2_extra3(k):
    """Evaluate the yes/no audio data with ``feature_map_part2_extra3``.

    The module-level yes/no data and label arrays are concatenated (yes
    first, then no) into local ``(data, labels)`` tuples for the train and
    test splits. Each tuple is converted by ``feature_map_part2_extra3`` with
    the mapping ``' ' -> 1`` / ``'%' -> 0``, the tuple ``(25, 10)`` and the
    value ``2``; the results are passed to ``run``.

    Args:
        k: Forwarded unchanged as the first argument of ``run``.
    """
    print("================AUDIO - AVERAGE FEATURE================")
    print("Importing data...")

    # Same evaluation order as a nested tuple literal: data first, then labels.
    train_data = np.concatenate((train_yes_data, train_no_data))
    train_labels = np.concatenate((train_yes_label, train_no_label))
    train_audio_dataset = (train_data, train_labels)

    test_data = np.concatenate((test_yes_data, test_no_data))
    test_labels = np.concatenate((test_yes_label, test_no_label))
    test_audio_dataset = (test_data, test_labels)

    char_to_value = {' ': 1, '%': 0}
    shape = (25, 10)  # presumably the per-sample grid size -- TODO confirm

    # Each call receives its own copy of the mapping, as in the literal form.
    train_dataset = feature_map_part2_extra3(
        train_audio_dataset, dict(char_to_value), shape, 2)
    test_dataset = feature_map_part2_extra3(
        test_audio_dataset, dict(char_to_value), shape, 2)
    run(k, train_dataset, test_dataset)

    print("====================================================\n")
예제 #6
0
def run_part1_digit_2(k, h, w, overlap):
    """Evaluate the digit data using ``feature_map_part1_2`` group features.

    Prints a banner that depends on ``overlap``, converts the module-level
    raw digit datasets with ``feature_map_part1_2`` (mapping ``' ' -> 0``,
    ``'#' -> 1``, ``'+' -> 1``, tuple ``(28, 28)``, value ``10``, group
    ``(h, w)`` and the ``overlap`` flag), then hands both results to ``run``.

    Args:
        k: Not used by this function; ``run`` is always called with ``0.1``
            as its first argument.
        h: First component of the group tuple; also shown in the banner.
        w: Second component of the group tuple; also shown in the banner.
        overlap: Passed through to ``feature_map_part1_2``; its truthiness
            selects which banner is printed.
    """
    banner = (
        "===========DIGIT - PIXEL GROUP (%d * %d) OVERLAP=========="
        if overlap else
        "===============DIGIT - PIXEL GROUP (%d * %d) ============="
    )
    print(banner % (h, w))
    print("Importing data...")

    char_to_value = {' ': 0, '#': 1, '+': 1}
    shape = (28, 28)  # presumably image height x width -- TODO confirm
    group = (h, w)

    # Each call receives its own copy of the mapping, as in the literal form.
    train_dataset = feature_map_part1_2(
        train_raw_digit_dataset, dict(char_to_value), shape, 10, group,
        overlap)
    test_dataset = feature_map_part1_2(
        test_raw_digit_dataset, dict(char_to_value), shape, 10, group,
        overlap)

    run(0.1, train_dataset, test_dataset)

    print("=====================================================\n")
예제 #7
0
def run_extra_credit_1_audio(k):
    """Evaluate the segmented audio data with ``feature_map_part1_1``.

    The module-level ``train_segmented_dataset`` and
    ``test_segmented_dataset`` are converted with the mapping ``' ' -> 1`` /
    ``'%' -> 0``, the tuple ``(25, 10)`` and the value ``2``. The result of
    ``run`` is unpacked into four values, none of which is used afterwards.

    Args:
        k: Forwarded unchanged as the first argument of ``run``.
    """
    print("================AUDIO - BINARY FEATURE================")
    print("Importing data...")

    char_to_value = {' ': 1, '%': 0}
    shape = (25, 10)  # presumably the per-sample grid size -- TODO confirm

    # Each call receives its own copy of the mapping, as in the literal form.
    train_dataset = feature_map_part1_1(
        train_segmented_dataset, dict(char_to_value), shape, 2)
    test_dataset = feature_map_part1_1(
        test_segmented_dataset, dict(char_to_value), shape, 2)

    # Keep the 4-way unpack: it fails if ``run`` returns another arity.
    outcome = run(k, train_dataset, test_dataset)
    model, _, examples, confusion_matrix = outcome
    print("=====================================================\n")
예제 #8
0
def run_part1_digit_1(k):
    """Evaluate binary digit features and plot examples and log-ratio maps.

    Steps, in order:

    1. Convert the module-level raw digit datasets with
       ``feature_map_part1_1`` (mapping ``' ' -> 0``, ``'#' -> 1``,
       ``'+' -> 1``, tuple ``(28, 28)``, value ``10``) and call ``run``.
    2. Zero the diagonal of a copy of the confusion matrix and ask
       ``largest_indices`` for the 4 largest remaining entries, giving four
       index pairs.
    3. Figure 1 (5 x 4 grid): for each row ``i``, show both entries of
       ``examples[2*i]`` and ``examples[2*i + 1]`` as inverted 28x28 images.
    4. Figure 2 (4 x 3 grid): for each index pair, show ``log`` of element
       ``[1]`` of every distribution entry for both members of the pair, and
       their difference, each with a colorbar.

    Args:
        k: Not used by this function; ``run`` is always called with ``0.1``
            as its first argument.
    """
    print("================DIGIT - BINARY FEATURE================")
    print("Importing data...")
    char_to_value = {' ': 0, '#': 1, '+': 1}
    image_shape = (28, 28)

    # Each call receives its own copy of the mapping, as in the literal form.
    train_dataset = feature_map_part1_1(
        train_raw_digit_dataset, dict(char_to_value), image_shape, 10)
    test_dataset = feature_map_part1_1(
        test_raw_digit_dataset, dict(char_to_value), image_shape, 10)

    model, _, examples, confusion_matrix = run(0.1, train_dataset,
                                               test_dataset)

    # Work on a copy so the diagonal can be zeroed before ranking entries.
    off_diagonal = np.array(confusion_matrix)
    np.fill_diagonal(off_diagonal, 0)
    top_entries = largest_indices(off_diagonal, 4)
    confusion_pairs = list(zip(top_entries[0], top_entries[1]))

    _priors, distributions = model

    def log_of_index_one(class_index):
        # Element [1] of each entry is presumably P(feature == 1 | class);
        # TODO confirm against the model builder.
        return np.log(
            np.array([entry[1] for entry in distributions[class_index]]))

    fig1, axes1 = plt.subplots(nrows=5, ncols=4, figsize=(6, 7.5))
    fig1.subplots_adjust(left=0.07, right=0.92, top=0.93, bottom=0.05,
                         wspace=0.05, hspace=0.05)
    for row in range(5):
        row_axes = axes1[row]
        first, second = examples[2 * row], examples[2 * row + 1]
        samples = (first[0], first[1], second[0], second[1])
        for axis, sample in zip(row_axes, samples):
            axis.imshow(np.reshape(1 - sample, image_shape),
                        interpolation='nearest',
                        cmap="Greys")
        for axis in row_axes:
            axis.set_axis_off()
    # Issued before the second figure exists, so it titles figure 1.
    plt.suptitle(
        'Example Pairs with Lowest(left) and Highest(right) posterior probability',
        fontsize=12)

    fig2, axes2 = plt.subplots(nrows=4, ncols=3, figsize=(6, 8))
    fig2.subplots_adjust(left=0.05, right=0.92, top=0.95, bottom=0.05,
                         wspace=0.35, hspace=0.01)
    for pair_number in range(4):
        row_axes = axes2[pair_number]
        pair = confusion_pairs[pair_number]
        logp1 = log_of_index_one(pair[0])
        logp2 = log_of_index_one(pair[1])
        panels = (logp1, logp2, logp1 - logp2)
        images = [
            axis.imshow(np.reshape(panel, image_shape),
                        interpolation='nearest',
                        cmap='jet')
            for axis, panel in zip(row_axes, panels)
        ]
        for axis, image in zip(row_axes, images):
            axis.set_axis_off()
            cbar = plt.colorbar(image, ax=axis, fraction=0.046, pad=0.04)
            cbar.locator = ticker.MaxNLocator(nbins=5)
            cbar.update_ticks()
    plt.suptitle('Odds ratios', fontsize=16)
    plt.show()

    print("=====================================================\n")
예제 #9
0
# Per-label fields collected from every fold's metrics report.
_REPORT_FIELDS = ("precision", "recall", "f1-score", "support")

# Maps the ``n_grams`` argument to the ``bag_of_ngrams`` function to call.
_NGRAM_VECTORIZERS = {
    "unigrams": "unigrams",
    "bigrams": "bigrams",
    "trigrams": "trigrams",
    "unigrams_bigrams": "unigrams_and_bigrams",
    "unigrams_bigrams_trigrams": "unigrams_bigrams_and_trigrams",
}

# Values of ``algorithm`` that ``data_split_bow_run`` knows how to dispatch.
_KNOWN_ALGORITHMS = ("knn", "decision_tree", "random_forest", "naive_bayes",
                     "linear_svm")


def _new_metrics_dict(emotion_str):
    """Return an empty cross-validation accumulator for *emotion_str*."""

    def report_entry():
        # Built fresh on every call so no two labels share a list or dict.
        entry = {field: [] for field in _REPORT_FIELDS}
        entry["avg"] = {field: 0 for field in _REPORT_FIELDS}
        return entry

    return {
        emotion_str: report_entry(),
        "no_" + emotion_str: report_entry(),
        "macro avg": report_entry(),
        "weighted avg": report_entry(),
        "accuracy": {
            "list": [],
            "avg": 0
        },
    }


def _run_algorithm(algorithm, modifier, train_x, train_y, test_x, test_y):
    """Call the ``run`` function of the classifier module named *algorithm*."""
    if algorithm == "knn":
        return knn.run(modifier, train_x, train_y, test_x, test_y)
    if algorithm == "decision_tree":
        # The decision tree runner is the only one that takes no modifier.
        return decision_tree.run(train_x, train_y, test_x, test_y)
    if algorithm == "random_forest":
        return random_forest.run(modifier, train_x, train_y, test_x, test_y)
    if algorithm == "naive_bayes":
        return naive_bayes.run(modifier, train_x, train_y, test_x, test_y)
    if algorithm == "linear_svm":
        return linear_svm.run(modifier, train_x, train_y, test_x, test_y)
    raise ValueError("unknown algorithm: %r" % (algorithm,))


def data_split_bow_run(algorithm, emotion_str, modifier, n_folds, df, n_grams):
    """Cross-validate one classifier on bag-of-n-grams features.

    ``df`` is split into ``n_folds`` shuffled folds (``random_state=12``).
    For each fold the texts are vectorised by the ``bag_of_ngrams`` function
    selected by ``n_grams``, the classifier selected by ``algorithm`` is run,
    and the per-label metrics it returns are collected. The collected values
    are averaged with ``average``, stored through
    ``metric_storage.store_metrics`` and summarised in the return value.

    Rows are selected by position (``.iloc``) because ``KFold.split`` yields
    positional indices. Selecting by index label, as a membership test of the
    label in the fold indices would, silently drops or misassigns rows
    whenever ``df`` does not have a 0..n-1 index.

    Args:
        algorithm: One of ``"knn"``, ``"decision_tree"``,
            ``"random_forest"``, ``"naive_bayes"``, ``"linear_svm"``.
        emotion_str: Label name. The target column is
            ``emotion_str + "_str"`` and the metrics are read under the keys
            ``emotion_str`` and ``"no_" + emotion_str``.
        modifier: Passed as the first argument to every classifier's ``run``
            except the decision tree's, and to ``store_metrics``.
        n_folds: Number of folds for ``KFold``.
        df: DataFrame with a ``preprocessed_tweet_text`` column and the
            target column described above.
        n_grams: One of ``"unigrams"``, ``"bigrams"``, ``"trigrams"``,
            ``"unigrams_bigrams"``, ``"unigrams_bigrams_trigrams"``.

    Returns:
        ``None`` if ``n_grams`` or ``algorithm`` is not recognised (checked
        before any fold is processed). Otherwise the tuple
        ``(metric_id, emotion, no_emotion, weighted_avg, macro_avg,
        accuracy)`` where the four middle items are
        ``[precision, recall, f1-score]`` averages and ``accuracy`` is the
        averaged accuracy.
    """
    vectorizer_name = _NGRAM_VECTORIZERS.get(n_grams)
    if vectorizer_name is None or algorithm not in _KNOWN_ALGORITHMS:
        return None
    vectorize = getattr(bag_of_ngrams, vectorizer_name)

    kf = KFold(n_splits=n_folds, shuffle=True, random_state=12)
    metrics_dict = _new_metrics_dict(emotion_str)

    text_column = df["preprocessed_tweet_text"]
    label_column = df[emotion_str + "_str"]

    for training_index, test_index in kf.split(df.index.tolist()):
        # Positional selection; one pass per fold instead of a membership
        # scan for every row.
        training_texts = [str(t) for t in text_column.iloc[training_index]]
        training_emotion_scores = [
            str(s) for s in label_column.iloc[training_index]
        ]
        test_texts = [str(t) for t in text_column.iloc[test_index]]
        test_emotion_scores = [str(s) for s in label_column.iloc[test_index]]

        training_instances_bow, test_instances_bow = vectorize(
            training_texts, test_texts)

        metrics = _run_algorithm(algorithm, modifier, training_instances_bow,
                                 training_emotion_scores, test_instances_bow,
                                 test_emotion_scores)

        # Keep only the report keys this function tracks.
        for key in metrics:
            if key not in metrics_dict:
                continue
            if key == "accuracy":
                metrics_dict[key]["list"].append(metrics[key])
                continue
            for field in _REPORT_FIELDS:
                metrics_dict[key][field].append(metrics[key][field])

    for key, entry in metrics_dict.items():
        if key == "accuracy":
            entry["avg"] = average(entry["list"])
            continue
        for field in _REPORT_FIELDS:
            entry["avg"][field] = average(entry[field])

    metric_id = metric_storage.store_metrics(metrics_dict, emotion_str,
                                             algorithm, modifier, n_grams)

    def summary(label):
        averages = metrics_dict[label]["avg"]
        return [
            averages["precision"], averages["recall"], averages["f1-score"]
        ]

    emotion = summary(emotion_str)
    no_emotion = summary("no_" + emotion_str)
    weighted_avg = summary("weighted avg")
    macro_avg = summary("macro avg")
    accuracy = metrics_dict["accuracy"]["avg"]
    return metric_id, emotion, no_emotion, weighted_avg, macro_avg, accuracy