def correlation_avg_preds(num):
    """Correlate the predictions in ``pred_<num>.txt`` with averaged test labels.

    The test labels are averaged along axis 1, passed through ``fix_labels``
    and then handed, together with the prediction file name, to ``get_corr``.

    Args:
        num: integer run number used to build the file name ``pred_<num>.txt``.
    """
    averaged = np.mean(D.basic_load_personality_labels('test'), axis=1)
    fixed = fix_labels(averaged)

    pred_file = 'pred_%d.txt' % num
    get_corr(pred_file, fixed)
예제 #2
0
def plot_labels():
    """Save a 5-bin histogram of the agreeableness column of the test labels.

    The figure is written to ``<save_path>/agreeableness_hist.png``.
    """
    agreeableness_col = 3
    values = basic_load_personality_labels('test')[:, agreeableness_col]

    plt.figure()
    plt.hist(values, bins=5)
    plt.title('agreeableness labels')

    out_file = '%s/%s.png' % (save_path, 'agreeableness_hist')
    plt.savefig(out_file)
def correlations_ground_truth(trait, name):
    """Print Pearson's r and p-value of a prediction file vs. one test-label column.

    Args:
        trait: one of 'O', 'C', 'E', 'A', 'S'; its position in that list selects
            the label column (assumes the labels are stored in that order).
        name: file name below ``P.LOG_BASE``, read as comma-delimited floats.

    Raises:
        ValueError: if ``trait`` is not one of the five letters.
    """
    column = ['O', 'C', 'E', 'A', 'S'].index(trait)
    labels = D.basic_load_personality_labels('test')[:, column]

    predicted = np.genfromtxt(os.path.join(P.LOG_BASE, name),
                              delimiter=',',
                              dtype=float)

    r, p = stats.pearsonr(predicted, labels)
    print(name, 'r: ', r, 'p: ', p)
예제 #4
0
def pearson_r(tr=None):
    """Print the Pearson correlation of the mean-label baseline with the test labels.

    The baseline prediction is the per-column mean of the training labels,
    repeated once for every test sample.

    Args:
        tr: optional trait letter, one of 'O', 'C', 'E', 'A', 'S'. When given,
            the correlation is computed for that label column only. When
            ``None``, labels and baseline are each averaged over axis 1 first
            and the correlation of those per-sample averages is printed.

    Raises:
        ValueError: if ``tr`` is given but is not one of the five letters.

    Note:
        The baseline is identical for every sample, so the coefficient is
        degenerate: expect NaN (possibly with a runtime warning) or pure
        floating-point noise, depending on the numpy/scipy version.
    """
    print('Initializing')

    test_labels = D.basic_load_personality_labels('test')
    train_labels = D.basic_load_personality_labels('train')

    mean_label = np.mean(train_labels, axis=0)
    prediction = np.tile(mean_label, (test_labels.shape[0], 1))

    if tr is not None:
        all_traits = ['O', 'C', 'E', 'A', 'S']
        t = all_traits.index(tr)

        # TODO: idk if pearson is the good test for this
        # np.corrcoef returns the full 2x2 correlation matrix; the coefficient
        # between the two inputs is the off-diagonal entry. Formatting the
        # whole matrix with %f raised a TypeError.
        cor_coeff = np.corrcoef(test_labels[:, t], prediction[:, t])[0, 1]

        print('pearson correlation coef trait %s: %f' % (tr, cor_coeff))
    else:
        # Average over the trait axis exactly once; averaging the already
        # 1-D result a second time over axis 1 raised an AxisError.
        cor_coeff = stats.pearsonr(np.mean(test_labels, axis=1),
                                   np.mean(prediction, axis=1))[0]

        print('pearson correlation coef: %s' % (str(cor_coeff)))

        # previously recorded output:
        # pearson correlation coef: -1.684251e-07


# pearson_r('O')
예제 #5
0
def binomial_test(model1, model2, which_trait=None):
    """Run a two-sided binomial (sign) test on which model has the lower error.

    For every test sample the absolute error of both models is compared; the
    number of samples where ``model1`` is strictly better is tested against a
    success probability of 0.5. The outcome is printed, nothing is returned.

    Args:
        model1: prediction file name (without ``.txt``) below ``P.LOG_BASE``.
        model2: prediction file name (without ``.txt``) below ``P.LOG_BASE``.
        which_trait: optional trait letter, one of 'O', 'C', 'E', 'A', 'S'.
            When given, only that label column is compared; prediction files
            with several columns are reduced to the matching column, while
            single-column (1-D) files are used as they are. When ``None``, the
            absolute errors are averaged over axis 1 per sample.

    Raises:
        ValueError: if ``which_trait`` is given but is not one of the letters.
    """
    if which_trait is None:
        print('binomial test %s vs. %s' % (model1, model2))
    else:
        print('binomial test %s vs. %s for trait %s' %
              (model1, model2, which_trait))

    model1 = os.path.join(P.LOG_BASE, model1 + '.txt')
    model2 = os.path.join(P.LOG_BASE, model2 + '.txt')

    model1_pred = np.genfromtxt(model1, delimiter=',')
    model2_pred = np.genfromtxt(model2, delimiter=',')

    ground_truth = D.basic_load_personality_labels('test')

    if which_trait is not None:
        traits = ['O', 'C', 'E', 'A', 'S']
        idx = traits.index(which_trait)
        ground_truth = ground_truth[:, idx]

        # Use the same dimensionality check for both models: a 1-D array has
        # no shape[1], so probing it raised IndexError for single-trait files.
        if model1_pred.ndim > 1:
            model1_pred = model1_pred[:, idx]

        if model2_pred.ndim > 1:
            model2_pred = model2_pred[:, idx]

    model1_diff = np.abs(model1_pred - ground_truth)
    model2_diff = np.abs(model2_pred - ground_truth)

    if which_trait is None:
        model1_diff = np.mean(model1_diff, axis=1)
        model2_diff = np.mean(model2_diff, axis=1)

    n_samples = ground_truth.shape[0]
    # Ties count as "model1 not better" but still contribute to n_samples.
    model1_better_than_model2 = int(np.sum(model1_diff < model2_diff))

    # stats.binom_test is deprecated and absent from recent SciPy releases;
    # prefer its replacement and only fall back on old installations.
    if hasattr(stats, 'binomtest'):
        p = stats.binomtest(k=model1_better_than_model2, n=n_samples).pvalue
    else:
        p = stats.binom_test(x=model1_better_than_model2, n=n_samples)
    alpha = 0.05 / 2

    sig = 'significant' if p < alpha else 'not significant'

    print('m1 > m2 %d times, out of %d. p value: %s. difference is %s' %
          (model1_better_than_model2, n_samples, str(p), sig))
예제 #6
0
def convert(pred_path, trait):
    """Print the mean absolute error of a single-trait prediction file.

    Args:
        pred_path: file name below ``P.LOG_BASE`` holding the predictions.
        trait: one of 'O', 'C', 'E', 'A', 'S'; selects the test-label column
            the predictions are compared with.

    If the file does not exist a message is printed and nothing else happens.
    """
    all_traits = ['O', 'C', 'E', 'A', 'S']
    assert trait in all_traits

    full_path = os.path.join(P.LOG_BASE, pred_path)
    column = all_traits.index(trait)

    if os.path.exists(full_path):
        predicted = np.genfromtxt(full_path, dtype=float)
        truth = D.basic_load_personality_labels('test')[:, column]

        abs_error = np.abs(predicted - truth)
        print('mean loss trait %s: %f' % (trait, np.mean(abs_error)))
    else:
        print('wrong prediction folder')
예제 #7
0
def plot_mae_predictions_agreeableness():
    """Scatter single-trait agreeableness predictions against the test labels.

    One figure is produced for each of the 'face', 'bg' and 'all' prediction
    files: labels on the x axis, predictions on the y axis, points coloured by
    absolute error, plus a red identity line. Each figure is saved below
    ``save_path``.
    """
    names = ['face', 'bg', 'all']
    # single-trait prediction files, in the same order as `names`
    paths = [
        os.path.join(P.LOG_BASE, file_name)
        for file_name in ('pred_85_A.txt', 'pred_86_A.txt', 'pred_84_A.txt')
    ]

    agreeableness = 3
    ground_truth = basic_load_personality_labels('test')[:, agreeableness]

    for mode, pred_file in zip(names, paths):
        plt.figure()
        predicted = np.genfromtxt(pred_file, float, delimiter=',')

        abs_error = np.abs(ground_truth - predicted)

        points = plt.scatter(ground_truth, predicted, s=3, c=abs_error)
        # identity line: where a perfect prediction would lie
        plt.plot(ground_truth, ground_truth, 'r')

        plt.colorbar(points)
        plt.title('single Agreeableness: %s vs. labels' % (mode))
        plt.xlabel('labels')
        plt.ylabel('predictions')
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        plt.gca().set_aspect('equal', adjustable='box')

        figure_name = 'single_traits_A_pred_vs_labels_%s' % mode
        plt.savefig('%s/%s.png' % (save_path, figure_name))
예제 #8
0
def pearson_r_single_traits():
    """Plot per-trait correlations of single-trait models with the test labels.

    For each mode ('all', 'face', 'bg') the five files ``<prefix>_<trait>.txt``
    are read, every one is correlated (Pearson) with the test-label column at
    the same position, the p-values are printed and the rounded coefficients
    are drawn as the diagonal of a 5x5 matrix saved below
    ``P.PAPER_PLOTS/singles``.
    """
    print('Initializing')

    test_labels = D.basic_load_personality_labels('test')
    print(len(test_labels))

    pt = ['O', 'C', 'E', 'A', 'S']
    pt2 = ["O'", "C'", "E'", "A'", "S'"]

    # (mode name, path prefix); the 'lumi' mode (pred_87) is currently left out
    modes = [
        ('all', os.path.join(P.LOG_BASE, 'pred_96')),
        ('face', os.path.join(P.LOG_BASE, 'pred_85')),
        ('bg', os.path.join(P.LOG_BASE, 'pred_86')),
    ]

    for mode, prefix in modes:
        coefficients = np.zeros(5)
        p_vals = np.zeros(5)
        for col, letter in enumerate(pt):
            pred_vals = np.genfromtxt('%s_%s.txt' % (prefix, letter),
                                      delimiter=',',
                                      dtype='float')
            coefficients[col], p_vals[col] = stats.pearsonr(
                pred_vals, test_labels[:, col])

        round_num = 5 if mode == 'lumi' else 2

        # only the diagonal is filled: each model predicts a single trait
        corr_mat = np.diag(np.round(coefficients, decimals=round_num))

        print('mode: %s, p-values: %s' % (mode, str(p_vals)))

        # ------------------- plot matrix -------------------

        fig, ax = plt.subplots()
        ax.imshow(corr_mat)

        # one tick per trait, labelled with the trait letters
        ax.set_xticks(np.arange(len(pt)))
        ax.set_yticks(np.arange(len(pt2)))
        ax.set_xticklabels(pt)
        ax.set_yticklabels(pt2)

        plt.setp(ax.get_xticklabels(),
                 rotation=45,
                 ha="right",
                 rotation_mode="anchor")

        # write every matrix entry into its cell
        for (row, col), value in np.ndenumerate(corr_mat):
            ax.text(col, row, value, ha="center", va="center", color="w")

        ax.set_title("correlation '%s' vs. ground truth" % mode)
        fig.tight_layout()

        out_file = os.path.join(P.PAPER_PLOTS, 'singles',
                                'correlation_%s_v2.png' % mode)
        plt.savefig(out_file)
예제 #9
0
def pearson_r_all_traits():
    """Plot the 5x5 Pearson correlation matrix of predictions vs. test labels.

    Entry (i, j) is the correlation between prediction column i and test-label
    column j. Four prediction files are loaded, but only the 'all' model
    (``pred_94.txt``) is currently plotted; its figure is saved below
    ``P.PAPER_PLOTS``.
    """
    print('Initializing')

    test_labels = D.basic_load_personality_labels('test')

    def load(file_name):
        # comma-delimited float table below P.LOG_BASE
        return np.genfromtxt(os.path.join(P.LOG_BASE, file_name),
                             delimiter=',',
                             dtype='float')

    all_predictions = load('pred_94.txt')  # wd=0.0001
    face_predictions = load('pred_81.txt')
    bg_predictions = load('pred_82.txt')
    lum_predictions = load('pred_83.txt')

    # only the first entry is evaluated; the names list stays complete
    everyone = [all_predictions]
    everyone_txt = ['all_wd_001', 'face', 'bg', 'lumi']

    pt = ['O', 'C', 'E', 'A', 'S']
    pt2 = ["O'", "C'", "E'", "A'", "S'"]

    for mode, preds in zip(everyone_txt, everyone):
        is_lumi = mode == 'lumi'
        corr_mat = np.zeros((5, 5))

        if is_lumi:
            print('holdup')

        for row, pred_name in enumerate(pt):
            for col, label_name in enumerate(pt2):
                corr_mat[row][col] = stats.pearsonr(preds[:, row],
                                                    test_labels[:, col])[0]
                if is_lumi:
                    print("%s-%s: %f" %
                          (pred_name, label_name, corr_mat[row][col]))

        # 'lumi' gets more decimals than the other modes
        corr_mat = np.round(corr_mat, decimals=5 if is_lumi else 2)

        fig, ax = plt.subplots()
        ax.imshow(corr_mat)

        # one tick per trait, labelled with the trait letters
        ax.set_xticks(np.arange(len(pt)))
        ax.set_yticks(np.arange(len(pt2)))
        ax.set_xticklabels(pt)
        ax.set_yticklabels(pt2)

        plt.setp(ax.get_xticklabels(),
                 rotation=45,
                 ha="right",
                 rotation_mode="anchor")

        # write every matrix entry into its cell
        for (row, col), value in np.ndenumerate(corr_mat):
            ax.text(col, row, value, ha="center", va="center", color="w")

        ax.set_title("correlation '%s' vs. ground truth. wd=0.001" % mode)
        fig.tight_layout()

        out_file = os.path.join(P.PAPER_PLOTS, 'correlation_%s.png' % mode)
        plt.savefig(out_file)
예제 #10
0
import numpy as np
import deepimpression2.paths as P
import deepimpression2.chalearn30.data_utils as D
import deepimpression2.util as U
import os
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt

# Random baseline: mean absolute error of uniform random predictions in [0, 1),
# averaged over many independently seeded repetitions.
print('Initializing')

which = 'test'
# repetitions = 1001 # seed 42
repetitions = 1000  # seed 6
mother_seed = 6

labels = D.basic_load_personality_labels(which)

# one child seed per repetition, all derived from the mother seed
seeds = np.random.RandomState(mother_seed).randint(
    low=0, high=10000, size=repetitions)

# row r holds the per-sample error of repetition r
all_diff = np.zeros((repetitions, labels.shape[0]), dtype=np.float32)

for i, s in enumerate(seeds):
    random_predictions = np.random.RandomState(s).uniform(0, 1, labels.shape)
    # absolute error, averaged over axis 1 of the label array
    diff = np.mean(np.absolute(labels - random_predictions), axis=1)
    all_diff[i] = diff

# collapse the repetitions: one averaged error per sample
all_diff = np.mean(all_diff, axis=0)
예제 #11
0
def mae(trait_type='not_single', which='S'):
    """Print and plot the absolute error of the mean-training-label baseline.

    The baseline predicts, for every test sample, the per-column mean of the
    training labels. The absolute errors are printed as a mean loss and drawn
    as a 50-bin density histogram.

    Args:
        trait_type: ``'single'`` evaluates only the trait given by ``which``,
            records the per-sample errors via ``U.record_loss_all_test`` and
            saves the figure below ``P.FIGURES/train_62``. Any other value
            (the default) averages the errors over axis 1 and saves the figure
            below ``P.FIGURES/train_56``. This used to be a hard-coded local,
            which left the single-trait branch unreachable.
        which: trait letter, one of 'O', 'C', 'E', 'A', 'S'; only used when
            ``trait_type == 'single'``.

    Raises:
        ValueError: in single-trait mode, if ``which`` is not a known letter.
    """
    print('Initializing')

    test_labels = D.basic_load_personality_labels('test')
    train_labels = D.basic_load_personality_labels('train')

    mean_label = np.mean(train_labels, axis=0)
    diff = np.absolute(test_labels - mean_label)

    if trait_type == 'single':
        all_traits = ['O', 'C', 'E', 'A', 'S']
        diff = diff[:, all_traits.index(which)]
        U.record_loss_all_test(diff, trait=True)

        print('loss: %f' % np.mean(diff, axis=0))

        folder = 'train_62'
        title = 'histogram MAE avg train - test trait %s' % which
        figure_name = '%s_%s' % ('histdiff', which)
    else:
        diff = np.mean(diff, axis=1)

        print('loss: %f' % np.mean(diff))

        folder = 'train_56'
        title = 'histogram MAE avg train - test'
        figure_name = 'histdiff'

    save_path = os.path.join(P.FIGURES, folder)
    U.safe_mkdir(save_path)

    plt.figure()
    plt.hist(diff, 50, density=True, facecolor='g', alpha=0.75)
    plt.grid(True)
    plt.title(title)
    plt.savefig('%s/%s.png' % (save_path, figure_name))