Example #1
def colorfulness_analysis(model='densenet121', top_n=2500):
    """
    Experiment to analyze the relevance of the colorfulness attribute.
    See the metrics_color.colorfulness() function for more details on the attribute.
    :param model: predictions of :model: are used to compute the prediction scores
    :param top_n: number of elements in each series that will be plotted for analysis
    :return:
    """

    # Load test data and model results
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]

    # Compute scores and sort test data ids by score
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)

    # Compute metric for high score and low score data
    high_score_series = []
    low_score_series = []
    print(len(score_sorted_ids))
    for k in range(top_n):
        high_score_series.append(metrics_color.colorfulness(test_data[0][score_sorted_ids[-k-1]]))
        low_score_series.append(metrics_color.colorfulness(test_data[0][score_sorted_ids[k]]))

    # Plot box plot of the two series
    plotting.box_plot(high_score_series, low_score_series, name_s1='high prediction scores',
                      name_s2='low prediction scores', y_label='Colorfulness',
                      title='Colorfulness analysis (' + str(top_n) + ' images/series)')
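metrics_color.colorfulness() itself is not shown in these examples. Below is a minimal sketch of the metric it most plausibly wraps, the Hasler-Süsstrunk (2003) colorfulness measure; this is an assumption about the helper, written for 8-bit RGB images:

import numpy as np

def colorfulness(img):
    # Hypothetical stand-in: Hasler & Suesstrunk colorfulness from opponent-color statistics.
    r, g, b = (np.asarray(img, dtype=float)[..., c] for c in range(3))
    rg = r - g                    # red-green opponent channel
    yb = 0.5 * (r + g) - b        # yellow-blue opponent channel
    std_root = np.sqrt(np.std(rg) ** 2 + np.std(yb) ** 2)
    mean_root = np.sqrt(np.mean(rg) ** 2 + np.mean(yb) ** 2)
    return std_root + 0.3 * mean_root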
Example #2
def data_analysis():

    tr_data = dt.get_data('cifar10', (0, 20000))
    val_data = dt.get_data('cifar10', (40000, 50000))
    test_data = dt.get_data('cifar10', (50000, 60000))

    for m in models[:1]:
        # model0, model_name0 = mt.train2(m, tr_data, val_data, 50, False, 'cifar10-2-5', h5_path)
        # model0, model_name0 = mt.train(m, 'cifar10-channelswitched', 50, data_augmentation=False, path=res_path)
        # acc, predicted_classes, y_predicted = dt.predict_and_acc(model0, test_data)
        # t_log.log_predictions(y_predicted, model_name0, file_path=csv_path)

        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)

        # true_classes = np.argmax(test_data[1], axis=1)  # wrong
        true_classes = [int(k) for k in test_data[1]]
        pr = metrics.prediction_ratings(y_predicted, true_classes)
        imgs_entropies = []

        # Optional per-image color metrics (currently disabled). Note that with this
        # loop commented out, imgs_entropies stays empty and sorted_e below is unused.
        # for image in test_data[0]:
        #     imgs_entropies.append(metrics_color.entropy_cc(image, 8))
        #     c, i = metrics_color.contrast_intensity(image)
        #     imgs_c.append(c)
        #     imgs_i.append(i)
        #     scores.append(metrics_color.colorfulness(image))

        sorted_e = np.argsort(imgs_entropies)
        # id_list = [sorted_e[k] for k in [10, 100, 1000, 2000, 5000, 8000, 9000, 9900, 9990]]
        id_list = [21, 3767, 9176, 730, 5905]  # hand-picked example image ids
        plotting.show_imgs(id_list, 'cdc entropy examples', test_data[0], showColorCube=True)
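metrics.prediction_ratings() drives the sorting in most of these examples but is never defined here. Purely as an assumption, here is a stand-in that rates each prediction by the confidence the model assigned to the ground-truth class (low rating = hard image, which matches how np.argsort is used above):

def prediction_ratings(y_predicted, true_classes):
    # Hypothetical stand-in, not the project's actual metric:
    # the probability assigned to each image's true class.
    return [float(y_predicted[k][c]) for k, c in enumerate(true_classes)]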
Example #3
def colorcube_analysis():
    # m = 'densenet121'
    for m in models:
        test_data = dt.get_data('cifar10', (50000, 60000))
        top_n = 2000
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        # model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False, suffix='ft20ep-exp')
        # The model itself is only needed to recompute predictions (see the commented
        # line below); here the cached predictions are loaded back from CSV instead.
        model = mt.load_by_name(model_name0, test_data[0].shape[1:], h5_path+model_name0)
        # y_predicted = model.predict(np.array(test_data[0]))
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        true_classes = [int(k) for k in test_data[1]]
        scores = metrics.prediction_ratings(y_predicted, true_classes)
        score_sorted_ids = np.argsort(scores)
        cc_high = metrics_color.ColorDensityCube(resolution=4)
        for img_id in score_sorted_ids[-top_n:]:
            cc_high.feed(test_data[0][img_id])
        cc_high.normalize()
        cc_high.plot_cube()

        cc_low = metrics_color.ColorDensityCube(resolution=4)
        for img_id in score_sorted_ids[:top_n]:
            cc_low.feed(test_data[0][img_id])
        cc_low.normalize()

        cc_diff = cc_high.substract(cc_low, 'value')

        cc_low.plot_cube()

        cc_diff.normalize()
        cc_diff.plot_cube(title='Color cube analysis difference (' + str(top_n) + ' images/series)', normalize=True,
                          save=True)
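metrics_color.ColorDensityCube is external project code. Below is a minimal sketch consistent with how it is used above (feed(), normalize(), get_cube(), substract()), assuming it histograms 8-bit RGB pixels into resolution**3 bins; plotting is omitted and the subtraction is reduced to the essentials:

import numpy as np

class ColorDensityCube(object):
    def __init__(self, resolution=4, cube=None):
        self.res = resolution
        self.cube = np.zeros((resolution,) * 3) if cube is None else np.asarray(cube, dtype=float)

    def feed(self, img):
        # Quantize each RGB pixel (0..255) into one of res**3 bins and count it.
        bins = np.asarray(img, dtype=int).reshape(-1, 3) * self.res // 256
        for r, g, b in bins:
            self.cube[r, g, b] += 1

    def normalize(self):
        if self.cube.sum() > 0:
            self.cube /= self.cube.sum()

    def get_cube(self):
        return self.cube

    def substract(self, other, mode='value'):  # spelling follows the call sites above
        return ColorDensityCube(self.res, cube=self.cube - other.get_cube())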
Example #4
def histogram_analysis():
    m = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))
    top_n = 2000
    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)
    high_score_series = []
    low_score_series = []
    for k in range(top_n):
        high_score_series.append(test_data[0][score_sorted_ids[-k-1]])
        low_score_series.append(test_data[0][score_sorted_ids[k]])

    plotting.plot_hists(high_score_series, 'high scores', low_score_series, 'low scores', plotting.cs_bgr, title=' ')
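plotting.plot_hists() is also project code; the plotting.cs_bgr argument suggests the images are handled in BGR channel order. A rough matplotlib equivalent under that assumption, overlaying one aggregate histogram per channel for the two series:

import numpy as np
import matplotlib.pyplot as plt

def plot_hists(series1, name1, series2, name2, bins=32):
    colors = ('b', 'g', 'r')  # assumed BGR channel order
    fig, axes = plt.subplots(1, 3, figsize=(12, 3))
    for ch in range(3):
        for series, name, style in ((series1, name1, '-'), (series2, name2, '--')):
            values = np.concatenate([np.asarray(img)[..., ch].ravel() for img in series])
            hist, edges = np.histogram(values, bins=bins)
            axes[ch].plot(edges[:-1], hist, colors[ch] + style, label=name)
        axes[ch].set_title('channel ' + colors[ch])
        axes[ch].legend()
    plt.show()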
Example #5
def confusion(model='densenet121'):
    # Load test data and model results
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    predicted_classes = np.argmax(y_predicted, axis=1)
    true_classes = [int(k) for k in test_data[1]]

    print('Confusion Matrix for Total Test Data')
    print(sk_metrics.confusion_matrix(true_classes, predicted_classes))

    scores = metrics.prediction_ratings(y_predicted, true_classes)
    # One list of scores per predicted class. Start the lists empty: the original
    # np.zeros((10, 1)).tolist() seeded each list with 0.0, biasing the means below.
    prediction_scores = [[] for _ in range(10)]
    for k in range(len(y_predicted)):
        prediction_scores[predicted_classes[k]].append(scores[k])

    print('Mean prediction score per predicted class:')
    for cifar_class in prediction_scores:
        print(float(np.mean(cifar_class)))
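The same per-class averages can be computed without the Python-level loop; a compact numpy variant over the example's own variables (classes that are never predicted would yield nan):

scores_arr = np.asarray(scores)
for c in range(10):
    print(c, float(scores_arr[predicted_classes == c].mean()))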
Example #6
def check_pr():
    m = 'densenet121'
    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)

    test_data = dt.get_data('cifar10', (50000, 60000))
    easy = [9929, 9935, 9939, 9945, 9952, 9966, 9971, 9992, 9997, 9999]
    hard = [9746, 9840, 9853, 9901, 9910, 9923, 9924, 9926, 9960, 9982]
    # cat = [671]
    # cars = [6983, 3678, 3170, 1591]
    # plotting.show_imgs(easy, 'easy set: ', test_data[0], showColorCube=True, resolution=4)
    # plotting.show_imgs(hard, 'hard set: ', test_data[0], showColorCube=True, resolution=4)
    true_classes = [int(k) for k in test_data[1]]

    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)

    # print(scores[score_sorted_ids[0]], y_predicted[score_sorted_ids[0]])
    # print(scores[score_sorted_ids[1]], y_predicted[score_sorted_ids[1]])
    print(scores[score_sorted_ids[2500]], y_predicted[score_sorted_ids[2500]])
    print(scores[score_sorted_ids[2501]], y_predicted[score_sorted_ids[2501]])
    # print(scores[score_sorted_ids[9998]], y_predicted[score_sorted_ids[9998]])
    # print(scores[score_sorted_ids[9999]], y_predicted[score_sorted_ids[9999]])

    print('easy')
    for img_id in easy:
        print(
            img_id, '- pr:',
            metrics.prediction_rating(y_predicted[img_id],
                                      true_classes[img_id]), ' - correct?: ',
            np.argmax(y_predicted[img_id]) == true_classes[img_id])
        # print(y_predicted[id])
    print('hard')
    for img_id in hard:
        print(
            img_id, '- pr:',
            metrics.prediction_rating(y_predicted[img_id],
                                      true_classes[img_id]), ' - correct?: ',
            np.argmax(y_predicted[img_id]) == true_classes[img_id])
Example #7
def entropy_cc_analysis():
    m = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))
    top_n = 2000

    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)
    high_score_entropies = []
    low_score_entropies = []
    print(len(score_sorted_ids))
    for k in range(top_n):
        # id = score_sorted_ids[-k - 1]
        # print(id)
        # img = test_data[id]
        high_score_entropies.append(metrics_color.entropy_cc(test_data[0][score_sorted_ids[-k-1]], 8))
        low_score_entropies.append(metrics_color.entropy_cc(test_data[0][score_sorted_ids[k]], 8))

    plotting.box_plot(high_score_entropies, low_score_entropies, name_s1='high prediction scores',
                      name_s2='low prediction scores', y_label='Color entropy',
                      title='Color entropy analysis (' + str(top_n) + ' images/series)')
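metrics_color.entropy_cc(img, n) is another external helper; presumably it computes the Shannon entropy of the image's color distribution. A sketch under that assumption, quantizing each 8-bit channel into n levels:

import numpy as np

def entropy_cc(img, n):
    # Shannon entropy (in bits) of an n x n x n color histogram of the image.
    pixels = np.asarray(img, dtype=int).reshape(-1, 3) * n // 256
    codes = pixels[:, 0] * n * n + pixels[:, 1] * n + pixels[:, 2]
    counts = np.bincount(codes, minlength=n ** 3)
    p = counts[counts > 0] / float(counts.sum())
    return float(-np.sum(p * np.log2(p)))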
Example #8
def pr_on_fair_distribution(models=('densenet121',), top_n=100, res=4):  # tuple default avoids a shared mutable default
    test_data = dt.get_data('cifar10', (50000, 60000))

    # Add every image's cube in densities
    densities = []
    for img in test_data[0]:
        cc = metrics_color.ColorDensityCube(res)
        cc.feed(img)
        densities.append(cc.get_cube())
        # ccf = np.array(cc.get_cube()).flatten()

    # Reshape densities (a list of per-image cubes) into one list of image densities
    # per color bin: (n_images, res, res, res) -> (res, res, res, n_images).
    # The original chained np.swapaxes calls are equivalent to this single transpose.
    densities_lists = np.transpose(np.asarray(densities), (1, 2, 3, 0))
    # print(densities_lists.shape)
    densities_cube = np.empty((res, res, res), dtype=object)

    # For each color keep the ids of the top_n most dense images in this color (same image can be in 2 colors)
    for i in range(res):
        for j in range(res):
            for k in range(res):
                # pr_most_dense = []
                density_list = densities_lists[i][j][k].tolist()
                args_most_dense = np.argsort(density_list)[-top_n:]
                densities_cube[i][j][k] = args_most_dense
    # print(densities_cube.shape)

    # Per model analysis
    for m in models:
        # Load model predictions and ground_truth values
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        true_classes = [int(k) for k in test_data[1]]
        pr = metrics.prediction_ratings(y_predicted, true_classes)

        # For each color get prediction score of the top_n images
        score_cube = np.zeros((res, res, res))
        global_cc = metrics_color.ColorDensityCube(resolution=res)
        args_most_dense_all = []
        for i in range(res):
            for j in range(res):
                for k in range(res):
                    pr_most_dense = []
                    densities_args = densities_cube[i][j][k].tolist()
                    ijk_cc = metrics_color.ColorDensityCube(res)
                    for a in densities_args:
                        pr_most_dense.append(pr[a])
                        ijk_cc.feed(test_data[0][a])
                        global_cc.feed(test_data[0][a])
                    ijk_cc.normalize()
                    # ijk_cc.plot_cube()
                    score_cube[i][j][k] = np.mean(pr_most_dense)
                    print(np.mean(pr_most_dense))
                    # Bin coordinates as fractional RGB values, usable as a plot title:
                    ttl = 'color = (' + str(i / res) + ', ' + str(j / res) + ', ' + str(k / res) + ')'
                    # plotting.show_imgs(densities_args[:10], ttl, test_data[0], showColorCube=True, resolution=4)

        global_cc.normalize()
        global_cc.plot_cube(title='Fair distributed dataset ColorCube')

        sc = metrics_color.ColorDensityCube(resolution=res, cube=score_cube)
        sc.normalize()
        sc.plot_cube(title='Scores per color for ' + m)
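The triple loop that fills score_cube reduces to indexing pr with each bin's stored image ids; an equivalent comprehension over the same in-scope variables, useful as a cross-check:

pr_arr = np.asarray(pr)
score_cube = np.array([[[pr_arr[densities_cube[i][j][k]].mean()
                         for k in range(res)]
                        for j in range(res)]
                       for i in range(res)])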
Example #9
def bug_feature_detection():

    for m in models:
        tr_data = dt.get_data('cifar10', (0, 20000))
        val_data = dt.get_data('cifar10', (20000, 30000))
        test_data = dt.get_data('cifar10', (30000, 60000))

        model0, model_name0 = mt.train2(m, tr_data, val_data, 50, False, tag='cifar10-2-5', path=h5_path)
        acc, predicted_classes, y_predicted = dt.predict_and_acc(model0, test_data)
        # log_predictions(y_predicted, model_name0, path=csv_path)
        print('acc', acc)

        # print(sk_metrics.confusion_matrix(test_data[1], predicted_classes))
        # true_classes = np.argmax(test_data[1], axis=1) wrong
        true_classes = [int(k) for k in test_data[1]]
        pr = metrics.prediction_ratings(y_predicted, true_classes)

        model2, model_name2 = mt.train2(m, tr_data, val_data, 1, False, tag='cifar10-0223', path=h5_path)
        model1 = mt.reg_from_(model2, m)
        print('Reg model created')
        X_test, y_test = test_data
        tr_data = X_test[0:20000], pr[0:20000]
        val_data = X_test[20000:30000], pr[20000:30000]
        model1, model_name1 = mt.train_reg(model1, m, tr_data, val_data, '', 50, False, path=h5_path)
        # score = model1.evaluate(val_data[0], val_data[1], verbose=0)
        # print('Test loss:', score[0])
        # print('Val accuracy:', score[1])
        formatted_test_data = dt.format_data(val_data, 10)
        y_true = pr[20000:30000]
        print('Ground truth values:')
        print('Mean', np.mean(y_true))
        print('Std', np.std(y_true))
        print('Max', np.max(y_true))
        print('Min', np.min(y_true))
        y_predicted1 = model1.predict(formatted_test_data[0])
        # print(np.array(y_predicted).shape)
        n_guesses = len(y_predicted1)
        y_predicted2 = [y_predicted1[k][0] for k in range(n_guesses)]
        print('Prediction values:')
        print('Mean', np.mean(y_predicted2))
        print('Std', np.std(y_predicted2))
        print('Max', np.max(y_predicted2))
        print('Min', np.min(y_predicted2))
        y_predicted3 = np.asarray(y_predicted2) / np.linalg.norm(y_predicted2)
        print('Norm Prediction values:')
        print('Mean', np.mean(y_predicted3))
        print('Std', np.std(y_predicted3))
        print('Max', np.max(y_predicted3))
        print('Min', np.min(y_predicted3))

        # fig, axs = plt.subplots(1, 1)
        # axs.hist(y_true, bins=30)
        # axs.set_title('y_true for ' + m)
        # plt.show()
        #
        # fig, axs = plt.subplots(1, 1)
        # axs.hist(y_predicted2, bins=30, range=(0, 2))
        # axs.set_title(m)
        # plt.show()

        diff2 = []
        diff3 = []
        for k in range(n_guesses):  # y_true and y_predicted2 both have n_guesses entries
            diff2.append(abs(y_predicted2[k] - y_true[k]))
            diff3.append(abs(y_predicted3[k] - y_true[k]))
        print('Difference:')
        print('Mean ', np.mean(diff2))
        print('Max ', max(diff2))
        print('Difference Norm:')
        print('Mean ', np.mean(diff3))
        print('Max ', max(diff3))

        # R/W guess prediction
        # Threshold at rank acc * n_guesses of the sorted regression outputs
        opti_thr = float(np.sort(y_predicted2)[int(acc * n_guesses)])
        print('opti_thr', opti_thr)
        thresholds = (0.6, 0.7, 0.777, 0.8, 0.9, opti_thr)
        # thresholds = (0.9, 1.0, 1.1, 1.2, opti_thr)

        for thr in thresholds:
            n_right_guesses = 0
            for k in range(n_guesses):
                q = (true_classes[20000 + k] == predicted_classes[20000 + k])  # compare int labels, as above
                p = y_predicted1[k][0] > thr
                if p == q:
                    n_right_guesses = n_right_guesses + 1

            print('acc for reg for true/false with thr of ' + str(thr) + ': ' + str(float(n_right_guesses)/n_guesses))

        # n_images = 10
        # n_rows = 10
        # for th in xrange(n_rows):
        #     fig, axes = plt.subplots(1, n_images, figsize=(n_images, 4),
        #                              subplot_kw={'xticks': (), 'yticks': ()})
        #     for dec in xrange(n_images):
        #         ax = axes[dec]
        #         pr_rank = 7000 + th * 100 + dec
        #         img_id = sorted_pr_args[pr_rank]
        #         # print(str(pr_rank) + ': ' + str(y_test[img_id]))  # + ' conf. guessed = ' + str(guessed[img_id]))
        #         ax.imshow(X_test[img_id], vmin=0, vmax=1)
        #         ax.set_title('pr#' + str(pr_rank) + "\nid#" + str(img_id)
        #                      + '\nr=' + str("{0:.2f}".format(pr[img_id]))
        #                      + '\np_cl=' + str(predicted_classes[img_id])
        #                      + '\nr_cl=' + str(true_classes[img_id]))
        #     plt.show()

        print('           ~           ')
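A side note on opti_thr above: picking the sorted regression output at rank int(acc * n_guesses) leaves roughly a fraction 1 - acc of outputs above the threshold. A quick standalone check with hypothetical numbers:

import numpy as np

acc, n = 0.8, 10000
outputs = np.random.rand(n)                    # stand-in regression outputs
thr = float(np.sort(outputs)[int(acc * n)])
print(np.mean(outputs > thr))                  # ~0.2, i.e. about 1 - acc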
Example #10
        # [snippet truncated: tail of a plt.subplots(..., subplot_kw={'xticks': (), 'yticks': ()}) call]
        for image, label, ax in zip(X_people[mask], y_people[mask], axes):
            ax.imshow(image.reshape(image_shape), vmin=0, vmax=1)
            ax.set_title(people.target_names[label].split()[-1])
        plt.show()


labels = [[] for _ in range(n_clusters)]
for idx, cluster in enumerate(k_means_test()):
    labels[cluster].append(idx)

for f in file_list[:1]:
    predictions = aa.load_csv(res_path + f, 2)
    losses = t_log.load_csv(res_path + f, 3)
    pr = metrics.prediction_ratings(losses, test_labels)
    sorted_pr_indexes = np.argsort(pr)

    per_class_score = np.zeros(10)
    guessed = []

    for idx, p in enumerate(predictions):
        if p == test_labels[idx]:
            per_class_score[p] += 1
            guessed.append(True)
        else:
            guessed.append(False)

    print(per_class_score)
    print(np.mean(pr))
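per_class_score above counts correct predictions per class; dividing by each class's support turns the counts into per-class accuracies. A small follow-up, assuming test_labels is an integer label array like the other examples' true_classes:

support = np.bincount(np.asarray(test_labels, dtype=int), minlength=10)
print(per_class_score / np.maximum(support, 1))  # per-class accuracy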