Example #1
def data_analysis():

    tr_data = dt.get_data('cifar10', (0, 20000))
    val_data = dt.get_data('cifar10', (40000, 50000))
    test_data = dt.get_data('cifar10', (50000, 60000))

    for m in models[:1]:
        # model0, model_name0 = mt.train2(m, tr_data, val_data, 50, False, 'cifar10-2-5', h5_path)
        # model0, model_name0 = mt.train(m, 'cifar10-channelswitched', 50, data_augmentation=False, path=res_path)
        # acc, predicted_classes, y_predicted = dt.predict_and_acc(model0, test_data)
        # t_log.log_predictions(y_predicted, model_name0, file_path=csv_path)

        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)

        # true_classes = np.argmax(test_data[1], axis=1)  # wrong
        true_classes = [int(k) for k in test_data[1]]
        pr = metrics.prediction_ratings(y_predicted, true_classes)
        imgs_entropies = []

        # for image in test_data[0]:
        #     imgs_entropies.append(metrics_color.entropy_cc(image, 8))
            # c, i = metrics_color.contrast_intensity(image)
            # imgs_c.append(c)
            # imgs_i.append(i)

            # scores.append(metrics_color.colorfulness(image))

        # Entropy ranking is disabled: with the loop above commented out,
        # imgs_entropies is empty, so hand-picked ids are used instead.
        sorted_e = np.argsort(imgs_entropies)
        # id_list = [sorted_e[k] for k in [10, 100, 1000, 2000, 5000, 8000, 9000, 9900, 9990]]
        id_list = [21, 3767, 9176, 730, 5905]
        plotting.show_imgs(id_list, 'cdc entropy examples', test_data[0], showColorCube=True)
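A note on the data access above: dt.get_data appears to slice CIFAR-10 by absolute index over the concatenated train+test arrays (60000 images in total). A minimal stand-in under that assumption follows; the real dt module may differ.

import numpy as np
from tensorflow.keras.datasets import cifar10

def get_data(dataset, id_range):
    # Hypothetical stand-in for dt.get_data: slice the concatenated
    # CIFAR-10 train+test arrays by absolute image index.
    assert dataset == 'cifar10'
    (x_tr, y_tr), (x_te, y_te) = cifar10.load_data()
    x = np.concatenate([x_tr, x_te])           # (60000, 32, 32, 3)
    y = np.concatenate([y_tr, y_te]).ravel()   # integer labels 0-9
    start, end = id_range
    return x[start:end], y[start:end]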
Example #2
def colorfulness_analysis(model='densenet121', top_n=2500):
    """
    Experiment to analyse the relevance if the colorfulness attribute
    See the metrics_color.colorfulness() function for more details on the attribute
    :param model: The predictions of :model: will be used to compute the prediciton scores
    :param top_n: Number of elements in the series that will be plotted for analysis
    :return:
    """

    # Load test data and model results
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]

    # Compute scores and sort test data ids by score
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)

    # Compute metric for high score and low score data
    high_score_series = []
    low_score_series = []
    print(len(score_sorted_ids))
    for k in range(top_n):
        high_score_series.append(metrics_color.colorfulness(test_data[0][score_sorted_ids[-k-1]]))
        low_score_series.append(metrics_color.colorfulness(test_data[0][score_sorted_ids[k]]))

    # Plot box plot of the two series
    plotting.box_plot(high_score_series, low_score_series, name_s1='high prediction scores',
                      name_s2='low prediction scores', y_label='Colorfulness',
                      title='Colorfulness analysis (' + str(top_n) + ' images/series)')
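metrics_color.colorfulness() is not shown in this listing; one plausible implementation is the Hasler-Susstrunk colorfulness metric, sketched below under that assumption (RGB channel order assumed):

import numpy as np

def colorfulness(img):
    # Hasler & Susstrunk (2003): combine spread and magnitude of the two
    # opponent color channels. img is an HxWx3 array with values 0-255.
    r, g, b = (img[..., c].astype(float) for c in range(3))
    rg = r - g
    yb = 0.5 * (r + g) - b
    return np.hypot(rg.std(), yb.std()) + 0.3 * np.hypot(rg.mean(), yb.mean())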
Example #3
def colorcube_analysis():
    # m = 'densenet121'
    for m in models:
        test_data = dt.get_data('cifar10', (50000, 60000))
        top_n = 2000
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        # model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False, suffix='ft20ep-exp')
        model = mt.load_by_name(model_name0, test_data[0].shape[1:], h5_path+model_name0)
        # y_predicted = model.predict(np.array(test_data[0]))
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        true_classes = [int(k) for k in test_data[1]]
        scores = metrics.prediction_ratings(y_predicted, true_classes)
        score_sorted_ids = np.argsort(scores)
        cc_high = metrics_color.ColorDensityCube(resolution=4)
        for img_id in score_sorted_ids[-top_n:]:
            cc_high.feed(test_data[0][img_id])
        cc_high.normalize()
        cc_high.plot_cube()

        cc_low = metrics_color.ColorDensityCube(resolution=4)
        for img_id in score_sorted_ids[:top_n]:
            cc_low.feed(test_data[0][img_id])
        cc_low.normalize()

        cc_diff = cc_high.substract(cc_low, 'value')

        cc_low.plot_cube()

        cc_diff.normalize()
        cc_diff.plot_cube(title='Color cube analysis difference (' + str(top_n) + ' images/series)', normalize=True,
                          save=True)
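substract() (spelling as in the metrics_color API used above) is presumably an element-wise difference of the two density cubes, so cc_diff highlights colors over-represented among high-score images relative to low-score ones. A sketch of the assumed semantics:

import numpy as np

# Assumed equivalent of cc_high.substract(cc_low, 'value'):
diff = np.asarray(cc_high.get_cube()) - np.asarray(cc_low.get_cube())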
Example #4
def check_acc():
    m = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))

    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    predicted_classes = np.argmax(y_predicted, axis=1)
    print(predicted_classes[:10])
    true_classes = [int(k) for k in test_data[1]]
    acc = metrics.accuracy(predicted_classes, true_classes)
    print(acc)
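metrics.accuracy presumably computes plain top-1 accuracy; a minimal equivalent under that assumption:

import numpy as np

def accuracy(predicted_classes, true_classes):
    # Fraction of predictions that match the ground-truth labels.
    return float(np.mean(np.asarray(predicted_classes) == np.asarray(true_classes)))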
Example #5
def histogram_analysis():
    m = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))
    top_n = 2000
    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)
    high_score_series = []
    low_score_series = []
    for k in range(top_n):
        high_score_series.append(test_data[0][score_sorted_ids[-k-1]])
        low_score_series.append(test_data[0][score_sorted_ids[k]])

    plotting.plot_hists(high_score_series, 'high scores', low_score_series, 'low scores', plotting.cs_bgr, title=' ')
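plotting.plot_hists is not shown; the plotting.cs_bgr argument suggests OpenCV-style BGR channel order. A hypothetical stand-in that overlays per-channel histograms of the two series:

import numpy as np
import matplotlib.pyplot as plt

def plot_hists(series1, name1, series2, name2, channels='bgr'):
    # Overlay per-channel pixel histograms for two image series.
    for series, name, style in ((series1, name1, '-'), (series2, name2, '--')):
        pixels = np.concatenate([img.reshape(-1, 3) for img in series])
        for ch, color in enumerate(channels):
            plt.hist(pixels[:, ch], bins=32, histtype='step', linestyle=style,
                     color=color, label=name + ' ' + color)
    plt.xlabel('pixel value')
    plt.ylabel('count')
    plt.legend()
    plt.show()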
Example #6
def confusion(model='densenet121'):
    # Load test data and model results
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    predicted_classes = np.argmax(y_predicted, axis=1)
    true_classes = [int(k) for k in test_data[1]]

    print('Confusion Matrix for Total Test Data')
    print(sk_metrics.confusion_matrix(true_classes, predicted_classes))

    scores = metrics.prediction_ratings(y_predicted, true_classes)
    # One score list per predicted class; start each list empty so the
    # per-class mean is not biased (np.zeros((10, 1)).tolist() would seed
    # every class with a spurious 0.0).
    prediction_scores = [[] for _ in range(10)]
    for k in range(len(y_predicted)):
        prediction_scores[predicted_classes[k]].append(scores[k])

    # Mean prediction score per predicted class
    for cifar_class in prediction_scores:
        print(float(np.mean(cifar_class)))
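For a complementary per-class view (precision/recall rather than mean prediction scores), a classification report can be printed next to the confusion matrix, assuming sk_metrics is sklearn.metrics as the confusion_matrix call suggests:

print(sk_metrics.classification_report(true_classes, predicted_classes))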
Example #7
def check_pr():
    m = 'densenet121'
    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)

    test_data = dt.get_data('cifar10', (50000, 60000))
    easy = [9929, 9935, 9939, 9945, 9952, 9966, 9971, 9992, 9997, 9999]
    hard = [9746, 9840, 9853, 9901, 9910, 9923, 9924, 9926, 9960, 9982]
    # cat = [671]
    # cars = [6983, 3678, 3170, 1591]
    # plotting.show_imgs(easy, 'easy set: ', test_data[0], showColorCube=True, resolution=4)
    # plotting.show_imgs(hard, 'hard set: ', test_data[0], showColorCube=True, resolution=4)
    true_classes = [int(k) for k in test_data[1]]

    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)

    # print(scores[score_sorted_ids[0]], y_predicted[score_sorted_ids[0]])
    # print(scores[score_sorted_ids[1]], y_predicted[score_sorted_ids[1]])
    print(scores[score_sorted_ids[2500]], y_predicted[score_sorted_ids[2500]])
    print(scores[score_sorted_ids[2501]], y_predicted[score_sorted_ids[2501]])
    # print(scores[score_sorted_ids[9998]], y_predicted[score_sorted_ids[9998]])
    # print(scores[score_sorted_ids[9999]], y_predicted[score_sorted_ids[9999]])

    print('easy')
    for img_id in easy:
        print(
            img_id, '- pr:',
            metrics.prediction_rating(y_predicted[img_id],
                                      true_classes[img_id]), ' - correct?: ',
            np.argmax(y_predicted[img_id]) == true_classes[img_id])
        # print(y_predicted[id])
    print('hard')
    for img_id in hard:
        print(
            img_id, '- pr:',
            metrics.prediction_rating(y_predicted[img_id],
                                      true_classes[img_id]), ' - correct?: ',
            np.argmax(y_predicted[img_id]) == true_classes[img_id])
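The easy and hard id lists above are hand-picked; comparable sets can be read directly off the ends of the score ranking computed in this function:

easy = score_sorted_ids[-10:].tolist()   # ten highest prediction ratings
hard = score_sorted_ids[:10].tolist()    # ten lowest prediction ratings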
Example #8
def entropy_cc_analysis():
    m = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))
    top_n = 2000

    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)
    high_score_entropies = []
    low_score_entropies = []
    print(len(score_sorted_ids))
    for k in range(top_n):
        # id = score_sorted_ids[-k - 1]
        # print(id)
        # img = test_data[id]
        high_score_entropies.append(metrics_color.entropy_cc(test_data[0][score_sorted_ids[-k-1]], 8))
        low_score_entropies.append(metrics_color.entropy_cc(test_data[0][score_sorted_ids[k]], 8))

    plotting.box_plot(high_score_entropies, low_score_entropies, name_s1='high prediction scores',
                      name_s2='low prediction scores', y_label='Color entropy',
                      title='Color entropy analysis (' + str(top_n) + ' images/series)')
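entropy_cc(image, 8) presumably measures the Shannon entropy of an image's color distribution at the given cube resolution; a sketch under that assumption:

import numpy as np

def entropy_cc(img, resolution):
    # Shannon entropy (in bits) of the color histogram over resolution^3
    # RGB cells; img is an HxWx3 uint8 array.
    edges = np.linspace(0, 256, resolution + 1)
    hist, _ = np.histogramdd(img.reshape(-1, 3), bins=(edges, edges, edges))
    p = hist.ravel() / hist.sum()
    p = p[p > 0]
    return float(-np.sum(p * np.log2(p)))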
Example #9
def pr_on_fair_distribution(models=['densenet121'], top_n=100, res=4):
    test_data = dt.get_data('cifar10', (50000, 60000))

    # Add every image's cube in densities
    densities = []
    for img in test_data[0]:
        cc = metrics_color.ColorDensityCube(res)
        cc.feed(img)
        densities.append(cc.get_cube())
        # ccf = np.array(cc.get_cube()).flatten()

    # Shape densities (list of cubes) to make a list per color
    densities_lists = np.swapaxes(np.swapaxes(np.swapaxes(densities, 0, 3), 0, 2), 0, 1)
    # print(densities_lists.shape)
    densities_cube = np.empty((res, res, res), dtype=object)

    # For each color keep the ids of the top_n most dense images in this color (same image can be in 2 colors)
    for i in range(res):
        for j in range(res):
            for k in range(res):
                # pr_most_dense = []
                density_list = densities_lists[i][j][k].tolist()
                args_most_dense = np.argsort(density_list)[-top_n:]
                densities_cube[i][j][k] = args_most_dense
    # print(densities_cube.shape)

    # Per model analysis
    for m in models:
        # Load model predictions and ground_truth values
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        true_classes = [int(k) for k in test_data[1]]
        pr = metrics.prediction_ratings(y_predicted, true_classes)

        # For each color get prediction score of the top_n images
        score_cube = np.zeros((res, res, res))
        global_cc = metrics_color.ColorDensityCube(resolution=res)
        args_most_dense_all = []
        for i in range(res):
            for j in range(res):
                for k in range(res):
                    pr_most_dense = []
                    densities_args = densities_cube[i][j][k].tolist()
                    # args_most_dense = np.argsort(density_list)[-topn:]
                    ijk_cc = metrics_color.ColorDensityCube(res)
                    for a in densities_cube[i][j][k].tolist():
                        pr_most_dense.append(pr[a])
                        ijk_cc.feed(test_data[0][a])
                        global_cc.feed(test_data[0][a])
                    ijk_cc.normalize()
                    # ttl is used by the commented-out show_imgs call below
                    ttl = 'color = (' + str(i / res) + ', ' + str(j / res) + ', ' + str(k / res) + ')'
                    # ijk_cc.plot_cube()
                    score_cube[i][j][k] = np.mean(pr_most_dense)
                    print(np.mean(pr_most_dense))
                    # args_most_dense_all.append(args_most_dense)
                    # plotting.show_imgs(densities_args[:10], ttl, test_data[0], showColorCube=True, resolution=4)

        global_cc.normalize()
        global_cc.plot_cube(title='Fair distributed dataset ColorCube')

        sc = metrics_color.ColorDensityCube(resolution=res, cube=score_cube)
        sc.normalize()
        sc.plot_cube(title='Scores per color for ' + m)
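The chained swapaxes calls earlier in this function move the image axis to the end; assuming densities stacks to shape (n_images, res, res, res), the same reshaping is a single transpose:

densities_lists = np.transpose(np.asarray(densities), (1, 2, 3, 0))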
Example #10
def train_bdd100k_cl():
    labels_path = '../../bdd100k/classification/labels/'
    train_labels = '../../bdd100k/classification/labels/train_ground_truth.csv'
    val_labels = '../../bdd100k/classification/labels/val_ground_truth.csv'
    # class_map_file = labels_path + 'class_mapping.csv'
    val_json = '../../bdd100k/labels/bdd100k_labels_images_val.json'

    epochs = 20

    # Parameters
    params = {
        'dim': (64, 64, 3),
        'batch_size': 32,
        'n_classes': 10,
        'shuffle': True
    }

    class_map_file = bu.class_mapping(input_json=val_json,
                                      output_csv=labels_path +
                                      'class_mapping.csv')

    # Datasets
    val_partition, val_labels = bu.get_ids_labels(val_labels, class_map_file)
    tr_partition, tr_labels = bu.get_ids_labels(train_labels, class_map_file)

    # Generators
    training_generator = mt.DataGenerator(tr_partition[:500000], tr_labels,
                                          **params)
    validation_generator = mt.DataGenerator(val_partition[:100000], val_labels,
                                            **params)
    print(len(training_generator))

    for m in models:

        weight_file = mt.weight_file_name(m,
                                          'bdd100k_cl0-500k',
                                          epochs,
                                          data_augmentation=False)
        weight_file = h5_path + weight_file
        print("Building: " + weight_file)
        if m in ('mobilenet', 'mobilenetv2', 'nasnet'):
            # ImageNet weights for these backbones are published for 224x224
            # inputs, so build a donor model at that size and copy its weights
            # into an identically-structured model sized for params['dim'].
            model = mt.model_struct(m, (224, 224, 3),
                                    params['n_classes'],
                                    weights='imagenet',
                                    include_top=False)
            new_model = mt.model_struct(m,
                                        params['dim'],
                                        params['n_classes'],
                                        weights=None,
                                        include_top=False)
            print("Loading weights...")

            for new_layer, layer in zip(new_model.layers[1:],
                                        model.layers[1:]):
                new_layer.set_weights(layer.get_weights())
            base_model = new_model
        else:
            base_model = mt.model_struct(m,
                                         params['dim'],
                                         params['n_classes'],
                                         weights='imagenet',
                                         include_top=False)

        print("Configuring top layers")
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(1024, activation='relu')(x)
        predictions = Dense(10, activation='softmax')(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        model.summary()
        # for layer in base_model.layers:
        #     layer.trainable = False

        model.compile('adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        # str.rstrip('.h5') strips a character set, not a suffix; slice the
        # '.h5' extension off instead (weight_file ends with '.h5').
        checkpoint = ModelCheckpoint(weight_file[:-len('.h5')] +
                                     '_ep{epoch:02d}_vl{val_loss:.2f}.hdf5',
                                     monitor='val_acc',
                                     verbose=0,
                                     save_best_only=True,
                                     save_weights_only=False,
                                     mode='auto')

        # Train model on dataset
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            verbose=1,
                            epochs=epochs,
                            use_multiprocessing=True,
                            workers=6,
                            callbacks=[checkpoint])
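The donor-model branch for mobilenet/mobilenetv2/nasnet works because these backbones are fully convolutional, so their layer weight shapes do not depend on input resolution. A standalone sketch of the same idea, using tensorflow.keras as an assumption about the framework behind mt:

from tensorflow.keras.applications import MobileNet

# Build a 224x224 donor with ImageNet weights and a 64x64 clone of the
# same architecture, then copy weights layer by layer, skipping the
# Input layer at index 0 (it has no weights and differs in shape).
donor = MobileNet(input_shape=(224, 224, 3), weights='imagenet', include_top=False)
target = MobileNet(input_shape=(64, 64, 3), weights=None, include_top=False)
for t_layer, d_layer in zip(target.layers[1:], donor.layers[1:]):
    t_layer.set_weights(d_layer.get_weights())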