def data_analysis():
    tr_data = dt.get_data('cifar10', (0, 20000))
    val_data = dt.get_data('cifar10', (40000, 50000))
    test_data = dt.get_data('cifar10', (50000, 60000))
    for m in models[:1]:
        # model0, model_name0 = mt.train2(m, tr_data, val_data, 50, False, 'cifar10-2-5', h5_path)
        # model0, model_name0 = mt.train(m, 'cifar10-channelswitched', 50, data_augmentation=False, path=res_path)
        # acc, predicted_classes, y_predicted = dt.predict_and_acc(model0, test_data)
        # t_log.log_predictions(y_predicted, model_name0, file_path=csv_path)
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        # true_classes = np.argmax(test_data[1], axis=1)  # wrong: the labels are already class ids
        true_classes = [int(k) for k in test_data[1]]
        pr = metrics.prediction_ratings(y_predicted, true_classes)

        # Per-image statistics collection (currently disabled). Note that while
        # this loop stays commented out, imgs_entropies is empty and sorted_e is
        # an empty index array, so only the hard-coded id_list below is usable.
        imgs_entropies = []
        # for image in test_data[0]:
        #     imgs_entropies.append(metrics_color.entropy_cc(image, 8))
        #     c, i = metrics_color.contrast_intensity(image)
        #     imgs_c.append(c)
        #     imgs_i.append(i)
        #     scores.append(metrics_color.colorfulness(image))
        sorted_e = np.argsort(imgs_entropies)
        # id_list = [sorted_e[k] for k in [10, 100, 1000, 2000, 5000, 8000, 9000, 9900, 9990]]
        id_list = [21, 3767, 9176, 730, 5905]
        plotting.show_imgs(id_list, 'cdc entropy examples', test_data[0], showColorCube=True)
def colorfulness_analysis(model='densenet121', top_n=2500):
    """
    Experiment to analyse the relevance of the colorfulness attribute.
    See the metrics_color.colorfulness() function for more details on the attribute.

    :param model: The predictions of :model: will be used to compute the prediction scores
    :param top_n: Number of elements in each series that will be plotted for analysis
    :return:
    """
    # Load test data and model results
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]

    # Compute scores and sort test data ids by score
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)

    # Compute the metric for the top_n highest- and lowest-scored images
    high_score_series = []
    low_score_series = []
    print(len(score_sorted_ids))
    for k in range(top_n):
        high_score_series.append(metrics_color.colorfulness(test_data[0][score_sorted_ids[-k - 1]]))
        low_score_series.append(metrics_color.colorfulness(test_data[0][score_sorted_ids[k]]))

    # Plot a box plot of the two series
    plotting.box_plot(high_score_series, low_score_series,
                      name_s1='high prediction scores', name_s2='low prediction scores',
                      y_label='Colorfulness',
                      title='Colorfulness analysis (' + str(top_n) + ' images/series)')
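# For orientation: the colorfulness attribute used above lives in metrics_color,
# which is not shown in this file. A minimal sketch of the classic
# Hasler & Suesstrunk (2003) colorfulness measure is given below; the actual
# metrics_color.colorfulness() implementation may differ. The sketch assumes an
# RGB, channels-last uint8 image as a numpy array.
def _colorfulness_sketch(img):
    """Hasler-Suesstrunk colorfulness: sigma_rgyb + 0.3 * mu_rgyb (assumed semantics)."""
    r = img[..., 0].astype(np.float64)
    g = img[..., 1].astype(np.float64)
    b = img[..., 2].astype(np.float64)
    rg = r - g                    # red-green opponent channel
    yb = 0.5 * (r + g) - b        # yellow-blue opponent channel
    std_root = np.sqrt(np.std(rg) ** 2 + np.std(yb) ** 2)
    mean_root = np.sqrt(np.mean(rg) ** 2 + np.mean(yb) ** 2)
    return std_root + 0.3 * mean_root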
def colorcube_analysis():
    # m = 'densenet121'
    for m in models:
        test_data = dt.get_data('cifar10', (50000, 60000))
        top_n = 2000
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        # model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False, suffix='ft20ep-exp')
        model = mt.load_by_name(model_name0, test_data[0].shape[1:], h5_path + model_name0)
        # y_predicted = model.predict(np.array(test_data[0]))
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        true_classes = [int(k) for k in test_data[1]]
        scores = metrics.prediction_ratings(y_predicted, true_classes)
        score_sorted_ids = np.argsort(scores)

        # Color cube of the top_n highest-scored images
        cc_high = metrics_color.ColorDensityCube(resolution=4)
        for img_id in score_sorted_ids[-top_n:]:
            cc_high.feed(test_data[0][img_id])
        cc_high.normalize()
        cc_high.plot_cube()

        # Color cube of the top_n lowest-scored images
        cc_low = metrics_color.ColorDensityCube(resolution=4)
        for img_id in score_sorted_ids[:top_n]:
            cc_low.feed(test_data[0][img_id])
        cc_low.normalize()
        cc_diff = cc_high.substract(cc_low, 'value')
        cc_low.plot_cube()

        # Plot the difference between the two cubes
        cc_diff.normalize()
        cc_diff.plot_cube(title='Color cube analysis difference (' + str(top_n) + ' images/series)',
                          normalize=True, save=True)
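# ColorDensityCube is used throughout this module but defined elsewhere.
# Judging from its usage (feed / normalize / substract / plot_cube / get_cube),
# it appears to be a 3D histogram of pixel colors over an RGB grid of the given
# resolution. A minimal sketch of that idea, for orientation only (assumed
# semantics, not the actual metrics_color implementation):
class _ColorCubeSketch(object):
    def __init__(self, resolution=4):
        self.res = resolution
        self.cube = np.zeros((resolution,) * 3)

    def feed(self, img):
        # Quantize each uint8 RGB pixel into a res x res x res bin
        bins = (np.asarray(img, dtype=int).reshape(-1, 3) * self.res) // 256
        for r, g, b in bins:
            self.cube[r][g][b] += 1

    def normalize(self):
        # Turn raw counts into a density distribution
        self.cube /= max(self.cube.sum(), 1)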
def check_acc():
    m = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    predicted_classes = np.argmax(y_predicted, axis=1)
    print(predicted_classes[:10])
    true_classes = [int(k) for k in test_data[1]]
    acc = metrics.accuracy(predicted_classes, true_classes)
    print(acc)
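# metrics.accuracy is defined elsewhere; presumably it reduces to a simple
# match rate between predicted and true class ids. A sketch under that
# assumption:
def _accuracy_sketch(predicted_classes, true_classes):
    # Fraction of positions where the two label sequences agree
    return float(np.mean(np.asarray(predicted_classes) == np.asarray(true_classes)))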
def histogram_analysis():
    m = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))
    top_n = 2000
    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)
    high_score_series = []
    low_score_series = []
    for k in range(top_n):
        high_score_series.append(test_data[0][score_sorted_ids[-k - 1]])
        low_score_series.append(test_data[0][score_sorted_ids[k]])
    plotting.plot_hists(high_score_series, 'high scores', low_score_series, 'low scores',
                        plotting.cs_bgr, title=' ')
def confusion(model='densenet121'):
    # Load test data and model results
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    predicted_classes = np.argmax(y_predicted, axis=1)
    true_classes = [int(k) for k in test_data[1]]
    print('Confusion Matrix for Total Test Data')
    print(sk_metrics.confusion_matrix(true_classes, predicted_classes))

    # Group prediction scores by predicted class and print the mean per class.
    # (Seeding with np.zeros((10, 1)).tolist(), as before, would add a spurious
    # 0.0 to every class and bias the means, so start from empty lists.)
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    prediction_scores = [[] for _ in range(10)]
    for k in range(len(y_predicted)):
        prediction_scores[predicted_classes[k]].append(scores[k])
    for cifar_class in prediction_scores:
        print(float(np.mean(cifar_class)))
def check_pr():
    m = 'densenet121'
    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    test_data = dt.get_data('cifar10', (50000, 60000))
    easy = [9929, 9935, 9939, 9945, 9952, 9966, 9971, 9992, 9997, 9999]
    hard = [9746, 9840, 9853, 9901, 9910, 9923, 9924, 9926, 9960, 9982]
    # cat = [671]
    # cars = [6983, 3678, 3170, 1591]
    # plotting.show_imgs(easy, 'easy set: ', test_data[0], showColorCube=True, resolution=4)
    # plotting.show_imgs(hard, 'hard set: ', test_data[0], showColorCube=True, resolution=4)
    true_classes = [int(k) for k in test_data[1]]
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)
    # print(scores[score_sorted_ids[0]], y_predicted[score_sorted_ids[0]])
    # print(scores[score_sorted_ids[1]], y_predicted[score_sorted_ids[1]])
    print(scores[score_sorted_ids[2500]], y_predicted[score_sorted_ids[2500]])
    print(scores[score_sorted_ids[2501]], y_predicted[score_sorted_ids[2501]])
    # print(scores[score_sorted_ids[9998]], y_predicted[score_sorted_ids[9998]])
    # print(scores[score_sorted_ids[9999]], y_predicted[score_sorted_ids[9999]])
    print('easy')
    for img_id in easy:
        print(img_id, '- pr:', metrics.prediction_rating(y_predicted[img_id], true_classes[img_id]),
              ' - correct?: ', np.argmax(y_predicted[img_id]) == true_classes[img_id])
        # print(y_predicted[img_id])
    print('hard')
    for img_id in hard:
        print(img_id, '- pr:', metrics.prediction_rating(y_predicted[img_id], true_classes[img_id]),
              ' - correct?: ', np.argmax(y_predicted[img_id]) == true_classes[img_id])
def entropy_cc_analysis():
    m = 'densenet121'
    test_data = dt.get_data('cifar10', (50000, 60000))
    top_n = 2000
    model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)
    high_score_entropies = []
    low_score_entropies = []
    print(len(score_sorted_ids))
    for k in range(top_n):
        # id = score_sorted_ids[-k - 1]
        # print(id)
        # img = test_data[id]
        high_score_entropies.append(metrics_color.entropy_cc(test_data[0][score_sorted_ids[-k - 1]], 8))
        low_score_entropies.append(metrics_color.entropy_cc(test_data[0][score_sorted_ids[k]], 8))
    plotting.box_plot(high_score_entropies, low_score_entropies,
                      name_s1='high prediction scores', name_s2='low prediction scores',
                      y_label='Color entropy',
                      title='Color entropy analysis (' + str(top_n) + ' images/series)')
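# metrics_color.entropy_cc(image, n) is used above as a color-entropy measure.
# A plausible reading (assumed, not confirmed by this file) is the Shannon
# entropy of the image's color distribution over an n x n x n quantized RGB
# cube, consistent with the ColorDensityCube binning sketched earlier:
def _entropy_cc_sketch(img, n=8):
    # Quantize pixels into n^3 color bins and build a histogram
    idx = (np.asarray(img, dtype=int).reshape(-1, 3) * n) // 256
    flat = idx[:, 0] * n * n + idx[:, 1] * n + idx[:, 2]
    hist = np.bincount(flat, minlength=n ** 3).astype(np.float64)
    # Shannon entropy (in bits) of the non-empty bins
    p = hist[hist > 0] / hist.sum()
    return float(-np.sum(p * np.log2(p)))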
def pr_on_fair_distribution(models=['densenet121'], top_n=100, res=4):
    test_data = dt.get_data('cifar10', (50000, 60000))

    # Add every image's color cube to densities
    densities = []
    for img in test_data[0]:
        cc = metrics_color.ColorDensityCube(res)
        cc.feed(img)
        densities.append(cc.get_cube())
        # ccf = np.array(cc.get_cube()).flatten()

    # Reshape densities (a list of cubes, shape (N, res, res, res)) into one
    # list of per-image densities per color bin, shape (res, res, res, N)
    densities_lists = np.swapaxes(np.swapaxes(np.swapaxes(densities, 0, 3), 0, 2), 0, 1)
    # print(densities_lists.shape)

    # For each color, keep the ids of the top_n images densest in that color
    # (the same image can appear under several colors)
    densities_cube = np.empty((res, res, res), dtype=object)
    for i in range(res):
        for j in range(res):
            for k in range(res):
                density_list = densities_lists[i][j][k].tolist()
                args_most_dense = np.argsort(density_list)[-top_n:]
                densities_cube[i][j][k] = args_most_dense
    # print(densities_cube.shape)

    # Per-model analysis
    for m in models:
        # Load model predictions and ground-truth values
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        true_classes = [int(k) for k in test_data[1]]
        pr = metrics.prediction_ratings(y_predicted, true_classes)

        # For each color, average the prediction scores of its top_n densest images
        score_cube = np.zeros((res, res, res))
        global_cc = metrics_color.ColorDensityCube(resolution=res)
        for i in range(res):
            for j in range(res):
                for k in range(res):
                    pr_most_dense = []
                    ijk_cc = metrics_color.ColorDensityCube(res)
                    for a in densities_cube[i][j][k].tolist():
                        pr_most_dense.append(pr[a])
                        ijk_cc.feed(test_data[0][a])
                        global_cc.feed(test_data[0][a])
                    ijk_cc.normalize()
                    # ijk_cc.plot_cube()
                    score_cube[i][j][k] = np.mean(pr_most_dense)
                    print(np.mean(pr_most_dense))
                    # ttl = 'color = (' + str(i / res) + ', ' + str(j / res) + ', ' + str(k / res) + ')'
                    # plotting.show_imgs(densities_cube[i][j][k].tolist()[:10], ttl, test_data[0],
                    #                    showColorCube=True, resolution=4)

        global_cc.normalize()
        global_cc.plot_cube(title='Fair distributed dataset ColorCube')

        sc = metrics_color.ColorDensityCube(resolution=res, cube=score_cube)
        sc.normalize()
        sc.plot_cube(title='Scores per color for ' + m)
def train_bdd100k_cl():
    labels_path = '../../bdd100k/classification/labels/'
    train_labels = '../../bdd100k/classification/labels/train_ground_truth.csv'
    val_labels = '../../bdd100k/classification/labels/val_ground_truth.csv'
    # class_map_file = labels_path + 'class_mapping.csv'
    val_json = '../../bdd100k/labels/bdd100k_labels_images_val.json'
    epochs = 20

    # Parameters
    params = {'dim': (64, 64, 3),
              'batch_size': 32,
              'n_classes': 10,
              'shuffle': True}

    class_map_file = bu.class_mapping(input_json=val_json, output_csv=labels_path + 'class_mapping.csv')

    # Datasets
    val_partition, val_labels = bu.get_ids_labels(val_labels, class_map_file)
    tr_partition, tr_labels = bu.get_ids_labels(train_labels, class_map_file)

    # Generators
    training_generator = mt.DataGenerator(tr_partition[:500000], tr_labels, **params)
    validation_generator = mt.DataGenerator(val_partition[:100000], val_labels, **params)
    print(len(training_generator))

    for m in models:
        weight_file = mt.weight_file_name(m, 'bdd100k_cl0-500k', epochs, data_augmentation=False)
        weight_file = h5_path + weight_file
        print("Building: " + weight_file)

        if m in ('mobilenet', 'mobilenetv2', 'nasnet'):
            # These architectures only ship ImageNet weights for fixed input
            # shapes: build the pre-trained model at 224x224, then copy its
            # weights layer by layer into a model with the target input shape.
            model = mt.model_struct(m, (224, 224, 3), params['n_classes'], weights='imagenet', include_top=False)
            new_model = mt.model_struct(m, params['dim'], params['n_classes'], weights=None, include_top=False)
            print("Loading weights...")
            for new_layer, layer in zip(new_model.layers[1:], model.layers[1:]):
                new_layer.set_weights(layer.get_weights())
            base_model = new_model
        else:
            base_model = mt.model_struct(m, params['dim'], params['n_classes'], weights='imagenet', include_top=False)

        print("Configuring top layers")
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(1024, activation='relu')(x)
        predictions = Dense(10, activation='softmax')(x)
        model = Model(inputs=base_model.input, outputs=predictions)
        model.summary()

        # for layer in base_model.layers:
        #     layer.trainable = False

        model.compile('adam', loss='categorical_crossentropy', metrics=['accuracy'])

        # Remove the extension explicitly: rstrip('.h5') would strip any
        # trailing '.', 'h' or '5' characters, not the '.h5' suffix.
        checkpoint_file = weight_file[:-3] if weight_file.endswith('.h5') else weight_file
        checkpoint = ModelCheckpoint(checkpoint_file + '_ep{epoch:02d}_vl{val_loss:.2f}.hdf5',
                                     monitor='val_acc', verbose=0, save_best_only=True,
                                     save_weights_only=False, mode='auto')

        # Train model on dataset
        model.fit_generator(generator=training_generator,
                            validation_data=validation_generator,
                            verbose=1,
                            epochs=epochs,
                            use_multiprocessing=True,
                            workers=6,
                            callbacks=[checkpoint])
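# A minimal entry point for running the analyses above. Which functions to call,
# and in what order, is a guess: the original file does not show its driver code.
if __name__ == '__main__':
    check_acc()
    colorfulness_analysis()
    colorcube_analysis()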