def colorfulness_analysis(model='densenet121', top_n=2500):
    """
    Experiment to analyse the relevance of the colorfulness attribute.

    See the metrics_color.colorfulness() function for more details on the attribute.

    :param model: The predictions of :model: will be used to compute the prediction scores
    :param top_n: Number of elements in each of the two series (highest and lowest
        prediction scores) that will be plotted for analysis. Clamped to the number of
        scored test images.
    :return: None, the two series are handed to plotting.box_plot()
    """

    # Load test data and model results
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]

    # Compute scores and sort test data ids by score (np.argsort sorts ascending)
    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)
    print(len(score_sorted_ids))

    # Asking for more images than were scored used to raise an IndexError;
    # clamp instead so both series simply contain every available image.
    n_images = max(0, min(top_n, len(score_sorted_ids)))
    images = test_data[0]

    # Compute metric for high score and low score data.
    # The reversed view keeps the original ordering: best score first.
    high_score_series = [metrics_color.colorfulness(images[img_id])
                         for img_id in score_sorted_ids[::-1][:n_images]]
    low_score_series = [metrics_color.colorfulness(images[img_id])
                        for img_id in score_sorted_ids[:n_images]]

    # Plot box plot of the two series
    plotting.box_plot(high_score_series, low_score_series,
                      name_s1='high prediction scores',
                      name_s2='low prediction scores',
                      y_label='Colorfulness',
                      title='Colorfulness analysis (' + str(n_images) + ' images/series)')
def data_analysis():
    """
    Scratch analysis: load the CIFAR-10 splits and the stored predictions of the
    first entry of ``models``, then show a fixed selection of test images through
    plotting.show_imgs().

    The training / prediction-logging calls and the per-image colour metrics that
    once fed this function are disabled; predictions are read back with
    t_log.load_predictions() instead.

    :return:
    """
    # NOTE(review): the train and validation splits are only needed by the
    # disabled mt.train2() call; they are still loaded here.
    train_split = dt.get_data('cifar10', (0, 20000))
    validation_split = dt.get_data('cifar10', (40000, 50000))
    test_split = dt.get_data('cifar10', (50000, 60000))

    for net_name in models[:1]:
        weights_name = mt.weight_file_name(net_name, 'cifar10-2-5', 50, False)
        stored_predictions = t_log.load_predictions(weights_name, file_path=csv_path)

        # Labels are converted one by one with int(); the earlier
        # np.argmax(test_data[1], axis=1) variant was marked as wrong.
        labels = list(map(int, test_split[1]))
        # NOTE(review): the ratings are computed but not used below.
        ratings = metrics.prediction_ratings(stored_predictions, labels)

        # The per-image entropy / contrast / colorfulness loop is disabled, so
        # this list stays empty and the argsort below yields an empty ordering.
        entropies = []
        entropy_order = np.argsort(entropies)

        # Hand-picked image ids (they replace a selection by entropy rank).
        example_ids = [21, 3767, 9176, 730, 5905]
        plotting.show_imgs(example_ids, 'cdc entropy examples', test_split[0],
                           showColorCube=True)
def colorcube_analysis():
    """
    Compare the colour density cubes of the best and worst predicted test images.

    For every entry of ``models`` the stored predictions are rated, the test image
    ids are sorted by rating, and three ColorDensityCube plots are produced: the
    ``top_n`` highest rated images, the ``top_n`` lowest rated images, and the
    difference between the two (this last plot is drawn with ``save=True``).

    :return:
    """
    top_n = 2000
    resolution = 4

    # The test split and its labels do not depend on the model, so they are
    # loaded once instead of once per model.
    test_data = dt.get_data('cifar10', (50000, 60000))
    images = test_data[0]
    true_classes = [int(k) for k in test_data[1]]

    for m in models:
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)

        # Predictions come from the stored CSV. The model itself used to be
        # loaded here with mt.load_by_name() but was never used; re-add that
        # call only if live model.predict() output is wanted again.
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        scores = metrics.prediction_ratings(y_predicted, true_classes)
        score_sorted_ids = np.argsort(scores)  # ascending: worst ratings first

        cc_high = metrics_color.ColorDensityCube(resolution=resolution)
        for img_id in score_sorted_ids[-top_n:]:
            cc_high.feed(images[img_id])
        cc_high.normalize()
        cc_high.plot_cube()

        cc_low = metrics_color.ColorDensityCube(resolution=resolution)
        for img_id in score_sorted_ids[:top_n]:
            cc_low.feed(images[img_id])
        cc_low.normalize()
        # The difference is taken before cc_low is plotted, as in the original
        # statement order.
        cc_diff = cc_high.substract(cc_low, 'value')
        cc_low.plot_cube()

        cc_diff.normalize()
        cc_diff.plot_cube(title='Color cube analysis difference (' + str(top_n) + ' images/series)',
                          normalize=True, save=True)
def histogram_analysis(model='densenet121', top_n=2000):
    """
    Plot histograms of the best and the worst predicted test images.

    :param model: The predictions of :model: will be used to compute the prediction scores
    :param top_n: Number of images in each of the two series (highest and lowest
        prediction scores). Clamped to the number of scored test images.
    :return: None, the two image series are handed to plotting.plot_hists()
    """
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]

    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)  # ascending: worst scores first

    # Clamp so that an oversized top_n cannot index past the available images.
    n_images = max(0, min(top_n, len(score_sorted_ids)))
    images = test_data[0]

    # Reversed view: best score first, same order the index loop produced.
    high_score_series = [images[img_id] for img_id in score_sorted_ids[::-1][:n_images]]
    low_score_series = [images[img_id] for img_id in score_sorted_ids[:n_images]]

    plotting.plot_hists(high_score_series, 'high scores',
                        low_score_series, 'low scores',
                        plotting.cs_bgr, title=' ')
def confusion(model='densenet121'):
    """
    Print the confusion matrix of :model: on the test data, followed by the mean
    prediction score of the images assigned to each predicted class.

    :param model: The predictions of :model: will be used
    :return: None, results are printed
    """
    # Load test data and model results
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    predicted_classes = np.argmax(y_predicted, axis=1)
    true_classes = [int(k) for k in test_data[1]]

    print('Confusion Matrix for Total Test Data')
    print(sk_metrics.confusion_matrix(true_classes, predicted_classes))

    scores = metrics.prediction_ratings(y_predicted, true_classes)

    # One bucket of scores per predicted class. The buckets start empty: the
    # previous np.zeros((10, 1)).tolist() initialisation put a spurious 0.0
    # in every bucket, which dragged every per-class mean down.
    n_classes = np.asarray(y_predicted).shape[1]
    prediction_scores = [[] for _ in range(n_classes)]
    for predicted_class, score in zip(predicted_classes, scores):
        prediction_scores[predicted_class].append(score)

    for class_scores in prediction_scores:
        # A class that was never predicted has no mean; report nan explicitly
        # rather than letting np.mean warn about an empty slice.
        print(float(np.mean(class_scores)) if class_scores else float('nan'))
def check_pr():
    """
    Print prediction ratings for spot checks on the 'densenet121' predictions.

    First the rating and raw prediction of the images at sorted rank 2500 and 2501
    are printed, then, for two hand-picked id lists ('easy' and 'hard'), the
    rating of every image and whether the arg-max class equals the true class.

    :return:
    """
    net_name = 'densenet121'
    weights_name = mt.weight_file_name(net_name, 'cifar10-2-5', 50, False)
    predictions = t_log.load_predictions(weights_name, file_path=csv_path)
    test_split = dt.get_data('cifar10', (50000, 60000))

    easy_ids = [9929, 9935, 9939, 9945, 9952, 9966, 9971, 9992, 9997, 9999]
    hard_ids = [9746, 9840, 9853, 9901, 9910, 9923, 9924, 9926, 9960, 9982]

    labels = [int(k) for k in test_split[1]]
    ratings = metrics.prediction_ratings(predictions, labels)
    ranking = np.argsort(ratings)

    # Two neighbouring ranks in the sorted ratings
    for rank in (2500, 2501):
        ranked_id = ranking[rank]
        print(ratings[ranked_id], predictions[ranked_id])

    # Same report for both id lists
    for set_name, image_ids in (('easy', easy_ids), ('hard', hard_ids)):
        print(set_name)
        for img_id in image_ids:
            rating = metrics.prediction_rating(predictions[img_id], labels[img_id])
            is_correct = np.argmax(predictions[img_id]) == labels[img_id]
            print(img_id, '- pr:', rating, ' - correct?: ', is_correct)
def entropy_cc_analysis(model='densenet121', top_n=2000):
    """
    Experiment to analyse the relevance of the colour entropy attribute.

    See the metrics_color.entropy_cc() function for more details on the attribute.

    :param model: The predictions of :model: will be used to compute the prediction scores
    :param top_n: Number of elements in each of the two series (highest and lowest
        prediction scores) that will be plotted for analysis. Clamped to the number of
        scored test images.
    :return: None, the two series are handed to plotting.box_plot()
    """
    test_data = dt.get_data('cifar10', (50000, 60000))
    model_name0 = mt.weight_file_name(model, 'cifar10-2-5', 50, False)
    y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
    true_classes = [int(k) for k in test_data[1]]

    scores = metrics.prediction_ratings(y_predicted, true_classes)
    score_sorted_ids = np.argsort(scores)  # ascending: worst scores first
    print(len(score_sorted_ids))

    # Clamp so that an oversized top_n cannot index past the available images.
    n_images = max(0, min(top_n, len(score_sorted_ids)))
    images = test_data[0]

    # NOTE(review): 8 is the second argument entropy_cc() was always called
    # with here, presumably the colour cube resolution; confirm in metrics_color.
    high_score_entropies = [metrics_color.entropy_cc(images[img_id], 8)
                            for img_id in score_sorted_ids[::-1][:n_images]]
    low_score_entropies = [metrics_color.entropy_cc(images[img_id], 8)
                           for img_id in score_sorted_ids[:n_images]]

    plotting.box_plot(high_score_entropies, low_score_entropies,
                      name_s1='high prediction scores',
                      name_s2='low prediction scores',
                      y_label='Color entropy',
                      title='Color entropy analysis (' + str(n_images) + ' images/series)')
def pr_on_fair_distribution(models=('densenet121',), top_n=100, res=4):
    """
    Mean prediction rating per colour cell, measured on the images densest in that colour.

    A ColorDensityCube is computed for every test image. For every cell of the
    cube the ids of the ``top_n`` images with the highest density in that cell are
    kept (the same image can be selected for several cells). Then, for each model,
    the mean prediction rating of the selected images is stored per cell and
    plotted as a cube, together with the colour cube of all selected images.

    :param models: Iterable of model names whose stored predictions are analysed
    :param top_n: Number of images kept per colour cell, must be at least 1
    :param res: Resolution passed to ColorDensityCube; the analysis iterates over
        res x res x res cells
    :return: None, results are printed and plotted
    :raises ValueError: if top_n is smaller than 1
    """
    # [-0:] would select every image instead of none, so reject it up front.
    if top_n < 1:
        raise ValueError('top_n must be at least 1, got ' + repr(top_n))

    test_data = dt.get_data('cifar10', (50000, 60000))
    images = test_data[0]
    true_classes = [int(k) for k in test_data[1]]  # same for every model

    # Add every image's cube in densities
    densities = []
    for img in images:
        cc = metrics_color.ColorDensityCube(res)
        cc.feed(img)
        densities.append(cc.get_cube())

    # Shape densities (list of cubes) to make a list per color: the image axis
    # is moved behind the three colour axes (same result as the former chain of
    # three np.swapaxes calls).
    densities_lists = np.moveaxis(np.asarray(densities), 0, 3)

    # For each color keep the ids of the top_n most dense images in this color.
    # np.ndindex walks the cells in the same order as three nested index loops.
    # The values are sorted from a plain list, as before, so the selection is
    # computed exactly the way it was.
    most_dense_ids = {}
    for cell in np.ndindex(res, res, res):
        density_list = densities_lists[cell].tolist()
        most_dense_ids[cell] = np.argsort(density_list)[-top_n:]

    # Per model analysis
    for m in models:
        # Load model predictions and compute the rating of every test image
        model_name0 = mt.weight_file_name(m, 'cifar10-2-5', 50, False)
        y_predicted = t_log.load_predictions(model_name0, file_path=csv_path)
        pr = metrics.prediction_ratings(y_predicted, true_classes)

        # For each color get prediction score of the top_n images
        score_cube = np.zeros((res, res, res))
        global_cc = metrics_color.ColorDensityCube(resolution=res)
        for cell in np.ndindex(res, res, res):
            pr_most_dense = []
            for img_id in most_dense_ids[cell].tolist():
                pr_most_dense.append(pr[img_id])
                global_cc.feed(images[img_id])
            cell_mean = np.mean(pr_most_dense)
            score_cube[cell] = cell_mean
            print(cell_mean)

        global_cc.normalize()
        global_cc.plot_cube(title='Fair distributed dataset ColorCube')

        sc = metrics_color.ColorDensityCube(resolution=res, cube=score_cube)
        sc.normalize()
        sc.plot_cube(title='Scores per color for ' + m)
def _print_stats(title, values):
    """Print title followed by the mean, std, max and min of values."""
    print(title)
    print('Mean', np.mean(values))
    print('Std', np.std(values))
    print('Max', np.max(values))
    print('Min', np.min(values))


def bug_feature_detection():
    """
    For every entry of ``models``: train a classifier, rate its predictions on the
    test split, train a regression model on (test image, rating) pairs and report
    how well the regression output matches the ratings and, once thresholded,
    whether the classifier was right.

    The test split (30000 images) is divided into the first 20000 images, used to
    train the regression model, and the last 10000, used to validate and evaluate it.

    :return: None, results are printed
    """
    # Layout of the regression data inside the test split
    reg_train_end = 20000
    reg_val_end = 30000

    for m in models:
        # Data is reloaded for every model, as before, so nothing a training
        # call might do to the arrays can leak into the next model.
        tr_data = dt.get_data('cifar10', (0, 20000))
        val_data = dt.get_data('cifar10', (20000, 30000))
        test_data = dt.get_data('cifar10', (30000, 60000))

        model0, _ = mt.train2(m, tr_data, val_data, 50, False, tag='cifar10-2-5', path=h5_path)
        acc, predicted_classes, y_predicted = dt.predict_and_acc(model0, test_data)
        print('acc', acc)

        # Labels are converted one by one with int(); np.argmax(test_data[1], axis=1)
        # was marked as wrong for this label format.
        true_classes = [int(k) for k in test_data[1]]
        pr = metrics.prediction_ratings(y_predicted, true_classes)

        # NOTE(review): the second classifier is trained with 1 where the first
        # one uses 50 (presumably the epoch count) and only serves as the base
        # for the regression model.
        model2, _ = mt.train2(m, tr_data, val_data, 1, False, tag='cifar10-0223', path=h5_path)
        model1 = mt.reg_from_(model2, m)
        print('Reg model created')

        # Regression splits get their own names instead of overwriting the
        # classifier's tr_data / val_data.
        X_test = test_data[0]
        reg_tr_data = X_test[0:reg_train_end], pr[0:reg_train_end]
        reg_val_data = X_test[reg_train_end:reg_val_end], pr[reg_train_end:reg_val_end]
        model1, _ = mt.train_reg(model1, m, reg_tr_data, reg_val_data, '', 50, False, path=h5_path)

        formatted_test_data = dt.format_data(reg_val_data, 10)
        y_true = np.asarray(pr[reg_train_end:reg_val_end])
        _print_stats('Ground truth values:', y_true)

        y_predicted1 = model1.predict(formatted_test_data[0])
        n_guesses = len(y_predicted1)
        # First output of every prediction row
        y_predicted2 = np.array([row[0] for row in y_predicted1])
        _print_stats('Prediction values:', y_predicted2)

        y_predicted3 = y_predicted2 / np.linalg.norm(y_predicted2)
        _print_stats('Norm Prediction values:', y_predicted3)

        # Absolute error against the ratings. The bound is the number of
        # regression outputs (the loop used to be bounded by the size of the
        # classifier output and a hard-coded 10000).
        n_compared = min(n_guesses, len(y_true))
        diff2 = np.abs(y_predicted2[:n_compared] - y_true[:n_compared])
        diff3 = np.abs(y_predicted3[:n_compared] - y_true[:n_compared])
        print('Difference:')
        print('Mean ', np.mean(diff2))
        print('Max ', np.max(diff2))
        print('Difference Norm:')
        print('Mean ', np.mean(diff3))
        print('Max ', np.max(diff3))

        # R/W guess prediction
        # The index is clamped: with acc == 1.0 it used to fall one past the end.
        # NOTE(review): this takes the acc-quantile of the regression outputs;
        # confirm whether the (1 - acc)-quantile was intended.
        opti_index = min(int(acc * n_guesses), n_guesses - 1)
        opti_thr = float(np.sort(y_predicted2)[opti_index])
        print('opti_thr', opti_thr)
        thresholds = (0.6, 0.7, 0.777, 0.8, 0.9, opti_thr)

        # Whether the classifier was right on each regression-validation image
        was_right = np.array([true_classes[reg_train_end + k] == predicted_classes[reg_train_end + k]
                              for k in range(n_guesses)])
        for thr in thresholds:
            guessed_right = y_predicted2 > thr
            n_right_guesses = int(np.sum(guessed_right == was_right))
            print('acc for reg for true/false with thr of ' + str(thr) + ': '
                  + str(float(n_right_guesses)/n_guesses))

        print(' ~ ')
'yticks': () }) for image, label, ax in zip(X_people[mask], y_people[mask], axes): ax.imshow(image.reshape(image_shape), vmin=0, vmax=1) ax.set_title(people.target_names[label].split()[-1]) plt.show() labels = [[] for k in xrange(n_clusters)] for id, x in enumerate(k_means_test()): labels[x].append(id) for f in file_list[:1]: predictions = aa.load_csv(res_path + f, 2) losses = t_log.load_csv(res_path + f, 3) pr = metrics.prediction_ratings(losses, test_labels) sorted_pr_indexes = np.argsort(pr) per_class_score = np.zeros(10) guessed = [] for id, p in enumerate(predictions): if p == test_labels[id]: per_class_score[p] += 1 guessed.append(True) else: guessed.append(False) print(per_class_score) print(np.mean(pr))