def main(args):
    """Report average tree distances between testset images after clustering.

    Loads the preprocessed data, runs ``pipeline.get_clusters`` for the
    hard-coded search term "food", converts the result with
    ``parse_result_tree`` and then, for each of the three id groups returned
    by ``retrieveTestsetResults``, prints the average distance computed by
    ``calculate_average_distance`` together with its reciprocal ("closeness").

    Args:
        args: Parsed command-line arguments. Only ``args.database_file`` is
            read here; it is passed on to ``retrieveTestsetResults``.
    """

    def closeness(distance):
        # Reciprocal of an average distance. A distance of exactly zero used
        # to raise ZeroDivisionError at the very end of the (expensive) run
        # and discard all results; treat it as "infinitely close" instead.
        if distance == 0:
            return float("inf")
        return float(1) / distance

    # Loading preprocessed features on startup.
    # print("...") with a single string prints the same text on Python 2
    # and Python 3.
    print_status("Loading visual_features from file... ")
    visual_features = general_helpers.load_visual_features()
    print("Done.")

    print_status("Loading cluster_for_synsets from mcl_clusters file... ")
    cluster_for_synsets = general_helpers.load_cluster_for_synsets()
    print("Done.")

    print_status("Loading keywords_for_pictures from file... ")
    keywords_for_pictures = general_helpers.load_keywords_for_pictures()
    print("Done.")

    print_status("Loading cluster_representatives from file... ")
    cluster_representatives = general_helpers.load_cluster_representatives(how_many_per_cluster=6)
    print("Done loading preprocessed data.")

    print_status("Checking images against testset:\n")
    print_status("Retrieving clusters... \n")
    pipeline_result = pipeline.get_clusters(
        "food",
        use_meronyms=False,
        visual_clustering_threshold=100000,
        mcl_clustering_threshold=10,
        minimal_mcl_cluster_size=1,
        minimal_node_size=10,
        visual_features=visual_features,
        cluster_for_synsets=cluster_for_synsets,
        keywords_for_pictures=keywords_for_pictures,
        cluster_representatives=cluster_representatives,
    )

    # Dev shortcut: load a previously pickled pipeline result instead.
    # pipeline_result = pickle.load(open('image_tree.pickle', 'r'))

    print_status("Parsing result tree to easier accessible format...")
    parsed_result_tree = parse_result_tree(pipeline_result)

    print_status("Loading testset from database... \n")
    same_object_ids, same_object_same_context_ids, not_similar_ids = retrieveTestsetResults(args.database_file)

    print_status("Comparing result images to testset... \n")
    average_same_object_distance = calculate_average_distance(
        parsed_result_tree, same_object_ids, "same object", verbose=True)
    average_same_context_distance = calculate_average_distance(
        parsed_result_tree, same_object_same_context_ids, "same context", verbose=True)
    average_not_similar_distance = calculate_average_distance(
        parsed_result_tree, not_similar_ids, "not_similar", verbose=True)

    # Compute each reciprocal once; the final summary reuses two of them.
    same_object_closeness = closeness(average_same_object_distance)
    same_context_closeness = closeness(average_same_context_distance)
    not_similar_closeness = closeness(average_not_similar_distance)

    print_status("Done!\n")
    sys.stdout.write("Average distance for same object is %s with closeness %s \n"
                     % (average_same_object_distance, same_object_closeness))
    sys.stdout.write("Average distance for same context is %s with closeness %s \n"
                     % (average_same_context_distance, same_context_closeness))
    sys.stdout.write("Average distance for not similar is %s with closeness %s \n"
                     % (average_not_similar_distance, not_similar_closeness))
    # Gap between the closeness of "same object" pairs and "not similar" pairs.
    sys.stdout.write("Distance %s \n" % (same_object_closeness - not_similar_closeness))
def main(args):
    """Print precision, recall and F-measure of the clustered images.

    Loads the preprocessed data, runs ``pipeline.get_clusters`` for the
    hard-coded search term "food", collects the image ids of the result with
    ``recursively_collect_images`` and compares them with the positive and
    negative id collections returned by ``retrieveTestsetResults``.

    Args:
        args: Parsed command-line arguments. Only ``args.database_file`` is
            read here; it is passed on to ``retrieveTestsetResults``.
    """

    def ratio(numerator, denominator):
        # Guarded division: an empty result set (or one that never hits the
        # testset) used to raise ZeroDivisionError. An undefined ratio is
        # reported as 0.0, the usual convention for precision/recall.
        if denominator == 0:
            return 0.0
        return float(numerator) / denominator

    print_status("Checking images against testset:\n")

    # print("...") with a single string prints the same text on Python 2
    # and Python 3.
    print_status("Loading visual_features from file... ")
    visual_features = general_helpers.load_visual_features()
    print("Done.")

    print_status("Loading cluster_for_synsets from mcl_clusters file... ")
    cluster_for_synsets = general_helpers.load_cluster_for_synsets()
    print("Done.")

    print_status("Loading keywords_for_pictures from file... ")
    keywords_for_pictures = general_helpers.load_keywords_for_pictures()
    print("Done.")

    print_status("Loading cluster_representatives from file... ")
    cluster_representatives = general_helpers.load_cluster_representatives(how_many_per_cluster=6)
    print("Done loading preprocessed data.")

    print_status("Checking images against testset:\n")
    print_status("Retrieving clusters... \n")

    # Alternative tree source kept for reference:
    # image_tree = get_searchtrees_with_filenames("food", use_meronyms=False, minimal_node_size=1)
    image_tree = pipeline.get_clusters(
        "food",
        use_meronyms=False,
        visual_clustering_threshold=10000,
        mcl_clustering_threshold=15,
        minimal_mcl_cluster_size=10,
        minimal_node_size=15,
        visual_features=visual_features,
        cluster_for_synsets=cluster_for_synsets,
        keywords_for_pictures=keywords_for_pictures,
        cluster_representatives=cluster_representatives,
    )

    sys.stdout.write("Collecting images from tree... \n")
    result_ids = recursively_collect_images(image_tree)

    sys.stdout.write("Loading testset from database... \n")
    testset_positive_ids, testset_negative_ids = retrieveTestsetResults(args.database_file)

    sys.stdout.write("Comparing result images to testset... \n")

    # Sizes must be captured before the loop below, which consumes entries
    # from both testset collections.
    result_size = len(result_ids)
    testset_positive_size = len(testset_positive_ids)
    testset_negative_size = len(testset_negative_ids)

    true_positives = 0
    false_positives = 0

    for result_id in result_ids:
        # Each matched testset id is removed so that a repeated result id is
        # not counted twice and so that the leftover positives are exactly
        # the ones the result missed.
        if result_id in testset_positive_ids:
            true_positives += 1
            testset_positive_ids.remove(result_id)

        if result_id in testset_negative_ids:
            false_positives += 1
            testset_negative_ids.remove(result_id)

    # Positives never removed above were not part of the result.
    false_negatives = len(testset_positive_ids)

    precision = ratio(true_positives, true_positives + false_positives)
    recall = ratio(true_positives, true_positives + false_negatives)
    f_measure = ratio(2 * (precision * recall), precision + recall)

    sys.stdout.write("Done:\n\n")
    sys.stdout.write("Testset size: %d\n\n" % (testset_positive_size + testset_negative_size))
    sys.stdout.write("Result size: %d\n" % result_size)
    sys.stdout.write("Real positives: %d\n\n" % testset_positive_size)
    sys.stdout.write("True Positives: %d\n" % true_positives)
    sys.stdout.write("True Negatives: ???\n")
    sys.stdout.write("False Positives: %d\n" % false_positives)
    sys.stdout.write("False Negatives: %d\n\n" % false_negatives)
    sys.stdout.write("Precision: %f (tp / (tp + fp))\n" % precision)
    sys.stdout.write("Recall: %f (tp / (tp + fn))\n" % recall)
    sys.stdout.write("F-Measure: %f (2 * (p * r / (p + r)))\n" % f_measure)
def main(args):
    """Evaluate visual clustering against pairwise testset judgements.

    Loads the preprocessed data, runs ``pipeline.get_clusters`` for the
    hard-coded search term "food", flattens the result with
    ``flatten_result_tree`` and then runs
    ``combined_clustering.cluster_visually`` ten times on a fresh copy of the
    flattened tree. After every run the id pairs returned by
    ``retrieveTestsetResults`` are checked against the visual clusters; the
    per-run counts are averaged and printed with precision, recall and
    F-measure.

    In this evaluation a "positive" is a pair that ends up in different
    clusters: visually different pairs that are separated count as true
    positives, visually similar pairs that share a cluster as true negatives.

    Args:
        args: Parsed command-line arguments. ``args.food_id_file`` is opened
            and parsed as JSON; ``args.database_file`` is passed on to
            ``retrieveTestsetResults``.
    """

    def ratio(numerator, denominator):
        # Guarded division: when no pair falls into the relevant categories
        # the plain division raised ZeroDivisionError after all ten runs.
        # An undefined ratio is reported as 0.0.
        if denominator == 0:
            return 0.0
        return float(numerator) / denominator

    def mean(values):
        # Arithmetic mean of the per-run counts (one entry per run).
        return float(sum(values)) / len(values)

    # Loading preprocessed features on startup.
    # print("...") with a single string prints the same text on Python 2
    # and Python 3.
    print_status("Loading visual_features from file... ")
    visual_features = general_helpers.load_visual_features()
    print("Done.")

    print_status("Loading cluster_for_synsets from mcl_clusters file... ")
    cluster_for_synsets = general_helpers.load_cluster_for_synsets()
    print("Done.")

    print_status("Loading keywords_for_pictures from file... ")
    keywords_for_pictures = general_helpers.load_keywords_for_pictures()
    print("Done.")

    print_status("Loading cluster_representatives from file... ")
    cluster_representatives = general_helpers.load_cluster_representatives(how_many_per_cluster=6)
    print("Done loading preprocessed data.")

    print_status("Checking images against testset:\n")
    print_status("Retrieving clusters... \n")
    pipeline_result = pipeline.get_clusters(
        "food",
        use_meronyms=False,
        visual_clustering_threshold=100000,
        mcl_clustering_threshold=4,
        minimal_mcl_cluster_size=6,
        minimal_node_size=4,
        visual_features=visual_features,
        cluster_for_synsets=cluster_for_synsets,
        keywords_for_pictures=keywords_for_pictures,
        cluster_representatives=cluster_representatives,
    )

    # # Comment in to load preprocessed pipeline_result for dev mode
    # pipeline_result = pickle.load(open('image_tree.pickle', 'r'))

    # Context manager so the file handle is closed deterministically; the
    # previous json.load(open(...)) left it to the garbage collector.
    with open(args.food_id_file, 'r') as food_id_file:
        annotated_food_dict = json.load(food_id_file)

    print_status("Flattening result tree... \n")
    flattened_mcl_tree = flatten_result_tree(pipeline_result, annotated_food_dict, size_from_id=0, size_to_id=-1)
    image_counter = len(flattened_mcl_tree.subclusters[0]['subcluster'])

    # NOTE(review): the features were already loaded above; this second load
    # is kept because it is unclear from here whether the pipeline call
    # modifies them. Confirm and drop if redundant.
    print_status("Loading visual_features from file... \n")
    visual_features = general_helpers.load_visual_features()

    true_positives_total = []
    false_negatives_total = []
    true_negatives_total = []
    false_positives_total = []

    for i in range(0, 10):
        print_status("Calculating visual clusters (%d x)... \n" % i)
        # Deep copy so every run starts from the same untouched tree.
        visually_clustered_result = combined_clustering.cluster_visually(
            copy.deepcopy(flattened_mcl_tree),
            visual_clustering_threshold=4,
            visual_features=visual_features,
        )

        print_status("Convert visual clusters to simpler structure... \n")
        visual_clusters = []
        for visual_cluster in visually_clustered_result.subclusters[0]['subcluster']:
            # Reduce each entry to its bare id: last backslash-separated path
            # component, cut at the first dot.
            visual_clusters.append(set(
                image_tuple[0].split('\\')[-1].split('.')[0]
                for image_tuple in visual_cluster
            ))

        print_status("Done clustering %d images into %d visual clusters. \n" % (image_counter, len(visual_clusters)))

        # # Comment in to load preprocessed visual_clusters for dev mode
        # visual_clusters = pickle.load(open('visual_clusters.pickle', 'r'))

        # NOTE(review): the testset is re-read on every run; presumably it
        # could be loaded once before the loop -- confirm that
        # retrieveTestsetResults is deterministic first.
        print_status("Loading testset from database... \n")
        visually_similar_tuples, visually_different_tuples = retrieveTestsetResults(args.database_file)

        print_status("Comparing clusters to testset... \n")
        true_negatives = 0
        false_positives = 0
        true_positives = 0
        false_negatives = 0

        print_status("Starting with visually similar tuples... \n")
        for id_tuple in visually_similar_tuples:
            # Pairs with an id missing from the clustering are skipped.
            if both_ids_are_found(id_tuple, visual_clusters):
                if one_cluster_contains_both_ids(id_tuple, visual_clusters):
                    true_negatives += 1
                else:
                    false_positives += 1

        print_status("Now checking different image tuples... \n")
        for id_tuple in visually_different_tuples:
            if both_ids_are_found(id_tuple, visual_clusters):
                if one_cluster_contains_both_ids(id_tuple, visual_clusters):
                    false_negatives += 1
                else:
                    true_positives += 1

        true_positives_total.append(true_positives)
        false_negatives_total.append(false_negatives)
        true_negatives_total.append(true_negatives)
        false_positives_total.append(false_positives)

    average_true_positives = mean(true_positives_total)
    average_false_negatives = mean(false_negatives_total)
    average_true_negatives = mean(true_negatives_total)
    average_false_positives = mean(false_positives_total)

    precision = ratio(average_true_positives, average_true_positives + average_false_positives)
    recall = ratio(average_true_positives, average_true_positives + average_false_negatives)
    f_measure = ratio(2 * (precision * recall), precision + recall)

    print_status("Done!\n\n")
    sys.stdout.write("Testset contains %5d visually similar image tuples \n" % len(visually_similar_tuples))
    sys.stdout.write("And there are %5d visually different image tuples \n\n" % len(visually_different_tuples))
    sys.stdout.write("Average true positives: %f \n" % average_true_positives)
    sys.stdout.write("Average false negatives: %f \n" % average_false_negatives)
    sys.stdout.write("Average true negatives: %f \n" % average_true_negatives)
    sys.stdout.write("Average false positives: %f \n\n" % average_false_positives)
    sys.stdout.write("Precision: %f (tp / (tp + fp))\n" % precision)
    sys.stdout.write("Recall: %f (tp / (tp + fn))\n" % recall)
    sys.stdout.write("F-Measure: %f (2 * (p * r / (p + r)))\n" % f_measure)