def main(args):
  """Cluster the "food" search results and report average testset distances.

  Loads the preprocessed feature files, runs pipeline.get_clusters for the
  search term "food", parses the result tree and prints, for each of the three
  id groups returned by retrieveTestsetResults, the average distance computed
  by calculate_average_distance together with its reciprocal ("closeness").

  Args:
    args: object exposing database_file (presumably the parsed command line
      arguments); the value is handed to retrieveTestsetResults.
  """
  # Loading preprocessed features on startup
  print_status("Loading visual_features from file... ")
  visual_features = general_helpers.load_visual_features()
  # A parenthesised single-argument print emits the same text on Python 2 and 3.
  print("Done.")
  print_status("Loading cluster_for_synsets from mcl_clusters file... ")
  cluster_for_synsets = general_helpers.load_cluster_for_synsets()
  print("Done.")
  print_status("Loading keywords_for_pictures from file... ")
  keywords_for_pictures = general_helpers.load_keywords_for_pictures()
  print("Done.")
  print_status("Loading cluster_representatives from file... ")
  cluster_representatives = general_helpers.load_cluster_representatives(how_many_per_cluster=6)
  print("Done loading preprocessed data.")

  print_status("Checking images against testset:\n")
  print_status("Retrieving clusters... \n")
  pipeline_result = pipeline.get_clusters("food", use_meronyms=False,
                                     visual_clustering_threshold=100000,
                                     mcl_clustering_threshold=10,
                                     minimal_mcl_cluster_size=1,
                                     minimal_node_size=10,
                                     visual_features=visual_features,
                                     cluster_for_synsets=cluster_for_synsets,
                                     keywords_for_pictures=keywords_for_pictures,
                                     cluster_representatives=cluster_representatives)
  # Dev mode: comment in to reuse a previously pickled pipeline result.
  # pipeline_result = pickle.load(open('image_tree.pickle', 'r'))

  print_status("Parsing result tree to easier accessible format...")
  parsed_result_tree = parse_result_tree(pipeline_result)

  print_status("Loading testset from database... \n")
  same_object_ids, same_object_same_context_ids, not_similar_ids = retrieveTestsetResults(args.database_file)

  print_status("Comparing result images to testset... \n")

  average_same_object_distance  = calculate_average_distance(parsed_result_tree, same_object_ids, "same object", verbose=True)
  average_same_context_distance = calculate_average_distance(parsed_result_tree, same_object_same_context_ids, "same context", verbose=True)
  average_not_similar_distance  = calculate_average_distance(parsed_result_tree, not_similar_ids, "not_similar", verbose=True)

  def closeness(distance):
    # Closeness is the reciprocal of the distance. A distance of zero used to
    # raise ZeroDivisionError and discard the whole report after the expensive
    # clustering run; report it as infinitely close instead.
    if distance == 0:
      return float("inf")
    return float(1) / distance

  same_object_closeness  = closeness(average_same_object_distance)
  same_context_closeness = closeness(average_same_context_distance)
  not_similar_closeness  = closeness(average_not_similar_distance)

  print_status("Done!\n")
  sys.stdout.write("Average distance for same object  is %s with closeness %s \n" % (average_same_object_distance, same_object_closeness))
  sys.stdout.write("Average distance for same context is %s with closeness %s \n" % (average_same_context_distance, same_context_closeness))
  sys.stdout.write("Average distance for not similar  is %s with closeness %s \n" % (average_not_similar_distance, not_similar_closeness))
  # Gap between the closeness of same-object images and of not-similar images.
  sys.stdout.write("Distance %s \n" % (same_object_closeness - not_similar_closeness))
def main(args):
    """Compare the images found for "food" against the annotated testset.

    Loads the preprocessed feature files, runs pipeline.get_clusters for the
    search term "food", collects the image ids of the resulting tree with
    recursively_collect_images and prints precision, recall and F-measure of
    those ids against the positive and negative id collections returned by
    retrieveTestsetResults.

    Args:
        args: object exposing database_file (presumably the parsed command
            line arguments); the value is handed to retrieveTestsetResults.
    """
    # NOTE(review): this banner is printed a second time after the data has
    # been loaded; one of the two is probably a leftover.
    print_status("Checking images against testset:\n")

    print_status("Loading visual_features from file... ")
    visual_features = general_helpers.load_visual_features()
    # A parenthesised single-argument print emits the same text on Python 2 and 3.
    print("Done.")
    print_status("Loading cluster_for_synsets from mcl_clusters file... ")
    cluster_for_synsets = general_helpers.load_cluster_for_synsets()
    print("Done.")
    print_status("Loading keywords_for_pictures from file... ")
    keywords_for_pictures = general_helpers.load_keywords_for_pictures()
    print("Done.")
    print_status("Loading cluster_representatives from file... ")
    cluster_representatives = general_helpers.load_cluster_representatives(how_many_per_cluster=6)
    print("Done loading preprocessed data.")

    print_status("Checking images against testset:\n")
    print_status("Retrieving clusters... \n")
    # image_tree = get_searchtrees_with_filenames("food", use_meronyms=False, minimal_node_size=1)
    image_tree = pipeline.get_clusters(
        "food",
        use_meronyms=False,
        visual_clustering_threshold=10000,
        mcl_clustering_threshold=15,
        minimal_mcl_cluster_size=10,
        minimal_node_size=15,
        visual_features=visual_features,
        cluster_for_synsets=cluster_for_synsets,
        keywords_for_pictures=keywords_for_pictures,
        cluster_representatives=cluster_representatives,
    )

    sys.stdout.write("Collecting images from tree... \n")
    result_ids = recursively_collect_images(image_tree)

    sys.stdout.write("Loading testset from database... \n")
    testset_positive_ids, testset_negative_ids = retrieveTestsetResults(args.database_file)

    sys.stdout.write("Comparing result images to testset... \n")

    # Sizes are taken before the matching loop, which consumes the collections.
    result_size = len(result_ids)
    testset_positive_size = len(testset_positive_ids)
    testset_negative_size = len(testset_negative_ids)

    true_positives = 0
    false_positives = 0

    for result_id in result_ids:
        # Every matched id is removed from the testset collection, so one
        # testset entry is matched at most once and whatever remains in the
        # positive collection afterwards was never found.
        if result_id in testset_positive_ids:
            true_positives += 1
            testset_positive_ids.remove(result_id)
        if result_id in testset_negative_ids:
            false_positives += 1
            testset_negative_ids.remove(result_id)

    false_negatives = len(testset_positive_ids)

    def ratio(numerator, denominator):
        # An empty denominator (e.g. no result id occurs in the testset) used
        # to raise ZeroDivisionError; report the undefined metric as 0.0.
        return float(numerator) / denominator if denominator else 0.0

    precision = ratio(true_positives, true_positives + false_positives)
    recall = ratio(true_positives, true_positives + false_negatives)
    f_measure = ratio(2 * (precision * recall), precision + recall)

    sys.stdout.write("Done:\n\n")

    sys.stdout.write("Testset size:    %d\n\n" % (testset_positive_size + testset_negative_size))
    sys.stdout.write("Result size:     %d\n" % result_size)
    sys.stdout.write("Real positives:  %d\n\n" % testset_positive_size)
    sys.stdout.write("True Positives:  %d\n" % true_positives)
    sys.stdout.write("True Negatives:  ???\n")
    sys.stdout.write("False Positives: %d\n" % false_positives)
    sys.stdout.write("False Negatives: %d\n\n" % false_negatives)
    sys.stdout.write("Precision:       %f (tp / (tp + fp))\n" % precision)
    sys.stdout.write("Recall:          %f (tp / (tp + fn))\n" % recall)
    sys.stdout.write("F-Measure:       %f (2 * (p * r / (p + r)))\n" % f_measure)
def main(args):
  """Evaluate the visual clustering of "food" images against tuple annotations.

  Loads the preprocessed feature files, runs pipeline.get_clusters for the
  search term "food", flattens the result tree and then clusters it visually
  ten times. After every run each annotated image tuple whose two ids both
  occur in the clusters is classified, and finally the per-run counts are
  averaged and printed together with precision, recall and F-measure.

  Args:
    args: object exposing food_id_file (path of a JSON file, passed on to
      flatten_result_tree after loading) and database_file (handed to
      retrieveTestsetResults); presumably the parsed command line arguments.
  """
  # Loading preprocessed features on startup
  print_status("Loading visual_features from file... ")
  visual_features = general_helpers.load_visual_features()
  # A parenthesised single-argument print emits the same text on Python 2 and 3.
  print("Done.")
  print_status("Loading cluster_for_synsets from mcl_clusters file... ")
  cluster_for_synsets = general_helpers.load_cluster_for_synsets()
  print("Done.")
  print_status("Loading keywords_for_pictures from file... ")
  keywords_for_pictures = general_helpers.load_keywords_for_pictures()
  print("Done.")
  print_status("Loading cluster_representatives from file... ")
  cluster_representatives = general_helpers.load_cluster_representatives(how_many_per_cluster=6)
  print("Done loading preprocessed data.")

  print_status("Checking images against testset:\n")
  print_status("Retrieving clusters... \n")
  pipeline_result = pipeline.get_clusters("food", use_meronyms=False,
                                     visual_clustering_threshold=100000,
                                     mcl_clustering_threshold=4,
                                     minimal_mcl_cluster_size=6,
                                     minimal_node_size=4,
                                     visual_features=visual_features,
                                     cluster_for_synsets=cluster_for_synsets,
                                     keywords_for_pictures=keywords_for_pictures,
                                     cluster_representatives=cluster_representatives)


  # # Comment in to load preprocessed pipeline_result for dev mode
  # pipeline_result = pickle.load(open('image_tree.pickle', 'r'))

  # Close the annotation file deterministically instead of leaking the handle.
  with open(args.food_id_file, 'r') as food_id_file:
    annotated_food_dict = json.load(food_id_file)

  print_status("Flattening result tree... \n")
  flattened_mcl_tree = flatten_result_tree(pipeline_result, annotated_food_dict, size_from_id=0, size_to_id=-1)
  image_counter = len(flattened_mcl_tree.subclusters[0]['subcluster'])

  # NOTE(review): visual_features was already loaded above; this reload looks
  # redundant unless pipeline.get_clusters modifies it - confirm before removing.
  print_status("Loading visual_features from file... \n")
  visual_features = general_helpers.load_visual_features()

  # One entry per clustering run.
  true_positives_total  = []
  false_negatives_total = []
  true_negatives_total  = []
  false_positives_total = []

  for i in range(0, 10):
    print_status("Calculating visual clusters (%d x)... \n" % i)
    # Each run works on its own deep copy of the flattened tree.
    visually_clustered_result = combined_clustering.cluster_visually(copy.deepcopy(flattened_mcl_tree),
                                                                     visual_clustering_threshold=4,
                                                                     visual_features=visual_features)

    print_status("Convert visual clusters to simpler structure... \n")
    # Reduce every cluster to a set of image ids: the first element of each
    # image tuple is cut down to the text after the last backslash and before
    # the first dot.
    visual_clusters = [
      set(image_tuple[0].split('\\')[-1].split('.')[0] for image_tuple in visual_cluster)
      for visual_cluster in visually_clustered_result.subclusters[0]['subcluster']
    ]

    print_status("Done clustering %d images into %d visual clusters. \n" % (image_counter, len(visual_clusters)))

    # # Comment in to load preprocessed visual_clusters for dev mode
    # visual_clusters = pickle.load(open('visual_clusters.pickle', 'r'))

    # NOTE(review): the testset is re-read on every run; it could probably be
    # loaded once before the loop - confirm retrieveTestsetResults is
    # deterministic for a given database file.
    print_status("Loading testset from database... \n")
    visually_similar_tuples, visually_different_tuples = retrieveTestsetResults(args.database_file)

    print_status("Comparing clusters to testset... \n")

    # Counting convention used below: a tuple is "positive" when its two ids
    # end up in different clusters. Tuples with an id missing from the
    # clusters are not counted at all.
    # NOTE(review): this is inverted relative to the usual "same cluster =
    # positive" reading - confirm it is intended.
    true_negatives  = 0
    false_positives = 0
    true_positives  = 0
    false_negatives = 0

    print_status("Starting with visually similar tuples... \n")
    for id_tuple in visually_similar_tuples:
      if both_ids_are_found(id_tuple, visual_clusters):
        if one_cluster_contains_both_ids(id_tuple, visual_clusters):
          true_negatives += 1
        else:
          false_positives += 1

    print_status("Now checking different image tuples... \n")
    for id_tuple in visually_different_tuples:
      if both_ids_are_found(id_tuple, visual_clusters):
        if one_cluster_contains_both_ids(id_tuple, visual_clusters):
          false_negatives += 1
        else:
          true_positives += 1

    true_positives_total.append(true_positives)
    false_negatives_total.append(false_negatives)
    true_negatives_total.append(true_negatives)
    false_positives_total.append(false_positives)

  def average(values):
    # Arithmetic mean of the per-run counts (one entry per run, never empty).
    return float(sum(values)) / len(values)

  def ratio(numerator, denominator):
    # An empty denominator (e.g. no testset tuple was found in the clusters)
    # used to raise ZeroDivisionError; report the undefined metric as 0.0.
    return float(numerator) / denominator if denominator else 0.0

  average_true_positives  = average(true_positives_total)
  average_false_negatives = average(false_negatives_total)
  average_true_negatives  = average(true_negatives_total)
  average_false_positives = average(false_positives_total)

  precision = ratio(average_true_positives, average_true_positives + average_false_positives)
  recall    = ratio(average_true_positives, average_true_positives + average_false_negatives)
  f_measure = ratio(2 * (precision * recall), precision + recall)

  print_status("Done!\n\n")
  # The tuple collections still hold the testset loaded during the last run.
  sys.stdout.write("Testset contains %5d visually similar   image tuples \n" % len(visually_similar_tuples))
  sys.stdout.write("And there are    %5d visually different image tuples \n\n" % len(visually_different_tuples))

  sys.stdout.write("Average true  positives: %f \n"   % average_true_positives)
  sys.stdout.write("Average false negatives: %f \n"   % average_false_negatives)
  sys.stdout.write("Average true  negatives: %f \n"   % average_true_negatives)
  sys.stdout.write("Average false positives: %f \n\n" % average_false_positives)

  sys.stdout.write("Precision: %f (tp / (tp + fp))\n" % precision)
  sys.stdout.write("Recall:    %f (tp / (tp + fn))\n" % recall)
  sys.stdout.write("F-Measure: %f (2 * (p * r / (p + r)))\n" % f_measure)