def main(args):
  """Report average tree distances between testset images for the "food" clustering.

  Loads the preprocessed lookup data through ``general_helpers``, runs
  ``pipeline.get_clusters`` for "food", parses the resulting tree and then
  computes one average distance for each of the three id groups returned by
  ``retrieveTestsetResults(args.database_file)``. Each average is written to
  stdout together with its reciprocal ("closeness"), followed by the
  difference between the same-object and the not-similar closeness.

  Args:
    args: Parsed command line arguments; only ``database_file`` is read.
  """
  # Preprocessed lookup data, loaded once before the pipeline runs.
  print_status("Loading visual_features from file... ")
  features = general_helpers.load_visual_features()
  print("Done.")
  print_status("Loading cluster_for_synsets from mcl_clusters file... ")
  synset_clusters = general_helpers.load_cluster_for_synsets()
  print("Done.")
  print_status("Loading keywords_for_pictures from file... ")
  picture_keywords = general_helpers.load_keywords_for_pictures()
  print("Done.")
  print_status("Loading cluster_representatives from file... ")
  representatives = general_helpers.load_cluster_representatives(how_many_per_cluster=6)
  print("Done loading preprocessed data.")

  print_status("Checking images against testset:\n")
  print_status("Retrieving clusters... \n")
  clustering_options = {
    "use_meronyms": False,
    "visual_clustering_threshold": 100000,
    "mcl_clustering_threshold": 10,
    "minimal_mcl_cluster_size": 1,
    "minimal_node_size": 10,
    "visual_features": features,
    "cluster_for_synsets": synset_clusters,
    "keywords_for_pictures": picture_keywords,
    "cluster_representatives": representatives,
  }
  clustered_tree = pipeline.get_clusters("food", **clustering_options)
  # Dev shortcut: clustered_tree = pickle.load(open('image_tree.pickle', 'r'))

  print_status("Parsing result tree to easier accessible format...")
  tree_lookup = parse_result_tree(clustered_tree)

  print_status("Loading testset from database... \n")
  testset = retrieveTestsetResults(args.database_file)
  same_object_ids, same_context_ids, dissimilar_ids = testset

  print_status("Comparing result images to testset... \n")

  object_distance = calculate_average_distance(tree_lookup, same_object_ids, "same object", verbose=True)
  context_distance = calculate_average_distance(tree_lookup, same_context_ids, "same context", verbose=True)
  dissimilar_distance = calculate_average_distance(tree_lookup, dissimilar_ids, "not_similar", verbose=True)

  print_status("Done!\n")
  emit = sys.stdout.write
  # The closeness (1 / distance) is computed per line, right before it is written.
  emit("Average distance for same object  is %s with closeness %s \n" % (object_distance, 1.0 / object_distance))
  emit("Average distance for same context is %s with closeness %s \n" % (context_distance, 1.0 / context_distance))
  emit("Average distance for not similar  is %s with closeness %s \n" % (dissimilar_distance, 1.0 / dissimilar_distance))
  emit("Distance %s \n" % (1.0 / object_distance - 1.0 / dissimilar_distance))
def cluster_via_mcl(searchtree, mcl_clustering_threshold=2, minimal_mcl_cluster_size=2, cluster_for_synsets=None,
                    url_and_keywords_for_pictures=None, cluster_representatives=None):
  """Attach keyword-based (MCL) subclusters to every node of a search tree.

  For each node with at least ``mcl_clustering_threshold`` associated
  pictures, every picture is assigned to the synset cluster(s) chosen by
  ``get_clusters_with_highest_counter`` from the clusters of its synsets.
  Pictures without any known synset end up in cluster 0; all other cluster
  numbers are shifted by +1 to keep 0 free for that purpose. Clusters smaller
  than ``minimal_mcl_cluster_size`` and clusters fully contained in an
  already kept cluster are dropped. Nodes below the threshold get a single
  subcluster holding all their pictures, with ``"synsets"`` set to None.

  The result is stored in ``searchtree.subclusters`` as a list of dicts with
  the keys ``"synsets"`` and ``"subcluster"``; kept clusters are ordered
  largest first. Hyponym and meronym children are processed recursively.

  Args:
    searchtree: Node providing ``associated_pictures``, ``has_hyponyms()``,
      ``hyponyms``, ``has_meronyms()`` and ``meronyms``.
    mcl_clustering_threshold: Minimal number of pictures a node needs to be
      clustered at all.
    minimal_mcl_cluster_size: Minimal number of pictures a cluster needs to
      be kept.
    cluster_for_synsets: Mapping synset -> cluster number; loaded via
      ``load_cluster_for_synsets()`` when None.
    url_and_keywords_for_pictures: Mapping indexed by ``picture[0]`` whose
      value holds the picture's synsets at index 1; loaded via
      ``load_keywords_for_pictures()`` when None.
    cluster_representatives: Mapping cluster key -> value stored under
      ``"synsets"``; loaded via ``load_cluster_representatives()`` when None.

  Returns:
    The same ``searchtree`` object, modified in place.
  """
  # Identity checks: "== None" would invoke a custom __eq__ on the argument.
  if cluster_for_synsets is None:
    cluster_for_synsets = load_cluster_for_synsets()
  if url_and_keywords_for_pictures is None:
    url_and_keywords_for_pictures = load_keywords_for_pictures()
  if cluster_representatives is None:
    cluster_representatives = load_cluster_representatives()

  if len(searchtree.associated_pictures) >= mcl_clustering_threshold:
    pictures_for_clusters = defaultdict(list)

    # cluster pictures via mcl
    for picture in searchtree.associated_pictures:
      cluster_counter = Counter()
      synsets_for_picture = url_and_keywords_for_pictures[picture[0]][1]
      for synset in synsets_for_picture:
        # Synsets without a known cluster are skipped on purpose.
        try:
          synset_cluster = cluster_for_synsets[synset]
        except KeyError:
          continue
        cluster_counter[synset_cluster] += 1
      if cluster_counter:
        for synset_cluster_number in get_clusters_with_highest_counter(cluster_counter):
          pictures_for_clusters[synset_cluster_number + 1].append(picture)
      else:
        # Single pre-formatted argument: prints the same text under the
        # Python 2 print statement and the print function.
        print("unassignable picture:  %s" % (picture[0],))
        pictures_for_clusters[0].append(picture)

    # Eliminate clusters which are subclusters of other clusters. Candidates
    # are visited largest first: a cluster can only be contained in one that
    # is at least as large, so every possible superset has already been kept
    # when a smaller cluster is checked. Iterating in plain dict order let a
    # subset survive whenever it happened to come before its superset.
    candidates = sorted(pictures_for_clusters.items(),
                        key=lambda item: (-len(item[1]), item[0]))
    subcluster_list = []
    for key, pictures in candidates:
      if len(pictures) < minimal_mcl_cluster_size:
        continue
      is_subset = any(
        all(picture in kept["subcluster"] for picture in pictures)
        for kept in subcluster_list
      )
      if not is_subset:
        subcluster_list.append({"synsets": cluster_representatives[key], "subcluster": pictures})

    searchtree.subclusters = subcluster_list
  else:
    searchtree.subclusters = [{"synsets": None, "subcluster": searchtree.associated_pictures}]

  # Recursively traverse tree
  if searchtree.has_hyponyms():
    for child_hyponym_node in searchtree.hyponyms:
      cluster_via_mcl(child_hyponym_node, mcl_clustering_threshold, minimal_mcl_cluster_size,
                      cluster_for_synsets, url_and_keywords_for_pictures, cluster_representatives)
  if searchtree.has_meronyms():
    for child_meronym_node in searchtree.meronyms:
      cluster_via_mcl(child_meronym_node, mcl_clustering_threshold, minimal_mcl_cluster_size,
                      cluster_for_synsets, url_and_keywords_for_pictures, cluster_representatives)

  return searchtree
def main(args):
    """Score the images returned by the "food" clustering against the testset.

    Loads the preprocessed lookup data through ``general_helpers``, runs
    ``pipeline.get_clusters`` for "food", collects the image ids of the
    resulting tree and compares them with the positive and negative ids
    returned by ``retrieveTestsetResults(args.database_file)``. The counts as
    well as precision, recall and F-measure are written to stdout.

    A metric whose denominator is zero (e.g. no result id occurs in the
    testset) is reported as 0.0 instead of raising ``ZeroDivisionError``.

    Args:
        args: Parsed command line arguments; only ``database_file`` is read.
    """
    print_status("Checking images against testset:\n")

    print_status("Loading visual_features from file... ")
    visual_features = general_helpers.load_visual_features()
    print("Done.")
    print_status("Loading cluster_for_synsets from mcl_clusters file... ")
    cluster_for_synsets = general_helpers.load_cluster_for_synsets()
    print("Done.")
    print_status("Loading keywords_for_pictures from file... ")
    keywords_for_pictures = general_helpers.load_keywords_for_pictures()
    print("Done.")
    print_status("Loading cluster_representatives from file... ")
    cluster_representatives = general_helpers.load_cluster_representatives(how_many_per_cluster=6)
    print("Done loading preprocessed data.")

    print_status("Checking images against testset:\n")
    print_status("Retrieving clusters... \n")
    # image_tree = get_searchtrees_with_filenames("food", use_meronyms=False, minimal_node_size=1)
    image_tree = pipeline.get_clusters(
        "food",
        use_meronyms=False,
        visual_clustering_threshold=10000,
        mcl_clustering_threshold=15,
        minimal_mcl_cluster_size=10,
        minimal_node_size=15,
        visual_features=visual_features,
        cluster_for_synsets=cluster_for_synsets,
        keywords_for_pictures=keywords_for_pictures,
        cluster_representatives=cluster_representatives,
    )

    sys.stdout.write("Collecting images from tree... \n")
    result_ids = recursively_collect_images(image_tree)

    sys.stdout.write("Loading testset from database... \n")
    testset_positive_ids, testset_negative_ids = retrieveTestsetResults(args.database_file)

    sys.stdout.write("Comparing result images to testset... \n")

    # Sizes are taken before the matching loop below shrinks the testset.
    result_size = len(result_ids)
    testset_positive_size = len(testset_positive_ids)
    testset_negative_size = len(testset_negative_ids)

    true_positives = 0
    false_positives = 0

    for result_id in result_ids:
        # A matched id is removed from the testset, so a result id that occurs
        # again does not match the same testset entry twice, and the positives
        # left over after the loop are the false negatives.
        if result_id in testset_positive_ids:
            true_positives += 1
            testset_positive_ids.remove(result_id)
        if result_id in testset_negative_ids:
            false_positives += 1
            testset_negative_ids.remove(result_id)

    false_negatives = len(testset_positive_ids)

    # Guard every ratio: an empty overlap between results and testset must not
    # crash the report after the whole pipeline has already been run.
    predicted_positives = true_positives + false_positives
    actual_positives = true_positives + false_negatives
    precision = float(true_positives) / predicted_positives if predicted_positives else 0.0
    recall = float(true_positives) / actual_positives if actual_positives else 0.0
    if precision + recall:
        f_measure = 2 * (precision * recall) / (precision + recall)
    else:
        f_measure = 0.0

    sys.stdout.write("Done:\n\n")

    sys.stdout.write("Testset size:    %d\n\n" % (testset_positive_size + testset_negative_size))
    sys.stdout.write("Result size:     %d\n" % result_size)
    sys.stdout.write("Real positives:  %d\n\n" % testset_positive_size)
    sys.stdout.write("True Positives:  %d\n" % true_positives)
    sys.stdout.write("True Negatives:  ???\n")
    sys.stdout.write("False Positives: %d\n" % false_positives)
    sys.stdout.write("False Negatives: %d\n\n" % false_negatives)
    sys.stdout.write("Precision:       %f (tp / (tp + fp))\n" % precision)
    sys.stdout.write("Recall:          %f (tp / (tp + fn))\n" % recall)
    sys.stdout.write("F-Measure:       %f (2 * (p * r / (p + r)))\n" % f_measure)
def main(args):
  """Evaluate the visual clustering of "food" images against annotated image tuples.

  Runs ``pipeline.get_clusters`` for "food", flattens the result with
  ``flatten_result_tree`` and then clusters the flattened tree visually ten
  times. After every run the tuples returned by
  ``retrieveTestsetResults(args.database_file)`` are checked against the
  visual clusters; tuples whose ids are not both found are ignored. The
  counts are averaged over the runs and written to stdout together with
  precision, recall and F-measure.

  "Positive" here means that the two images of a tuple ended up in different
  clusters: visually different tuples that are separated count as true
  positives, visually similar tuples that share a cluster as true negatives.

  A metric whose denominator is zero (e.g. no tuple was found in the
  clusters) is reported as 0.0 instead of raising ``ZeroDivisionError``.

  Args:
    args: Parsed command line arguments; ``food_id_file`` and
      ``database_file`` are read.
  """
  # Loading preprocessed features on startup
  print_status("Loading visual_features from file... ")
  visual_features = general_helpers.load_visual_features()
  print("Done.")
  print_status("Loading cluster_for_synsets from mcl_clusters file... ")
  cluster_for_synsets = general_helpers.load_cluster_for_synsets()
  print("Done.")
  print_status("Loading keywords_for_pictures from file... ")
  keywords_for_pictures = general_helpers.load_keywords_for_pictures()
  print("Done.")
  print_status("Loading cluster_representatives from file... ")
  cluster_representatives = general_helpers.load_cluster_representatives(how_many_per_cluster=6)
  print("Done loading preprocessed data.")

  print_status("Checking images against testset:\n")
  print_status("Retrieving clusters... \n")
  pipeline_result = pipeline.get_clusters("food", use_meronyms=False,
                                     visual_clustering_threshold=100000,
                                     mcl_clustering_threshold=4,
                                     minimal_mcl_cluster_size=6,
                                     minimal_node_size=4,
                                     visual_features=visual_features,
                                     cluster_for_synsets=cluster_for_synsets,
                                     keywords_for_pictures=keywords_for_pictures,
                                     cluster_representatives=cluster_representatives)


  # # Comment in to load preprocessed pipeline_result for dev mode
  # pipeline_result = pickle.load(open('image_tree.pickle', 'r'))

  # Close the annotation file deterministically instead of leaking the handle.
  with open(args.food_id_file, 'r') as food_id_file:
    annotated_food_dict = json.load(food_id_file)

  print_status("Flattening result tree... \n")
  flattened_mcl_tree = flatten_result_tree(pipeline_result, annotated_food_dict, size_from_id=0, size_to_id=-1)
  image_counter = len(flattened_mcl_tree.subclusters[0]['subcluster'])

  # NOTE(review): visual_features was already loaded above; this second load
  # looks redundant unless get_clusters modifies the data - confirm before
  # removing it.
  print_status("Loading visual_features from file... \n")
  visual_features = general_helpers.load_visual_features()

  # The testset is not modified by the comparison below, so it is read once
  # instead of once per clustering run.
  print_status("Loading testset from database... \n")
  visually_similar_tuples, visually_different_tuples = retrieveTestsetResults(args.database_file)

  true_positives_total  = []
  false_negatives_total = []
  true_negatives_total  = []
  false_positives_total = []

  for i in range(0, 10):
    print_status("Calculating visual clusters (%d x)... \n" % i)
    # Every run works on its own copy so that the flattened tree stays
    # untouched for the following runs.
    visually_clustered_result = combined_clustering.cluster_visually(copy.deepcopy(flattened_mcl_tree),
                                                                     visual_clustering_threshold=4,
                                                                     visual_features=visual_features)

    print_status("Convert visual clusters to simpler structure... \n")
    # Reduce every image to its bare id: the last backslash-separated path
    # component up to its first dot.
    visual_clusters = []
    for visual_cluster in visually_clustered_result.subclusters[0]['subcluster']:
      visual_clusters.append(set(image_tuple[0].split('\\')[-1].split('.')[0] for image_tuple in visual_cluster))

    print_status("Done clustering %d images into %d visual clusters. \n" % (image_counter, len(visual_clusters)))

    # # Comment in to load preprocessed visual_clusters for dev mode
    # visual_clusters = pickle.load(open('visual_clusters.pickle', 'r'))

    print_status("Comparing clusters to testset... \n")

    true_negatives  = 0
    false_positives = 0
    true_positives  = 0
    false_negatives = 0

    print_status("Starting with visually similar tuples... \n")
    for id_tuple in visually_similar_tuples:
      if both_ids_are_found(id_tuple, visual_clusters):
        if one_cluster_contains_both_ids(id_tuple, visual_clusters):
          true_negatives += 1
        else:
          false_positives += 1

    print_status("Now checking different image tuples... \n")
    for id_tuple in visually_different_tuples:
      if both_ids_are_found(id_tuple, visual_clusters):
        if one_cluster_contains_both_ids(id_tuple, visual_clusters):
          false_negatives += 1
        else:
          true_positives += 1

    true_positives_total.append(true_positives)
    false_negatives_total.append(false_negatives)
    true_negatives_total.append(true_negatives)
    false_positives_total.append(false_positives)

  average_true_positives  = float(sum(true_positives_total))  / len(true_positives_total)
  average_false_negatives = float(sum(false_negatives_total)) / len(false_negatives_total)
  average_true_negatives  = float(sum(true_negatives_total))  / len(true_negatives_total)
  average_false_positives = float(sum(false_positives_total)) / len(false_positives_total)

  # Guard every ratio: if no testset tuple is found in the clusters all counts
  # are zero, which must not crash the report after the runs have finished.
  predicted_positives = average_true_positives + average_false_positives
  actual_positives    = average_true_positives + average_false_negatives
  precision = average_true_positives / predicted_positives if predicted_positives else 0.0
  recall    = average_true_positives / actual_positives if actual_positives else 0.0
  if precision + recall:
    f_measure = 2 * (precision * recall) / (precision + recall)
  else:
    f_measure = 0.0

  print_status("Done!\n\n")
  sys.stdout.write("Testset contains %5d visually similar   image tuples \n" % len(visually_similar_tuples))
  sys.stdout.write("And there are    %5d visually different image tuples \n\n" % len(visually_different_tuples))

  sys.stdout.write("Average true  positives: %f \n"   % average_true_positives)
  sys.stdout.write("Average false negatives: %f \n"   % average_false_negatives)
  sys.stdout.write("Average true  negatives: %f \n"   % average_true_negatives)
  sys.stdout.write("Average false positives: %f \n\n" % average_false_positives)

  sys.stdout.write("Precision: %f (tp / (tp + fp))\n" % precision)
  sys.stdout.write("Recall:    %f (tp / (tp + fn))\n" % recall)
  sys.stdout.write("F-Measure: %f (2 * (p * r / (p + r)))\n" % f_measure)
from clustering.pipeline import get_clusters
from helpers.general_helpers import print_status, load_visual_features, load_cluster_for_synsets, load_keywords_for_pictures
from helpers.general_helpers import load_cluster_representatives

# Flask application object and its asset environment.
app = Flask(__name__)
assets = Environment(app)

# The preprocessed lookup data is loaded once at import time and kept in
# module-level names, with a status line printed around every load.
print_status("Loading cluster_for_synsets from mcl_clusters file... ")
cluster_for_synsets = load_cluster_for_synsets()
print("Done.")

print_status("Loading keywords_for_pictures from file... ")
keywords_for_pictures = load_keywords_for_pictures()
print("Done.")

print_status("Loading cluster_representatives from file... ")
cluster_representatives = load_cluster_representatives(how_many_per_cluster=6)
print("Done.\n\n")

# Module-level dict, empty at startup (presumably a cache of earlier searches - confirm against its users).
bufferedSearches = dict()

print_status("Server is ready!\n\n")

@app.route("/")
def hello():
  """Serve the root URL by rendering the ``index.html`` template."""
  landing_page = render_template('index.html')
  return landing_page

def node_subclusters_empty(subcluster_structure):
  """Tell whether every entry's ``"subcluster"`` value equals ``[[]]``.

  Args:
    subcluster_structure: Iterable of dicts that each have a ``"subcluster"`` key.

  Returns:
    True if no entry holds anything other than ``[[]]`` (also for an empty
    iterable), otherwise False.
  """
  return not any(entry["subcluster"] != [[]] for entry in subcluster_structure)