def create_attribute_vectors (scene_splits, scaler, kmeanspp, classifiers):
  """Build one attribute vector per image, grouped by scene.

  Args:
    scene_splits: dict mapping a scene subdirectory path to its image paths.
    scaler: fitted scaler used to normalize SIFT descriptors.
    kmeanspp: fitted k-means++ model mapping descriptors to visual words.
    classifiers: dict mapping an attribute name to a list of trained
      classifiers for that attribute.

  Returns:
    [attribute_vectors, scenes] -- parallel lists: the i-th attribute
    vector belongs to the i-th scene name.
  """
  scenes = []
  attribute_vectors = []
  # .items() instead of the Python-2-only .iteritems(): the rest of this
  # file already uses Python-3-style print(..., end=''), so be consistent
  # and portable.
  for subdir, images in scene_splits.items():
    # skip root directory (it carries no images of its own)
    if not images:
      continue

    # extract scene name from the trailing path component
    scene_name = subdir[subdir.rfind('/') + 1:]

    for image in images:
      descriptors = fe.extract(image, settings.scale_size)
      kmeanspp_predicted = kmeanspp.predict(
        scaler.transform(descriptors)
      )
      histogram = bow.generate_histogram(kmeanspp_predicted)

      # predict attributes of all present classifiers
      # (87 for asymmetric, 102 for symmetric)
      attribute_vector = []
      for attribute, classifier_list in classifiers.items():
        # NOTE(review): hard-coded index 5 selects a single classifier out
        # of each attribute's list -- confirm this is the intended slot and
        # not a leftover from an experiment.
        attribute_vector.extend(classifier_list[5].predict(histogram))

      # store scene and attribute vector
      scenes.append(scene_name)
      attribute_vectors.append(attribute_vector)
  return [attribute_vectors, scenes]
def create (images, computed_feature_vectors, scaler):
  """Fit a k-means++ clustering over SIFT descriptors of the given images.

  Extracts (or reuses cached) SIFT descriptors per image, fits a k-means
  model on a random subsample of the scaled descriptors, pickles the model
  to settings.filepaths['k-means++'] and returns it.

  Args:
    images: iterable of image file names (relative to settings.images_dir).
    computed_feature_vectors: cache dict image -> descriptors; updated
      in place with newly extracted descriptors.
    scaler: fitted scaler applied to descriptors before clustering.

  Returns:
    The fitted sklearn KMeans instance.
  """
  feature_vectors = []
  # Print progress in steps of roughly 5 percent. The step must be an
  # integer: the original float step (len * 0.05) made `done % step == 0`
  # fire only when len(images) happened to be divisible by 20.
  step_mod = max(1, len(images) // 20)
  done = 0
  print('[K-MEANS++] SIFT feature extraction:\n[K-MEANS++] 0%', end = '')
  sys.stdout.flush()
  for image in images:
    if image in computed_feature_vectors:
      descriptors = computed_feature_vectors[image]
    else:
      try:
        descriptors = fe.extract(settings.images_dir + image, settings.scale_size)
      except Exception:
        # best effort: skip unreadable images instead of aborting the run
        print('Error while computing SIFT for image {}.'.format(image))
        continue
      computed_feature_vectors[image] = descriptors

    fe.unroll_and_append_descriptors(descriptors, feature_vectors)
    done += 1
    if done % step_mod == 0:
      # floor division: plain / would print '5.0%' under Python 3
      print(' {}%'.format((100 * done) // len(images)), end = '')
      sys.stdout.flush()

  # Terminate the progress line. Skip the explicit ' 100%' only when the
  # last in-loop step already printed it -- but still emit the newline the
  # original omitted in that case (which glued the next print to this line).
  if done == len(images) and done > 0 and done % step_mod == 0:
    print()
  else:
    print(' 100%')
  print('[K-MEANS++] Number of \'SIFTed\' images: {}'.format(done))
  print('[K-MEANS++] Total number of sift descriptors: {}'
    .format(len(feature_vectors))
  )
  sys.stdout.flush()

  # do k-means clustering (512 clusters, k-means++, 4 cores, show information)
  kmeans = cluster.KMeans(n_clusters = settings.class_count, init = 'k-means++',
    n_jobs = 4
  )
  # 10 SIFT descriptors per image (approximately); clamp so random.sample
  # cannot raise ValueError when fewer descriptors were collected
  kmeans.fit(scaler.transform(
    random.sample(feature_vectors, min(10 * done, len(feature_vectors)))
  ))

  # dump k-means clustering for further usage
  with open(settings.filepaths['k-means++'], 'wb') as f:
    pe.dump(kmeans, f)

  return kmeans
def create (images, computed_feature_vectors):
  """Calibrate a min-max scaler on SIFT descriptors of the given images.

  Extracts (or reuses cached) SIFT descriptors per image, fits a
  MinMaxScaler on a random subsample of the descriptors, pickles the
  scaler to settings.filepaths['scaler'] and returns it.

  Args:
    images: iterable of image file names (relative to settings.images_dir).
    computed_feature_vectors: cache dict image -> descriptors; updated
      in place with newly extracted descriptors.

  Returns:
    The fitted sklearn MinMaxScaler instance.
  """
  feature_vectors = []
  # Print progress in steps of roughly 5 percent. The step must be an
  # integer: the original float step (len * 0.05) made `done % step == 0`
  # fire only when len(images) happened to be divisible by 20.
  step_mod = max(1, len(images) // 20)
  done = 0
  print('[SCALER] SIFT feature extraction:\n[SCALER] 0%', end = '')
  sys.stdout.flush()
  for image in images:
    if image in computed_feature_vectors:
      descriptors = computed_feature_vectors[image]
    else:
      try:
        descriptors = fe.extract(settings.images_dir + image, settings.scale_size)
      except Exception:
        # best effort: skip unreadable images instead of aborting the run
        print('Error while computing SIFT for image {}.'.format(image))
        continue
      computed_feature_vectors[image] = descriptors

    fe.unroll_and_append_descriptors(descriptors, feature_vectors)
    done += 1
    if done % step_mod == 0:
      # floor division: plain / would print '5.0%' under Python 3
      print(' {}%'.format((100 * done) // len(images)), end = '')
      sys.stdout.flush()

  # Terminate the progress line. Skip the explicit ' 100%' only when the
  # last in-loop step already printed it -- but still emit the newline the
  # original omitted in that case (which glued the next print to this line).
  if done == len(images) and done > 0 and done % step_mod == 0:
    print()
  else:
    print(' 100%')
  print('[SCALER] Number of \'SIFTed\' images: {}'.format(done))
  print('[SCALER] Total number of SIFT descriptors: {}'
    .format(len(feature_vectors))
  )
  sys.stdout.flush()

  # calibrate min-max-scaler
  min_max_scaler = preprocessing.MinMaxScaler()
  # 10 SIFT descriptors per image (approximately); clamp so random.sample
  # cannot raise ValueError when fewer descriptors were collected
  min_max_scaler = min_max_scaler.fit(
    random.sample(feature_vectors, min(10 * done, len(feature_vectors)))
  )

  # dump min-max-scaler for further usage
  with open(settings.filepaths['scaler'], 'wb') as f:
    pe.dump(min_max_scaler, f)

  return min_max_scaler
def select_random_images (size, candidates, computed_feature_vectors):
  """Randomly select up to `size` distinct images whose SIFT extraction works.

  Draws random candidates, skips duplicates and images whose SIFT
  extraction raises, and caches successful extractions in
  computed_feature_vectors (updated in place).

  Args:
    size: number of distinct images to select.
    candidates: list of image file names (relative to settings.images_dir).
    computed_feature_vectors: cache dict image path -> descriptors.

  Returns:
    List of selected image paths (prefixed with settings.images_dir).
    May be shorter than `size` if too few candidates are usable -- the
    original implementation looped forever in that situation.
  """
  selected_candidates = []
  selected = set()   # O(1) duplicate check instead of scanning the list
  failed = set()     # images whose extraction raised; never retried
  unique_candidates = len(set(candidates))
  while len(selected_candidates) < size:
    # stop once every distinct candidate is either selected or failed,
    # otherwise the random draw below can never make progress
    if len(selected) + len(failed) >= unique_candidates:
      break
    image = settings.images_dir + choice(candidates)

    # avoid duplicates and images already known to fail
    if image in selected or image in failed:
      continue

    if image in computed_feature_vectors:
      descriptors = computed_feature_vectors[image]
    else:
      try:
        descriptors = fe.extract(image, settings.scale_size)
        computed_feature_vectors[image] = descriptors
      except Exception:
        print('Error while computing SIFT for image {}'.format(image))
        failed.add(image)
        continue

    selected_candidates.append(image)
    selected.add(image)
    sys.stdout.flush()
  return selected_candidates