def create_attribute_vectors (scene_splits, scaler, kmeanspp, classifiers):
    """Compute one attribute vector per image found in scene_splits.

    Each image is SIFT-described, quantized against the k-means++
    vocabulary and turned into a bag-of-words histogram; every trained
    classifier then contributes its prediction to the attribute vector.

    Args:
        scene_splits: dict mapping a scene sub-directory path to its
            list of image paths.
        scaler: fitted scaler used to normalize SIFT descriptors.
        kmeanspp: fitted k-means++ model (the visual vocabulary).
        classifiers: dict mapping an attribute name to a list of
            trained classifiers (87 for asymmetric, 102 for symmetric).

    Returns:
        [attribute_vectors, scenes] — two parallel lists, one entry
        per processed image.
    """
    scenes = []
    attribute_vectors = []
    for subdir, images in scene_splits.iteritems():
        # the root directory carries no images — skip it
        if not images:
            continue
        # scene name is the last path component of the sub-directory
        scene_name = subdir[subdir.rfind('/') + 1:]
        for image in images:
            sift_descriptors = fe.extract(image, settings.scale_size)
            visual_words = kmeanspp.predict(
                scaler.transform(sift_descriptors)
            )
            histogram = bow.generate_histogram(visual_words)
            # one prediction per attribute; index 5 selects the same
            # classifier variant throughout — presumably the chosen
            # hyper-parameter setting (TODO confirm against training code)
            vector = []
            for attribute, classifier_list in classifiers.iteritems():
                vector.extend(classifier_list[5].predict(histogram))
            scenes.append(scene_name)
            attribute_vectors.append(vector)
    return [attribute_vectors, scenes]
def create (images, computed_feature_vectors, scaler):
    """Fit and persist a k-means++ visual vocabulary over SIFT descriptors.

    Extracts (or reuses cached) SIFT descriptors for every image, fits a
    k-means model with ``settings.class_count`` clusters on a random
    sample of the scaled descriptors, dumps it to
    ``settings.filepaths['k-means++']`` and returns it.

    Args:
        images: iterable of image file names relative to
            ``settings.images_dir``.
        computed_feature_vectors: dict cache image -> descriptors;
            mutated in place with newly extracted descriptors.
        scaler: fitted scaler applied to descriptors before clustering.

    Returns:
        The fitted ``cluster.KMeans`` instance.
    """
    feature_vectors = []
    # print progress in steps of 5 percent; use an integer step of at
    # least 1 so the modulo test actually fires (a float step such as
    # len(images) * 0.05 almost never divides `done` exactly) and so an
    # empty image list cannot cause a modulo-by-zero
    step_mod = max(1, int(len(images) * 0.05))
    done = 0
    print('[K-MEANS++] SIFT feature extraction:\n[K-MEANS++] 0%', end = '')
    sys.stdout.flush()
    for image in images:
        if image in computed_feature_vectors:
            descriptors = computed_feature_vectors[image]
        else:
            try:
                descriptors = fe.extract(settings.images_dir + image,
                                         settings.scale_size)
            except Exception:
                # best-effort: report the failing image and keep going
                print('Error while computing SIFT for image {}.'.format(image))
                continue
            computed_feature_vectors[image] = descriptors
        fe.unroll_and_append_descriptors(descriptors, feature_vectors)
        done += 1
        if done % step_mod == 0:
            # integer division keeps the output as whole percents
            print(' {}%'.format((100 * done) // len(images)), end = '')
            sys.stdout.flush()
    if done < len(images):
        print(' 100%')
    print('[K-MEANS++] Number of \'SIFTed\' images: {}'.format(done))
    print('[K-MEANS++] Total number of sift descriptors: {}'
          .format(len(feature_vectors))
          )
    sys.stdout.flush()
    # do k-means clustering (k-means++ init, 4 cores)
    kmeans = cluster.KMeans(n_clusters = settings.class_count,
                            init = 'k-means++',
                            n_jobs = 4
                            )
    # sample roughly 10 SIFT descriptors per image; never request more
    # than are available, which would raise ValueError in random.sample
    sample_size = min(len(feature_vectors), int(10 * done))
    kmeans.fit(scaler.transform(
        random.sample(feature_vectors, sample_size)
    ))
    # dump k-means clustering for further usage
    with open(settings.filepaths['k-means++'], 'wb') as f:
        pe.dump(kmeans, f)
    return kmeans
def create (images, computed_feature_vectors):
    """Fit and persist a min-max scaler over SIFT descriptors.

    Extracts (or reuses cached) SIFT descriptors for every image, fits a
    ``preprocessing.MinMaxScaler`` on a random sample of them, dumps it
    to ``settings.filepaths['scaler']`` and returns it.

    Args:
        images: iterable of image file names relative to
            ``settings.images_dir``.
        computed_feature_vectors: dict cache image -> descriptors;
            mutated in place with newly extracted descriptors.

    Returns:
        The fitted ``preprocessing.MinMaxScaler`` instance.
    """
    feature_vectors = []
    # print progress in steps of 5 percent; use an integer step of at
    # least 1 so the modulo test actually fires (a float step such as
    # len(images) * 0.05 almost never divides `done` exactly) and so an
    # empty image list cannot cause a modulo-by-zero
    step_mod = max(1, int(len(images) * 0.05))
    done = 0
    print('[SCALER] SIFT feature extraction:\n[SCALER] 0%', end = '')
    sys.stdout.flush()
    for image in images:
        if image in computed_feature_vectors:
            descriptors = computed_feature_vectors[image]
        else:
            try:
                descriptors = fe.extract(settings.images_dir + image,
                                         settings.scale_size)
            except Exception:
                # best-effort: report the failing image and keep going
                print('Error while computing SIFT for image {}.'.format(image))
                continue
            computed_feature_vectors[image] = descriptors
        fe.unroll_and_append_descriptors(descriptors, feature_vectors)
        done += 1
        if done % step_mod == 0:
            # integer division keeps the output as whole percents
            print(' {}%'.format((100 * done) // len(images)), end = '')
            sys.stdout.flush()
    if done < len(images):
        print(' 100%')
    print('[SCALER] Number of \'SIFTed\' images: {}'.format(done))
    print('[SCALER] Total number of SIFT descriptors: {}'
          .format(len(feature_vectors))
          )
    sys.stdout.flush()
    # calibrate min-max-scaler
    min_max_scaler = preprocessing.MinMaxScaler()
    # sample roughly 10 SIFT descriptors per image; never request more
    # than are available, which would raise ValueError in random.sample
    sample_size = min(len(feature_vectors), int(10 * done))
    min_max_scaler = min_max_scaler.fit(
        random.sample(feature_vectors, sample_size)
    )
    # dump min-max-scaler for further usage
    with open(settings.filepaths['scaler'], 'wb') as f:
        pe.dump(min_max_scaler, f)
    return min_max_scaler
def select_random_images (size, candidates, computed_feature_vectors):
    """Randomly select `size` distinct images whose SIFT extraction succeeds.

    Draws random candidates until `size` distinct images with valid SIFT
    descriptors have been collected; descriptors are cached in
    `computed_feature_vectors` as a side effect.

    NOTE(review): if `size` exceeds the number of distinct extractable
    candidates this loop never terminates — callers must guarantee
    enough candidates exist.

    Args:
        size: number of distinct images to select.
        candidates: sequence of image file names to draw from.
        computed_feature_vectors: dict cache image path -> descriptors;
            mutated in place with newly extracted descriptors.

    Returns:
        List of `size` full image paths, in selection order.
    """
    selected_candidates = []
    # set mirror of the result list: O(1) duplicate checks instead of
    # an O(n) list scan per draw; the list preserves selection order
    selected_set = set()
    i = 0
    while i < size:
        image = settings.images_dir + choice(candidates)
        # avoid duplicates — redraw
        if image in selected_set:
            continue
        if image in computed_feature_vectors:
            descriptors = computed_feature_vectors[image]
        else:
            try:
                # extraction serves only to validate the image and warm
                # the cache; the descriptors are consumed by callers
                descriptors = fe.extract(image, settings.scale_size)
                computed_feature_vectors[image] = descriptors
            except Exception:
                print('Error while computing SIFT for image {}'.format(image))
                continue
        selected_candidates.append(image)
        selected_set.add(image)
        i += 1
        sys.stdout.flush()
    return selected_candidates