def _test_hist(self):
    """Compare the joint RGB histogram with imfeat.histogram_joint on lena.

    The leading underscore keeps the default unittest loader (which looks
    for names starting with ``test``) from collecting this method.
    """
    lena = cv.LoadImage('test_images/lena.jpg')
    histogram = imfeat.Histogram('rgb', [0, 0, 0], [1, 1, 1], [8, 8, 8])
    joint = imfeat.compute(imfeat.histogram_joint, lena)[0]
    binned = imfeat.compute(histogram, lena)[0]
    # View the joint output as an 8x8x8 cube and transpose it before
    # flattening; the comparison below is made in that bin order.
    joint_reordered = joint.reshape(8, 8, 8).T.ravel()
    self.assertEqual(joint_reordered.tolist(), binned.tolist())
def test_identity(self):
    """Run Eigenfaces on one image loaded through OpenCV and through PIL.

    For now this only tests that the code runs without errors; the
    normalised feature difference and pixel difference between the two
    loading paths are printed rather than asserted.
    """
    face_paths = glob.glob('/home/morariu/downloads/lfwcrop_color/faces/*')
    images = [cv.LoadImage(x) for x in random.sample(face_paths, 100)]
    test_image = 'test_identity.jpg'
    im1 = cv.LoadImage(test_image)
    # Open the JPEG in binary mode and close the handle deterministically.
    # NOTE(review): assumes pil_to_cv finishes reading the file before it
    # returns -- confirm against its implementation.
    with open(test_image, 'rb') as fp:
        im2 = pil_to_cv(fp)
    feat = face_feature.Eigenfaces(images)
    # Both inputs are resized to the size of the training images.
    train_size = cv.GetSize(images[0])
    out1 = imfeat.compute(feat, resize(im1, train_size))[0]
    out2 = imfeat.compute(feat, resize(im2, train_size))[0]
    print('||feat(cv) - feat(pil)|| = %g' % (
        np.linalg.norm(out1 - out2)/len(out1)))
    print('||cv - pil|| = %g' % (np.linalg.norm(
        cv_to_array(im1)-cv_to_array(im2))/len(cv_to_array(im1))))
def map(self, image_hash, image_data):
    """Classify blocks of an image and emit the blocks that any classifier flags.

    Args:
        image_hash: Unique image string
        image_data: Binary image data

    Yields:
        A tuple in the form of ((image_hash, sim), (pred, jpeg_data))
        sim: Second element produced by the block generator for the block
        pred: Dict of classifier_name -> classifier.predict(feature)
        jpeg_data: The block re-encoded as a JPEG string
    """
    try:
        image = Image.open(StringIO.StringIO(image_data))
    except Exception:
        # Best effort: count the unreadable image and move on.  A bare
        # "except:" would also swallow SystemExit/KeyboardInterrupt.
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
        return
    bgen = imfeat.BlockGenerator(image, imfeat.CoordGeneratorRectRotate,
                                 output_size=(self._image_height,
                                              self._image_width),
                                 step_delta=(self._image_height / 2,
                                             self._image_width / 2),
                                 angle_steps=1)
    for image_out, sim in bgen:
        feature = np.asfarray(imfeat.compute(self._feat, image_out)[0])
        pred = dict((classifier_name, classifier.predict(feature))
                    for classifier_name, classifier in self._classifiers)
        # At least 1 class needs to be > 0
        # NOTE(review): presumably x[0] is a (confidence, label) pair for the
        # top prediction -- confirm against the classifier API.
        if any(x[0][0] * x[0][1] > 0 for x in pred.values()):
            image_out_fp = StringIO.StringIO()
            imfeat.convert_image(image_out, ['RGB']).save(image_out_fp, 'JPEG')
            image_out_fp.seek(0)
            yield (image_hash, sim), (pred, image_out_fp.read())
def __call__(self, frame_iter):
    """Mark keyframes in a frame stream by comparing block histograms.

    Each frame is cut into 50x50 blocks, a histogram feature is computed
    per block, and the stacked result is compared with the previous
    frame's using the sum of absolute differences.  Every score is appended
    to self.scores.

    Args:
        frame_iter: Iterable of (frame_num, frame_time, frame)

    Yields:
        ((frame_num, frame_time, frame), iskeyframe) for every input frame.
        iskeyframe is True when the score exceeds self._diff_thresh and more
        than self._min_interval has passed since the reference time; the
        first frame is never a keyframe.
    """
    cgr = imfeat.CoordGeneratorRect
    # TODO: This uses LAB to convert to Gray, fix that in imfeat
    # The feature object does not depend on the frame, so build it once.
    feat = imfeat.Histogram('lab', num_bins=(8, 8, 8), style='planar')
    prev_vec = None
    prev_time = None
    for frame_num, frame_time, frame in frame_iter:
        blocks = imfeat.BlockGenerator(frame, cgr, output_size=(50, 50),
                                       step_delta=(50, 50))
        # NOTE(review): [:8] slices the list returned by compute(), not the
        # histogram itself; if the intent was the first 8 bins this should
        # probably be [0][:8].  Left unchanged because the thresholds were
        # tuned against the current behaviour -- confirm.
        out = [imfeat.compute(feat, block)[:8] for block, _ in blocks]
        cur_vec = np.hstack(out)
        iskeyframe = False
        if prev_vec is None:
            # First frame: only establish the reference time.
            prev_time = frame_time
        else:
            score = np.sum(np.abs(prev_vec - cur_vec))
            self.scores.append(score)
            if (score > self._diff_thresh and
                    frame_time - prev_time > self._min_interval):
                iskeyframe = True
                prev_time = frame_time
        prev_vec = cur_vec
        yield (frame_num, frame_time, frame), iskeyframe
def put_features(feature_str, hashes=None, replace=False):
    """Compute a feature for stored images and write the values to cass.

    Args:
        feature_str: Python expression evaluated (with imfeat in scope) to
            obtain the feature; also the name the values are stored under
        hashes: Image hashes to process; defaults to
            cass.get_image_hashes() when None
        replace: If True, recompute for every hash; otherwise hashes that
            cass.get_feature_hashes() already lists are skipped
    """
    import StringIO

    # NOTE(security): eval() executes arbitrary code, so feature_str must
    # only ever come from a trusted source.
    feature = eval(feature_str, {"imfeat": imfeat})
    print("Feature: %s (%s)" % (feature_str, feature))

    # Compute feature on all available images by default
    if hashes is None:
        hashes = cass.get_image_hashes()

    # Optionally try not to replace existing features
    if replace:
        print("Replacing all existing features for %s" % feature_str)
    else:
        old_hashes = cass.get_feature_hashes(feature_str)

    # Get an estimate of the number of images by counting
    # FIXME This requires cass to load the whole row, twice
    # NOTE(review): this counts the hashes that already have the feature,
    # not the images about to be processed -- confirm the intent.
    print("Computing feature for %d images" %
          len(list(cass.get_feature_hashes(feature_str))))

    success_count = 0
    start_time = time.time()

    _hashes = hashes if replace else cass.sorted_iter_diff(hashes, old_hashes)
    for md5hash in _hashes:
        data = cass.get_imagedata(md5hash)
        try:
            im = Image.open(StringIO.StringIO(data))
            im.load()
        except IOError:
            print("couldn't load image: %s" % md5hash)
            continue

        # Guard for very small or very large images that break GIST
        if min(im.size) < 10 or max(im.size) > 1000:
            print("Skipping image of size (%d, %d) because of GIST segfault"
                  % im.size)
            continue

        # FIXME this seems to be necessary for many features
        # e.g. imfeat.Moments and imfeat.GIST()
        im = im.convert("RGB")

        # Only for catching segfaults
        print("hash: %s" % md5hash)

        # Compute the feature
        value = imfeat.compute(feature, im)

        ret = cass.put_feature_value(feature_str, md5hash, value)
        print("Put feature_value([%s], [%s]): %d" %
              (feature_str, md5hash, ret))
        success_count += 1

    stop_time = time.time()
    print("Finished %d features in %.2f seconds" %
          (success_count, stop_time - start_time))
def test_identity(self):
    """Check Eigenfaces gives the same feature for OpenCV- and PIL-loaded images.

    The normalised feature difference and pixel difference between the two
    loading paths are printed, then the two features are asserted to be
    almost equal.
    """
    # for now, test if the code runs without errors
    train_paths = lfwcrop_data.get_unique_lfw_training_images('data')
    images = [cv.LoadImage(x) for x in random.sample(train_paths, 100)]
    test_image = 'data/exemplar1.jpg'
    im1 = cv.LoadImage(test_image)
    # Open the JPEG in binary mode and close the handle deterministically.
    # NOTE(review): assumes pil_to_cv finishes reading the file before it
    # returns -- confirm against its implementation.
    with open(test_image, 'rb') as fp:
        im2 = pil_to_cv(fp)
    feat = face_feature.Eigenfaces(images)
    # Both inputs are resized to the size of the training images.
    train_size = cv.GetSize(images[0])
    out1 = imfeat.compute(feat, resize(im1, train_size))[0]
    out2 = imfeat.compute(feat, resize(im2, train_size))[0]
    print('||feat(cv) - feat(pil)|| = %g' % (
        np.linalg.norm(out1 - out2)/len(out1)))
    print('||cv - pil|| = %g' % (np.linalg.norm(
        cv_to_array(im1)-cv_to_array(im2))/len(cv_to_array(im1))))
    np.testing.assert_almost_equal(out1, out2)
def _compute_face_distance(self, gray):
    """Return the norm of the difference between an image's feature and the exemplar.

    The input is first resized to self._size, the fixed size the feature
    was trained on.
    """
    # TODO(Vlad): do this in eigenfaces feature code
    resized = cv.CreateImage(self._size, 8, 3)
    cv.Resize(gray, resized, cv.CV_INTER_LINEAR)
    feature = imfeat.compute(self._feat, resized)[0]
    # TODO(Vlad) replace w/ distpy
    delta = self._exemplar - feature
    return np.linalg.norm(delta)
def main():
    """Train Eigenfaces on up to 100 face images and print one computed feature.

    range(4, 10) is passed as the vectors argument; the feature is computed
    for the first training image.
    """
    import glob
    face_paths = glob.glob('/home/morariu/downloads/lfwcrop_color/faces/*')
    images = []
    for path in face_paths[:100]:
        images.append(cv.LoadImage(path))
    eigenfaces = Eigenfaces(images, range(4, 10))
    print(imfeat.compute(eigenfaces, images[0]))
def _compute_exemplar_feature(self):
    """Compute the exemplar image's feature and pickle it to self.exemplar_pkl.

    The exemplar at self.exemplar_path is resized to 64x64 and the feature
    object is unpickled from self.eigenfaces_fn.
    """
    exemplar = cv.LoadImage(self.exemplar_path)
    resized = cv.CreateImage((64, 64), 8, exemplar.channels)
    cv.Resize(exemplar, resized, cv.CV_INTER_LINEAR)
    with open(self.eigenfaces_fn, 'r') as feature_fp:
        eigenfaces = cPickle.load(feature_fp)
    with open(self.exemplar_pkl, 'w') as out_fp:
        exemplar_feature = imfeat.compute(eigenfaces, resized)[0]
        cPickle.dump(exemplar_feature, out_fp)
def main():
    """Build an Eigenfaces feature from up to 100 face images and print its output.

    The feature is constructed with range(4, 10) as its vectors argument
    and evaluated on the first loaded image.
    """
    import glob
    pattern = '/home/morariu/downloads/lfwcrop_color/faces/*'
    selected_paths = glob.glob(pattern)[:100]
    images = [cv.LoadImage(path) for path in selected_paths]
    vector_indices = range(4, 10)
    eigenfaces = Eigenfaces(images, vector_indices)
    result = imfeat.compute(eigenfaces, images[0])
    print(result)
def test_hist_planar(self):
    """Print the planar histogram of a randomised image for every mode in modes."""
    img = cv.LoadImage('test_images/lena.jpg')
    # Overwrite the pixels with indices 0..511 on both axes with random
    # three-component values.
    side = 512
    for row in range(side):
        for col in range(side):
            img[row, col] = tuple(random.randint(0, 255) for _ in range(3))
    for mode in modes:
        print(mode)
        planar_hist = imfeat.Histogram(mode, style='planar')
        print(imfeat.compute(planar_hist, img)[0])
def test_0(self):
    """Train Eigenfaces on up to 100 LFW images and check the output sizes.

    With range(3, 64) passed as vectors, 61 vectors are expected both in
    the trained feature and in the computed output.
    """
    # test if the code runs without errors
    train_paths = lfwcrop_data.get_unique_lfw_training_images('data')[:100]
    images = [cv.LoadImage(path) for path in train_paths]
    eigenfaces = face_feature.Eigenfaces(images, range(3, 64))
    out = imfeat.compute(eigenfaces, images[0])
    expected_vectors = 61
    self.assertEqual(len(eigenfaces.mean), eigenfaces.vectors.shape[0])
    self.assertEqual(1, len(out))
    self.assertEqual(expected_vectors, eigenfaces.vectors.shape[1])
    self.assertEqual(expected_vectors, len(out[0]))
def _histogram(self, hf):
    """Check a histogram feature on every test image.

    Tests the histogram module by loading each image in self.image_names,
    checking that the histogram sums to 1, and checking that
    imfeat.compute returns a single array of the expected size (8*8*8).

    Args:
        hf: Histogram feature; called directly on an image and also passed
            to imfeat.compute
    """
    self._feat_hist_norm(hf)
    for fn in self.image_names:
        img = Image.open(fn)
        # assertAlmostEqual replaces the deprecated assertAlmostEquals alias.
        self.assertAlmostEqual(np.sum(hf(img)), 1.)
        h = imfeat.compute(hf, img)
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(len(h), 1)
        self.assertEqual(h[0].shape, (8 * 8 * 8,))
def get_face_feature():
    """
    Compute the feature of one fixed image from the lfw dataset.

    The feature object is unpickled from 'eigenfaces_lfw_cropped.pkl'.
    The returned value is meant to be hard-coded elsewhere for testing
    purposes.
    """
    # Bush has most images in lfw training set
    face_path = ('/home/morariu/downloads/lfwcrop_color/faces/'
                 'George_W_Bush_0026.ppm')
    face = cv.LoadImage(face_path)
    with open('eigenfaces_lfw_cropped.pkl', 'r') as pkl_fp:
        eigenfaces = cPickle.load(pkl_fp)
    computed = imfeat.compute(eigenfaces, face)
    return computed[0]
def map(self, name, image_data):
    """Resize an image to a square and emit its feature vector.

    Args:
        name: Key emitted unchanged with the feature
        image_data: Binary image data

    Yields:
        A tuple in the form of (name, feature), where feature is a float
        array.  Nothing is yielded when the image cannot be opened or the
        feature computation raises ValueError; a DATA_ERRORS counter is
        incremented instead.
    """
    try:
        image = Image.open(StringIO.StringIO(image_data))
    except Exception:
        # Best effort: count the unreadable image and move on.  A bare
        # "except:" would also swallow SystemExit/KeyboardInterrupt.
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
        return
    image = image.resize((self._image_length, self._image_length))
    # Keep only the computation inside the try so that errors raised by
    # the consumer at the yield point are not mistaken for feature errors.
    try:
        feature = np.asfarray(imfeat.compute(self._feat, image)[0])
    except ValueError as e:
        print(e)
        hadoopy.counter('DATA_ERRORS', 'UnkImageType')
        return
    yield name, feature
def test_gist(self):
    """Run GIST on all test images, then compare lena's output with a fixture.

    The lena feature is formatted with four decimals and must match
    'fixtures/gist_lena_output.txt' exactly.
    """
    feature = imfeat.GIST()
    for feat_out, _image in self._run_all_images(feature):
        print(feat_out)
        print(len(feat_out[0]))
    # Compare against known output
    lena = Image.open('test_images/lena.ppm')
    out = imfeat.compute(feature, lena)[0]
    # Space-separated values, ending with a newline as the last token.
    tokens = ['%.4f' % value for value in out]
    tokens.append('\n')
    test_string = ' '.join(tokens)
    with open('fixtures/gist_lena_output.txt') as fp:
        true_string = fp.read()
    self.assertEqual(len(true_string.split()), len(test_string.split()))
    self.assertEqual(true_string, test_string)
def test_hog_latent(self):
    """Run HOGLatent on all test images, then verify lena's output.

    The lena feature is checked for length, for the MD5 digest of its raw
    bytes, and against the pickled array in 'fixtures/lena_feat.pkl.gz'.
    """
    banner = 'Hog Latent'
    print(banner)
    feature = imfeat.HOGLatent(2)
    for feat_out, _image in self._run_all_images(feature):
        print(feat_out)
        print(len(feat_out[0]))
    print(banner)
    lena = Image.open('test_images/lena.ppm')
    out = imfeat.compute(feature, lena)[0]
    self.assertEqual(len(out), 254 * 254 * 32)
    digest = hashlib.md5(out.tostring()).hexdigest()
    np.testing.assert_equal(digest, '3cc2af55af55bd429388d8be52fde356')
    fixture_path = 'fixtures/lena_feat.pkl.gz'
    load_from_umiacs(fixture_path, 'ab4580a8322e18b144c39867aeefa05b')
    with gzip.GzipFile(fixture_path) as fp:
        expected = pickle.load(fp)
        np.testing.assert_almost_equal(out, expected)
def test_0(self):
    """Check Eigenfaces output dimensions on up to 100 LFW training images.

    range(3, 64) is passed as vectors, so 61 vectors are expected.
    """
    # test if the code runs without errors
    images = []
    for path in lfwcrop_data.get_unique_lfw_training_images('data')[:100]:
        images.append(cv.LoadImage(path))
    vectors = range(3, 64)
    feat = face_feature.Eigenfaces(images, vectors)
    out = imfeat.compute(feat, images[0])
    mean_length = len(feat.mean)
    vector_rows, vector_count = feat.vectors.shape[0], feat.vectors.shape[1]
    self.assertEqual(mean_length, vector_rows)
    self.assertEqual(1, len(out))
    self.assertEqual(61, vector_count)
    self.assertEqual(61, len(out[0]))
def test_0(self):
    """Check Eigenfaces output dimensions on up to 100 globbed face images.

    range(3, 64) is passed as vectors, so 61 vectors are expected.
    """
    # for now, test if the code runs without errors
    import glob
    face_paths = glob.glob('/home/morariu/downloads/lfwcrop_color/faces/*')
    images = [cv.LoadImage(path) for path in face_paths[:100]]
    eigenfaces = face_feature.Eigenfaces(images, range(3, 64))
    out = imfeat.compute(eigenfaces, images[0])
    expected_vectors = 61
    self.assertEqual(len(eigenfaces.mean), eigenfaces.vectors.shape[0])
    self.assertEqual(1, len(out))
    self.assertEqual(expected_vectors, eigenfaces.vectors.shape[1])
    self.assertEqual(expected_vectors, len(out[0]))
def test_0(self):
    """Run every imfeat attribute that exposes make_features on lena.

    Classes and 'rhog_dalal' are skipped; for the rest the elapsed time,
    the output lengths and the output itself are printed.
    """
    # Find all features
    lena = Image.open('test_images/lena.jpg')
    print('Image size [%s]' % str(lena.size))
    for feature_module in dir(imfeat):
        mod = getattr(imfeat, feature_module)
        if isinstance(mod, types.TypeType):
            print('Skipping [%s] as it is a class' % feature_module)
            continue
        if feature_module == 'rhog_dalal':
            print('Skipping [%s] as it is unsupported' % feature_module)
            continue
        if 'make_features' not in dir(mod):
            continue
        print(feature_module)
        started = time.time()
        val = np.asfarray(imfeat.compute(mod, lena))
        print('Time: %f' % (time.time() - started))
        print(len(val))
        # An empty result has no first row to measure.
        try:
            print(len(val[0]))
        except IndexError:
            pass
        print(val)
def get_lfw_restricted_accuracy(split):
    """Train and score a same/different face classifier on one LFW split.

    Eigenfaces is trained on the unique training images (at most 3000,
    randomly sampled beyond that).  The distance between the features of
    each training pair is used to pick the threshold with the best
    training accuracy, and that threshold is then applied to the test pairs.

    Args:
        split: Split name; '<split>_train' and '<split>_test' are loaded
            with get_lfw_cropped_data

    Returns:
        Fraction of test pairs classified correctly (float)
    """
    max_train_ims = 3000
    train_data = get_lfw_cropped_data('%s_train' % split)
    test_data = get_lfw_cropped_data('%s_test' % split)
    # get a list of unique training images
    train_fns = []
    for fnpair in train_data.keys():
        train_fns.extend(fnpair)
    train_fns = sorted(set(train_fns))
    # load the unique training images, and learn PCA
    print('Training Eigenfaces feature space (%i training images)...' % (
            len(train_fns)))
    if len(train_fns) <= max_train_ims:
        selected_fns = train_fns
    else:
        selected_fns = random.sample(train_fns, max_train_ims)
    train_ims = [cv.LoadImage(fn) for fn in selected_fns]
    feat = face_feature.Eigenfaces(train_ims)
    # go through each training image pair and calculate distances
    dists = []
    classmap = {'same' : 1, 'diff' : 0}
    for ((fn1, fn2), attr) in train_data.items():
        f1 = imfeat.compute(feat, cv.LoadImage(fn1))[0]
        f2 = imfeat.compute(feat, cv.LoadImage(fn2))[0]
        dists.append((np.linalg.norm(f1-f2), classmap[attr['class']]))
    # calculate threshold that maximizes average accuracy
    dists = sorted(dists)
    num_same = sum(1 for _, label in dists if label == 1)
    num_diff = sum(1 for _, label in dists if label == 0)
    # Sweep the threshold upwards through the sorted distances.  Pairs at
    # or below it are predicted 'same'.  Before the sweep starts every
    # 'diff' pair is a true negative, so the counter starts at num_diff
    # (starting at num_same is only right for balanced data).
    tpi, tni = 0, num_diff
    a = []
    for (d, v) in dists:
        if v == 1:
            tpi += 1
        else:
            tni -= 1
        a.append((tpi + tni) / float(num_same + num_diff))
    imax = np.argmax(np.array(a))
    thresh = dists[imax][0]
    print('Thresh %4.3g yields a classification accuracy of %4.3g' % (
            thresh, a[imax]))
    # now test on testing split
    right = 0
    for ((fn1, fn2), attr) in test_data.items():
        f1 = imfeat.compute(feat, cv.LoadImage(fn1))[0]
        f2 = imfeat.compute(feat, cv.LoadImage(fn2))[0]
        d = np.linalg.norm(f1-f2)
        if ((d <= thresh and attr['class'] == 'same') or
                (d > thresh and attr['class'] == 'diff')):
            right += 1
    accuracy = float(right) / len(test_data)
    print('Testing accuracy %4.3g' % accuracy)
    return accuracy
def _run_all_images(self, feature):
    """Lazily compute a feature for every image in self.image_names.

    Returns a generator of (imfeat.compute(feature, image), image) tuples;
    each image is opened only when the generator reaches it.
    """
    return ((imfeat.compute(feature, opened), opened)
            for opened in (Image.open(path) for path in self.image_names))
def make_features(self, image):
    """Stack the first output of every feature in self._features.

    Returns a single-element list holding the np.hstack of the parts.
    """
    parts = []
    for sub_feature in self._features:
        parts.append(imfeat.compute(sub_feature, image)[0])
    stacked = np.hstack(parts)
    return [stacked]
def _compute_exemplar_feature(exemplar_fn, feature_pkl, fp):
    """Compute an exemplar image's feature and pickle it to an open file.

    The exemplar is resized to 64x64 before the feature is computed.

    Args:
        exemplar_fn: Path of the exemplar image
        feature_pkl: Path of the pickled feature object
        fp: Writable file object that receives the pickled feature
    """
    im = cv.LoadImage(exemplar_fn)
    fixed_size = cv.CreateImage((64, 64), 8, im.channels)
    cv.Resize(im, fixed_size, cv.CV_INTER_LINEAR)
    # Close the pickle file as soon as the feature object is loaded
    # instead of leaving the handle to the garbage collector.
    with open(feature_pkl, 'rb') as feature_fp:
        feat = pickle.load(feature_fp)
    pickle.dump(imfeat.compute(feat, fixed_size)[0], fp)
def test_hist(self):
    """Compute the Autocorrelogram feature on lena and print the result."""
    lena = cv.LoadImage('test_images/lena.jpg')
    correlogram = imfeat.Autocorrelogram([1, 3, 5, 7])
    print(imfeat.compute(correlogram, lena)[0])
def get_lfw_restricted_accuracy(split):
    """Train and score a same/different face classifier on one LFW split.

    Eigenfaces is trained on the unique training images (at most 3000,
    randomly sampled beyond that).  The distance between the features of
    each training pair is used to pick the threshold with the best
    training accuracy, and that threshold is then applied to the test pairs.

    Args:
        split: Split name; '<split>_train' and '<split>_test' are loaded
            with lfwcrop_data.get_lfw_cropped_data from 'data'

    Returns:
        Fraction of test pairs classified correctly (float)
    """
    max_train_ims = 3000
    train_data = lfwcrop_data.get_lfw_cropped_data('%s_train' % split, 'data')
    test_data = lfwcrop_data.get_lfw_cropped_data('%s_test' % split, 'data')
    # get a list of unique training images
    train_fns = []
    for fnpair in train_data.keys():
        train_fns.extend(fnpair)
    train_fns = sorted(set(train_fns))
    # load the unique training images, and learn PCA
    print('Training Eigenfaces feature space (%i training images)...' %
          (len(train_fns)))
    if len(train_fns) <= max_train_ims:
        selected_fns = train_fns
    else:
        selected_fns = random.sample(train_fns, max_train_ims)
    train_ims = [cv.LoadImage(fn) for fn in selected_fns]
    feat = face_feature.Eigenfaces(train_ims)
    # go through each training image pair and calculate distances
    dists = []
    classmap = {'same': 1, 'diff': 0}
    for ((fn1, fn2), attr) in train_data.items():
        f1 = imfeat.compute(feat, cv.LoadImage(fn1))[0]
        f2 = imfeat.compute(feat, cv.LoadImage(fn2))[0]
        dists.append((np.linalg.norm(f1 - f2), classmap[attr['class']]))
    # calculate threshold that maximizes average accuracy
    dists = sorted(dists)
    num_same = sum(1 for _, label in dists if label == 1)
    num_diff = sum(1 for _, label in dists if label == 0)
    # Sweep the threshold upwards through the sorted distances.  Pairs at
    # or below it are predicted 'same'.  Before the sweep starts every
    # 'diff' pair is a true negative, so the counter starts at num_diff
    # (starting at num_same is only right for balanced data).
    tpi, tni = 0, num_diff
    a = []
    for (d, v) in dists:
        if v == 1:
            tpi += 1
        else:
            tni -= 1
        a.append((tpi + tni) / float(num_same + num_diff))
    imax = np.argmax(np.array(a))
    thresh = dists[imax][0]
    print('Thresh %4.3g yields a classification accuracy of %4.3g' %
          (thresh, a[imax]))
    # now test on testing split
    right = 0
    for ((fn1, fn2), attr) in test_data.items():
        f1 = imfeat.compute(feat, cv.LoadImage(fn1))[0]
        f2 = imfeat.compute(feat, cv.LoadImage(fn2))[0]
        d = np.linalg.norm(f1 - f2)
        if ((d <= thresh and attr['class'] == 'same') or
                (d > thresh and attr['class'] == 'diff')):
            right += 1
    accuracy = float(right) / len(test_data)
    print('Testing accuracy %4.3g' % accuracy)
    return accuracy