def put_images(imagedir, replace=False):
    """Store every valid image file found in *imagedir* through ``cass``.

    Each directory entry is read as raw bytes and checked with PIL
    (``Image.open`` followed by ``verify``).  Entries that pass are stored
    with ``cass.put_image`` under the MD5 hex digest of their contents, with
    the original file name recorded as metadata.  Progress lines and a final
    summary are printed to stdout.

    Args:
        imagedir: Path of the directory whose entries are imported.
        replace: When false (the default), images whose hash is already
            reported by ``cass.get_image_hashes()`` are skipped.  When true,
            every readable image is stored again.

    Returns:
        None.
    """
    import io

    success_count = 0
    old_hashes = set() if replace else set(cass.get_image_hashes())

    for filename in os.listdir(imagedir):
        try:
            # Image files are binary: text mode can alter the bytes (newline
            # translation on Windows) and therefore the stored data and hash.
            with open(os.path.join(imagedir, filename), "rb") as f:
                data = f.read()
            # verify() only checks integrity; the image object is not reused.
            Image.open(io.BytesIO(data)).verify()
        except (IOError, SyntaxError):
            # IOError covers unreadable entries (including sub-directories)
            # and files PIL cannot identify.  SyntaxError is what PIL's PNG
            # verification raises for broken chunks.
            print("couldn't load image: %s" % filename)
            continue

        md5hash = hashlib.md5(data).hexdigest()
        if md5hash in old_hashes:
            continue

        # Store the image file indexed by hash
        cass.put_image(md5hash, data, metadata={"filename": filename})
        success_count += 1
        print("Put %s (%s)" % (filename, md5hash))

    # Count lazily instead of materialising every hash in a list.
    total = sum(1 for _ in cass.get_image_hashes())
    print("Successfully put %d images (total %d)" % (success_count, total))
def put_features(feature_str, hashes=None, replace=False):
    """Compute one imfeat feature for stored images and save the values.

    For each selected image hash the image bytes are fetched with
    ``cass.get_imagedata``, decoded with PIL, converted to RGB, passed to
    ``imfeat.compute`` and the result written with
    ``cass.put_feature_value``.  Images that cannot be decoded, or whose
    width or height is outside 10..1000 pixels, are skipped.  Progress and
    timing information is printed to stdout.

    Args:
        feature_str: Python expression that evaluates to the feature object,
            with ``imfeat`` in scope.  The same string is used as the key
            under which the values are stored.
        hashes: Iterable of image hashes to process; defaults to
            ``cass.get_image_hashes()``.
        replace: When true, compute the feature for every hash in *hashes*.
            When false (the default), only hashes yielded by
            ``cass.sorted_iter_diff(hashes, existing_feature_hashes)`` are
            processed.

    Returns:
        None.
    """
    import io

    # Size limits (pixels, inclusive) outside of which GIST is known to crash.
    min_side, max_side = 10, 1000

    # SECURITY: feature_str is evaluated as Python code.  Only ``imfeat`` is
    # pre-bound, but builtins stay reachable, so it must never come from an
    # untrusted source.
    feature = eval(feature_str, {"imfeat": imfeat})
    print("Feature: %s (%s)" % (feature_str, feature))

    # Compute feature on all available images by default
    if hashes is None:
        hashes = cass.get_image_hashes()

    # Optionally try not to replace existing features
    if replace:
        print("Replacing all existing features for %s" % feature_str)
    else:
        old_hashes = cass.get_feature_hashes(feature_str)

    # Get an estimate of the number of images by counting
    # FIXME This requires cass to load the whole row, twice
    # NOTE(review): this counts hashes that already have the feature, not the
    # images about to be processed -- confirm which number is intended.
    existing = sum(1 for _ in cass.get_feature_hashes(feature_str))
    print("Computing feature for %d images" % existing)

    success_count = 0
    start_time = time.time()

    if replace:
        pending = hashes
    else:
        pending = cass.sorted_iter_diff(hashes, old_hashes)

    for md5hash in pending:
        # Assumes get_imagedata returns a byte string -- TODO confirm.
        data = cass.get_imagedata(md5hash)
        try:
            im = Image.open(io.BytesIO(data))
            im.load()
        except (IOError, SyntaxError):
            # SyntaxError is what PIL's PNG reader raises for broken chunks.
            print("couldn't load image: %s" % md5hash)
            continue

        # Guard for image sizes (too small or too large) that break GIST
        width, height = im.size
        if not (min_side <= width <= max_side
                and min_side <= height <= max_side):
            print(
                "Skipping image with unsupported size (%d, %d) because of "
                "GIST segfault" % (width, height)
            )
            continue

        # FIXME this seems to be necessary for many features
        # e.g. imfeat.Moments and imfeat.GIST()
        im = im.convert("RGB")

        # Only for catching segfaults: printed before the computation so the
        # last hash shown identifies the image being processed.
        print("hash: %s" % md5hash)

        # Compute the feature
        value = imfeat.compute(feature, im)
        ret = cass.put_feature_value(feature_str, md5hash, value)
        print("Put feature_value([%s], [%s]): %d" % (feature_str, md5hash, ret))
        success_count += 1

    stop_time = time.time()
    print(
        "Finished %d features in %.2f seconds"
        % (success_count, stop_time - start_time)
    )