Exemple #1
0
def put_images(imagedir, replace=False):
    """Store every loadable image found in *imagedir* via ``cass.put_image``.

    Each directory entry is read as raw bytes and checked with PIL's
    ``Image.open(...).verify()``. Entries that pass are stored under the
    MD5 hex digest of their contents, with the original filename attached
    as metadata.

    Args:
        imagedir: Directory whose entries are tried as image files.
        replace: When false (the default), files whose hash is already
            returned by ``cass.get_image_hashes()`` are skipped. When true,
            every valid image is stored again.

    Returns:
        None. Progress, failures and a final summary are printed.
    """
    # Function-scope import: io.BytesIO wraps the raw bytes for PIL and is
    # available on both Python 2 and Python 3 (unlike the StringIO module).
    import io

    success_count = 0
    # With replace=True nothing is treated as "already stored".
    old_hashes = set() if replace else set(cass.get_image_hashes())

    for filename in os.listdir(imagedir):
        try:
            # Images are binary: text mode ("r") can mangle the bytes on
            # platforms that translate newlines and breaks on Python 3.
            with open(os.path.join(imagedir, filename), "rb") as f:
                data = f.read()
            Image.open(io.BytesIO(data)).verify()
        except IOError:
            # Also reached for sub-directories and unreadable files.
            print("couldn't load image: %s" % filename)
            continue

        md5hash = hashlib.md5(data).hexdigest()
        if md5hash in old_hashes:
            continue

        # Store the image file indexed by hash
        cass.put_image(md5hash, data, metadata={"filename": filename})
        success_count += 1
        print("Put %s (%s)" % (filename, md5hash))

    total = len(list(cass.get_image_hashes()))
    print("Successfully put %d images (total %d)" % (success_count, total))
Exemple #2
0
def put_features(feature_str, hashes=None, replace=False):
    """Compute one feature for stored images and save each value.

    Args:
        feature_str: Python expression that evaluates to the feature object;
            it is evaluated with ``imfeat`` as the only supplied global and
            is also used as the key passed to ``cass``.
        hashes: Iterable of image hashes to process. Defaults to everything
            returned by ``cass.get_image_hashes()``.
        replace: When false (the default), hashes already present in
            ``cass.get_feature_hashes(feature_str)`` are skipped via
            ``cass.sorted_iter_diff``. When true, every hash is recomputed.

    Returns:
        None. Progress and a timing summary are printed.
    """
    # Function-scope import, done once instead of once per image; io.BytesIO
    # exists on both Python 2 and Python 3 (unlike the StringIO module).
    import io

    # NOTE(security): eval executes arbitrary code. feature_str must come
    # from a trusted source (e.g. the operator's own command line).
    feature = eval(feature_str, {"imfeat": imfeat})
    print("Feature: %s (%s)" % (feature_str, feature))

    # Compute feature on all available images by default
    if hashes is None:
        hashes = cass.get_image_hashes()

    # Optionally try not to replace existing features
    if replace:
        print("Replacing all existing features for %s" % feature_str)
    else:
        old_hashes = cass.get_feature_hashes(feature_str)

    # Get an estimate of the number of images by counting
    # FIXME This requires cass to load the whole row, twice
    # NOTE(review): this counts hashes that already have this feature, not
    # the images about to be processed -- confirm which count is intended.
    existing = len(list(cass.get_feature_hashes(feature_str)))
    print("Computing feature for %d images" % existing)

    success_count = 0
    start_time = time.time()

    # old_hashes is only bound (and only needed) when replace is false.
    _hashes = hashes if replace else cass.sorted_iter_diff(hashes, old_hashes)
    for md5hash in _hashes:
        data = cass.get_imagedata(md5hash)

        try:
            im = Image.open(io.BytesIO(data))
            im.load()
        except IOError:
            print("couldn't load image: %s" % md5hash)
            continue

        # Guard for images that break GIST: either side below 10 px or
        # above 1000 px is skipped.
        width, height = im.size
        if width < 10 or height < 10 or width > 1000 or height > 1000:
            print(
                "Skipping image (%d, %d) outside the 10-1000 pixel range "
                "because of GIST segfault" % (width, height)
            )
            continue

        # FIXME this seems to be necessary for many features
        # e.g. imfeat.Moments and imfeat.GIST()
        im = im.convert("RGB")

        # Only for catching segfaults: printed before the computation so the
        # offending hash is visible if the process dies.
        print("hash: %s" % md5hash)

        # Compute the feature
        value = imfeat.compute(feature, im)
        ret = cass.put_feature_value(feature_str, md5hash, value)
        print("Put feature_value([%s], [%s]): %d" % (feature_str, md5hash, ret))
        success_count += 1

    stop_time = time.time()
    print("Finished %d features in %.2f seconds" % (success_count, stop_time - start_time))