Esempio n. 1
0
    def append(self, image_filename):
        """Hash one library image, record it, and group it with look-alikes.

        The file is resolved relative to ``self.image_lib_path`` and hashed
        with ``get_hash``. The resulting ``(hash, filename)`` pair is stored
        in ``self.hashes`` and unioned in ``self.img_set`` with every stored
        pair that ``self._resemble`` accepts.

        :param image_filename: file name inside ``self.image_lib_path``;
            stored exactly as given.
        """
        full_path = os.path.join(self.image_lib_path, image_filename)
        new_hash = get_hash(full_path)
        entry = (new_hash, image_filename)
        self.hashes.append(entry)

        # The scan runs after the append, so the new entry is also compared
        # against itself.
        for known in self.hashes:
            if self._resemble(new_hash, known[0]):
                self.img_set.union(entry, known)

        # Self-union last, so the entry is in the set even with no matches
        # (presumably how DisjointSet registers a member -- confirm).
        self.img_set.union(entry, entry)
Esempio n. 2
0
    def query(self, image_handle=None, image_path=None, image_url=None, image_hash=None):
        """Look up stored images matching the image described by the arguments.

        Only one source is used, chosen in this priority order:
        ``image_hash``, ``image_handle``, ``image_path``, ``image_url``.

        :param image_handle: object handed directly to ``get_hash``.
        :param image_path: path handed directly to ``get_hash``.
        :param image_url: URL whose body is downloaded, wrapped in a
            ``StringIO`` and hashed.
        :param image_hash: precomputed hash passed straight to
            ``self._query``. Anything other than ``None`` counts as
            supplied, so a numeric hash of ``0`` is honoured.
        :return: the result of ``self._query``, or ``[]`` when no source
            was supplied.
        """
        # Single-argument parenthesised prints produce the same output as the
        # print statement under Python 2.
        print('in query')

        # Test against None, not truthiness: a hash equal to 0 is falsy but
        # is still a hash and must not fall through to the other sources.
        if image_hash is not None:
            print('image hash')
            return self._query(image_hash)

        if image_handle:
            print('image handle')
            return self._query(get_hash(image_handle))

        if image_path:
            print('image path')
            return self._query(get_hash(image_path))

        if image_url:
            print('image url')
            print('retrieving image from %s' % image_url)
            response = urllib2.urlopen(image_url)
            try:
                raw_image = response.read()
            finally:
                # Always release the connection, even if the read fails.
                response.close()
            print('image retrieved')

            image_handle = StringIO(raw_image)
            return self._query(get_hash(image_handle))

        return []
Esempio n. 3
0
    def refresh(self):
        """Rebuild ``self.hashes`` and ``self.img_set`` from the image library.

        Every entry returned by ``os.listdir(self.image_lib_path)`` is printed,
        hashed with ``get_hash`` and stored as ``(hash, utf8_filename)``.
        Each pair is then unioned with itself, and every two pairs whose
        hashes satisfy ``self._resemble`` are unioned together.
        """
        self.hashes = []
        self.img_set = DisjointSet()

        library_dir = self.image_lib_path
        for name in os.listdir(library_dir):
            print(name)
            digest = get_hash(os.path.join(library_dir, name))
            # Re-encode the name from the filesystem encoding to UTF-8.
            utf8_name = name.decode(filesystem_encoding).encode('utf-8')
            self.hashes.append((digest, utf8_name))

        # Self-union every entry before any merging
        # (presumably how DisjointSet registers a member -- confirm).
        for entry in self.hashes:
            self.img_set.union(entry, entry)

        # Compare each unordered pair of entries exactly once.
        for left, right in combinations(self.hashes, 2):
            if self._resemble(left[0], right[0]):
                self.img_set.union(left, right)
Esempio n. 4
0
# This module has been deprecated.

from itertools import combinations
import os
from web.lib import pha
from web.lib.constants import IMAGE_LIBRARY_PATH, confidence
from web.lib.disjoint_set import DisjointSet


if __name__ == "__main__":
    # Ad-hoc driver: hash every file in the image library, union files whose
    # hashes differ in at most `confidence` bits, and print a report for every
    # pair that was compared.
    print(os.getcwd())
    library_dir = os.path.join("web", IMAGE_LIBRARY_PATH)

    hashes = []
    img_set = DisjointSet()
    for entry_name in os.listdir(library_dir):
        digest = pha.get_hash(os.path.join(library_dir, entry_name))
        # Names are decoded as GBK and stored re-encoded as UTF-8.
        hashes.append((digest, entry_name.decode("gbk").encode("utf-8")))

    # Self-union every entry before any merging
    # (presumably how DisjointSet registers a member -- confirm).
    for entry in hashes:
        img_set.union(entry, entry)

    for first, second in combinations(hashes, 2):
        (h1, f1), (h2, f2) = first, second
        # Bits set in the XOR are the bit positions where the hashes differ.
        diff_bits = h1 ^ h2
        cnt = bin(diff_bits).count("1")
        if cnt <= confidence:
            img_set.union(first, second)

        # The report is printed for every pair, whether or not it was merged.
        print("-" * 64)
        print("%016x" % h1 + " " + "%016x" % h2)
        print("% 16s" % f1 + " " + "% 16s" % f2)
        print("similarity: %2.2f%%" % ((64 - cnt) / 64.0 * 100))
        print("{0:064b}".format(diff_bits))