def append(self, image_filename):
    """Hash one library image and merge it with the entries it resembles.

    The file is looked up under ``self.image_lib_path``. Its
    ``(hash, filename)`` pair is appended to ``self.hashes`` and then
    unioned in ``self.img_set`` with every stored pair whose hash
    ``self._resemble`` reports as similar.

    Args:
        image_filename: Name of the image file inside the library
            directory; stored in the pair exactly as given.
    """
    full_path = os.path.join(self.image_lib_path, image_filename)
    new_hash = get_hash(full_path)
    entry = (new_hash, image_filename)
    self.hashes.append(entry)

    # The new entry is already in self.hashes at this point, so the scan
    # below also compares it against itself.
    for known_hash, known_name in self.hashes:
        if not self._resemble(new_hash, known_hash):
            continue
        self.img_set.union(entry, (known_hash, known_name))

    # Self-union, mirroring how refresh() introduces each pair to the set.
    self.img_set.union(entry, entry)
def query(self, image_handle=None, image_path=None, image_url=None,
          image_hash=None):
    """Find library images resembling the supplied image.

    Only one source is used, picked in this priority order:
    ``image_hash``, ``image_handle``, ``image_path``, ``image_url``.

    Args:
        image_handle: Object passed straight to ``get_hash`` (the URL
            branch passes a ``StringIO`` of the downloaded bytes, so
            presumably a file-like object).
        image_path: Path passed straight to ``get_hash``.
        image_url: URL whose body is downloaded and then hashed.
        image_hash: Precomputed hash, handed to ``self._query`` unchanged.

    Returns:
        The result of ``self._query`` for the resolved hash, or ``[]``
        when no source was supplied.
    """
    # Local import keeps this method self-contained; urllib2 responses are
    # not context managers on Python 2, hence ``closing``.
    from contextlib import closing

    print('in query')

    # Compare against None rather than truthiness: a hash of 0 is a valid
    # value and must not be mistaken for "no hash supplied".
    if image_hash is not None:
        print('image hash')
        return self._query(image_hash)

    if image_handle:
        print('image handle')
        return self._query(get_hash(image_handle))

    if image_path:
        print('image path')
        return self._query(get_hash(image_path))

    if image_url:
        print('image url')
        print('retrieving image from %s' % image_url)
        # NOTE(review): image_url is fetched as given, with no scheme/host
        # validation and no timeout -- confirm callers only pass trusted
        # URLs.
        # Close the HTTP response deterministically instead of relying on
        # garbage collection.
        with closing(urllib2.urlopen(image_url)) as response:
            raw_image = response.read()
        print('image retrieved')
        return self._query(get_hash(StringIO(raw_image)))

    return []
def refresh(self):
    """Rebuild ``self.hashes`` and ``self.img_set`` from the library directory.

    Every entry returned by ``os.listdir(self.image_lib_path)`` is hashed
    with ``get_hash``. Its name is decoded with ``filesystem_encoding`` and
    stored re-encoded as UTF-8. Each ``(hash, name)`` pair is first unioned
    with itself, then every pair of entries whose hashes
    ``self._resemble`` each other is unioned together.
    """
    self.hashes = []
    self.img_set = DisjointSet()

    for entry_name in os.listdir(self.image_lib_path):
        print(entry_name)
        entry_path = os.path.join(self.image_lib_path, entry_name)
        entry_hash = get_hash(entry_path)
        utf8_name = entry_name.decode(filesystem_encoding).encode('utf-8')
        self.hashes.append((entry_hash, utf8_name))

    # Self-union every entry before any cross-entry merging takes place.
    for entry in self.hashes:
        self.img_set.union(entry, entry)

    # Compare each unordered pair of entries exactly once.
    for (hash_a, name_a), (hash_b, name_b) in combinations(self.hashes, 2):
        if not self._resemble(hash_a, hash_b):
            continue
        self.img_set.union((hash_a, name_a), (hash_b, name_b))
# This module has been deprecated. from itertools import combinations import os from web.lib import pha from web.lib.constants import IMAGE_LIBRARY_PATH, confidence from web.lib.disjoint_set import DisjointSet if __name__ == "__main__": print os.getcwd() image_lib_path = os.path.join("web", IMAGE_LIBRARY_PATH) hashes, img_set = [], DisjointSet() for filename in os.listdir(image_lib_path): _hash = pha.get_hash(os.path.join(image_lib_path, filename)) hashes.append((_hash, filename.decode("gbk").encode("utf-8"))) for pair in hashes: img_set.union(pair, pair) for (h1, f1), (h2, f2) in combinations(hashes, 2): cnt = bin(h1 ^ h2).count("1") if cnt <= confidence: img_set.union((h1, f1), (h2, f2)) print "-" * 64 print "%016x" % h1, "%016x" % h2 print "% 16s" % f1, "% 16s" % f2 print "similarity: %2.2f%%" % (((64 - cnt) / 64.0) * 100) print "{0:064b}".format(h1 ^ h2)