def handle(self, *args, **options):
    checked = added = 0
    images = Image.objects.values_list("phash", flat=True)
    galleries = Gallery.objects.values_list("slug", flat=True)
    gallery_path = os.path.join(settings.MEDIA_ROOT, "gallery")
    for root, dirs, f in os.walk(gallery_path):
        for dir in dirs:
            if dir not in galleries:
                gallery = Gallery(name=dir)
                gallery.save()
            else:
                gallery = Gallery.objects.get(slug=dir)
            for dir_root, d, files in os.walk(os.path.join(root, dir)):
                for file in files:
                    file_name = os.path.join(dir_root, file)
                    file_image = PILImage.open(file_name)
                    file_phash = str(phash(file_image))
                    checked += 1
                    if file_phash not in images:
                        image = Image(phash=file_phash, gallery=gallery)
                        image.original_image.name = file_name.replace(
                            settings.MEDIA_ROOT, ""
                        )[1:]
                        image.save()
                        self.stdout.write("Saved %s" % image.original_image.name)
                        added += 1
    self.stdout.write("Checked %d images, added %d" % (checked, added))
def similarity(image1, image2):
    hash1 = imagehash.phash(Image.open(image1))
    hash2 = imagehash.phash(Image.open(image2))
    diff1 = abs(hash1 - hash2)
    if diff1 <= 12:
        print(1 - (float(diff1) / 64))
def hash_file(file, contains_cb, result_cb):
    if contains_cb(file):
        cprint("\tSkipping {}".format(file), "green")
    else:
        try:
            hashes = []
            img = Image.open(file)

            file_size = get_file_size(file)
            image_size = get_image_size(img)
            capture_time = get_capture_time(img)

            # hash the image at 0, 90, 180 and 270 degrees so that rotated
            # copies produce the same sorted, concatenated fingerprint
            hashes.append(str(imagehash.phash(img)))
            for _ in range(3):
                img = img.rotate(90)
                hashes.append(str(imagehash.phash(img)))

            hashes = "".join(sorted(hashes))

            result_cb(file, hashes, file_size, image_size, capture_time)
            cprint("\tHashed {}".format(file), "blue")
        except OSError:
            cprint("Unable to open {}".format(file), "red")
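# A minimal usage sketch (not part of the original source): the sorted,
# concatenated rotation hashes produced by hash_file can be compared for exact
# equality to detect rotated duplicates. The `seen` dict and the file names
# below are hypothetical stand-ins for whatever storage the caller provides.
seen = {}

def contains_cb(path):
    return path in seen

def result_cb(path, hashes, file_size, image_size, capture_time):
    seen[path] = hashes

# hash_file("a.jpg", contains_cb, result_cb)
# hash_file("a_rotated.jpg", contains_cb, result_cb)
# is_duplicate = seen["a.jpg"] == seen["a_rotated.jpg"]  # True for pure 90-degree rotations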
def image_descriptor(image_path, prior=None):
    mtime = os.path.getmtime(image_path)
    ctime = os.path.getctime(image_path)
    if not prior or (not prior.get('modified')):
        img = Image.open(image_path)
        result = {'width': img.size[0],
                  'height': img.size[1],
                  'created': ctime,
                  'modified': mtime,
                  # TODO: if results too bad, change hash sizes for more precision?
                  'aHash': str(imagehash.average_hash(img)),
                  'pHash': str(imagehash.phash(img)),
                  'dHash': str(imagehash.dhash(img)),
                  }
        return result

    changed = prior['modified'] < mtime
    img = Image.open(image_path)
    if changed or not prior["width"]:
        prior["width"] = img.size[0]
    if changed or not prior["height"]:
        prior["height"] = img.size[1]
    if changed or not prior["aHash"]:
        prior["aHash"] = str(imagehash.average_hash(img))
    if changed or not prior["pHash"]:
        prior["pHash"] = str(imagehash.phash(img))
    if changed or not prior["dHash"]:
        prior["dHash"] = str(imagehash.dhash(img))
    return prior
def run(self):
    files = sorted(os.listdir('data/%s/media' % self.date_path))
    hashes = {}
    matches = []
    g = nx.Graph()
    for i in range(len(files)):
        f = files[i]
        fn = 'data/%s/media/%s' % (self.date_path, f)
        ahash = imagehash.average_hash(Image.open(fn))
        dhash = imagehash.dhash(Image.open(fn))
        phash = imagehash.phash(Image.open(fn))
        hashes[f] = {'ahash': ahash, 'dhash': dhash, 'phash': phash}
        for j in range(0, i):
            f2name = files[j]
            f2 = hashes[f2name]
            sumhash = sum([ahash - f2['ahash'],
                           dhash - f2['dhash'],
                           phash - f2['phash']])
            if sumhash <= 40:
                matches.append([f, files[j],
                                ahash - f2['ahash'],
                                dhash - f2['dhash'],
                                phash - f2['phash'],
                                sumhash])
                g.add_edge(f, f2name)
    with self.output().open('w') as fp_graph:
        components = list(nx.connected_components(g))
        # Note: sets are not JSON serializable
        d = []
        for s in components:
            d.append(list(s))
        logging.debug(' - = - = - = GRAPH HERE - = - = - = -')
        logging.debug(d)
        json.dump(d, fp_graph, indent=2)
def _calc(self, options, files):
    for file in files:
        if os.path.isdir(file):
            if options.get_recursive_flag() and not os.path.islink(file):
                try:
                    self._calc(options, sorted([
                        os.path.join(file, x)
                        for x in os.listdir(file)
                    ]))
                except PermissionError:
                    pass
        elif os.path.isfile(file):
            file_stat = file_mod.FileStat(file)
            try:
                phash = self._cache[
                    (file, file_stat.get_size(), file_stat.get_time())]
            except KeyError:
                try:
                    phash = str(imagehash.phash(PIL.Image.open(file)))
                except OSError:
                    continue
            print("{0:s}/{1:010d}/{2:d} {3:s}".format(
                phash,
                file_stat.get_size(),
                file_stat.get_time(),
                file
            ))
def getImageHash(imagename):
    imageF = Image.open(imagename)
    h = str(imagehash.dhash(imageF, 12))
    if h == '000000000000000000000000000000000000':
        h = 'phash_' + str(imagehash.phash(imageF))
    return h
def hashpdfimg(images):
    hashs = []
    for image in images:
        img = Image.fromarray(image)
        hash = imagehash.phash(img)
        hashs.append(hash)
    return hashs
def get_image_metadata(config, request):
    ''' Handle request for an image. '''
    try:
        url = request.GET['url']
    except KeyError:
        raise aiohttp.web.HTTPBadRequest(reason='"url" argument is required.')

    tor_config = config['Tor']
    socks_proxy = SOCKSConnector(tor_config['ip'], int(tor_config['port']))
    response = yield from aiohttp.get(url, connector=socks_proxy)
    content_type = response.headers['Content-type']

    if not content_type.startswith('image/'):
        reason = 'Requested a non-image resource ({}).'.format(content_type)
        raise aiohttp.web.HTTPBadRequest(reason=reason)

    image_data = yield from response.read()
    image_file = io.BytesIO(image_data)
    image = Image.open(image_file)

    extra = dict()
    if content_type in ('image/jpeg', 'image/tiff'):
        for name, tag in exifread.process_file(io.BytesIO(image_data)).items():
            if name.startswith('Image') or name.startswith('MakerNote'):
                if isinstance(tag.values, (int, str)):
                    extra[name] = tag.values
                elif isinstance(tag.values, list):
                    if len(tag.values) > 0 and isinstance(tag.values[0], int):
                        extra[name] = tag.values
                    else:
                        extra[name] = ','.join(map(str, tag.values))
                else:
                    extra[name] = str(tag)

    metadata = {
        'content_type': content_type,
        'extra': extra,
        'format': image.format,
        'hashes': {
            'ahash': str(imagehash.average_hash(image)),
            'dhash': str(imagehash.dhash(image)),
            'md5': hashlib.md5(image_data).hexdigest(),
            'phash': str(imagehash.phash(image)),
            'sha1': hashlib.sha1(image_data).hexdigest(),
            'sha256': hashlib.sha256(image_data).hexdigest(),
        },
        'last_modified': response.headers.get('Last-modified', None),
        'resolution': {
            'width': image.width,
            'height': image.height,
        },
        'size': len(image_data),
    }

    return aiohttp.web.Response(
        headers={'Content-type': 'application/json; charset=utf8'},
        body=json.dumps(metadata).encode('utf8'),
    )
def predict(self, image):
    result_priority_queue = PriorityQueue()
    results = []

    bbs = self.align.getAllFaceBoundingBoxes(image)
    for bb_index, bb in enumerate(bbs):
        alignedFace = self.align.alignImg("affine", 96, image, bb)
        if alignedFace is None:
            continue

        phash = str(imagehash.phash(Image.fromarray(alignedFace)))
        if phash in self.trained_images:
            identity = self.trained_images[phash].identity
            result_priority_queue.put_nowait((-1.0, identity, bb_index))
        else:
            rep = self.net.forwardImage(alignedFace)
            if self.svm is not None:
                result_proba_list = self.svm.predict_proba(rep)
                identity = np.argmax(result_proba_list[0])
                print(str(result_proba_list[0]) + " " + str(bb))
                for index, prob in enumerate(result_proba_list[0]):
                    result_priority_queue.put_nowait(
                        (prob * -1.0, self.identities[index], bb_index))
            else:
                result_priority_queue.put_nowait((0.0, -1, bb_index))

    matched_identities = []
    matched_bb_indices = []
    threshold = 0.6
    while len(matched_identities) != len(bbs) and result_priority_queue.empty() is False:
        detectedFaceInfo = result_priority_queue.get_nowait()
        identity = detectedFaceInfo[1]
        probability = detectedFaceInfo[0] * -1.0
        bb_index = detectedFaceInfo[2]
        # print(detectedFaceInfo)
        if identity in matched_identities:
            # print("matched_bbs : " + str(matched_identities))
            continue

        matched_bb_indices.append(bb_index)
        matched_identities.append(identity)

        if probability < threshold:
            results.append((-1, bbs[bb_index], 0.0))
        else:
            results.append((identity, bbs[bb_index], probability))
        # print('+' + str(results[len(results) - 1]))

    for bb_index, bb in enumerate(bbs):
        if bb_index in matched_bb_indices:
            continue
        results.append((-1, bb, 0.0))
    return results
def create_from_image(cls, img, url, facebook=None, okcupid=None):
    cls.objects.create(
        ahash=imagehash.average_hash(img),
        phash=imagehash.phash(img),
        dhash=imagehash.dhash(img),
        url=url,
        facebook=facebook,
        okcupid=okcupid)
def hashOP(image1, hashsimg):
    img1 = Image.fromarray(image1)
    hash1 = imagehash.phash(img1)
    hashs = []
    for hash in hashsimg:
        ham_dst = hamdist(str(hash1), str(hash))
        hashs.append(ham_dst)
    return hashs.index(min(hashs))
def render_to_img_with_phash(gen_opts, img_code):
    im = render_to_img(gen_opts['img_size'], img_code)
    hash_opts = gen_opts['hash_opts']
    img_hash = imagehash.phash(im, hash_opts['hash_size'], hash_opts['highfreq_factor'])
    # print('\t img_hash =', img_hash)
    return im, img_hash
def _extract(self, data):
    patch = [
        self.patch["x0"], self.patch["y0"],
        self.patch["x1"], self.patch["y1"]
    ]
    crop = data.crop(box=patch)
    phash = imagehash.phash(crop)
    return phash
def get_hash(images):
    imgs = []
    append = imgs.append
    for img in images:
        img = imagehash.phash(cv2pil(img), hash_size=16).hash.flatten()
        img = np.array([int(h) for h in img])
        append(img)
    return imgs
def dupe_remover(wd):
    '''
    Checks jpegs in wd for dupes
    args - wd: str - a directory
    '''
    print('removing dupes')
    image_dict = {}
    unique_images = {}
    duplicate_images = {}

    image_files = f'{wd}/*.jpg'
    for img in glob(image_files):
        imname = os.path.basename(img)
        image = Image.open(img).convert('L')
        image_dict[imname] = image

    while len(image_dict) > 0:
        # grab the first image
        # and compare it against every other image we have
        image_name = list(image_dict.keys())[0]
        image = image_dict[image_name]

        duplicate_to_this_image = []
        for other_image_name, other_image in image_dict.items():
            if image_name == other_image_name:
                continue
            # if the image is a duplicate, remove it from the image dictionary
            # and delete the file
            p = phash(image)
            p_other = phash(other_image)
            delta = p - p_other
            if delta < 7:
                other_file = f'{wd}/{other_image_name}'
                os.remove(other_file)
                duplicate_to_this_image.append(other_image_name)

        for dupe_name in duplicate_to_this_image:
            dupe_image = image_dict[dupe_name]
            del image_dict[dupe_name]
            duplicate_images[dupe_name] = dupe_image

        # Now that we've compared the image
        # it should be considered unique
        unique_images[image_name] = image
        del image_dict[image_name]
def run(self, task):
    image = str2image(task.get_file_data)
    self.results["imghash"]["a_hash"] = str(imagehash.average_hash(image))
    self.results["imghash"]["p_hash"] = str(imagehash.phash(image))
    self.results["imghash"]["d_hash"] = str(imagehash.dhash(image))
    return self.results
def compare_phash(source, capture):
    """
    Compares the pHash of the two given images and returns the similarity between the two.

    @param source: Image of any given shape as a numpy array
    @param capture: Image of any given shape as a numpy array
    @return: The similarity between the hashes of the image as a number 0 to 1.
    """
    source = Image.fromarray(source)
    capture = Image.fromarray(capture)
    source_hash = imagehash.phash(source)
    capture_hash = imagehash.phash(capture)
    return 1 - ((source_hash - capture_hash) / 64.0)
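# A minimal usage sketch (not in the original source): the default
# imagehash.phash produces a 64-bit hash, so dividing the Hamming distance by
# 64 maps it onto a 0..1 similarity. The file names below are hypothetical.
import numpy as np
from PIL import Image

frame_a = np.asarray(Image.open("frame_a.png").convert("RGB"))
frame_b = np.asarray(Image.open("frame_b.png").convert("RGB"))
print(compare_phash(frame_a, frame_b))  # 1.0 for identical frames, lower otherwise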
def db_add_image(file_name: str) -> bool:
    image = Image.open(file_name)
    return db_add(file_name,
                  str(imagehash.average_hash(image)),
                  str(imagehash.phash(image)),
                  str(imagehash.phash_simple(image)),
                  str(imagehash.dhash(image)),
                  str(imagehash.dhash_vertical(image)),
                  str(imagehash.whash(image)),
                  str(imagehash.colorhash(image)))
def similarityImage(img1, img2):
    try:
        enco1, enco2 = imageFace.findFace(img1, img2)
    except Exception:
        return None, None, None, None

    if (enco1 != []) and (enco2 != []):
        similarity = imageFace.similarityImageTakeEncoding(enco1, enco2)
    else:
        similarity = 0
    similarity = round(similarity, 5)

    hash0 = imagehash.average_hash(Image.open(img1))
    hash1 = imagehash.average_hash(Image.open(img2))
    similarityAverageHash = hash0 - hash1
    if similarityAverageHash != 0:
        similarityAverageHash = similarityAverageHash / 100
    if similarityAverageHash == 0:
        print(img1, ' ', img2)

    hash0 = imagehash.dhash(Image.open(img1))
    hash1 = imagehash.dhash(Image.open(img2))
    similarityDHash = hash0 - hash1
    if similarityDHash != 0:
        similarityDHash = similarityDHash / 100
    if similarityDHash == 0:
        print(img1, ' ', img2)

    hash0 = imagehash.phash(Image.open(img1))
    hash1 = imagehash.phash(Image.open(img2))
    similarityPHash = hash0 - hash1
    if similarityPHash != 0:
        similarityPHash = similarityPHash / 100
    if similarityPHash == 0:
        print(img1, ' ', img2)

    return similarityAverageHash, similarityDHash, similarityPHash, similarity
def unban_image(self, path):
    img = Image.open(path)
    lock_phash = Redlock(key='phashdb', masters={self.redis})
    lock_whash = Redlock(key='whashdb', masters={self.redis})
    raw_phash = im.phash(img)
    raw_whash = im.whash(img)
    self.exec_similar_hash(self.phashdb, raw_phash, 0, 13, self.del_from_db, lock=lock_phash)
    self.exec_similar_hash(self.whashdb, raw_whash, 0, 13, self.del_from_db, lock=lock_whash)
def rename_hash(file):
    try:
        filename = os.path.splitext(os.path.basename(file))
        data = imagehash.phash(Image.open(file))
        path = os.path.join(os.path.dirname(file), str(data) + filename[1])
        move(file, path)
    except Exception:
        pass
async def image_match(url1, url2):
    try:
        highfreq_factor = 1
        hash_size = 8
        async with aiohttp.ClientSession() as session:
            async with session.get(url1) as resp:
                r1 = await resp.read()
        async with aiohttp.ClientSession() as session:
            async with session.get(url2) as resp:
                r2 = await resp.read()
        hash1 = imagehash.phash(Images.open(BytesIO(r1)),
                                hash_size=hash_size,
                                highfreq_factor=highfreq_factor)
        hash2 = imagehash.phash(Images.open(BytesIO(r2)),
                                hash_size=hash_size,
                                highfreq_factor=highfreq_factor)
        return 1 - (hash1 - hash2) / len(hash1.hash) ** 2
    except Exception:
        return 0.0
def createPerceptualHash(arrayData: "np.ndarray") -> str:
    """
    Creates a perceptual hash of the given data

    :param arrayData: an array containing the data to be hashed
    :return: a hex string describing the hashed array (can be converted back
             to an ImageHash using hex_to_hash())
    """
    dataInstance = Image.fromarray(arrayData)
    return str(imagehash.phash(dataInstance, hash_size=16))
def get_phash(filename):
    try:
        phash = str(imagehash.phash(Image.open(filename)))
    except (NameError, IOError, TypeError, ValueError):
        print("get phash error, file deleted")
        os.remove(filename)
        phash = False
    return phash
def __init__(self, fontType, fontFilePath):
    self.fontType = fontType
    self.fontFilePath = fontFilePath
    img = Image.open(fontFilePath)
    self.aHash = str(imagehash.average_hash(img))
    self.dHash = str(imagehash.dhash(img))
    self.pHash = str(imagehash.phash(img))
    self.wHash = str(imagehash.whash(img))
def img_resize(img, img_black_flag):  # img_black_flag: True when black letterbox bars are present
    width, height = img.size

    # If the image contains black letterbox bars
    if img_black_flag:
        # TODO 1920 x 1080 / 1280 x 720
        # 1920 x 1080 resolution (bar height 137), 1280 x 720 resolution (bar height 92)
        if ((width == 1920 and height == 1080) or (width == 1280 and height == 720)):
            cropped_img = img.crop(
                (0, int(height * 0.13), width, height - int(height * 0.13)))
            cropped_img = cropped_img.resize((1920, 1080), Image.ANTIALIAS)
            phash = imagehash.phash(cropped_img)

        # 1280 x 720 resolution (bar height 92)
        # elif (width == 1280 and height == 720):
        #     cropped_img = img.crop((0, int(height * 0.13), width, height - int(height * 0.13)))
        #     cropped_img = cropped_img.resize((1920, 1080), Image.ANTIALIAS)
        #     phash = imagehash.phash(cropped_img)

        # 1280 x 692 resolution (bar height 75 ~ 92)
        elif (width == 1280 and height == 692):
            cropped_img = img.crop((0, 90, width, height - 90))
            cropped_img = cropped_img.resize((1920, 1080), Image.ANTIALIAS)
            phash = imagehash.phash(cropped_img)

        # 720 x 480 resolution (bar height 35)
        elif (width == 720 and height == 480):
            # cropped_img = img.crop((width//2 - 250, height//2 - 150, width//2 + 250, height//2 + 150))
            cropped_img = img.crop((0, 40, width, height - 40))
            cropped_img = cropped_img.resize((1920, 1080), Image.ANTIALIAS)
            phash = imagehash.phash(cropped_img)

        # Not implemented yet
        else:
            print("wait - not implemented yet")

    # If the image has no black letterbox bars
    else:
        img = img.resize((1920, 1080), Image.ANTIALIAS)
        phash = imagehash.phash(img)

    return phash
def compute(self, frame):
    ahash = imagehash.average_hash(frame)
    phash = imagehash.phash(frame)
    self.A[ahash] = ahash
    self.P[phash] = phash
    self._show_(ahash, phash)
    if self.log:
        self._log_(ahash, phash, frame, self.div)
    return ahash, phash
def phash(filename):
    if "image" in magic.from_file(filename, mime=True):
        print("Calculating pHash of: %s" % (filename, ))
        hash = imagehash.phash(Image.open(filename))
        helper.sqlite_insert("pHash", str(hash), os.path.basename(filename))
        return hash
    else:
        print("pHash works only with images")
        return None
def Hashing(filename):
    phash = int(str(imagehash.phash(Image.open(filename))), 16)
    ahash = int(str(imagehash.average_hash(Image.open(filename))), 16)
    phashsimple = int(str(imagehash.phash_simple(Image.open(filename))), 16)
    dhash = int(str(imagehash.dhash(Image.open(filename))), 16)
    dhashv = int(str(imagehash.dhash_vertical(Image.open(filename))), 16)
    whash = int(str(imagehash.whash(Image.open(filename))), 16)
    return phash, ahash, phashsimple, dhash, dhashv, whash
def calculate_hashes() -> None:
    """Calculates the phashes for each pokemon image and stores them in pokedex.json"""
    for pokemon in pokedex:
        hash = imagehash.phash(Image.open(f"data/images/{pokemon['id']}.png"))
        pokemon["hash"] = str(hash)
        print(chalk.Chalk("green")(pokemon["name"] + "\t=> " + pokemon["hash"]))
    utils.update_pokedex(pokedex)
def frame_perceptive_hash(frame):
    im = Image.fromarray(frame)
    ah = imhash.average_hash(im).hash.astype(float)
    ph = imhash.phash(im).hash.astype(float)
    wh = imhash.whash(im).hash.astype(float)
    dh_h = imhash.dhash(im).hash.astype(float)
    dh_v = imhash.dhash_vertical(im).hash.astype(float)
    return (ah, ph, wh, dh_h, dh_v)
def main():
    unique_files = scan_directory_for_images()
    prev_file = ''
    for key in sorted(unique_files):
        # print('key value: {} {}'.format(key, unique_files[key]))
        if prev_file:
            hash1 = imagehash.phash(Image.open('louisaandbenny\\' + prev_file))
            hash2 = imagehash.phash(Image.open('louisaandbenny\\' + unique_files[key]))
            # print('hash2 - hash1: {}'.format(hash2 - hash1))
            # delete near matches
            if (hash2 - hash1) < 5:
                delete_document(unique_files[key])
                delete_from_storage(unique_files[key])
        prev_file = unique_files[key]
def __init__(self, bndbox: typing.Tuple[int, int, int, int], frame: Image):
    self.bndbox = bndbox
    self.image = frame.crop_by_bounding_box(bndbox)
    pilim = PIL.Image.fromarray(self.image.image_data)
    self.hash = imagehash.phash(pilim, hash_size=12)
    self.match = None
    self.skipped = False
def perceptual_hash_distance(img1, img2):
    time1 = datetime.datetime.now()
    # hash1 = imagehash.phash(Image.open(img1))
    # hash2 = imagehash.phash(Image.open(img2))
    hash1 = imagehash.phash(img1)
    hash2 = imagehash.phash(img2)
    distance = hash1 - hash2
    # distance = wasserstein_distance(hash1, hash2)
    time2 = datetime.datetime.now()
    delta = time2 - time1
    execution_time = int(delta.total_seconds() * 1000)
    # elapsedSeconds = delta.seconds
    # elapsedMicroSeconds = (elapsedSeconds * 1000000) + delta.microseconds
    # execution_time = int(delta.microseconds)
    print('Perceptual hash_Normal_Distance: ', distance, execution_time)
    # print('%02d.%06d', execution_time)
    return distance, execution_time
def prepareData(self, path):
    self.X = []
    self.Y = []
    for filename in os.listdir(path):
        if not filename.endswith('.jpg'):
            continue
        filepath = os.path.join(path, filename)
        try:
            img = Image.open(filepath)
        except:
            print("cannot open image file")
            continue
        baseFileName = os.path.splitext(os.path.basename(filename))[0]

        rgbFrame = self.convertImageToRgbFrame(img)
        bbs = align.getAllFaceBoundingBoxes(rgbFrame)
        faceInFile = 0
        for bb in bbs:
            faceInFile += 1
            cropImage = rgbFrame[bb.top():bb.bottom(), bb.left():bb.right()]
            print("crop image : {}".format(len(cropImage)))
            if (len(cropImage) > 0) & (bb.left() > 0) & (bb.right() > 0) & (bb.top() > 0) & (bb.bottom() > 0):
                cv2.imshow("cropped", cropImage)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    return

                cropFolder = os.path.join(self.targetFolder, "crop")
                if not os.path.exists(cropFolder):
                    os.makedirs(cropFolder)
                cropFile = baseFileName + "-" + str(faceInFile) + ".jpg"
                cropPath = os.path.join(cropFolder, cropFile)
                im = Image.fromarray(cropImage)
                im.save(cropPath)

                landmarks = align.findLandmarks(rgbFrame, bb)
                alignedFace = align.align(
                    args.imgDim, rgbFrame, bb,
                    landmarks=landmarks,
                    landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
                if alignedFace is None:
                    continue

                phash = str(imagehash.phash(Image.fromarray(alignedFace)))
                print("phash = " + phash)
                rep = net.forward(alignedFace)
                self.X.append(rep)
                self.Y.append(cropFile)
def test_hash():
    test_path = path.dirname(path.realpath(__file__))
    data_path = path.join(test_path, 'data/flash-91-cover.jpg')

    # Generate a hash for Flash 91 cover image
    cover_hash = imagehash.phash(Image.open(data_path))
    assert cover_hash is not None
def __find_similar_ad_from_pic(self, picture):
    new_hash = phash(Image.open(urlopen(picture)))
    hashes = [ad.picturehash for ad in Annonce.select()]
    for old_hash in hashes:
        if old_hash is not None and hex_to_hash(old_hash) - new_hash < self.HASH_SIMILAR_TRESHOLD:
            return Annonce.get(Annonce.picturehash == old_hash)
    return False
def save(self, *args, **kwargs):
    image = Image.open(self.image)
    self.hash = phash(image)
    super(Picture, self).save(*args, **kwargs)
    if not self.thumbnail:
        resized = get_thumbnail(self.image, '300x300', crop='center', quality=99)
        self.thumbnail.save(resized.name, ContentFile(resized.read()), save=True)
        super(Picture, self).save(*args, **kwargs)
def find(file):
    hash = imagehash.phash(Image.open(file))
    results = []
    for k, v in sorted(attachments.items(), key=lambda a: hash - a[1]):
        similarity = hash - v
        if similarity <= 7:
            results.append({"id": k, "similarity": similarity})
    return results
def amazon_phash(metadata_s3_bucket: str, metadata_s3_key: str,
                 imgs_s3_bucket: str, imgs_s3_prefix: str,
                 local_data_dir: str, output_s3_bucket: str,
                 output_s3_prefix: str, n: int = sys.maxsize):
    s3 = boto3.client('s3')

    # Check if it exists first.
    output_key = f"{output_s3_prefix}/vecs.json.gz"
    if exists(s3, output_s3_bucket, output_key):
        return

    metadata_file = f"{local_data_dir}/metadata.json.gz"
    if not os.path.exists(metadata_file):
        print(f"Downloading s3://{metadata_s3_bucket}/{metadata_s3_key} to {metadata_file}")
        s3.download_file(Bucket=metadata_s3_bucket, Key=metadata_s3_key, Filename=metadata_file)

    vecs_file = f"{local_data_dir}/vecs.json.gz"
    vecs_fp = gzip.open(vecs_file, "wt")
    hash_size = 64  # end up with a 4096-dimensional bit vector.

    print(f"Writing vectors to {vecs_file}")
    with gzip.open(metadata_file) as gzfp:
        lines = islice(gzfp, 0, n)
        t0 = time()
        for i, d in enumerate(map(eval, lines)):
            if "imUrl" not in d or not d["imUrl"].endswith("jpg"):
                continue
            asin = d['asin']
            try:
                obj = s3.get_object(Bucket=imgs_s3_bucket, Key=f"{imgs_s3_prefix}/{asin}.jpg")
                bytes = BytesIO(obj['Body'].read())
                img = Image.open(bytes)
            except (PIL.UnidentifiedImageError, ClientError) as ex:
                print(f"Error for image {asin}: {ex}\n", file=sys.stderr)
                continue  # skip this item rather than reusing a stale image
            ph = phash(img, hash_size)
            for vec in ndarray_to_sparse_bool_vectors(ph.hash.reshape((1, ph.hash.size))):
                write_vec(vecs_fp, asin, vec)
            print(f"Processed {i}: {asin} - {((i + 1) / ((time() - t0) / 60)):.1f} vecs / minute")

    vecs_fp.close()  # Very important. Otherwise gzip file is invalid!

    print(f"Copying {vecs_file} to s3://{output_s3_bucket}/{output_key}")
    s3.upload_file(vecs_file, output_s3_bucket, output_key)
def compute(self, frame):
    def _chop(H, s):
        chop = ''
        for h in range(0, s):
            chop += H[h]
        return chop

    phash = _chop(str(imagehash.phash(frame)), 14)
    return phash
def delete_same_image():
    image_list = glob(downloadDirectory + "*.*")
    hash_dic = {}
    delete_list = []

    filename = image_list[0].split("\\")[-1]
    hash_dic[filename] = imagehash.phash(Image.open(image_list[0]))

    for i in tqdm.tqdm(range(1, len(image_list)), total=len(image_list) - 1):
        filename = image_list[i].split("\\")[-1]
        current_hash = imagehash.phash(Image.open(image_list[i]))
        # treat the image as a duplicate if it is close to any hash seen so far
        if any(abs(current_hash - j) <= 2 for j in hash_dic.values()):
            delete_list.append(image_list[i])
        else:
            hash_dic[filename] = current_hash

    for i in delete_list:
        if os.path.exists(i):
            os.remove(i)
    return None
def get_images(self, response, request, info):
    url_sha2 = self.file_sha2(request, response=response, info=info)
    orig_image = Image.open(BytesIO(response.body))
    phash = imagehash.phash(orig_image)
    phash_str = "".join(
        ["1" if val else "0" for val in np.nditer(phash.hash, order='C')])
    width, height = orig_image.size
    buf = self.convert_image(orig_image)
    yield width, height, url_sha2, phash_str, buf
def getHash(img):
    normal = Image.open(img).convert('L')
    crop = normal.crop((25, 37, 195, 150))
    ahash = str(imagehash.average_hash(crop))
    phash = str(imagehash.phash(crop))
    psimplehash = str(imagehash.phash_simple(crop))
    dhash = str(imagehash.dhash(crop))
    vertdhash = str(imagehash.dhash_vertical(crop))
    whash = str(imagehash.whash(crop))
    return ahash, phash, psimplehash, dhash, vertdhash, whash
def cal_hash_val(file_path):
    l = [3, 4, 16]
    hash_val = []
    for i in l:
        a = imagehash.phash(Image.open(file_path), i)
        a = str(a)
        if i != 16:
            a = bin(int(a, 16))[2:].zfill(i ** 2)
        hash_val.append(str(a))
    return hash_val
def phash(self, img_url):
    if img_url not in self._fetched or 'phash' not in self._fetched[img_url]:
        if img_url not in self._fetched:
            self._fetched[img_url] = {}
        content_type, image_str = self.image(img_url)
        if image_str:
            image = str_to_image(image_str)
            import imagehash
            self._fetched[img_url]['phash'] = str(imagehash.phash(image))
        else:
            self._fetched[img_url]['phash'] = None
    return self._fetched[img_url]['phash']
def _calculate_phash(target_dir):
    print("Calculating phash for files under {}".format(target_dir))
    files_phash = []
    for f in [f for f in os.listdir(target_dir)
              if os.path.isfile(os.path.join(target_dir, f))]:
        f = os.path.join(target_dir, f)
        try:
            files_phash.append((f, unicode(imagehash.phash(Image.open(f)))))
        except:
            pass
    return files_phash
def getHash(img):
    size = 223, 310
    normal = Image.open(img).convert('L')
    normal = normal.resize(size, Image.ANTIALIAS)
    crop = normal.crop((25, 37, 195, 150))
    ahash = str(imagehash.average_hash(crop))
    phash = str(imagehash.phash(crop))
    psimplehash = str(imagehash.phash_simple(crop))
    dhash = str(imagehash.dhash(crop))
    vertdhash = str(imagehash.dhash_vertical(crop))
    whash = str(imagehash.whash(crop))
    return ahash, phash, psimplehash, dhash, vertdhash, whash
def hash_value(img_fn, htype):
    img = Image.open(img_fn)
    if htype == 'a':
        hval = imagehash.average_hash(img)
    elif htype == 'p':
        hval = imagehash.phash(img)
    elif htype == 'd':
        hval = imagehash.dhash(img)
    elif htype == 'w':
        hval = imagehash.whash(img)
    else:
        hval = imagehash.average_hash(img)
    return hval
def get_imagehashes(fp: Fileish,
                    size=FINGERPRINT_SIZE) -> Dict[str, imagehash.ImageHash]:
    """Calculate perceptual hashes for comparison of identical images"""
    try:
        img = pil_image(fp)
        thumb = img.resize((size, size), PIL.Image.BILINEAR).convert('L')
        return dict(
            ahash=imagehash.average_hash(thumb),
            phash=imagehash.phash(thumb),
            whash=imagehash.whash(thumb),
            dhash=imagehash.dhash(thumb),
        )
    except OSError:  # corrupt image file probably
        return {}
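# A minimal comparison sketch (not in the original source): two fingerprint
# dicts returned by get_imagehashes can be compared hash-by-hash by summing
# the Hamming distances. It assumes pil_image accepts a file path; the paths
# and the threshold of 10 are hypothetical.
def probably_same_image(path_a, path_b, threshold: int = 10) -> bool:
    a = get_imagehashes(path_a)
    b = get_imagehashes(path_b)
    if not a or not b:
        return False
    # ImageHash objects support subtraction, which yields the Hamming distance
    return sum(a[k] - b[k] for k in a) <= threshold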
def run(self, task):
    self.task = task
    image = str2image(task.get_file_data)

    # Calculate hash.
    self.results["imghash"]["a_hash"] = str(imagehash.average_hash(image, hash_size=self.HASH_SIZE))
    self.results["imghash"]["p_hash"] = str(imagehash.phash(image, hash_size=self.HASH_SIZE))
    self.results["imghash"]["d_hash"] = str(imagehash.dhash(image, hash_size=self.HASH_SIZE))

    # Get similar images.
    self.results["similar"]["a_hash"] = self.get_similar_images(self.results["imghash"]["a_hash"], imagehash.average_hash)
    self.results["similar"]["p_hash"] = self.get_similar_images(self.results["imghash"]["p_hash"], imagehash.phash)
    self.results["similar"]["d_hash"] = self.get_similar_images(self.results["imghash"]["d_hash"], imagehash.dhash)

    return self.results
def run():
    signatures = get_pickled_signatures()

    import csv
    hashes_file = open('image_hashes.csv', 'w')
    columns = ['image_id', 'script_dhash', 'ahash', 'dhash', 'phash', 'signature']
    csv_writer = csv.DictWriter(hashes_file, fieldnames=columns)
    csv_writer.writeheader()

    t0 = time()
    for zip_counter in range(0, 10):
        filename = '../input/Images_%d.zip' % zip_counter
        print 'processing %s...' % filename
        imgzipfile = zipfile.ZipFile(filename)
        namelist = imgzipfile.namelist()
        for name in tqdm(namelist):
            if not name.endswith('.jpg'):
                continue
            filename = name.split('/')[-1]
            img_id = filename[:-4]
            try:
                imgdata = imgzipfile.read(name)
                if len(imgdata) == 0:
                    print '%s is empty' % img_id
                    continue
                stream = io.BytesIO(imgdata)
                img = Image.open(stream)
                ahash = imagehash.average_hash(img)
                dhash = imagehash.dhash(img)
                phash = imagehash.phash(img)
                script_dhash = extract_dhash(img)
                csv_writer.writerow({'image_id': img_id,
                                     'script_dhash': script_dhash,
                                     'ahash': str(ahash),
                                     'dhash': str(dhash),
                                     'phash': str(phash),
                                     'signature': signatures[int(img_id)]})
            except:
                print 'error with ' + img_id
        hashes_file.flush()
    hashes_file.close()
    print 'took %0.5fm' % ((time() - t0) / 60)
def fix_hash_for_all():
    print("Fixing hashes...")
    thresh = 1920 * 1080 - 32 * 32
    for root, dirs, files in os.walk(dest_dir):
        for name in files:
            if name.endswith('.jpg') and len(name) == 14:
                path = os.path.join(root, name)
                ext = name[-4:]
                im = Image.open(path)
                hsh = imagehash.phash(im)
                width, height = im.size
                im.close()
                newpath = os.path.join(root, str(hsh) + ext)
                if width * height > thresh:
                    rename(path, newpath)
    fix_file_locations()
def addImage(image):
    ''' Adds an image. '''
    doc = {'type': 'image', 'tags': [], 'links': []}

    # Generate the PIL image
    f = tempfile.NamedTemporaryFile()
    f.write(image)
    f.flush()

    # Get exif and mime
    doc['exif'] = json.loads(subprocess.check_output(['exiftool', '-j', f.name]))[0]
    for i in config.exifIgnore:
        doc['exif'].pop(i)
    doc['mime'] = subprocess.check_output(['file', '--mime-type', f.name]).split(' ')[1][:-1]
    f.seek(0)
    im = Image.open(f)

    # Calculate the hashes
    hashes = {}
    hashes['length'] = len(image)
    hashes['crc32'] = hex(zlib.crc32(image) & 0xffffffff)[2:]
    hashes['md5'] = hashlib.md5(image).hexdigest()
    hashes['sha1'] = hashlib.sha1(image).hexdigest()
    hashes['sha256'] = hashlib.sha256(image).hexdigest()
    hashes['sha512'] = hashlib.sha512(image).hexdigest()
    collisions = checkCollision(hashes)
    hashes['phash'] = str(imageHashToInt(imagehash.phash(im)))
    pcollisions = checkPhash(hashes['phash'])
    doc['hashes'] = hashes

    # Generate a thumbnail
    im.thumbnail(config.thumbsize)
    thumb = StringIO()
    im.convert('RGB').save(thumb, "JPEG")

    # Save the result
    id = images.save(doc)[0]
    thumb = thumb.getvalue()
    images.put_attachment(images[id], thumb, filename='thumbnail.jpg',
                          content_type=config.thumbMime)
    images.put_attachment(images[id], image, filename='image', content_type=doc['mime'])
    return id, collisions, pcollisions
def __init__(self, path):
    t = path
    if isinstance(path, str):
        t = Image.open(path)
    phash = imagehash.phash(t, 8)
    histogram = np.array(t.convert('L').histogram())
    self.md5 = md5(t.tostring()).hexdigest()
    self.phash = str(phash)
    self.histogram = utils.smooth(histogram, 100)
    self.mins = argrelextrema(self.histogram, np.less)[0]
    self.maxs = argrelextrema(self.histogram, np.greater)[0]
    self.histogram = np.array(map(lambda x: int(x), self.histogram))
    if len(self.mins) < 2:
        self.mins = np.append(self.mins, [1000] * (2 - len(self.mins)))
    if len(self.maxs) < 2:
        self.maxs = np.append(self.maxs, [1000] * (2 - len(self.maxs)))
def _insert_meta(self, data_store, comic_id):
    j = self._get_xkcd_json(comic_id)
    hash_avg = ''
    hash_d = ''
    hash_p = ''
    if not j:
        return
    if j.get('img'):
        file_name = '/tmp/' + get_random_file_name()
        try:
            self.myopener.retrieve(j.get('img'), file_name)
            hash_avg = imagehash.average_hash(Image.open(file_name))
            hash_d = imagehash.dhash(Image.open(file_name))
            hash_p = imagehash.phash(Image.open(file_name))
        except:
            pass
        finally:
            os.remove(file_name)
    data_store.insert_xkcd_meta(comic_id, json.dumps(j), str(hash_avg),
                                str(hash_d), str(hash_p))
def run(self):
    date_path = self.search['date_path']
    files = sorted(os.listdir('data/%s/media' % date_path))
    hashes = {}
    matches = []
    g = nx.Graph()
    update_block_size = get_block_size(len(files), 5)
    for i in range(len(files)):
        f = files[i]
        fn = 'data/%s/media/%s' % (date_path, f)
        ahash = imagehash.average_hash(Image.open(fn))
        dhash = imagehash.dhash(Image.open(fn))
        phash = imagehash.phash(Image.open(fn))
        hashes[f] = {'ahash': ahash, 'dhash': dhash, 'phash': phash}
        for j in range(0, i):
            f2name = files[j]
            f2 = hashes[f2name]
            sumhash = sum([ahash - f2['ahash'],
                           dhash - f2['dhash'],
                           phash - f2['phash']])
            # FIXME: 40 is a hard-coded arbitrary (eyeballed) threshold
            if sumhash <= 40:
                matches.append([f, files[j],
                                ahash - f2['ahash'],
                                dhash - f2['dhash'],
                                phash - f2['phash'],
                                sumhash])
                g.add_edge(f, f2name)
        if i % update_block_size == 0:
            self.update_job(
                date_path=self.search['date_path'],
                status="STARTED: %s - %s/%s" % (self.task_family, i, len(files))
            )
    with self.output().open('w') as fp_graph:
        components = list(nx.connected_components(g))
        # Note: sets are not JSON serializable
        d = []
        for s in components:
            d.append(list(s))
        json.dump(d, fp_graph, indent=2)
def _calc(self, options, files):
    image_phash = {}
    for file in files:
        if os.path.isdir(file):
            if options.get_recursive_flag() and not os.path.islink(file):
                try:
                    image_phash.update(self._calc(options, sorted([
                        os.path.join(file, x)
                        for x in os.listdir(file)
                    ])))
                except PermissionError:
                    pass
        elif os.path.isfile(file):
            try:
                phash = int(str(imagehash.phash(PIL.Image.open(file))), 16)
            except OSError:
                continue
            image_phash[file] = phash
    return image_phash
def collect_files():
    print("Collecting files...")
    thresh = 1920 * 1080 - 32 * 32
    for dir in (source_dir, dest_dir, orig, tall, wide):
        if not os.path.isdir(dir):
            os.mkdir(dir)
    for root, dirs, files in os.walk(source_dir):
        for name in files:
            path = os.path.join(root, name)
            try:
                im = Image.open(path)
                width, height = im.size
            except:
                continue
            if width * height > thresh:
                hsh = imagehash.phash(im)
                im.close()
                if width > height:
                    newpath = os.path.join(wide, str(hsh) + '.jpg')
                    copy(path, newpath)
                elif height > width:
                    newpath = os.path.join(tall, str(hsh) + '.jpg')
                    copy(path, newpath)