Esempio n. 1
0
    def compare_whash(self):
        """Return the wavelet-hash difference between the stored and input images.

        Both ``self.input_image`` and ``self.stored_image`` are opened with
        PIL and hashed with ``imagehash.whash`` using
        ``ImagePreprocessor.hash_size``.

        Returns:
            The result of ``stored_hash - input_hash`` (for imagehash hash
            objects this is the number of differing bits).
        """
        size = ImagePreprocessor.hash_size
        # Hash inside a context manager so the file handle opened by PIL is
        # released as soon as the hash has been computed.
        with Image.open(self.input_image) as input_img:
            input_hash = imagehash.whash(input_img, hash_size=size)
        with Image.open(self.stored_image) as stored_img:
            stored_hash = imagehash.whash(stored_img, hash_size=size)

        return stored_hash - input_hash
def wavelet_hash_distance1(img1, img2):
    """Print and return the distance between two image files' wavelet hashes.

    Each image is opened from its path, hashed with ``imagehash.whash`` and
    converted to its string form; the two strings are compared with
    ``hamming_hash_distance``.

    Args:
        img1: Path (or file object) of the first image.
        img2: Path (or file object) of the second image.

    Returns:
        A ``(distance, execution_time)`` tuple, where ``execution_time`` is
        the elapsed time in whole milliseconds. The function previously only
        printed these values; returning them mirrors
        ``wavelet_hash_distance`` and lets callers use the result.
    """
    time1 = datetime.datetime.now()
    hash1 = str(imagehash.whash(Image.open(img1)))
    hash2 = str(imagehash.whash(Image.open(img2)))
    distance = hamming_hash_distance(hash1, hash2)
    time2 = datetime.datetime.now()
    delta = time2 - time1
    execution_time = int(delta.total_seconds() * 1000)
    print('Wavelet hash_Hamming_Distance: ', distance , execution_time)
    return distance, execution_time
Esempio n. 3
0
def process_frame(image, scale):
	"""Return the wavelet-hash difference caused by ``cart_repair.process_frame``.

	The frame is hashed, passed through ``cart_repair.process_frame`` with
	``scale``, hashed again, and the difference ``before - after`` of the two
	hashes is returned.
	"""
	hash_before = imagehash.whash(Image.fromarray(image))

	repaired = cart_repair.process_frame(image, scale)
	hash_after = imagehash.whash(Image.fromarray(repaired))

	return hash_before - hash_after
Esempio n. 4
0
def compare_frame(image, scale, interp = cv2.INTER_CUBIC):
	"""Return the wavelet-hash difference between a frame and its resized copy.

	Args:
		image: Frame as an array whose first two axes are height and width.
		scale: Factor applied to both width and height (floored to ints).
		interp: OpenCV interpolation flag passed to ``cv2.resize``.

	Returns:
		``before_hash - after_hash`` for the ``imagehash.whash`` of the
		original and the resized frame.
	"""
	# Only height and width are needed. Slicing the shape keeps 2-D
	# (single-channel) frames working, where a three-way unpack raised
	# ValueError.
	height, width = image.shape[:2]
	before_hash = imagehash.whash(Image.fromarray(image))

	new_size = (math.floor(width * scale), math.floor(height * scale))
	processed = cv2.resize(image, new_size, interpolation = interp)
	after_hash = imagehash.whash(Image.fromarray(processed))

	return before_hash - after_hash
def wavelet_hash_distance(img1, img2):
    """Print and return the wavelet-hash difference of two images.

    Unlike opening from a path, the arguments are handed to
    ``imagehash.whash`` directly, so they must already be image objects.

    Returns:
        ``(distance, execution_time)`` where ``distance`` is
        ``whash(img1) - whash(img2)`` and ``execution_time`` is the elapsed
        time in whole milliseconds.
    """
    started = datetime.datetime.now()
    first_hash = imagehash.whash(img1)
    second_hash = imagehash.whash(img2)
    distance = first_hash - second_hash
    elapsed = datetime.datetime.now() - started
    execution_time = int(elapsed.total_seconds() * 1000)
    print('Wavelet hash_Normal_Distance: ', distance, execution_time)
    return distance, execution_time
Esempio n. 6
0
def generate_hashes_for_image(imgPath):
    """Compute several perceptual hashes for the image at ``imgPath``.

    Returns:
        A dict with keys ``ahash``, ``phash``, ``dhash``, ``whash`` (all with
        ``hash_size=8``) and ``whashDb4`` (``whash`` with ``mode='db4'``).
    """
    print("Generating hash for: " + imgPath)
    img = Image.open(imgPath)

    return {
        'ahash': imagehash.average_hash(img, hash_size=8),
        'phash': imagehash.phash(img, hash_size=8),
        'dhash': imagehash.dhash(img, hash_size=8),
        'whash': imagehash.whash(img, hash_size=8),
        'whashDb4': imagehash.whash(img, mode='db4'),
    }
Esempio n. 7
0
def spec_similarity(spec1, spec2, hash_type=settings.HASH_TYPE):
    """Return the perceptual-hash difference between two spectrogram arrays.

    Args:
        spec1: First array, converted to an image with ``Image.fromarray``.
        spec2: Second array, converted the same way.
        hash_type: One of ``'ahash'``, ``'phash'``, ``'dhash'``, ``'whash'``.

    Returns:
        ``hash1 - hash2`` for the selected hash function.

    Raises:
        ValueError: If ``hash_type`` is not one of the supported names
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if hash_type == 'ahash':
        hasher = average_hash
    elif hash_type == 'phash':
        hasher = phash
    elif hash_type == 'dhash':
        hasher = dhash
    elif hash_type == 'whash':
        hasher = whash
    else:
        raise ValueError("Unknown hash_type: {!r}".format(hash_type))

    img1, img2 = Image.fromarray(spec1), Image.fromarray(spec2)
    return hasher(img1) - hasher(img2)
Esempio n. 8
0
def hash_similarity(map1, map2, band, lim):
    """Append per-band perceptual-hash similarity scores to ``band``.

    For every row of ``band`` the square window ``[n1:n2 + 1, n1:n2 + 1]``
    is cut from both maps, values above ``lim`` are set to ``lim``, values
    below 1 are set to 0, and the result is rescaled by ``255 / lim`` and
    cast to ``uint8``. The two windows are turned into images and compared
    with four hash functions at hash size 16; each score is
    ``1 - (hash2 - hash1) / 256``.

    Args:
        map1: First 2-D array.
        map2: Second 2-D array.
        band: 2-D array whose first two columns are the start and end index
            of each window.
        lim: Upper clipping value used before rescaling.

    Returns:
        ``band`` with four extra columns appended, in the order
        whash, average_hash, phash, dhash.
    """
    hash_size = 16
    n_bits = hash_size ** 2  # 256 bits per hash at size 16

    def _to_uint8(window):
        # np.array() copies the slice, so clipping no longer writes through
        # the view into the caller's map1/map2.
        tile = np.array(window)
        tile[tile > lim] = lim
        tile[tile < 1] = 0
        tile = tile * (255 / lim)
        return tile.astype(np.uint8)

    # One score list per hash function, in output-column order.
    scores = ([], [], [], [])

    for bounds in band:
        n1 = int(bounds[0])
        n2 = int(bounds[1])
        x1 = _to_uint8(map1[n1:(n2 + 1), n1:(n2 + 1)])
        x2 = _to_uint8(map2[n1:(n2 + 1), n1:(n2 + 1)])
        # NOTE(review): only the second window is divided by its mean (which
        # also turns it into a float array); the first is not. Kept as-is
        # because the intent cannot be confirmed from this function alone.
        x2 = x2 / np.mean(x2)
        img1 = Image.fromarray(x1)
        img2 = Image.fromarray(x2)

        hashers = (imagehash.whash, imagehash.average_hash,
                   imagehash.phash, imagehash.dhash)
        for column, hasher in zip(scores, hashers):
            hash1 = hasher(img1, hash_size)
            hash2 = hasher(img2, hash_size)
            column.append(1 - (hash2 - hash1) / n_bits)

    columns = [np.array(column).reshape(len(column), 1) for column in scores]
    return np.concatenate([band] + columns, 1)
Esempio n. 9
0
def compare_images():
    """Compare the wavelet hashes of ``img1.jpg`` and ``img2.jpg``.

    Both hashes are printed. Returns a message saying the images are the
    same when the hashes are equal, otherwise a message that includes the
    difference between the two hashes.
    """
    first_hash = imagehash.whash(Image.open('img1.jpg'))
    print('First image: ' + str(first_hash))

    second_hash = imagehash.whash(Image.open('img2.jpg'))
    print('Second image: ' + str(second_hash))

    identical = first_hash == second_hash
    if identical:
        return "The images are the same !"
    distance = str(first_hash - second_hash)
    return "The pictures are different. The distance is: " + distance
Esempio n. 10
0
def GetImageHash(path1, path2):
    """Hash two image files with the algorithm selected in ``PHAType``.

    The selection index maps to: -1 or 0 -> average hash, 1 -> phash,
    2 -> dhash, 3 -> whash.

    Returns:
        ``(hash1, hash2)`` for ``path1`` and ``path2``.

    Raises:
        ValueError: If the selection index is not one of the values above
            (previously this surfaced as an ``UnboundLocalError``).
    """
    # Read the widget once so every branch tests the same value.
    selection = PHAType.GetCurrentSelection()
    if selection == -1 or selection == 0:
        hasher = imagehash.average_hash
    elif selection == 1:
        hasher = imagehash.phash
    elif selection == 2:
        hasher = imagehash.dhash
    elif selection == 3:
        hasher = imagehash.whash
    else:
        raise ValueError("Unsupported hash selection: {}".format(selection))

    hash1 = hasher(PIL.Image.open(path1))
    hash2 = hasher(PIL.Image.open(path2))
    return hash1, hash2
Esempio n. 11
0
 def do_hash_check(self, path: str, bias=0.0, limit=30, whash_treshold=8, phash_threshold=16, bounces=None):
     """Run the phash check on an image and fall back to a whash check.

     ``self.check_hash`` is expected to return a ``(passed, value)`` pair.
     When the phash check passes, ``True`` is returned immediately.
     Otherwise the whash check decides: with the full ``whash_treshold`` if
     the phash value lies in ``[0, limit]``, or with half of it (truncated
     to int) if it does not.
     """
     img = Image.open(path)
     phash_ok, phash_score = self.check_hash(im.phash(img), True, bias,
                                             threshold=phash_threshold, bounces=bounces)
     if phash_ok:
         return True

     wavelet = im.whash(img)
     if 0 <= phash_score <= limit:
         threshold = whash_treshold
     else:
         # phash value outside the accepted window: apply the stricter half threshold
         threshold = int(whash_treshold / 2)
     whash_ok, _ = self.check_hash(wavelet, False, bias=bias,
                                   threshold=threshold, bounces=bounces)
     return whash_ok
Esempio n. 12
0
 def see(self, image_location):
     """Hash an image, look up similar stored hashes and record the result.

     If nothing similar is found the image is recorded on its own. If any
     match has similarity 0 nothing is recorded. Otherwise one record is
     written per match, linked to that match's neighbor.
     """
     node_name = uuid4()
     # open the image and hash it
     image_hash = imagehash.whash(self.__open(image_location))
     # look up the image by hash and return similar ones
     matches = self.__lookup_by_hash(type_='Visual', hash=image_hash)

     if len(matches) == 0:
         self.record(node_name, 'Visual', image_hash, image_location)
         return None

     # Two passes on purpose: the exact-match check has to cover every
     # entry before anything is recorded, otherwise an exact match that
     # appears later in the list would be found only after duplicate
     # links had already been written.
     if any(match['similarity'] == 0 for match in matches):
         # Same image already recorded and associated (how reliable this is
         # depends on the precision of the recognition).
         return None

     for match in matches:
         # TODO: only associate similarities within a threshold
         # Keys are decoded because redis returns byte strings such as
         # b'Visual ...', which break the neo4j query.
         redis_key = match['key'].decode('utf-8')
         found_neighbor = self.find_neighbor(('redis_key', redis_key))
         self.record(node_name, 'Visual', image_hash, image_location,
                     neighbor=found_neighbor, link=match['similarity'])
Esempio n. 13
0
    def hash_picture(self, curr_picture: picture_class.Picture):
        """Hash ``curr_picture`` with the algorithm chosen in ``self.conf.ALGO``.

        On success the hash is stored in ``curr_picture.hash``. Any exception
        (including an unsupported algorithm choice) is logged through
        ``self.logger`` and swallowed. The picture is returned in every case.
        """
        try:
            algo = self.conf.ALGO
            algo_type = configuration.ALGO_TYPE

            if algo == algo_type.A_HASH:  # Average
                hasher = imagehash.average_hash
            elif algo == algo_type.P_HASH:  # Perception
                hasher = imagehash.phash
            elif algo == algo_type.P_HASH_SIMPLE:  # Perception - simple
                hasher = imagehash.phash_simple
            elif algo == algo_type.D_HASH:  # D
                hasher = imagehash.dhash
            elif algo == algo_type.D_HASH_VERTICAL:  # D-vertical
                hasher = imagehash.dhash_vertical
            elif algo == algo_type.W_HASH:  # Wavelet
                hasher = imagehash.whash
            else:
                raise Exception('IMAGEHASH WRAPPER : HASH_CHOICE NOT CORRECT')

            # TO NORMALIZE : https://fullstackml.com/wavelet-image-hash-in-python-3504fdd282b5
            curr_picture.hash = hasher(Image.open(curr_picture.path))
        except Exception as e:
            self.logger.error("Error during hashing : " + str(e))

        return curr_picture
Esempio n. 14
0
def like(img, hs):
    """Return True when the wavelet hash of ``img`` differs from ``hs`` by less than 10."""
    distance = imagehash.whash(img) - hs
    return distance < 10



# 360 200      390     230   "yes" button
# 360 240      390     270   "no" button
Esempio n. 15
0
def image_hash_from_message(message):
    """
    Collect wavelet hashes for the images referenced by a message.

    For each embed the first available URL is used, in the order thumbnail,
    image, then the embed URL itself when the embed type is 'image'; embeds
    with none of these are counted as unhashable. Every attachment URL is
    used as well. Each URL is downloaded and hashed.

    Returns a dict with:
        "hashes": list of hash strings, one per successfully hashed image
        "errors": number of downloads that could not be identified as images
        "unhashables": number of embeds without a usable image URL
    """
    out = {"hashes": [], "errors": 0, "unhashables": 0}
    urls = []

    for embed in message.embeds:
        thumbnail_url = embed.thumbnail.url
        if thumbnail_url is not discord.Embed.Empty:
            urls.append(thumbnail_url)
            continue
        image_url = embed.image.url
        if image_url is not discord.Embed.Empty:
            urls.append(image_url)
            continue
        if embed.url is not discord.Embed.Empty and embed.type == 'image':
            urls.append(embed.url)
            continue
        out["unhashables"] += 1

    urls.extend(attachment.url for attachment in message.attachments)

    for url in urls:
        try:
            payload = requests.get(url).content
            picture = Image.open(BytesIO(payload))
            digest = imagehash.whash(picture, hash_size=HASH_SIZE)
            out["hashes"].append(str(digest))
        except UnidentifiedImageError as e:
            out["errors"] += 1
            print(e, url)
    return out
Esempio n. 16
0
def HASH_GEN(haystackPaths, hashsize):
    """Hash every image in ``haystackPaths`` with four perceptual hashes.

    Args:
        haystackPaths: Iterable of image paths.
        hashsize: ``hash_size`` passed to each hash function.

    Returns:
        ``(haystack, t)`` where ``haystack`` is a DataFrame with the columns
        ``file``, ``phash``, ``ahash``, ``dhash``, ``whash`` (one row per
        image) and ``t`` is the elapsed hashing time in seconds.
    """
    columns = ['file', 'phash', 'ahash', 'dhash', 'whash']

    # time the hashing operation
    start = time.time()

    # Rows are collected in a list and the frame is built once:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    for f in haystackPaths:
        # The context manager releases the file handle after hashing.
        with Image.open(f) as image:
            rows.append({
                'file': f,
                'phash': imagehash.phash(image, hash_size=hashsize),
                'ahash': imagehash.average_hash(image, hash_size=hashsize),
                'dhash': imagehash.dhash(image, hash_size=hashsize),
                'whash': imagehash.whash(image, hash_size=hashsize),
            })

    haystack = pd.DataFrame(rows, columns=columns)

    t = time.time() - start

    print("[INFO] processed {} images in {:.2f} seconds".format(
        len(haystack), t))

    return (haystack, t)
Esempio n. 17
0
def get_image_fingerprint(file, row):
    """Compute the perceptual hashes requested by ``row`` for one image.

    ``row['ahash']``, ``row['phash']``, ``row['dhash']`` and ``row['whash']``
    are passed through ``get_bool`` to decide which hashes to compute.

    Returns:
        ``(ahash, phash, dhash, whash)`` as strings, with ``None`` for every
        hash that was not requested. All four are ``None`` when nothing was
        requested or when the image data cannot be loaded.
    """
    do_ahash = get_bool(row['ahash'])
    do_phash = get_bool(row['phash'])
    do_dhash = get_bool(row['dhash'])
    do_whash = get_bool(row['whash'])

    if not (do_ahash or do_phash or do_dhash or do_whash):
        return None, None, None, None

    pil_img = Image.open(file)
    # try/finally guarantees the image is closed on every path, including
    # when one of the hash functions raises.
    try:
        # Force decoding now so truncated files fail here,
        # e.g. "OSError: image file is truncated (0 bytes not processed)".
        try:
            pil_img.load()
        except (IOError, OSError):
            return None, None, None, None

        ahash = str(imagehash.average_hash(pil_img)) if do_ahash else None
        phash = str(imagehash.phash(pil_img)) if do_phash else None
        dhash = str(imagehash.dhash(pil_img)) if do_dhash else None
        whash = str(imagehash.whash(pil_img)) if do_whash else None
    finally:
        pil_img.close()

    return ahash, phash, dhash, whash
Esempio n. 18
0
    def run(self):
        """Build the list of deduplicated screenshots for the report.

        Sets ``self.key`` to ``"deduplicated_shots"``. If the ``shots``
        directory exists under ``self.analysis_path``, its images are
        deduplicated with the configured hash function and their names are
        returned with ``".jpg"`` removed; otherwise an empty list is returned.
        """
        self.key = "deduplicated_shots"

        # The hash method is fixed here; the other branches are kept so the
        # method can be switched by editing this one value.
        hashmethod = "whash-db4"
        if hashmethod == 'ahash':
            hashfunc = imagehash.average_hash
        elif hashmethod == 'phash':
            hashfunc = imagehash.phash
        elif hashmethod == 'dhash':
            hashfunc = imagehash.dhash
        elif hashmethod == 'whash-haar':
            hashfunc = imagehash.whash
        elif hashmethod == 'whash-db4':
            hashfunc = lambda img: imagehash.whash(img, mode='db4')

        shots_path = os.path.join(self.analysis_path, "shots")
        if not os.path.exists(shots_path):
            return []

        screenshots = self.deduplicate_images(userpath=shots_path,
                                              hashfunc=hashfunc)
        return [screenshot.replace(".jpg", "") for screenshot in screenshots]
Esempio n. 19
0
    def run(self):
        """Build the sorted list of deduplicated screenshots for the report.

        Sets ``self.key`` to ``"deduplicated_shots"``. The hash method is read
        from ``self.options["hashmethod"]`` (default ``"ahash"``). If the
        ``shots`` directory exists under ``self.analysis_path``, its images
        are deduplicated, sorted, and returned with ``".jpg"`` removed from
        each name. Any exception is logged and whatever was collected so far
        is returned.
        """
        self.key = "deduplicated_shots"
        shots = []
        hashmethod = self.options.get("hashmethod", "ahash")
        try:
            if hashmethod == "ahash":
                hashfunc = imagehash.average_hash
            elif hashmethod == "phash":
                hashfunc = imagehash.phash
            elif hashmethod == "dhash":
                hashfunc = imagehash.dhash
            elif hashmethod == "whash-haar":
                hashfunc = imagehash.whash
            elif hashmethod == "whash-db4":
                hashfunc = lambda img: imagehash.whash(img, mode="db4")

            shots_path = os.path.join(self.analysis_path, "shots")
            if os.path.exists(shots_path):
                screenshots = self.deduplicate_images(userpath=shots_path,
                                                      hashfunc=hashfunc)
                screenshots.sort()
                shots.extend(name.replace(".jpg", "") for name in screenshots)
        except Exception as e:
            log.error(e)

        return shots
Esempio n. 20
0
def process_images(path):
    """Relocate only the duplicate images found directly inside ``path``.

    Every file matching ``path/*.*`` that ``is_Image`` accepts is hashed with
    ``imagehash.whash``. The first file seen for a given hash is kept; any
    later file with an equal hash is passed to ``relocateImages`` and
    reported. Finally every kept hash and its file are printed.

    Arguments:
        path {string} -- [full path to the directory]
    """
    check_images(path)

    images_dict = {}

    for image_path in glob.glob(path + "/*.*"):
        if not is_Image(image_path):
            continue

        # Close the file before it may be relocated; an open handle can
        # block moving the file on some platforms.
        with Image.open(image_path) as img:
            image_hash = imagehash.whash(img)

        if image_hash in images_dict:
            relocateImages(path, image_path)
            print(image_path, " is a duplicate",
                  "to retrieve use the hash: ", image_hash)
        else:
            images_dict[image_hash] = image_path

    for k, v in images_dict.items():
        print(k, v)
Esempio n. 21
0
    def hash_func(self, x):
        """Hash one image and return the hash as a string.

        The image is first passed through ``self.process_for_hash``; the
        algorithm is selected by ``self.hash_name``.

        Raises:
            NotImplementedError: If ``self.hash_name`` is not a known name.
        """
        image = self.process_for_hash(x)
        name = self.hash_name

        if name == "AverageHash":
            return str(imagehash.average_hash(image, hash_size=8, mean=np.mean))
        if name == "Phash":
            return str(imagehash.phash(image, hash_size=8, highfreq_factor=4))
        if name == "PhashSimple":
            return str(imagehash.phash_simple(image, hash_size=8, highfreq_factor=4))
        if name == "DhashH":
            return str(imagehash.dhash(image))
        if name == "DhashV":
            return str(imagehash.dhash_vertical(image))
        if name == "Whash":
            return str(imagehash.whash(image,
                                       hash_size=8,
                                       image_scale=None,
                                       mode='haar',
                                       remove_max_haar_ll=True))
        if name == "ColorHash":
            return str(imagehash.colorhash(image, binbits=3))
        if name == "CropResistantHash":  # does not work yet
            return str(imagehash.crop_resistant_hash(image,
                                                     hash_func=None,
                                                     limit_segments=None,
                                                     segment_threshold=128,
                                                     min_segment_size=500,
                                                     segmentation_image_size=300
                                                     ))

        raise NotImplementedError(f"Hash Name -- {self.hash_name} -- Unknown")
Esempio n. 22
0
    def _calc_hash(self) -> None:
        """
        Compute the video hash from the wavelet hash of ``self.image``.

        The boolean hash matrix returned by imagehash's ``whash`` is flattened
        row by row into ``self.bitlist``. ``self.hash`` becomes the matching
        string of '0'/'1' characters prefixed with ``0b`` and
        ``self.hash_hex`` is its conversion through ``VideoHash.bin2hex``.

        The imagehash instance itself is not kept; only the bit list and the
        binary and hexadecimal strings are stored on the object.

        :return: None

        :rtype: NoneType
        """

        self.bitlist: List = []

        hash_rows = imagehash.whash(self.image).hash.astype(int).tolist()
        self.bitlist.extend(bit for row in hash_rows for bit in row)

        bits = "".join("1" if bit else "0" for bit in self.bitlist)

        # the binary value is prefixed with 0b.
        self.hash: str = f"0b{bits}"
        self.hash_hex: str = VideoHash.bin2hex(self.hash)
Esempio n. 23
0
def getVisualHashes(video_filename, frame_list):

    '''
    Compute a perceptual hash for each frame with the method named in
    cfg.HASH_NAME ('aHash', 'pHash', 'dHash' or 'wHash').

    Hashes are cached as <cfg.DATA_FOLDER><video name without extension>/
    <cfg.HASH_NAME>.npy and loaded from there when the file already exists.

    Raises ValueError if cfg.HASH_NAME is not one of the four known names
    (previously this surfaced as an UnboundLocalError).
    '''

    hash_folder = cfg.DATA_FOLDER + os.path.splitext(video_filename)[0]

    # Try to load the files if they are saved
    hash_filename = cfg.HASH_NAME + '.npy'
    hash_filepath = os.path.join(hash_folder, hash_filename)
    if os.path.exists(hash_filepath):
        # The cache stores hash objects (object dtype); NumPy refuses to load
        # such arrays unless allow_pickle=True. Only load caches this program
        # wrote itself, since unpickling untrusted files is unsafe.
        visual_hash_list = np.load(hash_filepath, allow_pickle=True)
        print(hash_filename + ' loaded from file')
        return visual_hash_list

    # Or compute them
    if cfg.HASH_NAME == 'aHash':
        hasher = imgh.average_hash
    elif cfg.HASH_NAME == 'pHash':
        hasher = imgh.phash
    elif cfg.HASH_NAME == 'dHash':
        hasher = imgh.dhash
    elif cfg.HASH_NAME == 'wHash':
        hasher = imgh.whash
    else:
        raise ValueError('Unknown cfg.HASH_NAME: {!r}'.format(cfg.HASH_NAME))

    visual_hash_list = [hasher(Image.fromarray(frame)) for frame in frame_list]
    np.save(hash_filepath, visual_hash_list)
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(hash_filename + " computed")

    return visual_hash_list
Esempio n. 24
0
 def generate(self, imgpath=None):
     """
     Compute checksums, perceptual hashes and EXIF metadata for an image.

     Sets on the instance:
     sha256 - hex SHA-256 of the base64-encoded file content
     file_size - size of the file in bytes
     digest - average, p-, d- and w-hash strings joined with commas
     exif - dict of decoded EXIF tags (only set when at least one tag is read)

     Args:
     imgpath - Required
     """
     sha256 = hashlib.sha256()
     with open(imgpath, "rb") as image:
         b64string = base64.b64encode(image.read())
     # NOTE: the digest covers the base64 text, not the raw bytes. Kept
     # unchanged so values stay comparable with ones computed earlier.
     sha256.update(b64string)
     self.sha256 = sha256.hexdigest()
     ob_img = M.open(imgpath)
     average_hash = imagehash.average_hash(ob_img)
     phash = imagehash.phash(ob_img)
     dhash = imagehash.dhash(ob_img)
     whash = imagehash.whash(ob_img)
     self.file_size = os.path.getsize(imgpath)
     self.digest = str(average_hash) + "," + str(phash) + \
         "," + str(dhash) + "," + str(whash)
     info = ob_img._getexif()
     ret = {}
     try:
         for tag, value in info.items():
             decoded = TAGS.get(tag, tag)
             ret[decoded] = convert_to_string(value)
             self.exif = ret
     except Exception:
         # Was a bare ``except:``, which also swallowed KeyboardInterrupt and
         # SystemExit. Metadata problems (presumably including ``info`` being
         # None when there is no EXIF data) are still only reported.
         print("Error read meta")
Esempio n. 25
0
def image_meta(url, url_idx, web):
    """Download an image and return its size, checksums and perceptual hashes.

    Args:
        url: Address fetched through ``web.get``.
        url_idx: Value stored under the ``"url"`` key of the result.
        web: Client object whose ``get`` returns a response with ``content``.

    Returns:
        A dict with keys ``url``, ``size``, ``width``, ``height``, ``sha1``,
        ``md5``, ``crc32``, ``dhash``, ``phash``, ``ahash`` and ``whash``, or
        ``None`` when the download yields a falsy response or processing the
        image raises.
    """
    response = web.get(url)
    if not response:
        logger.warning("Could not download image")
        return None
    payload = response.content

    try:
        im = Image.open(BytesIO(payload))

        return {
            "url": url_idx,
            "size": len(payload),
            "width": im.width,
            "height": im.height,
            "sha1": hashlib.sha1(payload).hexdigest(),
            "md5": hashlib.md5(payload).hexdigest(),
            "crc32": format(zlib.crc32(payload), "x"),
            "dhash": b64hash(imagehash.dhash(im, hash_size=12), 18),
            "phash": b64hash(imagehash.phash(im, hash_size=12), 18),
            "ahash": b64hash(imagehash.average_hash(im, hash_size=12), 18),
            "whash": b64hash(imagehash.whash(im, hash_size=8), 8),
        }
    except Exception as e:
        logger.warning("exception during image post processing: " + str(e))
        return None
Esempio n. 26
0
def GetTotalHash(file):
    """Return the average, p-, w- and d-hash of an image joined with underscores."""
    img = Image.open(file)
    hashers = (
        imagehash.average_hash,
        imagehash.phash,
        imagehash.whash,
        imagehash.dhash,
    )
    return '_'.join(str(hasher(img)) for hasher in hashers)
Esempio n. 27
0
 def __init__(self, fontType, fontFilePath):
     """Store the font type and path, and the four hash strings of its image.

     The image at ``fontFilePath`` is opened once and its average, difference,
     perceptual and wavelet hashes are stored as strings in ``aHash``,
     ``dHash``, ``pHash`` and ``wHash``.
     """
     self.fontType, self.fontFilePath = fontType, fontFilePath
     glyph_image = Image.open(fontFilePath)
     for attribute, hasher in (('aHash', imagehash.average_hash),
                               ('dHash', imagehash.dhash),
                               ('pHash', imagehash.phash),
                               ('wHash', imagehash.whash)):
         setattr(self, attribute, str(hasher(glyph_image)))
Esempio n. 28
0
def calc_hash(img):
    """
    Calculate the wavelet hash of the image
        img: (ndarray) image data

    The array is first passed through ``resize`` (per the original comment
    it shrinks images taller than 1000 px; the helper is defined elsewhere).
    """
    resized = resize(img)
    pil_image = Image.fromarray(resized)
    return imagehash.whash(pil_image)
Esempio n. 29
0
 def feature_extraction(self, images):
     """Return one flat float vector of wavelet-hash bits per input image.

     Each image is cast to ``uint8``, converted to a PIL image and hashed
     with ``imagehash.whash``; the hash matrix is returned flattened as
     floats.
     """
     import imagehash
     from PIL import Image

     return [
         imagehash.whash(Image.fromarray(np.uint8(image)))
         .hash.astype(float).flatten()
         for image in images
     ]
Esempio n. 30
0
 def unban_image(self, path):
     """Remove hashes similar to the given image from both hash databases.

     The image's phash and whash are each passed to
     ``self.exec_similar_hash`` together with ``self.del_from_db`` and a
     Redlock on the matching database key.
     NOTE(review): the meaning of the ``0, 13`` arguments is defined by
     ``exec_similar_hash`` and cannot be confirmed from here.
     """
     picture = Image.open(path)
     phash_lock = Redlock(key='phashdb', masters={self.redis})
     whash_lock = Redlock(key='whashdb', masters={self.redis})
     perceptual = im.phash(picture)
     wavelet = im.whash(picture)
     self.exec_similar_hash(self.phashdb, perceptual, 0, 13,
                            self.del_from_db, lock=phash_lock)
     self.exec_similar_hash(self.whashdb, wavelet, 0, 13,
                            self.del_from_db, lock=whash_lock)
def getHash(img):
    """Return six perceptual hashes of a fixed crop of a grayscale image.

    The image is opened, converted to mode 'L' and cropped to the box
    (25, 37, 195, 150) before hashing.

    Returns:
        ``(ahash, phash, psimplehash, dhash, vertdhash, whash)`` as strings.
    """
    # Indentation normalised to spaces: the original mixed tabs and spaces,
    # which Python 3 rejects with TabError.
    normal = Image.open(img).convert('L')
    crop = normal.crop((25, 37, 195, 150))
    ahash = str(imagehash.average_hash(crop))
    phash = str(imagehash.phash(crop))
    psimplehash = str(imagehash.phash_simple(crop))
    dhash = str(imagehash.dhash(crop))
    vertdhash = str(imagehash.dhash_vertical(crop))
    whash = str(imagehash.whash(crop))
    return ahash, phash, psimplehash, dhash, vertdhash, whash
def getHash(img):
    """Return six perceptual hashes of a fixed crop of a resized grayscale image.

    The image is opened, converted to mode 'L', resized to 223x310 and
    cropped to the box (25, 37, 195, 150) before hashing.

    Returns:
        ``(ahash, phash, psimplehash, dhash, vertdhash, whash)`` as strings.
    """
    size = 223, 310
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name. Fall back for very old installations.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS
    normal = Image.open(img).convert('L')
    normal = normal.resize(size, resample)
    crop = normal.crop((25, 37, 195, 150))
    ahash = str(imagehash.average_hash(crop))
    phash = str(imagehash.phash(crop))
    psimplehash = str(imagehash.phash_simple(crop))
    dhash = str(imagehash.dhash(crop))
    vertdhash = str(imagehash.dhash_vertical(crop))
    whash = str(imagehash.whash(crop))
    # The fourth element is dhash; the original returned phash twice and
    # dropped the computed dhash.
    return ahash, phash, psimplehash, dhash, vertdhash, whash
Esempio n. 33
0
def hash_value(img_fn, htype):
    """Hash the image file ``img_fn`` with the algorithm selected by ``htype``.

    ``'p'`` selects phash, ``'d'`` dhash and ``'w'`` whash; ``'a'`` and any
    other value select the average hash.
    """
    img = Image.open(img_fn)
    if htype == 'p':
        return imagehash.phash(img)
    if htype == 'd':
        return imagehash.dhash(img)
    if htype == 'w':
        return imagehash.whash(img)
    # 'a' and every unrecognised code use the average hash
    return imagehash.average_hash(img)
Esempio n. 34
0
def get_imagehashes(fp: Fileish,
                    size=FINGERPRINT_SIZE) -> Dict[str, imagehash.ImageHash]:
    """Calculate perceptual hashes for comparison of identical images.

    The image is loaded through ``pil_image``, resized to ``size`` x ``size``
    with bilinear resampling and converted to grayscale before hashing.

    Returns:
        A dict with keys ``ahash``, ``phash``, ``whash`` and ``dhash``, or an
        empty dict if an ``OSError`` is raised (probably a corrupt file).
    """
    try:
        thumb = pil_image(fp).resize((size, size), PIL.Image.BILINEAR).convert('L')
        hashes = {
            'ahash': imagehash.average_hash(thumb),
            'phash': imagehash.phash(thumb),
            'whash': imagehash.whash(thumb),
            'dhash': imagehash.dhash(thumb),
        }
    except OSError:  # corrupt image file probably
        return {}
    return hashes
def findImagesRootDir(rootDir):
    """Hash every ``<digits>.jpg`` under ``rootDir`` and write ``whash.csv``.

    The directory tree is walked recursively. Each file whose name is an id
    made of digits followed by ``.jpg`` is hashed with ``imagehash.whash``
    and stored under that id; if hashing fails the id maps to ``'NA'``.
    Files with other names are skipped. The id/hash pairs are written to
    ``whash.csv`` in the current working directory.
    """
    # Raw string with an escaped dot, so the pattern means exactly
    # "<digits>.jpg".
    image_file_re = re.compile(r"^(\d*)\.jpg$")

    count = 0

    # Create a dictionary of hash values using image id as key, hash as value
    imageHashes = dict()

    # Find all the images
    for root, dirnames, filenames in os.walk(rootDir):
        for filename in fnmatch.filter(filenames, '*.jpg'):
            match = image_file_re.match(filename)
            if match is None:
                # Name is not "<digits>.jpg"; the original crashed here with
                # AttributeError.
                continue
            image_id = match.group(1)

            filepath = root + '/' + filename
            try:
                with Image.open(filepath) as img:
                    imageHashes[image_id] = imagehash.whash(img)
            except Exception:
                # Record the failure and keep it: the original fell through
                # and overwrote 'NA' with the previous image's hash.
                print('Error creating hash for image {}'.format(image_id))
                imageHashes[image_id] = 'NA'

            count += 1

            if count % 1000 == 0:
                print('Processed {} images'.format(count))

    print('Writing hash csv')
    hashDf = pd.DataFrame(list(imageHashes.items()))
    hashDf.to_csv('whash.csv')

    print('Found {} images'.format(count))
Esempio n. 36
0
 def test_image_scale_not_2power(self):
     """whash must reject image_scale values that are not powers of two."""
     expected_message = 'image_scale is not power of 2'
     for power_of_two in (4, 8, 16):
         bad_scale = power_of_two + 1
         with six.assertRaisesRegex(self, AssertionError, expected_message):
             imagehash.whash(self.image, image_scale=bad_scale)
Esempio n. 37
0
 def test_hash_size_more_than_scale(self):
     """whash must reject a hash_size larger than the image_scale."""
     expected_message = 'hash_size in a wrong range'
     oversized = dict(hash_size=32, image_scale=16)
     with six.assertRaisesRegex(self, AssertionError, expected_message):
         imagehash.whash(self.image, **oversized)
Esempio n. 38
0
 def test_custom_hash_size_and_scale(self):
     """A 16x16 hash at image_scale=64 must contain 16**2 bits."""
     side = 16
     result = imagehash.whash(self.image, hash_size=side, image_scale=64)
     self.assertEqual(result.hash.size, side ** 2)
Esempio n. 39
0
 def test_hash_size_2power(self):
     """Power-of-two hash sizes must give hashes with hash_size**2 bits."""
     for side in (4, 8, 16):
         result = imagehash.whash(self.image, hash_size=side)
         self.assertEqual(result.hash.size, side ** 2)
Esempio n. 40
0
 def test_hash_size_is_less_than_image_size(self):
     """whash must reject hash sizes that are too large for a 120x200 image."""
     expected_message = 'hash_size in a wrong range'
     small_image = self._get_white_image((120, 200))
     for too_large in (128, 512):
         with six.assertRaisesRegex(self, AssertionError, expected_message):
             imagehash.whash(small_image, hash_size=too_large)
Esempio n. 41
0
	def test_custom_hash_size_and_scale(self):
		"""A 16x16 hash at image_scale=64 of a 512x512 white image has 16**2 bits."""
		side = 16
		white = self.get_white_image((512, 512))
		result = imagehash.whash(white, hash_size=side, image_scale=64)
		self.assertEqual(result.hash.size, side ** 2)
Esempio n. 42
0
	def test_hash_size_2power(self):
		"""Power-of-two hash sizes on a 512x512 white image give hash_size**2 bits."""
		white = self.get_white_image((512, 512))
		for side in (4, 8, 16):
			result = imagehash.whash(white, hash_size=side)
			self.assertEqual(result.hash.size, side ** 2)
Esempio n. 43
0
Identifies similar images in the directory.

Method: 
  ahash:      Average hash
  phash:      Perceptual hash
  dhash:      Difference hash
  whash-haar: Haar wavelet hash
  whash-db4:  Daubechies wavelet hash

(C) Johannes Buchner, 2013
""" % sys.argv[0])
    	sys.exit(1)
    
    hashmethod = sys.argv[1] if len(sys.argv) > 1 else usage()
    if hashmethod == 'ahash':
    	hashfunc = imagehash.average_hash
    elif hashmethod == 'phash':
    	hashfunc = imagehash.phash
    elif hashmethod == 'dhash':
    	hashfunc = imagehash.dhash
    elif hashmethod == 'whash-haar':
    	hashfunc = imagehash.whash
    elif hashmethod == 'whash-db4':
    	hashfunc = lambda img: imagehash.whash(img, mode='db4')
    else:
    	usage()
    userpath = sys.argv[2] if len(sys.argv) > 2 else "."
    find_similar_images(userpath=userpath, hashfunc=hashfunc)
    

Esempio n. 44
0
 def test_hash_size_not_2power(self):
     """whash must reject hash_size values that are not powers of two."""
     expected_message = 'hash_size is not power of 2'
     for bad_size in (3, 7, 12):
         with six.assertRaisesRegex(self, AssertionError, expected_message):
             imagehash.whash(self.image, hash_size=bad_size)
def whash_(image):
    ''' Wavelet hash that returns an all-zero 8x8 hash instead of raising
    when whash's assertion fails (e.g. for a small image). '''
    try:
        result = imagehash.whash(image)
    except AssertionError:
        result = imagehash.ImageHash(np.zeros((8,8), dtype=bool))
    return result