Esempio n. 1
9
 def handle(self, *args, **options):
     """Import image files found under ``MEDIA_ROOT/gallery`` into the database.

     Every directory directly below the gallery folder is treated as one
     gallery; all files below it (at any depth) are hashed with ``phash`` and
     stored as ``Image`` rows unless an image with the same hash is already
     known. A summary line is written to stdout at the end.

     NOTE(review): a directory is looked up by ``slug`` but created with
     ``name``; this only works if the model derives the slug from the name
     unchanged -- confirm against the ``Gallery`` model.
     """
     checked = added = 0
     # Materialise both lookups once as sets: membership tests become O(1)
     # and hashes added during this run are seen by later files.
     known_hashes = set(Image.objects.values_list("phash", flat=True))
     known_slugs = set(Gallery.objects.values_list("slug", flat=True))
     gallery_path = os.path.join(settings.MEDIA_ROOT, "gallery")
     # Only the first os.walk() entry is used: walking the whole tree here
     # would turn nested folders into galleries and hash files repeatedly.
     _, gallery_dirs, _ = next(os.walk(gallery_path), (gallery_path, [], []))
     for dir_name in gallery_dirs:
         if dir_name not in known_slugs:
             gallery = Gallery(name=dir_name)
             gallery.save()
         else:
             gallery = Gallery.objects.get(slug=dir_name)
         for dir_root, _, files in os.walk(os.path.join(gallery_path, dir_name)):
             for base_name in files:
                 file_name = os.path.join(dir_root, base_name)
                 try:
                     with PILImage.open(file_name) as file_image:
                         file_phash = str(phash(file_image))
                 except (IOError, OSError):
                     # Not a readable image (e.g. a stray text file): skip it
                     # instead of aborting the whole import.
                     self.stderr.write("Skipping %s" % file_name)
                     continue
                 checked += 1
                 if file_phash in known_hashes:
                     continue
                 image = Image(phash=file_phash, gallery=gallery)
                 # Store the path relative to MEDIA_ROOT, independent of
                 # whether MEDIA_ROOT ends with a path separator.
                 image.original_image.name = os.path.relpath(
                     file_name, settings.MEDIA_ROOT
                 )
                 image.save()
                 known_hashes.add(file_phash)
                 self.stdout.write("Saved %s" % image.original_image.name)
                 added += 1
     self.stdout.write("Checked %d images, added %d" % (checked, added))
Esempio n. 2
1
def similarity(image1, image2):
    """Print a similarity score for two image files when they are close.

    Both files are opened and perceptually hashed. When the hash distance is
    at most 12 the value ``1 - distance / 64`` is printed; otherwise nothing
    happens. The function always returns ``None``.
    """
    first_hash = imagehash.phash(Image.open(image1))
    second_hash = imagehash.phash(Image.open(image2))
    distance = abs(first_hash - second_hash)

    if distance > 12:
        return
    # 64 presumably matches the bit count of the default pHash -- confirm.
    print(1 - (float(distance) / 64))
def hash_file(file, contains_cb, result_cb):
    """Hash ``file`` at four orientations and hand the result to ``result_cb``.

    ``contains_cb(file)`` is asked first; when it is truthy the file is
    skipped. Otherwise the image is opened, its size and capture metadata are
    read through the module helpers, four perceptual hashes are taken, sorted
    and concatenated, and ``result_cb(file, hashes, file_size, image_size,
    capture_time)`` is called. An ``OSError`` anywhere in that sequence is
    reported and swallowed.
    """
    if contains_cb(file):
        cprint("\tSkipping {}".format(file), "green")
        return

    try:
        img = Image.open(file)

        file_size = get_file_size(file)
        image_size = get_image_size(img)
        capture_time = get_capture_time(img)

        # Hash of the image as stored.
        rotation_hashes = [str(imagehash.phash(img))]

        # NOTE: each rotation is applied to the previously rotated image, so
        # the effective angles are 90, 270 and 540 (=180) degrees. Kept as is
        # because changing it would alter hashes already handed to callers.
        for step in (90, 180, 270):
            img = img.rotate(step)
            rotation_hashes.append(str(imagehash.phash(img)))

        combined = "".join(sorted(rotation_hashes))
        result_cb(file, combined, file_size, image_size, capture_time)

        cprint("\tHashed {}".format(file), "blue")
    except OSError:
        cprint("Unable to open {}".format(file), "red")
Esempio n. 4
0
def image_descriptor(image_path, prior=None):
    """Build or refresh the descriptor dict of an image file.

    Args:
        image_path: path of the image file.
        prior: descriptor previously returned for this file, or ``None``.

    Returns:
        A dict with ``width``, ``height``, ``created``, ``modified``,
        ``aHash``, ``pHash`` and ``dHash``. Without a usable ``prior`` a new
        dict is returned; otherwise ``prior`` is updated in place and
        returned.

    The image is only decoded when at least one field has to be
    (re)computed, i.e. the file is newer than ``prior['modified']`` or a
    field is missing/empty.
    """
    mtime = os.path.getmtime(image_path)
    ctime = os.path.getctime(image_path)

    if not prior or not prior.get('modified'):
        with Image.open(image_path) as img:
            return {
                'width': img.size[0],
                'height': img.size[1],
                # ctime is the creation/metadata-change time, mtime the
                # modification time; the two used to be stored swapped.
                'created': ctime,
                'modified': mtime,
                # TODO: if results are too bad, change hash sizes for more
                # precision?
                'aHash': str(imagehash.average_hash(img)),
                'pHash': str(imagehash.phash(img)),
                'dHash': str(imagehash.dhash(img)),
            }

    changed = prior['modified'] < mtime
    stale = [
        key for key in ('width', 'height', 'aHash', 'pHash', 'dHash')
        if changed or not prior.get(key)
    ]
    if not stale:
        # Nothing to recompute: avoid decoding the image at all.
        return prior

    with Image.open(image_path) as img:
        compute = {
            'width': lambda: img.size[0],
            'height': lambda: img.size[1],
            'aHash': lambda: str(imagehash.average_hash(img)),
            'pHash': lambda: str(imagehash.phash(img)),
            'dHash': lambda: str(imagehash.dhash(img)),
        }
        for key in stale:
            prior[key] = compute[key]()

    if changed:
        # Remember the new timestamp so the next call does not redo the work.
        prior['modified'] = mtime
    return prior
Esempio n. 5
0
 def run(self):
     """Group near-duplicate media files and write the groups as JSON.

     Every file in ``data/<date_path>/media`` gets an average, difference and
     perceptual hash. Two files are linked when the sum of their three hash
     distances is at most 40. The connected components of the resulting graph
     are written to ``self.output()`` as a JSON list of lists; files without
     any link do not appear in the output.
     """
     files = sorted(os.listdir('data/%s/media' % self.date_path))
     hashes = {}
     g = nx.Graph()
     for i, f in enumerate(files):
         fn = 'data/%s/media/%s' % (self.date_path, f)
         # Decode the file once and reuse it for all three hashes.
         with Image.open(fn) as img:
             ahash = imagehash.average_hash(img)
             dhash = imagehash.dhash(img)
             phash = imagehash.phash(img)
         hashes[f] = {'ahash': ahash, 'dhash': dhash, 'phash': phash}
         # Compare against every file hashed so far.
         for f2name in files[:i]:
             f2 = hashes[f2name]
             sumhash = sum([ahash - f2['ahash'],
                            dhash - f2['dhash'],
                            phash - f2['phash']])
             if sumhash <= 40:
                 g.add_edge(f, f2name)
     with self.output().open('w') as fp_graph:
         # Note: sets are not JSON serializable
         d = [list(s) for s in nx.connected_components(g)]
         logging.debug(' - = - = - = GRAPH HERE - = - = - = -')
         logging.debug(d)
         json.dump(d, fp_graph, indent=2)
Esempio n. 6
0
 def _calc(self, options, files):
     for file in files:
         if os.path.isdir(file):
             if options.get_recursive_flag() and not os.path.islink(file):
                 try:
                     self._calc(options, sorted([
                         os.path.join(file, x)
                         for x in os.listdir(file)
                     ]))
                 except PermissionError:
                     pass
         elif os.path.isfile(file):
             file_stat = file_mod.FileStat(file)
             try:
                 phash = self._cache[
                     (file, file_stat.get_size(), file_stat.get_time())]
             except KeyError:
                 try:
                     phash = str(imagehash.phash(PIL.Image.open(file)))
                 except OSError:
                     continue
             print("{0:s}/{1:010d}/{2:d}  {3:s}".format(
                 phash,
                 file_stat.get_size(),
                 file_stat.get_time(),
                 file
             ))
Esempio n. 7
0
def getImageHash(imagename):
    """Return a hash string for the image file ``imagename``.

    The difference hash with hash size 12 is used. When that hash is all
    zeros, the perceptual hash prefixed with ``'phash_'`` is returned instead.
    """
    picture = Image.open(imagename)
    digest = str(imagehash.dhash(picture, 12))
    if digest != '000000000000000000000000000000000000':
        return digest
    return 'phash_' + str(imagehash.phash(picture))
Esempio n. 8
0
def hashpdfimg(images):
    """Return the perceptual hash of every array in ``images``, in order.

    Each element is converted with ``Image.fromarray`` before hashing.
    """
    return [imagehash.phash(Image.fromarray(page)) for page in images]
Esempio n. 9
0
def get_image_metadata(config, request):
    ''' Fetch an image through the configured SOCKS proxy and describe it.

    The ``url`` query argument is downloaded via the proxy taken from
    ``config['Tor']``. The JSON response contains the content type, selected
    EXIF tags (JPEG/TIFF only), the image format, several perceptual and
    cryptographic hashes, the ``Last-modified`` header, the resolution and
    the byte size.

    Raises ``aiohttp.web.HTTPBadRequest`` when ``url`` is missing or the
    fetched resource does not declare an ``image/*`` content type.
    '''

    try:
        url = request.GET['url']
    except KeyError:
        raise aiohttp.web.HTTPBadRequest(reason='"url" argument is required.')

    tor_config = config['Tor']
    socks_proxy = SOCKSConnector(tor_config['ip'], int(tor_config['port']))
    response = yield from aiohttp.get(url, connector=socks_proxy)
    # A response without the header is rejected below as "non-image" instead
    # of failing with KeyError.
    content_type = response.headers.get('Content-type', '')
    # Media type without parameters, so "image/jpeg; charset=binary" is still
    # recognised as JPEG.
    mime_type = content_type.split(';', 1)[0].strip().lower()

    if not mime_type.startswith('image/'):
        reason = 'Requested a non-image resource ({}).'.format(content_type)
        raise aiohttp.web.HTTPBadRequest(reason=reason)

    image_data = yield from response.read()
    image = Image.open(io.BytesIO(image_data))
    extra = dict()

    if mime_type in ('image/jpeg', 'image/tiff'):
        exif_tags = exifread.process_file(io.BytesIO(image_data))
        for name, tag in exif_tags.items():
            if not name.startswith(('Image', 'MakerNote')):
                continue
            values = tag.values
            if isinstance(values, (int, str)):
                extra[name] = values
            elif isinstance(values, list):
                # Lists of ints are JSON serialisable as they are; anything
                # else is flattened to a comma separated string.
                if values and isinstance(values[0], int):
                    extra[name] = values
                else:
                    extra[name] = ','.join(map(str, values))
            else:
                extra[name] = str(tag)

    metadata = {
        'content_type': content_type,
        'extra': extra,
        'format': image.format,
        'hashes': {
            'ahash': str(imagehash.average_hash(image)),
            'dhash': str(imagehash.dhash(image)),
            'md5': hashlib.md5(image_data).hexdigest(),
            'phash': str(imagehash.phash(image)),
            'sha1': hashlib.sha1(image_data).hexdigest(),
            'sha256': hashlib.sha256(image_data).hexdigest(),
        },
        'last_modified': response.headers.get('Last-modified', None),
        'resolution': {
            'width': image.width,
            'height': image.height,
        },
        'size': len(image_data),
    }

    return aiohttp.web.Response(
        headers={'Content-type': 'application/json; charset=utf8'},
        body=json.dumps(metadata).encode('utf8'),
    )
Esempio n. 10
0
    def predict(self, image):
        """Assign at most one identity to every face found in ``image``.

        Returns a list of ``(identity, bounding_box, probability)`` tuples,
        one per detected bounding box. Unknown faces are reported as
        ``(-1, bounding_box, 0.0)``.

        Candidates are collected in a priority queue ordered by descending
        probability: an exact pHash hit in ``self.trained_images`` counts as
        probability 1.0, otherwise every class probability of ``self.svm``
        becomes a candidate. Candidates are then accepted greedily so that
        each identity and each bounding box is used at most once.
        """
        result_priority_queue = PriorityQueue()
        results = []

        bbs = self.align.getAllFaceBoundingBoxes(image)

        for bb_index, bb in enumerate(bbs):
            alignedFace = self.align.alignImg("affine", 96, image, bb)
            if alignedFace is None:
                continue

            phash = str(imagehash.phash(Image.fromarray(alignedFace)))
            if phash in self.trained_images:
                identity = self.trained_images[phash].identity
                result_priority_queue.put_nowait((-1.0, identity, bb_index))
            else:
                rep = self.net.forwardImage(alignedFace)
                if self.svm is not None:
                    result_proba_list = self.svm.predict_proba(rep)
                    # Single-argument call form prints identically on
                    # Python 2 and Python 3.
                    print(str(result_proba_list[0]) + " " + str(bb))
                    for index, prob in enumerate(result_proba_list[0]):
                        result_priority_queue.put_nowait(
                            (prob * -1.0, self.identities[index], bb_index))
                else:
                    result_priority_queue.put_nowait((0.0, -1, bb_index))

        matched_identities = []
        matched_bb_indices = []
        threshold = 0.6

        while (len(matched_identities) != len(bbs)
               and not result_priority_queue.empty()):
            neg_probability, identity, bb_index = \
                result_priority_queue.get_nowait()
            probability = neg_probability * -1.0

            # Each identity and each face may be used only once; without the
            # bounding-box check one face could receive several identities
            # while another face got none.
            if identity in matched_identities or bb_index in matched_bb_indices:
                continue

            matched_bb_indices.append(bb_index)
            matched_identities.append(identity)

            if probability < threshold:
                results.append((-1, bbs[bb_index], 0.0))
            else:
                results.append((identity, bbs[bb_index], probability))

        # Faces that never received a candidate are reported as unknown.
        for bb_index, bb in enumerate(bbs):
            if bb_index in matched_bb_indices:
                continue

            results.append((-1, bb, 0.0))

        return results
Esempio n. 11
0
 def create_from_image(cls, img, url, facebook=None, okcupid=None):
     """Create, store and return a record holding the hashes of ``img``.

     The average, perceptual and difference hashes of ``img`` are stored
     together with ``url`` and the optional ``facebook`` / ``okcupid``
     references. The created object is returned (it used to be discarded) so
     callers can use it without a second query.
     """
     return cls.objects.create(
         ahash=imagehash.average_hash(img),
         phash=imagehash.phash(img),
         dhash=imagehash.dhash(img),
         url=url,
         facebook=facebook,
         okcupid=okcupid,
     )
Esempio n. 12
0
def hashOP(image1,hashsimg):
    """Return the index of the hash in ``hashsimg`` closest to ``image1``.

    ``image1`` is an array that is hashed with pHash; closeness is measured by
    ``hamdist`` on the string forms of the hashes. The first index wins on
    ties. An empty ``hashsimg`` raises ``ValueError`` (from ``min``).
    """
    reference = str(imagehash.phash(Image.fromarray(image1)))
    distances = [hamdist(reference, str(candidate)) for candidate in hashsimg]
    return distances.index(min(distances))
Esempio n. 13
0
def render_to_img_with_phash(gen_opts, img_code):
    """Render ``img_code`` and return ``(image, perceptual_hash)``.

    ``gen_opts['img_size']`` is passed to ``render_to_img``;
    ``gen_opts['hash_opts']`` supplies ``hash_size`` and ``highfreq_factor``
    for the hash.
    """
    im = render_to_img(gen_opts['img_size'], img_code)

    hash_opts = gen_opts['hash_opts']
    img_hash = imagehash.phash(
        im,
        hash_size=hash_opts['hash_size'],
        highfreq_factor=hash_opts['highfreq_factor'],
    )
    return im, img_hash
Esempio n. 14
0
 def _extract(self, data):
     """Return the perceptual hash of the configured patch of ``data``.

     The patch is the box ``x0, y0, x1, y1`` read from ``self.patch`` and is
     cut out with ``data.crop``.
     """
     box = [self.patch[corner] for corner in ("x0", "y0", "x1", "y1")]
     return imagehash.phash(data.crop(box=box))
Esempio n. 15
0
def get_hash(images):
    """Return one flat integer array of pHash bits per input image.

    Each image is converted with ``cv2pil`` and hashed with ``hash_size=16``;
    the boolean hash matrix is flattened and converted to integers (0/1).
    """
    return [
        imagehash.phash(cv2pil(frame), hash_size=16).hash.flatten().astype(int)
        for frame in images
    ]
def dupe_remover(wd):
    '''
    Delete near-duplicate ``*.jpg`` files in a directory.

    The files are hashed once (greyscale pHash). Then, repeatedly, the first
    remaining image is taken as reference and every other remaining image
    whose hash distance to it is below 7 is deleted from disk. The reference
    itself is always kept.

    args - wd: str - a directory
    '''
    print('removing dupes')

    # Hash every file exactly once; keeping only the hashes (not the decoded
    # images) bounds memory use and avoids re-hashing for every comparison.
    hashes = {}
    for img_path in glob(f'{wd}/*.jpg'):
        with Image.open(img_path) as opened:
            hashes[os.path.basename(img_path)] = phash(opened.convert('L'))

    while hashes:
        # Take the first remaining image as the reference ...
        image_name = next(iter(hashes))
        reference = hashes.pop(image_name)
        # ... and drop everything that is close enough to it.
        duplicates = [
            name for name, other in hashes.items() if reference - other < 7
        ]
        for dupe_name in duplicates:
            os.remove(f'{wd}/{dupe_name}')
            del hashes[dupe_name]
Esempio n. 17
0
    def run(self, task):
        """Store the three image hashes of the task's file in ``self.results``.

        The image is built with ``str2image`` from ``task.get_file_data``; the
        average, perceptual and difference hashes are written as strings under
        ``self.results["imghash"]`` and ``self.results`` is returned.
        """
        image = str2image(task.get_file_data)

        bucket = self.results["imghash"]
        hashers = (
            ("a_hash", imagehash.average_hash),
            ("p_hash", imagehash.phash),
            ("d_hash", imagehash.dhash),
        )
        for key, hasher in hashers:
            bucket[key] = str(hasher(image))

        return self.results
Esempio n. 18
0
def compare_phash(source, capture):
    """
    Return how similar two images are according to their perceptual hashes.

    @param source: Image of any given shape as a numpy array
    @param capture: Image of any given shape as a numpy array
    @return: ``1 - distance / 64`` where ``distance`` is the difference of
             the two hashes; 1 means identical hashes.
    """
    source_hash, capture_hash = (
        imagehash.phash(Image.fromarray(pixels)) for pixels in (source, capture)
    )
    distance = source_hash - capture_hash
    return 1 - (distance / 64.0)
Esempio n. 19
0
def db_add_image(file_name: str) -> bool:
    """Hash the image file with seven algorithms and pass them to ``db_add``.

    The hashes are handed over as strings, in this order: average, perceptual,
    simple perceptual, difference, vertical difference, wavelet and colour
    hash. The return value of ``db_add`` is returned unchanged.
    """
    image = Image.open(file_name)
    hashers = (
        imagehash.average_hash,
        imagehash.phash,
        imagehash.phash_simple,
        imagehash.dhash,
        imagehash.dhash_vertical,
        imagehash.whash,
        imagehash.colorhash,
    )
    return db_add(file_name, *(str(hasher(image)) for hasher in hashers))
Esempio n. 20
0
def similarityImage(img1, img2):
    """Compare two image files by face encoding and by three image hashes.

    Returns ``(average_hash, dhash, phash, face_similarity)``. The three hash
    values are the hash distance divided by 100 (0 for identical hashes);
    whenever such a value is 0 the two paths are printed. The face similarity
    is rounded to 5 digits and is 0 when either image yields no encoding.
    When face detection raises, four ``None`` values are returned.
    """
    try:
        enco1, enco2 = imageFace.findFace(img1, img2)
    except Exception:
        return None, None, None, None
    if (enco1 != []) and (enco2 != []):
        similarity = imageFace.similarityImageTakeEncoding(enco1, enco2)
    else:
        similarity = 0

    similarity = round(similarity, 5)

    scaled_distances = []
    # Decode each file once and reuse it for all three hash algorithms.
    with Image.open(img1) as first, Image.open(img2) as second:
        for hasher in (imagehash.average_hash, imagehash.dhash,
                       imagehash.phash):
            distance = hasher(first) - hasher(second)
            if distance != 0:
                distance = distance / 100
            if distance == 0:
                print(img1, ' ', img2)
            scaled_distances.append(distance)

    similarityAverageHash, similarityDHash, similarityPHash = scaled_distances
    return similarityAverageHash, similarityDHash, similarityPHash, similarity
Esempio n. 21
0
 def unban_image(self, path):
     """Remove hashes similar to the image at ``path`` from both hash stores.

     The perceptual and wavelet hashes of the image are computed and
     ``self.exec_similar_hash`` is run with ``self.del_from_db`` on
     ``self.phashdb`` and ``self.whashdb``, each guarded by its own Redlock.
     NOTE(review): the meaning of the ``0, 13`` arguments is defined by
     ``exec_similar_hash`` -- presumably a distance range; confirm there.
     """
     picture = Image.open(path)
     locks = {
         name: Redlock(key=name, masters={self.redis})
         for name in ('phashdb', 'whashdb')
     }
     perceptual = im.phash(picture)
     wavelet = im.whash(picture)
     self.exec_similar_hash(
         self.phashdb, perceptual, 0, 13, self.del_from_db,
         lock=locks['phashdb'])
     self.exec_similar_hash(
         self.whashdb, wavelet, 0, 13, self.del_from_db,
         lock=locks['whashdb'])
def rename_hash(file):
    """Rename an image file to ``<phash><original extension>`` in place.

    Best effort: any error (unreadable image, failing move, ...) leaves the
    file untouched and yields ``None``. On success the new path is returned.

    NOTE: two files with the same hash and extension map to the same target
    path; what ``move`` does then is platform dependent.
    """
    try:
        extension = os.path.splitext(os.path.basename(file))[1]
        # Close the image before moving: an open handle blocks the rename on
        # some platforms, which the old bare ``except`` silently hid.
        with Image.open(file) as img:
            data = imagehash.phash(img)
        path = os.path.join(os.path.dirname(file), str(data) + extension)
        if path != file:
            move(file, path)
        return path
    except Exception:
        # Deliberately silent, but no longer swallows KeyboardInterrupt or
        # SystemExit like the bare ``except`` did.
        return None
Esempio n. 23
0
async def image_match(url1, url2):
    """Return the pHash similarity (0.0 - 1.0) of the images at two URLs.

    Both images are downloaded and hashed with an 8x8 pHash
    (``highfreq_factor=1``); the result is ``1 - distance / 64``. Any failure
    (network, decoding, ...) yields ``0.0``.
    """
    try:
        highfreq_factor = 1
        hash_size = 8
        async with aiohttp.ClientSession() as session:
            async with session.get(url1) as resp:
                r1 = await resp.read()
            # The second download used to fetch url1 again, so every
            # comparison reported a perfect match.
            async with session.get(url2) as resp:
                r2 = await resp.read()
        hash1 = imagehash.phash(Images.open(
            BytesIO(r1)), hash_size=hash_size, highfreq_factor=highfreq_factor)
        hash2 = imagehash.phash(Images.open(
            BytesIO(r2)), hash_size=hash_size, highfreq_factor=highfreq_factor)
        return 1 - (hash1 - hash2)/len(hash1.hash)**2
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that task
        # cancellation and interpreter shutdown are not swallowed.
        return 0.0
def createPerceptualHash(arrayData: "np.ndarray") -> str:
    """
    Return the perceptual hash of an array as a string.

    :param arrayData: array holding the image data to hash
    :return: string form of the pHash computed with ``hash_size=16``
    """
    return str(imagehash.phash(Image.fromarray(arrayData), hash_size=16))
Esempio n. 25
0
def get_phash(filename):
    """Return the perceptual hash of an image file as a string.

    When hashing fails with one of the handled errors, a message is printed,
    the file is deleted (if it exists) and ``False`` is returned.
    """
    try:
        # The context manager closes the file before a possible delete below.
        with Image.open(filename) as img:
            return str(imagehash.phash(img))
    except (NameError, IOError, TypeError, ValueError):
        print("get phash error, file deleted")
        try:
            os.remove(filename)
        except (OSError, TypeError):
            # The file is already gone (or ``filename`` is not a path);
            # the cleanup must not turn the failure into a crash.
            pass
        return False
Esempio n. 26
0
 def __init__(self, fontType, fontFilePath):
     """Remember the font type and path and hash the image at that path.

     The average, difference, perceptual and wavelet hashes are stored as
     strings in ``aHash``, ``dHash``, ``pHash`` and ``wHash``.
     """
     self.fontType = fontType
     self.fontFilePath = fontFilePath
     glyph_image = Image.open(fontFilePath)
     hashers = (
         ("aHash", imagehash.average_hash),
         ("dHash", imagehash.dhash),
         ("pHash", imagehash.phash),
         ("wHash", imagehash.whash),
     )
     for attribute, hasher in hashers:
         setattr(self, attribute, str(hasher(glyph_image)))
Esempio n. 27
0
def _letterbox_margin(width, height):
    """Return the bar height in pixels to trim top and bottom, or None."""
    # 1920x1080 (bars about 137 px) and 1280x720 (bars about 92 px).
    if (width, height) in ((1920, 1080), (1280, 720)):
        return int(height * 0.13)
    # 1280x692 (bars about 75-92 px).
    if (width, height) == (1280, 692):
        return 90
    # 720x480 (bars about 35 px).
    if (width, height) == (720, 480):
        return 40
    return None


def img_resize(img, img_black_flag):
    """Return the pHash of ``img`` normalised to 1920x1080.

    Args:
        img: PIL image.
        img_black_flag: truthy when the image has black bars at the top and
            bottom; the bars are cropped away before resizing.

    Returns:
        The perceptual hash, or ``None`` when the image has black bars but its
        resolution is not one of the supported ones (this case used to fail
        with ``UnboundLocalError``).
    """
    width, height = img.size

    if img_black_flag:
        margin = _letterbox_margin(width, height)
        if margin is None:
            # Resolution not implemented yet.
            print("wait 아직 구현 안됨")
            return None
        img = img.crop((0, margin, width, height - margin))

    # ``ANTIALIAS`` is the old name of the Lanczos filter and no longer
    # exists in recent Pillow releases; fall back to it only on old ones.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS
    return imagehash.phash(img.resize((1920, 1080), resample))
Esempio n. 28
0
 def compute(self, frame):
     """Hash ``frame``, record both hashes and return ``(ahash, phash)``.

     The average hash is stored in ``self.A`` and the perceptual hash in
     ``self.P`` (each keyed by itself), both are passed to ``self._show_``
     and, when ``self.log`` is truthy, to ``self._log_`` together with the
     frame and ``self.div``.
     """
     average, perceptual = imagehash.average_hash(frame), imagehash.phash(frame)
     self.A[average] = average
     self.P[perceptual] = perceptual
     self._show_(average, perceptual)
     if self.log:
         self._log_(average, perceptual, frame, self.div)
     return average, perceptual
Esempio n. 29
0
def phash(filename):
    """Compute, store and return the perceptual hash of an image file.

    The MIME type reported by ``magic`` must contain ``"image"``; otherwise a
    message is printed and ``None`` is returned. For images the hash is
    inserted through ``helper.sqlite_insert`` under the file's base name and
    returned as the ``imagehash`` object.
    """
    if "image" not in magic.from_file(filename, mime=True):
        # Call syntax: valid on Python 2 and 3, consistent with the print
        # below (the statement form is a syntax error on Python 3).
        print("pHash works only with images")
        return None

    print("Calculating pHash of: %s" % (filename, ))
    image_hash = imagehash.phash(Image.open(filename))
    helper.sqlite_insert("pHash", str(image_hash), os.path.basename(filename))
    return image_hash
Esempio n. 30
0
def Hashing(filename):
    """Return six image hashes of ``filename`` as integers.

    Order of the returned tuple: perceptual, average, simple perceptual,
    difference, vertical difference and wavelet hash. Each value is the
    hash's hexadecimal string parsed as a base-16 integer.
    """
    hashers = (
        imagehash.phash,
        imagehash.average_hash,
        imagehash.phash_simple,
        imagehash.dhash,
        imagehash.dhash_vertical,
        imagehash.whash,
    )
    # Open and decode the file once instead of once per algorithm.
    with Image.open(filename) as img:
        return tuple(int(str(hasher(img)), 16) for hasher in hashers)
Esempio n. 31
0
def calculate_hashes() -> None:
    """Compute the pHash of every pokemon image and persist the pokedex.

    For each entry of the module-level ``pokedex`` the image
    ``data/images/<id>.png`` is hashed, the hash string is stored under
    ``"hash"`` and a green ``name => hash`` line is printed. Afterwards the
    pokedex is handed to ``utils.update_pokedex``.
    """
    for pokemon in pokedex:
        image_path = f"data/images/{pokemon['id']}.png"
        pokemon["hash"] = str(imagehash.phash(Image.open(image_path)))
        line = "\t=> ".join((pokemon["name"], pokemon["hash"]))
        print(chalk.Chalk("green")(line))

    utils.update_pokedex(pokedex)
Esempio n. 32
0
    def frame_perceptive_hash(frame):
        """Return five hash bit-matrices of ``frame`` as float arrays.

        Order: average, perceptual, wavelet, horizontal difference and
        vertical difference hash.
        """
        im = Image.fromarray(frame)
        hashers = (
            imhash.average_hash,
            imhash.phash,
            imhash.whash,
            imhash.dhash,
            imhash.dhash_vertical,
        )
        return tuple(hasher(im).hash.astype(float) for hasher in hashers)
Esempio n. 33
0
def main():
    """Delete images that nearly match their predecessor in key order.

    The mapping returned by ``scan_directory_for_images`` is walked in sorted
    key order. Each file is compared with the previous one by pHash distance;
    when the distance is below 5 the current file is removed through
    ``delete_document`` and ``delete_from_storage``.
    """

    def _phash_of(name):
        # Files are read from the local 'louisaandbenny' folder.
        return imagehash.phash(Image.open('louisaandbenny\\' + name))

    unique_files = scan_directory_for_images()

    previous = ''
    for key in sorted(unique_files):
        current = unique_files[key]
        if previous:
            hash1 = _phash_of(previous)
            hash2 = _phash_of(current)
            # delete near matches
            if (hash2 - hash1) < 5:
                delete_document(current)
                delete_from_storage(current)

        previous = current
Esempio n. 34
0
    def __init__(self, bndbox: typing.Tuple[int, int, int, int], frame: Image):
        """Crop ``frame`` to ``bndbox`` and hash the crop.

        Stores the bounding box, the cropped image and its perceptual hash
        (``hash_size=12``); ``match`` starts as ``None`` and ``skipped`` as
        ``False``.
        """
        self.bndbox = bndbox
        self.image = frame.crop_by_bounding_box(bndbox)
        self.hash = imagehash.phash(
            PIL.Image.fromarray(self.image.image_data), hash_size=12)

        self.match, self.skipped = None, False
def perceptual_hash_distance(img1, img2):
    """Return ``(distance, execution_time)`` for two images.

    ``distance`` is the difference of the two perceptual hashes and
    ``execution_time`` the wall-clock time of hashing and comparing, in whole
    milliseconds. Both values are also printed.
    """
    started = datetime.datetime.now()
    distance = imagehash.phash(img1) - imagehash.phash(img2)
    elapsed = datetime.datetime.now() - started
    execution_time = int(elapsed.total_seconds() * 1000)
    print('Perceptual hash_Normal_Distance: ',distance , execution_time)
    return distance, execution_time
Esempio n. 36
0
    def prepareData(self, path):
        """Collect face representations from all ``.jpg`` files in ``path``.

        For every detected face the crop is shown in a window, saved to
        ``<self.targetFolder>/crop`` and aligned; the network representation
        of the aligned face is appended to ``self.X`` and the crop file name
        to ``self.Y``. Pressing ``q`` in the preview window aborts the whole
        method.

        Relies on the names ``align``, ``net`` and ``args``, which are not
        defined in this method.
        """
        self.X = []
        self.Y = []
        for filename in os.listdir(path):
            if not filename.endswith('.jpg'):
                continue
            filepath = os.path.join(path, filename)
            try:
                img = Image.open(filepath)
            except:
                # NOTE(review): bare except also hides programming errors.
                print("cannot open image file")
                continue

            baseFileName = os.path.splitext(os.path.basename(filename))[0]
            rgbFrame = self.convertImageToRgbFrame(img)

            bbs = align.getAllFaceBoundingBoxes(rgbFrame)

            # 1-based counter of faces in this file; used in crop file names.
            faceInFile = 0

            for bb in bbs:
                faceInFile += 1
                cropImage = rgbFrame[bb.top():bb.bottom(),
                                     bb.left():bb.right()]
                print("crop image : {}".format(len(cropImage)))
                # Only non-empty crops whose box coordinates are all positive
                # are processed. ``&`` evaluates every operand (no short
                # circuit).
                if (len(cropImage) > 0) & (bb.left() > 0) & (
                        bb.right() > 0) & (bb.top() > 0) & (bb.bottom() > 0):
                    cv2.imshow("cropped", cropImage)
                    # 'q' aborts the method; X and Y keep what was collected.
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        return

                    cropFolder = os.path.join(self.targetFolder, "crop")
                    if not os.path.exists(cropFolder):
                        os.makedirs(cropFolder)

                    cropFile = baseFileName + "-" + str(faceInFile) + ".jpg"
                    cropPath = os.path.join(cropFolder, cropFile)

                    im = Image.fromarray(cropImage)
                    im.save(cropPath)

                    landmarks = align.findLandmarks(rgbFrame, bb)
                    alignedFace = align.align(
                        args.imgDim,
                        rgbFrame,
                        bb,
                        landmarks=landmarks,
                        landmarkIndices=openface.AlignDlib.OUTER_EYES_AND_NOSE)
                    # The crop file stays on disk even when alignment fails.
                    if alignedFace is None:
                        continue

                    # The hash is only printed, not stored.
                    phash = str(imagehash.phash(Image.fromarray(alignedFace)))
                    print("phash = " + phash)

                    rep = net.forward(alignedFace)
                    self.X.append(rep)
                    self.Y.append(cropFile)
Esempio n. 37
0
def test_hash():
    """A perceptual hash can be generated for the bundled cover image."""

    test_path = path.dirname(path.realpath(__file__))
    data_path = path.join(test_path, 'data/flash-91-cover.jpg')

    # Generate a hash for Flash 91 cover image
    cover_hash = imagehash.phash(Image.open(data_path))

    # Check the computed hash; the old assertion tested the builtin ``hash``
    # function and therefore could never fail.
    assert cover_hash is not None
Esempio n. 38
0
 def __find_similar_ad_from_pic(self, picture):
     """Return a stored ad whose picture resembles ``picture``, else False.

     ``picture`` is a URL; the image is downloaded and hashed. Every stored
     ``picturehash`` is compared and the first one whose distance is below
     ``self.HASH_SIMILAR_TRESHOLD`` decides the returned ``Annonce``.
     """
     new_hash = phash(Image.open(urlopen(picture)))
     hashes = [ad.picturehash for ad in Annonce.select()]
     for old_hash in hashes:
         if old_hash is None:
             continue
         if hex_to_hash(old_hash) - new_hash < self.HASH_SIMILAR_TRESHOLD:
             return Annonce.get(Annonce.picturehash == old_hash)
     # Give up only after all stored hashes were checked; the old code
     # returned False right after the first comparison.
     return False
Esempio n. 39
0
    def save(self, *args, **kwargs):
        """Save the picture with a fresh hash and make sure it has a thumbnail.

        The perceptual hash of ``self.image`` is recomputed on every save.
        When no thumbnail exists yet, a centre-cropped 300x300 one is
        generated, stored, and the model is saved once more.
        """
        self.hash = phash(Image.open(self.image))
        super(Picture, self).save(*args, **kwargs)

        if self.thumbnail:
            return

        resized = get_thumbnail(
            self.image, '300x300', crop='center', quality=99)
        thumbnail_content = ContentFile(resized.read())
        self.thumbnail.save(resized.name, thumbnail_content, save=True)
        super(Picture, self).save(*args, **kwargs)
Esempio n. 40
0
def find(file):
    """Return known attachments that look like the image in ``file``.

    The result is a list of ``{"id": ..., "similarity": ...}`` dicts for every
    entry of the module-level ``attachments`` whose hash distance to the image
    is at most 7, ordered by ascending distance. ``"similarity"`` holds that
    distance, so smaller means more alike.
    """
    query = imagehash.phash(Image.open(file))
    scored = [(key, query - stored) for key, stored in attachments.items()]
    scored.sort(key=lambda pair: pair[1])
    return [
        {"id": key, "similarity": distance}
        for key, distance in scored
        if distance <= 7
    ]
Esempio n. 41
0
def amazon_phash(metadata_s3_bucket: str,
                 metadata_s3_key: str,
                 imgs_s3_bucket: str,
                 imgs_s3_prefix: str,
                 local_data_dir: str,
                 output_s3_bucket: str,
                 output_s3_prefix: str,
                 n: int = sys.maxsize):
    """Compute pHash bit vectors for product images and upload them to S3.

    Nothing is done when ``<output_s3_prefix>/vecs.json.gz`` already exists in
    the output bucket. Otherwise the metadata file is downloaded (unless it is
    already present locally), at most ``n`` metadata lines are read, and for
    every entry with a ``jpg`` image URL the image
    ``<imgs_s3_prefix>/<asin>.jpg`` is fetched, hashed with a 64x64 pHash and
    written to a local gzip file, which is finally uploaded.

    Images that cannot be fetched or decoded are reported on stderr and
    skipped.
    """

    s3 = boto3.client('s3')

    # Check if it exists first.
    output_key = f"{output_s3_prefix}/vecs.json.gz"
    if exists(s3, output_s3_bucket, output_key):
        return

    metadata_file = f"{local_data_dir}/metadata.json.gz"
    if not os.path.exists(metadata_file):
        print(
            f"Downloading s3://{metadata_s3_bucket}/{metadata_s3_key} to {metadata_file}"
        )
        s3.download_file(Bucket=metadata_s3_bucket,
                         Key=metadata_s3_key,
                         Filename=metadata_file)

    vecs_file = f"{local_data_dir}/vecs.json.gz"
    hash_size = 64  # end up with a 4096-dimensional bit vector.

    print(f"Writing vectors to {vecs_file}")

    # The ``with`` block guarantees the gzip stream is finalised even when an
    # error interrupts the loop; otherwise the gzip file is invalid.
    with gzip.open(vecs_file, "wt") as vecs_fp, \
            gzip.open(metadata_file) as gzfp:
        lines = islice(gzfp, 0, n)
        t0 = time()
        # SECURITY NOTE(review): ``eval`` executes every metadata line as
        # Python code. Only acceptable for a trusted dataset; consider
        # ``ast.literal_eval`` if the lines are plain literals.
        for i, d in enumerate(map(eval, lines)):
            if "imUrl" not in d or not d["imUrl"].endswith("jpg"):
                continue
            asin = d['asin']
            try:
                obj = s3.get_object(Bucket=imgs_s3_bucket,
                                    Key=f"{imgs_s3_prefix}/{asin}.jpg")
                img = Image.open(BytesIO(obj['Body'].read()))
            except (PIL.UnidentifiedImageError, ClientError) as ex:
                print(f"Error for image {asin}: {ex}\n", file=sys.stderr)
                # Skip this entry; falling through would hash the previous
                # image again (or fail on an undefined ``img``).
                continue
            ph = phash(img, hash_size)
            for vec in ndarray_to_sparse_bool_vectors(
                    ph.hash.reshape((1, ph.hash.size))):
                write_vec(vecs_fp, asin, vec)
            print(
                f"Processed {i}: {asin} - {((i + 1) / ((time() - t0) / 60)):.1f} vecs / minute"
            )

    print(f"Copying {vecs_file} to s3://{output_s3_bucket}/{output_key}")
    s3.upload_file(vecs_file, output_s3_bucket, output_key)
Esempio n. 42
0
    def compute(self, frame):
        """Return the first 14 characters of the pHash string of ``frame``."""
        prefix_length = 14
        full_hash = str(imagehash.phash(frame))
        return full_hash[:prefix_length]
Esempio n. 43
0
def delete_same_image():
    """Delete images in ``downloadDirectory`` that duplicate an earlier one.

    Files matching ``*.*`` are hashed once, in glob order. An image is kept
    when its pHash differs by more than 2 from every image kept so far;
    otherwise it is deleted. An empty directory is a no-op. Always returns
    ``None``.
    """
    image_list = glob(downloadDirectory + "*.*")
    kept_hashes = []
    delete_list = []
    for image_path in tqdm.tqdm(image_list, total=len(image_list)):
        # Hash each file exactly once.
        with Image.open(image_path) as img:
            current = imagehash.phash(img)
        # An image is either a duplicate of a kept one or becomes a new
        # reference, never both.
        if any(abs(current - kept) <= 2 for kept in kept_hashes):
            delete_list.append(image_path)
        else:
            kept_hashes.append(current)
    for image_path in delete_list:
        if os.path.exists(image_path):
            os.remove(image_path)
    return None
Esempio n. 44
0
 def get_images(self, response, request, info):
     """Yield ``(width, height, url_sha2, phash_str, buf)`` for the response.

     ``phash_str`` is the perceptual hash of the downloaded image written as
     a string of ``"1"``/``"0"`` characters in row-major order; ``buf`` is
     whatever ``self.convert_image`` returns for the image.
     """
     url_sha2 = self.file_sha2(request, response=response, info=info)
     orig_image = Image.open(BytesIO(response.body))
     bits = imagehash.phash(orig_image).hash.ravel(order='C')
     phash_str = "".join("1" if bit else "0" for bit in bits)
     width, height = orig_image.size
     buf = self.convert_image(orig_image)
     yield width, height, url_sha2, phash_str, buf
def getHash(img):
    """Compute six perceptual hashes for a fixed crop of an image.

    The image at ``img`` is converted to greyscale and cropped to the box
    ``(25, 37, 195, 150)`` before hashing.

    Returns a tuple of hex strings in the order
    ``(ahash, phash, psimplehash, dhash, vertdhash, whash)``.
    """
    # convert() returns an independent in-memory image, so the source file
    # handle can be released as soon as the conversion is done.
    with Image.open(img) as source:
        normal = source.convert('L')
    crop = normal.crop((25, 37, 195, 150))
    ahash = str(imagehash.average_hash(crop))
    phash = str(imagehash.phash(crop))
    psimplehash = str(imagehash.phash_simple(crop))
    dhash = str(imagehash.dhash(crop))
    vertdhash = str(imagehash.dhash_vertical(crop))
    whash = str(imagehash.whash(crop))
    return ahash, phash, psimplehash, dhash, vertdhash, whash
Esempio n. 46
0
def cal_hash_val(file_path):
    """Return perceptual hashes of one image at hash sizes 3, 4 and 16.

    The result is a list of three strings, in that size order. The size-3
    and size-4 hashes are rendered as binary strings zero-padded to
    ``size ** 2`` digits; the size-16 hash is left as the hex string
    produced by ``str()`` on the hash object.
    """
    hash_val = []
    # Open the file once for all three sizes and close it afterwards.
    with Image.open(file_path) as img:
        for size in (3, 4, 16):
            digest = str(imagehash.phash(img, size))
            if size != 16:
                # hex -> binary, restoring the leading zeros that int() drops
                digest = bin(int(digest, 16))[2:].zfill(size ** 2)
            hash_val.append(digest)
    return hash_val
Esempio n. 47
0
 def phash(self, img_url):
     """Return the cached perceptual hash string for ``img_url``.

     On a cache miss the image is obtained through ``self.image``; when that
     yields image data its hash string is stored under the ``'phash'`` key
     of ``self._fetched[img_url]``, otherwise ``None`` is stored. The stored
     value is returned either way.
     """
     already_known = (
         img_url in self._fetched and 'phash' in self._fetched[img_url]
     )
     if already_known:
         return self._fetched[img_url]['phash']

     if img_url not in self._fetched:
         self._fetched[img_url] = {}
     content_type, image_str = self.image(img_url)
     if not image_str:
         digest = None
     else:
         image = str_to_image(image_str)
         # Imported lazily, only when there is actually something to hash.
         import imagehash
         digest = str(imagehash.phash(image))
     self._fetched[img_url]['phash'] = digest
     return self._fetched[img_url]['phash']
def _calculate_phash(target_dir):
    print("Calculating phash for files under {}".format(target_dir))
    files_phash = []
    for f in [f for f in os.listdir(target_dir) if
              os.path.isfile(os.path.join(target_dir, f))]:
        f = os.path.join(target_dir, f)
        try:
            files_phash.append((f, unicode(imagehash.phash(Image.open(f)))))
        except:
            pass

    return files_phash
def getHash(img):
    """Compute six perceptual hashes for a resized, cropped image.

    The image at ``img`` is converted to greyscale, resized to 223x310 and
    cropped to the box ``(25, 37, 195, 150)`` before hashing.

    Returns a tuple of hex strings in the order
    ``(ahash, phash, psimplehash, dhash, vertdhash, whash)``.
    """
    size = 223, 310
    # convert() returns an independent in-memory image, so the source file
    # handle can be released right away.
    with Image.open(img) as source:
        normal = source.convert('L')
    # LANCZOS is the same filter as the ANTIALIAS alias removed in Pillow 10.
    normal = normal.resize(size, Image.LANCZOS)
    crop = normal.crop((25, 37, 195, 150))
    ahash = str(imagehash.average_hash(crop))
    phash = str(imagehash.phash(crop))
    psimplehash = str(imagehash.phash_simple(crop))
    dhash = str(imagehash.dhash(crop))
    vertdhash = str(imagehash.dhash_vertical(crop))
    whash = str(imagehash.whash(crop))
    # The fourth slot is dhash; it used to repeat phash, so the computed
    # difference hash was never returned.
    return ahash, phash, psimplehash, dhash, vertdhash, whash
Esempio n. 50
0
def hash_value(img_fn, htype):
    """Hash the image at ``img_fn`` with the algorithm selected by ``htype``.

    ``'p'`` selects phash, ``'d'`` dhash and ``'w'`` whash. ``'a'`` and any
    other value select the average hash. Returns the hash object produced
    by ``imagehash``.
    """
    img = Image.open(img_fn)
    algorithm_names = {'p': 'phash', 'd': 'dhash', 'w': 'whash'}
    # 'a' and unrecognised codes both resolve to the average hash.
    chosen = algorithm_names.get(htype, 'average_hash')
    return getattr(imagehash, chosen)(img)
Esempio n. 51
0
def get_imagehashes(fp: Fileish,
                    size=FINGERPRINT_SIZE) -> Dict[str, imagehash.ImageHash]:
    """Compute average, perceptual, wavelet and difference hashes for ``fp``.

    The image is loaded through ``pil_image``, resized to ``size`` x ``size``
    with bilinear filtering and converted to greyscale before hashing. The
    result maps ``'ahash'``, ``'phash'``, ``'whash'`` and ``'dhash'`` to the
    corresponding hash objects; an empty dict is returned when any step
    raises ``OSError``.
    """
    algorithms = (
        ('ahash', 'average_hash'),
        ('phash', 'phash'),
        ('whash', 'whash'),
        ('dhash', 'dhash'),
    )
    try:
        source = pil_image(fp)
        grey = source.resize((size, size), PIL.Image.BILINEAR).convert('L')
        return {
            key: getattr(imagehash, func_name)(grey)
            for key, func_name in algorithms
        }
    except OSError:  # most likely an unreadable or corrupt image file
        return {}
Esempio n. 52
0
    def run(self, task):
        """Hash the task's image and look up similar images per algorithm.

        Stores ``task`` on the instance, decodes ``task.get_file_data`` with
        ``str2image``, fills ``self.results["imghash"]`` with the string
        forms of the average, perceptual and difference hashes, then fills
        ``self.results["similar"]`` via ``self.get_similar_images``.
        Returns ``self.results``.
        """
        self.task = task
        image = str2image(task.get_file_data)

        algorithms = (
            ("a_hash", imagehash.average_hash),
            ("p_hash", imagehash.phash),
            ("d_hash", imagehash.dhash),
        )

        # First pass: compute every hash.
        for key, hasher in algorithms:
            digest = hasher(image, hash_size=self.HASH_SIZE)
            self.results["imghash"][key] = str(digest)

        # Second pass: similarity lookups, run only after all hashes exist.
        for key, hasher in algorithms:
            own_hash = self.results["imghash"][key]
            self.results["similar"][key] = self.get_similar_images(own_hash, hasher)

        return self.results
def run():
    """Hash every JPEG in ``../input/Images_0.zip`` .. ``Images_9.zip``.

    Writes one row per image to ``image_hashes.csv`` with the columns
    ``image_id, script_dhash, ahash, dhash, phash, signature``. The
    signature is looked up by integer image id in the mapping returned by
    ``get_pickled_signatures``. Empty archive members are reported and
    skipped; any other per-image failure is reported and the run continues.

    ``print`` is called with a single parenthesised argument, which prints
    the same text on Python 2 and Python 3.
    """
    import csv

    signatures = get_pickled_signatures()
    columns = ['image_id', 'script_dhash', 'ahash', 'dhash', 'phash', 'signature']

    t0 = time()

    # Context managers close the CSV and each archive even if the run aborts.
    with open('image_hashes.csv', 'w') as hashes_file:
        csv_writer = csv.DictWriter(hashes_file, fieldnames=columns)
        csv_writer.writeheader()

        for zip_counter in range(0, 10):
            archive_name = '../input/Images_%d.zip' % zip_counter
            print('processing %s...' % archive_name)

            with zipfile.ZipFile(archive_name) as imgzipfile:
                for name in tqdm(imgzipfile.namelist()):
                    if not name.endswith('.jpg'):
                        continue
                    # "dir/123.jpg" -> "123"
                    img_id = name.split('/')[-1][:-4]
                    try:
                        imgdata = imgzipfile.read(name)

                        if len(imgdata) == 0:
                            print('%s is empty' % img_id)
                            continue

                        img = Image.open(io.BytesIO(imgdata))

                        ahash = imagehash.average_hash(img)
                        dhash = imagehash.dhash(img)
                        phash = imagehash.phash(img)
                        script_dhash = extract_dhash(img)

                        csv_writer.writerow({
                            'image_id': img_id,
                            'script_dhash': script_dhash,
                            'ahash': str(ahash),
                            'dhash': str(dhash),
                            'phash': str(phash),
                            'signature': signatures[int(img_id)],
                        })
                    except Exception:
                        # Best effort per image; unlike a bare except this
                        # still lets KeyboardInterrupt stop the run.
                        print('error with ' + img_id)

    print('took %0.5fm' % ((time() - t0) / 60))
Esempio n. 54
0
def fix_hash_for_all():
    """Rename large ``.jpg`` files under ``dest_dir`` to their phash.

    Only files whose name ends in ``.jpg`` and is exactly 14 characters long
    are considered. A file is renamed to ``<phash>.jpg`` in the same
    directory when its pixel count exceeds ``1920*1080 - 32*32``.
    ``fix_file_locations()`` is called once the walk is finished.
    """
    print("Fixing hashes...")
    thresh = 1920*1080 - 32*32
    for root, dirs, files in os.walk(dest_dir):
        for name in files:
            if not (name.endswith('.jpg') and len(name) == 14):
                continue
            path = os.path.join(root, name)
            # The with-block closes the image even when hashing fails, and
            # always before the rename.
            with Image.open(path) as im:
                width, height = im.size
                if width * height <= thresh:
                    # Too small to rename, so skip the hashing cost too.
                    continue
                hsh = imagehash.phash(im)
            rename(path, os.path.join(root, str(hsh) + name[-4:]))
    fix_file_locations()
Esempio n. 55
0
def addImage(image):
    '''
    Adds an image.

    ``image`` is the raw file content. A document holding the EXIF data
    (minus the keys in ``config.exifIgnore``), the MIME type and a set of
    hashes is saved through ``images.save``; a JPEG thumbnail and the
    original bytes are then attached to it.

    Returns ``(id, collisions, pcollisions)``: the id of the saved document,
    the result of ``checkCollision`` on the byte-level hashes, and the
    result of ``checkPhash`` on the perceptual hash.

    NOTE(review): ``.split(' ')`` on the ``check_output`` result and the use
    of ``StringIO`` for JPEG data presume Python 2 byte strings - confirm
    before running under Python 3.
    '''
    doc = {'type': 'image', 'tags': [], 'links': []}

    # exiftool, file(1) and PIL are all pointed at this temp file. The
    # with-block guarantees it is closed (and so removed) on every path.
    with tempfile.NamedTemporaryFile() as f:
        f.write(image)
        f.flush()

        # Get exif and mime
        doc['exif'] = json.loads(subprocess.check_output(['exiftool', '-j', f.name]))[0]
        for i in config.exifIgnore:
            # A tag that is absent for this file is simply not removed.
            doc['exif'].pop(i, None)
        # Second space-separated field, minus its final character
        # (presumably the trailing newline).
        doc['mime'] = subprocess.check_output(['file', '--mime-type', f.name]).split(' ')[1][:-1]

        f.seek(0)
        im = Image.open(f)

        # Calculate the hashes
        hashes = {}
        hashes['length'] = len(image)

        hashes['crc32'] = hex(zlib.crc32(image) & 0xffffffff)[2:]
        hashes['md5'] = hashlib.md5(image).hexdigest()
        hashes['sha1'] = hashlib.sha1(image).hexdigest()
        hashes['sha256'] = hashlib.sha256(image).hexdigest()
        hashes['sha512'] = hashlib.sha512(image).hexdigest()
        # Checked before 'phash' is added, so it sees byte-level hashes only.
        collisions = checkCollision(hashes)

        hashes['phash'] = str(imageHashToInt(imagehash.phash(im)))
        pcollisions = checkPhash(hashes['phash'])

        doc['hashes'] = hashes

        # Generate a thumbnail while the backing file is still open
        im.thumbnail(config.thumbsize)
        thumb = StringIO()
        im.convert('RGB').save(thumb, "JPEG")

    # Save the result
    doc_id = images.save(doc)[0]
    thumb = thumb.getvalue()
    images.put_attachment(images[doc_id], thumb, filename = 'thumbnail.jpg', content_type =  config.thumbMime)
    images.put_attachment(images[doc_id], image, filename='image', content_type=doc['mime'])
    return doc_id, collisions, pcollisions
Esempio n. 56
0
    def __init__(self, path):
        """Build hash and histogram features for an image.

        ``path`` is either a filesystem path (``str``), which is opened with
        ``Image.open``, or an already loaded image object, which is used
        as-is.

        Sets ``md5`` (hex digest of the raw pixel bytes), ``phash`` (string
        form of the size-8 perceptual hash), ``histogram`` (greyscale
        histogram smoothed by ``utils.smooth`` and then truncated to ints),
        and ``mins`` / ``maxs`` (indices of the local minima / maxima of the
        smoothed histogram, padded with 1000 up to at least two entries).
        """
        t = path
        # The former `type and ...` guard tested the builtin `type`, which is
        # always truthy, so only the string check ever mattered.
        if isinstance(path, str):
            t = Image.open(path)
        phash = imagehash.phash(t, 8)
        histogram = np.array(t.convert('L').histogram())
        # tobytes() replaces tostring(), which Pillow no longer provides.
        self.md5 = md5(t.tobytes()).hexdigest()

        self.phash = str(phash)
        self.histogram = utils.smooth(histogram, 100)

        # Extrema are located on the smoothed values, before int truncation.
        self.mins = argrelextrema(self.histogram, np.less)[0]
        self.maxs = argrelextrema(self.histogram, np.greater)[0]
        # A list is required here: np.array(map(...)) wraps the lazy map
        # object itself on Python 3 instead of its values.
        self.histogram = np.array([int(x) for x in self.histogram])

        if len(self.mins) < 2:
            self.mins = np.append(self.mins, [1000] * (2 - len(self.mins)))
        if len(self.maxs) < 2:
            self.maxs = np.append(self.maxs, [1000] * (2 - len(self.maxs)))
Esempio n. 57
0
 def _insert_meta(self, data_store, comic_id):
     j = self._get_xkcd_json(comic_id)
     hash_avg = ''
     hash_d = ''
     hash_p = ''
     if not j:
         return
     if j.get('img'):
         file_name = '/tmp/' + get_random_file_name()
         try:
             self.myopener.retrieve(j.get('img'), file_name)
             hash_avg = imagehash.average_hash(Image.open(file_name))
             hash_d = imagehash.dhash(Image.open(file_name))
             hash_p = imagehash.phash(Image.open(file_name))
         except:
             pass
         finally:
             os.remove(file_name)
     data_store.insert_xkcd_meta(comic_id, json.dumps(j), str(hash_avg), str(hash_d), str(hash_p))
Esempio n. 58
0
 def run(self):
     """Group visually similar media files and write the groups as JSON.

     Every file in ``data/<date_path>/media`` is hashed with the average,
     difference and perceptual algorithms. Two files are linked when the
     sum of their three hash distances is at most 40. The connected
     components of the resulting graph are written to ``self.output()`` as
     a JSON list of lists. Files linked to nothing never enter the graph
     and therefore do not appear in the output.

     Progress is reported through ``self.update_job`` every
     ``update_block_size`` files.
     """
     date_path = self.search['date_path']
     files = sorted(os.listdir('data/%s/media' % date_path))
     hashes = {}
     g = nx.Graph()
     update_block_size = get_block_size(len(files), 5)
     for i, f in enumerate(files):
         fn = 'data/%s/media/%s' % (date_path, f)
         # Open each file once for all three hashes and close it afterwards.
         with Image.open(fn) as img:
             ahash = imagehash.average_hash(img)
             dhash = imagehash.dhash(img)
             phash = imagehash.phash(img)
         hashes[f] = {'ahash': ahash, 'dhash': dhash, 'phash': phash}
         # Compare against every file hashed earlier in the sorted order.
         for f2name in files[:i]:
             f2 = hashes[f2name]
             sumhash = (
                 (ahash - f2['ahash'])
                 + (dhash - f2['dhash'])
                 + (phash - f2['phash'])
             )
             # FIXME: 40 is a hard-coded arbitrary (eyeballed) threshold
             if sumhash <= 40:
                 g.add_edge(f, f2name)
         if i % update_block_size == 0:
             self.update_job(
                 date_path=self.search['date_path'],
                 status="STARTED: %s - %s/%s" %
                        (self.task_family, i, len(files))
             )
     with self.output().open('w') as fp_graph:
         # Note: sets are not JSON serializable
         components = [list(s) for s in nx.connected_components(g)]
         json.dump(components, fp_graph, indent=2)
Esempio n. 59
0
    def _calc(self, options, files):
        image_phash = {}

        for file in files:
            if os.path.isdir(file):
                if options.get_recursive_flag() and not os.path.islink(file):
                    try:
                        image_phash.update(self._calc(options, sorted([
                            os.path.join(file, x)
                            for x in os.listdir(file)
                        ])))
                    except PermissionError:
                        pass
            elif os.path.isfile(file):
                try:
                    phash = int(str(imagehash.phash(PIL.Image.open(file))), 16)
                except OSError:
                    continue
                image_phash[file] = phash

        return image_phash
Esempio n. 60
0
def collect_files():
    print("Collecting files...")
    thresh = 1920*1080 - 32*32
    for dir in (source_dir, dest_dir, orig, tall, wide):
        if not os.path.isdir(dir):
            os.mkdir(dir)
    for root, dirs, files in os.walk(source_dir):
        for name in files:
            path = os.path.join(root, name)
            try:
                im = Image.open(path)
                width, height = im.size
            except:
                continue
            if width*height > thresh:
                hsh = imagehash.phash(im)
                im.close()
                if width > height:
                    newpath = os.path.join(wide, str(hsh)+'.jpg')
                    copy(path, newpath)
                elif height > width:
                    newpath = os.path.join(tall, str(hsh)+'.jpg')
                    copy(path, newpath)