def findSimilarImgs(baseImageFile, tarDir, hasCmpedList, step):
    """Move every image under ``tarDir`` that looks like ``baseImageFile``.

    The base image is hashed once with ``dhash.dhash_int`` (using the
    module-level ``imageSize``).  Every ``jpg``/``png`` file found by walking
    ``tarDir`` that is not already listed in ``hasCmpedList`` is hashed the
    same way and compared bit by bit.  When the percentage of differing bits
    is at most the module-level ``limitDiff`` the candidate is appended to
    ``hasCmpedList``, a message is printed and ``movePicToResultDir`` is
    called.

    Args:
        baseImageFile: path of the reference image.
        tarDir: directory tree to search.
        hasCmpedList: list of paths already matched; it is mutated in place.
        step: opaque value forwarded to ``movePicToResultDir``.

    Returns:
        None.  The function also returns early, without searching, when the
        base image cannot be loaded or hashed.
    """
    image1 = load_image(baseImageFile)
    if image1 is None:
        return
    try:
        hash1 = dhash.dhash_int(image1, size=imageSize)
    except Exception:
        # Best effort: an unhashable base image simply yields no matches.
        return

    # Divisor used to turn a bit count into a percentage.
    hashBits = imageSize * imageSize * 2

    for path, _dirs, filelist in os.walk(tarDir):
        # Only the directory whose own path ends in '.git' is skipped here;
        # os.walk still descends into its sub-directories.
        if path.endswith('.git') or path.startswith(resultDir):
            continue
        for filename in filelist:
            if not filename.endswith(('jpg', 'png')):
                continue
            imageName = os.path.join(path, filename)
            if imageName in hasCmpedList:
                continue
            image2 = load_image(imageName)
            if image2 is None:
                continue
            try:
                hash2 = dhash.dhash_int(image2, size=imageSize)
            except Exception:
                # Skip candidates that cannot be hashed.
                continue
            num_bits_different = dhash.get_num_bits_different(hash1, hash2)
            diff = 100 * num_bits_different / hashBits
            if diff <= limitDiff:
                hasCmpedList.append(imageName)
                print(baseImageFile + " is same with " + imageName)
                movePicToResultDir(step, baseImageFile, imageName, diff)
Example #2
0
def hashGif(conn, gifUrl, url):
    """Download a GIF and return the dhash of every frame as one string.

    Args:
        conn: DB-API connection holding a ``Posts`` table with a ``Url``
            column.
        gifUrl: address of the GIF to download.
        url: value of ``Posts.Url`` identifying the post; the row is deleted
            when the GIF cannot be fetched or opened.

    Returns:
        The per-frame hashes, each followed by a single space, concatenated
        in frame order; or the string ``'invalid'`` when downloading or
        opening the image failed.
    """
    try:
        request = Request(str(gifUrl), headers={'User-Agent': user_agent})
        # Close the HTTP response as soon as the payload has been read.
        with urlopen(request, context=context) as response:
            f = BytesIO(response.read())
        frame = Image.open(f)
    except Exception:
        # Any fetch/decode failure marks the post as dead and removes it.
        c = conn.cursor()
        try:
            c.execute('DELETE FROM Posts WHERE Url = ?;', (str(url),))
            conn.commit()
        finally:
            c.close()
        return 'invalid'

    frameHashes = []
    while True:
        # Hash each frame exactly once.
        frameHashes.append(str(dhash.dhash_int(frame)))
        try:
            # len(frameHashes) is the index of the next frame to visit.
            frame.seek(len(frameHashes))
        except EOFError:
            break
    return ''.join(frameHash + ' ' for frameHash in frameHashes)
Example #3
0
def doComparison(photo1loc, photo2loc):
    """Compare a local photo with a remote one using several metrics.

    Args:
        photo1loc: filesystem path of the first image.
        photo2loc: URL of the second image.

    Returns:
        A tuple ``(MSE, SSIM, num_bits_different, faceCompare)`` where the
        first two values come from ``compareImages`` on the grayscale
        images, ``num_bits_different`` is the dhash bit distance and
        ``faceCompare`` is a fixed placeholder (face comparison is disabled).
    """
    original = cv2.imread(photo1loc)
    contrast = url_to_image(photo2loc)
    # Bring the remote image to the local image's width/height before the
    # pixel-wise comparison.
    contrast = cv2.resize(contrast, (original.shape[1], original.shape[0]))

    original = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
    contrast = cv2.cvtColor(contrast, cv2.COLOR_BGR2GRAY)
    MSE, SSIM = compareImages(original, contrast)

    with Image.open(photo1loc) as image1:
        hash1 = dhash.dhash_int(image1)

    # Download the remote image for PIL, releasing the connection right
    # after the payload has been read.
    with urllib.request.urlopen(photo2loc) as response:
        newfile = io.BytesIO(response.read())
    with Image.open(newfile) as image2:
        hash2 = dhash.dhash_int(image2)

    num_bits_different = dhash.get_num_bits_different(hash1, hash2)

    # Face comparison is currently switched off; a constant stands in.
    faceCompare = 0.3

    return (MSE, SSIM, num_bits_different, faceCompare)
def compare_images(imageA, imageB, title):
    """Print the dhash similarity of two images as a JSON object.

    Both images are opened with PIL and hashed with ``dhash.dhash_int`` at
    size 8.  The number of differing bits is scaled against 128 and printed
    as a percentage under the key ``"Similarity is "``.  ``title`` is
    accepted but not used.  Nothing is returned.
    """
    first = Image.open(imageA)
    second = Image.open(imageB)
    distance = dhash.get_num_bits_different(
        dhash.dhash_int(first, 8),
        dhash.dhash_int(second, 8),
    )
    similarity = 100 - ((distance / 128) * 100)
    print(json.dumps({"Similarity is ": f"{similarity}%"}))
def main(arguments):
    """Add an image to a database directory unless a similar one exists.

    The input image is hashed once with ``dhash.dhash_int`` and compared
    with every file in the database directory that PIL can open.  A
    similarity score of ``1 - differing_bits / total_bits`` is computed for
    each pair; any score above ``--threshold`` is reported as a match.  When
    nothing matches, the input file is copied into the database directory.

    Args:
        arguments: list of command-line arguments (without the program
            name): ``infile``, ``dbDir`` and optional ``--threshold``.

    Raises:
        SystemExit: with status 1 when the input file cannot be opened as an
            image (missing file or unsupported format).
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('infile', help="path to file to compare")
    parser.add_argument('dbDir', help="path to 'Database' directory")
    parser.add_argument('--threshold', help="threshold val (default 0.75)",
                        type=float, default=0.75)

    args = parser.parse_args(arguments)
    baseDir = os.path.abspath(args.dbDir)

    # dhash_int is called with its default size of 8, which produces a
    # 2 * 8 * 8 = 128-bit value.  The score must be normalised by that fixed
    # width: int.bit_length() shrinks with leading zero bits and is 0 for an
    # all-zero hash.
    hashBits = 2 * 8 * 8

    try:
        imTest = PIL.Image.open(args.infile)
    except OSError:
        # Covers a missing file as well as a file PIL cannot identify.
        print("An error occured trying to read the test file. Can't compare")
        raise SystemExit(1)

    # Hash the input once instead of once per database entry.
    with imTest:
        dhTest = dhash.dhash_int(imTest)

    match = False
    for dbImg in os.listdir(baseDir):
        try:
            imDB = PIL.Image.open(os.path.join(baseDir, dbImg))
        except OSError:
            # Not an image PIL can open (or a directory): ignore the entry.
            continue
        with imDB:
            dhDB = dhash.dhash_int(imDB)

        score = 1 - dhash.get_num_bits_different(dhTest, dhDB) / hashBits
        if score > args.threshold:
            print("Matching image found in database directory: ", dbImg,
                  " (score: ", str(score), ").")
            match = True

    if match:
        print("Match(es) found. Not adding")
    else:
        print("No match found. Adding ", args.infile,
              " to database directory: ", baseDir)
        shutil.copy(args.infile, baseDir)
Example #6
0
    async def save_hashes(self, message: discord.Message):
        """Hash and store every acceptable image attachment of ``message``.

        Attachments larger than ``max_size`` (config value, multiplied by
        1024) or whose file extension is not one of jpg/jpeg/png/webp/gif
        are skipped, as are downloads PIL cannot open.  For each remaining
        attachment the dhash is stored through ``repo_i.add_image`` as a hex
        string and then yielded as an integer.
        """
        accepted_extensions = ("jpg", "jpeg", "png", "webp", "gif")

        for attachment in message.attachments:
            size_limit = self.config.get("max_size") * 1024
            if attachment.size > size_limit:
                continue

            suffix = attachment.filename.rsplit(".", 1)[-1].lower()
            if suffix not in accepted_extensions:
                continue

            buffer = BytesIO()
            await attachment.save(buffer)

            try:
                image = Image.open(buffer)
            except OSError:
                continue

            image_hash = dhash.dhash_int(image)
            repo_i.add_image(
                channel_id=message.channel.id,
                message_id=message.id,
                attachment_id=attachment.id,
                dhash=hex(image_hash),
            )
            yield image_hash
Example #7
0
def hashImg(conn, imgUrl, url):
    """Download an image and return its dhash.

    Args:
        conn: DB-API connection holding a ``Posts`` table with a ``Url``
            column.
        imgUrl: address of the image to download.
        url: value of ``Posts.Url`` identifying the post; the row is deleted
            when the download fails.

    Returns:
        The integer produced by ``dhash.dhash_int``, or the string
        ``'invalid'`` when the download failed.  Errors raised while opening
        or hashing the downloaded bytes are not caught here.
    """
    try:
        request = Request(str(imgUrl), headers={'User-Agent': user_agent})
        # Close the HTTP response as soon as the payload has been read.
        with urlopen(request, context=context) as response:
            f = BytesIO(response.read())
    except Exception:
        # A failed download marks the post as dead and removes it.
        c = conn.cursor()
        try:
            c.execute('DELETE FROM Posts WHERE Url = ?;', (str(url),))
            conn.commit()
        finally:
            c.close()
        return 'invalid'

    img = Image.open(f)
    return dhash.dhash_int(img)
Example #8
0
def hashImg(conn, imgUrl, url):
    """Download an image and return its dhash, pruning dead posts.

    Args:
        conn: DB-API connection holding a ``Posts`` table with a ``Url``
            column.
        imgUrl: address of the image to download.
        url: value of ``Posts.Url`` identifying the post.

    Returns:
        The integer produced by ``dhash.dhash_int``, or the string
        ``'invalid'`` when downloading, opening or hashing failed.

    On any failure the post's row is deleted.  Failures other than
    ``HTTPError`` are additionally appended, with their traceback, to
    ``dedLink.txt`` in the current working directory.
    """
    imgHash = 'invalid'
    try:
        request = Request(str(imgUrl), headers={'User-Agent': user_agent})
        # Close the HTTP response as soon as the payload has been read.
        with urlopen(request, context=context) as response:
            f = BytesIO(response.read())
        img = Image.open(f)
        imgHash = dhash.dhash_int(img)
    except Exception as err:
        if not isinstance(err, HTTPError):
            # Unexpected failure: keep the traceback for later inspection.
            with open('dedLink.txt', 'a') as log:
                log.write('{}\n{}\n'.format(traceback.format_exc(), url))
        c = conn.cursor()
        try:
            c.execute('DELETE FROM Posts WHERE Url = ?;', (str(url),))
            conn.commit()
        finally:
            c.close()
    return imgHash
Example #9
0
def hash_vid(conn, vid_url, url):
    """Return the dhash of every frame of a video as one string.

    Args:
        conn: DB-API connection holding a ``Posts`` table with a ``Url``
            column.
        vid_url: mapping whose ``['reddit_video']['fallback_url']`` entry is
            passed to ``av.open``.
        url: value of ``Posts.Url`` identifying the post.

    Returns:
        The per-frame hashes, each followed by a single space, concatenated
        in decode order; or the string ``'invalid'`` when opening or
        decoding failed.

    On failure the post's row is deleted.  Unless the error text contains
    ``'403'``, the traceback is also appended to ``dedLink.txt`` in the
    current working directory.
    """
    frame_hashes = []
    try:
        container = av.open(vid_url['reddit_video']['fallback_url'])
        for frame in container.decode(video=0):
            frame_hashes.append(str(dhash.dhash_int(frame.to_image())))
    except Exception as e:
        if '403' not in str(e):
            # Unexpected failure: keep the traceback for later inspection.
            with open('dedLink.txt', 'a') as log:
                log.write('{}\n{}\n'.format(traceback.format_exc(), vid_url))
        c = conn.cursor()
        try:
            c.execute('DELETE FROM Posts WHERE Url = ?;', (str(url),))
            conn.commit()
        finally:
            c.close()
        return 'invalid'
    return ''.join(frame_hash + ' ' for frame_hash in frame_hashes)
def find_image_label(image_path, char_dict_hash):
    """Look up the label whose stored dhash equals the image's dhash.

    The image at ``image_path`` is opened with PIL and hashed with
    ``dhash.dhash_int``.  The hash is used as a key into ``char_dict_hash``;
    when it is absent the string ``'not find this character'`` is returned
    instead.
    """
    image_code = dhash.dhash_int(Image.open(image_path))
    return char_dict_hash.get(image_code, 'not find this character')
Example #11
0
def d_hash_compute(file):
    """
    Compute the dhash of an image file with hash size 32.

    The file is closed again once the hash has been computed.

    :param file: string filename
    :return: int, the value returned by ``dhash.dhash_int(image, size=32)``
    """
    with Image.open(file) as image:
        return dhash.dhash_int(image, size=32)
Example #12
0
def get_dhash(filename):
    """Return the dhash (as an int) of the image stored at ``filename``.

    The hash size controls sensitivity: a larger size is more sensitive.
    Original author's measurements on their own collection: 215 duplicates
    detected at size 8, 160 at size 16 and 160 at size 32.
    """
    hash_size = 16
    with Image.open(filename) as picture:
        return dhash.dhash_int(picture, size=hash_size)
def url_to_img_hash(url):
    """Return the dhash of the image at ``url``, or None on any error.

    The image is obtained through ``url_to_image``.  Any exception raised
    while fetching or hashing is printed and swallowed.
    """
    try:
        return dhash.dhash_int(url_to_image(url))
    except Exception as error:
        print(error)
        return None
Example #14
0
    def get_existing_dhashes(self, img_dir):
        """Return the dhash of every ``*.png`` file directly in ``img_dir``.

        Each file is read with ``cv2.imread``, wrapped with
        ``Image.fromarray`` and hashed at ``HASH_SIZE``.  The hashes are
        returned as a list in glob order.
        """
        return [
            dhash.dhash_int(
                Image.fromarray(cv2.imread(str(png_path))), HASH_SIZE)
            for png_path in Path(img_dir).glob("*.png")
        ]
Example #15
0
    def record(self, new_image):
        """Save ``new_image`` when its dhash has not been recorded before.

        The image array is hashed at ``HASH_SIZE``.  A hash already present
        in ``self.existing_dhashes`` means nothing is written.  Otherwise
        the hash is remembered and the image is written to
        ``self.output_dir`` as ``<hash>.png``.
        """
        image_hash = dhash.dhash_int(Image.fromarray(new_image), HASH_SIZE)
        if image_hash in self.existing_dhashes:
            return

        self.existing_dhashes.append(image_hash)
        destination = self.output_dir / f"{image_hash}.png"
        print("Writing image to ", destination)
        cv2.imwrite(str(destination), new_image)
Example #16
0
def get_image_info(path):
    """Build an ``ImageInfo`` record for the image stored at ``path``.

    The record carries the path, the image's dhash, the file size in MiB
    (bytes divided by 1024 * 1024) and the ``(width, height)`` pair.
    """
    bytes_per_mb = 1024 * 1024
    with Image(filename=path) as image:
        return ImageInfo(
            path=path,
            dhash=dhash.dhash_int(image),
            file_size_in_mb=os.path.getsize(path) / bytes_per_mb,
            dims=(image.width, image.height),
        )
Example #17
0
def hashGif(conn, gifUrl, url):
    """Download a GIF and return the dhash of every frame as one string.

    Args:
        conn: DB-API connection holding a ``Posts`` table with a ``Url``
            column.
        gifUrl: address of the GIF to download.
        url: value of ``Posts.Url`` identifying the post.

    Returns:
        The per-frame hashes, each followed by a single space, concatenated
        in frame order.  When an ``HTTPError`` occurs the hashes gathered so
        far are returned (normally the empty string); any other failure
        returns the string ``'invalid'``.

    On any failure the post's row is deleted.  Failures other than
    ``HTTPError`` are additionally appended, with their traceback, to
    ``dedLink.txt`` in the current working directory.
    """
    frameHashes = []
    try:
        request = Request(str(gifUrl), headers={'User-Agent': user_agent})
        # Close the HTTP response as soon as the payload has been read.
        with urlopen(request, context=context) as response:
            f = BytesIO(response.read())
        frame = Image.open(f)
        while True:
            # Hash each frame exactly once.
            frameHashes.append(str(dhash.dhash_int(frame)))
            try:
                # len(frameHashes) is the index of the next frame to visit.
                frame.seek(len(frameHashes))
            except EOFError:
                break
    except Exception as err:
        httpFailure = isinstance(err, HTTPError)
        if not httpFailure:
            # Unexpected failure: keep the traceback for later inspection.
            with open('dedLink.txt', 'a') as log:
                log.write('{}\n{}\n'.format(traceback.format_exc(), url))
        c = conn.cursor()
        try:
            c.execute('DELETE FROM Posts WHERE Url = ?;', (str(url),))
            conn.commit()
        finally:
            c.close()
        if not httpFailure:
            return 'invalid'
        # NOTE(review): the HTTPError path has never returned 'invalid';
        # confirm with callers whether that asymmetry is intended.
    return ''.join(frameHash + ' ' for frameHash in frameHashes)
def judge_image_similarity(image_path, chr_name, hash_list, bk_tree,
                           diff_threshold):
    """List the names of stored characters that resemble an image.

    The image at ``image_path`` is hashed with ``dhash.dhash_int`` and
    ``bk_tree.find`` is queried with a fixed distance of 30.  Every hit
    whose distance is strictly below ``diff_threshold`` is turned into
    ``'<name>_diff: <distance>'``.  The name is taken from ``chr_name`` at
    the position where the hit's hash first occurs in ``hash_list``.

    Returns:
        The formatted strings, in the order ``bk_tree.find`` produced them.
    """
    image_code = dhash.dhash_int(Image.open(image_path))
    neighbours = bk_tree.find(image_code, 30)
    return [
        chr_name[hash_list.index(chr_code)] + '_diff: ' + str(diff)
        for diff, chr_code in neighbours
        if diff < diff_threshold
    ]
Example #19
0
def url_to_img_hash(url: str) -> int:
    """Fetch the image behind ``url`` and return its dhash.

    Arguments:
        url {str} -- address of the image, handed to ``url_to_image``

    Returns:
        int -- result of ``dhash.dhash_int`` on the fetched image
    """
    return dhash.dhash_int(url_to_image(url))
Example #20
0
def get_image_infos(paths):
    """Build one ``ImageInfo`` record per entry of ``paths``.

    Each record carries the path, the image's dhash, the file size in MiB
    (bytes divided by 1024 * 1024) and the ``(width, height)`` pair.  The
    records are returned as a list in input order.
    """
    bytes_per_mb = 1024 * 1024
    collected = []
    for image_path in paths:
        with Image(filename=image_path) as img:
            info = ImageInfo(
                path=image_path,
                dhash=dhash.dhash_int(img),
                file_size_in_mb=os.path.getsize(image_path) / bytes_per_mb,
                dims=(img.width, img.height),
            )
            collected.append(info)
    return collected
def build_dict_hash(dict_path):
    """Map the dhash of every PNG under ``dict_path`` to its base name.

    Files are enumerated with ``get_all_files``.  Only paths whose last
    three characters are ``png`` are considered.  Each image is converted to
    grayscale before hashing, and a falsy hash (zero) is skipped.  The name
    is the last ``/``-separated path component without its final four
    characters.  The first name seen for a given hash wins.
    """
    char_dict_hash = {}
    for f_path in get_all_files(dict_path):
        if f_path[-3:] != 'png':
            continue
        grey_image = Image.open(f_path).convert('L')
        represent_hash = dhash.dhash_int(grey_image)
        if not represent_hash:
            continue
        chr_name = f_path.split('/')[-1][:-4]
        # setdefault keeps an existing entry, so earlier files take priority.
        char_dict_hash.setdefault(represent_hash, chr_name)
    return char_dict_hash
def sortPicsBySimilarity(allPicsDir, imageSize, threshold, sortedPicsDir,
                         similarPicsDir):
    """Copy pictures, ordered by dhash, and collect near-duplicate neighbours.

    For every directory under ``allPicsDir`` the loadable images are hashed
    with ``dhash.dhash_int`` and sorted by hash value.  Each image is copied
    into the mirrored directory under ``sortedPicsDir`` with a name prefixed
    by its rank and, from the second image on, by its bit-difference
    percentage to the previous image.  When that percentage is at most
    ``threshold``, both the image and its predecessor are also copied into
    the mirrored directory under ``similarPicsDir``.

    Files that cannot be loaded or hashed are skipped.

    Args:
        allPicsDir: root of the directory tree to read.
        imageSize: ``size`` argument for ``dhash.dhash_int``.
        threshold: maximum difference percentage for "similar".
        sortedPicsDir: root of the output tree holding all sorted copies.
        similarPicsDir: root of the output tree holding similar copies.
    """
    # Divisor used to turn a bit count into a percentage.
    hashBits = imageSize * imageSize * 2

    for path, _dirs, filelist in os.walk(allPicsDir):
        hashed = []
        for filename in filelist:
            image = load_image(os.path.join(path, filename))
            if image is None:
                continue
            try:
                imageHash = dhash.dhash_int(image, size=imageSize)
            except Exception:
                continue
            # Only record files that produced a hash of their own.
            hashed.append((imageHash, filename))

        # relpath is robust to a trailing separator in allPicsDir, unlike
        # slicing the prefix off by length.
        relPath = os.path.relpath(path, allPicsDir)
        if relPath == os.curdir:
            relPath = ''
        sortedPath = os.path.join(sortedPicsDir, relPath)
        similarPath = os.path.join(similarPicsDir, relPath)
        os.makedirs(sortedPath, exist_ok=True)
        os.makedirs(similarPath, exist_ok=True)

        hashed.sort(key=lambda entry: entry[0])
        lastDiff = None
        for count, (imageHash, filename) in enumerate(hashed):
            origFile = os.path.join(path, filename)
            if count == 0:
                newFileName = str(count) + "_" + filename
            else:
                lastHash, lastFilename = hashed[count - 1]
                num_bits_different = dhash.get_num_bits_different(
                    lastHash, imageHash)
                diff = 100 * num_bits_different / hashBits
                newFileName = (str(count) + "_" + str(diff) + "%_" +
                               filename)
                if diff <= threshold:
                    shutil.copy(origFile,
                                os.path.join(similarPath, newFileName))
                    # Rebuild the name the predecessor was given so that it
                    # is copied next to its look-alike exactly once.
                    if count == 1:
                        lastNewFileName = str(count - 1) + "_" + lastFilename
                    else:
                        lastNewFileName = (str(count - 1) + "_" +
                                           str(lastDiff) + "%_" +
                                           lastFilename)
                    lastNewFilePath = os.path.join(similarPath,
                                                   lastNewFileName)
                    if not os.path.exists(lastNewFilePath):
                        shutil.copy(os.path.join(path, lastFilename),
                                    lastNewFilePath)
                lastDiff = diff
            shutil.copy(origFile, os.path.join(sortedPath, newFileName))
Example #23
0
def prepare_test_data(input_folder, output_file):
    """Pack the images of ``input_folder`` into the HDF5 file ``output_file``.

    Four datasets are created: ``images_test`` (N x 224 x 224 x 3, int8),
    ``pids_test`` (the file names), ``pixels_test`` (int64, one value per
    image) and ``Hash_test`` (one dhash value per image).  Files for which
    the resize/convert/store step raises are printed and deleted from disk.
    """
    hdf5_file = h5py.File(output_file, "w")

    print('Counting files and parsing meta data...')

    dir = input_folder
    # NOTE(review): ``pids`` and ``images`` are rebound on every iteration, so
    # only the last directory visited by os.walk is kept, and its file names
    # are joined to the top-level folder rather than to ``root``.  This is
    # only consistent for a folder without sub-directories -- confirm.
    for root, dirs, files in os.walk(dir):
        pids = files
        images = [*map(lambda x: dir + '/' + x, files)]

    train_shape = (len(images), 224, 224, 3)

    # NOTE(review): the dataset is signed 8-bit while the images written into
    # it are presumably 8-bit unsigned pixel data -- verify the dtype.
    hdf5_file.create_dataset("images_test", train_shape, np.int8)

    hdf5_file.create_dataset("pids_test", [len(pids)],
                             dtype=h5py.special_dtype(vlen=str))
    hdf5_file['pids_test'][...] = pids

    hdf5_file.create_dataset("pixels_test", [len(pids)], dtype=np.int64)
    hdf5_file.create_dataset('Hash_test', [len(pids)],
                             dtype=h5py.special_dtype(vlen=str))

    # Process every image path collected above.
    for i, addr in enumerate(images):
        # Progress message every 1000 images (not printed for i == 0).
        if i % 1000 == 0 and i > 1:
            print('Train data: {}/{}'.format(i, len(images)))

        # Load the image twice: with cv2 for the pixel data and with PIL for
        # the hash.  The PIL copy is converted to grayscale and shrunk to 9x9
        # before being passed to dhash.
        img = cv2.imread(addr)
        Hash_image = Image.open(addr)
        Hash_image = Hash_image.convert('L').resize((9, 9), Image.ANTIALIAS)
        Hash_valu = dhash.dhash_int(Hash_image)
        hdf5_file['Hash_test'][i, ...] = Hash_valu
        try:
            # Value stored in ``pixels_test``; taken before resizing.
            image_size = img.size
            # Resize to 224x224 and convert from BGR to RGB before storing.
            img = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            hdf5_file["images_test"][i, ...] = img
            hdf5_file['pixels_test'][i, ...] = image_size

        except:
            # Any failure above prints the path and DELETES the source file.
            print(addr)
            os.remove(addr)

    # NOTE(review): the file is not closed if an exception escapes earlier.
    hdf5_file.close()
    print('finished')
Example #24
0
    def run(self):
        """Grab frames from ``self.url`` and save those with wanted objects.

        The loop runs until ``self.max_blank`` consecutive frames produced
        nothing new or ``self.max_save`` frames were saved.  Each iteration
        opens the stream, reads a single frame and releases the stream
        again.  A frame is saved as ``<prefix>_<dhash>.jpg`` in
        ``self.output_dir`` when the detector reports at least one
        prediction whose name is in ``self.class_names`` and no file of that
        name exists yet.  ``self.running`` is set to False before returning.
        """
        self.blank_count = 0
        self.saved_count = 0
        self.total_count = 0

        while (self.blank_count < self.max_blank
               and self.saved_count < self.max_save):

            # One short-lived capture per frame.
            capture = cv2.VideoCapture(self.url, cv2.CAP_GSTREAMER)
            _, frame = capture.read()
            capture.release()

            # No frame means the stream ended: stop the worker.
            if frame is None:
                print("Worker: Stopped receiving frames. Received: ",
                      self.total_count, "Saved: ", self.saved_count)
                self.running = False
                return

            self.total_count += 1

            with detector_lock:
                preds = self.detector.predict([frame])[0]

            wanted = [pred.name in self.class_names for pred in preds]
            if not any(wanted):
                self.blank_count += 1
                continue

            d_hash = dhash.dhash_int(Image.fromarray(frame),
                                     self.DHASH_SIZE)
            filename = self.filename_prefix + "_" + str(d_hash) + ".jpg"
            save_path = Path(self.output_dir) / filename

            # An identical hash was saved before: count it as a blank frame.
            if save_path.exists():
                print("Worker: Tried to save image with same path!")
                self.blank_count += 1
                continue

            cv2.imwrite(str(save_path), frame)
            print("Worker: Saving!", filename)
            self.saved_count += 1
            self.blank_count = 0

        print("Worker: Reached maximum frames.  Received",
              self.total_count,
              "Saved: ", self.saved_count)
        self.running = False
def build_dict_tree(dict_path):
    """Hash every PNG under ``dict_path`` and index the hashes in a BK-tree.

    Files are enumerated with ``get_all_files``.  Only paths whose last
    three characters are ``png`` are hashed.  Each image is converted to
    grayscale first, and a falsy hash (zero) is skipped.

    Returns:
        A tuple ``(chr_name, hash_list, bk_tree)``.  ``chr_name`` and
        ``hash_list`` are parallel lists of base names and hashes.
        ``bk_tree`` is a ``pybktree.BKTree`` over ``hash_list`` using
        Hamming distance.  It is ``None`` when the tree was never built,
        i.e. no file was found or every file was a PNG with a zero hash.
    """
    hash_list = []
    chr_name = []
    tree_requested = False
    for f_path in get_all_files(dict_path):
        if f_path[-3:] == 'png':
            chr_image = Image.open(f_path).convert('L')
            represent_hash = dhash.dhash_int(chr_image)
            if not represent_hash:
                continue
            hash_list.append(represent_hash)
            chr_name.append(f_path.split('/')[-1][:-4])
        tree_requested = True

    # Build the tree once from the final list rather than rebuilding it for
    # every file.  The result equals the tree built on the last iteration.
    bk_tree = None
    if tree_requested:
        bk_tree = pybktree.BKTree(pybktree.hamming_distance, hash_list)
    return chr_name, hash_list, bk_tree
Example #26
0
    async def saveMessageHashes(self, message: disnake.Message):
        """Hash and store every image attachment of ``message``.

        Each attachment is downloaded into memory.  Downloads PIL cannot
        open are skipped.  For the rest, the dhash is stored through
        ``repo_i.add_image`` as a hex string and then yielded as an integer.
        """
        for attachment in message.attachments:
            buffer = BytesIO()
            await attachment.save(buffer)

            try:
                image = Image.open(buffer)
            except OSError:
                # The attachment is not an image.
                continue

            img_hash = dhash.dhash_int(image)
            repo_i.add_image(
                channel_id=message.channel.id,
                message_id=message.id,
                attachment_id=attachment.id,
                dhash=hex(img_hash),
            )
            yield img_hash
Example #27
0
 def is_image_duplicate(self, message):
     """Detect whether any image linked in the message was posted before.

     Every URL from ``message.get_urls()`` containing ``png`` or ``jpg`` is
     downloaded and hashed with ``dhash.dhash_int``.  The hash is looked up
     in ``self.images`` within a distance of 1, then added to it, and the
     updated tree is stored through ``self.redis_connection``.  Errors for
     a single link are printed and do not stop the remaining links.

     Returns:
         True when at least one link matched an already known image.  The
         result of an earlier link is not discarded by a later one.
     """
     search_dist = 1
     duplicate_found = False
     for link in message.get_urls():
         if 'png' not in link and 'jpg' not in link:
             continue
         try:
             response = requests.get(link)
             image = Image.open(BytesIO(response.content))
             img_hash = dhash.dhash_int(image)
             # It does not matter which link of the message is the repost.
             matches = self.images.find(img_hash, search_dist)
             duplicate_found = duplicate_found or len(matches) > 0
             self.images.add(img_hash)
             self.redis_connection.set_images_tree(self.images)
         except Exception as e:
             print(e)
     return duplicate_found
Example #28
0
def hashVid(conn, vidUrl, url):
    """Return the dhash of every frame of a video as one string.

    Args:
        conn: DB-API connection holding a ``Posts`` table with a ``Url``
            column.
        vidUrl: mapping whose ``['reddit_video']['fallback_url']`` entry is
            passed to ``av.open``.
        url: value of ``Posts.Url`` identifying the post; the row is deleted
            when the video cannot be opened.

    Returns:
        The per-frame hashes, each followed by a single space, concatenated
        in decode order; or the string ``'invalid'`` when opening failed.
        Errors raised while decoding frames are not caught here.
    """
    try:
        container = av.open(vidUrl['reddit_video']['fallback_url'])
    except Exception:
        # A video that cannot be opened marks the post as dead.
        c = conn.cursor()
        try:
            c.execute('DELETE FROM Posts WHERE Url = ?;', (str(url),))
            conn.commit()
        finally:
            c.close()
        return 'invalid'

    return ''.join(
        '{} '.format(dhash.dhash_int(frame.to_image()))
        for frame in container.decode(video=0)
    )
Example #29
0
    async def saveMessageHashes(self, message: discord.Message):
        """Hash and store every image attachment of ``message``.

        Each attachment is downloaded into memory first; there is no check
        beforehand that it is an image.  Downloads PIL cannot open are
        skipped.  For the rest, the dhash is stored through
        ``repo_i.add_image`` as a hex string and then yielded as an integer.
        """
        for attachment in message.attachments:
            # FIXME Can we check that the file is image before downloading it?
            payload = BytesIO()
            await attachment.save(payload)

            try:
                picture = Image.open(payload)
            except OSError:
                # The attachment is not an image.
                continue

            image_hash = dhash.dhash_int(picture)
            repo_i.add_image(
                channel_id=message.channel.id,
                message_id=message.id,
                attachment_id=attachment.id,
                dhash=hex(image_hash),
            )
            yield image_hash
Example #30
0
    async def saveMessageHashes(self, message: discord.Message):
        """Hash and store every image attachment of ``message``.

        Attachments larger than ``max_size`` (config value, multiplied by
        1024) are ignored without being downloaded.  Downloads PIL cannot
        open are skipped.  For the rest, the dhash is stored through
        ``repo_i.add_image`` as a hex string and then yielded as an integer.
        """
        for attachment in message.attachments:
            size_limit = self.config.get("max_size") * 1024
            if attachment.size > size_limit:
                continue

            payload = BytesIO()
            await attachment.save(payload)

            try:
                picture = Image.open(payload)
            except OSError:
                # The attachment is not an image.
                continue

            image_hash = dhash.dhash_int(picture)
            repo_i.add_image(
                channel_id=message.channel.id,
                message_id=message.id,
                attachment_id=attachment.id,
                dhash=hex(image_hash),
            )
            yield image_hash
Example #31
0
def is_same_image(img1, img2):
    """Return True when both images produce exactly the same dhash.

    The two hashes are compared with ``dhash.get_num_bits_different``; the
    images count as the same only when no bit differs.
    """
    differing_bits = dhash.get_num_bits_different(
        dhash.dhash_int(img1),
        dhash.dhash_int(img2),
    )
    return differing_bits == 0