def hamming_calc_tn(number_of_runs, hash_directory, hamm_directory):
    """Compute normalised Hamming distances against a random original image.

    For every run one image name is drawn at random from the ``'orig'``
    entry of the first hash dictionary listed in ``hash_directory``. For each
    hash file in that directory the hash of the chosen image is compared
    with every stored hash of every platform, and the resulting nested
    dictionary ``{algorithm: {platform: {image: distance}}}`` is saved to
    ``hamm_directory`` as ``hamm_dist_tn_<image name>``.

    :param number_of_runs: number of random reference images to process
    :param hash_directory: directory holding one pickled dictionary per
        hashing algorithm (saved with ``numpy.save``)
    :param hamm_directory: output directory, created when it is missing
    """
    os.makedirs(hamm_directory, exist_ok=True)

    algorithms = ('aHash', 'dHash', 'pHash', 'wHash')
    platforms = ('orig', 'face', 'twit', 'goog', 'inst')

    for _ in range(number_of_runs):
        # Fresh result container for this run.
        hamm_dict = {
            algorithm: {platform: {} for platform in platforms}
            for algorithm in algorithms
        }

        hash_files = os.listdir(hash_directory)
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # load hash files that come from a trusted source.
        first_dict = numpy.load(os.path.join(hash_directory, hash_files[0]),
                                allow_pickle=True).item()
        rand_photo = random.choice(list(first_dict['orig'].keys()))
        print(f'Random photo chosen: {rand_photo}')

        for file in hash_files:
            # Characters 7..11 of the file name are used as the algorithm
            # key -- presumably names look like 'xxxxxxxaHash...'; TODO confirm.
            hash_alg = file[7:12]
            hash_dict = numpy.load(os.path.join(hash_directory, file),
                                   allow_pickle=True).item()
            test_hash = hex_to_hash(hash_dict['orig'][rand_photo])
            for platform, hashes in hash_dict.items():
                for image, hash_value in hashes.items():
                    hamming_distance = test_hash - hex_to_hash(hash_value)
                    # Each hex digit encodes 4 bits, so this divides by the
                    # number of bits in the stored hash string.
                    hamming_distance = round(
                        hamming_distance / (len(hash_value) * 4), 3)
                    hamm_dict[hash_alg][platform][image] = hamming_distance

        # os.path.join instead of a literal backslash keeps the output inside
        # hamm_directory on non-Windows systems as well.
        numpy.save(os.path.join(hamm_directory, 'hamm_dist_tn_' + rand_photo),
                   hamm_dict)

        print('All hashes calculated and saved for this run')
def test():
    """Compare an uploaded image with every stored image hash.

    On POST the uploaded ``image`` file is perceptually hashed and its
    distance to the ``hash`` column of every row in ``images`` is stored in
    the session together with the uploaded file name, followed by a redirect
    to this same view. On any other request (or after a validation error)
    the values found in the session are exposed on ``g`` and the template
    ``images/test.html`` is rendered.
    """
    if request.method == 'POST':
        # .get() so that a missing form field reaches the friendly error
        # below instead of aborting the request with a 400 response.
        file = request.files.get('image')

        db = get_db()
        error = None
        uploaded_hash = None

        if not file:
            error = 'Image is required.'
        else:
            try:
                uploaded_hash = imagehash.phash(Image.open(file))
            except OSError:
                # PIL raises an OSError subclass for unreadable image data.
                error = 'Uploaded file is not a valid image.'

        if error is None:
            images = db.execute('SELECT * FROM images').fetchall()

            # The uploaded hash is computed once and reused for every row.
            differences = [
                {
                    "title": image["title"],
                    # int() keeps the value serialisable for the session.
                    "difference": int(
                        uploaded_hash - imagehash.hex_to_hash(image['hash'])),
                }
                for image in images
            ]

            # NOTE(review): this wipes every key in the session, not only
            # the ones written below -- confirm that is intended.
            session.clear()
            session['filename'] = file.filename
            session['differences'] = differences

            return redirect(url_for('images.test'))

        flash(error)

    g.differences = session.get('differences')
    g.filename = session.get('filename')
    return render_template('images/test.html')
Exemple #3
0
def hash(img):
    """Compare a frame with the reference picture and notify the Arduino.

    The array ``img`` is converted to RGB, written to
    ``./data/test_test.jpg`` and read back before hashing, then compared
    with the perceptual hash of ``./data/pic44.jpg``. When the Hamming
    distance is at most 25 the value ``[1]`` is sent through ``arduino``.

    :param img: image data accepted by ``Image.fromarray``
    """
    image = Image.fromarray(img).convert("RGB")
    # The frame is hashed from the saved JPEG rather than from memory, as in
    # the original code -- presumably so both hashes come from JPEG-encoded
    # data; TODO confirm.
    image.save('./data/test_test.jpg')
    with Image.open('./data/test_test.jpg') as image_file:
        frame_hash = imagehash.phash(image_file)
    print(frame_hash)

    with Image.open('./data/pic44.jpg') as image_file:
        reference_hash = imagehash.phash(image_file)
    print(reference_hash)

    distance = frame_hash - reference_hash
    print('Hamming distance:', distance)

    if distance <= 25:
        print('image is similar')
        arduino.sendData([1])
        print('FOR-sleep')
    else:
        # This branch is reached when the distance exceeds the threshold, so
        # the images are NOT alike (the old message said "identical").
        print('image is different')
        print('NON-sleep')
def getHammingDistance(hash1: str, hash2: str) -> int:
    """
    Compute the distance between two hex-encoded image hashes.
    :param hash1: hex string of the first hash
    :param hash2: hex string of the second hash
    :return: the result of subtracting the two decoded hashes
    """
    first = hex_to_hash(hash1)
    second = hex_to_hash(hash2)
    return first - second
Exemple #5
0
def calculate_distance(origin, candidates):
    """Echo the scaled distance between ``origin`` and each candidate.

    Both ``origin`` and every element of ``candidates`` are converted with
    ``str`` and decoded as hex hashes. Each distance is divided by the
    squared length of the origin's hash array, and the list of results is
    written with ``click.echo``. Nothing is returned.
    """
    reference = imagehash.hex_to_hash(str(origin))
    distances = [
        float(reference - imagehash.hex_to_hash(str(item)))
        / len(reference.hash)**2
        for item in candidates
    ]
    click.echo(distances)
def worker(start, end, data, hash):
    """Subtract ``hash`` from the decoded hashes ``data[start:end]``.

    The individual differences are computed and thrown away; afterwards the
    time elapsed since the module-level ``start_time`` is printed.
    """
    print(f'start {start}')
    print(f'end  {end}')
    for index in range(start, end):
        # The result is discarded on purpose of the original code --
        # presumably this function only exists for timing; TODO confirm.
        imagehash.hex_to_hash(data[index]) - hash

    elapsed = time.time() - start_time
    print(f"--- {elapsed} seconds ---")
def pre_train(db):
    """Fit a logistic-regression duplicate classifier on stored hashes.

    Every record returned by ``db.find({})`` provides an ``_id`` and a
    comma-separated ``hash`` string. For each unordered pair of records the
    absolute distance of the first eight hash components is computed and
    labelled 1 (duplicate) or 0 according to hard-coded index ranges. The
    fitted model's coefficients and intercept are written to
    ``classifier_attributes.txt`` and the elapsed time is printed.

    :param db: collection-like object exposing ``find``
    """
    start_time = time.time()
    all_hashes = [(record["_id"], record["hash"]) for record in db.find({})]

    # dic is only needed by the disabled debug dump further down.
    dic = {}
    data = []
    for var, (curr_id, curr_raw) in enumerate(all_hashes):
        curr_hash = curr_raw.split(",")
        pairs = []
        for var1 in range(var + 1, len(all_hashes)):
            next_id, next_raw = all_hashes[var1]
            next_hash = next_raw.split(",")
            hamming = [
                abs(
                    imagehash.hex_to_hash(next_hash[loop]) -
                    imagehash.hex_to_hash(curr_hash[loop]))
                for loop in range(8)
            ]
            # NOTE(review): these index ranges encode which records of one
            # particular dataset are duplicates of each other -- they must be
            # revisited whenever the stored records change.
            is_duplicate = (
                (var < 37 and var1 < 37) or (var > 37 and var1 < 45)
                or (var > 46 and var1 < 55) or (var > 55 and var1 < 60)
                or (var > 60 and var1 < 66) or (var > 66 and var1 < 72)
                or (var > 72 and var1 < 76) or (var > 76 and var1 < 80)
                or (var > 80 and var1 < 91) or (var > 91 and var1 < 100))
            # Last column is the target: 1 = duplicates, 0 = not duplicates.
            hamming.append(1 if is_duplicate else 0)
            data.append(hamming)
            pairs.append((next_id, hamming))
        dic[curr_id] = pairs

    # Disabled debug dump:
    #   train_data.txt        <- str(dic)
    #   train_data_marked.txt <- str(data)

    data = np.array(data)
    X = data[:, :-1]  # all columns except the last
    Y = data[:, -1]  # last column of every row is the target
    logreg = linear_model.LogisticRegression()
    logreg.fit(X, Y)
    coefficients = logreg.coef_
    intercept = logreg.intercept_
    with open("classifier_attributes.txt", 'w') as text_file3:
        text_file3.write(str(coefficients) + "\n" + str(intercept))

    # Values recorded in the original source:
    # coefficients=[0.20229608,0.2128063,-0.11489182,-0.76958153,
    #               -0.2130312,-0.02579938,0.23349941,-0.06487865]
    # intercept=[ 3.88161427]
    print("Time taken for training all hashes : %s" %
          (time.time() - start_time))
Exemple #8
0
    def compare(self, h1, h2):
        '''
        Compute the similarity of two hashes.
        :param h1: hash string
        :param h2: hash string
        :return: 1 minus the hash difference divided by the squared length
            of the first hash's array
        '''
        first = imagehash.hex_to_hash(h1)
        second = imagehash.hex_to_hash(h2)
        distance = first - second
        scale = len(first.hash)**2
        return 1 - distance / scale
Exemple #9
0
def hash_diff(hash1, hash2):
    '''
    Returns the Hash Difference between :hash1: and :hash2:
    Arguments given as text (hex strings, including ``str`` subclasses) are
    first decoded with ``imagehash.hex_to_hash``; any other value is used
    as-is in the subtraction.
    '''
    # isinstance also accepts str subclasses, unlike the former type() == str.
    if isinstance(hash1, str):
        hash1 = imagehash.hex_to_hash(hash1)
    if isinstance(hash2, str):
        hash2 = imagehash.hex_to_hash(hash2)

    return hash1 - hash2
Exemple #10
0
 def findMatches(self, maxDiff):
     """Return every stored image whose hash is close to this one's.

     The distance between ``self.hash`` and each ``ImageModel`` row's hash
     is scaled to ``100 * distance / len(hash_array)**2``; rows whose scaled
     distance is strictly below ``maxDiff`` are returned in query order.

     :param maxDiff: exclusive upper bound for the scaled distance
     :return: list of matching ``ImageModel`` objects
     """
     allImages = ImageModel.objects.all()
     # Own hash is loop-invariant, so decode it once.
     own_hash = hex_to_hash(self.hash)
     results = []
     # Loop variable renamed from ``Image`` to avoid hiding a PIL import.
     for candidate in allImages:
         candidate_hash = hex_to_hash(candidate.hash)
         difference = (100 * (candidate_hash - own_hash)
                       / (len(candidate_hash.hash)**2))
         if difference < maxDiff:
             results.append(candidate)
     return results
Exemple #11
0
def search(args):
    '''Search the database for similar files.

    ``args`` is a mapping with the keys ``shelve`` (path of the shelve
    database), ``query`` (path of the query image), ``hash_name``
    (``'grayscale'`` or ``'color'``) and ``threshold`` (maximum distance; a
    negative value disables the filter). Statistics are printed and, when at
    least one file qualifies, ``feh`` is launched on the matches.

    Raises ValueError for an unknown ``hash_name``.
    '''
    hash_name = args['hash_name']
    if hash_name not in ('grayscale', 'color'):
        # Previously this fell through and failed later with a NameError.
        raise ValueError('unknown hash_name: {!r}'.format(hash_name))

    # open the shelve database and make sure it is closed again
    db = shelve.open(args["shelve"])
    try:
        db_hash = db[hash_name]
    finally:
        db.close()

    query = Image.open(args["query"])
    if hash_name == 'grayscale':
        h = imagehash.grayscale_hash(query)
    else:
        h = imagehash.color_hash(query)

    print(
        collections.Counter(len(hex_str)
                            for hex_str in db_hash).most_common())
    distances = [(h - imagehash.hex_to_hash(hex_str), hex_str)
                 for hex_str in db_hash]

    counts = collections.Counter(distance for distance, _ in distances)
    print(sorted(counts.most_common(), key=lambda item: item[0]))

    command = []
    for strength, item in sorted(distances, key=lambda pair: pair[0]):
        if args['threshold'] < 0 or strength <= args['threshold']:
            print('{} count: {} stength: {}'.format(db_hash[item][0],
                                                    len(db_hash[item]),
                                                    strength))
            command.append(db_hash[item][0])
    if command:
        subprocess.call(['feh', '-t', '-F', '-y 150', '-E 150'] + command)
Exemple #12
0
def find_similar_images(arg):
    """
    Find, for every generated PNG in ``arg``, the most similar corpus image.

    Uses phash (https://en.wikipedia.org/wiki/Perceptual_hashing). Corpus
    hashes are read from ``corpus-hashes.csv`` (columns ``hash`` and
    ``image``); when that file is missing ``generate_corpus_hashes()`` is
    called first. The best match and its dissimilarity for each generated
    image are written to ``similar_images.csv``.

    :param arg: directory containing the generated ``*.png`` files
    :return: an empty tuple
    """
    generated_images = glob.glob(arg + "/*.png")
    hashfile = Path("corpus-hashes.csv")
    if not hashfile.is_file():
        print("Hashvalues for corpus are not precomputed.")
        # presumably writes corpus-hashes.csv, which is read right below
        generate_corpus_hashes()
    corpus = pd.read_csv(hashfile)
    print(corpus)
    targets = []
    sources = []
    dissimilarities = []
    iterator = 0
    last_operation = len(corpus) * len(generated_images)
    for image in generated_images:
        most_similar = ""
        minsim = 10000
        with Image.open(image) as target:
            targethash = imagehash.phash(target)
        for hashe, source in zip(corpus["hash"], corpus["image"]):
            dissimilarity = imagehash.hex_to_hash(hashe) - targethash
            print(str(iterator) + " / " + str(last_operation))
            iterator = iterator + 1
            if dissimilarity < minsim:
                minsim = dissimilarity
                most_similar = source
        sources.append(most_similar)
        targets.append(image)
        # Record the distance of the best match; the previous code stored
        # whatever distance happened to be computed last.
        dissimilarities.append(minsim)

    pd.DataFrame(
        data={"source": sources, "target": targets,
              "dissimilarity": dissimilarities},
        columns=["source", "target", "dissimilarity"],
    ).to_csv("similar_images.csv")
    return ()
Exemple #13
0
    def get_hash(self, filename):
        """Return the average hash of ``filename``, using the JSON cache.

        The absolute path is looked up in ``self.jsondata`` first: a cached
        value starting with ``"Error"`` yields ``None``, any other cached
        value is decoded and returned. Otherwise the image is opened and
        hashed, the hex string is cached and ``self.save()`` is called.
        When hashing fails ``None`` is returned and, if the module-level
        ``save_errors`` flag is true, the error text is cached instead.
        """
        filename = os.path.abspath(filename)
        if filename in self.jsondata:
            cached = self.jsondata[filename]
            if cached.startswith("Error"):
                return None
            return imagehash.hex_to_hash(cached)

        image_hash = None
        try:
            image = PIL.Image.open(filename)
            image_hash = imagehash.average_hash(image)
            self.jsondata[filename] = str(image_hash)
        except Exception as ex:
            # Exception rather than BaseException so that Ctrl-C and
            # SystemExit are not swallowed while scanning files.
            image_hash = None
            print(colorama.Back.RED + "Exception %s when processing %s" %
                  (str(ex), filename))
            if save_errors:
                self.jsondata[filename] = "Error: %s" % str(ex)
        self.save()
        return image_hash
def search(args):
    '''Search the database for similar files.

    ``args`` is a mapping with the keys ``shelve`` (path of the shelve
    database), ``query`` (path of the query image), ``hash_name``
    (``'grayscale'`` or ``'color'``) and ``threshold`` (maximum distance; a
    negative value disables the filter). Statistics are printed and, when at
    least one file qualifies, ``feh`` is launched on the matches.

    Raises ValueError for an unknown ``hash_name``.
    '''
    hash_name = args['hash_name']
    if hash_name not in ('grayscale', 'color'):
        # Previously this fell through and failed later with a NameError.
        raise ValueError('unknown hash_name: {!r}'.format(hash_name))

    # open the shelve database and make sure it is closed again
    db = shelve.open(args["shelve"])
    try:
        db_hash = db[hash_name]
    finally:
        db.close()

    query = Image.open(args["query"])
    if hash_name == 'grayscale':
        h = imagehash.grayscale_hash(query)
    else:
        h = imagehash.color_hash(query)

    print(collections.Counter(len(hex_str) for hex_str in db_hash).most_common())
    distances = [(h - imagehash.hex_to_hash(hex_str), hex_str) for hex_str in db_hash]

    counts = collections.Counter(distance for distance, _ in distances)
    print(sorted(counts.most_common(), key=lambda item: item[0]))

    command = []
    for strength, item in sorted(distances, key=lambda pair: pair[0]):
        if args['threshold'] < 0 or strength <= args['threshold']:
            print('{} count: {} stength: {}'.format(db_hash[item][0], len(db_hash[item]), strength))
            command.append(db_hash[item][0])
    if command:
        subprocess.call(['feh', '-t', '-F', '-y 150', '-E 150'] + command)
Exemple #15
0
 def unserialize(cls, data):
     """Build an instance from the mapping ``data``.

     Reads the keys ``path``, ``date`` (may be ``None``), ``hash`` (hex
     string), ``quality`` and ``size`` (mapping with ``w`` and ``h``) and
     passes the converted values positionally to ``cls``.
     """
     path = pathlib.Path(data["path"])
     raw_date = data["date"]
     if raw_date is None:
         date = None
     else:
         date = pendulum.parse(raw_date)
     hash_ = imagehash.hex_to_hash(data["hash"])
     quality = data["quality"]
     dimensions = data["size"]
     size = (dimensions["w"], dimensions["h"])
     return cls(path, date, hash_, quality, size)
Exemple #16
0
def load_hashes(path):
    """Load the JSON object at ``path`` into the module-level ``_hashes``.

    Every key of the JSON object is decoded from its hex form into an image
    hash; the values are kept unchanged.
    """
    global _hashes
    with open(path, "r", encoding="utf-8") as f:
        raw_entries = json.load(f)
        decoded = {}
        for key, value in raw_entries.items():
            decoded[imagehash.hex_to_hash(key)] = value
        _hashes = decoded
Exemple #17
0
def search_hash(db, hash_str, skip_array):
    """Find stored frame hashes that are close to ``hash_str``.

    ``db`` maps ids to video records. Records whose id is listed in
    ``skip_array`` or whose ``hashed`` field is ``False`` are ignored. For
    every entry of a record's ``hashes`` list the distance to the decoded
    ``hash_str`` is computed, and entries strictly below the module-level
    ``threshold`` are reported.

    :return: list of dicts with the keys ``id``, ``t`` and ``distance``
    """
    image_hash = imagehash.hex_to_hash(hash_str)
    skip_set = set(skip_array)
    matches = []
    for video_id in db:
        video = db[video_id]
        if video_id in skip_set or video['hashed'] is False:
            continue
        for entry in video['hashes']:
            distance = imagehash.hex_to_hash(entry['hash']) - image_hash
            if distance < threshold:
                matches.append({
                    'id': video_id,
                    't': entry['t'],
                    'distance': distance,
                })

    return matches
Exemple #18
0
def main(args):
    """Score every video frame by how often its hash occurs.

    Frame hashes are either loaded from the JSON files named in
    ``args.use`` or computed with dhash from the video ``args.file``. With
    ``args.save`` the hashes are written to ``<args.file>.json``; with
    ``args.common_frames`` the first frame of each of the N largest groups
    of identical hashes is saved as ``<rank>.jpg``. Finally the per-frame
    occurrence counts are printed as JSON.
    """
    if args.use:
        frames = []
        for filename in sorted(args.use):
            with open(filename) as f:
                stored = json.load(f)
                frames.extend(
                    imagehash.hex_to_hash(frame) for frame in stored)
    else:
        clip = VideoFileClip(args.file)
        frames = [
            imagehash.dhash(Image.fromarray(frame))
            for frame in clip.iter_frames()
        ]

    if args.save:
        with open(args.file + '.json', 'w') as f:
            json.dump([str(frame) for frame in frames], f)

    # Group frame indices by the string form of their hash.
    duplicate_lists = defaultdict(list)
    for index, frame in enumerate(frames):
        duplicate_lists[str(frame)].append(index)

    if args.common_frames:
        by_size = sorted(duplicate_lists.values(), reverse=True, key=len)
        most_common_frames = by_size[:args.common_frames]

        clip = VideoFileClip(args.file)
        for rank, frame_list in enumerate(most_common_frames):
            timestamp = frame_list[0] / clip.fps
            frame = Image.fromarray(clip.get_frame(timestamp))
            frame.save(str(rank) + '.jpg')

    scores = [len(duplicate_lists[str(frame)]) for frame in frames]
    print(json.dumps(scores))
Exemple #19
0
def compare():
    """Report near-duplicate images between a text file and the database.

    Target hashes are read from ``image_hash.txt`` (one
    ``<hex hash> : <image name>`` entry per line), original hashes from the
    ``hash`` table (columns ``phash`` and ``image_name``). Every pair whose
    distance is at most 10 is printed and written to ``\\result.txt`` as
    ``<target> - <original> : <distance>``.
    """
    tar_dic = {}
    with open('image_hash.txt', 'r') as f:
        for line in f:
            # Strip only the line terminator, so a final line without a
            # newline no longer loses its last character.
            parts = line.rstrip('\n').split(' : ')
            tar_hash = imagehash.hex_to_hash(parts[0])
            tar_dic[tar_hash] = parts[1]

    conn = pymysql.connect(host='localhost',
                           user='******',
                           password='******',
                           db='hash_test',
                           charset='utf8')
    try:
        curs = conn.cursor()
        curs.execute("SELECT phash, image_name FROM hash")
        result = curs.fetchall()
    finally:
        conn.close()

    org_dic = {}
    # Rows are unpacked directly instead of parsing the tuple's repr, which
    # broke on names containing quotes. Assumes the default tuple cursor
    # returning text columns -- TODO confirm.
    for phash, image_name in result:
        org_dic[imagehash.hex_to_hash(phash)] = image_name

    resultList = []

    for oh, oin in org_dic.items():
        for th, tin in tar_dic.items():
            diff_hash = th - oh
            # The former <=3 / 4..6 / 7..10 branches were identical.
            if diff_hash <= 10:
                entry = tin + " - " + oin + " : " + str(diff_hash)
                resultList.append(entry)
                print(entry)

    with open("\\result.txt", "w") as f:
        for reList in resultList:
            f.write(reList + "\n")
Exemple #20
0
def get_hash_search_index(search_index_with_hash_as_str):
    """Return a copy of the index whose hex-string keys are decoded hashes.

    Values are carried over unchanged.
    """
    return {
        imagehash.hex_to_hash(hash_as_str):
            search_index_with_hash_as_str[hash_as_str]
        for hash_as_str in search_index_with_hash_as_str
    }
Exemple #21
0
def match(theImage, maxDiff):
    """Return every stored image whose hash is close to ``theImage``'s.

    The distance between ``theImage.hash`` and each ``ImageModel`` row's
    hash is scaled to ``100 * distance / len(hash_array)**2``; rows whose
    scaled distance is strictly below ``maxDiff`` are returned in query
    order.

    :param theImage: object exposing a hex ``hash`` attribute
    :param maxDiff: exclusive upper bound for the scaled distance
    :return: list of matching ``ImageModel`` objects
    """
    allImages = ImageModel.objects.all()
    # The reference hash is loop-invariant, so decode it once.
    reference = hex_to_hash(theImage.hash)
    results = []
    # Loop variable renamed from ``Image`` to avoid hiding a PIL import.
    for candidate in allImages:
        candidate_hash = hex_to_hash(candidate.hash)
        difference = (100 * (candidate_hash - reference)
                      / (len(candidate_hash.hash)**2))
        if difference < maxDiff:
            results.append(candidate)
    return results
def hashArr(hashes):
    """Concatenate the bit arrays of several hex-encoded hashes.

    Returns ``None`` when ``hashes`` is empty; otherwise the first hash's
    array extended, in order, with ``np.concatenate`` by each following one.
    """
    combined = None
    for hex_str in hashes:
        bits = imagehash.hex_to_hash(hex_str).hash
        combined = bits if combined is None else np.concatenate(
            (combined, bits))
    return combined
Exemple #23
0
def read_hashed_from_file(filename='hash_imgs_dhash_default_filename.json',
                          filepath='dataset/upsampling/'):
    """Load ``(id, hash)`` pairs from a JSON file.

    The file holds a list of pairs whose second element is a hex string;
    that element is decoded back into an image hash.

    NOTE(review): ``filepath`` is accepted but never used -- the file is
    opened by ``filename`` alone. Confirm whether it should be joined in.
    """
    with open(filename, 'r') as filehandle:
        similar_ids = json.load(filehandle)
    # Convert back to hashes
    hash_imgs_train = []
    for pair in similar_ids:
        hash_imgs_train.append((pair[0], imagehash.hex_to_hash(pair[1])))
    return hash_imgs_train
Exemple #24
0
 def __find_similar_ad_from_pic(self, picture):
     """Return the stored ad whose picture resembles ``picture``.

     The picture at the URL ``picture`` is perceptually hashed and compared
     with the ``picturehash`` of every ``Annonce``. The first ad whose
     distance is strictly below ``self.HASH_SIMILAR_TRESHOLD`` is returned;
     ``False`` is returned when no ad matches.
     """
     new_hash = phash(Image.open(urlopen(picture)))
     hashes = [ad.picturehash for ad in Annonce.select()]
     for old_hash in hashes:
         if old_hash is None:
             continue
         if hex_to_hash(old_hash) - new_hash < self.HASH_SIMILAR_TRESHOLD:
             return Annonce.get(Annonce.picturehash == old_hash)
     # Only give up after every stored hash was checked; the old in-loop
     # ``else: return False`` stopped after the very first ad.
     return False
Exemple #25
0
 def _get_imagehash_type_from_any(
         self, anything):  # only url, ImageHash and hex str is accepted
     """Normalise ``anything`` into an image hash object.

     * a string starting with ``http`` is wrapped in ``HashedImage`` and its
       phash is returned;
     * a string containing commas is split and decoded segment by segment
       into an ``ImageMultiHash``;
     * any other string is decoded as a single hex hash;
     * ``ImageHash`` / ``ImageMultiHash`` instances are returned unchanged.

     Raises NotImplementedError for every other type.
     """
     if not isinstance(anything, str):
         if isinstance(anything,
                       (imagehash.ImageMultiHash, imagehash.ImageHash)):
             return anything
         raise NotImplementedError

     if anything.startswith('http'):
         remote = HashedImage(anything, calculate_on_init=False)
         return remote.get_phash()  # default to phash

     if "," not in anything:
         return imagehash.hex_to_hash(anything)

     segments = [
         imagehash.hex_to_hash(segment) for segment in anything.split(',')
     ]
     return imagehash.ImageMultiHash(segments)
Exemple #26
0
def is_activity(img):
    """Tell whether a fixed screen region matches the reference hash.

    The box ``(1404, 922, 1465, 964)``, given for a 1920x1080 layout, is
    passed through ``resize`` together with the image's actual size and the
    result is cropped out of ``img``. The crop's average hash must be within
    a distance of 9 of the reference hash ``183c3c7c3c2c383c``.

    :param img: PIL image of the screen
    :return: ``True`` when the region matches
    """
    # Renamed from ``os`` so the module of that name is not hidden; the
    # earlier unscaled crop was discarded immediately and has been removed.
    base_size = (1920, 1080)
    current_size = img.size

    region = img.crop(resize(base_size, current_size, (1404, 922, 1465, 964)))
    ori = imagehash.hex_to_hash('183c3c7c3c2c383c')
    new_hash = imagehash.average_hash(region)
    return ori - new_hash <= 9
 def test(self):
     """Check that each ``images/*.png`` file is named after its own phash.

     The file stem is decoded as a hex hash and compared with the phash of
     the file's content; every file whose distance exceeds
     ``_HAMMING_DISTANCE`` contributes a ``ValueError`` to the list that is
     finally asserted to be empty.
     """
     exceptions = []
     for fname in Path(".").glob("images/*.png"):
         phash = imagehash.phash(Image.open(fname), hash_size=_HASH_SIZE)
         hamming = imagehash.hex_to_hash(fname.stem) - phash
         if hamming <= _HAMMING_DISTANCE:
             continue
         exceptions.append(ValueError(
             f'phash {phash} does not match {fname.name} [{hamming=}].'))
     self.assertEqual([], exceptions)
Exemple #28
0
 def __lookup_by_hash(self, type_, hash):
     """Score every entry returned by ``self.lookup_by(type_)`` against ``hash``.

     Each entry's ``value`` is decoded as a hex hash; the score is the
     absolute difference to ``hash`` divided by the squared length of the
     decoded hash's array.

     :return: list of dicts with the keys ``key`` and ``similarity``
     """
     def scaled_distance(this, another):
         return abs(this-another)/len(another.hash)**2

     return [
         {
             'key': result['key'],
             'similarity': scaled_distance(
                 hash, imagehash.hex_to_hash(result['value'])),
         }
         for result in self.lookup_by(type_)
     ]
Exemple #29
0
def test_stored_hashes():
    """An average hash must survive a round trip through its hex string."""
    original = imagehash.average_hash(retrieve_example())
    stored_hex = str(original)
    print('checking if stringified hash is the same')
    restored = imagehash.hex_to_hash(stored_hex)
    print(restored)
    hashes_match = original == restored
    print(hashes_match)
    assert hashes_match
    assert original - restored == 0
Exemple #30
0
def test_stored_hashes():
    """Round-trip an average hash through its string form and compare."""
    example = retrieve_example()
    computed = imagehash.average_hash(example)
    print('checking if stringified hash is the same')
    reloaded = imagehash.hex_to_hash(str(computed))
    print(reloaded)
    is_same = computed == reloaded
    print(is_same)
    assert is_same
    assert computed - reloaded == 0
Exemple #31
0
def train(input):
    """Fit a gradient-boosting classifier and classify one sample image.

    The CSV at ``input`` supplies the training data: column ``d`` (cast to
    bool) is the target, all remaining columns are features. The model is
    then applied to the difference vector between the scores of ``7.jpg``
    (looked up in ``BASEDIR``) and a fixed set of reference hashes; the
    vector, the predicted class and its probability are printed.
    """
    df = pd.read_csv(input)
    # convert values to bool
    df['d'] = df['d'].astype('bool')

    feature_columns = df.columns.difference(['d'])
    l_model = GradientBoostingClassifier()
    l_model = l_model.fit(df[feature_columns], df['d'])

    # Reference hashes, keyed by the crop variant they belong to.
    reference_hex = [
        ("crop_0_0_True",
         "bf0fff33feff01102df52f0035010700ff243fcf9fc70080dfffffff00000000"),
        ("crop_0_0.1_True",
         "bffffffffbfb0000070031003f030300ffe63f009f87fdcf067072ff57070000"),
        ("crop_0.1_0_True",
         "bf0fff33feff01102df52f0035010700ff243fcf9fc70080dfffffff00000000"),
        ("crop_0.1_0.1_True",
         "bffffffffbfb0000070031003f030300ffe63f009f87fdcf067072ff57070000"),
        ("crop_0_0_False",
         "ff03ff01ffff01007d1e7f007e011e007e007f13fff90700ff1fff7f07000000"),
        ("crop_0_0.1_False",
         "ff1fff1ffcff01003f0056007e011e00fc0c7f00bffbee5b0704d17fbf010700"),
        ("crop_0.1_0_False",
         "ff03ff03ffbf00001f3e5f007f011f007f00ff16bff30700df3fff7f03000000"),
        ("crop_0.1_0.1_False",
         "7f3fff3ffefd00001f0043007f011f007e087f00bfd3efb9070cd5ff9f010700"),
    ]
    scores2 = {
        name: hex_to_hash(bytes_str, HASH_SIZE)
        for name, bytes_str in reference_hex
    }

    images = ['7.jpg']
    scores = [
        dict(calc_scores(Image.open(os.path.join(BASEDIR, name))))
        for name in images
    ]

    vector = get_diff_vector(scores[0], scores2)

    print(vector)
    df2 = pd.DataFrame.from_dict([vector])

    p_class = l_model.predict(df2)[0]
    class_prob = l_model.predict_proba(df2)[0][int(p_class)]
    print(p_class, class_prob)
Exemple #32
0
 def check_hash_stored(self, func, image):
     """Assert that ``func(image)`` survives a string round trip.

     The hash is converted to its string form and decoded again; the
     decoded hash must equal the original and their distance must be 0.
     """
     image_hash = func(image)
     other_hash = imagehash.hex_to_hash(str(image_hash))
     self.assertEqual(
         image_hash, other_hash,
         'stringified hash {} != original hash {}'.format(other_hash,
                                                          image_hash))

     distance = image_hash - other_hash
     distance_msg = ('unexpected hamming distance {}: original hash {} '
                     '- stringified hash {}').format(distance, image_hash,
                                                     other_hash)
     self.assertEqual(distance, 0, distance_msg)
Exemple #33
0
 def exec_similar_hash(self, redisdb, raw, bias, threshold, execute, lock=None, rotations=True):
     """Call ``execute`` for every stored key whose hash is close to ``raw``.

     Each key from ``self.get_keys_from_db`` is decoded as a hex hash and
     compared with ``raw`` (and, when ``rotations`` is true, with every
     hash from ``self.get_rotations(raw)`` instead). The distance is
     expressed as a percentage of ``len(hash_array)**2`` rounded to two
     decimals; ``execute(redisdb, key, lock=lock)`` runs whenever
     ``percentage + bias <= threshold``.

     NOTE(review): ``execute`` runs once per matching rotation, so one key
     can be processed several times -- confirm that is intended.
     """
     to_check = self.get_rotations(raw) if rotations else [raw]
     for key in self.get_keys_from_db(redisdb, lock=lock):
         # Decode the key once instead of once per rotation.
         key_hash = im.hex_to_hash(key)
         # Distinct loop name: the old loop rebound the ``raw`` parameter.
         for candidate in to_check:
             value = round(((key_hash - candidate) / len(candidate.hash) ** 2) * 100, 2)
             if (value + bias) <= threshold:
                 execute(redisdb, key, lock=lock)
Exemple #34
0
def grabData():
    """Download all records and map their video hashes to source URLs.

    The ``data`` list of the JSON response is read; each element's
    ``videoHash`` string is cut into 16-character pieces, every piece is
    decoded as a hex hash, and the resulting tuple becomes the key under
    which the element's ``srcurl`` is stored.
    """
    response = urllib2.urlopen("http://testware.cloudapp.net:3000/getAllData")
    db = json.loads(response.read())['data']

    chunk_len = 16
    hashToUrl = {}
    for elem in db:
        url = elem['srcurl']
        hashes = elem['videoHash']
        offsets = range(0, len(hashes), chunk_len)
        key = tuple(
            imagehash.hex_to_hash(str(hashes[start:start + chunk_len]))
            for start in offsets)
        hashToUrl[key] = url

    return hashToUrl
Exemple #35
0
    def get_similar_images(self, hash_value, hash_func):
        """Return ids of images in the same case with an identical hash.

        Only images in state ``"C"`` other than the current task are
        considered, and only those whose report stores a hash of the kind
        selected by ``hash_func`` that equals ``hash_value``.

        :param hash_value: hex string of the hash to look for
        :param hash_func: one of ``imagehash.average_hash``,
            ``imagehash.phash`` or ``imagehash.dhash``
        :return: list of matching image ids
        :raises ValueError: for any other ``hash_func``
        """
        # TODO: this should be refactored in the future.

        # Map.
        if hash_func == imagehash.average_hash:
            hash_name = "a_hash"
        elif hash_func == imagehash.phash:
            hash_name = "p_hash"
        elif hash_func == imagehash.dhash:
            hash_name = "d_hash"
        else:
            # Previously hash_name stayed unbound and the search below
            # failed with a NameError.
            raise ValueError("Unsupported hash function: %r" % (hash_func,))

        # Search.
        image_hash = imagehash.hex_to_hash(hash_value)
        similarities = list()
        for img in self.task.case.images.filter(state="C").exclude(id=self.task.id):
            if img.report and \
            "imghash" in img.report and \
            hash_name in img.report["imghash"] and \
            image_hash == imagehash.hex_to_hash(img.report["imghash"][hash_name]):
                # TODO: store also image distance.
                similarities.append(img.id)
        return similarities
def find_images_params(ia1, ia2, im_full_arr):
    """Compare the parameters of two groups of images pairwise.

    ``get_image_params`` is evaluated for every element of ``ia1`` and
    ``ia2``; empty parameter sets are skipped. For each remaining pair the
    function counts equal first parameters and tracks the smallest distance
    of the hex hashes stored at positions 1, 2 and 3.

    :return: ``(have_same, min_diff_hash1, min_diff_hash2, min_diff_hash3)``;
        the minima stay at 99999999999 when no pair was compared
    """
    arr1 = [get_image_params(item, im_full_arr) for item in ia1]
    arr2 = [get_image_params(item, im_full_arr) for item in ia2]

    have_same = 0
    min_diff_hash1 = 99999999999
    min_diff_hash2 = 99999999999
    min_diff_hash3 = 99999999999
    for param1 in arr1:
        if len(param1) == 0:
            continue
        for param2 in arr2:
            if len(param2) == 0:
                continue
            if param1[0] == param2[0]:
                have_same += 1
            h1 = imagehash.hex_to_hash(param1[1]) - imagehash.hex_to_hash(param2[1])
            h2 = imagehash.hex_to_hash(param1[2]) - imagehash.hex_to_hash(param2[2])
            h3 = imagehash.hex_to_hash(param1[3]) - imagehash.hex_to_hash(param2[3])
            min_diff_hash1 = min(min_diff_hash1, h1)
            min_diff_hash2 = min(min_diff_hash2, h2)
            min_diff_hash3 = min(min_diff_hash3, h3)

    return have_same, min_diff_hash1, min_diff_hash2, min_diff_hash3
Exemple #37
0
	def Save(self, nimi, url, filetype, urli=None):
		"""Fetch one strip picture, de-duplicate it and store it.

		The picture is downloaded from ``url`` (or decoded when ``url``
		contains "base64"), hashed with dhash and compared with the strips
		already stored for ``self.sarjakuva``. It is written to disk and
		recorded in the database unless an equal or similar strip of at
		least the same width already exists.

		:param nimi: name stored with the strip (replaced for base64 input)
		:param url: picture URL or base64 data URL
		:param filetype: file extension used for the stored file
		:param urli: page URL the picture came from; defaults to ``self.urli``
		:return: always ``True``, on every path
		"""
		if urli is None: urli = self.urli
		

		self.Print("SAVE", nimi, url, filetype)

		# URLs already stored for this comic plus the explicit ignore list.
		loaded = self.sessio.query(Strippi.url).filter(
				Strippi.sarjakuva_id==self.sarjakuva.id
			).all()
		loaded = [i.url for i in loaded]+self.ignore

		if url in loaded:
			return True
		
		print("save", url)
		# check whether this series already has this picture
		#url = url"
		tmp_file = ""
		img = None
		if not "base64" in url:
			headers = app.config["REQUEST_HEADER"]
			#req = urllib.request.Request(url, None, headers)
			
			try:
				#tmp_file = urllib.request.urlopen(req).read()
				tmp_file = requests.get(url, headers=headers).content
				
			except Exception as e:
				# Second attempt with urllib when requests failed.
				try:
					tmp_file = urllib.request.urlopen(url).read()
					
				except Exception as e:
					Log(self.sarjakuva.id, urli, "Kuvan lataus epäonnistui", e, url)
					return True
			
			# Fewer than 10 bytes is treated as a failed download.
			if len(tmp_file) < 10:
				Log(self.sarjakuva.id, urli, "Liian pieni kuva", None, url)
				return True
		else:
			# Inline base64 picture: the name is derived from the strip count.
			order = self.sessio.query(Strippi).filter(Strippi.sarjakuva_id==self.sarjakuva.id).count()+1
			nimi = "{}_{}".format(self.sarjakuva.nimi, order)
			filetype = "jpeg"
			url = url.split(",", 2)[-1]
			# NOTE(review): str.decode('base64') only exists on Python 2 --
			# confirm the target interpreter.
			tmp_file = url.decode('base64')

		polku = os.path.join(app.config["SARJAKUVA_FOLDER"], self.sarjakuva.lyhenne)

		import io
		try:
			img = Image.open(io.BytesIO(tmp_file))
		except: 
			Log(self.sarjakuva.id, urli, "Virheellinen kuva", None, url)
		# When opening failed img is still None, so the attribute access
		# below raises and the bare except returns.
		try:
			width, height = img.size
			dhash = imagehash.dhash(img)
		except:
			return True

		# Exact dhash match first ...
		found = self.sessio.query(Strippi).filter(
				Strippi.sarjakuva_id == self.sarjakuva.id,
				Strippi.dhash == str(dhash)
			).first()
		
		# ... then a near match: first stored strip with a distance below 5.
		if found is None:
			for i in self.sarjakuva.stripit:
				#polku_old = os.path.join(polku, i.filename)
				#old = Image.open(polku_old)
				if (dhash - imagehash.hex_to_hash(i.dhash)) < 5:
					found = i
					break

			
		# Keep the stored strip unless the new picture is wider.
		if found and found.width >= width:
			self.Print("ALREADY HAD THIS PICTURE", dhash)
			return True
		
		order = self.sessio.query(Strippi).filter(Strippi.sarjakuva_id==self.sarjakuva.id).count()+1
		if found: 
			# Replacement of an existing strip reuses that strip's order.
			order = found.Order()
			print("Suurempi resoluutio. Korvataan kuva", found.width, "vs", width)
		md5_name = "{}_{}.{}".format(self.sarjakuva.lyhenne, order, filetype)

		
		polku = os.path.join(polku, md5_name)

		# create the folder if needed
		dir = os.path.dirname(polku) 
		try:
			os.stat(dir)
		except:
			os.mkdir(dir)

		f = open(polku,'wb')
		f.write(tmp_file)
		f.close()

		# record in the database that the picture has been fetched
		if found:
			tmp = found
		else:
			tmp = Strippi(self.sarjakuva.id, urli, md5_name, nimi, url, str(dhash))
			self.sessio.add(tmp)

		tmp.width = width
		tmp.height = height

		# a picture was found; store this page as the last url, unless its
		# final path segment is one of the comic's configured endings
		save_urli = True
		if self.sarjakuva.ending:
			lopetukset = self.sarjakuva.ending.split(",")

			turli = urli
			# drop trailing slashes before taking the last path segment
			while turli[-1] == "/":
				turli = turli[:-1]
			if turli.split("/")[-1] in lopetukset:
				save_urli = False 

		if save_urli:
			self.sarjakuva.last_url = urli
		self.sarjakuva.last_parse = datetime.datetime.now()
		self.sessio.commit()

		Log(self.sarjakuva.id, urli, "Tallennetaan kuva", None, url, self.sessio)

		return True
 def __init__(self, filename, descriptor):
     """Populate the instance from a file name and its descriptor mapping.

     ``descriptor`` provides ``height``, ``width``, the hex strings
     ``aHash``, ``pHash`` and ``dHash`` (decoded into hash objects) and the
     ``created`` / ``modified`` values stored as ``cTime`` / ``mTime``.
     """
     decode = imagehash.hex_to_hash
     self.filename = filename
     self.height = descriptor["height"]
     self.width = descriptor["width"]
     self.aHash = decode(descriptor["aHash"])
     self.pHash = decode(descriptor["pHash"])
     self.dHash = decode(descriptor["dHash"])
     self.cTime = descriptor["created"]
     self.mTime = descriptor["modified"]
Exemple #39
-1
 def imagehashes(self):
     """Calculate or retrieve imagehash values.

     Returns a mapping from each entry of ``self.HASH_TYPES`` to the hash
     decoded from ``self.stat``. When a stat entry is missing the hashes
     are obtained with ``get_imagehashes`` (small version first, then the
     original), stored on ``self.imagehashes`` and returned. When a stored
     value cannot be decoded, every hash type maps to the same placeholder
     hash decoded from sixteen ``F`` digits.
     """
     try:
         return {
             key: imagehash.hex_to_hash(self.stat[key])
             for key in self.HASH_TYPES
         }
     except KeyError:
         hashes = (
             get_imagehashes(self.small) or get_imagehashes(self.original)
         )
         self.imagehashes = hashes
         return hashes
     except ValueError:
         # could not calculate imagehash; return the placeholder in the same
         # per-type mapping shape as the success path instead of a bare
         # hash object
         placeholder = imagehash.hex_to_hash('F' * 16)
         return {key: placeholder for key in self.HASH_TYPES}