def hamming_calc_tn(number_of_runs, hash_directory, hamm_directory):
    if not os.path.exists(hamm_directory):  # directory for storing files with Hamming distances
        os.mkdir(hamm_directory)
    for i in range(number_of_runs):  # each run
        # dictionary which will hold the Hamming distances
        hamm_dict = {'aHash': {'orig': {}, 'face': {}, 'twit': {}, 'goog': {}, 'inst': {}},
                     'dHash': {'orig': {}, 'face': {}, 'twit': {}, 'goog': {}, 'inst': {}},
                     'pHash': {'orig': {}, 'face': {}, 'twit': {}, 'goog': {}, 'inst': {}},
                     'wHash': {'orig': {}, 'face': {}, 'twit': {}, 'goog': {}, 'inst': {}}}
        # load the first dictionary to choose a random test image from
        test_hash_dict = numpy.load(hash_directory + os.listdir(hash_directory)[0], allow_pickle='TRUE').item()
        rand_photo = random.choice(list(test_hash_dict['orig'].keys()))  # get one random photo from the originals
        print(f'Random photo chosen: {rand_photo}')
        for file in os.listdir(hash_directory):  # one file of hash values per hashing algorithm
            hash_alg = file[7:12]  # extract the algorithm name
            hash_dict = numpy.load(hash_directory + file, allow_pickle='TRUE').item()  # load the dictionary for this algorithm
            test_hash = hex_to_hash(hash_dict['orig'][rand_photo])  # hash of the random photo for this algorithm
            for platform, hashes in hash_dict.items():  # each platform, incl. orig (facebook, instagram, ...)
                for image, hash_value in hashes.items():  # each image name and hash value within this platform's dictionary
                    hamming_distance = test_hash - hex_to_hash(hash_value)
                    hamming_distance = round(hamming_distance / (len(hash_value) * 4), 3)  # normalise and round
                    hamm_dict[hash_alg][platform][image] = hamming_distance  # store the Hamming distance
        numpy.save(hamm_directory + '\\hamm_dist_tn_' + rand_photo, hamm_dict)  # save to a separate file for each run
        print('All hashes calculated and saved for this run')
def test():
    if request.method == 'POST':
        file = request.files['image']
        db = get_db()
        error = None
        if not file:
            error = 'Image is required.'
        if error is None:
            img = Image.open(file)
            hash = str(imagehash.phash(img))
            images = db.execute('SELECT * FROM images').fetchall()
            differences = []
            for image in images:
                difference = {}
                difference["title"] = image["title"]
                difference["difference"] = imagehash.hex_to_hash(hash) - imagehash.hex_to_hash(image['hash'])
                differences.append(difference)
            session.clear()
            session['filename'] = file.filename
            session['differences'] = differences
            return redirect(url_for('images.test'))
        flash(error)
    g.differences = session.get('differences')
    g.filename = session.get('filename')
    return render_template('images/test.html')
def hash(img):
    image = Image.fromarray(img)
    image = image.convert("RGB")
    image.save('./data/test_test.jpg')
    image_file = Image.open('./data/test_test.jpg')
    phashONE = imagehash.phash(image_file)
    print(phashONE)
    a = str(phashONE)
    image_file = Image.open('./data/pic44.jpg')
    phashTWO = imagehash.phash(image_file)
    print(phashTWO)
    b = str(phashTWO)
    gs_hash = imagehash.hex_to_hash(a)
    ori_hash = imagehash.hex_to_hash(b)
    avg_hash = gs_hash - ori_hash
    print('Hamming distance:', gs_hash - ori_hash)
    if avg_hash <= 25:
        print('image is similar')
        arduino.sendData([1])
        # sleep(5)
        print('FOR-sleep')
        # arduino.sendData([2])
        # sleep(3)
        # print('RE-sleep')
    else:
        print('image is not similar')
        # arduino.sendData([0])
        # sleep(5)
        print('NON-sleep')
def getHammingDistance(hash1: str, hash2: str) -> int:
    """
    Gets the Hamming distance of two hashes.

    :param hash1: value of the first hash
    :param hash2: value of the second hash
    :return: an integer describing the Hamming distance between the two hashes
    """
    return hex_to_hash(hash1) - hex_to_hash(hash2)
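# Usage sketch for getHammingDistance (the hex digests are made-up examples):
# ImageHash subtraction counts differing bits, so identical digests give 0
# and fully complementary 64-bit digests give 64.
from imagehash import hex_to_hash
print(getHammingDistance('ffffffffffffffff', 'ffffffffffffffff'))  # 0
print(getHammingDistance('ffffffffffffffff', '0000000000000000'))  # 64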
def calculate_distance(origin, candidates):
    res = []
    origin = imagehash.hex_to_hash(str(origin))
    for u in candidates:
        candidate = imagehash.hex_to_hash(str(u))
        calc = float(origin - candidate) / len(origin.hash) ** 2
        res.append(calc)
    click.echo(res)
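# A minimal check of the normalisation above: ImageHash.hash is a square
# boolean numpy array, so len(h.hash) ** 2 is the total bit count (64 for
# the default 8x8 hashes), which maps raw distances into [0, 1].
import imagehash
h = imagehash.hex_to_hash('ffffffffffffffff')
print(len(h.hash) ** 2)  # 64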
def worker(start, end, data, hash):
    print('start ' + str(start))
    print('end ' + str(end))
    for i in range(start, end):
        imagehash.hex_to_hash(data[i]) - hash
    print("--- %s seconds ---" % (time.time() - start_time))
    return
def pre_train(db):
    start_time = time.time()
    all_records = db.find({})
    total_records = db.find({})
    all_hashes = []
    dic = {}
    data = []
    for items in all_records:
        all_hashes.append((items["_id"], items["hash"]))
    for var in range(len(all_hashes)):
        var1 = var + 1
        l = []
        curr_hash = all_hashes[var][1].split(",")
        while var1 < len(all_hashes):
            next_hash = all_hashes[var1][1].split(",")
            hamming = [
                abs(imagehash.hex_to_hash(next_hash[loop]) -
                    imagehash.hex_to_hash(curr_hash[loop]))
                for loop in range(8)
            ]
            if ((var < 37 and var1 < 37) or (var > 37 and var1 < 45) or
                    (var > 46 and var1 < 55) or (var > 55 and var1 < 60) or
                    (var > 60 and var1 < 66) or (var > 66 and var1 < 72) or
                    (var > 72 and var1 < 76) or (var > 76 and var1 < 80) or
                    (var > 80 and var1 < 91) or (var > 91 and var1 < 100)):
                hamming.append(1)  # duplicates
                data.append(hamming)
            else:
                hamming.append(0)  # not duplicates
                data.append(hamming)
            l.append((all_hashes[var1][0], hamming))
            var1 += 1
        dic[all_hashes[var][0]] = l
    """txt_file=open('train_data.txt','w')
    txt_file.write(str(dic))
    txt_file.close()
    txt_file2=open('train_data_marked.txt','w')
    txt_file2.write(str(data))
    txt_file2.close()"""
    data = np.array(data)
    X = data[:, :-1]  # all columns except the last
    Y = data[:, -1]   # only the last column of each row, which is the target
    logreg = linear_model.LogisticRegression()
    logreg.fit(X, Y)
    coefficients = logreg.coef_
    intercept = logreg.intercept_
    text_file3 = open("classifier_attributes.txt", 'w')
    text_file3.write(str(coefficients) + "\n" + str(intercept))
    text_file3.close()
    """
    coefficients=[0.20229608,0.2128063,-0.11489182,-0.76958153,-0.2130312,-0.02579938,0.23349941,-0.06487865]
    intercept=[ 3.88161427]
    """
    print("Time taken for training all hashes : %s" % (time.time() - start_time))
def compare(self, h1, h2):
    '''
    Compute the similarity of two hashes.
    :param h1: hash string
    :param h2: hash string
    :return: similarity in [0, 1]
    '''
    hash_1 = imagehash.hex_to_hash(h1)
    hash_2 = imagehash.hex_to_hash(h2)
    a = 1 - (hash_1 - hash_2) / len(hash_1.hash) ** 2
    return a
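# A worked instance of the formula in compare above (digests are made-up):
# identical hashes give similarity 1.0; hashes differing in all 64 bits give 0.0.
import imagehash
h1 = imagehash.hex_to_hash('ffffffffffffffff')
h2 = imagehash.hex_to_hash('0000000000000000')
print(1 - (h1 - h1) / len(h1.hash) ** 2)  # 1.0
print(1 - (h1 - h2) / len(h1.hash) ** 2)  # 0.0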
def hash_diff(hash1, hash2):
    '''
    Returns the integer Hash Difference between :hash1: and :hash2:
    Automatically converts stored text hashes to hex
    '''
    if type(hash1) == str:
        hash1 = imagehash.hex_to_hash(hash1)
    if type(hash2) == str:
        hash2 = imagehash.hex_to_hash(hash2)
    return hash1 - hash2
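# Usage sketch for hash_diff (digests are made-up): it accepts ImageHash
# objects, hex strings, or a mix of the two.
import imagehash
print(hash_diff('ff00ff00ff00ff00', 'ff00ff00ff00ff00'))  # 0
print(hash_diff(imagehash.hex_to_hash('ff00ff00ff00ff00'), '00ff00ff00ff00ff'))  # 64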
def findMatches(self, maxDiff):
    theHash = self.hash
    results = []
    allImages = ImageModel.objects.all()
    for Image in allImages:
        a = hex_to_hash(Image.hash)
        b = hex_to_hash(theHash)
        difference = 100 * (a - b) / (len(a.hash) ** 2)
        if difference < maxDiff:
            results.append(Image)
    return results
def search(args):
    '''Search the database for similar files'''
    # open the shelve database
    db = shelve.open(args["shelve"])
    query = Image.open(args["query"])
    if args['hash_name'] == 'grayscale':
        h = imagehash.grayscale_hash(query)
        db_hash = db['grayscale']
    elif args['hash_name'] == 'color':
        h = imagehash.color_hash(query)
        db_hash = db['color']
    print(collections.Counter(len(hex) for hex, image in db_hash.items()).most_common())
    l = [(h - imagehash.hex_to_hash(hex), hex) for hex, image in db_hash.items()]
    c = collections.Counter(item[0] for item in l)
    print(sorted(c.most_common(), key=lambda item: item[0]))
    command = []
    for strength, item in sorted(l, key=lambda item: item[0]):
        if args['threshold'] < 0 or strength <= args['threshold']:
            print('{} count: {} strength: {}'.format(db_hash[item][0], len(db_hash[item]), strength))
            command.append(db_hash[item][0])
    if command:
        subprocess.call(['feh', '-t', '-F', '-y 150', '-E 150'] + command)
def find_similar_images(arg):
    """
    Uses phash to find similar images in corpus and generated images
    https://en.wikipedia.org/wiki/Perceptual_hashing
    """
    generated_images = glob.glob(arg + "/*.png")
    hashfile = Path("corpus-hashes.csv")
    if not hashfile.is_file():
        print("Hash values for the corpus are not precomputed.")
        corpus = generate_corpus_hashes()
    corpus = pd.read_csv(hashfile)
    print(corpus)
    targets = []
    sources = []
    dissimilarities = []
    iterator = 0
    last_operation = len(corpus) * len(generated_images)
    for image in generated_images:
        most_similar = ""
        minsim = 10000
        target = Image.open(image)
        targethash = imagehash.phash(target)
        for hashe, source in zip(corpus["hash"], corpus["image"]):
            dissimilarity = imagehash.hex_to_hash(hashe) - targethash
            print(str(iterator) + " / " + str(last_operation))
            iterator = iterator + 1
            if dissimilarity < minsim:
                minsim = dissimilarity
                most_similar = source
        sources.append(most_similar)
        targets.append(image)
        dissimilarities.append(minsim)  # record the best (minimum) dissimilarity for this image
    pd.DataFrame(data={"source": sources, "target": targets, "dissimilarity": dissimilarities},
                 columns=["source", "target", "dissimilarity"]).to_csv("similar_images.csv")
    return ()
def get_hash(self, filename):
    filename = os.path.abspath(filename)
    to_return = None
    if filename in self.jsondata:
        to_return = self.jsondata[filename]
        if to_return.startswith("Error"):
            return None
        to_return = imagehash.hex_to_hash(to_return)
        return to_return
    hash = None
    strhash = None
    try:
        image = PIL.Image.open(filename)
        hash = imagehash.average_hash(image)
        strhash = str(hash)
        self.jsondata[filename] = strhash
    except BaseException as ex:
        hash = None
        strhash = "Error: %s" % str(ex)
        print(colorama.Back.RED + "Exception %s when processing %s" % (str(ex), filename))
        if save_errors:
            self.jsondata[filename] = strhash
    self.save()
    return hash
def unserialize(cls, data):
    path = pathlib.Path(data["path"])
    date = None if data["date"] is None else pendulum.parse(data["date"])
    hash_ = imagehash.hex_to_hash(data["hash"])
    quality = data["quality"]
    size = (data["size"]["w"], data["size"]["h"])
    return cls(path, date, hash_, quality, size)
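# A sketch of the serialized record unserialize expects (all field values
# are made-up); "hash" holds the hex digest that str(ImageHash) produced.
example = {
    "path": "photos/img_0001.jpg",
    "date": None,
    "hash": "ff00ff00ff00ff00",
    "quality": 80,
    "size": {"w": 1920, "h": 1080},
}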
def load_hashes(path):
    global _hashes
    with open(path, "r", encoding="utf-8") as f:
        _hashes = {
            imagehash.hex_to_hash(key): value
            for key, value in json.load(f).items()
        }
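# Usage sketch ('hashes.json' is a hypothetical file of digest-to-value
# pairs): ImageHash implements __hash__, so the decoded hashes can key a
# dict for exact-match lookups.
import imagehash
load_hashes("hashes.json")
probe = imagehash.hex_to_hash("ff00ff00ff00ff00")
print(_hashes.get(probe))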
def search_hash(db, hash_str, skip_array):
    image_hash = imagehash.hex_to_hash(hash_str)
    skip_set = set(skip_array)
    results = []
    for x in db:
        video = db[x]
        if x in skip_set:
            continue
        if video['hashed'] is False:
            continue
        for h in video['hashes']:
            distance = imagehash.hex_to_hash(h['hash']) - image_hash
            if distance < threshold:
                results.append({'id': x, 't': h['t'], 'distance': distance})
    return results
def main(args):
    if args.use:
        frames = []
        for filename in sorted(args.use):
            with open(filename) as f:
                frames.extend(imagehash.hex_to_hash(frame) for frame in json.load(f))
    else:
        clip = VideoFileClip(args.file)
        frames = [imagehash.dhash(Image.fromarray(frame)) for frame in clip.iter_frames()]
    if args.save:
        with open(args.file + '.json', 'w') as f:
            json.dump([str(frame) for frame in frames], f)
    duplicate_lists = defaultdict(list)
    for i, frame in enumerate(frames):
        duplicate_lists[str(frame)].append(i)
    if args.common_frames:
        most_common_frames = sorted(duplicate_lists.values(), reverse=True,
                                    key=lambda l: len(l))[:args.common_frames]
        clip = VideoFileClip(args.file)
        for i, frame_list in enumerate(most_common_frames):
            frame = Image.fromarray(clip.get_frame(frame_list[0] / clip.fps))
            frame.save(str(i) + '.jpg')
    scores = [len(duplicate_lists[str(frame)]) for frame in frames]
    print(json.dumps(scores))
def compare():
    tar_dic = {}
    org_dic = {}
    with open('image_hash.txt', 'r') as f:
        rls = f.readlines()
        for i in rls:
            tar_hash = imagehash.hex_to_hash(i.split(' : ')[0])
            tar_img_name = i.split(' : ')[1]
            tar_dic[tar_hash] = tar_img_name
    conn = pymysql.connect(host='localhost', user='******', password='******',
                           db='hash_test', charset='utf8')
    curs = conn.cursor()
    curs.execute("SELECT phash, image_name FROM hash")
    result = curs.fetchall()
    for i in result:
        org_hash = imagehash.hex_to_hash(str(i).split("'")[1])
        org_img_name = str(i).split("'")[3]
        org_dic[org_hash] = org_img_name
    resultList = []
    for oh, oin in org_dic.items():
        for th, tin in tar_dic.items():
            diff_hash = th - oh
            # the original split this into <=3, 4-6 and 7-10 bands with identical bodies
            if diff_hash <= 10:
                resultList.append(tin[:-1] + " - " + oin + " : " + str(diff_hash))
                print(tin[:-1] + " - " + oin + " : " + str(diff_hash))
    f = open("\\result.txt", "w")
    for reList in resultList:
        f.write(reList + "\n")
    f.close()
def get_hash_search_index(search_index_with_hash_as_str):
    hash_search_index = dict()
    for hash_as_str in search_index_with_hash_as_str:
        hash_value = imagehash.hex_to_hash(hash_as_str)
        hash_search_index[hash_value] = search_index_with_hash_as_str[hash_as_str]
    return hash_search_index
def match(theImage, maxDiff):
    theHash = theImage.hash
    # theHash = whash(Image.open("media" + os.path.sep + theImage.docfile.name))
    results = []
    allImages = ImageModel.objects.all()
    for Image in allImages:
        a = hex_to_hash(Image.hash)
        b = hex_to_hash(theHash)
        difference = 100 * (a - b) / (len(a.hash) ** 2)
        # print(difference)
        if difference < maxDiff:
            results.append(Image)
    # print(results)
    return results
def hashArr(hashes):
    acc = None
    for hash in hashes:
        ar = imagehash.hex_to_hash(hash).hash
        if acc is None:
            acc = ar
        else:
            acc = np.concatenate((acc, ar))
    return acc
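# Usage sketch for hashArr (digests are made-up): each ImageHash.hash is an
# 8x8 boolean array for default 64-bit hashes, so concatenating n of them
# along axis 0 yields an (8 * n, 8) array.
arr = hashArr(['ff00ff00ff00ff00', '0f0f0f0f0f0f0f0f'])
print(arr.shape)  # (16, 8)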
def read_hashed_from_file(filename='hash_imgs_dhash_default_filename.json',
                          filepath='dataset/upsampling/'):
    with open(filename, 'r') as filehandle:
        # print(filehandle.readline)
        similar_ids = json.load(filehandle)
    # convert back to hashes
    hash_imgs_train = [(pair[0], imagehash.hex_to_hash(pair[1])) for pair in similar_ids]
    return hash_imgs_train
def __find_similar_ad_from_pic(self, picture):
    new_hash = phash(Image.open(urlopen(picture)))
    hashes = [ad.picturehash for ad in Annonce.select()]
    for old_hash in hashes:
        if old_hash is not None and \
                hex_to_hash(old_hash) - new_hash < self.HASH_SIMILAR_TRESHOLD:
            return Annonce.get(Annonce.picturehash == old_hash)
    return False
def _get_imagehash_type_from_any(self, anything):
    # only a URL, an ImageHash, or a hex str is accepted
    if isinstance(anything, str):
        if anything.startswith('http'):
            return HashedImage(anything, calculate_on_init=False).get_phash()  # default to phash
        elif "," in anything:
            str_split = anything.split(',')
            return imagehash.ImageMultiHash(
                [imagehash.hex_to_hash(segment) for segment in str_split])
        else:
            return imagehash.hex_to_hash(anything)
    elif isinstance(anything, (imagehash.ImageMultiHash, imagehash.ImageHash)):
        return anything
    else:
        raise NotImplementedError
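# A sketch of the string forms accepted above (digests are made-up): a bare
# hex digest decodes to an ImageHash, while the comma-separated form decodes
# to an ImageMultiHash of per-segment hashes.
import imagehash
single = 'ff00ff00ff00ff00'
multi = 'ff00ff00ff00ff00,0f0f0f0f0f0f0f0f'
print(type(imagehash.hex_to_hash(single)))  # ImageHash
print(type(imagehash.ImageMultiHash(
    [imagehash.hex_to_hash(s) for s in multi.split(',')])))  # ImageMultiHash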
def is_activity(img):
    os = (1920, 1080)
    cs = img.size
    t = img.crop((1414, 922, 1455, 964))
    t = img.crop(resize(os, cs, (1404, 922, 1465, 964)))  # overrides the fixed crop above
    ori = imagehash.hex_to_hash('183c3c7c3c2c383c')
    new_hash = imagehash.average_hash(t)
    return ori - new_hash <= 9
def test(self):
    exceptions = []
    for fname in Path(".").glob("images/*.png"):
        phash = imagehash.phash(Image.open(fname), hash_size=_HASH_SIZE)
        fname_hash = imagehash.hex_to_hash(fname.stem)
        hamming = fname_hash - phash
        if hamming > _HAMMING_DISTANCE:
            msg = f'phash {phash} does not match {fname.name} [{hamming=}].'
            exceptions.append(ValueError(msg))
    self.assertEqual([], exceptions)
def __lookup_by_hash(self, type_, hash):
    def diff(this, another):
        return abs(this - another) / len(another.hash) ** 2

    key_and_similarity = []
    results = self.lookup_by(type_)
    for result in results:
        key_and_similarity.append({
            'key': result['key'],
            'similarity': diff(hash, imagehash.hex_to_hash(result['value']))})
    return key_and_similarity
def test_stored_hashes():
    img = retrieve_example()
    hash = imagehash.average_hash(img)
    stored_hex = str(hash)
    print('checking if stringified hash is the same')
    otherhash = imagehash.hex_to_hash(stored_hex)
    print(otherhash)
    print(hash == otherhash)
    assert hash == otherhash
    assert hash - otherhash == 0
def train(input):
    df = pd.read_csv(input)
    # convert values to bool
    df['d'] = df['d'].astype('bool')
    l_model = GradientBoostingClassifier()
    l_model = l_model.fit(df[df.columns.difference(['d'])], df['d'])
    s = [
        ["crop_0_0_True", "bf0fff33feff01102df52f0035010700ff243fcf9fc70080dfffffff00000000"],
        ["crop_0_0.1_True", "bffffffffbfb0000070031003f030300ffe63f009f87fdcf067072ff57070000"],
        ["crop_0.1_0_True", "bf0fff33feff01102df52f0035010700ff243fcf9fc70080dfffffff00000000"],
        ["crop_0.1_0.1_True", "bffffffffbfb0000070031003f030300ffe63f009f87fdcf067072ff57070000"],
        ["crop_0_0_False", "ff03ff01ffff01007d1e7f007e011e007e007f13fff90700ff1fff7f07000000"],
        ["crop_0_0.1_False", "ff1fff1ffcff01003f0056007e011e00fc0c7f00bffbee5b0704d17fbf010700"],
        ["crop_0.1_0_False", "ff03ff03ffbf00001f3e5f007f011f007f00ff16bff30700df3fff7f03000000"],
        ["crop_0.1_0.1_False", "7f3fff3ffefd00001f0043007f011f007e087f00bfd3efb9070cd5ff9f010700"],
    ]
    scores2 = dict((name, hex_to_hash(bytes_str, HASH_SIZE)) for name, bytes_str in s)
    images = ['7.jpg']
    img_objs = map(lambda i: Image.open(os.path.join(BASEDIR, i)), images)
    scores = map(dict, map(calc_scores, img_objs))
    vector = get_diff_vector(list(scores)[0], scores2)
    print(vector)
    df2 = pd.DataFrame.from_dict([vector])
    # print(df2.values)
    p_class = l_model.predict(df2)[0]
    class_prob = l_model.predict_proba(df2)[0][int(p_class)]
    print(p_class, class_prob)
def check_hash_stored(self, func, image):
    image_hash = func(image)
    other_hash = imagehash.hex_to_hash(str(image_hash))
    emsg = 'stringified hash {} != original hash {}'.format(other_hash, image_hash)
    self.assertEqual(image_hash, other_hash, emsg)
    distance = image_hash - other_hash
    emsg = ('unexpected hamming distance {}: original hash {} '
            '- stringified hash {}'.format(distance, image_hash, other_hash))
    self.assertEqual(distance, 0, emsg)
def exec_similar_hash(self, redisdb, raw, bias, threshold, execute, lock=None, rotations=True):
    if rotations:
        to_check = self.get_rotations(raw)  # if rotations are wanted, precompute them
    else:
        to_check = [raw]  # else use only the given hash
    for key in self.get_keys_from_db(redisdb, lock=lock):  # check each key; with SQL this would probably be better
        for raw in to_check:
            value = round(((im.hex_to_hash(key) - raw) / len(raw.hash) ** 2) * 100, 2)  # normalised distance in percent
            if (value + bias) <= threshold:  # check against the threshold (+ bias, if any)
                execute(redisdb, key, lock=lock)  # run the callback on the key
def grabData():
    db = json.loads(urllib2.urlopen("http://testware.cloudapp.net:3000/getAllData").read())['data']
    # db = [{'redirect': 'google.com', 'videoHash': u'a'*128}]
    hashToUrl = {}
    for elem in db:
        url = elem['srcurl']
        hashes = elem['videoHash']
        lst = tuple(imagehash.hex_to_hash(str(hashes[i:i + 16]))
                    for i in range(0, len(hashes), 16))
        hashToUrl[lst] = url
    return hashToUrl
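# A worked note on the 16-character slicing above (the digest is made-up):
# each 16-hex-char segment decodes to one 64-bit frame hash, so a 128-char
# videoHash becomes a tuple of 8 ImageHash objects.
import imagehash
hashes = 'ff00ff00ff00ff00' * 8  # 128 hex chars
lst = tuple(imagehash.hex_to_hash(hashes[i:i + 16]) for i in range(0, len(hashes), 16))
print(len(lst))  # 8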
def get_similar_images(self, hash_value, hash_func):
    # TODO: this should be refactored in the future.
    # Map.
    if hash_func == imagehash.average_hash:
        hash_name = "a_hash"
    elif hash_func == imagehash.phash:
        hash_name = "p_hash"
    elif hash_func == imagehash.dhash:
        hash_name = "d_hash"
    # Search.
    image_hash = imagehash.hex_to_hash(hash_value)
    similarities = list()
    for img in self.task.case.images.filter(state="C").exclude(id=self.task.id):
        if img.report and \
                "imghash" in img.report and \
                hash_name in img.report["imghash"] and \
                image_hash == imagehash.hex_to_hash(img.report["imghash"][hash_name]):
            # TODO: store also image distance.
            similarities.append(img.id)
    return similarities
def find_images_params(ia1, ia2, im_full_arr):
    # print(ia1, ia2)
    arr1 = []
    for i in range(len(ia1)):
        arr1.append(get_image_params(ia1[i], im_full_arr))
    arr2 = []
    for i in range(len(ia2)):
        arr2.append(get_image_params(ia2[i], im_full_arr))
    # print(arr1)
    # print(arr2)
    have_same = 0
    min_diff_hash1 = 99999999999
    min_diff_hash2 = 99999999999
    min_diff_hash3 = 99999999999
    for i in range(len(ia1)):
        param1 = arr1[i]
        if len(param1) == 0:
            continue
        for j in range(len(ia2)):
            param2 = arr2[j]
            if len(param2) == 0:
                continue
            if param1[0] == param2[0]:
                have_same += 1
            h1 = imagehash.hex_to_hash(param1[1]) - imagehash.hex_to_hash(param2[1])
            h2 = imagehash.hex_to_hash(param1[2]) - imagehash.hex_to_hash(param2[2])
            h3 = imagehash.hex_to_hash(param1[3]) - imagehash.hex_to_hash(param2[3])
            if h1 < min_diff_hash1:
                min_diff_hash1 = h1
            if h2 < min_diff_hash2:
                min_diff_hash2 = h2
            if h3 < min_diff_hash3:
                min_diff_hash3 = h3
            # print(have_same, h1, h2, h3)
    return have_same, min_diff_hash1, min_diff_hash2, min_diff_hash3
def Save(self, nimi, url, filetype, urli=None):
    if urli is None:
        urli = self.urli
    self.Print("SAVE", nimi, url, filetype)
    loaded = self.sessio.query(Strippi.url).filter(
        Strippi.sarjakuva_id == self.sarjakuva.id
    ).all()
    loaded = [i.url for i in loaded] + self.ignore
    if url in loaded:
        return True
    print("save", url)
    # check whether this series already has this image
    tmp_file = ""
    img = None
    if "base64" not in url:
        headers = app.config["REQUEST_HEADER"]
        # req = urllib.request.Request(url, None, headers)
        try:
            # tmp_file = urllib.request.urlopen(req).read()
            tmp_file = requests.get(url, headers=headers).content
        except Exception as e:
            try:
                tmp_file = urllib.request.urlopen(url).read()
            except Exception as e:
                Log(self.sarjakuva.id, urli, "Image download failed", e, url)
                return True
        if len(tmp_file) < 10:
            Log(self.sarjakuva.id, urli, "Image too small", None, url)
            return True
    else:
        order = self.sessio.query(Strippi).filter(Strippi.sarjakuva_id == self.sarjakuva.id).count() + 1
        nimi = "{}_{}".format(self.sarjakuva.nimi, order)
        filetype = "jpeg"
        url = url.split(",", 2)[-1]
        tmp_file = url.decode('base64')
    polku = os.path.join(app.config["SARJAKUVA_FOLDER"], self.sarjakuva.lyhenne)
    import io
    try:
        img = Image.open(io.BytesIO(tmp_file))
    except:
        Log(self.sarjakuva.id, urli, "Invalid image", None, url)
    try:
        width, height = img.size
        dhash = imagehash.dhash(img)
    except:
        return True
    found = self.sessio.query(Strippi).filter(
        Strippi.sarjakuva_id == self.sarjakuva.id,
        Strippi.dhash == str(dhash)
    ).first()
    if found is None:
        for i in self.sarjakuva.stripit:
            # polku_old = os.path.join(polku, i.filename)
            # old = Image.open(polku_old)
            if (dhash - imagehash.hex_to_hash(i.dhash)) < 5:
                found = i
                break
    if found and found.width >= width:
        self.Print("ALREADY HAD THIS PICTURE", dhash)
        return True
    order = self.sessio.query(Strippi).filter(Strippi.sarjakuva_id == self.sarjakuva.id).count() + 1
    if found:
        order = found.Order()
        print("Higher resolution. Replacing image", found.width, "vs", width)
    md5_name = "{}_{}.{}".format(self.sarjakuva.lyhenne, order, filetype)
    polku = os.path.join(polku, md5_name)
    # create the directory if needed
    dir = os.path.dirname(polku)
    try:
        os.stat(dir)
    except:
        os.mkdir(dir)
    f = open(polku, 'wb')
    f.write(tmp_file)
    f.close()
    # record in the database that the image has been fetched
    if found:
        tmp = found
    else:
        tmp = Strippi(self.sarjakuva.id, urli, md5_name, nimi, url, str(dhash))
        self.sessio.add(tmp)
    tmp.width = width
    tmp.height = height
    # an image was found; store this as the last URL
    save_urli = True
    if self.sarjakuva.ending:
        lopetukset = self.sarjakuva.ending.split(",")
        turli = urli
        while turli[-1] == "/":
            turli = turli[:-1]
        if turli.split("/")[-1] in lopetukset:
            save_urli = False
    if save_urli:
        self.sarjakuva.last_url = urli
        self.sarjakuva.last_parse = datetime.datetime.now()
    self.sessio.commit()
    Log(self.sarjakuva.id, urli, "Saving image", None, url, self.sessio)
    return True
def __init__(self, filename, descriptor):
    # print(filename + " \t" + str(descriptor))
    self.filename = filename
    self.height = descriptor["height"]
    self.width = descriptor["width"]
    self.aHash = imagehash.hex_to_hash(descriptor["aHash"])
    self.pHash = imagehash.hex_to_hash(descriptor["pHash"])
    self.dHash = imagehash.hex_to_hash(descriptor["dHash"])
    self.cTime = descriptor["created"]
    self.mTime = descriptor["modified"]
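# A sketch of the descriptor mapping __init__ expects (all values are
# made-up); each *Hash field holds a hex digest as produced by
# str(imagehash.average_hash(img)) and friends.
descriptor = {
    "height": 1080,
    "width": 1920,
    "aHash": "ff00ff00ff00ff00",
    "pHash": "0f0f0f0f0f0f0f0f",
    "dHash": "00ff00ff00ff00ff",
    "created": 1609459200,
    "modified": 1609459200,
}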
def imagehashes(self):
    """Calculate or retrieve imagehash values."""
    try:
        return {
            key: imagehash.hex_to_hash(self.stat[key])
            for key in self.HASH_TYPES
        }
    except KeyError:
        hashes = (
            get_imagehashes(self.small) or
            get_imagehashes(self.original)
        )
        self.imagehashes = hashes
        return hashes
    except ValueError:
        # could not calculate imagehash
        return imagehash.hex_to_hash('F' * 16)
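# A note on the ValueError fallback above: 'F' * 16 decodes to an all-ones
# 64-bit hash, a recognisable sentinel; comparing any real hash against it
# reports the count of that hash's zero bits.
import imagehash
sentinel = imagehash.hex_to_hash('F' * 16)
print(sentinel - imagehash.hex_to_hash('0' * 16))  # 64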