async def gather_profile_pic_hashes(event, user):
    # Assumes: from io import BytesIO; from PIL import Image; from imagehash import average_hash
    hashes = []
    async for photo in event.client.iter_profile_photos(user):
        io = BytesIO()
        await event.client.download_media(photo, io)
        image = Image.open(io)
        hashes.append(average_hash(image))
    return hashes
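# A minimal usage sketch (hypothetical, not part of the original handler): compare a
# user's profile pictures against one known hash. Assumes imagehash.ImageHash values,
# which report their bit difference via subtraction; `known_hash` and `threshold`
# are illustrative names.
async def _matches_known_picture(event, user, known_hash, threshold=5):
    hashes = await gather_profile_pic_hashes(event, user)
    # `a - b` on two ImageHash objects returns the number of differing bits
    return any(h - known_hash <= threshold for h in hashes)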
def compareone2all(userpath, hashfunc=imagehash.phash):
    import os

    def is_image(filename):
        f = filename.lower()
        return f.endswith(".jpg") or f.endswith(".jpeg")

    image_filenames = [os.path.join(userpath, path)
                       for path in os.listdir(userpath) if is_image(path)]

    # Group files in the directory by their perceptual hash
    images = {}
    for img in sorted(image_filenames):
        img_hash = hashfunc(Image.open(img))
        images[img_hash] = images.get(img_hash, []) + [img]

    # Compare one reference image against every file in the directory
    compareme = '/Users/kpham/Desktop/drumpfingarchives/image-download3/Ccg5K73XEAErgpK.jpg'
    compareme_hash = photohash.average_hash(compareme)
    for i in range(len(image_filenames)):
        distance = photohash.hash_distance(compareme_hash,
                                           photohash.average_hash(image_filenames[i]))
        if distance < 12:
            print(distance, image_filenames[i])
def hash_image(img_filename):
    """Compute several perceptual hashes for one image, keyed by library and algorithm."""
    hashes_dict = dict()
    try:
        img = Image.open(img_filename)
        # photohash works on a file path, imagehash works on a PIL Image
        hashes_dict['photohash_average_hash'] = photohash.average_hash(img_filename)
        hashes_dict['imagehash_average_hash'] = imagehash.average_hash(img)
        hashes_dict['imagehash_phash'] = imagehash.phash(img)
    except Exception:
        print("error in hash_image on: {0}".format(img_filename))
    return hashes_dict
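# A small usage sketch for hash_image (file names are placeholders): the imagehash
# values compare with `-` (bit distance); the photohash values compare via
# photohash.hash_distance, as in compareone2all above.
h1 = hash_image('a.jpg')
h2 = hash_image('b.jpg')
if 'imagehash_phash' in h1 and 'imagehash_phash' in h2:
    print('phash distance:', h1['imagehash_phash'] - h2['imagehash_phash'])
    print('average hash distance:',
          photohash.hash_distance(h1['photohash_average_hash'],
                                  h2['photohash_average_hash']))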
def search_duplicated(dirs, perceptual=False):
    images = {}
    for image in list_picture(dirs):
        imhash = photohash.average_hash(image)
        if imhash in images:
            logging.warning('duplicated: %s', imhash)
            logging.warning(' f1: %s', images[imhash])
            logging.warning(' f2: %s', image)
        else:
            images[imhash] = image
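# A minimal sketch of a tolerant variant of search_duplicated (hypothetical helper, not
# part of the original code): instead of requiring identical hashes, it reuses
# photohash.hash_distance, as in compareone2all above, to flag images whose hashes
# differ by at most `tolerance`. Assumes `photohash` is already imported.
def search_near_duplicates(paths, tolerance=6):
    seen = []  # (hash, path) pairs already examined
    for path in paths:
        imhash = photohash.average_hash(path)
        for other_hash, other_path in seen:
            if photohash.hash_distance(imhash, other_hash) <= tolerance:
                print('near-duplicate:', path, '~', other_path)
        seen.append((imhash, path))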
def tweeter(post_dict):
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_secret)
    api = tweepy.API(auth)
    for post in post_dict:
        # Grab post details from dictionary
        post_id = post_dict[post][0]
        if not duplicate_check(post_id):  # Make sure post is not a duplicate
            file_path = get_media(post_dict[post][1], post_dict[post][0])
            post_link = post_dict[post][2]
            post_op = post_dict[post][3]
            # Make sure the post contains media (if it doesn't, then file_path would be blank)
            if file_path:
                # Scan the image against previously-posted images, but only if repost protection is enabled in config.ini
                try:
                    hash = photohash.average_hash(file_path)
                    print('[ OK ] Image hash check:', hash_check(hash))
                except:
                    # Set hash to an empty string if the check failed
                    hash = ""
                    print('[WARN] Could not check image hash, skipping.')
                if REPOST_PROTECTION is True and hash_check(hash) is True:
                    print('[WARN] Skipping', post_id, 'because it seems to be a repost')
                else:
                    print('[ OK ] Posting this on main twitter account:', post, file_path)
                    try:
                        # Post the tweet
                        api.update_with_media(filename=file_path, status=post)
                        # Log the tweet
                        username = api.me().screen_name
                        latestTweets = api.user_timeline(screen_name=username, count=1, include_rts=False)
                        newestTweet = latestTweets[0].id_str
                        log_post(post_id, hash, 'https://twitter.com/' + username + '/status/' + newestTweet + '/')
                        # Post alt tweet
                        if ALT_ACCESS_TOKEN:
                            alt_tweeter(post_link, post_op, username, newestTweet)
                        else:
                            print('[WARN] No authentication info for alternate account in config.ini, skipping alt tweet.')
                        print('[ OK ] Sleeping for', DELAY_BETWEEN_TWEETS, 'seconds')
                        time.sleep(DELAY_BETWEEN_TWEETS)
                    except BaseException as e:
                        print('[EROR] Error while posting tweet:', str(e))
                        # Log the post anyways
                        log_post(post_id, hash, "Error while posting tweet:" + str(e))
            else:
                print('[WARN] Ignoring', post_id, 'because there was not a media file downloaded')
            # Cleanup image file
            if file_path is not None:
                if os.path.isfile(file_path):
                    os.remove(file_path)
                    print('[ OK ] Deleted media file at ' + file_path)
        else:
            print('[WARN] Ignoring', post_id, 'because it was already posted')
def _elaborate_perceptual_hash_media(self, image_dir):
    '''
    Computes the hashes for the images in the specified directory

    Params:
        @image_dir: Path of the images directory
    '''
    # Get the list of images in the folder
    dir_abs_path = os.path.abspath(image_dir)
    images_list = [os.path.join(dir_abs_path, name) for name in os.listdir(dir_abs_path)
                   if os.path.isfile(os.path.join(dir_abs_path, name))]

    # Compute the hashes of the images
    for image_path in images_list:
        image_hash = photohash.average_hash(image_path)
        if image_hash not in self._perceptual_hashes:  # Avoid adding duplicates
            self._perceptual_hashes.append(image_hash)
async def _add_photo(e, opts):
    reply = await e.get_reply_message()
    message = reply if reply else e.message
    if message.photo:
        await e.edit("**Downloading photo...**")
    else:
        await e.edit("**Choose a file to add**", delete_in=3)
        return
    photo = io.BytesIO()
    await message.download_media(file=photo)
    image = Image.open(photo)
    _hash = average_hash(image)
    if await add_file_hash(_hash, 'pic'):
        message = TGDoc(
            Section(Bold("Added file"), KeyValueItem(Bold("photo"), Code(_hash))))
        await e.edit(str(message))
    else:
        await e.edit("**Failed to add photo**", delete_in=3)
def make_api_call(self):
    result = self.api.request('statuses/user_timeline', self.calldic)
    print result.get_rest_quota()['remaining'], self.calldic['screen_name']
    # if the quota is nearing its end, move to a new key
    if result.get_rest_quota()['remaining'] < 2 and result.get_rest_quota()['remaining'] != None:
        del self.api
        sleep(200)
        self.keynumber += 1
        self.keynumber = self.keynumber % 2
        self.setkey(self.keys[self.keynumber])
    for tweet in result.get_iterator():
        # exit if error
        try:
            tweetid = tweet['id']
        except:
            print "No more calls left"
            return "shit"
        language = tweet['lang']
        # get date
        created_time = datetime.datetime.strptime(tweet['created_at'].encode('utf-8'), "%a %b %d %H:%M:%S +0000 %Y")
        # fix date
        time = created_time.replace(tzinfo=utc)
        message = tweet['text'].encode('utf-8')
        retweets = tweet['retweet_count']
        favorites = tweet['favorite_count']
        in_reply_to_screen_name = tweet['in_reply_to_screen_name']
        # these should be tested
        if tweet['entities'].has_key('media'):
            media_type = tweet['entities']['media'][0]['type']
            urllib.urlretrieve(tweet['entities']['media'][0]['media_url'].encode('utf-8'), "temp_pic.jpg")
            picture = average_hash("temp_pic.jpg", hash_size=64)
        else:
            media_type = None
            picture = None
        if tweet.has_key('retweeted_status'):
            retweeted_from = tweet['retweeted_status']['user']['screen_name'].encode('utf-8')
        else:
            retweeted_from = None
        if tweet['entities'].has_key("urls"):
            for url in tweet['entities']['urls']:
                message = regex.sub(url['url'].encode('utf-8'), url['expanded_url'].encode('utf-8'), message)
        tweet_update = TwitterTweet(tweetid=tweetid,
                                    language=language,
                                    created_time=time,
                                    message=message,
                                    retweets=retweets,
                                    favorites=favorites,
                                    in_reply_to_screen_name=in_reply_to_screen_name,
                                    media_type=media_type,
                                    picture=picture,
                                    retweeted_from=retweeted_from,
                                    twitterpage=self.twitterpage,
                                    )
        tweet_update.save()
        # add urls
        if tweet['entities'].has_key("urls"):
            for url in tweet['entities']['urls']:
                try:
                    new_url = Url(url=url[u'expanded_url'].encode('utf-8'))
                    new_url.save()
                    tweet_update.urls.add(new_url)
                except:
                    pass
    # try to get the created time of the last tweet processed
    try:
        self.last_time_in_list = created_time
        self.last_tweet_id_in_list = tweetid
    # if there were no tweets, fall back to a sentinel date
    except:
        "no tweets"
        self.last_time_in_list = datetime.datetime(1900, 1, 1)
def getposts(self, pageid):
    # initialize the page to be updated
    facebookpage = FacebookPage.objects.get(pageid=pageid)
    # print "FB " + facebookpage.username
    # define the page and parameters
    url = "https://graph.facebook.com/v2.0/" + regex.sub("\r|\n| ", "", str(pageid)) + "/posts?"
    headers = urllib.urlencode({
        'limit': self.limit,
        'until': self.firstday,
        'date_format': 'U',
        'fields': 'message,shares,link,id,from,picture,type,likes.summary(true).limit(1),comments.summary(true).limit(1)',
        'access_token': self.token
    })
    # get the first update
    main_dic = self.open_page(url + headers)
    # start a loop for paging
    keep_going = True
    while keep_going == True:
        # check if data is empty
        if main_dic == None or main_dic['data'] == []:
            print "something is very wrong"
            return
        # extract posts if not empty
        for post in main_dic['data']:
            # get created time, stop if it's out of range
            created_time = post['created_time']
            if int(self.lastday) > created_time:
                return
            time = datetime.datetime.fromtimestamp(created_time).replace(tzinfo=utc)
            created_time = str(created_time)
            # get the post id
            postid = post['id'].encode('utf-8')
            # get the type of post
            kind = post['type'].encode('utf-8')
            # get the message
            if "message" in post.keys():
                message = post['message'].encode('utf-8')
            else:
                message = None
            # get picture
            if "picture" in post.keys():
                urllib.urlretrieve(post['picture'].encode('utf-8'), "temp_pic.jpg")
                picture = average_hash("temp_pic.jpg", hash_size=64)
            else:
                picture = None
            # get the number of shares
            if "shares" in post.keys():
                shares = post['shares']['count']
            else:
                shares = 0
            # get the likes
            if "likes" in post.keys():
                likes = post['likes']['summary']['total_count']
            else:
                likes = 0
            # get the comments
            if "comments" in post.keys():
                comments = post['comments']['summary']['total_count']
            else:
                comments = 0
            if kind == "link" and "link" in post.keys():
                if message == None:
                    message = ""
                message = message + " url:" + post['link'].encode('utf-8')
            # update data
            post_update = FacebookPost(postid=postid,
                                       message=message,
                                       kind=kind,
                                       created_time=time,
                                       likes=likes,
                                       shares=shares,
                                       comments=comments,
                                       picture=picture,
                                       facebookpage=facebookpage)
            post_update.save()
            # get link
            try:
                if kind == "link" and "link" in post.keys():
                    link = post['link'].encode('utf-8')
                    new_url = Url(url=link)
                    new_url.save()
                    post_update.urls.add(new_url)
            except:
                print "already there?", new_url
        # determine whether to continue
        nextpage = main_dic['paging']['next'].encode('utf-8')
        main_dic = self.open_page(nextpage)
def test_average_hash(self):
    for photo in self.photos:
        self.assertEqual(photo['average_hash'], average_hash(photo['path']))