def download_insta(message):
    """Download an Instagram post or profile and send it back to the chat.

    The command text is expected to be either ``/insta post <post-url>``
    (the picture of that post is sent as a photo) or ``/insta <username>``
    (the whole profile is downloaded, zipped and sent as a document).

    Any failure is printed and reported to the user with a generic error
    message.  Temporary files whose name contains this request's identifier
    are removed under ``bot_path`` in every case.

    Args:
        message: Incoming bot message; ``message.text`` and
            ``message.chat.id`` are read.
    """
    import glob  # local import: the rest of the file does not need it

    usage = "L' utilizzo del comando è /insta post urldelpost o /insta nomeutente"
    file_identifier = None
    try:
        print("download_insta")
        file_identifier = str(message.chat.id) + str(int(time.time()))
        work_dir = bot_path + file_identifier

        parts = message.text.split(" ")
        if len(parts) < 2:
            risposta(message, usage)
            return

        # Compare the keyword exactly: a substring test would treat a user
        # name such as "postman" as the "post" keyword.
        post = parts[1] == "post"
        if post and len(parts) < 3:
            risposta(message, usage)
            return
        messaggio = parts[2] if post else parts[1]

        # Filesystem work goes through os/shutil/glob instead of shell
        # strings, so paths containing spaces or shell metacharacters are safe.
        if not os.path.isdir(work_dir):
            os.makedirs(work_dir)
        username = shlex.split(messaggio)

        if post:
            # The post shortcode is taken as the first path segment that
            # starts with an uppercase letter (same pattern as before).
            shortcode = re.search('[A-Z][^/]+', str(username[0])).group()
            InstaLooter(directory=work_dir).download_post(shortcode)

            pictures = sorted(glob.glob(work_dir + "/*.jpg"))
            if not pictures:
                raise IOError("no picture was downloaded for " + shortcode)
            photo_path = work_dir + "/" + file_identifier + ".jpg"
            # A post may contain several pictures; send the first one.
            shutil.move(pictures[0], photo_path)

            bot.send_chat_action(message.chat.id, 'upload_photo')
            with open(photo_path, "rb") as photo:
                bot.send_photo(message.chat.id, photo)
        else:
            InstaLooter(directory=work_dir, profile=str(username[0])).download()
            shutil.make_archive(work_dir, 'zip', work_dir)
            bot.send_chat_action(message.chat.id, 'upload_document')
            with open(work_dir + ".zip", "rb") as archive:
                bot.send_document(message.chat.id, archive)
    except Exception as e:
        print(e)
        risposta(message, "Si è verificato un errore, riprova")
    finally:
        # Remove the working directory and the zip archive, if any.
        if file_identifier is not None:
            for leftover in glob.glob(bot_path + "*" + file_identifier + "*"):
                if os.path.isdir(leftover):
                    shutil.rmtree(leftover, ignore_errors=True)
                else:
                    try:
                        os.remove(leftover)
                    except OSError:
                        pass
def new_looter(*args, **kwargs):
    """Replace the module-level ``looter`` with a freshly built InstaLooter.

    The new instance is stored in the global ``looter`` first; it is then
    logged in only when both ``INSTA_USERNAME`` and ``INSTA_PASSWORD`` are
    truthy.

    Args:
        *args: Positional arguments forwarded unchanged to
            :class:`instaLooter.core.InstaLooter`.
        **kwargs: Keyword arguments forwarded unchanged to
            :class:`instaLooter.core.InstaLooter`.
    """
    global looter
    looter = InstaLooter(*args, **kwargs)

    # Without a full set of credentials the looter stays anonymous.
    if not (INSTA_USERNAME and INSTA_PASSWORD):
        return
    looter.login(INSTA_USERNAME, INSTA_PASSWORD)
def getInstaLinks(username):
    """Collect image URLs from an Instagram profile, up to the configured limit.

    Videos are skipped.  At most ``cfg.instaLimit()`` URLs are returned; the
    previous ``i > limit`` check let one extra image through.

    Args:
        username: Profile name handed to ``InstaLooter``.

    Returns:
        A list with the ``display_src`` URL of each collected image, in the
        order the looter yields them.
    """
    looter = InstaLooter(profile=username)
    limit = cfg.instaLimit()  # read once instead of on every iteration
    images = []
    for media in looter.medias():
        if len(images) >= limit:
            break
        if media['is_video']:
            continue
        console.subtask("Got Image: {0}".format(media['display_src'].strip()[:90]))
        images.append(media['display_src'])
    return images
def __init__(self, userName, userFolder):
    """Load the profile metadata for ``userName`` and start scanning it.

    Args:
        userName: Instagram profile name; passed to ``InstaLooter`` and used
            to build ``profileUrl``.
        userFolder: Stored as ``self.userFolder`` for later use.

    Raises:
        KeyError: If the profile metadata lacks ``full_name``,
            ``profile_pic_url`` or ``is_private``.
    """
    self.donePicturesQueue = queue.Queue()
    self.userName = userName
    self.userFolder = userFolder
    self.looter = InstaLooter(profile=userName)

    # Fetch the metadata once and reuse it for all three fields instead of
    # calling get_metadata() separately for each.
    metadata = self.looter.get_metadata()
    self.fullName = metadata['full_name']
    self.profilePicture = metadata['profile_pic_url']
    self.profileUrl = '{}/{}'.format(INSTAGRAM_URL, userName)
    self.isPrivate = metadata['is_private']

    self.pics_dic = {}
    self.sum = 0
    self.counter = 0
    self.scan()
from instaLooter import InstaLooter
import re
import os

# Walk every post of one Instagram profile and build one dict per post
# (photo URL, caption, hashtags, post URL, owner), collected in post_list.
insgtagram_user = "******"
looter = InstaLooter(profile=insgtagram_user)
post_list = []
_baseurl = "https://www.instagram.com/p"

for media in looter.medias(with_pbar=True):
    post_dict = {}
    post_info = looter.get_post_info(media['code'])
    post_dict['photo_url'] = post_info['display_url']

    # NOTE(review): the commenter names and texts are gathered but not
    # stored in post_dict by the visible code - confirm whether they should be.
    comment_user = []
    comment_text = []
    for component in post_info['edge_media_to_comment']['edges']:
        comment_user.append(component['node']['owner']['username'])
        comment_text.append(component['node']['text'])

    # A post without a caption has no usable first edge; only the lookup
    # errors that situation produces are caught, not every exception.
    try:
        caption = post_info['edge_media_to_caption']['edges'][0]['node']['text']
    except (KeyError, IndexError, TypeError):
        caption = []  # kept as an empty list, as before, for missing captions

    post_dict['caption'] = caption
    post_dict['hashtag'] = re.findall(r"#(\w+)", caption) if caption else []
    # Build the URL with "/" explicitly: os.path.join would insert a
    # backslash on Windows.
    post_dict['insta_url'] = "{}/{}".format(_baseurl, post_info['shortcode'])
    post_dict['instagram_id'] = post_info['owner']['username']

    # Without this the collected posts were discarded on every iteration.
    post_list.append(post_dict)
# Python 2 script: download the pictures of one Instagram account into
# "imagens" and write each post's caption to its own file under "legendas".

# Take the account from the command line; ask for it when the argument is
# missing or empty (indexing sys.argv[1] directly raised IndexError).
conta = sys.argv[1] if len(sys.argv) > 1 else ""
if not conta:
    conta = raw_input("Informe a conta do instagram: ")

pasta_imagens = "imagens"
if not os.path.exists(pasta_imagens):
    os.makedirs(pasta_imagens)

pasta_legendas = "legendas"
if not os.path.exists(pasta_legendas):
    os.makedirs(pasta_legendas)

looter = InstaLooter(profile=conta, get_videos=False,
                     url_generator=resizer(640), directory=pasta_imagens)

print("Baixando imagens...")
looter.download()
print("Salvo na pasta %s" % pasta_imagens)

print("Baixando legendas...")
# enumerate() gives every caption its own file; the counter used to stay at
# 0, so each caption overwrote caption0.txt.
for counter, media in enumerate(looter.medias()):
    filename = "%s/caption%d.txt" % (pasta_legendas, counter)
    with open(filename, "w") as arq:
        try:
            arq.write(media["caption"].encode('utf8', 'ignore'))
        except KeyError:
            # Posts without a caption still get an (empty) file.
            pass
def crawlTag(self, tag, goal=0):
    """Crawl posts of hashtag ``tag`` made after the last recorded crawl.

    The last crawled timestamp is read from the file ``tag`` inside
    ``self.timelog_dir`` (0 is used when its content is not a number).  The
    time at which this crawl started is stored in ``self.starttime_dict``.

    Args:
        tag: Hashtag to crawl; also the name of the time-log file.
        goal: Stop once this many posts were collected.  With the default
            of 0 the check never matches, so there is no cap.

    Returns:
        List of the row dicts produced by ``self.makeRowDict`` for each
        collected post.
    """
    log_path = os.path.join(self.timelog_dir, tag)
    with open(log_path, 'r') as time_log_file:
        raw_timestamp = time_log_file.read().strip()
    try:
        last_crawled_timestamp = float(raw_timestamp)
    except ValueError:
        last_crawled_timestamp = 0

    start_time = datetime.now()
    print("{} : {} starts".format(tag, str(start_time)))
    print("Last crawled date : {}\n".format(
        str(datetime.fromtimestamp(last_crawled_timestamp))))
    self.starttime_dict[tag] = start_time

    collected = []
    looter = InstaLooter(hashtag=tag)
    for media in looter.medias():
        code = media['code']

        # Posts whose details cannot be fetched or converted are skipped.
        try:
            postDict = looter.get_post_info(code)
        except (KeyError, AttributeError):
            continue
        try:
            rowDict = self.makeRowDict(postDict)
        except IndexError:
            continue
        if not rowDict:
            continue

        print("{} / {}".format(postDict['date'], last_crawled_timestamp))
        # Stop at the first post that is not newer than the previous crawl.
        if float(postDict['date']) <= last_crawled_timestamp:
            break

        collected.append(rowDict)
        count = len(collected)
        if count % 500 == 0:
            print("{} : {} counts at {}\n".format(tag, str(count), str(datetime.now())))
        # Finish Point
        if count == goal:
            break

    print("{} : {} ends \n".format(tag, str(datetime.now())))
    looter.__del__()
    return collected
def image_crawler():
    """Handle the image-crawler form and record crawled images in DynamoDB.

    Reads ``target``, ``num`` and ``gridRadios`` from the submitted form and
    runs the crawler selected by ``gridRadios``:

    * ``Greedy``    - GreedyImageCrawler over ``target`` as a domain.
    * ``Instagram`` - InstaLooter over ``target`` as a profile; up to ``num``
      non-video pictures are uploaded to S3.
    * ``Google``    - GoogleImageCrawler with ``target`` as the keyword.

    Each image name is stored in the ``Images`` table under the session's
    username.  Always renders the crawler form again.
    """
    table = dynamodb.Table('Images')
    target = request.form.get('target')
    num = int(request.form.get('num'))
    radio = request.form.get('gridRadios')

    def record(image_name):
        # One table row per image, owned by the current session user.
        table.put_item(Item={
            'username': session['username'],
            'imagename': image_name,
        })

    if radio == 'Greedy':
        url = str(target)
        greedy_crawler = GreedyImageCrawler(
            storage={'root_dir': 'downloaded_pictures'})
        greedy_crawler.crawl(domains=url, max_num=num,
                             min_size=(200, 200), max_size=None)
        # NOTE(review): file_names is not defined in this function;
        # presumably a module-level name - confirm.
        print(file_names)
        for file_name in file_names:
            record(file_name)

    elif radio == 'Instagram':
        looter = InstaLooter(directory="/tmp/", profile=target)
        looter.download_pictures(media_count=num)
        uploaded = 0
        for media in looter.medias():
            print(media)
            if uploaded >= num:
                break
            if media['is_video']:
                continue
            uploaded += 1
            url = media['display_src']
            image_name = media['id'] + '.jpg'
            s3 = boto3.client('s3')
            fp = io.BytesIO(urlopen(url).read())
            s3.upload_fileobj(fp, 'ece1779project', image_name)
            record(image_name)

    elif radio == 'Google':
        google_crawler = GoogleImageCrawler(
            parser_threads=2,
            downloader_threads=4,
            storage={'root_dir': 'downloaded_pictures'})
        google_crawler.crawl(keyword=target, max_num=num,
                             date_min=None, date_max=None,
                             min_size=(200, 200), max_size=None)
        for file_name in file_names:
            record(file_name)

    return render_template("/imagecrawler/form.html")
class Instagram:
    """Download the profile picture and all non-video posts of one profile.

    Construction does all the work: metadata is read, ``scan()`` is run and
    the pictures are written into ``userFolder`` by ``THREAD_COUNT`` worker
    threads.

    Attributes set by the class:
        donePicturesQueue: queue receiving one ``{path: url}`` dict per
            written picture.
        pics_dic: mapping of written picture path to its source URL.
        counter: number of pictures queued for download.
        sum: number of pictures written so far.
    """

    def __init__(self, userName, userFolder):
        """Read the profile metadata of ``userName`` and start the scan.

        Args:
            userName: Instagram profile name.
            userFolder: Existing folder the pictures are written into.
        """
        self.donePicturesQueue = queue.Queue()
        self.userName = userName
        self.userFolder = userFolder
        self.looter = InstaLooter(profile=userName)

        # Fetch the metadata once and reuse it for all three fields instead
        # of calling get_metadata() separately for each.
        metadata = self.looter.get_metadata()
        self.fullName = metadata['full_name']
        self.profilePicture = metadata['profile_pic_url']
        self.profileUrl = '{}/{}'.format(INSTAGRAM_URL, userName)
        self.isPrivate = metadata['is_private']
        # metadata['connected_fb_page'] holds the connected Facebook page
        # and can be None; it is currently not stored.

        self.pics_dic = {}
        self.sum = 0
        self.counter = 0
        self.scan()

    def scan(self):
        """Write the profile picture, then download every non-video post.

        Returns once all worker threads have finished, so ``pics_dic``,
        ``sum`` and ``donePicturesQueue`` are complete afterwards.
        """
        http = urllib3.PoolManager()

        # Write the profile picture
        profile_path = os.path.join(self.userFolder, 'profile.jpg')
        with open(profile_path, 'wb') as profile_picture:
            profile_picture.write(http.request("GET", self.profilePicture).data)

        if self.looter.get_metadata()['media']['count'] <= 0:
            return  # No pictures

        availablePicturesQueue = queue.Queue()
        for media in self.looter.medias():
            if not media['is_video']:
                availablePicturesQueue.put(media)
                self.counter += 1

        # Keep the started workers so they can actually be joined; the list
        # used to stay empty, which made the join loop a no-op.
        threads = []
        for _ in range(THREAD_COUNT):
            worker = threading.Thread(target=self.write_picture,
                                      args=(availablePicturesQueue, ))
            worker.start()
            threads.append(worker)
        for worker in threads:
            worker.join()

    def write_picture(self, availablePicturesQueue):
        """Worker loop: download queued medias until the queue is empty.

        Args:
            availablePicturesQueue: ``queue.Queue`` of media dicts with the
                keys ``is_video`` and ``display_src``.
        """
        http = urllib3.PoolManager()
        while True:
            # get_nowait() avoids the race of "check empty(), then get()":
            # another worker could take the last item in between and leave
            # this one blocked forever.
            try:
                media = availablePicturesQueue.get_nowait()
            except queue.Empty:
                return

            if media['is_video']:
                # Videos are not downloaded (their URL would come from
                # looter.get_post_info(media['code'])['video_url']).
                availablePicturesQueue.task_done()
                continue

            url = media['display_src']
            dataToWrite = http.request("GET", url).data
            # The file name is made unique-ish from the thread id and a
            # random factor; a collision is only reported, not avoided.
            fileName = '{}{}.jpg'.format(
                self.userName,
                str(threading.current_thread().ident * randint(1, 10000000)))
            picturePath = os.path.join(self.userFolder, fileName)
            if os.path.exists(picturePath):
                print("if u see this the programmer is an idiot " + picturePath)
            with open(picturePath, 'wb') as picture:
                picture.write(dataToWrite)
            availablePicturesQueue.task_done()

            self.donePicturesQueue.put({picturePath : url})
            self.pics_dic[picturePath] = url
            # NOTE(review): task_done() right after put() balances the
            # unfinished-task count, so a consumer must not call it again.
            self.donePicturesQueue.task_done()
            # NOTE(review): "+=" on an attribute is not atomic across
            # threads; guard with a lock if an exact count matters.
            self.sum += 1