def test_pr_122(self):
    """Check that downloaded files carry the post date as their timestamps.

    Feature implemented by @susundberg: the access time and the
    modification time of every downloaded media must equal the
    ``taken_at_timestamp`` reported by Instagram. The check is done for a
    single post download, a picture download and a video download.
    """
    post_code = 'BY77tSfBnRm'
    profile = ProfileLooter(
        'franz_ferdinand', template='{code}', session=self.session)
    post_info = profile.get_post_info(post_code)

    # Single post download through a PostLooter.
    single = PostLooter(post_code, session=self.session, template='{code}')
    single.download(self.destfs)
    details = self.destfs.getdetails('BY77tSfBnRm.jpg')
    for field in ("accessed", "modified"):
        self.assertEqual(
            details.raw["details"][field], post_info['taken_at_timestamp'])

    # First picture of the profile.
    picture = next(m for m in profile.medias() if not m['is_video'])
    profile.download_pictures(self.destfs, media_count=1)
    details = self.destfs.getdetails('{}.jpg'.format(picture['shortcode']))
    for field in ("accessed", "modified"):
        self.assertEqual(
            details.raw["details"][field], picture['taken_at_timestamp'])

    # First video of the profile.
    video = next(m for m in profile.medias() if m['is_video'])
    profile.download_videos(self.destfs, media_count=1)
    details = self.destfs.getdetails('{}.mp4'.format(video['shortcode']))
    for field in ("accessed", "modified"):
        self.assertEqual(
            details.raw["details"][field], video['taken_at_timestamp'])
class TestLogin(unittest.TestCase):
    """Login, logout and authenticated download checks for ProfileLooter."""

    def setUp(self):
        # A fresh looter and an in-memory destination for every test.
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        self.destfs.close()

    def _cookie_exists(self):
        """Tell whether the looter's cookie file is present in its cache."""
        cache = self.looter._cachefs
        return cache.exists(self.looter._COOKIE_FILE)

    def test_login(self):
        """Logging in stores a cookie file; logging out removes it."""
        self.assertFalse(self.looter.logged_in())
        with self.assertRaises(RuntimeError):
            self.looter.medias()
        self.assertFalse(self._cookie_exists())

        try:
            self.looter.login(USERNAME, PASSWORD)
            self.assertTrue(self.looter.logged_in())
            self.assertTrue(self._cookie_exists())
            first_media = next(self.looter.medias())
            self.assertTrue(first_media)
        finally:
            # Always log out so later tests start unauthenticated.
            self.looter.logout()
            self.assertFalse(self._cookie_exists())

    def test_download(self):
        """A logged-in download writes a JPEG named after the template."""
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            content = self.destfs.getbytes('test.jpg')
            # Bytes 6..9 of the file are expected to hold the JFIF marker.
            self.assertEqual(content[6:10], b'JFIF')
        finally:
            self.looter.logout()
def test_issue_006(self):
    """Ensure a private profile fails fast instead of iterating forever.

    Asking for the first media of the "tldr" profile must raise
    ``RuntimeError``.
    """
    with self.assertRaises(RuntimeError):
        private_looter = ProfileLooter("tldr", session=self.session)
        media_iterator = private_looter.medias()
        next(media_iterator)
def post(self, target):
    """Collect the usernames that commented on the posts of ``target``.

    Every media of the profile is looked up and the owner of each comment
    is added to ``users_comment`` (defined outside this method).

    Returns:
        The tuple ``("ok", 201)``.
    """
    profile = ProfileLooter(target)
    for entry in profile.medias():
        details = profile.get_post_info(entry['shortcode'])
        comment_edges = details['edge_media_to_comment']['edges']
        for edge in comment_edges:
            commenter = edge['node']['owner']['username']
            users_comment.add(commenter)
    return "ok", 201
def test_issue_006(self):
    """Ensure a private profile fails fast instead of iterating forever.

    The profile named by the ``IG_USERNAME`` environment variable is
    queried after logging out; fetching its first media must raise
    ``RuntimeError``.
    """
    with self.assertRaises(RuntimeError):
        profile_name = os.getenv("IG_USERNAME")
        own_looter = ProfileLooter(profile_name, session=self.session)
        own_looter.logout()
        media_iterator = own_looter.medias()
        next(media_iterator)
def test_issue_012(self):
    """Check that medias can be restricted to a timeframe.

    Feature request by @paramjitrohit. Exactly two medias of
    "slotfaceofficial" are expected on 2017-02-18.
    """
    profile = ProfileLooter("slotfaceofficial", session=self.session)
    target_day = datetime.date(2017, 2, 18)
    # Using the same day for both bounds selects that single day.
    single_day = [target_day, target_day]
    matching = list(profile.medias(timeframe=single_day))
    self.assertEqual(len(matching), 2)
def test_issue_012(self):
    """Check that medias can be restricted to a timeframe.

    Feature request by @paramjitrohit. The looter's ``pages`` attribute is
    replaced by ``MockPages('nintendo')`` while iterating, and exactly two
    medias are expected on 2018-03-16.
    """
    profile = ProfileLooter("nintendo", session=self.session)
    target_day = datetime.date(2018, 3, 16)
    single_day = [target_day, target_day]

    with contexter.Contexter() as stack:
        # The patch must stay active while the medias are consumed.
        stack << mock.patch.object(profile, 'pages', MockPages('nintendo'))
        matching = list(profile.medias(timeframe=single_day))

    self.assertEqual(len(matching), 2)
def test_issue_194(self):
    """Check the error raised for a user that does not exist.

    Feature request by @raphaelbernardino: downloading from a non-existing
    user must fail with a meaningful ``ValueError`` message rather than a
    cryptic error.
    """
    username = "******"
    expected_message = "user not found: '{}'".format(username)
    missing_looter = ProfileLooter(username)

    with self.assertRaises(ValueError) as raised:
        next(missing_looter.medias())

    self.assertEqual(str(raised.exception), expected_message)
def get_media_links():
    """Return up to three recent medias of the profile, with caching.

    The result is stored on ``InstagramScraper.cache`` and only refreshed
    on the first call and then once every 20 calls; all other calls return
    the cached list.

    Returns:
        A list of at most three dicts with the keys ``'src'``,
        ``'is_video'`` and ``'shortcode'``.
    """
    from itertools import islice

    InstagramScraper.num_hits += 1
    if InstagramScraper.num_hits % 20 != 1:
        return InstagramScraper.cache

    # Restart the counter at 1, not 0: resetting to 0 made the very next
    # call land on ``1 % 20 == 1`` again, so the cache was never served.
    InstagramScraper.num_hits = 1

    looter = ProfileLooter('embracingtheenemy')
    # Only three medias are needed, so stop iterating after the third one
    # instead of walking the whole feed and slicing afterwards.
    InstagramScraper.cache = [
        {
            'src': media['thumbnail_src'],
            'is_video': media['is_video'],
            'shortcode': media['shortcode'],
        }
        for media in islice(looter.medias(), 3)
    ]
    return InstagramScraper.cache
def test_issue_084(self):
    """Check the private-profile warning on profiles with few pictures.

    Thanks to @raphaelbernardino for reporting this bug: a private profile
    holding less than a page worth of pictures must still emit the
    "private" warning.
    """
    expected = u"Profile raphaelbernardino is private, retry after logging in."

    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even ones already emitted earlier.
        warnings.simplefilter('always')
        profile = ProfileLooter("raphaelbernardino", session=self.session)
        list(profile.medias())

    first_warning = caught[0]
    self.assertEqual(six.text_type(first_warning.message), expected)
class TestLogin(unittest.TestCase):
    """Login, logout and authenticated download checks for ProfileLooter.

    A ``requests`` session is opened once for the whole class and its
    User-Agent is installed on ``InstaLooter`` for the duration of the tests.
    """

    @classmethod
    def setUpClass(cls):
        cls.session = requests.Session()
        InstaLooter._user_agent = cls.session.headers["User-Agent"]

    @classmethod
    def tearDownClass(cls):
        cls.session.close()
        # Remove the attribute set in setUpClass.
        del InstaLooter._user_agent

    def setUp(self):
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        self.destfs.close()

    def _has_cookie(self):
        """Tell whether the looter's cookie file is present in its cache."""
        looter = self.looter
        return looter._cachefs.exists(looter._COOKIE_FILE)

    def test_login(self):
        """Logging in stores a cookie file; logging out removes it."""
        self.assertFalse(self.looter.logged_in())
        with self.assertRaises(RuntimeError):
            self.looter.medias()
        self.assertFalse(self._has_cookie())

        try:
            self.looter.login(USERNAME, PASSWORD)
            self.assertTrue(self.looter.logged_in())
            self.assertTrue(self._has_cookie())
            first_media = next(self.looter.medias())
            self.assertTrue(first_media)
        finally:
            # Always log out so later tests start unauthenticated.
            self.looter.logout()
            self.assertFalse(self._has_cookie())

    def test_download(self):
        """A logged-in download writes a JPEG named after the template."""
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            payload = self.destfs.getbytes('test.jpg')
            # Bytes 6..9 of the file are expected to hold the JFIF marker.
            self.assertEqual(payload[6:10], b'JFIF')
        finally:
            self.looter.logout()
def update(event, context):
    """Classify new posts and announce the best candidate.

    Fetches the medias of the "davesfreshpasta" profile between today and
    the most recent stored image date, hands them to ``classify_posts``,
    then posts the top-scoring image through ``Messenger`` unless it was
    already posted (or reposting is enabled).

    Environment:
        REPOST_SOUP: set to ``1``/``true``/``yes``/``on`` (case-insensitive)
            to post the top image even when it is already marked as posted.
        CONFIDENCE: minimum confidence passed to ``queries.top_soup``;
            defaults to ``0.8``.

    Args:
        event: Unused by this function.
        context: Unused by this function.
            NOTE(review): the signature looks like a serverless handler
            entry point - confirm against the deployment configuration.

    Raises:
        ValueError: if ``CONFIDENCE`` is set but is not a valid number.
    """
    start_time = datetime.now()
    print("Starting soup")
    queries = Queries()

    print("configuring looter")
    # https://github.com/althonos/InstaLooter/issues/173
    ProfileLooter._cachefs = fs.open_fs("osfs:///tmp/")
    looter = ProfileLooter("davesfreshpasta")
    print("configured looter")
    print("finished setup")

    last_date = queries.most_recent_image_date()
    timeframe = (start_time.date(), last_date)
    print(f"last post date: {last_date}. Timeframe is {timeframe}")

    posts = looter.medias(timeframe=timeframe)
    if posts:
        classify_posts(posts)

    # Environment variables are always strings: parse them explicitly so
    # that REPOST_SOUP="false" is not treated as enabled and CONFIDENCE has
    # the same numeric type whether or not it is set.
    repost_flag = os.environ.get("REPOST_SOUP", "").strip().lower()
    repost_soup = repost_flag in ("1", "true", "t", "yes", "y", "on")
    confidence = float(os.environ.get("CONFIDENCE", 0.8))

    # TODO: move posting into post iteration loop
    top_image = queries.top_soup(confidence)
    if (not top_image.posted) or repost_soup:
        messenger = Messenger()
        messenger.post_message_to_channel(top_image)
        queries.commit()  # messenger will mark image as posted

    elapsed_time = datetime.now() - start_time
    print(f"Finished soup. Elapsed time {elapsed_time}")
async def update(tg_chatid, ig_profile):
    """Forward the not-yet-sent posts of an Instagram profile to a chat.

    Every media of ``ig_profile`` whose id is not listed in the JSON file
    at ``sent_fp`` is downloaded into a temporary in-memory filesystem and
    sent to ``tg_chatid`` as a video, a photo or an album. After each
    successful send the id is appended to the list and the file is
    rewritten.

    Args:
        tg_chatid: Identifier of the Telegram chat to send to.
        ig_profile: Instagram profile name to check.

    Returns:
        ``True`` if at least one post was sent, ``False`` otherwise
        (including when the profile could not be loaded).
    """
    write(f"\033[2K\rchecking @{ig_profile}…")
    await bot.send_chat_action(tg_chatid, types.ChatActions.TYPING)
    try:
        pl = ProfileLooter(ig_profile)
    except Exception:
        write(f"\033[2K\r\033[31munable to get profile @{ig_profile}\033[0m\n")
        print(tb.format_exc())
        return False

    with open(sent_fp, "r") as sent_file:
        sent = json.load(sent_file)

    sent_something = False
    for j, media in enumerate(pl.medias()):
        i = media["id"]
        sc = media["shortcode"]
        write(f"\033[2K\rchecking @{ig_profile} ({j}|{i}|{sc})")
        if i not in sent:
            write(": \033[sgetting post…")
            _pl = PostLooter(sc)
            try:
                info = _pl.get_post_info(sc)
            except Exception:
                # because the library I use can randomly throw errors while
                # getting stuff…
                write("\033[u\033[0K\033[31munable to get post\033[0m\n")
                print(tb.format_exc())
                continue

            caption = "\n".join(
                edge["node"]["text"]
                for edge in info["edge_media_to_caption"]["edges"])

            # Named ``memfs`` so it cannot shadow a module called ``fs``.
            with MemoryFS() as memfs:
                if media["is_video"]:
                    await bot.send_chat_action(
                        tg_chatid, types.ChatActions.RECORD_VIDEO)
                    _pl.download_videos(memfs, media_count=1)
                    func = bot.send_video
                    fn = memfs.listdir("./")[0]
                    await bot.send_chat_action(
                        tg_chatid, types.ChatActions.UPLOAD_VIDEO)
                elif media["__typename"].lower() == "graphimage":
                    await bot.send_chat_action(
                        tg_chatid, types.ChatActions.UPLOAD_PHOTO)
                    _pl.download_pictures(memfs, media_count=1)
                    func = bot.send_photo
                    fn = memfs.listdir("./")[0]
                elif media["__typename"].lower() == "graphsidecar":
                    await bot.send_chat_action(
                        tg_chatid, types.ChatActions.UPLOAD_PHOTO)
                    _pl.download_pictures(memfs)
                    fn = tuple(memfs.listdir("./"))
                    if len(fn) == 1:
                        # An album with a single picture is sent as a photo.
                        func = bot.send_photo
                        fn = fn[0]
                    else:
                        func = bot.send_media_group
                else:
                    await bot.send_message(
                        tg_chatid,
                        f"Oh-oh. I've encountered a new post type!\nPlease tell my developer, so he can tell me what I should do with a {media}."
                    )
                    print("\n\033[31mUNKNOWN MEDIA TYPE AAAAA\033[0m", media)
                    break

                is_album = isinstance(fn, tuple)
                if is_album:
                    write("\033[u\033[0Ksending album…")
                    handles = [memfs.openbin(_fn) for _fn in fn]
                    _media = types.input_media.MediaGroup()
                    for handle in handles:
                        _media.attach_photo(handle)
                else:
                    write("\033[u\033[0Ksending file…")
                    _media = memfs.openbin(fn)
                    handles = [_media]

                try:
                    # telegram media captions have a character limit of 200
                    # chars & I want to have a buffer
                    if len(caption) > 100:
                        caption = caption[:100] + "[…]"
                    # quote_html returns the escaped text; the result has to
                    # be kept, otherwise '<', '>' or '&' in a caption reach
                    # Telegram unescaped under HTML parse mode.
                    caption = markdown.quote_html(caption)
                    text = f"{caption}\n→<a href=\"https://www.instagram.com/p/{sc}\">original post</a>"

                    if is_album:
                        # Albums are sent first and the caption follows as a
                        # reply to the last message of the group.
                        msg_id = (await func(tg_chatid,
                                             _media))[-1]["message_id"]
                        await bot.send_message(
                            tg_chatid,
                            text,
                            reply_to_message_id=msg_id,
                            parse_mode=types.ParseMode.HTML)
                    else:
                        await func(tg_chatid,
                                   _media,
                                   caption=text,
                                   parse_mode=types.ParseMode.HTML)
                except exceptions.BadRequest:
                    write(
                        "\033[u\033[0K\033[31mskipped\033[0m\nGot Bad Request while trying to send message.\n"
                    )
                except exceptions.RetryAfter:
                    write(
                        "\nMEEP MEEP FLOOD CONTROL - YOU'RE FLOODING TELEGRAM\nstopping sending messages & waiting for next cycle…\n"
                    )
                    break
                else:
                    sent.append(i)
                    write("\033[u\033[0Ksaving sent messages…\033[0m")
                    # A dedicated name keeps the media handles reachable so
                    # they can still be closed below.
                    with open(sent_fp, "w+") as sent_file:
                        json.dump(sent, sent_file)
                    write("\033[u\033[0K\033[32msent\033[0m\n")
                    sent_something = True
                finally:
                    # Close the media handles on every path, including the
                    # early exit on flood control.
                    for handle in handles:
                        handle.close()

        # sometimes the page has to be reloaded, which would prolong the time
        # the checking post… message would be displayed if I didn't do this
        write(f"\033[2K\rchecking @{ig_profile}…")
    return sent_something
# Make sure both output folders exist below photo_path.
os.makedirs(photo_path + "/probably_not_soup", exist_ok=True)
os.makedirs(photo_path + "/probably_soup", exist_ok=True)
queries = Queries()
print("configuring looter")
# https://github.com/althonos/InstaLooter/issues/173
looter = ProfileLooter("davesfreshpasta")
print("configured looter")
start_time = datetime.now()
last_date = queries.oldest_image_date()
# Timeframe runs from today's date to the oldest stored image date.
timeframe = (start_time.date(), last_date)
posts = looter.medias(timeframe)
# https://github.com/althonos/InstaLooter/issues/171
# NOTE(review): the handler belonging to this ``try`` is not visible in this
# excerpt - confirm what it catches before changing the body.
try:
    with classifier.Classifier() as c:
        for post in posts:
            url = post['display_url']
            # Skip images that are already known to the database.
            if queries.image_exists(url):
                print(f"\talready classified, skipping {url}")
                continue
            timestamp = post['taken_at_timestamp']
            print(url)
            image = Image(url=url, post_date=timestamp)
            c.classify(image)
def scrape_profile(
        username,
        scrape_posts=True):  # scrape profile from a given username (not link)
    """Build a dictionary describing an Instagram profile.

    Posts are scraped through ``scrape_post_from_link``; profile-level
    fields are copied from the first scraped post, and follower statistics
    are added from ``InstagramAPI.searchUsername`` when that call succeeds.

    Args:
        username: Profile name (not a link).
        scrape_posts: When falsy only a single post is scraped, otherwise
            up to ``MAX_POST_COUNT`` posts.

    Returns:
        A dict with the keys ``username``, ``z_posts``, ``full_name``,
        ``is_verified``, ``id``, ``profile_pic_url``, ``profile_pic``,
        ``is_private``, ``followers``, ``following``, ``description``,
        ``external_url_01``, ``external_url`` and ``is_business``.
        ``post_count`` is only present when the first post provides it.
    """
    max_post_scrape = MAX_POST_COUNT if scrape_posts else 1

    looter = ProfileLooter(username)
    user_dict = {'username': username}

    post_list = []
    for post_count, media in enumerate(looter.medias(), start=1):
        code = str(media['shortcode'])
        post_list += scrape_post_from_link(_baseurl + code)
        if post_count == max_post_scrape:
            break
    user_dict['z_posts'] = post_list

    # A profile without any scraped post (empty or private) used to crash on
    # ``post_list[0]``; fall back to an empty mapping so that every field
    # keeps its default instead.
    first_post = post_list[0] if post_list else {}

    def copy_field(field, source_key):
        """Copy one value from the first post; report whether it existed."""
        try:
            user_dict[field] = first_post[source_key]
        except KeyError as e:
            print(e)
            return False
        return True

    user_dict['full_name'] = ""
    copy_field('full_name', 'user_full_name')

    user_dict['is_verified'] = False
    copy_field('is_verified', 'user_verified')

    user_dict['id'] = ''
    copy_field('id', 'user_id')

    user_dict['profile_pic_url'] = ""
    user_dict['profile_pic'] = 1
    if copy_field('profile_pic_url', 'user_profile_pic_url'):
        # The picture counts as missing when the placeholder name appears as
        # one of the URL components.
        url_parts = re.split(r"[/?]+", user_dict['profile_pic_url'])
        if EMPTY_PROFILE_PIC in url_parts:
            user_dict['profile_pic'] = 0

    # No default here: the key stays absent when the post lacks the value.
    copy_field('post_count', 'user_post_count')

    user_dict['is_private'] = False
    copy_field('is_private', 'user_private')

    user_dict['followers'] = 0
    user_dict['following'] = 0
    user_dict['description'] = ""
    user_dict['external_url_01'] = 0
    user_dict['external_url'] = ""
    user_dict['is_business'] = False
    try:
        InstagramAPI.searchUsername(username)
        if InstagramAPI.LastResponse.status_code == 200:
            user = InstagramAPI.LastJson['user']
            user_dict['followers'] = user['follower_count']
            user_dict['following'] = user['following_count']
            user_dict['description'] = user['biography']
            user_dict['is_business'] = user['is_business']
            if len(user['external_url']) > 0:
                user_dict['external_url_01'] = 1
                user_dict['external_url'] = user['external_url']
    except Exception as e:
        # Best effort: the statistics keep their defaults when the API
        # lookup fails for any reason.
        print(e)
    return user_dict
# print(img_src) os.system('wget -q -O '+ThumbsFilePath+followinglist[i]+'.jpg '+img_src[i]+' &') #Close selenium driver.quit() #Login into instagram looter=ProfileLooter("instagram") looter.login(username_,password_) #Loop through all the people who are being followed and grab their photo urls for i in followinglist: try: print(i) i = i.strip() looter=ProfileLooter(i) with open(UserFilePath+i+".txt", "a") as output: for media in looter.medias(): for link in instalinks(media,looter): if not (os.path.isfile(UserFilePath+i+"/"+link.split('/')[-1])): print(link) output.write("{}\n".format(link)) else: print("Image already exists") #Wget from the file os.system('(wget -q -i '+UserFilePath+i+".txt"+' -P '+UserFilePath+i+';rm '+UserFilePath+i+".txt"') &') #Try and not get rate limited by instagram rnd=randint(110,147) print("Waiting for:",str(rnd),"seconds") time.sleep(rnd) except Exception as e: print("Rate limited on",i,"waiting for 2 minutes before restarting with next user this user has been added to the back of the line") followinglist.append(i)