예제 #1
0
    def test_pr_122(self):
        """Check that downloaded media carry their Instagram timestamp.

        Feature implemented by @susundberg: the access time and the
        modification time of a downloaded media are set according to
        its IG date. The check is repeated for a single post download,
        a picture download and a video download.
        """
        shortcode = 'BY77tSfBnRm'

        def check_times(filename, expected):
            # Both timestamps are read from the raw "details" namespace
            # reported by the destination filesystem.
            details = self.destfs.getdetails(filename).raw["details"]
            self.assertEqual(details["accessed"], expected)
            self.assertEqual(details["modified"], expected)

        looter = ProfileLooter(
            'franz_ferdinand', template='{code}', session=self.session)
        info = looter.get_post_info(shortcode)

        # Single post downloaded through a PostLooter.
        single_post = PostLooter(
            shortcode, session=self.session, template='{code}')
        single_post.download(self.destfs)
        check_times('BY77tSfBnRm.jpg', info['taken_at_timestamp'])

        # First non-video media downloaded through download_pictures.
        picture = next(m for m in looter.medias() if not m['is_video'])
        looter.download_pictures(self.destfs, media_count=1)
        check_times(
            '{}.jpg'.format(picture['shortcode']),
            picture['taken_at_timestamp'])

        # First video media downloaded through download_videos.
        video = next(m for m in looter.medias() if m['is_video'])
        looter.download_videos(self.destfs, media_count=1)
        check_times(
            '{}.mp4'.format(video['shortcode']),
            video['taken_at_timestamp'])
예제 #2
0
class TestLogin(unittest.TestCase):
    """Login, logout and authenticated download checks for ProfileLooter."""

    def setUp(self):
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        self.destfs.close()

    def _cookie_file_exists(self):
        """Tell whether the looter's cookie file is present in its cache fs."""
        cache = self.looter._cachefs
        return cache.exists(self.looter._COOKIE_FILE)

    def test_login(self):
        # Before logging in: not logged in, medias() raises, no cookie file.
        self.assertFalse(self.looter.logged_in())
        with self.assertRaises(RuntimeError):
            self.looter.medias()
        self.assertFalse(self._cookie_file_exists())

        try:
            self.looter.login(USERNAME, PASSWORD)
            self.assertTrue(self.looter.logged_in())
            self.assertTrue(self._cookie_file_exists())
            first_media = next(self.looter.medias())
            self.assertTrue(first_media)
        finally:
            # Logging out is expected to remove the cookie file again.
            self.looter.logout()
            self.assertFalse(self._cookie_file_exists())

    def test_download(self):
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            # Bytes 6..9 of the downloaded file must be the JFIF marker.
            header = self.destfs.getbytes('test.jpg')[6:10]
            self.assertEqual(header, b'JFIF')
        finally:
            self.looter.logout()
예제 #3
0
 def test_issue_006(self):
     """Ensure a private profile does not make instalooter loop forever.

     Creating the looter and requesting its first media is expected to
     raise ``RuntimeError``.
     """
     with self.assertRaises(RuntimeError):
         private_looter = ProfileLooter("tldr", session=self.session)
         media_iterator = private_looter.medias()
         next(media_iterator)
예제 #4
0
    def post(self, target):
        """Record the usernames of everyone who commented on a profile's posts.

        For every media of the profile, the post info is fetched and the
        username of each comment owner is added to ``users_comment``
        (a collection defined outside this method).

        Args:
            target: Instagram profile name handed to ``ProfileLooter``.

        Returns:
            The tuple ``("ok", 201)`` once all medias have been processed.
        """
        looter = ProfileLooter(target)

        for media in looter.medias():
            post_info = looter.get_post_info(media['shortcode'])
            for comment in post_info['edge_media_to_comment']['edges']:
                user = comment['node']['owner']['username']
                users_comment.add(user)

        # The return used to sit inside the loop above, which stopped after
        # the first media and returned None for a profile without medias.
        return "ok", 201
예제 #5
0
 def test_issue_006(self):
     """Ensure a private profile does not make instalooter loop forever.

     The profile name is read from the ``IG_USERNAME`` environment
     variable; after logging out, requesting the first media is expected
     to raise ``RuntimeError``.
     """
     with self.assertRaises(RuntimeError):
         own_looter = ProfileLooter(
             os.getenv("IG_USERNAME"), session=self.session)
         own_looter.logout()
         media_iterator = own_looter.medias()
         next(media_iterator)
예제 #6
0
    def test_issue_012(self):
        """Check that medias can be restricted to a timeframe.

        Feature request by @paramjitrohit: allows downloading pictures and
        videos only within a timeframe. Exactly two medias are expected
        for the single day used here.
        """
        target_day = datetime.date(2017, 2, 18)
        looter = ProfileLooter("slotfaceofficial", session=self.session)
        # The same day is used as both bounds of the timeframe.
        matching = looter.medias(timeframe=[target_day, target_day])
        media_count = sum(1 for _ in matching)
        self.assertEqual(media_count, 2)
예제 #7
0
    def test_issue_012(self):
        """Check that medias can be restricted to a timeframe.

        Feature request by @paramjitrohit: allows downloading pictures and
        videos only within a timeframe. The looter's ``pages`` attribute is
        patched with a ``MockPages`` instance while medias are listed.
        """
        target_day = datetime.date(2018, 3, 16)
        looter = ProfileLooter("nintendo", session=self.session)
        fake_pages = MockPages('nintendo')
        with contexter.Contexter() as stack:
            stack << mock.patch.object(looter, 'pages', fake_pages)
            # The same day is used as both bounds of the timeframe.
            matching = looter.medias(timeframe=[target_day, target_day])
            media_count = sum(1 for _ in matching)
        self.assertEqual(media_count, 2)
예제 #8
0
    def test_issue_194(self):
        """Check the error raised for a user that does not exist.

        Feature request by @raphaelbernardino: when trying to download from
        a non-existing user, display a meaningful message instead of a
        cryptic error.
        """
        username = "******"
        expected_message = "user not found: '{}'".format(username)
        looter = ProfileLooter(username)
        with self.assertRaises(ValueError) as raised:
            next(looter.medias())
        self.assertEqual(str(raised.exception), expected_message)
예제 #9
0
    def get_media_links():
        """Return descriptors of the first three medias, cached between calls.

        The result is stored on ``InstagramScraper.cache`` and refreshed on
        every 20th call, counted with ``InstagramScraper.num_hits``.

        Returns:
            A list of at most three dicts with the keys ``'src'``,
            ``'is_video'`` and ``'shortcode'``.
        """
        from itertools import islice

        # NOTE(review): assumes num_hits starts at 0 so that the very first
        # call populates the cache - confirm against the class definition.
        hits = InstagramScraper.num_hits
        if hits % 20 != 0:
            InstagramScraper.num_hits = (hits + 1) % 20
            return InstagramScraper.cache

        # The counter used to be reset to 0 right after reaching 1, so every
        # call looked like the first one and the cache was never served.
        looter = ProfileLooter('embracingtheenemy')
        # islice stops after three medias instead of walking the whole
        # profile and slicing the resulting list afterwards.
        InstagramScraper.cache = [{
            'src': media['thumbnail_src'],
            'is_video': media['is_video'],
            'shortcode': media['shortcode']
        } for media in islice(looter.medias(), 3)]
        # Count the hit only after a successful refresh, so that a failed
        # fetch is retried on the next call.
        InstagramScraper.num_hits = (hits + 1) % 20
        return InstagramScraper.cache
예제 #10
0
    def test_issue_084(self):
        """Check the warning emitted for a small private profile.

        Thanks to @raphaelbernardino for reporting this bug: private
        profiles with few pictures (less than a page worth) must raise the
        private warning as expected.
        """
        expected = u"Profile raphaelbernardino is private, retry after logging in."

        with warnings.catch_warnings(record=True) as caught:
            # Record every warning, even ones that were already emitted.
            warnings.simplefilter('always')
            looter = ProfileLooter("raphaelbernardino", session=self.session)
            for _ in looter.medias():
                pass

        first_message = six.text_type(caught[0].message)
        self.assertEqual(first_message, expected)
예제 #11
0
class TestLogin(unittest.TestCase):
    """Login, logout and authenticated download checks for ProfileLooter."""

    @classmethod
    def setUpClass(cls):
        # Expose the session's user agent on InstaLooter for these tests.
        cls.session = requests.Session()
        InstaLooter._user_agent = cls.session.headers["User-Agent"]

    @classmethod
    def tearDownClass(cls):
        cls.session.close()
        del InstaLooter._user_agent

    def setUp(self):
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        self.destfs.close()

    def _cookie_file_exists(self):
        """Tell whether the looter's cookie file is present in its cache fs."""
        cache = self.looter._cachefs
        return cache.exists(self.looter._COOKIE_FILE)

    def test_login(self):
        # Before logging in: not logged in, medias() raises, no cookie file.
        self.assertFalse(self.looter.logged_in())
        with self.assertRaises(RuntimeError):
            self.looter.medias()
        self.assertFalse(self._cookie_file_exists())

        try:
            self.looter.login(USERNAME, PASSWORD)
            self.assertTrue(self.looter.logged_in())
            self.assertTrue(self._cookie_file_exists())
            first_media = next(self.looter.medias())
            self.assertTrue(first_media)
        finally:
            # Logging out is expected to remove the cookie file again.
            self.looter.logout()
            self.assertFalse(self._cookie_file_exists())

    def test_download(self):
        try:
            self.looter.login(USERNAME, PASSWORD)
            self.looter.download(self.destfs)
            self.assertTrue(self.destfs.exists('test.jpg'))
            # Bytes 6..9 of the downloaded file must be the JFIF marker.
            header = self.destfs.getbytes('test.jpg')[6:10]
            self.assertEqual(header, b'JFIF')
        finally:
            self.looter.logout()
예제 #12
0
def update(event, context):
    """Classify recent posts and announce the top-ranked image.

    Lists the medias of the "davesfreshpasta" profile between today and
    the most recent stored image date, hands them to ``classify_posts``,
    then posts the image returned by ``queries.top_soup`` unless it was
    already posted.

    Environment variables:
        REPOST_SOUP: when set to anything other than an empty string,
            "0", "false", "no" or "off" (case-insensitive), the top image
            is posted even if it was posted before.
        CONFIDENCE: confidence passed to ``queries.top_soup``, parsed as a
            float; defaults to 0.8 when unset or not a valid number.

    Args:
        event: Not used by this function.
        context: Not used by this function.
    """
    start_time = datetime.now()

    print("Starting soup")

    queries = Queries()

    print("configuring looter")
    # https://github.com/althonos/InstaLooter/issues/173
    ProfileLooter._cachefs = fs.open_fs("osfs:///tmp/")
    looter = ProfileLooter("davesfreshpasta")
    print("configured looter")

    print("finished setup")

    last_date = queries.most_recent_image_date()
    timeframe = (start_time.date(), last_date)
    print(f"last post date: {last_date}. Timeframe is {timeframe}")

    posts = looter.medias(timeframe=timeframe)

    if posts:
        classify_posts(posts)

    # Environment values are always strings: without parsing, "false" or
    # "0" would count as enabled and the confidence would be a str.
    repost_flag = os.environ.get("REPOST_SOUP", "").strip().lower()
    repost_soup = repost_flag not in ("", "0", "false", "no", "off")

    default_confidence = 0.8
    raw_confidence = os.environ.get("CONFIDENCE")
    if raw_confidence is None:
        confidence = default_confidence
    else:
        try:
            confidence = float(raw_confidence)
        except ValueError:
            print(f"Invalid CONFIDENCE {raw_confidence!r}, "
                  f"using {default_confidence}")
            confidence = default_confidence

    # TODO: move posting into post iteration loop
    top_image = queries.top_soup(confidence)

    if (not top_image.posted) or repost_soup:
        messenger = Messenger()
        messenger.post_message_to_channel(top_image)
        queries.commit()  # messenger will mark image as posted

    elapsed_time = datetime.now() - start_time
    print(f"Finished soup. Elapsed time {elapsed_time}")
예제 #13
0
async def update(tg_chatid, ig_profile):
    """Forward the not-yet-sent posts of an Instagram profile to a chat.

    Every media of the profile whose id is not listed in the JSON file at
    ``sent_fp`` is downloaded into an in-memory filesystem and sent through
    ``bot`` as a video, a photo or a media group, together with a caption
    linking to the original post. Ids of successfully sent posts are
    appended to the JSON file right away.

    Args:
        tg_chatid: Chat identifier passed to every ``bot`` call.
        ig_profile: Instagram profile name passed to ``ProfileLooter``.

    Returns:
        ``False`` when the profile looter could not be created. Otherwise
        ``True`` if at least one unsent post was processed to the end
        (including posts skipped after a Bad Request), else ``False``.
    """
    write(f"\033[2K\rchecking @{ig_profile}…")
    await bot.send_chat_action(tg_chatid, types.ChatActions.TYPING)
    try:
        pl = ProfileLooter(ig_profile)
    except Exception:
        write(f"\033[2K\r\033[31munable to get profile @{ig_profile}\033[0m\n")
        print(tb.format_exc())
        return False
    with open(sent_fp, "r") as sent_file:
        sent = json.load(sent_file)
    sent_something = False
    for j, media in enumerate(pl.medias()):
        i = media["id"]
        sc = media["shortcode"]
        write(f"\033[2K\rchecking @{ig_profile} ({j}|{i}|{sc})")
        if i not in sent:
            write(": \033[sgetting post…")
            _pl = PostLooter(sc)
            try:
                info = _pl.get_post_info(sc)
            except Exception:  #because the library I use can randomly throw errors while getting stuff…
                write("\033[u\033[0K\033[31munable to get post\033[0m\n")
                print(tb.format_exc())
                continue
            caption = "\n".join(
                edge["node"]["text"]
                for edge in info["edge_media_to_caption"]["edges"])
            with MemoryFS() as fs:
                # Download the media and pick the matching send function.
                # fn is a single file name, or a tuple of names for albums.
                if media["is_video"]:
                    await bot.send_chat_action(tg_chatid,
                                               types.ChatActions.RECORD_VIDEO)
                    _pl.download_videos(fs, media_count=1)
                    func = bot.send_video
                    fn = fs.listdir("./")[0]
                    await bot.send_chat_action(tg_chatid,
                                               types.ChatActions.UPLOAD_VIDEO)
                elif media["__typename"].lower() == "graphimage":
                    await bot.send_chat_action(tg_chatid,
                                               types.ChatActions.UPLOAD_PHOTO)
                    _pl.download_pictures(fs, media_count=1)
                    func = bot.send_photo
                    fn = fs.listdir("./")[0]
                elif media["__typename"].lower() == "graphsidecar":
                    await bot.send_chat_action(tg_chatid,
                                               types.ChatActions.UPLOAD_PHOTO)
                    _pl.download_pictures(fs)
                    fn = tuple(fs.listdir("./"))
                    if len(fn) == 1:
                        func = bot.send_photo
                        fn = fn[0]
                    else:
                        func = bot.send_media_group
                else:
                    await bot.send_message(
                        tg_chatid,
                        f"Oh-oh. I've encountered a new post type!\nPlease tell my developer, so he can tell me what I should do with a {media}."
                    )
                    print("\n\033[31mUNKNOWN MEDIA TYPE AAAAA\033[0m", media)
                    break
                # Handles are tracked under their own name and closed in the
                # finally below. Previously `f` was rebound by the
                # `with open(sent_fp, "w+")` block, so the media handles were
                # never closed after a successful send, nor on the
                # flood-control break.
                media_files = []
                try:
                    if isinstance(fn, tuple):
                        write("\033[u\033[0Ksending album…")
                        _media = types.input_media.MediaGroup()
                        for _fn in fn:
                            _f = fs.openbin(_fn)
                            media_files.append(_f)
                            _media.attach_photo(_f)
                    else:
                        write("\033[u\033[0Ksending file…")
                        _media = fs.openbin(fn)
                        media_files.append(_media)
                    if len(
                            caption
                    ) > 100:  #telegram media captions have a character limit of 200 chars & I want to have a buffer
                        caption = caption[:100] + "[…]"
                    # Keep the escaped text: the message is sent with HTML
                    # parse mode, and the result of quote_html used to be
                    # thrown away. Escaping happens after truncation so an
                    # entity is never cut in half.
                    caption = markdown.quote_html(caption)
                    text = f"{caption}\n→<a href=\"https://www.instagram.com/p/{sc}\">original post</a>"
                    try:
                        if isinstance(fn, tuple):
                            # Albums take no caption here; the text is sent
                            # as a reply to the last message of the group.
                            msg_id = (await func(tg_chatid,
                                                 _media))[-1]["message_id"]
                            await bot.send_message(
                                tg_chatid,
                                text,
                                reply_to_message_id=msg_id,
                                parse_mode=types.ParseMode.HTML)
                        else:
                            await func(tg_chatid,
                                       _media,
                                       caption=text,
                                       parse_mode=types.ParseMode.HTML)
                    except exceptions.BadRequest:
                        write(
                            "\033[u\033[0K\033[31mskipped\033[0m\nGot Bad Request while trying to send message.\n"
                        )
                    except exceptions.RetryAfter:
                        write(
                            "\nMEEP MEEP FLOOD CONTROL - YOU'RE FLOODING TELEGRAM\nstopping sending messages & waiting for next cycle…\n"
                        )
                        break
                    else:
                        sent.append(i)
                        write("\033[u\033[0Ksaving sent messages…\033[0m")
                        with open(sent_fp, "w+") as sent_file:
                            json.dump(sent, sent_file)
                        write("\033[u\033[0K\033[32msent\033[0m\n")
                finally:
                    for _f in media_files:
                        _f.close()
            sent_something = True
        # sometimes the page has to be reloaded, which would prolong the time the checking post…
        # message would be displayed if I didn't do this
        write(f"\033[2K\rchecking @{ig_profile}…")
    return sent_something
예제 #14
0
os.makedirs(photo_path + "/probably_not_soup", exist_ok=True)
os.makedirs(photo_path + "/probably_soup", exist_ok=True)

queries = Queries()

print("configuring looter")
# https://github.com/althonos/InstaLooter/issues/173
looter = ProfileLooter("davesfreshpasta")
print("configured looter")

start_time = datetime.now()

last_date = queries.oldest_image_date()
timeframe = (start_time.date(), last_date)

posts = looter.medias(timeframe)

# https://github.com/althonos/InstaLooter/issues/171
try:
    with classifier.Classifier() as c:
        for post in posts:
            url = post['display_url']

            if queries.image_exists(url):
                print(f"\talready classified, skipping {url}")
                continue

            timestamp = post['taken_at_timestamp']
            print(url)
            image = Image(url=url, post_date=timestamp)
            c.classify(image)
예제 #15
0
def scrape_profile(
        username,
        scrape_posts=True):
    """Scrape a profile from a given username (not a link).

    Posts are scraped through ``scrape_post_from_link``; the user-level
    fields are copied from the first scraped post, and follower data is
    read from ``InstagramAPI.searchUsername``.

    Args:
        username: Instagram username of the profile to scrape.
        scrape_posts: When falsy, only one post is scraped; otherwise up
            to ``MAX_POST_COUNT`` posts are scraped.

    Returns:
        A dict describing the user. Fields that could not be read keep
        their defaults; ``'post_count'`` is only present when the first
        scraped post provides it.
    """
    max_posts = MAX_POST_COUNT if scrape_posts else 1
    looter = ProfileLooter(username)
    user_dict = {}
    user_dict['username'] = username
    post_list = []
    for post_count, media in enumerate(looter.medias(), start=1):
        code = str(media['shortcode'])
        post_list += scrape_post_from_link(_baseurl + code)
        if post_count == max_posts:
            break
    user_dict['z_posts'] = post_list

    # A profile without any scraped post used to crash on post_list[0]
    # with an uncaught IndexError; an empty mapping makes every lookup
    # below fall back to its default instead.
    first_post = post_list[0] if post_list else {}

    def copy_from_first_post(dest_key, src_key):
        # Copy one field, reporting a missing key without failing.
        try:
            user_dict[dest_key] = first_post[src_key]
        except KeyError as e:
            print(e)

    user_dict['full_name'] = ""
    copy_from_first_post('full_name', 'user_full_name')

    user_dict['is_verified'] = False
    copy_from_first_post('is_verified', 'user_verified')

    user_dict['id'] = ''
    copy_from_first_post('id', 'user_id')

    user_dict['profile_pic_url'] = ""
    user_dict['profile_pic'] = 1
    try:
        user_dict['profile_pic_url'] = first_post['user_profile_pic_url']
        # Raw string: "\/" is an invalid escape in a normal string literal.
        url_parts = re.split(r"[/?]+", "" + user_dict['profile_pic_url'])
        if EMPTY_PROFILE_PIC in url_parts:
            user_dict['profile_pic'] = 0
    except KeyError as e:
        print(e)

    # No default here: the key stays absent when the post lacks it.
    copy_from_first_post('post_count', 'user_post_count')

    user_dict['is_private'] = False
    copy_from_first_post('is_private', 'user_private')

    user_dict['followers'] = 0
    user_dict['following'] = 0
    user_dict['description'] = ""
    user_dict['external_url_01'] = 0
    user_dict['external_url'] = ""
    user_dict['is_business'] = False
    try:
        InstagramAPI.searchUsername(username)
        if InstagramAPI.LastResponse.status_code == 200:
            user = InstagramAPI.LastJson['user']
            user_dict['followers'] = user['follower_count']
            user_dict['following'] = user['following_count']
            user_dict['description'] = user['biography']
            user_dict['is_business'] = user['is_business']
            if len(user['external_url']) > 0:
                user_dict['external_url_01'] = 1
                user_dict['external_url'] = user['external_url']
    except Exception as e:
        # Best effort: any failure keeps the values gathered so far.
        print(e)
    return user_dict
예제 #16
0
	# print(img_src)
	os.system('wget -q -O '+ThumbsFilePath+followinglist[i]+'.jpg '+img_src[i]+' &')

#Close selenium
driver.quit()
#Login into instagram
looter=ProfileLooter("instagram")
looter.login(username_,password_)
#Loop through all the people who are being followed and grab their photo urls
#NOTE(review): followinglist grows while it is being iterated, so a user that
#keeps failing is retried without any limit - confirm this is intended
for i in followinglist:
	try:
		print(i)
		i = i.strip()
		looter=ProfileLooter(i)
		with open(UserFilePath+i+".txt", "a") as output:
			for media in looter.medias():
				for link in instalinks(media,looter):
					#Only queue links whose file is not already in the user's folder
					if not (os.path.isfile(UserFilePath+i+"/"+link.split('/')[-1])):
						print(link)
						output.write("{}\n".format(link))
					else:
						print("Image already exists")
		#Wget from the file
		#NOTE(review): the shell command is built by string concatenation;
		#names containing shell metacharacters would be interpreted by the shell
		os.system('(wget -q -i '+UserFilePath+i+".txt"+' -P '+UserFilePath+i+';rm '+UserFilePath+i+".txt"') &')
		#Try and not get rate limited by instagram
		rnd=randint(110,147)
		print("Waiting for:",str(rnd),"seconds")
		time.sleep(rnd)
	except Exception as e:
		print("Rate limited on",i,"waiting for 2 minutes before restarting with next user this user has been added to the back of the line")
		#Every exception ends up here, so show what actually went wrong
		print("Error was:",e)
		followinglist.append(i)
		#Actually wait the 2 minutes announced above; the wait was missing
		time.sleep(120)