コード例 #1
0
class TestLogin(unittest.TestCase):
    """Login, logout and download checks for a ``ProfileLooter``."""

    def setUp(self):
        """Build a fresh looter and an in-memory destination filesystem."""
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        """Close the in-memory destination filesystem."""
        self.destfs.close()

    def _cookie_file_exists(self):
        """Tell whether ``_COOKIE_FILE`` exists in the looter's cache filesystem."""
        looter = self.looter
        return looter._cachefs.exists(looter._COOKIE_FILE)

    def test_login(self):
        """Check the looter state before login, after login and after logout."""
        looter = self.looter

        # Before login: no session, no cookie file, and ``medias`` must raise.
        self.assertFalse(looter.logged_in())
        self.assertRaises(RuntimeError, looter.medias)
        self.assertFalse(self._cookie_file_exists())

        try:
            looter.login(USERNAME, PASSWORD)
            self.assertTrue(looter.logged_in())
            self.assertTrue(self._cookie_file_exists())
            first_media = next(looter.medias())
            self.assertTrue(first_media)
        finally:
            # Always log out, then verify that the cookie file is gone.
            looter.logout()
            self.assertFalse(self._cookie_file_exists())

    def test_download(self):
        """Download into the memory filesystem and check the JFIF marker."""
        looter = self.looter
        target = self.destfs

        try:
            looter.login(USERNAME, PASSWORD)
            looter.download(target)
            self.assertTrue(target.exists('test.jpg'))
            payload = target.getbytes('test.jpg')
            # Bytes 6..9 of the downloaded file are expected to read "JFIF".
            self.assertEqual(payload[6:10], b'JFIF')
        finally:
            looter.logout()
コード例 #2
0
ファイル: test_issues.py プロジェクト: andy-br/InstaLooterBR
 def test_issue_006(self):
     """
     Check that instalooter does not iterate forever on a private
     profile.

     The looter is built for the ``IG_USERNAME`` environment value and
     logged out; requesting the first media must then raise
     ``RuntimeError``.
     """
     username = os.getenv("IG_USERNAME")
     looter = ProfileLooter(username, session=self.session)
     looter.logout()

     # Only the call under test sits inside the context manager, so an
     # unrelated RuntimeError raised during set-up cannot make the test
     # pass by accident.
     with self.assertRaises(RuntimeError):
         next(looter.medias())
コード例 #3
0
class TestLogin(unittest.TestCase):
    """Login, logout and download checks sharing one ``requests`` session."""

    @classmethod
    def setUpClass(cls):
        """Open a session and copy its User-Agent onto ``InstaLooter``."""
        session = requests.Session()
        cls.session = session
        InstaLooter._user_agent = session.headers["User-Agent"]

    @classmethod
    def tearDownClass(cls):
        """Close the session and drop the User-Agent set in ``setUpClass``."""
        cls.session.close()
        delattr(InstaLooter, "_user_agent")

    def setUp(self):
        """Build a fresh looter and an in-memory destination filesystem."""
        self.looter = ProfileLooter(USERNAME, template="test")
        self.destfs = fs.memoryfs.MemoryFS()

    def tearDown(self):
        """Close the in-memory destination filesystem."""
        self.destfs.close()

    def _has_cookie_file(self):
        """Tell whether ``_COOKIE_FILE`` exists in the looter's cache filesystem."""
        current = self.looter
        return current._cachefs.exists(current._COOKIE_FILE)

    def test_login(self):
        """Check the looter state before login, after login and after logout."""
        looter = self.looter

        # Logged-out expectations: no session, ``medias`` raises, no cookie.
        self.assertFalse(looter.logged_in())
        self.assertRaises(RuntimeError, looter.medias)
        self.assertFalse(self._has_cookie_file())

        try:
            looter.login(USERNAME, PASSWORD)
            self.assertTrue(looter.logged_in())
            self.assertTrue(self._has_cookie_file())
            leading_media = next(looter.medias())
            self.assertTrue(leading_media)
        finally:
            # Log out unconditionally and confirm the cookie file was removed.
            looter.logout()
            self.assertFalse(self._has_cookie_file())

    def test_download(self):
        """Download into the memory filesystem and check the JFIF marker."""
        looter = self.looter
        memfs = self.destfs
        try:
            looter.login(USERNAME, PASSWORD)
            looter.download(memfs)
            self.assertTrue(memfs.exists('test.jpg'))
            raw = memfs.getbytes('test.jpg')
            # Bytes 6..9 of the downloaded file are expected to read "JFIF".
            self.assertEqual(raw[6:10], b'JFIF')
        finally:
            looter.logout()
コード例 #4
0
ファイル: instagram.py プロジェクト: mbrine555/streamglob
class InstagramFeedMediaChannelMixin(object):
    """Mixin that lists the media posted by a single Instagram profile.

    The profile name is ``self.locator`` without its first character.
    Instagram is reached through a lazily created ``ProfileLooter`` (see
    ``looter``). The host class is expected to provide ``locator``,
    ``attrs``, ``items`` and ``provider``; they are used here but not
    defined in this class.
    """

    LISTING_CLASS = InstagramMediaListing

    # Maps the ``__typename`` of a post to the media type used in sources.
    POST_TYPE_MAP = {
        "GraphImage": "image",
        "GraphVideo": "video",
        "GraphSidecar": "carousel"
    }

    # Cached ``ProfileLooter``; populated on first access of ``looter``.
    looter_ : typing.Any = None

    @property
    @db_session
    def end_cursor(self):
        """Return the saved ``[timestamp, end_cursor]`` pair, or ``None``."""
        return self.attrs.get("end_cursor", None)

    @db_session
    def save_end_cursor(self, timestamp, end_cursor):
        """Store ``[timestamp, end_cursor]`` under ``attrs`` and commit."""
        self.attrs["end_cursor"] = [timestamp, end_cursor]
        commit()

    @property
    def looter(self):
        """Return the ``ProfileLooter`` for the current locator.

        A new looter is created when none is cached or when the cached one
        belongs to a different username. When the provider has credentials
        configured and the new looter is not logged in, it is logged in
        with the provider's session parameters.
        """
        username = self.locator[1:]
        cached = getattr(self, "looter_", None)
        if not cached or cached._username != username:
            self.looter_ = ProfileLooter(username)
            # ``logged_in`` is a method; testing the bound method itself is
            # always truthy, which previously meant login was never run.
            if self.provider.config.credentials and not self.looter_.logged_in():
                self.looter_.login(**self.provider.session_params)
        return self.looter_

    def get_post_info(self, shortcode):
        """Return the looter's post information for ``shortcode``."""
        return self.looter.get_post_info(shortcode)

    @property
    def posts(self):
        """Return the profile's timeline media count.

        The count is read from the JSON returned by the profile URL with
        ``?__a=1``, fetched through the looter's session.
        """
        url = f"https://www.instagram.com/{self.locator[1:]}/?__a=1"
        data = self.looter.session.get(url).json()
        return data["graphql"]["user"]["edge_owner_to_timeline_media"]["count"]

    def extract_content(self, post):
        """Build the list of source dicts for ``post``.

        Args:
            post: mapping with attribute access (``display_url``,
                ``shortcode``, ...) describing one post.

        Returns:
            A list of dicts with ``url``, ``media_type`` and ``shortcode``
            (plus ``url_thumbnail`` for videos and carousels). ``url`` is
            ``None`` when the media URL is not available in ``post``.

        Raises:
            KeyError: if the post's ``__typename`` is not in
                ``POST_TYPE_MAP``.
            Exception: if the mapped media type is not handled here.
        """
        media_type = self.POST_TYPE_MAP[post["__typename"]]

        if media_type == "image":
            return [
                dict(
                    url = post.display_url,
                    media_type = media_type,
                    shortcode = post.shortcode
                )
            ]

        if media_type == "video":
            return [
                dict(
                    url = post.video_url if post.get("video_url") else None,
                    url_thumbnail = post.display_url,
                    media_type = media_type,
                    shortcode = post.shortcode
                )
            ]

        if media_type == "carousel":
            if post.get('edge_sidecar_to_children'):
                children = [
                    AttrDict(edge['node'])
                    for edge in post['edge_sidecar_to_children']['edges']
                ]
                return [
                    dict(
                        url = child.video_url if child.is_video else child.display_url,
                        url_thumbnail = child.display_url,
                        media_type = "video" if child.is_video else "image",
                        shortcode = post.shortcode
                    )
                    for child in children
                ]
            # Children not present: emit a placeholder. ``shortcode`` is
            # included so this entry matches every other branch.
            return [
                dict(
                    url = None,
                    url_thumbnail = post.display_url,
                    media_type = media_type,
                    shortcode = post.shortcode
                )
            ]

        raise Exception(f"invalid media type: {media_type}")

    @staticmethod
    def _caption_of(post):
        """Return the caption text of ``post``, or ``None`` if it has none."""
        if "edge_media_to_caption" in post and post["edge_media_to_caption"]["edges"]:
            return post["edge_media_to_caption"]["edges"][0]["node"]["text"]
        if "caption" in post:
            return post["caption"]
        return None

    async def fetch(self, limit=None, resume=False, replace=False):
        """Yield item dicts for posts of the profile, newest pages first.

        Args:
            limit: maximum number of new items to yield; also the number
                of posts examined before giving up when none of them
                produced an item. ``None`` means no limit.
            resume: when true, start from the saved end cursor (if any).
            replace: when false, stop at the first post whose shortcode
                already exists in ``self.items``.

        Yields:
            dicts with ``channel``, ``guid``, ``title``, ``created``,
            ``media_type``, ``sources``, ``attrs`` and ``is_inflated``.
        """
        logger.info(f"fetching {self.locator} {resume}, {replace}")

        # update cached post count
        with db_session:
            self.attrs["posts"] = self.posts

        end_cursor = None
        if resume:
            try:
                (_, end_cursor) = self.end_cursor
            except TypeError:
                # Nothing has been saved yet.
                end_cursor = None

        logger.info(f"cursor: {end_cursor}")
        try:
            self.pages = self.looter.pages(cursor=end_cursor)
        except ValueError:
            # Retry once after a fresh login.
            self.looter_.logout()
            self.looter_.login(
                username=self.provider.session_params["username"],
                password=self.provider.session_params["password"],
            )
            self.pages = self.looter.pages(cursor=end_cursor)

        def get_posts(pages):
            """Yield ``(page end cursor, post)`` for every media of ``pages``."""
            try:
                for page in pages:
                    cursor = page["edge_owner_to_timeline_media"]["page_info"]["end_cursor"]
                    for media in self.looter._medias(iter([page])):
                        yield (cursor, AttrDict(media))
            except json.decoder.JSONDecodeError:
                logger.error(traceback.format_exc())
                # A generator must end with ``return``; ``raise
                # StopIteration`` is turned into RuntimeError (PEP 479).
                return

        count = 0
        new_count = 0

        # NOTE(review): ``get_posts`` is a generator function, so the
        # executor call only creates the generator; the pages themselves
        # are pulled lazily by the loop below.
        posts = state.event_loop.run_in_executor(
            None, get_posts, self.pages
        )

        for end_cursor, post in await posts:

            count += 1

            logger.info(f"cursor: {end_cursor}")

            logger.debug(f"{count} {new_count} {limit}")

            # ``count`` already includes the current post, hence ``>``:
            # with ``>=`` a limit of 1 stopped before examining any post.
            if limit is not None and (
                new_count >= limit or (new_count == 0 and count > limit)
            ):
                break

            created_timestamp = post.get(
                "date", post.get("taken_at_timestamp")
            )

            if end_cursor:
                saved = self.end_cursor
                if saved is None or created_timestamp < saved[0]:
                    logger.info(f"saving end_cursor: {created_timestamp}, {saved[0] if saved else None}")
                    self.save_end_cursor(created_timestamp, end_cursor)

            created = datetime.utcfromtimestamp(created_timestamp)

            existing = self.items.select(lambda i: i.guid == post.shortcode).first()

            if existing and not replace:
                # Reached a post that is already stored: stop fetching.
                logger.debug(f"old: {created}")
                return

            logger.debug(f"new: {created}")
            caption = self._caption_of(post)

            # Look the type up by string key: ``post.__typename`` written
            # inside a class body would be name-mangled by Python.
            type_name = post.get("__typename")
            media_type = self.POST_TYPE_MAP.get(type_name)
            if media_type is None:
                logger.warning(f"unknown post type: {type_name}")
                continue

            content = self.extract_content(post)

            item = dict(
                channel = self,
                guid = post.shortcode,
                title = (caption or "(no caption)").replace("\n", " "),
                created = created,
                media_type = media_type,
                sources =  content,
                attrs = dict(
                    short_code = post.shortcode
                ),
                is_inflated = media_type == "image"
            )
            new_count += 1
            yield item

    @db_session
    def reset(self):
        """Reset the channel and drop the ``post_iter`` attribute if present."""
        super().reset()
        if "post_iter" in self.attrs:
            del self.attrs["post_iter"]
            commit()