Beispiel #1
0
    def test__crawl(self) -> None:
        # arrange
        self.crawler._get_query_hash = lambda: self.__class__._QUERY_HASH  # type: ignore
        self.crawler._get_profile_id = self._get_profile_id  # type: ignore
        expected_images = ImageCollection()
        expected_cursor = 'QVFBbjdTc0dOQ2JQTW1vb1JzMTQxeGpkMEFnTzhYWmh5dFRfMXRWT1VwX28' \
                          'wMUxkSExpZ2s5SVZfWmM5VWtjYUJrTS0wTW5Va2JqSEpTSUpPcENnN1g1OQ=='
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/75467914_150782276185354_'
                '1270489924400076442_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&'
                '_nc_cat=1&oh=f293b2a234c82263dfd37b3785e19625&oe=5E812486',
                source='https://www.instagram.com/p/B5GWlkfjgWZ/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/75467914_150782276185354_'
                '1270489924400076442_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&_nc_cat=1&'
                'oh=f293b2a234c82263dfd37b3785e19625&oe=5E812486',
                source='https://www.instagram.com/p/B5GWlkfjgWZ/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/75349300_522545705143892_'
                '7892885809773901918_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&_nc_cat=1&'
                'oh=1d2f00065294117027b17a585b7d05ab&oe=5E4E791D',
                source='https://www.instagram.com/p/B5GWlkfjgWZ/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/73512651_760344041101107_'
                '8305449940174698639_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&'
                '_nc_cat=1&oh=cc60db8291b59c26af119139ec130edf&oe=5E6B47D3',
                source='https://www.instagram.com/p/B5GWlkfjgWZ/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/s1080x1080/75349296'
                '_565205657598924_1414110584006258216_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&'
                '_nc_cat=1&oh=5cd52e00ce5b9571013820950d32a9db&oe=5E4E8A5F',
                source='https://www.instagram.com/p/B5GCEIsj-Wl/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/s1080x1080/75349296_'
                '565205657598924_1414110584006258216_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&'
                '_nc_cat=1&oh=5cd52e00ce5b9571013820950d32a9db&oe=5E4E8A5F',
                source='https://www.instagram.com/p/B5GCEIsj-Wl/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/s1080x1080/70023824_'
                '3272578002812323_8917281619820840144_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&'
                '_nc_cat=1&oh=46773eb8dbebc8db8a5acf07e0d9ee94&oe=5E539F29',
                source='https://www.instagram.com/p/B5GCEIsj-Wl/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/s1080x1080/73480790_830405794055849_'
                '4155404398603377777_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&_nc_cat=1&'
                'oh=5ab282415040be2108c1e0e5fadf8a2a&oe=5E50079A',
                source='https://www.instagram.com/p/B5GCEIsj-Wl/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/s1080x1080/74350656_146186010045632_'
                '5113331273863195582_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&_nc_cat=1&'
                'oh=67f218bed182ad8bc7ff9118b2d88139&oe=5E5664A6',
                source='https://www.instagram.com/p/B5GCEIsj-Wl/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/s1080x1080/75375719_164423041289362_'
                '4914559208372349272_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&_nc_cat=1&'
                'oh=b401628c44cf7155f6e7963872782306&oe=5E52153E',
                source='https://www.instagram.com/p/B5GCEIsj-Wl/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/e35/s1080x1080/75538167_2161689137265601_'
                '3450507258498841854_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&'
                '_nc_cat=1&oh=484ebc5923e3755f4b88a9cde4d01e37&oe=5E6CE284',
                source='https://www.instagram.com/p/B5GCEIsj-Wl/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/fr/e15/s1080x1080/73401893_805717106534521_'
                '4743540237997542732_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&_nc_cat=1&'
                'oh=a036d5fbe716cd7b11d313200e5ba73d&oe=5E538C0A',
                source='https://www.instagram.com/p/B5FY2u9j8nG/'))
        expected_images.add(
            Image(
                uri=
                'https://scontent-frt3-1.cdninstagram.com/v/t51.2885-15/fr/e15/s1080x1080/74600036_170329604158818_'
                '2739654930228765968_n.jpg?_nc_ht=scontent-frt3-1.cdninstagram.com&_nc_cat=1&'
                'oh=84732a7538ab59d3bd1458d400e181fa&oe=5E86FC97',
                source='https://www.instagram.com/p/B5FEWeHAT-M/'))

        # act
        images = self.crawler._crawl()
        # assert
        self.assertEqual(self.crawler._cursor, expected_cursor)
        self.assertSetEqual(images, expected_images)
        for expected_image in expected_images:
            for image in images:
                if image == expected_image:
                    # sources are irrelevant for equality, need to be checked manually
                    self.assertEqual(image.source, expected_image.source)
Beispiel #2
0
    def test__crawl(self) -> None:
        # arrange
        self.crawler._get_query_hash = lambda: self.__class__._QUERY_HASH  # type: ignore
        expected_images = ImageCollection()
        expected_cursor = 'QVFDdV9PUXYxc0hjcU9TYUI5dWZZWmNsOGdSaUsxcU9oUHg5endkc2hiUnV' \
                          'CZVVDZWFUM2QzdlVvSnN0Z053Q2ItQkxvSGRObm1hdlR5X3dDZ1JKMWduRg=='

        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5F7QoklRxu/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/75397747_'
                '403992793822822_8324298994393298267_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com'
                '&_nc_cat=110&oh=f3e9d48ba846ef1d7371e450ab8099e7&oe=5E567BF8')
        )
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5F68piFIAs/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/72281363_241905656781018_'
                '6014571893708900461_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com'
                '&_nc_cat=106&oh=8b66604749764befc12a2565a2485649&oe=5E6C6C16')
        )
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5F4v16HTM5/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/70998485_152987982771030_'
                '3346050114145538962_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com'
                '&_nc_cat=106&oh=5cf81cd1f81acf8565656bd4af311717&oe=5E6FC7D5')
        )
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5F4qHBHNrD/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/p1080x1080/74862751_'
                '439529453427565_562257601125925561_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com'
                '&_nc_cat=101&oh=4ae866a0b55b092ef9868e74f19b5623&oe=5E8BA669')
        )
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5F4e14AO9k/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/73480811_2530202687056717'
                '_5816769842868095145_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com'
                '&_nc_cat=101&oh=2515a15280e2d1602f672593c9f0da29&oe=5E5116A9')
        )
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5F27k1BxQG/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/71318465_578947536241820_'
                '8770680984207687503_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com&_nc_cat=108&'
                'oh=8612bc5a2e5ad265d885dbeb5861741c&oe=5E81D4BE'))
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5Fz_oqDbrY/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/73171316_154443005781309_'
                '5952715908442524817_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com&_nc_cat=109'
                '&oh=61633aa07eb05b103b799f65a21bd60a&oe=5E6D0DFF'))
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5Fzi5oHY-O/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/74600030_100916318024628_'
                '8288469442916469948_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com&_nc_cat=100'
                '&oh=a8d237a032264976286879a581c0d904&oe=5E4FD98D'))
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5Fu-2CgcoG/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/p1080x1080/77151874_'
                '149403059703505_1673985012081769188_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com'
                '&_nc_cat=101&oh=496e626094a5dc32b60eea9f5785a814&oe=5E4D8D42')
        )
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5FssMBFyUL/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/73381083_998377233839407_'
                '3873966681762746452_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com&_nc_cat=107'
                '&oh=651b7a89927a465d758679c599c9aa18&oe=5E687FE2'))
        expected_images.add(
            Image(
                source='https://www.instagram.com/p/B5FpfZ2B4t1/',
                uri=
                'https://scontent-frx5-1.cdninstagram.com/v/t51.2885-15/e35/73300147_1391854841147986_'
                '2913574931708576695_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com&_nc_cat=110'
                '&oh=18800daa9416377ed6b870acc0502e2a&oe=5E6E8495'))
        # act
        images = self.crawler._crawl()
        # assert
        self.assertEqual(self.crawler._cursor, expected_cursor)
        self.assertSetEqual(images, expected_images)
        for expected_image in expected_images:
            for image in images:
                if image == expected_image:
                    # sources are irrelevant for equality, need to be checked manually
                    self.assertEqual(image.source, expected_image.source)