Ejemplos de JavaScriptScraper.scrape en Python

Lenguaje de programación: Python

Namespace/Package Name: wpull.scraper.javascript

Clase / Tipo: JavaScriptScraper

Método / Función: scrape

Ejemplos en hotexamples.com: 6

Python JavaScriptScraper.scrape - 6 ejemplos encontrados. Estos son los ejemplos en Python del mundo real mejor valorados de wpull.scraper.javascript.JavaScriptScraper.scrape extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

JavaScriptScraper(16)

scrape(3)

Métodos usados con frecuencia

JavaScriptScraper (16)

scrape (3)

Ejemplo n.º 1

Mostrar archivo

    def test_javascript_heavy_inline_monstrosity(self):
        scraper = JavaScriptScraper()
        request = Request('http://example.com/test.js')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH, 'testing', 'samples',
                                          'twitchplayspokemonfirered.html')
            with open(html_file_path, 'rb') as in_file:
                in_file.seek(0x147)
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertIn(
            'http://cdn.bulbagarden.net/upload/archive/a/a4/'
            '20090718115357%21195Quagsire.png', inline_urls)
        self.assertIn(
            'http://www.google.com/url?q=http%3A%2F%2Fwww.reddit.com%2F'
            'user%2FGoldenSandslash15&sa=D&sntz=1&'
            'usg=AFQjCNElFBxZYdNm5mWoRSncf5tbdIJQ-A', linked_urls)

        print('\n'.join(inline_urls))
        print('\n'.join(linked_urls))

Ejemplo n.º 2

Mostrar archivo

Archivo: javascript_test.py Proyecto: Willianvdv/wpull

    def test_javascript_heavy_inline_monstrosity(self):
        scraper = JavaScriptScraper()
        request = Request('http://example.com/test.js')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples',
                                          'twitchplayspokemonfirered.html')
            with open(html_file_path, 'rb') as in_file:
                in_file.seek(0x147)
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertIn(
            'http://cdn.bulbagarden.net/upload/archive/a/a4/'
            '20090718115357%21195Quagsire.png',
            inline_urls
        )
        self.assertIn(
            'http://www.google.com/url?q=http%3A%2F%2Fwww.reddit.com%2F'
            'user%2FGoldenSandslash15&sa=D&sntz=1&'
            'usg=AFQjCNElFBxZYdNm5mWoRSncf5tbdIJQ-A',
            linked_urls
        )

        print('\n'.join(inline_urls))
        print('\n'.join(linked_urls))

Ejemplo n.º 3

Mostrar archivo

Archivo: javascript_test.py Proyecto: Willianvdv/wpull

    def test_javascript_reject_type(self):
        scraper = JavaScriptScraper()
        request = Request('http://example.com/script.js')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples', 'script.js')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response,
                                       link_type=LinkType.css)
        self.assertFalse(scrape_result)

Ejemplo n.º 4

Mostrar archivo

    def test_javascript_reject_type(self):
        scraper = JavaScriptScraper()
        request = Request('http://example.com/script.js')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH, 'testing', 'samples',
                                          'script.js')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request,
                                       response,
                                       link_type=LinkType.css)
        self.assertFalse(scrape_result)

Ejemplo n.º 5

Mostrar archivo

Archivo: javascript_test.py Proyecto: Super-Rad/wpull

    def test_javascript_scraper(self):
        scraper = JavaScriptScraper()
        request = Request('http://example.com/script.js')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH,
                                          'testing', 'samples', 'script.js')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual({
            'http://example.com/script_variable.png',
            'http://example.com/dragonquery.js',
        },
            inline_urls
        )
        self.assertEqual({
            'http://example.com/document_write.html',
            'http://example.com/http_document_write.html',
            'http://example.com/http_document_write2.html',
            'http://example.com/http document write.html',
            'http://example.com/script_variable.html',
            'http://example.com/http_script_variable.html',
            'https://example.com/https_script_variable.html',
            'ftp://example.com/ftp_script_variable.html',
            'http://example.com/end_dir_script_variable/',
            'http://example.com/start_dir_script_variable',
            'http://example.com/../relative_dir_script_variable'
            if sys.version_info < (3, 5) else
            'http://example.com/relative_dir_script_variable',
            'http://example.com/script_json.html',
            'http://example.com/http_script_json.html?a=b',
        },
            linked_urls
        )

Ejemplo n.º 6

Mostrar archivo

    def test_javascript_scraper(self):
        scraper = JavaScriptScraper()
        request = Request('http://example.com/script.js')
        response = Response(200, 'OK')
        response.body = Body()

        with wpull.util.reset_file_offset(response.body):
            html_file_path = os.path.join(ROOT_PATH, 'testing', 'samples',
                                          'script.js')
            with open(html_file_path, 'rb') as in_file:
                shutil.copyfileobj(in_file, response.body)

        scrape_result = scraper.scrape(request, response)
        inline_urls = scrape_result.inline_links
        linked_urls = scrape_result.linked_links

        self.assertEqual(
            {
                'http://example.com/script_variable.png',
                'http://example.com/dragonquery.js',
            }, inline_urls)
        self.assertEqual(
            {
                'http://example.com/document_write.html',
                'http://example.com/http_document_write.html',
                'http://example.com/http_document_write2.html',
                'http://example.com/http document write.html',
                'http://example.com/script_variable.html',
                'http://example.com/http_script_variable.html',
                'https://example.com/https_script_variable.html',
                'ftp://example.com/ftp_script_variable.html',
                'http://example.com/end_dir_script_variable/',
                'http://example.com/start_dir_script_variable',
                'http://example.com/../relative_dir_script_variable'
                if sys.version_info <
                (3, 5) else 'http://example.com/relative_dir_script_variable',
                'http://example.com/script_json.html',
                'http://example.com/http_script_json.html?a=b',
            }, linked_urls)