Exemple #1
0
    def testDownloading_css_03(self):
        """Download a sample page stored under a Cyrillic path and check CSS.

        After the download the static directory and five stylesheet files
        are expected to exist inside the temporary directory.
        """
        from webpage.downloader import DownloadController, Downloader

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/Пример 3/', 'пример 3.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(staticDir))

        # NOTE(review): 'fname1_1.css' looks like a renamed duplicate of
        # 'fname1.css' -- confirm against the sample page.
        expectedNames = (
            'fname1.css',
            'fname2.css',
            'fname3.css',
            'fname4.css',
            'fname1_1.css',
        )
        for cssName in expectedNames:
            self.assertTrue(os.path.exists(os.path.join(staticDir, cssName)))
Exemple #2
0
    def testDownloading_favicon(self):
        """Download the favicon sample page and check the saved icons.

        Both favicon files must exist in the static directory, and the
        resulting content must reference each of them through an ``href``
        attribute that points into the static directory.
        """
        from webpage.downloader import DownloadController, Downloader

        hrefTemplate = 'href="{path}"'
        staticDir = os.path.join(self._tempDir, self._staticDirName)

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example_favicon/',
                                'example.html')
        downloader.start(self._path2url(pagePath), controller)

        self.assertTrue(os.path.exists(staticDir))
        for iconName in ('favicon_1.png', 'favicon_2.png'):
            self.assertTrue(os.path.exists(os.path.join(staticDir, iconName)))

        for suffix in ('/favicon_1.png', '/favicon_2.png'):
            self.assertIn(
                hrefTemplate.format(path=self._staticDirName + suffix),
                downloader.contentResult)
Exemple #3
0
    def testDownloading_img_02(self):
        """Download the 'example2' page and check that its images are saved.

        The static directory and three PNG files are expected to exist
        after the download.
        """
        from webpage.downloader import DownloadController, Downloader

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example2/', u'example2.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(staticDir))

        for imageName in (u'image_01.png', u'image_02.png', u'image_03.png'):
            self.assertTrue(
                os.path.exists(os.path.join(staticDir, imageName)))
Exemple #4
0
    def testDownloading_javascript_01(self):
        """Download the 'example1' page and check the saved script files.

        The static directory and four JavaScript files are expected to
        exist after the download.
        """
        from webpage.downloader import DownloadController, Downloader

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(staticDir))

        scriptNames = (u'fname1.js', u'fname2.js', u'fname3.js', u'fname4.js')
        for scriptName in scriptNames:
            self.assertTrue(
                os.path.exists(os.path.join(staticDir, scriptName)))
Exemple #5
0
    def testDownloading_css_import_01(self):
        """Download the 'example1' page and check the additional CSS files.

        Ten stylesheet files ('import*.css' and 'basic*.css') are expected
        to exist in the static directory after the download.
        """
        from webpage.downloader import DownloadController, Downloader

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        # Checked in the listed order; the first missing file fails the test.
        expectedNames = (
            u'import1.css',
            u'import2.css',
            u'import3.css',
            u'import4.css',
            u'basic2.css',
            u'basic3.css',
            u'basic4.css',
            u'basic5.css',
            u'basic5_1.css',
            u'basic6.css',
        )
        for cssName in expectedNames:
            cssPath = os.path.join(self._tempDir, self._staticDirName, cssName)
            self.assertTrue(os.path.exists(cssPath))
Exemple #6
0
    def testDownloading_beautifulsoup(self):
        """Download the BeautifulSoup documentation page from a live URL.

        The download must report success, the static directory must be
        created, and the expected stylesheet and script files must exist
        in it.

        The original assertions passed the joined path string straight to
        ``assertTrue``; a non-empty string is always truthy, so the files
        were never actually checked.  Each path is now tested with
        ``os.path.exists``.
        """
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        # NOTE(review): this is a remote URL, so the test needs network
        # access and depends on the current layout of the remote page.
        url = 'http://www.crummy.com/software/BeautifulSoup/bs4/doc/'
        downloader.start(url, controller)

        self.assertTrue(downloader.success)

        downloadDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(downloadDir))

        expectedNames = (
            'default.css',
            'pygments.css',
            'jquery.js',
            'underscore.js',
            'doctools.js',
        )
        for fname in expectedNames:
            self.assertTrue(os.path.exists(os.path.join(downloadDir, fname)))
Exemple #7
0
    def testContentScriptExample1(self):
        """Check the ``<script src=...>`` references in the resulting content.

        After downloading the 'example1' page, the content must reference
        the listed script files inside the static directory and must not
        reference 'fname1_1.js'.
        """
        from webpage.downloader import DownloadController, Downloader

        scriptTemplate = '<script src="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example1/', 'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        # (path suffix, must be present) -- evaluated in this order.
        expectations = (
            ('/fname1.js', True),
            ('/fname2.js', True),
            ('/fname2_1.js', True),
            ('/fname3.js', True),
            ('/fname4.js', True),
            ('/fname1_1.js', False),
        )
        for suffix, mustBePresent in expectations:
            check = self.assertIn if mustBePresent else self.assertNotIn
            check(scriptTemplate.format(path=self._staticDirName + suffix),
                  downloader.contentResult)
Exemple #8
0
    def run(self):
        """Download ``self._url`` and report the outcome.

        Progress messages go through ``self._log`` and failures through
        ``self._error``.  On success a ``FinishDownloadEvent`` carrying the
        content, static path, title, favicon and URL is posted to
        ``self._parentWnd``.
        """
        controller = WebPageDownloadController(self._runEvent,
                                               self._downloadDir,
                                               STATIC_DIR_NAME,
                                               self._parentWnd, self._timeout)

        downloader = Downloader(self._timeout)

        self._log(_('Start downloading\n'))

        try:
            downloader.start(self._url, controller)
        except urllib.error.URLError as error:
            self._error(_('Download error: {}\n').format(str(error.reason)))
            return
        except (IOError, ValueError) as e:
            self._error(_('Invalid URL or file format\n'))
            self._error(str(e))
            return

        # Reached only when the download raised no handled exception.
        self._log(_('Finish downloading\n'))

        content = downloader.contentResult
        staticPath = os.path.join(self._downloadDir, STATIC_DIR_NAME)
        title = downloader.pageTitle
        favicon = self._prepareFavicon(downloader.favicon)

        wx.PostEvent(
            self._parentWnd,
            webpage.events.FinishDownloadEvent(content=content,
                                               staticPath=staticPath,
                                               title=title,
                                               favicon=favicon,
                                               url=self._url))
Exemple #9
0
    def testContentExample2(self):
        """Check the ``<img src=...>`` references for the 'example2' page.

        The resulting content must reference three image files inside the
        static directory and must not reference 'image_02_1.png'.
        """
        from webpage.downloader import DownloadController, Downloader

        imgTemplate = u'<img src="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example2/', u'example2.html')
        downloader.start(self._path2url(pagePath), controller)

        # (path suffix, must be present) -- evaluated in this order.
        expectations = (
            (u'/image_01.png', True),
            (u'/image_01_1.png', True),
            (u'/image_02.png', True),
            (u'/image_02_1.png', False),
        )
        for suffix, mustBePresent in expectations:
            check = self.assertIn if mustBePresent else self.assertNotIn
            check(imgTemplate.format(path=self._staticDirName + suffix),
                  downloader.contentResult)
Exemple #10
0
    def testDownloading_img_srcset_files(self):
        """Download the 'example3' page and check that its images are saved.

        The static directory and four PNG files are expected to exist
        after the download.
        """
        from webpage.downloader import DownloadController, Downloader

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example3/', 'example3.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        self.assertTrue(os.path.exists(staticDir))

        imageNames = (
            'image_01.png',
            'image_02.png',
            'image_03.png',
            'image_04.png',
        )
        for imageName in imageNames:
            self.assertTrue(
                os.path.exists(os.path.join(staticDir, imageName)))
Exemple #11
0
    def testDownloading_stackoverflow_2(self):
        """Download a page whose URL contains unescaped Cyrillic characters.

        The test only checks that the downloader reports success.
        """
        from webpage.downloader import DownloadController, Downloader

        ctrl = DownloadController(self._tempDir, self._staticDirName)
        loader = Downloader()

        pageUrl = 'https://ru.stackoverflow.com/questions/241337/Как-обработать-кириллические-символы-в-urllib-request-urlopen'
        loader.start(pageUrl, ctrl)

        self.assertTrue(loader.success)
Exemple #12
0
    def testDownloading_stackoverflow_01(self):
        """Download a page whose URL contains percent-encoded characters.

        The test only checks that the downloader reports success.
        """
        from webpage.downloader import DownloadController, Downloader

        ctrl = DownloadController(self._tempDir, self._staticDirName)
        loader = Downloader()

        pageUrl = 'http://ru.stackoverflow.com/questions/476918/django-%D0%97%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B5-%D0%B2-%D0%B7%D0%B0%D0%B2%D0%B8%D1%81%D0%B8%D0%BC%D0%BE%D1%81%D1%82%D0%B8-%D0%BE%D1%82-%D0%B7%D0%BD%D0%B0%D1%87%D0%B5%D0%BD%D0%B8%D0%B9-%D0%B2-%D0%91%D0%94'
        loader.start(pageUrl, ctrl)

        self.assertTrue(loader.success)
Exemple #13
0
    def testDownloading_toster(self):
        """Download a page from toster.ru and check that it succeeds."""
        from webpage.downloader import DownloadController, Downloader

        ctrl = DownloadController(self._tempDir, self._staticDirName)
        loader = Downloader()

        pageUrl = 'https://toster.ru/q/273244'
        loader.start(pageUrl, ctrl)

        self.assertTrue(loader.success)
Exemple #14
0
    def testNoTitle(self):
        """Download the 'example_no_title' page and check the title is None.

        The download must report success and ``pageTitle`` must be
        ``None``.
        """
        from webpage.downloader import DownloadController, Downloader

        ctrl = DownloadController(self._tempDir, self._staticDirName)
        loader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example_no_title/',
                                u'example_no_title.html')
        loader.start(self._path2url(pagePath), ctrl)

        self.assertTrue(loader.success)
        self.assertIsNone(loader.pageTitle)
Exemple #15
0
    def testTitleExample1(self):
        """Download the 'example1' page and check the extracted page title.

        The download must report success and ``pageTitle`` must equal the
        expected Cyrillic string.
        """
        from webpage.downloader import DownloadController, Downloader

        ctrl = DownloadController(self._tempDir, self._staticDirName)
        loader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        loader.start(self._path2url(pagePath), ctrl)

        self.assertTrue(loader.success)
        self.assertEqual(loader.pageTitle, u'Заголовок страницы')
Exemple #16
0
    def testDownloading_img_srcset_content(self):
        """Check the ``srcset`` attribute in the content of 'example3'.

        After the download, the resulting content must contain a ``srcset``
        attribute whose three image entries point into the static
        directory, with their descriptors ('2x', 'w600') kept.

        The unused local ``downloadDir`` from the previous version was
        removed; nothing in this test reads the file system.
        """
        from webpage.downloader import Downloader, DownloadController

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        examplePath = '../test/webpage/example3/'
        exampleHtmlPath = os.path.join(examplePath, 'example3.html')

        downloader.start(self._path2url(exampleHtmlPath), controller)
        content = downloader.contentResult

        # Split across lines for readability only; the pieces join into the
        # same single-line pattern as before.
        sample = ('srcset="{path}/image_02.png 2x, '
                  '{path}/image_03.png w600, '
                  '{path}/image_04.png"').format(path=self._staticDirName)

        self.assertIn(sample, content)
Exemple #17
0
    def testDownloading_favicon_03(self):
        """Download the 'example_favicon_03' page and check its favicon.

        The static directory and 'favicon.ico' must exist, and
        ``controller.favicon`` must equal the static directory path
        followed by '/favicon.ico'.
        """
        from webpage.downloader import DownloadController, Downloader

        staticDir = os.path.join(self._tempDir, self._staticDirName)

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example_favicon_03/',
                                'example.html')
        downloader.start(self._path2url(pagePath), controller)

        iconPath = os.path.join(self._tempDir, self._staticDirName,
                                'favicon.ico')
        # NOTE(review): the expected value is built with a literal '/',
        # presumably mirroring how the controller composes the path --
        # confirm before replacing it with os.path.join.
        expectedFavicon = os.path.join(self._tempDir,
                                       self._staticDirName) + '/favicon.ico'

        self.assertTrue(os.path.exists(staticDir))
        self.assertEqual(controller.favicon, expectedFavicon)
        self.assertTrue(os.path.exists(iconPath))
Exemple #18
0
    def testDownloading_css_back_img_01(self):
        """Download the 'example1' page and check the 'back_img' files.

        Six 'back_img_NN.png' files are expected to exist in the static
        directory after the download.
        """
        from webpage.downloader import DownloadController, Downloader

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        imageNames = (
            u'back_img_01.png',
            u'back_img_02.png',
            u'back_img_03.png',
            u'back_img_04.png',
            u'back_img_05.png',
            u'back_img_06.png',
        )
        for imageName in imageNames:
            imagePath = os.path.join(self._tempDir, self._staticDirName,
                                     imageName)
            self.assertTrue(os.path.exists(imagePath))
Exemple #19
0
    def testContentImgExample1(self):
        """Check the ``<img src=...>`` references for the 'example1' page.

        The resulting content must reference the listed images inside the
        static directory (including one with a Cyrillic file name) and
        must not reference 'image_02_1.png' or 'image_03_1.png'.
        """
        from webpage.downloader import DownloadController, Downloader

        imgTemplate = '<img src="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example1/', 'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        # (path suffix, must be present) -- evaluated in this order.
        expectations = (
            ('/image_01.png', True),
            ('/картинка.png', True),
            ('/image_01_1.png', True),
            ('/image_02.png', True),
            ('/image_02_1.png', False),
            ('/image_03.png', True),
            ('/image_03_1.png', False),
        )
        for suffix, mustBePresent in expectations:
            check = self.assertIn if mustBePresent else self.assertNotIn
            check(imgTemplate.format(path=self._staticDirName + suffix),
                  downloader.contentResult)
Exemple #20
0
    def testDownloading_css_url_02(self):
        """Check the stylesheet references inside the saved 'fname2.css'.

        After downloading the 'example1' page, the text of 'fname2.css'
        must contain ``url("...")`` references to four 'basic*.css' files
        and must mention 'basic3.css' and 'basic5.css' in any form.
        """
        from webpage.downloader import DownloadController, Downloader

        urlTemplate = u'url("{url}")'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        cssPath = os.path.join(self._tempDir, self._staticDirName,
                               u'fname2.css')
        cssText = readTextFile(cssPath)

        quotedNames = (u'basic2.css', u'basic4.css', u'basic5.css',
                       u'basic6.css')
        for cssName in quotedNames:
            self.assertIn(urlTemplate.format(url=cssName), cssText)

        # These two are only checked as plain substrings.
        for cssName in ('basic3.css', 'basic5.css'):
            self.assertIn(cssName, cssText)
Exemple #21
0
    def testDownloading_css_rename(self):
        """Download the 'example_css_rename' page and check the saved CSS.

        The stylesheet must be stored as 'style.php.css' in the static
        directory and referenced under that name by an ``href`` attribute
        in the resulting content.
        """
        from webpage.downloader import DownloadController, Downloader

        hrefTemplate = 'href="{path}"'
        staticDir = os.path.join(self._tempDir, self._staticDirName)

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('../test/webpage/example_css_rename/',
                                'example.html')
        downloader.start(self._path2url(pagePath), controller)

        cssPath = os.path.join(self._tempDir, self._staticDirName,
                               'style.php.css')

        for requiredPath in (staticDir, cssPath):
            self.assertTrue(os.path.exists(requiredPath))

        expectedHref = hrefTemplate.format(
            path=self._staticDirName + '/style.php.css')
        self.assertIn(expectedHref, downloader.contentResult)
Exemple #22
0
    def testDownloading_img_urlquote(self):
        """Download the 'example_urlquote' page and check its image.

        The image must be stored under its Cyrillic file name in the static
        directory and referenced under that name by an ``<img src=...>``
        tag in the resulting content.
        """
        from webpage.downloader import DownloadController, Downloader

        imgTemplate = '<img src="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join('testdata/webpage/example_urlquote/',
                                'example_urlquote.html')
        downloader.start(self._path2url(pagePath), controller)

        staticDir = os.path.join(self._tempDir, self._staticDirName)
        imagePath = os.path.join(self._tempDir, self._staticDirName,
                                 'рисунок.png')

        for requiredPath in (staticDir, imagePath):
            self.assertTrue(os.path.exists(requiredPath))

        expectedTag = imgTemplate.format(
            path=self._staticDirName + '/рисунок.png')
        self.assertIn(expectedTag, downloader.contentResult)
Exemple #23
0
    def testDownloading_css_url_01(self):
        """Check the ``url("...")`` references inside the saved 'fname1.css'.

        After downloading the 'example1' page, the text of 'fname1.css'
        must reference 'import1.css' and six 'back_img_NN.png' files by
        their bare file names.
        """
        from webpage.downloader import DownloadController, Downloader

        urlTemplate = u'url("{url}")'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        cssPath = os.path.join(self._tempDir, self._staticDirName,
                               u'fname1.css')
        cssText = readTextFile(cssPath)

        referencedNames = (
            u'import1.css',
            u'back_img_01.png',
            u'back_img_02.png',
            u'back_img_03.png',
            u'back_img_04.png',
            u'back_img_05.png',
            u'back_img_06.png',
        )
        for name in referencedNames:
            self.assertIn(urlTemplate.format(url=name), cssText)
Exemple #24
0
    def testContentCSSExample1_01(self):
        """Check the ``<link href=...>`` references for the 'example1' page.

        The resulting content must reference five stylesheet files inside
        the static directory and must not reference 'fname2_1.css'.
        """
        from webpage.downloader import DownloadController, Downloader

        linkTemplate = u'<link href="{path}"'

        controller = DownloadController(self._tempDir, self._staticDirName)
        downloader = Downloader()

        pagePath = os.path.join(u'../test/webpage/example1/', u'example1.html')
        downloader.start(self._path2url(pagePath), controller)

        # (path suffix, must be present) -- evaluated in this order.
        expectations = (
            (u'/fname1.css', True),
            (u'/fname2.css', True),
            (u'/fname3.css', True),
            (u'/fname4.css', True),
            (u'/fname1_1.css', True),
            (u'/fname2_1.css', False),
        )
        for suffix, mustBePresent in expectations:
            check = self.assertIn if mustBePresent else self.assertNotIn
            check(linkTemplate.format(path=self._staticDirName + suffix),
                  downloader.contentResult)