コード例 #1
0
    def crawl(self, timeout=30):
        """Run the site-wide checks, crawl each queued page, build the report.

        Every URL in ``self.pages_to_crawl`` is fetched with ``requests.get``.
        A 200 response is wrapped in ``webpage.Webpage`` and the result of its
        ``report()`` is appended to ``self.report['pages']``; a 404 is
        recorded as a ``BROKEN_LINK`` warning and any other status as a
        ``SERVER_ERROR`` warning. A request that fails outright (connection
        error, timeout, invalid URL, ...) is recorded as ``SERVER_ERROR`` too,
        so a single unreachable URL no longer aborts the whole crawl.

        Args:
            timeout: Seconds to wait on each HTTP request; passed straight to
                ``requests.get``. Pass ``None`` to wait indefinitely, which is
                what happened before this parameter existed.

        Returns:
            ``self.report``, with the per-page reports under ``'pages'`` and
            ``self.issues`` / ``self.achieved`` under ``'site'``.
        """
        # site wide checks
        self._analyze_crawlers()
        self._analyze_mobile()
        self._analyze_analytics()

        # Iterate the live list rather than a copy: any URL appended to it
        # while the loop is running would be visited as well.
        for page_url in self.pages_to_crawl:
            print("Crawled {0} Pages of {1}".format(len(self.pages_crawled),
                                                    len(self.pages_to_crawl)))
            try:
                # Without a timeout a stalled server blocks the crawl forever.
                resp = requests.get(page_url, timeout=timeout)
            except requests.exceptions.RequestException as exc:
                # No HTTP status is available here, so report the failure
                # type instead and move on to the next page.
                self.warn(
                    WARNINGS["SERVER_ERROR"],
                    "{0} raised for {1}".format(type(exc).__name__, page_url))
                continue

            if resp.status_code == requests.codes.ok:
                html = webpage.Webpage(page_url, resp.content, self.titles,
                                       self.descriptions)

                page_report = html.report()
                self.report['pages'].append(page_report)

                # mark the page as crawled
                self.pages_crawled.append(page_url.strip().lower())
            elif resp.status_code == requests.codes.not_found:
                self.warn(WARNINGS["BROKEN_LINK"], page_url)
            else:
                self.warn(
                    WARNINGS["SERVER_ERROR"],
                    "HTTP{0} received for {1}".format(resp.status_code,
                                                      page_url))

        # aggregate the site wide issues/achievements
        self.report['site'] = {}
        self.report['site']["issues"] = self.issues
        self.report['site']["achieved"] = self.achieved

        return self.report
コード例 #2
0
    def test_visible_tags(self, data):
        """Check ``Webpage.visible_tags`` against the elements of a fixture.

        ``data[0]`` is handed to ``self.soup_file`` and ``data[1]`` is the
        value ``visible_tags`` must return for every element yielded by
        ``findAll(text=True)`` on the result.
        """
        # The page is built from an empty document; the elements under test
        # come from the separately loaded fixture below.
        self.wp = webpage.Webpage("https://www.drawbuildplay.com", "",
                                  self.titles, self.descriptions)

        for node in self.soup_file(data[0]).findAll(text=True):
            self.assertEqual(self.wp.visible_tags(node), data[1])
コード例 #3
0
    def test_analyze_negative_url(self, data):
        """Assert that reporting on an empty page at a URL records a warning.

        ``data[0]`` is the URL given to ``Webpage`` and ``data[1]`` is the
        ``WARNINGS`` key whose message must appear in ``self.wp.issues``.
        """
        page_url, warning_key = data[0], data[1]

        # Empty markup: only the URL varies between cases.
        self.wp = webpage.Webpage(page_url, "",
                                  self.titles, self.descriptions)
        self.wp.report()

        recorded = self.wp.issues
        wanted = WARNINGS[warning_key]
        was_raised = any(entry["warning"] == wanted for entry in recorded)
        self.assertTrue(was_raised, "{0} not raised.".format(wanted))
コード例 #4
0
    def test_analyze_negative(self, data):
        """Assert that reporting on the given markup records a warning.

        ``data[0]`` is the HTML given to ``Webpage`` (under a fixed URL) and
        ``data[1]`` is the ``WARNINGS`` key whose message must appear in
        ``self.wp.issues``.
        """
        markup, warning_key = data[0], data[1]

        self.wp = webpage.Webpage("https://www.drawbuildplay.com", markup,
                                  self.titles, self.descriptions)
        self.wp.report()

        recorded = self.wp.issues
        wanted = WARNINGS[warning_key]
        was_raised = any(entry["warning"] == wanted for entry in recorded)
        self.assertTrue(was_raised, "{0} not raised.".format(wanted))
コード例 #5
0
    def test_analyze_positive_url(self, data):
        """Assert that reporting on an empty page at a URL earns a badge.

        ``data[0]`` is the URL given to ``Webpage`` and ``data[1]`` is the
        ``BADGES`` key expected in ``self.wp.achieved``. An empty key means
        only that ``report()`` runs; nothing further is asserted.
        """
        page_url, badge_key = data[0], data[1]

        # Empty markup: only the URL varies between cases.
        self.wp = webpage.Webpage(page_url, "",
                                  self.titles, self.descriptions)
        self.wp.report()

        if badge_key == "":
            return

        earned_badges = self.wp.achieved
        wanted = BADGES[badge_key]
        was_earned = any(entry["achievement"] == wanted
                         for entry in earned_badges)
        self.assertTrue(was_earned, "{0} not earned".format(wanted))
コード例 #6
0
    def test_analyze_positive(self, data):
        """Assert that reporting on the given markup earns a badge.

        ``data[0]`` is the HTML given to ``Webpage`` (under a fixed URL) and
        ``data[1]`` is the ``BADGES`` key expected in ``self.wp.achieved``.
        An empty key means only that ``report()`` runs; nothing further is
        asserted.
        """
        markup, badge_key = data[0], data[1]

        self.wp = webpage.Webpage("https://www.drawbuildplay.com", markup,
                                  self.titles, self.descriptions)
        self.wp.report()

        if badge_key == "":
            return

        # the page should have achieved the requested badge
        earned_badges = self.wp.achieved
        wanted = BADGES[badge_key]
        was_earned = any(entry["achievement"] == wanted
                         for entry in earned_badges)
        self.assertTrue(was_earned, "{0} not earned".format(wanted))
コード例 #7
0
    def test_analyze_duplicates_negative(self, page):
        """Assert that analyzing the same markup twice records a warning.

        ``page[0]`` is the HTML and ``page[1]`` is the ``WARNINGS`` key that
        must appear among the ``'issues'`` of at least one page report.
        """
        markup, warning_key = page[0], page[1]

        # Report on identical markup under two different URLs, passing the
        # same self.titles / self.descriptions objects to both pages.
        # NOTE(review): presumably Webpage uses those shared collections to
        # spot repeated titles/descriptions - confirm in Webpage.
        page_reports = []
        for index in range(2):
            page_url = (
                "https://www.drawbuildplay.com/page{0}.html".format(index))
            self.wp = webpage.Webpage(page_url, markup,
                                      self.titles, self.descriptions)
            page_reports.append(self.wp.report())

        # warn about duplicate information
        wanted = WARNINGS[warning_key]
        was_raised = any(entry["warning"] == wanted
                         for single in page_reports
                         for entry in single['issues'])
        self.assertTrue(
            was_raised,
            "{0} not raised. {1} {2}".format(wanted, self.titles,
                                             self.descriptions))