예제 #1
0
 def setUp(self):
     """Run the parent class's setUp, then attach a new PageVisitor."""
     super(TestPageExtractor, self).setUp()
     visitor = PageVisitor()
     self.visitor = visitor
예제 #2
0
 def setUp(self):
     """Prepare the fixture: parent setUp first, then a PageVisitor."""
     # The parent class's setUp runs before the visitor is created.
     super(TestPageExtractor, self).setUp()
     page_visitor = PageVisitor()
     self.visitor = page_visitor
예제 #3
0
class TestPageExtractor(BaseTestCase):
    """Tests that run PageExtractor over parsed page markup."""

    def setUp(self):
        """Run the parent class's setUp, then create a PageVisitor."""
        super(TestPageExtractor, self).setUp()
        self.visitor = PageVisitor()

    def assert_extract(
            self, page, specs=None, compat=None, footnotes=None, issues=None):
        """Parse and extract ``page``, then check every extracted section.

        ``specs``, ``compat`` and ``issues`` are compared as an empty list
        when omitted (or otherwise falsy); ``footnotes`` is compared exactly
        as passed, so its default expectation is ``None``.
        """
        tree = kumascript_grammar.parse(page)
        elements = self.visitor.visit(tree)
        extracted = PageExtractor(
            elements=elements, feature=self.feature).extract()
        # Checked in this order; the first mismatch fails the test.
        expectations = (
            (specs or [], 'specs'),
            (compat or [], 'compat'),
            (footnotes, 'footnotes'),
            (issues or [], 'issues'),
        )
        for expected, key in expectations:
            self.assertEqual(expected, extracted[key])

    def test_valid_spec_section(self):
        """The sample specification section extracts to the expected specs."""
        spec_html, spec_data = self.get_sample_specs()
        self.assert_extract(spec_html, specs=spec_data)

    def test_invalid_spec_section(self):
        """A paragraph under the Specifications heading is skipped content."""
        expected_issue = ('skipped_content', 23, 40, {})
        self.assert_extract(
            '<h2>Specifications</h2><p>Incomplete</p>',
            issues=[expected_issue])

    def test_valid_compat_section(self):
        """The sample compatibility section extracts to the expected data."""
        compat_html, compat_data = self.get_sample_compat()
        self.assert_extract(compat_html, compat=compat_data)

    def test_invalid_compat_section(self):
        """A paragraph under the compatibility heading is skipped content."""
        expected_issue = ('skipped_content', 30, 48, {})
        self.assert_extract(
            '<h2>Browser Compatibility</h2><p>Not present</p>',
            issues=[expected_issue])

    def test_full_page(self):
        """Specs and compat data are both extracted from a complete page."""
        spec_html, spec_data = self.get_sample_specs()
        compat_html, compat_data = self.get_sample_compat()
        template = """\
<p>Some lead content</p>
<h2>Other Text</h2>
<p>Here's some other content</p>
%s
%s
<h2>Other Pages</h2>
<p>See <a href="/foo">foo</a></p>
"""
        page = template % (spec_html, compat_html)
        self.assert_extract(page, specs=spec_data, compat=compat_data)

    def test_full_page_reversed_sections(self):
        """Extraction also works when compat comes before the specs."""
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1175177
        # https://developer.mozilla.org/en-US/docs/Web/API/Blob/slice
        spec_html, spec_data = self.get_sample_specs()
        compat_html, compat_data = self.get_sample_compat()
        template = """\
<p>Some lead content</p>
<h2>Other Text</h2>
<p>Here's some other content</p>
<h3>More Detail</h3>
<p>Trigger &lt;h3&gt; check not in a compat section.</p>
%s
%s
<h2>Other Pages</h2>
<p>See <a href="/foo">foo</a></p>
"""
        page = template % (compat_html, spec_html)
        self.assert_extract(page, specs=spec_data, compat=compat_data)

    def test_compat_h3(self):
        """An <h3> after the compat section is reported as skipped_h3."""
        # https://developer.mozilla.org/en-US/docs/Web/API/MozContact/key
        compat_html, compat_data = self.get_sample_compat()
        trailing = '<h3 id="Gecko Gecko">Gecko Note</h3>\n<p>A note</p>'
        page = compat_html + trailing
        expected_issue = ('skipped_h3', 354, 390, {'h3': 'Gecko Note'})
        self.assert_extract(
            page, compat=compat_data, issues=[expected_issue])

    def test_div_wrapped(self):
        """Sections wrapped in a <div> give no data and one no_data issue."""
        # https://developer.mozilla.org/en-US/docs/Web/API/Document/execCommand
        # Only the markup is used; the expected data is deliberately ignored.
        spec_html, _unused_specs = self.get_sample_specs()
        compat_html, _unused_compat = self.get_sample_compat()
        template = """\
<p>Some lead content</p>
<div>
%s
%s
</div>
"""
        page = template % (spec_html, compat_html)
        self.assert_extract(page, issues=[('no_data', 0, 24, {})])
예제 #4
0
class TestPageExtractor(BaseTestCase):
    """Checks PageExtractor output for a variety of page layouts."""

    def setUp(self):
        """Call the parent class's setUp and store a new PageVisitor."""
        super(TestPageExtractor, self).setUp()
        self.visitor = PageVisitor()

    def assert_extract(self, page, specs=None, compat=None, footnotes=None,
                       issues=None):
        """Extract data from ``page`` and compare it with the expectations.

        Omitted (or falsy) ``specs``, ``compat`` and ``issues`` are expected
        to be empty lists. ``footnotes`` is compared as given, so it is
        expected to be ``None`` unless the caller passes a value.
        """
        parse_tree = kumascript_grammar.parse(page)
        visited = self.visitor.visit(parse_tree)
        extractor = PageExtractor(elements=visited, feature=self.feature)
        result = extractor.extract()
        wanted = {
            'specs': specs or [],
            'compat': compat or [],
            'footnotes': footnotes,
            'issues': issues or [],
        }
        # Fixed order, so the first failing section is deterministic.
        for section in ('specs', 'compat', 'footnotes', 'issues'):
            self.assertEqual(wanted[section], result[section])

    def test_valid_spec_section(self):
        """The sample spec markup yields the expected specs."""
        markup, wanted_specs = self.get_sample_specs()
        self.assert_extract(markup, specs=wanted_specs)

    def test_invalid_spec_section(self):
        """Unparseable spec content is reported as skipped_content."""
        markup = '<h2>Specifications</h2><p>Incomplete</p>'
        wanted_issues = [('skipped_content', 23, 40, {})]
        self.assert_extract(markup, issues=wanted_issues)

    def test_valid_compat_section(self):
        """The sample compat markup yields the expected compat data."""
        markup, wanted_compat = self.get_sample_compat()
        self.assert_extract(markup, compat=wanted_compat)

    def test_invalid_compat_section(self):
        """Unparseable compat content is reported as skipped_content."""
        markup = '<h2>Browser Compatibility</h2><p>Not present</p>'
        wanted_issues = [('skipped_content', 30, 48, {})]
        self.assert_extract(markup, issues=wanted_issues)

    def test_full_page(self):
        """A page with both sections yields both specs and compat data."""
        spec_markup, wanted_specs = self.get_sample_specs()
        compat_markup, wanted_compat = self.get_sample_compat()
        sections = (spec_markup, compat_markup)
        markup = """\
<p>Some lead content</p>
<h2>Other Text</h2>
<p>Here's some other content</p>
%s
%s
<h2>Other Pages</h2>
<p>See <a href="/foo">foo</a></p>
""" % sections
        self.assert_extract(
            markup, specs=wanted_specs, compat=wanted_compat)

    def test_full_page_reversed_sections(self):
        """The compat section may precede the specification section."""
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1175177
        # https://developer.mozilla.org/en-US/docs/Web/API/Blob/slice
        spec_markup, wanted_specs = self.get_sample_specs()
        compat_markup, wanted_compat = self.get_sample_compat()
        sections = (compat_markup, spec_markup)
        markup = """\
<p>Some lead content</p>
<h2>Other Text</h2>
<p>Here's some other content</p>
<h3>More Detail</h3>
<p>Trigger &lt;h3&gt; check not in a compat section.</p>
%s
%s
<h2>Other Pages</h2>
<p>See <a href="/foo">foo</a></p>
""" % sections
        self.assert_extract(
            markup, specs=wanted_specs, compat=wanted_compat)

    def test_compat_h3(self):
        """An <h3> appended to the compat section raises skipped_h3."""
        # https://developer.mozilla.org/en-US/docs/Web/API/MozContact/key
        compat_markup, wanted_compat = self.get_sample_compat()
        markup = (
            compat_markup +
            '<h3 id="Gecko Gecko">Gecko Note</h3>\n<p>A note</p>')
        wanted_issues = [('skipped_h3', 354, 390, {'h3': 'Gecko Note'})]
        self.assert_extract(
            markup, compat=wanted_compat, issues=wanted_issues)

    def test_div_wrapped(self):
        """Sections nested inside a <div> produce only a no_data issue."""
        # https://developer.mozilla.org/en-US/docs/Web/API/Document/execCommand
        # The expected data halves of the samples are not needed here.
        spec_markup, _specs = self.get_sample_specs()
        compat_markup, _compat = self.get_sample_compat()
        sections = (spec_markup, compat_markup)
        markup = """\
<p>Some lead content</p>
<div>
%s
%s
</div>
""" % sections
        wanted_issues = [('no_data', 0, 24, {})]
        self.assert_extract(markup, issues=wanted_issues)