コード例 #1
0
    def test_copy_pickle(self):
        """Test that compiled patterns pickle, copy, and cache as expected."""

        # The selector is chosen so the compiled pattern contains all custom
        # types: `Selector`, `NullSelector`, `SelectorTag`,
        # `SelectorAttribute`, `SelectorNth`, `SelectorLang`, `SelectorList`,
        # and `Namespaces`.
        selector = 'p.class#id[id]:nth-child(2):lang(en):focus'
        namespaces = {'html': 'http://www.w3.org/TR/html4/'}

        # A pickle round trip must give back an equal pattern.
        p1 = sv.compile(selector, namespaces)
        pp1 = pickle.loads(pickle.dumps(p1))
        self.assertEqual(pp1, p1)

        # Compiling the same input again must hand back the cached object.
        p2 = sv.compile(selector, namespaces)
        self.assertIs(p1, p2)

        # Different flags must produce a new, unequal pattern.
        p3 = sv.compile(selector, namespaces, flags=0x10)
        self.assertIsNot(p1, p3)
        self.assertNotEqual(p1, p3)

        # A copy is equivalent to its source, but not the same object.
        p4 = copy.copy(p1)
        self.assertIsNot(p4, p1)
        self.assertEqual(p4, p1)

        p5 = copy.copy(p3)
        self.assertIsNot(p5, p3)
        self.assertEqual(p5, p3)
        self.assertIsNot(p5, p4)
コード例 #2
0
    def test_pseudo_element(self):
        """Test that compiling a pseudo-element raises `NotImplementedError`."""

        # Both the single-colon and the double-colon spelling are checked.
        for selector in (':first-line', '::first-line'):
            with self.assertRaises(NotImplementedError):
                sv.compile(selector)
コード例 #3
0
ファイル: test_api.py プロジェクト: sm-ghavami/soupsieve
    def test_syntax_error_on_third_line(self):
        """Test that an error on the third line of a selector reports line 3, column 3."""

        selector = (
            'input:is(\n'
            '  [name=foo]\n'
            '  [type=42]\n'
            ')\n'
        )
        with self.assertRaises(sv.SelectorSyntaxError) as raised:
            sv.compile(selector)

        error = raised.exception
        self.assertEqual(error.line, 3)
        self.assertEqual(error.col, 3)
コード例 #4
0
    def test_invalid_incomplete_has(self):
        """Test that an incomplete or empty `:has()` raises `SyntaxError`."""

        for selector in (':has(>)', ':has()'):
            with self.assertRaises(SyntaxError):
                sv.compile(selector)
コード例 #5
0
    def test_invalid_pseudo_close(self):
        """Test that an invalid pseudo-class close raises `SyntaxError`."""

        # A stray `)` and a trailing comma before `)` are both checked.
        for selector in ('div)', ':is(div,)'):
            with self.assertRaises(SyntaxError):
                sv.compile(selector)
コード例 #6
0
    def test_invalid_pseudo(self):
        """Test the errors raised for invalid pseudo-classes."""

        # Each case pairs the expected exception with the selector raising it.
        cases = (
            (NotImplementedError, ':before'),
            (SyntaxError, ':nth-child(a)'),
        )
        for expected_error, selector in cases:
            with self.assertRaises(expected_error):
                sv.compile(selector)
コード例 #7
0
    def test_invalid_tag(self):
        """
        Test that a misplaced tag raises `SyntaxError`.

        A tag name must come first in a compound selector.
        """

        self.assertRaises(SyntaxError, sv.compile, ':is(div)p')
コード例 #8
0
ファイル: test_api.py プロジェクト: sm-ghavami/soupsieve
    def test_syntax_error_has_text_and_position(self):
        """Test that a selector syntax error carries its context, line, and column."""

        with self.assertRaises(sv.SelectorSyntaxError) as raised:
            sv.compile('input.field[type=42]')

        error = raised.exception
        # The context echoes the selector with a caret under the error column.
        expected_context = 'input.field[type=42]\n           ^'
        self.assertEqual(error.context, expected_context)
        self.assertEqual(error.line, 1)
        self.assertEqual(error.col, 12)
コード例 #9
0
ファイル: test_api.py プロジェクト: sm-ghavami/soupsieve
    def test_syntax_error_with_multiple_lines(self):
        """Test that an error on the second line of a selector is located correctly."""

        selector = 'input\n.field[type=42]'
        with self.assertRaises(sv.SelectorSyntaxError) as raised:
            sv.compile(selector)

        error = raised.exception
        # The context marks the failing line with `-->` and the column with `^`.
        expected_context = '    input\n--> .field[type=42]\n          ^'
        self.assertEqual(error.context, expected_context)
        self.assertEqual(error.line, 2)
        self.assertEqual(error.col, 7)
コード例 #10
0
ファイル: xml.py プロジェクト: skeptycal/pyspelling
    def setup(self):
        """Populate the filter's attributes from `self.config`."""

        config = self.config

        def compile_joined(key):
            """Compile the comma-joined selectors stored under `key`."""
            return sv.compile(','.join(config[key]), config['namespaces'])

        self.user_break_tags = set(config['break_tags'])
        self.comments = config['comments']
        self.attributes = set(config['attributes'])
        # Parser and type are both fixed to XML here.
        self.parser = 'xml'
        self.type = 'xml'
        self.ignores = compile_joined('ignores')
        self.captures = compile_joined('captures')
コード例 #11
0
ファイル: selector.py プロジェクト: MirkoCovizzi/torrenttv
    def compile(selector: str) -> "Selector":
        """
        Build a `Selector` from a pipe-separated selector expression.

        The first segment has the form `<css pattern>@<attribute>`; the
        attribute part is optional. Each following segment may start with
        `regx:`, `fmt:`, `cvt:` or `defval:` and supplies the matching option.
        A `|` preceded by a backslash, or one inside parentheses, does not
        start a new segment.

        Raises `ValueError` when the CSS pattern is rejected by soupsieve.
        """
        # Split on top-level, unescaped pipes.
        segments = []
        current = []
        depth = 0
        previous = None
        for ch in selector:
            escaped = previous == '\\'
            if ch == '|' and not escaped and depth == 0:
                segments.append(''.join(current))
                current = []
                # `previous` is intentionally left untouched for a separator.
                continue
            if not escaped:
                if ch == '(':
                    depth += 1
                elif ch == ')':
                    depth -= 1
            previous = ch
            current.append(ch)
        segments.append(''.join(current))
        segments = [segment.strip() for segment in segments]

        # The leading segment carries the CSS pattern and optional attribute.
        head_pieces = [piece.strip() for piece in segments[0].split('@')]
        pattern = head_pieces[0]
        attr = head_pieces[1] if len(head_pieces) > 1 else None

        # (prefix, extraction regex, option name) for every known directive.
        directives = (
            ('regx:', r"regx:\s*(.*)\s*", 'regx'),
            ('fmt:', r"fmt:\s*(.*)\s*", 'fmt'),
            ('cvt:', r"cvt:\s*(\w+)\s*", 'cvt'),
            ('defval:', r"defval:(.*)\s*", 'defval'),
        )
        options = {'regx': None, 'fmt': None, 'cvt': None, 'defval': None}
        for segment in segments[1:]:
            for prefix, regex, option in directives:
                if segment.startswith(prefix):
                    found = match(regex, segment)
                    if found:
                        options[option] = found.group(1).strip()
                    # Only the first matching prefix is considered.
                    break

        # Validate the CSS pattern eagerly so errors surface at build time.
        if pattern:
            try:
                soupsieve.compile(pattern)
            except soupsieve.SelectorSyntaxError as e:
                raise ValueError("Syntax error: {}".format(e)) from e

        return Selector(pattern, attr, options['regx'], options['fmt'],
                        options['cvt'], options['defval'])
コード例 #12
0
    def test_quirks_warn_attribute_unquoted(self):
        """Test that quirks mode raises a warning with attribute values that normally should be quoted."""

        # Start from an empty pattern cache so the outcome does not depend on
        # which tests ran earlier.
        # NOTE(review): presumably a cached pattern is returned without being
        # re-parsed and would then not emit the warning again - confirm.
        sv.purge()

        with warnings.catch_warnings(record=True) as w:
            # Cause all warnings to always be triggered.
            warnings.simplefilter("always")
            # Trigger a warning.
            sv.compile('[data={}]', flags=sv._QUIRKS)
            # Exactly one warning is expected, and it must be a quirks warning.
            self.assertEqual(len(w), 1)
            self.assertTrue(issubclass(w[-1].category, sv_util.QuirksWarning))
コード例 #13
0
    def test_quirks_warn_relative_combinator(self):
        """Test that quirks mode warns about a leading relative combinator."""

        # Empty the pattern cache before compiling.
        sv.purge()

        with warnings.catch_warnings(record=True) as caught:
            # Make sure no warning is filtered out.
            warnings.simplefilter("always")
            # Compiling in quirks mode is what should emit the warning.
            sv.compile('> p', flags=sv._QUIRKS)
            # Exactly one warning, of the quirks category, is expected.
            self.assertTrue(len(caught) == 1)
            latest = caught[-1]
            self.assertTrue(issubclass(latest.category, sv_util.QuirksWarning))
コード例 #14
0
    def __init__(
        self,
        client,
        *,
        initial_paths: Optional[Iterable[str]] = None,
        rules: Optional[Iterable[Rule]] = None,
        path_attrs: Iterable[str] = (HREF, ),
        ignore_css_selectors: Optional[Iterable] = None,
        ignore_form_fields: Optional[Iterable[str]] = None,
        max_requests: Optional[int] = None,
        capture_exceptions: bool = True,
        output_summary: bool = True,
        should_process_handlers: Optional[Iterable[Callable]] = None,
        check_response_handlers: Optional[Iterable[Callable]] = None,
    ):
        """
        Store the configuration and wrap the given client.

        Every iterable argument is copied into a list (a tuple for
        `path_attrs`); `None` is treated as empty.

        Raises:
            ValueError: if an entry of `ignore_css_selectors` is rejected by
                soupsieve; the original error is chained as the cause.
        """
        # params
        self._client = client
        self.initial_paths = list(initial_paths or [])
        self.rules = list(rules or [])
        self.path_attrs = tuple(path_attrs)
        self.ignore_css_selectors = list(ignore_css_selectors or [])
        self.ignore_form_fields = list(ignore_form_fields or [])
        self.max_requests = max_requests
        self.capture_exceptions = capture_exceptions
        self.output_summary = output_summary

        # data structures
        self.queue: Queue = Queue()
        self.graph = DirectedGraph()
        self.tracebacks: List = []

        # handler lists
        self.should_process_handlers = list(should_process_handlers or [])
        self.check_response_handlers = list(check_response_handlers or [])

        # check css selectors
        for selector in self.ignore_css_selectors:
            try:
                soupsieve.compile(selector)
            except soupsieve.SelectorSyntaxError as e:
                msg = f"Invalid CSS selector '{selector}' (see parent exception)"
                raise ValueError(msg) from e

        # detect client and construct wrapper
        # Pass the normalized list rather than the raw argument: the raw value
        # may be None, or a one-shot iterable already consumed by list() above.
        self.client = detect_and_wrap_client(self._client,
                                             self.ignore_css_selectors)

        # get logger
        self.logger = logging.getLogger(LOGGER_NAME)
コード例 #15
0
    def setup(self):
        """Initialize the filter's attributes with their fixed defaults."""

        # Namespace prefixes handed to the capture selector below.
        namespaces = {
            'text': 'urn:oasis:names:tc:opendocument:xmlns:text:1.0',
            'draw': 'urn:oasis:names:tc:opendocument:xmlns:drawing:1.0'
        }
        capture_selector = ','.join(self.default_capture)

        self.additional_context = ''
        self.comments = False
        self.attributes = []
        self.parser = 'xml'
        self.type = None
        self.filepattern = 'content.xml'
        self.namespaces = namespaces
        # The ignore pattern is compiled from an empty selector.
        self.ignores = sv.compile('', {})
        self.captures = sv.compile(capture_selector, self.namespaces)
コード例 #16
0
    def test_compiled_icomments(self):
        """Test that `icomments` on a compiled pattern honors `limit`."""

        markup = """
        <!-- before header -->
        <html>
        <head>
        </head>
        <body>
        <!-- comment -->
        <p id="1"><code id="2"></code><img id="3" src="./image.png"/></p>
        <pre id="4"></pre>
        <p><span id="5" class="some-class"></span><span id="some-id"></span></p>
        <pre id="6" class='ignore'>
            <!-- don't ignore -->
        </pre>
        </body>
        </html>
        """

        soup = self.soup(markup, 'html5lib')
        compiled = sv.compile('div', None, 0)

        # With `limit=2`, only the first two comments are expected back.
        found = []
        for comment in compiled.icomments(soup, limit=2):
            found.append(sv_util.ustr(comment).strip())

        expected = ['before header', 'comment']
        self.assertEqual(sorted(found), sorted(expected))
コード例 #17
0
    def test_compiled_comments(self):
        """Test that `comments` on a compiled pattern returns every comment."""

        markup = """
        <!-- before header -->
        <html>
        <head>
        </head>
        <body>
        <!-- comment -->
        <p id="1"><code id="2"></code><img id="3" src="./image.png"/></p>
        <pre id="4"></pre>
        <p><span id="5" class="some-class"></span><span id="some-id"></span></p>
        <pre id="6" class='ignore'>
            <!-- don't ignore -->
        </pre>
        </body>
        </html>
        """

        soup = self.soup(markup, 'html.parser')

        # The compiled object's `comments` should behave like `sv.comments`.
        compiled = sv.compile('div', None, 0)
        found = []
        for comment in compiled.comments(soup):
            found.append(sv_util.ustr(comment).strip())

        expected = ['before header', 'comment', "don't ignore"]
        self.assertEqual(sorted(found), sorted(expected))
コード例 #18
0
ファイル: html.py プロジェクト: nhanguyen66/CIS440_Group4
    def setup(self):
        """Populate the filter's attributes from `self.config`."""

        config = self.config

        def compile_or_none(key):
            """Compile the comma-joined selectors under `key`, or return None if blank."""
            joined = ','.join(config[key])
            if not joined.strip():
                return None
            return sv.compile(joined, config['namespaces'])

        self.user_break_tags = set(config['break_tags'])
        # Only a literal `True` (or a missing key) enables comments.
        self.comments = config.get('comments', True) is True
        self.attributes = set(config['attributes'])

        # Unknown modes fall back to 'html'.
        mode = config['mode']
        self.type = mode if mode in MODE else 'html'
        self.parser = MODE[self.type]

        self.ignores = compile_or_none('ignores')
        self.captures = compile_or_none('captures')
コード例 #19
0
    def compile_pattern(self, selectors, namespaces=None, flags=0):
        """Print the selectors, then compile them with the debug flag set."""

        print('PATTERN: ', selectors)

        # `sv.DEBUG` is always added; the quirks flag only when `self.quirks` is set.
        effective_flags = flags | sv.DEBUG
        if self.quirks:
            effective_flags = effective_flags | sv._QUIRKS

        return sv.compile(selectors, namespaces=namespaces, flags=effective_flags)
コード例 #20
0
    def parse(selector: str) -> "Selector":
        """
        Parse a selector expression into a `Selector`.

        Expected form:
        <css selector>@<attribute> | re: <matcher> | fmt: <formatter>

        A `|` preceded by a backslash, or one inside parentheses, does not
        separate segments. A non-empty CSS selector is compiled with soupsieve
        before the `Selector` is built; any error it raises propagates.
        """
        # Collect characters per segment, splitting on top-level unescaped pipes.
        buffers = [[]]
        depth = 0
        previous = None
        for ch in selector:
            unescaped = previous != '\\'
            if ch == '|' and unescaped and depth == 0:
                buffers.append([])
                # `previous` is intentionally left untouched for a separator.
                continue
            if unescaped:
                if ch == '(':
                    depth += 1
                elif ch == ')':
                    depth -= 1
            previous = ch
            buffers[-1].append(ch)

        head, *directives = (''.join(chars).strip() for chars in buffers)

        # The leading segment is `<css>@<attribute>`, attribute optional.
        css, *after_css = (piece.strip() for piece in head.split('@'))
        attr = after_css[0] if after_css else None

        # Map each directive prefix to its extraction regex.
        rules = (
            ('re:', r"re:\s*(.*)\s*"),
            ('fmt:', r"fmt:\s*(.*)\s*"),
        )
        extracted = {'re:': None, 'fmt:': None}
        for directive in directives:
            for prefix, regex_source in rules:
                if directive.startswith(prefix):
                    found = match(regex_source, directive)
                    if found:
                        extracted[prefix] = found.group(1).strip()
                    # Only the first matching prefix is considered.
                    break

        if css:
            soupsieve.compile(css)

        return Selector(css, attr, extracted['re:'], extracted['fmt:'])
コード例 #21
0
ファイル: ooxml.py プロジェクト: skeptycal/pyspelling
    def setup(self):
        """Initialize the filter's attributes with their fixed defaults."""

        # The ignore pattern is compiled from an empty selector.
        empty_pattern = sv.compile('', {})

        self.additional_context = ''
        self.comments = False
        self.attributes = []
        self.parser = 'xml'
        # Type, file pattern, and captures start out unset.
        self.type = None
        self.filepattern = ''
        self.ignores = empty_pattern
        self.captures = None
コード例 #22
0
    def test_invalid_combination(self):
        """
        Test that invalid combinator usage raises `SyntaxError`.

        Selectors cannot start with relational symbols unless in `:has()`.
        `:has()` cannot start with `,`.
        """

        invalid_selectors = (
            '> p',
            ', p',
            ':has(, p)',
            'div >> p',
            'div >',
        )
        for selector in invalid_selectors:
            with self.assertRaises(SyntaxError):
                sv.compile(selector)
コード例 #23
0
    def test_cache(self):
        """Test that the compile cache fills, holds 500 entries, and can be purged."""

        def cached_count():
            """Return the number of entries currently in the compile cache."""
            return sv.cp._cached_css_compile.cache_info().currsize

        sv.purge()
        self.assertEqual(cached_count(), 0)

        # Compile many random selectors; the cache must be in use throughout.
        for _ in range(1000):
            value = '[value="{}"]'.format(sv_util.ustr(random.randint(1, 10000)))
            p = sv.compile(value)
            self.assertEqual(p.pattern, value)
            self.assertGreater(cached_count(), 0)

        # After 1000 compiles the cache is expected to hold exactly 500 entries.
        self.assertEqual(cached_count(), 500)

        sv.purge()
        self.assertEqual(cached_count(), 0)
コード例 #24
0
ファイル: test_api.py プロジェクト: sm-ghavami/soupsieve
    def test_recompile(self):
        """Test that a compiled pattern passes through `compile` unless parameters change."""

        original = sv.compile('p[id]')
        self.assertTrue(original is sv.compile(original))

        # Supplying any extra parameter with a compiled pattern must be rejected.
        changed_parameters = (
            {'flags': sv.DEBUG},
            {'namespaces': {"": ""}},
            {'custom': {":--header": 'h1, h2, h3, h4, h5, h6'}},
        )
        for kwargs in changed_parameters:
            with pytest.raises(ValueError):
                sv.compile(original, **kwargs)
コード例 #25
0
    def test_icomment_compilied(self):
        """Test that `icomments` on a compiled pattern emits a `DeprecationWarning`."""

        soup = self.soup('<div><!-- comments -->text</div>', 'html.parser')

        with warnings.catch_warnings(record=True) as caught:
            # Make sure no warning is filtered out.
            warnings.simplefilter("always")

            compiled = sv.compile('div')
            compiled.icomments(soup)

            # Exactly one warning, of the deprecation category, is expected.
            self.assertTrue(len(caught) == 1)
            latest = caught[-1]
            self.assertTrue(issubclass(latest.category, DeprecationWarning))
コード例 #26
0
    def test_recompile(self):
        """Test that a compiled pattern passes through `compile` unless flags or namespaces change."""

        original = sv.compile('p[id]')
        self.assertTrue(original is sv.compile(original))

        # Supplying flags or namespaces with a compiled pattern must be rejected.
        changed_parameters = (
            {'flags': 0x10},
            {'namespaces': {"": ""}},
        )
        for kwargs in changed_parameters:
            with pytest.raises(ValueError):
                sv.compile(original, **kwargs)
コード例 #27
0
ファイル: ooxml.py プロジェクト: skeptycal/pyspelling
    def determine_file_type(self, z):
        """
        Determine the file type from the archive's `[Content_Types].xml`.

        Reads `[Content_Types].xml` from `z`, decodes it (UTF-8 when no
        encoding is detected), and takes the type from the first `Override`
        element whose `PartName` starts with `/<key>/` for a key in `MIMEMAP`.
        The file pattern, namespaces, and capture pattern are then loaded from
        `DOC_PARAMS` for that type.

        Raises `KeyError` if `self.type` ends up without a `DOC_PARAMS` entry.
        """

        content = z.read('[Content_Types].xml')
        with io.BytesIO(content) as b:
            encoding = self._analyze_file(b)
            if encoding is None:
                encoding = 'utf-8'
            # Rewind before reading the whole buffer for decoding.
            b.seek(0)
            text = b.read().decode(encoding)
            soup = bs4.BeautifulSoup(text, 'xml')
            for o in soup.find_all('Override'):
                name = o.attrs.get('PartName')
                if name is None:
                    # An `Override` without `PartName` cannot identify a type.
                    continue
                for k, v in MIMEMAP.items():
                    if name.startswith('/{}/'.format(k)):
                        self.type = v
                        break
                if self.type:
                    break

        params = DOC_PARAMS[self.type]
        self.filepattern = params['filepattern']
        self.namespaces = params['namespaces']
        self.captures = sv.compile(params['captures'], params['namespaces'])
コード例 #28
0
ファイル: base_rule.py プロジェクト: yjqiang/yj_ebook_reader
 def __init__(self, css_selector, attr, string_pattern):
     """Store the compiled CSS selector, the attribute, and the string pattern."""
     # Compile first, so an invalid selector fails before any attribute is set.
     compiled_selector = sv.compile(css_selector)
     self.css_selector = compiled_selector
     self.attr, self.string_pattern = attr, string_pattern
コード例 #29
0
    def test_invalid_syntax(self):
        """Test that a selector with invalid syntax raises `SyntaxError`."""

        self.assertRaises(SyntaxError, sv.compile, 'div?')
コード例 #30
0
    def test_at_rule(self):
        """Test that an at-rule (not supported) raises `NotImplementedError`."""

        self.assertRaises(NotImplementedError, sv.compile, '@page :left')