예제 #1
0
    def __init__(self,
                 parselet,
                 selector_handler=None,
                 strict=False,
                 debug=False):
        """
        Take a parselet and an optional selector_handler
        and build an abstract representation of the Parsley extraction
        logic.

        Two helper class methods can be used to instantiate a Parselet
        from JSON rules: :meth:`.from_jsonstring`, :meth:`.from_jsonfile`.

        :param dict parselet: Parsley script as a Python dict object
        :param selector_handler: optional instance of
            :class:`selectors.SelectorHandler`; when omitted (or any other
            falsy value), an instance of
            :class:`selectors.DefaultSelectorHandler` is created
        :param boolean strict: Set to *True* if you want to
            enforce that missing required keys raise an Exception; default is False
            (i.e. lenient/non-strict mode)
        :param boolean debug: Set to *True* to turn on the instance's
            ``DEBUG`` flag; the flag is also forwarded to the default
            selector handler when one is created; default is False
        :raises: :class:`.InvalidKeySyntax`
        :raises ValueError: if *selector_handler* is a truthy value that is
            not a :class:`selectors.SelectorHandler` instance

        Example:

        >>> import parslepy
        >>> rules = {
        ...     "heading": "h1#main",
        ...     "news(li.newsitem)": [{
        ...         "title": ".",
        ...         "url": "a/@href"
        ...     }],
        ... }
        >>> p = parslepy.Parselet(rules)
        >>> type(p)
        <class 'parslepy.base.Parselet'>

        Use :meth:`~base.Parselet.extract` or :meth:`~base.Parselet.parse`
        to get extracted content from documents.
        """

        # The flags are only ever switched on here; when the argument is
        # falsy the pre-existing value is left untouched.
        # NOTE(review): DEBUG / STRICT_MODE defaults are presumably defined
        # at class level (self.DEBUG is read below even when debug is
        # False) -- confirm against the class body.
        if debug:
            self.DEBUG = True
        if strict:
            self.STRICT_MODE = True

        self.parselet = parselet

        # Check the type *before* the truthiness: a valid handler that
        # happens to evaluate falsy (e.g. a subclass defining __len__ or
        # __bool__) must be used as given, not silently replaced by the
        # default handler.
        if isinstance(selector_handler, SelectorHandler):
            self.selector_handler = selector_handler

        elif not selector_handler:
            # Nothing usable supplied (None or another falsy value):
            # fall back to the default handler, sharing the debug flag.
            self.selector_handler = DefaultSelectorHandler(debug=self.DEBUG)

        else:
            raise ValueError("You must provide a SelectorHandler instance")

        self.compile()