예제 #1
0
 def __init__(self, csspath=None, xpath=None, idname=None):
     '''
     Set up the component from either a CSS path or an xpath expression.

     At least one of csspath and xpath must be supplied.  If xpath is
     given it is used as-is and csspath is not consulted; otherwise
     csspath is converted with _csspath2xpath.  The superclass
     constructor is then called with classvalue('bigimage') as the
     classvalue and [noimgsize] as the postfilters.  idname is only
     forwarded when it is not None, so the superclass default applies
     otherwise.

     @param csspath : CSS path to convert, used only when xpath is None
     @param xpath   : xpath expression handed to the superclass
     @param idname  : forwarded as the idname keyword when not None
     '''
     # Imported at call time, as in the original; order is preserved.
     from mobilize.util import classvalue
     from mobilize.filters import noimgsize
     assert csspath is not None or xpath is not None, 'You must provide either a csspath or an xpath!'
     # An explicit xpath always wins over csspath.
     selector = _csspath2xpath(csspath) if xpath is None else xpath
     options = {
         'classvalue': classvalue('bigimage'),
         'postfilters': [noimgsize],
     }
     if idname is not None:
         options['idname'] = idname
     super().__init__(selector, **options)
예제 #2
0
    def __init__(self,
                 selector,
                 filters=None,
                 prefilters=None,
                 postfilters=None,
                 omitfilters=None,
                 classvalue=None,
                 idname=None,
                 style='',
                 filtermode=FILT_EACHELEM,
                 usecopy=False,
                 tag='div',
                 innerhtml=False,
                 keep_if=None,
                 ):
        '''
        ctor

        The selector's job is to unambiguously specify which part of
        the source document to extract.  Its exact syntax and meaning
        depends on the subclass.  For example, it could be an xpath
        expression, a CSS "path", etc.

        In general, the selector may actually be plural, i.e. a list
        or tuple (or an instance of a subclass of either) of individual
        selectors.  The effect, again depending on the subclass, will
        be to combine the matches of each selector together.  Any
        other value is treated as one single selector.  Either way,
        the result is stored as a list in self.selectors.

        SPECIFYING FILTERS

        Before final rendering, zero or more filters will be applied
        to the extracted content. By "filter", we mean some callable
        that conforms to the Mobilize filter API; read the docs of
        mobilize.filters for details.  Since each filter may transform
        the HTML snippet in an arbitrary way, the order matters.

        The filters, prefilters, postfilters, and omitfilters
        arguments all allow you to control what filters are applied to
        the extracted content. Each argument is optional, and if
        supplied, is a collection of filter-api callables. By default,
        filters.DEFAULT_FILTERS are set to be applied.  If you specify
        prefilters, that list is prepended to the default list;
        likewise, postfilters is appended to the default.  If you
        specify filters, that will *replace* the default.

        omitfilters is a set of filters, or some sequence convertible
        to a set (with the set() constructor).  If you specify
        omitfilters, any filters within (identified by Python's id())
        will be removed from the default list.  This is convenient if
        you want to use all default filters except one or two for a
        component, in a forward-compatible way.

        If you use the filters argument, you cannot specify
        prefilters, postfilters or omitfilters.  You can use one or
        more of prefilters, postfilters or omitfilters, but then
        cannot use filters.  To specify that no filters are to be used
        at all, pass filters=[].

        The four filter arguments are resolved into the final list by
        _pick_filters, and the result is stored in self.filters.

        OTHER OPTIONS

        filtermode specifies the manner and timing in which filters
        are applied to matching source elements.  In the process of
        creating the final HTML used in the mobile web page, the first
        step is fetching and extracting 0 or more elements from the
        source page. If filtermode is FILT_EACHELEM, the filters are
        then applied to each element individually before
        proceeding. The next stage is to collapse these elements into
        a single container div element.  If filtermode is
        FILT_COLLAPSED, the filters are instead applied to this final
        single element.

        Note the significant complexity difference here: with N
        filters and M elements matched from the source, the filter
        application has complexity of no less than Omega(N*M) with
        filtermode FILT_EACHELEM, but complexity Omega(M) for
        FILT_COLLAPSED.

        If innerhtml is False (the default), the selected element will
        be extracted.  If innerhtml is True, the actual content of
        that element is extracted; the parent tag itself is dropped.
        This only has an effect if there is exactly one matching
        element, otherwise the innerhtml=False behavior is forced.

        keep_if is, if supplied, a function taking an HtmlElement
        argument, and returning True or False.  The function is called
        on every matching element; if False, the element is discarded
        before proceeding.  All this happens before any filters are
        applied.

        TODO: make FILT_COLLAPSED the default filtermode

        @param selector    : What part of the document to extract
        @type  selector    : str, or list/tuple of str

        @param filters     : Absolute list of filters to use
        @type  filters     : list of function

        @param prefilters  : Filters to prepend to default list
        @type  prefilters  : list of function

        @param postfilters : Filters to append to default list
        @type  postfilters : list of function

        @param omitfilters : Filters to omit from default
        @type  omitfilters : set, or sequence that can be converted to a set

        @param classvalue  : Value of "class" attribute for containing div
        @type  classvalue  : str or None (indicating use default)

        @param idname      : Value of "id" attribute for containing div
        @type  idname      : str or None (indicating use default)

        @param style       : Value for "style" attribute for containing div
        @type  style       : str

        @param filtermode  : Filter application mode
        @type  filtermode  : int

        @param usecopy     : Whether to operate on a copy of the source element
        @type  usecopy     : bool

        @param tag         : Name of containing tag
        @type  tag         : str

        @param innerhtml   : If true, select the matching element's content only
        @type  innerhtml   : bool

        @param keep_if     : Selection predicate
        @type  keep_if     : function: HtmlElement -> bool

        '''
        # isinstance (rather than an exact type() match) so that list and
        # tuple subclasses are also recognised as a plural selector.  A str
        # is neither, so a single selector string is still wrapped whole.
        if isinstance(selector, (list, tuple)):
            self.selectors = list(selector)  # always store our own list
        else:
            self.selectors = [selector]
        self.filters = _pick_filters(filters, prefilters, postfilters, omitfilters)
        # None means "use the default class value" supplied by util.
        if classvalue is None:
            classvalue = util.classvalue()
        self.classvalue = classvalue
        self.idname = idname
        self.style = style
        self.filtermode = filtermode
        self.usecopy = usecopy
        self.tag = tag
        self.innerhtml = innerhtml
        self.keep_if = keep_if
예제 #3
0
 def test_classvalue(self):
     '''
     Check the string util.classvalue builds, with and without arguments.

     Each case pairs the expected class string with the positional
     arguments passed to util.classvalue; cases run in order and the
     first mismatch fails the test.
     '''
     cases = (
         ('mwu-elem', ()),
         ('mwu-elem mwu-elem-alpha mwu-elem-beta', ('alpha', 'beta')),
     )
     for expected, names in cases:
         self.assertSequenceEqual(expected, util.classvalue(*names))