Example #1
0
class Dict(with_metaclass(_DictMeta, Filter)):
    """
    Filter picking a value out of nested dicts/lists by following a path.

    The path is kept in ``self.selector`` as a list of elements; each element
    is used in turn to index the current content (see :meth:`select`).
    """

    def __init__(self, selector=None, default=_NO_DEFAULT):
        """
        :param selector: ``None`` (empty path), a string which is split on
                         ``'/'``, a callable (stored as a single path
                         element), or any other object, which is stored as is
                         (not copied)
        :param default: forwarded to the parent constructor
        """
        # NOTE(review): the instance itself is passed as first positional
        # argument to the parent constructor; ``self.selector`` is assigned
        # just below. Confirm against Filter.__init__ that this is intended.
        super(Dict, self).__init__(self, default=default)
        if selector is None:
            self.selector = []
        elif isinstance(selector, basestring):
            self.selector = selector.split('/')
        elif callable(selector):
            self.selector = [selector]
        else:
            self.selector = selector

    def __getitem__(self, name):
        """
        Append ``name`` to the path and return this same filter, so that
        ``flt['a']['b']`` extends the selector in place.
        """
        self.selector.append(name)
        return self

    @debug()
    def filter(self, elements):
        """
        Return ``elements`` unchanged unless it is the ``_NOT_FOUND`` marker.

        For ``_NOT_FOUND``, delegate to ``default_or_raise`` with a
        :class:`ParseError` naming the selector.
        """
        if elements is not _NOT_FOUND:
            return elements
        else:
            return self.default_or_raise(ParseError('Element %r not found' % self.selector))

    @classmethod
    def select(cls, selector, item, obj=None, key=None):
        """
        Walk ``selector`` through ``item`` and return the value reached.

        :param selector: iterable of path elements
        :param item: a dict or a list, used directly; any other object is
                     expected to expose the content as ``item.el``
        :param obj: stored on ``_Filter`` path elements as ``_obj`` before
                    they are called
        :param key: stored on ``_Filter`` path elements as ``_key`` before
                    they are called
        :return: the selected value, or ``_NOT_FOUND`` when a step of the
                 path cannot be resolved
        """
        if isinstance(item, (dict, list)):
            content = item
        else:
            content = item.el

        for el in selector:
            if isinstance(content, list):
                try:
                    el = int(el)
                except ValueError:
                    # A non-numeric path element cannot index a list: report
                    # it like any other missing element instead of letting
                    # ValueError escape, so that filter() can apply the
                    # default value or raise ParseError.
                    return _NOT_FOUND
            elif isinstance(el, _Filter):
                # Path element computed by another filter: give it the same
                # context, then evaluate it against the original item.
                el._key = key
                el._obj = obj
                el = el(item)
            elif callable(el):
                el = el(item)

            try:
                content = content[el]
            except (KeyError, IndexError, TypeError):
                return _NOT_FOUND

        return content
Example #2
0
class Enum(with_metaclass(EnumMeta, object)):
    # Empty base class: its only effect is to be created through EnumMeta
    # (via the with_metaclass helper). The body intentionally defines
    # nothing.
    pass
Example #3
0
class BaseObject(with_metaclass(_BaseObjectMeta, StrConv, object)):
    """
    This is the base class for a capability object.

    A capability interface may specify to return several kinds of objects, to
    formalise retrieved information from websites.

    As python is a flexible language where variables are not typed, we use a system to
    force backends to set wanted values on all fields. To do that, we use the :class:`Field`
    class and all derived ones.

    For example::

        class Transfer(BaseObject):
            " Transfer from an account to a recipient.  "

            amount =    DecimalField('Amount to transfer')
            date =      Field('Date of transfer', basestring, date, datetime)
            origin =    Field('Origin of transfer', int, long, basestring)
            recipient = Field('Recipient', int, long, basestring)

    The docstring is mandatory.
    """

    id = None
    backend = None
    url = StringField('url')
    # Mapping of field name -> field object. Every instance works on its own
    # copy (see __init__ and __setstate__).
    _fields = None

    def __init__(self, id=u'', url=NotLoaded, backend=None):
        """
        :param id: identifier, converted with ``to_unicode``; ``None`` is
                   stored as an empty string
        :param url: value of the ``url`` field
        :param backend: stored as is in ``self.backend``
        """
        self.id = to_unicode(id) if id is not None else u''
        self.backend = backend
        # Per-instance copy, so that field values are not shared.
        self._fields = deepcopy(self._fields)
        self.__setattr__('url', url)

    @property
    def fullid(self):
        """
        Full ID of the object, in form '**ID@backend**'.
        """
        return '%s@%s' % (self.id, self.backend)

    def __iscomplete__(self):
        """
        Return True if the object is completed.

        It is useful when the object is a field of an other object which is
        going to be filled.

        The default behavior is to iter on fields (with iter_fields) and if
        a field is NotLoaded, return False.
        """
        for key, value in self.iter_fields():
            if value is NotLoaded:
                return False
        return True

    def copy(self):
        """
        Return a copy of the object whose field objects are shallow copies
        of the original ones.
        """
        obj = copy(self)
        obj._fields = copy(self._fields)
        for k in obj._fields:
            obj._fields[k] = copy(obj._fields[k])
        return obj

    def __deepcopy__(self, memo):
        """
        Make ``deepcopy`` behave like :meth:`copy` (``memo`` is not used).
        """
        return self.copy()

    def set_empty_fields(self, value, excepts=()):
        """
        Set the same value on all empty fields.

        :param value: value to set on all empty fields
        :param excepts: if specified, do not change fields listed
        """
        for key, old_value in self.iter_fields():
            if empty(old_value) and key not in excepts:
                setattr(self, key, value)

    def iter_fields(self):
        """
        Iterate on the fields keys and values.

        Can be overloaded to iterate on other things.

        :rtype: iter[(key, value)]
        """

        if hasattr(self, 'id') and self.id is not None:
            yield 'id', self.id
        for name, field in self._fields.items():
            yield name, field.value

    def __eq__(self, obj):
        """
        Two objects are equal when both are :class:`BaseObject` instances
        with the same ``backend`` and the same ``id``.
        """
        if isinstance(obj, BaseObject):
            return self.backend == obj.backend and self.id == obj.id
        else:
            return False

    def __ne__(self, obj):
        """
        Negation of ``==``.

        Python 2 does not derive ``!=`` from ``__eq__``, so without this
        method two equal objects would still compare as different there.
        """
        return not (self == obj)

    def __getattr__(self, name):
        """
        Fallback attribute lookup: return the value of the field ``name``.

        :raises AttributeError: if there is no such field
        """
        if self._fields is not None and name in self._fields:
            return self._fields[name].value
        else:
            raise AttributeError("'%s' object has no attribute '%s'" %
                                 (self.__class__.__name__, name))

    def __setattr__(self, name, value):
        """
        Set a field value (with conversion and type check) or a plain
        attribute.

        When ``name`` is not a field, the attribute is set normally; a
        warning is emitted if the name is public and not already known to
        ``dir(self)``.

        When ``name`` is a field, a non-empty value is first passed to the
        field's ``convert``; the result must then be an instance of one of
        the field's types, unless it is empty.

        :raises ValueError: if the value does not match the field types
        """
        try:
            attr = (self._fields or {})[name]
        except KeyError:
            if name not in dir(self) and not name.startswith('_'):
                warnings.warn(
                    'Creating a non-field attribute %s. Please prefix it with _'
                    % name,
                    AttributeCreationWarning,
                    stacklevel=2)
            object.__setattr__(self, name, value)
        else:
            if not empty(value):
                try:
                    # Try to convert value to the wanted one.
                    nvalue = attr.convert(value)
                    # If the value was converted
                    if nvalue is not value:
                        warnings.warn('Value %s was converted from %s to %s' %
                                      (name, type(value), type(nvalue)),
                                      ConversionWarning,
                                      stacklevel=2)
                    value = nvalue
                except Exception:
                    # error during conversion, it will probably not
                    # match the wanted following types, so we'll
                    # raise ValueError.
                    pass
            from collections import deque
            actual_types = ()
            for v in attr.types:
                if isinstance(v, str):
                    # The type is given by name: look for every class with
                    # that name, walking the subclass tree breadth-first
                    # from ``object``.
                    # the following is a (almost) copy/paste from
                    # https://stackoverflow.com/questions/11775460/lexical-cast-from-string-to-type
                    q = deque([object])
                    while q:
                        t = q.popleft()
                        if t.__name__ == v:
                            actual_types += (t, )
                        else:
                            try:
                                # keep looking!
                                q.extend(t.__subclasses__())
                            except TypeError:
                                # type.__subclasses__ needs an argument for
                                # whatever reason.
                                if t is type:
                                    continue
                                else:
                                    raise
                else:
                    actual_types += (v, )

            if not isinstance(value, actual_types) and not empty(value):
                raise ValueError(
                    'Value for "%s" needs to be of type %r, not %r' %
                    (name, actual_types, type(value)))
            attr.value = value

    def __delattr__(self, name):
        """
        Remove the field ``name`` if there is one, else delete the plain
        attribute.
        """
        try:
            # Same ``None`` guard as __getattr__/__setattr__: with no fields
            # mapping, fall through to the regular attribute deletion.
            (self._fields or {}).pop(name)
        except KeyError:
            object.__delattr__(self, name)

    def to_dict(self):
        """
        Return an ``OrderedDict`` of what :meth:`iter_fields` yields, where
        ``id`` is replaced by :attr:`fullid` when ``backend`` is not None.
        """
        def iter_decorate(d):
            for key, value in d:
                if key == 'id' and self.backend is not None:
                    value = self.fullid
                yield key, value

        fields_iterator = self.iter_fields()
        return OrderedDict(iter_decorate(fields_iterator))

    def __getstate__(self):
        """
        State used for serialization: :meth:`to_dict` updated with the
        instance ``__dict__``, ``_fields`` excluded.
        """
        d = self.to_dict()
        d.update((k, v) for k, v in self.__dict__.items() if k != '_fields')
        return d

    @classmethod
    def from_dict(cls, values, backend=None):
        """
        Build an object with ``cls()`` and set every item of ``values`` on it
        with ``setattr``.

        :param values: mapping of attribute name -> value
        :param backend: NOTE(review): accepted but currently unused; confirm
                        whether it should be stored on the new object
        """
        self = cls()

        for attr in values:
            setattr(self, attr, values[attr])

        return self

    def __setstate__(self, state):
        """
        Restore the object from ``state`` by setting every item with
        ``setattr``.
        """
        self._fields = deepcopy(
            self._fields)  # because yaml does not call __init__
        for k in state:
            setattr(self, k, state[k])

    if sys.version_info.major >= 3:

        def __dir__(self):
            """
            Regular ``dir()`` entries plus the field names.
            """
            # Same ``None`` guard as __getattr__/__setattr__.
            return list(super(BaseObject, self).__dir__()) + list(
                (self._fields or {}).keys())
Example #4
0
class ItemElement(with_metaclass(_ItemElementMeta, AbstractElement)):
    """
    Element building a single object (``self.obj``) and filling its
    attributes from ``self.el``.

    Iterating on the element yields at most one object; calling it returns
    that object, or ``None`` when nothing is yielded.
    """

    # Names of the attributes to fill: for each ``name``, ``obj_<name>`` is
    # looked up on the element. Presumably populated by the metaclass.
    _attrs = None
    # Not used in this class body; presumably consumed by handle_loaders().
    _loaders = None
    # Class instantiated by build_object(); None means no object is built.
    klass = None
    # Optional callable taking the built object; a falsy result discards it.
    validate = None
    # When true, a parsing error on a non-mandatory field stores FetchError
    # instead of being raised.
    skip_optional_fields_errors = False

    class Index(object):
        pass

    def __init__(self, *args, **kwargs):
        super(ItemElement, self).__init__(*args, **kwargs)
        self.obj = None
        self.saved_attrib = {}  # safer way would be to clone lxml tree

    def build_object(self):
        """
        Return a new ``klass`` instance, or ``None`` if ``klass`` is unset.
        """
        if self.klass is None:
            return
        return self.klass()

    def _restore_attrib(self):
        """
        Put back the attributes saved in ``saved_attrib`` on their elements,
        then forget them.
        """
        for el in self.saved_attrib:
            el.attrib.clear()
            el.attrib.update(self.saved_attrib[el])
        self.saved_attrib = {}

    def should_highlight(self):
        """
        Tell whether the element should be highlighted and dumped.

        True only when the browser has truthy ``responses_dirname`` and
        ``highlight_el`` and ``self.el.getroottree()`` is truthy. Any
        ``AttributeError`` raised while checking means False.
        """
        try:
            enabled = self.page.browser.responses_dirname and self.page.browser.highlight_el
            if not enabled:
                return False
            if not self.el.getroottree():
                return False
        except AttributeError:
            return False
        else:
            return True

    def _write_highlighted(self):
        """
        Dump the whole document containing ``self.el`` to
        ``obj-<random id>.html`` in the browser's responses directory.

        Does nothing when :meth:`should_highlight` is false.
        """
        if not self.should_highlight():
            return

        responses_dirname = self.page.browser.responses_dirname
        html = lxml.html.tostring(self.el.getroottree().getroot())

        fn = os.path.join(responses_dirname, 'obj-%s.html' % self._random_id)
        # tostring() returns bytes with its default encoding, so the file has
        # to be opened in binary mode (text mode raises TypeError on
        # Python 3).
        with open(fn, 'wb') as fd:
            fd.write(html)
        self.logger.debug('highlighted object to %s', fn)

    def __call__(self, obj=None):
        """
        Fill and return the object.

        :param obj: if not None, object to fill instead of building one
        :return: the first object yielded by iterating on the element, or
                 ``None`` if it yields nothing
        """
        if obj is not None:
            self.obj = obj

        for obj in self:
            return obj

    def __iter__(self):
        """
        Build and fill the object, then yield it.

        Nothing is yielded when ``condition`` is set and returns a falsy
        value, when ``SkipItem`` is raised while filling, or when
        ``validate`` rejects the object.
        """
        if self.condition is not None and not self.condition():
            return

        highlight = False
        try:
            if self.should_highlight():
                # Keep the original attributes so they can be restored once
                # the highlighted document has been written.
                self.saved_attrib[self.el] = dict(self.el.attrib)
                self.el.attrib[
                    'style'] = 'color: white !important; background: orange !important;'

            try:
                if self.obj is None:
                    self.obj = self.build_object()
                self.parse(self.el)
                self.handle_loaders()
                for attr in self._attrs:
                    self.handle_attr(attr, getattr(self, 'obj_%s' % attr))
            except SkipItem:
                return

            if self.validate is not None and not self.validate(self.obj):
                return

            # Only objects which were fully built and validated are dumped.
            highlight = True
        finally:
            if highlight:
                self._write_highlighted()
            self._restore_attrib()

        yield self.obj

    def handle_attr(self, key, func):
        """
        Compute one attribute with ``use_selector`` and set it on
        ``self.obj``.

        :param key: name of the attribute to set
        :param func: selector passed to ``use_selector``
        :raises SkipItem: re-raised as is, after a debug message
        :raises Exception: any other error is logged and re-raised, unless
            ``skip_optional_fields_errors`` is set and ``key`` is a
            non-mandatory field of the object; ``FetchError`` is then stored
            as the value
        """
        try:
            value = self.use_selector(func, key=key)
        except SkipItem as e:
            # Help debugging as tracebacks do not give us the key
            self.logger.debug("Attribute %s raises a %r", key, e)
            raise
        except Exception as e:
            # If we are here, we have probably a real parsing issue
            self.logger.warning('Attribute %s (in %s:%s) raises %s', key,
                                self._class_file, self._class_line, repr(e))
            optional = (self.skip_optional_fields_errors
                        and key in self.obj._fields
                        and not self.obj._fields[key].mandatory)
            if not optional:
                raise
            value = FetchError
        logger = getLogger('b2filters')
        # Lazy %-arguments: the message is only built if the record is
        # actually emitted.
        logger.log(DEBUG_FILTERS, "%s.%s = %r", self._random_id, key, value)
        setattr(self.obj, key, value)