コード例 #1
0
ファイル: __init__.py プロジェクト: zmyer/riko
        def wrapper(item=None, **kwargs):
            """Run the wrapped ``pipe`` on a single item.

            Combines ``self.defaults``, the processor defaults declared below,
            ``self.opts`` and the call-time ``kwargs`` with ``cdicts``, parses
            the item through ``dispatch``, calls ``pipe`` with the parsed
            pieces and finally either emits the result as is or assigns it
            onto the input item.

            Args:
                item (dict): The entry to process. A falsy value is replaced
                    by an empty dict.
                kwargs (dict): Call-time options. A ``conf`` entry, if given,
                    is merged over the configuration collected from the keys
                    of ``self.defaults``.

            Yields:
                Each element of the resulting stream (synchronous mode). In
                async mode the result of ``pipe`` is obtained with ``yield``
                and the final stream is passed to ``return_value`` instead.
            """
            module_name = wrapper.__module__.split('.')[-1]

            defaults = {
                'dictize': True,
                'ftype': 'pass',
                'ptype': 'pass',
                'objectify': True
            }

            combined = cdicts(self.defaults, defaults, self.opts, kwargs)
            # A field type of 'none' marks the pipe as a source: unless told
            # otherwise it emits its output and assigns to 'content'.
            is_source = combined['ftype'] == 'none'
            def_assign = 'content' if is_source else module_name
            extracted = 'extract' in combined
            pdictize = combined.get('listize') if extracted else True

            # only fill in values the caller did not provide explicitly
            combined.setdefault('assign', def_assign)
            combined.setdefault('emit', is_source)
            combined.setdefault('pdictize', pdictize)
            conf = {k: combined[k] for k in self.defaults}
            conf.update(kwargs.get('conf', {}))
            combined.update({'conf': conf})
            # replace conf with dictized version so we can access its
            # attributes even if we already extracted a value
            updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
            kwargs.update(updates)

            item = item or {}
            _input = DotDict(item) if combined.get('dictize') else item
            bfuncs = get_broadcast_funcs(**combined)
            types = {combined['ftype'], combined['ptype']}

            # dispatch functions are only needed when at least one of the
            # types is something other than 'pass' / 'none'
            if types.difference({'pass', 'none'}):
                dfuncs = get_dispatch_funcs(**combined)
            else:
                dfuncs = None

            parsed, orig_item = dispatch(_input, bfuncs, dfuncs=dfuncs)

            # `async` is a reserved keyword since Python 3.7, so writing
            # `self.async` is a SyntaxError there; look the attribute up by
            # name, which works on every Python version.
            if getattr(self, 'async'):
                stream, skip = yield pipe(*parsed, stream=orig_item, **kwargs)
            else:
                stream, skip = pipe(*parsed, stream=orig_item, **kwargs)

            one, assignment = get_assignment(stream, skip, **combined)

            if skip or combined.get('emit'):
                stream = assignment
            else:
                # reaching this branch already implies `skip` is falsy
                key = combined.get('assign')
                stream = assign(_input, assignment, key, one=one)

            if getattr(self, 'async'):
                # NOTE(review): presumably this generator is driven by an
                # inlineCallbacks-style decorator in async mode -- confirm.
                return_value(stream)
            else:
                for s in stream:
                    yield s
コード例 #2
0
ファイル: itembuilder.py プロジェクト: zmyer/riko
def parser(_, attrs, skip, **kwargs):
    """ Builds an item from a list of key/value attributes

    Args:
        _ (None): Ignored
        attrs (List[dict]): Attributes, each exposing `key` and `value`
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned untouched when skipping)

    Returns:
        Tuple[dict, bool]: Tuple of (stream, skip)

    Examples:
        >>> from riko.lib.utils import Objectify
        >>> attrs = [
        ...     {'key': 'title', 'value': 'the title'},
        ...     {'key': 'desc', 'value': 'the desc'}]
        >>> result, skip = parser(None, map(Objectify, attrs), False)
        >>> result == {'title': 'the title', 'desc': 'the desc'}
        True
    """
    # lazily pair up each attribute's key with its value
    pairs = ((attr.key, attr.value) for attr in attrs)

    if skip:
        result = kwargs['stream']
    else:
        result = DotDict(pairs)

    return result, skip
コード例 #3
0
ファイル: rssitembuilder.py プロジェクト: zmyer/riko
def parser(item, objconf, skip, **kwargs):
    """ Builds an RSS item from the pipe configuration

    Args:
        item (obj): The entry to process (a DotDict instance)
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned untouched when skipping)

    Returns:
        Tuple(dict, bool): Tuple of (stream, skip)

    Examples:
        >>> from riko.lib.dotdict import DotDict
        >>> from riko.lib.utils import Objectify
        >>>
        >>> item = DotDict()
        >>> conf = {'guid': 'a1', 'mediaThumbURL': 'image.png'}
        >>> objconf = Objectify(conf)
        >>> kwargs = {'stream': item}
        >>> result, skip = parser(item, objconf, False, **kwargs)
        >>> result == {'media:thumbnail': {'url': 'image.png'}, 'y:id': 'a1'}
        True
    """
    if skip:
        return kwargs['stream'], skip

    # Map each configured name through RSS (falling back to the name itself)
    # and look its value up on the item (falling back to the raw value).
    pairs = (
        (RSS.get(name, name), item.get(value, value, **kwargs))
        for name, value in objconf.items())

    return DotDict(pairs), skip
コード例 #4
0
 def meta_reducer(item, rules):
     """Apply the substitution rules to one field of ``item``.

     The field name is read from the first rule. ``multi`` and ``kwargs`` are
     not defined here; they are resolved from an outer scope.

     Returns:
         DotDict: ``item`` combined with the replaced field value.
     """
     target = rules[0]['field']
     original = item.get(target, **kwargs)
     by_flags = utils.group_by(rules, 'flags')

     if multi:
         # only the second element of every group is handed to the reducer
         rule_sets = [group[1] for group in by_flags]
         substitute = utils.multi_substitute
     else:
         rule_sets = rules
         substitute = utils.substitute

     updated = reduce(substitute, rule_sets, original)
     merged = cdicts(item, {target: updated})
     return DotDict(merged)
コード例 #5
0
 def async_reducer(item, rules):
     """Asynchronous counterpart that replaces one field of ``item``.

     The field name is read from the first rule. ``multi`` and ``kwargs`` are
     not defined here; they are resolved from an outer scope. The reduction is
     obtained by yielding ``ait.coop_reduce`` and the resulting DotDict is
     handed to ``return_value``.
     """
     target = rules[0]['field']
     original = item.get(target, **kwargs)
     by_flags = utils.group_by(rules, 'flags')

     if multi:
         # only the second element of every group is handed to the reducer
         rule_sets = [group[1] for group in by_flags]
         substitute = utils.multi_substitute
     else:
         rule_sets = rules
         substitute = utils.substitute

     updated = yield ait.coop_reduce(substitute, rule_sets, original)
     merged = cdicts(item, {target: updated})
     return_value(DotDict(merged))
コード例 #6
0
ファイル: urlbuilder.py プロジェクト: zmyer/riko
def parser(item, params, skip, **kwargs):
    """ Parses the pipe content and builds a url

    Args:
        item (obj): The entry to process (a DotDict instance)
        params (List[dict]): Query parameters, each exposing `key` and `value`
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned untouched when skipping)
        conf (dict): The pipe configuration; `base` and `path` are read

    Returns:
        Tuple (dict, bool): Tuple of (stream, skip)

    Examples:
        >>> from riko.lib.utils import Objectify
        >>>
        >>> item = DotDict()
        >>> params = {'key': 's', 'value': 'gm'}
        >>> path = [{'value': 'rss'}, {'value': 'headline'}]
        >>> base = 'http://finance.yahoo.com'
        >>> conf = {'base': base, 'path': path, 'params': params}
        >>> kwargs = {'stream': item, 'conf': conf}
        >>> result = parser(item, [Objectify(params)], False, **kwargs)[0]
        >>> sorted(result.keys()) == [
        ...     'fragment', 'netloc', 'params', 'path', 'query', 'scheme',
        ...     'url']
        True
        >>> result['url'] == 'http://finance.yahoo.com/rss/headline?s=gm'
        True
    """
    if skip:
        return kwargs['stream'], skip

    # `conf` is removed from kwargs so it is not forwarded to get_value
    conf = kwargs.pop('conf')
    segments = (
        get_value(item, DotDict(part), **kwargs) for part in conf.get('path'))

    query = urlencode([(param.key, param.value) for param in params])
    location = urljoin(conf['base'], '/'.join(segments))
    return cast_url('%s?%s' % (location, query)), skip
コード例 #7
0
ファイル: __init__.py プロジェクト: zmyer/riko
def get_broadcast_funcs(**kwargs):
    """Build the three callables used to break an item apart.

    Args:
        kwargs (dict): Processor options. The ones read here are `conf`,
            `extract`, `listize`, `pdictize`, `ptype` and `ftype`; the full
            set is also forwarded to the `utils` helpers.

    Returns:
        Tuple[func, func, func]: (get_field, get_pieces, get_skip)
    """
    opts = utils.Objectify(kwargs, conf={})

    if opts.extract:
        pieces = opts.conf[opts.extract]
    else:
        pieces = opts.conf

    sans_conf = remove_keys(kwargs, 'conf')
    passthrough = partial(utils.cast, _type='none')

    if opts.listize:
        # one conf parser per listed piece, all applied to the same item
        listed = utils.listize(pieces)
        piece_defs = map(DotDict, listed) if opts.pdictize else listed
        parse = partial(utils.parse_conf, **sans_conf)
        piece_funcs = [partial(parse, conf=piece) for piece in piece_defs]

        def get_pieces(item):
            return utils.broadcast(item, *piece_funcs)
    elif opts.ptype != 'none':
        if opts.pdictize and pieces:
            piece_conf = DotDict(pieces)
        else:
            piece_conf = pieces

        get_pieces = partial(utils.parse_conf, conf=piece_conf, **sans_conf)
    else:
        get_pieces = passthrough

    field_func = partial(utils.get_field, **kwargs)

    if opts.ftype == 'none':
        get_field = passthrough
    else:
        get_field = field_func

    get_skip = partial(utils.get_skip, **kwargs)
    return (get_field, get_pieces, get_skip)
コード例 #8
0
ファイル: __init__.py プロジェクト: zmyer/riko
        def wrapper(items=None, **kwargs):
            """Run the wrapped ``pipe`` on a whole stream of items.

            Combines ``self.defaults``, the operator defaults declared below,
            ``self.opts`` and the call-time ``kwargs`` with ``cdicts``, parses
            every item through ``dispatch``, calls ``pipe`` once with the
            stream and finally either emits the result as is or assigns each
            value under the ``assign`` key of a new item.

            Side effects: stores ``name`` and ``sub_type`` in
            ``wrapper.__dict__``.

            Args:
                items (Iter[dict]): The entries to process. A falsy value is
                    replaced by an empty iterator.
                kwargs (dict): Call-time options. A ``conf`` entry, if given,
                    is merged over the configuration collected from the keys
                    of ``self.defaults``.

            Yields:
                Each element of the resulting stream (synchronous mode). In
                async mode the result of ``pipe`` is obtained with ``yield``
                and the final stream is passed to ``return_value`` instead.
            """
            module_name = wrapper.__module__.split('.')[-1]
            wrapper.__dict__['name'] = module_name

            defaults = {
                'dictize': True,
                'ftype': 'pass',
                'ptype': 'pass',
                'objectify': True,
                'emit': True,
                'assign': module_name
            }

            combined = cdicts(self.defaults, defaults, self.opts, kwargs)
            extracted = 'extract' in combined
            pdictize = combined.get('listize') if extracted else True

            # only fill in the value if the caller did not provide one
            combined.setdefault('pdictize', pdictize)
            conf = {k: combined[k] for k in self.defaults}
            conf.update(kwargs.get('conf', {}))
            combined.update({'conf': conf})

            # replace conf with dictized version so we can access its
            # attributes even if we already extracted a value
            updates = {'conf': DotDict(conf), 'assign': combined.get('assign')}
            kwargs.update(updates)

            items = items or iter([])
            _INPUT = map(DotDict, items) if combined.get('dictize') else items
            bfuncs = get_broadcast_funcs(**combined)
            types = {combined['ftype'], combined['ptype']}

            # dispatch functions are only needed when at least one of the
            # types is something other than 'pass' / 'none'
            if types.difference({'pass', 'none'}):
                dfuncs = get_dispatch_funcs(**combined)
            else:
                dfuncs = None

            pairs = (dispatch(item, bfuncs, dfuncs=dfuncs) for item in _INPUT)
            # dispatch an empty item as well; its second parsed element is
            # used as the configuration object passed to `pipe`
            parsed, _ = dispatch(DotDict(), bfuncs, dfuncs=dfuncs)

            # - operators can't skip items
            # - purposely setting both variables to maps of the same iterable
            #   since only one is intended to be used at any given time
            # - `tuples` is an iterator of tuples of the first two `parsed`
            #   elements
            tuples = ((p[0][0], p[0][1]) for p in pairs)
            orig_stream = (p[0][0] for p in pairs)
            objconf = parsed[1]

            # `async` is a reserved keyword since Python 3.7, so writing
            # `self.async` is a SyntaxError there; look the attribute up by
            # name, which works on every Python version.
            if getattr(self, 'async'):
                stream = yield pipe(orig_stream, objconf, tuples, **kwargs)
            else:
                stream = pipe(orig_stream, objconf, tuples, **kwargs)

            # a mapping-like result (has `keys`) is labelled an aggregator,
            # anything else a composer
            sub_type = 'aggregator' if hasattr(stream, 'keys') else 'composer'
            wrapper.__dict__['sub_type'] = sub_type

            # operators can only assign one value per item and can't skip items
            _, assignment = get_assignment(stream, False, **combined)

            if combined.get('emit'):
                stream = assignment
            else:
                singles = (iter([v]) for v in assignment)
                key = combined.get('assign')
                assigned = (assign({}, s, key, one=True) for s in singles)
                stream = utils.multiplex(assigned)

            if getattr(self, 'async'):
                # NOTE(review): presumably this generator is driven by an
                # inlineCallbacks-style decorator in async mode -- confirm.
                return_value(stream)
            else:
                for s in stream:
                    yield s
コード例 #9
0
ファイル: __init__.py プロジェクト: zmyer/riko
def assign(item, assignment, key, one=False):
    """Yield a single DotDict: ``item`` combined with ``{key: value}``.

    Args:
        item (dict): The item to extend.
        assignment (Iter): Iterator supplying the value(s) to assign.
        key (str): The key under which the value is stored.
        one (bool): If true, store only the next element of `assignment`;
            otherwise store all of its elements as a list (default: False).

    Yields:
        DotDict: The combined item.
    """
    if one:
        value = next(assignment)
    else:
        value = list(assignment)

    merged = cdicts(item, {key: value})
    yield DotDict(merged)
コード例 #10
0
ファイル: rename.py プロジェクト: zmyer/riko
def reducer(item, rule):
    """Rename (or copy) one field of ``item`` according to ``rule``.

    Args:
        item (dict): The item to transform.
        rule (obj): Exposes `field` (the source key), `newval` (the new key;
            nothing is added when it is falsy) and `copy` (keep the source
            key when truthy, otherwise it is removed).

    Returns:
        DotDict: The transformed item.
    """
    if rule.newval:
        additions = {rule.newval: item.get(rule.field)}
    else:
        additions = {}

    if rule.copy:
        base = item
    else:
        base = remove_keys(item, rule.field)

    return DotDict(cdicts(base, additions))