Example #1
def async_parser(stream, rules, tuples, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        stream (Iter[dict]): The source. Note: this shares the `tuples`
            iterator, so consuming it will consume `tuples` as well.

        rules (List[obj]): the item independent rules (Objectify instances).

        tuples (Iter[(dict, obj)]): Iterable of tuples of (item, objconf)
            `item` is an element in the source stream and `objconf` is the item
            configuration (an Objectify instance). Note: this shares the
            `stream` iterator, so consuming it will consume `stream` as well.

        kwargs (dict): Keyword arguments.

    Kwargs:
        conf (dict): The pipe configuration.

    Returns:
        Deferred: twisted.internet.defer.Deferred output stream

    Examples:
        >>> from itertools import repeat
        >>> from riko.bado import react, _issync
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(x[0] == {'content': 4})
        ...     kwargs = {'sort_key': 'content', 'sort_dir': 'desc'}
        ...     rule = Objectify(kwargs)
        ...     stream = ({'content': x} for x in range(5))
        ...     tuples = zip(stream, repeat(rule))
        ...     d = async_parser(stream, [rule], tuples, **kwargs)
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> if _issync:
        ...     True
        ... else:
        ...     try:
        ...         react(run, _reactor=FakeReactor())
        ...     except SystemExit:
        ...         pass
        True
    """
    return ait.async_reduce(reducer, rules, stream)
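The module-level names `ait` (riko's async iteration helpers) and `reducer` are assumed from the surrounding module and not shown in this snippet. A minimal sketch of a sort reducer consistent with the doctest above, assuming each rule exposes the `sort_key` and `sort_dir` attributes seen in the doctest kwargs (an illustration, not necessarily the reducer riko ships):

from operator import itemgetter

def reducer(stream, rule):
    # Hypothetical sketch: sort by the rule's key, reversed when
    # sort_dir is 'desc', as in the doctest's kwargs.
    reverse = getattr(rule, 'sort_dir', 'asc') == 'desc'
    return sorted(stream, key=itemgetter(rule.sort_key), reverse=reverse)

With the doctest's rule, this sorts {'content': 0}..{'content': 4} in descending order, which is why the callback checks that the first item equals {'content': 4}. The shared-iterator caveat from the docstring can also be seen in miniature: `tuples` wraps `stream` via zip, so advancing one advances the other.

stream = ({'content': x} for x in range(5))
tuples = zip(stream, ['rule'] * 5)
next(tuples)           # pulls {'content': 0} out of stream as well
print(next(stream))    # -> {'content': 1}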
Example #2
@coroutine
def async_parser(item, rules, skip=False, **kwargs):
    """ Asynchronously parses the pipe content

    Args:
        item (obj): The entry to process (a DotDict instance)
        rules (List[obj]): the parsed rules (Objectify instances).
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item

    Returns:
        Deferred: twisted.internet.defer.Deferred dict

    Examples:
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>> from riko.dotdict import DotDict
        >>>
        >>> item = DotDict({'content': 'hello world', 'title': 'greeting'})
        >>> match = r'(\w+)\s(\w+)'
        >>> replace = '$2wide'
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(x['content'])
        ...     rule = {'field': 'content', 'match': match, 'replace': replace}
        ...     conf = {'rule': rule, 'multi': False, 'convert': True}
        ...     rules = [Objectify(rule)]
        ...     kwargs = {'stream': item, 'conf': conf}
        ...     d = async_parser(item, rules, **kwargs)
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        worldwide
    """
    multi = kwargs['conf']['multi']
    recompile = not multi

    @coroutine
    def async_reducer(item, rules):
        field = rules[0]['field']
        word = item.get(field, **kwargs)
        grouped = group_by(rules, 'flags')
        group_rules = [g[1] for g in grouped] if multi else rules
        reducer = multi_substitute if multi else substitute
        replacement = yield ait.coop_reduce(reducer, group_rules, word)
        combined = merge([item, {field: replacement}])
        return_value(DotDict(combined))

    if skip:
        item = kwargs['stream']
    else:
        new_rules = [get_new_rule(r, recompile=recompile) for r in rules]
        grouped = group_by(new_rules, 'field')
        field_rules = [g[1] for g in grouped]
        item = yield ait.async_reduce(async_reducer, field_rules, item)

    return_value(item)
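The helpers used above (coroutine, return_value, ait, group_by, merge, substitute, multi_substitute, get_new_rule) come from the surrounding module and its imports and are not shown here. The rewrite the doctest performs can be reproduced stand-alone; a minimal sketch, assuming 'convert': True means get_new_rule turns the Pipes-style '$2' backreference into Python's r'\2' (that conversion is an assumption, since get_new_rule is not shown):

import re

match = re.compile(r'(\w+)\s(\w+)')        # the doctest's match pattern
replace = '$2wide'.replace('$2', r'\2')    # hypothetical '$n' -> r'\n' conversion
print(match.sub(replace, 'hello world'))   # -> worldwide

The second capture group ('world') replaces the whole match, yielding the 'worldwide' output printed by the doctest's callback.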