def get_splits(_INPUT, pieces=None, funcs=None, **kwargs):
    """Fan the input items out over a set of functions.

    Parameters
    ----------
    _INPUT : iterable of items (may be empty or None)
    pieces : forwarded to ``get_broadcast_funcs`` when `funcs` is not given
    funcs : optional sequence of callables to broadcast over
    kwargs : may carry the boolean switches ``finitize`` and ``dictize``
        (both default to False); whatever remains is forwarded to
        ``get_broadcast_funcs``

    Returns
    -------
    The result of ``utils.broadcast(inputs, *funcs)`` when there is input to
    process, otherwise `funcs` itself.
    """
    should_finitize = kwargs.pop('finitize', False)
    should_dictize = kwargs.pop('dictize', False)

    # Only run the input through utils.finitize when asked to and when there
    # is something to pass in.
    if should_finitize and _INPUT:
        source = utils.finitize(_INPUT)
    else:
        source = _INPUT

    # Fall back to the functions derived from `pieces` if none were supplied.
    if not funcs:
        funcs = get_broadcast_funcs(pieces, **kwargs)

    # Optionally wrap every item in a DotDict.
    if source and should_dictize:
        source = imap(DotDict, source)

    # Without input there is nothing to broadcast: hand the funcs back.
    if not source:
        return funcs

    return utils.broadcast(source, *funcs)
def pipe_rssitembuilder(context=None, _INPUT=None, conf=None, **kwargs):
    """A source that builds an rss item. Loopable.

    For every input item the conf is parsed and its keys are renamed twice:
    once through the module-level ``RSS`` mapping (keys missing from ``RSS``
    keep their own name) and once through the ``YAHOO`` mapping (keys missing
    from ``YAHOO`` become ``None``). The two renamed dicts are merged with
    ``utils.combine_dicts`` and every pair whose key or value is falsy is
    then discarded.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipeforever asyncPipe or an iterable of items or fields
    conf : {
        'mediaContentType': {'type': 'text', 'value': ''},
        'mediaContentHeight': {'type': 'text', 'value': ''},
        'mediaContentWidth': {'type': 'text', 'value': ''},
        'mediaContentURL': {'type': 'text', 'value': 'url'},
        'mediaThumbHeight': {'type': 'text', 'value': ''},
        'mediaThumbWidth': {'type': 'text', 'value': ''},
        'mediaThumbURL': {'type': 'text', 'value': 'url'},
        'description': {'type': 'text', 'value': 'description'},
        'pubdate': {'type': 'text', 'value': 'pubdate'},
        'author': {'type': 'text', 'value': 'author'},
        'title': {'type': 'text', 'value': 'title'},
        'link': {'type': 'text', 'value': 'url'},
        'guid': {'type': 'text', 'value': 'guid'},
    }

    Returns
    -------
    _OUTPUT : iterator of items (each one wrapped in a DotDict)
    """
    def rename_rss(key, value):
        # Translate the key via RSS, keeping the original key as a fallback.
        return (RSS.get(key, key), value)

    def rename_yahoo(key, value):
        # Translate the key via YAHOO; unknown keys map to None and are
        # removed later by drop_empty.
        return (YAHOO.get(key), value)

    def build_dict(rename, parsed):
        # Apply `rename` to every (key, value) pair of the parsed conf.
        return dict(starmap(rename, parsed.iteritems()))

    def drop_empty(mapping):
        # Keep only the pairs whose key AND value are both truthy.
        return dict(pair for pair in mapping.items() if all(pair))

    value_getter = partial(utils.get_value, **kwargs)
    parse_kwargs = utils.combine_dicts({'parse_func': value_getter}, kwargs)
    parse_conf = partial(utils.parse_conf, DotDict(conf), **parse_kwargs)
    builders = [
        partial(build_dict, rename_rss),
        partial(build_dict, rename_yahoo),
    ]

    items = imap(DotDict, utils.finitize(_INPUT))
    parsed_confs = imap(parse_conf, items)
    renamed = utils.broadcast(parsed_confs, *builders)
    merged = starmap(utils.combine_dicts, renamed)
    cleaned = imap(drop_empty, merged)
    return imap(DotDict, cleaned)
def pipe_filter(context=None, _INPUT=None, conf=None, **kwargs):
    """An operator that filters for source items matching the given rules.
    Not loopable.

    Every item is paired with its parsed rules and a pass flag, run through
    ``parse_rules``, combined with the boolean selected by COMBINE, and then
    handed to ``parse_result``; falsy results are removed from the output.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipe2py.modules pipe like object (iterable of items)
    conf : {
        'MODE': {'value': <'permit' or 'block'>},
        'COMBINE': {'value': <'and' or 'or'>},
        'RULE': [
            {
                'field': {'value': 'search field'},
                'op': {'value': 'one of SWITCH above'},
                'value': {'value': 'search term'}
            }
        ]
    }

    kwargs : other inputs, e.g., to feed terminals for rule values; the
        optional ``pass_if`` entry is removed and forwarded to
        ``utils.get_pass`` as ``test``

    Returns
    -------
    _OUTPUT : generator of filtered items

    Raises
    ------
    Exception : if COMBINE resolves to anything other than 'and' or 'or'

    Examples
    --------
    >>> import os.path as p
    >>> from pipe2py.modules.pipeforever import pipe_forever
    >>> from pipe2py.modules.pipefetchdata import pipe_fetchdata
    >>> parent = p.dirname(p.dirname(__file__))
    >>> file_name = p.abspath(p.join(parent, 'data', 'gigs.json'))
    >>> path = 'value.items'
    >>> url = 'file://%s' % file_name
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> input = pipe_fetchdata(_INPUT=pipe_forever(), conf=conf)
    >>> mode = {'value': 'permit'}
    >>> combine = {'value': 'and'}
    >>> rule = [{'field': {'value': 'title'}, 'op': {'value': 'contains'},
    ...          'value': {'value': 'web'}}]
    >>> conf = {'MODE': mode, 'COMBINE': combine, 'RULE': rule}
    >>> pipe_filter(_INPUT=input, conf=conf).next()['title']
    u'E-Commerce Website Developer | Elance Job'
    >>> rule = [{'field': {'value': 'title'}, 'op': {'value': 'contains'},
    ...          'value': {'value': 'kjhlked'}}]
    >>> conf = {'MODE': mode, 'COMBINE': combine, 'RULE': rule}
    >>> list(pipe_filter(_INPUT=input, conf=conf))
    []
    """
    conf = DotDict(conf)
    # 'pass_if' is consumed here so it is not forwarded with the other kwargs.
    pass_test = kwargs.pop('pass_if', None)
    permit = conf.get('MODE', **kwargs) == 'permit'
    combine = conf.get('COMBINE', **kwargs)

    if combine not in {'and', 'or'}:
        message = "Invalid combine: %s. (Expected 'and' or 'or')" % combine
        raise Exception(message)

    rule_defs = map(DotDict, utils.listize(conf['RULE']))
    get_pass = partial(utils.get_pass, test=pass_test)
    value_getter = partial(utils.get_value, **kwargs)
    parse_conf = partial(utils.parse_conf, parse_func=value_getter, **kwargs)

    def get_rules(item):
        # Parse every rule definition against the same item.
        return imap(parse_conf, rule_defs, repeat(item))

    # One handler per element of each parsed tuple: the rule outcomes go
    # through the and/or combiner, the other two elements are left untouched.
    dispatchers = [
        COMBINE_BOOLEAN[combine],
        utils.passthrough,
        utils.passthrough,
    ]

    items = imap(DotDict, _INPUT)
    splits = utils.broadcast(items, get_rules, utils.passthrough, get_pass)
    outputs = starmap(partial(parse_rules, **kwargs), splits)
    parsed = utils.dispatch(outputs, *dispatchers)
    gathered = starmap(partial(parse_result, permit=permit), parsed)

    # Drop the falsy entries produced by parse_result.
    return ifilter(None, gathered)
def asyncBroadcast(_INPUT, *asyncCallables):
    """Broadcast `_INPUT` over `asyncCallables` through ``utils.broadcast``.

    The call is delegated to ``utils.broadcast`` with this module's
    ``_map_func`` and ``_apply_func`` supplied as its ``map_func`` and
    ``apply_func`` keyword arguments.

    Parameters
    ----------
    _INPUT : the items to broadcast
    asyncCallables : the callables each item is handed to

    Returns
    -------
    Whatever ``utils.broadcast`` returns for these arguments.
    """
    hooks = dict(map_func=_map_func, apply_func=_apply_func)
    return utils.broadcast(_INPUT, *asyncCallables, **hooks)
def pipe_filter(context=None, _INPUT=None, conf=None, **kwargs):
    """An operator that filters for source items matching the given rules.
    Not loopable.

    Every item is paired with its parsed rules and a pass flag, run through
    ``parse_rules``, combined with the boolean selected by COMBINE, and then
    handed to ``parse_result``; falsy results are removed from the output.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipe2py.modules pipe like object (iterable of items)
    conf : {
        'MODE': {'value': <'permit' or 'block'>},
        'COMBINE': {'value': <'and' or 'or'>},
        'RULE': [
            {
                'field': {'value': 'search field'},
                'op': {'value': 'one of SWITCH above'},
                'value': {'value': 'search term'}
            }
        ]
    }

    kwargs : other inputs, e.g., to feed terminals for rule values; the
        optional ``pass_if`` entry is removed and forwarded to
        ``utils.get_pass`` as ``test``

    Returns
    -------
    _OUTPUT : generator of filtered items

    Raises
    ------
    Exception : if COMBINE resolves to anything other than 'and' or 'or'

    Examples
    --------
    >>> import os.path as p
    >>> from pipe2py.modules.pipeforever import pipe_forever
    >>> from pipe2py.modules.pipefetchdata import pipe_fetchdata
    >>> parent = p.dirname(p.dirname(__file__))
    >>> file_name = p.abspath(p.join(parent, 'data', 'gigs.json'))
    >>> path = 'value.items'
    >>> url = 'file://%s' % file_name
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> input = pipe_fetchdata(_INPUT=pipe_forever(), conf=conf)
    >>> mode = {'value': 'permit'}
    >>> combine = {'value': 'and'}
    >>> rule = [{'field': {'value': 'title'}, 'op': {'value': 'contains'},
    ...          'value': {'value': 'web'}}]
    >>> conf = {'MODE': mode, 'COMBINE': combine, 'RULE': rule}
    >>> pipe_filter(_INPUT=input, conf=conf).next()['title']
    u'E-Commerce Website Developer | Elance Job'
    >>> rule = [{'field': {'value': 'title'}, 'op': {'value': 'contains'},
    ...          'value': {'value': 'kjhlked'}}]
    >>> conf = {'MODE': mode, 'COMBINE': combine, 'RULE': rule}
    >>> list(pipe_filter(_INPUT=input, conf=conf))
    []
    """
    conf = DotDict(conf)
    # 'pass_if' is consumed here so it is not forwarded with the other kwargs.
    pass_test = kwargs.pop('pass_if', None)
    permit = conf.get('MODE', **kwargs) == 'permit'
    combine = conf.get('COMBINE', **kwargs)

    if combine not in {'and', 'or'}:
        message = "Invalid combine: %s. (Expected 'and' or 'or')" % combine
        raise Exception(message)

    rule_defs = map(DotDict, utils.listize(conf['RULE']))
    get_pass = partial(utils.get_pass, test=pass_test)
    value_getter = partial(utils.get_value, **kwargs)
    parse_conf = partial(utils.parse_conf, parse_func=value_getter, **kwargs)

    def get_rules(item):
        # Parse every rule definition against the same item.
        return imap(parse_conf, rule_defs, repeat(item))

    # One handler per element of each parsed tuple: the rule outcomes go
    # through the and/or combiner, the other two elements are left untouched.
    dispatchers = [
        COMBINE_BOOLEAN[combine],
        utils.passthrough,
        utils.passthrough,
    ]

    items = imap(DotDict, _INPUT)
    splits = utils.broadcast(items, get_rules, utils.passthrough, get_pass)
    outputs = starmap(partial(parse_rules, **kwargs), splits)
    parsed = utils.dispatch(outputs, *dispatchers)
    gathered = starmap(partial(parse_result, permit=permit), parsed)

    # Drop the falsy entries produced by parse_result.
    return ifilter(None, gathered)