def asyncPipeLoop(context=None, _INPUT=None, conf=None, embed=None, **kwargs):
    """Asynchronously run the embedded submodule over the input items.

    This is an operator and is not loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : asyncPipe like object (twisted Deferred iterable of items)
    embed : the submodule, i.e., asyncPipe*(context, _INPUT, conf)
        Most modules can act as sub-modules; User inputs and Operators
        are the exceptions.
    conf : {
        'assign_part': {'value': <all or first>},
        'assign_to': {'value': <assigned field name>},
        'emit_part': {'value': <all or first>},
        'mode': {'value': <assign or EMIT>},
        'with': {'value': <looped field name or blank>},
        'embed': {'value': {'conf': <module conf>}}
    }

    Returns
    -------
    _OUTPUT : twisted.internet.defer.Deferred generator of items

    Notes
    -----
    The custom function is stored on the shared ``opts`` mapping (defined
    outside this function), so the entry remains set after the call.
    """
    # NOTE(review): the yield/returnValue style presumably relies on
    # twisted's inlineCallbacks being applied to this function -- confirm.
    loop_func = get_cust_func(context, conf, embed, parse_embed, **kwargs)
    opts.update({'cust_func': loop_func})

    split_items = yield asyncGetSplits(_INPUT, conf, **cdicts(opts, kwargs))
    results = yield asyncStarMap(asyncParseResult, split_items)
    returnValue(utils.multiplex(results))
def asyncPipeFetchdata(context=None, _INPUT=None, conf=None, **kwargs):
    """Asynchronously build the fetchdata output from the input items.

    Parameters
    ----------
    context : pipe2py.Context object (not used directly in this function)
    _INPUT : input handed to asyncGetParsed
        (presumably a twisted Deferred iterable of items -- confirm)
    conf : module configuration handed to asyncGetSplits
        (presumably the same 'URL' / 'path' schema used by pipe_fetchdata
        -- confirm against the caller)

    Returns
    -------
    _OUTPUT : the multiplexed items, delivered through returnValue
    """
    # NOTE(review): the yield/returnValue style presumably relies on
    # twisted's inlineCallbacks being applied to this function -- confirm.
    split_funcs = yield asyncGetSplits(None, conf, **cdicts(opts, kwargs))

    # Only the first function returned by asyncGetSplits is applied.
    parsed_input = yield asyncGetParsed(_INPUT, split_funcs[0])
    fetched = yield asyncStarMap(asyncParseResult, parsed_input)
    returnValue(utils.multiplex(imap(utils.gen_items, fetched)))
def pipe_loop(context=None, _INPUT=None, conf=None, embed=None, **kwargs):
    """Run the embedded submodule over the input items.

    This is an operator and is not loopable.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipe2py.modules pipe like object (iterable of items)
    embed : the submodule, i.e., pipe_*(context, _INPUT, conf)
        Most modules can act as sub-modules; User inputs and Operators
        are the exceptions.
    conf : {
        'assign_part': {'value': <all or first>},
        'assign_to': {'value': <assigned field name>},
        'emit_part': {'value': <all or first>},
        'mode': {'value': <assign or EMIT>},
        'with': {'value': <looped field name or blank>},
        'embed': {'value': {'conf': <module conf>}}
    }

    Returns
    -------
    _OUTPUT : generator of items

    Notes
    -----
    The custom function is stored on the shared ``opts`` mapping (defined
    outside this function), so the entry remains set after the call.
    """
    loop_func = get_cust_func(context, conf, embed, parse_embed, **kwargs)
    opts.update({'cust_func': loop_func})

    split_items = get_splits(_INPUT, conf, **cdicts(opts, kwargs))
    results = starmap(parse_result, split_items)
    return utils.multiplex(results)
def asyncParseResult(urls, _, _pass):
    """Asynchronously fetch and parse feeds, returning their entries.

    Parameters
    ----------
    urls : value handed to get_urls to obtain the urls to fetch
    _ : unused
    _pass : unused

    Returns
    -------
    items : the multiplexed result of utils.gen_entries over every parsed
        feed, delivered through returnValue
    """
    # NOTE(review): the yield/returnValue style presumably relies on
    # twisted's inlineCallbacks being applied to this function -- confirm.
    # Alternative left by the original author:
    #   partial(deferToThread, speedparser.parse)
    parse_feed = partial(maybeDeferred, speedparser.parse)

    pages = yield asyncImap(getPage, get_urls(urls))
    feeds = yield asyncImap(parse_feed, pages)
    returnValue(utils.multiplex(imap(utils.gen_entries, feeds)))
def pipe_fetchdata(context=None, _INPUT=None, conf=None, **kwargs):
    """Fetch and parse an XML or JSON file (a loopable source).

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipeforever pipe or an iterable of items or fields
    conf : {
        'URL': {'value': <url>},
        'path': {'value': <dot separated path to data list>}
    }

    Returns
    -------
    _OUTPUT : items

    Examples
    --------
    >>> from os import path as p
    >>> from pipe2py.modules.pipeforever import pipe_forever
    >>> parent = p.dirname(p.dirname(__file__))
    >>> abspath = p.abspath(p.join(parent, 'data', 'gigs.json'))
    >>> path = 'value.items'
    >>> url = "file://%s" % abspath
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> pipe_fetchdata(_INPUT=pipe_forever(), conf=conf).next().keys()[:5]
    [u'y:repeatcount', u'description', u'pubDate', u'title', u'y:published']
    >>> abspath = p.abspath(p.join(parent, 'data', 'places.xml'))
    >>> path = 'appointment'
    >>> url = "file://%s" % abspath
    >>> conf = {'URL': {'value': url}, 'path': {'value': path}}
    >>> sorted(pipe_fetchdata(_INPUT=pipe_forever(), conf=conf).next().keys())
    ['alarmTime', 'begin', 'duration', 'places', 'subject', 'uid']
    >>> conf = {'URL': {'value': url}, 'path': {'value': ''}}
    >>> sorted(pipe_fetchdata(_INPUT=pipe_forever(), conf=conf).next().keys())
    ['appointment', 'reminder']
    """
    # todo: iCal and KML
    split_funcs = get_splits(None, conf, **cdicts(opts, kwargs))

    # Only the first function returned by get_splits is applied.
    parsed_input = get_parsed(_INPUT, split_funcs[0])
    fetched = starmap(parse_result, parsed_input)
    return utils.multiplex(imap(utils.gen_items, fetched))
def asyncPipeStringtokenizer(context=None, _INPUT=None, conf=None, **kwargs):
    """Asynchronously split strings into delimiter-separated tokens.

    A loopable string module.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : twisted Deferred iterable of items or strings
    conf : {
        'to-str': {'value': <delimiter>},
        'dedupe': {'type': 'bool', 'value': <1>},
        'sort': {'type': 'bool', 'value': <1>}
    }

    Returns
    -------
    _OUTPUT : twisted.internet.defer.Deferred generator of items

    Notes
    -----
    ``conf`` is modified in place: 'to-str' is removed and its value is
    stored under 'delimiter'.
    """
    # NOTE(review): the yield/returnValue style presumably relies on
    # twisted's inlineCallbacks being applied to this function -- confirm.
    # When 'to-str' is absent, the existing 'delimiter' entry (or None) is
    # kept. dict.get is called unbound, presumably to bypass a `get`
    # overridden by conf's class -- confirm.
    delimiter = conf.pop('to-str', dict.get(conf, 'delimiter'))
    conf['delimiter'] = delimiter

    split_items = yield asyncGetSplits(_INPUT, conf, **cdicts(opts, kwargs))
    dispatched = yield asyncDispatch(split_items, *get_async_dispatch_funcs())
    async_parse = partial(maybeDeferred, parse_result)
    tokens = yield asyncStarMap(async_parse, dispatched)
    returnValue(utils.multiplex(tokens))
def pipe_stringtokenizer(context=None, _INPUT=None, conf=None, **kwargs):
    """Split strings into delimiter-separated tokens.

    A loopable string module.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : iterable of items or strings
    conf : {
        'to-str': {'value': <delimiter>},
        'dedupe': {'type': 'bool', 'value': <1>},
        'sort': {'type': 'bool', 'value': <1>}
    }

    Returns
    -------
    _OUTPUT : generator of items

    Notes
    -----
    ``conf`` is modified in place: 'to-str' is removed and its value is
    stored under 'delimiter'.
    """
    # When 'to-str' is absent, the existing 'delimiter' entry (or None) is
    # kept. dict.get is called unbound, presumably to bypass a `get`
    # overridden by conf's class -- confirm.
    delimiter = conf.pop('to-str', dict.get(conf, 'delimiter'))
    conf['delimiter'] = delimiter

    split_items = get_splits(_INPUT, conf, **cdicts(opts, kwargs))
    dispatched = utils.dispatch(split_items, *get_dispatch_funcs())
    tokens = starmap(parse_result, dispatched)
    return utils.multiplex(tokens)
def asyncPipeFetch(context=None, _INPUT=None, conf=None, **kwargs):
    """Asynchronously fetch and parse one or more feeds into entries.

    A loopable source.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : asyncPipe like object (twisted Deferred iterable of items)
    conf : {
        'URL': [
            {'type': 'url', 'value': <url1>},
            {'type': 'url', 'value': <url2>},
            {'type': 'url', 'value': <url3>},
        ]
    }

    Returns
    -------
    _OUTPUT : twisted.internet.defer.Deferred generator of items
    """
    # NOTE(review): the yield/returnValue style presumably relies on
    # twisted's inlineCallbacks being applied to this function -- confirm.
    # Only the 'URL' part of the configuration is split.
    url_conf = conf['URL']
    split_urls = yield asyncGetSplits(_INPUT, url_conf, **cdicts(opts, kwargs))
    entries = yield asyncStarMap(asyncParseResult, split_urls)
    returnValue(utils.multiplex(entries))
def pipe_fetch(context=None, _INPUT=None, conf=None, **kwargs):
    """Fetch and parse one or more feeds into entries.

    A loopable source.

    Parameters
    ----------
    context : pipe2py.Context object
    _INPUT : pipeforever pipe or an iterable of items or fields
    conf : {
        'URL': [
            {'type': 'url', 'value': <url1>},
            {'type': 'url', 'value': <url2>},
            {'type': 'url', 'value': <url3>},
        ]
    }

    Returns
    -------
    _OUTPUT : generator of items
    """
    # Only the 'URL' part of the configuration is split.
    url_conf = conf['URL']
    split_urls = get_splits(_INPUT, url_conf, **cdicts(opts, kwargs))
    entries = starmap(parse_result, split_urls)
    return utils.multiplex(entries)
def parse_result(urls, _, _pass):
    """Fetch each url, parse it as a feed and return the feed entries.

    Parameters
    ----------
    urls : value handed to get_urls to obtain the urls to fetch
    _ : unused
    _pass : unused

    Returns
    -------
    The multiplexed result of utils.gen_entries over every parsed feed.
    Fetching stays lazy: a url is opened only when its content is consumed.
    """
    def read_url(url):
        # Close the response explicitly; the bare `urlopen(url).read()`
        # left the handle open until it was garbage collected.
        response = urlopen(url)

        try:
            return response.read()
        finally:
            response.close()

    str_urls = get_urls(urls)
    contents = imap(read_url, str_urls)
    parsed = imap(speedparser.parse, contents)
    entries = imap(utils.gen_entries, parsed)
    return utils.multiplex(entries)
def get_output(_INPUT, **kwargs):
    """Chain the input items with the items of every '_OTHER*' argument.

    Parameters
    ----------
    _INPUT : input passed through utils.finitize
    kwargs : keyword arguments; only the values whose name starts with
        '_OTHER' are used, and they are combined with utils.multiplex

    Returns
    -------
    An itertools.chain of the finitized input followed by the multiplexed
    '_OTHER*' values.
    """
    other_pipes = (
        pipe for name, pipe in kwargs.iteritems()
        if name.startswith('_OTHER')
    )
    merged_others = utils.multiplex(other_pipes)
    return chain(utils.finitize(_INPUT), merged_others)