def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content as delimited rows

    Unless `skip` is set, the source described by `objconf` is opened with
    `fetch` and handed to `read_csv`; otherwise the caller's `stream` is
    passed back untouched.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned as-is when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('spreadsheet.csv')
        >>> conf = {
        ...     'url': url, 'sanitize': True, 'skip_rows': 0,
        ...     'encoding': ENCODING}
        >>> objconf = Objectify(conf)
        >>> result = parser(None, objconf, stream={})
        >>> next(result)['mileage'] == '7213'
        True
    """
    if not skip:
        # The config's `skip_rows` / `col_names` values are forwarded to
        # read_csv under the keyword names `first_row` / `custom_header`.
        overrides = {
            'first_row': objconf.skip_rows,
            'custom_header': objconf.col_names,
        }
        source = fetch(decode=True, **objconf)
        reader_opts = merge([objconf, overrides])

        # auto_close receives both the row iterator and the handle it reads
        # from (presumably so the handle is closed once the rows are used up).
        return auto_close(read_csv(source, **reader_opts), source)

    return kwargs['stream']
def parser(_, objconf, skip=False, **kwargs):
    """ Parses the pipe content line by line

    Unless `skip` is set, the source described by `objconf` is opened with
    `fetch` and every line is emitted as a one-key dict whose key is the
    `assign` keyword argument and whose value is the stripped line.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (returned as-is when `skip` is set)
        assign (str): The key each line is stored under (required unless
            `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Raises:
        KeyError: If `skip` is set and `stream` is missing, or if `skip` is
            not set and `assign` is missing.

    Examples:
        >>> from riko import get_path
        >>> from meza.fntools import Objectify
        >>>
        >>> url = get_path('lorem.txt')
        >>> objconf = Objectify({'url': url, 'encoding': ENCODING})
        >>> result = parser(None, objconf, assign='content')
        >>> next(result)['content'] == 'What is Lorem Ipsum?'
        True
    """
    if skip:
        return kwargs['stream']

    # Resolve the output key once, up front: it is invariant across lines,
    # and a missing key should fail before the source is opened rather than
    # on the first iteration of the returned stream.
    assign = kwargs['assign']
    f = fetch(decode=True, **objconf)
    _stream = ({assign: line.strip()} for line in f)

    # auto_close receives both the item iterator and the handle it reads
    # from (presumably so the handle is closed once the lines are used up).
    return auto_close(_stream, f)
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content as delimited rows

    Written as a generator-based coroutine: it yields the result of
    `io.async_url_open` and hands its own result to `return_value`.
    NOTE(review): the example below treats the call as returning a Deferred,
    so a coroutine decorator is presumably applied outside this view.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (passed through when `skip` is set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x)['mileage'])
        ...     url = get_path('spreadsheet.csv')
        ...     conf = {
        ...         'url': url, 'sanitize': True, 'skip_rows': 0,
        ...         'encoding': ENCODING}
        ...     objconf = Objectify(conf)
        ...     d = async_parser(None, objconf, stream={})
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        7213
    """
    if not skip:
        abspath = get_abspath(objconf.url)
        response = yield io.async_url_open(abspath)

        # The config's `skip_rows` / `col_names` values are forwarded to
        # read_csv under the keyword names `first_row` / `custom_header`.
        overrides = {
            'first_row': objconf.skip_rows,
            'custom_header': objconf.col_names,
        }
        reader_opts = merge([objconf, overrides])
        result = auto_close(read_csv(response, **reader_opts), response)
    else:
        result = kwargs['stream']

    return_value(result)
def async_parser(_, objconf, skip=False, **kwargs):
    """ Asynchronously parses the pipe content line by line

    Written as a generator-based coroutine: it yields the result of
    `io.async_url_open` and hands its own result to `return_value`.
    NOTE(review): the example below treats the call as returning a Deferred,
    so a coroutine decorator is presumably applied outside this view.

    Args:
        _ (None): Ignored
        objconf (obj): The pipe configuration (an Objectify instance)
        skip (bool): Don't parse the content
        kwargs (dict): Keyword arguments

    Kwargs:
        stream (dict): The original item (passed through when `skip` is set)
        assign (str): The key each line is stored under (read only when
            `skip` is not set)

    Returns:
        Iter[dict]: The stream of items

    Examples:
        >>> from riko import get_path
        >>> from riko.bado import react
        >>> from riko.bado.mock import FakeReactor
        >>> from meza.fntools import Objectify
        >>>
        >>> def run(reactor):
        ...     callback = lambda x: print(next(x)['content'])
        ...     url = get_path('lorem.txt')
        ...     objconf = Objectify({'url': url, 'encoding': ENCODING})
        ...     d = async_parser(None, objconf, assign='content')
        ...     return d.addCallbacks(callback, logger.error)
        >>>
        >>> try:
        ...     react(run, _reactor=FakeReactor())
        ... except SystemExit:
        ...     pass
        ...
        What is Lorem Ipsum?
    """
    if not skip:
        abspath = get_abspath(objconf.url)
        handle = yield io.async_url_open(abspath)
        key = kwargs['assign']
        codec = objconf.encoding

        # Each line is stripped and then decoded with the configured
        # encoding before being stored under `key`.
        decoded = ({key: raw.strip().decode(codec)} for raw in handle)
        result = auto_close(decoded, handle)
    else:
        result = kwargs['stream']

    return_value(result)