def classic_iterator(urls,
                     reader=func.chain_reader,
                     input_stream=(func.map_input_stream, ),
                     notifier=func.notifier,
                     params=None,
                     ddfs=None):
    """
    An iterator over records as seen by the classic map interface.

    Every input produced by ``util.inputlist(urls, ...)`` is first passed to
    `notifier`, then opened through a classic :class:`Worker` and its
    records are yielded one by one.

    :param urls: handed to ``util.inputlist`` to obtain the inputs to read.

    :type  reader: :func:`disco.classic.worker.func.input_stream`
    :param reader: shortcut for the last input stream applied.

    :type  input_stream: sequence of :func:`disco.classic.worker.func.input_stream`
    :param input_stream: used to read from a custom file format.

    :type  notifier: :func:`disco.classic.worker.func.notifier`
    :param notifier: called when the task opens a url.

    :param params: forwarded to ``worker.opener('map', 'in', params)``.

    :param ddfs: when truthy, used as the ``DISCO_MASTER`` setting while
                 resolving `urls`; otherwise default settings are used.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    worker = Worker(map_reader=reader, map_input_stream=input_stream)
    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    for source in util.inputlist(urls, settings=settings):
        notifier(source)
        # A fresh opener is requested for every input, as the reader chain
        # is applied per input.
        opener = worker.opener('map', 'in', params)
        records = Input(source, open=opener)
        for record in records:
            yield record
def classic_iterator(urls,
                     reader=task_io.chain_reader,
                     input_stream=(func.map_input_stream, ),
                     notifier=func.notifier,
                     params=None,
                     ddfs=None):
    """
    An iterator over records as seen by the classic map interface.

    Every input produced by ``util.inputlist(urls, ...)`` is rewritten with
    ``proxy_url(..., to_master=False)``, reported to `notifier`, and then
    opened through a classic :class:`Worker`; its records are yielded one
    by one.

    :param urls: handed to ``util.inputlist`` to obtain the inputs to read.

    :type  reader: :func:`disco.worker.task_io.input_stream`
    :param reader: shortcut for the last input stream applied.

    :type  input_stream: sequence of :func:`disco.worker.task_io.input_stream`
    :param input_stream: used to read from a custom file format.

    :type  notifier: :func:`disco.func.notifier`
    :param notifier: called when the task opens a url.

    :param params: forwarded to ``worker.opener('map', 'in', params)``.

    :param ddfs: when truthy, used as the ``DISCO_MASTER`` setting while
                 resolving `urls`; otherwise default settings are used.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    worker = Worker(map_reader=reader, map_input_stream=input_stream)
    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    for source in util.inputlist(urls, settings=settings):
        if isinstance(source, basestring):
            # A single url: proxy it directly.
            dest = proxy_url(source, to_master=False)
        else:
            # A collection of urls: proxy each member, keeping a tuple a
            # tuple and turning any other iterable into a list.
            dest = [proxy_url(member, to_master=False) for member in source]
            if isinstance(source, tuple):
                dest = tuple(dest)

        notifier(dest)
        opener = worker.opener('map', 'in', params)
        records = Input(dest, open=opener)
        for record in records:
            yield record
def sorted_iterator(urls,
                    reader=func.chain_reader,
                    input_stream=(func.map_input_stream, ),
                    notifier=func.notifier,
                    params=None,
                    ddfs=None):
    """
    Build a :class:`SortedIterator` over the inputs resolved from `urls`.

    Every input produced by ``util.inputlist(urls, ...)`` is passed to
    `notifier` and opened through a classic :class:`Worker`.  Only the
    opened inputs that evaluate as true are handed to
    :class:`SortedIterator`.

    :param urls: handed to ``util.inputlist`` to obtain the inputs to read.
    :param reader: passed to the worker as ``map_reader``.
    :param input_stream: passed to the worker as ``map_input_stream``.
    :param notifier: called with each input before it is opened.
    :param params: forwarded to ``worker.opener('map', 'in', params)``.
    :param ddfs: when truthy, used as the ``DISCO_MASTER`` setting while
                 resolving `urls`; otherwise default settings are used.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    worker = Worker(map_reader=reader, map_input_stream=input_stream)
    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    opened = []
    for source in util.inputlist(urls, settings=settings):
        notifier(source)
        stream = Input(source, open=worker.opener('map', 'in', params))
        if not stream:
            # Falsy inputs are left out of the merge.
            continue
        opened.append(stream)
    return SortedIterator(opened)
def sorted_iterator(urls,
                    reader=func.chain_reader,
                    input_stream=(func.map_input_stream,),
                    notifier=func.notifier,
                    params=None,
                    ddfs=None):
    """
    Return a :class:`SortedIterator` wrapping the opened inputs of `urls`.

    The inputs come from ``util.inputlist(urls, ...)``.  Each one is
    announced through `notifier`, then wrapped in an ``Input`` that opens
    it with the classic worker's ``'map'``/``'in'`` opener.  Wrapped inputs
    that evaluate as false are skipped.

    :param urls: handed to ``util.inputlist`` to obtain the inputs to read.
    :param reader: passed to the worker as ``map_reader``.
    :param input_stream: passed to the worker as ``map_input_stream``.
    :param notifier: called with each input before it is opened.
    :param params: forwarded to ``worker.opener('map', 'in', params)``.
    :param ddfs: when truthy, used as the ``DISCO_MASTER`` setting while
                 resolving `urls`; otherwise default settings are used.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    worker = Worker(map_reader=reader, map_input_stream=input_stream)
    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    streams = []
    for item in util.inputlist(urls, settings=settings):
        notifier(item)
        opener = worker.opener('map', 'in', params)
        candidate = Input(item, open=opener)
        if candidate:
            streams.append(candidate)
    return SortedIterator(streams)