def __init__(self, urls, notifier=func.noop, reader=func.chain_reader,
             input_stream=(func.map_input_stream, ), params=None, ddfs=None):
    """
    Remember the inputs and build the map task configured to read them.

    :param urls: stored on the instance as ``self.urls``.
    :param notifier: stored on the instance as ``self.notifier``.
    :param reader: handed to the job dictionary as ``map_reader``.
    :param input_stream: handed to the job dictionary as ``map_input_stream``.
    :param params: handed to the job dictionary as ``params``.
    :param ddfs: stored on the instance as ``self.ddfs``.
    """
    # NOTE(review): imported at call time rather than at module level --
    # presumably to avoid a circular import with disco.task; confirm.
    from disco.task import Map

    jobdict = JobDict(map_input_stream=input_stream,
                      map_reader=reader,
                      params=params)
    self.task = Map(jobdict=jobdict)
    self.urls, self.notifier, self.ddfs = urls, notifier, ddfs
class RecordIter(object):
    """
    Produces an iterator over the records in a list of inputs.

    :type  urls: list of urls
    :param urls: urls of the inputs
                 e.g. as returned by :meth:`Disco.wait`.

    :type  notifier: function
    :param notifier: called when the iterator moves to the next url::

                      def notifier(url[s]):
                          ...

                     .. note::

                         notifier argument is a list if urls are replicated.

    :type  reader: :func:`disco.func.input_stream`
    :param reader: used to read from a custom :func:`disco.func.output_stream`.

    :param input_stream: passed to the underlying map task as its
                         ``map_input_stream``.

    :param params: passed to the underlying map task as its ``params``.

    :param ddfs: forwarded to ``util.urllist`` when the inputs are expanded.
    """
    def __init__(self, urls, notifier=func.noop, reader=func.chain_reader,
                 input_stream=(func.map_input_stream, ), params=None, ddfs=None):
        # NOTE(review): imported at call time rather than at module level --
        # presumably to avoid a circular import with disco.task; confirm.
        from disco.task import Map
        self.task = Map(jobdict=JobDict(map_input_stream=input_stream,
                                        map_reader=reader,
                                        params=params))
        self.urls = urls
        self.notifier = notifier
        self.ddfs = ddfs

    def __iter__(self):
        """
        Yield the records of every input, in the order the inputs were given.

        The notifier is called once per expanded input, before any of its
        records are read.
        """
        for urls in self.urls:
            for replicas in util.urllist(urls, ddfs=self.ddfs):
                self.notifier(replicas)
                # try_replicas() consumes the list it is given, so hand it
                # a private copy.
                for entry in self.try_replicas(list(util.iterify(replicas))):
                    yield entry

    def try_replicas(self, urls, start=0):
        """
        Yield the records of one input, failing over between its replicas.

        Replicas are tried in order.  If reading a replica raises, the next
        one is opened and reading resumes after the last record that was
        already yielded.  Iteration stops as soon as one replica has been
        read through to its end.

        :param urls: list of replica urls; it is consumed in place.
        :param start: number of leading records to skip.
        :raises Exception: whatever the last replica raised, if every
                           replica failed.
        """
        while urls:
            try:
                for entry in self.entries(urls.pop(0), start=start):
                    yield entry
                    # Count delivered records so a fail-over replica
                    # resumes where this one stopped.
                    start += 1
            except Exception:
                # Only give up once there is no replica left to fall back on.
                if not urls:
                    raise
            else:
                # This replica was read to the end: the input is complete.
                # Without this the remaining replicas would be opened and
                # scanned again, and a failure of the last one would be
                # raised although every record had already been delivered.
                return

    def entries(self, url, start=0):
        """
        Yield the records read from ``url``, skipping the first ``start``.

        Skipped records are still read from the input; they are just not
        yielded.
        """
        fd, _size, _url = self.task.connect_input(url)
        for n, entry in enumerate(fd):
            if n >= start:
                yield entry
class RecordIter(object):
    """
    Produces an iterator over the records in a list of inputs.

    :type  urls: list of urls
    :param urls: urls of the inputs
                 e.g. as returned by :meth:`Disco.wait`.

    :type  notifier: function
    :param notifier: called when the iterator moves to the next url::

                      def notifier(url[s]):
                          ...

                     .. note::

                         notifier argument is a list if urls are replicated.

    :type  reader: :func:`disco.func.input_stream`
    :param reader: used to read from a custom :func:`disco.func.output_stream`.

    :param input_stream: passed to the underlying map task as its
                         ``map_input_stream``.

    :param params: passed to the underlying map task as its ``params``.

    :param ddfs: forwarded to ``util.urllist`` when the inputs are expanded.
    """
    def __init__(self, urls, notifier=func.noop, reader=func.chain_reader,
                 input_stream=(func.map_input_stream, ), params=None, ddfs=None):
        # NOTE(review): imported at call time rather than at module level --
        # presumably to avoid a circular import with disco.task; confirm.
        from disco.task import Map
        self.task = Map(jobdict=JobDict(map_input_stream=input_stream,
                                        map_reader=reader,
                                        params=params))
        self.urls = urls
        self.notifier = notifier
        self.ddfs = ddfs

    def __iter__(self):
        """
        Yield the records of every input, in the order the inputs were given.

        The notifier is called once per expanded input, before any of its
        records are read.
        """
        for urls in self.urls:
            for replicas in util.urllist(urls, ddfs=self.ddfs):
                self.notifier(replicas)
                # try_replicas() consumes the list it is given, so hand it
                # a private copy.
                for entry in self.try_replicas(list(util.iterify(replicas))):
                    yield entry

    def try_replicas(self, urls, start=0):
        """
        Yield the records of one input, failing over between its replicas.

        Replicas are tried in order.  If reading a replica raises, the next
        one is opened and reading resumes after the last record that was
        already yielded.  Iteration stops as soon as one replica has been
        read through to its end.

        :param urls: list of replica urls; it is consumed in place.
        :param start: number of leading records to skip.
        :raises Exception: whatever the last replica raised, if every
                           replica failed.
        """
        while urls:
            try:
                for entry in self.entries(urls.pop(0), start=start):
                    yield entry
                    # Count delivered records so a fail-over replica
                    # resumes where this one stopped.
                    start += 1
            except Exception:
                # Only give up once there is no replica left to fall back on.
                if not urls:
                    raise
            else:
                # This replica was read to the end: the input is complete.
                # Without this the remaining replicas would be opened and
                # scanned again, and a failure of the last one would be
                # raised although every record had already been delivered.
                return

    def entries(self, url, start=0):
        """
        Yield the records read from ``url``, skipping the first ``start``.

        Skipped records are still read from the input; they are just not
        yielded.
        """
        fd, _size, _url = self.task.connect_input(url)
        for n, entry in enumerate(fd):
            if n >= start:
                yield entry
def __init__(self, results, notifier=func.noop, reader=func.chain_reader,
             input_stream=(func.map_input_stream, ), params=None, ddfs=None):
    """
    Keep hold of ``results`` and build the map task configured to read them.

    :param results: stored on the instance as ``self.results``.
    :param notifier: stored on the instance as ``self.notifier``.
    :param reader: handed to the job dictionary as ``map_reader``.
    :param input_stream: handed to the job dictionary as ``map_input_stream``.
    :param params: handed to the job dictionary as ``params``.
    :param ddfs: stored on the instance as ``self.ddfs``.
    """
    # NOTE(review): imported at call time rather than at module level --
    # presumably to avoid a circular import with disco.task; confirm.
    from disco.task import Map

    reader_settings = dict(map_input_stream=input_stream,
                           map_reader=reader,
                           params=params)
    self.task = Map(jobdict=JobDict(**reader_settings))
    self.results = results
    self.notifier = notifier
    self.ddfs = ddfs