Esempio n. 1
0
File: core.py Progetto: mshron/disco
 def __init__(self, urls, notifier=func.noop, reader=func.chain_reader,
              input_stream=(func.map_input_stream, ), params=None, ddfs=None):
     """
     Store the iterator settings and build the ``Map`` task kept in
     ``self.task``.

     :param urls: stored unchanged as ``self.urls``.
     :param notifier: stored unchanged as ``self.notifier``.
     :param reader: handed to the job dict as ``map_reader``.
     :param input_stream: handed to the job dict as ``map_input_stream``.
     :param params: handed to the job dict as ``params``.
     :param ddfs: stored unchanged as ``self.ddfs``.
     """
     # Imported at call time rather than at module load
     # (presumably to avoid a circular import -- TODO confirm).
     from disco.task import Map

     jobdict = JobDict(map_input_stream=input_stream,
                       map_reader=reader,
                       params=params)
     self.task = Map(jobdict=jobdict)
     self.urls, self.notifier, self.ddfs = urls, notifier, ddfs
Esempio n. 2
0
File: core.py Progetto: mshron/disco
class RecordIter(object):
    """
    Produces an iterator over the records in a list of inputs.

    :type  urls: list of urls
    :param urls: urls of the inputs
                 e.g. as returned by :meth:`Disco.wait`.

    :type  notifier: function
    :param notifier: called when the iterator moves to the next url::

                      def notifier(url[s]):
                          ...

                     .. note::

                         notifier argument is a list if urls are replicated.

    :type  reader: :func:`disco.func.input_stream`
    :param reader: used to read from a custom :func:`disco.func.output_stream`.

    :param input_stream: passed to the job dict as ``map_input_stream``.

    :param params: passed to the job dict as ``params``.

    :param ddfs: forwarded as the ``ddfs`` argument of ``util.urllist`` when
                 each input is expanded.
    """
    def __init__(self,
                 urls,
                 notifier=func.noop,
                 reader=func.chain_reader,
                 input_stream=(func.map_input_stream, ),
                 params=None,
                 ddfs=None):
        # Imported at call time rather than at module load
        # (presumably to avoid a circular import -- TODO confirm).
        from disco.task import Map
        # The task is only used by `entries` to open inputs via
        # `connect_input`.
        self.task = Map(jobdict=JobDict(
            map_input_stream=input_stream, map_reader=reader, params=params))
        self.urls = urls
        self.notifier = notifier
        self.ddfs = ddfs

    def __iter__(self):
        """
        Yield every entry of every input in ``self.urls``, in order.

        ``self.notifier`` is called once per item produced by
        ``util.urllist``, before any of that item's entries are yielded.
        """
        for urls in self.urls:
            for replicas in util.urllist(urls, ddfs=self.ddfs):
                self.notifier(replicas)
                # `try_replicas` consumes its argument, so give it a fresh list.
                for entry in self.try_replicas(list(util.iterify(replicas))):
                    yield entry

    def try_replicas(self, urls, start=0):
        """
        Yield the entries of one input, falling back across its replicas.

        Replicas are tried in list order.  If reading one fails, the next is
        opened and reading resumes after the entries already yielded.  Once a
        replica has been read to its end, the remaining ones are not touched.

        :param urls: list of replica urls; it is consumed (popped from).
        :param start: number of leading entries to skip.
        :raises Exception: whatever the last replica raised, if all failed.
        """
        while urls:
            try:
                for entry in self.entries(urls.pop(0), start=start):
                    yield entry
                    # Count delivered entries so a fallback replica resumes
                    # right after the last one handed to the caller.
                    start += 1
            except Exception:
                # Broad on purpose: any failure means "try the next replica";
                # only the failure of the last replica is propagated.
                if not urls:
                    raise
            else:
                # This replica was read completely.  Stopping here avoids
                # re-reading the other replicas for nothing, and avoids
                # raising for an unreachable last replica after every entry
                # has already been delivered.
                return

    def entries(self, url, start=0):
        """
        Yield the entries read from `url`, skipping the first `start` of them.

        The skipped entries are still read; they are just not yielded.
        """
        fd, _size, _url = self.task.connect_input(url)
        for n, entry in enumerate(fd):
            if n >= start:
                yield entry
Esempio n. 3
0
class RecordIter(object):
    """
    Produces an iterator over the records in a list of inputs.

    :type  urls: list of urls
    :param urls: urls of the inputs
                 e.g. as returned by :meth:`Disco.wait`.

    :type  notifier: function
    :param notifier: called when the iterator moves to the next url::

                      def notifier(url[s]):
                          ...

                     .. note::

                         notifier argument is a list if urls are replicated.

    :type  reader: :func:`disco.func.input_stream`
    :param reader: used to read from a custom :func:`disco.func.output_stream`.

    :param input_stream: passed to the job dict as ``map_input_stream``.

    :param params: passed to the job dict as ``params``.

    :param ddfs: forwarded as the ``ddfs`` argument of ``util.urllist`` when
                 each input is expanded.
    """
    def __init__(self, urls,
                 notifier=func.noop,
                 reader=func.chain_reader,
                 input_stream=(func.map_input_stream, ),
                 params=None,
                 ddfs=None):
        # Imported at call time rather than at module load
        # (presumably to avoid a circular import -- TODO confirm).
        from disco.task import Map
        # The task is only used by `entries` to open inputs via
        # `connect_input`.
        self.task = Map(jobdict=JobDict(map_input_stream=input_stream,
                                        map_reader=reader,
                                        params=params))
        self.urls = urls
        self.notifier = notifier
        self.ddfs = ddfs

    def __iter__(self):
        """
        Yield every entry of every input in ``self.urls``, in order.

        ``self.notifier`` is called once per item produced by
        ``util.urllist``, before any of that item's entries are yielded.
        """
        for urls in self.urls:
            for replicas in util.urllist(urls, ddfs=self.ddfs):
                self.notifier(replicas)
                # `try_replicas` consumes its argument, so give it a fresh list.
                for entry in self.try_replicas(list(util.iterify(replicas))):
                    yield entry

    def try_replicas(self, urls, start=0):
        """
        Yield the entries of one input, falling back across its replicas.

        Replicas are tried in list order.  If reading one fails, the next is
        opened and reading resumes after the entries already yielded.  Once a
        replica has been read to its end, the remaining ones are not touched.

        :param urls: list of replica urls; it is consumed (popped from).
        :param start: number of leading entries to skip.
        :raises Exception: whatever the last replica raised, if all failed.
        """
        while urls:
            try:
                for entry in self.entries(urls.pop(0), start=start):
                    yield entry
                    # Count delivered entries so a fallback replica resumes
                    # right after the last one handed to the caller.
                    start += 1
            except Exception:
                # Broad on purpose: any failure means "try the next replica";
                # only the failure of the last replica is propagated.
                if not urls:
                    raise
            else:
                # This replica was read completely.  Stopping here avoids
                # re-reading the other replicas for nothing, and avoids
                # raising for an unreachable last replica after every entry
                # has already been delivered.
                return

    def entries(self, url, start=0):
        """
        Yield the entries read from `url`, skipping the first `start` of them.

        The skipped entries are still read; they are just not yielded.
        """
        fd, _size, _url = self.task.connect_input(url)
        for n, entry in enumerate(fd):
            if n >= start:
                yield entry
Esempio n. 4
0
 def __init__(self,
              results,
              notifier=func.noop,
              reader=func.chain_reader,
              input_stream=(func.map_input_stream, ),
              params=None,
              ddfs=None):
     """
     Store the iterator settings and build the ``Map`` task kept in
     ``self.task``.

     :param results: stored unchanged as ``self.results``.
     :param notifier: stored unchanged as ``self.notifier``.
     :param reader: handed to the job dict as ``map_reader``.
     :param input_stream: handed to the job dict as ``map_input_stream``.
     :param params: handed to the job dict as ``params``.
     :param ddfs: stored unchanged as ``self.ddfs``.
     """
     # Imported at call time rather than at module load
     # (presumably to avoid a circular import -- TODO confirm).
     from disco.task import Map

     job_settings = JobDict(
         map_input_stream=input_stream, map_reader=reader, params=params)
     self.task = Map(jobdict=job_settings)
     self.results, self.notifier, self.ddfs = results, notifier, ddfs