Example #1
0
File: core.py Project: hmas/disco
def classic_iterator(urls,
                     reader=task_io.chain_reader,
                     input_stream=(func.map_input_stream, ),
                     notifier=func.notifier,
                     params=None,
                     ddfs=None):
    """
    Generate the records of ``urls`` as the classic map interface sees them.

    :type  reader: :func:`disco.worker.task_io.input_stream`
    :param reader: shortcut for the last input stream applied.

    :type  input_stream: sequence of :func:`disco.worker.task_io.input_stream`
    :param input_stream: used to read from a custom file format.

    :type  notifier: :func:`disco.func.notifier`
    :param notifier: called with each proxied destination before it is read.

    :param params: handed to the worker's ``('map', 'in')`` opener.

    :param ddfs: when truthy, used as ``DISCO_MASTER`` for the settings
                 passed to ``util.inputlist``.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    def unproxied(url):
        # Every url is rewritten the same way, never towards the master.
        return proxy_url(url, to_master=False)

    worker = Worker(map_reader=reader, map_input_stream=input_stream)
    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    for entry in util.inputlist(urls, settings=settings):
        if isinstance(entry, basestring):
            dest = unproxied(entry)
        else:
            # A group of urls: keep the container type (tuple stays tuple,
            # anything else becomes a list).
            replicas = [unproxied(replica) for replica in entry]
            dest = tuple(replicas) if isinstance(entry, tuple) else replicas
        notifier(dest)
        opener = worker.opener('map', 'in', params)
        for record in Input(dest, open=opener):
            yield record
Example #2
0
def classic_iterator(urls,
                     reader=task_io.chain_reader,
                     input_stream=(func.map_input_stream, ),
                     notifier=func.notifier,
                     params=None,
                     ddfs=None):
    """
    Yield, one by one, the records the classic map interface would read.

    :type  reader: :func:`disco.worker.task_io.input_stream`
    :param reader: shortcut for the last input stream applied.

    :type  input_stream: sequence of :func:`disco.worker.task_io.input_stream`
    :param input_stream: used to read from a custom file format.

    :type  notifier: :func:`disco.func.notifier`
    :param notifier: called with each proxied destination before it is read.

    :param params: forwarded to ``worker.opener('map', 'in', params)``.

    :param ddfs: when truthy, becomes ``DISCO_MASTER`` in the settings used
                 to expand ``urls``.
    """
    from disco.worker import Input
    from disco.worker.classic.worker import Worker

    worker = Worker(map_reader=reader, map_input_stream=input_stream)
    settings = DiscoSettings()
    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        pass

    for source in util.inputlist(urls, settings=settings):
        if isinstance(source, basestring):
            # A single url.
            dest = proxy_url(source, to_master=False)
        else:
            # Several urls: proxy each one and preserve tuple-ness.
            proxied = [proxy_url(part, to_master=False) for part in source]
            if isinstance(source, tuple):
                dest = tuple(proxied)
            else:
                dest = proxied
        notifier(dest)
        records = Input(dest, open=worker.opener('map', 'in', params))
        for record in records:
            yield record
Example #3
0
 def _download(self, url, data=None, token=None, method='GET'):
     """
     Request ``url`` with ``method`` and return the JSON-decoded response.

     The url goes through ``proxy_url`` (using ``self.proxy``) and then
     ``self._resolve`` before it is downloaded.  The token sent along is
     whatever ``self._token(url, token, method)`` returns.
     """
     target = self._resolve(proxy_url(url, proxy=self.proxy, meth=method))
     auth = self._token(url, token, method)
     payload = download(target, data=data, method=method, token=auth)
     return json.loads(payload)
Example #4
0
 def _upload(self, urls, source, token=None, to_master=True, **kwargs):
     """
     Upload ``source`` to every url in ``urls`` using ``PUT``.

     Each url is rewritten by ``proxy_url`` (honouring ``self.proxy`` and
     ``to_master``) and then by ``self._resolve`` before the whole list is
     handed to ``upload``.  Extra keyword arguments are forwarded to
     ``upload`` unchanged.
     """
     resolved = []
     # The token lookup is keyed on the last *raw* url.  The original code
     # obtained it through Python 2's list-comprehension variable leakage,
     # which is a NameError on Python 3 and for an empty ``urls``; track it
     # explicitly instead.
     # NOTE(review): with no urls the lookup now receives None -- confirm
     # that ``_token`` tolerates that.
     last_url = None
     for url in iterify(urls):
         last_url = url
         resolved.append(self._resolve(proxy_url(url,
                                                 proxy=self.proxy,
                                                 meth='PUT',
                                                 to_master=to_master)))
     return upload(resolved,
                   source,
                   token=self._token(last_url, token, 'PUT'),
                   **kwargs)
Example #5
0
 def curl(replicas):
     """
     Return the content of the first replica that downloads successfully.

     Any failure while resolving or fetching a replica is written to
     stderr and the next replica is tried.  If none succeeds the loop
     simply ends and the function returns None.
     """
     for replica in replicas:
         try:
             return download(proxy_url(urlresolve(replica, master=program.ddfs.master),
                                       to_master=False))
         # ``as`` is accepted by Python 2.6+ and Python 3; the old
         # ``except Exception, e`` form is a syntax error on Python 3.
         except Exception as e:
             sys.stderr.write("%s\n" % e)
Example #6
0
 def curl(replicas):
     """
     Fetch the first replica that can be downloaded and return its content.

     Errors raised while resolving or downloading a replica are reported on
     stderr and the next one is attempted.  When every replica fails, the
     function falls off the end and returns None.
     """
     for replica in replicas:
         try:
             return download(proxy_url(urlresolve(replica, master=program.ddfs.master),
                                       to_master=False))
         # The comma form of ``except`` only parses on Python 2; ``as``
         # works on Python 2.6+ and Python 3 alike.
         except Exception as e:
             sys.stderr.write("%s\n" % e)
Example #7
0
def result_iterator(results, notifier=None, proxy=None,
                    reader=func.netstr_reader):
    """
    Iterate over the records stored behind a list of result urls.

    ``dir://`` entries are expanded with ``util.parse_dir``; every other
    entry is used as is.  Each resulting url must start with ``file://`` or
    ``disco://``, otherwise a ``JobException`` is raised.  If ``notifier``
    is truthy it is called with each url right before its records are read,
    and ``reader`` is called with the opened file object, its size and the
    file name.
    """
    urls = []
    for entry in results:
        if entry.startswith("dir://"):
            urls.extend(util.parse_dir(entry, proxy))
        else:
            urls.append(entry)

    # Only the third configuration value is needed here.
    _, _, root = util.load_conf()

    for url in urls:
        if url.startswith("file://"):
            fname = url[len("file://"):]
            fd = file(fname)
            size = os.stat(fname).st_size
        elif url.startswith("disco://"):
            host, fname = url[len("disco://"):].split("/", 1)
            # From here on ``url`` is the proxied address; that is also the
            # value reported to the notifier below.
            url = util.proxy_url(proxy, fname, host)
            if util.resultfs_enabled:
                local_path = "%s/data/%s" % (root, fname)
                fd = file(local_path)
                size = os.stat(local_path).st_size
            else:
                size, fd = comm.open_remote(url)
        else:
            raise JobException("Invalid result url: %s" % url)

        if notifier:
            notifier(url)

        for record in reader(fd, size, fname):
            yield record
Example #8
0
 def _download(self, url, data=None, token=None, method="GET", to_master=True):
     """
     Request ``url`` with ``method`` and return the decoded JSON document.

     The url is rewritten by ``proxy_url`` (using ``self.proxy`` and
     ``to_master``) and ``self._resolve``.  The downloaded bytes are
     converted with ``bytes_to_str`` before being parsed as JSON.
     """
     target = self._resolve(
         proxy_url(url, proxy=self.proxy, meth=method, to_master=to_master))
     auth = self._token(url, token, method)
     raw = download(target, data=data, method=method, token=auth)
     text = bytes_to_str(raw)
     return json.loads(text)
Example #9
0
 def _download(self, url, data=None, token=None, method='GET', to_master=True):
     """
     Request ``url`` with ``method`` and return the JSON-decoded response.

     The url is first rewritten by ``proxy_url`` (using ``self.proxy`` and
     ``to_master``) and then by ``self._resolve``.  The token passed to
     ``download`` comes from ``self._token(url, token, method)``.
     """
     proxied = proxy_url(url,
                         proxy=self.proxy,
                         meth=method,
                         to_master=to_master)
     target = self._resolve(proxied)
     auth = self._token(url, token, method)
     response = download(target, data=data, method=method, token=auth)
     return json.loads(response)
Example #10
0
File: core.py Project: darkua/disco
 def request(self, url, data=None, offset=0):
     """
     Download ``url`` relative to ``self.master`` and return the result.

     The full address is ``self.master`` followed by ``url``, passed
     through ``proxy_url`` with ``self.proxy``.  ``data`` and ``offset``
     are forwarded to ``download``.

     A ``CommError`` is always re-raised.  When its ``code`` is None a hint
     naming the master is appended to its ``msg`` first.
     """
     try:
         return download(proxy_url('%s%s' % (self.master, url), proxy=self.proxy),
                         data=data,
                         offset=offset)
     # ``as`` parses on Python 2.6+ and Python 3; the comma form does not
     # parse on Python 3.
     except CommError as e:
         if e.code is None:
             e.msg += " (is disco master running at %s?)" % self.master
         raise
Example #11
0
 def request(self, url, data=None, offset=0):
     """
     Fetch ``url`` from the master at ``self.master``.

     The address downloaded is ``self.master`` + ``url`` after
     ``proxy_url`` has been applied with ``self.proxy``.  ``data`` and
     ``offset`` go to ``download`` unchanged.

     A ``CommError`` propagates to the caller.  If it carries no ``code``,
     its ``msg`` is extended with a hint that names the master.
     """
     try:
         return download(proxy_url('%s%s' % (self.master, url),
                                   proxy=self.proxy),
                         data=data,
                         offset=offset)
     # The comma form of ``except`` is Python-2-only syntax.
     except CommError as e:
         if e.code is None:
             e.msg += " (is disco master running at %s?)" % self.master
         raise
Example #12
0
 def curl(replicas):
     """
     Return the content of the first replica that can be downloaded.

     Each failure is written to stderr and the next replica is tried.  When
     all replicas fail, an empty string is returned if ``ignore_missing``
     is truthy; otherwise an ``Exception`` listing the replicas is raised.
     """
     for location in replicas:
         try:
             resolved = urlresolve(location, master=program.ddfs.master)
             return download(proxy_url(resolved, to_master=False))
         except Exception as err:
             sys.stderr.write("{0}\n".format(err))
     if ignore_missing:
         return ''
     raise Exception("Failed downloading all replicas: {0}".format(replicas))
Example #13
0
File: ddfscli.py Project: yuj/disco
 def curl(replicas):
     """
     Download one of ``replicas`` and return what was fetched.

     Replicas are tried in order and every error is reported on stderr.  If
     none can be fetched, the result is ``''`` when ``ignore_missing`` is
     truthy and an ``Exception`` naming all replicas otherwise.
     """
     for candidate in replicas:
         try:
             address = proxy_url(
                 urlresolve(candidate, master=program.ddfs.master),
                 to_master=False)
             return download(address)
         except Exception as failure:
             sys.stderr.write("{0}\n".format(failure))
     if ignore_missing:
         return ''
     raise Exception("Failed downloading all replicas: {0}".format(replicas))
Example #14
0
File: core.py Project: wquan/disco
 def request(self, url, data=None, offset=0):
     """
     Download ``url`` relative to ``self.master`` and return it as text.

     The address is ``self.master`` followed by ``url``, passed through
     ``proxy_url`` with ``self.proxy``.  The downloaded bytes are decoded
     as UTF-8.

     A ``CommError`` is always re-raised.  When its ``code`` is None a hint
     naming the master is appended to its ``msg`` first.
     """
     try:
         byts = download(proxy_url('{0}{1}'.format(self.master, url), proxy=self.proxy),
                         data=data,
                         offset=offset)
         return byts.decode('utf-8')
     except CommError as e:
         # Identity test: None is a singleton, and ``==`` could be
         # intercepted by a custom ``__eq__``.
         if e.code is None:
             e.msg += " (is disco master running at {0}?)".format(self.master)
         raise
Example #15
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Yield one opened remote blob for each replica set of ``tag``.

     A replica set is skipped unless ``blobfilter`` accepts the blob name
     of its first url.  The replicas are shuffled and tried in turn until
     one opens.  If every replica raises ``CommError``, the last such error
     is raised.
     """
     # Keep the failure in a name that outlives the ``except`` block: on
     # Python 3 the ``as`` target is unbound once the handler ends.
     comm_error = None
     for repl in self.urls(tag, token=token):
         if blobfilter(self.blob_name(repl[0])):
             random.shuffle(repl)
             for url in repl:
                 try:
                     yield open_remote(proxy_url(url, proxy=self.proxy))
                     break
                 except CommError as error:
                     comm_error = error
                     continue
             else:
                 # No replica could be opened.
                 raise comm_error
Example #16
0
def result_iterator(urls,
                    reader=task_io.chain_reader,
                    input_stream=(func.map_input_stream, ),
                    notifier=func.notifier,
                    params=None,
                    ddfs=None):
    """
    Generate the records stored in either disco or ddfs.

    :type  reader: :func:`disco.worker.task_io.input_stream`
    :param reader: shortcut for the last input stream applied.

    :type  input_stream: sequence of :func:`disco.worker.task_io.input_stream`
    :param input_stream: used to read from a custom file format.

    :type  notifier: :func:`disco.func.notifier`
    :param notifier: called with each proxied destination before it is read.

    :param params: passed to the ``StreamCombiner`` built for every url.

    :param ddfs: when truthy, used as ``DISCO_MASTER`` for the settings
                 passed to ``util.inputlist``.
    """
    from disco.worker import Input
    from disco.worker.task_io import StreamCombiner

    def unproxied(url):
        return proxy_url(url, to_master=False)

    def combine(url):
        # The input streams first, then the reader (if any) as last stage.
        stages = list(input_stream)
        if reader:
            stages.append(reader)
        return StreamCombiner(url, stages, params)

    if ddfs:
        settings = DiscoSettings(DISCO_MASTER=ddfs)
    else:
        settings = DiscoSettings()

    for entry in util.inputlist(urls, settings=settings):
        if isinstance(entry, basestring):
            dest = unproxied(entry)
        else:
            # A group of urls: tuple stays tuple, anything else is a list.
            replicas = [unproxied(replica) for replica in entry]
            dest = tuple(replicas) if isinstance(entry, tuple) else replicas
        notifier(dest)
        for record in Input(dest, open=combine):
            yield record
Example #17
0
def result_iterator(urls,
                    reader=task_io.chain_reader,
                    input_stream=(func.map_input_stream, ),
                    notifier=func.notifier,
                    params=None,
                    ddfs=None):
    """
    Yield, one by one, the records stored in either disco or ddfs.

    :type  reader: :func:`disco.worker.task_io.input_stream`
    :param reader: shortcut for the last input stream applied.

    :type  input_stream: sequence of :func:`disco.worker.task_io.input_stream`
    :param input_stream: used to read from a custom file format.

    :type  notifier: :func:`disco.func.notifier`
    :param notifier: called with each proxied destination before it is read.

    :param params: third argument of every ``StreamCombiner`` created here.

    :param ddfs: when truthy, becomes ``DISCO_MASTER`` in the settings used
                 to expand ``urls``.
    """
    from disco.worker import Input
    from disco.worker.task_io import StreamCombiner

    def open_streams(url):
        # ``reader`` is optional; when present it is the final stream.
        chain = list(input_stream)
        if reader:
            chain.append(reader)
        return StreamCombiner(url, chain, params)

    settings = DiscoSettings(DISCO_MASTER=ddfs) if ddfs else DiscoSettings()

    for source in util.inputlist(urls, settings=settings):
        if isinstance(source, basestring):
            dest = proxy_url(source, to_master=False)
        else:
            proxied = [proxy_url(part, to_master=False) for part in source]
            if isinstance(source, tuple):
                dest = tuple(proxied)
            else:
                dest = proxied
        notifier(dest)
        records = Input(dest, open=open_streams)
        for record in records:
            yield record
Example #18
0
 def request(self, url, data=None, offset=0, as_bytes=False):
     """
     Download ``url`` relative to ``self.master``.

     The address is ``self.master`` followed by ``url``, passed through
     ``proxy_url`` with ``self.proxy``.  The raw download is returned when
     ``as_bytes`` is truthy; otherwise it is decoded as UTF-8.

     A ``CommError`` is always re-raised.  When its ``code`` is None a hint
     naming the master is appended to its ``msg`` first.
     """
     try:
         byts = download(proxy_url('{0}{1}'.format(self.master, url),
                                   proxy=self.proxy),
                         data=data,
                         offset=offset)
         if as_bytes:
             return byts
         return byts.decode('utf-8')
     except CommError as e:
         # Identity test: None is a singleton, and ``==`` could be
         # intercepted by a custom ``__eq__``.
         if e.code is None:
             e.msg += " (is disco master running at {0}?)".format(
                 self.master)
         raise
Example #19
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Yield one opened remote blob for each replica set of ``tag``.

     A replica set is skipped unless ``blobfilter`` accepts the blob name
     of its first url.  The replicas are shuffled; each is rewritten with
     ``proxy_url`` and ``self._resolve`` and tried in turn until one opens.
     If every replica raises ``CommError``, the last such error is raised.
     """
     # Keep the failure in a name that outlives the ``except`` block: on
     # Python 3 the ``as`` target is unbound once the handler ends.
     comm_error = None
     for repl in self.urls(tag, token=token):
         if blobfilter(self.blob_name(repl[0])):
             random.shuffle(repl)
             for url in repl:
                 url = self._resolve(
                     proxy_url(url,
                               meth='GET',
                               proxy=self.proxy,
                               to_master=False))
                 try:
                     yield open_remote(url)
                     break
                 except CommError as error:
                     comm_error = error
                     continue
             else:
                 # No replica could be opened.
                 raise comm_error
Example #20
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Generate an opened remote blob for each accepted replica set of ``tag``.

     ``blobfilter`` receives the blob name of a set's first url; sets it
     rejects are skipped.  Replicas are tried in shuffled order, and the
     last ``CommError`` is raised when none of them can be opened.
     """
     last_failure = None
     for replicas in self.urls(tag, token=token):
         if not blobfilter(self.blob_name(replicas[0])):
             continue
         random.shuffle(replicas)
         for candidate in replicas:
             proxied = proxy_url(candidate, meth="GET", proxy=self.proxy, to_master=False)
             target = self._resolve(proxied)
             try:
                 yield open_remote(target)
             except CommError as error:
                 # Remember the failure and move on to the next replica.
                 last_failure = error
             else:
                 break
         else:
             # Every replica of this set failed.
             raise last_failure
Example #21
0
File: ddfs.py Project: wquan/disco
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Yield an opened remote blob for every replica set of ``tag`` that
     passes ``blobfilter``.

     The filter is applied to the blob name of the first url in the set.
     Replicas are shuffled and tried one after another; if all of them
     raise ``CommError`` the most recent error is raised.
     """
     last_failure = None
     for replicas in self.urls(tag, token=token):
         first_name = self.blob_name(replicas[0])
         if not blobfilter(first_name):
             continue
         random.shuffle(replicas)
         for candidate in replicas:
             target = self._resolve(proxy_url(candidate,
                                              meth='GET',
                                              proxy=self.proxy,
                                              to_master=False))
             try:
                 yield open_remote(target)
             except CommError as error:
                 last_failure = error
             else:
                 # Opened and consumed: done with this replica set.
                 break
         else:
             raise last_failure
Example #22
0
def xcat(program, *urls):
    """Usage: [urls ...]

    Concatenate the extracted results stored in url[s] and print to stdout.
    If any of the url[s] are tags,
    the blobs reachable from the tags will be printed after any non-tag url[s].
    """
    from itertools import chain
    from disco.core import classic_iterator
    from disco.util import iterify, reify, urlresolve, proxy_url

    tags, urls = program.separate_tags(*program.input(*urls))
    stream = reify(program.options.stream)
    reader = program.options.reader
    # Fall back to the chain reader when no reader option was given.
    reader = reify('disco.func.chain_reader' if reader is None else reader)
    # Non-tag urls first, then the blobs reachable from the tags; every
    # replica is resolved and proxied.
    bloburls = [[proxy_url(urlresolve(u), to_master=False) for u in repset]
                for repset in chain(urls, program.blobs(*tags))]
    for record in classic_iterator(bloburls,
                                   input_stream=stream,
                                   reader=reader):
        # One tab-separated line per record, trailing whitespace stripped.
        # A single-argument print(...) behaves the same as a Python 2
        # statement and as a Python 3 function call.
        print('\t'.join('%s' % (e, ) for e in iterify(record)).rstrip())
Example #23
0
def xcat(program, *urls):
    """Usage: [urls ...]

    Concatenate the extracted results stored in url[s] and print to stdout.
    If any of the url[s] are tags,
    the blobs reachable from the tags will be printed after any non-tag url[s].
    """
    from itertools import chain
    from disco.core import classic_iterator
    from disco.util import iterify, reify, urlresolve, proxy_url

    tags, urls = program.separate_tags(*program.input(*urls))
    stream = reify(program.options.stream)
    reader = program.options.reader
    # Without an explicit reader option the chain reader is used.
    reader = reify('disco.func.chain_reader' if reader is None else reader)
    # Replica sets of the plain urls come before those of the tag blobs.
    bloburls = [[proxy_url(urlresolve(u), to_master=False) for u in repset]
                for repset in chain(urls, program.blobs(*tags))]
    for record in classic_iterator(bloburls,
                                   input_stream=stream,
                                   reader=reader):
        # The print statement is Python-2-only syntax; calling print with
        # one parenthesised argument produces the same output on Python 2
        # and Python 3.
        print('\t'.join('%s' % (e,) for e in iterify(record)).rstrip())
Example #24
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Walk the blobs of ``tag``, yielding an opened remote handle for each.

     A blob is skipped when ``blobfilter`` rejects the blob name taken from
     the first url of its replica set.  The replicas of a blob are tried in
     shuffled order; when all of them raise ``CommError`` the last error is
     raised.
     """
     last_failure = None
     for replicas in self.urls(tag, token=token):
         wanted = blobfilter(self.blob_name(replicas[0]))
         if not wanted:
             continue
         random.shuffle(replicas)
         for candidate in replicas:
             proxied = proxy_url(candidate,
                                 meth='GET',
                                 proxy=self.proxy,
                                 to_master=False)
             target = self._resolve(proxied)
             try:
                 yield open_remote(target)
             except CommError as error:
                 # Keep the error around in case no replica works.
                 last_failure = error
             else:
                 break
         else:
             raise last_failure