Example #1
0
def result_iterator(results, notifier=None, proxy=None,
                    reader=func.netstr_reader):
        """Generate the items that ``reader`` produces for each result url.

        Entries of ``results`` starting with ``dir://`` are expanded through
        ``util.parse_dir(entry, proxy)``; every other entry is used as is.

        Each resulting url is then opened according to its scheme:

        - ``file://`` urls are opened from the local filesystem.
        - ``disco://host/path`` urls are first rewritten with
          ``util.proxy_url``. The data is read from ``<root>/data/<path>``
          when ``util.resultfs_enabled`` is true, and fetched with
          ``comm.open_remote`` otherwise.
        - any other url raises ``JobException``.

        If ``notifier`` is given it is called with the url (the rewritten
        one for ``disco://`` entries) right before its data is read.
        """
        urls = []
        for entry in results:
                if not entry.startswith("dir://"):
                        urls.append(entry)
                else:
                        urls.extend(util.parse_dir(entry, proxy))

        # Only the third value returned by load_conf() is used here.
        _first, _second, root = util.load_conf()

        for url in urls:
                if url.startswith("file://"):
                        fname = url[len("file://"):]
                        fd = file(fname)
                        sze = os.stat(fname).st_size
                elif url.startswith("disco://"):
                        host, fname = url[len("disco://"):].split("/", 1)
                        url = util.proxy_url(proxy, fname, host)
                        if not util.resultfs_enabled:
                                sze, fd = comm.open_remote(url)
                        else:
                                local_path = "%s/data/%s" % (root, fname)
                                fd = file(local_path)
                                sze = os.stat(local_path).st_size
                else:
                        raise JobException("Invalid result url: %s" % url)

                if notifier:
                        notifier(url)

                for item in reader(fd, sze, fname):
                        yield item
Example #2
0
 def map_input_stream(stream, size, url, params):
     """Check data locality for ``url`` and open the test resource.

     Logs the node and url with ``msg``, asserts that the netloc of ``url``
     equals ``Task.netloc``, and then opens ``http://<path>/<netloc>``:
     the path component of ``url`` is used as the HTTP host and its netloc
     as the request path. ``stream``, ``size`` and ``params`` are unused.
     """
     from disco import comm
     from disco.util import urlsplit
     _scheme, netloc, path = urlsplit(url)
     # the scheduler is expected to have kept this input on its own node
     msg("NODE %s GOT URL %s" % (Task.netloc, url))
     assert netloc == Task.netloc
     target = "http://%s/%s" % (path, netloc)
     return comm.open_remote(target)
Example #3
0
 def exists(self, tag):
     """Returns whether or not ``tag`` exists.

     Opens ``<self.master>/ddfs/tag/<tagname(tag)>`` with ``open_remote``.

     Returns ``True`` when that call returns a truthy value, and ``False``
     when it returns a falsy value or fails with a ``CommError`` whose
     ``code`` is 403 or 404. A ``CommError`` with any other code is
     re-raised.
     """
     try:
         if open_remote('%s/ddfs/tag/%s' % (self.master, tagname(tag))):
             return True
     except CommError as e:
         if e.code not in (403, 404):
             raise
     # Explicit negative answer instead of implicitly returning None.
     return False
Example #4
0
def input_stream(fd, size, url, params):
    """Open a ``disco://host/path`` url, locally when possible.

    When ``host`` equals ``Task.host`` or the task has the ``resultfs``
    flag, the data is opened from ``<Task.root>/data/<path>`` with
    ``comm.open_local``. Otherwise it is opened with an http client using
    ``Task.port``: for instance, with a port of 8989, `disco://host/path`
    is fetched from `http://host:8989/path`.

    ``fd``, ``size`` and ``params`` are unused.
    """
    # Strip the 8-character "disco://" prefix, then split host from path.
    host, fname = url[8:].split("/", 1)
    readable_locally = host == Task.host or Task.has_flag("resultfs")
    if not readable_locally:
        remote_url = "http://%s:%s/%s" % (host, Task.port, fname)
        return comm.open_remote(remote_url)
    local_path = os.path.join(Task.root, "data", fname)
    return comm.open_local(local_path, url)
Example #5
0
 def exists(self, tag):
     """Returns whether or not ``tag`` exists.

     Opens the url ``self._resolve(canonizetag(tag))`` with ``open_remote``.

     Returns ``True`` when that call returns a truthy value or fails with
     a ``CommError`` whose ``code`` is 401 (presumably the tag exists but
     access to it is not authorized -- confirm against the server).
     Returns ``False`` when the call returns a falsy value or fails with
     code 403 or 404. A ``CommError`` with any other code is re-raised.
     """
     try:
         if open_remote(self._resolve(canonizetag(tag))):
             return True
     except CommError as e:
         if e.code == 401:
             return True
         if e.code not in (403, 404):
             raise
     # Explicit negative answer instead of implicitly returning None.
     return False
Example #6
0
 def exists(self, tag):
     """Returns whether or not ``tag`` exists.

     Opens the url ``self._resolve(canonizetag(tag))`` with ``open_remote``.

     Returns ``True`` when that call returns a truthy value or fails with
     a ``CommError`` whose ``code`` is 401 (presumably the tag exists but
     access to it is not authorized -- confirm against the server).
     Returns ``False`` when the call returns a falsy value or fails with
     code 403 or 404. A ``CommError`` with any other code is re-raised.
     """
     try:
         if open_remote(self._resolve(canonizetag(tag))):
             return True
     except CommError as e:
         if e.code == 401:
             return True
         if e.code not in (403, 404):
             raise
     # Explicit negative answer instead of implicitly returning None.
     return False
Example #7
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Iterate over the blobs in a ``tag`` after optionally applying
     a ``blobfilter`` over the blob names.

     For every replica list returned by ``self.urls(tag, token=token)``
     whose first url's blob name passes ``blobfilter``, the replicas are
     shuffled in place and tried in turn. The result of the first
     ``open_remote`` call that does not raise ``CommError`` is yielded.
     If every replica fails, the last ``CommError`` is raised.
     """
     comm_error = None
     for repl in self.urls(tag, token=token):
         if blobfilter(self.blob_name(repl[0])):
             random.shuffle(repl)
             for url in repl:
                 try:
                     yield open_remote(url)
                     break
                 except CommError as error:
                     # Keep the failure in a name that outlives the
                     # except clause so it can be raised below.
                     comm_error = error
                     continue
             else:
                 # No replica could be opened.
                 raise comm_error
Example #8
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Iterate over the blobs in a ``tag`` after optionally applying
     a ``blobfilter`` over the blob names.

     For every replica list returned by ``self.urls(tag, token=token)``
     whose first url's blob name passes ``blobfilter``, the replicas are
     shuffled in place and tried in turn. The result of the first
     ``open_remote`` call that does not raise ``CommError`` is yielded.
     If every replica fails, the last ``CommError`` is raised.
     """
     comm_error = None
     for repl in self.urls(tag, token=token):
         if blobfilter(self.blob_name(repl[0])):
             random.shuffle(repl)
             for url in repl:
                 try:
                     yield open_remote(url)
                     break
                 except CommError as error:
                     # Keep the failure in a name that outlives the
                     # except clause so it can be raised below.
                     comm_error = error
                     continue
             else:
                 # No replica could be opened.
                 raise comm_error
Example #9
0
File: util.py Project: davin/disco
def load_oob(host, name, key):
    """Return the out-of-band value stored under ``key`` for job ``name``.

    Builds ``<host>/disco/ctrl/oob_get?...`` with ``name``, ``key`` and a
    ``proxy`` flag ('1' when the ``DISCO_PROXY`` setting is truthy, else
    '0') as url-encoded query parameters.

    When ``'resultfs'`` is in the ``DISCO_FLAGS`` setting, the request is
    expected to answer with a 302 redirect. Everything after the third
    ``/`` of its ``location`` header is taken as a path relative to
    ``<DISCO_ROOT>/data`` and that file's contents are returned.
    Otherwise the url is downloaded, following redirects.
    """
    settings = DiscoSettings()
    params = {'name': name,
              'key': key,
              'proxy': '1' if settings['DISCO_PROXY'] else '0'}
    url = '%s/disco/ctrl/oob_get?%s' % (host, urlencode(params))
    if 'resultfs' in settings['DISCO_FLAGS']:
        size, fd = open_remote(url, expect=302)
        location = fd.getheader('location').split('/', 3)[-1]
        path = '%s/data/%s' % (settings['DISCO_ROOT'], location)
        # Close the file deterministically instead of relying on garbage
        # collection of the anonymous file object.
        oob_file = open(path)
        try:
            return oob_file.read()
        finally:
            oob_file.close()
    return download(url, redir=True)
Example #10
0
def input_stream(fd, size, url, params):
    """
    Open ``url`` from the local disk when possible, else over http.

    The url is split into scheme, netloc and the remainder; the remainder
    is split once more into ``prefix`` and ``fname``. When the first
    element of the netloc equals the first element of ``Task.netloc``,
    ``fname`` is opened with ``comm.open_local`` under the ``DDFS_ROOT``
    setting (prefix ``ddfs``) or the ``DISCO_DATA`` setting (any other
    prefix). Otherwise ``http://<netloc>/<prefix>/<fname>`` is opened with
    ``comm.open_remote``.

    ``fd``, ``size`` and ``params`` are unused.
    """
    scheme, netloc, rest = urlsplit(url)
    prefix, fname = rest.split('/', 1)
    same_node = netloc[0] == Task.netloc[0]
    if same_node:
        settings_key = 'DISCO_DATA'
        if prefix == 'ddfs':
            settings_key = 'DDFS_ROOT'
        local_path = os.path.join(Task.settings[settings_key], fname)
        return comm.open_local(local_path)
    remote_url = 'http://%s/%s/%s' % (netloc, prefix, fname)
    return comm.open_remote(remote_url)
Example #11
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Iterate over the blobs in a ``tag`` after optionally applying
     a ``blobfilter`` over the blob names.

     For every replica list returned by ``self.urls(tag, token=token)``
     whose first url's blob name passes ``blobfilter``, the replicas are
     shuffled in place and tried in turn. Each url is passed through
     ``proxy_url`` and ``self._resolve`` before being opened. The result
     of the first ``open_remote`` call that does not raise ``CommError``
     is yielded. If every replica fails, the last ``CommError`` is raised.
     """
     comm_error = None
     for repl in self.urls(tag, token=token):
         if blobfilter(self.blob_name(repl[0])):
             random.shuffle(repl)
             for url in repl:
                 url = self._resolve(
                     proxy_url(url,
                               meth='GET',
                               proxy=self.proxy,
                               to_master=False))
                 try:
                     yield open_remote(url)
                     break
                 except CommError as error:
                     # Keep the failure in a name that outlives the
                     # except clause so it can be raised below.
                     comm_error = error
                     continue
             else:
                 # No replica could be opened.
                 raise comm_error
Example #12
0
def load_oob(host, name, key):
        """Return the out-of-band value stored under ``key`` for job ``name``.

        The request url is ``<host>/disco/ctrl/oob_get`` with ``name``,
        ``key`` and a ``proxy`` flag (1 when ``DISCO_PROXY`` is present in
        the environment, else 0) in the query string.

        Without resultfs the url is downloaded, following redirects. With
        resultfs the request is expected to answer with a 302 redirect;
        the path components after the third ``/`` of its ``location``
        header are read as a file below ``<ROOT>/data``. Any failure to
        read that file is reported as ``DiscoError``.
        """
        proxy_flag = "DISCO_PROXY" in os.environ
        url = "%s/disco/ctrl/oob_get?name=%s&key=%s&proxy=%d" % (
                host, name, key, proxy_flag)
        if not resultfs_enabled:
                return download(url, redir = True)

        sze, fd = open_remote(url, expect = 302)
        location = fd.getheader("location")
        relative = "/".join(location.split("/")[3:])
        fname = "%s/data/%s" % (ROOT, relative)
        try:
                return file(fname).read()
        except KeyboardInterrupt:
                # never convert a user interrupt into a DiscoError
                raise
        except Exception:
                raise DiscoError("OOB key (%s) not found at %s" % (key, fname))
Example #13
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Iterate over the blobs in a ``tag`` after optionally applying
     a ``blobfilter`` over the blob names.

     For every replica list returned by ``self.urls(tag, token=token)``
     whose first url's blob name passes ``blobfilter``, the replicas are
     shuffled in place and tried in turn. Each url is passed through
     ``proxy_url`` and ``self._resolve`` before being opened. The result
     of the first ``open_remote`` call that does not raise ``CommError``
     is yielded. If every replica fails, the last ``CommError`` is raised.
     """
     comm_error = None
     for repl in self.urls(tag, token=token):
         if blobfilter(self.blob_name(repl[0])):
             random.shuffle(repl)
             for url in repl:
                 url = self._resolve(
                     proxy_url(url,
                               meth='GET',
                               proxy=self.proxy,
                               to_master=False)
                 )
                 try:
                     yield open_remote(url)
                     break
                 except CommError as error:
                     # Keep the failure in a name that outlives the
                     # except clause so it can be raised below.
                     comm_error = error
                     continue
             else:
                 # No replica could be opened.
                 raise comm_error
Example #14
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Yield one opened blob per replica set of ``tag``.

     A replica set is skipped when ``blobfilter`` rejects the blob name of
     its first url. Otherwise its urls are shuffled in place and tried one
     by one (after ``proxy_url`` and ``self._resolve``) until
     ``open_remote`` succeeds. If all of them fail with ``CommError``, the
     last such error is raised.
     """
     comm_error = None
     for replicas in self.urls(tag, token=token):
         if not blobfilter(self.blob_name(replicas[0])):
             continue
         random.shuffle(replicas)
         for candidate in replicas:
             proxied = proxy_url(candidate, meth="GET", proxy=self.proxy, to_master=False)
             resolved = self._resolve(proxied)
             try:
                 yield open_remote(resolved)
             except CommError as error:
                 # remember the failure and move on to the next replica
                 comm_error = error
             else:
                 break
         else:
             raise comm_error
Example #15
0
 def pull(self, tag, blobfilter=lambda x: True, token=None):
     """
     Yield one opened blob per replica set of ``tag``.

     A replica set is skipped when ``blobfilter`` rejects the blob name of
     its first url. Otherwise its urls are shuffled in place and tried one
     by one (after ``proxy_url`` and ``self._resolve``) until
     ``open_remote`` succeeds. If all of them fail with ``CommError``, the
     last such error is raised.
     """
     comm_error = None
     for replicas in self.urls(tag, token=token):
         if not blobfilter(self.blob_name(replicas[0])):
             continue
         random.shuffle(replicas)
         for candidate in replicas:
             proxied = proxy_url(candidate,
                                 meth='GET',
                                 proxy=self.proxy,
                                 to_master=False)
             resolved = self._resolve(proxied)
             try:
                 yield open_remote(resolved)
             except CommError as error:
                 # remember the failure and move on to the next replica
                 comm_error = error
             else:
                 break
         else:
             raise comm_error
Example #16
0
def input_stream(fd, sze, url, params):
    """Return whatever ``comm.open_remote`` gives for ``url``.

    ``fd``, ``sze`` and ``params`` are accepted but not used.
    """
    remote = comm.open_remote(url)
    return remote
Example #17
0
 def map_input_stream(stream, size, url, params):
     """Check the blacklist for ``url`` and open the test resource.

     ``url`` is split into a scheme, a ``(host, port)`` pair and the test
     server address. The node and url are logged with ``msg``, then
     ``Task.host <= host`` is asserted, and finally
     ``http://<test_server>/<host>`` is opened. ``stream``, ``size`` and
     ``params`` are unused.
     """
     _scheme, netloc, test_server = urlsplit(url)
     host, _port = netloc
     # the scheduler is expected to have honoured the blacklist
     msg("NODE {0} GOT URL {1}".format(Task.host, url))
     assert Task.host <= host
     target = "http://{0}/{1}".format(test_server, host)
     return open_remote(target)
Example #18
0
def open_remote(input, ext_host, ext_file):
        try:
                return comm.open_remote("http://%s%s" % (ext_host, ext_file))
        except Exception, x:
                data_err("Can't access an external input file (%s%s): %s"\
                         % (ext_host, ext_file, x), x)
Example #19
0
def input_stream(fd, sze, url, params):
    """Return whatever ``disco.comm.open_remote`` gives for ``url``.

    ``fd``, ``sze`` and ``params`` are accepted but not used.
    """
    from disco import comm
    remote = comm.open_remote(url)
    return remote
Example #20
0
 def map_input_stream(stream, size, url, params):
     """Check the blacklist for ``url`` and open the test resource.

     ``url`` is split into a scheme, a ``(host, port)`` pair and the test
     server address. The node and url are printed, then
     ``Task.host <= host`` is asserted, and finally
     ``http://<test_server>/<host>`` is opened. ``stream``, ``size`` and
     ``params`` are unused.
     """
     _scheme, netloc, test_server = urlsplit(url)
     host, _port = netloc
     # the scheduler is expected to have honoured the blacklist
     print("NODE {0} GOT URL {1}".format(Task.host, url))
     assert Task.host <= host
     target = "http://{0}/{1}".format(test_server, host)
     return open_remote(target)
Example #21
0
 def map_input_stream(stream, size, url, params):
     """Check data locality for ``url`` and open the test resource.

     ``url`` is split into a scheme, a ``(host, port)`` pair and the test
     server address. The node and url are logged with ``msg``, then
     ``Task.host == host`` is asserted, and finally
     ``http://<test_server>/<host>`` is opened. ``stream``, ``size`` and
     ``params`` are unused.
     """
     _scheme, netloc, test_server = urlsplit(url)
     host, _port = netloc
     # the scheduler is expected to have kept this input on its own node
     msg("NODE {0} GOT URL {1}".format(Task.host, url))
     assert Task.host == host
     target = "http://{0}/{1}".format(test_server, host)
     return open_remote(target)
Example #22
0
 def map_input_stream(stream, size, url, params):
     """Check data locality for ``url`` and open the test resource.

     ``url`` is split into a scheme, a ``(host, port)`` pair and the test
     server address. The node and url are logged with ``msg``, then
     ``Task.host == host`` is asserted, and finally
     ``http://<test_server>/<host>`` is opened. ``stream``, ``size`` and
     ``params`` are unused.
     """
     _scheme, netloc, test_server = urlsplit(url)
     host, _port = netloc
     # the scheduler is expected to have kept this input on its own node
     msg("NODE %s GOT URL %s" % (Task.host, url))
     assert Task.host == host
     target = "http://%s/%s" % (test_server, host)
     return open_remote(target)
Example #23
0
 def open_url(self, url):
     """Open ``url`` locally or remotely depending on its scheme.

     The url is split with ``util.urlsplit`` using ``self.host`` as the
     local host. An empty scheme or the ``file`` scheme opens the path
     part with ``comm.open_local``; any other scheme opens
     ``<scheme>://<netloc>/<rest>`` with ``comm.open_remote``.
     """
     scheme, netloc, rest = util.urlsplit(url, localhost=self.host)
     if scheme and scheme != 'file':
         remote_url = '%s://%s/%s' % (scheme, netloc, rest)
         return comm.open_remote(remote_url)
     return comm.open_local(rest)