def result_iterator(results, notifier=None, proxy=None,
                    reader=func.netstr_reader):
    """
    Iterate over the entries stored behind a list of result urls.

    :param results: iterable of result urls.  A ``dir://`` url is expanded
                    with ``util.parse_dir``; every other url is used as is.
    :param notifier: optional callable, invoked with each url right before
                     that url is read.
    :param proxy: handed to ``util.parse_dir`` and ``util.proxy_url``.
    :param reader: callable ``reader(fd, size, fname)`` returning an
                   iterable of entries; each entry is yielded unchanged.

    Raises ``JobException`` when a url is neither ``file://`` nor
    ``disco://``.  Because this is a generator, the exception surfaces only
    when iteration reaches the offending url.
    """
    urls = []
    for result in results:
        if result.startswith("dir://"):
            urls.extend(util.parse_dir(result, proxy))
        else:
            urls.append(result)

    # Only the third element of the configuration triple is needed here.
    _, _, root = util.load_conf()
    for url in urls:
        if url.startswith("file://"):
            fname = url[7:]
            # open() instead of the Python-2-only file() builtin.
            fd = open(fname)
            sze = os.stat(fname).st_size
        elif url.startswith("disco://"):
            host, fname = url[8:].split("/", 1)
            # From here on (including the notifier call) the proxied url
            # replaces the original one.
            url = util.proxy_url(proxy, fname, host)
            if util.resultfs_enabled:
                local_path = "%s/data/%s" % (root, fname)
                fd = open(local_path)
                sze = os.stat(local_path).st_size
            else:
                sze, fd = comm.open_remote(url)
        else:
            raise JobException("Invalid result url: %s" % url)

        if notifier:
            notifier(url)

        for entry in reader(fd, sze, fname):
            yield entry
def map_input_stream(stream, size, url, params):
    """
    Input stream used to check data locality.

    Splits ``url`` into three components, logs which node received it,
    asserts that the url's netloc equals ``Task.netloc`` and then opens
    ``http://<third component>/<netloc>`` remotely.
    """
    from disco import comm
    from disco.util import urlsplit

    _scheme, location, target = urlsplit(url)
    # test that scheduler preserved data locality
    msg("NODE %s GOT URL %s" % (Task.netloc, url))
    assert location == Task.netloc
    remote_url = "http://%s/%s" % (target, location)
    return comm.open_remote(remote_url)
def exists(self, tag):
    """
    Returns whether or not ``tag`` exists.

    Returns ``True`` when the tag url on ``self.master`` can be opened and
    ``False`` otherwise.  A ``CommError`` whose ``code`` is 403 or 404 is
    treated as "does not exist"; any other ``CommError`` is re-raised.
    """
    try:
        if open_remote('%s/ddfs/tag/%s' % (self.master, tagname(tag))):
            return True
    except CommError as e:
        if e.code not in (403, 404):
            raise
    # Explicit negative answer instead of an implicit None.
    return False
def input_stream(fd, size, url, params):
    """
    Open the data behind a ``disco://host/path`` url.

    When ``host`` is ``Task.host``, or the task has the ``resultfs`` flag,
    the file is opened locally from ``<Task.root>/data/<path>``.
    Otherwise it is fetched with an http client from
    ``http://host:<Task.port>/path``.  For instance, with the setting
    `DISCO_PORT = 8989`, `disco://host/path` would be converted to
    `http://host:8989/path`.
    """
    # Skip the 8 leading characters ("disco://") and split host from path.
    remainder = url[8:]
    host, fname = remainder.split("/", 1)
    readable_locally = host == Task.host or Task.has_flag("resultfs")
    if not readable_locally:
        return comm.open_remote("http://%s:%s/%s" % (host, Task.port, fname))
    local_path = os.path.join(Task.root, "data", fname)
    return comm.open_local(local_path, url)
def exists(self, tag):
    """
    Returns whether or not ``tag`` exists.

    Returns ``True`` when the resolved tag url can be opened, and also when
    the request fails with code 401: the error itself shows the tag is
    there, even though this caller may not read it.  A ``CommError`` whose
    ``code`` is 403 or 404 means "does not exist" and yields ``False``; any
    other ``CommError`` is re-raised.
    """
    try:
        if open_remote(self._resolve(canonizetag(tag))):
            return True
    except CommError as e:
        if e.code == 401:
            return True
        if e.code not in (403, 404):
            raise
    # Explicit negative answer instead of an implicit None.
    return False
def pull(self, tag, blobfilter=lambda x: True, token=None):
    """
    Iterate over the blobs in a ``tag`` after optionally applying a
    ``blobfilter`` over the blob names.

    For every replica set returned by ``self.urls(tag, token=token)`` whose
    blob name passes ``blobfilter``, the replicas are shuffled in place and
    tried in turn; the first one that opens is yielded.  If every replica
    of a blob fails with ``CommError``, the last such error is raised.
    """
    for repl in self.urls(tag, token=token):
        if blobfilter(self.blob_name(repl[0])):
            random.shuffle(repl)
            # Keep the failure in a separate name: the except target does
            # not survive its handler on Python 3.
            comm_error = None
            for url in repl:
                try:
                    yield open_remote(url)
                    break
                except CommError as error:
                    comm_error = error
                    continue
            else:
                # No replica could be opened.
                raise comm_error
def load_oob(host, name, key):
    """
    Fetch the out-of-band value stored under ``key`` for job ``name``.

    Builds an ``oob_get`` control url on ``host``.  When ``resultfs`` is
    listed in the ``DISCO_FLAGS`` setting, the expected 302 response is not
    followed: the path of its ``location`` header is instead read from
    below ``<DISCO_ROOT>/data``.  Otherwise the url is downloaded,
    following redirects.
    """
    settings = DiscoSettings()
    params = {'name': name,
              'key': key,
              'proxy': '1' if settings['DISCO_PROXY'] else '0'}
    url = '%s/disco/ctrl/oob_get?%s' % (host, urlencode(params))
    if 'resultfs' in settings['DISCO_FLAGS']:
        size, fd = open_remote(url, expect=302)
        # Keep what follows the third "/" of the location header.
        location = fd.getheader('location').split('/', 3)[-1]
        path = '%s/data/%s' % (settings['DISCO_ROOT'], location)
        # Context manager closes the handle; open() replaces the
        # Python-2-only file() builtin.
        with open(path) as oob_file:
            return oob_file.read()
    return download(url, redir=True)
def input_stream(fd, size, url, params):
    """
    Open ``url`` locally when it points at this node, remotely otherwise.

    The url is split into a netloc and a ``prefix/fname`` remainder.  When
    the first element of the netloc matches the first element of
    ``Task.netloc``, the file is opened from the local filesystem: below
    the ``DDFS_ROOT`` setting for the ``ddfs`` prefix, below ``DISCO_DATA``
    for any other prefix.  Otherwise it is fetched over http from
    ``http://<netloc>/<prefix>/<fname>``.
    """
    scheme, netloc, rest = urlsplit(url)
    prefix, fname = rest.split('/', 1)
    on_this_node = netloc[0] == Task.netloc[0]
    if not on_this_node:
        return comm.open_remote('http://%s/%s/%s' % (netloc, prefix, fname))
    root_setting = 'DDFS_ROOT' if prefix == 'ddfs' else 'DISCO_DATA'
    local_path = os.path.join(Task.settings[root_setting], fname)
    return comm.open_local(local_path)
def pull(self, tag, blobfilter=lambda x: True, token=None):
    """
    Iterate over the blobs in a ``tag`` after optionally applying a
    ``blobfilter`` over the blob names.

    For every replica set returned by ``self.urls(tag, token=token)`` whose
    blob name passes ``blobfilter``, the replicas are shuffled in place and
    tried in turn.  Each url is first passed through ``proxy_url`` and
    ``self._resolve``; the first one that opens is yielded.  If every
    replica of a blob fails with ``CommError``, the last such error is
    raised.
    """
    for repl in self.urls(tag, token=token):
        if blobfilter(self.blob_name(repl[0])):
            random.shuffle(repl)
            # Keep the failure in a separate name: the except target does
            # not survive its handler on Python 3.
            comm_error = None
            for url in repl:
                url = self._resolve(
                    proxy_url(url,
                              meth='GET',
                              proxy=self.proxy,
                              to_master=False))
                try:
                    yield open_remote(url)
                    break
                except CommError as error:
                    comm_error = error
                    continue
            else:
                # No replica could be opened.
                raise comm_error
def load_oob(host, name, key):
    """
    Fetch the out-of-band value stored under ``key`` for job ``name``.

    Builds an ``oob_get`` control url on ``host``; its ``proxy`` parameter
    is 1 when ``DISCO_PROXY`` is present in the environment and 0
    otherwise.  When ``resultfs_enabled`` is true, the expected 302
    response is not followed: the path of its ``location`` header is
    instead read from below ``<ROOT>/data``.  Otherwise the url is
    downloaded, following redirects.

    Raises ``DiscoError`` if the local file cannot be read.
    """
    use_proxy = "DISCO_PROXY" in os.environ
    # %d renders the boolean as 0 or 1.
    url = "%s/disco/ctrl/oob_get?name=%s&key=%s&proxy=%d" % (
        host, name, key, use_proxy)
    if resultfs_enabled:
        sze, fd = open_remote(url, expect=302)
        loc = fd.getheader("location")
        # Drop the first three "/"-separated pieces of the location.
        fname = "%s/data/%s" % (ROOT, "/".join(loc.split("/")[3:]))
        try:
            # Context manager closes the handle; open() replaces the
            # Python-2-only file() builtin.
            with open(fname) as oob_file:
                return oob_file.read()
        except KeyboardInterrupt:
            # Never convert an interrupt into a DiscoError.
            raise
        except Exception:
            raise DiscoError("OOB key (%s) not found at %s" % (key, fname))
    else:
        return download(url, redir=True)
def pull(self, tag, blobfilter=lambda x: True, token=None):
    """
    Iterate over the blobs in a ``tag`` after optionally applying a
    ``blobfilter`` over the blob names.

    For every replica set returned by ``self.urls(tag, token=token)`` whose
    blob name passes ``blobfilter``, the replicas are shuffled in place and
    tried in turn.  Each url is first passed through ``proxy_url`` and
    ``self._resolve``; the first one that opens is yielded.  If every
    replica of a blob fails with ``CommError``, the last such error is
    raised.
    """
    for repl in self.urls(tag, token=token):
        if blobfilter(self.blob_name(repl[0])):
            random.shuffle(repl)
            # Keep the failure in a separate name: the except target does
            # not survive its handler on Python 3.
            comm_error = None
            for url in repl:
                url = self._resolve(
                    proxy_url(url,
                              meth='GET',
                              proxy=self.proxy,
                              to_master=False)
                )
                try:
                    yield open_remote(url)
                    break
                except CommError as error:
                    comm_error = error
                    continue
            else:
                # No replica could be opened.
                raise comm_error
def pull(self, tag, blobfilter=lambda x: True, token=None):
    """
    Yield an open handle for each blob of ``tag`` whose name is accepted
    by ``blobfilter``.

    The replicas of a blob are shuffled in place and tried one after
    another, each url going through ``proxy_url`` and ``self._resolve``
    first.  The first replica that opens is yielded; if all of them fail
    with ``CommError``, the most recent error is raised.
    """
    last_failure = None
    for replicas in self.urls(tag, token=token):
        if not blobfilter(self.blob_name(replicas[0])):
            continue
        random.shuffle(replicas)
        for candidate in replicas:
            proxied = proxy_url(candidate,
                                meth="GET",
                                proxy=self.proxy,
                                to_master=False)
            resolved = self._resolve(proxied)
            try:
                yield open_remote(resolved)
            except CommError as failure:
                # Remember the failure and move on to the next replica.
                last_failure = failure
            else:
                break
        else:
            # Every replica of this blob failed.
            raise last_failure
def pull(self, tag, blobfilter=lambda x: True, token=None):
    """
    Yield an open handle for each blob of ``tag`` whose name is accepted
    by ``blobfilter``.

    The replicas of a blob are shuffled in place and tried one after
    another, each url going through ``proxy_url`` and ``self._resolve``
    first.  The first replica that opens is yielded; if all of them fail
    with ``CommError``, the most recent error is raised.
    """
    last_failure = None
    for replicas in self.urls(tag, token=token):
        if not blobfilter(self.blob_name(replicas[0])):
            continue
        random.shuffle(replicas)
        for candidate in replicas:
            proxied = proxy_url(candidate,
                                meth='GET',
                                proxy=self.proxy,
                                to_master=False)
            resolved = self._resolve(proxied)
            try:
                yield open_remote(resolved)
            except CommError as failure:
                # Remember the failure and move on to the next replica.
                last_failure = failure
            else:
                break
        else:
            # Every replica of this blob failed.
            raise last_failure
def input_stream(fd, sze, url, params):
    """
    Open ``url`` with the http client in ``comm`` and return the result.

    ``fd``, ``sze`` and ``params`` are accepted but not used.
    """
    remote = comm.open_remote(url)
    return remote
def map_input_stream(stream, size, url, params):
    """
    Input stream used to check that the scheduler observed the blacklist.

    Splits ``url`` into a scheme, a ``(host, port)`` pair and a third
    component, logs which node received the url, asserts
    ``Task.host <= host`` and then opens
    ``http://<third component>/<host>`` remotely.
    """
    _scheme, (host, _port), test_server = urlsplit(url)
    # test that scheduler observed the blacklist
    msg("NODE {0} GOT URL {1}".format(Task.host, url))
    assert Task.host <= host
    target = "http://{0}/{1}".format(test_server, host)
    return open_remote(target)
def open_remote(input, ext_host, ext_file):
    """
    Open the external input file ``http://<ext_host><ext_file>``.

    Returns whatever ``comm.open_remote`` returns.  Any exception raised
    while opening is reported through ``data_err`` together with the host,
    the file and the original exception.  ``input`` is accepted but not
    used.
    """
    try:
        return comm.open_remote("http://%s%s" % (ext_host, ext_file))
    except Exception as x:
        data_err("Can't access an external input file (%s%s): %s"
                 % (ext_host, ext_file, x), x)
def input_stream(fd, sze, url, params):
    """
    Open ``url`` with the http client in ``disco.comm`` and return the
    result.

    ``fd``, ``sze`` and ``params`` are accepted but not used.
    """
    # Imported inside the function, as in the original.
    from disco import comm
    remote = comm.open_remote(url)
    return remote
def map_input_stream(stream, size, url, params):
    """
    Input stream used to check that the scheduler observed the blacklist.

    Splits ``url`` into a scheme, a ``(host, port)`` pair and a third
    component, prints which node received the url, asserts
    ``Task.host <= host`` and then opens
    ``http://<third component>/<host>`` remotely.
    """
    _scheme, (host, _port), test_server = urlsplit(url)
    # test that scheduler observed the blacklist
    print("NODE {0} GOT URL {1}".format(Task.host, url))
    assert Task.host <= host
    target = "http://{0}/{1}".format(test_server, host)
    return open_remote(target)
def map_input_stream(stream, size, url, params):
    """
    Input stream used to check that the scheduler preserved data locality.

    Splits ``url`` into a scheme, a ``(host, port)`` pair and a third
    component, logs which node received the url, asserts that the url's
    host equals ``Task.host`` and then opens
    ``http://<third component>/<host>`` remotely.
    """
    _scheme, (host, _port), test_server = urlsplit(url)
    # test that scheduler preserved data locality
    msg("NODE {0} GOT URL {1}".format(Task.host, url))
    assert Task.host == host
    target = "http://{0}/{1}".format(test_server, host)
    return open_remote(target)
def map_input_stream(stream, size, url, params):
    """
    Input stream used to check that the scheduler preserved data locality.

    Splits ``url`` into a scheme, a ``(host, port)`` pair and a third
    component, logs which node received the url, asserts that the url's
    host equals ``Task.host`` and then opens
    ``http://<third component>/<host>`` remotely.
    """
    _scheme, (host, _port), test_server = urlsplit(url)
    # test that scheduler preserved data locality
    msg("NODE %s GOT URL %s" % (Task.host, url))
    assert Task.host == host
    target = "http://%s/%s" % (test_server, host)
    return open_remote(target)
def open_url(self, url):
    """
    Open ``url`` locally or remotely depending on its scheme.

    The url is split with ``self.host`` as the localhost.  An empty scheme
    or the ``file`` scheme opens the path component locally; any other
    scheme reopens ``scheme://netloc/path`` remotely.
    """
    scheme, netloc, rest = util.urlsplit(url, localhost=self.host)
    if scheme and scheme != 'file':
        return comm.open_remote('%s://%s/%s' % (scheme, netloc, rest))
    return comm.open_local(rest)