def run(self, **kwargs):
    """
    Returns the job immediately after the request has been submitted.

    A typical pattern in Disco scripts is to run a job synchronously,
    that is, to block the script until the job has finished.
    This is accomplished as follows::

        from disco.core import Disco
        results = Disco(master).new_job(...).wait()

    Note that job methods of the :class:`Disco` class are directly
    accessible through the :class:`Job` object, such as :meth:`Disco.wait`
    above.

    A :class:`JobError` is raised if an error occurs while starting the job.
    """
    if 'nr_reduces' in kwargs:
        from warnings import warn
        warn("Use partitions instead of nr_reduces", DeprecationWarning)
        if 'partitions' in kwargs or 'merge_partitions' in kwargs:
            raise DeprecationWarning("Cannot specify nr_reduces with "
                                     "partitions and/or merge_partitions")
        kwargs['partitions'] = kwargs.pop('nr_reduces')
    jobpack = Job.JobDict(self,
                          prefix=self.name,
                          ddfs=self.master.master,
                          **kwargs).pack()
    reply = json.loads(self.master.request('/disco/job/new', jobpack))
    if reply[0] != 'ok':
        # reply is a decoded JSON list; interpolate instead of concatenating
        raise DiscoError("Failed to start a job. Server replied: %s" % reply)
    self.name = reply[1]
    return self
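# A minimal sketch of the pattern run() enables: submit a job, keep its
# handle, and block on wait() only when the results are needed. The master
# address, job name, input URL, and map function below are illustrative
# assumptions, not part of the original code.
from disco.core import Disco

def fun_map(e, params):
    return [(w, 1) for w in e.split()]

disco = Disco('http://localhost:8989')
job = disco.new_job(name='wordcount',
                    input=['http://example.com/data.txt'],
                    map=fun_map)
results = job.wait()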
def add_node():
    orig_config = json.loads(
        disco.request("/disco/ctrl/load_config_table"))
    config = orig_config[:]
    config.append(["missingnode", "2"])
    r = disco.request("/disco/ctrl/save_config_table", json.dumps(config))
    if r != "\"table saved!\"":
        raise Exception("Couldn't add a dummy node: %s" % r)
    return orig_config
def put(self, tag, urls):
    """Put the list of ``urls`` to the tag ``tag``.

    .. warning::

        Generally speaking, concurrent applications should use
        :meth:`DDFS.tag` instead.
    """
    from comm_httplib import download
    status, body = download('%s/ddfs/tag/%s' % (self.master, tagname(tag)),
                            data=json.dumps(urls),
                            method='PUT')
    return json.loads(body)
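# A hedged usage sketch for put(): overwrite the url list of a tag in one
# request. The master address, tag name, and replica url are assumptions
# for illustration; as the warning above says, concurrent writers should
# prefer DDFS.tag.
ddfs = DDFS('http://localhost:8989')
ddfs.put('data:example',
         [['http://node01:8989/ddfs/data/blob']])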
def results(self, jobspec, timeout=2000):
    """
    Returns a list of results for a single job or for many
    concurrently running jobs, depending on the type of *jobspec*.

    If *jobspec* is a string (job name) or the function is called through
    the job object (``job.results()``), this function returns a list of
    results for the job if the results become available in *timeout*
    milliseconds. If not, returns an empty list.

    (*Added in version 0.2.1*)
    If *jobspec* is a list of jobs, the function waits at most for
    *timeout* milliseconds for at least one of the jobs to finish. In this
    mode, *jobspec* can be a list of strings (job names), a list of job
    objects, or a list of result entries as returned by this function.
    Two lists are returned: a list of finished jobs and a list of still
    active jobs. Both lists contain elements of the following type::

        ["job name", ["status", [results]]]

    where status is either ``unknown_job``, ``dead``, ``active`` or
    ``ready``.

    You can use the latter mode as an efficient way to wait for several
    jobs to finish. Consider the following example that prints out results
    of jobs as soon as they finish. Here ``jobs`` is initially a list of
    jobs, produced by several calls to :meth:`Disco.new_job`::

        while jobs:
            ready, jobs = disco.results(jobs)
            for name, results in ready:
                for k, v in result_iterator(results[1]):
                    print k, v
                disco.purge(name)

    Note how the list of active jobs, ``jobs``, returned by
    :meth:`Disco.results` can be used as the input to the function itself.
    """
    jobspecifier = JobSpecifier(jobspec)
    data = json.dumps([timeout, list(jobspecifier.jobnames)])
    results = json.loads(self.request('/disco/ctrl/get_results', data))
    if isinstance(jobspec, basestring):
        return results[0][1]
    others, active = [], []
    for result in results:
        if result[1][0] == 'active':
            active.append(result)
        else:
            others.append(result)
    return others, active
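# A sketch of the single-job mode: when jobspec is a job name string, the
# code above returns results[0][1], i.e. the ["status", [results]] pair
# for that job. The job name and master address are illustrative.
disco = Disco('http://localhost:8989')
status, urls = disco.results('wordcount@549:17de0')
if status == 'ready':
    for k, v in result_iterator(urls):
        print k, v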
def results(self, jobspec, timeout=2000):
    jobspecifier = JobSpecifier(jobspec)
    data = json.dumps([timeout, list(jobspecifier.jobnames)])
    results = json.loads(self.request("/disco/ctrl/get_results", data))
    # isinstance handles unicode job names too, unlike type() == str
    if isinstance(jobspec, basestring):
        return results[0][1]
    others, active = [], []
    for result in results:
        if result[1][0] == "active":
            active.append(result)
        else:
            others.append(result)
    return others, active
def event_iter(events):
    offs = offset
    lines = events.split('\n')
    for i, line in enumerate(lines):
        if len(line):
            offs += len(line) + 1
            try:
                event = tuple(json.loads(line))
            except ValueError:
                break
            # HTTP range request doesn't like empty ranges:
            # Let's ensure that at least the last newline
            # is always retrieved.
            if i == len(lines) - 1 and events.endswith('\n'):
                offs -= 1
            yield offs, event
def event_iter(events):
    offs = offset
    lines = events.splitlines()
    for i, line in enumerate(lines):
        if len(line):
            offs += len(line) + 1
            try:
                event = tuple(json.loads(line))
            except ValueError:
                break
            # HTTP range request doesn't like empty ranges:
            # Let's ensure that at least the last newline
            # is always retrieved.
            if i == len(lines) - 1 and events.endswith('\n'):
                offs -= 1
            yield offs, event
def event_iter(events):
    offs = offset
    lines = events.splitlines()
    for i, l in enumerate(lines):
        offs += len(l) + 1
        if not len(l):
            continue
        try:
            ent = tuple(json.loads(l))
        except ValueError:
            break
        # HTTP range request doesn't like empty ranges:
        # Let's ensure that at least the last newline
        # is always retrieved.
        if i == len(lines) - 1:
            offs -= 1
        yield offs, ent
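# A self-contained sketch of the input the event_iter variants above
# consume: a buffer of newline-delimited JSON events fetched from some
# byte offset of the master's event log. The event payload and the
# starting offset are made up for illustration.
import json

offset = 0
events = '["2011/01/01 00:00:00","master","Starting job"]\n'

def demo_iter(events):
    offs = offset
    for line in events.splitlines():
        offs += len(line) + 1
        yield offs, tuple(json.loads(line))

for offs, event in demo_iter(events):
    print offs, event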
def run(self, **kwargs):
    """
    Returns the job immediately after the request has been submitted.

    Accepts the same set of keyword arguments as :class:`JobDict`.

    A typical pattern in Disco scripts is to run a job synchronously,
    that is, to block the script until the job has finished.
    This is accomplished as follows::

        from disco.core import Disco
        results = Disco(master).new_job(...).wait()

    Note that job methods of the :class:`Disco` class are directly
    accessible through the :class:`Job` object, such as :meth:`Disco.wait`
    above.

    A :class:`JobError` is raised if an error occurs while starting the job.
    """
    from warnings import warn
    if 'nr_reduces' in kwargs:
        warn("Use partitions instead of nr_reduces", DeprecationWarning)
        if 'partitions' in kwargs or 'merge_partitions' in kwargs:
            raise DeprecationWarning("Cannot specify nr_reduces with "
                                     "partitions and/or merge_partitions")
        kwargs['partitions'] = kwargs.pop('nr_reduces')
    if 'mem_sort_limit' in kwargs:
        warn("mem_sort_limit deprecated: sort=True always uses disk sort",
             DeprecationWarning)
    jobpack = Job.JobDict(self,
                          prefix=self.name,
                          ddfs=self.master.master,
                          **kwargs).pack()
    reply = json.loads(self.master.request('/disco/job/new', jobpack))
    if reply[0] != 'ok':
        # reply is a decoded JSON list; interpolate instead of concatenating
        raise DiscoError("Failed to start a job. Server replied: %s" % reply)
    self.name = reply[1]
    return self
def jobinfo(self, name):
    """Returns a dictionary containing information about the job *name*."""
    return json.loads(self.request('/disco/ctrl/jobinfo?name=%s' % name))
def nodeinfo(self):
    return json.loads(self.request("/disco/ctrl/nodeinfo"))
def mapresults(self, name):
    return json.loads(
        self.request('/disco/ctrl/get_mapresults?name=%s' % name))
def _download(self, url, data=None, method='GET'):
    response = download(self.master + url, data=data, method=method)
    return json.loads(response)
def _maybe_proxy(self, url, method='GET'):
    if self.proxy:
        scheme, (host, port), path = urlsplit(url)
        return '%s/proxy/%s/%s/%s' % (self.proxy, host, method, path)
    return url

def _push(self, (source, target), replicas=None, exclude=[], **kwargs):
    qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)),
                                        ('replicas', replicas)) if v])
    urls = self._download('%s/ddfs/new_blob/%s?%s' % (self.master, target, qs))
    try:
        return [json.loads(url)
                for url in self._upload(urls, source, **kwargs)]
    except CommError, e:
        # Retry the push, excluding the node that failed.
        scheme, (host, port), path = urlsplit(e.url)
        return self._push((source, target),
                          replicas=replicas,
                          exclude=exclude + [host],
                          **kwargs)

def _tagattr(self, tag, attr):
    return '%s/%s' % (self._resolve(canonizetag(tag)), attr)

def _token(self, token, method):
    if token is None:
        if method == 'GET':
def jobinfo(self, name):
    return json.loads(self.request("/disco/ctrl/jobinfo?name=%s" % name))
def nodeinfo(self):
    """
    Returns a dictionary describing the status of the nodes that are
    managed by this Disco master.
    """
    return json.loads(self.request('/disco/ctrl/nodeinfo'))
def oob_list(self, name):
    r = self.request("/disco/ctrl/oob_list?name=%s" % name, redir=True)
    return json.loads(r)
def _download(self, url, data=None, token=None, method='GET'):
    return json.loads(download(self._resolve(url),
                               data=data,
                               method=method,
                               token=self._token(token, method)))
def joblist(self):
    """Returns a list of jobs and their statuses."""
    return json.loads(self.request('/disco/ctrl/joblist'))
def _request(self, url, data=None, method=None):
    response = download(self.master + url, data=data, method=method)
    return json.loads(response)
def get_config(self):
    return json.loads(self.request('/disco/ctrl/load_config_table'))
def set_config(self, config):
    response = json.loads(self.request('/disco/ctrl/save_config_table',
                                       json.dumps(config)))
    if response != 'table saved!':
        raise DiscoError(response)
def set_config(self, config):
    response = json.loads(
        self.request('/disco/ctrl/save_config_table', json.dumps(config)))
    if response != 'table saved!':
        raise DiscoError(response)
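# A hedged sketch combining get_config and set_config: read the node
# table, append an entry, and save it back. The ["name", "workers"] row
# format follows add_node above; the node name and worker count here are
# illustrative.
config = disco.get_config()
config.append(["node01", "8"])
disco.set_config(config)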
print """ Usage: python gluster_config.py [inputfs|resultfs] config.json This script generates Disco-compatible config files for Gluster, a distributed filesystem. Two modes are available: - inputfs, which produces a Gluster volfile that is suitable for storing input data for Disco so that data is k-way replicated over nodes. - resultfs, which produces a Gluster volfile for communication between Disco nodes, in place of the default HTTP-based solution. See gluster_example.json for an example config gile. For more information, see http://discoproject.org/doc/start/dfs.html. """ sys.exit(1) config = json.loads(file(sys.argv[2]).read()) path = os.path.abspath(config["config_dir"]) if sys.argv[1] == "inputfs": check_config(config, replicas = True) client = create_replicating_config(config, path) elif sys.argv[1] == "resultfs": check_config(config, replicas = False) client = create_nufa_config(config, path) create_master_config(sys.argv[1], config, path, client)
def joblist(self):
    return json.loads(self.request("/disco/ctrl/joblist"))
def _download(self, url, data=None, token=None, method='GET'):
    return json.loads(
        download(self._resolve(url),
                 data=data,
                 method=method,
                 token=self._token(token, method)))
    return s

def _maybe_proxy(self, url, method='GET'):
    if self.proxy:
        scheme, (host, port), path = urlsplit(url)
        return '%s/proxy/%s/%s/%s' % (self.proxy, host, method, path)
    return url

def _push(self, (source, target), replicas=None, retries=None, exclude=[]):
    qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)),
                                        ('replicas', replicas)) if v])
    urls = [(url, source)
            for url in self._request('/ddfs/new_blob/%s?%s' % (target, qs))]
    try:
        return [json.loads(url) for url in self._upload(urls, retries=retries)]
    except CommError, e:
        # Retry the push, excluding the node that failed.
        scheme, (host, port), path = urlsplit(e.url)
        return self._push((source, target),
                          replicas=replicas,
                          retries=retries,
                          exclude=exclude + [host])

def _request(self, url, data=None, method=None):
    response = download(self.master + url, data=data, method=method)
    return json.loads(response)

def _upload(self, urls, retries=10):
    urls = [(self._maybe_proxy(url, method='PUT'), fd) for url, fd in urls]
    return upload(urls, retries=retries)