Example #1
0
    def run(self, **kwargs):
        """
        Returns the job immediately after the request has been submitted.

        Accepts the same keyword arguments that are packed into the job
        request (see the ``nr_reduces`` deprecation below).

        A typical pattern in Disco scripts is to run a job synchronously,
        that is, to block the script until the job has finished.
        This is accomplished as follows::

                from disco.core import Disco
                results = Disco(master).new_job(...).wait()

        Note that job methods of the :class:`Disco` class are directly
        accessible through the :class:`Job` object, such as :meth:`Disco.wait`
        above.

        A :class:`JobError` is raised if an error occurs while starting the job.
        """
        # 'nr_reduces' is deprecated in favour of 'partitions': warn and
        # translate, but refuse ambiguous combinations.
        if 'nr_reduces' in kwargs:
            from warnings import warn
            warn("Use partitions instead of nr_reduces", DeprecationWarning)
            if 'partitions' in kwargs or 'merge_partitions' in kwargs:
                raise DeprecationWarning("Cannot specify nr_reduces with "
                                         "partitions and/or merge_partitions")
            kwargs['partitions'] = kwargs.pop('nr_reduces')

        jobpack = Job.JobDict(self,
                              prefix=self.name,
                              ddfs=self.master.master,
                              **kwargs).pack()
        reply = json.loads(self.master.request('/disco/job/new', jobpack))
        if reply[0] != 'ok':
            # ``reply`` is a parsed JSON list (see reply[0] above), so it
            # must be %-formatted: concatenating str + list would raise
            # TypeError and mask the server's actual error message.
            raise DiscoError("Failed to start a job. Server replied: %s" % reply)
        self.name = reply[1]
        return self
Example #2
0
def add_node():
        """Append a dummy node to the master's config table.

        Returns the configuration that was in place before the change,
        so the caller can restore it later.
        """
        previous = json.loads(
                disco.request("/disco/ctrl/load_config_table"))
        updated = list(previous)
        updated.append(["missingnode", "2"])
        reply = disco.request("/disco/ctrl/save_config_table",
                json.dumps(updated))
        if reply != "\"table saved!\"":
                raise Exception("Couldn't add a dummy node: %s" % reply)
        return previous
Example #3
0
    def put(self, tag, urls):
        """Store the list ``urls`` under the tag ``tag``.

        .. warning::

                Concurrent applications should generally prefer
                :meth:`DDFS.tag` to this method.
        """
        from comm_httplib import download
        target = '%s/ddfs/tag/%s' % (self.master, tagname(tag))
        status, body = download(target,
                                method='PUT',
                                data=json.dumps(urls))
        return json.loads(body)
Example #4
0
    def results(self, jobspec, timeout=2000):
        """
        Poll the master for job results.

        If *jobspec* is a single job name (a string), or this method is
        called through a job object (``job.results()``), the return value
        is the job's result list, or an empty list if no results appear
        within *timeout* milliseconds.

        (*Added in version 0.2.1*)
        If *jobspec* is a list -- of job names, of job objects, or of
        result entries previously returned by this function -- the call
        waits at most *timeout* milliseconds for at least one job to
        finish, and returns two lists: finished jobs and still-active
        jobs. Elements of both lists have the form::

                ["job name", ["status", [results]]]

        where status is either ``unknown_job``, ``dead``, ``active`` or
        ``ready``.

        The list of active jobs can be fed straight back into this
        function, which makes it an efficient way to wait for several
        jobs (started with :meth:`Disco.new_job`) and to handle each one
        as soon as it finishes.
        """
        names = list(JobSpecifier(jobspec).jobnames)
        payload = json.dumps([timeout, names])
        reply = json.loads(self.request('/disco/ctrl/get_results', payload))

        # Single-job mode: unwrap the first (only) entry.
        if isinstance(jobspec, basestring):
            return reply[0][1]

        # Multi-job mode: partition entries by status, preserving order.
        finished = [entry for entry in reply if entry[1][0] != 'active']
        running = [entry for entry in reply if entry[1][0] == 'active']
        return finished, running
Example #5
0
File: core.py Project: mshron/disco
    def results(self, jobspec, timeout=2000):
        """
        Return results for one job or for many concurrent jobs.

        When *jobspec* is a string (a job name) -- or when called via a
        job object as ``job.results()`` -- this returns the job's result
        list if results become available within *timeout* milliseconds,
        and an empty list otherwise.

        (*Added in version 0.2.1*)
        When *jobspec* is a list (of job names, job objects, or result
        entries returned by an earlier call), the method waits at most
        *timeout* milliseconds for at least one of the jobs to finish
        and returns a pair of lists ``(finished, active)``. Entries in
        both lists look like::

                ["job name", ["status", [results]]]

        with status one of ``unknown_job``, ``dead``, ``active`` or
        ``ready``.

        Because the active list is accepted as input again, a loop such
        as ``ready, jobs = disco.results(jobs)`` can efficiently consume
        results of many jobs (see :meth:`Disco.new_job`) as they finish.
        """
        jobnames = list(JobSpecifier(jobspec).jobnames)
        reply = json.loads(
            self.request('/disco/ctrl/get_results',
                         json.dumps([timeout, jobnames])))

        if isinstance(jobspec, basestring):
            # Single job: return its result list directly.
            return reply[0][1]

        done, active = [], []
        for entry in reply:
            # Dispatch each entry to the matching bucket.
            (active if entry[1][0] == 'active' else done).append(entry)
        return done, active
Example #6
0
    def results(self, jobspec, timeout=2000):
        """Return results for a single job or for many jobs.

        If *jobspec* is a job name (string), returns that job's result
        list. Otherwise *jobspec* is a list of jobs and the return value
        is a pair of lists ``(others, active)`` of result entries of the
        form ``["job name", ["status", [results]]]``, waiting at most
        *timeout* milliseconds.
        """
        jobspecifier = JobSpecifier(jobspec)
        data = json.dumps([timeout, list(jobspecifier.jobnames)])
        results = json.loads(self.request("/disco/ctrl/get_results", data))

        # isinstance() also accepts str subclasses, unlike the original
        # ``type(jobspec) == str`` equality check, and matches how the
        # sibling implementations of this method test for a single name.
        if isinstance(jobspec, str):
            return results[0][1]

        others, active = [], []
        for result in results:
            if result[1][0] == "active":
                active.append(result)
            else:
                others.append(result)
        return others, active
Example #7
0
File: core.py Project: mshron/disco
 def event_iter(events):
     """Yield ``(byte offset, event tuple)`` pairs parsed from *events*.

     Offsets are counted from the enclosing ``offset``; iteration stops
     at the first non-empty line that is not valid JSON.
     """
     pos = offset
     chunks = events.split('\n')
     for idx, chunk in enumerate(chunks):
         if chunk:
             pos += len(chunk) + 1
             try:
                 parsed = tuple(json.loads(chunk))
             except ValueError:
                 break
             # HTTP range request doesn't like empty ranges: make sure
             # at least the last newline is always retrieved again.
             if idx == len(chunks) - 1 and events.endswith('\n'):
                 pos -= 1
             yield pos, parsed
Example #8
0
 def event_iter(events):
     """Generate ``(offset, event)`` pairs from the event text *events*.

     Offsets start at the enclosing ``offset``; parsing stops at the
     first non-empty line that fails to decode as JSON.
     """
     cursor = offset
     rows = events.splitlines()
     last = len(rows) - 1
     for n, row in enumerate(rows):
         if not row:
             continue
         cursor += len(row) + 1
         try:
             entry = tuple(json.loads(row))
         except ValueError:
             break
         # HTTP range request doesn't like empty ranges: ensure the
         # trailing newline is always part of the next range.
         if n == last and events.endswith('\n'):
             cursor -= 1
         yield cursor, entry
Example #9
0
 def event_iter(events):
        """Yield ``(offset, event)`` for each JSON line in *events*.

        Unlike the other variants, this one advances the offset for
        empty lines too, and always rewinds one byte on the final line.
        """
        pos = offset
        rows = events.splitlines()
        for idx, row in enumerate(rows):
                pos += len(row) + 1
                if not row:
                        continue
                try:
                        record = tuple(json.loads(row))
                except ValueError:
                        break
                # HTTP range request doesn't like empty ranges:
                # always leave the final newline inside the next range.
                if idx == len(rows) - 1:
                        pos -= 1
                yield pos, record
Example #10
0
File: core.py Project: mshron/disco
    def run(self, **kwargs):
        """
        Returns the job immediately after the request has been submitted.

        Accepts the same set of keyword arguments as :class:`JobDict`.

        A typical pattern in Disco scripts is to run a job synchronously,
        that is, to block the script until the job has finished.
        This is accomplished as follows::

                from disco.core import Disco
                results = Disco(master).new_job(...).wait()

        Note that job methods of the :class:`Disco` class are directly
        accessible through the :class:`Job` object, such as :meth:`Disco.wait`
        above.

        A :class:`JobError` is raised if an error occurs while starting the job.
        """
        # 'nr_reduces' is deprecated in favour of 'partitions'.
        if 'nr_reduces' in kwargs:
            warn("Use partitions instead of nr_reduces", DeprecationWarning)
            if 'partitions' in kwargs or 'merge_partitions' in kwargs:
                raise DeprecationWarning("Cannot specify nr_reduces with "
                                         "partitions and/or merge_partitions")
            kwargs['partitions'] = kwargs.pop('nr_reduces')

        # 'mem_sort_limit' no longer has any effect; warn but continue.
        if 'mem_sort_limit' in kwargs:
            warn("mem_sort_limit deprecated: sort=True always uses disk sort",
                 DeprecationWarning)

        jobpack = Job.JobDict(self,
                              prefix=self.name,
                              ddfs=self.master.master,
                              **kwargs).pack()
        reply = json.loads(self.master.request('/disco/job/new', jobpack))
        if reply[0] != 'ok':
            # ``reply`` is a parsed JSON list (see reply[0] above), so it
            # must be %-formatted: concatenating str + list would raise
            # TypeError and mask the server's actual error message.
            raise DiscoError("Failed to start a job. Server replied: %s" % reply)
        self.name = reply[1]
        return self
Example #11
0
 def jobinfo(self, name):
     """Returns a dictionary containing information about the job *name*."""
     raw = self.request('/disco/ctrl/jobinfo?name=%s' % name)
     return json.loads(raw)
Example #12
0
 def nodeinfo(self):
     """Fetch and decode the master's node status table."""
     reply = self.request("/disco/ctrl/nodeinfo")
     return json.loads(reply)
Example #13
0
 def mapresults(self, name):
     """Fetch and decode the map results listing for job *name*."""
     query = '/disco/ctrl/get_mapresults?name=%s' % name
     return json.loads(self.request(query))
Example #14
0
 def _download(self, url, data=None, method='GET'):
     """Fetch ``master + url`` with *method* and decode the JSON reply."""
     return json.loads(
         download(self.master + url, data=data, method=method))
Example #15
0
    def _maybe_proxy(self, url, method='GET'):
        if self.proxy:
            scheme, (host, port), path = urlsplit(url)
            return '%s/proxy/%s/%s/%s' % (self.proxy, host, method, path)
        return url

    def _push(self, (source, target), replicas=None, exclude=[], **kwargs):
        # Ask the master for upload urls for a new blob named *target*,
        # skipping hosts listed in ``exclude`` (nodes that failed earlier).
        # NOTE: Python 2 tuple-parameter syntax; the mutable default
        # ``exclude`` is never mutated in place (a new list is built below).
        qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)),
                                            ('replicas', replicas)) if v])
        urls = self._download('%s/ddfs/new_blob/%s?%s' %
                              (self.master, target, qs))

        try:
            return [
                json.loads(url)
                for url in self._upload(urls, source, **kwargs)
            ]
        except CommError, e:
            # A node failed: retry recursively with that host excluded
            # from the candidate set.
            scheme, (host, port), path = urlsplit(e.url)
            return self._push((source, target),
                              replicas=replicas,
                              exclude=exclude + [host],
                              **kwargs)

    def _tagattr(self, tag, attr):
        """Build the url for attribute *attr* of tag *tag*."""
        base = self._resolve(canonizetag(tag))
        return '%s/%s' % (base, attr)

    def _token(self, token, method):
        if token is None:
            if method == 'GET':
Example #16
0
 def jobinfo(self, name):
     """Fetch the jobinfo dictionary for job *name* from the master."""
     return json.loads(
         self.request("/disco/ctrl/jobinfo?name=%s" % name))
Example #17
0
 def nodeinfo(self):
     """
     Describe the status of the nodes that are managed by this Disco
     master, as a dictionary decoded from the master's reply.
     """
     response = self.request('/disco/ctrl/nodeinfo')
     return json.loads(response)
Example #18
0
File: core.py Project: mshron/disco
 def jobinfo(self, name):
     """Return a dictionary of information about the job *name*."""
     query = '/disco/ctrl/jobinfo?name=%s' % name
     return json.loads(self.request(query))
Example #19
0
 def oob_list(self, name):
     """Fetch and decode the oob listing for job *name* (follows redirects)."""
     reply = self.request("/disco/ctrl/oob_list?name=%s" % name,
                          redir=True)
     return json.loads(reply)
Example #20
0
File: core.py Project: mshron/disco
 def mapresults(self, name):
     """Decode the master's map-results reply for job *name*."""
     reply = self.request('/disco/ctrl/get_mapresults?name=%s' % name)
     return json.loads(reply)
Example #21
0
 def _download(self, url, data=None, token=None, method='GET'):
     """Resolve *url*, download it with an auth token, and parse the JSON."""
     resolved = self._resolve(url)
     body = download(resolved,
                     data=data,
                     method=method,
                     token=self._token(token, method))
     return json.loads(body)
Example #22
0
File: core.py Project: mshron/disco
 def joblist(self):
     """Returns a list of jobs and their statuses."""
     reply = self.request('/disco/ctrl/joblist')
     return json.loads(reply)
Example #23
0
 def _request(self, url, data=None, method=None):
     """Issue *method* against ``master + url`` and decode the JSON body."""
     return json.loads(
         download(self.master + url, data=data, method=method))
Example #24
0
 def get_config(self):
     """Fetch the current node configuration table from the master."""
     raw = self.request('/disco/ctrl/load_config_table')
     return json.loads(raw)
Example #25
0
File: core.py Project: mshron/disco
 def get_config(self):
     """Return the master's node configuration table, parsed from JSON."""
     return json.loads(
         self.request('/disco/ctrl/load_config_table'))
Example #26
0
 def set_config(self, config):
     """Save *config* as the master's node configuration table.

     Raises :class:`DiscoError` if the master does not confirm the save.
     """
     reply = json.loads(self.request('/disco/ctrl/save_config_table',
                                     json.dumps(config)))
     if reply != 'table saved!':
         raise DiscoError(reply)
Example #27
0
File: core.py Project: mshron/disco
 def set_config(self, config):
     """Persist *config* as the master's config table.

     Raises :class:`DiscoError` unless the master confirms the save.
     """
     answer = self.request('/disco/ctrl/save_config_table',
                           json.dumps(config))
     decoded = json.loads(answer)
     if decoded != 'table saved!':
         raise DiscoError(decoded)
Example #28
0
 def joblist(self):
     """Returns a list of jobs and their statuses."""
     url = '/disco/ctrl/joblist'
     return json.loads(self.request(url))
Example #29
0
File: core.py Project: mshron/disco
 def nodeinfo(self):
     """
     Return a dictionary describing the status of the nodes managed
     by this Disco master.
     """
     raw = self.request('/disco/ctrl/nodeinfo')
     return json.loads(raw)
Example #30
0
        print """
        Usage: python gluster_config.py [inputfs|resultfs] config.json

        This script generates Disco-compatible config files for Gluster,
        a distributed filesystem.

        Two modes are available:
        - inputfs, which produces a Gluster volfile that is suitable for
          storing input data for Disco so that data is k-way replicated
          over nodes.
        - resultfs, which produces a Gluster volfile for communication
          between Disco nodes, in place of the default HTTP-based
          solution.

        See gluster_example.json for an example config gile. For more
        information, see http://discoproject.org/doc/start/dfs.html.
        """
        sys.exit(1)

# Load the user-supplied JSON config. Using ``with open(...)`` instead of
# the deprecated ``file()`` builtin closes the handle deterministically
# (the original leaked it) and works on both Python 2 and 3.
with open(sys.argv[2]) as config_file:
        config = json.loads(config_file.read())
path = os.path.abspath(config["config_dir"])

# Build the volfile for the requested mode (argument validity is checked
# by the usage handling above; an unknown mode leaves ``client`` unset).
if sys.argv[1] == "inputfs":
        check_config(config, replicas = True)
        client = create_replicating_config(config, path)
elif sys.argv[1] == "resultfs":
        check_config(config, replicas = False)
        client = create_nufa_config(config, path)
create_master_config(sys.argv[1], config, path, client)

Example #31
0
 def joblist(self):
     """Fetch the job list from the master and decode it."""
     reply = self.request("/disco/ctrl/joblist")
     return json.loads(reply)
Example #32
0
 def _download(self, url, data=None, token=None, method='GET'):
     """Resolve *url*, fetch it with an auth token, and parse as JSON."""
     token = self._token(token, method)
     body = download(self._resolve(url),
                     data=data,
                     method=method,
                     token=token)
     return json.loads(body)
Example #33
0
        return s

    def _maybe_proxy(self, url, method='GET'):
        if self.proxy:
            scheme, (host, port), path = urlsplit(url)
            return '%s/proxy/%s/%s/%s' % (self.proxy, host, method, path)
        return url

    def _push(self, (source, target), replicas=None, retries=None, exclude=[]):
        # Ask the master for upload urls for a new blob named *target*,
        # skipping hosts listed in ``exclude`` (nodes that failed earlier).
        # NOTE: Python 2 tuple-parameter syntax; the mutable default
        # ``exclude`` is never mutated in place (a new list is built below).
        qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)),
                                            ('replicas', replicas)) if v])
        urls = [(url, source)
            for url in self._request('/ddfs/new_blob/%s?%s' % (target, qs))]

        try:
            return [json.loads(url)
                for url in self._upload(urls, retries=retries)]
        except CommError, e:
            # A node failed: retry recursively with that host excluded
            # from the candidate set.
            scheme, (host, port), path = urlsplit(e.url)
            return self._push((source, target),
                              replicas=replicas,
                              retries=retries,
                              exclude=exclude + [host])

    def _request(self, url, data=None, method=None):
        """Issue a request against ``master + url`` and decode the JSON body."""
        return json.loads(
            download(self.master + url, data=data, method=method))

    def _upload(self, urls, retries=10):
        """Upload each ``(url, fd)`` pair, routing urls through the proxy."""
        proxied = [(self._maybe_proxy(u, method='PUT'), fd) for u, fd in urls]
        return upload(proxied, retries=retries)