def test_badprefix(self):
        """Check that the job prefixes 'a/b' and 'a.b' are refused.

        For each prefix a job pack is built and posted to
        ``/disco/job/new``; the decoded reply must carry the status
        ``'error'`` and a message containing "invalid prefix".
        """
        jobenvs, jobzip, jobdata = {}, '', ''
        for bad_prefix in ('a/b', 'a.b'):
            jobdict = {
                'prefix': bad_prefix, 'scheduler': {}, 'input': [],
                'worker': "w", 'owner': "o", 'nr_reduces': "2",
            }
            packed = JobPack(jobdict, jobenvs, jobzip, jobdata).dumps()
            reply = self.disco.request('/disco/job/new', packed)
            status, response = loads(reply)
            self.assertEquals(status, 'error')
            self.assertTrue(response.find("invalid prefix") != -1)
Beispiel #2
0
 def send(cls, type, payload=''):
     """Emit one message on stderr and return the decoded reply from stdin.

     The message line is ``<type> <body length> <body>`` followed by a
     newline, where the body is *payload* JSON-encoded.  The reply is read
     from ``sys.stdin`` as two fields terminated by a space and then
     ``int(size) + 1`` characters; the trailing character is dropped before
     the body is JSON-decoded.

     Raises ValueError (carrying the decoded reply) when *type* is 'ERROR'.
     """
     from disco.json import dumps, loads
     encoded = dumps(payload)
     sys.stderr.write('%s %d %s\n' % (type, len(encoded), encoded))
     elapsed, _reply_type = sys.stdin.t_read_until(' ')
     elapsed, reply_size = sys.stdin.t_read_until(' ', spent=elapsed)
     elapsed, raw = sys.stdin.t_read(int(reply_size) + 1, spent=elapsed)
     reply = loads(raw[:-1])
     if type == 'ERROR':
         raise ValueError(reply)
     return reply
Beispiel #3
0
 def send(cls, type, payload=''):
     """Write a message to ``cls.stderr`` and return the reply read from stdin.

     *payload* is JSON-encoded and written as
     ``<type> <body length> <body>`` plus a newline.  The answer is then
     read from ``sys.stdin``: a type field and a size field (each read up
     to a space), followed by ``int(size) + 1`` characters whose last
     character is discarded before JSON-decoding.

     Raises ValueError (carrying the decoded reply) when *type* is 'ERROR'.
     """
     from disco.json import dumps, loads
     message_body = dumps(payload)
     line = '%s %d %s\n' % (type, len(message_body), message_body)
     cls.stderr.write(line)
     spent, _rtype = sys.stdin.t_read_until(' ')
     spent, rsize = sys.stdin.t_read_until(' ', spent=spent)
     spent, rbody = sys.stdin.t_read(int(rsize) + 1, spent=spent)
     decoded = loads(rbody[:-1])
     if type != 'ERROR':
         return decoded
     raise ValueError(decoded)
Beispiel #4
0
    def test_badprefix(self):
        """The prefixes 'a/b' and 'a.b' must yield an "invalid prefix" error."""
        jobenvs, jobzip, jobdata = {}, '', ''

        def post_job(prefix):
            # Build a minimal job pack around the prefix under test and
            # return the decoded reply to the job submission request.
            jobdict = {'prefix': prefix, 'scheduler': {}, 'input': [],
                       'worker': "w", 'owner': "o", 'nr_reduces': "2"}
            jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
            return loads(
                self.disco.request('/disco/job/new', jobpack.dumps()))

        for prefix in ('a/b', 'a.b'):
            status, response = post_job(prefix)
            self.assertEquals(status, 'error')
            self.assertTrue(response.find("invalid prefix") != -1)
Beispiel #5
0
 def _download(self, url, data=None, token=None, method='GET'):
     """Download *url* and return the JSON-decoded response.

     The URL is first passed through ``proxy_url`` (with ``self.proxy`` and
     *method*) and ``self._resolve``.  The token sent with the request comes
     from ``self._token(url, token, method)``.
     """
     proxied = proxy_url(url, proxy=self.proxy, meth=method)
     location = self._resolve(proxied)
     auth = self._token(url, token, method)
     raw = download(location, data=data, method=method, token=auth)
     return json.loads(raw)
Beispiel #6
0
 def test_badinfo(self):
     """A job pack built from an empty job dict must fail with "missing key"."""
     empty_pack = JobPack({}, {}, '', '')
     reply = self.disco.request('/disco/job/new', empty_pack.dumps())
     status, response = loads(reply)
     self.assertEquals(status, 'error')
     self.assertTrue(response.find("missing key") != -1)
Beispiel #7
0
 def _download(self, url, data=None, token=None, method='GET', to_master=True):
     """Download *url* and return the JSON-decoded response.

     The URL is rewritten by ``proxy_url`` (given ``self.proxy``, *method*
     and *to_master*) and then by ``self._resolve`` before the request is
     made.  The token comes from ``self._token(url, token, method)``.
     """
     routed = proxy_url(url,
                        proxy=self.proxy,
                        meth=method,
                        to_master=to_master)
     endpoint = self._resolve(routed)
     credentials = self._token(url, token, method)
     payload = download(endpoint,
                        data=data,
                        method=method,
                        token=credentials)
     return json.loads(payload)
 def test_badinfo(self):
     """Submitting an empty job dict must be answered with a "missing key" error."""
     jobdict, jobenvs = {}, {}
     jobzip = jobdata = ''
     payload = JobPack(jobdict, jobenvs, jobzip, jobdata).dumps()
     status, response = loads(self.disco.request('/disco/job/new', payload))
     self.assertEquals(status, 'error')
     self.assertTrue(not response.find("missing key") < 0)
Beispiel #9
0
 def test_badlength(self):
     """A truncated job pack must be rejected with an "invalid header" error.

     The serialized pack is cut ``len(jobdata) + 1`` characters short before
     it is posted to ``/disco/job/new``.
     """
     jobenvs, jobzip, jobdata = {}, '0'*64, '0'*64
     jobdict = {'prefix':'JobPackBadLength', 'scheduler':{}, 'input':["raw://data"],
                "map?":True, 'worker':"w", 'owner':"o", 'nr_reduces':"2"}
     jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata).dumps()
     jobpack = jobpack[:(len(jobpack)-len(jobdata)-1)]
     status, response = loads(self.disco.request('/disco/job/new', jobpack))
     self.assertEquals(status, 'error')
     # The expected fragment is space separated, like the other error
     # fragments these tests check ("invalid prefix", "missing key").
     self.assertTrue(response.find("invalid header") >= 0)
    def test_badheader(self):
        """Job packs with bad header offsets must be answered with 'error'.

        Each case pairs a list of four header offsets with the number of
        '0' characters appended after the header.
        """
        cases = (
            ([hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2], 3),
            ([hdr_size, hdr_size, hdr_size + 1, hdr_size + 1], 2),
            ([hdr_size, hdr_size + 1, hdr_size, hdr_size + 1], 2),
            ([hdr_size, hdr_size + 1, hdr_size + 1, hdr_size], 2),
        )
        for offsets, padding in cases:
            jobpack = JobPack.header(offsets) + '0' * padding
            reply = self.disco.request('/disco/job/new', jobpack)
            status, response = loads(reply)
            self.assertEquals(status, 'error')
Beispiel #11
0
    def test_badheader(self):
        """Four headers with bad offset combinations must each be rejected."""
        def status_for(offsets, filler):
            # Post a header built from `offsets` plus `filler` and return
            # only the status element of the decoded reply.
            jobpack = JobPack.header(offsets) + filler
            status, response = loads(
                self.disco.request('/disco/job/new', jobpack))
            return status

        first = [hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2]
        self.assertEquals(status_for(first, '000'), 'error')

        second = [hdr_size, hdr_size, hdr_size + 1, hdr_size + 1]
        self.assertEquals(status_for(second, '00'), 'error')

        third = [hdr_size, hdr_size + 1, hdr_size, hdr_size + 1]
        self.assertEquals(status_for(third, '00'), 'error')

        fourth = [hdr_size, hdr_size + 1, hdr_size + 1, hdr_size]
        self.assertEquals(status_for(fourth, '00'), 'error')
Beispiel #12
0
 def event_iter(events):
     """Yield ``(offset, event)`` for every non-empty line of *events*.

     Each line is JSON-decoded into a tuple.  The running offset starts at
     the enclosing ``offset`` and grows by the line length plus one for each
     non-empty line.  Iteration stops at the first line that does not decode.
     """
     position = offset
     chunks = events.split('\n')
     final_index = len(chunks) - 1
     for index, text in enumerate(chunks):
         if not text:
             continue
         position += len(text) + 1
         try:
             parsed = tuple(json.loads(text))
         except ValueError:
             break
         # HTTP range requests do not accept empty ranges, so the offset
         # is pulled back to make sure the last newline is fetched again.
         # NOTE(review): when `events` ends with '\n' the final split
         # element is empty and skipped above, so this branch looks
         # unreachable -- confirm the intended index.
         if index == final_index and events.endswith('\n'):
             position -= 1
         yield position, parsed
 def test_badlength(self):
     """A job pack cut short must be refused with an "invalid header" error.

     The serialized pack loses its last ``len(jobdata) + 1`` characters
     before being posted to ``/disco/job/new``.
     """
     filler = '0' * 64
     jobenvs, jobzip, jobdata = {}, filler, filler
     jobdict = {
         'prefix': 'JobPackBadLength', 'scheduler': {},
         'input': ["raw://data"], "map?": True,
         'worker': "w", 'owner': "o", 'nr_reduces': "2",
     }
     complete = JobPack(jobdict, jobenvs, jobzip, jobdata).dumps()
     keep = len(complete) - len(jobdata) - 1
     truncated = complete[:keep]
     reply = self.disco.request('/disco/job/new', truncated)
     status, response = loads(reply)
     self.assertEquals(status, 'error')
     self.assertTrue(response.find("invalid header") != -1)
Beispiel #14
0
 def event_iter(events):
     """Generate ``(offset, event)`` pairs from newline-separated JSON text.

     Empty lines are skipped.  For every other line the offset (seeded from
     the enclosing ``offset``) advances by ``len(line) + 1`` and the line is
     decoded into a tuple; a line that fails to decode ends the iteration.
     """
     offs = offset
     lines = events.split('\n')
     last = len(lines) - 1
     newline_terminated = events.endswith('\n')
     for i, line in enumerate(lines):
         if line:
             offs = offs + len(line) + 1
             try:
                 decoded = json.loads(line)
                 event = tuple(decoded)
             except ValueError:
                 break
             # An HTTP range request must not be empty, so step back one
             # character to always re-fetch at least the last newline.
             # NOTE(review): a newline-terminated input makes the last
             # split element empty, so this condition appears never to
             # hold -- verify.
             if i == last and newline_terminated:
                 offs -= 1
             yield offs, event
Beispiel #15
0
 def jobinfo(self, jobname):
     """Request ``/disco/ctrl/jobinfo`` for *jobname* and return the decoded reply.

     The reply is a dict containing information about the job.
     """
     endpoint = '/disco/ctrl/jobinfo?name=%s' % jobname
     raw = self.request(endpoint)
     return json.loads(raw)
Beispiel #16
0
 def _download(self, url, data=None, token=None, method='GET'):
     """Download the resolved *url* and return the JSON-decoded response.

     The request token is obtained from ``self._token(url, token, method)``.
     """
     resolved = self._resolve(url)
     auth = self._token(url, token, method)
     raw = download(resolved, data=data, method=method, token=auth)
     return json.loads(raw)
Beispiel #17
0
    def _maybe_proxy(self, url, method='GET'):
        """Return *url* rewritten through ``self.proxy``, or unchanged if no proxy is set.

        The proxied form is ``<proxy>/proxy/<host>/<method>/<path>``.
        """
        if not self.proxy:
            return url
        _scheme, (host, _port), path = urlsplit(url)
        return '%s/proxy/%s/%s/%s' % (self.proxy, host, method, path)

    def _push(self, (source, target), replicas=None, exclude=[], **kwargs):
        qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)),
                                            ('replicas', replicas)) if v])
        urls = self._download('%s/ddfs/new_blob/%s?%s' %
                              (self.master, target, qs))

        try:
            return [
                json.loads(url)
                for url in self._upload(urls, source, **kwargs)
            ]
        except CommError, e:
            scheme, (host, port), path = urlsplit(e.url)
            return self._push((source, target),
                              replicas=replicas,
                              exclude=exclude + [host],
                              **kwargs)

    def _tagattr(self, tag, attr):
        """Return ``<resolved canonical tag>/<attr>``."""
        resolved = self._resolve(canonizetag(tag))
        return '%s/%s' % (resolved, attr)

    def _token(self, url, token, method):
        if token is None:
            token = urltoken(url)
Beispiel #18
0
 def jobenvs(self):
     """Return the JSON-decoded span of the job file between the envs and home offsets."""
     _dict_at, envs_at, home_at, _data_at = self.offsets(self.jobfile)
     self.jobfile.seek(envs_at)
     raw = self.jobfile.read(home_at - envs_at)
     return json.loads(raw)
Beispiel #19
0
 def test_badmagic(self):
     """A header written with ``magic=0`` must be answered with 'error'."""
     offsets = [hdr_size + step for step in range(4)]
     header = JobPack.header(offsets, magic=0)
     reply = self.disco.request('/disco/job/new', header + '0000')
     status, response = loads(reply)
     self.assertEquals(status, 'error')
Beispiel #20
0
            b = src.read(8192)
            if not b:
                break
            s += len(b)
            dst.write(b)
        return s

    def _push(self, (source, target), replicas=None, exclude=[], **kwargs):
        qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)),
                                            ('replicas', replicas)) if v])
        urls = self._download('%s/ddfs/new_blob/%s?%s' %
                              (self.master, target, qs))

        try:
            return [
                json.loads(url) for url in self._upload(
                    urls, source, to_master=False, **kwargs)
            ]
        except CommError, e:
            scheme, (host, port), path = urlsplit(e.url)
            return self._push((source, target),
                              replicas=replicas,
                              exclude=exclude + [host],
                              **kwargs)

    def _tagattr(self, tag, attr):
        """Build the attribute URL: the resolved canonical *tag*, a slash, then *attr*."""
        canonical = canonizetag(tag)
        base = self._resolve(canonical)
        return '%s/%s' % (base, attr)

    def _token(self, url, token, method):
        if token is None:
            token = urltoken(url)
Beispiel #21
0
 def jobenvs(self):
     """Return the JSON-decoded environment section of the job file.

     ``self.offsets(self.jobfile)`` supplies four offsets (dict, envs, home,
     data).  The section read here starts at ``envs_offset`` and ends where
     the home section begins.
     """
     dict_offset, envs_offset, home_offset, data_offset = self.offsets(
         self.jobfile)
     self.jobfile.seek(envs_offset)
     # The read starts at envs_offset, so the length must be measured from
     # envs_offset as well; measuring from dict_offset would read past the
     # end of the section.
     return json.loads(self.jobfile.read(home_offset - envs_offset))
Beispiel #22
0
 def mapresults(self, jobname):
     """Request ``/disco/ctrl/get_mapresults`` for *jobname* and return the decoded reply."""
     endpoint = '/disco/ctrl/get_mapresults?name=%s' % jobname
     return json.loads(self.request(endpoint))
Beispiel #23
0
            b = src.read(8192)
            if not b:
                break
            s += len(b)
            dst.write(b)
        return s

    def _push(self, (source, target), replicas=None, exclude=[], **kwargs):
        qs = urlencode([(k, v) for k, v in (('exclude', ','.join(exclude)),
                                            ('replicas', replicas)) if v])
        urls = self._download('%s/ddfs/new_blob/%s?%s' % (self.master,
                                                          target,
                                                          qs))

        try:
            return [json.loads(url)
                    for url in self._upload(urls, source, to_master=False, **kwargs)]
        except CommError, e:
            scheme, (host, port), path = urlsplit(e.url)
            return self._push((source, target),
                              replicas=replicas,
                              exclude=exclude + [host],
                              **kwargs)

    def _tagattr(self, tag, attr):
        """Join the resolved canonical form of *tag* and *attr* with a slash."""
        parts = (self._resolve(canonizetag(tag)), attr)
        return '%s/%s' % parts

    def _token(self, url, token, method):
        if token is None:
            token = urltoken(url)
            if token is None:
Beispiel #24
0
 def nodeinfo(self):
     """
     Request ``/disco/ctrl/nodeinfo`` and return the decoded reply: a
     dictionary describing the status of the nodes managed by this Disco
     master.
     """
     raw = self.request('/disco/ctrl/nodeinfo')
     return json.loads(raw)
Beispiel #25
0
 def get_config(self):
     """Request ``/disco/ctrl/load_config_table`` and return the JSON-decoded reply."""
     raw = self.request('/disco/ctrl/load_config_table')
     return json.loads(raw)
Beispiel #26
0
 def jobinfo(self, jobname):
     """Return the decoded ``/disco/ctrl/jobinfo`` reply for *jobname* (a dict of job information)."""
     query = '/disco/ctrl/jobinfo?name=%s' % jobname
     reply = self.request(query)
     return json.loads(reply)
Beispiel #27
0
 def mapresults(self, jobname):
     """Return the decoded ``/disco/ctrl/get_mapresults`` reply for *jobname*."""
     raw = self.request('/disco/ctrl/get_mapresults?name=%s' % jobname)
     decoded = json.loads(raw)
     return decoded
Beispiel #28
0
    def results(self, jobspec, timeout=2000):
        """
        Fetch results for one job, or for several jobs at once,
        depending on the type of *jobspec*.

        :type  jobspec: :class:`disco.job.Job`, string, or list
        :param jobspec: For a single job or job name,
                        the return value is a tuple::

                                status, results

                        For a list, two lists are returned:
                        inactive jobs first, then active jobs.
                        The elements of both lists look like::

                                jobname, (status, results)

                        where status is one of:
                        ``'unknown job'``,
                        ``'dead'``,
                        ``'active'``, or
                        ``'ready'``.

        :type  timeout: int
        :param timeout: maximum number of milliseconds to wait
                        for at least one of the jobs to finish.

        Passing a list is the more efficient way to wait
        for several jobs.
        The following example prints results as soon as the jobs
        (initially ``active``) finish::

                while active:
                  inactive, active = disco.results(jobs)
                  for jobname, (status, results) in inactive:
                    if status == 'ready':
                      for k, v in result_iterator(results):
                        print k, v
                      disco.purge(jobname)

        The list of active jobs, ``active``,
        returned by :meth:`Disco.results`,
        can be passed back in as the input of the next call.
        """
        def name_of(job):
            # Accept a Job object, a plain name, or a (name, ...) sequence.
            if isinstance(job, Job):
                return job.name
            if isinstance(job, basestring):
                return job
            return job[0]

        jobnames = [name_of(job) for job in util.iterify(jobspec)]
        request_body = json.dumps([timeout, jobnames])
        reply = json.loads(self.request('/disco/ctrl/get_results',
                                        request_body))
        single = isinstance(jobspec, (Job, basestring))
        finished, running = [], []
        for name, (status, result) in reply:
            if single:
                # One job was asked for: its entry is the answer.
                return status, result
            bucket = running if status == 'active' else finished
            bucket.append((name, (status, result)))
        return finished, running
Beispiel #29
0
 def submit(self, jobpack):
     """Post *jobpack* to ``/disco/job/new`` and return the reply body.

     Raises DiscoError when the reply status is not 'ok'.
     """
     reply = self.request('/disco/job/new', jobpack)
     status, body = json.loads(reply)
     if status == 'ok':
         return body
     raise DiscoError("Failed to submit jobpack: %s" % body)
Beispiel #30
0
 def get_config(self):
     """Return the JSON-decoded reply of ``/disco/ctrl/load_config_table``."""
     endpoint = '/disco/ctrl/load_config_table'
     return json.loads(self.request(endpoint))
Beispiel #31
0
    def results(self, jobspec, timeout=2000):
        """
        Return the results of a single job, or of many concurrently
        running jobs, according to the type of *jobspec*.

        :type  jobspec: :class:`disco.job.Job`, string, or list
        :param jobspec: Given a job or a job name,
                        the result is a tuple of the form::

                                status, results

                        Given a list,
                        the result is a pair of lists:
                        inactive jobs and active jobs.
                        Every element of either list has the form::

                                jobname, (status, results)

                        where status is one of:
                        ``'unknown job'``,
                        ``'dead'``,
                        ``'active'``, or
                        ``'ready'``.

        :type  timeout: int
        :param timeout: wait no longer than this many milliseconds
                        for at least one of the jobs to finish.

        Waiting on a list of jobs is more efficient
        than waiting on each job separately.
        For instance, this loop prints results
        as soon as the jobs (initially ``active``) finish::

                while active:
                  inactive, active = disco.results(jobs)
                  for jobname, (status, results) in inactive:
                    if status == 'ready':
                      for k, v in result_iterator(results):
                        print k, v
                      disco.purge(jobname)

        As the example shows, the ``active`` list
        returned by :meth:`Disco.results`
        is itself valid input for this method.
        """
        def as_name(job):
            # A Job contributes its name, a string is already a name, and
            # anything else is indexed for its first element.
            if isinstance(job, Job):
                return job.name
            return job if isinstance(job, basestring) else job[0]

        jobnames = []
        for job in util.iterify(jobspec):
            jobnames.append(as_name(job))
        query = json.dumps([timeout, jobnames])
        results = json.loads(self.request('/disco/ctrl/get_results', query))
        others, active = [], []
        for name, (status, result) in results:
            if isinstance(jobspec, (Job, basestring)):
                return status, result
            entry = (name, (status, result))
            if status == 'active':
                active.append(entry)
            else:
                others.append(entry)
        return others, active
Beispiel #32
0
 def set_config(self, config):
     """Post *config* (JSON-encoded) to ``/disco/ctrl/save_config_table``.

     Raises DiscoError carrying the decoded reply unless that reply is
     exactly 'table saved!'.
     """
     encoded = json.dumps(config)
     raw = self.request('/disco/ctrl/save_config_table', encoded)
     reply = json.loads(raw)
     if reply != 'table saved!':
         raise DiscoError(reply)
Beispiel #33
0
 def submit(self, jobpack):
     """Submit *jobpack* via ``/disco/job/new``; return the body of an 'ok' reply.

     Any other status raises DiscoError with the reply body in its message.
     """
     decoded = json.loads(self.request('/disco/job/new', jobpack))
     status, body = decoded
     accepted = status == 'ok'
     if not accepted:
         raise DiscoError("Failed to submit jobpack: %s" % body)
     return body
Beispiel #34
0
 def nodeinfo(self):
     """
     Return the decoded reply of ``/disco/ctrl/nodeinfo``, a dictionary
     describing the status of the nodes that this Disco master manages.
     """
     endpoint = '/disco/ctrl/nodeinfo'
     return json.loads(self.request(endpoint))
Beispiel #35
0
 def set_config(self, config):
     """Save *config* through ``/disco/ctrl/save_config_table``.

     The decoded reply must equal 'table saved!'; any other reply is raised
     as a DiscoError.
     """
     payload = json.dumps(config)
     response = json.loads(
         self.request('/disco/ctrl/save_config_table', payload))
     saved = not (response != 'table saved!')
     if not saved:
         raise DiscoError(response)
Beispiel #36
0
 def joblist(self):
     """Request ``/disco/ctrl/joblist`` and return the decoded list of jobs and their statuses."""
     raw = self.request('/disco/ctrl/joblist')
     return json.loads(raw)
Beispiel #37
0
 def joblist(self):
     """Return the decoded ``/disco/ctrl/joblist`` reply: the jobs and their statuses."""
     endpoint = '/disco/ctrl/joblist'
     jobs = json.loads(self.request(endpoint))
     return jobs
 def test_badmagic(self):
     """Posting a header built with ``magic=0`` must produce an 'error' status."""
     offsets = [hdr_size, hdr_size + 1, hdr_size + 2, hdr_size + 3]
     padding = '0' * 4
     jobpack = JobPack.header(offsets, magic=0) + padding
     reply = self.disco.request('/disco/job/new', jobpack)
     status, response = loads(reply)
     self.assertEquals(status, 'error')