def test_badprefix(self):
    """Job prefixes containing a path separator ('/') or a dot ('.')
    must be rejected by the master with an "invalid prefix" error.
    """
    def submit_with_prefix(prefix):
        # Build a minimal, otherwise-valid jobdict around `prefix`
        # and submit it to the master; return (status, response).
        jobenvs, jobzip, jobdata = {}, '', ''
        jobdict = {'prefix': prefix,
                   'scheduler': {},
                   'input': [],
                   'worker': "w",
                   'owner': "o",
                   'nr_reduces': "2"}
        jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
        return loads(self.disco.request('/disco/job/new', jobpack.dumps()))

    # The original duplicated the whole submission block per prefix;
    # loop over the invalid prefixes instead.
    for prefix in ('a/b', 'a.b'):
        status, response = submit_with_prefix(prefix)
        self.assertEquals(status, 'error')
        self.assertTrue(response.find("invalid prefix") >= 0)
def send(cls, type, payload=''):
    """Send a `type` message with a JSON `payload` over the worker
    protocol and return the JSON-decoded reply body.

    Messages are framed as "<TYPE> <size> <payload>\\n", written to
    stderr; the reply arrives on stdin in the same framing.

    Raises ValueError if the reply type is 'ERROR'.
    """
    from disco.json import dumps, loads
    body = dumps(payload)
    sys.stderr.write('%s %d %s\n' % (type, len(body), body))
    # Read the framed reply: type, size, then size+1 bytes (payload
    # plus the trailing newline, stripped below with [:-1]).
    spent, rtype = sys.stdin.t_read_until(' ')
    spent, rsize = sys.stdin.t_read_until(' ', spent=spent)
    spent, rbody = sys.stdin.t_read(int(rsize) + 1, spent=spent)
    # BUG FIX: test the *reply* type, not the type we sent.  The
    # original checked `type == 'ERROR'`, so error replies were
    # returned as ordinary values and `rtype` was never used.
    if rtype == 'ERROR':
        raise ValueError(loads(rbody[:-1]))
    return loads(rbody[:-1])
def send(cls, type, payload=''):
    """Send a `type` message with a JSON `payload` over the worker
    protocol and return the JSON-decoded reply body.

    Raises ValueError if the reply type is 'ERROR'.
    """
    from disco.json import dumps, loads
    body = dumps(payload)
    # Outgoing frame on cls.stderr; replies still read from sys.stdin
    # (NOTE(review): asymmetry kept from the original -- confirm
    # whether cls.stdin was intended).
    cls.stderr.write('%s %d %s\n' % (type, len(body), body))
    spent, rtype = sys.stdin.t_read_until(' ')
    spent, rsize = sys.stdin.t_read_until(' ', spent=spent)
    spent, rbody = sys.stdin.t_read(int(rsize) + 1, spent=spent)
    # BUG FIX: test the *reply* type (`rtype`), not the type we sent;
    # the original's `type == 'ERROR'` left `rtype` unused and passed
    # error replies through as normal return values.
    if rtype == 'ERROR':
        raise ValueError(loads(rbody[:-1]))
    return loads(rbody[:-1])
def test_badprefix(self):
    """The master must refuse jobpacks whose prefix contains '/' or '.',
    answering with an "invalid prefix" error.
    """
    def try_prefix(bad_prefix):
        # Submit a minimal jobdict carrying `bad_prefix`.
        jobdict = {'prefix': bad_prefix,
                   'scheduler': {},
                   'input': [],
                   'worker': "w",
                   'owner': "o",
                   'nr_reduces': "2"}
        pack = JobPack(jobdict, {}, '', '')
        return loads(self.disco.request('/disco/job/new', pack.dumps()))

    # De-duplicated: the original repeated the identical block for
    # each of the two bad prefixes.
    for bad_prefix in ('a/b', 'a.b'):
        status, response = try_prefix(bad_prefix)
        self.assertEquals(status, 'error')
        self.assertTrue(response.find("invalid prefix") >= 0)
def _download(self, url, data=None, token=None, method='GET'):
    """Fetch `url` (routed through the DDFS proxy when one is
    configured) and return the JSON-decoded response body."""
    target = self._resolve(proxy_url(url, proxy=self.proxy, meth=method))
    auth = self._token(url, token, method)
    return json.loads(download(target, data=data, method=method, token=auth))
def test_badinfo(self):
    """An empty jobdict is missing every required key; the master
    must reject it with a "missing key" error."""
    empty_pack = JobPack({}, {}, '', '')
    status, response = loads(
        self.disco.request('/disco/job/new', empty_pack.dumps()))
    self.assertEquals(status, 'error')
    self.assertTrue(response.find("missing key") >= 0)
def _download(self, url, data=None, token=None, method='GET', to_master=True):
    """Fetch `url` (via the DDFS proxy when configured; `to_master`
    controls proxy routing) and JSON-decode the response body."""
    proxied = proxy_url(url,
                        proxy=self.proxy,
                        meth=method,
                        to_master=to_master)
    return json.loads(download(self._resolve(proxied),
                               data=data,
                               method=method,
                               token=self._token(url, token, method)))
def test_badinfo(self):
    """Submitting a jobpack whose jobdict is empty must fail: every
    required jobdict key is absent."""
    pack = JobPack({}, {}, '', '')
    status, response = loads(
        self.disco.request('/disco/job/new', pack.dumps()))
    self.assertEquals(status, 'error')
    self.assertTrue(response.find("missing key") >= 0)
def test_badlength(self):
    """A jobpack whose body is shorter than its header offsets claim
    must be rejected by the master."""
    jobenvs, jobzip, jobdata = {}, '0' * 64, '0' * 64
    jobdict = {'prefix': 'JobPackBadLength',
               'scheduler': {},
               'input': ["raw://data"],
               "map?": True,
               'worker': "w",
               'owner': "o",
               'nr_reduces': "2"}
    whole = JobPack(jobdict, jobenvs, jobzip, jobdata).dumps()
    # Cut off the data payload plus one extra byte so the advertised
    # offsets point past the end of the pack.
    truncated = whole[:(len(whole) - len(jobdata) - 1)]
    status, response = loads(self.disco.request('/disco/job/new', truncated))
    self.assertEquals(status, 'error')
    self.assertTrue(response.find("invalid_header") >= 0)
def test_badheader(self):
    """Malformed header offset vectors (overlapping, out-of-order, or
    leaving no room for a section) must all be rejected."""
    cases = [
        ([hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2], 3),
        ([hdr_size, hdr_size, hdr_size + 1, hdr_size + 1], 2),
        ([hdr_size, hdr_size + 1, hdr_size, hdr_size + 1], 2),
        ([hdr_size, hdr_size + 1, hdr_size + 1, hdr_size], 2),
    ]
    for offsets, padding in cases:
        jobpack = JobPack.header(offsets) + '0' * padding
        status, response = loads(
            self.disco.request('/disco/job/new', jobpack))
        self.assertEquals(status, 'error')
def test_badheader(self):
    """The master must reject jobpacks whose header offsets are
    inconsistent (duplicated, reversed, or overlapping sections)."""
    bad_offsets = (
        ([hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2], '0' * 3),
        ([hdr_size, hdr_size, hdr_size + 1, hdr_size + 1], '0' * 2),
        ([hdr_size, hdr_size + 1, hdr_size, hdr_size + 1], '0' * 2),
        ([hdr_size, hdr_size + 1, hdr_size + 1, hdr_size], '0' * 2),
    )
    for offsets, body in bad_offsets:
        reply = loads(self.disco.request('/disco/job/new',
                                         JobPack.header(offsets) + body))
        status, response = reply
        self.assertEquals(status, 'error')
def event_iter(events):
    # Walk the newline-separated JSON events in `events`, yielding
    # (stream-offset-after-event, event-tuple) pairs.  `offset` (the
    # position of this chunk in the whole stream) comes from the
    # enclosing scope.
    offs = offset
    lines = events.split('\n')
    last = len(lines) - 1
    for i, line in enumerate(lines):
        if not line:
            continue
        offs += len(line) + 1
        try:
            event = tuple(json.loads(line))
        except ValueError:
            # Partially fetched trailing line: stop at the last
            # complete event.
            break
        # HTTP range request doesn't like empty ranges: ensure that
        # at least the last newline is always retrieved again.
        if i == last and events.endswith('\n'):
            offs -= 1
        yield offs, event
def test_badlength(self):
    """Truncating a jobpack below the length its header advertises
    must produce an "invalid header" rejection."""
    jobenvs, jobzip, jobdata = {}, '0' * 64, '0' * 64
    jobdict = {'prefix': 'JobPackBadLength',
               'scheduler': {},
               'input': ["raw://data"],
               "map?": True,
               'worker': "w",
               'owner': "o",
               'nr_reduces': "2"}
    full_pack = JobPack(jobdict, jobenvs, jobzip, jobdata).dumps()
    # Remove the data section (plus one byte) from the end.
    short_pack = full_pack[:(len(full_pack) - len(jobdata) - 1)]
    status, response = loads(
        self.disco.request('/disco/job/new', short_pack))
    self.assertEquals(status, 'error')
    self.assertTrue(response.find("invalid header") >= 0)
def jobinfo(self, jobname):
    """Returns a dict containing information about the job."""
    url = '/disco/ctrl/jobinfo?name=%s' % jobname
    return json.loads(self.request(url))
def _download(self, url, data=None, token=None, method='GET'):
    """Fetch `url` and return the JSON-decoded response body."""
    resolved = self._resolve(url)
    auth = self._token(url, token, method)
    return json.loads(download(resolved,
                               data=data,
                               method=method,
                               token=auth))
def _maybe_proxy(self, url, method='GET'):
    # Rewrite `url` to go through the configured HTTP proxy, if any;
    # otherwise return it unchanged.
    if self.proxy:
        scheme, (host, port), path = urlsplit(url)
        return '%s/proxy/%s/%s/%s' % (self.proxy, host, method, path)
    return url

# NOTE: Python 2 tuple-parameter syntax.  `exclude=[]` is a mutable
# default, but it is only read and rebound (`exclude + [host]`), never
# mutated in place, so the shared-default pitfall does not bite here.
def _push(self, (source, target), replicas=None, exclude=[], **kwargs):
    # Ask the master for destination urls for a new blob, skipping
    # hosts we have already failed against.
    qs = urlencode([(k, v)
                    for k, v in (('exclude', ','.join(exclude)),
                                 ('replicas', replicas))
                    if v])
    urls = self._download('%s/ddfs/new_blob/%s?%s' % (self.master, target, qs))
    try:
        return [json.loads(url)
                for url in self._upload(urls, source, **kwargs)]
    except CommError, e:
        # One replica failed: exclude that host and retry recursively.
        scheme, (host, port), path = urlsplit(e.url)
        return self._push((source, target),
                          replicas=replicas,
                          exclude=exclude + [host],
                          **kwargs)

def _tagattr(self, tag, attr):
    # Url of attribute `attr` of tag `tag`.
    return '%s/%s' % (self._resolve(canonizetag(tag)), attr)

def _token(self, url, token, method):
    # Resolve the auth token: an explicit argument wins, then any
    # token embedded in the url itself.
    if token is None:
        token = urltoken(url)
    # NOTE(review): the rest of this method is outside this chunk.
def jobenvs(self):
    """Read and JSON-decode the environment section of the jobfile
    (the bytes between envs_offset and home_offset)."""
    dict_offset, envs_offset, home_offset, data_offset = self.offsets(self.jobfile)
    self.jobfile.seek(envs_offset)
    size = home_offset - envs_offset
    return json.loads(self.jobfile.read(size))
def test_badmagic(self):
    """A jobpack header carrying the wrong magic number is rejected
    even when its offsets are consistent."""
    offsets = [hdr_size + n for n in range(4)]
    pack = JobPack.header(offsets, magic=0) + '0' * 4
    status, response = loads(self.disco.request('/disco/job/new', pack))
    self.assertEquals(status, 'error')
        # NOTE(review): this chunk begins mid-function.  The lines
        # below are the tail of a stream-copy loop -- read `src` in
        # 8KB chunks, write to `dst`, accumulate the byte count in
        # `s` -- whose `def` line is outside this view.
        b = src.read(8192)
        if not b:
            break
        s += len(b)
        dst.write(b)
    return s

# NOTE: Python 2 tuple-parameter syntax; `exclude=[]` is read-only
# here (rebound via `exclude + [host]`, never mutated in place).
def _push(self, (source, target), replicas=None, exclude=[], **kwargs):
    # Request destination urls for a new blob from the master,
    # excluding hosts that already failed.
    qs = urlencode([(k, v)
                    for k, v in (('exclude', ','.join(exclude)),
                                 ('replicas', replicas))
                    if v])
    urls = self._download('%s/ddfs/new_blob/%s?%s' % (self.master, target, qs))
    try:
        # to_master=False: blob uploads go to the storage nodes, not
        # through the master.
        return [json.loads(url)
                for url in self._upload(urls, source, to_master=False, **kwargs)]
    except CommError, e:
        # One replica failed: exclude that host and retry.
        scheme, (host, port), path = urlsplit(e.url)
        return self._push((source, target),
                          replicas=replicas,
                          exclude=exclude + [host],
                          **kwargs)

def _tagattr(self, tag, attr):
    # Url of attribute `attr` of tag `tag`.
    return '%s/%s' % (self._resolve(canonizetag(tag)), attr)

def _token(self, url, token, method):
    # Explicit token wins; otherwise fall back to any token embedded
    # in the url.
    if token is None:
        token = urltoken(url)
    # NOTE(review): the rest of this method is outside this chunk.
def jobenvs(self):
    """Read and JSON-decode the job-environment section of the jobfile.

    The envs section occupies [envs_offset, home_offset) in the
    jobfile.  BUG FIX: the original read ``home_offset - dict_offset``
    bytes after seeking to ``envs_offset``, overshooting the section
    by the size of the jobdict section and corrupting the decode.
    """
    dict_offset, envs_offset, home_offset, data_offset = self.offsets(
        self.jobfile)
    self.jobfile.seek(envs_offset)
    return json.loads(self.jobfile.read(home_offset - envs_offset))
def mapresults(self, jobname):
    """Return the JSON-decoded map results for `jobname` as reported
    by the master."""
    url = '/disco/ctrl/get_mapresults?name=%s' % jobname
    return json.loads(self.request(url))
        # NOTE(review): this chunk begins mid-function.  The lines
        # below are the tail of a stream-copy loop (read `src` in 8KB
        # chunks, write to `dst`, count bytes in `s`); its `def` line
        # is outside this view.
        b = src.read(8192)
        if not b:
            break
        s += len(b)
        dst.write(b)
    return s

# NOTE: Python 2 tuple-parameter syntax; `exclude=[]` is never
# mutated in place, only rebound via `exclude + [host]`.
def _push(self, (source, target), replicas=None, exclude=[], **kwargs):
    # Request destination urls for a new blob, excluding hosts that
    # have already failed.
    qs = urlencode([(k, v)
                    for k, v in (('exclude', ','.join(exclude)),
                                 ('replicas', replicas))
                    if v])
    urls = self._download('%s/ddfs/new_blob/%s?%s' % (self.master, target, qs))
    try:
        # to_master=False: upload straight to the storage nodes.
        return [json.loads(url)
                for url in self._upload(urls, source, to_master=False, **kwargs)]
    except CommError, e:
        # One replica failed: exclude its host and retry.
        scheme, (host, port), path = urlsplit(e.url)
        return self._push((source, target),
                          replicas=replicas,
                          exclude=exclude + [host],
                          **kwargs)

def _tagattr(self, tag, attr):
    # Url of attribute `attr` of tag `tag`.
    return '%s/%s' % (self._resolve(canonizetag(tag)), attr)

def _token(self, url, token, method):
    # Explicit token wins, then any token embedded in the url.
    if token is None:
        token = urltoken(url)
    # NOTE(review): chunk is cut off mid-statement below.
    if token is None:
def nodeinfo(self):
    """
    Returns a dictionary describing status of the nodes that
    are managed by this Disco master.
    """
    reply = self.request('/disco/ctrl/nodeinfo')
    return json.loads(reply)
def get_config(self):
    """Fetch and JSON-decode the master's config table."""
    raw = self.request('/disco/ctrl/load_config_table')
    return json.loads(raw)
def results(self, jobspec, timeout=2000):
    """
    Returns a list of results for a single job or for many
    concurrently running jobs, depending on the type of *jobspec*.

    :type jobspec: :class:`disco.job.Job`, string, or list
    :param jobspec: If a job or job name is provided,
                    return a tuple which looks like::

                        status, results

                    If a list is provided,
                    return two lists: inactive jobs and active jobs.
                    Both the lists contain elements of the following type::

                        jobname, (status, results)

                    where status is one of:
                    ``'unknown job'``, ``'dead'``, ``'active'``, or ``'ready'``.

    :type timeout: int
    :param timeout: wait at most this many milliseconds,
                    for at least one on the jobs to finish.

                    Using a list of jobs is a more efficient way to
                    wait for multiple jobs to finish.
                    Consider the following example that prints out
                    results as soon as the jobs (initially ``active``)
                    finish::

                        while active:
                            inactive, active = disco.results(jobs)
                            for jobname, (status, results) in inactive:
                                if status == 'ready':
                                    for k, v in result_iterator(results):
                                        print k, v
                                    disco.purge(jobname)

                    Note how the list of active jobs, ``active``,
                    returned by :meth:`Disco.results`,
                    can be used as the input to this function as well.
    """
    def jobname(job):
        # Normalize one jobspec element to its name: a Job object, a
        # plain job-name string, or a (jobname, ...) tuple.
        if isinstance(job, Job):
            return job.name
        elif isinstance(job, basestring):
            return job
        return job[0]
    jobnames = [jobname(job) for job in util.iterify(jobspec)]
    # One request covers all jobs; the master waits up to `timeout`
    # ms for at least one of them to finish.
    results = json.loads(self.request('/disco/ctrl/get_results',
                                      json.dumps([timeout, jobnames])))
    others, active = [], []
    for jobname, (status, result) in results:
        if isinstance(jobspec, (Job, basestring)):
            # Single-job query: unwrap and return its (status, results)
            # directly instead of the two-list form.
            return status, result
        elif status == 'active':
            active.append((jobname, (status, result)))
        else:
            others.append((jobname, (status, result)))
    return others, active
def submit(self, jobpack):
    """Post `jobpack` to the master and return the new job's name.

    Raises DiscoError unless the master acknowledges with 'ok'.
    """
    status, body = json.loads(self.request('/disco/job/new', jobpack))
    if status == 'ok':
        return body
    raise DiscoError("Failed to submit jobpack: %s" % body)
def results(self, jobspec, timeout=2000):
    """
    Returns a list of results for a single job or for many
    concurrently running jobs, depending on the type of *jobspec*.

    :type jobspec: :class:`disco.job.Job`, string, or list
    :param jobspec: If a job or job name is provided,
                    return a tuple which looks like::

                        status, results

                    If a list is provided,
                    return two lists: inactive jobs and active jobs.
                    Both the lists contain elements of the following type::

                        jobname, (status, results)

                    where status is one of:
                    ``'unknown job'``, ``'dead'``, ``'active'``, or ``'ready'``.

    :type timeout: int
    :param timeout: wait at most this many milliseconds,
                    for at least one on the jobs to finish.

                    Using a list of jobs is a more efficient way to
                    wait for multiple jobs to finish.
                    Consider the following example that prints out
                    results as soon as the jobs (initially ``active``)
                    finish::

                        while active:
                            inactive, active = disco.results(jobs)
                            for jobname, (status, results) in inactive:
                                if status == 'ready':
                                    for k, v in result_iterator(results):
                                        print k, v
                                    disco.purge(jobname)

                    Note how the list of active jobs, ``active``,
                    returned by :meth:`Disco.results`,
                    can be used as the input to this function as well.
    """
    def jobname(job):
        # Normalize one jobspec element to its name: a Job object, a
        # plain job-name string, or a (jobname, ...) tuple.
        if isinstance(job, Job):
            return job.name
        elif isinstance(job, basestring):
            return job
        return job[0]
    jobnames = [jobname(job) for job in util.iterify(jobspec)]
    # One request covers all jobs; the master waits up to `timeout`
    # ms for at least one of them to finish.
    results = json.loads(
        self.request('/disco/ctrl/get_results',
                     json.dumps([timeout, jobnames])))
    others, active = [], []
    for jobname, (status, result) in results:
        if isinstance(jobspec, (Job, basestring)):
            # Single-job query: unwrap and return its (status, results)
            # directly instead of the two-list form.
            return status, result
        elif status == 'active':
            active.append((jobname, (status, result)))
        else:
            others.append((jobname, (status, result)))
    return others, active
def set_config(self, config):
    """Upload `config` as the master's config table; raise DiscoError
    unless the master confirms the save."""
    reply = json.loads(self.request('/disco/ctrl/save_config_table',
                                    json.dumps(config)))
    if reply != 'table saved!':
        raise DiscoError(reply)
def set_config(self, config):
    """Save `config` as the master's config table.

    Raises DiscoError if the master does not acknowledge the save.
    """
    payload = json.dumps(config)
    answer = json.loads(
        self.request('/disco/ctrl/save_config_table', payload))
    if answer != 'table saved!':
        raise DiscoError(answer)
def joblist(self):
    """Returns a list of jobs and their statuses."""
    raw = self.request('/disco/ctrl/joblist')
    return json.loads(raw)
def test_badmagic(self):
    """Consistent offsets are not enough: a header whose magic number
    is wrong must still be rejected."""
    offsets = [hdr_size, hdr_size + 1, hdr_size + 2, hdr_size + 3]
    bad_pack = JobPack.header(offsets, magic=0) + '0' * 4
    status, response = loads(
        self.disco.request('/disco/job/new', bad_pack))
    self.assertEquals(status, 'error')