def test_badinfo(self):
    """A version-1 jobpack with an empty jobdict must be refused.

    The pack is posted to ``/disco/job/new``; the decoded reply is expected
    to carry the status ``'error'`` and a message containing
    ``"missing key"``.
    """
    jobenvs, jobzip, jobdata = {}, b'', b''
    jobdict = {}  # deliberately empty: no job keys are supplied at all
    jobpack = JobPack(JOBPACK_VERSION1, jobdict, jobenvs, jobzip, jobdata)
    reply = self.disco.request('/disco/job/new', jobpack.dumps())
    status, response = loads(reply)
    # assertEqual/assertIn replace the deprecated ``assertEquals`` alias
    # (removed in Python 3.12) and give clearer failure messages than
    # ``assertTrue(x.find(...) >= 0)``.
    self.assertEqual(status, 'error')
    self.assertIn("missing key", response)
def test_badinfo(self):
    """A jobpack built from an empty jobdict must be refused.

    The pack is posted to ``/disco/job/new``; the decoded reply is expected
    to carry the status ``'error'`` and a message containing
    ``"missing key"``.
    """
    jobenvs, jobzip, jobdata = {}, b'', b''
    jobdict = {}  # deliberately empty: no job keys are supplied at all
    jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
    reply = self.disco.request('/disco/job/new', jobpack.dumps())
    status, response = loads(reply)
    # assertEqual/assertIn replace the deprecated ``assertEquals`` alias
    # (removed in Python 3.12) and report the offending values on failure.
    self.assertEqual(status, 'error')
    self.assertIn("missing key", response)
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    # NOTE(review): the docstring above reads as command-line usage text
    # (presumably shown to users -- confirm with the CLI driver), so its
    # wording is kept unchanged and maintainer notes live in comments.
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def build_jobzip(*paths):
        # Write every path into a fresh DiscoZipFile and close it.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # '@<path>' means "use the contents of that file"; any other value
        # is used verbatim.
        if data.startswith('@'):
            # The context manager closes the handle deterministically; the
            # previous bare open(...).read() left that to the garbage
            # collector.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def prefix(p):
        # Fall back to the worker's base name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {
        'input': program.input(*inputs),
        'worker': worker,
        'map?': program.options.has_map,
        'reduce?': program.options.has_reduce,
        'nr_reduces': program.options.nr_reduces,
        'prefix': prefix(program.options.prefix),
        'scheduler': program.scheduler,
        'owner': program.options.owner or program.settings['DISCO_JOB_OWNER'],
    }
    jobenvs = dict(program.options.env)
    # The worker itself is always the first entry of the job archive.
    jobzip = build_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)

    if program.options.verbose:
        # Echo each of the four jobpack parts, one tab-indented line per item.
        print("jobdict:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t{0}".format(name) for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t{0}".format(line) for line in jobdata.splitlines()))

    if program.options.dump_jobpack:
        # Dump the serialized pack instead of submitting it.
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    # NOTE(review): the docstring above reads as command-line usage text
    # (presumably shown to users -- confirm with the CLI driver), so its
    # wording is kept unchanged and maintainer notes live in comments.
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def make_archive(*paths):
        # Write every path into a fresh DiscoZipFile and close it.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def read_payload(data):
        # "@<path>" selects the contents of that file; everything else is
        # passed through untouched.
        if data.startswith("@"):
            # Use a context manager so the handle is closed right away
            # rather than whenever the file object gets collected.
            with open(data[1:]) as payload_file:
                return payload_file.read()
        return data

    def prefix(p):
        # Fall back to the worker's base name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {
        "input": program.input(*inputs),
        "worker": worker,
        "map?": program.options.has_map,
        "reduce?": program.options.has_reduce,
        "nr_reduces": program.options.nr_reduces,
        "prefix": prefix(program.options.prefix),
        "scheduler": program.scheduler,
        "owner": program.options.owner or program.settings["DISCO_JOB_OWNER"],
    }
    jobenvs = dict(program.options.env)
    # The worker itself is always the first entry of the job archive.
    jobzip = make_archive(worker, *program.options.files)
    jobdata = read_payload(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)

    if program.options.verbose:
        # Echo each of the four jobpack parts, one tab-indented line per item.
        print("jobdict:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t{0}".format(name) for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t{0}".format(line) for line in jobdata.splitlines()))

    if program.options.dump_jobpack:
        # Dump the serialized pack instead of submitting it.
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
def test_badprefix(self):
    """Job prefixes containing ``/`` or ``.`` must be refused.

    For each bad prefix an otherwise populated jobdict is packed and posted
    to ``/disco/job/new``; the reply must have status ``'error'`` and a
    message containing ``"invalid prefix"``.
    """
    jobenvs, jobzip, jobdata = {}, b'', b''
    # The two cases differ only in the prefix, so drive them from one loop
    # instead of repeating the whole stanza.
    for bad_prefix in ('a/b', 'a.b'):
        jobdict = {'prefix': bad_prefix, 'scheduler': {}, 'input': [],
                   'worker': "w", 'owner': "o", 'nr_reduces': "2"}
        jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
        status, response = loads(self.disco.request('/disco/job/new',
                                                    jobpack.dumps()))
        # assertEqual/assertIn replace the deprecated ``assertEquals``
        # alias (removed in Python 3.12); the message names the failing
        # prefix.
        self.assertEqual(status, 'error',
                         'prefix {0!r} was not rejected'.format(bad_prefix))
        self.assertIn("invalid prefix", response)
def __init__(self, host='', jobfile='', jobname='', master=None,
             disco_port=None, put_port=None, ddfs_data='', disco_data='',
             mode=None, taskid=-1):
    """Record the task parameters and load the job's jobpack.

    ``jobfile`` is opened in binary mode and handed to ``JobPack.load``;
    the pack's ``jobdata`` is then unpickled into ``self.jobobjs``.  All
    other arguments are stored unchanged on the instance.  ``self.uid`` is
    built as ``<mode>:<taskid>-<hash of the current time>-<pid>``.
    """
    from disco.job import JobPack

    self.host = host
    self.jobfile = jobfile
    self.jobname = jobname

    # The handle is passed to JobPack.load and is not closed here.
    # NOTE(review): whether load() reads eagerly is not visible from this
    # block, so the handle is deliberately left open.
    packfile = open(jobfile, 'rb')
    self.jobpack = JobPack.load(packfile)
    # NOTE(review): dPickle looks like a pickle wrapper -- unpickling is
    # only safe when the jobpack comes from a trusted source.
    self.jobobjs = dPickle.loads(self.jobpack.jobdata)

    self.master = master
    self.disco_port = disco_port
    self.put_port = put_port
    self.ddfs_data = ddfs_data
    self.disco_data = disco_data
    self.mode = mode
    self.taskid = taskid
    self.outputs = {}

    # Hash of the stringified wall-clock time, used as a uniquifier.
    time_hash = hexhash(str((time.time())).encode())
    self.uid = '{0}:{1}-{2}-{3}'.format(mode, taskid, time_hash, os.getpid())
def __init__(self, host='', jobfile='', jobname='', master=None,
             disco_port=None, put_port=None, ddfs_data='', disco_data='',
             stage=None, group=None, grouping=None, taskid=-1):
    """Record the task parameters and load the job's jobpack.

    ``jobfile`` is opened in binary mode and handed to ``JobPack.load``;
    the pack's ``jobdata`` is then unpickled into ``self.jobobjs``.

    ``group`` is indexed at ``[0]`` and ``[1]`` and unpacked into
    ``(group_label, group_host)``, so it has to be a two-item sequence;
    the ``None`` default cannot be formatted or unpacked.

    ``self.uid`` is built as
    ``<stage>:<safe group name>-<taskid>-<hash of current time>-<pid>``.
    """
    from disco.job import JobPack
    from disco.ddfs import DDFS

    self.host = host
    self.jobfile = jobfile
    self.jobname = jobname

    # The handle is passed to JobPack.load and is not closed here.
    # NOTE(review): whether load() reads eagerly is not visible from this
    # block, so the handle is deliberately left open.
    packfile = open(jobfile, 'rb')
    self.jobpack = JobPack.load(packfile)
    # NOTE(review): dPickle looks like a pickle wrapper -- unpickling is
    # only safe when the jobpack comes from a trusted source.
    self.jobobjs = dPickle.loads(self.jobpack.jobdata)

    self.master = master
    self.disco_port = disco_port
    self.put_port = put_port
    self.ddfs_data = ddfs_data
    self.disco_data = disco_data
    self.stage = stage

    # "<label>-<host>" string form first, then the two separate parts.
    self.group = '{0[0]}-{0[1]}'.format(group)
    self.group_label, self.group_host = group
    self.grouping = grouping
    self.taskid = taskid
    self.outputs = {}

    uid_parts = (
        self.stage,
        DDFS.safe_name(self.group),
        self.taskid,
        hexhash(str((time.time())).encode()),
        os.getpid(),
    )
    self.uid = '{0}:{1}-{2}-{3}-{4}'.format(*uid_parts)
def pack(jobname, jobdict, worker, workdir):
    """Serialize a job into a jobpack string (header followed by 4 fields).

    The fields are, in order: ``jobdict`` as JSON, an empty JSON object, a
    zip archive, and an empty string.  The archive holds ``worker`` alone
    when ``workdir`` is falsy; otherwise it holds everything that
    ``os.path.walk`` reports under ``workdir``.  ``jobname`` is accepted
    but not used by this function.

    Returns the header built by ``JobPack.header`` from the field offsets,
    concatenated with the fields.
    """
    import cStringIO

    zip_buffer = cStringIO.StringIO()
    archive = zipfile.ZipFile(zip_buffer, "w")
    # FIXME: Strip off leading pathname elements from zipped paths
    if workdir:
        def add_directory(arg, dirname, names):
            # Called once per directory: add the directory itself, then
            # each of the entries listed in it.
            archive.write(dirname)
            for entry in names:
                archive.write(os.path.join(dirname, entry))
        os.path.walk(workdir, add_directory, None)
    else:
        archive.write(worker)
    archive.close()

    fields = (json.dumps(jobdict), json.dumps({}), zip_buffer.getvalue(), '')

    # Each field starts where the previous one ended; the first starts
    # right after the fixed-size header.
    offsets = []
    position = JobPack.HEADER_SIZE
    for field in fields:
        offsets.append(position)
        position += len(field)

    header = JobPack.header(tuple(offsets))
    return header + ''.join(fields)
def __init__(self, host='', jobfile='', jobname='', master=None,
             disco_port=None, put_port=None, ddfs_data='', disco_data='',
             mode=None, taskid=-1):
    """Record the task parameters and load the job's jobpack.

    ``jobfile`` is opened and handed to ``JobPack.load``; the pack's
    ``jobdata`` is then unpickled into ``self.jobobjs``.  All other
    arguments are stored unchanged on the instance.  ``self.uid`` is built
    as ``<mode>:<taskid>-<hash of the current time>-<pid in hex>``.
    """
    from disco.job import JobPack

    self.host = host
    self.jobfile = jobfile
    self.jobname = jobname
    # A jobpack is binary data, so open it in binary mode; text mode can
    # corrupt it on platforms that translate line endings.
    # NOTE(review): the handle is passed to JobPack.load and not closed
    # here -- whether load() reads eagerly is not visible from this block.
    self.jobpack = JobPack.load(open(jobfile, 'rb'))
    # NOTE(review): dPickle looks like a pickle wrapper -- unpickling is
    # only safe when the jobpack comes from a trusted source.
    self.jobobjs = dPickle.loads(self.jobpack.jobdata)
    self.master = master
    self.disco_port = disco_port
    self.put_port = put_port
    self.ddfs_data = ddfs_data
    self.disco_data = disco_data
    self.mode = mode
    self.taskid = taskid
    self.outputs = {}
    # Hash of the stringified wall-clock time, used as a uniquifier.
    time_hash = hexhash(str((time.time())))
    self.uid = '%s:%s-%s-%x' % (mode, taskid, time_hash, os.getpid())
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    # NOTE(review): the docstring above reads as command-line usage text
    # (presumably shown to users -- confirm with the CLI driver), so its
    # wording is kept unchanged and maintainer notes live in comments.
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def build_jobzip(*paths):
        # Write every path into a fresh DiscoZipFile and close it.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # '@<path>' means "use the contents of that file"; any other value
        # is used verbatim.
        if data.startswith('@'):
            # Close the handle deterministically instead of relying on the
            # garbage collector.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def prefix(p):
        # Fall back to the worker's base name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {'input': program.input(*inputs),
               'worker': worker,
               'map?': program.options.has_map,
               'reduce?': program.options.has_reduce,
               'nr_reduces': program.options.nr_reduces,
               'prefix': prefix(program.options.prefix),
               'scheduler': program.scheduler,
               'owner': program.options.owner or program.settings['DISCO_JOB_OWNER']}
    jobenvs = dict(program.options.env)
    # The worker itself is always the first entry of the job archive.
    jobzip = build_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)

    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid syntax on Python 3.
    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t%s\t%s" % item for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t%s\t%s" % item for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t%s" % name for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t%s" % line for line in jobdata.splitlines()))

    if program.options.dump_jobpack:
        # Dump the serialized pack instead of submitting it.
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
def jobpack(self, jobname):
    """Return the :class:`disco.job.JobPack` submitted for the job."""
    from disco.compat import BytesIO
    from disco.job import JobPack

    # Request the job's parameters as raw bytes ...
    url = '/disco/ctrl/parameters?name={0}'.format(jobname)
    raw_pack = self.request(url, as_bytes=True)
    # ... and let JobPack parse them through a file-like wrapper.
    return JobPack.load(BytesIO(raw_pack))
def test_badlength(self):
    """A jobpack truncated inside its last field must be refused.

    A version-1 pack with 64-byte jobzip and jobdata payloads is
    serialized, then ``len(jobdata) + 1`` bytes are cut off its end before
    it is posted to ``/disco/job/new``.  The reply must have status
    ``'error'`` and a message containing ``"invalid header"``.
    """
    jobenvs, jobzip, jobdata = {}, b'0'*64, b'0'*64
    jobdict = {'prefix': 'JobPackBadLength', 'scheduler': {},
               'input': ["raw://data"], "map?": True,
               'worker': "w", 'owner': "o", 'nr_reduces': "2"}
    packed = JobPack(JOBPACK_VERSION1, jobdict, jobenvs, jobzip, jobdata).dumps()
    # Drop the whole jobdata payload plus one more byte.
    truncated = packed[:(len(packed)-len(jobdata)-1)]
    status, response = loads(self.disco.request('/disco/job/new', truncated))
    # assertEqual/assertIn replace the deprecated ``assertEquals`` alias
    # (removed in Python 3.12) and report the offending values on failure.
    self.assertEqual(status, 'error')
    self.assertIn("invalid header", response)
def test_badheader(self):
    """Jobpacks with inconsistent header offsets must be refused.

    Each case is an offset table plus the number of filler bytes appended
    after the header; every resulting pack is posted to ``/disco/job/new``
    and must come back with status ``'error'``.
    """
    cases = [
        # first offset does not start right after the header
        ([hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2], 3),
        # first and second offsets are equal
        ([hdr_size, hdr_size, hdr_size + 1, hdr_size + 1], 2),
        # third offset is smaller than the second
        ([hdr_size, hdr_size + 1, hdr_size, hdr_size + 1], 2),
        # fourth offset is smaller than the third
        ([hdr_size, hdr_size + 1, hdr_size + 1, hdr_size], 2),
    ]
    for offsets, filler_len in cases:
        jobpack = JobPack.header(offsets) + b'0' * filler_len
        status, _response = loads(self.disco.request('/disco/job/new', jobpack))
        # assertEqual replaces the deprecated ``assertEquals`` alias
        # (removed in Python 3.12); the message names the failing case.
        self.assertEqual(status, 'error',
                         'offsets {0!r} were not rejected'.format(offsets))
def test_badheader(self):
    """Jobpacks with inconsistent header offsets must be refused.

    Each case is an offset table plus the number of filler bytes appended
    after the header; every resulting pack is posted to ``/disco/job/new``
    and must come back with status ``'error'``.
    """
    bad_headers = (
        # first offset does not start right after the header
        ([hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2], 3),
        # first and second offsets are equal
        ([hdr_size, hdr_size, hdr_size + 1, hdr_size + 1], 2),
        # third offset is smaller than the second
        ([hdr_size, hdr_size + 1, hdr_size, hdr_size + 1], 2),
        # fourth offset is smaller than the third
        ([hdr_size, hdr_size + 1, hdr_size + 1, hdr_size], 2),
    )
    for offsets, filler_len in bad_headers:
        jobpack = JobPack.header(offsets) + b'0' * filler_len
        status, _response = loads(self.disco.request('/disco/job/new', jobpack))
        # assertEqual replaces the deprecated ``assertEquals`` alias
        # (removed in Python 3.12); the message names the failing case.
        self.assertEqual(status, 'error',
                         'offsets {0!r} were not rejected'.format(offsets))
def test_badprefix(self):
    """Job prefixes containing ``/`` or ``.`` must be refused.

    For each bad prefix an otherwise populated jobdict is packed and posted
    to ``/disco/job/new``; the reply must have status ``'error'`` and a
    message containing ``"invalid prefix"``.
    """
    jobenvs, jobzip, jobdata = {}, b'', b''
    # Only the prefix varies between the cases, so share one stanza.
    for bad_prefix in ('a/b', 'a.b'):
        jobdict = {
            'prefix': bad_prefix,
            'scheduler': {},
            'input': [],
            'worker': "w",
            'owner': "o",
            'nr_reduces': "2"
        }
        jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
        status, response = loads(
            self.disco.request('/disco/job/new', jobpack.dumps()))
        # assertEqual/assertIn replace the deprecated ``assertEquals``
        # alias (removed in Python 3.12); the message names the failing
        # prefix.
        self.assertEqual(status, 'error',
                         'prefix {0!r} was not rejected'.format(bad_prefix))
        self.assertIn("invalid prefix", response)
def __init__(self, jobpack):
    """Set up a local job from a serialized jobpack.

    The pack is parsed with ``JobPack.load``, the job is given a name of
    the form ``<prefix>@<timestamp>``, its jobhome is extracted beneath
    ``$DISCO_DATA``, the raw pack is saved and the worker is made
    executable.  Raises ``KeyError`` when ``DISCO_DATA`` is not set in the
    environment.
    """
    self.jobpack = JobPack.load(StringIO(jobpack))
    self.host = "localhost"
    self.data_root = os.environ['DISCO_DATA']

    # self.prefix is not assigned in this method; it is read here as an
    # attribute defined elsewhere on the class.
    self.name = "{}@{}".format(self.prefix, leisure.disco.timestamp())

    # Relative home: <host>/<hash of name>/<name>
    name_hash = leisure.disco.hex_hash(self.name)
    self.home = os.path.join(self.host, name_hash, self.name)

    extract_root = os.path.join(self.data_root, self.home)
    self.job_dir = extract_jobhome(extract_root, self.jobpack.jobhome)

    self.save_jobfile(jobpack)
    self.ensure_worker_executable()
    self.results = []
    self.status = "active"
def __init__(self, jobpack):
    """Set up a local job from a serialized jobpack.

    Parses the pack with ``JobPack.load``, names the job
    ``<prefix>@<timestamp>``, extracts its jobhome beneath ``$DISCO_DATA``,
    saves the raw pack and makes the worker executable.  Raises
    ``KeyError`` when ``DISCO_DATA`` is not set in the environment.
    """
    pack_stream = StringIO(jobpack)
    self.jobpack = JobPack.load(pack_stream)
    self.host = "localhost"
    self.data_root = os.environ['DISCO_DATA']

    # self.prefix is not assigned in this method; it is read here as an
    # attribute defined elsewhere on the class.
    job_prefix = self.prefix
    self.name = "{}@{}".format(job_prefix, leisure.disco.timestamp())

    # Relative home: <host>/<hash of name>/<name>
    home_parts = (self.host, leisure.disco.hex_hash(self.name), self.name)
    self.home = os.path.join(*home_parts)

    self.job_dir = extract_jobhome(
        os.path.join(self.data_root, self.home),
        self.jobpack.jobhome,
    )

    self.save_jobfile(jobpack)
    self.ensure_worker_executable()
    self.results = []
    self.status = "active"
def __init__(self, host='', jobfile='', jobname='', master=None,
             disco_port=None, put_port=None, ddfs_data='', disco_data='',
             stage=None, group=None, grouping=None, taskid=-1):
    """Store the task parameters and load the jobpack from ``jobfile``.

    The jobpack is read via ``JobPack.load`` from a binary-mode handle and
    its ``jobdata`` is unpickled into ``self.jobobjs``.

    ``group`` must be a two-item sequence: it is formatted as
    ``"<item0>-<item1>"`` into ``self.group`` and unpacked into
    ``self.group_label`` / ``self.group_host``.  The ``None`` default
    cannot be formatted or unpacked.

    ``self.uid`` has the form
    ``<stage>:<safe group name>-<taskid>-<hash of current time>-<pid>``.
    """
    from disco.job import JobPack
    from disco.ddfs import DDFS

    self.host = host
    self.jobfile = jobfile
    self.jobname = jobname

    # The handle is passed to JobPack.load and is not closed here.
    # NOTE(review): whether load() reads eagerly is not visible from this
    # block, so the handle is deliberately left open.
    pack_handle = open(jobfile, 'rb')
    self.jobpack = JobPack.load(pack_handle)
    # NOTE(review): dPickle looks like a pickle wrapper -- unpickling is
    # only safe when the jobpack comes from a trusted source.
    self.jobobjs = dPickle.loads(self.jobpack.jobdata)

    self.master = master
    self.disco_port = disco_port
    self.put_port = put_port
    self.ddfs_data = ddfs_data
    self.disco_data = disco_data
    self.stage = stage

    # String form first, then the two separate parts.
    self.group = '{0[0]}-{0[1]}'.format(group)
    self.group_label, self.group_host = group
    self.grouping = grouping
    self.taskid = taskid
    self.outputs = {}

    safe_group = DDFS.safe_name(self.group)
    # Hash of the stringified wall-clock time, used as a uniquifier.
    time_hash = hexhash(str((time.time())).encode())
    self.uid = '{0}:{1}-{2}-{3}-{4}'.format(
        self.stage, safe_group, self.taskid, time_hash, os.getpid())
def test_badmagic(self):
    """A jobpack whose header is built with ``magic=0`` must be refused.

    The offsets themselves are strictly increasing from ``hdr_size``, so
    the magic value is the only thing this test varies.  The pack is
    posted to ``/disco/job/new`` and must come back with status
    ``'error'``.
    """
    offsets = [hdr_size, hdr_size + 1, hdr_size + 2, hdr_size + 3]
    jobpack = JobPack.header(offsets, magic=0) + b'0'*4
    status, _response = loads(self.disco.request('/disco/job/new', jobpack))
    # assertEqual replaces the deprecated ``assertEquals`` alias, which was
    # removed in Python 3.12.
    self.assertEqual(status, 'error')
def jobpack(self, jobname):
    """Return the :class:`disco.job.JobPack` submitted for the job."""
    from cStringIO import StringIO
    from disco.job import JobPack

    # Request the job's parameters ...
    url = '/disco/ctrl/parameters?name=%s' % jobname
    raw_pack = self.request(url)
    # ... and let JobPack parse them through a file-like wrapper.
    return JobPack.load(StringIO(raw_pack))
def test_badmagic(self):
    """A jobpack whose header is built with ``magic=0`` must be refused.

    The offsets themselves are strictly increasing from ``hdr_size``, so
    the magic value is the only thing this test varies.  The pack is
    posted to ``/disco/job/new`` and must come back with status
    ``'error'``.
    """
    offsets = [hdr_size + step for step in range(4)]
    jobpack = JobPack.header(offsets, magic=0) + b'0' * 4
    status, _response = loads(self.disco.request('/disco/job/new', jobpack))
    # assertEqual replaces the deprecated ``assertEquals`` alias, which was
    # removed in Python 3.12.
    self.assertEqual(status, 'error')
def jobpack(self, jobname):
    """Return the :class:`disco.job.JobPack` submitted for the job."""
    from disco.compat import BytesIO
    from disco.job import JobPack

    # The parameters endpoint is asked for raw bytes, which are wrapped in
    # an in-memory stream for JobPack.load.
    query = '/disco/ctrl/parameters?name={0}'.format(jobname)
    payload = self.request(query, as_bytes=True)
    stream = BytesIO(payload)
    return JobPack.load(stream)
def jobpack(self, jobname):
    """Return the :class:`disco.job.JobPack` submitted for the job."""
    from cStringIO import StringIO
    from disco.job import JobPack

    # The response of the parameters endpoint is wrapped in an in-memory
    # stream for JobPack.load.
    query = '/disco/ctrl/parameters?name=%s' % jobname
    payload = self.request(query)
    stream = StringIO(payload)
    return JobPack.load(stream)