def test_badprefix(self):
    """Check that job submissions with an unacceptable prefix are refused.

    For each of the prefixes 'a/b' and 'a.b' a jobpack is posted to
    '/disco/job/new'; the decoded reply must have status 'error' and a
    message containing "invalid prefix".
    """
    # NOTE(review): JobPack is called with four arguments here, while the
    # sibling tests pass JOBPACK_VERSION1 first -- confirm which signature
    # is current.
    jobenvs, jobzip, jobdata = {}, b'', b''
    # presumably rejected because of the '/' and '.' characters -- the
    # validation rule itself is not visible from this test.
    for bad_prefix in ('a/b', 'a.b'):
        jobdict = {
            'prefix': bad_prefix,
            'scheduler': {},
            'input': [],
            'worker': "w",
            'owner': "o",
            'nr_reduces': "2",
        }
        jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
        status, response = loads(
            self.disco.request('/disco/job/new', jobpack.dumps()))
        # assertEqual/assertIn: the assertEquals alias was removed in
        # Python 3.12, and assertIn reports the actual response on failure.
        self.assertEqual(status, 'error')
        self.assertIn("invalid prefix", response)
def test_badinfo(self):
    """Check that a jobpack with an empty jobdict is refused.

    The pack is posted to '/disco/job/new'; the decoded reply must have
    status 'error' and a message containing "missing key".
    """
    jobenvs, jobzip, jobdata = {}, b'', b''
    jobdict = {}  # deliberately empty: no job keys are provided at all
    jobpack = JobPack(JOBPACK_VERSION1, jobdict, jobenvs, jobzip, jobdata)
    status, response = loads(
        self.disco.request('/disco/job/new', jobpack.dumps()))
    # assertEqual/assertIn: the assertEquals alias was removed in
    # Python 3.12, and assertIn reports the actual response on failure.
    self.assertEqual(status, 'error')
    self.assertIn("missing key", response)
def test_badlength(self):
    """Check that a truncated jobpack is refused.

    A serialized jobpack is cut short by the length of its jobdata plus
    one byte and posted to '/disco/job/new'; the decoded reply must have
    status 'error' and a message containing "invalid header".
    """
    jobenvs, jobzip, jobdata = {}, b'0'*64, b'0'*64
    jobdict = {
        'prefix': 'JobPackBadLength',
        'scheduler': {},
        'input': ["raw://data"],
        "map?": True,
        'worker': "w",
        'owner': "o",
        'nr_reduces': "2",
    }
    jobpack = JobPack(JOBPACK_VERSION1, jobdict, jobenvs, jobzip, jobdata).dumps()
    # Drop len(jobdata) + 1 bytes from the end so the payload is shorter
    # than the serialized pack originally was.
    truncated_length = len(jobpack) - len(jobdata) - 1
    jobpack = jobpack[:truncated_length]
    status, response = loads(self.disco.request('/disco/job/new', jobpack))
    # assertEqual/assertIn: the assertEquals alias was removed in
    # Python 3.12, and assertIn reports the actual response on failure.
    self.assertEqual(status, 'error')
    self.assertIn("invalid header", response)
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    # NOTE(review): the docstring above reads like CLI usage text and may be
    # shown to users, so its wording is intentionally left unchanged.
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def build_jobzip(*paths):
        # Write every path into a fresh DiscoZipFile and close it.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # A leading '@' means "read the data from the file named after it";
        # anything else is used literally.
        if data.startswith('@'):
            # Context manager so the file handle is closed deterministically
            # instead of being left to the garbage collector.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def choose_prefix(p):
        # Fall back to the worker's file name up to its first '.'.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {
        'input': program.input(*inputs),
        'worker': worker,
        'map?': program.options.has_map,
        'reduce?': program.options.has_reduce,
        'nr_reduces': program.options.nr_reduces,
        'prefix': choose_prefix(program.options.prefix),
        'scheduler': program.scheduler,
        'owner': program.options.owner or program.settings['DISCO_JOB_OWNER'],
    }
    jobenvs = dict(program.options.env)
    # The worker itself always goes into the zip, followed by any extra files.
    jobzip = build_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)

    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item)
                        for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item)
                        for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t{0}".format(name) for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t{0}".format(line) for line in jobdata.splitlines()))

    # Either dump the serialized pack to stdout or submit it and print
    # whatever the submit call returns.
    if program.options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))