def test_badinfo(self):
    # Submit a version-1 jobpack whose jobdict is empty; the reply is
    # expected to be an 'error' whose message contains "missing key".
    jobenvs, jobzip, jobdata = {}, b'', b''
    jobpack = JobPack(JOBPACK_VERSION1, {}, jobenvs, jobzip, jobdata)
    reply = self.disco.request('/disco/job/new', jobpack.dumps())
    status, response = loads(reply)
    # assertEqual is the supported spelling; the assertEquals alias is
    # deprecated and no longer exists in Python 3.12.
    self.assertEqual(status, 'error')
    self.assertTrue("missing key" in response)
Example #2
0
 def test_badinfo(self):
     # Submit a jobpack built from an empty jobdict; the reply is expected
     # to be an 'error' whose message contains "missing key".
     jobenvs, jobzip, jobdata = {}, b'', b''
     jobpack = JobPack({}, jobenvs, jobzip, jobdata)
     reply = self.disco.request('/disco/job/new', jobpack.dumps())
     status, response = loads(reply)
     # assertEqual is the supported spelling; the assertEquals alias is
     # deprecated and no longer exists in Python 3.12.
     self.assertEqual(status, 'error')
     self.assertTrue("missing key" in response)
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    # NOTE(review): the docstring is kept verbatim -- it reads like CLI usage
    # text and is presumably shown to users; confirm before rewording it.
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def build_jobzip(*paths):
        # Add every path to a new DiscoZipFile and close it before returning.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # A leading '@' means "read the payload from the named file";
        # any other value is used as the payload itself.
        if data.startswith('@'):
            # The context manager guarantees the file handle is closed.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def prefix(p):
        # Fall back to the worker's file name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    options = program.options
    jobdict = {
        'input': program.input(*inputs),
        'worker': worker,
        'map?': options.has_map,
        'reduce?': options.has_reduce,
        'nr_reduces': options.nr_reduces,
        'prefix': prefix(options.prefix),
        'scheduler': program.scheduler,
        'owner': options.owner or program.settings['DISCO_JOB_OWNER']
    }
    jobenvs = dict(options.env)
    # The worker is always archived, followed by any extra files.
    jobzip = build_jobzip(worker, *options.files)
    jobdata = load_jobdata(options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)
    if options.verbose:
        print("jobdict:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item)
                        for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item)
                        for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t{0}".format(name) for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t{0}".format(line) for line in jobdata.splitlines()))
    # Either show the serialized jobpack or submit it and show the result.
    if options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
Example #4
0
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    # NOTE(review): the docstring is kept verbatim -- it reads like CLI usage
    # text and is presumably shown to users; confirm before rewording it.
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def make_archive(*paths):
        # Collect all paths into one DiscoZipFile, closed before it is returned.
        zipped = DiscoZipFile()
        for path in paths:
            zipped.writepath(path)
        zipped.close()
        return zipped

    def resolve_data(data):
        # Plain values are passed through; "@path" is replaced by the
        # contents of the file at ``path``.
        if not data.startswith("@"):
            return data
        # ``with`` closes the file even if reading fails.
        with open(data[1:]) as source:
            return source.read()

    def prefix(p):
        # Default prefix: the worker's base name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {
        "input": program.input(*inputs),
        "worker": worker,
        "map?": program.options.has_map,
        "reduce?": program.options.has_reduce,
        "nr_reduces": program.options.nr_reduces,
        "prefix": prefix(program.options.prefix),
        "scheduler": program.scheduler,
        "owner": program.options.owner or program.settings["DISCO_JOB_OWNER"],
    }
    jobenvs = dict(program.options.env)
    # The worker itself is always the first archived path.
    jobzip = make_archive(worker, *program.options.files)
    jobdata = resolve_data(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)
    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t{0}".format(name) for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t{0}".format(line) for line in jobdata.splitlines()))
    # dump_jobpack prints the serialized pack instead of submitting it.
    if program.options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
Example #5
0
    def test_badprefix(self):
        # Prefixes containing '/' or '.' are each submitted in an otherwise
        # identical jobdict; both replies are expected to be an 'error'
        # whose message contains "invalid prefix".
        jobenvs, jobzip, jobdata = {}, b'', b''
        for prefix in ('a/b', 'a.b'):
            jobdict = {'prefix': prefix, 'scheduler': {}, 'input': [],
                       'worker': "w", 'owner': "o", 'nr_reduces': "2"}
            jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
            status, response = loads(self.disco.request('/disco/job/new', jobpack.dumps()))
            # Identify the failing prefix in the assertion message.
            detail = 'prefix {0!r}: {1!r} {2!r}'.format(prefix, status, response)
            # assertEqual replaces the deprecated assertEquals alias
            # (removed in Python 3.12).
            self.assertEqual(status, 'error', detail)
            self.assertTrue("invalid prefix" in response, detail)
Example #6
0
File: task.py Project: caox/disco
 def __init__(self, host='', jobfile='', jobname='', master=None,
              disco_port=None, put_port=None, ddfs_data='', disco_data='',
              mode=None, taskid=-1):
     """Load the jobpack stored in ``jobfile`` and record the task settings.

     Every argument is stored unchanged on an attribute of the same name.
     ``jobobjs`` is the unpickled ``jobdata`` of the loaded jobpack,
     ``outputs`` starts as an empty dict, and ``uid`` is built from the
     mode, the task id, a hash of the current time and the process id.
     """
     from disco.job import JobPack
     self.host = host
     self.jobfile = jobfile
     self.jobname = jobname
     # The file object is handed to JobPack.load and stays open.
     # NOTE(review): whether load() reads eagerly is not visible here.
     packfile = open(jobfile, 'rb')
     self.jobpack = JobPack.load(packfile)
     # NOTE(review): jobdata is unpickled as-is; presumably the jobfile is
     # trusted -- confirm.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master = master
     self.disco_port = disco_port
     self.put_port = put_port
     self.ddfs_data = ddfs_data
     self.disco_data = disco_data
     self.mode = mode
     self.taskid = taskid
     self.outputs = {}
     timehash = hexhash(str(time.time()).encode())
     self.uid = '{0}:{1}-{2}-{3}'.format(mode, taskid, timehash, os.getpid())
Example #7
0
 def __init__(self, host='', jobfile='', jobname='', master=None,
              disco_port=None, put_port=None, ddfs_data='', disco_data='',
              stage=None, group=None, grouping=None, taskid=-1):
     """Load the jobpack stored in ``jobfile`` and record the task settings.

     ``group`` is unpacked into ``group_label`` and ``group_host`` and is
     also kept as the string ``"<label>-<host>"`` in ``self.group``.
     ``uid`` is built from the stage, the DDFS-safe group name, the task
     id, a hash of the current time and the process id.
     """
     from disco.job import JobPack
     from disco.ddfs import DDFS
     self.host = host
     self.jobfile = jobfile
     self.jobname = jobname
     # The file object is handed to JobPack.load and stays open.
     # NOTE(review): whether load() reads eagerly is not visible here.
     packfile = open(jobfile, 'rb')
     self.jobpack = JobPack.load(packfile)
     # NOTE(review): jobdata is unpickled as-is; presumably the jobfile is
     # trusted -- confirm.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master = master
     self.disco_port = disco_port
     self.put_port = put_port
     self.ddfs_data = ddfs_data
     self.disco_data = disco_data
     self.stage = stage
     # NOTE(review): the default group=None cannot be indexed or unpacked,
     # so callers apparently must always pass a two-item group -- confirm.
     self.group = '{0[0]}-{0[1]}'.format(group)
     self.group_label, self.group_host = group
     self.grouping = grouping
     self.taskid = taskid
     self.outputs = {}
     uid_fields = (self.stage,
                   DDFS.safe_name(self.group),
                   self.taskid,
                   hexhash(str(time.time()).encode()),
                   os.getpid())
     self.uid = '{0}:{1}-{2}-{3}-{4}'.format(*uid_fields)
Example #8
0
def pack(jobname, jobdict, worker, workdir):
    """Serialize a jobpack: header, JSON jobdict, JSON ``{}``, zip, ``''``.

    The zip holds either the single ``worker`` path (when ``workdir`` is
    falsy) or everything found by walking ``workdir``. ``jobname`` is
    accepted but not used.

    Returns the header produced by ``JobPack.header`` followed by the four
    sections concatenated in order.

    NOTE(review): this block relies on Python 2 only APIs (``cStringIO``,
    ``os.path.walk``).
    """
    import cStringIO
    zmem = cStringIO.StringIO()
    z = zipfile.ZipFile(zmem, "w")
    # FIXME: Strip off leading pathname elements from zipped paths
    if not workdir:
        z.write(worker)
    else:
        # A sub-directory is reported twice by the walk: once as an entry of
        # its parent's ``names`` and once as ``dirname`` of its own visit.
        # Remember what was written so each path lands in the zip only once.
        written = set()

        def add(path):
            if path not in written:
                written.add(path)
                z.write(path)

        def walker(arg, dirname, names):
            add(dirname)
            for n in names:
                add(os.path.join(dirname, n))
        os.path.walk(workdir, walker, None)
    z.close()

    def contents(*t):
        # Pair every segment with its offset; the first segment starts
        # right after the header.
        offset = JobPack.HEADER_SIZE
        for segment in t:
            yield offset, segment
            offset += len(segment)

    offsets, fields = zip(*contents(json.dumps(jobdict),
                                    json.dumps({}),
                                    zmem.getvalue(),
                                    ''))
    hdr = JobPack.header(offsets)
    return hdr + ''.join(fields)
Example #9
0
 def __init__(self, host='', jobfile='', jobname='', master=None,
              disco_port=None, put_port=None, ddfs_data='', disco_data='',
              mode=None, taskid=-1):
     """Load the jobpack stored in ``jobfile`` and record the task settings.

     Every argument is stored unchanged on an attribute of the same name.
     ``jobobjs`` is the unpickled ``jobdata`` of the loaded jobpack,
     ``outputs`` starts as an empty dict, and ``uid`` is built from the
     mode, the task id, a hash of the current time and the process id in
     hexadecimal.
     """
     from disco.job import JobPack
     self.host = host
     self.jobfile = jobfile
     self.jobname = jobname
     # The file object is handed to JobPack.load and stays open.
     # NOTE(review): whether load() reads eagerly is not visible here.
     packfile = open(jobfile)
     self.jobpack = JobPack.load(packfile)
     # NOTE(review): jobdata is unpickled as-is; presumably the jobfile is
     # trusted -- confirm.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master = master
     self.disco_port = disco_port
     self.put_port = put_port
     self.ddfs_data = ddfs_data
     self.disco_data = disco_data
     self.mode = mode
     self.taskid = taskid
     self.outputs = {}
     timehash = hexhash(str(time.time()))
     self.uid = '%s:%s-%s-%x' % (mode, taskid, timehash, os.getpid())
Example #10
0
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    # NOTE(review): the docstring is kept verbatim -- it reads like CLI usage
    # text and is presumably shown to users; confirm before rewording it.
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    def build_jobzip(*paths):
        # Add every path to a new DiscoZipFile and close it before returning.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # A leading '@' means "read the payload from the named file";
        # any other value is used as the payload itself.
        if data.startswith('@'):
            # The context manager guarantees the file handle is closed.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def prefix(p):
        # Fall back to the worker's file name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {'input': program.input(*inputs),
               'worker': worker,
               'map?': program.options.has_map,
               'reduce?': program.options.has_reduce,
               'nr_reduces': program.options.nr_reduces,
               'prefix': prefix(program.options.prefix),
               'scheduler': program.scheduler,
               'owner': program.options.owner or program.settings['DISCO_JOB_OWNER']}
    jobenvs = dict(program.options.env)
    # The worker is always archived, followed by any extra files.
    jobzip = build_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)
    # Single-argument print(...) calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t%s\t%s" % item for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t%s\t%s" % item for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t%s" % name for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t%s" % line for line in jobdata.splitlines()))
    # Either show the serialized jobpack or submit it and show the result.
    if program.options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
Example #11
0
 def jobpack(self, jobname):
     """Fetch the parameters submitted for ``jobname`` as a :class:`disco.job.JobPack`."""
     from disco.compat import BytesIO
     from disco.job import JobPack
     # Request the raw bytes and wrap them so JobPack.load can read them
     # like a file.
     url = '/disco/ctrl/parameters?name={0}'.format(jobname)
     raw = self.request(url, as_bytes=True)
     return JobPack.load(BytesIO(raw))
 def test_badlength(self):
     # Serialize a complete version-1 jobpack, then drop its last
     # len(jobdata) + 1 bytes before submitting it. The reply is expected
     # to be an 'error' whose message contains "invalid header".
     jobenvs, jobzip, jobdata = {}, b'0'*64, b'0'*64
     jobdict = {'prefix':'JobPackBadLength', 'scheduler':{}, 'input':["raw://data"],
                "map?":True, 'worker':"w", 'owner':"o", 'nr_reduces':"2"}
     packed = JobPack(JOBPACK_VERSION1, jobdict, jobenvs, jobzip, jobdata).dumps()
     truncated = packed[:len(packed) - len(jobdata) - 1]
     status, response = loads(self.disco.request('/disco/job/new', truncated))
     # assertEqual replaces the deprecated assertEquals alias
     # (removed in Python 3.12).
     self.assertEqual(status, 'error')
     self.assertTrue("invalid header" in response)
Example #13
0
    def test_badheader(self):
        # Each case is (section offsets relative to hdr_size, number of
        # payload bytes appended after the header). Every submission is
        # expected to be answered with an 'error' status.
        cases = (
            ((1, 2, 2, 2), 3),
            ((0, 0, 1, 1), 2),
            ((0, 1, 0, 1), 2),
            ((0, 1, 1, 0), 2),
        )
        for deltas, padding in cases:
            offsets = [hdr_size + delta for delta in deltas]
            jobpack = JobPack.header(offsets) + b'0' * padding
            status, response = loads(self.disco.request('/disco/job/new', jobpack))
            # Identify the failing case in the assertion message.
            detail = 'offsets {0!r}: {1!r} {2!r}'.format(offsets, status, response)
            # assertEqual replaces the deprecated assertEquals alias
            # (removed in Python 3.12).
            self.assertEqual(status, 'error', detail)
Example #14
0
    def test_badheader(self):
        # Table of header layouts to submit: the four section offsets are
        # given relative to hdr_size, followed by how many b'0' bytes are
        # appended as payload. Each one must come back with status 'error'.
        layouts = [
            ([1, 2, 2, 2], 3),
            ([0, 0, 1, 1], 2),
            ([0, 1, 0, 1], 2),
            ([0, 1, 1, 0], 2),
        ]
        for relative, nbytes in layouts:
            offsets = [hdr_size + rel for rel in relative]
            jobpack = JobPack.header(offsets) + b'0'*nbytes
            status, response = loads(self.disco.request('/disco/job/new', jobpack))
            # Name the layout in the failure message so the broken case is
            # obvious; assertEqual replaces the deprecated assertEquals alias
            # (removed in Python 3.12).
            self.assertEqual(
                status, 'error',
                'offsets {0!r}: {1!r} {2!r}'.format(offsets, status, response))
Example #15
0
    def test_badprefix(self):
        # The same jobdict is submitted twice, once per bad prefix ('a/b'
        # and 'a.b'); each reply is expected to be an 'error' whose message
        # contains "invalid prefix".
        jobenvs, jobzip, jobdata = {}, b'', b''
        for prefix in ('a/b', 'a.b'):
            jobdict = {
                'prefix': prefix,
                'scheduler': {},
                'input': [],
                'worker': "w",
                'owner': "o",
                'nr_reduces': "2"
            }
            jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
            status, response = loads(
                self.disco.request('/disco/job/new', jobpack.dumps()))
            # Identify the failing prefix in the assertion message.
            detail = 'prefix {0!r}: {1!r} {2!r}'.format(prefix, status, response)
            # assertEqual replaces the deprecated assertEquals alias
            # (removed in Python 3.12).
            self.assertEqual(status, 'error', detail)
            self.assertTrue("invalid prefix" in response, detail)
Example #16
0
    def __init__(self, jobpack):
        """Set up a local job from the serialized ``jobpack`` string.

        Loads the jobpack, derives the job name and its home directory
        under ``$DISCO_DATA``, extracts the jobhome there, saves the job
        file and makes sure the worker is executable. The job starts with
        no results and status ``"active"``.

        Raises ``KeyError`` if ``DISCO_DATA`` is not set in the environment.
        """
        self.jobpack = JobPack.load(StringIO(jobpack))
        self.host = "localhost"
        self.data_root = os.environ['DISCO_DATA']

        # Job name is "<prefix>@<timestamp>".
        stamp_source = self.prefix
        self.name = "{}@{}".format(stamp_source, leisure.disco.timestamp())

        # Home is <host>/<hash of name>/<name>, relative to the data root.
        home_parts = (self.host, leisure.disco.hex_hash(self.name), self.name)
        self.home = os.path.join(*home_parts)
        job_root = os.path.join(self.data_root, self.home)
        self.job_dir = extract_jobhome(job_root, self.jobpack.jobhome)

        self.save_jobfile(jobpack)
        self.ensure_worker_executable()
        self.results = []
        self.status = "active"
Example #17
0
  def __init__(self, jobpack):
    """Set up a local job from the serialized ``jobpack`` string.

    Loads the jobpack, names the job "<prefix>@<timestamp>", extracts the
    jobhome below ``$DISCO_DATA``, saves the job file and makes sure the
    worker is executable. The job starts with an empty result list and
    status ``"active"``.

    Raises ``KeyError`` if ``DISCO_DATA`` is not set in the environment.
    """
    self.jobpack = JobPack.load(StringIO(jobpack))
    self.host = "localhost"
    self.data_root = os.environ['DISCO_DATA']

    name_prefix = self.prefix
    self.name = "{}@{}".format(name_prefix, leisure.disco.timestamp())

    # Relative home directory: <host>/<hash of name>/<name>.
    path_parts = (self.host, leisure.disco.hex_hash(self.name), self.name)
    self.home = os.path.join(*path_parts)

    # The jobhome is extracted below the data root.
    target_dir = os.path.join(self.data_root, self.home)
    self.job_dir = extract_jobhome(target_dir, self.jobpack.jobhome)

    self.save_jobfile(jobpack)
    self.ensure_worker_executable()
    self.results = []
    self.status = "active"
Example #18
0
 def __init__(self, host='', jobfile='', jobname='', master=None,
              disco_port=None, put_port=None, ddfs_data='', disco_data='',
              stage=None, group=None, grouping=None, taskid=-1):
     """Load the jobpack stored in ``jobfile`` and record the task settings.

     ``group`` is kept both as the string ``"<label>-<host>"``
     (``self.group``) and unpacked into ``group_label`` / ``group_host``.
     ``uid`` joins the stage, the DDFS-safe group name, the task id, a
     hash of the current time and the process id.
     """
     from disco.job import JobPack
     from disco.ddfs import DDFS
     self.host = host
     self.jobfile = jobfile
     self.jobname = jobname
     # The file object is handed to JobPack.load and stays open.
     # NOTE(review): whether load() reads eagerly is not visible here.
     source = open(jobfile, 'rb')
     self.jobpack = JobPack.load(source)
     # NOTE(review): jobdata is unpickled as-is; presumably the jobfile is
     # trusted -- confirm.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master = master
     self.disco_port = disco_port
     self.put_port = put_port
     self.ddfs_data = ddfs_data
     self.disco_data = disco_data
     self.stage = stage
     # NOTE(review): the default group=None cannot be indexed or unpacked,
     # so callers apparently must always pass a two-item group -- confirm.
     self.group = '{0[0]}-{0[1]}'.format(group)
     self.group_label, self.group_host = group
     self.grouping = grouping
     self.taskid = taskid
     self.outputs = {}
     safe_group = DDFS.safe_name(self.group)
     timehash = hexhash(str(time.time()).encode())
     self.uid = '{0}:{1}-{2}-{3}-{4}'.format(
         self.stage, safe_group, self.taskid, timehash, os.getpid())
Example #19
0
 def test_badmagic(self):
     # Build a header with consecutive offsets but magic=0, append four
     # payload bytes, and expect the submission to be answered with an
     # 'error' status.
     offsets = [hdr_size, hdr_size + 1, hdr_size + 2, hdr_size + 3]
     jobpack = JobPack.header(offsets, magic=0) + b'0'*4
     status, response = loads(self.disco.request('/disco/job/new', jobpack))
     # assertEqual replaces the deprecated assertEquals alias
     # (removed in Python 3.12).
     self.assertEqual(status, 'error')
Example #20
0
 def jobpack(self, jobname):
     """Fetch the parameters submitted for ``jobname`` as a :class:`disco.job.JobPack`."""
     from cStringIO import StringIO
     from disco.job import JobPack
     # Wrap the response so JobPack.load can read it like a file.
     url = '/disco/ctrl/parameters?name=%s' % jobname
     body = self.request(url)
     return JobPack.load(StringIO(body))
Example #21
0
 def test_badmagic(self):
     # The offsets are consecutive and followed by four payload bytes; the
     # header is built with magic=0, and the reply is expected to carry an
     # 'error' status.
     offsets = [hdr_size, hdr_size + 1, hdr_size + 2, hdr_size + 3]
     jobpack = JobPack.header(offsets, magic=0) + b'0' * 4
     reply = self.disco.request('/disco/job/new', jobpack)
     status, response = loads(reply)
     # assertEqual replaces the deprecated assertEquals alias
     # (removed in Python 3.12).
     self.assertEqual(status, 'error')
Example #22
0
File: core.py Project: hmas/disco
 def jobpack(self, jobname):
     """Load the :class:`disco.job.JobPack` that was submitted for ``jobname``."""
     from disco.compat import BytesIO
     from disco.job import JobPack
     # The parameters endpoint is queried for raw bytes, which are wrapped
     # in a file-like buffer for JobPack.load.
     query = '/disco/ctrl/parameters?name={0}'.format(jobname)
     payload = BytesIO(self.request(query, as_bytes=True))
     return JobPack.load(payload)
Example #23
0
File: core.py Project: darkua/disco
 def jobpack(self, jobname):
     """Load the :class:`disco.job.JobPack` that was submitted for ``jobname``."""
     from cStringIO import StringIO
     from disco.job import JobPack
     # The response body is wrapped in a file-like buffer for JobPack.load.
     query = '/disco/ctrl/parameters?name=%s' % jobname
     buffered = StringIO(self.request(query))
     return JobPack.load(buffered)