コード例 #1
0
 def test_badinfo(self):
     """Post a jobpack built from an empty jobdict and expect an error.

     The jobpack is sent to ``/disco/job/new``; the decoded reply must have
     status ``'error'`` and a response text containing ``"missing key"``.
     """
     jobenvs, jobzip, jobdata = {}, b'', b''
     jobdict = {}
     jobpack = JobPack(JOBPACK_VERSION1, jobdict, jobenvs, jobzip, jobdata)
     status, response = loads(self.disco.request('/disco/job/new', jobpack.dumps()))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(status, 'error')
     # Pass the response as the failure message so a mismatch is diagnosable.
     self.assertTrue("missing key" in response, response)
コード例 #2
0
ファイル: test_jobpack.py プロジェクト: caox/disco
 def test_badinfo(self):
     """Post a jobpack built from an empty jobdict and expect an error.

     The jobpack is sent to ``/disco/job/new``; the decoded reply must have
     status ``'error'`` and a response text containing ``"missing key"``.
     """
     jobenvs, jobzip, jobdata = {}, b'', b''
     jobdict = {}
     jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
     status, response = loads(self.disco.request('/disco/job/new', jobpack.dumps()))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(status, 'error')
     # Pass the response as the failure message so a mismatch is diagnosable.
     self.assertTrue("missing key" in response, response)
コード例 #3
0
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    # The helpers carry their own names so the `jobzip` / `jobdata` locals
    # below no longer shadow the functions that produce them.
    def build_jobzip(*paths):
        # Add every path to a fresh DiscoZipFile and close it before returning.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # A leading '@' means the rest of the argument is a file to read;
        # any other value is used as the job data verbatim.
        if data.startswith('@'):
            # The context manager closes the file instead of leaking the handle.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def default_prefix(p):
        # Fall back to the worker's file name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {
        'input': program.input(*inputs),
        'worker': worker,
        'map?': program.options.has_map,
        'reduce?': program.options.has_reduce,
        'nr_reduces': program.options.nr_reduces,
        'prefix': default_prefix(program.options.prefix),
        'scheduler': program.scheduler,
        'owner': program.options.owner or program.settings['DISCO_JOB_OWNER']
    }
    jobenvs = dict(program.options.env)
    jobzip = build_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)
    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item)
                        for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item)
                        for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t{0}".format(name) for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t{0}".format(line) for line in jobdata.splitlines()))
    # Either dump the serialized jobpack to stdout or submit it and print
    # whatever the submission returns.
    if program.options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
コード例 #4
0
ファイル: discocli.py プロジェクト: nicolasramy/disco
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    # The helpers carry their own names so the `jobzip` / `jobdata` locals
    # below no longer shadow the functions that produce them.
    def build_jobzip(*paths):
        # Add every path to a fresh DiscoZipFile and close it before returning.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # A leading "@" means the rest of the argument is a file to read;
        # any other value is used as the job data verbatim.
        if data.startswith("@"):
            # The context manager closes the file instead of leaking the handle.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def default_prefix(p):
        # Fall back to the worker's file name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {
        "input": program.input(*inputs),
        "worker": worker,
        "map?": program.options.has_map,
        "reduce?": program.options.has_reduce,
        "nr_reduces": program.options.nr_reduces,
        "prefix": default_prefix(program.options.prefix),
        "scheduler": program.scheduler,
        "owner": program.options.owner or program.settings["DISCO_JOB_OWNER"],
    }
    jobenvs = dict(program.options.env)
    jobzip = build_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)
    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t{0[0]}\t{0[1]}".format(item) for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t{0}".format(name) for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t{0}".format(line) for line in jobdata.splitlines()))
    # Either dump the serialized jobpack to stdout or submit it and print
    # whatever the submission returns.
    if program.options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
コード例 #5
0
ファイル: test_jobpack.py プロジェクト: caox/disco
    def test_badprefix(self):
        """Post jobpacks whose prefix is ``'a/b'`` or ``'a.b'`` and expect errors.

        For each prefix the reply from ``/disco/job/new`` must have status
        ``'error'`` and a response text containing ``"invalid prefix"``.
        """
        jobenvs, jobzip, jobdata = {}, b'', b''
        # Both cases differ only in the prefix, so run them through one loop.
        for prefix in ('a/b', 'a.b'):
            jobdict = {'prefix': prefix, 'scheduler': {}, 'input': [],
                       'worker': "w", 'owner': "o", 'nr_reduces': "2"}
            jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
            status, response = loads(self.disco.request('/disco/job/new', jobpack.dumps()))
            # Name the failing prefix in the message, since the loop hides
            # which case was running.
            failure = 'prefix=%r response=%r' % (prefix, response)
            # assertEqual replaces the deprecated assertEquals alias
            # (removed from unittest in Python 3.12).
            self.assertEqual(status, 'error', failure)
            self.assertTrue("invalid prefix" in response, failure)
コード例 #6
0
ファイル: task.py プロジェクト: caox/disco
 def __init__(self, host='', jobfile='', jobname='', master=None,
              disco_port=None, put_port=None, ddfs_data='', disco_data='',
              mode=None, taskid=-1):
     """Store the task parameters and load the job's packed data.

     The jobpack is loaded from ``jobfile`` (opened in binary mode) and its
     ``jobdata`` is passed through ``dPickle.loads`` into ``self.jobobjs``.
     ``self.uid`` is formatted from the mode, the task id, ``hexhash`` of
     the current time and the process id.
     """
     from disco.job import JobPack
     self.host, self.jobfile, self.jobname = host, jobfile, jobname
     # The handle is deliberately left open, as in the original code.
     # NOTE(review): JobPack.load may read from it lazily -- confirm before
     # closing it here.
     packfile = open(jobfile, 'rb')
     self.jobpack = JobPack.load(packfile)
     # NOTE(review): jobdata is deserialized as-is -- confirm jobfiles only
     # ever come from a trusted source.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master, self.disco_port, self.put_port = master, disco_port, put_port
     self.ddfs_data, self.disco_data = ddfs_data, disco_data
     self.mode, self.taskid = mode, taskid
     self.outputs = {}
     timehash = hexhash(str(time.time()).encode())
     self.uid = '{0}:{1}-{2}-{3}'.format(mode, taskid, timehash, os.getpid())
コード例 #7
0
 def __init__(self, host='', jobfile='', jobname='', master=None,
              disco_port=None, put_port=None, ddfs_data='', disco_data='',
              stage=None, group=None, grouping=None, taskid=-1):
     """Store the task parameters and load the job's packed data.

     The jobpack is loaded from ``jobfile`` (opened in binary mode) and its
     ``jobdata`` is passed through ``dPickle.loads`` into ``self.jobobjs``.
     ``group`` is indexed at positions 0 and 1 and unpacked into exactly
     two values (label, host), so the ``None`` default cannot be used
     as-is.
     """
     from disco.job import JobPack
     from disco.ddfs import DDFS
     self.host, self.jobfile, self.jobname = host, jobfile, jobname
     # The handle is deliberately left open, as in the original code.
     # NOTE(review): JobPack.load may read from it lazily -- confirm before
     # closing it here.
     packfile = open(jobfile, 'rb')
     self.jobpack = JobPack.load(packfile)
     # NOTE(review): jobdata is deserialized as-is -- confirm jobfiles only
     # ever come from a trusted source.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master, self.disco_port, self.put_port = master, disco_port, put_port
     self.ddfs_data, self.disco_data = ddfs_data, disco_data
     self.stage = stage
     # Format first, then unpack: same statement order as before.
     self.group = '{0[0]}-{0[1]}'.format(group)
     self.group_label, self.group_host = group
     self.grouping = grouping
     self.taskid = taskid
     self.outputs = {}
     safe_group = DDFS.safe_name(self.group)
     timehash = hexhash(str(time.time()).encode())
     self.uid = '{0}:{1}-{2}-{3}-{4}'.format(
         self.stage, safe_group, self.taskid, timehash, os.getpid())
コード例 #8
0
ファイル: discojob.py プロジェクト: pombredanne/odisco
def pack(jobname, jobdict, worker, workdir):
    """Serialize a job into a jobpack string (Python 2 code).

    The result is ``JobPack.header(offsets)`` followed by four segments:
    the JSON-encoded ``jobdict``, a JSON-encoded empty dict, an in-memory
    zip archive, and an empty string. The archive contains ``worker`` alone
    when ``workdir`` is falsy, otherwise the tree under ``workdir``.

    ``jobname`` is accepted but not used by this function.
    """
    import cStringIO
    zmem = cStringIO.StringIO()
    z = zipfile.ZipFile(zmem, "w")
    # FIXME: Strip off leading pathname elements from zipped paths
    if not workdir:
        z.write(worker)
    else:
        def walker(arg, dirname, names):
            z.write(dirname)
            for n in names:
                path = os.path.join(dirname, n)
                # os.path.walk recurses into real (non-symlink) directories
                # and calls walker again with them as `dirname`; writing
                # them here as well added every subdirectory twice.
                if os.path.isdir(path) and not os.path.islink(path):
                    continue
                z.write(path)
        os.path.walk(workdir, walker, None)
    z.close()

    def contents(*t):
        # Pair every segment with its byte offset; the first segment starts
        # right after the fixed-size header.
        offset = JobPack.HEADER_SIZE
        for segment in t:
            yield offset, segment
            offset += len(segment)

    offsets, fields = zip(*contents(json.dumps(jobdict),
                                    json.dumps({}),
                                    zmem.getvalue(),
                                    ''))
    hdr = JobPack.header(offsets)
    return hdr + ''.join(fields)
コード例 #9
0
 def __init__(self, host='', jobfile='', jobname='', master=None,
              disco_port=None, put_port=None, ddfs_data='', disco_data='',
              mode=None, taskid=-1):
     """Store the task parameters and load the job's packed data.

     The jobpack is loaded from ``jobfile`` (opened with the default mode)
     and its ``jobdata`` is passed through ``dPickle.loads`` into
     ``self.jobobjs``. ``self.uid`` is built from the mode, the task id,
     ``hexhash`` of the current time and the process id in hexadecimal.
     """
     from disco.job import JobPack
     self.host, self.jobfile, self.jobname = host, jobfile, jobname
     # The handle is deliberately left open, as in the original code.
     # NOTE(review): JobPack.load may read from it lazily -- confirm before
     # closing it here.
     packfile = open(jobfile)
     self.jobpack = JobPack.load(packfile)
     # NOTE(review): jobdata is deserialized as-is -- confirm jobfiles only
     # ever come from a trusted source.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master, self.disco_port, self.put_port = master, disco_port, put_port
     self.ddfs_data, self.disco_data = ddfs_data, disco_data
     self.mode, self.taskid = mode, taskid
     self.outputs = {}
     timehash = hexhash(str(time.time()))
     uid_parts = (mode, taskid, timehash, os.getpid())
     self.uid = '%s:%s-%s-%x' % uid_parts
コード例 #10
0
ファイル: discocli.py プロジェクト: pranjal5215/disco
def job(program, worker, *inputs):
    """Usage: worker [input ...]

    Create a jobpack and submit it to the master.
    Worker is automatically added to the jobhome.
    Input urls are specified as arguments or read from stdin.
    """
    from disco.fileutils import DiscoZipFile
    from disco.job import JobPack

    # The helpers carry their own names so the `jobzip` / `jobdata` locals
    # below no longer shadow the functions that produce them.
    def build_jobzip(*paths):
        # Add every path to a fresh DiscoZipFile and close it before returning.
        archive = DiscoZipFile()
        for path in paths:
            archive.writepath(path)
        archive.close()
        return archive

    def load_jobdata(data):
        # A leading '@' means the rest of the argument is a file to read;
        # any other value is used as the job data verbatim.
        if data.startswith('@'):
            # The context manager closes the file instead of leaking the handle.
            with open(data[1:]) as datafile:
                return datafile.read()
        return data

    def default_prefix(p):
        # Fall back to the worker's file name up to its first dot.
        return p or os.path.basename(worker).split(".")[0]

    jobdict = {'input': program.input(*inputs),
               'worker': worker,
               'map?': program.options.has_map,
               'reduce?': program.options.has_reduce,
               'nr_reduces': program.options.nr_reduces,
               'prefix': default_prefix(program.options.prefix),
               'scheduler': program.scheduler,
               'owner': program.options.owner or program.settings['DISCO_JOB_OWNER']}
    jobenvs = dict(program.options.env)
    jobzip = build_jobzip(worker, *program.options.files)
    jobdata = load_jobdata(program.options.data)
    jobpack = JobPack(jobdict, jobenvs, jobzip.dumps(), jobdata)
    # Single-argument print(...) prints the same text under the Python 2
    # print statement and the Python 3 print function.
    if program.options.verbose:
        print("jobdict:")
        print("\n".join("\t%s\t%s" % item for item in jobdict.items()))
        print("jobenvs:")
        print("\n".join("\t%s\t%s" % item for item in jobenvs.items()))
        print("jobzip:")
        print("\n".join("\t%s" % name for name in jobzip.namelist()))
        print("jobdata:")
        print("\n".join("\t%s" % line for line in jobdata.splitlines()))
    # Either dump the serialized jobpack to stdout or submit it and print
    # whatever the submission returns.
    if program.options.dump_jobpack:
        print(jobpack.dumps())
    else:
        print(program.disco.submit(jobpack.dumps()))
コード例 #11
0
 def jobpack(self, jobname):
     """Return the :class:`disco.job.JobPack` submitted for the job.

     The packed bytes are requested from ``/disco/ctrl/parameters`` with
     ``jobname`` as the ``name`` query parameter, then loaded from an
     in-memory buffer.
     """
     from disco.compat import BytesIO
     from disco.job import JobPack
     url = '/disco/ctrl/parameters?name={0}'.format(jobname)
     packed = self.request(url, as_bytes=True)
     return JobPack.load(BytesIO(packed))
コード例 #12
0
 def test_badlength(self):
     """Post a truncated jobpack and expect an "invalid header" error.

     A serialized jobpack is cut short by ``len(jobdata) + 1`` bytes before
     being sent to ``/disco/job/new``; the reply must have status
     ``'error'`` and a response text containing ``"invalid header"``.
     """
     jobenvs, jobzip, jobdata = {}, b'0'*64, b'0'*64
     jobdict = {'prefix':'JobPackBadLength', 'scheduler':{}, 'input':["raw://data"],
                "map?":True, 'worker':"w", 'owner':"o", 'nr_reduces':"2"}
     jobpack = JobPack(JOBPACK_VERSION1, jobdict, jobenvs, jobzip, jobdata).dumps()
     # Drop the whole jobdata segment plus one more byte from the end.
     jobpack = jobpack[:(len(jobpack)-len(jobdata)-1)]
     status, response = loads(self.disco.request('/disco/job/new', jobpack))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(status, 'error')
     # Pass the response as the failure message so a mismatch is diagnosable.
     self.assertTrue("invalid header" in response, response)
コード例 #13
0
    def test_badheader(self):
        """Post jobpacks with several offset tables and expect errors.

        Each case builds a header from four offsets, appends a few ``b'0'``
        bytes and sends it to ``/disco/job/new``; every reply must have
        status ``'error'``.
        """
        # (offsets passed to JobPack.header, number of b'0' bytes appended)
        cases = [
            ([hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2], 3),
            ([hdr_size, hdr_size, hdr_size + 1, hdr_size + 1], 2),
            ([hdr_size, hdr_size + 1, hdr_size, hdr_size + 1], 2),
            ([hdr_size, hdr_size + 1, hdr_size + 1, hdr_size], 2),
        ]
        for offsets, padding in cases:
            jobpack = JobPack.header(offsets) + b'0' * padding
            status, _response = loads(self.disco.request('/disco/job/new', jobpack))
            # assertEqual replaces the deprecated assertEquals alias
            # (removed from unittest in Python 3.12). The message names the
            # failing case, since the loop hides which one was running.
            self.assertEqual(status, 'error', 'offsets=%r' % (offsets,))
コード例 #14
0
ファイル: test_jobpack.py プロジェクト: caox/disco
    def test_badheader(self):
        """Post jobpacks with several offset tables and expect errors.

        Each case builds a header from four offsets, appends a few ``b'0'``
        bytes and sends it to ``/disco/job/new``; every reply must have
        status ``'error'``.
        """
        # (offsets passed to JobPack.header, number of b'0' bytes appended)
        cases = [
            ([hdr_size + 1, hdr_size + 2, hdr_size + 2, hdr_size + 2], 3),
            ([hdr_size, hdr_size, hdr_size + 1, hdr_size + 1], 2),
            ([hdr_size, hdr_size+1, hdr_size, hdr_size + 1], 2),
            ([hdr_size, hdr_size+1, hdr_size+1, hdr_size], 2),
        ]
        for offsets, padding in cases:
            jobpack = JobPack.header(offsets) + b'0'*padding
            status, _response = loads(self.disco.request('/disco/job/new', jobpack))
            # assertEqual replaces the deprecated assertEquals alias
            # (removed from unittest in Python 3.12). The message names the
            # failing case, since the loop hides which one was running.
            self.assertEqual(status, 'error', 'offsets=%r' % (offsets,))
コード例 #15
0
    def test_badprefix(self):
        """Post jobpacks whose prefix is ``'a/b'`` or ``'a.b'`` and expect errors.

        For each prefix the reply from ``/disco/job/new`` must have status
        ``'error'`` and a response text containing ``"invalid prefix"``.
        """
        jobenvs, jobzip, jobdata = {}, b'', b''
        # Both cases differ only in the prefix, so run them through one loop.
        for prefix in ('a/b', 'a.b'):
            jobdict = {
                'prefix': prefix,
                'scheduler': {},
                'input': [],
                'worker': "w",
                'owner': "o",
                'nr_reduces': "2"
            }
            jobpack = JobPack(jobdict, jobenvs, jobzip, jobdata)
            status, response = loads(
                self.disco.request('/disco/job/new', jobpack.dumps()))
            # Name the failing prefix in the message, since the loop hides
            # which case was running.
            failure = 'prefix=%r response=%r' % (prefix, response)
            # assertEqual replaces the deprecated assertEquals alias
            # (removed from unittest in Python 3.12).
            self.assertEqual(status, 'error', failure)
            self.assertTrue("invalid prefix" in response, failure)
コード例 #16
0
ファイル: job.py プロジェクト: pombredanne/leisure
    def __init__(self, jobpack):
        """Load a packed job, derive its name and directories, and set it up.

        ``jobpack`` is the serialized jobpack; it is parsed via
        ``JobPack.load`` and also handed unchanged to ``save_jobfile``.
        The data root is read from the ``DISCO_DATA`` environment variable
        (``KeyError`` if it is unset).
        """
        packed = StringIO(jobpack)
        self.jobpack = JobPack.load(packed)
        self.host = "localhost"
        self.data_root = os.environ['DISCO_DATA']

        # Job name is "<prefix>@<timestamp>".
        prefix = self.prefix
        stamp = leisure.disco.timestamp()
        self.name = "{}@{}".format(prefix, stamp)

        # Relative home is <host>/<hex_hash(name)>/<name>; the jobhome is
        # extracted beneath the data root at that location.
        name_hash = leisure.disco.hex_hash(self.name)
        self.home = os.path.join(self.host, name_hash, self.name)
        job_root = os.path.join(self.data_root, self.home)
        self.job_dir = extract_jobhome(job_root, self.jobpack.jobhome)

        self.save_jobfile(jobpack)
        self.ensure_worker_executable()
        self.results = []
        self.status = "active"
コード例 #17
0
ファイル: job.py プロジェクト: eulersantana/leisure
  def __init__(self, jobpack):
    """Load a packed job, derive its name and directories, and set it up.

    ``jobpack`` is the serialized jobpack; it is parsed via ``JobPack.load``
    and also handed unchanged to ``save_jobfile``. The data root is read
    from the ``DISCO_DATA`` environment variable (``KeyError`` if unset).
    """
    packed = StringIO(jobpack)
    self.jobpack = JobPack.load(packed)
    self.host = "localhost"
    self.data_root = os.environ['DISCO_DATA']

    # Job name is "<prefix>@<timestamp>".
    prefix = self.prefix
    stamp = leisure.disco.timestamp()
    self.name = "{}@{}".format(prefix, stamp)

    # Relative home is <host>/<hex_hash(name)>/<name>; the jobhome is
    # extracted beneath the data root at that location.
    name_hash = leisure.disco.hex_hash(self.name)
    self.home = os.path.join(self.host, name_hash, self.name)
    job_root = os.path.join(self.data_root, self.home)
    self.job_dir = extract_jobhome(job_root, self.jobpack.jobhome)

    self.save_jobfile(jobpack)
    self.ensure_worker_executable()
    self.results = []
    self.status = "active"
コード例 #18
0
ファイル: task.py プロジェクト: AlexArgus/disco
 def __init__(self, host='', jobfile='', jobname='', master=None,
              disco_port=None, put_port=None, ddfs_data='', disco_data='',
              stage=None, group=None, grouping=None, taskid=-1):
     """Store the task parameters and load the job's packed data.

     The jobpack is loaded from ``jobfile`` (opened in binary mode) and its
     ``jobdata`` is passed through ``dPickle.loads`` into ``self.jobobjs``.
     ``group`` is indexed at positions 0 and 1 and unpacked into exactly
     two values (label, host), so the ``None`` default cannot be used
     as-is.
     """
     from disco.job import JobPack
     from disco.ddfs import DDFS
     self.host, self.jobfile, self.jobname = host, jobfile, jobname
     # The handle is deliberately left open, as in the original code.
     # NOTE(review): JobPack.load may read from it lazily -- confirm before
     # closing it here.
     packfile = open(jobfile, 'rb')
     self.jobpack = JobPack.load(packfile)
     # NOTE(review): jobdata is deserialized as-is -- confirm jobfiles only
     # ever come from a trusted source.
     self.jobobjs = dPickle.loads(self.jobpack.jobdata)
     self.master, self.disco_port, self.put_port = master, disco_port, put_port
     self.ddfs_data, self.disco_data = ddfs_data, disco_data
     self.stage = stage
     # Format first, then unpack: same statement order as before.
     self.group = '{0[0]}-{0[1]}'.format(group)
     self.group_label, self.group_host = group
     self.grouping = grouping
     self.taskid = taskid
     self.outputs = {}
     uid_fields = (self.stage,
                   DDFS.safe_name(self.group),
                   self.taskid,
                   hexhash(str(time.time()).encode()),
                   os.getpid())
     self.uid = '{0}:{1}-{2}-{3}-{4}'.format(*uid_fields)
コード例 #19
0
ファイル: test_jobpack.py プロジェクト: caox/disco
 def test_badmagic(self):
     """Post a jobpack whose header is built with ``magic=0`` and expect an error.

     The header is followed by four ``b'0'`` bytes and sent to
     ``/disco/job/new``; the reply must have status ``'error'``.
     """
     offsets = [hdr_size, hdr_size + 1, hdr_size + 2, hdr_size + 3]
     jobpack = JobPack.header(offsets, magic=0) + b'0'*4
     status, _response = loads(self.disco.request('/disco/job/new', jobpack))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(status, 'error')
コード例 #20
0
 def jobpack(self, jobname):
     """Return the :class:`disco.job.JobPack` submitted for the job.

     The packed data is requested from ``/disco/ctrl/parameters`` with
     ``jobname`` as the ``name`` query parameter, then loaded from an
     in-memory buffer.
     """
     from cStringIO import StringIO
     from disco.job import JobPack
     packed = self.request('/disco/ctrl/parameters?name=%s' % jobname)
     buf = StringIO(packed)
     return JobPack.load(buf)
コード例 #21
0
 def test_badmagic(self):
     """Post a jobpack whose header is built with ``magic=0`` and expect an error.

     The header is followed by four ``b'0'`` bytes and sent to
     ``/disco/job/new``; the reply must have status ``'error'``.
     """
     offsets = [hdr_size, hdr_size + 1, hdr_size + 2, hdr_size + 3]
     jobpack = JobPack.header(offsets, magic=0) + b'0' * 4
     status, _response = loads(self.disco.request('/disco/job/new', jobpack))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual(status, 'error')
コード例 #22
0
ファイル: core.py プロジェクト: hmas/disco
 def jobpack(self, jobname):
     """Return the :class:`disco.job.JobPack` submitted for the job.

     The packed bytes are requested from ``/disco/ctrl/parameters`` with
     ``jobname`` as the ``name`` query parameter, then loaded from an
     in-memory buffer.
     """
     from disco.compat import BytesIO
     from disco.job import JobPack
     path = '/disco/ctrl/parameters?name={0}'.format(jobname)
     buf = BytesIO(self.request(path, as_bytes=True))
     return JobPack.load(buf)
コード例 #23
0
ファイル: core.py プロジェクト: darkua/disco
 def jobpack(self, jobname):
     """Return the :class:`disco.job.JobPack` submitted for the job.

     The packed data is requested from ``/disco/ctrl/parameters`` with
     ``jobname`` as the ``name`` query parameter, then loaded from an
     in-memory buffer.
     """
     from cStringIO import StringIO
     from disco.job import JobPack
     path = '/disco/ctrl/parameters?name=%s' % jobname
     response = self.request(path)
     return JobPack.load(StringIO(response))