Beispiel #1
0
 def jobzip(self, job, **jobargs):
     """
     Build the :term:`job home` zip for this worker.

     Starts from the zip returned by the parent class's ``jobzip`` and
     adds to it:

     * ``required_files`` -- when it is a dict, each ``path -> contents``
       item is written with ``writestr``; otherwise each path in the
       iterable is written under ``lib/<basename>``;
     * the second element of every ``required_modules`` entry for which
       ``iskv`` is true;
     * ``map`` / ``reduce`` -- when either is a dict, its
       ``path -> contents`` items are written under ``ext.map`` /
       ``ext.reduce``.

     If ``required_modules`` resolves to ``None``, it is computed with
     ``find_modules`` over every callable setting of the worker
     (excluding ``'Task'``) and stored in ``self['required_modules']``.

     :param job: the job object, forwarded to ``getitem`` and to the
                 parent ``jobzip``.
     :param jobargs: extra job arguments, forwarded the same way.
     :return: the zip object created by the parent ``jobzip``, with the
              entries above added.
     """
     from disco.util import iskv
     from disco.worker.classic.modutil import find_modules
     jobzip = super(Worker, self).jobzip(job, **jobargs)

     def get(key):
         return self.getitem(key, job, jobargs)

     required_files = get('required_files')
     if isinstance(required_files, dict):
         # .items() works on both Python 2 and 3; .iteritems() is
         # Python 2 only.
         for path, data in required_files.items():
             jobzip.writestr(path, data)
     else:
         for path in required_files:
             jobzip.write(path, os.path.join('lib', os.path.basename(path)))

     if get('required_modules') is None:
         callables = [obj
                      for key in self
                      for obj in util.iterify(get(key))
                      if callable(obj)]
         self['required_modules'] = find_modules(callables,
                                                 exclude=['Task'])

     # Looked up again on purpose: the value may just have been stored.
     for mod in get('required_modules'):
         if iskv(mod):
             jobzip.writepath(mod[1])

     for func in ('map', 'reduce'):
         stage_files = get(func)
         if isinstance(stage_files, dict):
             for path, data in stage_files.items():
                 jobzip.writestr(os.path.join('ext.%s' % func, path), data)
     return jobzip
Beispiel #2
0
    def run(self, task, job, **jobargs):
        """
        Entry point into the executing pipeline worker task.

        Records the active task, resolves every worker setting through
        ``getitem``, checks the Python version, builds the globals used
        by the user's callables, configures the stage named by
        ``task.stage`` and finally hands it to ``run_stage``.
        """
        worker.active_task = task

        # Resolve each setting against the job and its arguments.
        for key in self:
            self[key] = self.getitem(key, job, jobargs)
        running_version = '{0[0]}.{0[1]}'.format(sys.version_info[:2])
        assert self['version'] == running_version, "Python version mismatch"

        # Build the task environment: a copy of this module's globals,
        # extended with every required module under its last dotted name.
        namespace = globals().copy()
        for module in self['required_modules']:
            if util.iskv(module):
                modname = module[0]
            else:
                modname = module
            imported = __import__(modname, fromlist=[modname])
            namespace[modname.split('.')[-1]] = imported
        for obj in util.flatten(self.values()):
            util.globalize(obj, namespace)

        # Locate the stage to run, remembering its pipeline position.
        params = self.getitem('params', job, jobargs, worker.Params())
        stages_by_name = {}
        for position, (_group, candidate) in enumerate(self['pipeline']):
            stages_by_name[candidate.name] = (position, candidate)
        pipe_idx, stage = stages_by_name[task.stage]

        stage.taskinfo = TaskInfo(
            jobname=task.jobname,
            host=task.host,
            stage=task.stage,
            group=task.group,
            label=task.group_label)

        # Fall back to the default chains when the stage defines none.
        if not stage.input_chain:
            stage.input_chain = Stage.default_input_chain(pipe_idx)
        if not stage.output_chain:
            stage.output_chain = Stage.default_output_chain

        # Everything is in place: execute the stage.
        self.run_stage(task, stage, params)
Beispiel #3
0
    def jobzip(self, job, **jobargs):
        """
        Hook used by the :class:`Worker` to create the :term:`job home`
        zip.

        This base implementation produces a minimal archive holding the
        ``clx`` and ``disco`` packages, the sources of the job and of the
        worker, and any user-specified required files and modules.

        :return: a :class:`disco.fileutils.DiscoZipFile`.
        """
        # Standard library first: the clx and disco package directories.
        from clx import __file__ as clxpath
        from disco import __file__ as discopath
        from disco.fileutils import DiscoZipFile
        archive = DiscoZipFile()
        for package_file in (clxpath, discopath):
            archive.writepath(os.path.dirname(package_file),
                              exclude=('.pyc', '__pycache__'))
        archive.writesource(job)
        archive.writesource(self)

        # User-specified files come next.
        from disco.util import iskv

        def lookup(key):
            return self.getitem(key, job, jobargs)

        if not isinstance(lookup('required_files'), dict):
            # An iterable of paths: copy each file into lib/.
            for path in lookup('required_files'):
                archive.write(path,
                              os.path.join('lib', os.path.basename(path)))
        else:
            # A mapping of archive path -> contents.
            for path, contents in lookup('required_files').items():
                archive.writestr(path, contents)

        # Compute the module list when none was supplied, then add the
        # path of every module given as a key/value pair.
        if lookup('required_modules') is None:
            self['required_modules'] = self.get_modules(job, **jobargs)
        for module in lookup('required_modules'):
            if iskv(module):
                archive.writepath(module[1])

        # The minimal zip is complete.
        return archive
Beispiel #4
0
    def pack(self):
        """Serialize this :class:`JobDict` for transmission over the wire."""
        packed = {}

        # Normalize required_files into a dict: empty values become {},
        # non-dict values are converted with util.pack_files.
        files = self['required_files']
        if not files:
            self['required_files'] = {}
        elif not isinstance(files, dict):
            self['required_files'] = util.pack_files(files)

        # Add the files of every module given as a key/value pair.
        module_paths = (mod[1] for mod in self['required_modules']
                        if util.iskv(mod))
        self['required_files'].update(util.pack_files(module_paths))

        for key in self.defaults:
            if key == 'input':
                # One newline-joined, reversed group per input.
                groups = ['\n'.join(reversed(list(util.iterify(url))))
                          for url in self['input']]
                packed['input'] = ' '.join(groups)
                continue
            if key in ('nr_reduces', 'prefix'):
                packed[key] = str(self[key])
                continue
            if key == 'scheduler':
                # Scheduler options are flattened into sched_<option>.
                scheduler = self['scheduler']
                for option in scheduler:
                    packed['sched_%s' % option] = str(scheduler[option])
                continue
            value = self[key]
            if value is None:
                # Unset values are left out of the pack.
                continue
            if key in self.stacks:
                packed[key] = util.pack_stack(value)
            else:
                packed[key] = util.pack(value)
        return encode_netstring_fd(packed)
Beispiel #5
0
    def run(self, task, job, **jobargs):
        """
        Entry point into the executing pipeline worker task.

        Resolves every worker setting through ``getitem``, checks the
        Python version, builds the globals used by the user's callables,
        configures the stage named by ``task.stage`` and hands it to
        ``run_stage``.
        """
        # Resolve each setting against the job and its arguments.
        for key in self:
            self[key] = self.getitem(key, job, jobargs)
        running_version = '{0[0]}.{0[1]}'.format(sys.version_info[:2])
        assert self['version'] == running_version, "Python version mismatch"

        # Build the task environment: a copy of this module's globals,
        # extended with every required module under its last dotted name.
        namespace = globals().copy()
        for module in self['required_modules']:
            if util.iskv(module):
                modname = module[0]
            else:
                modname = module
            imported = __import__(modname, fromlist=[modname])
            namespace[modname.split('.')[-1]] = imported
        for obj in util.flatten(self.values()):
            util.globalize(obj, namespace)

        # Locate the stage to run, remembering its pipeline position.
        params = self.getitem('params', job, jobargs, worker.Params())
        stages_by_name = {}
        for position, (_group, candidate) in enumerate(self['pipeline']):
            stages_by_name[candidate.name] = (position, candidate)
        pipe_idx, stage = stages_by_name[task.stage]

        stage.taskinfo = TaskInfo(
            jobname=task.jobname,
            host=task.host,
            stage=task.stage,
            group=task.group,
            label=task.group_label)

        # Fall back to the default chains when the stage defines none.
        if not stage.input_chain:
            stage.input_chain = Stage.default_input_chain(pipe_idx)
        if not stage.output_chain:
            stage.output_chain = Stage.default_output_chain

        # Everything is in place: execute the stage.
        self.run_stage(task, stage, params)
    def jobzip(self, job, **jobargs):
        """
        Hook used by the :class:`Worker` to create the :term:`job home`
        zip.

        This base implementation produces a minimal archive holding the
        ``clx`` and ``disco`` packages, the sources of the job and of the
        worker, and any user-specified required files and modules.

        :return: a :class:`disco.fileutils.DiscoZipFile`.
        """
        # Standard library first: the clx and disco package directories.
        from clx import __file__ as clxpath
        from disco import __file__ as discopath
        from disco.fileutils import DiscoZipFile
        archive = DiscoZipFile()
        for package_file in (clxpath, discopath):
            archive.writepath(os.path.dirname(package_file),
                              exclude=('.pyc', '__pycache__'))
        archive.writesource(job)
        archive.writesource(self)

        # User-specified files come next.
        from disco.util import iskv

        def lookup(key):
            return self.getitem(key, job, jobargs)

        if not isinstance(lookup('required_files'), dict):
            # An iterable of paths: copy each file into lib/.
            for path in lookup('required_files'):
                archive.write(path,
                              os.path.join('lib', os.path.basename(path)))
        else:
            # A mapping of archive path -> contents.
            for path, contents in lookup('required_files').items():
                archive.writestr(path, contents)

        # Compute the module list when none was supplied, then add the
        # path of every module given as a key/value pair.
        if lookup('required_modules') is None:
            self['required_modules'] = self.get_modules(job, **jobargs)
        for module in lookup('required_modules'):
            if iskv(module):
                archive.writepath(module[1])

        # The minimal zip is complete.
        return archive
Beispiel #7
0
    def jobzip(self, job, **jobargs):
        """
        Build the :term:`job home` zip for this worker.

        Takes the zip returned by the parent class's ``jobzip`` and adds
        the required files, the paths of required modules given as
        key/value pairs, and any ``map`` / ``reduce`` setting supplied as
        a dict of ``path -> contents`` (stored under ``ext.map`` /
        ``ext.reduce``).

        :return: the zip object created by the parent ``jobzip``.
        """
        from disco.util import iskv
        from disco.worker.classic.modutil import find_modules
        archive = super(Worker, self).jobzip(job, **jobargs)

        def lookup(key):
            return self.getitem(key, job, jobargs)

        def write_blobs(mapping, folder=None):
            # Store each path -> contents item, optionally below folder.
            for name, payload in mapping.items():
                if folder is None:
                    archive.writestr(name, payload)
                else:
                    archive.writestr(os.path.join(folder, name), payload)

        if isinstance(lookup('required_files'), dict):
            write_blobs(lookup('required_files'))
        else:
            # An iterable of paths: copy each file into lib/.
            for path in lookup('required_files'):
                archive.write(path,
                              os.path.join('lib', os.path.basename(path)))

        # Discover modules from the worker's callables when none given.
        if lookup('required_modules') is None:
            callables = [
                obj for key in self
                for obj in util.iterify(lookup(key)) if callable(obj)
            ]
            self['required_modules'] = find_modules(callables,
                                                    exclude=['Task'])
        for module in lookup('required_modules'):
            if iskv(module):
                archive.writepath(module[1])

        for func in ('map', 'reduce'):
            if isinstance(lookup(func), dict):
                write_blobs(lookup(func), 'ext.{0}'.format(func))
        return archive
Beispiel #8
0
 def insert_globals(self, functions):
     """
     Make this task and its required modules visible to user functions.

     For every plain Python function in ``functions`` (a
     ``functools.partial`` is unwrapped to its ``.func`` first), the
     function's global namespace receives:

     * ``'Task'`` bound to ``self``;
     * each module in ``self.required_modules``, bound under the last
       component of its dotted name.

     ``setdefault`` is used throughout, so names that already exist in
     the function's globals are never overwritten.  Objects that are not
     plain functions are skipped.

     :param functions: iterable of callables (or other objects) to
                       process.
     """
     for fn in functions:
         if isinstance(fn, functools.partial):
             fn = fn.func
         if isinstance(fn, FunctionType):
             # __globals__ exists on Python 2.6+ and Python 3; the old
             # func_globals alias was removed in Python 3.
             namespace = fn.__globals__
             namespace.setdefault('Task', self)
             for module in self.required_modules:
                 mod_name = module[0] if util.iskv(module) else module
                 mod = __import__(mod_name, fromlist=[mod_name])
                 namespace.setdefault(mod_name.split('.')[-1], mod)
Beispiel #9
0
 def insert_globals(self, functions):
     """
     Prepare the library path and expose this task to user functions.

     First calls ``write_files(self.required_files, self.lib)``
     (presumably materialising the required files in the ``lib``
     directory -- confirm against ``write_files``) and puts ``self.lib``
     at the front of ``sys.path``.

     Then, for every plain Python function in ``functions`` (a
     ``partial`` is unwrapped to its ``.func`` first), the function's
     global namespace receives ``'Task'`` bound to ``self`` and each
     module in ``self.required_modules`` bound under the last component
     of its dotted name.  ``setdefault`` is used, so existing names are
     never overwritten.

     :param functions: iterable of callables (or other objects) to
                       process.
     """
     write_files(self.required_files, self.lib)
     sys.path.insert(0, self.lib)
     for fn in functions:
         if isinstance(fn, partial):
             fn = fn.func
         if isinstance(fn, FunctionType):
             # __globals__ exists on Python 2.6+ and Python 3; the old
             # func_globals alias was removed in Python 3.
             namespace = fn.__globals__
             namespace.setdefault('Task', self)
             for module in self.required_modules:
                 mod_name = module[0] if util.iskv(module) else module
                 mod = __import__(mod_name, fromlist=[mod_name])
                 namespace.setdefault(mod_name.split('.')[-1], mod)
Beispiel #10
0
    def run(self, task, job, **jobargs):
        """
        Execute ``task`` with this worker.

        Publishes the task as the module-level ``Task``, resolves every
        worker setting through ``getitem``, checks the Python version,
        prepares an external program when the setting named by
        ``task.mode`` is a dict, builds the globals for the user's
        callables, invokes the method named by ``task.mode`` and finally
        calls ``external.close()``.
        """
        global Task
        Task = task

        # Resolve each setting against the job and its arguments.
        for key in self:
            self[key] = self.getitem(key, job, jobargs)
        running_version = '%s.%s' % sys.version_info[:2]
        assert self['version'] == running_version, "Python version mismatch"

        params = self['params']
        mode = task.mode
        if isinstance(self[mode], dict):
            # A dict here means an external program: use its parameters
            # and replace the setting with whatever prepare() returns.
            params = self['ext_params']
            self[mode] = external.prepare(params, mode)

        # Expose every required module under its last dotted name.
        namespace = globals().copy()
        for module in self['required_modules']:
            if util.iskv(module):
                modname = module[0]
            else:
                modname = module
            imported = __import__(modname, fromlist=[modname])
            namespace[modname.split('.')[-1]] = imported
        for obj in util.flatten(self.values()):
            util.globalize(obj, namespace)

        handler = getattr(self, mode)
        handler(task, params)
        external.close()
Beispiel #11
0
    def run(self, task, job, **jobargs):
        """
        Execute ``task`` with this worker.

        Publishes the task as the module-level ``Task``, resolves every
        worker setting through ``getitem``, checks the Python version,
        prepares an external program when the setting named by
        ``task.stage`` is a dict, builds the globals for the user's
        callables, invokes the method named by ``task.stage`` and finally
        calls ``external.close()``.
        """
        global Task
        Task = task

        # Resolve each setting against the job and its arguments.
        for key in self:
            self[key] = self.getitem(key, job, jobargs)
        running_version = '{0[0]}.{0[1]}'.format(sys.version_info[:2])
        assert self['version'] == running_version, "Python version mismatch"

        params = self['params']
        stage_name = task.stage
        if isinstance(self[stage_name], dict):
            # A dict here means an external program: use its parameters
            # and replace the setting with whatever prepare() returns.
            params = self['ext_params']
            self[stage_name] = external.prepare(params, stage_name)

        # Expose every required module under its last dotted name.
        namespace = globals().copy()
        for module in self['required_modules']:
            if util.iskv(module):
                modname = module[0]
            else:
                modname = module
            imported = __import__(modname, fromlist=[modname])
            namespace[modname.split('.')[-1]] = imported
        for obj in util.flatten(self.values()):
            util.globalize(obj, namespace)

        handler = getattr(self, stage_name)
        handler(task, params)
        external.close()
Beispiel #12
0
    def pack(self):
        """Serialize this :class:`JobDict` for transmission over the wire."""
        packed = {}

        # Normalize required_files into a dict: empty values become {},
        # non-dict values are converted with util.pack_files.
        files = self['required_files']
        if not files:
            self['required_files'] = {}
        elif not isinstance(files, dict):
            self['required_files'] = util.pack_files(files)

        # Add the files of every module given as a key/value pair.
        module_paths = (mod[1] for mod in self['required_modules']
                        if util.iskv(mod))
        self['required_files'].update(util.pack_files(module_paths))

        for key in self.defaults:
            # An unset map or reduce is left out of the pack entirely.
            if key in ('map', 'reduce') and self[key] is None:
                continue
            if key == 'input':
                # One newline-joined, reversed group per input.
                groups = ['\n'.join(reversed(list(util.iterify(url))))
                          for url in self['input']]
                packed['input'] = ' '.join(groups)
                continue
            if key == 'username':
                packed['username'] = str(self['username'])
                continue
            if key in ('nr_reduces', 'prefix'):
                packed[key] = str(self[key])
                continue
            if key == 'scheduler':
                # Scheduler options are flattened into sched_<option>.
                scheduler = self['scheduler']
                for option in scheduler:
                    packed['sched_%s' % option] = str(scheduler[option])
                continue
            if key in self.stacks:
                packed[key] = util.pack_stack(self[key])
            else:
                packed[key] = util.pack(self[key])
        return encode_netstring_fd(packed)
Beispiel #13
0
def kvify(entry):
    """
    Yield ``entry`` as a single key/value pair.

    An entry for which ``iskv`` is true is yielded unchanged; anything
    else is yielded as the value of a pair whose key is the empty string.
    """
    if iskv(entry):
        yield entry
    else:
        yield ('', entry)
Beispiel #14
0
def evaluate(expression, entry):
    """
    Yield the value of ``expression`` evaluated against one entry.

    ``expression`` is passed to ``eval`` inside this function, so it can
    refer to the local names ``entry`` and ``expression`` and, only when
    ``iskv(entry)`` is true, to ``k`` and ``v`` (the two unpacked
    elements of the entry).  For other entries ``k`` and ``v`` are not
    bound here, so an expression using them will not find them as
    locals.

    NOTE(review): ``eval`` runs arbitrary code; presumably ``expression``
    comes from a trusted source -- confirm against the callers.
    """
    if iskv(entry):
        # k and v look unused but are read by name from eval() below;
        # do not rename or remove them.
        k, v = entry
    yield eval(expression)
Beispiel #15
0
def where(predicate, entry):
    """
    Yield ``entry`` only if ``predicate`` evaluates to a true value.

    ``predicate`` is passed to ``eval`` inside this function, so it can
    refer to the local names ``entry`` and ``predicate`` and, only when
    ``iskv(entry)`` is true, to ``k`` and ``v`` (the two unpacked
    elements of the entry).  Nothing is yielded when the result is
    false.

    NOTE(review): ``eval`` runs arbitrary code; presumably ``predicate``
    comes from a trusted source -- confirm against the callers.
    """
    if iskv(entry):
        # k and v look unused but are read by name from eval() below;
        # do not rename or remove them.
        k, v = entry
    if eval(predicate):
        yield entry