Example #1
    def submit(self, jobs):

        # derive RP task descriptions and submit them

        jobs = ru.as_list(jobs)

        for job in jobs:
            job.status.update({
                'meta_data': {},
                'exit_code': None,
                'final': False
            })

        tds = [self._job_2_descr(job) for job in jobs]
        tasks = self._tmgr.submit_tasks(tds)

        with self._lock:

            # TODO: bulk advance
            for task, job in zip(tasks, jobs):

                self._jobs[job.uid] = [job, task]
                job._set_jex(self._executor)

                job.status.update({
                    'message': 'rp task submitted',
                    'time': time.time(),
                    'native_id': task.uid
                })
                self._executor._advance(job, jpsi.status.SUBMITTED)
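
All examples on this page rely on `ru.as_list` to normalize their input into a
list.  A minimal stand-in, consistent with how the call is used here (this is
an assumed behavior sketch, not the actual radical.utils implementation):

    def as_list(data):
        # None -> [], list -> unchanged, any other value -> [value]
        if data is None:
            return []
        if isinstance(data, list):
            return data
        return [data]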
Example #2
    def _result_cb(self, msg):

        # self._log.debug('master _result_cb: %s', msg)

        # update result and error information for the corresponding request UID
        uid = msg['req']
        out = msg['out']
        err = msg['err']
        ret = msg['ret']

        req = self._requests[uid]
        req.set_result(out, err, ret)

        try:
            new_items = ru.as_list(self.result_cb([req]))
            for item in new_items:
                self.request(item)
        except Exception:
            self._log.exception('result callback failed')
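
For reference, a message shaped the way `_result_cb` unpacks it; the field
names come from the handler above, while the values are made up:

    # hypothetical result message for request 'request.0001'
    msg = {'req': 'request.0001',   # UID of the originating request
           'out': 'stdout of the work item',
           'err': '',               # stderr, empty on success
           'ret': 0}                # return / exit code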
Example #3
    def _bulk_cbs(self, units, metrics=None):

        if not metrics: metrics = [rpc.UNIT_STATE]
        else          : metrics = ru.as_list(metrics)

        cbs = dict()  # bulked callbacks to call

        with self._cb_lock:

            for metric in metrics:

                # get wildcard callbacks
                cb_dicts = self._callbacks[metric].get('*', {})
                for cb_name in cb_dicts:
                    cbs[cb_name] = {'cb'     : cb_dicts[cb_name]['cb'],
                                    'cb_data': cb_dicts[cb_name]['cb_data'],
                                    'units'  : set(units)}

                # add unit specific callbacks if needed
                for unit in units:

                    uid = unit.uid
                    if uid not in self._callbacks[metric]:
                        continue

                    cb_dicts = self._callbacks[metric].get(uid, {})
                    for cb_name in cb_dicts:

                        if cb_name in cbs:
                            cbs[cb_name]['units'].add(unit)
                        else:
                            cbs[cb_name] = {'cb'     : cb_dicts[cb_name]['cb'],
                                            'cb_data': cb_dicts[cb_name]['cb_data'],
                                            'units'  : set([unit])}

            for cb_name in cbs:

                cb      = cbs[cb_name]['cb']
                cb_data = cbs[cb_name]['cb_data']
                objs    = cbs[cb_name]['units']

                if cb_data: cb(list(objs), cb_data)
                else      : cb(list(objs))
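
`_bulk_cbs` reads a nested registry shaped as
`self._callbacks[metric][uid_or_wildcard][cb_name]`.  A hedged sketch of the
registration side that would produce that layout (method name and signature
are assumptions, not the actual API):

    def register_callback(self, cb, metric, uid='*', cb_data=None):
        # store under metric / uid / callback name, mirroring the
        # lookups performed in _bulk_cbs above
        with self._cb_lock:
            self._callbacks.setdefault(metric, dict()) \
                           .setdefault(uid,    dict())[cb.__name__] = \
                               {'cb': cb, 'cb_data': cb_data}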
Example #4
    def work(self, units):
        '''
        This is the main callback of the component, which is called for any
        incoming (set of) unit(s).  Units arriving here must always be in
        `AGENT_SCHEDULING_PENDING` state, and must always leave in either
        `AGENT_EXECUTING_PENDING` or in a FINAL state (`FAILED` or `CANCELED`).
        While handled by this component, the units will be in `AGENT_SCHEDULING`
        state.

        This method takes care of the initial state change to
        `AGENT_SCHEDULING`, and then forwards the units to the queue feeding
        the actual scheduling process (`self._schedule_units`).
        '''

        # unify handling of bulks / non-bulks
        units = ru.as_list(units)

        # advance state, publish state change, and push to scheduler process
        self.advance(units, rps.AGENT_SCHEDULING, publish=True, push=False)
        self._queue_sched.put(units)
Example #5
    def unregister_callback(self, cb=None, metrics=None, uid=None):

        if not metrics: metrics = [rpc.UMGR_METRICS]
        else          : metrics = ru.as_list(metrics)

        if not uid:
            uid = '*'

        elif uid not in self._units:
            raise ValueError('no such unit %s' % uid)

        with self._cb_lock:

            for metric in metrics:

                if metric not in rpc.UMGR_METRICS :
                    raise ValueError("cb metric '%s' unknown" % metric)

                if metric not in self._callbacks:
                    raise ValueError("cb metric '%s' invalid" % metric)

                if uid not in self._callbacks[metric]:
                    raise ValueError("cb target '%s' invalid" % uid)

                if cb:
                    to_delete = [cb.__name__]
                else:
                    to_delete = list(self._callbacks[metric][uid].keys())

                for cb_name in to_delete:

                    if cb_name not in self._callbacks[metric][uid]:
                        raise ValueError("cb %s not registered" % cb_name)

                    del self._callbacks[metric][uid][cb_name]
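
A hypothetical call, dropping one named callback for a single unit and metric
(the manager instance, callback and unit UID below are made up):

    umgr.unregister_callback(cb=my_state_cb,
                             metrics=rpc.UNIT_STATE,
                             uid='unit.0001')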
Example #6
def expand_ln(to_link, src_sbox, tgt_sbox, rid, cycle, task_id=None):

    expand = {'rid': rid, 'cycle': cycle}

    if not src_sbox: src_sbox = '.'
    if not tgt_sbox: tgt_sbox = '.'

    ret = list()
    for data in ru.as_list(to_link):
        src, tgt = data.split('>')
        try:
            src = src.strip() % expand
            tgt = tgt.strip() % expand
        except Exception as e:
            raise RuntimeError('expansion error: %s : %s : %s (%s)'
                               % (src, tgt, expand, e)) from e
        # if task_id is None:
        #     ret.append('%s/%s > %s/%s_%s'
        #               % (src_sbox, src, tgt_sbox, tgt, task_id))
        # else:
        #
        ret.append('%s/%s > %s/%s' % (src_sbox, src, tgt_sbox, tgt))

    return ret
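
A worked example: each directive is split on '>', both sides are %-expanded
with `rid` and `cycle`, and the sandbox prefixes are applied (the sandbox
names and file names are made up):

    links = expand_ln('state_%(rid)s.dat > cycle_%(cycle)s.dat',
                      src_sbox='in', tgt_sbox='out', rid=3, cycle=7)
    # -> ['in/state_3.dat > out/cycle_7.dat']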
Example #7
    def run(self):
        '''
        run the replica exchange pipelines, and after all is done, fetch the
        requested output data
        '''

        # run the preparator, set resulting data as `shared_data`, and begin to
        # work
        fnames = ru.as_list(self._pre_alg(self._workload))

        if self._workload.data.inputs not in fnames:
            fnames.append(self._workload.data.inputs)

        # write exchange algorithm to disk (once), and then stage with every
        # exchange task
        self._ex_alg_file = 'exchange_algorithm.py'
        with open('%s/%s' % (self._workload.data.inputs, self._ex_alg_file),
                  'w') as fout:
            fout.write(
                exchange_alg_prefix %
                (inspect.getsource(self._exc_alg), self._exc_alg.__name__))

        self.shared_data = fnames
        re.AppManager.run(self)
Example #8
    def work_cb(self):
        '''
        This is the main routine of the component, as it runs in the component
        process.  It will first initialize the component in the process context.
        Then it will attempt to get new things from all input queues
        (round-robin).  For each thing received, it will route that thing to the
        respective worker method.  Once a thing has been worked upon, the
        next attempt at getting a thing begins.
        '''

        # if no action occurs in this iteration, idle
        if not self._inputs:
            time.sleep(0.1)
            return True

        for name in self._inputs:
            # 'queue' avoids shadowing the built-in 'input'
            queue  = self._inputs[name]['queue']
            states = self._inputs[name]['states']

            # FIXME: a simple, 1-thing caching mechanism would likely
            #        remove the req/res overhead completely (for any
            #        non-trivial worker).
            things = queue.get_nowait(500)  # timeout in microseconds
            things = ru.as_list(things)

            if not things:
                # an empty queue must not starve the remaining inputs
                continue

            # the worker target depends on the state of things, so we
            # need to sort the things into buckets by state before
            # pushing them
            buckets = dict()
            for thing in things:
                state = thing['state']
                uid = thing['uid']
                self._prof.prof('get', uid=uid, state=state)

                if state not in buckets:
                    buckets[state] = list()
                buckets[state].append(thing)

            # We now can push bulks of things to the workers

            for state, things in buckets.items():

                assert state in states, 'inconsistent state'
                assert state in self._workers, 'no worker for state %s' % state

                try:
                    to_cancel = list()
                    for thing in things:
                        uid = thing['uid']
                        ttype = thing['type']
                        state = thing['state']

                        # FIXME: this can become expensive over time
                        #        if the cancel list is never cleaned
                        if uid in self._cancel_list:
                            with self._cancel_lock:
                                self._cancel_list.remove(uid)
                            to_cancel.append(thing)

                        self._log.debug('got %s (%s)', ttype, uid)

                    if to_cancel:
                        self.advance(to_cancel,
                                     rps.CANCELED,
                                     publish=True,
                                     push=False)
                    with self._cb_lock:
                        self._workers[state](things)

                except Exception:

                    # this is not fatal -- only the 'things' fail, not
                    # the component
                    self._log.exception("work %s failed", self._workers[state])
                    self.advance(things, rps.FAILED, publish=True, push=False)

        # keep work_cb registered
        return True
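
The bucket-by-state pattern above recurs in several of these examples; with
`collections.defaultdict` it reduces to the equivalent idiom below (a sketch,
not the code the project uses):

    from collections import defaultdict

    # group things by their current state before dispatching in bulk
    buckets = defaultdict(list)
    for thing in things:
        buckets[thing['state']].append(thing)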
Example #9
    def advance(self,
                things,
                state=None,
                publish=True,
                push=False,
                ts=None,
                prof=True):
        '''
        Things which have been operated upon are pushed down into the queues
        again, only to be picked up by the next component, according to their
        state model.  This method will update the thing state, and push it into
        the output queue registered as target for that state.

        things:  list of things to advance
        state:   new state to set for the things
        publish: determine if state update notifications should be issued
        push:    determine if things should be pushed to outputs
        ts:      timestamp for the state advance (defaults to `time.time()`)
        prof:    determine if state advance creates a profile event
                 (publish and push are always profiled)

        Each 'thing' is expected to be a dictionary, with 'state' and 'uid'
        set, and optionally 'type'.

        If a thing contains an '$all' key, the complete dict is published;
        things in a final state are likewise published in full.  Keys listed
        under '$set' are published in addition to 'uid', 'type' and 'state'.
        In all other cases, only the state update is published.
        '''

        if not ts:
            ts = time.time()

        things = ru.as_list(things)

        if not things:
            return

        self._log.debug('advance bulk: %s [%s, %s]', len(things), push,
                        publish)

        # assign state, sort things by state
        buckets = dict()
        for thing in things:

            uid = thing['uid']

            # if thing['type'] not in ['unit', 'pilot']:
            #     raise TypeError("thing has unknown type (%s)" % uid)

            if state:
                # state advance done here
                thing['state'] = state

            _state = thing['state']

            if prof:
                self._prof.prof('advance', uid=uid, state=_state, ts=ts)

            if _state not in buckets:
                buckets[_state] = list()
            buckets[_state].append(thing)

        # should we publish state information on the state pubsub?
        if publish:

            to_publish = list()

            # If '$all' is set, we update the complete thing_dict.
            # Things in final state are also published in full.
            # If '$set' is set, we also publish all keys listed in there.
            # In all other cases, we only send 'uid', 'type' and 'state'.
            for thing in things:
                if '$all' in thing:
                    del thing['$all']
                    to_publish.append(thing)

                elif thing['state'] in rps.FINAL:
                    to_publish.append(thing)

                else:
                    tmp = {
                        'uid': thing['uid'],
                        'type': thing['type'],
                        'state': thing['state']
                    }
                    for key in thing.get('$set', []):
                        tmp[key] = thing[key]
                    to_publish.append(tmp)

            self.publish(rpc.STATE_PUBSUB, {
                'cmd': 'update',
                'arg': to_publish
            })

        # ts = time.time()
        # for thing in things:
        #     self._prof.prof('publish', uid=thing['uid'],
        #                     state=thing['state'], ts=ts)

        # never carry $all across component boundaries!
        for thing in things:
            if '$all' in thing:
                del thing['$all']

        # should we push things downstream, to the next component
        if push:

            # the push target depends on the state of things -- the buckets
            # built above already group the things by state, so we can push
            # them as bulks
            for _state, _things in buckets.items():

                # ts = time.time()
                if _state in rps.FINAL:
                    # things in final state are dropped
                    for thing in _things:
                        self._log.debug('final %s [%s]', thing['uid'], _state)
                        self._prof.prof('drop',
                                        uid=thing['uid'],
                                        state=_state,
                                        ts=ts)
                    continue

                if _state not in self._outputs:
                    # unknown target state -- error
                    import pprint
                    self._log.debug('%s', pprint.pformat(self._outputs))
                    for thing in _things:
                        self._log.debug("lost  %s [%s]", thing['uid'], _state)
                        self._prof.prof('lost',
                                        uid=thing['uid'],
                                        state=_state,
                                        ts=ts)
                    continue

                if not self._outputs[_state]:
                    # empty output -- drop thing
                    for thing in _things:
                        self._log.debug('drop  %s [%s]', thing['uid'], _state)
                        self._prof.prof('drop',
                                        uid=thing['uid'],
                                        state=_state,
                                        ts=ts)
                    continue

                output = self._outputs[_state]

                # push the thing down the drain
                self._log.debug('put bulk %s: %s', _state, len(_things))
                output.put(_things)

                ts = time.time()
                for thing in _things:
                    self._prof.prof('put',
                                    uid=thing['uid'],
                                    state=_state,
                                    msg=output.name,
                                    ts=ts)
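
A hedged usage sketch for the '$set' whitelist described in the docstring:
only 'uid', 'type', 'state' and the listed keys get published (the thing dict
and target state below are illustrative only):

    # from within a component: publish 'slots' along with the state update
    task = {'uid'  : 'task.0001',
            'type' : 'task',
            'state': None,
            'slots': [0, 1],
            '$set' : ['slots']}
    self.advance(task, rps.AGENT_EXECUTING_PENDING, publish=True, push=True)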