コード例 #1
0
ファイル: processor.py プロジェクト: relic7/nd1404
class Batch:
    def __init__(self, process_pk):
        """Prepare the batch state for the process whose primary key is process_pk.

        Fetches the Process row, reads the MPROCESSOR settings from the
        Configurator, decodes the pipeline stored on the process, builds the
        DAG for it and resolves the plugin callable of every pipeline entry.
        Any exception raised by those steps propagates to the caller.
        """
        self.process = Process.objects.get(pk=process_pk)

        # Settings that bound the amount of work in flight.
        cfg = Configurator()
        self.cfg = cfg
        self.max_outstanding = cfg.getint('MPROCESSOR', 'max_outstanding')
        self.batch_size = cfg.getint('MPROCESSOR', 'batch_size')  # how many items to load

        # Pipeline description and everything derived from it.
        pipeline = loads(self.process.pipeline.params)
        self.pipeline = pipeline
        self.dag = DAG(pipeline)
        self.schedule_length = len(pipeline)
        self.scripts = self._get_scripts(pipeline)

        # Progress flags.
        self.all_targets_read = False  # True when all targets have been read
        self.gameover = False          # True when all targets are done

        # Counters and cursors.
        self.outstanding = 0           # number of not yet answered requests
        self.cur_batch = 0             # index in batch
        self.cur_task = 0              # index in tasks
        self.totals = {
            'update': 0,
            'passed': 0,
            'failed': 0,
            'targets': 0,
            None: 0,
        }

        # Per-item action outcomes, keyed by item pk.
        self.results = {}

    def run(self):
        """Initialise the iteration state and start processing.

        Stores the number of ProcessTarget rows of this process on
        self.process.targets, resets the task list and enters _iterate().
        """
        pk_text = str(self.process.pk)
        log.debug('### Running batch for process %s' % (pk_text,))
        process_targets = ProcessTarget.objects.filter(process=self.process)
        self.process.targets = process_targets.count()
        self.tasks = []
        self._iterate()

    def stop(self, seconds_offset=0):
        """Record the end date of the process and flag the batch as finished.

        seconds_offset is the number of seconds added to the current local
        time to obtain the end date saved on the process (default 0).
        """
        log.info('stopping process %s' % self.process.pk)
        with transaction.commit_on_success():
            end_date = datetime.datetime.now()
            end_date += datetime.timedelta(seconds=seconds_offset)
            self.process.end_date = end_date
            self.process.save()
        # Checked at the top of every _iterate() pass.
        self.gameover = True

    def _update_item_stats(self, item, action, result, success, failure, cancelled):
        #log.debug('_update_item_stats: item=%s action=%s success=%s, failure=%s, cancelled=%s' % (item.target_id, action, success, failure, cancelled)) #d
        item.actions_passed += success
        item.actions_failed += failure
        item.actions_cancelled += cancelled
        item.actions_todo -= (success + failure + cancelled)
        if item.pk not in self.results:
            self.results[item.pk] = {}
        self.results[item.pk][action] = (success, result)
        if item.actions_todo <= 0 or failure > 0:
            item.result = dumps(self.results[item.pk])
        if item.actions_todo <= 0:
            #log.debug('_update_item_stats: finalizing item %s' % item.target_id) #d
            del self.results[item.pk]
        
    def _get_scripts(self, pipeline):
        """Import the plugin behind every pipeline entry.

           For each (key, entry) of pipeline, the module named
           <plugins>.<entry['script_name']> is imported, where <plugins> is
           the MPROCESSOR "plugins" setting, and its ``run`` attribute is
           looked up.

           Returns the dictionary
           {pipeline_key: (run_callable, params)}
           where params is entry['params'], or {} when absent.
           Raises BatchError if a module has no callable ``run``; import
           errors propagate unchanged.
        """
        plugins_module = self.cfg.get("MPROCESSOR", "plugins")
        loaded = {}
        for key, entry in pipeline.items():
            name = entry['script_name']
            module_path = plugins_module + '.' + name
            log.info('<$> loading script: %s' % module_path)
            # A non-empty fromlist makes __import__ return the leaf module
            # instead of the top-level package.
            module = __import__(module_path, fromlist = module_path.split('.'))
            entry_point = getattr(module, 'run', None)
            if entry_point and callable(entry_point):
                loaded[key] = (entry_point, entry.get('params', {}))
                continue
            raise BatchError('Plugin %s has no callable run method' % name)
        return loaded

    def _new_batch(self):
        """Load the next slice of targets and pair each one with a new Schedule.

        Returns a list of {'item': target, 'schedule': Schedule} dicts. The
        list is empty when every target has already been read; an empty
        slice sets self.all_targets_read.
        """
        if self.all_targets_read:
            return []

        first = self.cur_batch
        last = first + self.batch_size
        page = ProcessTarget.objects.filter(process=self.process.pk)[first:last]
        if not page:
            self.all_targets_read = True
            return []

        # Move the cursor by a full batch even if the slice came back shorter.
        self.cur_batch += self.batch_size
        tasks = []
        for target in page:
            tasks.append({'item': target, 'schedule': Schedule(self.dag, target.target_id)})
        return tasks

    def _get_action(self):
        """returns the first action found or None. Delete tasks with no actions left"""
        #log.debug("_get_action on num_tasks=%s" % len(self.tasks)) #d
        to_delete = []
        action = ''
        for n in xrange(len(self.tasks)):
            idx = (self.cur_task + n) % len(self.tasks)
            task = self.tasks[idx]
            action = task['schedule'].action_to_run()
            if action is None:
                to_delete.append(task)
            elif action:
                break

        #log.debug('to_delete %s' % to_delete) #d

        for t in to_delete:                   
            #log.debug('deleting done target %s' % t['item'].target_id) #d
            self.tasks.remove(t)

        # update cur_task so that we do not always start querying the same task for new actions
        if action:
            idx = self.tasks.index(task)
            self.cur_task = (idx + 1) % len(self.tasks)
        else:
            self.cur_task = 0

        # if action is None or empy there is no action ready to run
        # if there are new targets available try to read some and find some new action
        if action:
            return action, task
        else:
            if not self.all_targets_read and self.outstanding < self.max_outstanding:
                new_tasks = self._new_batch()
                if new_tasks:
                    self.cur_task = len(self.tasks)
                    self.tasks.extend(new_tasks)
            if self.all_targets_read and not self.tasks:
                log.debug("_get_action: gameover")
                self.stop()
            return  None, None


    def _iterate(self):
        """ Run the actions listed in schedule on the items returned by _new_batch """
        #log.debug('_iterate: oustanding=%s' % self.outstanding) #d
        while True:
            if self.gameover:
                log.debug('_iterate: gameover')
                return
            action, task = self._get_action()
            if action:
                log.debug('processing action: "%s"' % (action, ))
                item, schedule = task['item'], task['schedule']
                method, params = self.scripts[action]
                try:
                    item_params = loads(item.params)
    
                    # tmp bug fixing starts here
                    for k in params.keys():
                        if params[k] == '' and (k in item_params[action]):
                            params[k] = item_params[action][k]
                    # tmp bug fixing ends here
    
                    params.update(item_params.get('*', {}))
                    x = re.compile('^[a-z_]+' ) # cut out digits from action name
                    params.update(item_params.get(x.match(action).group(), {}))
                    self.outstanding += 1
                    #params = {u'source_variant_name': u'original'}
                    res = method(self.process.workspace, item.target_id, **params)
                    self._handle_ok(res, item, schedule, action, params)
                except Exception, e:
                    log.error('ERROR in %s: %s %s' % (str(method), type(e), str(e)))
                    self._handle_err(str(e), item, schedule, action, params)
            # If _get_action did not find anything and there are no more targets, no action
            # will be available until an action completes and allows more actions to go ready.
            if not (self.outstanding < self.max_outstanding and (action or not self.all_targets_read)):
                break