# Example #1
# score: 0
def get_actions(request):
    """Discover MPROCESSOR plugin modules and collect the actions they offer.

    Lists the configured plugin package directory, imports every plugin
    module and gathers the output of each module's ``inspect`` hook,
    called with the current session workspace.

    request: Django-style request; reads ``request.session['workspace']``
    and ``request.POST['media_type']``.
    Builds ``{'scripts': [...]}``. NOTE(review): the dict is built but no
    ``return`` statement is visible in this chunk -- confirm whether a
    trailing return/response was lost upstream.
    Raises: re-raises any error that prevents the plugin directory from
    being scanned at all; individual broken plugins are only logged.
    """
    import os, settings
    from mediadart.config import Configurator
    workspace = request.session.get('workspace')

    c = Configurator()
    actions_modules = c.get("MPROCESSOR", "plugins")
    # Drop the trailing 'dam' component: ROOT_PATH ends with the src dir.
    src_dir = '/'.join(settings.ROOT_PATH.split('/')[:-1])
    actions_dir = actions_modules.replace('.', '/')
    all_files = os.listdir(os.path.join(src_dir, actions_dir))

    resp = {'scripts': []}
    modules_to_load = []
    try:
        for filename in all_files:
            if filename.endswith('.py') and not filename.endswith('_idl.py'):
                # splitext instead of split('.py'): a name such as
                # 'my.pything.py' would be truncated to 'my' by the latter.
                modules_to_load.append(os.path.splitext(filename)[0])

        top_module = __import__(actions_modules, fromlist=modules_to_load)
        logger.debug('modules_to_load %s' % modules_to_load)
        # if no media_type all actions will be returned; the read is
        # loop-invariant, so do it once instead of per plugin.
        media_type = request.POST.get('media_type')
        for module in modules_to_load:
            try:
                logger.debug(module)

                module_loaded = getattr(top_module, module, None)
                if module_loaded:
                    logger.debug(module_loaded)

                    if hasattr(module_loaded, 'inspect'):
                        tmp = module_loaded.inspect(workspace)
                        tmp.update({'name': module})
                        resp['scripts'].append(tmp)
            except Exception as ex:
                # One broken plugin must not prevent the others from loading.
                logger.error(ex)
                continue

    except Exception as ex:
        logger.exception(ex)
        raise  # bare raise keeps the original traceback (unlike 'raise ex')
# Example #2
# score: 0
def get_actions(request):
    """Scan the MPROCESSOR plugin package and describe the available actions.

    Imports every non-IDL ``.py`` module found in the plugin directory and
    collects the result of each module's ``inspect(workspace)`` hook into
    ``{'scripts': [...]}``.

    request: Django-style request; uses ``request.session['workspace']``
    and ``request.POST['media_type']``.
    NOTE(review): ``resp`` is built but never returned in this view of the
    file -- a trailing return/response may have been lost upstream.
    """
    import os, settings
    from mediadart.config import Configurator
    workspace = request.session.get('workspace')

    c = Configurator()
    actions_modules = c.get("MPROCESSOR", "plugins")
    # removing dam dir, it ends with src dir
    src_dir = '/'.join(settings.ROOT_PATH.split('/')[:-1])
    actions_dir = actions_modules.replace('.', '/')
    all_files = os.listdir(os.path.join(src_dir, actions_dir))

    resp = {'scripts': []}
    modules_to_load = []
    try:
        for filename in all_files:
            if filename.endswith('.py') and not filename.endswith('_idl.py'):
                # os.path.splitext, not split('.py'): the latter mangles
                # module names that contain '.py' in the middle.
                modules_to_load.append(os.path.splitext(filename)[0])

        top_module = __import__(actions_modules, fromlist=modules_to_load)
        logger.debug('modules_to_load %s' % modules_to_load)
        # if no media_type all actions will be returned (hoisted: the value
        # does not change per module)
        media_type = request.POST.get('media_type')
        for module in modules_to_load:
            try:
                logger.debug(module)

                module_loaded = getattr(top_module, module, None)
                if module_loaded:
                    logger.debug(module_loaded)

                    if hasattr(module_loaded, 'inspect'):
                        tmp = module_loaded.inspect(workspace)
                        tmp.update({'name': module})
                        resp['scripts'].append(tmp)
            except Exception as ex:
                # best effort: skip plugins that fail to introspect
                logger.error(ex)
                continue

    except Exception as ex:
        logger.exception(ex)
        raise  # preserve the original traceback
# Example #3
# score: 0
class Batch:
    """Drive a batch process over the Twisted reactor.

    Pages ProcessTarget rows out of the database, schedules each item's
    pipeline actions according to a DAG of dependencies, keeps at most
    ``max_outstanding`` requests in flight and accounts per-item and
    per-process results.

    NOTE(review): ``_handle_ok`` / ``_handle_err`` are used by ``_iterate``
    but are not visible in this chunk -- presumably defined further down
    the original file.
    """

    def __init__(self, process):
        self.cfg = Configurator()
        self.max_outstanding = self.cfg.getint('MPROCESSOR', 'max_outstanding')
        self.batch_size = self.cfg.getint('MPROCESSOR', 'batch_size') # how many items to load
        self.pipeline = loads(process.pipeline.params)
        self.dag = DAG(self.pipeline)
        self.schedule_length = len(self.pipeline)
        self.process = process
        self.scripts = self._get_scripts(self.pipeline)
        self.all_targets_read = False      # True when all targets have been read
        self.gameover = False              # True when all targets are done
        self.deferred = None               # used to signal end of batch job
        self.outstanding = 0               # number of not yet answered requests
        self.cur_batch = 0                 # index in batch
        self.cur_task = 0                  # index in tasks
        self.totals = {'update': 0, 'passed': 0, 'failed': 0, 'targets': 0, None: 0}
        self.results = {}                  # item.pk -> {action: (success, result)}

    def run(self):
        "Start the iteration initializing state so that the iteration starts correctly"
        log.debug('### Running process %s' % str(self.process.pk))
        self.deferred = defer.Deferred()
        self.process.start_date = datetime.datetime.now()
        self.process.save()
        self.process.targets = ProcessTarget.objects.filter(process=self.process).count()
        self.tasks = []
        reactor.callLater(0, self._iterate)
        return self.deferred

    def stop(self, seconds_offset=0):
        "Mark the process finished and fire the deferred returned by run()."
        log.info('stopping process %s' % self.process.pk)
        when = datetime.datetime.now() + datetime.timedelta(seconds=seconds_offset)
        self.process.end_date = when
        self.process.save()
        self.gameover = True
        self.deferred.callback(None)

    def _update_item_stats(self, item, action, result, success, failure, cancelled):
        """Fold one action outcome into the item's counters; serialize the
        accumulated per-action results when the item finishes or fails."""
        item.actions_passed += success
        item.actions_failed += failure
        item.actions_cancelled += cancelled
        item.actions_todo -= (success + failure + cancelled)
        if item.pk not in self.results:
            self.results[item.pk] = {}
        self.results[item.pk][action] = (success, result)
        if item.actions_todo <= 0 or failure > 0:
            item.result = dumps(self.results[item.pk])
        if item.actions_todo <= 0:
            # item is done: drop its accumulator
            del self.results[item.pk]

    def _get_scripts(self, pipeline):
        """Load scripts from plugin directory.

           Returns the dictionary
           {'script_name': (callable, params)}
           Throws an exception if not all scripts can be loaded.
        """
        plugins_module = self.cfg.get("MPROCESSOR", "plugins")
        scripts = {}
        for script_key, script_dict in pipeline.items():
            script_name = script_dict['script_name']
            full_name = plugins_module + '.' + script_name + '.run'
            p = full_name.split('.')
            log.info('<$> loading script: %s' % '.'.join(p[:-1]))
            m = __import__('.'.join(p[:-1]), fromlist=p[:-1])
            f = getattr(m, p[-1], None)
            if not f or not callable(f):
                raise BatchError('Plugin %s has no callable run method' % script_name)
            else:
                scripts[script_key] = (f, script_dict.get('params', {}))
        return scripts

    def _new_batch(self):
        "Loads from db the next batch of items and associate a schedule to each item"
        if self.all_targets_read:
            return []

        targetset = ProcessTarget.objects.filter(process=self.process.pk)[self.cur_batch:self.cur_batch + self.batch_size]
        if targetset:
            self.cur_batch += self.batch_size
            ret = [{'item':x, 'schedule':Schedule(self.dag, x.target_id)} for x in targetset]   # item, index of current action, schedule
        else:
            self.all_targets_read = True
            ret = []
        return ret

    def _get_action(self):
        """returns the first action found or None. Delete tasks with no actions left"""
        to_delete = []
        action = ''
        # round-robin scan starting after the last task that yielded an action
        for n in range(len(self.tasks)):
            idx = (self.cur_task + n) % len(self.tasks)
            task = self.tasks[idx]
            action = task['schedule'].action_to_run()
            if action is None:
                to_delete.append(task)      # schedule exhausted
            elif action:
                break

        for t in to_delete:
            self.tasks.remove(t)

        # update cur_task so that we do not always start querying the same task for new actions
        if action:
            idx = self.tasks.index(task)
            self.cur_task = (idx + 1) % len(self.tasks)
        else:
            self.cur_task = 0

        # if action is None or empty there is no action ready to run
        # if there are new targets available try to read some and find some new action
        if action:
            return action, task
        else:
            if not self.all_targets_read and self.outstanding < self.max_outstanding:
                new_tasks = self._new_batch()
                if new_tasks:
                    self.cur_task = len(self.tasks)
                    self.tasks.extend(new_tasks)
            if self.all_targets_read and not self.tasks:
                log.debug("_get_action: gameover")
                self.stop()
            return None, None


    def _iterate(self):
        """ Run the actions listed in schedule on the items returned by _new_batch """
        if self.gameover:
            log.debug('_iterate: gameover')
            return
        action, task = self._get_action()
        if action:
            item, schedule = task['item'], task['schedule']
            method, script_params = self.scripts[action]
            # Work on a copy: self.scripts holds ONE params dict per script,
            # and mutating it (as the old code did) leaked values filled in
            # for one item into every subsequent item.
            params = dict(script_params)
            try:
                item_params = loads(item.params)

                # tmp bug fixing starts here
                for k in params.keys():
                    if params[k] == '' and (k in item_params[action]):
                        params[k] = item_params[action][k]
                # tmp bug fixing ends here

                params.update(item_params.get('*', {}))
                x = re.compile('^[a-z_]+' ) # cut out digits from action name
                params.update(item_params.get(x.match(action).group(), {}))
                self.outstanding += 1
                d = method(self.process.workspace, item.target_id, **params)
            except Exception as e:
                log.error('ERROR in %s: %s %s' % (str(method), type(e), str(e)))
                self._handle_err(str(e), item, schedule, action, params)
            else:
                d.addCallbacks(self._handle_ok, self._handle_err, 
                    callbackArgs=[item, schedule, action, params], errbackArgs=[item, schedule, action, params])
        # If _get_action did not find anything and there are no more targets, no action
        # will be available until an action completes and allows more actions to go ready.
        if self.outstanding < self.max_outstanding and (action or not self.all_targets_read):
            reactor.callLater(0, self._iterate)
# Example #4
# score: 0
class Batch:
    """Batch-process driver running on the Twisted reactor.

    Reads ProcessTarget rows in pages of ``batch_size``, runs each item's
    pipeline actions in DAG order, caps in-flight work at
    ``max_outstanding`` and records per-item and per-process statistics.

    NOTE(review): ``_handle_ok`` / ``_handle_err`` are referenced by
    ``_iterate`` but not defined in this chunk -- presumably they appear
    further down the original file.
    """

    def __init__(self, process):
        self.cfg = Configurator()
        self.max_outstanding = self.cfg.getint('MPROCESSOR', 'max_outstanding')
        self.batch_size = self.cfg.getint(
            'MPROCESSOR', 'batch_size')  # how many items to load
        self.pipeline = loads(process.pipeline.params)
        self.dag = DAG(self.pipeline)
        self.schedule_length = len(self.pipeline)
        self.process = process
        self.scripts = self._get_scripts(self.pipeline)
        self.all_targets_read = False  # True when all targets have been read
        self.gameover = False  # True when all targets are done
        self.deferred = None  # used to signal end of batch job
        self.outstanding = 0  # number of not yet answered requests
        self.cur_batch = 0  # index in batch
        self.cur_task = 0  # index in tasks
        self.totals = {
            'update': 0,
            'passed': 0,
            'failed': 0,
            'targets': 0,
            None: 0
        }
        self.results = {}  # item.pk -> {action: (success, result)}

    def run(self):
        "Start the iteration initializing state so that the iteration starts correctly"
        log.debug('### Running process %s' % str(self.process.pk))
        self.deferred = defer.Deferred()
        self.process.start_date = datetime.datetime.now()
        self.process.save()
        self.process.targets = ProcessTarget.objects.filter(
            process=self.process).count()
        self.tasks = []
        reactor.callLater(0, self._iterate)
        return self.deferred

    def stop(self, seconds_offset=0):
        "Record the end date and fire the deferred handed out by run()."
        log.info('stopping process %s' % self.process.pk)
        when = datetime.datetime.now() + datetime.timedelta(
            seconds=seconds_offset)
        self.process.end_date = when
        self.process.save()
        self.gameover = True
        self.deferred.callback(None)

    def _update_item_stats(self, item, action, result, success, failure,
                           cancelled):
        """Fold one action outcome into the item's counters; serialize the
        accumulated results once the item is finished or has failed."""
        item.actions_passed += success
        item.actions_failed += failure
        item.actions_cancelled += cancelled
        item.actions_todo -= (success + failure + cancelled)
        if item.pk not in self.results:
            self.results[item.pk] = {}
        self.results[item.pk][action] = (success, result)
        if item.actions_todo <= 0 or failure > 0:
            item.result = dumps(self.results[item.pk])
        if item.actions_todo <= 0:
            # item is done: free its accumulator
            del self.results[item.pk]

    def _get_scripts(self, pipeline):
        """Load scripts from plugin directory.

           Returns the dictionary
           {'script_name': (callable, params)}
           Throws an exception if not all scripts can be loaded.
        """
        plugins_module = self.cfg.get("MPROCESSOR", "plugins")
        scripts = {}
        for script_key, script_dict in pipeline.items():
            script_name = script_dict['script_name']
            full_name = plugins_module + '.' + script_name + '.run'
            p = full_name.split('.')
            log.info('<$> loading script: %s' % '.'.join(p[:-1]))
            m = __import__('.'.join(p[:-1]), fromlist=p[:-1])
            f = getattr(m, p[-1], None)
            if not f or not callable(f):
                raise BatchError('Plugin %s has no callable run method' %
                                 script_name)
            else:
                scripts[script_key] = (f, script_dict.get('params', {}))
        return scripts

    def _new_batch(self):
        "Loads from db the next batch of items and associate a schedule to each item"
        if self.all_targets_read:
            return []

        targetset = ProcessTarget.objects.filter(
            process=self.process.pk)[self.cur_batch:self.cur_batch +
                                     self.batch_size]
        if targetset:
            self.cur_batch += self.batch_size
            ret = [{
                'item': x,
                'schedule': Schedule(self.dag, x.target_id)
            } for x in targetset]  # item, index of current action, schedule
        else:
            self.all_targets_read = True
            ret = []
        return ret

    def _get_action(self):
        """returns the first action found or None. Delete tasks with no actions left"""
        to_delete = []
        action = ''
        # round-robin scan so no single task monopolizes the scheduler
        for n in range(len(self.tasks)):
            idx = (self.cur_task + n) % len(self.tasks)
            task = self.tasks[idx]
            action = task['schedule'].action_to_run()
            if action is None:
                to_delete.append(task)  # schedule exhausted
            elif action:
                break

        for t in to_delete:
            self.tasks.remove(t)

        # update cur_task so that we do not always start querying the same task for new actions
        if action:
            idx = self.tasks.index(task)
            self.cur_task = (idx + 1) % len(self.tasks)
        else:
            self.cur_task = 0

        # if action is None or empty there is no action ready to run
        # if there are new targets available try to read some and find some new action
        if action:
            return action, task
        else:
            if not self.all_targets_read and self.outstanding < self.max_outstanding:
                new_tasks = self._new_batch()
                if new_tasks:
                    self.cur_task = len(self.tasks)
                    self.tasks.extend(new_tasks)
            if self.all_targets_read and not self.tasks:
                log.debug("_get_action: gameover")
                self.stop()
            return None, None

    def _iterate(self):
        """ Run the actions listed in schedule on the items returned by _new_batch """
        if self.gameover:
            log.debug('_iterate: gameover')
            return
        action, task = self._get_action()
        if action:
            item, schedule = task['item'], task['schedule']
            method, script_params = self.scripts[action]
            # Copy before mutating: self.scripts keeps one shared params
            # dict per script; the old in-place updates leaked one item's
            # values into every later item.
            params = dict(script_params)
            try:
                item_params = loads(item.params)

                # tmp bug fixing starts here
                for k in params.keys():
                    if params[k] == '' and (k in item_params[action]):
                        params[k] = item_params[action][k]
                # tmp bug fixing ends here

                params.update(item_params.get('*', {}))
                x = re.compile('^[a-z_]+')  # cut out digits from action name
                params.update(item_params.get(x.match(action).group(), {}))
                self.outstanding += 1
                d = method(self.process.workspace, item.target_id, **params)
            except Exception as e:
                log.error('ERROR in %s: %s %s' %
                          (str(method), type(e), str(e)))
                self._handle_err(str(e), item, schedule, action, params)
            else:
                d.addCallbacks(self._handle_ok,
                               self._handle_err,
                               callbackArgs=[item, schedule, action, params],
                               errbackArgs=[item, schedule, action, params])
        # If _get_action did not find anything and there are no more targets, no action
        # will be available until an action completes and allows more actions to go ready.
        if self.outstanding < self.max_outstanding and (
                action or not self.all_targets_read):
            reactor.callLater(0, self._iterate)