Example #1
0
 def prepare_to_run_strategy(self, strategy, stage_name=None):
     '''
         Validate strategy, build tasks for it and put the task_manager.
     '''
     plugin_manager.validate_strategy(strategy)
     tasks = self.build_tasks_from_strategy(strategy, stage_name=stage_name)
     self.task_manager = TaskManager()
     for task in tasks:
         self.task_manager.add_task(task)
     if stage_name is None:
         root_task = RootTask(self.trial_manager.marked_trials)
     else:
         plugins = [task.plugin for task in tasks]
         root_task = StageRootTask(self.trial_manager.marked_trials, plugins)
     self.obsoleted_tasks = self.task_manager.add_root_task(root_task)
     return self.obsoleted_tasks
Example #2
0
class ProcessManager(object):
    '''
        ProcessManager handles all the multi-processing and task
    creation and management.
    '''
    def __init__(self, trial_manager):
        self.trial_manager  = trial_manager
        self.task_manager = None

    def build_tasks_from_strategy(self, strategy, stage_name=None):
        '''Create a task for each stage of the strategy.'''
        tasks = []
        marked_trials = self.trial_manager.marked_trials

        if stage_name == 'auxiliary':
            for plugin_name, plugin_kwargs in strategy.auxiliary_stages.items():
                plugin = plugin_manager.find_plugin('auxiliary', 
                        plugin_name)
                if plugin.runs_with_stage == 'auxiliary':
                    tasks.extend(build_tasks(marked_trials, plugin, 
                            stage_name, plugin_kwargs))

        if stage_name is not None: 
            if stage_name != 'auxiliary':
                plugin_name = strategy.methods_used[stage_name]
                plugin = plugin_manager.find_plugin(stage_name,
                        plugin_name)
                plugin_kwargs = strategy.settings[stage_name]
                tasks.extend(build_tasks(marked_trials, plugin, 
                        stage_name, plugin_kwargs))
                    
                # get auxiliary stages that should run with this stage.
                for plugin_name, plugin_kwargs in \
                        strategy.auxiliary_stages.items():
                    plugin = plugin_manager.find_plugin('auxiliary', 
                            plugin_name)
                    if plugin.runs_with_stage == stage_name:
                        tasks.extend(build_tasks(marked_trials, plugin, 
                                'auxiliary', plugin_kwargs))
        else: # do for all stages
            for stage_name, plugin_name in strategy.methods_used.items():
                plugin = plugin_manager.find_plugin(stage_name, 
                        plugin_name)
                plugin_kwargs = strategy.settings[stage_name]
                tasks.extend(build_tasks(marked_trials, plugin, stage_name,
                        plugin_kwargs))

            for plugin_name, plugin_kwargs in strategy.auxiliary_stages.items():
                plugin = plugin_manager.find_plugin('auxiliary', 
                        plugin_name)
                tasks.extend(build_tasks(marked_trials, plugin, 
                        'auxiliary', plugin_kwargs))
        return tasks 

    def prepare_to_run_strategy(self, strategy, stage_name=None):
        '''
            Validate strategy, build tasks for it and put the task_manager.
        '''
        plugin_manager.validate_strategy(strategy)
        tasks = self.build_tasks_from_strategy(strategy, stage_name=stage_name)
        self.task_manager = TaskManager()
        for task in tasks:
            self.task_manager.add_task(task)
        if stage_name is None:
            root_task = RootTask(self.trial_manager.marked_trials)
        else:
            plugins = [task.plugin for task in tasks]
            root_task = StageRootTask(self.trial_manager.marked_trials, plugins)
        self.obsoleted_tasks = self.task_manager.add_root_task(root_task)
        return self.obsoleted_tasks

    def run_tasks(self, message_queue=multiprocessing.Queue()):
        '''
            Run all the tasks in self.task_manager
        (see self.prepare_to_run_strategy()).
        '''
        num_process_workers = config_manager.get_num_workers()
        num_tasks = self.task_manager.num_tasks
        if num_tasks == 0:
            raise NoTasksError('There are no tasks to run')
        if num_tasks < num_process_workers:
            num_process_workers = num_tasks

        input_queue = multiprocessing.Queue()
        results_queue = multiprocessing.Queue()

        # start the jobs
        jobs = []
        results_list = []
        for i in xrange(num_process_workers):
            job = multiprocessing.Process(target=task_worker, 
                                          args=(input_queue, 
                                                results_queue))
            job.start()
            jobs.append(job)

        task_index = {}
        for task in self.task_manager.tasks:
            task_index[task.task_id] = task
        message_queue.put(('TASKS', [str(t) for t in task_index.values()]))
            
        results_index = {}
        queued_tasks = 0
        base_time = time.time()
        while True:
            # queue up ready tasks
            ready_tasks = self.task_manager.get_ready_tasks()
            while ready_tasks:
                picked_task = random.choice(ready_tasks)
                task_info = self.task_manager.checkout_task(picked_task)
                message_queue.put(('RUNNING_TASK', str(picked_task)))
                message_queue.put(('DISPLAY_GRAPH', 
                        (self.task_manager.get_plot_dict(), 
                        time.time()-base_time)))
                input_queue.put(task_info)
                queued_tasks += 1

                ready_tasks = self.task_manager.get_ready_tasks()

            # wait for one result
            if queued_tasks > 0:
                result = results_queue.get()
                finished_task_id = result['task_id']
                finished_task = task_index[finished_task_id]
                results_index[finished_task_id] = result['result']
                if result['result'] is None:
                    message_queue.put(('TASK_ERROR', 
                            {'task':str(finished_task),
                             'traceback':result['traceback'],
                             'runtime':result['runtime']}))
                    self.task_manager.complete_task(finished_task)
                    self.task_manager.remove_all_tasks()
                else:
                    message_queue.put(('FINISHED_TASK', 
                            {'task':str(finished_task), 
                             'runtime':result['runtime']}))
                    self.task_manager.complete_task(finished_task, 
                            result['result'])
                message_queue.put(('DISPLAY_GRAPH', 
                        (self.task_manager.get_plot_dict(),
                        time.time()-base_time)))

            # are we done queueing up tasks? then add in the sentinals.
            if self.task_manager.num_tasks == 0:
                for i in xrange(num_process_workers):
                    input_queue.put(None)
            
                # are we done getting results? then exit.
                if len(results_index.keys()) == queued_tasks:
                    break

        for job in jobs:
            job.join() # halt this thread until processes are all complete.
        message_queue.put(('FINISHED_RUN', None))

        return task_index, results_index

    def open_file(self, fullpath, **kwargs):
        '''
            Open a single data file. Returns the list of trials created.
        '''
        return self.open_files([fullpath], **kwargs)[0]

    @supports_callbacks
    def open_files(self, fullpaths, **kwargs):
        '''
            Open a multiple data files. Returns a list of 
        'list of trials created'.
        '''
        file_interpreters = plugin_manager.file_interpreters
        if len(fullpaths) == 1:
            try:
                results = open_data_file(fullpaths[0], file_interpreters, 
                        **kwargs)
            except:
                results = []
                traceback.print_exc()
            return results

        num_process_workers = config_manager.get_num_workers()
        if len(fullpaths) < num_process_workers:
            num_process_workers = len(fullpaths)

        # setup the input and return queues.
        input_queue = multiprocessing.Queue()
        for fullpath in fullpaths:
            input_queue.put(fullpath)
        for i in xrange(num_process_workers):
            input_queue.put(None)
        results_queue = multiprocessing.Queue()

        # start the jobs
        jobs = []
        for i in xrange(num_process_workers):
            job = multiprocessing.Process(target=open_file_worker, 
                                          args=(input_queue, 
                                                results_queue))
            job.start()
            jobs.append(job)

        # collect the results, waiting for all the jobs to complete
        results_list = []
        for i in xrange(len(fullpaths)):
            # file_interpreters return list of trial objects.
            results_list.extend(results_queue.get())

        for job in jobs:
            job.join() # halt this thread until processes are all complete.

        return results_list