Example #1
0
 def callback(self, case, policy, name, result):
     '''
     Register the completion of a single case.

     This implementation stores nothing: it only increments the counter of
     completed runs and logs the running total, at debug level for every
     case and at info level for every 100th case.

     :param case: the case to be stored
     :param policy: the name of the policy being used
     :param name: the name of the model being used
     :param result: the result dict

     '''

     self.i += 1
     progress = str(self.i) + " cases completed"
     debug(progress)

     # repeat the message at info level once per 100 completed cases
     if not self.i % 100:
         info(progress)
Example #2
0
def save_results(results, file):
    '''
    Save the results to the specified cPickle file. To facilitate transfer
    across different machines, the file is opened in binary mode and the
    results are pickled with protocol 2.

    see also: http://projects.scipy.org/numpy/ticket/1284

    :param results: the return of run_experiments
    :param file: the path of the file
    :raises: IOError if the file cannot be opened or written; the error is
             logged as a warning and then re-raised

    '''

    path = os.path.abspath(file)
    debug("saving results to: " + path)
    try:
        # the context manager guarantees that the handle is flushed and
        # closed, also when pickling fails halfway
        with open(file, 'wb') as fh:
            cPickle.dump(results, fh, protocol=2)
    except IOError:
        warning(path + " not found")
        raise
Example #3
0
def merge_results(results1, results2, downsample=None):
    '''
    convenience function for merging the return from 
    :meth:`~model.SimpleModelEnsemble.perform_experiments`.
    
    The function merges results2 with results1. For the experiments,
    it generates an empty array equal to the size of the sum of the 
    experiments. As dtype it uses the dtype from the experiments in results1.
    The function assumes that the ordering of dtypes and names is identical in
    both results.  
    
    Only outcomes that are present in both results are merged; an outcome 
    that occurs in just one of the two is left out of the merged results.
    Each outcome is expected to be a 2D array (rows are experiments). The
    merged outcome arrays are allocated with the default numpy float dtype.
    
    A typical use case for this function is in combination with 
    :func:`~util.experiments_to_cases`. Using :func:`~util.experiments_to_cases`
    one extracts the cases from a first set of experiments. One then
    performs these cases on a different model or policy, and then one wants to
    merge these new results with the old result for further analysis.  
    
    :param results1: first results to be merged
    :param results2: second results to be merged
    :param downsample: should be an integer, will be used in slicing the results
                       in order to avoid memory problems. 
    :return: the merged results, as a tuple of the merged experiments array
             and the dict with the merged outcomes
    
    
    '''

    #start of merging
    old_exp, old_res = results1
    new_exp, new_res = results2
    
    #merge experiments, the dtype of the experiments in results1 is leading
    nr_old_exp = old_exp.shape[0]
    merged_exp = np.empty((nr_old_exp + new_exp.shape[0],), 
                          dtype=old_exp.dtype)
    merged_exp[:nr_old_exp] = old_exp
    merged_exp[nr_old_exp:] = new_exp
    
    #only merge the results that are in both
    keys = set(old_res.keys()) & set(new_res.keys())
    info("intersection of keys: %s" % keys)
    
    #a step of 1 keeps every column
    step = downsample if downsample else 1
    
    #merging results
    merged_res = {}
    for key in keys:
        info("merge "+key)
        
        #downsample first, so that the number of columns of the merged 
        #array always matches what is actually copied into it
        old_value = old_res[key][:, ::step]
        new_value = new_res[key][:, ::step]
        
        nr_old_rows = old_value.shape[0]
        merged_value = np.empty((nr_old_rows + new_value.shape[0], 
                                 old_value.shape[1]))
        #shape is a tuple, so it has to be wrapped for a single %s
        debug("merged shape: %s" % (merged_value.shape,))
        
        merged_value[:nr_old_rows, :] = old_value
        merged_value[nr_old_rows:, :] = new_value

        merged_res[key] = merged_value
    
    return merged_exp, merged_res
Example #4
0
 def run(self):
     '''
     Set up the logger for this process, log that the process has 
     started, and then delegate to the run of the super class.
     '''
     self._setupLogger()

     current = multiprocessing.current_process()
     started = 'process %s with pid %s started' % (current.name, current.pid)
     debug(started)

     # the run of the super in turn calls the worker function
     super(LoggingProcess, self).run()
Example #5
0
def worker(inqueue, 
           outqueue, 
           modelInterfaces, 
           modelInitKwargs=None):
    '''
    Code run by worker processes.
    
    Tasks are read from inqueue until either a None sentinel is received 
    or reading raises an EOFError or IOError. Every task is a tuple 
    ``(job, i, case, policy)``. The case is run on each of the model 
    interfaces, and for each of them ``(job, i, result)`` is put on 
    outqueue. On success result is 
    ``(True, (case, policy, modelInterface.name, output))``, on failure it 
    is ``(False, exception)``.
    
    Whenever the policy of a task differs from the policy of the previous 
    task, ``model_init`` is invoked on every model interface before the 
    case is run.
    
    :param inqueue: queue from which the tasks are read
    :param outqueue: queue on which the results are put
    :param modelInterfaces: the model interfaces on which each case is run
    :param modelInitKwargs: kwargs to be passed to ``model_init``
    :raises: EMAError if ``model_init`` raises it; other exceptions during 
             initialization are logged and reported as a failed result
    
    '''
        
    debug("worker started")
    
    put = outqueue.put
    get = inqueue.get
    if hasattr(inqueue, '_writer'):
        # this function only reads from inqueue and only writes to 
        # outqueue, so the opposite ends are closed
        inqueue._writer.close()
        outqueue._reader.close()
    
    def cleanup(modelInterfaces):
        for msi in modelInterfaces:
            msi.cleanup()

    oldPolicy = {}
    modelInitialized = False
    while True:
        try:
            task = get()
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break
        if task is None:
            debug('worker got sentinel -- exiting')
            cleanup(modelInterfaces)
            break

        job, i, case, policy = task
        
        # decided once per task: every model interface has to be 
        # (re)initialized when the policy changes, not only the first one
        policyChanged = policy != oldPolicy
        
        for modelInterface in modelInterfaces:
            if policyChanged:
                # NOTE(review): modelInitialized is shared by all model 
                # interfaces, so for later tasks with the same policy it 
                # reflects the interface that was initialized last
                modelInitialized = False
                try:
                    debug("invoking model init")
                    modelInterface.model_init(policy, modelInitKwargs)
                    debug("model initialized successfully")
                    modelInitialized = True
                except EMAError:
                    exception("init not implemented")
                    raise
                except Exception:
                    exception("some exception occurred when invoking the init")
            if modelInitialized:
                try:
                    try:
                        debug("trying to run model")
                        # the model gets its own copy, the case itself is 
                        # returned as part of the result
                        modelInterface.run_model(copy.deepcopy(case))
                    except CaseError as e:
                        # a failed case is only logged, the output is 
                        # still retrieved
                        EMAlogging.warning(e)
                    debug("trying to retrieve output")
                    result = modelInterface.retrieve_output()
                    
                    debug("trying to reset model")
                    modelInterface.reset_model()
                    result = (True, (case, policy, modelInterface.name, result))
                except Exception as e:
                    result = (False, e)
            else:
                result = (False, EMAParallelError("failure to initialize"))
            put((job, i, result))
        
        # only remember the policy after all model interfaces have seen it
        oldPolicy = policy
Example #6
0
    def _terminate_pool(cls, 
                        taskqueue, 
                        inqueue, 
                        outqueue, 
                        pool,
                        task_handler, 
                        result_handler, 
                        cache, 
                        workingDirectories):
        '''
        Shut down the pool and remove the working directories of the 
        workers.
        
        The task handler and result handler are put in the terminate state 
        and sent a sentinel, the workers are terminated, handlers and 
        workers are joined, and finally the parent directory of every 
        entry in workingDirectories is deleted.
        
        :param taskqueue: the queue that feeds the task handler
        :param inqueue: the queue from which the workers read their tasks
        :param outqueue: the queue on which the workers put their results
        :param pool: the list of worker processes
        :param task_handler: the task handler thread
        :param result_handler: the result handler thread
        :param cache: the cache of the pool
        :param workingDirectories: the working directories that were 
                                   created for the workers
        
        '''

        EMAlogging.info("terminating pool")
        
        # this is guaranteed to only be called once
        debug('finalizing pool')
        TERMINATE = 2

        task_handler._state = TERMINATE
        for p in pool:
            taskqueue.put(None)                 # sentinel
            time.sleep(1)

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                p.terminate()

        debug('joining task handler')
        task_handler.join(1e100)

        debug('joining result handler')
        result_handler.join(1e100)

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                p.join()
        
        # cleaning up directories
        # TODO investigate whether the multiprocessing.util tempdirectory  
        # functionality can be used instead

        # several working directories can share the same parent directory,
        # which can only be removed once: a second rmtree on the same path
        # raises an OSError
        removed = set()
        for directory in workingDirectories:
            directory = os.path.dirname(directory)
            if directory in removed:
                continue
            removed.add(directory)
            
            EMAlogging.debug("deleting "+str(directory))
            shutil.rmtree(directory)
Example #7
0
    def _handle_tasks(taskqueue, put, outqueue, pool, logQueue):
        '''
        Body of the task handler thread: forward tasks from taskqueue by 
        calling put for each of them.
        
        Items are taken from taskqueue until a None sentinel is read. Every
        item is a pair ``(taskseq, set_length)``. Each task in taskseq is 
        passed to put. When a sequence has been forwarded completely and 
        set_length is truthy, set_length is called with the number of tasks
        in the sequence. Forwarding stops early when ``thread._state`` is 
        truthy or when put raises an IOError.
        
        Afterwards a None sentinel is put on outqueue, ``2*len(pool)`` None
        sentinels are passed to put, and after a pause of two seconds a 
        None is put on logQueue.
        
        :param taskqueue: queue with ``(taskseq, set_length)`` pairs
        :param put: callable that is invoked with each task
        :param outqueue: queue that receives a None sentinel at the end
        :param pool: sized collection, its length determines the number of 
                     sentinels that are passed to put
        :param logQueue: queue that receives a None as the very last step
        
        '''
        thread = threading.current_thread()

        # iter(callable, sentinel): keep calling taskqueue.get until it 
        # returns None
        for taskseq, set_length in iter(taskqueue.get, None):
            # stays -1 for an empty taskseq, so that set_length gets 0
            i = -1
            for i, task in enumerate(taskseq):
                # presumably RUN is falsy, so any other state stops the 
                # forwarding -- TODO confirm the value of RUN
                if thread._state:
                    debug('task handler found thread._state != RUN')
                    break
                try:
                    put(task)
                except IOError:
                    debug('could not put task on queue')
                    break
            else:
                # inner loop finished without a break: the entire sequence
                # was forwarded, continue with the next item of taskqueue
                if set_length:
                    debug('doing set_length()')
                    set_length(i+1)
                continue
            # only reached after a break in the inner loop: stop taking 
            # items from taskqueue
            break
        else:
            # outer loop finished without a break: taskqueue returned None
            debug('task handler got sentinel')


        try:
            # tell result handler to finish when cache is empty
            debug('task handler sending sentinel to result handler')
            outqueue.put(None)

            # tell workers there is no more work
            debug('task handler sending sentinel to workers')
            for i in range(2*len(pool)):
                put(None)
        except IOError:
            debug('task handler got IOError when sending sentinels')

        debug('task handler exiting')
        time.sleep(2)
        
        # NOTE(review): presumably this tells the reader of logQueue to 
        # stop -- confirm against the log queue reader
        logQueue.put(None)
Example #8
0
    def __init__(self, 
                 modelStructureInterfaces, 
                 processes=None, 
                 callback = None, 
                 kwargs=None):
        '''
        Set up the pool: the queues, the reader for the log queue, the 
        worker processes, the task handler and result handler threads, and 
        the finalizer that terminates the pool.
        
        For every worker, the working directory of each model structure 
        interface that has one is copied to 
        ``<root>/<worker name>/<interface name>`` and the interface is 
        pointed to that copy before the worker process is started. Paths 
        that contain ``.svn`` are left out of the copy.
        
        :param modelStructureInterfaces: the model structure interfaces 
                                         that are handed to each worker
        :param processes: nr. of processes to spawn, if none, it is 
                                   set to equal the nr. of cores
        :param callback: callback function for handling the output 
        :param kwargs: kwargs to be passed to :meth:`model_init`
        '''
        
        self._setup_queues()
        self._taskqueue = Queue.Queue()
        self._cache = {}
        self._state = RUN

        self._callback = callback

        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        info("nr of processes is "+str(processes))

        self.Process = LoggingProcess
        self.logQueue = multiprocessing.Queue()
        h = EMAlogging.NullHandler()
        logging.getLogger(EMAlogging.LOGGER_NAME).addHandler(h)
        
        # This thread will read from the subprocesses and write to the
        # main log's handlers.
        log_queue_reader = LogQueueReader(self.logQueue)
        log_queue_reader.start()

        self._pool = []

        workingDirectories = []
        debug('generating workers')
        
        def ignore_function(path, names):
            # ignore everything inside a directory whose path contains 
            # .svn, copy everything else
            if '.svn' in path:
                return names
            else:
                return []
        
        workerRoot = None
        for i in range(processes):
            debug('generating worker '+str(i))
            
            workerName = 'PoolWorker'+str(i)
            
            #setup working directories for parallelEMA
            
            for msi in modelStructureInterfaces:
                if msi.workingDirectory is not None:
                    if workerRoot is None:
                        # determined only once, from the first interface 
                        # that actually has a working directory
                        workerRoot = os.path.dirname(
                                        os.path.abspath(msi.workingDirectory))
                    
                    workingDirectory = os.path.join(workerRoot, 
                                                    workerName, 
                                                    msi.name)
                    
                    workingDirectories.append(workingDirectory)
                    shutil.copytree(msi.workingDirectory, 
                                    workingDirectory,
                                    ignore = ignore_function)
                    msi.set_working_directory(workingDirectory)


            w = self.Process(
                self.logQueue,
                level = logging.getLogger(EMAlogging.LOGGER_NAME)\
                                          .getEffectiveLevel(),
                                          target=worker,
                                          args=(self._inqueue, 
                                                self._outqueue, 
                                                modelStructureInterfaces, 
                                                kwargs)
                                          )
            self._pool.append(w)
            
            w.name = w.name.replace('Process', workerName)
            w.daemon = True
            w.start()
            debug(' worker '+str(i) + ' generated')

        self._task_handler = threading.Thread(
                                        target=CalculatorPool._handle_tasks,
                                        args=(self._taskqueue, 
                                              self._quick_put, 
                                              self._outqueue, 
                                              self._pool, 
                                              self.logQueue)
                                        )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        self._result_handler = threading.Thread(
            target=CalculatorPool._handle_results,
            args=(self._outqueue, self._quick_get, self._cache)
            )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # registers _terminate_pool as the finalizer of this pool
        self._terminate = Finalize(self, 
                                   self._terminate_pool,
                                   args=(self._taskqueue, 
                                         self._inqueue, 
                                         self._outqueue, 
                                         self._pool,
                                         self._task_handler, 
                                         self._result_handler, 
                                         self._cache, 
                                         workingDirectories,
                                         ),
                                    exitpriority=15
                                    )
        
        EMAlogging.info("pool has been set up")