Example #1
    def _terminate_pool(cls, 
                        taskqueue, 
                        inqueue, 
                        outqueue, 
                        pool,
                        task_handler, 
                        result_handler, 
                        cache, 
                        working_dirs,
                        ):

        info("terminating pool")
        
        # this is guaranteed to only be called once
        debug('finalizing pool')
        TERMINATE = 2

        task_handler._state = TERMINATE
        for p in pool:
            taskqueue.put(None)                 # sentinel
            time.sleep(1)

        debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        if pool and hasattr(pool[0], 'terminate'):
            debug('terminating workers')
            for p in pool:
                p.terminate()

        debug('joining task handler')
        task_handler.join(1e100)

        debug('joining result handler')
        result_handler.join(1e100)

        if pool and hasattr(pool[0], 'terminate'):
            debug('joining pool workers')
            for p in pool:
                p.join()
        
        # cleaning up directories
        # TODO investigate whether the multiprocessing.util tempdirectory  
        # functionality can be used instead
        
        # the working directories of the different models for one worker
        # share the same parent directory; deduplicate before removal so
        # rmtree is not called twice on the same path
        for directory in set(os.path.dirname(entry) for entry in working_dirs):
            debug("deleting "+str(directory))
            shutil.rmtree(directory)
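
The TODO above asks whether multiprocessing's own temporary-directory machinery could replace this hand-rolled cleanup. A minimal sketch of that direction, assuming nothing about the rest of the class: create each per-worker directory with tempfile and register its removal with multiprocessing.util.Finalize, using an exitpriority below _terminate_pool's 15 so the pool is shut down before its directories disappear (finalizers with higher priority run first).

import shutil
import tempfile

from multiprocessing.util import Finalize

# sketch only: the directory prefix and the exitpriority are assumptions
working_directory = tempfile.mkdtemp(prefix='ema_worker_')
Finalize(None, shutil.rmtree,
         args=(working_directory,),
         kwargs={'ignore_errors': True},
         exitpriority=10)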
Example #2
def oldcsv_load_results(file_name):
    '''
    load the specified tar.gz file. the file is assumed to have been saved
    using save_results.
    
    :param file_name: the path of the file
    :raises: IOError if file not found

    '''
    
    outcomes = {}
    with tarfile.open(file_name, 'r') as z:
        # load experiments
        experiments = z.extractfile('experiments.csv')
        experiments = csv2rec(experiments)

        # load metadata
        metadata = z.extractfile('experiments metadata.csv').readlines()
        metadata = [entry.strip() for entry in metadata]
        metadata = [tuple(entry.split(",")) for entry in metadata]
        metadata = np.dtype(metadata)

        # cast experiments to dtype and name specified in metadata        
        temp_experiments = np.zeros((experiments.shape[0],), dtype=metadata)
        for i, entry in enumerate(experiments.dtype.descr):
            dtype = metadata[i]
            name = metadata.descr[i][0]
            temp_experiments[name][:] = experiments[entry[0]].astype(dtype)
        experiments = temp_experiments

        # load outcomes
        fhs = z.getnames()
        fhs.remove('experiments.csv')
        fhs.remove('experiments metadata.csv')
        for fh in fhs:
            root = os.path.splitext(fh)[0]
            data = z.extractfile(fh)
            first_line = data.readline()
            shape = first_line.split(":")[1].strip()[1:-1]
            shape = shape.split(',')
            shape = tuple([int(entry) for entry in shape if len(entry)>0])
            data = np.loadtxt(data, delimiter=',')
            data = data.reshape(shape)
            
            outcomes[root] = data
            
    info("results loaded succesfully from {}".format(file_name))
    return experiments, outcomes
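
The metadata round-trip in the middle of this function is easy to miss: each line of 'experiments metadata.csv' is a "name,dtype" pair, and numpy rebuilds a structured dtype directly from those tuples. A small self-contained illustration (the values are made up for the example):

import numpy as np

lines = ["policy,|S10", "a,<f8"]
dtype = np.dtype([tuple(line.split(",")) for line in lines])
# dtype.descr == [('policy', '|S10'), ('a', '<f8')]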
Example #3
    def __call__(self, case, policy, name, result):
        '''
        Method responsible for storing results. The implementation in this
        class only keeps track of how many runs have been completed and
        logs this.
        
        :param case: the case to be stored
        :param policy: the name of the policy being used
        :param name: the name of the model being used
        :param result: the result dict
        
        '''
        
        self.i += 1
        debug(str(self.i)+" cases completed")
        
        if self.i % self.reporting_interval == 0:
            info(str(self.i)+" cases completed")
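
For reference, a self-contained sketch of the counting pattern this method implements; the class name and the use of the stdlib logging module are assumptions, the original relies on the project's ema_logging helpers:

import logging

class CountingCallback(object):
    # minimal sketch of the pattern, not the original class
    def __init__(self, reporting_interval=100):
        self.i = 0
        self.reporting_interval = reporting_interval

    def __call__(self, case, policy, name, result):
        self.i += 1
        logging.debug("%s cases completed", self.i)
        if self.i % self.reporting_interval == 0:
            logging.info("%s cases completed", self.i)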
Example #4
    def __init__(self, msis, processes=None, kwargs=None):
        """
        
        :param msis: an iterable of model structure interfaces
        :param processes: nr. of processes to spawn, if none, it is 
                                   set to equal the nr. of cores
        :param callback: callback function for handling the output 
        :param kwargs: kwargs to be pased to :meth:`model_init`
        """

        self._setup_queues()
        # the task queue lives in the parent process, so it is a plain
        # thread-safe Queue.Queue, not a multiprocessing.Queue
        self._taskqueue = Queue.Queue(cpu_count() * 2)
        self._cache = {}
        self._state = RUN

        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        info("nr of processes is " + str(processes))

        self.log_queue = multiprocessing.Queue()
        h = NullHandler()
        logging.getLogger(ema_logging.LOGGER_NAME).addHandler(h)

        # This thread will read from the subprocesses and write to the
        # main log's handlers.
        log_queue_reader = LogQueueReader(self.log_queue)
        log_queue_reader.start()

        self._pool = []
        working_dirs = []
        #        msis = [copy.deepcopy(msi) for msi in msis]

        debug("generating workers")

        workerRoot = None
        for i in range(processes):
            debug("generating worker " + str(i))

            workerName = "PoolWorker" + str(i)

            def ignore_function(path, names):
                if path.find(".svn") != -1:
                    return names
                else:
                    return []

            # setup working directories for parallelEMA
            for msi in msis:
                if msi.workingDirectory is not None:
                    if workerRoot is None:
                        workerRoot = os.path.dirname(os.path.abspath(msis[0].workingDirectory))

                    workingDirectory = os.path.join(workerRoot, workerName, msi.name)

                    working_dirs.append(workingDirectory)
                    shutil.copytree(msi.workingDirectory, workingDirectory, ignore=ignore_function)
                    msi.set_working_directory(workingDirectory)

            w = LoggingProcess(
                self.log_queue,
                level=logging.getLogger(ema_logging.LOGGER_NAME).getEffectiveLevel(),
                target=worker,
                args=(self._inqueue, self._outqueue, msis, kwargs),
            )
            self._pool.append(w)

            w.name = w.name.replace("Process", workerName)
            w.daemon = True
            w.start()
            debug(" worker " + str(i) + " generated")

        # thread for handling tasks
        self._task_handler = threading.Thread(
            target=CalculatorPool._handle_tasks,
            name="task handler",
            args=(self._taskqueue, self._quick_put, self._outqueue, self._pool),
        )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        # thread for handling results
        self._result_handler = threading.Thread(
            target=CalculatorPool._handle_results,
            name="result handler",
            args=(self._outqueue, self._quick_get, self._cache, self.log_queue),
        )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # function for cleaning up when finalizing object
        self._terminate = Finalize(
            self,
            self._terminate_pool,
            args=(
                self._taskqueue,
                self._inqueue,
                self._outqueue,
                self._pool,
                self._task_handler,
                self._result_handler,
                self._cache,
                working_dirs,
            ),
            exitpriority=15,
        )

        info("pool has been set up")
Example #5
def merge_results(results1, results2, downsample=None):
    '''
    convenience function for merging the return from 
    :meth:`~modelEnsemble.ModelEnsemble.perform_experiments`.
    
    The function merges results2 with results1. For the experiments,
    it generates an empty array equal to the size of the sum of the 
    experiments. As dtype it uses the dtype from the experiments in results1.
    The function assumes that the ordering of dtypes and names is identical in
    both results.  
    
    A typical use case for this function is in combination with 
    :func:`~util.experiments_to_cases`. Using :func:`~util.experiments_to_cases`
    one extracts the cases from a first set of experiments. One then
    performs these cases on a different model or policy, and then one wants to
    merge these new results with the old result for further analysis.  
    
    :param results1: first results to be merged
    :param results2: second results to be merged
    :param downsample: should be an integer, will be used in slicing the results
                       in order to avoid memory problems. 
    :return: the merged results
    
    
    '''

    #start of merging
    old_exp, old_res = results1
    new_exp, new_res = results2
    
    #merge experiments
    dtypes = old_exp.dtype
    
    merged_exp = np.empty((old_exp.shape[0]+new_exp.shape[0],),dtype= dtypes)
    merged_exp[0:old_exp.shape[0]] = old_exp
    merged_exp[old_exp.shape[0]::] = new_exp
    
    # only merge the outcomes that are present in both results
    keys = set(old_res.keys()) & set(new_res.keys())
    info("intersection of keys: %s" % keys)
    
    #merging results
    merged_res = {}
    for key in keys:
        info("merge "+key)
        
        old_value = old_res.get(key)
        new_value = new_res.get(key)
        
        i = old_value.shape[0]+new_value.shape[0]
        j = old_value.shape[1]
        slice_value = 1
        if downsample:
            # float division so the ceil matches the length of the strided slice
            j = int(math.ceil(j / float(downsample)))
            slice_value = downsample
            
        merged_value = np.empty((i,j))
        debug("merged shape: %s" % merged_value.shape)
        
        merged_value[0:old_value.shape[0], :] = old_value[:, ::slice_value]
        merged_value[old_value.shape[0]::, :] = new_value[:, ::slice_value]

        merged_res[key] = merged_value
    
    mr = (merged_exp, merged_res)
    return mr  
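
A hedged, self-contained usage sketch with synthetic data; in practice both tuples come from perform_experiments. It assumes merge_results above is importable together with its logging helpers.

import numpy as np

dtype = [('policy', '|S10'), ('a', '<f8')]
exp_a = np.zeros((5,), dtype=dtype)
exp_b = np.zeros((3,), dtype=dtype)
out_a = {'outcome': np.random.rand(5, 100)}
out_b = {'outcome': np.random.rand(3, 100)}

# keep every 10th time step to bound memory use
merged_exp, merged_res = merge_results((exp_a, out_a), (exp_b, out_b),
                                       downsample=10)
# merged_exp has 8 rows; merged_res['outcome'] has shape (8, 10)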
Example #6
def load_results(file_name):
    '''
    load the specified tar.gz file. the file is assumed to have been saved
    using save_results.
    
    :param file_name: the path of the file
    :raises: IOError if file not found

    '''
    
    outcomes = {}
    with tarfile.open(file_name, 'r') as z:
        # load experiments
        experiments = z.extractfile('experiments.csv')
        experiments = csv2rec(experiments)

        # load experiment metadata
        metadata = z.extractfile('experiments metadata.csv').readlines()
        metadata = [entry.strip() for entry in metadata]
        metadata = [tuple(entry.split(",")) for entry in metadata]
        metadata = np.dtype(metadata)

        # cast experiments to dtype and name specified in metadata        
        temp_experiments = np.zeros((experiments.shape[0],), dtype=metadata)
        for i, entry in enumerate(experiments.dtype.descr):
            dtype = metadata[i]
            name = metadata.descr[i][0]
            temp_experiments[name][:] = experiments[entry[0]].astype(dtype)
        experiments = temp_experiments
        
        # load outcome metadata
        metadata = z.extractfile('outcomes metadata.csv').readlines()
        metadata = [entry.strip() for entry in metadata]
        metadata = [tuple(entry.split(",")) for entry in metadata]
        metadata = {entry[0]: entry[1:] for entry in metadata}

        # load outcomes
        for outcome, shape in metadata.iteritems():
            shape = list(shape)
            shape[0] = shape[0][1:]
            shape[-1] = shape[-1][0:-1]
            
            temp_shape = []
            for entry in shape:
                if entry:
                    temp_shape.append(int(entry))
            shape = tuple(temp_shape)
            
            if len(shape)>2:
                nr_files = shape[-1]
                
                data = np.empty(shape)
                for i in range(nr_files):
                    values = z.extractfile("{}_{}.csv".format(outcome, i))
                    values = read_csv(values, index_col=False, header=None).values
                    data[:,:,i] = values

            else:
                data = z.extractfile("{}.csv".format(outcome))
                data = read_csv(data, index_col=False, header=None).values
                data = np.reshape(data, shape)
                
            outcomes[outcome] = data
            
    info("results loaded succesfully from {}".format(file_name))
    return experiments, outcomes
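
The shape handling above undoes the repr of a numpy shape tuple that save_results writes into 'outcomes metadata.csv'. A compact illustration of the same parsing:

entries = "(600, 1000)"[1:-1].split(",")
shape = tuple(int(entry) for entry in entries if entry.strip())
# shape == (600, 1000)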
Example #7
def save_results(results, file_name):
    '''
    save the results to the specified tar.gz file. The results are stored as 
    csv files. There is an experiments.csv, and a csv for each outcome. In 
    addition, there is a metadata csv which contains the datatype information
    for each of the columns in the experiments array.

    :param results: the return of run_experiments
    :param file_name: the path of the file
    :raises: IOError if file not found

    '''

    def add_file(tararchive, string_to_add, filename):
        tarinfo = tarfile.TarInfo(filename)
        tarinfo.size = len(string_to_add)
        
        tararchive.addfile(tarinfo, StringIO.StringIO(string_to_add))
    
    def save_numpy_array(fh, data):
        data = pd.DataFrame(data)
        data.to_csv(fh, header=False, index=False)
        
    experiments, outcomes = results
    with tarfile.open(file_name, 'w:gz') as z:
        # write the experiments to the zipfile
        experiments_file = StringIO.StringIO()
        rec2csv(experiments, experiments_file, withheader=True)
        add_file(z, experiments_file.getvalue(), 'experiments.csv')
        
        # write experiment metadata
        dtype = experiments.dtype.descr
        dtype = ["{},{}".format(*entry) for entry in dtype]
        dtype = "\n".join(dtype)
        add_file(z, dtype, 'experiments metadata.csv')
        
        # write outcome metadata
        outcome_names = outcomes.keys()
        outcome_meta = ["{},{}".format(outcome, outcomes[outcome].shape) 
                        for outcome in outcome_names]
        outcome_meta = "\n".join(outcome_meta)
        add_file(z, outcome_meta, "outcomes metadata.csv")
        
        
        # outcomes
        for key, value in outcomes.iteritems():
            fh = StringIO.StringIO()
            
            nr_dim = len(value.shape)
            if nr_dim==3:
                for i in range(value.shape[2]):
                    data = value[:,:,i]
                    save_numpy_array(fh, data)
                    fh = fh.getvalue()
                    fn = '{}_{}.csv'.format(key, i)
                    add_file(z, fh, fn)
                    fh = StringIO.StringIO()
            else:
                save_numpy_array(fh, value)
                fh = fh.getvalue()
                add_file(z, fh, '{}.csv'.format(key))
  
    info("results saved successfully to {}".format(file_name))
Example #8
    def __init__(self, 
                 msis, 
                 processes=None, 
                 kwargs=None):
        '''
        
        :param msis: an iterable of model structure interfaces
        :param processes: nr. of processes to spawn; if None, it is set to
                          equal the nr. of cores
        :param kwargs: kwargs to be passed to :meth:`model_init`
        '''
        
        self._setup_queues()
        self._taskqueue = Queue.Queue(cpu_count()*2)
        self._cache = {}
        self._state = RUN

        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        info("nr of processes is "+str(processes))

        self.log_queue = multiprocessing.Queue()
        h = NullHandler()
        logging.getLogger(ema_logging.LOGGER_NAME).addHandler(h)
        
        # This thread will read from the subprocesses and write to the
        # main log's handlers.
        log_queue_reader = LogQueueReader(self.log_queue)
        log_queue_reader.start()

        self._pool = []
        working_dirs = []

        debug('generating workers')
        
        worker_root = None
        for i in range(processes):
            debug('generating worker '+str(i))
            
            # generate a random string helps in running repeatedly with
            # crashes
            choice_set = string.ascii_uppercase + string.digits + string.ascii_lowercase
            random_string = ''.join(random.choice(choice_set) for _ in range(5))
            
            workername = 'tpm_{}_PoolWorker_{}'.format(random_string, i)
            
            #setup working directories for parallel_ema
            for msi in msis:
                if msi.working_directory is not None:
                    if worker_root is None:
                        worker_root = os.path.dirname(os.path.abspath(msis[0].working_directory))
                    
                    working_directory = os.path.join(worker_root, workername)
                    
#                     working_directory = tempfile.mkdtemp(suffix=workername,
#                                                          prefix='tmp_',
#                                                          dir=worker_root)
                    
                    working_dirs.append(working_directory)
                    shutil.copytree(msi.working_directory, 
                                    working_directory, 
                                    )
                    msi.set_working_directory(working_directory)

            w = LoggingProcess(
                    self.log_queue,
                    level=logging.getLogger(ema_logging.LOGGER_NAME).getEffectiveLevel(),
                    target=worker,
                    args=(self._inqueue, 
                          self._outqueue, 
                          msis,
                          kwargs)
                    )
            self._pool.append(w)
            
            w.name = w.name.replace('Process', workername)
            w.daemon = True
            w.start()
            debug(' worker '+str(i) + ' generated')

        # thread for handling tasks
        self._task_handler = threading.Thread(
                                        target=CalculatorPool._handle_tasks,
                                        name='task handler',
                                        args=(self._taskqueue, 
                                              self._quick_put, 
                                              self._outqueue, 
                                              self._pool
                                              )
                                        )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        # thread for handling results
        self._result_handler = threading.Thread(
                                        target=CalculatorPool._handle_results,
                                        name='result handler',
                                        args=(self._outqueue, 
                                              self._quick_get, 
                                              self._cache, 
                                              self.log_queue)
                                        )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # function for cleaning up when finalizing object
        self._terminate = Finalize(self, 
                                   self._terminate_pool,
                                   args=(self._taskqueue, 
                                         self._inqueue, 
                                         self._outqueue, 
                                         self._pool,
                                         self._task_handler, 
                                         self._result_handler, 
                                         self._cache, 
                                         working_dirs,
                                         ),
                                    exitpriority=15
                                    )
        
        info("pool has been set up")
Example #9
    def __init__(self, 
                 msis, 
                 processes=None, 
                 kwargs=None):
        '''
        
        :param msis: an iterable of model structure interfaces
        :param processes: nr. of processes to spawn; if None, it is set to
                          equal the nr. of cores
        :param kwargs: kwargs to be passed to :meth:`model_init`
        '''
        
        if processes is None:
            try:
                processes = cpu_count()
            except NotImplementedError:
                processes = 1
        info("nr of processes is "+str(processes))
    
        # setup queues etc.
        self._setup_queues()
        self._taskqueue = Queue.Queue(processes*2)
        self._cache = {}
        self._state = RUN
        
        # handling of logging
        self.log_queue = multiprocessing.Queue()
        h = NullHandler()
        logging.getLogger(ema_logging.LOGGER_NAME).addHandler(h)
        
        log_queue_reader = LogQueueReader(self.log_queue)
        log_queue_reader.start()

        # setup of the actual pool
        self._pool = []
        working_dirs = []

        debug('generating workers')
        
        worker_root = None
        for i in range(processes):
            debug('generating worker '+str(i))
            
            workername = self._get_worker_name(i)
            
            #setup working directories for parallel_ema
            for msi in msis:
                if msi.working_directory is not None:
                    if worker_root is None:
                        wd = msis[0].working_directory
                        abs_wd = os.path.abspath(wd)
                        worker_root = os.path.dirname(abs_wd)
                    
                    wd_name = workername + msi.name
                    working_directory = os.path.join(worker_root, wd_name)
                    
#                     working_directory = tempfile.mkdtemp(suffix=workername,
#                                                          prefix='tmp_',
#                                                          dir=worker_root)
                    
                    working_dirs.append(working_directory)
                    shutil.copytree(msi.working_directory, 
                                    working_directory, 
                                    )
                    msi.set_working_directory(working_directory)

            w = LoggingProcess(
                    self.log_queue,
                    level=logging.getLogger(ema_logging.LOGGER_NAME).getEffectiveLevel(),
                    target=worker,
                    args=(self._inqueue, 
                          self._outqueue, 
                          msis,
                          kwargs)
                    )
            self._pool.append(w)
            
            w.name = w.name.replace('Process', workername)
            w.daemon = True
            w.start()
            debug(' worker '+str(i) + ' generated')

        # thread for handling tasks
        self._task_handler = threading.Thread(
                                        target=CalculatorPool._handle_tasks,
                                        name='task handler',
                                        args=(self._taskqueue, 
                                              self._quick_put, 
                                              self._outqueue, 
                                              self._pool
                                              )
                                        )
        self._task_handler.daemon = True
        self._task_handler._state = RUN
        self._task_handler.start()

        # thread for handling results
        self._result_handler = threading.Thread(
                                        target=CalculatorPool._handle_results,
                                        name='result handler',
                                        args=(self._outqueue, 
                                              self._quick_get, 
                                              self._cache, 
                                              self.log_queue)
                                        )
        self._result_handler.daemon = True
        self._result_handler._state = RUN
        self._result_handler.start()

        # function for cleaning up when finalizing object
        self._terminate = Finalize(self, 
                                   self._terminate_pool,
                                   args=(self._taskqueue, 
                                         self._inqueue, 
                                         self._outqueue, 
                                         self._pool,
                                         self._task_handler, 
                                         self._result_handler, 
                                         self._cache, 
                                         working_dirs,
                                         ),
                                    exitpriority=15
                                    )
        
        info("pool has been set up")