Esempio n. 1
0
    def perform_experiments(self, 
                           cases,
                           callback=DefaultCallback,
                           reporting_interval=100,
                           modelKwargs=None,
                           **kwargs):
        """
        Method responsible for running the experiments on a structure. In case 
        of multiple model structures, the outcomes are set to the intersection 
        of the sets of outcomes of the various models.         
        
        :param cases: In case of Latin Hypercube sampling and Monte Carlo 
                      sampling, cases specifies the number of cases to
                      generate. In case of Full Factorial sampling,
                      cases specifies the resolution to use for sampling
                      continuous uncertainties. Alternatively, one can supply
                      a non-empty list of dicts, where each dict contains a 
                      case. That is, an uncertainty name as key, and its 
                      value. The keys of the first case determine which
                      uncertainties are reported to the callback.
        :param callback: Class that will be instantiated once and then called 
                         after finishing each single experiment.
        :param reporting_interval: parameter for specifying the frequency with
                                   which the callback reports the progress.
                                   (Default is 100) 
        :param modelKwargs: dictionary of keyword arguments to be passed to 
                            model_init. (Default is an empty dict)
        :param kwargs: generic keyword arguments to pass on to callback
         
        :raises EMAError: if `cases` is neither an int nor a list, if an 
                          empty list of cases is supplied, or if the model 
                          structures share no outcomes of interest.
                       
        :returns: whatever `callback.get_results()` returns. For the default
                  callback this is documented as a 
                  `structured numpy array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_ 
                  containing the experiments, and a dict with the names of the 
                  outcomes as keys and an numpy array as value.
                
        .. rubric:: suggested use
        
        In general, analysis scripts require both the structured array of the 
        experiments and the dictionary of arrays containing the results. The 
        recommended use is the following::
        
        >>> results = ensemble.perform_experiments(10000) #recommended use
        >>> experiments, output = ensemble.perform_experiments(10000) #will work fine
        
        The latter option will work fine, but most analysis scripts require 
        to wrap it up into a tuple again::
        
        >>> data = (experiments, output)
        
        Another reason for the recommended use is that you can save this tuple
        directly::
        
        >>> import expWorkbench.util as util
        >>> util.save_results(results, file)
        
        """
        # a fresh dict per call; a shared mutable default would leak state
        # between calls if a model ever modified it
        if modelKwargs is None:
            modelKwargs = {}
        
        # bool is a subclass of int, but True/False is not a sensible number
        # of cases, so it is rejected like any other unknown type
        if isinstance(cases, int) and not isinstance(cases, bool):
            cases, uncertainties = self._generate_cases(cases)
        elif isinstance(cases, list):
            if not cases:
                raise EMAError("no cases provided")
            uncertainties = self.determine_intersecting_uncertainties()[0]
            # only keep the uncertainties that actually occur in the cases
            uncertaintyNames = set(cases[0].keys())
            uncertainties = [uncertainty for uncertainty in uncertainties if 
                             uncertainty.name in uncertaintyNames]
        else:
            raise EMAError("unknown type for cases")
        
        # make sure there is always at least one (dummy) policy to run
        if not self._policies:
            self._policies.append({"name": "None"})

        nrOfExperiments = len(cases)*len(self._policies)*len(self._modelStructures) 
        info(str(nrOfExperiments) + 
             " experiment will be executed")
        
        # set outcomes to the intersect of outcomes across models; the first
        # model seeds the set and is intersected with all remaining models
        outcomes = [msi.outcomes for msi in self._modelStructures]
        outcomes = set(outcomes[0]).intersection(*outcomes[1:])
        # fail before touching the models, so that their outcomes are not 
        # replaced by an empty list when the run is aborted
        if not outcomes:
            raise EMAError("no outcomes of interest defined")
        for msi in self._modelStructures:
            msi.outcomes = list(outcomes)
                
        #initialize the callback object
        callback = callback(uncertainties, 
                            outcomes, 
                            nrOfExperiments,
                            reporting_interval=reporting_interval,
                            **kwargs)
                
        if self.parallel:
            info("preparing to perform experiment in parallel")
            
            if not self._pool:
                self.__make_pool(modelKwargs)
            info("starting to perform experiments in parallel")

            results = self._pool.runExperiments(cases, self._policies)
            
            for entry in results:
                try:
                    callback(*entry.get())
                except EMAParallelError as e:
                    # a failed parallel experiment is logged, the remaining 
                    # results are still processed; anything else propagates
                    exception(e)
        else:
            info("starting to perform experiments sequentially")

            try:
                for policy in self._policies:
                    for msi in self._modelStructures:
                        # the model gets its own copy, so it cannot alter
                        # the policy that is handed to the callback
                        policyToRun = copy.deepcopy(policy)
                        try:
                            msi.model_init(policyToRun, modelKwargs)
                        except (EMAError, NotImplementedError) as inst:
                            exception(inst)
                            raise
        
                        for case in cases:
                            caseToRun = copy.deepcopy(case)
                            try:
                                msi.run_model(caseToRun)
                            except CaseError as e:
                                # a failing case is reported, its (partial)
                                # output is still retrieved and stored
                                warning(str(e))
                            result = msi.retrieve_output()
                            msi.reset_model()
                            callback(case, policy, msi.name, result)
            finally:
                # always give every model the chance to release its 
                # resources, also when an experiment fails unexpectedly
                for modelInterface in self._modelStructures:
                    modelInterface.cleanup()
        
        results = callback.get_results()
        info("experiments finished")
        
        return results
    
#    def __optimize(self, 
#                  allele_order,
#                  setOfAlleles, 
#                  obj_function,
#                  nrOfGenerations,
#                  nrOfPopMembers,
#                  minimax,
#                  crossoverRate,
#                  mutationRate,
#                  elitism,
#                  reporting_interval,
#                  population=BaseEMAPopulation):
#        # make a genome with a length equal to the list of alleles
#        genome = G1DList.G1DList(len(setOfAlleles))
#        genome.setParams(allele=setOfAlleles)
#        
#        # The evaluator function (objective function)
#        # to be decided what to use as test function. In principle
#        # the test function is a function that transforms the genome
#        # to a case, runs the model, and returns the results
#        # ideally, we might remove that step entirely by not
#        # using ind.evaluate(**args) in the population...
#        genome.evaluator.set(obj_function)
#        genome.crossover.set(Crossovers.G1DListCrossoverSinglePoint)
#        genome.mutator.set(Mutators.G1DListMutatorAllele)
#        genome.initializator.set(Initializators.G1DListInitializatorAllele)
#        
#        stats = StatisticsCallback(nrOfGenerations, nrOfPopMembers)
#        ga = EMAGA(genome, population)
#        ga.internalPop = population(genome, allele_order, self, reporting_interval)
#        ga.setMinimax(Consts.minimaxType[minimax])
#        ga.stepCallback.set(stats)
#        ga.selector.set(EMAoptimization.EMARankSelector)
#        
#        if elitism:
#            ga.setElitism(True)
#            ga.setElitismReplacement(elitism)
#        
#        # a generation contains nrOfPopMembers individuals
#        ga.setPopulationSize(nrOfPopMembers)
#        
#        # there are nrOfGeneration generations
#        ga.setGenerations(nrOfGenerations)
#        
#        # crossover and mutation    
#        ga.setCrossoverRate(crossoverRate)
#        ga.setMutationRate(mutationRate)
#
#        # perform optimization, print every 10 generations
#        # ideally, we intercept these messages and redirect them to
#        # ema_logging.
#        ema_logging.info("starting optimization")
#        ga.evolve()
#        
#        # return results for best fit
#        best_individual = ga.bestIndividual()
#        
#        best_case = {}
#        for i, key in enumerate(allele_order):
#            best_case[key] = best_individual.genomeList[i]
#        
#        c = ""
#        for key, value in best_case.items():
#            c += key
#            c += " : "
#            c += str(value)
#            c += '\n'
#        
#        info('best case:\n' + c )
#        info('raw score: ' + str(best_individual.score))
#        
#        results = {"best individual score": best_individual.score,
#                   "best individual ": best_individual,
#                   "stats": stats.stats,
#                   "raw": stats.rawScore,
#                   "fitness": stats.fitnessScore,
#                   "mutation ration": mutationRate,
#                   "crossover rate": crossoverRate,
#                   "minimax": minimax,
#                   "time elapsed": ga.get_time_elapsed()}
#        
#        return results    
#    
##    def perform_outcome_optimization(self, 
##                                     reporting_interval=100,
##                                     obj_function=None,
##                                     minimax = "maximize",
##                                     nrOfGenerations = 100,
##                                     nrOfPopMembers=100,
##                                     crossoverRate = 0.5, 
##                                     mutationRate = 0.02,
##                                     elitism = 0
##                                     ):
##        """
##        Method responsible for performing the optimization.
##        
##        :param reporting_interval: Parameter for specifying the frequency with
##                           which the callback reports the progress.
##                           (Default = 100) 
##        :param obj_function: The objective function to use. This objective 
##                             function receives the results for a single model
##                             run for all the specified outcomes of interest and
##                             should return a single score which should be 
##                             positive. 
##        :param minimax: String indicating whether to minimize or maximize the
##                        obj_function.
##        :param nrOfGenerations: The number of generations to evolve over.
##        :param nrOfPopulationMembers: The number of population members in a 
##                                      single generation.
##        :param crossoverRate: The crossover rate, between 0.0 and 1.0. 
##                              see `wikipedia <http://en.wikipedia.org/wiki/Crossover_%28genetic_algorithm%29>`__
##                              for details. (Default = 0.5)
##        :param mutationRate: The mutation rate, between 0.0 and 1.0.
##                             see `wikipedia <http://en.wikipedia.org/wiki/Mutation_%28genetic_algorithm%29>`__
##                             for details. (Default = 0.02)
##        :param elitism: The number of best individuals to copy to the next 
##                        generation. (Default = 0)
##        
##        :returns: A dict with info on the optimization including stats, best
##                  individual, and information on the optimization setup
##        
##        """
##
##        # Genome instance
##        setOfAlleles = GAllele.GAlleles()
##
##        allele_order = []
##        # deduce the alleles from the overlapping set of model structure 
##        # uncertainties
##        # the alleles should use the limits of uncertainty, and their dType
##        # in case of categorical uncertainties, the transform to the 
##        # category is delegated to a later stage (to be decided)
##        shared_uncertainties = self.determine_intersecting_uncertainties()[0]
##        for uncertainty in shared_uncertainties:
##            values = uncertainty.get_values()
##            dist = uncertainty.dist
##
##            if isinstance(uncertainty, CategoricalUncertainty):
##                allele = GAllele.GAlleleList(uncertainty.categories)
##            elif dist== INTEGER:
##                allele = GAllele.GAlleleRange(values[0], values[1])
##            else:
##                allele = GAllele.GAlleleRange(values[0], values[1], real=True)
##            
##            setOfAlleles.add(allele)
##            allele_order.append(uncertainty.name)
##        return self.__optimize(allele_order, 
##                               setOfAlleles, obj_function, 
##                              nrOfGenerations, nrOfPopMembers, minimax, 
##                              crossoverRate, mutationRate, elitism,
##                              reporting_interval,
##                              population=OutcomeOptimizationPopulation)
##
##
##    def perform_robust_optimization(self, 
##                                    cases,
##                                    reporting_interval=100,
##                                    obj_function=None,
##                                    policy_levers={},
##                                    minimax="maximize",
##                                    nrOfGenerations=100,
##                                    nrOfPopMembers=100,
##                                    crossoverRate=0.5, 
##                                    mutationRate=0.02,
##                                    elitism=0
##                                    ):
##        """
##        Method responsible for performing robust optimization.
##        
##        :param cases: In case of Latin Hypercube sampling and Monte Carlo 
##                      sampling, cases specifies the number of cases to
##                      generate. In case of Full Factorial sampling,
##                      cases specifies the resolution to use for sampling
##                      continuous uncertainties. Alternatively, one can supply
##                      a list of dicts, where each dicts contains a case.
##                      That is, an uncertainty name as key, and its value.
##        :param reporting_interval: Parameter for specifying the frequency with
##                                   which the callback reports the progress.
##                                   (Default = 100)         
##        :param obj_function: The objective function to use. This objective 
##                             function receives the results for a policy and
##                             the provided cases for all the specified outcomes 
##                             of interest and should return a single score which 
##                             should be positive. 
##        :param policy_levers: A dictionary with model parameter names as key
##                              and a dict as value. The dict should have two 
##                              fields: 'type' and 'values. Type is either
##                              list or range, and determines the appropriate
##                              allele type. Values are the parameters to 
##                              be used for the specific allele. 
##        :param minimax: String indicating whether to minimize or maximize the
##                        obj_function.
##        :param nrOfGenerations: The number of generations to evolve over.
##        :param nrOfPopulationMembers: The number of population members in a 
##                                      single generation.
##        :param crossoverRate: The crossover rate, between 0.0 and 1.0. 
##                              see `wikipedia <http://en.wikipedia.org/wiki/Crossover_%28genetic_algorithm%29>`__
##                              for details. (Default = 0.5)
##        :param mutationRate: The mutation rate, between 0.0 and 1.0.
##                             see `wikipedia <http://en.wikipedia.org/wiki/Mutation_%28genetic_algorithm%29>`__
##                             for details. (Default = 0.02)
##        :param elitism: The number of best individuals to copy to the next 
##                        generation. (Default = 0) 
##        
##        :returns: A dict with info on the optimization including stats, best
##                  individual, and information on the optimization setup
##        
##        """
##
##        # Genome instance
##        setOfAlleles = GAllele.GAlleles()
##        allele_order = []
##        for key, value in policy_levers.items():
##            type_allele = value['type'] 
##            value = value['values']
##            if type_allele=='range':
##                allele = GAllele.GAlleleRange(value[0], value[1], real=True)
##            elif type_allele=='list':
##                allele = GAllele.GAlleleList(value)
##            else:
##                raise EMAError("unknown allele type: possible types are range and list")
##            
##            setOfAlleles.add(allele)
##            allele_order.append(key)
##        
##        RobustOptimizationPopulation.cases = cases
##        return self.__optimize(allele_order, 
##                               setOfAlleles, 
##                               obj_function, 
##                               nrOfGenerations, 
##                               nrOfPopMembers, 
##                               minimax, 
##                               crossoverRate, 
##                               mutationRate,
##                               elitism,
##                               reporting_interval, 
##                               population=RobustOptimizationPopulation)
##    
##    def perform_maximin_optimization(self, 
##                                    reporting_interval=100,
##                                    obj_function1=None,
##                                    policy_levers={},
##                                    minimax1 = "minimize",
##                                    minimax2 = "maximize",                                   
##                                    nrOfGenerations1 = 100,
##                                    nrOfPopMembers1 = 100,
##                                    crossoverRate1 = 0.5, 
##                                    mutationRate1 = 0.02,
##                                    elitism1 = 0,
##                                    nrOfGenerations2 = 100,
##                                    nrOfPopMembers2 = 100,
##                                    crossoverRate2 = 0.5, 
##                                    mutationRate2 = 0.02,
##                                    elitism2 = 0
##                                    ):
##        
##        # Genome instance
##        setOfAlleles = GAllele.GAlleles()
##        allele_order = []
##        for key, value in policy_levers.items():
##            allele = GAllele.GAlleleRange(value[0], value[1], real=True)
##            
##            setOfAlleles.add(allele)
##            allele_order.append(key)
##        
##        MaximinOptimizationPopulation.optimizationType = minimax2
##        MaximinOptimizationPopulation.nrOfGenerations = nrOfGenerations2
##        MaximinOptimizationPopulation.nrOfPopMembers = nrOfPopMembers2
##        MaximinOptimizationPopulation.crossoverRate = crossoverRate2
##        MaximinOptimizationPopulation.mutationRate = mutationRate2
##        MaximinOptimizationPopulation.elitism = elitism2
##        
##        return self.__optimize(allele_order, 
##                               setOfAlleles, 
##                               obj_function1, 
##                               nrOfGenerations1, 
##                               nrOfPopMembers1, 
##                               minimax1, 
##                               crossoverRate1, 
##                               mutationRate1,
##                               elitism1,
##                               reporting_interval, 
##                               population=MaximinOptimizationPopulation)
Esempio n. 2
0
    def perform_experiments(self, 
                           cases,
                           callback=DefaultCallback,
                           reporting_interval=100,
                           model_kwargs=None,
                           which_uncertainties=INTERSECTION,
                           which_outcomes=INTERSECTION,
                           **kwargs):
        """
        Method responsible for running the experiments on a structure. In case 
        of multiple model structures, `which_uncertainties` and 
        `which_outcomes` control whether the intersection or the union of the
        uncertainties and outcomes of the various models is used.
        
        :param cases: In case of Latin Hypercube sampling and Monte Carlo 
                      sampling, cases specifies the number of cases to
                      generate. In case of Full Factorial sampling,
                      cases specifies the resolution to use for sampling
                      continuous uncertainties. Alternatively, one can supply
                      a non-empty list of dicts, where each dict contains a 
                      case. That is, an uncertainty name as key, and its 
                      value. 
        :param callback: Class that will be instantiated once and then called 
                         after finishing each single experiment.
        :param reporting_interval: parameter for specifying the frequency with
                                   which the callback reports the progress.
                                   (Default is 100) 
        :param model_kwargs: dictionary of keyword arguments to be passed to 
                             model_init. (Default is an empty dict)
        :param which_uncertainties: keyword argument for controlling whether,
                                    in case of multiple model structure 
                                    interfaces, the intersection or the union
                                    of uncertainties should be used. Only
                                    used when `cases` is a number.
                                    (Default is intersection).  
        :param which_outcomes: keyword argument for controlling whether,
                               in case of multiple model structure 
                               interfaces, the intersection or the union
                               of outcomes should be used. 
                               (Default is intersection).  
        :param kwargs: generic keyword arguments to pass on to callback
         
        :raises EMAError: if `cases` is neither an int nor a list, or if an
                          empty list of cases is supplied.
        :raises ValueError: if `which_outcomes` is neither UNION nor 
                            INTERSECTION.
                       
        :returns: whatever `callback.get_results()` returns. For the default
                  callback this is documented as a 
                  `structured numpy array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_ 
                  containing the experiments, and a dict with the names of the 
                  outcomes as keys and an numpy array as value.
                
        .. rubric:: suggested use
        
        In general, analysis scripts require both the structured array of the 
        experiments and the dictionary of arrays containing the results. The 
        recommended use is the following::
        
        >>> results = ensemble.perform_experiments(10000) #recommended use
        >>> experiments, output = ensemble.perform_experiments(10000) 
        
        The latter option will work fine, but most analysis scripts require 
        to wrap it up into a tuple again::
        
        >>> data = (experiments, output)
        
        Another reason for the recommended use is that you can save this tuple
        directly::
        
        >>> import expWorkbench.util as util
        >>> util.save_results(results, filename)
          
        .. note:: The current implementation has a hard coded limit to the 
          number of designs possible. This is set to 50.000 designs. 
          If one want to go beyond this, set `self.max_designs` to
          a higher value.
        
        """
        # a fresh dict per call instead of one shared mutable default
        if model_kwargs is None:
            model_kwargs = {}

        # make sure there is always at least one (dummy) policy to run
        if not self._policies:
            self._policies.append({"name": "None"})
        
        # identify the uncertainties and sample over them
        # (bool is a subclass of int, but is not a sensible number of cases)
        if isinstance(cases, int) and not isinstance(cases, bool):
            sampled_unc, unc_dict = self._generate_samples(cases, 
                                                           which_uncertainties)
            nr_of_exp = self.sampler.deterimine_nr_of_designs(sampled_unc)\
                      *len(self._policies)*len(self._msis)
            experiments = self._generate_experiments(sampled_unc)
        elif isinstance(cases, list):
            if not cases:
                raise EMAError("no cases provided")
            unc_dict = self.determine_uncertainties()[1]
            # the keys of the first case define the uncertainties in use
            unc_names = cases[0].keys()
            sampled_unc = {name:[] for name in unc_names}
            nr_of_exp = len(cases)*len(self._policies)*len(self._msis)
            experiments = self._generate_experiments(cases)
        else:
            raise EMAError("unknown type for cases")
        uncertainties = [unc_dict[unc] for unc in sorted(sampled_unc)]

        # identify the outcomes that are to be included
        overview_dict, element_dict = self._determine_unique_attributes("outcomes")
        if which_outcomes==UNION:
            outcomes = element_dict.keys()
        elif which_outcomes==INTERSECTION:
            # the entry keyed by the names of all msis holds the outcomes 
            # that are shared by every model structure interface
            outcomes = overview_dict[tuple([msi.name for msi in self._msis])]
            outcomes = [outcome.name for outcome in outcomes]
        else:
            raise ValueError("incomplete value for which_outcomes")
         
        info(str(nr_of_exp) + " experiment will be executed")
                
        #initialize the callback object
        callback = callback(uncertainties, 
                            outcomes, 
                            nr_of_exp,
                            reporting_interval=reporting_interval,
                            **kwargs)

        if self.parallel:
            info("preparing to perform experiment in parallel")
            
            if not self._pool:
                self._make_pool(model_kwargs)
            info("starting to perform experiments in parallel")

            self._pool.run_experiments(experiments, callback)
        else:
            info("starting to perform experiments sequentially")

            msis = {msi.name: msi for msi in self._msis}
            # (policy name, model name) of the one combination that is 
            # currently initialized; there is always at most one
            initialized = None
            
            # remembered so it can be restored once the run is over
            cwd = os.getcwd() 
            try:
                for case_id, experiment in enumerate(experiments):
                    # what remains in experiment after popping policy and 
                    # model is the case itself
                    policy = experiment.pop('policy')
                    msi_name = experiment.pop('model')
                    
                    # check whether we already initialized the model for 
                    # this policy
                    if initialized != (policy['name'], msi_name):
                        try:
                            debug("invoking model init")
                            msis[msi_name].model_init(copy.deepcopy(policy),
                                                      copy.deepcopy(model_kwargs))
                        except (EMAError, NotImplementedError) as inst:
                            exception(inst)
                            raise
                        except Exception:
                            exception("some exception occurred when invoking the init")
                            raise 
                        debug("initialized model %s with policy %s" % (msi_name, policy['name']))
                        initialized = (policy['name'], msi_name)
                    msi = msis[msi_name]

                    # the model runs on a copy, the callback receives the 
                    # untouched experiment
                    case = copy.deepcopy(experiment)
                    try:
                        debug("trying to run model")
                        msi.run_model(case)
                    except CaseError as e:
                        # a failing case is reported, its (partial) output 
                        # is still retrieved and stored
                        warning(str(e))
                        
                    debug("trying to retrieve output")
                    result = msi.retrieve_output()
                    
                    debug("trying to reset model")
                    msi.reset_model()
                    
                    callback(case_id, experiment, policy, msi.name, result)
            finally:
                # always release the model resources and restore the working 
                # directory, also when an experiment fails unexpectedly
                try:
                    for model_interface in self._msis:
                        model_interface.cleanup()
                finally:
                    os.chdir(cwd)
       
        results = callback.get_results()
        info("experiments finished")
        
        return results
Esempio n. 3
0
def perform_regret_analysis(results,
                          policyOfInterest,
                          uncertainty1,
                          uncertainty2,
                          resolution,
                          outcomeNames=None):
    '''
    perform a RAND-style regret analysis. That is, calculate regret across 
    all runs. Regret is here understood as the regret of the policy of 
    interest as compared to the best performing other policy. 
    
    Identify the case in which the regret is maximized. Show a 2-d slice 
    across two specified uncertainties, which contains the case where the 
    regret is maximized. So, in this slice all the uncertainties apart from 
    the 2 specified, are equal to their value in the case were the regret 
    is maximized. 
    
    Function requires a full factorial sampling as the experimental design
    to work.
    
    input:
    results             default returnValue from modelEnsemble.runExperiments()
    policyOfInterest    name of policy for which you want to calculate the 
                        regret
    uncertainty1        the uncertainty across which you want to slice
    uncertainty2        the uncertainty across which you want to slice
    resolution          resolution used in generating the full factorial
    outcomeNames        if provided, this should be a list of names of outcomes 
                        where high is bad the normalized results for these 
                        outcomes will be reverted. (Default is an empty list)
    
    NOTE: please provide the actual uncertainty, not their name
    
    returns:
    regretPlot      2-d numpy array containing, for every combination of 
                    values of uncertainty1 (first axis) and uncertainty2 
                    (second axis), the maximum regret of the corresponding 
                    case in the slice
    
    raises:
    KeyError        if a case in the slice is not among the cases for which 
                    the regret was calculated
    '''
    # a fresh list per call instead of one shared mutable default
    if outcomeNames is None:
        outcomeNames = []

    def getIndex(limits, resolution, value):
        '''
        helper function to transform a case to an index in the regretPlotArray
        '''
        span = limits[1] - limits[0]
        if span == 0:
            # degenerate range, there is only a single position
            return 0.0
        # force true division; with integer limits and values, integer
        # division would truncate before the caller gets to round
        return float((resolution-1) * (value - limits[0])) / span
    
    regret, cases, uncertainties = calculate_regret(results, 
                                                    policyOfInterest,
                                                    outcomeNames)

    # transform regret into a dictionary for quick lookup    
    regretDict = dict(zip(cases, regret))

    #identify maximum regret case (only the case itself is needed here)
    _, case = max_regret(regret, cases)
    
    # generate the cases that should be in the slice
    #
    # by generating the cases we need for the slice here
    # and combining it with the dict structure, we can fill the 
    # slice up quickly 
    #
    # another alternative approach would be to filter the available cases
    # based on the case that maximizes the regret. Only the specified 
    # uncertainties should be allowed to vary. This, however, would require 
    # us to go over the entire list of cases which can potentially become 
    # very slow
    #
    sampler = FullFactorialSampler()
    designs = sampler.generate_design([uncertainty1, 
                                      uncertainty2], 
                                      resolution)[0]
    designs = list(designs)
    
    # get the indexes of the uncertainties
    # we use the max regret case and only modify the entries for
    # the uncertainties across which we want to slice
    index1 = uncertainties.index(uncertainty1.name)
    index2 = uncertainties.index(uncertainty2.name)
    
    # deduce the shape of the slice; fewer designs than resolution**2 means 
    # that at least one axis has fewer distinct values than the resolution
    if len(designs) < resolution**2:
        designArray = np.asarray(designs)
        shape = (len(set(designArray[:, 0])), 
                 len(set(designArray[:, 1])))
    else:
        shape = (resolution, resolution)
   
    regretPlot = np.zeros(shape)  
    case = list(case)
    for design in designs:
        case[index1] = design[0]
        case[index2] = design[1]
    
        # map case values back to index in regretPlot
        i = int(round( getIndex(uncertainty1.get_values(), 
                                regretPlot.shape[0], 
                                design[0]), 0)) 
        j = int(round( getIndex(uncertainty2.get_values(), 
                                regretPlot.shape[1], 
                                design[1]), 0))
        
        # retrieve regret for particular case; index the dict directly so
        # that a missing case is reported as such
        try: 
            a = regretDict[tuple(case)]
        except KeyError:
            ema_logging.exception('case not found')
            raise
        regretPlot[i, j] = np.max(a)
    return regretPlot