Example no. 1
def distance_sse(data):
    
    '''
    The SSE (sum of squared errors) distance between two data series is the
    sum of the squared errors between their corresponding data points.
    Let the data series be of length N; the SSE distance between ds1 and ds2
    is then the sum over i = 1..N of error_term(i)^2, where error_term(i)
    equals ds1(i) - ds2(i).

    Since the SSE is based on a pairwise comparison of individual data points,
    the two data series must be of equal length.

    The SSE distance equals the square of the Euclidean distance, which is a
    commonly used distance metric in time series comparisons.
    '''
    
    runLogs = []
    # Computes the pairwise distances between all time series contained in the numpy array data
    info("calculating distances")
    dRow = np.zeros(shape=(np.sum(np.arange(data.shape[0])), ))
    index = -1
    for i in range(data.shape[0]):
            
        # For each run, a log is created
        # Log includes a description dictionary that has key information 
        # for post-clustering analysis, and the data series itself. These 
        # logs are stored in a global array named runLogs
        behaviorDesc = {}
        behaviorDesc['Index'] = str(i)
        
        behavior = data[i]
        localLog = (behaviorDesc, behavior)
        runLogs.append(localLog)
    
        for j in range(i+1, data.shape[0]):
            index += 1
            distance = ssedist(data[i],data[j]) 
            dRow[index] = distance
    return dRow, runLogs
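
The helper ssedist is not shown in this example. A minimal sketch of what such a pairwise SSE distance could look like, assuming two equal-length one-dimensional numpy arrays, is:

import numpy as np

def ssedist_sketch(ds1, ds2):
    # sum of squared errors between corresponding data points; both
    # series must be of equal length, as noted in the docstring above
    errors = np.asarray(ds1, dtype=float) - np.asarray(ds2, dtype=float)
    return np.sum(errors ** 2)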
Example no. 2
    def log_stats(self, gen):
        '''Log statistics on the progress of the evolution'''

        functions = {
            "min": self.minima,
            "max": self.maxima,
            "std": self.std,
            "mean": self.mean,
        }

        hof = self.__get_hof_in_array()
        info_message = pd.DataFrame(
            index=['min', 'max', 'mean', 'std'],
            columns=['obj_{}'.format(i) for i in range(hof.shape[1])])
        for key, value in functions.items():
            data = value(hof)
            info_message.loc[key] = data

        # let pandas do the formatting for us, but remove the trailing info
        # on the size of the DataFrame
        message = info_message.__str__()
        message = message.split('\n')[0:-2]
        message = "\n".join(message)
        line = "\ngeneration {}\n{}".format(gen, message)
        ema_logging.info(line)
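
The statistics helpers referenced above (self.minima, self.maxima, self.std, self.mean) and __get_hof_in_array are not shown in this example. Judging from how log_stats uses them, they presumably reduce a 2-D hall-of-fame array (one column per objective) column-wise; a hedged sketch of the idea:

import numpy as np

# Sketch only: the real helpers are methods on the statistics callback.
def minima(hof):
    return np.min(hof, axis=0)

def maxima(hof):
    return np.max(hof, axis=0)

def mean(hof):
    return np.mean(hof, axis=0)

def std(hof):
    return np.std(hof, axis=0)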
def determine_time_dimension(outcomes):
    '''
    helper function for determining or creating time dimension

    
    Parameters
    ----------
    outcomes : dict
    
    Returns
    -------
    tuple
        the time dimension as an ndarray (or None if no time dimension could
        be determined) and the outcomes dict, with 'TIME' removed if present
    
    
    '''

    time = None
    try:
        time = outcomes['TIME']
        time = time[0, :]
        outcomes.pop('TIME')
    except KeyError:
        values = iter(outcomes.values())
        for value in values:
            if len(value.shape) == 2:
                time = np.arange(0, value.shape[1])
                break
    if time is None:
        info("no time dimension found in results")
    return time, outcomes
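
A minimal usage sketch for this helper, assuming outcomes maps outcome names to 2-D arrays with shape (runs, timesteps):

import numpy as np

outcomes = {'TIME': np.tile(np.arange(5), (3, 1)),
            'population': np.random.rand(3, 5)}
time, outcomes = determine_time_dimension(outcomes)
# time is now array([0, 1, 2, 3, 4]) and 'TIME' has been removed from
# outcomes; without a 'TIME' entry, an index-based time axis of the
# same length would have been generated instead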
Example no. 4
    def __init__(self, gui=False, thd=False):
        '''
        
        Create a link with netlogo. Underneath, the netlogo jvm is started
        through jpype.
        
        
        :param gui: boolean, if true run netlogo with gui, otherwise run in 
                    headless mode. Defaults to false.
        :param thd: boolean, if true start netlogo in 3d mode. Defaults to 
                    false
        
        
        '''
        if not jpype.isJVMStarted():
            # netlogo jars
            jars = [
                NETLOGO_HOME + r'/lib/scala-library.jar',
                NETLOGO_HOME + r'/lib/asm-all-3.3.1.jar',
                NETLOGO_HOME + r'/lib/picocontainer-2.13.6.jar',
                NETLOGO_HOME + r'/lib/log4j-1.2.16.jar',
                NETLOGO_HOME + r'/lib/jmf-2.1.1e.jar',
                NETLOGO_HOME + r'/lib/pegdown-1.1.0.jar',
                NETLOGO_HOME + r'/lib/parboiled-core-1.0.2.jar',
                NETLOGO_HOME + r'/lib/parboiled-java-1.0.2.jar',
                NETLOGO_HOME + r'/lib/mrjadapter-1.2.jar',
                NETLOGO_HOME + r'/lib/jhotdraw-6.0b1.jar',
                NETLOGO_HOME + r'/lib/quaqua-7.3.4.jar',
                NETLOGO_HOME + r'/lib/swing-layout-7.3.4.jar',
                NETLOGO_HOME + r'/lib/jogl-1.1.1.jar', NETLOGO_HOME +
                r'/lib/gluegen-rt-1.1.1.jar', NETLOGO_HOME + r'/NetLogo.jar',
                PYNETLOGO_HOME + r'/external_files/netlogoLink.jar'
            ]

            # format jars in right format for starting java virtual machine
            # TODO the use of the jre here is only relevant under windows
            # apparently
            # might be solvable by setting netlogo home user.dir

            joined_jars = jar_separator.join(jars)
            jarpath = '-Djava.class.path={}'.format(joined_jars)

            jvm_handle = jpype.getDefaultJVMPath()
            jpype.startJVM(jvm_handle, jarpath, "-Xms128M", "-Xmx1024m")
            jpype.java.lang.System.setProperty('user.dir', NETLOGO_HOME)

            if sys.platform == 'darwin':
                jpype.java.lang.System.setProperty("java.awt.headless", "true")

            debug("jvm started")

        link = jpype.JClass('netlogoLink.NetLogoLink')
        debug('NetLogoLink class found')

        if sys.platform == 'darwin' and gui:
            info('on mac only headless mode is supported')
            gui = False

        self.link = link(gui, thd)
        debug('NetLogoLink class instantiated')
Example no. 5
 def __call__(self, case_id, case, policy, name, result):
     '''
     Method responsible for storing results. The implementation in this
     class only keeps track of how many runs have been completed and 
     logging this. Any extension of AbstractCallback needs to implement
     this method. If one wants to use the logging provided here, call it via
     super.
     
     Parameters
     ----------
     case_id: int
              the job id
     case: dict
           the case to be stored
     policy: str 
             the name of the policy being used
     name: str
           the name of the model being used
     result: dict
             the result dict
     
     '''
     
     self.i+=1
     ema_logging.debug(str(self.i)+" cases completed")
     
     if self.i % self.reporting_interval == 0:
         ema_logging.info(str(self.i)+" cases completed")
def determine_time_dimension(outcomes):
    '''
    helper function for determining or creating time dimension

    
    Parameters
    ----------
    outcomes : dict
    
    Returns
    -------
    tuple
        the time dimension as an ndarray (or None if no time dimension could
        be determined) and the outcomes dict, with 'TIME' removed if present
    
    
    '''

    time = None
    try:
        time = outcomes['TIME']
        time = time[0, :]
        outcomes.pop('TIME')
    except KeyError:
        values = iter(outcomes.values())
        for value in values:
            if len(value.shape)==2:
                time =  np.arange(0, value.shape[1])
                break
    if time is None:
        info("no time dimension found in results")
    return time, outcomes    
Example no. 7
    def __init__(self, gui=False, thd=False):
        '''
        
        Create a link with netlogo. Underneath, the netlogo jvm is started
        through jpype.
        
        
        :param gui: boolean, if true run netlogo with gui, otherwise run in 
                    headless mode. Defaults to false.
        :param thd: boolean, if true start netlogo in 3d mode. Defaults to 
                    false
        
        
        '''
        if not jpype.isJVMStarted():
            # netlogo jars
            jars = [NETLOGO_HOME + r'/lib/scala-library.jar',
                    NETLOGO_HOME + r'/lib/asm-all-3.3.1.jar',
                    NETLOGO_HOME + r'/lib/picocontainer-2.13.6.jar',
                    NETLOGO_HOME + r'/lib/log4j-1.2.16.jar',
                    NETLOGO_HOME + r'/lib/jmf-2.1.1e.jar',
                    NETLOGO_HOME + r'/lib/pegdown-1.1.0.jar',
                    NETLOGO_HOME + r'/lib/parboiled-core-1.0.2.jar',
                    NETLOGO_HOME + r'/lib/parboiled-java-1.0.2.jar',
                    NETLOGO_HOME + r'/lib/mrjadapter-1.2.jar',
                    NETLOGO_HOME + r'/lib/jhotdraw-6.0b1.jar',
                    NETLOGO_HOME + r'/lib/quaqua-7.3.4.jar',
                    NETLOGO_HOME + r'/lib/swing-layout-7.3.4.jar',
                    NETLOGO_HOME + r'/lib/jogl-1.1.1.jar',
                    NETLOGO_HOME + r'/lib/gluegen-rt-1.1.1.jar',
                    NETLOGO_HOME + r'/NetLogo.jar',
                    PYNETLOGO_HOME + r'/external_files/netlogoLink.jar']
            
            # format jars in right format for starting java virtual machine
            # TODO the use of the jre here is only relevant under windows 
            # apparently
            # might be solvable by setting netlogo home user.dir

            joined_jars = jar_separator.join(jars)
            jarpath = '-Djava.class.path={}'.format(joined_jars)
            
            jvm_handle = jpype.getDefaultJVMPath() 
            jpype.startJVM(jvm_handle, jarpath, "-Xms128M","-Xmx1024m")  
            jpype.java.lang.System.setProperty('user.dir', NETLOGO_HOME)

            if sys.platform=='darwin':
                jpype.java.lang.System.setProperty("java.awt.headless", "true");            
            
            debug("jvm started")
        
        link = jpype.JClass('netlogoLink.NetLogoLink')
        debug('NetLogoLink class found')

        if sys.platform == 'darwin' and gui:
            info('on mac only headless mode is supported')
            gui=False
        
        self.link = link(gui, thd)
        debug('NetLogoLink class instantiated')
def do_text_ticks_labels(ax, i, j, field1, field2, ylabels, outcomes_to_show):
    '''
    
    Helper function for turning the tick labels on the axes on and off as
    appropriate

    Parameters
    ----------
    ax : axes
    i : int
    j : int
    field1 : str
    field2 : str
    ylabels : dict, optional
    outcomes_to_show : list of str
    
    
    '''

    #text and labels
    if i == j:
        #only plot the name in the middle
        if ylabels:
            text = ylabels[field1]
        else:
            text = field1
        ax.text(0.5,
                0.5,
                text,
                horizontalalignment='center',
                verticalalignment='center',
                transform=ax.transAxes)

    # are we at the end of the row?
    if i != len(outcomes_to_show) - 1:
        #xaxis off
        ax.set_xticklabels([])
    else:
        if ylabels:
            try:
                ax.set_xlabel(ylabels.get(field2))
            except KeyError:
                info("no label specified for " + field2)
        else:
            ax.set_xlabel(field2)

    # are we at the end of the column?
    if j != 0:
        #yaxis off
        ax.set_yticklabels([])
    else:
        if ylabels:
            try:
                ax.set_ylabel(ylabels.get(field1))
            except KeyError:
                info("no label specified for " + field1)
        else:
            ax.set_ylabel(field1)
def do_text_ticks_labels(ax, i, j, field1, field2, ylabels, outcomes_to_show):
    '''
    
    Helper function for turning the tick labels on the axes on and off as
    appropriate

    Parameters
    ----------
    ax : axes
    i : int
    j : int
    field1 : str
    field2 : str
    ylabels : dict, optional
    outcomes_to_show : list of str
    
    
    '''
    
    #text and labels
    if i == j:
        #only plot the name in the middle
        if ylabels:
            text = ylabels[field1]
        else:
            text = field1
        ax.text(0.5, 0.5, text,
                horizontalalignment='center',
                verticalalignment='center',
                transform = ax.transAxes)  
    
    # are we at the end of the row?
    if i != len(outcomes_to_show)-1:
        #xaxis off
        ax.set_xticklabels([])
    else:
        if ylabels:
            try:
                ax.set_xlabel(ylabels.get(field2))
            except KeyError:
                info("no label specified for "+field2)
        else:
            ax.set_xlabel(field2) 
    
    # are we at the end of the column?
    if j != 0:
        #yaxis off
        ax.set_yticklabels([])
    else:
        if ylabels:
            try:
                ax.set_ylabel(ylabels.get(field1))
            except KeyError:
                info("no label specified for "+field1) 
        else:
            ax.set_ylabel(field1)   
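
A hedged usage sketch for this helper, assuming a square grid of pairwise plots in which field1 indexes the row and field2 the column (the outcome names are made up):

import matplotlib.pyplot as plt

outcomes_to_show = ['prey', 'predator']
fig, axes = plt.subplots(len(outcomes_to_show), len(outcomes_to_show))

for i, field1 in enumerate(outcomes_to_show):
    for j, field2 in enumerate(outcomes_to_show):
        ax = axes[i, j]
        # ... plot field2 against field1 on ax here ...
        do_text_ticks_labels(ax, i, j, field1, field2, None,
                             outcomes_to_show)
plt.show()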
    def _run_optimization(self, generate_individual, 
                           evaluate_population,algorithm=None, 
                           obj_function=None,
                           weights=None, levers=None, 
                           pop_size=None, reporting_interval=None, 
                           nr_of_generations=None, crossover_rate=None, 
                           mutation_rate=None,
                           caching=False,
                           **kwargs):
        '''
        Helper function that runs the actual optimization
           
        Parameters
        ----------     
        toolbox : 
        generate_individual : callable
                              helper function for generating an individual
        evaluate_population : callable
                              helper function for evaluating the population
        attr_list : list
                    list of attributes (alleles)
        keys : list
               the names of the attributes in the same order as attr_list
        obj_function : callable
                       the objective function
        pop_size : int
                   the size of the population
        reporting_interval : int
                             the interval for reporting progress, passed on to 
                             perform_experiments
        weights : tuple
                  the weights on the outcomes
        nr_of_generations : int
                            number of generations for which the GA will 
                            be run
        crossover_rate : float
                         the crossover rate of the GA
        mutation_rate : float
                        the mutation rate of the GA
        levers : dict
                 a dictionary with param keys as keys, and as values info used 
                 in mutation.
        
        '''
        self.algorithm = algorithm(weights, levers, generate_individual, 
                          obj_function, pop_size, evaluate_population, 
                          nr_of_generations, crossover_rate, mutation_rate, 
                          reporting_interval, self, caching, **kwargs)

        # Begin the generational process
        for _ in range(nr_of_generations):
            pop = self.algorithm.get_population()
        info("-- End of (successful) evolution --")

        return self.algorithm.stats_callback, pop        
    def _terminate_pool(cls, 
                        taskqueue, 
                        inqueue, 
                        outqueue, 
                        pool,
                        task_handler, 
                        result_handler, 
                        cache, 
                        working_dirs,
                        ):
        ema_logging.info("terminating pool")
        
        
        
        # this is guaranteed to only be called once
        ema_logging.debug('finalizing pool')
        TERMINATE = 2

        task_handler._state = TERMINATE
        for p in pool:
            taskqueue.put(None)                 # sentinel
            time.sleep(1)

        ema_logging.debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))

        assert result_handler.is_alive() or len(cache) == 0

        result_handler._state = TERMINATE
        outqueue.put(None)                  # sentinel

        if pool and hasattr(pool[0], 'terminate'):
            ema_logging.debug('terminating workers')
            for p in pool:
                p.terminate()

        ema_logging.debug('joining task handler')
        task_handler.join(1e100)

        ema_logging.debug('joining result handler')
        result_handler.join(1e100)

        if pool and hasattr(pool[0], 'terminate'):
            ema_logging.debug('joining pool workers')
            for p in pool:
                p.join()
        
        # cleaning up directories
        # TODO investigate whether the multiprocessing.util tempdirectory  
        # functionality can be used instead
        
        for directory in working_dirs:
            ema_logging.debug("deleting "+str(directory))
            shutil.rmtree(directory)
Example no. 12
def make_data_structure(clusters, distRow, runLogs):
    nr_clusters = np.max(clusters)
    cluster_list = []
    for i in range(1, nr_clusters+1):
        info("starting with cluster %s" %i)
        #determine the indices for cluster i
        indices = np.where(clusters==i)[0]
        
        drow_indices = np.zeros((indices.shape[0]**2 - indices.shape[0])//2, dtype=int)
        s = 0
        #get the indices for the distance for the runs in the cluster
        for q in range(indices.shape[0]):
            for r in range(q+1, indices.shape[0]):
                b = indices[q]
                a = indices[r]
                
                drow_indices[s] = get_drow_index(indices[r],
                                                 indices[q], 
                                                 clusters.shape[0])
                s+=1
        
        #get the distance for the runs in the cluster
        dist_clust = distRow[drow_indices]
        
        #make a distance matrix
        dist_matrix = squareform(dist_clust)

        #sum across the rows
        row_sum = dist_matrix.sum(axis=0)
        
        #get the index of the result with the lowest sum of distances
        min_cIndex = row_sum.argmin()
    
        # convert this cluster specific index back to the overall cluster list 
        # of indices
        originalIndices = np.where(clusters==i)
        originalIndex = originalIndices[0][min_cIndex]

        print(originalIndex)

        a = list(np.where(clusters==i)[0])
        a = [int(entry) for entry in a]
        
        cluster = Cluster(i, 
                          np.where(clusters==i)[0], 
                          originalIndex,
                          [runLogs[entry] for entry in a],
                          dist_clust)
        cluster_list.append(cluster)
    return cluster_list
Example no. 13
def make_data_structure(clusters, distRow, runLogs):
    nr_clusters = np.max(clusters)
    cluster_list = []
    for i in range(1, nr_clusters + 1):
        info("starting with cluster %s" % i)
        #determine the indices for cluster i
        indices = np.where(clusters == i)[0]

        drow_indices = np.zeros((indices.shape[0]**2 - indices.shape[0]) // 2,
                                dtype=int)
        s = 0
        #get the indices for the distance for the runs in the cluster
        for q in range(indices.shape[0]):
            for r in range(q + 1, indices.shape[0]):
                b = indices[q]
                a = indices[r]

                drow_indices[s] = get_drow_index(indices[r], indices[q],
                                                 clusters.shape[0])
                s += 1

        #get the distance for the runs in the cluster
        dist_clust = distRow[drow_indices]

        #make a distance matrix
        dist_matrix = squareform(dist_clust)

        #sum across the rows
        row_sum = dist_matrix.sum(axis=0)

        #get the index of the result with the lowest sum of distances
        min_cIndex = row_sum.argmin()

        # convert this cluster specific index back to the overall cluster list
        # of indices
        originalIndices = np.where(clusters == i)
        originalIndex = originalIndices[0][min_cIndex]

        print(originalIndex)

        a = list(np.where(clusters == i)[0])
        a = [int(entry) for entry in a]

        cluster = Cluster(i,
                          np.where(clusters == i)[0], originalIndex,
                          [runLogs[entry] for entry in a], dist_clust)
        cluster_list.append(cluster)
    return cluster_list
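
get_drow_index is not shown in these examples. Given how the distance functions elsewhere in this collection fill dRow (i over all runs, j over i+1..n-1), a plausible implementation that maps a pair of run indices to its position in that condensed distance row is:

def get_drow_index_sketch(j, i, n):
    # position of the pair (i, j), with i < j, in a condensed distance
    # row over n runs; the argument order mirrors the call above,
    # get_drow_index(indices[r], indices[q], clusters.shape[0]), where
    # indices[r] > indices[q]. This layout matches
    # scipy.spatial.distance.squareform.
    return n * i + j - ((i + 1) * (i + 2)) // 2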
    def _run_through_cache(self, individuals):
        '''Helper function that checks whether individuals have already been
        evaluated; if so, the cached fitness value is reused.'''
        invalid_inds = [ind for ind in individuals if not ind.fitness.valid]            
        ema_logging.info('nr. of invalid individuals before checking cache: {}'.format(len(invalid_inds)))
        
        for invalid_ind in invalid_inds:
            # construct key
            key = [repr(invalid_ind.get(entry)) for entry in self.lever_keys]
            key = tuple(key)
            
            try:
                # set value if in caching
                invalid_ind.fitness.values = self.cache[key]
            except KeyError:
                pass

        invalid_inds = [ind for ind in individuals if not ind.fitness.valid]            
        ema_logging.info('nr. of invalid individuals after checking cache: {}'.format(len(invalid_inds)))
Example no. 15
def construct_features(data, filterSlope, tHoldSlope, filterCurvature, 
                       tHoldCurvature, addMidExtension, addEndExtension):
    '''
    Constructs a feature vector for each of the data-series contained in the 
    data. 
    
    '''
    info("calculating features")
    
    # TODO, the casting of each feature to a list of tuples might be 
    # removed at some stage, it will lead to a speed up, for you 
    # can vectorize the calculations that use the feature vector
    features = []
    for i in range(data.shape[0]):
        feature = construct_feature_vector(data[i, :], filterSlope, tHoldSlope, 
                                     filterCurvature, tHoldCurvature, 
                                     addMidExtension, addEndExtension)
#        feature =  [tuple(feature[0,:]),tuple(feature[1,:])]
        features.append(feature)
    return features
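
construct_feature_vector itself is not part of this example. Based on the description in the distance_gonenc docstring further below (a slope dimension and a curvature dimension, each optionally filtered against a relative threshold), a rough and deliberately simplified sketch of the idea might look as follows; the exact filtering rule and the use of signs are assumptions:

import numpy as np

def feature_vector_sketch(series, filter_slope=True, thold_slope=0.1,
                          filter_curvature=True, thold_curvature=0.1):
    series = np.asarray(series, dtype=float)

    # slope: first difference of the series, with small relative changes
    # filtered to zero (the exact filtering rule is an assumption)
    slope = np.diff(series)
    if filter_slope:
        scale = np.mean(np.abs(series)) + 1e-12
        slope[np.abs(slope) / scale < thold_slope] = 0.0

    # curvature: first difference of the slope, filtered analogously
    curvature = np.diff(slope)
    if filter_curvature:
        scale = np.mean(np.abs(slope)) + 1e-12
        curvature[np.abs(curvature) / scale < thold_curvature] = 0.0

    # a 2 x m feature array: row 0 holds slope signs, row 1 curvature signs
    m = min(slope.shape[0], curvature.shape[0])
    return np.vstack([np.sign(slope[:m]), np.sign(curvature[:m])])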
def distance_triangle(data):
    '''
    The triangle distance is calculated as follows:
        Let ds1(.) and ds2(.) be two data series of length N. Then
        A equals the summation of ds1(i)*ds2(i) from i=1 to N,
        B equals the square root of the summation of ds1(i)^2 from i=1 to N,
        C equals the square root of the summation of ds2(i)^2 from i=1 to N,

        distance_triangle = A/(B*C)

    The triangle distance works only with data series of the same length.

    In the literature, it is claimed that the triangle distance deals very well
    with noise and amplitude scaling, but may yield poor results in cases of
    offset translation and linear drift.
    '''
    
    
    
    runLogs = []
    # Computes the pairwise distances between all time series contained in the numpy array data
    info("calculating distances")
    dRow = np.zeros(shape=(np.sum(np.arange(data.shape[0])), ))
    index = -1
    for i in range(data.shape[0]):
            
        # For each run, a log is created
        # Log includes a description dictionary that has key information 
        # for post-clustering analysis, and the data series itself. These 
        # logs are stored in a global array named runLogs
        behaviorDesc = {}
        behaviorDesc['Index'] = str(i)
        
        behavior = data[i]
        localLog = (behaviorDesc, behavior)
        runLogs.append(localLog)
    
        for j in range(i+1, data.shape[0]):
            index += 1
            distance = trdist(data[i],data[j]) 
            dRow[index] = distance
    return dRow, runLogs
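
The helper trdist is not shown. Following the formula in the docstring (A divided by B*C), a minimal sketch for two equal-length series could be written as below; note that as defined this is a cosine-style similarity, reaching 1 for proportional series, and the actual trdist may turn it into a proper distance:

import numpy as np

def trdist_sketch(ds1, ds2):
    ds1 = np.asarray(ds1, dtype=float)
    ds2 = np.asarray(ds2, dtype=float)
    a = np.sum(ds1 * ds2)             # A
    b = np.sqrt(np.sum(ds1 ** 2))     # B
    c = np.sqrt(np.sum(ds2 ** 2))     # C
    return a / (b * c)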
Example no. 17
    def _get_population(self):

        if self._restart_required():
            self.called += 1
            self.last_eps_progress = 0
            self.pop_size = self.desired_labda * len(self.archive.items)

            new_pop = self._rebuild_population()

            ema_logging.info(
                self.message.format(self.pop_size, len(self.archive.items),
                                    self.tournament_size))

            # run new population through cache
            invalid_new_pop = new_pop
            if self.cache:
                invalid_new_pop = self._run_through_cache(new_pop)

            # update selection pressure...
            self.tournament_size = int(
                max(2, self.selection_presure * self.pop_size))

            if invalid_new_pop:
                # Evaluate the individuals with an invalid fitness
                self.evaluate_population(invalid_new_pop,
                                         self.reporting_interval, self.toolbox,
                                         self.ensemble)

                # update cache with newly analysed population
                if self.caching:
                    self._update_cache(invalid_new_pop)

            # Select the next generation population
            self.pop = self.toolbox.select(self.pop + new_pop, self.pop_size)

            self.stats_callback(self.pop)
            self.stats_callback.log_stats(self.called)

            return self.pop
        else:
            return super(epsNSGA2, self)._get_population()
    def _get_population(self):
        
        if self._restart_required():
            self.called +=1
            self.last_eps_progress = 0
            self.pop_size = self.desired_labda * len(self.archive.items)           
            
            new_pop = self._rebuild_population()

            ema_logging.info(self.message.format(self.pop_size,
                                                 len(self.archive.items),
                                                 self.tournament_size))
            
            # run new population through cache
            invalid_new_pop = new_pop
            if self.cache:
                invalid_new_pop = self._run_through_cache(new_pop)
        
            # update selection pressure...
            self.tournament_size = int(max(2,
                                        self.selection_presure*self.pop_size))

            
            if invalid_new_pop:
                # Evaluate the individuals with an invalid fitness
                self.evaluate_population(invalid_new_pop, self.reporting_interval, 
                                         self.toolbox, self.ensemble)
                
                # update cache with newly analysed population
                if self.caching:
                    self._update_cache(invalid_new_pop)

            # Select the next generation population
            self.pop = self.toolbox.select(self.pop + new_pop, self.pop_size)
            
            self.stats_callback(self.pop)
            self.stats_callback.log_stats(self.called)
            
            return self.pop
        else:
            return super(epsNSGA2, self)._get_population()
def distance_triangle(data):
    '''
    The triangle distance is calculated as follows:
        Let ds1(.) and ds2(.) be two data series of length N. Then
        A equals the summation of ds1(i)*ds2(i) from i=1 to N,
        B equals the square root of the summation of ds1(i)^2 from i=1 to N,
        C equals the square root of the summation of ds2(i)^2 from i=1 to N,

        distance_triangle = A/(B*C)

    The triangle distance works only with data series of the same length.

    In the literature, it is claimed that the triangle distance deals very well
    with noise and amplitude scaling, but may yield poor results in cases of
    offset translation and linear drift.
    '''

    runLogs = []
    # Computes the pairwise distances between all time series contained in the numpy array data
    info("calculating distances")
    dRow = np.zeros(shape=(np.sum(np.arange(data.shape[0])), ))
    index = -1
    for i in range(data.shape[0]):

        # For each run, a log is created
        # Log includes a description dictionary that has key information
        # for post-clustering analysis, and the data series itself. These
        # logs are stored in a global array named runLogs
        behaviorDesc = {}
        behaviorDesc['Index'] = str(i)

        behavior = data[i]
        localLog = (behaviorDesc, behavior)
        runLogs.append(localLog)

        for j in range(i + 1, data.shape[0]):
            index += 1
            distance = trdist(data[i], data[j])
            dRow[index] = distance
    return dRow, runLogs
Example no. 20
    def test_log_messages(self):
        ema_logging.log_to_stderr(ema_logging.DEBUG)
        
        with mock.patch('util.ema_logging._logger') as mocked_logger:
            message = 'test message'
            ema_logging.debug(message)
            mocked_logger.debug.assert_called_with(message)

            ema_logging.info(message)
            mocked_logger.info.assert_called_with(message)
            
            ema_logging.warning(message)
            mocked_logger.warning.assert_called_with(message)
            
            ema_logging.error(message)
            mocked_logger.error.assert_called_with(message)
            
            ema_logging.exception(message)
            mocked_logger.exception.assert_called_with(message)
            
            ema_logging.critical(message)
            mocked_logger.critical.assert_called_with(message)            
    def log_stats(self, gen):
        '''Log statistics on the progress of the evolution'''
        
        functions = {"min":self.minima,
                     "max":self.maxima,
                     "std":self.std,
                     "mean":self.mean,}

        hof = self.__get_hof_in_array()
        info_message = pd.DataFrame(index=['min', 'max', 'mean', 'std'],
                                    columns=['obj_{}'.format(i) for i in
                                             range(hof.shape[1])])
        for key, value in functions.items():
            data = value(hof)
            info_message.loc[key] = data
            
        # let pandas do the formatting for us, but remove the trailing info
        # on the size of the DataFrame
        message = info_message.__str__()
        message = message.split('\n')[0:-2]
        message = "\n".join(message)
        line = "\ngeneration {}\n{}".format(gen,message)
        ema_logging.info(line)
def filter_scalar_outcomes(outcomes):
    '''
    Helper function that removes non time series outcomes from all the 
    outcomes.

    Parameters
    ----------
    outcomes : dict
    
    Returns
    -------
    dict
        the filtered outcomes
    
    
    '''
    outcomes_to_remove = []
    for key, value in outcomes.items():
        if len(value.shape) <2:
            outcomes_to_remove.append(key)
            info("%s not shown because it is not time series data" %key)
    [outcomes.pop(entry) for entry in outcomes_to_remove]
    return outcomes
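
A minimal usage sketch, assuming scalar outcomes are stored as 1-D arrays (one value per run) and time series outcomes as 2-D arrays (runs x timesteps):

import numpy as np

outcomes = {'end_value': np.array([1.0, 2.0, 3.0]),   # scalar outcome
            'trajectory': np.random.rand(3, 10)}      # time series outcome
outcomes = filter_scalar_outcomes(outcomes)
# only 'trajectory' remains; 'end_value' is removed and a log message
# explains why it is not shown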
Example no. 23
    def _run_through_cache(self, individuals):
        '''Helper function that checks whether individuals have already been
        evaluated; if so, the cached fitness value is reused.'''
        invalid_inds = [ind for ind in individuals if not ind.fitness.valid]
        ema_logging.info(
            'nr. of invalid individuals before checking cache: {}'.format(
                len(invalid_inds)))

        for invalid_ind in invalid_inds:
            # construct key
            key = [repr(invalid_ind.get(entry)) for entry in self.lever_keys]
            key = tuple(key)

            try:
                # set value if in caching
                invalid_ind.fitness.values = self.cache[key]
            except KeyError:
                pass

        invalid_inds = [ind for ind in individuals if not ind.fitness.valid]
        ema_logging.info(
            'nr. of invalid individuals after checking cache: {}'.format(
                len(invalid_inds)))
def filter_scalar_outcomes(outcomes):
    '''
    Helper function that removes non time series outcomes from all the 
    outcomes.

    Parameters
    ----------
    outcomes : dict
    
    Returns
    -------
    dict
        the filtered outcomes
    
    
    '''
    outcomes_to_remove = []
    for key, value in outcomes.items():
        if len(value.shape) < 2:
            outcomes_to_remove.append(key)
            info("%s not shown because it is not time series data" % key)
    [outcomes.pop(entry) for entry in outcomes_to_remove]
    return outcomes
def distance_mse(data):
    '''
    The MSE (mean squared error) distance is the SSE distance divided by the
    number of data points in the data series.

    The SSE distance between two data series is the sum of the squared errors
    between their corresponding data points. Let the data series be of length
    N; the SSE distance between ds1 and ds2 is then the sum over i = 1..N of
    error_term(i)^2, where error_term(i) equals ds1(i) - ds2(i).

    Given the SSE as defined above, the MSE equals SSE divided by N.

    Like the SSE distance, the MSE distance only works with data series of
    equal length.
    '''
    
    runLogs = []
    # Computes the pairwise distances between all time series contained in the numpy array data
    info("calculating distances")
    dRow = np.zeros(shape=(np.sum(np.arange(data.shape[0])), ))
    index = -1
    for i in range(data.shape[0]):
            
        # For each run, a log is created
        # Log includes a description dictionary that has key information 
        # for post-clustering analysis, and the data series itself. These 
        # logs are stored in a global array named runLogs
        behaviorDesc = {}
        behaviorDesc['Index'] = str(i)
        
        behavior = data[i]
        localLog = (behaviorDesc, behavior)
        runLogs.append(localLog)
    
        for j in range(i+1, data.shape[0]):
            index += 1
            distance = msedist(data[i],data[j]) 
            dRow[index] = distance
    return dRow, runLogs
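
As with ssedist, the helper msedist is not shown in this example. A minimal sketch, assuming two equal-length 1-D arrays, simply divides the SSE by the number of data points:

import numpy as np

def msedist_sketch(ds1, ds2):
    errors = np.asarray(ds1, dtype=float) - np.asarray(ds2, dtype=float)
    return np.mean(errors ** 2)   # SSE divided by N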
Example no. 26
def distance_mse(data):
    '''
    The MSE (mean squared error) distance is the SSE distance divided by the
    number of data points in the data series.

    The SSE distance between two data series is the sum of the squared errors
    between their corresponding data points. Let the data series be of length
    N; the SSE distance between ds1 and ds2 is then the sum over i = 1..N of
    error_term(i)^2, where error_term(i) equals ds1(i) - ds2(i).

    Given the SSE as defined above, the MSE equals SSE divided by N.

    Like the SSE distance, the MSE distance only works with data series of
    equal length.
    '''

    runLogs = []
    # Computes the pairwise distances between all time series contained in the numpy array data
    info("calculating distances")
    dRow = np.zeros(shape=(np.sum(np.arange(data.shape[0])), ))
    index = -1
    for i in range(data.shape[0]):

        # For each run, a log is created
        # Log includes a description dictionary that has key information
        # for post-clustering analysis, and the data series itself. These
        # logs are stored in a global array named runLogs
        behaviorDesc = {}
        behaviorDesc['Index'] = str(i)

        behavior = data[i]
        localLog = (behaviorDesc, behavior)
        runLogs.append(localLog)

        for j in range(i + 1, data.shape[0]):
            index += 1
            distance = msedist(data[i], data[j])
            dRow[index] = distance
    return dRow, runLogs
Example no. 27
class AbstractCallback(object):
    '''
    Abstract base class from which different call back classes can be derived.
    Callback is responsible for storing the results of the runs.

    Parameters
    ----------
    uncs : list
            a list of the uncertainties over which the experiments 
            are being run.
    outcomes : list
               a list of outcomes
    nr_experiments : int
                     the total number of experiments to be executed
    reporting_interval : int, optional 
                         the interval at which to provide progress information 
                         via logging.

    Attributes
    ----------
    i : int
        a counter that keeps track of how many experiments have been saved
    reporting_interval : int
                         the frequency at which to log progress

    '''
    __metaclass__ = abc.ABCMeta

    i = 0
    reporting_interval = 100

    def __init__(self,
                 uncertainties,
                 outcomes,
                 nr_experiments,
                 reporting_interval=100):
        self.reporting_interval = reporting_interval

    @abc.abstractmethod
    def __call__(self, case_id, case, policy, name, result):
        '''
        Method responsible for storing results. The implementation in this
        class only keeps track of how many runs have been completed and 
        logging this. Any extension of AbstractCallback needs to implement
        this method. If one wants to use the logging provided here, call it via
        super.
        
        Parameters
        ----------
        case_id: int
                 the job id
        case: dict
              the case to be stored
        policy: str 
                the name of the policy being used
        name: str
              the name of the model being used
        result: dict
                the result dict
        
        '''

        self.i += 1
        ema_logging.debug(str(self.i) + " cases completed")

        if self.i % self.reporting_interval == 0:
            ema_logging.info(str(self.i) + " cases completed")
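
A minimal sketch of a concrete callback, illustrating the documented pattern of calling the base implementation via super so that the progress logging above is kept. The list-based storage and the class name are made up for illustration and are not the library's DefaultCallback:

class ListCallback(AbstractCallback):
    '''Toy callback that simply appends every result to a list.'''

    def __init__(self, uncertainties, outcomes, nr_experiments,
                 reporting_interval=100):
        super(ListCallback, self).__init__(uncertainties, outcomes,
                                           nr_experiments,
                                           reporting_interval)
        self.results = []

    def __call__(self, case_id, case, policy, name, result):
        # keep the counting and logging provided by AbstractCallback
        super(ListCallback, self).__call__(case_id, case, policy, name,
                                           result)
        self.results.append((case_id, case, policy, name, result))

    def get_results(self):
        # perform_experiments (see the example further below) retrieves
        # the stored results through a method like this
        return self.results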
    def _run_optimization(self,
                          generate_individual,
                          evaluate_population,
                          algorithm=None,
                          obj_function=None,
                          weights=None,
                          levers=None,
                          pop_size=None,
                          reporting_interval=None,
                          nr_of_generations=None,
                          crossover_rate=None,
                          mutation_rate=None,
                          caching=False,
                          **kwargs):
        '''
        Helper function that runs the actual optimization
           
        Parameters
        ----------     
        toolbox : 
        generate_individual : callable
                              helper function for generating an individual
        evaluate_population : callable
                              helper function for evaluating the population
        attr_list : list
                    list of attributes (alleles)
        keys : list
               the names of the attributes in the same order as attr_list
        obj_function : callable
                       the objective function
        pop_size : int
                   the size of the population
        reporting_interval : int
                             the interval for reporting progress, passed on to 
                             perform_experiments
        weights : tuple
                  the weights on the outcomes
        nr_of_generations : int
                            number of generations for which the GA will 
                            be run
        crossover_rate : float
                         the crossover rate of the GA
        mutation_rate : float
                        the mutation rate of the GA
        levers : dict
                 a dictionary with param keys as keys, and as values info used 
                 in mutation.
        
        '''
        self.algorithm = algorithm(weights, levers, generate_individual,
                                   obj_function, pop_size, evaluate_population,
                                   nr_of_generations, crossover_rate,
                                   mutation_rate, reporting_interval, self,
                                   caching, **kwargs)

        # Begin the generational process
        for _ in range(nr_of_generations):
            pop = self.algorithm.get_population()
        info("-- End of (successful) evolution --")

        return self.algorithm.stats_callback, pop
    def perform_experiments(self,
                            cases,
                            callback=DefaultCallback,
                            reporting_interval=100,
                            model_kwargs={},
                            which_uncertainties=INTERSECTION,
                            which_outcomes=INTERSECTION,
                            **kwargs):
        """
        Method responsible for running the experiments on a structure. In case 
        of multiple model structures, the outcomes are set to the intersection 
        of the sets of outcomes of the various models.     
        
        Parameters
        ----------    
        cases : int or iterable
                In case of Latin Hypercube sampling and Monte Carlo 
                sampling, cases specifies the number of cases to
                generate. In case of Full Factorial sampling,
                cases specifies the resolution to use for sampling
                continuous uncertainties. Alternatively, one can supply
                a list of dicts, where each dict contains a case.
                That is, an uncertainty name as key, and its value. 
        callback : callback, optional
                   callable that will be called after finishing a 
                   single experiment (default is :class:`~callbacks.DefaultCallback`)
        reporting_interval : int, optional
                             parameter for specifying the frequency with
                             which the callback reports the progress.
                             (Default is 100) 
        model_kwargs : dict, optional
                       dictionary of keyword arguments to be passed to 
                       model_init
        which_uncertainties : {INTERSECTION, UNION}, optional
                              keyword argument for controlling whether,
                              in case of multiple model structure 
                              interfaces, the intersection or the union
                              of uncertainties should be used. 
        which_outcomes : {INTERSECTION, UNION}, optional
                          keyword argument for controlling whether,
                          in case of multiple model structure 
                          interfaces, the intersection or the union
                          of outcomes should be used. 
        kwargs : dict, optional
                 generic keyword arguments to pass on to the callback

        Returns
        -------
        tuple 
            a `structured numpy array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_ 
            containing the experiments, and a dict with the names of the 
            outcomes as keys and a numpy array as value.


        .. rubric:: suggested use

        In general, analysis scripts require both the structured array of the 
        experiments and the dictionary of arrays containing the results. The 
        recommended use is the following::

        >>> results = ensemble.perform_experiments(10000) #recommended use
        >>> experiments, output = ensemble.perform_experiments(10000) 

        The latter option will work fine, but most analysis scripts require
        you to wrap it up into a tuple again::

        >>> data = (experiments, output)

        Another reason for the recommended use is that you can save this tuple
        directly::

        >>> import util as util
        >>> util.save_results(results, filename)

        """
        return_val = self._generate_experiments(cases, which_uncertainties)

        experiments, nr_of_exp, uncertainties = return_val
        # identify the outcomes that are to be included
        overview_dict, element_dict = self._determine_unique_attributes(
            "outcomes")
        if which_outcomes == UNION:
            outcomes = element_dict.keys()
        elif which_outcomes == INTERSECTION:
            outcomes = overview_dict[tuple(
                [msi.name for msi in self.model_structures])]
            outcomes = [outcome.name for outcome in outcomes]
        else:
            raise ValueError("unknown value for which_outcomes")

        info(str(nr_of_exp) + " experiments will be executed")

        #initialize the callback object
        callback = callback(uncertainties,
                            outcomes,
                            nr_of_exp,
                            reporting_interval=reporting_interval,
                            **kwargs)

        if self.parallel:
            info("preparing to perform experiment in parallel")

            if not self.pool:
                self.pool = MultiprocessingPool(self.model_structures,
                                                model_kwargs=model_kwargs,
                                                nr_processes=self.processes)
            info("starting to perform experiments in parallel")

            self.pool.perform_experiments(callback, experiments)
        else:
            info("starting to perform experiments sequentially")

            cwd = os.getcwd()
            runner = ExperimentRunner(self._msis, model_kwargs)
            for experiment in experiments:
                experiment_id, case, policy, model_name, result = runner.run_experiment(
                    experiment)
                callback(experiment_id, case, policy, model_name, result)
            os.chdir(cwd)

        results = callback.get_results()
        info("experiments finished")

        return results
Example no. 30
def distance_gonenc(data,
                    sisterCount=50, 
                    wSlopeError=1, 
                    wCurvatureError=1,
                    filterSlope=True,
                    tHoldSlope = 0.1,
                    filterCurvature=True,
                    tHoldCurvature=0.1,
                    addMidExtension=True,
                    addEndExtension=True
                    ):
    
    '''
    The distance measures the proximity of data series in terms of their 
    qualitative pattern features. In other words, it quantifies the proximity 
    between two different dynamic behaviour modes.
    
    It is designed to work mainly on non-stationary data. Its current version 
    does not perform well in catching the proximity of two cyclic/repetitive 
    patterns with a different number of cycles (e.g. an oscillation with 4 
    cycles versus an oscillation with 6 cycles).
    
    :param data:
    :param sisterCount: Number of long-versions that will be created for the 
                        short vector while comparing two data series with 
                        unequal feature vector lengths. 
    :param wSlopeError: Weight of the error between the 1st dimensions of the 
                        two feature vectors (i.e. Slope). (default=1)
    :param wCurvatureError: Weight of the error between the 2nd dimensions of 
                            the two feature vectors (i.e. Curvature). 
                            (default=1)
    :param filterSlope: Boolean, indicating whether the slope vectors should 
                         be filtered for minor fluctuations, or not. 
                         (default=True)
    :param tHoldSlope: The threshold value to be used in filtering out 
                       fluctuations in the slope. (default=0.1)
    :param filterCurvature: Boolean, indicating whether the curvature vectors 
                            should be filtered for minor fluctuations, or not.
                            (default=True)
    :param tHoldCurvature: The threshold value to be used in filtering out 
                           fluctuations in the curvature. (default=0.1)
    :param addMidExtension: Boolean, indicating whether the feature vectors 
                            should be extended by introducing transition 
                            sections along the vector.
                            (default=True)
    :param addEndExtension: Boolean, indicating whether the feature vectors 
                            should be extended by introducing startup/closing 
                            sections at the beginning/end of the vector.
                            (default=True)
    '''
    
    
    runLogs = []
    #Generates the feature vectors for all the time series that are contained 
    # in numpy array data
    features = construct_features(data, filterSlope, tHoldSlope, 
                                  filterCurvature, tHoldCurvature, 
                                  addMidExtension, addEndExtension)
    info("calculating distances")
    dRow = np.zeros(shape=(np.sum(np.arange(data.shape[0])), ))
    index = -1
    for i in range(data.shape[0]):
        feature_i = features[i]
            
        # For each run, a log is created
        # Log includes a description dictionary that has key information 
        # for post-clustering analysis, and the data series itself. These 
        # logs are stored in a global array named runLogs
        behaviorDesc = {}
        behaviorDesc['Index'] = str(i)
        
        #this may not work due to data type mismatch
        featVector = feature_i
        
        behaviorDesc['Feature vector'] = str(featVector)
        behavior = data[i]
        localLog = (behaviorDesc, behavior)
        runLogs.append(localLog)
    
        for j in range(i+1, data.shape[0]):
            index += 1
            feature_j = features[j]
            if feature_i.shape[1] == feature_j.shape[1]:
                distance = distance_same_length(feature_i, feature_j, 
                                                wSlopeError, wCurvatureError)
    
            else:
                distance = distance_different_lenght(feature_i, 
                                                     feature_j, 
                                                     wSlopeError, 
                                                     wCurvatureError, 
                                                     sisterCount)
            dRow[index] = distance
    return dRow, runLogs
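
The dRow returned here (and by the other distance functions in this collection) is the condensed upper triangle of the pairwise distance matrix. A short hedged usage sketch, assuming a 2-D data array of runs by timesteps, converts it back to a square matrix:

import numpy as np
from scipy.spatial.distance import squareform

data = np.random.rand(10, 100)      # 10 runs, 100 time steps
dRow, runLogs = distance_gonenc(data)
dist_matrix = squareform(dRow)      # 10 x 10 symmetric distance matrix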
Example no. 31
def cluster(data, 
            outcome,
            distance='gonenc',
            interClusterDistance='complete',
            cMethod='inconsistent',
            cValue=2.5,
            plotDendrogram=True,
            plotClusters=True,
            groupPlot=False,
            **kwargs):
    '''
    
    Method that clusters the time-series data of the specified outcome, as 
    contained in the results from :meth:`perform_experiments`, according to a 
    selected distance measure.
    
    Parameters
    ----------
    
    data : tuple
           return from :meth:`perform_experiments`.
    outcome : str
              Name of outcome/variable whose behavior is being analyzed
    distance : {'gonenc','triangle', 'sse', 'mse'}
               The distance metric to be used.
    interClusterDistance : str
                           How to calculate inter cluster distance.
                           see `linkage <http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage>`_ 
                           for details.
    cMethod : str
              Cutoff method, see `fcluster <http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.fcluster.html#scipy.cluster.hierarchy.fcluster>`_ 
              for details.
    cValue : float
             Cutoff value, see `fcluster <http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.fcluster.html#scipy.cluster.hierarchy.fcluster>`_ 
             for details.
    plotDendrogram : bool
    plotClusters : bool
    groupPlot : bool
    
    Returns
    -------
    list
        distances
    list
        Clusters
    list
        distance metrics
    
    The remainder of the arguments are passed on to the specified distance 
    function.
    
    Gonenc Distance:
    
    * 'distance': String that specifies the distance to be used. 
                  Options: bmd (default), mse, sse
    * 'filter?': Boolean that specifies whether the data series will be 
                 filtered (for bmd distance)
    * 'slope filter': A float number that specifies the filtering threshold 
                     for the slope (for every data point if change__in_the_
                     outcome/average_value_of_the_outcome < threshold, 
                     consider slope = 0) (for bmd distance)
    * 'curvature filter': A float number that specifies the filtering 
                          threshold for the curvature (for every data point if 
                          change__in_the_slope/average_value_of_the_slope < 
                          threshold, consider curvature = 0) (for bmd distance)
    * 'no of sisters': 50 (for bmd distance)

    '''
    
    global varName 
    varName = outcome
    data = data[1][outcome]
    
    # Construct a list with distances. This list is the upper triangle
    # of the distance matrix
    dRow, runLogs = construct_distances(data, distance, **kwargs)
    info('finished distances')
    
    # Allocate individual runs into clusters using hierarchical agglomerative 
    # clustering. clusterSetup is a dictionary that customizes the clustering 
    # algorithm to be used.
    z, clusters, runLogs = flatcluster(dRow, 
                                    runLogs, 
                                    plotDendrogram=plotDendrogram,
                                    interClusterDistance=interClusterDistance,
                                    cMethod=cMethod,
                                    cValue=cValue)
    
    info("tranforming to list of clusters")
    clusters = make_data_structure(clusters, dRow, runLogs)

    if plotClusters:
        plot_clusters(groupPlot, runLogs)
    
    return dRow, clusters, z
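
A hedged usage sketch; results is assumed to be the (experiments, outcomes) tuple returned by perform_experiments, and 'outcome_a' a time series outcome contained in it:

dRow, clusters, z = cluster(results,
                            'outcome_a',
                            distance='sse',
                            plotDendrogram=False,
                            plotClusters=False)
# dRow: condensed pairwise distances, clusters: list of Cluster objects,
# z: presumably the linkage produced by the hierarchical clustering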
Example no. 32
def cluster(data,
            outcome,
            distance='gonenc',
            interClusterDistance='complete',
            cMethod='inconsistent',
            cValue=2.5,
            plotDendrogram=True,
            plotClusters=True,
            groupPlot=False,
            **kwargs):
    '''
    
    Method that clusters the time-series data of the specified outcome, as 
    contained in the results from :meth:`perform_experiments`, according to a 
    selected distance measure.
    
    Parameters
    ----------
    
    data : tuple
           return from :meth:`perform_experiments`.
    outcome : str
              Name of outcome/variable whose behavior is being analyzed
    distance : {'gonenc','triangle', 'sse', 'mse'}
               The distance metric to be used.
    interClusterDistance : str
                           How to calculate inter cluster distance.
                           see `linkage <http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html#scipy.cluster.hierarchy.linkage>`_ 
                           for details.
    cMethod : str
              Cutoff method, see `fcluster <http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.fcluster.html#scipy.cluster.hierarchy.fcluster>`_ 
              for details.
    cValue : float
             Cutoff value, see `fcluster <http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.fcluster.html#scipy.cluster.hierarchy.fcluster>`_ 
             for details.
    plotDendrogram : bool
    plotClusters : bool
    groupPlot : bool
    
    Returns
    -------
    list
        distances
    list
        Clusters
    list
        distance metrics
    
    The remainder of the arguments are passed on to the specified distance 
    function.
    
    Gonenc Distance:
    
    * 'distance': String that specifies the distance to be used. 
                  Options: bmd (default), mse, sse
    * 'filter?': Boolean that specifies whether the data series will be 
                 filtered (for bmd distance)
    * 'slope filter': A float number that specifies the filtering threshold 
                     for the slope (for every data point if change__in_the_
                     outcome/average_value_of_the_outcome < threshold, 
                     consider slope = 0) (for bmd distance)
    * 'curvature filter': A float number that specifies the filtering 
                          threshold for the curvature (for every data point if 
                          change__in_the_slope/average_value_of_the_slope < 
                          threshold, consider curvature = 0) (for bmd distance)
    * 'no of sisters': 50 (for bmd distance)

    '''

    global varName
    varName = outcome
    data = data[1][outcome]

    # Construct a list with distances. This list is the upper triangle
    # of the distance matrix
    dRow, runLogs = construct_distances(data, distance, **kwargs)
    info('finished distances')

    # Allocate individual runs into clusters using hierarchical agglomerative
    # clustering. The keyword arguments below customize the clustering
    # algorithm that is used.
    z, clusters, runLogs = flatcluster(
        dRow,
        runLogs,
        plotDendrogram=plotDendrogram,
        interClusterDistance=interClusterDistance,
        cMethod=cMethod,
        cValue=cValue)

    info("tranforming to list of clusters")
    clusters = make_data_structure(clusters, dRow, runLogs)

    if plotClusters:
        plot_clusters(groupPlot, runLogs)

    return dRow, clusters, z
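
To make the call signature above concrete, the following is a minimal usage sketch. It assumes that an ensemble's perform_experiments has already produced a results tuple and that an outcome named 'infected fraction' exists in it; the outcome name and the distance settings are illustrative only.

# hedged usage sketch; 'infected fraction' is an assumed outcome name
results = ensemble.perform_experiments(100)

dRow, clusters, z = cluster(results,
                            outcome='infected fraction',
                            distance='sse',
                            interClusterDistance='complete',
                            cMethod='inconsistent',
                            cValue=2.5,
                            plotDendrogram=False,
                            plotClusters=False)

info('{} clusters found'.format(len(clusters)))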
    def perform_experiments(self, 
                           cases,
                           callback=DefaultCallback,
                           reporting_interval=100,
                           model_kwargs = {},
                           which_uncertainties=INTERSECTION,
                           which_outcomes=INTERSECTION,
                           **kwargs):
        """
        Method responsible for running the experiments on one or more model 
        structures. In case of multiple model structures, the outcomes are, 
        by default, set to the intersection of the sets of outcomes of the 
        various models.
        
        Parameters
        ----------    
        cases : int or iterable
                In case of Latin Hypercube sampling and Monte Carlo 
                sampling, cases specifies the number of cases to
                generate. In case of Full Factorial sampling,
                cases specifies the resolution to use for sampling
                continuous uncertainties. Alternatively, one can supply
                a list of dicts, where each dict contains a case.
                That is, an uncertainty name as key, and its value. 
        callback : callback, optional
                   callable that will be called after finishing a 
                   single experiment (default is :class:`~callbacks.DefaultCallback`)
        reporting_interval : int, optional
                             parameter for specifying the frequency with
                             which the callback reports the progress.
                             (Default is 100) 
        model_kwargs : dict, optional
                       dictionary of keyword arguments to be passed to 
                       model_init
        which_uncertainties : {INTERSECTION, UNION}, optional
                              keyword argument for controlling whether,
                              in case of multiple model structure 
                              interfaces, the intersection or the union
                              of uncertainties should be used. 
        which_outcomes : {INTERSECTION, UNION}, optional
                          keyword argument for controlling whether,
                          in case of multiple model structure 
                          interfaces, the intersection or the union
                          of outcomes should be used. 
        kwargs : dict, optional
                 generic keyword arguments to pass on to the callback

        Returns
        -------
        tuple 
            a `structured numpy array <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`_ 
            containing the experiments, and a dict with the names of the 
            outcomes as keys and a numpy array as value.


        .. rubric:: suggested use

        In general, analysis scripts require both the structured array of the 
        experiments and the dictionary of arrays containing the results. The 
        recommended use is the following::

        >>> results = ensemble.perform_experiments(10000) #recommended use
        >>> experiments, output = ensemble.perform_experiments(10000) 

        The latter option will work fine, but most analysis scripts require 
        you to wrap it up into a tuple again::

        >>> data = (experiments, output)

        Another reason for the recommended use is that you can save this tuple
        directly::

        >>> import util as util
        >>> util.save_results(results, filename)

        """
        return_val = self._generate_experiments(cases, which_uncertainties)
        
        experiments, nr_of_exp, uncertainties = return_val
        # identify the outcomes that are to be included
        overview_dict, element_dict = self._determine_unique_attributes("outcomes")
        if which_outcomes==UNION:
            outcomes = element_dict.keys()
        elif which_outcomes==INTERSECTION:
            outcomes = overview_dict[tuple([msi.name for msi in 
                                            self.model_structures])]
            outcomes = [outcome.name for outcome in outcomes]
        else:
            raise ValueError("unknown value for which_outcomes")
         
        info(str(nr_of_exp) + " experiments will be executed")
                
        #initialize the callback object
        callback = callback(uncertainties, 
                            outcomes, 
                            nr_of_exp,
                            reporting_interval=reporting_interval,
                            **kwargs)

        if self.parallel:
            info("preparing to perform experiment in parallel")
            
            if not self.pool:
                self.pool = MultiprocessingPool(self.model_structures, 
                        model_kwargs=model_kwargs, nr_processes=self.processes)
            info("starting to perform experiments in parallel")

            self.pool.perform_experiments(callback, experiments)
        else:
            info("starting to perform experiments sequentially")
            
            cwd = os.getcwd() 
            runner = ExperimentRunner(self._msis, model_kwargs)
            for experiment in experiments:
                experiment_id, case, policy, model_name, result = runner.run_experiment(experiment)
                callback(experiment_id, case, policy, model_name, result)
            os.chdir(cwd)
       
        results = callback.get_results()
        info("experiments finished")
        
        return results
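
To complement the docstring above, here is a small, hedged sketch of how the returned tuple is typically unpacked and inspected; the file name passed to save_results is illustrative.

# hedged sketch: unpacking, inspecting, and saving results
results = ensemble.perform_experiments(1000)
experiments, outcomes = results

# experiments is a structured numpy array; its dtype names list the inputs
print(experiments.dtype.names)

# outcomes maps each outcome name to an array with one row per experiment
for name, values in outcomes.items():
    print(name, values.shape)

# keep the tuple intact so it can be stored in one call
util.save_results(results, 'experiments.tar.gz')  # illustrative file name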
    def __init__(self, 
                 msis, 
                 processes=None, 
                 kwargs=None):
        '''
        
        Parameters
        ----------
        msis : list 
               iterable of model structure interface instances
        processes : int
                    nr. of processes to spawn; if None, it is set to equal 
                    the nr. of cores
        kwargs : dict
                 kwargs to be passed to :meth:`model_init`
        '''
        
        if processes is None:
            try:
                processes = multiprocessing.cpu_count()
            except NotImplementedError:
                processes = 1
        ema_logging.info("nr of processes is "+str(processes))
    
        # setup queues etc.
        self._setup_queues()
        self._taskqueue = queue.Queue(processes*2)
        self._cache = {}
        self._state = pool.RUN
        
        # handling of logging
        self.log_queue = multiprocessing.Queue()
        h = ema_logging.NullHandler()
        logging.getLogger(ema_logging.LOGGER_NAME).addHandler(h)
        
        log_queue_reader = LogQueueReader(self.log_queue)
        log_queue_reader.start()

        # setup of the actual pool
        self._pool = []
        working_dirs = []

        ema_logging.debug('generating workers')
        
        worker_root = None
        for i in range(processes):
            ema_logging.debug('generating worker '+str(i))
            
            workername = self._get_worker_name(i)
            
            #setup working directories for parallel_ema
            for msi in msis:
                if msi.working_directory is not None:
                    if worker_root is None:
                        wd = msis[0].working_directory
                        abs_wd = os.path.abspath(wd)
                        worker_root = os.path.dirname(abs_wd)
                    
                    wd_name = workername + msi.name
                    working_directory = os.path.join(worker_root, wd_name)
                    
                    working_dirs.append(working_directory)
                    shutil.copytree(msi.working_directory, 
                                    working_directory, 
                                    )
                    msi.set_working_directory(working_directory)

            w = LoggingProcess(
                self.log_queue,
                level=logging.getLogger(
                    ema_logging.LOGGER_NAME).getEffectiveLevel(),
                target=worker,
                args=(self._inqueue,
                      self._outqueue,
                      msis,
                      kwargs))
            self._pool.append(w)
            
            w.name = w.name.replace('Process', workername)
            w.daemon = True
            w.start()
            ema_logging.debug(' worker '+str(i) + ' generated')

        # thread for handling tasks
        self._task_handler = threading.Thread(
                                        target=CalculatorPool._handle_tasks,
                                        name='task handler',
                                        args=(self._taskqueue, 
                                              self._quick_put, 
                                              self._outqueue, 
                                              self._pool
                                              )
                                        )
        self._task_handler.daemon = True
        self._task_handler._state = pool.RUN
        self._task_handler.start()

        # thread for handling results
        self._result_handler = threading.Thread(
                                        target=CalculatorPool._handle_results,
                                        name='result handler',
                                        args=(self._outqueue, 
                                              self._quick_get, 
                                              self._cache, 
                                              self.log_queue)
                                        )
        self._result_handler.daemon = True
        self._result_handler._state = pool.RUN
        self._result_handler.start()

        # function for cleaning up when finalizing object
        self._terminate = Finalize(self, 
                                   self._terminate_pool,
                                   args=(self._taskqueue, 
                                         self._inqueue, 
                                         self._outqueue, 
                                         self._pool,
                                         self._task_handler, 
                                         self._result_handler, 
                                         self._cache, 
                                         working_dirs,
                                         ),
                                    exitpriority=15
                                    )
        
        ema_logging.info("pool has been set up")
try:
    vensim_single = ctypes.windll.vendll32
except AttributeError:
    vensim_single = None
except WindowsError:
    vensim_single = None
    
try:
    vensim_double = ctypes.windll.LoadLibrary(r'C:\Windows\SysWOW64\VdpDLL32.dll')
except AttributeError:
    vensim_double = None
except WindowsError:
    vensim_double = None

if vensim_single and vensim_double:
    vensim = vensim_single
    info("both single and double precision vensim available, using single")
elif vensim_single:
    vensim = vensim_single
    info('using single precision vensim')
elif vensim_double:
    vensim = vensim_double
    info('using double precision vensim')
else:
    message = "vensim dll not found, vensim functionality not available"
    sys.stderr.write(message+"\n")
    warning(message)
del sys
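
When neither DLL is found, the code above only logs a warning, so the name vensim is left undefined. The sketch below shows a thin, hypothetical wrapper that guards against this and forwards a command string through the standard vensim_command entry point of the Vensim DLL; the wrapper and its error handling are assumptions, not part of the shown module.

def command(cmd):
    '''hedged sketch: send a command string to the loaded Vensim DLL'''
    if 'vensim' not in globals():
        raise RuntimeError("vensim dll not found, vensim functionality "
                           "not available")
    # vensim_command is assumed to return 0 when the command fails
    return_val = vensim.vensim_command(cmd)
    if return_val == 0:
        raise RuntimeError("command failed: " + cmd)
    return return_val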


def be_quiet(quietflag):
    '''
    def start(self):
        '''start the log watcher'''

        ema_logging.info('start watching on {}'.format(self.url))
        self.stream.on_recv(self.log_message)