Code Example #1
File: misc.py Project: christianurich/VIBe2UrbanSim
# imports needed to run this snippet; scipy.ndimage's sum is imported into
# the module namespace, so the bare `sum` calls below are ndimage.sum
from numpy import array, column_stack, ndarray
from scipy.ndimage import sum

def ndsum(input, labels, index=None):
    """ extend scipy.ndimage.sum to handle a multi-column labels array.
    The index argument is not used.

    e.g.
    >>> input = array([3, 7, 4, 6, 2, 5])
    >>> attr_a = array([0, 0, 1, 0, 1, 1])
    >>> attr_b = array([3, 1, 2, 1, 2, 0])
    >>> result = ndsum(input, labels=column_stack([attr_a, attr_b]))
    >>> print result
    (array([13, 3, 5, 6]), (array([0, 0, 1, 1]), array([1, 3, 0, 2])))
    """

    if labels is None or not isinstance(labels, ndarray):
        return sum(input, labels=labels, index=index)
    
    assert input.size == labels.shape[0]
    #labels = column_stack(labels)
    hash_table = {}
    def hashlabel(label):
        # djbhash (defined elsewhere in misc.py) maps one row of labels,
        # i.e. one combination of attribute values, to a single integer
        hash_value = djbhash(label)
        hash_table.update({hash_value: label})
        return hash_value
    labels_hash = array(map(hashlabel, labels)).astype("int32")

    index = array(hash_table.keys()).astype("int32")
    value = array(hash_table.values())
    result = sum(input, labels=labels_hash, index=index)
    # return the per-combination sums together with the label values of each
    # column, as in the docstring example
    return result, tuple(value.transpose())
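
For comparison, here is a minimal self-contained sketch (not from the project) of the same multi-column aggregation done with numpy.unique(..., axis=0), which avoids the hashing step; the axis argument requires numpy 1.13 or newer, and the data is taken from the docstring example above.

import numpy as np
from scipy import ndimage

values = np.array([3, 7, 4, 6, 2, 5])
attr_a = np.array([0, 0, 1, 0, 1, 1])
attr_b = np.array([3, 1, 2, 1, 2, 0])
labels = np.column_stack([attr_a, attr_b])

# encode each distinct (attr_a, attr_b) row as a small integer label
uniq_rows, inverse = np.unique(labels, axis=0, return_inverse=True)
# shift by +1 so that no label is 0 (scipy.ndimage reserves 0 for background)
sums = ndimage.sum(values, labels=inverse + 1,
                   index=np.arange(uniq_rows.shape[0]) + 1)

print(sums)                # -> [13.  3.  5.  6.]
print(tuple(uniq_rows.T))  # -> (array([0, 0, 1, 1]), array([1, 3, 0, 2]))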
Code Example #2
 def get_demand_for_submodel(self, submodel=0):
     """Return aggregated probabilities for each location for the submodel."""
     probs = self.upc_sequence.get_probabilities()
     demand = ndimage.sum(probs.ravel().astype("float32"),
                          labels=self.run_config["index"].ravel() + 1,
                          index=arange(self.choice_set.size()) + 1)
     return demand
Code Example #3
 def compute_demand(self, probabilities):
     """sums probabilities for each alternative and adds it to the demand attribute of the choice set.
     """
     demand = ndimage.sum(
         probabilities.ravel().astype("float32"),
         labels=self.run_config["index"].ravel() + 1,
         index=arange(self.choice_set.size()) + 1,
     )
     demand_attr = self.run_config.get("demand_string")
     self.choice_set.modify_attribute(name=demand_attr, data=self.choice_set.get_attribute(demand_attr) + demand)
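
Both snippets above use the same pattern: flatten the per-agent probabilities, group them by the sampled 0-based choice index, and shift labels and index by +1 so that choice 0 does not collide with ndimage's background label 0. A small illustrative sketch with made-up data:

import numpy as np
from scipy import ndimage

n_choices = 4
# one row per agent; columns are the sampled alternatives (assumed data)
probabilities = np.array([[0.7, 0.3],
                          [0.2, 0.8],
                          [0.5, 0.5]], dtype="float32")
choice_index = np.array([[0, 2],     # 0-based ids of the sampled alternatives
                         [2, 3],
                         [0, 1]])

# aggregate the probability mass per choice, i.e. the expected demand
demand = ndimage.sum(probabilities.ravel(),
                     labels=choice_index.ravel() + 1,
                     index=np.arange(n_choices) + 1)
print(demand)  # -> approximately [1.2 0.5 0.5 0.8]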
Code Example #4
 def aggregate(self, values, aggregate_from, aggregate_to, intermediates=[]):
     dataset_pool = self._setup_environment(self.cache_set[0], self.get_base_year())
     ds_from = dataset_pool.get_dataset(aggregate_from)
     dataset_names = intermediates + [aggregate_to]
     new_values = values.copy()
     for dataset_name in dataset_names:
         aggr_values = new_values.copy()
         ds_to = dataset_pool.get_dataset(dataset_name)
         ids = ds_from.get_attribute(ds_to.get_id_name()[0])
         new_values = zeros((ds_to.size(), aggr_values.shape[1]), dtype=values.dtype)
         for i in range(aggr_values.shape[1]):
             new_values[:,i] = ndimage.sum(aggr_values[:,i], labels=ids, index=ds_to.get_id_attribute())
         ds_from = ds_to
     return (new_values, ds_to.get_id_attribute())
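
The method above cascades ndimage.sum through a dataset hierarchy: at each step the id of the next coarser dataset serves as labels, and the aggregated result becomes the input of the following step. A toy sketch with invented ids (buildings to zones to counties), not the Opus dataset API:

import numpy as np
from scipy import ndimage

values        = np.array([10., 20., 5., 7., 1.])  # one value per building
building_zone = np.array([1, 1, 2, 3, 3])         # zone id of each building
zone_ids      = np.array([1, 2, 3])
zone_county   = np.array([1, 1, 2])               # county id of each zone
county_ids    = np.array([1, 2])

# building -> zone
by_zone = ndimage.sum(values, labels=building_zone, index=zone_ids)
# zone -> county: the zone-level result is the input of the next level
by_county = ndimage.sum(by_zone, labels=zone_county, index=county_ids)

print(by_zone)    # -> [30.  5.  8.]
print(by_county)  # -> [35.  8.]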
Code Example #5
def get_category_and_frequency(agent_set, agent_category_definition,
                               choice_set, choice_category_definition,
                               agent_filter_attribute, category_inflating_factor,
                               dataset_pool):

    agent_category_variable = []
    for i in range( len(agent_category_definition) ):
        agent_category_variable.append( VariableName(agent_category_definition[i]).get_alias() + \
                '*%i' % category_inflating_factor**i )
    if len(agent_category_variable) > 0:
        agent_category_id = agent_set.compute_variables("agent_category_id=" + ' + '.join(agent_category_variable), dataset_pool=dataset_pool)
    else:
        agent_category_id = agent_set.get_id_attribute()
    unique_agent_category_id = unique(agent_category_id)

    choice_category_variable = []
    for i in range( len(choice_category_definition) ):
        #choice_category_variable.append( VariableName(choice_category_definition[i]).get_alias() + \
        #        '*%i' % category_inflating_factor**i )
        choice_category_variable.append( choice_category_definition[i] + \
                '*%i' % category_inflating_factor**i )
    if len(choice_category_variable) > 0:
        choice_category_id = choice_set.compute_variables("choice_category_id=" + ' + '.join( choice_category_variable), dataset_pool=dataset_pool)
        agent_choice_category_id = agent_set.compute_variables('choice_category_id=%s.disaggregate(%s.choice_category_id)' % \
                                                               (agent_set.get_dataset_name(), choice_set.get_dataset_name()),
                                                               dataset_pool=dataset_pool)
    else:
        choice_category_id = choice_set.get_id_attribute()
        agent_choice_category_id = agent_set.get_attribute(choice_set.get_id_name()[0])

    unique_choice_category_id = unique(choice_category_id)

    if agent_filter_attribute is not None and len(agent_filter_attribute) > 0:
        agent_filter = agent_set.compute_variables(agent_filter_attribute, dataset_pool=dataset_pool)
    else:
        agent_filter = ones(agent_set.size(), dtype='bool')

    frequency = zeros( (unique_agent_category_id.size, unique_choice_category_id.size), dtype="int32" )
    for i in range(unique_agent_category_id.size): # iterate over agent category
        is_agent_of_this_category = logical_and(agent_filter, agent_category_id == unique_agent_category_id[i]).astype("int32")
        frequency[i,:] = ndimage.sum(is_agent_of_this_category, labels=agent_choice_category_id.astype("int32"), index=unique_choice_category_id.astype("int32"))
    return frequency, unique_agent_category_id, unique_choice_category_id, agent_category_id, choice_category_id
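
get_category_and_frequency encodes a multi-attribute category as one id by weighting attribute i with category_inflating_factor**i, then counts agents per (agent category, choice category) cell with ndimage.sum. A condensed sketch of that encoding and counting, with made-up attribute values:

import numpy as np
from scipy import ndimage

inflating_factor = 10
# two categorical attributes per agent, each assumed smaller than inflating_factor
income_cat = np.array([1, 2, 1, 2, 1])
size_cat   = np.array([0, 0, 1, 1, 1])
# combined id: attr_0 * factor**0 + attr_1 * factor**1
agent_category_id = income_cat * inflating_factor**0 + size_cat * inflating_factor**1

agent_choice_category_id = np.array([3, 3, 7, 7, 3])  # choice category per agent
unique_agent  = np.unique(agent_category_id)           # -> [ 1  2 11 12]
unique_choice = np.unique(agent_choice_category_id)    # -> [3 7]

frequency = np.zeros((unique_agent.size, unique_choice.size), dtype="int32")
for i in range(unique_agent.size):
    in_category = (agent_category_id == unique_agent[i]).astype("int32")
    frequency[i, :] = ndimage.sum(in_category,
                                  labels=agent_choice_category_id,
                                  index=unique_choice)
print(frequency)  # -> [[1 0], [1 0], [1 1], [0 1]]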
Code Example #6
    def run(self, n=500, 
            realestate_dataset_name = 'building',
            current_year=None,
            occupied_spaces_variable="occupied_spaces",
            total_spaces_variable="total_spaces",
            minimum_spaces_attribute="minimum_spaces",
            within_parcel_selection_weight_string=None,
            within_parcel_selection_n=0,
            within_parcel_selection_compete_among_types=False,
            within_parcel_selection_threshold=75,
            within_parcel_selection_MU_same_weight=False,
            within_parcel_selection_transpose_interpcl_weight=True,
            run_config=None,
            debuglevel=0):
        """
        run method of the Development Project Proposal Sampling Model
        
        **Parameters**
        
            **n** : int, sample size for each iteration
                   
                   sample n proposals at a time, which are then evaluated one by one until the 
                   target vacancies are satisfied or the proposals run out
                   
            **realestate_dataset_name** : string, name of real estate dataset
            
            **current_year**: int, simulation year. If None, get value from SimulationState
            
            **occupied_spaces_variable** : string, variable name for calculating how many spaces are currently occupied
                                        
                                          It can be either a variable of the real_estate dataset that returns 
                                          the amount of occupied space, or a target_vacancy attribute 
                                          that contains the names of real_estate variables.   
            
            **total_spaces_variable** : string, variable name for calculating total existing spaces
            
        **Returns**
        
            **proposal_set** : the proposal set, with accepted proposals set to active status
            
            **demolished_buildings** : ids of buildings to be demolished for re-development
        """

        self.accepted_proposals = []
        self.demolished_buildings = []  # indices of buildings to be demolished

        if self.proposal_set.size() <= 0:
            logger.log_status("The size of proposal_set is 0; no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings)

        target_vacancy = self.dataset_pool.get_dataset('target_vacancy')

        if current_year is None:
            year = SimulationState().get_current_time()
        else:
            year = current_year
        this_year_index = where(target_vacancy['year']==year)[0]
        target_vacancy_for_this_year = DatasetSubset(target_vacancy, this_year_index)
        if target_vacancy_for_this_year.size() == 0:
            raise IOError, 'No target vacancy defined for year %s.' % year
        
        ## current_target_vacancy.target_attribute_name = 'target_vacancy_rate'
        ## each column provides a category for which a target vacancy is specified
        self.column_names = list(set( target_vacancy.get_known_attribute_names() ) - \
                            set( [ target_vacancy.target_attribute_name, 
                                   'year', '_hidden_id_', minimum_spaces_attribute,
                                   occupied_spaces_variable, total_spaces_variable
                                   ] )
                            )
        self.column_names.sort(reverse=True)
            
        ## buildings table provides existing stocks
        self.realestate_dataset = self.dataset_pool.get_dataset(realestate_dataset_name)
        
        occupied_spaces_variables = [occupied_spaces_variable]
        total_spaces_variables = [total_spaces_variable]
        if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            occupied_spaces_variables += unique(target_vacancy_for_this_year[occupied_spaces_variable]).tolist()
        if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
            total_spaces_variables += unique(target_vacancy_for_this_year[total_spaces_variable]).tolist()
            
        self._compute_variables_for_dataset_if_needed(self.realestate_dataset, self.column_names + occupied_spaces_variables + total_spaces_variables)
        self._compute_variables_for_dataset_if_needed(self.proposal_component_set, self.column_names + total_spaces_variables)
        self.proposal_set.compute_variables(["urbansim_parcel.development_project_proposal.number_of_components", 
                                             "urbansim_parcel.development_project_proposal.land_area_taken"],
                                            dataset_pool=self.dataset_pool)
        
        n_column = len(self.column_names)
        self.column_names_index = {}
        for iname in range(n_column):
            self.column_names_index[self.column_names[iname]] = iname
 
        target_vacancy_for_this_year.column_values = target_vacancy_for_this_year.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.realestate_dataset.column_values = self.realestate_dataset.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        self.proposal_component_set.column_values = self.proposal_component_set.get_multiple_attributes(self.column_names).reshape((-1, n_column))
        #defaults, can be changed later by spaces_variable specified in target_vacancy rates
        self.realestate_dataset.total_spaces = self.realestate_dataset[total_spaces_variable]
        self.proposal_component_set.total_spaces = self.proposal_component_set[total_spaces_variable]
        self.realestate_dataset.occupied_spaces = self.realestate_dataset[occupied_spaces_variable]
        
        self.accounting = {}; self.logging = {}
        #has_needed_components = zeros(self.proposal_set.size(), dtype='bool')
        for index in range(target_vacancy_for_this_year.size()):
            column_value = tuple(target_vacancy_for_this_year.column_values[index,:].tolist())
            accounting = {'target_vacancy': target_vacancy_for_this_year[target_vacancy.target_attribute_name][index]}
            if minimum_spaces_attribute in target_vacancy_for_this_year.get_known_attribute_names():
                accounting['minimum_spaces'] = target_vacancy_for_this_year[minimum_spaces_attribute][index]
            realestate_indexes = self.get_index_by_condition(self.realestate_dataset.column_values, column_value)
            component_indexes = self.get_index_by_condition(self.proposal_component_set.column_values, column_value)
            
            this_total_spaces_variable, this_occupied_spaces_variable = total_spaces_variable, occupied_spaces_variable
            ## total/occupied_spaces_variable can be specified either as a universal name for all real estate
            ## or in the target_vacancy_rate dataset for each vacancy category
            if occupied_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_occupied_spaces_variable = target_vacancy_for_this_year[occupied_spaces_variable][index]
                self.realestate_dataset.occupied_spaces[realestate_indexes] = (self.realestate_dataset[this_occupied_spaces_variable][realestate_indexes]
                                                                               ).astype(self.realestate_dataset.occupied_spaces.dtype)
    
            if total_spaces_variable in target_vacancy_for_this_year.get_known_attribute_names():
                this_total_spaces_variable = target_vacancy_for_this_year[total_spaces_variable][index]    
                self.realestate_dataset.total_spaces[realestate_indexes] = (self.realestate_dataset[this_total_spaces_variable][realestate_indexes]
                                                                            ).astype(self.realestate_dataset.total_spaces.dtype)
                self.proposal_component_set.total_spaces[component_indexes] = (self.proposal_component_set[this_total_spaces_variable][component_indexes]
                                                                               ).astype(self.proposal_component_set.total_spaces.dtype)
                
            accounting["total_spaces_variable"] = this_total_spaces_variable
            accounting["total_spaces"] = self.realestate_dataset.total_spaces[realestate_indexes].sum()
            accounting["occupied_spaces_variable"] = this_occupied_spaces_variable
            accounting["occupied_spaces"] = self.realestate_dataset.occupied_spaces[realestate_indexes].sum()
            accounting["target_spaces"] = int( round( accounting["occupied_spaces"] /\
                                                     (1 - accounting["target_vacancy"])
                                               ) )
            accounting["proposed_spaces"] = 0
            accounting["demolished_spaces"] = 0
            
            self.accounting[column_value] = accounting
            
            if self._is_target_reached(column_value):
                proposal_indexes = self.proposal_set.get_id_index(unique(self.proposal_component_set['proposal_id'][component_indexes]))
                if n_column == 1:
                    comp_indexes = where(ndimage.sum(self.proposal_component_set[self.column_names[0]]==column_value[0], 
                                    labels=self.proposal_component_set['proposal_id'], 
                                    index=self.proposal_set.get_id_attribute()
                                    ) == self.proposal_set["number_of_components"])[0]
                else:
                    comp_indexes = where(self.proposal_set["number_of_components"]==1)[0]
                target_reached_prop_idx = intersect1d(proposal_indexes, comp_indexes)
                self.weight[target_reached_prop_idx] = 0.0
                self.proposal_set["status_id"][intersect1d(target_reached_prop_idx, where(self.proposal_set["status_id"]==self.proposal_set.id_tentative)[0])] = self.proposal_set.id_no_demand
                
        ## handle planned proposals: all proposals with status_id == id_planned 
        ## and start_year == year are accepted
        planned_proposal_indexes = where(logical_and(
                                                  self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned, 
                                                  self.proposal_set.get_attribute("start_year") == year ) 
                                        )[0]
        
        logger.start_block("Processing %s planned proposals" % planned_proposal_indexes.size)
        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        logger.end_block()
        
        if within_parcel_selection_n > 0:
            logger.start_block("Selecting proposals within parcels (%s proposals per parcel)" % within_parcel_selection_n)
            self.select_proposals_within_parcels(nmax=within_parcel_selection_n, weight_string=within_parcel_selection_weight_string,
                                                 compete_among_types=within_parcel_selection_compete_among_types, 
                                                 filter_threshold=within_parcel_selection_threshold,
                                                 MU_same_weight=within_parcel_selection_MU_same_weight,
                                                 transpose_interpcl_weight=within_parcel_selection_transpose_interpcl_weight)
            logger.end_block()
        
        # consider proposals (in this order: proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            stat = (self.proposal_set.get_attribute("status_id") == status)
            if stat.sum() == 0:
                continue
            
            logger.log_status("Sampling from %s eligible proposals of status %s." % (stat.sum(), status))
            iteration = 0
            while (not self._is_target_reached()):
                ## prevent proposals from being sampled for vacancy types whose targets are reached
                #for column_value in self.accounting.keys():
                
                if self.weight[stat].sum() == 0.0:
                    logger.log_warning("Running out of proposals of status %s before vacancy targets are reached; there aren't any proposals with non-zero weight" % status)
                    break
                
                available_indexes = where(logical_and(stat, self.weight > 0))[0]
                sample_size = minimum(available_indexes.size, n)
                sampled_proposal_indexes = probsample_noreplace(available_indexes, sample_size, 
                                                                prob_array=self.weight[available_indexes],
                                                                return_index=False)
                #sorted_sampled_indices = argsort(self.weight[sampled_proposal_indexes])
                #self.consider_proposals(sampled_proposal_indexes[sorted_sampled_indices][::-1])
                self.consider_proposals(sampled_proposal_indexes)
                self.weight[sampled_proposal_indexes] = 0
                iteration += 1
        
        self._log_status()
        
        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(name="status_id", 
                                           data=self.proposal_set.id_active,
                                           index=array(self.accepted_proposals, dtype='int32'))
        
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute('status_id')
        actv = where(stat_id==1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:          
            total_land_area_taken_computed = self.proposal_set['land_area_taken']
            self.proposal_set.modify_attribute('total_land_area_taken', total_land_area_taken_computed[actv], actv)

        return (self.proposal_set, self.realestate_dataset.get_id_attribute()[self.demolished_buildings])
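
The accounting above derives the per-category target from the identity occupied / total = 1 - target_vacancy, solved for total. A quick worked example with illustrative numbers:

# with 900 occupied spaces and a 10% vacancy target:
occupied_spaces = 900
target_vacancy = 0.10
target_spaces = int(round(occupied_spaces / (1 - target_vacancy)))
print(target_spaces)  # -> 1000; if the current stock is also 900, up to 100 spaces may be added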
Code Example #7
    def select_proposals_within_parcels(self, nmax=2, weight_string=None, compete_among_types=False, filter_threshold=75, 
                                        MU_same_weight=False, transpose_interpcl_weight=True):
        # Allow only nmax proposals per parcel, in order not to disadvantage parcels with a small number of proposals.
        # Within each parcel, the proposals with the highest weights are taken.
        #parcels_with_proposals = unique(self.proposal_set['parcel_id'])
        #parcel_set = self.dataset_pool.get_dataset('parcel')
        if weight_string is not None:
            within_parcel_weights = self.proposal_set.compute_variables([weight_string], dataset_pool=self.dataset_pool)
        else:
            within_parcel_weights = self.weight
        
        egligible = logical_and(self.weight > 0, 
                                self.proposal_set['status_id'] == self.proposal_set.id_tentative)
        wegligible = where(egligible)[0]
        if wegligible.size <=0:
            return
        #parcels_with_proposals = unique(self.proposal_set['parcel_id'][wegligible])
        #min_type = {}
        #egligible_proposals = {}
        tobechosen_ind = ones(wegligible.size).astype('bool8')
        if not compete_among_types:
            for key in self.column_names:
                utypes_all = unique(self.proposal_component_set[key])
                categories = zeros(self.proposal_set.size(), dtype='int32')
                for btype in utypes_all:
                    w = where(ndimage.sum(self.proposal_component_set[key] == btype,
                                          labels=self.proposal_component_set['proposal_id'], 
                                          index=self.proposal_set.get_id_attribute()
                                          ) == self.proposal_set["number_of_components"])[0]
                    categories[w] = btype
                # a category of zero means a mixed-use proposal whose components have different types

                utypes = unique(categories[wegligible])           
                for value in utypes:
                    type_is_value_ind = categories[wegligible]==value
                    for i in range(nmax):
                        parcels_with_proposals = (unique(self.proposal_set['parcel_id'][wegligible][where(type_is_value_ind)])).astype(int32)
                        if parcels_with_proposals.size <= 0:
                            continue
                        labels = (self.proposal_set['parcel_id'][wegligible])*type_is_value_ind               
                        chosen_prop = array(maximum_position(within_parcel_weights[wegligible], 
                                            labels=labels, 
                                            index=parcels_with_proposals)).flatten().astype(int32)               
                        egligible[wegligible[chosen_prop]] = False
                        type_is_value_ind[chosen_prop] = False
        else:
            parcels_with_proposals = unique(self.proposal_set['parcel_id'][wegligible]).astype(int32)
            max_prop = array(maximum_position(within_parcel_weights[wegligible], 
                                            labels=self.proposal_set['parcel_id'][wegligible], 
                                            index=parcels_with_proposals)).flatten().astype(int32)                                            
            max_value_by_parcel = within_parcel_weights[wegligible][max_prop]
            incompetition = ones(wegligible.size, dtype='bool8')
            incompetition[max_prop] = False
            egligible[wegligible[max_prop]] = False            
            for i in range(nmax-1):
                labels = (self.proposal_set['parcel_id'][wegligible])*incompetition 
                valid_parcels = where(in1d(parcels_with_proposals, self.proposal_set['parcel_id'][wegligible][where(incompetition)]))[0]
                if valid_parcels.size <= 0:
                    break
                chosen_prop = array(maximum_position(within_parcel_weights[wegligible], 
                                            labels=labels, 
                                            index=parcels_with_proposals[valid_parcels])).flatten().astype(int32)
                percent = within_parcel_weights[wegligible][chosen_prop]/(max_value_by_parcel[valid_parcels]/100.0)
                where_lower = where(in1d(self.proposal_set['parcel_id'][wegligible], parcels_with_proposals[valid_parcels][percent <= filter_threshold]))[0]
                egligible[wegligible[setdiff1d(chosen_prop, where_lower)]] = False   # proposals with egligible=True get eliminated, so we don't want to set it to False for the where_lower ones
                incompetition[union1d(chosen_prop, where_lower)] = False
                if incompetition.sum() <= 0:
                    break
             
            self.proposal_set['status_id'][where(egligible)] = self.proposal_set.id_eliminated_in_within_parcel_selection
            if MU_same_weight:
                # Set weights of mixed-use proposals within the same parcel to the same value
                parcels = self.dataset_pool.get_dataset('parcel')
#                parcels.compute_variables(['mu_ind = parcel.aggregate(numpy.logical_or(development_project_proposal_component.building_type_id==4, development_project_proposal_component.building_type_id==12) + numpy.logical_or(development_project_proposal_component.building_type_id==3, development_project_proposal_component.building_type_id==13), intermediates=[development_project_proposal])'], 
#                                                    dataset_pool=self.dataset_pool)
#                pcl_ids = parcels.get_id_attribute()[parcels['mu_ind'] > 1]
#                is_mu = logical_and(logical_and(self.weight > 0, 
#                                self.proposal_set['status_id'] == self.proposal_set.id_tentative),
#                                       in1d(self.proposal_set['parcel_id'], pcl_ids))
#                where_mu = where(is_mu)[0]
#                if where_mu.size <= 0:
#                    return
#                trans_weights = self.weight[where_mu]
#                if transpose_interpcl_weight:
#                    trans_weights = log(trans_weights)
#                pcl_idx = parcels.get_id_index(self.proposal_set['parcel_id'][where_mu])
#                upcl_idx = unique(pcl_idx)
#                weight_mean = array(ndimage_mean(trans_weights, labels=pcl_idx,  index=upcl_idx))
#                if transpose_interpcl_weight:
#                    weight_mean = exp(weight_mean)
#                weight_mean_tmp = zeros(upcl_idx.max()+1).astype(weight_mean.dtype)
#                weight_mean_tmp[upcl_idx]=weight_mean
#                self.weight[where_mu]=weight_mean_tmp[pcl_idx]
                self.proposal_set.compute_variables(['is_mfres = development_project_proposal.aggregate(numpy.logical_or(development_project_proposal_component.building_type_id==4, development_project_proposal_component.building_type_id==12))'],
                                                    dataset_pool=self.dataset_pool)
                parcels.compute_variables(['mu_ind = (parcel.aggregate(development_project_proposal.is_mfres)>0) * (parcel.mix_split_id > 0)'], 
                                                    dataset_pool=self.dataset_pool)
                pcl_ids = parcels.get_id_attribute()[parcels['mu_ind'] > 0]
                egligible_props = logical_and(self.weight > 0, logical_and(
                                self.proposal_set['status_id'] == self.proposal_set.id_tentative,
                                self.proposal_set['is_mfres']>0))
                where_prop_to_modify = where(logical_and(egligible_props,
                                       in1d(self.proposal_set['parcel_id'], pcl_ids)))[0]
                if where_prop_to_modify.size <= 0:
                    return
                upcl = unique(self.proposal_set['parcel_id'][where_prop_to_modify])               
                npcl_to_modify = int(upcl.size/10.0)
                if npcl_to_modify == 0:
                    return
                pcls_to_modify = sample_noreplace(upcl, npcl_to_modify)
                where_prop_to_modify_final = where(logical_and(egligible_props,
                                       in1d(self.proposal_set['parcel_id'], pcls_to_modify)))[0]
                trans_weights = self.weight[where_prop_to_modify_final]
                if transpose_interpcl_weight:
                    trans_weights = log(trans_weights)
                #trans_weights = 1.2*trans_weights
                if transpose_interpcl_weight:
                    trans_weights = exp(trans_weights)
                self.weight[where_prop_to_modify_final] = trans_weights
            return
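
The within-parcel selection relies on scipy.ndimage.maximum_position, which returns, for each label in index, the position of the largest input value in that group. A stripped-down sketch with invented weights and parcel ids:

import numpy as np
from scipy import ndimage

weights    = np.array([0.2, 0.9, 0.4, 0.7, 0.1, 0.8])
parcel_ids = np.array([ 10,  10,  20,  20,  20,  30])
parcels    = np.unique(parcel_ids)

# position (index into weights) of the best proposal within each parcel
best = np.array(ndimage.maximum_position(weights,
                                         labels=parcel_ids,
                                         index=parcels)).flatten()
print(best)           # -> [1 3 5]
print(weights[best])  # -> [0.9 0.7 0.8]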
            
Code Example #8
    def run(self, developmentproject_dataset, building_dataset, 
            label_attribute_names=["building_type_id", "zone_id"], 
            quantity_attribute_names = ["residential_units", "non_residential_sqft"]):
        """Modify buildings to reflect new development projects. 
        """
        
        project_labels = None
        building_labels = None
        
        if not developmentproject_dataset or developmentproject_dataset.size() == 0:
            logger.log_warning("Empty development project dataset. Skip add_projects_to_buildings.")
            return building_dataset

        is_placed_project = ones(developmentproject_dataset.size(), dtype='bool')
        for label_attribute in label_attribute_names:
            project_label_attribute = developmentproject_dataset.get_attribute_as_column(label_attribute)
            is_placed_project = logical_and(is_placed_project, project_label_attribute[:,0]>0)
            
            building_label_attribute = building_dataset.get_attribute_as_column(label_attribute)

            if project_labels is None:
                project_labels = project_label_attribute
            else:
                project_labels = column_stack((project_labels, project_label_attribute))
                
            if building_labels is None:
                building_labels = building_label_attribute
            else:
                building_labels = column_stack((building_labels, building_label_attribute))
        max_digits = digits( row_stack((project_labels, building_labels)).max(axis=0) )
        multiplier = array([10**d for d in max_digits[1:] + [0]])
        building_identifier = (building_labels * multiplier).sum(axis=1)
        project_identifier = (project_labels * multiplier).sum(axis=1)
        if not all(is_placed_project):
            logger.log_warning("There are %s projects with %s less than 0; they are not being processed." % (logical_not(is_placed_project).sum(), 
                                                                                                             ",".join(label_attribute_names)))
            project_identifier = project_identifier[is_placed_project]
        unique_project_identifier = unique(project_identifier)
        
        for quantity_attribute in quantity_attribute_names:
            developmentproject_quantity = developmentproject_dataset.get_attribute(quantity_attribute)[is_placed_project]
            if developmentproject_quantity.sum() == 0: continue
            quantity_sum = ndimage.sum(developmentproject_quantity, labels=project_identifier, index=unique_project_identifier)
            for i in range(unique_project_identifier.size):
                if quantity_sum[i] != 0:
                    this_identifier = unique_project_identifier[i]
                    this_label = []
                    remain = this_identifier
                    for m in multiplier:
                        this_label.append(remain // m)
                        remain = remain % m
                    building_index = where(building_identifier==this_identifier)[0]
                    #assert building_index.size == 1
                    if building_index.size == 0:
                        logger.log_error("building with attribute (%s) = (%s) is not in building_dataset" % (label_attribute_names, this_label) )
                        continue
                        #for attribute in []:
                            #data = None
                        #building_dataset.add_elements(name=quantity_attribute, data = current_values+quantity_sum[i], 
                                                      #index=building_index)
                    if building_index.size > 1:
                        logger.log_warning("There is more than one building with attributes (%s) = (%s); using the first one." % (label_attribute_names, this_label) )
                        building_index = building_index[0]
                        
                    current_values = building_dataset.get_attribute_by_index(quantity_attribute, building_index)                    
                    building_dataset.modify_attribute(name=quantity_attribute, data = current_values+quantity_sum[i], 
                                                      index=building_index)
        return building_dataset
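
The run method packs several label columns into a single integer by shifting the left column by enough decimal digits to hold the right one, so a (building_type_id, zone_id) pair becomes one ndimage label that can also be decoded again. A sketch of the packing and decoding; the digits() helper is stubbed here to stand in for the project's utility of the same name:

import numpy as np

def digits(values):
    # stand-in for the project's digits() helper: number of decimal
    # digits needed to represent each value
    return [len(str(int(v))) for v in values]

building_type = np.array([ 4, 12,  4])
zone_id       = np.array([101, 7, 55])
labels2d = np.column_stack((building_type, zone_id))

max_digits = digits(labels2d.max(axis=0))                     # -> [2, 3]
multiplier = np.array([10**d for d in max_digits[1:] + [0]])  # -> [1000, 1]
identifier = (labels2d * multiplier).sum(axis=1)              # -> [4101 12007 4055]

# decode one identifier back into its label columns
remain, decoded = identifier[1], []
for m in multiplier:
    decoded.append(remain // m)
    remain = remain % m
print(decoded)  # -> [12, 7]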
Code Example #9
    def run(self, n=500, run_config=None, current_year=None, debuglevel=0):
        """
        n - sample n proposals at a time, evaluate them one by one
        """
        self.demolished_buildings = array([], dtype="int32")  # id of buildings to be demolished
        if current_year is None:
            current_year = SimulationState().get_current_time()
        if not self.positive_proposals:
            logger.log_status("Proposal Set size <= 0, no proposals to consider, skipping DPPSM.")
            return (self.proposal_set, self.demolished_buildings)
        self.proposal_component_set.compute_variables(
            [
                "urbansim_parcel.development_project_proposal_component.units_proposed",
                "urbansim_parcel.development_project_proposal_component.is_residential",
            ],
            dataset_pool=self.dataset_pool,
        )
        self.proposal_set.compute_variables(
            [
                "urbansim_parcel.development_project_proposal.number_of_components",
                "zone_id=development_project_proposal.disaggregate(parcel.zone_id)",
                #'occurence_frequency = development_project_proposal.disaggregate(development_template.sample_size)'
            ],
            dataset_pool=self.dataset_pool,
        )
        buildings = self.dataset_pool.get_dataset("building")
        buildings.compute_variables(
            [
                "occupied_units_for_jobs = urbansim_parcel.building.occupied_spaces",
                "units_for_jobs = urbansim_parcel.building.total_spaces",
                "occupied_residential_units = urbansim_parcel.building.number_of_households",
                #                                "urbansim_parcel.building.existing_units",
                "urbansim_parcel.building.is_residential",
            ],
            dataset_pool=self.dataset_pool,
        )

        ## define unit_name by whether a building is residential or not (with is_residential attribute)
        ## if it is non-residential (0), count units by number of job spaces (units_for_jobs)
        ## if it is residential (1), count units by residential units
        self.unit_name = array(["units_for_jobs", "residential_units"])

        target_vacancy = self.dataset_pool.get_dataset("target_vacancy")
        target_vacancy.compute_variables(
            ["is_residential = target_vacancy.disaggregate(building_type.is_residential)"],
            dataset_pool=self.dataset_pool,
        )
        # This try-except block checks whether the object has a subarea_id_name;
        # if it does, it calculates the vacancy rates by subarea_id_name
        try:
            # Check for subarea_id_name in target_vacancies dataset
            # if it is present, vacancy rates are specified by subarea_id_name
            # if it is not, vacancy rates are specified region wide
            target_vacancy.load_dataset()
            if self.subarea_id_name in target_vacancy.get_attribute_names():
                current_target_vacancy_this_year = DatasetSubset(
                    target_vacancy, index=where(target_vacancy.get_attribute("year") == current_year)[0]
                )
                current_target_vacancy = DatasetSubset(
                    current_target_vacancy_this_year,
                    index=where(current_target_vacancy_this_year.get_attribute(self.subarea_id_name) == self.area_id)[
                        0
                    ],
                )
            else:
                current_target_vacancy = DatasetSubset(
                    target_vacancy, index=where(target_vacancy.get_attribute("year") == current_year)[0]
                )
        except AttributeError:
            # vacancy rates are specified region wide:
            current_target_vacancy = DatasetSubset(
                target_vacancy, index=where(target_vacancy.get_attribute("year") == current_year)[0]
            )

        if current_target_vacancy.size() == 0:
            raise IOError, "No target vacancy defined for year %s." % current_year

        self.existing_units = {}  # total existing units by land_use type
        self.occupied_units = {}  # total occupied units by land_use type
        self.proposed_units = {}  # total proposed units by land_use type
        self.demolished_units = {}  # total (to be) demolished units by land_use type

        components_building_type_ids = self.proposal_component_set.get_attribute("building_type_id").astype("int32")
        proposal_ids = self.proposal_set.get_id_attribute()
        proposal_ids_in_component_set = self.proposal_component_set.get_attribute("proposal_id")
        all_units_proposed = self.proposal_component_set.get_attribute("units_proposed")
        number_of_components_in_proposals = self.proposal_set.get_attribute("number_of_components")

        self.accepting_proposals = zeros(
            current_target_vacancy.get_attribute("building_type_id").max() + 1, dtype="bool8"
        )  # whether accepting new proposals, for each building type
        self.accepted_proposals = []  # index of accepted proposals

        self.target_vacancies = {}
        tv_building_types = current_target_vacancy.get_attribute("building_type_id")
        tv_rate = current_target_vacancy.get_attribute("target_vacancy_rate")
        for itype in range(tv_building_types.size):
            self.target_vacancies[tv_building_types[itype]] = tv_rate[itype]

        self.check_vacancy_rates(
            current_target_vacancy
        )  # initialize self.accepting_proposals based on current vacancy rates

        sqft_per_job = self.dataset_pool.get_dataset("building_sqft_per_job")
        zones_of_proposals = self.proposal_set.get_attribute("zone_id")
        self.building_sqft_per_job_table = sqft_per_job.get_building_sqft_as_table(
            zones_of_proposals.max(), tv_building_types.max()
        )
        # consider only those proposals that have all components of accepted type and sum of proposed units > 0
        is_accepted_type = self.accepting_proposals[components_building_type_ids]
        sum_is_accepted_type_over_proposals = array(
            ndimage.sum(is_accepted_type, labels=proposal_ids_in_component_set, index=proposal_ids)
        )
        sum_of_units_proposed = array(
            ndimage.sum(all_units_proposed, labels=proposal_ids_in_component_set, index=proposal_ids)
        )
        is_proposal_eligible = logical_and(
            sum_is_accepted_type_over_proposals == number_of_components_in_proposals, sum_of_units_proposed > 0
        )

        is_proposal_eligible = logical_and(
            is_proposal_eligible, self.proposal_set.get_attribute("start_year") == current_year
        )
        ## handle planned proposals: all proposals with status_id == id_planned
        ## and start_year == current_year are accepted
        planned_proposal_indexes = where(
            logical_and(
                self.proposal_set.get_attribute("status_id") == self.proposal_set.id_planned,
                self.proposal_set.get_attribute("start_year") == current_year,
            )
        )[0]

        self.consider_proposals(planned_proposal_indexes, force_accepting=True)
        # consider proposals (in this order: planned, proposed, tentative)
        for status in [self.proposal_set.id_proposed, self.proposal_set.id_tentative]:
            idx = where(logical_and(self.proposal_set.get_attribute("status_id") == status, is_proposal_eligible))[0]
            if idx.size <= 0:
                continue
            logger.log_status("Sampling from %s eligible proposals with status %s." % (idx.size, status))
            while True in self.accepting_proposals:
                if self.weight[idx].sum() == 0.0:
                    logger.log_warning("Running out of proposals; there aren't any proposals with non-zero weight")
                    break

                idx = idx[self.weight[idx] > 0]
                n = minimum(idx.size, n)
                sampled_proposal_indexes = probsample_noreplace(
                    proposal_ids[idx],
                    n,
                    prob_array=(self.weight[idx] / float(self.weight[idx].sum())),
                    exclude_index=None,
                    return_index=True,
                )
                self.consider_proposals(arange(self.proposal_set.size())[idx[sampled_proposal_indexes]])
                self.weight[idx[sampled_proposal_indexes]] = 0

        # set status of accepted proposals to 'active'
        self.proposal_set.modify_attribute(
            name="status_id", data=self.proposal_set.id_active, index=array(self.accepted_proposals, dtype="int32")
        )
        building_types = self.dataset_pool.get_dataset("building_type")
        logger.log_status("Status of %s development proposals set to active." % len(self.accepted_proposals))
        logger.log_status("Target/existing vacancy rates (reached using eligible proposals) by building type:")
        for type_id in self.existing_units.keys():
            units_stock = self._get_units_stock(type_id)
            vr = self._get_vacancy_rates(type_id)
            ## units = residential_units if building_type is residential
            ## units = number of job spaces if building_type is non-residential
            logger.log_status(
                """%(type_id)s[%(type_name)s]: %(vr)s = ((existing_units:%(existing_units)s + 
                              units_proposed:%(units_proposed)s - units_to_be_demolished:%(units_demolished)s) 
                              - units_occupied:%(units_occupied)s) / units_stock:%(units_stock)s"""
                % {
                    "type_id": type_id,
                    "type_name": building_types.get_attribute_by_id("building_type_name", type_id),
                    "vr": vr,
                    "existing_units": int(self.existing_units[type_id]),
                    "units_occupied": int(self.occupied_units[type_id]),
                    "units_proposed": int(self.proposed_units[type_id]),
                    "units_demolished": int(self.demolished_units[type_id]),
                    "units_stock": int(units_stock),
                }
            )
        # Code added by Jesse Ayers, MAG, 7/20/2009
        # Get the active projects:
        stat_id = self.proposal_set.get_attribute("status_id")
        actv = where(stat_id == 1)[0]
        # Where there are active projects, compute the total_land_area_taken
        # and store it on the development_project_proposals dataset
        # so it can be used by the building_construction_model for the proper
        # computation of units_proposed for those projects with velocity curves
        if actv.size > 0:
            total_land_area_taken_computed = self.proposal_set.get_attribute(
                "urbansim_parcel.development_project_proposal.land_area_taken"
            )
            self.proposal_set.modify_attribute("total_land_area_taken", total_land_area_taken_computed[actv], actv)

        return (self.proposal_set, self.demolished_buildings)
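
Both this run method and the consider_proposals method in the next example test proposal eligibility the same way: sum a per-component boolean grouped by proposal_id and compare it with number_of_components, so a proposal qualifies only if every one of its components has an accepted building type. A compact sketch with invented ids:

import numpy as np
from scipy import ndimage

proposal_ids            = np.array([1, 2, 3])
number_of_components    = np.array([2, 1, 2])
component_proposal_id   = np.array([1, 1, 2, 3, 3])
component_building_type = np.array([4, 12, 4, 4, 19])
accepting = {4: True, 12: True, 19: False}  # assumed per-type acceptance flags

is_accepted_type = np.array([accepting[t] for t in component_building_type])
accepted_per_proposal = ndimage.sum(is_accepted_type,
                                    labels=component_proposal_id,
                                    index=proposal_ids)
is_eligible = accepted_per_proposal == number_of_components
print(is_eligible)  # -> [ True  True False]; proposal 3 has a rejected component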
Code Example #10
    def consider_proposals(self, proposal_indexes, force_accepting=False):

        proposals_parcel_ids = self.proposal_set.get_attribute("parcel_id")

        components_building_type_ids = self.proposal_component_set.get_attribute("building_type_id").astype("int32")
        proposal_ids = self.proposal_set.get_id_attribute()
        proposal_ids_in_component_set = self.proposal_component_set.get_attribute("proposal_id")
        all_units_proposed = self.proposal_component_set.get_attribute("units_proposed")
        is_component_residential = self.proposal_component_set.get_attribute("is_residential")
        number_of_components_in_proposals = self.proposal_set.get_attribute("number_of_components")
        zones_of_proposals = self.proposal_set.get_attribute("zone_id")
        is_proposal_rejected = zeros(proposal_indexes.size, dtype=bool8)
        proposal_site = proposals_parcel_ids[proposal_indexes]
        is_redevelopment = self.proposal_set.get_attribute_by_index("is_redevelopment", proposal_indexes)
        buildings = self.dataset_pool.get_dataset("building")
        building_site = buildings.get_attribute("parcel_id")
        #        building_existing_units = buildings.get_attribute("existing_units")
        is_residential = buildings.get_attribute("is_residential")
        building_type_ids = buildings.get_attribute("building_type_id")
        building_ids = buildings.get_id_attribute()

        for i in range(proposal_indexes.size):
            if not (True in self.accepting_proposals) and not (force_accepting):
                # if none of the types is accepting proposals, exit;
                # this check sits inside the loop because the last accepted proposal
                # may already have satisfied the target vacancy rates for all types
                return
            if is_proposal_rejected[i]:
                continue
            proposal_index = proposal_indexes[i]  # consider 1 proposed project at a time
            proposal_index_in_component_set = where(proposal_ids_in_component_set == proposal_ids[proposal_index])[0]
            units_proposed = all_units_proposed[proposal_index_in_component_set]
            component_types = components_building_type_ids[proposal_index_in_component_set]
            is_this_component_residential = is_component_residential[proposal_index_in_component_set]
            this_site = proposal_site[i]

            if is_redevelopment[i]:  # redevelopment proposal
                affected_building_index = where(building_site == this_site)[0]
                for this_building in affected_building_index:
                    this_building_type = building_type_ids[this_building]
                    if this_building_type in self.existing_units.keys():
                        _unit_name = self.unit_name[is_residential[this_building]]
                        self.demolished_units[this_building_type] += buildings.get_attribute(_unit_name)[
                            this_building
                        ]  # demolish affected buildings

                    self.demolished_buildings = concatenate(
                        (self.demolished_buildings, array([building_ids[this_building]]))
                    )
            #                self.occupied_units[type_id] = buildings.get_attribute("occupied_%s" % unit_name)[is_matched_type].astype("float32").sum()

            for itype_id in range(component_types.size):
                # this loop is only needed when a proposal can provide units of more than one generic building type
                type_id = component_types[itype_id]

                if is_this_component_residential[itype_id]:
                    self.proposed_units[type_id] += units_proposed[itype_id]
                else:  # translate from building_sqft to number of job spaces
                    self.proposed_units[type_id] += (
                        units_proposed[itype_id]
                        / self.building_sqft_per_job_table[zones_of_proposals[proposal_indexes[i]], type_id]
                    )
                if not force_accepting:
                    ## consider whether target vacancy rates have been achieved if not force_accepting
                    units_stock = self._get_units_stock(type_id)
                    vr = self._get_vacancy_rates(type_id)
                    if vr >= self.target_vacancies[type_id]:
                        ## not accepting proposals of this type
                        self.accepting_proposals[type_id] = False
                        ## reject all proposals to be processed that have one of the components of this type
                        consider_idx = proposal_indexes[(i + 1) : proposal_indexes.size]
                        if consider_idx.size > 0:
                            is_accepted_type = self.accepting_proposals[components_building_type_ids]
                            sum_is_accepted_type_over_proposals = array(
                                ndimage.sum(
                                    is_accepted_type,
                                    labels=proposal_ids_in_component_set,
                                    index=proposal_ids[consider_idx],
                                )
                            )
                            is_rejected_indices = where(
                                sum_is_accepted_type_over_proposals < number_of_components_in_proposals[consider_idx]
                            )[0]
                            is_proposal_rejected[arange((i + 1), proposal_indexes.size)[is_rejected_indices]] = True
                            self.weight[consider_idx[is_rejected_indices]] = 0.0

            if not is_proposal_rejected[i]:
                # proposal accepted
                self.accepted_proposals.append(proposal_index)
            # reject all pending proposals for this site (a site can accept only one proposal in any given year)
            is_proposal_rejected[proposal_site == this_site] = True
            # don't consider proposals for this site in future sampling (in this year's developer model)
            self.weight[proposals_parcel_ids == this_site] = 0.0
            # if all proposals in proposal_indexes have been rejected, return
            if is_proposal_rejected.sum() == is_proposal_rejected.size:
                return
Code Example #11
File: estimator.py Project: emiliom/DRCOG_Urbansim
    def create_prediction_success_table(self, 
                                        summarize_by=None, 
                                        predicted_choice_id_name=None,
                                        predicted_choice_id_prefix="predicted_",
                                        log_to_file=None,
                                        force_predict=True):
        agents = self.get_agent_set()
        choices = self.get_choice_set()
        choice_id_name = choices.get_id_name()[0]
        
        if self.agents_index_for_prediction is not None:
            agents_index = self.agents_index_for_prediction
        else:
            agents_index = self.get_agent_set_index()

        if predicted_choice_id_name is None or len(predicted_choice_id_name) == 0:
            predicted_choice_id_name = predicted_choice_id_prefix + choice_id_name
            
        if force_predict or (predicted_choice_id_name not in agents.get_known_attribute_names()):
            if not self.predict(predicted_choice_id_name=predicted_choice_id_name,
                                agents_index=agents_index
                                ):
                logger.log_error("Failed to run simulation for prediction; unable to create prediction success table.")
                return

        if log_to_file is not None and len(log_to_file) > 0:
            logger.enable_file_logging(log_to_file)
            
        ## by default, compare the predicted choice with the observed choice;
        ## this is not feasible for a location choice model, where the 
        ## alternative set is too large for a direct comparison to be useful
        if summarize_by is None:
            summarize_by = "%s.%s" % (agents.dataset_name, choice_id_name)
            
        summarize_dataset_name = VariableName(summarize_by).get_dataset_name()
        if summarize_dataset_name == choices.dataset_name:
            summary_id = choices.compute_variables(summarize_by)
            
            chosen_choice_id = agents.get_attribute_by_index(choices.get_id_name()[0], agents_index)
            predicted_choice_id = agents.get_attribute_by_index(predicted_choice_id_name, agents_index)
            chosen_choice_index = choices.get_id_index(chosen_choice_id)
            predicted_choice_index = choices.try_get_id_index(predicted_choice_id)
            
            chosen_summary_id = summary_id[chosen_choice_index]
            predicted_summary_id = summary_id[predicted_choice_index]
    
            unique_summary_id = unique(summary_id)
        elif summarize_dataset_name == agents.dataset_name:
            chosen_summary_id = agents.compute_variables(summarize_by)[agents_index]
            
            chosen_choice_id = agents.get_attribute(choice_id_name).copy()
            predicted_choice_id = agents.get_attribute(predicted_choice_id_name)
            agents.modify_attribute(name=choice_id_name, data=predicted_choice_id)
            predicted_summary_id = agents.compute_variables(summarize_by)[agents_index]
            
            agents.modify_attribute(name=choice_id_name, data=chosen_choice_id)
    
            unique_summary_id = unique( concatenate((chosen_summary_id, predicted_summary_id)) )
        else:
            logger.log_error("summarize_by expression '%s' is specified for dataset %s, which is neither the choice_set '%s' nor the agent_set '%s'." 
                             % (summarize_by, summarize_dataset_name, choices.dataset_name, agents.dataset_name))
            return False

        unique_nonneg_summary_id = unique_summary_id[unique_summary_id >= 0] 
        # observed on row, predicted on column
        prediction_matrix = zeros( (unique_nonneg_summary_id.size, unique_nonneg_summary_id.size), dtype="int32" )

        def _convert_array_to_tab_delimited_string(an_array):
            from numpy import dtype
            if an_array.dtype == dtype('f'):
                return "\t".join(["%5.4f" % item for item in an_array])
            return "\t".join([str(item) for item in an_array])
        
        logger.log_status("Observed_id\tSuccess_rate\t%s" % \
                          _convert_array_to_tab_delimited_string(unique_nonneg_summary_id) )
        i = 0
        total_correct = 0
        success_rate = zeros( unique_nonneg_summary_id.size, dtype="float32" )
        for observed_id in unique_nonneg_summary_id:
            predicted_id = predicted_summary_id[chosen_summary_id==observed_id]
            prediction_matrix[i] = ndimage.sum(ones(predicted_id.size), labels=predicted_id, index=unique_nonneg_summary_id )
            if prediction_matrix[i].sum() > 0:
                success_rate[i] = float(prediction_matrix[i, i]) / prediction_matrix[i].sum()
                total_correct = total_correct + prediction_matrix[i, i]
            else:
                success_rate[i] = 0
            logger.log_status("%s\t\t%5.4f\t\t%s" % (observed_id, success_rate[i], 
                                              _convert_array_to_tab_delimited_string(prediction_matrix[i]) ) )
            i+=1

        success_rate2 = zeros( i, dtype="float32" )
        for j in range(i):
            if prediction_matrix[j, :].sum() > 0:
                success_rate2[j]=float(prediction_matrix[:,j].sum()) / prediction_matrix[j, :].sum()
            else:
                success_rate2[j]=0
        logger.log_status("%s\t\t%s\t\t%s" % (' ', ' ', 
                                                 _convert_array_to_tab_delimited_string( success_rate2 ) ))
        logger.log_status("\nTotal success rate: %5.4f" % (total_correct/float(prediction_matrix.sum())))
        logger.disable_file_logging(filename=log_to_file)
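
At its core the table is a confusion matrix assembled row by row: for each observed id, ndimage.sum counts ones grouped by the predicted id. A self-contained sketch with made-up observed and predicted ids:

import numpy as np
from scipy import ndimage

observed  = np.array([1, 1, 2, 2, 2, 3])
predicted = np.array([1, 2, 2, 2, 3, 3])
ids = np.unique(np.concatenate((observed, predicted)))

matrix = np.zeros((ids.size, ids.size), dtype="int32")  # observed rows, predicted columns
for i in range(ids.size):
    predicted_for_observed = predicted[observed == ids[i]]
    matrix[i] = ndimage.sum(np.ones(predicted_for_observed.size),
                            labels=predicted_for_observed, index=ids)
print(matrix)
# -> [[1 1 0]
#     [0 2 1]
#     [0 0 1]]
print(np.trace(matrix) / float(matrix.sum()))  # total success rate -> 0.6667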