def create_scaled_array(characteristics, agent_set):
    """Build boolean bin-membership arrays of the agents for each characteristic.

    The characteristic names are read from the "characteristic_opus"
    attribute of `characteristics` when that attribute is known, otherwise
    from "characteristic", and are lower-cased.  Every row of
    `characteristics` carrying a given name defines one bin, bounded below by
    that row's "min" and above by its "max"; a negative "max" means the bin
    has no upper bound.

    Returns a tuple ``(scaled_array, categories)`` where `categories` is the
    result of ``get_distinct_names`` on the lower-cased names and
    `scaled_array` is a dictionary holding, for every category ``cat``:

    * ``cat`` -- 2-d boolean array with one row per bin and one column per
      agent; entry [j, k] is True if the value of agent k's attribute ``cat``
      lies in bin j (bounds inclusive),
    * ``cat + "_index_mapping"`` -- dictionary {(min, max): row index},
    * ``cat + "_index"`` -- dictionary {row index: (min, max)}.
    """
    if "characteristic_opus" in characteristics.get_known_attribute_names():
        # to be changed to 'characteristic'
        raw_names = characteristics.get_attribute("characteristic_opus")
    else:
        raw_names = characteristics.get_attribute("characteristic")
    # Build a real list first: wrapping a lazy map() in array() would produce
    # a 0-d object array on Python 3.
    all_categories = array([name.lower() for name in raw_names])
    categories = get_distinct_names(all_categories)
    mins = characteristics.get_attribute("min")
    maxs = characteristics.get_attribute("max")
    n_agents = agent_set.size()

    scaled_array = {}
    for cat in categories:
        index_mapping = {}
        index = {}
        rows = []
        # The agents' values do not depend on the bin, so fetch them once.
        agent_values = agent_set.get_attribute(cat)
        for j, i in enumerate(where(all_categories == cat)[0]):
            this_min = mins[i]
            this_max = maxs[i]
            is_in_cat = array(agent_values >= this_min)
            if this_max >= 0:  # there is a maximum
                is_in_cat = logical_and(is_in_cat, array(agent_values <= this_max))
            # Positional shape argument: the keyword name differs between
            # numpy releases.
            rows.append(reshape(is_in_cat, (1, n_agents)))
            index_mapping[(this_min, this_max)] = j
            index[j] = (this_min, this_max)
        scaled_array[cat + "_index_mapping"] = index_mapping
        scaled_array[cat + "_index"] = index
        if rows:
            # Stack all bins at once instead of re-copying the matrix per bin.
            scaled_array[cat] = concatenate(rows)
    return (scaled_array, categories)
    def run(self, year, household_set, control_totals, characteristics, resources=None):
        """Run the model for `year`, one subarea (``self.subarea_id_name``) at a time.

        For every distinct subarea id found in `control_totals`, the control
        totals of that subarea and year and the households located in that
        subarea are passed to ``_do_run_for_this_year``.  Subareas without any
        household are skipped.  The indices collected by each such call in
        ``self.remove_households`` and ``self.mapping_existing_hhs_to_new_hhs``
        are relative to the subarea subset and are translated here into
        indices of the whole `household_set`.  Finally the household set is
        updated via ``_update_household_set``.

        Arguments:
        year -- year for which control totals are selected
        household_set -- dataset of households; modified by this method
        control_totals -- dataset with attributes "year",
            "total_number_of_households" and the subarea id
        characteristics -- dataset with a "characteristic" attribute
        resources -- not used by this method

        Returns an index array of the new households within `household_set`
        (computed as the last entries of the set -- this presumes that
        ``_update_household_set`` appends new households at the end).

        Raises ValueError if "year" or the subarea id name is not among the
        id names of `control_totals`.
        """
        self._do_initialize_for_run(household_set)
        control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
        self.characteristics = characteristics
        raw_categories = self.characteristics.get_attribute("characteristic")
        # Build a real list first: wrapping a lazy map() in array() would
        # produce a 0-d object array on Python 3.
        self.all_categories = array([name.lower() for name in raw_categories])
        self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
        # The marginal characteristics are the id names of the control totals
        # except for the year and the subarea id.
        self.marginal_characteristic_names = copy(control_totals.get_id_name())
        self.marginal_characteristic_names.remove("year")
        self.marginal_characteristic_names.remove(self.subarea_id_name)
        region_ids = control_totals.get_attribute(self.subarea_id_name)
        households_region_ids = household_set.compute_one_variable_with_unknown_package(
            variable_name="%s" % (self.subarea_id_name), dataset_pool=self.dataset_pool)

        is_year = control_totals.get_attribute("year") == year
        all_households_index = arange(household_set.size())
        for area in unique(region_ids):
            totals_index = where(logical_and(is_year, region_ids == area))[0]
            self.control_totals_for_this_year = DatasetSubset(control_totals, totals_index)
            households_index = where(households_region_ids == area)[0]
            if households_index.size == 0:
                continue
            households_for_this_area = DatasetSubset(household_set, households_index)
            logger.log_status("HTM for area %s (currently %s households)" % (area, households_for_this_area.size()))
            # Remember how many entries earlier subareas produced, so that
            # only the entries added for this subarea are post-processed.
            last_remove_idx = self.remove_households.size
            last_new_hhs_idx = self.mapping_existing_hhs_to_new_hhs.size
            self._do_run_for_this_year(households_for_this_area)
            # Number of entries in new_households that have a location id
            # but no subarea id yet.
            n_without_subarea = (self.new_households[self.location_id_name].size
                                 - self.new_households[self.subarea_id_name].size)
            add_hhs_size = n_without_subarea + self.mapping_existing_hhs_to_new_hhs.size - last_new_hhs_idx
            remove_hhs_size = self.remove_households.size - last_remove_idx
            logger.log_status("add %s, remove %s, total %s" % (
                add_hhs_size, remove_hhs_size,
                households_for_this_area.size() + add_hhs_size - remove_hhs_size))
            # Assign the current subarea to those entries.
            self.new_households[self.subarea_id_name] = concatenate((
                self.new_households[self.subarea_id_name],
                array(n_without_subarea * [area], dtype="int32")))
            # transform indices of removing households into indices of the whole dataset
            self.remove_households[last_remove_idx:] = all_households_index[
                households_index[self.remove_households[last_remove_idx:]]]
            # do the same for households to be duplicated
            self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:] = all_households_index[
                households_index[self.mapping_existing_hhs_to_new_hhs[last_new_hhs_idx:]]]

        self._update_household_set(household_set)
        idx_new_households = arange(household_set.size() - self.new_households[self.subarea_id_name].size,
                                    household_set.size())
        # Replace the subarea id attribute by a copy of itself registered as a
        # primary attribute.
        region_ids = household_set.get_attribute(self.subarea_id_name).copy()
        household_set.delete_one_attribute(self.subarea_id_name)
        household_set.add_attribute(region_ids, self.subarea_id_name, metadata=AttributeType.PRIMARY)
        # return an index of new households
        return idx_new_households
 def run(self, year, household_set, control_totals, characteristics, resources=None):
     """Run the model for `year` on the whole `household_set`.

     Selects the rows of `control_totals` whose "year" equals `year`, stores
     them in ``self.control_totals_for_this_year`` and hands `household_set`
     to ``_do_run_for_this_year``.

     Arguments:
     year -- year for which control totals are selected
     household_set -- dataset of households
     control_totals -- dataset with attributes "year" and
         "total_number_of_households"
     characteristics -- dataset with a "characteristic" attribute
     resources -- not used by this method

     Returns whatever ``_update_household_set(household_set)`` returns.

     Raises ValueError if "year" is not among the id names of
     `control_totals`.
     """
     self._do_initialize_for_run(household_set)
     control_totals.get_attribute("total_number_of_households") # to make sure they are loaded
     self.characteristics = characteristics
     raw_categories = self.characteristics.get_attribute("characteristic")
     # Build a real list first: wrapping a lazy map() in array() would
     # produce a 0-d object array on Python 3.
     self.all_categories = array([name.lower() for name in raw_categories])
     self.scaled_characteristic_names = get_distinct_names(self.all_categories).tolist()
     # The marginal characteristics are the id names of the control totals
     # except for the year.
     self.marginal_characteristic_names = copy(control_totals.get_id_name())
     self.marginal_characteristic_names.remove("year")
     rows_of_this_year = where(control_totals.get_attribute("year") == year)[0]
     self.control_totals_for_this_year = DatasetSubset(control_totals, rows_of_this_year)
     self._do_run_for_this_year(household_set)
     return self._update_household_set(household_set)
 def get_distinct_names(self):
     """Return the distinct names among those given by ``self.get_names()``.

     The work is delegated to the function ``get_distinct_names``; inside a
     method body that bare name is looked up in module scope, so it does not
     refer to this method.
     """
     names = self.get_names()
     return get_distinct_names(names)