def test_accounting_attribute(self):
        """Run TransitionModel with a dataset accounting attribute ('jobs')
        and verify that both the overall job count and the per-sector job
        counts match the year-2000 control totals.
        """
        control_totals_data = {
            "year":           array([2000, 2000, 2000, 2001]),
            "sector_id":      array([1, 2, 3, 2]),
            "number_of_jobs": array([25013, 1513, 5000, 10055]),
            }

        # 1500 businesses, 10 jobs each: 500 in each of sectors 1, 2, 3.
        businesses_data = {
            "business_id": arange(1500) + 1,
            "grid_id":     array(1500 * [1]),
            "sector_id":   array(500 * [1] + 500 * [2] + 500 * [3]),
            "jobs":        array(1500 * [10]),
            }

        storage = StorageFactory().get_storage('dict_storage')

        storage.write_table(table_name='bs_set', table_data=businesses_data)
        business_set = BusinessDataset(in_storage=storage, in_table_name='bs_set')

        storage.write_table(table_name='ect_set', table_data=control_totals_data)
        control_set = ControlTotalDataset(in_storage=storage, in_table_name='ect_set',
                                          what='', id_name=[])

        model = TransitionModel(business_set, dataset_accounting_attribute='jobs',
                                control_total_dataset=control_set)
        model.run(year=2000, target_attribute_name="number_of_jobs",
                  reset_dataset_attribute_value={'grid_id': -1})

        # Grand total of jobs should match the sum of the year-2000 totals.
        result = business_set.get_attribute('jobs').sum()
        expected = [(control_set.get_attribute("number_of_jobs")[0:3]).sum()]
        self.assertEqual(ma.allclose(expected, result, rtol=10),
                         True, "Error, should_be: %s, but result: %s" % (expected, result))

        # Per-sector totals should match the corresponding control totals.
        n_categories = 3
        result = zeros(n_categories, dtype=int32)
        for idx in range(n_categories):
            in_sector = business_set.get_attribute('sector_id') == control_set.get_attribute("sector_id")[idx]
            result[idx] = (business_set.get_attribute('jobs') * in_sector).sum()
        expected = control_set.get_attribute("number_of_jobs")[0:3]
        self.assertEqual(ma.allclose(result, expected, rtol=10),
                         True, "Error, should_be: %s, but result: %s" % (expected, result))
 def run(self, in_storage, out_storage, business_table="business", jobs_table="jobs", control_totals_table=None):
     """Unroll the business table into a table of individual jobs.

     Every business record is expanded into ``number_of_jobs`` job rows,
     copying sector and geography ids and, when present in the business
     table, building/parcel ids and the home-based flag.  Per-job sqft is
     either the building sqft or (if ``self.compute_sqft_per_job``) a
     derived sqft-per-employee value.  If ``control_totals_table`` is
     given, surplus jobs are removed per (zone_id, sector_id) pair so the
     result does not exceed the control totals; jobs are never added.
     The resulting table is written to ``out_storage`` as ``jobs_table``.

     Reads configuration from ``self``: ``number_of_jobs_attr``,
     ``geography_id_attr``, ``sqft_attr``, ``compute_sqft_per_job``,
     ``minimum_sqft``, ``maximum_sqft`` and
     ``unplace_jobs_with_non_existing_buildings``.
     """
     logger.log_status("Unrolling %s table." % business_table)
     # get attributes from the establishments table
     business_dataset = BusinessDataset(in_storage=in_storage, in_table_name=business_table)
     business_sizes = business_dataset.get_attribute(self.number_of_jobs_attr).astype("int32")
     sectors = business_dataset.get_attribute("sector_id")
     tazes = business_dataset.get_attribute(self.geography_id_attr).astype("int32")
     # Optional attributes: an empty array means "not present in the table".
     building_ids = array([], dtype='int32')
     if "building_id" in business_dataset.get_primary_attribute_names():
         building_ids = business_dataset.get_attribute("building_id")
     parcel_ids = array([], dtype='int32')
     if "parcel_id" in business_dataset.get_primary_attribute_names():
         parcel_ids = business_dataset.get_attribute("parcel_id")
     home_based = array([], dtype='int16')
     if "home_based" in business_dataset.get_primary_attribute_names():
         home_based = business_dataset.get_attribute("home_based")
     building_sqft = business_dataset.get_attribute(self.sqft_attr)
     building_sqft[building_sqft <= 0] = 0  # treat non-positive sqft as missing (0)
     join_flags = None
     if "join_flag" in business_dataset.get_primary_attribute_names():
         join_flags = business_dataset.get_attribute("join_flag")
     impute_sqft_flag = None
     if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names():
         impute_sqft_flag = business_dataset.get_attribute("impute_building_sqft_flag")
     
     # initialize jobs attributes; -1 marks values that may stay unset below,
     # while the arrays built from an empty template are fully overwritten
     # in the per-business loop.
     total_size = business_sizes.sum()
     jobs_data = {}
     jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype), total_size)
     jobs_data["building_id"] = resize(array([-1], dtype=building_ids.dtype), total_size)
     jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype), total_size)
     jobs_data[self.geography_id_attr] = resize(array([-1], dtype=tazes.dtype), total_size)
     jobs_data["building_type"] = resize(array([-1], dtype=home_based.dtype), total_size)
     jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype), total_size)
     if join_flags is not None:
         jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype), total_size)
     if impute_sqft_flag is not None:
         jobs_data["impute_building_sqft_flag"] = resize(array([], dtype=impute_sqft_flag.dtype), total_size)
     
     indices = cumsum(business_sizes)  # exclusive end index of each business' job slice
     # iterate over establishments. For each business create the corresponding number of jobs by filling the corresponding part 
     # of the arrays
     start_index=0
     for i in range(business_dataset.size()):
         end_index = indices[i]
         jobs_data["sector_id"][start_index:end_index] = sectors[i]
         if building_ids.size > 0:
             jobs_data["building_id"][start_index:end_index] = building_ids[i]
         if parcel_ids.size > 0:
             jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
         jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
         if home_based.size > 0:
             jobs_data["building_type"][start_index:end_index] = home_based[i]
         if self.compute_sqft_per_job:
             # 90% of the building sqft divided evenly among this business' jobs
             jobs_data["sqft"][start_index:end_index] = round((building_sqft[i]-building_sqft[i]/10.0)/float(business_sizes[i])) # sqft per employee
         else:
             jobs_data["sqft"][start_index:end_index] = building_sqft[i]
         if join_flags is not None:
             jobs_data["join_flag"][start_index:end_index] = join_flags[i]
         if impute_sqft_flag is not None:
             jobs_data["impute_building_sqft_flag"][start_index:end_index]  = impute_sqft_flag[i]
         start_index = end_index
         
     jobs_data["job_id"] = arange(total_size)+1
     if self.compute_sqft_per_job:
         # cap at maximum_sqft, then raise positive values below minimum_sqft
         jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
         jobs_data["sqft"][logical_and(jobs_data["sqft"]>0, jobs_data["sqft"]<self.minimum_sqft)] = self.minimum_sqft
     
     # correct missing job_building_types
     wmissing_bt = where(jobs_data["building_type"]<=0)[0]
     if wmissing_bt.size > 0:
         jobs_data["building_type"][wmissing_bt] = 2 # assign non-homebased type for now. It can be re-classified in the assign_bldgs_to_jobs... script
     
     # create jobs table and write it out
     storage = StorageFactory().get_storage('dict_storage')
     storage.write_table(
             table_name="jobs",
             table_data=jobs_data
             )
     job_dataset = JobDataset(in_storage=storage)
     if self.unplace_jobs_with_non_existing_buildings:
         self.do_unplace_jobs_with_non_existing_buildings(job_dataset, out_storage)
     
     # Match to control totals (only eliminate jobs if control totals are smaller than the actual number of jobs). 
     if control_totals_table is not None:
         logger.log_status("Matching to control totals.")
         control_totals = ControlTotalDataset(what='employment', id_name=['zone_id', 'sector_id'], 
                                              in_table_name=control_totals_table, in_storage=in_storage)
         control_totals.load_dataset(attributes=['zone_id', 'sector_id', 'jobs'])
         zones_sectors = control_totals.get_id_attribute()
         njobs = control_totals.get_attribute('jobs')
         remove = array([], dtype='int32')
         for i in range(zones_sectors.shape[0]):
             zone, sector = zones_sectors[i,:]
             in_sector = job_dataset.get_attribute("sector_id") == sector
             in_zone_in_sector = logical_and(in_sector, job_dataset.get_attribute("zone_id") == zone)
             if in_zone_in_sector.sum() <= njobs[i]:
                 continue  # at or below the control total; nothing to remove
             to_be_removed = in_zone_in_sector.sum() - njobs[i]
             this_removal = 0
             not_considered = ones(job_dataset.size(), dtype='bool8')
             for unit in ['parcel_id', 'building_id', None]: # first consider jobs without parcel id, then without building_id, then all
                 if unit is not None:
                     wnunit = job_dataset.get_attribute(unit) <= 0
                     eligible = logical_and(not_considered, logical_and(in_zone_in_sector, wnunit))
                     # NOTE(review): marks every job lacking this unit as
                     # considered, not only those in this zone/sector slice.
                     not_considered[where(wnunit)] = False
                 else:
                     eligible = logical_and(not_considered, in_zone_in_sector)
                 eligible_sum = eligible.sum()
                 if eligible_sum > 0:
                     where_eligible = where(eligible)[0]
                     if eligible_sum <= to_be_removed-this_removal:
                         draw = arange(eligible_sum)  # take all eligible jobs
                     else:
                         draw = sample_noreplace(where_eligible, to_be_removed-this_removal, eligible_sum)
                     remove = concatenate((remove, where_eligible[draw]))
                     this_removal += draw.size
                     if this_removal >= to_be_removed:
                         break
             
         job_dataset.remove_elements(remove)
         logger.log_status("%s jobs removed." % remove.size)
         
     
     logger.log_status("Write jobs table.")
     job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage)
     logger.log_status("Created %s jobs." % job_dataset.size())
    def run(self,
            in_storage,
            out_storage,
            business_table="business",
            jobs_table="jobs",
            control_totals_table=None):
        """Unroll the business table into a table of individual jobs.

        Every business record is expanded into ``number_of_jobs`` job rows,
        copying sector and geography ids and, when present in the business
        table, building/parcel ids and the home-based flag.  Per-job sqft
        is either the building sqft or (if ``self.compute_sqft_per_job``)
        a derived sqft-per-employee value.  If ``control_totals_table`` is
        given, surplus jobs are removed per (zone_id, sector_id) pair so
        the result does not exceed the control totals; jobs are never
        added.  The resulting table is written to ``out_storage`` as
        ``jobs_table``.

        Reads configuration from ``self``: ``number_of_jobs_attr``,
        ``geography_id_attr``, ``sqft_attr``, ``compute_sqft_per_job``,
        ``minimum_sqft``, ``maximum_sqft`` and
        ``unplace_jobs_with_non_existing_buildings``.
        """
        logger.log_status("Unrolling %s table." % business_table)
        # get attributes from the establishments table
        business_dataset = BusinessDataset(in_storage=in_storage,
                                           in_table_name=business_table)
        business_sizes = business_dataset.get_attribute(
            self.number_of_jobs_attr).astype("int32")
        sectors = business_dataset.get_attribute("sector_id")
        tazes = business_dataset.get_attribute(
            self.geography_id_attr).astype("int32")
        # Optional attributes: an empty array means "not present in the table".
        building_ids = array([], dtype='int32')
        if "building_id" in business_dataset.get_primary_attribute_names():
            building_ids = business_dataset.get_attribute("building_id")
        parcel_ids = array([], dtype='int32')
        if "parcel_id" in business_dataset.get_primary_attribute_names():
            parcel_ids = business_dataset.get_attribute("parcel_id")
        home_based = array([], dtype='int16')
        if "home_based" in business_dataset.get_primary_attribute_names():
            home_based = business_dataset.get_attribute("home_based")
        building_sqft = business_dataset.get_attribute(self.sqft_attr)
        building_sqft[building_sqft <= 0] = 0  # treat non-positive sqft as missing (0)
        join_flags = None
        if "join_flag" in business_dataset.get_primary_attribute_names():
            join_flags = business_dataset.get_attribute("join_flag")
        impute_sqft_flag = None
        if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names(
        ):
            impute_sqft_flag = business_dataset.get_attribute(
                "impute_building_sqft_flag")

        # initialize jobs attributes; -1 marks values that may stay unset
        # below, while the arrays built from an empty template are fully
        # overwritten in the per-business loop.
        total_size = business_sizes.sum()
        jobs_data = {}
        jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype),
                                        total_size)
        jobs_data["building_id"] = resize(
            array([-1], dtype=building_ids.dtype), total_size)
        jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype),
                                        total_size)
        jobs_data[self.geography_id_attr] = resize(
            array([-1], dtype=tazes.dtype), total_size)
        jobs_data["building_type"] = resize(
            array([-1], dtype=home_based.dtype), total_size)
        jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype),
                                   total_size)
        if join_flags is not None:
            jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype),
                                            total_size)
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"] = resize(
                array([], dtype=impute_sqft_flag.dtype), total_size)

        # exclusive end index of each business' job slice
        indices = cumsum(business_sizes)
        # iterate over establishments. For each business create the corresponding number of jobs by filling the corresponding part
        # of the arrays
        start_index = 0
        for i in range(business_dataset.size()):
            end_index = indices[i]
            jobs_data["sector_id"][start_index:end_index] = sectors[i]
            if building_ids.size > 0:
                jobs_data["building_id"][start_index:end_index] = building_ids[
                    i]
            if parcel_ids.size > 0:
                jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
            jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
            if home_based.size > 0:
                jobs_data["building_type"][start_index:end_index] = home_based[
                    i]
            if self.compute_sqft_per_job:
                # 90% of the building sqft divided evenly among this
                # business' jobs
                jobs_data["sqft"][start_index:end_index] = round(
                    (building_sqft[i] - building_sqft[i] / 10.0) /
                    float(business_sizes[i]))  # sqft per employee
            else:
                jobs_data["sqft"][start_index:end_index] = building_sqft[i]
            if join_flags is not None:
                jobs_data["join_flag"][start_index:end_index] = join_flags[i]
            if impute_sqft_flag is not None:
                jobs_data["impute_building_sqft_flag"][
                    start_index:end_index] = impute_sqft_flag[i]
            start_index = end_index

        jobs_data["job_id"] = arange(total_size) + 1
        if self.compute_sqft_per_job:
            # cap at maximum_sqft, then raise positive values below
            # minimum_sqft
            jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
            jobs_data["sqft"][logical_and(
                jobs_data["sqft"] > 0,
                jobs_data["sqft"] < self.minimum_sqft)] = self.minimum_sqft

        # correct missing job_building_types
        wmissing_bt = where(jobs_data["building_type"] <= 0)[0]
        if wmissing_bt.size > 0:
            jobs_data["building_type"][
                wmissing_bt] = 2  # assign non-homebased type for now. It can be re-classified in the assign_bldgs_to_jobs... script

        # create jobs table and write it out
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name="jobs", table_data=jobs_data)
        job_dataset = JobDataset(in_storage=storage)
        if self.unplace_jobs_with_non_existing_buildings:
            self.do_unplace_jobs_with_non_existing_buildings(
                job_dataset, out_storage)

        # Match to control totals (only eliminate jobs if control totals are smaller than the actual number of jobs).
        if control_totals_table is not None:
            logger.log_status("Matching to control totals.")
            control_totals = ControlTotalDataset(
                what='employment',
                id_name=['zone_id', 'sector_id'],
                in_table_name=control_totals_table,
                in_storage=in_storage)
            control_totals.load_dataset(
                attributes=['zone_id', 'sector_id', 'jobs'])
            zones_sectors = control_totals.get_id_attribute()
            njobs = control_totals.get_attribute('jobs')
            remove = array([], dtype='int32')
            for i in range(zones_sectors.shape[0]):
                zone, sector = zones_sectors[i, :]
                in_sector = job_dataset.get_attribute("sector_id") == sector
                in_zone_in_sector = logical_and(
                    in_sector,
                    job_dataset.get_attribute("zone_id") == zone)
                if in_zone_in_sector.sum() <= njobs[i]:
                    # at or below the control total; nothing to remove
                    continue
                to_be_removed = in_zone_in_sector.sum() - njobs[i]
                this_removal = 0
                not_considered = ones(job_dataset.size(), dtype='bool8')
                for unit in [
                        'parcel_id', 'building_id', None
                ]:  # first consider jobs without parcel id, then without building_id, then all
                    if unit is not None:
                        wnunit = job_dataset.get_attribute(unit) <= 0
                        eligible = logical_and(
                            not_considered,
                            logical_and(in_zone_in_sector, wnunit))
                        # NOTE(review): marks every job lacking this unit as
                        # considered, not only those in this zone/sector slice.
                        not_considered[where(wnunit)] = False
                    else:
                        eligible = logical_and(not_considered,
                                               in_zone_in_sector)
                    eligible_sum = eligible.sum()
                    if eligible_sum > 0:
                        where_eligible = where(eligible)[0]
                        if eligible_sum <= to_be_removed - this_removal:
                            # take all eligible jobs
                            draw = arange(eligible_sum)
                        else:
                            draw = sample_noreplace(
                                where_eligible, to_be_removed - this_removal,
                                eligible_sum)
                        remove = concatenate((remove, where_eligible[draw]))
                        this_removal += draw.size
                        if this_removal >= to_be_removed:
                            break

            job_dataset.remove_elements(remove)
            logger.log_status("%s jobs removed." % remove.size)

        logger.log_status("Write jobs table.")
        job_dataset.write_dataset(out_table_name=jobs_table,
                                  out_storage=out_storage)
        logger.log_status("Created %s jobs." % job_dataset.size())