コード例 #1
0
    def run(self,
            in_storage,
            out_storage,
            business_table="business",
            jobs_table="jobs",
            control_totals_table=None):
        """Unroll a table of businesses into one record per job and write it.

        For every business, as many job records are created as given by the
        business attribute ``self.number_of_jobs_attr``. Each job copies the
        business' ``sector_id`` and ``self.geography_id_attr`` and, when the
        business table has them, ``building_id``, ``parcel_id``,
        ``home_based`` (stored on the job as ``building_type``),
        ``join_flag`` and ``impute_building_sqft_flag``.

        The job ``sqft`` is either the business value of ``self.sqft_attr``
        or, when ``self.compute_sqft_per_job`` is set, 90% of that value
        divided by the number of jobs of the business, rounded, clipped to
        ``[0, self.maximum_sqft]`` and with positive values below
        ``self.minimum_sqft`` raised to ``self.minimum_sqft``.

        Args:
            in_storage: storage the business table (and, if requested, the
                control totals table) is read from.
            out_storage: storage the resulting jobs table is written to; it
                is also handed to
                ``do_unplace_jobs_with_non_existing_buildings``.
            business_table: name of the input business table.
            jobs_table: name of the output jobs table.
            control_totals_table: optional name of an employment control
                totals table with ``zone_id``, ``sector_id`` and ``jobs``.
                When given, jobs are removed from every (zone, sector) pair
                that has more jobs than its control total; nothing is added
                when there are fewer.

        NOTE(review): the control-total matching reads ``zone_id`` from the
        jobs, which looks like it assumes ``self.geography_id_attr`` is
        ``"zone_id"`` -- confirm against the callers.
        """
        logger.log_status("Unrolling %s table." % business_table)
        # Get attributes from the establishments table.
        business_dataset = BusinessDataset(in_storage=in_storage,
                                           in_table_name=business_table)
        primary_attributes = business_dataset.get_primary_attribute_names()
        business_sizes = business_dataset.get_attribute(
            self.number_of_jobs_attr).astype("int32")
        sectors = business_dataset.get_attribute("sector_id")
        tazes = business_dataset.get_attribute(
            self.geography_id_attr).astype("int32")
        # Optional attributes default to empty arrays; their size is used
        # below to decide whether there is anything to copy.
        building_ids = array([], dtype='int32')
        if "building_id" in primary_attributes:
            building_ids = business_dataset.get_attribute("building_id")
        parcel_ids = array([], dtype='int32')
        if "parcel_id" in primary_attributes:
            parcel_ids = business_dataset.get_attribute("parcel_id")
        home_based = array([], dtype='int16')
        if "home_based" in primary_attributes:
            home_based = business_dataset.get_attribute("home_based")
        building_sqft = business_dataset.get_attribute(self.sqft_attr)
        # Negative square footage is treated as zero.
        building_sqft[building_sqft <= 0] = 0
        join_flags = None
        if "join_flag" in primary_attributes:
            join_flags = business_dataset.get_attribute("join_flag")
        impute_sqft_flag = None
        if "impute_building_sqft_flag" in primary_attributes:
            impute_sqft_flag = business_dataset.get_attribute(
                "impute_building_sqft_flag")

        # Initialize jobs attributes: ids start as -1, the remaining
        # attributes are created from empty arrays of the source dtype.
        total_size = business_sizes.sum()
        jobs_data = {}
        jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype),
                                        total_size)
        jobs_data["building_id"] = resize(
            array([-1], dtype=building_ids.dtype), total_size)
        jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype),
                                        total_size)
        jobs_data[self.geography_id_attr] = resize(
            array([-1], dtype=tazes.dtype), total_size)
        jobs_data["building_type"] = resize(
            array([-1], dtype=home_based.dtype), total_size)
        jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype),
                                   total_size)
        if join_flags is not None:
            jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype),
                                            total_size)
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"] = resize(
                array([], dtype=impute_sqft_flag.dtype), total_size)

        # indices[i] is the end (exclusive) of the block of jobs that
        # belongs to business i.
        indices = cumsum(business_sizes)
        # Iterate over establishments. For each business create the
        # corresponding number of jobs by filling the corresponding part of
        # the arrays.
        start_index = 0
        for i in range(business_dataset.size()):
            end_index = indices[i]
            jobs_data["sector_id"][start_index:end_index] = sectors[i]
            if building_ids.size > 0:
                jobs_data["building_id"][start_index:end_index] = building_ids[
                    i]
            if parcel_ids.size > 0:
                jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
            jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
            if home_based.size > 0:
                jobs_data["building_type"][start_index:end_index] = home_based[
                    i]
            if self.compute_sqft_per_job:
                # A business without jobs fills an empty slice, so there is
                # nothing to compute; skipping it avoids dividing by zero and
                # calling round() on the resulting NaN/inf.
                if business_sizes[i] > 0:
                    # sqft per employee: 10% of the business sqft is deducted
                    # before it is split among the jobs.
                    jobs_data["sqft"][start_index:end_index] = round(
                        (building_sqft[i] - building_sqft[i] / 10.0) /
                        float(business_sizes[i]))
            else:
                jobs_data["sqft"][start_index:end_index] = building_sqft[i]
            if join_flags is not None:
                jobs_data["join_flag"][start_index:end_index] = join_flags[i]
            if impute_sqft_flag is not None:
                jobs_data["impute_building_sqft_flag"][
                    start_index:end_index] = impute_sqft_flag[i]
            start_index = end_index

        # Job ids are consecutive and start at 1.
        jobs_data["job_id"] = arange(total_size) + 1
        if self.compute_sqft_per_job:
            jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
            # Zero stays zero; only positive values are raised to the minimum.
            jobs_data["sqft"][logical_and(
                jobs_data["sqft"] > 0,
                jobs_data["sqft"] < self.minimum_sqft)] = self.minimum_sqft

        # Correct missing job_building_types: assign the non-home-based type
        # for now. It can be re-classified in the assign_bldgs_to_jobs...
        # script.
        wmissing_bt = where(jobs_data["building_type"] <= 0)[0]
        if wmissing_bt.size > 0:
            jobs_data["building_type"][wmissing_bt] = 2

        # Create the jobs table in an in-memory storage and build the dataset.
        storage = StorageFactory().get_storage('dict_storage')
        storage.write_table(table_name="jobs", table_data=jobs_data)
        job_dataset = JobDataset(in_storage=storage)
        if self.unplace_jobs_with_non_existing_buildings:
            self.do_unplace_jobs_with_non_existing_buildings(
                job_dataset, out_storage)

        # Match to control totals (only eliminate jobs if control totals are
        # smaller than the actual number of jobs).
        if control_totals_table is not None:
            logger.log_status("Matching to control totals.")
            control_totals = ControlTotalDataset(
                what='employment',
                id_name=['zone_id', 'sector_id'],
                in_table_name=control_totals_table,
                in_storage=in_storage)
            control_totals.load_dataset(
                attributes=['zone_id', 'sector_id', 'jobs'])
            zones_sectors = control_totals.get_id_attribute()
            njobs = control_totals.get_attribute('jobs')
            remove = array([], dtype='int32')
            # The jobs are not modified until after the loop, so their
            # sector and zone ids are fetched once.
            job_sectors = job_dataset.get_attribute("sector_id")
            job_zones = job_dataset.get_attribute("zone_id")
            for i in range(zones_sectors.shape[0]):
                zone, sector = zones_sectors[i, :]
                in_zone_in_sector = logical_and(job_sectors == sector,
                                                job_zones == zone)
                n_in_zone_in_sector = in_zone_in_sector.sum()
                if n_in_zone_in_sector <= njobs[i]:
                    continue
                to_be_removed = n_in_zone_in_sector - njobs[i]
                this_removal = 0
                # Jobs already offered for removal in an earlier pass are
                # excluded from the later passes. 'bool' is used because the
                # 'bool8' alias no longer exists in NumPy 2.
                not_considered = ones(job_dataset.size(), dtype='bool')
                # First consider jobs without parcel id, then without
                # building_id, then all.
                for unit in ['parcel_id', 'building_id', None]:
                    if unit is not None:
                        wnunit = job_dataset.get_attribute(unit) <= 0
                        eligible = logical_and(
                            not_considered,
                            logical_and(in_zone_in_sector, wnunit))
                        not_considered[where(wnunit)] = False
                    else:
                        eligible = logical_and(not_considered,
                                               in_zone_in_sector)
                    eligible_sum = eligible.sum()
                    if eligible_sum > 0:
                        where_eligible = where(eligible)[0]
                        if eligible_sum <= to_be_removed - this_removal:
                            # Not more candidates than needed: take them all.
                            draw = arange(eligible_sum)
                        else:
                            # The result is used below as indices into
                            # where_eligible.
                            draw = sample_noreplace(
                                where_eligible, to_be_removed - this_removal,
                                eligible_sum)
                        remove = concatenate((remove, where_eligible[draw]))
                        this_removal += draw.size
                        if this_removal >= to_be_removed:
                            break

            job_dataset.remove_elements(remove)
            logger.log_status("%s jobs removed." % remove.size)

        logger.log_status("Write jobs table.")
        job_dataset.write_dataset(out_table_name=jobs_table,
                                  out_storage=out_storage)
        logger.log_status("Created %s jobs." % job_dataset.size())