def create_edges(self, input_file_dir, input_file_name, output_file_name):
    storage = StorageFactory().get_storage(type='tab_storage', subdir='store',
                                           storage_location=input_file_dir)
    dataset = Dataset(in_storage=storage, id_name=['stop_id', 'sch_time'],
                      in_table_name=input_file_name)

    trip_ids = dataset.get_attribute("stop_id")
    unique_trip_ids = unique(trip_ids)
    source_list = list()
    target_list = list()
    time_list = list()

    # Build one directed edge per pair of consecutive stops within each trip;
    # the edge cost is the scheduled time difference between the two stops.
    for trip in unique_trip_ids:
        idx = where(trip_ids == trip)[0]
        nodes = dataset.get_attribute_by_index("node_id", idx)
        times = dataset.get_attribute_by_index("sch_time", idx)
        for inode in range(nodes.size - 1):
            source_list.append(nodes[inode])
            target_list.append(nodes[inode + 1])
            time_list.append(times[inode + 1] - times[inode])

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='edges',
        table_data={
            'edge_id': arange(len(source_list)) + 1,
            'source': array(source_list),
            'target': array(target_list),
            'cost': array(time_list, dtype=int32)
            })

    edges = Dataset(in_storage=storage, in_table_name='edges', id_name="edge_id")

    edges.write_dataset(attributes=["source", "target", "cost"],
                        out_storage=storage, out_table_name=output_file_name)
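A minimal, self-contained sketch of the edge-building idea above in plain numpy, without the Opus Dataset machinery; the stop, node and time arrays are made-up inputs:

import numpy as np

stop_id = np.array([1, 1, 1, 2, 2])        # trip identifier for each scheduled stop
node_id = np.array([10, 11, 12, 10, 13])   # network node visited at each stop
sch_time = np.array([0, 5, 9, 2, 8])       # scheduled time at each stop

sources, targets, costs = [], [], []
for trip in np.unique(stop_id):
    idx = np.where(stop_id == trip)[0]
    nodes, times = node_id[idx], sch_time[idx]
    # one edge per consecutive stop pair; cost = scheduled travel time
    sources.extend(nodes[:-1])
    targets.extend(nodes[1:])
    costs.extend(np.diff(times))

edges = np.column_stack((sources, targets, costs))   # columns: source, target, cost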
def run(self, spatial_table_name, storage_type, data_path, dataset, attribute_names,
        join_attribute=None, new_table_name=None, files_to_copy_postfix=['shp', 'shx']):
    logger.start_block('Run SpatialTableJoin')
    storage = StorageFactory().get_storage(type=storage_type, storage_location=data_path)
    spatial_dataset = Dataset(in_storage=storage, in_table_name=spatial_table_name,
                              dataset_name='spatial_dataset', id_name=[])
    spatial_dataset.join(dataset, name=attribute_names, join_attribute=join_attribute,
                         metadata=AttributeType.PRIMARY)
    if new_table_name is None:
        out_table_name = spatial_table_name
    else:
        out_table_name = new_table_name
        # copy the shapefile companion files (.shp, .shx) so the renamed table keeps its geometry
        for postfix in files_to_copy_postfix:
            file_name = os.path.join(data_path, '%s.%s' % (spatial_table_name, postfix))
            if os.path.exists(file_name):
                new_file_name = os.path.join(data_path, '%s.%s' % (new_table_name, postfix))
                logger.log_status('Copying %s into %s.' % (file_name, new_file_name))
                shutil.copy(file_name, new_file_name)
    logger.log_status('New table written into %s/%s.' % (data_path, out_table_name))
    spatial_dataset.write_dataset(out_storage=storage, out_table_name=out_table_name,
                                  attributes=AttributeType.PRIMARY)
    logger.end_block()
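A hypothetical invocation of the join above, assuming the method lives on a SpatialTableJoin class (as the log message suggests) and that an opus_core Dataset of attributes to merge in has already been loaded; every name and path here is made up:

SpatialTableJoin().run(spatial_table_name='parcels',
                       storage_type='dbf_storage',
                       data_path='/tmp/gis_data',
                       dataset=parcel_attributes,      # opus_core Dataset (assumed to exist)
                       attribute_names=['land_value'],
                       join_attribute='parcel_id',
                       new_table_name='parcels_joined')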
Example #4
def to_opus_dataset(df, out_store, table_name):
    data_dict = {}
    id_names = df.index.names
    if id_names is None or id_names == [None]:
        id_names = []
    else:
        # move the index into regular columns so the id attributes are exported too
        df = df.reset_index()
    for name in df.columns:
        data_dict[name] = df[name].values
    in_store = StorageFactory().get_storage('dict_storage')
    in_store.write_table(table_name=table_name, table_data=data_dict)
    opus_ds = Dataset(in_storage=in_store,
                      in_table_name=table_name,
                      id_name=id_names,
                      dataset_name='dataset')
    opus_ds.write_dataset(attributes='*', out_storage=out_store,
                          out_table_name=table_name)
    return opus_ds
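A hedged usage sketch for to_opus_dataset(), assuming the usual opus_core import path for StorageFactory and an flt_storage output cache; the frame contents and cache path are made up:

import pandas as pd
from opus_core.storage_factory import StorageFactory

df = pd.DataFrame({'population': [100, 250]},
                  index=pd.Index([1, 2], name='zone_id'))
out_store = StorageFactory().get_storage('flt_storage',
                                         storage_location='/tmp/opus_cache')
zones = to_opus_dataset(df, out_store, 'zones')   # writes 'zones' into the cache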
Example #5
def to_opus_dataset(df, out_store, table_name, zone_id_offset=100):
    data_dict = {}
    id_names = df.index.names
    df = df.reset_index()
    for name in df.columns:
        data_dict[name] = df[name].values

    # shift both zone id columns by a fixed offset, e.g. to keep the exported ids
    # from colliding with an existing zone numbering; assumes the frame carries
    # from_zone_id and to_zone_id columns
    data_dict['from_zone_id'] = data_dict['from_zone_id'] + zone_id_offset
    data_dict['to_zone_id'] = data_dict['to_zone_id'] + zone_id_offset

    in_store = StorageFactory().get_storage('dict_storage')
    in_store.write_table(table_name=table_name, table_data=data_dict)
    opus_ds = Dataset(in_storage=in_store,
                      in_table_name=table_name,
                      id_name=id_names,
                      dataset_name='dataset')
    opus_ds.write_dataset(attributes='*', out_storage=out_store,
                          out_table_name=table_name)
    return opus_ds
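This variant differs from Example #4 only in that it always calls reset_index() and shifts the from_zone_id/to_zone_id columns by a constant zone_id_offset; the dict_storage round-trip and the final write_dataset() export are otherwise identical.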
        person.work2home_travel_time_min)/60.0)*250*\
        (100/" + percentage_of_agents + ")),0)", dataset_pool = dataset_pool)
        parcels.compute_variables("travel_cost_per_parcel = \
        parcel.aggregate(person.travel_cost_per_person,\
        intermediates=[household, building])", dataset_pool = dataset_pool)
        parcels.compute_variables("utility_of_residents_parcel = \
        parcel.income_per_parcel-parcel.housing_cost_per_parcel-parcel.travel_cost_per_parcel", dataset_pool = dataset_pool)
    # export output to tab file
    print '*************************'
    print 'Exporting to tab files...'
    print '*************************'
    # export computations to tab file [person level]
    if export_computations_person == True:
        print '[Person Level] Exporting computations to: %s' % out_table_name_person_computations
        persons.write_dataset(attributes = ['person_id', 'home2work_travel_time_min', 'work2home_travel_time_min', 'travel_cost_per_person'],
                              out_storage = storage_output,
                              out_table_name = out_table_name_person_computations)
    # Exports according to case study
    if case_study == 'brussels' and policy_level == 'zone':
        # export computations to tab file [zone level]
        if export_computations_zone == True:
            print '[Zone Level] Exporting computations to: %s' % out_table_name_computations
            zones.write_dataset(attributes = ['income_per_zone', 'housing_cost_per_zone', 'travel_cost_per_zone', 'utility_of_residents_zone'],
                                out_storage = storage_output,
                                out_table_name = out_table_name_computations)
        # Summarize utility_of_residents_zone attributes
        swf_per_year = zones.attribute_sum('utility_of_residents_zone')
    elif case_study == 'zurich' and policy_level == 'parcel':
        # export computations to tab file [parcel level]
        if export_computations_parcel == True:
            print '[Parcel Level] Exporting computations to: %s' % out_table_name_computations
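Conceptually, the social-welfare bookkeeping above just reduces per-zone utility columns to scalars and adds them up. A hedged numpy sketch of that aggregation, with made-up values standing in for the zone attributes (the three-term sum mirrors the brussels branch of Example #8 below):

import numpy as np

utility_of_residents_zone = np.array([120.0, -30.0, 55.0])   # made-up per-zone utilities
utility_of_commuters = np.array([10.0, 5.0, -2.0])
utility_of_the_rest_of_the_world = np.array([1.0, 0.0, 3.0])

# what the chained zones.attribute_sum(...) calls compute
swf_per_year = (utility_of_residents_zone.sum()
                + utility_of_commuters.sum()
                + utility_of_the_rest_of_the_world.sum())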
Example #8
        print 'Time Start: %s' % strftime("%a, %d %b %Y %X", gmtime())
#        persons.write_dataset(attributes = ['person_id', 'home2work_travel_time_min', 'work2home_travel_time_min', 'travel_cost_per_person'], 
#                            out_storage = storage_output, 
#                            out_table_name = out_table_name_person_computations)
        print 'Time End: %s' % strftime("%a, %d %b %Y %X", gmtime())
    # Exports according to case study
    if case_study == 'brussels' and policy_level == 'zone':
        # export computations to tab file [zone level]
        if export_computations_zone == True:
            print '[Zone Level] Exporting computations to: %s' % out_table_name_computations
            print 'Time Start: %s' % strftime("%a, %d %b %Y %X", gmtime())
            zones.write_dataset(attributes = ['income_per_zone', 
                                              'housing_cost_per_zone', 
                                              'travel_cost_per_zone',
                                              'travel_benefit_car', #'travel_benefit_pt',
                                              'utility_of_residents_zone',
                                              'utility_of_commuters',
                                              'utility_of_the_rest_of_the_world'], 
                            out_storage = storage_output, 
                            out_table_name = out_table_name_computations)
            print 'Time End: %s' % strftime("%a, %d %b %Y %X", gmtime())
        # Compute social welfare per year by summarizing attributes
        swf_per_year = zones.attribute_sum('utility_of_residents_zone') \
        + zones.attribute_sum('utility_of_commuters') \
        + zones.attribute_sum('utility_of_the_rest_of_the_world')
    elif case_study == 'zurich' and policy_level == 'parcel':
        # export computations to tab file [parcel level]
        if export_computations_parcel == True:
            print '[Parcel Level] Exporting computations to: %s' % out_table_name_computations
            print 'Time Start: %s' % strftime("%a, %d %b %Y %X", gmtime())
            parcels.write_dataset(attributes = ['income_per_parcel', 
    def run(self, in_storage, out_storage=None, business_dsname="business", zone_dsname=None):
        dataset_pool = DatasetPool(storage=in_storage, package_order=['psrc_parcel', 'urbansim_parcel', 'urbansim', 'opus_core'] )
        seed(1)
        allbusinesses = dataset_pool.get_dataset(business_dsname)
        parcels = dataset_pool.get_dataset('parcel')
        buildings = dataset_pool.get_dataset('building')
        parcels.compute_variables(["urbansim_parcel.parcel.residential_units", "number_of_buildings = parcel.number_of_agents(building)", 
                                   "non_residential_sqft = (parcel.aggregate(building.non_residential_sqft)).astype(int32)",
                                   "number_of_res_buildings = parcel.aggregate(urbansim_parcel.building.is_residential)",
                                   "number_of_nonres_buildings = parcel.aggregate(urbansim_parcel.building.is_non_residential)",
                                   "number_of_mixed_use_buildings = parcel.aggregate(urbansim_parcel.building.is_generic_building_type_6)"
                                   ], 
                                  dataset_pool=dataset_pool)
        restypes = [12, 4, 19, 11, 34, 10, 33]
        reslutypes = [13,14,15,24]
        is_valid_business = ones(allbusinesses.size(), dtype='bool8')
        parcels_not_matched = logical_and(in1d(allbusinesses["parcel_id"], parcels.get_id_attribute(), invert=True), allbusinesses["parcel_id"] > 0)
        if(parcels_not_matched.sum() > 0):
            is_valid_business[where(parcels_not_matched)] = False
            logger.log_warning(message="No parcel exists for %s businesses (%s jobs)" % (parcels_not_matched.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(parcels_not_matched)].sum()))
        zero_parcel = allbusinesses["parcel_id"]<=0
        if zero_parcel.sum() > 0:
            is_valid_business[where(zero_parcel)] = False
            logger.log_warning(message="%s businesses (%s jobs) located on zero parcel_id" % (zero_parcel.sum(), 
                                                                                         allbusinesses[self.number_of_jobs_attr][where(zero_parcel)].sum()))            
            
        zero_size = logical_and(is_valid_business, allbusinesses[self.number_of_jobs_attr].round() == 0)
        if(sum(zero_size) > 0):
            is_valid_business[where(zero_size)] = False
            logger.log_warning(message="%s businesses are of size 0." % sum(zero_size))
        
        businesses = DatasetSubset(allbusinesses, index=where(is_valid_business)[0])
        
        parcels.add_attribute(name="number_of_workplaces", data=parcels.sum_dataset_over_ids(businesses, constant=1))
        
        has_single_res_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_res_buildings"] == 1) # 1 (1 residential)
        parcels.add_attribute(data=has_single_res_buildings.astype("int32"), name="buildings_code")
        has_mult_res_buildings = logical_and(parcels["number_of_buildings"] > 1,  parcels["number_of_nonres_buildings"] == 0) # 2 (mult residential)
        parcels.modify_attribute("buildings_code", data=2*ones(has_mult_res_buildings.sum()), index=where(has_mult_res_buildings)) 
        has_single_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_nonres_buildings"] == 1), parcels["number_of_mixed_use_buildings"] == 0) # 3 (1 non-res)
        parcels.modify_attribute("buildings_code", data=3*ones(has_single_nonres_buildings.sum()), index=where(has_single_nonres_buildings)) 
        has_mult_nonres_buildings = logical_and(logical_and(parcels["number_of_buildings"] > 1, parcels["number_of_res_buildings"] == 0), parcels["number_of_mixed_use_buildings"] == 0) # 4 (mult non-res)
        parcels.modify_attribute("buildings_code", data=4*ones(has_mult_nonres_buildings.sum()), index=where(has_mult_nonres_buildings))
        has_single_mixed_buildings = logical_and(parcels["number_of_buildings"] == 1, parcels["number_of_mixed_use_buildings"] == 1) # 5 (1 mixed-use)
        parcels.modify_attribute("buildings_code", data=5*ones(has_single_mixed_buildings.sum()), index=where(has_single_mixed_buildings))
        has_mult_mixed_buildings = logical_and(parcels["number_of_buildings"] > 1, 
                                               logical_or(logical_and(parcels["number_of_res_buildings"] > 0, parcels["number_of_nonres_buildings"] > 0), 
                                                          logical_or(parcels["number_of_mixed_use_buildings"] > 1, 
                                                                     logical_and(parcels["number_of_res_buildings"] == 0, 
                                                                                 parcels["number_of_mixed_use_buildings"] > 0)))) # 6
        parcels.modify_attribute("buildings_code", data=6*ones(has_mult_mixed_buildings.sum()), index=where(has_mult_mixed_buildings))
        has_no_building_res_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)) # 7 (vacant with res LU type)
        parcels.modify_attribute("buildings_code", data=7*ones(has_no_building_res_lutype.sum()), index=where(has_no_building_res_lutype)) 
        has_no_building_nonres_lutype = logical_and(parcels["number_of_buildings"] == 0, in1d(parcels["land_use_type_id"], reslutypes)==0) # 8 (vacant with non-res LU type)
        parcels.modify_attribute("buildings_code", data=8*ones(has_no_building_nonres_lutype.sum()), index=where(has_no_building_nonres_lutype))
        
        business_sizes = businesses[self.number_of_jobs_attr].round().astype("int32") 
        business_location = {}
        business_location1wrkpl = zeros(businesses.size(), dtype="int32")
        business_location1wrkplres = zeros(businesses.size(), dtype="int32")
        business_ids = businesses.get_id_attribute()
        # sample one building for cases when sampling is required.
        for ibusid in range(businesses.size()):
            idx = where(buildings['parcel_id'] == businesses['parcel_id'][ibusid])[0]
            bldgids = buildings['building_id'][idx]
            business_location[business_ids[ibusid]] = bldgids
            if bldgids.size == 1:
                business_location1wrkpl[ibusid] = bldgids[0]
            elif bldgids.size > 1:
                business_location1wrkpl[ibusid] = bldgids[sample_noreplace(arange(bldgids.size), 1)]
                if buildings['residential_units'][idx].sum() > 0:
                    # Residential buildings are sampled with probabilities proportional to residential units
                    business_location1wrkplres[ibusid] = bldgids[probsample_noreplace(arange(bldgids.size), 1, prob_array=buildings['residential_units'][idx])]
                else:
                    business_location1wrkplres[ibusid] = business_location1wrkpl[ibusid]
        
        home_based = zeros(business_sizes.sum(), dtype="bool8")
        job_building_id = zeros(business_sizes.sum(), dtype="int32")
        job_array_labels = business_ids.repeat(business_sizes)
        job_assignment_case = zeros(business_sizes.sum(), dtype="int32")
        processed_bindicator = zeros(businesses.size(), dtype="bool8")
        business_codes = parcels.get_attribute_by_id("buildings_code", businesses["parcel_id"])
        business_nworkplaces = parcels.get_attribute_by_id("number_of_workplaces", businesses["parcel_id"])
        logger.log_status("Total number of jobs: %s" % home_based.size)
        
        # 1. 1-2 worker business in 1 residential building
        idx_sngl_wrk_1bld_fit = where(logical_and(business_sizes < 3, business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_1bld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_1bld_fit].repeat(business_sizes[idx_sngl_wrk_1bld_fit])
        job_assignment_case[jidx] = 1
        processed_bindicator[idx_sngl_wrk_1bld_fit] = True
        logger.log_status("1. %s jobs (%s businesses) set as home-based due to 1-2 worker x 1 residential building fit." % (
            business_sizes[idx_sngl_wrk_1bld_fit].sum(), idx_sngl_wrk_1bld_fit.size))
        
        # 2. 1-2 worker business in multiple residential buildings
        idx_sngl_wrk_multbld_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_multbld_fit])
        home_based[jidx] = True
        job_building_id[jidx] = business_location1wrkplres[idx_sngl_wrk_multbld_fit].repeat(business_sizes[idx_sngl_wrk_multbld_fit])
        job_assignment_case[jidx] = 2
        processed_bindicator[idx_sngl_wrk_multbld_fit] = True
        logger.log_status("2. %s jobs (%s businesses) set as home-based due to 1-2 worker x multiple residential buildings fit." % (
            business_sizes[idx_sngl_wrk_multbld_fit].sum(), idx_sngl_wrk_multbld_fit.size))
               
        # 3. 1-2 worker in single non-res building (not mixed-use)
        idx_sngl_wrk_single_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 3))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_single_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_single_nonres_fit].repeat(business_sizes[idx_sngl_wrk_single_nonres_fit])
        job_assignment_case[jidx] = 3
        processed_bindicator[idx_sngl_wrk_single_nonres_fit] = True
        logger.log_status("3. %s jobs (%s businesses) placed due to 1-2 worker x single non-res building fit." % (
                          business_sizes[idx_sngl_wrk_single_nonres_fit].sum(), idx_sngl_wrk_single_nonres_fit.size))     
        
        # 4. 1-2 worker in multiple non-res building (not mixed-use)
        idx_sngl_wrk_mult_nonres_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 4))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mult_nonres_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mult_nonres_fit].repeat(business_sizes[idx_sngl_wrk_mult_nonres_fit])
        job_assignment_case[jidx] = 4
        processed_bindicator[idx_sngl_wrk_mult_nonres_fit] = True
        logger.log_status("4. %s jobs (%s businesses) placed due to 1-2 worker x multiple non-res building fit." % (
            business_sizes[idx_sngl_wrk_mult_nonres_fit].sum(), idx_sngl_wrk_mult_nonres_fit.size))      
                
        # 5. 1-2 worker in single mixed-use building
        idx_sngl_wrk_smu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 5))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_smu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_smu_fit].repeat(business_sizes[idx_sngl_wrk_smu_fit])
        job_assignment_case[jidx] = 5
        processed_bindicator[idx_sngl_wrk_smu_fit] = True
        logger.log_status("5. %s jobs (%s businesses) in 1-2 worker x single mixed-use building." % (
            business_sizes[idx_sngl_wrk_smu_fit].sum(), idx_sngl_wrk_smu_fit.size))       
        
        # 6. 1-2 worker in multiple mixed-type buildings
        idx_sngl_wrk_mmu_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 6))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_mmu_fit].repeat(business_sizes[idx_sngl_wrk_mmu_fit])
        bldtype = buildings.get_attribute_by_id("building_type_id", business_location1wrkpl[idx_sngl_wrk_mmu_fit])
        is_bldtype_res = in1d(bldtype, restypes)
        home_based[in1d(job_array_labels, business_ids[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)])] = True
        job_assignment_case[jidx] = 6
        processed_bindicator[idx_sngl_wrk_mmu_fit] = True
        logger.log_status("6. %s jobs (%s businesses) in 1-2 worker x multiple mixed-type buildings. %s jobs classified as home-based." % (
            business_sizes[idx_sngl_wrk_mmu_fit].sum(), idx_sngl_wrk_mmu_fit.size, business_sizes[idx_sngl_wrk_mmu_fit][where(is_bldtype_res)].sum()))            

        # 7. 1-2 worker business in residential parcel with no building
        idx_sngl_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes < 3), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_vacant_res])
        job_assignment_case[jidx] = 7
        home_based[jidx] = True
        processed_bindicator[idx_sngl_wrk_vacant_res] = True
        logger.log_status("7. %s jobs (%s businesses of size 1-2) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_sngl_wrk_vacant_res].sum(), idx_sngl_wrk_vacant_res.size))        

        # 8. 3+ workers of governmental workplaces in 1+ residential building
        ind_business_case8 = logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, in1d(businesses['sector_id'], [18,19]))), in1d(business_codes, [1,2]))
        idx_wrk_fit = where(ind_business_case8)[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 8
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("8. %s governmental jobs (%s businesses of size 3+) could not be placed due to residing in residential buildings only." % (
                    business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))
        
        # 9. 3-30 workers in single residential building. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrk_fit].repeat(business_sizes[idx_sngl_wrk_fit])
        bsizeminus2 = vstack((2*ones(idx_sngl_wrk_fit.size), business_sizes[idx_sngl_wrk_fit]-2)).ravel("F").astype("int32") # interweaving 2 and remaining business size
        hbidx = tile(array([True, False]), bsizeminus2.size/2).repeat(bsizeminus2) # set the first two jobs of every business to True, others to False
        home_based[(where(jidx)[0])[hbidx]] = True
        job_assignment_case[jidx] = 9
        processed_bindicator[idx_sngl_wrk_fit] = True        
        logger.log_status("9. %s jobs (%s businesses) in 3-30 worker x single residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, hbidx.sum()))      
        
        # 10. 3-30 workers in multiple residential buildings. Make two of them home based.
        idx_sngl_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, logical_and(business_sizes > 2, business_sizes <= 30)), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrk_fit])
        job_assignment_case[jidx] = 10
        processed_bindicator[idx_sngl_wrk_fit] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_sngl_wrk_fit])
        for ipcl in range(bpcls.size):
            bidx = where(buildings['parcel_id'] == bpcls[ipcl])[0]
            bldgids = buildings['building_id'][bidx]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_sngl_wrk_fit])
            # multiply by units for sampling prop. to units rather than buildings
            bldgids = bldgids.repeat(maximum(1, buildings['residential_units'][bidx].astype('int32'))) 
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size)) )
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])[0]
                job_building_id[jidx] = bldarray[ib]
                home_based[jidx[0:2]] = True
        logger.log_status("10. %s jobs (%s businesses) in 3-30 worker x multiple residential building. %s jobs assigned as home-based." % (
            business_sizes[idx_sngl_wrk_fit].sum(), idx_sngl_wrk_fit.size, idx_sngl_wrk_fit.size*2))        


        # 11. single workplace, 3+ workers in single non-res or mixed-use building (11.)
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), 
                                                            logical_or(business_codes==3, business_codes==5)),
                                                business_nworkplaces==1))[0]
        which_labels = where(in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers]))[0]
        job_building_id[which_labels] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])   
        job_assignment_case[which_labels] = 11
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("11. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x single non-res/mixed-use building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))
        
        # 12. single workplace, 3+ workers in multiple mixed-type building
        idx_sngl_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces==1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_sngl_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_sngl_wrkplace_2plus_workers].repeat(business_sizes[idx_sngl_wrkplace_2plus_workers])    
        job_assignment_case[jidx] = 12
        processed_bindicator[idx_sngl_wrkplace_2plus_workers] = True
        logger.log_status("12. %s jobs (%s businesses) could be placed due to single workplace x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_sngl_wrkplace_2plus_workers].sum(), idx_sngl_wrkplace_2plus_workers.size))

        # 13. multiple workplaces, 3+ workers in single non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==3, business_codes==5)),
                                                            business_nworkplaces > 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_mult_wrkplace_2plus_workers])
        job_building_id[jidx] = business_location1wrkpl[idx_mult_wrkplace_2plus_workers].repeat(business_sizes[idx_mult_wrkplace_2plus_workers])
        job_assignment_case[jidx] = 13
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        logger.log_status("13. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x single non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        # 14. multiple workplaces, 3+ workers in multiple non-res or mixed building
        idx_mult_wrkplace_2plus_workers = where(logical_and(logical_and(logical_and(processed_bindicator==0, business_sizes > 2),
                                                                        logical_or(business_codes==4, business_codes==6)),
                                                            business_nworkplaces > 1))[0]
        processed_bindicator[idx_mult_wrkplace_2plus_workers] = True
        # sample buildings to businesses by parcels 
        bpcls = unique(businesses["parcel_id"][idx_mult_wrkplace_2plus_workers])
        #hbasedsum = home_based.sum()
        for ipcl in range(bpcls.size):
            bldgids = buildings['building_id'][buildings['parcel_id'] == bpcls[ipcl]]
            bussids = intersect1d(business_ids[businesses["parcel_id"] == bpcls[ipcl]], business_ids[idx_mult_wrkplace_2plus_workers])
            if bldgids.size < bussids.size:
                bldarray = bldgids.repeat(1+ceil((bussids.size - bldgids.size)/float(bldgids.size)))
            else:
                bldarray = bldgids
            shuffle(bldarray) # randomly reorder in-place
            is_res = in1d(bldarray, restypes)
            for ib in range(bussids.size):
                jidx = where(job_array_labels == bussids[ib])
                job_building_id[jidx] = bldarray[ib]
                #home_based[jidx] = is_res
                job_assignment_case[jidx] = 14
        logger.log_status("14. %s jobs (%s businesses) could be placed due to multiple workplaces x 3+ workers x multiple non-res/mixed building fit." % (
            business_sizes[idx_mult_wrkplace_2plus_workers].sum(), idx_mult_wrkplace_2plus_workers.size))
        
        
        # 15. 3+ workers in residential parcel with no building
        idx_wrk_vacant_res = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 2), business_codes == 7))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_res])
        job_assignment_case[jidx] = 15
        processed_bindicator[idx_wrk_vacant_res] = True
        logger.log_status("15. %s jobs (%s businesses of 3+ workers) could not be placed due to non-existing buildings in parcels with residential LU type." % (
            business_sizes[idx_wrk_vacant_res].sum(), idx_wrk_vacant_res.size))
        
        # 16. nonresidential parcel with no building
        idx_wrk_vacant_nonres = where(logical_and(processed_bindicator==0, business_codes == 8))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_vacant_nonres])
        job_assignment_case[jidx] = 16
        processed_bindicator[idx_wrk_vacant_nonres] = True
        logger.log_status("16. %s jobs (%s businesses) could not be placed due to non-existing buildings in parcels with non-esidential LU type." % (
            business_sizes[idx_wrk_vacant_nonres].sum(), idx_wrk_vacant_nonres.size))        
        
        # 17. 31+ workers in single residential building. Do not place - will go into ELCM.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 1))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 17
        processed_bindicator[idx_wrk_fit] = True        
        logger.log_status("17. %s jobs (%s businesses) in 31+ workers x single residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))         
    
        # 18. 31+ workers in multiple residential buildings.
        idx_wrk_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 30), business_codes == 2))[0]
        jidx = in1d(job_array_labels, business_ids[idx_wrk_fit])
        job_assignment_case[jidx] = 18
        processed_bindicator[idx_wrk_fit] = True
        logger.log_status("18. %s jobs (%s businesses) in 31+ workers x multiple residential building." % (
            business_sizes[idx_wrk_fit].sum(), idx_wrk_fit.size))                

        # jobs in messy buildings
        idx_messy_fit = where(logical_and(logical_and(processed_bindicator==0, business_sizes > 0), business_codes == 0))[0]
        processed_bindicator[idx_messy_fit] = True
        logger.log_status("%s jobs (%s businesses) could not be placed due to messy buildings." % (
            business_sizes[idx_messy_fit].sum(), idx_messy_fit.size))         
         
        # build new buildings for jobs in cases 7, 8, 15 and 16
        jidx_no_bld = where(in1d(job_assignment_case, [7,8,15,16]))[0]
        bus = unique(job_array_labels[jidx_no_bld])
        bsidx = businesses.get_id_index(bus)
        # first create buildings for single workplaces per parcel
        single_workplace_idx = where(business_nworkplaces[bsidx] == 1)[0]
        newbld_parcel_id = businesses['parcel_id'][bsidx][single_workplace_idx]
        newbld_bt = sector2building_type(businesses['sector_id'][bsidx][single_workplace_idx])
        newbids = arange(buildings.get_id_attribute().max()+1, buildings.get_id_attribute().max()+single_workplace_idx.size+1)
        bbldid = zeros(bsidx.size, dtype='int32')
        bbldid[single_workplace_idx] = newbids
        # for parcels with multiple workplaces select the largest business to determine its building type
        mult_bsidx = bsidx[where(business_nworkplaces[bsidx] > 1)[0]]
        empty_parcels = businesses['parcel_id'][mult_bsidx]
        uempty_parcels = unique(empty_parcels)
        bsize_on_empty_pcl = ndmax(business_sizes[mult_bsidx], labels=empty_parcels, index=uempty_parcels)
        newbld2_sec = zeros(uempty_parcels.size, dtype='int32')
        newbids2 = arange(newbids.max()+1, newbids.max()+uempty_parcels.size+1)
        for ipcl in range(uempty_parcels.size):
            newbld2_sec[ipcl] = businesses['sector_id'][mult_bsidx][logical_and(businesses['parcel_id'][mult_bsidx] == uempty_parcels[ipcl], 
                                                                                business_sizes[mult_bsidx]==bsize_on_empty_pcl[ipcl])][0]
            this_bidx = where(businesses['parcel_id'][bsidx] == uempty_parcels[ipcl])
            bbldid[this_bidx] = newbids2[ipcl]
            
        newbld_parcel_id = concatenate((newbld_parcel_id, uempty_parcels))
        newbld_bt = concatenate((newbld_bt, sector2building_type(newbld2_sec)))    
        
        newbldgs = {'building_id': concatenate((newbids, newbids2)),
                    'parcel_id': newbld_parcel_id,
                    'building_type_id': newbld_bt,
                    }
        buildings.add_elements(newbldgs, require_all_attributes=False)
        jidx = where(in1d(job_array_labels, business_ids[bsidx]))[0]
        job_building_id[jidx] = bbldid.repeat(business_sizes[bsidx])
        logger.log_status("Build %s new buildings to accommodate %s jobs (out of which %s are governmental) from cases 7, 15, 16." % (
            newbld_parcel_id.size, jidx.size, business_sizes[bsidx][where(in1d(businesses['sector_id'][bsidx], [18,19]))].sum()))
        
        
        logger.log_status("Assigned %s (%s percent) home-based jobs." % (home_based.sum(), round(home_based.sum()/(home_based.size/100.),2)))
        logger.log_status("Finished %s percent (%s) jobs (%s businesses) processed. %s jobs (%s businesses) remain to be processed." % \
                          (round(business_sizes[processed_bindicator].sum()/(home_based.size/100.),2),
                           business_sizes[processed_bindicator].sum(), processed_bindicator.sum(),
                          business_sizes[logical_not(processed_bindicator)].sum(), business_sizes[logical_not(processed_bindicator)].size))
        
        logger.start_block("Storing jobs data.")
        # create job dataset
        job_data = {"job_id": (arange(job_building_id.size)+1).astype("int32"),
                    "home_based_status" : home_based,
                    "building_id": job_building_id,
                    "business_id": job_array_labels.astype("int32"),
                    "sector_id": businesses['sector_id'].repeat(business_sizes).astype("int32"), 
                    "parcel_id": businesses['parcel_id'].repeat(business_sizes).astype("int32"), 
                    "assignment_case": job_assignment_case}

        # join with zones
        if zone_dsname is not None:
            zones = dataset_pool.get_dataset(zone_dsname)
            idname = zones.get_id_name()[0]
            #jpcls = buildings.get_attribute_by_id('parcel_id', job_building_id)
            job_data[idname] = parcels.get_attribute_by_id(idname, job_data["parcel_id"])
            
            
        dictstorage = StorageFactory().get_storage('dict_storage')
        dictstorage.write_table(table_name="jobs", table_data=job_data)
        jobs = Dataset(in_storage=dictstorage, in_table_name="jobs", dataset_name="job", id_name="job_id")
        if out_storage is not None:
            jobs.write_dataset(out_storage=out_storage, out_table_name="jobs")
            buildings.write_dataset(out_storage=out_storage, attributes=AttributeType.PRIMARY)
        logger.end_block()        
        return jobs
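The core bookkeeping trick in the method above is expanding businesses into per-job rows with repeat() and then addressing all jobs of a chosen set of businesses with in1d(). A minimal self-contained numpy sketch of that pattern, with made-up ids and sizes:

import numpy as np

business_ids = np.array([101, 102, 103])
business_sizes = np.array([2, 1, 3])                      # jobs per business

# one label per job, repeated from its parent business
job_array_labels = business_ids.repeat(business_sizes)    # [101 101 102 103 103 103]

# flag every job belonging to a selected subset of businesses
selected = np.array([101, 103])
jidx = np.in1d(job_array_labels, selected)                # boolean mask over all jobs

job_assignment_case = np.zeros(business_sizes.sum(), dtype=np.int32)
job_assignment_case[jidx] = 1                             # tag the selected jobs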
    def run(
        self,
        individual_dataset,
        counts_dataset,
        fraction_dataset,
        id_name1="blockgroup_id",
        id_name2="zone_id",
        fraction_attribute_name="fraction",
        out_storage=None,
    ):

        """
        """
        assert id_name1 in individual_dataset.get_known_attribute_names()
        if id_name2 not in individual_dataset.get_known_attribute_names():
            individual_dataset.add_primary_attribute(-1 * ones(individual_dataset.size()), id_name2)

        lucky_household_index = array([], dtype="int32")
        hh_zone_id = array([], dtype="int32")
        output_data = {}

        logger.start_block("Start assigning individuals")
        zone_ids = counts_dataset.get_attribute(id_name2)
        building_types = counts_dataset.get_attribute("building_type_id")
        households = counts_dataset.get_attribute("households")
        for zone_id, building_type, n in zip(zone_ids, building_types, households):
            logger.log_status("n(%s=%i & %s=%i) = %s:" % (id_name2, zone_id, "building_type_id", building_type, n))
            fraction_index = where(fraction_dataset.get_attribute(id_name2) == zone_id)

            blockgroup_ids = fraction_dataset.get_attribute_by_index(id_name1, fraction_index)
            fractions = fraction_dataset.get_attribute_by_index(fraction_attribute_name, fraction_index)
            for blockgroup_id, fraction in zip(blockgroup_ids, fractions):
                nn = int(round(n * fraction))
                logger.log_status("\tfrac(%s=%s) = %s, n = %s" % ("blockgroup_id", blockgroup_id, fraction, nn))
                if nn >= 1:
                    suitable_household_index = where(
                        logical_and(
                            individual_dataset.get_attribute(id_name1) == blockgroup_id,
                            individual_dataset.get_attribute("building_type_id") == building_type,
                        )
                    )[0]
                    logger.log_status(
                        "\t\t sample %s from %s suitable households" % (nn, suitable_household_index.size)
                    )
                    if suitable_household_index.size == 0:
                        logger.log_warning("\tNo suitable households")
                        continue
                    lucky_household_index = concatenate(
                        (lucky_household_index, sample_replace(suitable_household_index, nn))
                    )
                    hh_zone_id = concatenate((hh_zone_id, [zone_id] * nn))

        for attribute_name in individual_dataset.get_known_attribute_names():
            output_data[attribute_name] = individual_dataset.get_attribute_by_index(
                attribute_name, lucky_household_index
            )
        output_data["original_household_id"] = output_data["household_id"]
        output_data["household_id"] = 1 + arange(lucky_household_index.size)
        output_data["zone_id"] = hh_zone_id

        storage = StorageFactory().get_storage("dict_storage")
        storage.write_table(table_name="households", table_data=output_data)
        output_dataset = Dataset(in_storage=storage, id_name=["household_id"], in_table_name="households")
        output_dataset.write_dataset(out_storage=out_storage, out_table_name="households")
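A self-contained sketch of the sampling core above for a single (zone, building type, blockgroup) cell, replacing opus_core's sample_replace with numpy's choice under the same with-replacement semantics; all values are made up:

import numpy as np

rng = np.random.default_rng(0)

suitable_household_index = np.array([3, 7, 12, 15])   # households matching the cell
n, fraction = 10, 0.25
nn = int(round(n * fraction))                         # draws for this blockgroup

# sample with replacement, as sample_replace() does
lucky = rng.choice(suitable_household_index, size=nn, replace=True)
hh_zone_id = np.full(nn, 42, dtype=np.int32)          # all draws land in zone 42 (made up)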
    option_group = AssignmentOptionGroup()
    parser = option_group.parser
    (options, args) = parser.parse_args()
    if options.cache_directory is None:
        parser.print_usage()
        sys.exit()

    individual_table = options.individual_table
    counts_table = options.counts_table
    fraction_table = options.fraction_table
    fraction_attribute_name = options.fraction_attribute_name
    storage = StorageFactory().get_storage("flt_storage", storage_location=options.cache_directory)
    individual_dataset = Dataset(in_storage=storage, id_name=[], in_table_name=individual_table)
    counts_dataset = Dataset(in_storage=storage, id_name=[], in_table_name=counts_table)
    fraction_dataset = Dataset(in_storage=storage, id_name=[], in_table_name=fraction_table)

    MonteCarloAssignmentModel().run(
        individual_dataset,
        counts_dataset,
        fraction_dataset,
        id_name1=options.id_name1,
        id_name2=options.id_name2,
        fraction_attribute_name=options.fraction_attribute_name,
        out_storage=storage,
    )

    individual_dataset.write_dataset(
        out_storage=storage, out_table_name=individual_table, attributes=[options.id_name2]
    )
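Note that this closing write_dataset() call persists only the freshly assigned id_name2 column (zone_id by default) back into the flt cache for the individual table; the sampled-households table written inside run() is a separate output.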