Example #1
0
    def estimate(self, spec_var=None, spec_py=None,
                 submodel_string="workers",
                 agent_sample_rate=0.005, alt_sample_size=None):
        """Run the constrained estimation of the household location choice model
        with neighborhoods as the location set.

        The specification is taken from ``spec_py`` if given (the module is
        reloaded and its ``specification`` attribute is used), otherwise from
        ``spec_var``, otherwise it is loaded from the table
        ``household_location_choice_model_specification`` of the SQL storage
        at ``self.in_con``.

        Arguments:
        spec_var -- specification dictionary, or None.
        spec_py -- module exposing a ``specification`` attribute, or None;
            takes precedence over ``spec_var``.
        submodel_string -- passed through to the model creator.
        agent_sample_rate -- added to the dataset pool as 'sample_rate';
            the model's ``estimation_size_agents`` is this value * 100/20.
        alt_sample_size -- choice set size (includes the current location);
            when None, no sampler is passed to the model.

        The estimation result is stored in ``self.result`` and, when
        ``self.save_estimation_results`` is set, handed to
        ``self.save_results``.  Nothing is returned.
        """
        CLOSE = 0.001
        # No sampler is passed to the model when no choice set size is given.
        if alt_sample_size is None:
            sampler = None
        else:
            sampler = "opus_core.samplers.weighted_sampler"

        # The run log and the info file share one prefix that encodes the
        # start time and both sampling settings.
        file_prefix = (strftime("%Y_%m_%d__%H_%M", localtime())
                       + "__ASR_" + str(agent_sample_rate)
                       + "_ALT_" + str(alt_sample_size))
        info_file = file_prefix + "__info.txt"
        logger.enable_file_logging(file_prefix + "__run.txt")
        logger.enable_memory_logging()
        logger.log_status("Constrained Estimation with agent sample rate of %s and alternatvie sample size %s\n" % \
                          (agent_sample_rate, alt_sample_size))

        t1 = time()

        SimulationState().set_current_time(2000)

        self.nbs = SessionConfiguration().get_dataset_from_pool("neighborhood")
        self.hhs = SessionConfiguration().get_dataset_from_pool('household')

        # Store the computed supply on the neighborhoods; "supply" is the
        # capacity_string given to the model below.
        depts, lambda_value = compute_lambda(self.nbs)
        supply, vacancy_rate = compute_supply_and_vacancy_rate(self.nbs, depts, lambda_value)
        self.nbs.set_values_of_one_attribute("supply", supply)

        # Publish run settings through the dataset pool and the session
        # configuration (presumably read by the estimation procedure -- verify).
        dataset_pool = SessionConfiguration().get_dataset_pool()
        dataset_pool.add_datasets_if_not_included({'vacancy_rate': vacancy_rate,
                                                   'sample_rate': agent_sample_rate})
        SessionConfiguration()["CLOSE"] = CLOSE
        SessionConfiguration()['info_file'] = info_file

        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(type='sql_storage',
                storage_location=self.out_con)

        # A specification module, when given, overrides spec_var.
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification

        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(type='sql_storage',
                storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(in_table_name="household_location_choice_model_specification")

        seed(71) # was: seed(71,110)
        self.model_name = "household_location_choice_model"

        model = HouseholdLocationChoiceModelCreator().get_model(
            location_set=self.nbs,
            submodel_string=submodel_string,
            sampler=sampler,
            # proportion of the agent set that should be used for the estimation
            estimation_size_agents=agent_sample_rate * 100/20,
            # choice set size (includes current location)
            sample_size_locations=alt_sample_size,
            compute_capacity_flag=True,
            probabilities="opus_core.mnl_probabilities",
            choices="urbansim.lottery_choices",
            run_config=Resources({"capacity_string":"supply"}),
            estimate_config=Resources({"capacity_string":"supply","compute_capacity_flag":True}))

        #TODO: since households_for_estimation currently is the same as households,
        #create_households_for_estimation becomes unnecessary
        #agent_set, agents_index_for_estimation  =  create_households_for_estimation(self.hhs, self.in_con)
        agent_set = self.hhs
        agents_index_for_estimation = arange(self.hhs.size())
        self.result = model.estimate(self.specification,
                                     agent_set=agent_set,
                                     agents_index=agents_index_for_estimation,
                                     debuglevel=self.debuglevel,
                                     # alternative procedure: "urbansim.constrain_estimation_bhhh"
                                     procedure="urbansim.constrain_estimation_bhhh_two_loops")

        #save estimation results
        if self.save_estimation_results:
            self.save_results(out_storage)

        logger.log_status("Estimation done. " + str(time()-t1) + " s")
    def estimate(self,
                 spec_var=None,
                 spec_py=None,
                 submodel_string="workers",
                 agent_sample_rate=0.005,
                 alt_sample_size=None):
        """Run the constrained estimation of the household location choice model
        with neighborhoods as the location set.

        The specification is taken from ``spec_py`` if given (the module is
        reloaded and its ``specification`` attribute is used), otherwise from
        ``spec_var``, otherwise it is loaded from the table
        ``household_location_choice_model_specification`` of the SQL storage
        at ``self.in_con``.

        Arguments:
        spec_var -- specification dictionary, or None.
        spec_py -- module exposing a ``specification`` attribute, or None;
            takes precedence over ``spec_var``.
        submodel_string -- passed through to the model creator.
        agent_sample_rate -- added to the dataset pool as 'sample_rate';
            the model's ``estimation_size_agents`` is this value * 100 / 20.
        alt_sample_size -- choice set size (includes the current location);
            when None, no sampler is passed to the model.

        The estimation result is stored in ``self.result`` and, when
        ``self.save_estimation_results`` is set, handed to
        ``self.save_results``.  Nothing is returned.
        """
        CLOSE = 0.001
        # No sampler is passed to the model when no choice set size is given.
        if alt_sample_size is None:
            sampler = None
        else:
            sampler = "opus_core.samplers.weighted_sampler"

        # The run log and the info file share one prefix that encodes the
        # start time and both sampling settings.
        file_prefix = (strftime("%Y_%m_%d__%H_%M", localtime()) +
                       "__ASR_" + str(agent_sample_rate) +
                       "_ALT_" + str(alt_sample_size))
        info_file = file_prefix + "__info.txt"
        logger.enable_file_logging(file_prefix + "__run.txt")
        logger.enable_memory_logging()
        logger.log_status("Constrained Estimation with agent sample rate of %s and alternatvie sample size %s\n" % \
                          (agent_sample_rate, alt_sample_size))

        t1 = time()

        SimulationState().set_current_time(2000)

        self.nbs = SessionConfiguration().get_dataset_from_pool("neighborhood")
        self.hhs = SessionConfiguration().get_dataset_from_pool('household')

        # Store the computed supply on the neighborhoods; "supply" is the
        # capacity_string given to the model below.
        depts, lambda_value = compute_lambda(self.nbs)
        supply, vacancy_rate = compute_supply_and_vacancy_rate(
            self.nbs, depts, lambda_value)
        self.nbs.set_values_of_one_attribute("supply", supply)

        # Publish run settings through the dataset pool and the session
        # configuration (presumably read by the estimation procedure -- verify).
        dataset_pool = SessionConfiguration().get_dataset_pool()
        dataset_pool.add_datasets_if_not_included({
            'vacancy_rate': vacancy_rate,
            'sample_rate': agent_sample_rate,
        })
        SessionConfiguration()["CLOSE"] = CLOSE
        SessionConfiguration()['info_file'] = info_file

        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.out_con)

        # A specification module, when given, overrides spec_var.
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification

        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(
                in_table_name="household_location_choice_model_specification")

        seed(71)  # was: seed(71,110)
        self.model_name = "household_location_choice_model"

        model = HouseholdLocationChoiceModelCreator().get_model(
            location_set=self.nbs,
            submodel_string=submodel_string,
            sampler=sampler,
            # proportion of the agent set that should be used for the estimation
            estimation_size_agents=agent_sample_rate * 100 / 20,
            # choice set size (includes current location)
            sample_size_locations=alt_sample_size,
            compute_capacity_flag=True,
            probabilities="opus_core.mnl_probabilities",
            choices="urbansim.lottery_choices",
            run_config=Resources({"capacity_string": "supply"}),
            estimate_config=Resources({
                "capacity_string": "supply",
                "compute_capacity_flag": True
            }))

        #TODO: since households_for_estimation currently is the same as households,
        #create_households_for_estimation becomes unnecessary
        #agent_set, agents_index_for_estimation  =  create_households_for_estimation(self.hhs, self.in_con)
        agent_set = self.hhs
        agents_index_for_estimation = arange(self.hhs.size())
        self.result = model.estimate(
            self.specification,
            agent_set=agent_set,
            agents_index=agents_index_for_estimation,
            debuglevel=self.debuglevel,
            # alternative procedure: "urbansim.constrain_estimation_bhhh"
            procedure="urbansim.constrain_estimation_bhhh_two_loops")

        #save estimation results
        if self.save_estimation_results:
            self.save_results(out_storage)

        logger.log_status("Estimation done. " + str(time() - t1) + " s")
    def estimate(self, spec_var=None, spec_py=None,
                 movers_index = None,
                 submodel_string = "",
                 alt_sample_size=None,
                 sampler = "opus_core.samplers.weighted_sampler",
                 weight_string = "supply",
                 aggregate_demand = False,
                 submarket_definition = ('zone', 'building_type_id'),
                 sample_size_from_each_stratum = 50
                 ):
        """Run the constrained estimation of the household location choice model
        over submarkets built from the buildings dataset.

        A submarket id is the id of the geography named by
        ``submarket_definition[0]`` times 100, plus (unless
        ``submarket_definition[1]`` is '') the building attribute named by
        ``submarket_definition[1]``.  Submarkets whose ``supply`` is not
        positive are removed before estimation.

        The specification is taken from ``spec_py`` if given (the module is
        reloaded and its ``specification`` attribute is used), otherwise from
        ``spec_var``, otherwise it is loaded from the table
        ``household_location_choice_model_specification`` of the SQL storage
        at ``self.in_con``.

        Arguments:
        spec_var -- specification dictionary, or None.
        spec_py -- module exposing a ``specification`` attribute, or None;
            takes precedence over ``spec_var``.
        movers_index -- index array passed to ``compute_lambda_and_supply``;
            its ``size`` is the denominator of the agent sample rate that is
            added to the dataset pool as 'sample_rate'.  Required despite
            the None default.
        submodel_string, alt_sample_size, sampler -- passed through to the
            model creator (``alt_sample_size`` as ``sample_size_locations``).
        weight_string -- estimate_config "weights_for_estimation_string".
        aggregate_demand -- if true, buildings are the location set and
            estimate_config "aggregate_to_dataset" is 'submarket'; otherwise
            the submarkets are the location set and it is None.
        submarket_definition -- pair (geography dataset name, building
            attribute name or '').
        sample_size_from_each_stratum -- estimate_config entry of that name.

        The estimation result is stored in ``self.result`` and, when
        ``self.save_estimation_results`` is set, handed to
        ``self.save_results``.  Nothing is returned.

        Raises ValueError if ``movers_index`` is None.
        """
        # movers_index.size is dereferenced further down, so fail here with a
        # clear message instead of an AttributeError after the expensive
        # submarket construction.
        if movers_index is None:
            raise ValueError("movers_index is required: its size is used to compute the agent sample rate")

        t1 = time()
        SimulationState().set_current_time(2000)

        dataset_pool=SessionConfiguration().get_dataset_pool()

        buildings = dataset_pool.get_dataset("building")
        agent_set = dataset_pool.get_dataset('household')

        # Datasets listed as intermediates when disaggregating the geography
        # id down to buildings; any other geography uses an empty list.
        submarket_geography = dataset_pool.get_dataset(submarket_definition[0])
        intermediates_by_geography = {
            'zone': '[parcel]',
            'faz': '[zone, parcel]',
            'large_area': '[faz, zone, parcel]',
        }
        intermediates = intermediates_by_geography.get(submarket_geography.dataset_name, '[]')

        geography_id_name = submarket_geography.get_id_name()[0]
        submarket_id_expression = 'building.disaggregate(%s.%s, intermediates=%s) * 100' % \
                                                (submarket_geography.dataset_name, geography_id_name,
                                                 intermediates)
        # Variables that recover the two components from the combined id.
        submarket_variables = ['%s=numpy.ceil(submarket.submarket_id / 100)' % geography_id_name]

        if submarket_definition[1] == 'residential_building_type_id':
            set_residential_building_types(dataset_pool.get_dataset("building_type"), dataset_pool.get_dataset("building"))
        if submarket_definition[1] != '':
            submarket_id_expression = submarket_id_expression + ' + building.%s'  % submarket_definition[1]
            submarket_variables.append(submarket_definition[1] + '=submarket.submarket_id % 100' )

        submarkets = define_submarket(buildings,
                                      submarket_id_expression,
                                      compute_variables=submarket_variables + [
                                          "residential_units=submarket.aggregate(building.residential_units)",
                                          "number_of_buildings_with_non_zero_units=submarket.aggregate(building.residential_units > 0 )",
                                          "number_of_surveyed_households=submarket.aggregate(household.household_id > 5000000, intermediates=[building])",
                                                     ],
                                  )

        dataset_pool.add_datasets_if_not_included({'submarket':submarkets})
        compute_lambda_and_supply(buildings, agent_set, movers_index, submarkets)

        # Hard-coded filter; set to None to keep every submarket.
        submarket_filter = 'submarket.supply > 0'
        if submarket_filter is not None:
            from numpy import logical_not
            submarkets.remove_elements(index= where( logical_not(submarkets.compute_variables(submarket_filter)) )[0])
            # Touch the id attribute on both datasets after the removal
            # (presumably to invalidate dependent cached variables -- verify).
            submarket_id_name = submarkets.get_id_name()[0]
            submarkets.touch_attribute(submarket_id_name)
            buildings.touch_attribute(submarket_id_name)

        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(type='sql_storage',
                storage_location=self.out_con)

        # A specification module, when given, overrides spec_var.
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification

        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(type='sql_storage',
                                                                    storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(in_table_name="household_location_choice_model_specification")

        self.model_name = "household_location_choice_model"

        agent_set, agents_index_for_estimation = get_households_for_estimation(agent_set,
                                                                               AttributeCache(),
                                                                               "households_for_estimation",
                                                                               exclude_condition="household.disaggregate(submarket.submarket_id, intermediates=[building])<=0",
                                                                           )
        agent_set.compute_variables("submarket_id=household.disaggregate(building.submarket_id)")
        # Share of movers that ended up in the estimation set.
        agent_sample_rate = agents_index_for_estimation.size / float(movers_index.size)
        dataset_pool.add_datasets_if_not_included({'sample_rate': agent_sample_rate })

        if aggregate_demand:
            location_set = buildings
            aggregate_dataset = 'submarket'
        else:
            location_set = submarkets
            aggregate_dataset = None

        model = HouseholdLocationChoiceModelCreator().get_model(location_set=location_set,
                                                                submodel_string=submodel_string,
                                                                sampler = sampler,
                                                                # choice set size (includes current location)
                                                                sample_size_locations = alt_sample_size,
                                                                compute_capacity_flag = True,
                                                                probabilities = "opus_core.mnl_probabilities",
                                                                choices = "urbansim.lottery_choices",
                                                                estimate_config = Resources({"capacity_string":"supply",
                                                                                             "weights_for_estimation_string":weight_string,
                                                                                             "aggregate_to_dataset":aggregate_dataset,
                                                                                             "stratum": "building.disaggregate(submarket.submarket_id)",
                                                                                             "sample_size_from_each_stratum": sample_size_from_each_stratum,
                                                                                             "include_chosen_choice": True
                                                                                             }))

        self.result = model.estimate(self.specification,
                                     agent_set=agent_set,
                                     agents_index=agents_index_for_estimation,
                                     debuglevel=self.debuglevel,
                                     # alternative procedure: "urbansim.constrain_estimation_bhhh"
                                     procedure="urbansim.constrain_estimation_bhhh_two_loops" )

        #save estimation results
        if self.save_estimation_results:
            self.save_results(out_storage)

        logger.log_status("Estimation done. " + str(time()-t1) + " s")
    def estimate(self,
                 spec_var=None,
                 spec_py=None,
                 movers_index=None,
                 submodel_string="",
                 alt_sample_size=None,
                 sampler="opus_core.samplers.weighted_sampler",
                 weight_string="supply",
                 aggregate_demand=False,
                 submarket_definition=('zone', 'building_type_id'),
                 sample_size_from_each_stratum=50):
        """Run the constrained estimation of the household location choice model
        over submarkets built from the buildings dataset.

        A submarket id is the id of the geography named by
        ``submarket_definition[0]`` times 100, plus (unless
        ``submarket_definition[1]`` is '') the building attribute named by
        ``submarket_definition[1]``.  Submarkets whose ``supply`` is not
        positive are removed before estimation.

        The specification is taken from ``spec_py`` if given (the module is
        reloaded and its ``specification`` attribute is used), otherwise from
        ``spec_var``, otherwise it is loaded from the table
        ``household_location_choice_model_specification`` of the SQL storage
        at ``self.in_con``.

        Arguments:
        spec_var -- specification dictionary, or None.
        spec_py -- module exposing a ``specification`` attribute, or None;
            takes precedence over ``spec_var``.
        movers_index -- index array passed to ``compute_lambda_and_supply``;
            its ``size`` is the denominator of the agent sample rate that is
            added to the dataset pool as 'sample_rate'.  Required despite
            the None default.
        submodel_string, alt_sample_size, sampler -- passed through to the
            model creator (``alt_sample_size`` as ``sample_size_locations``).
        weight_string -- estimate_config "weights_for_estimation_string".
        aggregate_demand -- if true, buildings are the location set and
            estimate_config "aggregate_to_dataset" is 'submarket'; otherwise
            the submarkets are the location set and it is None.
        submarket_definition -- pair (geography dataset name, building
            attribute name or '').
        sample_size_from_each_stratum -- estimate_config entry of that name.

        The estimation result is stored in ``self.result`` and, when
        ``self.save_estimation_results`` is set, handed to
        ``self.save_results``.  Nothing is returned.

        Raises ValueError if ``movers_index`` is None.
        """
        # movers_index.size is dereferenced further down, so fail here with a
        # clear message instead of an AttributeError after the expensive
        # submarket construction.
        if movers_index is None:
            raise ValueError(
                "movers_index is required: its size is used to compute the agent sample rate")

        t1 = time()
        SimulationState().set_current_time(2000)

        dataset_pool = SessionConfiguration().get_dataset_pool()

        buildings = dataset_pool.get_dataset("building")
        agent_set = dataset_pool.get_dataset('household')

        # Datasets listed as intermediates when disaggregating the geography
        # id down to buildings; any other geography uses an empty list.
        submarket_geography = dataset_pool.get_dataset(submarket_definition[0])
        intermediates_by_geography = {
            'zone': '[parcel]',
            'faz': '[zone, parcel]',
            'large_area': '[faz, zone, parcel]',
        }
        intermediates = intermediates_by_geography.get(
            submarket_geography.dataset_name, '[]')

        geography_id_name = submarket_geography.get_id_name()[0]
        submarket_id_expression = 'building.disaggregate(%s.%s, intermediates=%s) * 100' % \
                                                (submarket_geography.dataset_name, geography_id_name,
                                                 intermediates)
        # Variables that recover the two components from the combined id.
        submarket_variables = [
            '%s=numpy.ceil(submarket.submarket_id / 100)' % geography_id_name
        ]

        if submarket_definition[1] == 'residential_building_type_id':
            set_residential_building_types(
                dataset_pool.get_dataset("building_type"),
                dataset_pool.get_dataset("building"))
        if submarket_definition[1] != '':
            submarket_id_expression = (submarket_id_expression +
                                       ' + building.%s' % submarket_definition[1])
            submarket_variables.append(submarket_definition[1] +
                                       '=submarket.submarket_id % 100')

        submarkets = define_submarket(
            buildings,
            submarket_id_expression,
            compute_variables=submarket_variables + [
                "residential_units=submarket.aggregate(building.residential_units)",
                "number_of_buildings_with_non_zero_units=submarket.aggregate(building.residential_units > 0 )",
                "number_of_surveyed_households=submarket.aggregate(household.household_id > 5000000, intermediates=[building])",
            ],
        )

        dataset_pool.add_datasets_if_not_included({'submarket': submarkets})
        compute_lambda_and_supply(buildings, agent_set, movers_index,
                                  submarkets)

        # Hard-coded filter; set to None to keep every submarket.
        submarket_filter = 'submarket.supply > 0'
        if submarket_filter is not None:
            from numpy import logical_not
            rejected = where(
                logical_not(submarkets.compute_variables(submarket_filter)))[0]
            submarkets.remove_elements(index=rejected)
            # Touch the id attribute on both datasets after the removal
            # (presumably to invalidate dependent cached variables -- verify).
            submarket_id_name = submarkets.get_id_name()[0]
            submarkets.touch_attribute(submarket_id_name)
            buildings.touch_attribute(submarket_id_name)

        if self.save_estimation_results:
            out_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.out_con)

        # A specification module, when given, overrides spec_var.
        if spec_py is not None:
            reload(spec_py)
            spec_var = spec_py.specification

        if spec_var is not None:
            self.specification = load_specification_from_dictionary(spec_var)
        else:
            in_storage = StorageFactory().build_storage_for_dataset(
                type='sql_storage', storage_location=self.in_con)
            self.specification = EquationSpecification(in_storage=in_storage)
            self.specification.load(
                in_table_name="household_location_choice_model_specification")

        self.model_name = "household_location_choice_model"

        agent_set, agents_index_for_estimation = get_households_for_estimation(
            agent_set,
            AttributeCache(),
            "households_for_estimation",
            exclude_condition=
            "household.disaggregate(submarket.submarket_id, intermediates=[building])<=0",
        )
        agent_set.compute_variables(
            "submarket_id=household.disaggregate(building.submarket_id)")
        # Share of movers that ended up in the estimation set.
        agent_sample_rate = agents_index_for_estimation.size / float(
            movers_index.size)
        dataset_pool.add_datasets_if_not_included(
            {'sample_rate': agent_sample_rate})

        if aggregate_demand:
            location_set = buildings
            aggregate_dataset = 'submarket'
        else:
            location_set = submarkets
            aggregate_dataset = None

        model = HouseholdLocationChoiceModelCreator().get_model(
            location_set=location_set,
            submodel_string=submodel_string,
            sampler=sampler,
            # choice set size (includes current location)
            sample_size_locations=alt_sample_size,
            compute_capacity_flag=True,
            probabilities="opus_core.mnl_probabilities",
            choices="urbansim.lottery_choices",
            estimate_config=Resources({
                "capacity_string": "supply",
                "weights_for_estimation_string": weight_string,
                "aggregate_to_dataset": aggregate_dataset,
                "stratum": "building.disaggregate(submarket.submarket_id)",
                "sample_size_from_each_stratum": sample_size_from_each_stratum,
                "include_chosen_choice": True
            }))

        self.result = model.estimate(
            self.specification,
            agent_set=agent_set,
            agents_index=agents_index_for_estimation,
            debuglevel=self.debuglevel,
            # alternative procedure: "urbansim.constrain_estimation_bhhh"
            procedure="urbansim.constrain_estimation_bhhh_two_loops")

        #save estimation results
        if self.save_estimation_results:
            self.save_results(out_storage)

        logger.log_status("Estimation done. " + str(time() - t1) + " s")