def define_submarket(choice_set, submarket_id_expression, compute_variables=None, filter=None):
    """Build a ``submarket`` dataset from the distinct values of an expression.

    ``submarket_id_expression`` is computed on ``choice_set`` under the alias
    ``submarket_id``; the distinct resulting values become the ids of a new
    in-memory dataset (table ``submarkets`` in a ``dict_storage`` storage).

    Args:
        choice_set: Dataset on which the submarket id expression is computed.
        submarket_id_expression: Expression string; it is prefixed with
            ``"submarket_id="`` before being passed to ``compute_variables``.
        compute_variables: Optional variables to compute on the new submarket
            dataset. ``None`` or an empty sequence skips this step.
        filter: Optional variable/expression computed on the submarket
            dataset; submarkets for which it evaluates to false are removed.
            (The name shadows the builtin but is kept for caller
            compatibility.)

    Returns:
        The submarket ``Dataset`` (id name ``submarket_id``, dataset name
        ``submarket``), after the optional computation and filtering.
    """
    submarket_ids = choice_set.compute_variables("submarket_id=" + submarket_id_expression)
    unique_submarket_ids = unique(submarket_ids)
    storage = StorageFactory().get_storage("dict_storage")
    storage.write_table(table_name="submarkets", table_data={"submarket_id": unique_submarket_ids})
    submarkets = Dataset(
        in_storage=storage, in_table_name="submarkets", id_name="submarket_id", dataset_name="submarket"
    )
    # ``None`` replaces the former mutable ``[]`` default; len() is kept (rather
    # than plain truthiness) so array-like arguments keep working.
    if compute_variables is not None and len(compute_variables):
        submarkets.compute_variables(compute_variables)
    if filter is not None:
        from numpy import logical_not

        passes_filter = submarkets.compute_variables(filter)
        # Drop the submarkets that do NOT satisfy the filter.
        submarkets.remove_elements(index=where(logical_not(passes_filter))[0])
    return submarkets
    def prepare_for_estimate(self,
                             add_member_prefix=True,
                             specification_dict=None,
                             specification_storage=None,
                             specification_table=None,
                             building_set=None,
                             buildings_for_estimation_storage=None,
                             buildings_for_estimation_table=None,
                             constants=None,
                             base_year=0,
                             building_categories=None,
                             location_id_variable=None,
                             join_datasets=False,
                             data_objects=None,
                             **kwargs):
        """Load the estimation specification and select the buildings to estimate on.

        Steps:

        * If both ``building_set`` and ``location_id_variable`` are given, the
          variable is computed on ``building_set``.
        * If ``buildings_for_estimation_storage`` is given, an estimation
          dataset is loaded from it (using the id name and dataset name of
          ``building_set``, which therefore must not be ``None`` in this
          case). Elements whose ``scheduled_year`` is not one of the years
          ``base_year - constants["recent_years"] .. base_year`` (inclusive)
          are removed. With ``join_datasets`` the estimation set is joined
          into ``building_set`` by rows and the returned index spans the last
          ``estimation_set.size()`` rows of the joined set (presumably where
          ``join_by_rows`` appends them - confirm against its implementation);
          otherwise the index is looked up in ``building_set`` by id.
        * Without an estimation storage the index covers all of
          ``building_set``, or is ``None`` if no building set was given.
        * The specification is obtained through
          ``get_specification_for_estimation``; when ``add_member_prefix`` is
          true the table name is first passed through
          ``self.group_member.add_member_prefix_to_table_names``.

        ``building_categories`` and ``**kwargs`` are accepted but not used here.

        Returns:
            Tuple ``(specification, index)``.
        """
        if building_set is not None and location_id_variable is not None:
            building_set.compute_variables(
                location_id_variable, resources=Resources(data_objects))

        # create agents for estimation
        if buildings_for_estimation_storage is not None:
            estimation_set = Dataset(
                in_storage=buildings_for_estimation_storage,
                in_table_name=buildings_for_estimation_table,
                id_name=building_set.get_id_name(),
                dataset_name=building_set.get_dataset_name())
            if location_id_variable:
                estimation_set.compute_variables(
                    location_id_variable, resources=Resources(data_objects))
                # needs to be a primary attribute because of the join method below
                estimation_set.add_primary_attribute(
                    estimation_set.get_attribute(location_id_variable),
                    VariableName(location_id_variable).alias())

            # Keep only elements scheduled within the recent-years window.
            years = estimation_set.get_attribute("scheduled_year")
            recent_years = constants['recent_years']
            indicator = zeros(estimation_set.size(), dtype="int32")
            for year in range(base_year - recent_years, base_year + 1):
                indicator = logical_or(indicator, years == year)
            idx = where(logical_not(indicator))[0]
            estimation_set.remove_elements(idx)

            if join_datasets:
                building_set.join_by_rows(estimation_set,
                                          require_all_attributes=False,
                                          change_ids_if_not_unique=True)
                # Upper bound is the size of the joined building set. The
                # previous code referenced ``agent_set``, which is neither a
                # parameter nor a local of this method.
                index = arange(building_set.size() - estimation_set.size(),
                               building_set.size())
            else:
                index = building_set.get_id_index(
                    estimation_set.get_id_attribute())
        elif building_set is not None:
            index = arange(building_set.size())
        else:
            index = None

        if add_member_prefix:
            specification_table = self.group_member.add_member_prefix_to_table_names(
                [specification_table])

        from opus_core.model import get_specification_for_estimation
        specification = get_specification_for_estimation(
            specification_dict, specification_storage, specification_table)

        return (specification, index)
    def prepare_for_estimate(
        self,
        add_member_prefix=True,
        specification_dict=None,
        specification_storage=None,
        specification_table=None,
        building_set=None,
        buildings_for_estimation_storage=None,
        buildings_for_estimation_table=None,
        constants=None,
        base_year=0,
        building_categories=None,
        location_id_variable=None,
        join_datasets=False,
        data_objects=None,
        **kwargs
    ):
        """Return ``(specification, index)`` for estimating this model.

        ``index`` identifies the rows of ``building_set`` to estimate on:

        * no estimation storage and no building set -> ``None``;
        * no estimation storage -> every row of ``building_set``;
        * estimation storage given -> an estimation dataset is loaded from
          ``buildings_for_estimation_table``, reduced to the elements whose
          ``scheduled_year`` is one of the years
          ``base_year - constants["recent_years"] .. base_year`` (inclusive),
          and then either joined into ``building_set`` by rows
          (``join_datasets`` true; the index spans the last
          ``estimation_set.size()`` rows of the joined set) or matched
          against ``building_set`` by id.

        ``specification`` comes from ``get_specification_for_estimation``;
        the table name is routed through
        ``self.group_member.add_member_prefix_to_table_names`` first when
        ``add_member_prefix`` is true.

        ``building_categories`` and ``**kwargs`` are accepted but unused here.
        """
        if building_set is not None and location_id_variable is not None:
            building_set.compute_variables(location_id_variable, resources=Resources(data_objects))

        if buildings_for_estimation_storage is None:
            # Nothing extra to load: estimate on the whole building set, if any.
            index = None if building_set is None else arange(building_set.size())
        else:
            # The estimation dataset mirrors the identity of the building set.
            id_name = building_set.get_id_name()
            dataset_name = building_set.get_dataset_name()
            estimation_set = Dataset(
                in_storage=buildings_for_estimation_storage,
                in_table_name=buildings_for_estimation_table,
                id_name=id_name,
                dataset_name=dataset_name,
            )

            if location_id_variable:
                estimation_set.compute_variables(location_id_variable, resources=Resources(data_objects))
                # Must be a primary attribute so that it survives the row join below.
                location_values = estimation_set.get_attribute(location_id_variable)
                location_alias = VariableName(location_id_variable).alias()
                estimation_set.add_primary_attribute(location_values, location_alias)

            # Flag elements scheduled in any year of the recent window.
            scheduled_years = estimation_set.get_attribute("scheduled_year")
            window_length = constants["recent_years"]
            in_window = zeros(estimation_set.size(), dtype="int32")
            for candidate_year in range(base_year - window_length, base_year + 1):
                in_window = logical_or(in_window, scheduled_years == candidate_year)
            outside_window = where(logical_not(in_window))[0]
            estimation_set.remove_elements(outside_window)

            if not join_datasets:
                estimation_ids = estimation_set.get_id_attribute()
                index = building_set.get_id_index(estimation_ids)
            else:
                building_set.join_by_rows(estimation_set, require_all_attributes=False, change_ids_if_not_unique=True)
                first_row = building_set.size() - estimation_set.size()
                end_row = building_set.size()
                index = arange(first_row, end_row)

        if add_member_prefix:
            prefix_tables = self.group_member.add_member_prefix_to_table_names
            specification_table = prefix_tables([specification_table])

        from opus_core.model import get_specification_for_estimation

        specification = get_specification_for_estimation(specification_dict, specification_storage, specification_table)
        return specification, index