Ejemplo n.º 1
0
    def func(net):
        """
        Compute a network aggregation and return it as a single column.

        This is a closure: besides ``net`` it reads ``name``, ``agent``,
        ``target_variable``, ``target_value``, ``filters``, ``radius``,
        ``agg_function``, ``decay`` and ``log`` from the enclosing scope.

        Parameters
        ----------
        net : object
            Network exposing ``node_ids``, ``set()`` and ``aggregate()``
            (presumably a Pandana network -- confirm against the caller).

        Returns
        -------
        pd.Series
            Aggregated values indexed by ``net.node_ids``; transformed with
            ``np.log1p`` when ``log`` is truthy.

        """
        print('Calculating {}'.format(name))

        nodes = pd.DataFrame(index=net.node_ids)

        # Build the equality filter once; it is needed both to discover the
        # required columns and to filter the rows afterwards.
        # NOTE(review): a falsy target_value (e.g. 0) disables this filter --
        # confirm that is the intended behaviour.
        value_filter = None
        if target_value:
            value_filter = ["{} == {}".format(target_variable, target_value)]

        flds = [target_variable] if target_variable else []
        if value_filter is not None:
            flds += util.columns_in_filters(value_filter)
        if filters:
            flds += util.columns_in_filters(filters)
        flds.append('node_id')

        df = orca.get_table(agent).to_frame(flds)

        if value_filter is not None:
            df = util.apply_filter_query(df, value_filter)
        if filters:
            df = util.apply_filter_query(df, filters)

        net.set(df['node_id'],
                variable=df[target_variable] if target_variable else None)
        nodes[name] = net.aggregate(radius, type=agg_function, decay=decay)

        if log:
            # Reference the ufunc directly; evaluating a constant string with
            # eval() added nothing but risk.
            nodes[name] = nodes[name].apply(np.log1p)
        return nodes[name]
def test_submodel_filters(m):
    """
    Check that a submodel's chooser filters select the expected rows.

    After building the submodels, applying the default chooser filters to
    the rows whose ``group`` is 'A' must yield the same number of rows as
    applying submodel 'A''s chooser filters to the whole chooser table.

    """
    m.build_submodels()

    choosers = orca.get_table(m.defaults.choosers).to_frame()

    # Reference subset: restrict to the group by hand, then use the defaults
    group_rows = choosers.loc[choosers.group == 'A']
    expected = apply_filter_query(group_rows, m.defaults.chooser_filters)

    # Subset produced by the filters stored on the generated submodel
    actual = apply_filter_query(choosers, m.submodels['A'].chooser_filters)

    assert len(expected) == len(actual)
Ejemplo n.º 3
0
def get_data(tables,
             fallback_tables=None,
             filters=None,
             model_expression=None,
             extra_columns=None):
    """
    Build a ``pd.DataFrame`` for model estimation or simulation.

    A single table name is loaded with ``get_df()``; a list of table names is
    combined with ``merge_tables()`` (see
    ``urbansim_templates.utils.merge_tables()`` for how the merges are
    performed). When a model expression or extra columns are given, only the
    columns named in the expression, the filters, or the extras are requested.
    The filters are applied last, so the output contains only matching rows.

    Parameters
    ----------
    tables : str or list of str
        Orca table(s) to draw data from.

    fallback_tables : str or list of str, optional
        Table(s) to use if ``tables`` is `None`. (This option will be removed
        shortly when estimation and simulation settings are separated.)

    filters : str or list of str, optional
        Filter(s) to apply to the merged data, using `pd.DataFrame.query()`.

    model_expression : str, optional
        Model expression that will be evaluated using the output data. Only
        used to drop non-relevant columns. PyLogit format is not yet
        supported.

    extra_columns : str or list of str, optional
        Columns to include, in addition to any in the model expression and
        filters. (If this and the model_expression are both None, all columns
        will be included.)

    Returns
    -------
    pd.DataFrame

    """
    source = fallback_tables if tables is None else tables

    # Leaving this as None asks the loaders for every column
    colnames = None
    if not (model_expression is None and extra_columns is None):
        wanted = columns_in_formula(model_expression)
        wanted = wanted + columns_in_filters(filters)
        wanted = wanted + to_list(extra_columns)
        colnames = list(set(wanted))  # de-duplicate

    if isinstance(source, list):
        merged = merge_tables(source, colnames)
    else:
        merged = get_df(source, colnames)

    return apply_filter_query(merged, filters)
Ejemplo n.º 4
0
    def _get_df(self,
                tables='unset',
                fallback_tables=None,
                filters='unset',
                model_expression=None):
        """
        Assemble a filtered data table for estimation or prediction.

        The data is read from Orca at call time, so this should be invoked
        immediately before estimation or prediction to reflect the current
        data state. A list of tables is merged with ``orca.merge_tables()``
        (the first entry is the merge target); a single table is loaded
        whole. Row filters are applied to the result.

        TO DO - this method is a generalization of _get_data(), and should
        replace it, but does not currently support column filtering or
        PyLogit model expressions.

        Parameters
        ----------
        tables : str or list of str, optional
            Name of table or tables. If not provided, `self.tables` is used.

        fallback_tables : str or list of str, optional
            Table(s) to use if the resolved ``tables`` value is `None`.

        filters : str or list of str, optional
            Filter(s) to apply. If not provided, `self.filters` is used.

        model_expression : NOT YET IMPLEMENTED
            Accepted for forward compatibility but currently ignored.

            TO DO - this needs to handle the large MNL case where there are
            two sets of data tables, so we can't use
            urbansim.models.util.columns_in_formula()

        Returns
        -------
        DataFrame

        """
        # The string 'unset' is the sentinel for "argument not supplied",
        # which keeps an explicit None distinguishable from the default.
        source = self.tables if tables == 'unset' else tables
        if source is None:
            source = fallback_tables

        row_filters = self.filters if filters == 'unset' else filters

        if not isinstance(source, list):
            frame = orca.get_table(source).to_frame()
        else:
            frame = orca.merge_tables(target=source[0], tables=source)

        return util.apply_filter_query(frame, row_filters)
Ejemplo n.º 5
0
    def _get_data(self, task='fit'):
        """
        DEPRECATED - this should be replaced by the more general utils.get_data()
        
        Generate a data table for estimation or prediction, relying on functionality from
        Orca and UrbanSim.models.util. This should be performed immediately before 
        estimation or prediction so that it reflects the current data state.
        
        The output includes only the necessary columns: those mentioned in the model
        expression or filters, plus (it appears) the index of each merged table. Relevant 
        filter queries are applied.
        
        Parameters
        ----------
        task : 'fit' or 'predict'
        
        Returns
        -------
        DataFrame
        
        """
        # TO DO - verify input data

        if isinstance(self.model_expression, str):
            expr_cols = util.columns_in_formula(self.model_expression)

        if (task == 'fit'):
            tables = self.tables
            columns = expr_cols + util.columns_in_filters(self.filters)
            filters = self.filters

        elif (task == 'predict'):
            if self.out_tables is not None:
                tables = self.out_tables
            else:
                tables = self.tables

            columns = expr_cols + util.columns_in_filters(self.out_filters)
            if self.out_column is not None:
                columns += [self.out_column]

            filters = self.out_filters

        if isinstance(tables, list):
            df = orca.merge_tables(target=tables[0],
                                   tables=tables,
                                   columns=columns)
        else:
            df = orca.get_table(tables).to_frame(columns)

        df = util.apply_filter_query(df, filters)
        return df
    def fit(self, mct=None):
        """
        Estimate the model and store the results on this object.

        Estimation uses the ChoiceModels engine (originally from UrbanSim
        MNL). `fit()` can be run as many times as desired; results will not
        be saved with Orca or ModelManager until the `register()` method is
        run.

        When no choice table is passed in, one is built here: choosers and
        alternatives are loaded with `get_data()`, choosers are optionally
        sampled down to `self.chooser_sample_size`, and alternatives are
        sampled via `self.alt_sample_size`. When a choice table is passed in,
        `self.chooser_filters` is applied to its rows first. Either way the
        table used for estimation is kept on the object for diagnostics
        (`mergedchoicetable`, a choicemodels.tools.MergedChoiceTable).

        Parameters
        ----------
        mct : choicemodels.tools.MergedChoiceTable
            This parameter is a temporary backdoor allowing us to pass in a
            more complicated choice table than can be generated within the
            template, for example including sampling weights or interaction
            terms.

        Returns
        -------
        None

        """
        check_choicemodels_version()
        from choicemodels import MultinomialLogit
        from choicemodels.tools import MergedChoiceTable

        if mct is None:
            choosers = get_data(tables=self.choosers,
                                filters=self.chooser_filters,
                                model_expression=self.model_expression,
                                extra_columns=self.choice_column)

            n_choosers = self.chooser_sample_size
            if n_choosers is not None:
                choosers = choosers.sample(n_choosers)

            alts = get_data(tables=self.alternatives,
                            filters=self.alt_filters,
                            model_expression=self.model_expression)

            mct = MergedChoiceTable(observations=choosers,
                                    alternatives=alts,
                                    chosen_alternatives=self.choice_column,
                                    sample_size=self.alt_sample_size)

        else:
            # Flatten the index so the filters can see the index levels as
            # ordinary columns, then restore the same index afterwards.
            merged = mct.to_frame()
            index_names = merged.index.names
            filtered = apply_filter_query(merged.reset_index(),
                                          self.chooser_filters)
            mct = MergedChoiceTable.from_df(filtered.set_index(index_names))

        estimator = MultinomialLogit(data=mct,
                                     model_expression=self.model_expression)
        results = estimator.fit()

        self.name = self._generate_name()
        self.summary_table = str(results)
        print(self.summary_table)

        raw = results.get_raw_results()
        self.fitted_parameters = raw['fit_parameters']['Coefficient'].tolist()
        self.model = results

        # Keep the merged choice table around for diagnostics
        self.mergedchoicetable = mct
Ejemplo n.º 7
0
 def apply_predict_filters(self, choosers):
     """
     Return ``choosers`` filtered by ``self.choosers_predict_filters``.

     The filtering itself is delegated to ``util.apply_filter_query``.

     """
     return util.apply_filter_query(
         choosers, self.choosers_predict_filters)
Ejemplo n.º 8
0
def get_data(tables,
             fallback_tables=None,
             filters=None,
             model_expression=None,
             extra_columns=None):
    """
    Generate a pd.DataFrame from one or more tables registered with Orca. Templates should
    call this function immediately before the data is needed, so that it's as up-to-date
    as possible.

    If filters are provided, the output will include only rows that match the filter
    criteria.

    Default behavior is for the output to include all columns. If a model_expression
    and/or extra_columns is provided, non-relevant columns will be dropped from the
    output. Relevant columns include any mentioned in the model expression, filters, or
    list of extras. Join keys will *not* be included in the final output even if the data
    is drawn from multiple tables, unless they appear in the model expression or filters
    as well.

    If a named column is not found in the source tables, it will just be skipped. This is
    to support use cases where data is assembled separately for choosers and alternatives
    and then merged together -- the model expression would include terms from both sets
    of tables.

    Duplicate column names are not recommended -- columns are expected to be unique within
    the set of tables they're being drawn from, with the exception of join keys. If column
    names are repeated, current behavior is to follow the Orca default and keep the
    left-most copy of the column. This may change later and should not be relied on.

    Parameters
    ----------
    tables : str or list of str
        Orca table(s) to draw data from.

    fallback_tables : str or list of str, optional
        Table(s) to use if first parameter evaluates to `None`. (This option will be
        removed shortly when estimation and simulation settings are separated.)

    filters : str or list of str, optional
        Filter(s) to apply to the merged data, using `pd.DataFrame.query()`.

    model_expression : str, optional
        Model expression that will be evaluated using the output data. Only used to drop
        non-relevant columns. PyLogit format is not yet supported.

    extra_columns : str or list of str, optional
        Columns to include, in addition to any in the model expression and filters. (If
        this and the model_expression are both None, all columns will be included.)

    Returns
    -------
    pd.DataFrame

    """
    if tables is None:
        tables = fallback_tables

    tables = to_list(tables)
    colnames = None  # this will get all columns from Orca utilities

    if (model_expression is not None) or (extra_columns is not None):
        requested = set(columns_in_formula(model_expression)
                        + columns_in_filters(filters)
                        + to_list(extra_columns))

        # skip cols not found in any of the source tables - have to check for this
        # explicitly because the orca utilities will raise an error if we request column
        # names that aren't there
        available = set()
        for t in tables:
            dfw = orca.get_table(t)
            available.update(dfw.index.names)
            available.update(dfw.columns)

        colnames = [c for c in requested if c in available]

    if len(tables) == 1:
        df = orca.get_table(table_name=tables[0]).to_frame(columns=colnames)

    else:
        df = orca.merge_tables(target=tables[0],
                               tables=tables,
                               columns=colnames)

    if colnames is not None and len(df.columns) > len(colnames):
        # Drop the extra columns (e.g. join keys) that came along with the merge. A
        # requested name may refer to an index level rather than a column, so select
        # only names that are real columns to avoid a KeyError.
        df = df[[c for c in colnames if c in df.columns]]

    df = apply_filter_query(df, filters)
    return df
Ejemplo n.º 9
0
    def _get_data(self, task='fit'):
        """
        DEPRECATED - this should be replaced by the more general _get_df()
        
        Generate a data table for estimation or prediction, relying on functionality from
        Orca and UrbanSim.models.util. This should be performed immediately before 
        estimation or prediction so that it reflects the current data state.
        
        The output includes only the necessary columns: those mentioned in the model
        expression or filters, plus (it appears) the index of each merged table. Relevant 
        filter queries are applied.
        
        Parameters
        ----------
        task : 'fit' or 'predict'
        
        Returns
        -------
        DataFrame
        
        """
        # TO DO - verify input data

        if isinstance(self.model_expression, str):
            expr_cols = util.columns_in_formula(self.model_expression)

        # This is for PyLogit model expressions
        elif isinstance(self.model_expression, OrderedDict):
            # TO DO - check that this works in Python 2.7
            expr_cols = [t[0] for t in list(self.model_expression.items()) \
                         if t[0] is not 'intercept']
            # TO DO - not very general, maybe we should just override the method
            # TO DO - and this only applies to the fit condition
            if self.choice_column is not None:
                expr_cols += [self.choice_column]

        if (task == 'fit'):
            tables = self.tables
            columns = expr_cols + util.columns_in_filters(self.filters)
            filters = self.filters

        elif (task == 'predict'):
            if self.out_tables is not None:
                tables = self.out_tables
            else:
                tables = self.tables

            columns = expr_cols + util.columns_in_filters(self.out_filters)
            if self.out_column is not None:
                columns += [self.out_column]

            filters = self.out_filters

        if isinstance(tables, list):
            df = orca.merge_tables(target=tables[0],
                                   tables=tables,
                                   columns=columns)
        else:
            df = orca.get_table(tables).to_frame(columns)

        df = util.apply_filter_query(df, filters)
        return df