def test_get_data_single_table(orca_session):
    """
    Request one table by name, passing no other arguments, and check the row count.

    """
    households = utils.get_data(tables='households')
    assert len(households) == 3
Example #2
0
    def run(self):
        """
        Run the model step: simulate a choice for each observation and write the
        results to a column.

        Alternatives that are present in the estimation data but absent from the model
        expression are not available for simulation.

        PyLogit supplies the predicted probabilities; the Monte Carlo draw of choices is
        done here directly. (This functionality will move to ChoiceModels.)

        For interactive use, the predicted probabilities and the simulated choices are
        kept on the object (`probabilities` as a pd.DataFrame, `choices` as a pd.Series).
        They are not persisted in the dictionary representation of the model step.

        """
        # Every term of the model expression except the intercept is a data column
        expr_cols = [name for name, _ in self.model_expression.items()
                     if name != 'intercept']

        df = get_data(
            tables=self.out_tables,
            fallback_tables=self.tables,
            filters=self.out_filters,
            extra_columns=expr_cols,
        )
        long_df = self._to_long(df, 'predict')

        num_obs = len(df)
        num_alts = len(self._get_alts())

        # Predictions from the underlying model: an ndarray aligned with the rows of the
        # long-format df, holding the choice probability of each alternative
        probs = self.model.predict(long_df)

        # Simulate choices, adapting an approach from UrbanSim MNL
        # https://github.com/UDST/choicemodels/blob/master/choicemodels/mnl.py#L578-L583
        #
        # One row per observation, one column per alternative, accumulated across the
        # alternatives; then one uniform draw per observation is subtracted from its row.
        cumprobs = probs.reshape((num_obs, num_alts)).cumsum(axis=1)
        rands = np.random.random(num_obs)
        diff = cumprobs - rands[:, np.newaxis]

        # Adding 1 and truncating to int maps negative differences to 0 and positive
        # ones to 1, so argmax gives the position of the first positive value
        first_positive = np.argmax((diff + 1.0).astype('i4'), axis=1)

        # Convert per-observation positions into positions in the flat long-format data
        flat_positions = np.arange(num_obs) * num_alts + first_positive
        choices = long_df['_alt_id'].values.take(flat_positions)

        # Store results on the object, going through the DataFrames to keep the indexes
        long_df['_probability'] = probs
        self.probabilities = long_df[['_obs_id', '_alt_id', '_probability']]
        df['_choices'] = choices
        self.choices = df['_choices']

        # Write the simulated choices to Orca
        update_column(
            table=self.out_tables,
            fallback_table=self.tables,
            column=self.out_column,
            fallback_column=self.choice_column,
            data=self.choices,
        )
def test_get_data_bad_columns(orca_session):
    """
    The model expression names a column that should not be found ('potato'); the
    result is expected to contain only the remaining columns.

    """
    expected_columns = {'tenure', 'pop'}

    result = utils.get_data(
        tables=['households', 'buildings'],
        model_expression='tenure ~ pop + potato',
    )

    assert set(result.columns) == expected_columns
def test_alternative_filters_for_alts_as_list(m_alts_as_list):
    """
    Set a default alternative filter, build the submodels, and confirm that loading
    each submodel's alternatives with its filter yields only rows that satisfy it.

    """
    model = m_alts_as_list
    model.defaults.alt_filters = 'altval_2 < 0.5'
    model.build_submodels()

    # Only the submodel objects are needed here, not their keys
    for submodel in model.submodels.values():
        filtered = get_data(tables=submodel.alternatives,
                            filters=submodel.alt_filters)
        assert filtered['altval_2'].max() < 0.5
def test_get_data(orca_session):
    """
    General case: several tables, a model expression, two filters that both apply,
    and an extra column.

    """
    result = utils.get_data(
        tables=['households', 'buildings'],
        model_expression='tenure ~ pop',
        filters=['age > 20', 'age < 50'],
        extra_columns='zone_id',
    )

    assert set(result.columns) == {'tenure', 'pop', 'age', 'zone_id'}
    assert len(result) == 2
Example #6
0
    def run(self):
        """
        Save a table to disk.

        Saving a table to an HDF store requires providing a ``key`` that will be used to
        identify the table in the store. We'll use the Orca table name, unless you
        provide a different ``key`` in the ``extra_settings``.

        The ``extra_settings`` are passed through as keyword arguments to the pandas
        writer. They are copied first, so running the step never modifies
        ``self.extra_settings``.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If ``output_type`` is not 'csv' or 'hdf', or if ``table`` or ``path`` is
            None.

        """
        if self.output_type not in ['csv', 'hdf']:
            raise ValueError("Please provide an output type of 'csv' or 'hdf'")

        if self.table is None:
            raise ValueError("Please provide the table name")

        if self.path is None:
            raise ValueError("Please provide a file path")

        # Work on a copy: the default HDF 'key' added below must not leak back into the
        # step's stored settings, where it would go stale if the table name changes
        # and would show up as if the user had provided it.
        if self.extra_settings is None:
            kwargs = {}
        else:
            kwargs = dict(self.extra_settings)

        df = get_data(tables=self.table,
                      filters=self.filters,
                      extra_columns=self.columns)

        if self.output_type == 'csv':
            df.to_csv(self.get_dynamic_filepath(), **kwargs)

        elif self.output_type == 'hdf':
            # Fall back to the table name when the caller has not chosen a key
            kwargs.setdefault('key', self.table)

            df.to_hdf(self.get_dynamic_filepath(), **kwargs)
Example #7
0
    def fit(self):
        """
        Estimate the model, then store and print the results. Estimation is done by
        PyLogit, accessed through ChoiceModels.

        `fit()` may be called repeatedly. Nothing is saved with Orca or ModelManager
        until `register()` is run.

        """
        # Every term of the model expression except the intercept is a data column
        expr_cols = [name for name, _ in self.model_expression.items()
                     if name != 'intercept']

        df = get_data(
            tables=self.tables,
            filters=self.filters,
            extra_columns=[*expr_cols, self.choice_column],
        )
        long_df = self._to_long(df)

        # Start from all-zero coefficients when none were given, or when the number
        # given does not match the parameter count
        param_count = self._get_param_count()
        coefs = self.initial_coefs
        if coefs is None or len(coefs) != param_count:
            self.initial_coefs = np.zeros(param_count).tolist()

        model = MultinomialLogit(
            data=long_df,
            observation_id_col='_obs_id',
            choice_col='_chosen',
            model_expression=self.model_expression,
            model_labels=self.model_labels,
            alternative_id_col='_alt_id',
            initial_coefs=self.initial_coefs,
        )
        results = model.fit()

        self.name = self._generate_name()

        summary = str(results.report_fit())
        self.summary_table = summary
        print(summary)

        # Prediction needs the fitted PyLogit model object itself, so keep it directly
        self.model = results.get_raw_results()