def testDefaultsExist(self):
        """Check that the default semantics are accessible and well-typed."""

        # Build each semantics container without arguments so that only the
        # built-in defaults are in play.
        names = semantics.DataFrameNameMapping()
        group_ids = semantics.GroupSemantics()
        period_ids = semantics.PeriodSemantics()

        # Every default column / index name must be a string.
        for attr in ('geo', 'group', 'period', 'response', 'cost',
                     'incr_response', 'incr_cost'):
            self.assertIsInstance(getattr(names, attr), str)

        # Every default group identifier must be an integer.
        for attr in ('control', 'treatment', 'unassigned'):
            self.assertIsInstance(getattr(group_ids, attr), int)

        # Every default period identifier must be an integer.
        for attr in ('pre', 'test', 'cooldown', 'unassigned'):
            self.assertIsInstance(getattr(period_ids, attr), int)
예제 #2
0
    def fit(self, data_frame, target, **kwargs):
        """Fit the TBR model for one target column of the data frame.

        How the data frame is interpreted (column / index names and the values
        that encode groups and periods) can be overridden through `**kwargs`.
        Keyword arguments are selected by prefix ('key_', 'group_', 'period_')
        with `utils.kwarg_subdict` and handed to the matching `semantics`
        class.

        Args:
          data_frame: a pandas.DataFrame. Should contain the columns and
            indices described by the `**kwargs` below. Only one of response
            or cost needs to be present, namely the one named by `target`.
            Must be indexed by date.
          target: `str`. The name of the column to be analysed.
          **kwargs: optional column/index names and related semantics:
            key_geo='geo' - geo data frame index name.
            key_period='period' - experimental period column name.
            key_group='group' - group assignment column name.
            key_cost='cost' - cost column name.
            key_response='response' - response column name.
            key_date='date' - date index name.
            key_incr_cost='_incr_cost' - incremental cost column name.
            key_incr_response='_incr_response' - incremental response column
              name.
            group_control=1 - value representing the control group.
            group_treatment=2 - value representing the treatment group.
            period_pre=0 - value representing the pre-test period.
            period_test=1 - value representing the test period.
            period_cool=2 - value representing the cooldown period.
        """

        # Remember which column this model analyses.
        self.target = target

        # Column / index names, taken from any 'key_*' keyword arguments.
        self.df_names = semantics.DataFrameNameMapping(
            **utils.kwarg_subdict('key_', **kwargs))

        # Group encodings, taken from any 'group_*' keyword arguments.
        self.groups = semantics.GroupSemantics(
            **utils.kwarg_subdict('group_', **kwargs))

        # Period encodings, taken from any 'period_*' keyword arguments.
        self.periods = semantics.PeriodSemantics(
            **utils.kwarg_subdict('period_', **kwargs))

        # Build the analysis data first, then run the pre-period model fit.
        self._construct_analysis_data(data_frame)
        self._fit_pre_period_model()
    def setUp(self):
        """Prepare the fixtures shared by every test method of the class."""

        super(TBRiROASTest, self).setUp()

        # Read the formatted salesandcost example data from the csv directory.
        self.data = salesandcost.example_data_formatted(
            os.path.join("", 'matched_markets/csv/'))

        # Names of the value columns in the salesandcost example.
        self.key_response, self.key_cost = 'sales', 'cost'

        # Names of the grouping columns / indices in the salesandcost example.
        self.key_group, self.key_period = 'geo.group', 'period'
        self.key_geo, self.key_date = 'geo', 'date'

        # Default encodings for groups and periods.
        self.groups = semantics.GroupSemantics()
        self.periods = semantics.PeriodSemantics()
예제 #4
0
  def fit(self, data_frame, **kwargs):
    """Fit the TBRiROAS model by fitting its response and cost TBR models.

    How the data frame is interpreted can be overridden through `**kwargs`.
    Keyword arguments are selected by prefix ('key_', 'group_', 'period_')
    with `utils.kwarg_subdict` and handed to the matching `semantics` class;
    the complete `**kwargs` are also forwarded to both underlying models.

    Args:
      data_frame: a pandas.DataFrame. Should contain the columns and indices
        described by the `**kwargs` below. Must be indexed by date.
      **kwargs: optional column/index names and related semantics:
        key_geo='geo' - geo data frame index name.
        key_period='period' - experimental period column name.
        key_group='group' - group assignment column name.
        key_cost='cost' - cost column name.
        key_response='response' - response column name.
        key_date='date' - date index name.
        key_incr_cost='_incr_cost' - incremental cost column name.
        key_incr_response='_incr_response' - incremental response column name.
        group_control=1 - value representing the control group in the data.
        group_treat=2 - value representing the treatment group in the data.
        period_pre=0 - value representing the pre-test period in the data.
        period_test=1 - value representing the test period in the data.
        period_cool=2 - value representing the cooldown period in the data.

    NOTE(review): the exact spelling accepted for the treatment and cooldown
    keys depends on the arguments of semantics.GroupSemantics and
    semantics.PeriodSemantics - confirm against those classes.
    """

    # Column / index names, taken from any 'key_*' keyword arguments.
    self.df_names = semantics.DataFrameNameMapping(
        **utils.kwarg_subdict('key_', **kwargs))

    # Group encodings, taken from any 'group_*' keyword arguments.
    self.groups = semantics.GroupSemantics(
        **utils.kwarg_subdict('group_', **kwargs))

    # Period encodings, taken from any 'period_*' keyword arguments.
    self.periods = semantics.PeriodSemantics(
        **utils.kwarg_subdict('period_', **kwargs))

    # Fit one TBR model on the response column, then one on the cost column.
    # Each call receives the untouched user kwargs.
    self.tbr_response.fit(data_frame, self.df_names.response, **kwargs)
    self.tbr_cost.fit(data_frame, self.df_names.cost, **kwargs)
    def __init__(
            self,
            n_control,
            n_treat,
            time_pre,
            time_test,  # no cooldown as yet
            hetresp,
            hetcost,
            beta,
            hetsked,
            sig_resp,
            sig_cost,
            noise_treat_only=False,
            seed=None,
            **kwargs):
        """Creates a data simulator.

        Args:
          n_control: int. The number of control geos.
          n_treat: int. The number of treatment geos.
          time_pre: int. The number of pre-test period ticks.
          time_test: int. The number of test period ticks.
          hetresp: float. The degree of mean response variable heterogeneity.
          hetcost: float. The degree of mean cost variable heterogeneity.
          beta: float. The iROAS coefficient to be used.
          hetsked: float. The degree of heteroskedasticity in cost and
            response.
          sig_resp: float. The noise level in the response variable.
          sig_cost: float. The noise level in the cost variable.
          noise_treat_only: bool. Whether to add noise only in the treatment
            period.
          seed: int. Sets the seed of the random number generator. When None,
            a seed is drawn from numpy's global random state.
          **kwargs: optional semantics for the produced data frame. Keys are
            selected by the prefixes 'key_', 'group_' and 'period_' and passed
            to the corresponding `semantics` class.
        """
        # Constants.
        self.n_control = n_control
        self.n_treat = n_treat
        self.time_pre = time_pre
        self.time_test = time_test
        self.time_total = time_pre + time_test

        # Model parameters.
        self.hetresp = hetresp
        self.hetcost = hetcost
        self.beta = beta
        self.hetsked = hetsked
        self.sig_resp = sig_resp
        self.sig_cost = sig_cost

        # Derived facts.
        self.n_total = self.n_treat + self.n_control
        self.col_len = self.n_total * self.time_total

        # Extract any column / index name information supplied by the user.
        user_df_names = utils.kwarg_subdict('key_', **kwargs)
        self._df_names = semantics.DataFrameNameMapping(**user_df_names)

        # Options.
        self.noise_treat_only = noise_treat_only

        # Extract any semantics for control / treatment supplied by user.
        user_group_semantics = utils.kwarg_subdict('group_', **kwargs)
        self._groups = semantics.GroupSemantics(**user_group_semantics)

        # Extract any semantics for experimental period supplied by user.
        user_period_semantics = utils.kwarg_subdict('period_', **kwargs)
        self._periods = semantics.PeriodSemantics(**user_period_semantics)

        if seed is None:
            # Draw the seed from numpy's global generator, so a session that
            # seeded the global state stays reproducible. The dtype is pinned
            # to int64 because the platform default integer may be 32-bit
            # (e.g. Windows with NumPy < 2), in which case an upper bound of
            # 2**32 is out of range and randint raises ValueError.
            seed = int(np.random.randint(0, 2**32, dtype=np.int64))
        self._rng = np.random.RandomState(seed=seed)
예제 #6
0
    def fit(self, data_frame, target=None, **kwargs):
        """Runs the TBR diagnostics suite.

    This method executes the following diagnostics: (1) detect and remove the
    disrupted geos; (2) detect and remove the outlier time points (3)
    correlation test and (4) the structural stability (A/A) test removing part
    of the pre-test period. The results of these diagnostics are stored in the
    _test_results attribute. The resulting modified data frame is stored in the
    _data attribute and accessible via the get_data() method.

    Note. This method makes a copy of the original data_frame, and it doesn't
    modify the original.

    See optional kwargs for interpretation of the data frame.

    Args:
      data_frame: (pandas.DataFrame) Should contain the columns and indices
        corresponding to the **kwargs information below. Only the column
        corresponding to the supplied `target` needs to be present. Must be
        indexed by date.
      target: (str) name of the target metric (data frame column). If not
        specified, the column specified as key_response will be assumed.
      **kwargs: optional column/index names for the data and related semantics:
        key_geo (string) column name for geo (default: 'geo').
        key_period (string) column name for period (default: 'period').
        key_group (string) column name for group (default: 'group').
        key_response (string) response column name (default: 'response').
        key_date (string) date index name (default: 'date').
        group_control (int) control group id (default: 1).
        group_treat (int) treatment group id (default: 2).
        period_pre (int) pre-test period id (default: 0).
        period_test (int) test period id (default: 1).
        period_cool (int) cooldown period id (default: 2).
    """
        # Work on a copy so the caller's data frame is never modified.
        self._data = data_frame.copy()

        # Column / index names, taken from any 'key_*' keyword arguments.
        user_df_names = utils.kwarg_subdict('key_', **kwargs)
        self._df_names = semantics.DataFrameNameMapping(**user_df_names)

        # Group encodings, taken from any 'group_*' keyword arguments.
        user_group_semantics = utils.kwarg_subdict('group_', **kwargs)
        self._groups = semantics.GroupSemantics(**user_group_semantics)

        # Period encodings, taken from any 'period_*' keyword arguments.
        user_period_semantics = utils.kwarg_subdict('period_', **kwargs)
        self._periods = semantics.PeriodSemantics(**user_period_semantics)

        # Fall back to the configured response column when no target is given.
        if target is None:
            target = self._df_names.response
        self._target = target

        # Diagnostic 1: find the geos to drop.
        remove_geos = self._detect_noisy_geos(iqr_coef=1.5, max_threshold=0.5)

        # NOTE(review): results are written to self._diagnostics, while the
        # docstring refers to a _test_results attribute - confirm which name
        # is current.
        self._diagnostics['noisy_geos'] = remove_geos

        if remove_geos:
            # NOTE(review): geo is read here as a column of self._data -
            # confirm this matches the expected layout of data_frame.
            exclude = self._data[self._df_names.geo].isin(remove_geos)
            self._data = self._data[~exclude]

        # Build the analysis data from the (possibly reduced) data frame.
        self._create_analysis_data()

        # Diagnostic 2: find the dates to drop.
        remove_dates = self._detect_outliers(max_prob=0.1)
        self._diagnostics['outlier_dates'] = remove_dates

        if remove_dates:
            # NOTE(review): date is read here as a column, whereas the
            # docstring asks for a date index - confirm the expected layout.
            exclude_dates = self._data[self._df_names.date].isin(remove_dates)
            self._data = self._data[~exclude_dates]
            # The analysis data must be rebuilt after rows were removed.
            self._create_analysis_data()

        # Diagnostic 3: correlation test on the cleaned data.
        self._diagnostics['corr_test'] = self._correlation_test(
            min_cor=0.5, prefer_cor=0.8, credible_level=0.95)