def testColumnNames(self):
        """Verifies how GeoEligibility validates its column names.

        Exercises a wrongly cased name, a missing column, an additional column
        (accepted) and a duplicated column name.
        """
        # Names are matched case-sensitively: 'Control' does not count.
        wrong_case = self.df.copy()
        wrong_case.columns = ['Control'] + list(wrong_case.columns)[1:]
        with self.assertRaisesRegex(ValueError,
                                    r'Missing column\(s\): control'):
            geoeligibility.GeoEligibility(wrong_case)

        # Dropping a required column is an error.
        without_exclude = self.df.copy()
        del without_exclude['exclude']
        with self.assertRaisesRegex(ValueError,
                                    r'Missing column\(s\): exclude'):
            geoeligibility.GeoEligibility(without_exclude)

        # An extra, unrelated column is tolerated: no exception expected.
        geoeligibility.GeoEligibility(self.df.assign(newcolumn=1))

        # Two columns sharing the same name are rejected.
        duplicated = self.df.assign(newcolumn=1)
        duplicated.columns = ['control', 'treatment', 'exclude', 'control']
        with self.assertRaisesRegex(ValueError,
                                    r'Duplicate column\(s\): control'):
            geoeligibility.GeoEligibility(duplicated)
    def testBadValues(self):
        """Verifies that illegal entries in the value columns are rejected."""
        bad = self.df.copy()

        # Entries other than 0 and 1 (here: -1) are not accepted.
        bad.loc['G1'] = [1, 0, -1]
        values_error = ('GeoEligibility objects must have only values '
                        '0, 1 in columns control, treatment, exclude')
        with self.assertRaisesRegex(ValueError, values_error):
            geoeligibility.GeoEligibility(bad)

        # A row consisting of three zeros is illegal as well.
        bad.loc['G1'] = [0, 0, 0]
        zeros_error = r'Three zeros found for geo\(s\) G1'
        with self.assertRaisesRegex(ValueError, zeros_error):
            geoeligibility.GeoEligibility(bad)
    def setUp(self):
        """Builds a valid geo eligibility data frame for the tests.

        Stores the frame as self.df and a GeoEligibility object built from it
        as self.obj.
        """
        super(GeoEligibilityTest, self).setUp()

        # The 7 valid assignments, one per geo ((0, 0, 0) is invalid).
        assignments = [
            ('G0', [1, 0, 0]),  # Control only.
            ('G1', [0, 1, 0]),  # Treatment only.
            ('G2', [0, 0, 1]),  # Excluded only.
            ('G3', [1, 1, 0]),  # Control or Treatment.
            ('G4', [0, 1, 1]),  # Treatment or Excluded.
            ('G5', [1, 0, 1]),  # Control or Excluded.
            ('G6', [1, 1, 1]),  # Control, Treatment, or Excluded.
        ]

        # Start from an all-zero frame indexed by geo, then fill in each row.
        frame = pd.DataFrame(
            {
                'geo': ['G%d' % i for i in range(7)],
                'control': 0,
                'treatment': 0,
                'exclude': 0
            },
            columns=['geo', 'control', 'treatment', 'exclude'])
        frame = frame.set_index('geo')
        for geo, flags in assignments:
            frame.loc[geo] = flags

        self.df = frame
        # Constructing the object here also checks that the frame is valid.
        self.obj = geoeligibility.GeoEligibility(frame)
  def testExhaustiveSearchGeoEligibility(self):
    """Exhaustive search respects the geo eligibility constraints."""
    # Unconstrained, the optimal design would be treatment = {'1'} and
    # control = {'2'}, see testExhaustiveSearchFindsOptimalDesign.
    eligibility = self.default_geo_eligibility_data
    must_include = (
        ('1', [1, 1, 0]),  # Cannot exclude geo 1.
        ('2', [1, 0, 0]),  # Cannot exclude geo 2.
        ('3', [0, 1, 0]),  # Cannot exclude geo 3.
    )
    for geo, flags in must_include:
      eligibility.loc[geo] = flags
    # Geos 1 and 2 are correlated and geo 3 is approx. independent of both.
    # Geo '2' is fixed to control, and geos '1' and '2' should be in different
    # groups to achieve high correlation, so the expected optimum is
    # treatment = {'1', '3'} and control = {'2'}.
    geo_elig = geoeligibility.GeoEligibility(eligibility)
    data = TBRMMData(self.df, 'response', geo_elig)
    designs = TBRMatchedMarkets(data, self.par).exhaustive_search()

    # Recompute the diagnostics of the expected design independently.
    diag = TBRMMDiagnostics(data.aggregate_time_series({0, 2}), self.par)
    diag.x = data.aggregate_time_series({1})
    corr = diag.corr
    required_impact = diag.required_impact

    best = designs[0]
    self.assertSetEqual(best.treatment_geos, {'1', '3'})
    self.assertSetEqual(best.control_geos, {'2'})
    expected_score = (1, 1, 1, 1, round(corr, 2), 1/required_impact)
    self.assertTupleEqual(best.score.score, expected_score)
    def testGeoColumn(self):
        """Verifies that 'geo' must exist, either as the index or a column."""
        no_geo_error = r'There is no column or index \'geo\''

        # The name is case sensitive: an index called 'Geo' does not qualify.
        renamed = self.df.copy()
        renamed.index.name = 'Geo'
        with self.assertRaisesRegex(ValueError, no_geo_error):
            geoeligibility.GeoEligibility(renamed)

        # Nor does a regular column called 'Geo'.
        renamed = renamed.reset_index()
        with self.assertRaisesRegex(ValueError, no_geo_error):
            geoeligibility.GeoEligibility(renamed)

        # 'geo' as a regular column is accepted: no exception expected.
        geo_as_column = self.df.copy().reset_index()
        geoeligibility.GeoEligibility(geo_as_column)
 def testOneGeoExcluded_SizeUnbounded(self):
     """Counts designs with geo '0' excluded and group sizes only >= 1."""
     eligibility = self.df_geo_elig
     eligibility.loc['0'] = [0, 0, 1]  # Group 'x_fixed'.
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     matched_markets = TBRMatchedMarkets(
         TBRMMData(self.df, self.response, geo_elig), self.par)
     # With n freely assignable geos (group 'ctx') the count is
     # 3^n - 2^(n + 1) + 1, which for n == 4 is 81 - 32 + 1 == 50.
     n_designs = matched_markets.count_max_designs()
     self.assertEqual(n_designs, 50)
 def setUp(self):
     """Creates self.mm (default data) and self.mmfix (geo '3' in Treatment)."""
     super().setUp()
     self.mm = TBRMatchedMarkets(self.data, self.par)

     # Fix geo '3' to the Treatment group. Ordered by size (in terms of
     # required budget), geo '3' will be index 1.
     fixed_eligibility = self.data.geo_eligibility.data.copy()
     fixed_eligibility.loc['3'] = [0, 1, 0]
     fixed_data = TBRMMData(self.df, self.response,
                            geoeligibility.GeoEligibility(fixed_eligibility))
     self.mmfix = TBRMatchedMarkets(fixed_data, self.par)
 def testOneGeoTX_SizeUnbounded(self):
     """Counts designs with geo '0' in group 'tx' and group sizes only >= 1."""
     eligibility = self.df_geo_elig
     eligibility.loc['0'] = [0, 1, 1]  # Treatment or Excluded.
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     matched_markets = TBRMatchedMarkets(
         TBRMMData(self.df, self.response, geo_elig), self.par)
     # n freely assignable geos plus one geo in group 'tx':
     # 2 * (3^n - 2^(n+1) + 1) + 2^n - 1, which for n == 4 is
     # 2 * 50 + 16 - 1 == 115.
     n_designs = matched_markets.count_max_designs()
     self.assertEqual(n_designs, 115)
 def testOneTreatmentFixedDefault_SizeUnbounded(self):
     """One treatment geo fixed and no group size restrictions except >= 1."""
     df_geo_elig = self.df_geo_elig
     # Columns are (control, treatment, exclude): [0, 1, 0] fixes geo '0' to
     # Treatment. ([1, 0, 0] would fix it to Control, which is what
     # testOneControlGeoFixed_SizeUnbounded covers.)
     df_geo_elig.loc['0'] = [0, 1, 0]
     geo_elig = geoeligibility.GeoEligibility(df_geo_elig)
     data = TBRMMData(self.df, self.response, geo_elig)
     mm = TBRMatchedMarkets(data, self.par)
     # n = number of freely assignable geos (geos in 'ctx'). One fixed.
     # Treatment is already non-empty, so only assignments leaving Control
     # empty are ruled out: 3^n - 2^n == 3^4 - 2^4 == 65.
     self.assertEqual(mm.count_max_designs(), 65)
 def testDuplicateGeos(self):
     """Verifies that a repeated geo in the 'geo' index is rejected."""
     duplicated = self.df.copy()
     labels = duplicated.index.tolist()
     # Overwrite the second label so that 'G0' appears twice.
     duplicated.index = labels[:1] + ['G0'] + labels[2:]
     duplicated.index.name = 'geo'
     expected_error = r'\'geo\' has duplicate values: G0'
     with self.assertRaisesRegex(ValueError, expected_error):
         geoeligibility.GeoEligibility(duplicated)
 def testOneGeoCT_SizeUnbounded(self):
     """Counts designs with geo '0' in group 'ct' and group sizes only >= 1."""
     eligibility = self.df_geo_elig
     eligibility.loc['0'] = [1, 1, 0]  # Control or Treatment.
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     matched_markets = TBRMatchedMarkets(
         TBRMMData(self.df, self.response, geo_elig), self.par)
     # n freely assignable geos (group 'ctx') plus one geo in 'ct':
     # 2 * (3^n - 2^(n+1) + 2^n) == 2 * (3^4 - 2^5 + 2^4) == 2 * 65 == 130.
     n_designs = matched_markets.count_max_designs()
     self.assertEqual(n_designs, 130)
 def testGeosMustInclude(self):
     """Verifies that geos which cannot be excluded are reported."""
     eligibility = self.default_geo_eligibility_data
     # None of these rows allows exclusion (third flag is 0).
     non_excludable = (
         ('1', [1, 1, 0]),
         ('2', [1, 0, 0]),
         ('3', [0, 1, 0]),
     )
     for geo, flags in non_excludable:
         eligibility.loc[geo] = flags
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     matched_markets = TBRMatchedMarkets(
         TBRMMData(self.df, self.response, geo_elig),
         TBRMMDesignParameters(n_test=14, iroas=3.0))
     self.assertCountEqual(matched_markets.geos_must_include, {'1', '2', '3'})
 def testOneControlGeoFixed_SizeUnbounded(self):
     """Counts designs with geo '0' fixed to control, group sizes only >= 1."""
     # Default eligibility except for one fixed control geo (c_fixed).
     eligibility = self.df_geo_elig
     eligibility.loc['0'] = [1, 0, 0]
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     matched_markets = TBRMatchedMarkets(
         TBRMMData(self.df, self.response, geo_elig), self.par)
     # With n freely assignable geos (group 'ctx') and one fixed geo the
     # count is 3^n - 2^n, which for n == 4 is 81 - 16 == 65.
     n_designs = matched_markets.count_max_designs()
     self.assertEqual(n_designs, 65)
    def setUp(self):
        """Creates the fixtures data4/mm4 and datafix/mmfix.

        data4/mm4 use an eligibility frame in which geo '1' is excluded;
        datafix/mmfix use one with geo '1' fixed to control and geo '2'
        restricted to Control or Treatment.
        """
        super().setUp()

        # Object mm4: 4 geos.
        # Geo '1' is excluded from the set, leaving 4 geos for testing.
        # Note: the remaining 4 geos will be reindexed as 0, 1, 2, 3.
        elig4 = self.data.geo_eligibility.data.copy()
        elig4.loc['1'] = [0, 0, 1]
        self.data4 = TBRMMData(self.df, self.response,
                               geoeligibility.GeoEligibility(elig4))
        self.mm4 = TBRMatchedMarkets(self.data4, self.par)

        # Object mmfix: 3 geos + 1 fixed to control + 1 in group 'ct'.
        elig_fix = self.data.geo_eligibility.data.copy()
        elig_fix.loc['1'] = [1, 0, 0]  # Geo index 3, assigned to control.
        elig_fix.loc['2'] = [1, 1, 0]  # Geo index 2, Control or Treatment only.
        self.datafix = TBRMMData(self.df, self.response,
                                 geoeligibility.GeoEligibility(elig_fix))
        self.mmfix = TBRMatchedMarkets(self.datafix, self.par)
    def testExcludedGeo(self):
        """Verifies that an excluded geo is absent from geo_assignments.

        Geo '2' is put in group 'x_fixed'; x_fixed is then expected to be empty
        and the remaining geo indices to be renumbered as 0..3.
        """
        eligibility = self.default_geo_eligibility_data
        eligibility.loc['2'] = [0, 0, 1]  # Group 'x_fixed'.
        geo_elig = geoeligibility.GeoEligibility(eligibility)
        matched_markets = TBRMatchedMarkets(
            TBRMMData(self.df, self.response, geo_elig), self.par)
        assignments = matched_markets.geo_assignments
        self.assertFalse(assignments.x_fixed)
        self.assertCountEqual(assignments.all, {0, 1, 2, 3})
 def testNonEligibilityOverridesGeoOverBudget(self):
     """A geo over the budget limit is kept if it is not eligible to exclude."""
     eligibility = self.default_geo_eligibility_data
     eligibility.loc['4'] = [0, 1, 0]  # Cannot exclude geo 4.
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     data = TBRMMData(self.df, self.response, geo_elig)

     iroas = 2.5
     # This maximum budget would by itself exclude geos '3' and '4'.
     budget_max = self.impact['2'] / iroas
     design_par = TBRMMDesignParameters(
         n_test=14, iroas=iroas, budget_range=(0.1, budget_max))

     matched_markets = TBRMatchedMarkets(data, design_par)
     # Geo '4' remains although it is over budget; geo '3' is dropped.
     self.assertCountEqual(matched_markets.geos_within_constraints,
                           {'0', '1', '2', '4'})
 def testNonEligibilityOverridesTooLargeGeo(self):
     """A geo that is too large is kept if it is not eligible to exclude."""
     # Capping the share at that of geo '3' makes geo '4' 'too large'.
     share_cap = self.data.geo_share['3']
     eligibility = self.default_geo_eligibility_data
     eligibility.loc['4'] = [0, 1, 0]  # Cannot exclude geo 4.
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     data = TBRMMData(self.df, self.response, geo_elig)
     design_par = TBRMMDesignParameters(
         n_test=14,
         iroas=3.0,
         treatment_share_range=(share_cap / 2.0, share_cap))
     matched_markets = TBRMatchedMarkets(data, design_par)
     self.assertCountEqual(matched_markets.geos_within_constraints, self.geos)
 def testRangeIsSpecifiedButLowerThanLowerBound(self):
     """The user-specified lower bound of the range is adjusted if too low."""
     par = TBRMMDesignParameters(n_test=14,
                                 iroas=2.0,
                                 treatment_geos_range=(1, 4))
     # Edit a copy, as the setUp methods do, so that the frame held by
     # self.data is not modified in place.
     df_geo_elig = self.data.geo_eligibility.data.copy()
     df_geo_elig.loc['1'] = [0, 1, 0]  # Geo '1' is always in Treatment.
     df_geo_elig.loc['2'] = [0, 1, 0]  # Geo '2' is always in Treatment.
     geo_elig = geoeligibility.GeoEligibility(df_geo_elig)
     data = TBRMMData(self.df, self.response, geo_elig)
     mm = TBRMatchedMarkets(data, par)
     # There are 5 geos, 2 always assigned to Treatment, hence the lower bound
     # must be 2. The upper bound is unchanged (4) as it is the maximum possible.
     self.assertEqual(mm.treatment_group_size_range(), range(2, 5))
    def testThereAreFixedTreatmentGeos(self):
        """The minimum number of geos must be at least len(t_fixed).

        obj.geo_assignments.t_fixed is the set of treatment geos that are always
        included in Treatment group. Hence the minimum must be adjusted
        accordingly.
        """
        par = TBRMMDesignParameters(n_test=14, iroas=2.0)
        # Edit a copy, as the setUp methods do, so that the frame held by
        # self.data is not modified in place.
        df_geo_elig = self.data.geo_eligibility.data.copy()
        df_geo_elig.loc['1'] = [0, 1, 0]  # Geo '1' is always in Treatment.
        df_geo_elig.loc['2'] = [0, 1, 0]  # Geo '2' is always in Treatment.
        geo_elig = geoeligibility.GeoEligibility(df_geo_elig)
        data = TBRMMData(self.df, self.response, geo_elig)
        mm = TBRMatchedMarkets(data, par)
        # There are 5 geos, 2 fixed to Treatment, but none fixed to control,
        # hence the range must be equal to [2, 3, 4].
        self.assertEqual(mm.treatment_group_size_range(), range(2, 5))
    def testSomeGeosAreNeverInTreatment(self):
        """The max # of geos == len(t) if some geos are never in Treatment group.

        If there are geos that are never assigned to treatment, the maximum
        treatment group size does not have to be restricted.
        """
        par = TBRMMDesignParameters(n_test=14, iroas=2.0)
        # Edit a copy, as the setUp methods do, so that the frame held by
        # self.data is not modified in place.
        df_geo_elig = self.data.geo_eligibility.data.copy()
        df_geo_elig.loc['1'] = [1, 0, 0]  # Geo '1' is never in Treatment.
        df_geo_elig.loc['2'] = [1, 0, 1]  # Geo '2' is never in Treatment.
        geo_elig = geoeligibility.GeoEligibility(df_geo_elig)
        data = TBRMMData(self.df, self.response, geo_elig)
        mm = TBRMatchedMarkets(data, par)
        # There are 5 geos, 2 never assigned to Treatment, hence up to 3 geos
        # can be assigned to treatment so the range must be equal to [1, 2, 3].
        self.assertEqual(mm.treatment_group_size_range(), range(1, 4))
 def testGeoIndexOrder(self):
     """Geo indices run from the largest budget (index 0) to the smallest (4)."""
     eligibility = self.default_geo_eligibility_data
     eligibility.loc['4'] = [1, 0, 0]  # Group 'c_fixed'. Largest -> index 0.
     eligibility.loc['3'] = [0, 1, 0]  # Group 't_fixed'.
     eligibility.loc['1'] = [1, 1, 0]  # Group 'ct'.
     eligibility.loc['0'] = [1, 0, 1]  # Group 'cx'. Smallest -> index 4.
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     matched_markets = TBRMatchedMarkets(
         TBRMMData(self.df, self.response, geo_elig), self.par)
     # Each group is expected to hold exactly the index of its single geo.
     self.assertCountEqual(matched_markets.geo_assignments.c_fixed, {0})
     self.assertCountEqual(matched_markets.geo_assignments.t_fixed, {1})
     self.assertCountEqual(matched_markets.geo_assignments.ct, {3})
     self.assertCountEqual(matched_markets.geo_assignments.cx, {4})
 def testAllGeoDifferentGroup_SizeUnbounded(self):
     """Counts designs with every geo in a different group, sizes only >= 1."""
     eligibility = self.df_geo_elig
     rows = (
         ('4', [1, 0, 0]),  # 0 - 'c_fixed'.
         ('3', [1, 0, 1]),  # 1 - 'cx'.
         ('2', [0, 1, 1]),  # 2 - 'tx'.
         ('1', [1, 1, 0]),  # 3 - 'ct'.
         ('0', [1, 1, 1]),  # 4 - 'ctx'.
     )
     for geo, flags in rows:
         eligibility.loc[geo] = flags
     # 20 eligible designs.
     # 0 1 2 3 4 | 0 1 2 3 4 | 0 1 2 3 4 | 0 1 2 3 4 |
     # c c t c c | c c x c t | c x t c c | c x x c t |
     # . . . c t | . . . t c | . . . c t | . . . t c |
     # . . . c x | . . . t t | . . . c x | . . . t t |
     # . . . t c | . . . t x | . . . t c | . . . t x |
     # . . . t t |           | . . . t t |           |
     # . . . t x |           | . . . t x |           |
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     matched_markets = TBRMatchedMarkets(
         TBRMMData(self.df, self.response, geo_elig), self.par)
     n_designs = matched_markets.count_max_designs()
     self.assertEqual(n_designs, 20)
 def testAllGeoDifferentGroup_SizeBounded(self):
     """Counts designs with every geo in a different group, sizes in (2, 3)."""
     eligibility = self.df_geo_elig
     rows = (
         ('4', [1, 0, 0]),  # 0 - 'c_fixed'.
         ('3', [1, 0, 1]),  # 1 - 'cx'.
         ('2', [0, 1, 1]),  # 2 - 'tx'.
         ('1', [1, 1, 0]),  # 3 - 'ct'.
         ('0', [1, 1, 1]),  # 4 - 'ctx'.
     )
     for geo, flags in rows:
         eligibility.loc[geo] = flags
     # Group sizes can vary from 2 or 3, only 7 eligible designs.
     # 0 1 2 3 4 | 0 1 2 3 4 |
     # c c t c t | c c x t t |
     # c c t t c | c x t c t |
     # c c t t t | c x t t c |
     # c c t t x |           |
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     data = TBRMMData(self.df, self.response, geo_elig)
     bounded_par = TBRMMDesignParameters(
         n_test=14,
         iroas=3.0,
         control_geos_range=(2, 3),
         treatment_geos_range=(2, 3))
     n_designs = TBRMatchedMarkets(data, bounded_par).count_max_designs()
     self.assertEqual(n_designs, 7)
 def testAllGeoDifferentGroup_MaxRatioFixed(self):
     """Counts designs, all geos in different groups, geo_ratio_tolerance=1.0."""
     eligibility = self.df_geo_elig
     rows = (
         ('4', [1, 0, 0]),  # 0 - 'c_fixed'.
         ('3', [1, 0, 1]),  # 1 - 'cx'.
         ('2', [0, 1, 1]),  # 2 - 'tx'.
         ('1', [1, 1, 0]),  # 3 - 'ct'.
         ('0', [1, 1, 1]),  # 4 - 'ctx'.
     )
     for geo, flags in rows:
         eligibility.loc[geo] = flags
     # Only control/treatment geo ratios 1/1, 2/3, 2/1, 1/2 allowed, 14 eligible
     # designs.
     # 0 1 2 3 4 | 0 1 2 3 4 | 0 1 2 3 4 | 0 1 2 3 4 |
     # c c t c t | c c x t t | c x t c t | c x x c t
     # . . . t c | . . . t x | . . . c x | . . . t c
     # . . . t t |           | . . . t c | . . . t t
     # . . . t x |           | . . . t x | . . . t x
     geo_elig = geoeligibility.GeoEligibility(eligibility)
     data = TBRMMData(self.df, self.response, geo_elig)
     ratio_par = TBRMMDesignParameters(
         n_test=14, iroas=3.0, geo_ratio_tolerance=1.0)
     n_designs = TBRMatchedMarkets(data, ratio_par).count_max_designs()
     self.assertEqual(n_designs, 14)
Ejemplo n.º 25
0
 def testGreedySearchFixedTreatmentGeoFail(self):
     """Greedy search with the treatment group fixed to geo '2'.

     Per the original description, the search "fails" because no design passes
     the dwtest; a single design is still returned, and the fourth component
     of its score is expected to be 0.
     """
     df_geo_elig = self.default_geo_eligibility_data
     df_geo_elig.loc['2'] = [0, 1, 0]  # geo 2 (index 1) is fixed to Treatment.
     geo_elig = geoeligibility.GeoEligibility(df_geo_elig)
     data = TBRMMData(self.dataframe, 'response', geo_elig)
     # Restrict the treatment group to exactly one geo.
     self.par.treatment_geos_range = (1, 1)
     mm = TBRMatchedMarkets(data, self.par)
     designs = mm.greedy_search()

     # Recompute the diagnostics of the expected design independently.
     diag = TBRMMDiagnostics(data.aggregate_time_series({1}), self.par)
     diag.x = data.aggregate_time_series({0})
     corr = diag.corr
     required_impact = diag.estimate_required_impact(corr)

     # assertEqual reports the actual length on failure, unlike assertTrue.
     self.assertEqual(len(designs), 1)
     self.assertSetEqual(designs[0].treatment_geos, {'2'})
     self.assertSetEqual(designs[0].control_geos, {'1'})
     self.assertTupleEqual(
         designs[0].score.score,
         (1, 1, 1, 0, round(corr, 2), 1 / required_impact))
     self.assertEqual(designs[0].score.diag.corr, designs[0].diag.corr)
     self.assertEqual(designs[0].score.score.corr,
                      round(designs[0].diag.corr, 2))