def testTrimmedMatchValueError(self):
    """Checks that invalid constructor and Report arguments raise ValueError."""
    responses = self._delta_response
    costs = self._delta_cost

    # A negative max_trim_rate must be rejected by the constructor.
    with self.assertRaises(ValueError):
        estimator.TrimmedMatch(responses, costs, -0.1)

    # delta_response and delta_cost of different lengths must be rejected.
    with self.assertRaises(ValueError):
        longer_costs = costs + [1.0]
        estimator.TrimmedMatch(responses, longer_costs)

    # A confidence outside of (0, 1] must be rejected by Report.
    matcher = estimator.TrimmedMatch(responses, costs)
    with self.assertRaises(ValueError):
        matcher.Report(-0.5, 0.0)
def testReporValueError(self):
    """Checks that Report raises ValueError for invalid arguments.

    Each case is a (confidence, trim_rate) pair that Report is expected to
    reject, given an estimator built with a third constructor argument of 0.25.

    The cases are run in an explicit loop with ``subTest``. A function that is
    decorated with ``parameterized.parameters`` but defined inside a test
    method is never invoked, so assertions placed in it would not execute.
    """
    tm = estimator.TrimmedMatch(self._delta_response, self._delta_cost, 0.25)
    invalid_arguments = (
        (-0.1, 0.1),  # confidence below 0
        (1.1, 0.1),   # confidence above 1
        (0.8, 0.5),   # trim_rate above the 0.25 passed to the constructor
    )
    for confidence, trim_rate in invalid_arguments:
        # subTest reports every failing case instead of stopping at the first.
        with self.subTest(confidence=confidence, trim_rate=trim_rate):
            with self.assertRaises(ValueError):
                tm.Report(confidence, trim_rate)
def testTrimmedMatchCase(self):
    """Tests with various trim rates.

    Each case pairs an expected report fixture with the trim_rate passed to
    Report at confidence 0.90.

    The cases are run in an explicit loop with ``subTest``. A function that is
    decorated with ``parameterized.parameters`` but defined inside a test
    method is never invoked, so assertions placed in it would not execute.
    """
    tm = estimator.TrimmedMatch(self._delta_response, self._delta_cost, 0.25)
    cases = (
        (self._report_no_trim, 0.0),
        (self._report_trim1, 0.20),
        (self._report_auto_trim, -1),
    )
    for expected, trim_rate in cases:
        # subTest reports every failing case instead of stopping at the first.
        with self.subTest(trim_rate=trim_rate):
            self.AssertReportEqual(expected, tm.Report(0.90, trim_rate))
def testCalculateEpsilons(self):
    """Tests _CalculateEpsilons.

    The expected residual of each pair is
    ``delta_response - iroas0 * delta_cost``; the epsilons of the report
    obtained with confidence 0.90 and trim rate 0.0 must agree with these
    values to 3 decimal places.
    """
    matcher = estimator.TrimmedMatch(
        self._delta_response, self._delta_cost, 0.25)
    report = matcher.Report(0.90, 0.0)

    expected = [
        response - self._iroas0 * cost
        for response, cost in zip(self._delta_response, self._delta_cost)
    ]

    self.assertEqual(len(expected), len(report.epsilons))
    for position, wanted in enumerate(expected):
        self.assertAlmostEqual(wanted, report.epsilons[position], places=3)
def calculate_experiment_results(
    data: TrimmedMatchData,
    max_trim_rate: float = 0.25,
    confidence: float = 0.80,
    trim_rate: float = -1.0) -> TrimmedMatchResults:
    """Calculate the results of an experiment with Trimmed Match.

    Args:
      data: namedtuple with fields pair, treatment_response, treatment_cost,
        control_response, control_cost which can be obtained as output of the
        method prepare_data_for_post_analysis.
      max_trim_rate: half the largest fraction of pairs that can be trimmed.
      confidence: the confidence level for the two-sided confidence interval.
      trim_rate: trim rate, a value outside [0, max_trim_rate) triggers the
        data-driven choice described in the Trimmed Match paper.

    Returns:
      results: namedtuple with fields data (the input with its epsilon field
        replaced by the fitted epsilons), report, trimmed_pairs,
        incremental_cost, lift, treatment_response.
    """
    # Positions are driven by the treatment responses; the other sequences are
    # indexed at the same positions.
    positions = range(len(data.treatment_response))
    response_diffs = [
        data.treatment_response[pos] - data.control_response[pos]
        for pos in positions
    ]
    spend_diffs = [
        data.treatment_cost[pos] - data.control_cost[pos]
        for pos in positions
    ]

    matcher = estimator.TrimmedMatch(response_diffs, spend_diffs, max_trim_rate)
    report = matcher.Report(confidence, trim_rate)

    # Map the indices reported as trimmed back to the pair identifiers.
    pairs_trimmed = [data.pair[idx] for idx in report.trimmed_pairs_indices]
    total_incremental_cost = sum(data.treatment_cost) - sum(data.control_cost)
    # Lift is the point estimate scaled by the total incremental cost.
    total_lift = report.estimate * total_incremental_cost
    total_treatment_response = sum(data.treatment_response)

    return TrimmedMatchResults(
        data=data._replace(epsilon=report.epsilons),
        report=report,
        trimmed_pairs=pairs_trimmed,
        incremental_cost=total_incremental_cost,
        lift=total_lift,
        treatment_response=total_treatment_response,
    )
def report(self,
           num_simulations: int = 1000,
           max_trim_rate: float = _MAX_TRIM_RATE_FOR_RMSE_EVAL,
           trim_rate: float = -1.0) -> Tuple[float, pd.DataFrame]:
    """Reports the RMSE.

    Args:
      num_simulations: int, number of simulated datasets to fit.
      max_trim_rate: float, passed to estimator.TrimmedMatch.
      trim_rate: float, with default (-1.0) trim_rate is data-driven.

    Returns:
      RMSE: rmse of the iROAS estimate obtained from multiple simulations,
        measured against the hypothesized iROAS.
      detailed_results: a DataFrame with one row per simulation and the
        columns: simulation, estimate, std_error, trim_rate, ci_level,
        conf_interval_low, conf_interval_up.
    """
    estimates = np.zeros(num_simulations)
    rows = []
    for sim_id in range(num_simulations):
        pairs = list(self._simulate_geox_data(sim_id).values())
        response_diffs = [
            pair.treated.response - pair.controlled.response for pair in pairs
        ]
        spend_diffs = [
            pair.treated.spend - pair.controlled.spend for pair in pairs
        ]

        matcher = estimator.TrimmedMatch(
            response_diffs, spend_diffs, max_trim_rate)
        outcome = matcher.Report(trim_rate=trim_rate)

        estimates[sim_id] = outcome.estimate
        # Key order determines the column order of the returned DataFrame.
        rows.append({
            "simulation": sim_id,
            "estimate": outcome.estimate,
            "std_error": outcome.std_error,
            "trim_rate": outcome.trim_rate,
            "ci_level": outcome.confidence,
            "conf_interval_low": outcome.conf_interval_low,
            "conf_interval_up": outcome.conf_interval_up
        })

    squared_errors = (estimates - self._hypothesized_iroas)**2
    rmse = np.sqrt(np.mean(squared_errors))
    return rmse, pd.DataFrame(rows)
def trimmed_match_aa_test(delta_responses: List[float],
                          delta_spends: List[float],
                          confidence: float = 0.8) -> bool:
    """Returns whether the Trimmed Match confidence interval contains 0.

    Args:
      delta_responses: response differences from each geo pair.
      delta_spends: spend differences from each geo pair.
      confidence: the confidence level for a two-sided conf. interval.

    Returns:
      True if the two-sided conf interval of the trimmed match estimator
      covers 0, i.e. its lower bound is strictly negative and its upper bound
      is strictly positive.
    """
    matcher = estimator.TrimmedMatch(delta_responses, delta_spends)
    outcome = matcher.Report(confidence=confidence)
    lower, upper = outcome.conf_interval_low, outcome.conf_interval_up
    return lower < 0 < upper
def testTrimmedMatchZeroSpend(self):
    """Checks that spend differences that are all near 0 raise ValueError."""
    delta_responses = [1, 2, 3, 4, 5]
    near_zero_spends = [0, 0, 0, 0, 1e-15]
    expected_message = "delta_spends are all too close to 0!"
    with self.assertRaisesRegex(ValueError, expected_message):
        estimator.TrimmedMatch(delta_responses, near_zero_spends)
def testTrimmedMatchTiedThetasConstant(self):
    """Checks the estimate is 1.0 when every pair is (response=1, spend=1)."""
    delta_responses = [1] * 5
    delta_spends = [1] * 5
    outcome = estimator.TrimmedMatch(delta_responses, delta_spends).Report()
    self.assertAlmostEqual(outcome.estimate, 1.0)
def testTrimmedMatchTiedThetas(self):
    """Checks the estimate is 1.0 when response equals spend in every pair."""
    delta_responses = list(range(1, 6))
    delta_spends = list(range(1, 6))
    outcome = estimator.TrimmedMatch(delta_responses, delta_spends).Report()
    self.assertAlmostEqual(outcome.estimate, 1.0)
def testTrimmedMatchTiedSpend(self):
    """Checks the estimate is 3.0 for responses 1..5 with all spends at 1."""
    delta_responses = list(range(1, 6))
    delta_spends = [1] * 5
    outcome = estimator.TrimmedMatch(delta_responses, delta_spends).Report()
    self.assertAlmostEqual(outcome.estimate, 3.0)