Example #1
    def testUnitBootstrap(self):
        # The bootstrap depends upon random values to work; thus, we'll
        # only check that it's statistically close to a simulated value.

        # We set the seed to avoid flaky tests; this test will fail with
        # probability 0.05 otherwise.

        # Note that this is an equivalent problem to the testBootstrap case;
        # we've just split each row into three identical rows and resample by
        # unit instead of by row.

        np.random.seed(12345)

        x = []
        y = []
        for ii in range(1, 101):
            for _ in range(3):
                x.append(ii)
                y.append(ii)

        data = pd.DataFrame({"X": x, "Y": y})

        metric = metrics.Mean("X")
        se_method = standard_errors.Bootstrap(100, unit="Y")
        output = core.Analyze(data).with_standard_errors(se_method).calculate(
            metric).run()

        bootstrap_se = output["mean(X) Bootstrap SE"].values[0]

        simulation_se = 2.88
        epsilon = 0.41  # Two standard errors based on simulation.

        self.assertAlmostEqual(simulation_se, bootstrap_se, delta=epsilon)
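For intuition, here is a rough, self-contained sketch of the unit-level (cluster) resampling idea this test exercises, written with plain numpy/pandas rather than meterstick (the resampling scheme below is an assumption for illustration, not meterstick's actual implementation): draw the 100 distinct Y units with replacement and recompute mean(X) over all rows belonging to the drawn units.

import numpy as np
import pandas as pd

np.random.seed(12345)
data = pd.DataFrame({"X": np.repeat(np.arange(1, 101), 3),
                     "Y": np.repeat(np.arange(1, 101), 3)})

units = data["Y"].unique()
replicates = []
for _ in range(100):
    # Resample whole units (clusters), not individual rows.
    drawn = np.random.choice(units, size=len(units), replace=True)
    resampled = pd.concat([data[data["Y"] == u] for u in drawn])
    replicates.append(resampled["X"].mean())

print(np.std(replicates, ddof=1))  # roughly 2.9, in line with simulation_se

Because every unit's three rows are identical, this collapses to the ordinary row-level bootstrap over 100 distinct values, which is why the test reuses the parameters from testBootstrap.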
Example #2
    def testBootstrap(self):
        # The bootstrap depends upon random values to work; thus, we'll
        # only check that it's statistically close to the theoretical
        # value.

        # We set the seed to avoid flaky tests; this test will fail with
        # probability 0.05 otherwise.
        np.random.seed(12345)

        data = pd.DataFrame({"X": range(1, 101)})

        metric = metrics.Mean("X")
        se_method = standard_errors.Bootstrap(100)
        output = core.Analyze(data).with_standard_errors(se_method).calculate(
            metric).run()

        bootstrap_se = output["mean(X) Bootstrap SE"].values[0]

        # Parameters based on the following R simulation
        # set.seed(12345)
        # library(bootstrap)
        # x <- 1:100
        # estimates <- replicate(1000, sd(bootstrap(x, 100, mean)$thetastar))
        # mean(estimates)
        # sd(estimates)

        simulation_se = 2.88
        epsilon = 0.41  # Two standard errors based on simulation.

        self.assertAlmostEqual(simulation_se, bootstrap_se, delta=epsilon)
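For reference, the 2.88 used as simulation_se is consistent with the closed-form value: the ideal bootstrap standard error of a sample mean is the (ddof=0) sample standard deviation divided by sqrt(n). A quick check, independent of both meterstick and the R simulation:

import numpy as np

x = np.arange(1, 101)
print(np.std(x) / np.sqrt(len(x)))  # approximately 2.887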
Example #3
  def testMeanWithWeights(self):
    df = pd.DataFrame({"X": [1, 2, 3, 4]})
    weights = np.array([3, 2, 1, 1])

    metric = metrics.Mean("X")

    output = metric(df, weights)

    correct = 2.0

    self.assertEqual(output, correct)
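The expected value follows from the usual weighted-mean formula: (1*3 + 2*2 + 3*1 + 4*1) / (3 + 2 + 1 + 1) = 14 / 7 = 2.0. A minimal standalone check with numpy's weighted average (assuming metrics.Mean("X") computes exactly this weighted mean when called with weights):

import numpy as np
import pandas as pd

df = pd.DataFrame({"X": [1, 2, 3, 4]})
weights = np.array([3, 2, 1, 1])
print(np.average(df["X"], weights=weights))  # 2.0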
Example #4
    def testMultipleCalculations(self):
        data = pd.DataFrame({"X": [1, 2, 3, 4, 5]})

        output = core.Analyze(data).calculate(
            [metrics.Sum("X"), metrics.Mean("X")]).run()

        correct = pd.DataFrame(np.array([[15, 3.0]]),
                               columns=["sum(X)", "mean(X)"])
        correct[["sum(X)"]] = correct[["sum(X)"]].astype(int)

        self.assertTrue(output.equals(correct))
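The expected frame can be reproduced directly with pandas, which is a handy way to sanity-check the fixture without going through core.Analyze:

import pandas as pd

data = pd.DataFrame({"X": [1, 2, 3, 4, 5]})
print(data["X"].sum(), data["X"].mean())  # 15 3.0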