Example #1
0
    def testShuffledDataframeRelativeToJackknife(self):
        # Same scenario as testDataframeRelativeToJackknife, but the rows are
        # handed to Analyze in a random order: reordering the data must not
        # change the results, up to row order. Both frames are therefore
        # sorted by (Y, Z) before they are compared.
        df = pd.DataFrame({
            "X": range(11),
            "Y": np.concatenate((np.zeros(6), np.ones(5))),
            "Z": np.concatenate((np.zeros(3), np.ones(8)))
        })

        metric = metrics.Distribution("X", ["Z"])
        se_method = standard_errors.Jackknife()
        shuffled = df.iloc[np.random.permutation(11)]
        output = core.Analyze(shuffled).relative_to(
            comparisons.AbsoluteDifference(
                "Y",
                0)).with_standard_errors(se_method).calculate(metric).run()
        output = (output.reset_index().sort_values(by=["Y", "Z"]).set_index(
            ["Y", "Z"]))

        # The index is built with from_tuples instead of the MultiIndex
        # constructor: the constructor's `labels` keyword was renamed to
        # `codes` in pandas 0.24 and removed in 1.0, while from_tuples works
        # on old and new pandas alike.
        correct = pd.DataFrame(
            np.array([[-0.2, 0.18100283490], [0.2, 0.18100283490]]),
            columns=[
                "X Distribution Absolute Difference",
                "X Distribution Absolute Difference Jackknife SE"
            ],
            index=pd.MultiIndex.from_tuples([(1., 0.), (1., 1.)],
                                            names=["Y", "Z"]))
        correct = (correct.reset_index().sort_values(by=["Y", "Z"]).set_index(
            ["Y", "Z"]))

        # Separate assertions so a failure points at the part that differs.
        self.assertTrue(all(output.index == correct.index))
        self.assertTrue(all(output.columns == correct.columns))
        self.assertTrue(
            np.all(abs(output.values - correct.values) < 1e-10))
Example #2
0
    def testDataframeRelativeToJackknife(self):
        # Checks the X distribution over Z, expressed as an absolute
        # difference via AbsoluteDifference("Y", 0), together with its
        # Jackknife standard error.
        df = pd.DataFrame({
            "X": range(11),
            "Y": np.concatenate((np.zeros(6), np.ones(5))),
            "Z": np.concatenate((np.zeros(3), np.ones(8)))
        })

        metric = metrics.Distribution("X", ["Z"])
        se_method = standard_errors.Jackknife()
        output = core.Analyze(df).relative_to(
            comparisons.AbsoluteDifference(
                "Y",
                0)).with_standard_errors(se_method).calculate(metric).run()

        # The index is built with from_tuples instead of the MultiIndex
        # constructor: the constructor's `labels` keyword was renamed to
        # `codes` in pandas 0.24 and removed in 1.0, while from_tuples works
        # on old and new pandas alike.
        correct = pd.DataFrame(
            np.array([[-0.2, 0.18100283490], [0.2, 0.18100283490]]),
            columns=[
                "X Distribution Absolute Difference",
                "X Distribution Absolute Difference Jackknife SE"
            ],
            index=pd.MultiIndex.from_tuples([(1., 0.), (1., 1.)],
                                            names=["Y", "Z"]))

        # Separate assertions so a failure points at the part that differs.
        self.assertTrue(all(output.index == correct.index))
        self.assertTrue(all(output.columns == correct.columns))
        self.assertTrue(
            np.all(abs(output.values - correct.values) < 1e-10))
 def testWeightedDistribution(self):
   # Calls a Distribution metric of X over Y directly with per-row weights.
   # With these weights the weighted X totals per Y value are 12 (Y=1),
   # 7 (Y=2) and 1 (Y=0), out of 20 overall.
   frame = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
                         "Y": [1, 2, 0, 1, 1, 1, 1]})
   row_weights = np.array([1, 7, 1, 1, 1, 1, 1])

   expected_shares = np.array([12 / 20., 7 / 20., 1 / 20.])
   expected_index = pd.Index([1, 2, 0], name="Y")
   expected = pd.DataFrame(
       expected_shares, columns=[""], index=expected_index)

   distribution = metrics.Distribution("X", ["Y"])
   actual = distribution(frame, row_weights)
   self.assertTrue(actual.equals(expected))
 def testTwoDimensionalDistribution(self):
   # Calls a Distribution metric of X over the pair (Y, Z) with unit weights.
   # The X totals per (Y, Z) pair are 1 for (1, 1), (2, 0) and (0, 0) and
   # 11 for (1, 0), out of 14 overall.
   df = pd.DataFrame({"X": [1, 1, 1, 2, 2, 3, 4],
                      "Y": [1, 2, 0, 1, 1, 1, 1],
                      "Z": [1, 0, 0, 0, 0, 0, 0]})
   weights = np.array([1, 1, 1, 1, 1, 1, 1])
   metric = metrics.Distribution("X", ["Y", "Z"])
   output = metric(df, weights)
   # The index is built with from_tuples instead of the MultiIndex
   # constructor: the constructor's `labels` keyword was renamed to `codes`
   # in pandas 0.24 and removed in 1.0, while from_tuples works on old and
   # new pandas alike. The tuples keep the row order of the old codes.
   correct = pd.DataFrame(
       np.array([1 / 14., 1 / 14., 1 / 14., 11 / 14.]),
       columns=[""],
       index=pd.MultiIndex.from_tuples([(1, 1), (2, 0), (0, 0), (1, 0)],
                                       names=["Y", "Z"]))
   self.assertTrue(output.equals(correct))
Example #5
0
    def testSplitDataframe(self):
        # Checks the X distribution over Z computed separately for each Y
        # value via split_by. For Y=0 the X totals are 3 (Z=0) and 12 (Z=1)
        # out of 15; for Y=1 every row has Z=1.
        df = pd.DataFrame({
            "X": range(11),
            "Y": np.concatenate((np.zeros(6), np.ones(5))),
            "Z": np.concatenate((np.zeros(3), np.ones(8)))
        })

        metric = metrics.Distribution("X", ["Z"])
        output = core.Analyze(df).split_by(["Y"]).calculate(metric).run()

        # The index is built with from_product instead of the MultiIndex
        # constructor: the constructor's `labels` keyword was renamed to
        # `codes` in pandas 0.24 and removed in 1.0. from_product yields the
        # same four (Y, Z) pairs in the same order on old and new pandas.
        correct = pd.DataFrame(np.array([0.2, 0.8, 0.0, 1.0]),
                               columns=["X Distribution"],
                               index=pd.MultiIndex.from_product(
                                   [[0.0, 1.0], [0.0, 1.0]],
                                   names=["Y", "Z"]))

        # Separate assertions so a failure points at the part that differs.
        self.assertTrue(all(output.index == correct.index))
        self.assertTrue(all(output.columns == correct.columns))
        self.assertTrue(
            np.all(abs(output.values - correct.values) < 1e-10))
Example #6
0
    def testDataframeJackknife(self):
        # Checks the X distribution over Z and its standard error when the
        # Jackknife is constructed with "Y".
        frame = pd.DataFrame({
            "X": range(11),
            "Y": np.concatenate((np.zeros(6), np.ones(5))),
            "Z": np.concatenate((np.zeros(3), np.ones(8)))
        })

        # Expected rows, one per Z value: [point estimate, jackknife SE].
        z0_row = [3 / 55.,
                  np.sqrt(((3 / 15. - 0.1)**2 + 0.1**2) / 2.)]
        z1_row = [52 / 55.,
                  np.sqrt(((12 / 15. - 0.9)**2 + 0.1**2) / 2.)]
        expected = pd.DataFrame(
            np.array([z0_row, z1_row]),
            columns=("X Distribution", "X Distribution Jackknife SE"),
            index=pd.Index([0., 1.], name="Z"))

        distribution = metrics.Distribution("X", ["Z"])
        jackknife = standard_errors.Jackknife("Y")
        analysis = core.Analyze(frame).with_standard_errors(jackknife)
        actual = analysis.calculate(distribution).run()

        index_matches = all(actual.index == expected.index)
        self.assertTrue(
            index_matches
            and all(actual.columns == expected.columns)
            and np.all(abs(actual.values - expected.values) < 1e-10))