예제 #1
0
    def make_pmfs_from_datasetmatrix(self, X: int, Y: int, Zl: list[int]) -> tuple[CPMF, CPMF, CPMF, PMF]:
        """Build the distributions of X, Y and (X, Y) conditioned on Zl.

        The variables are obtained through ``self.load_variables(X, Y, Zl)``.

        Args:
            X: identifier of the first variable, forwarded to
                ``load_variables``.
            Y: identifier of the second variable, forwarded to
                ``load_variables``.
            Zl: identifiers of the conditioning variables; may be empty.

        Returns:
            A 4-tuple ``(PrXYcZ, PrXcZ, PrYcZ, PrZ)``:

            * when ``Zl`` is non-empty, the first three entries are the
              joint PMFs of (X, Y, Z), (X, Z) and (Y, Z) after
              ``condition_on(PrZ)``, and ``PrZ`` is ``PMF(VarZ)``;
            * when ``Zl`` is empty, the first three entries are the
              unconditional PMFs of (X, Y), X and Y wrapped in
              ``OmegaCPMF``, and ``PrZ`` is an ``OmegaPMF()``.
        """
        (VarX, VarY, VarZ) = self.load_variables(X, Y, Zl)

        if len(Zl) > 0:
            # Non-empty conditioning set: build every joint distribution that
            # includes Z, then condition each of them on the PMF of Z.
            joint_xyz = PMF(JointVariables(VarX, VarY, VarZ))
            joint_xz = PMF(JointVariables(VarX, VarZ))
            joint_yz = PMF(JointVariables(VarY, VarZ))
            pmf_z = PMF(VarZ)

            x_given_z = joint_xz.condition_on(pmf_z)
            y_given_z = joint_yz.condition_on(pmf_z)
            xy_given_z = joint_xyz.condition_on(pmf_z)

            return (xy_given_z, x_given_z, y_given_z, pmf_z)

        # Empty conditioning set: there is nothing to condition on, so the
        # plain PMFs are wrapped in OmegaCPMF and OmegaPMF stands in for PrZ,
        # keeping the returned tuple the same shape as in the branch above.
        joint_xy = PMF(JointVariables(VarX, VarY))
        marginal_x = PMF(VarX)
        marginal_y = PMF(VarY)
        omega_z = OmegaPMF()

        return (
            OmegaCPMF(joint_xy),
            OmegaCPMF(marginal_x),
            OmegaCPMF(marginal_y),
            omega_z,
        )
예제 #2
0
    def G_test_conditionally_independent(self, X: int, Y: int,
                                         Z: list[int]) -> CITestResult:
        """Run a G-test for the independence of X and Y given Z.

        Steps, in order:

        1. Load the variables with ``self.load_variables(X, Y, Z)`` and start
           timing on a fresh ``CITestResult``.
        2. Build the joint PMFs and the PMFs conditioned on Z (or their
           ``Omega*`` counterparts when ``Z`` is empty).
        3. Hand the PMFs / variables / CPMFs to ``self.DoF_calculator``
           (only the ones it declares it requires) and ask it for the
           degrees of freedom.
        4. If ``self.sufficient_samples(DoF)`` is falsy, return a result
           marked as having insufficient samples.
        5. Otherwise compute G with ``self.G_value`` and evaluate the
           chi-squared CDF at G; the variables are reported independent
           when that value is below ``self.significance``.

        Args:
            X: identifier of the first variable.
            Y: identifier of the second variable.
            Z: identifiers of the conditioning variables; may be empty.

        Returns:
            The populated ``CITestResult``. Its ``index`` is
            ``self.ci_test_counter + 1`` and its ``extra_info`` carries the
            degrees of freedom in both outcomes.
        """
        (VarX, VarY, VarZ) = self.load_variables(X, Y, Z)

        result = CITestResult()
        result.start_timing()

        if len(Z) > 0:
            joint_xyz = PMF(JointVariables(VarX, VarY, VarZ))
            joint_xz = PMF(JointVariables(VarX, VarZ))
            joint_yz = PMF(JointVariables(VarY, VarZ))
            pmf_z = PMF(VarZ)

            x_given_z = joint_xz.condition_on(pmf_z)
            y_given_z = joint_yz.condition_on(pmf_z)
            xy_given_z = joint_xyz.condition_on(pmf_z)

            # Unconditioned PMFs, in the order set_context_pmfs receives them.
            context_pmfs = (joint_xyz, joint_xz, joint_yz, pmf_z)
        else:
            joint_xy = PMF(JointVariables(VarX, VarY))
            marginal_x = PMF(VarX)
            marginal_y = PMF(VarY)
            pmf_z = OmegaPMF()

            xy_given_z = OmegaCPMF(joint_xy)
            x_given_z = OmegaCPMF(marginal_x)
            y_given_z = OmegaCPMF(marginal_y)

            # With an empty conditioning set there is no PMF of Z to pass,
            # so the last slot is None.
            context_pmfs = (joint_xy, marginal_x, marginal_y, None)

        # Feed the DoF calculator: PMFs first (if wanted), then the variable
        # identifiers, then the conditional PMFs (if wanted).
        dof_calculator = self.DoF_calculator
        if dof_calculator.requires_pmfs:
            dof_calculator.set_context_pmfs(*context_pmfs)

        dof_calculator.set_context_variables(X, Y, Z)

        if dof_calculator.requires_cpmfs:
            dof_calculator.set_context_cpmfs(
                xy_given_z, x_given_z, y_given_z, pmf_z)

        dof = dof_calculator.calculate_DoF(X, Y, Z)

        if not self.sufficient_samples(dof):
            # Not enough data for this many degrees of freedom: report that
            # instead of computing the statistic.
            result.end_timing()
            result.index = self.ci_test_counter + 1
            result.set_insufficient_samples()
            result.set_variables(VarX, VarY, VarZ)
            result.extra_info = f' DoF {dof}'
            return result

        g_statistic = self.G_value(xy_given_z, x_given_z, y_given_z, pmf_z)

        # NOTE(review): this is the chi-squared CDF at G, not the survival
        # function, so self.significance presumably holds a confidence level
        # (e.g. 0.95) rather than an alpha -- confirm against the caller.
        cdf_at_g = chi2.cdf(g_statistic, dof)
        independent = bool(cdf_at_g < self.significance)

        result.end_timing()
        result.index = self.ci_test_counter + 1
        result.set_independent(independent, self.significance)
        result.set_variables(VarX, VarY, VarZ)
        result.set_statistic('G', g_statistic, {})
        result.set_distribution('chi2', cdf_at_g, {'DoF': dof})

        result.extra_info = f' DoF {dof}'

        return result