Esempio n. 1
0
    def cls_sumout_var(cls, f: Factor, Y: NumCatRVariable):
        """!
        \brief Sum the variable out of factor as per Koller, Friedman 2009, p. 297

        Summing out, or factor marginalization, is defined as the following by
        Koller, Friedman:

        <blockquote>

        Let X be a set of variables and Y \f$\not \in \f$ X a variable. Let
        \f$\phi(X, Y)\f$ be a factor. We define the factor marginalization of Y
        in phi, denoted \f$ \sum_Y \phi \f$, to be a factor psi over X such
        that: \f$ \psi(X) = \sum_Y \phi(X,Y) \f$

        </blockquote>

        The domain rows and the factor function of f are read once, when this
        method is called. The function of the returned factor sums the values
        of every captured row that contains the queried assignments.

        \param f the factor from which the variable is summed out.
        \param Y the variable that we are going to sum out.

        \throw ValueError We raise a value error if the argument is not in
        the scope of this factor

        \return Factor with a new uuid4 identifier whose scope is the scope of
        f without Y.
        """
        if Y not in f:
            # The scope holds variable objects rather than strings and
            # str.join refuses non-str items, so convert each one explicitly.
            scope_txt = " ".join(str(v) for v in f.scope_vars())
            msg = "Argument " + str(Y)
            msg += " is not in scope of this factor: "
            msg += scope_txt
            raise ValueError(msg)

        products = f.factor_domain()
        fn = f.factor_fn

        def psi(scope_product: Set[Tuple[str, NumericValue]]):
            """Sum fn over the captured rows that contain scope_product."""
            s = set(scope_product)
            # Collected into a set, so equal rows contribute only once.
            matching = {p for p in products if s.issubset(p)}
            return sum(fn(row) for row in matching)

        return Factor(
            gid=str(uuid4()),
            scope_vars=f.scope_vars().difference({Y}),
            factor_fn=psi,
        )
Esempio n. 2
0
    def cls_maxout_var(cls, f: Factor, Y: NumCatRVariable):
        """!
        \brief Max a variable out of a factor, Koller, Friedman 2009, p. 555

        Koller and Friedman define factor maximization as follows:
        <blockquote>
        Let X be a set of variables, and Y \f$ \not \in \f$ X, a random
        variable. Let \f$ \phi(X, Y) \f$ be a factor. We define the factor
        maximization of Y in \f$ \phi \f$ to be factor \f$ \psi \f$ over X such
        that: \f$ \psi(X) = max_{Y}\phi(X, Y) \f$
        </blockquote>

        The domain rows and the factor function of f are read once, when this
        method is called.

        \param f factor from which the variable is maxed out.
        \param Y random variable that is maxed out.

        \throw ValueError when Y is not in the scope of f.

        \return Factor with a new uuid4 identifier, the scope of f without Y,
        and a factor function giving the largest value of f among the captured
        rows that contain the queried assignments.
        """
        if Y not in f:
            raise ValueError("argument is not in scope of this factor")

        domain_rows = f.factor_domain()
        phi_fn = f.factor_fn

        def psi(scope_product: Set[Tuple[str, NumericValue]]):
            """Largest phi_fn value over the rows containing scope_product."""
            wanted = set(scope_product)
            # Collected into a set, so equal rows are evaluated only once.
            matching = {row for row in domain_rows if wanted.issubset(row)}
            values = [phi_fn(row) for row in matching]
            return max(values)

        new_gid = str(uuid4())
        remaining_scope = f.scope_vars().difference({Y})
        return Factor(gid=new_gid, scope_vars=remaining_scope, factor_fn=psi)
Esempio n. 3
0
    def cls_reduced(cls, f: Factor,
                    assignments: Set[Tuple[str, NumericValue]]) -> Factor:
        """!
        \brief Reduce a factor with the given context

        Every scope variable of f whose id appears in assignments has
        reduce_to_value called with the assigned value. All scope variables,
        reduced or not, form the scope of the returned factor.

        \param f factor to be reduced.
        \param assignments (variable id, value) pairs assigned to random
        variables of this factor.

        \return Factor whose conditional probability table rows are shrunk to
        the rows that contain the assignment values. It has a new uuid4
        identifier and uses f.phi as its factor function.

        Koller, Friedman 2009, p. 111 reduction by value example

        \f$phi(A,B,C)\f$

         A      B      C
        ---- | ---- | ----
         a1  |  b1  |  c1
         a1  |  b1  |  c2
         a2  |  b1  |  c1
         a2  |  b1  |  c2

        reduction C=c1 \f$\phi(A,B,C=c_1)\f$

           A      B      C
          ---- | ---- | ----
           a1  |  b1  |  c1
           a2  |  b1  |  c1

        """
        reduced_scope = set()
        for rv in f.scope_vars():
            for var_id, value in assignments:
                # NOTE(review): reduce_to_value is called on the variable
                # objects of f itself; presumably it changes them in place,
                # which would also affect f. Confirm this is intended.
                if rv.id() == var_id:
                    rv.reduce_to_value(value)
            reduced_scope.add(rv)
        new_gid = str(uuid4())
        return Factor(gid=new_gid, scope_vars=reduced_scope, factor_fn=f.phi)
Esempio n. 4
0
class TestFactor(unittest.TestCase):
    """!
    Tests for Factor: scope queries, joint values and factors defined by
    explicit value tables.
    """

    @staticmethod
    def _table_fn(table, err_msg="unknown arg"):
        """!
        Build a factor function from an explicit value table.

        table maps a tuple of (variable id, value) pairs to the factor value
        of that row. The returned function accepts a scope product (any
        iterable of such pairs, compared as a set) and returns the matching
        value. It raises ValueError(err_msg) for rows absent from the table.
        """
        rows = {frozenset(key): val for key, val in table.items()}

        def fn(scope_product):
            """Look up the table value of scope_product."""
            key = frozenset(scope_product)
            if key not in rows:
                raise ValueError(err_msg)
            return rows[key]

        return fn

    def data_1(self):
        """Create the intelligence/grade/dice variables and factors f, f2."""
        input_data = {
            "intelligence": {
                "outcome-values": [0.1, 0.9],
                "evidence": 0.9
            },
            "grade": {
                "outcome-values": [0.2, 0.4, 0.6],
                "evidence": 0.2
            },
            "dice": {
                "outcome-values": list(range(1, 7)),
                "evidence": 1.0 / 6,
            },
            "fdice": {
                "outcome-values": list(range(1, 7))
            },
        }

        def intelligence_dist(intelligence_value: float):
            if intelligence_value == 0.1:
                return 0.7
            elif intelligence_value == 0.9:
                return 0.3
            else:
                return 0

        def grade_dist(grade_value: float):
            if grade_value == 0.2:
                return 0.25
            elif grade_value == 0.4:
                return 0.37
            elif grade_value == 0.6:
                return 0.38
            else:
                return 0

        def fair_dice_dist(dice_value: float):
            if dice_value in range(1, 7):
                return 1.0 / 6.0
            else:
                return 0

        def f_dice_dist(dice_value: float):
            # NOTE(review): both branches return 0.2, so the six outcomes sum
            # to 1.2 and values outside 1..6 are not mapped to 0. Kept as is;
            # confirm whether a different weighting was intended.
            if dice_value in range(1, 5):
                return 0.2
            else:
                return 0.2

        self.intelligence = NumCatRVariable(
            node_id="int",
            input_data=input_data["intelligence"],
            marginal_distribution=intelligence_dist,
        )
        self.grade = NumCatRVariable(
            node_id="grade",
            input_data=input_data["grade"],
            marginal_distribution=grade_dist,
        )
        self.dice = NumCatRVariable(
            node_id="dice",
            input_data=input_data["dice"],
            marginal_distribution=fair_dice_dist,
        )
        self.fdice = NumCatRVariable(
            node_id="fdice",
            input_data=input_data["fdice"],
            marginal_distribution=f_dice_dist,
        )
        self.f = Factor(gid="f",
                        scope_vars={self.grade, self.dice, self.intelligence})
        self.f2 = Factor(gid="f2", scope_vars={self.grade, self.fdice})

    def data_2(self):
        """Create variables A, B, C, D and the table factors AB and BC."""
        # Koller, Friedman 2009, p. 104
        self.Af = NumCatRVariable(
            node_id="A",
            input_data={"outcome-values": [10, 50]},
            marginal_distribution=lambda x: 0.5,
        )
        self.Bf = NumCatRVariable(
            node_id="B",
            input_data={"outcome-values": [10, 50]},
            marginal_distribution=lambda x: 0.5,
        )
        self.Cf = NumCatRVariable(
            node_id="C",
            input_data={"outcome-values": [10, 50]},
            marginal_distribution=lambda x: 0.5,
        )
        self.Df = NumCatRVariable(
            node_id="D",
            input_data={"outcome-values": [10, 50]},
            marginal_distribution=lambda x: 0.5,
        )

        phiAB = self._table_fn({
            (("A", 10), ("B", 10)): 30,
            (("A", 10), ("B", 50)): 5,
            (("A", 50), ("B", 10)): 1,
            (("A", 50), ("B", 50)): 10,
        })
        self.AB = Factor(gid="AB",
                         scope_vars={self.Af, self.Bf},
                         factor_fn=phiAB)

        phiBC = self._table_fn({
            (("B", 10), ("C", 10)): 100,
            (("B", 10), ("C", 50)): 1,
            (("B", 50), ("C", 10)): 1,
            (("B", 50), ("C", 50)): 100,
        })
        self.BC = Factor(gid="BC",
                         scope_vars={self.Bf, self.Cf},
                         factor_fn=phiBC)

    def data_3(self):
        """Create the table factors CD and DA; needs data_2 to run first."""
        phiCD = self._table_fn({
            (("C", 10), ("D", 10)): 1,
            (("C", 10), ("D", 50)): 100,
            (("C", 50), ("D", 10)): 100,
            (("C", 50), ("D", 50)): 1,
        })
        self.CD = Factor(gid="CD",
                         scope_vars={self.Cf, self.Df},
                         factor_fn=phiCD)

        phiDA = self._table_fn({
            (("D", 10), ("A", 10)): 100,
            (("D", 10), ("A", 50)): 1,
            (("D", 50), ("A", 10)): 1,
            (("D", 50), ("A", 50)): 100,
        })
        self.DA = Factor(gid="DA",
                         scope_vars={self.Df, self.Af},
                         factor_fn=phiDA)

    def setUp(self):
        """Build all fixtures, then the three-valued A and factors aB, bc."""
        self.data_1()
        self.data_2()
        self.data_3()

        # Koller, Friedman 2009 p. 107
        self.af = NumCatRVariable(
            node_id="A",
            input_data={"outcome-values": [10, 50, 20]},
            marginal_distribution=lambda x: 0.4 if x != 20 else 0.2,
        )

        phiaB = self._table_fn({
            (("A", 10), ("B", 10)): 0.5,
            (("A", 10), ("B", 50)): 0.8,
            (("A", 50), ("B", 10)): 0.1,
            (("A", 50), ("B", 50)): 0,
            (("A", 20), ("B", 10)): 0.3,
            (("A", 20), ("B", 50)): 0.9,
        })
        self.aB = Factor(gid="ab",
                         scope_vars={self.af, self.Bf},
                         factor_fn=phiaB)

        phibc = self._table_fn({
            (("B", 10), ("C", 10)): 0.5,
            (("B", 10), ("C", 50)): 0.7,
            (("B", 50), ("C", 10)): 0.1,
            (("B", 50), ("C", 50)): 0.2,
        })
        self.bc = Factor(gid="bc",
                         scope_vars={self.Bf, self.Cf},
                         factor_fn=phibc)

    def test_id(self):
        """The factor reports the gid it was created with."""
        self.assertEqual(self.f.id(), "f")

    def test_domain_scope(self):
        """domain_scope of rows over A and B gives the variables A and B."""
        d = self.AB.domain_scope(
            domain=[{("A", 50), ("B", 50)}, {("A", 10), ("B", 10)}])
        self.assertEqual(set(d), {self.Af, self.Bf})

    def test_has_var(self):
        """has_var returns a (found, variable) pair for a variable id."""
        intuple = self.f.has_var(ids="dice")
        nottuple = self.f.has_var(ids="dice22")
        self.assertTrue(intuple[0])
        self.assertEqual(intuple[1], self.dice)
        self.assertFalse(nottuple[0])
        self.assertIsNone(nottuple[1])

    def test_in_scope_t_num(self):
        """A scope variable object is contained in the factor."""
        self.assertIn(self.dice, self.f)

    def test_in_scope_t_str(self):
        """The id of a scope variable is contained in the factor."""
        self.assertIn(self.dice.id(), self.f)

    def test_in_scope_f_str(self):
        """An unknown id is not contained in the factor."""
        self.assertNotIn("fdsfdsa", self.f)

    def test_scope_vars(self):
        """scope_vars gives back exactly the variables passed at creation."""
        # assertTrue(value, expected) only checked truthiness and used the
        # second argument as the failure message; compare for real.
        self.assertEqual(
            self.f.scope_vars(),
            {self.dice, self.intelligence, self.grade},
        )

    def test_marginal_joint(self):
        """marginal_joint equals the product of the variable marginals."""
        mjoint = self.f.marginal_joint(
            {("int", 0.1), ("grade", 0.4), ("dice", 2)})
        dmarg = self.dice.marginal(2)
        imarg = self.intelligence.marginal(0.1)
        gmarg = self.grade.marginal(0.4)
        # Float products may differ in the last bits depending on the
        # multiplication order, hence the approximate comparison.
        self.assertAlmostEqual(mjoint, dmarg * imarg * gmarg)

    def test_partition_value(self):
        """The partition value over the whole domain is 1."""
        pval = self.f.partition_value(self.f.vars_domain())
        self.assertAlmostEqual(pval, 1.0)

    def test_phi(self):
        """phi without a custom function equals the product of marginals."""
        mjoint = self.f.phi({("int", 0.1), ("grade", 0.4), ("dice", 2)})
        dmarg = self.dice.marginal(2)
        imarg = self.intelligence.marginal(0.1)
        gmarg = self.grade.marginal(0.4)
        self.assertAlmostEqual(mjoint, dmarg * imarg * gmarg)

    def test_phi_normalize(self):
        """phi equals the product of marginals divided by zval."""
        mjoint = self.f.phi({("int", 0.1), ("grade", 0.4), ("dice", 2)})
        dmarg = self.dice.marginal(2)
        imarg = self.intelligence.marginal(0.1)
        gmarg = self.grade.marginal(0.4)
        self.assertAlmostEqual(mjoint,
                               (dmarg * imarg * gmarg) / self.f.zval())

    def test_from_scope_variables_with_fn(self):
        """A factor built from variables and a table function evaluates it."""
        A = NumCatRVariable(
            "A",
            input_data={"outcome-values": [True, False]},
            marginal_distribution=lambda x: 0.6 if x else 0.4,
        )
        B = NumCatRVariable(
            "B",
            input_data={"outcome-values": [True, False]},
            marginal_distribution=lambda x: 0.62 if x else 0.38,
        )

        phi_ab = self._table_fn(
            {
                (("A", True), ("B", True)): 0.9,
                (("A", True), ("B", False)): 0.1,
                (("A", False), ("B", True)): 0.2,
                (("A", False), ("B", False)): 0.8,
            },
            err_msg="unknown argument",
        )

        f = Factor.from_scope_variables_with_fn(svars={A, B}, fn=phi_ab)
        query = {("A", True), ("B", True)}
        ff = f.phi(query)
        self.assertEqual(round(ff, 2), 0.9)

    @unittest.skip("Factor.from_conditional_vars not yet implemented")
    def test_from_conditional_vars(self):
        """Placeholder: only prepares the table function for a future test."""

        # A = NumCatRVariable(
        #     "A",
        #     input_data={"outcome-values": [True, False]},
        #     marginal_distribution=lambda x: 0.6 if x else 0.4,
        # )
        # B = NumCatRVariable(
        #     "B",
        #     input_data={"outcome-values": [True, False]},
        #     marginal_distribution=lambda x: 0.62 if x else 0.38,
        # )

        phi_ab = self._table_fn(  # noqa: F841 - unused until implemented
            {
                (("A", True), ("B", True)): 0.9,
                (("A", True), ("B", False)): 0.1,
                (("A", False), ("B", True)): 0.2,
                (("A", False), ("B", False)): 0.8,
            },
            err_msg="unknown argument",
        )
Esempio n. 5
0
 def scope_of(self, phi: Factor) -> Set[NumCatRVariable]:
     """!
     Give the scope of the factor phi, exactly as reported by
     phi.scope_vars().
     """
     scope = phi.scope_vars()
     return scope
Esempio n. 6
0
    def cls_product(
        cls,
        f: Factor,
        other: Factor,
        product_fn=lambda x, y: x * y,
        accumulator=lambda added, accumulated: added * accumulated,
    ) -> Tuple[Factor, float]:
        """!
        \brief Factor product operation from Koller, Friedman 2009, p. 107
        \f$ \psi(X,Y,Z) =  \phi(X,Y) \cdot \phi(Y,Z) \f$
        \f$ \prod_i phi(X_i) \f$

        Point wise product of two different factor functions. Every domain row
        of f is paired with every domain row of other that contains the same
        assignment of the variables shared by both scopes. The value of each
        pair is stored under the union of the two rows.

        \param f left operand of the product.
        \param other right operand of the product.

        \param product_fn
        \parblock

        function combining the two factor values of a matching row pair.
        Default case is that it multiplies its two arguments. It can be
        exchanged with another function, for example a summation when working
        with logarithms because of a floating precision problem.

        \endparblock

        \param accumulator this function decides how to accumulate resulting
        product. It is called as accumulator(pair_value, accumulated) for
        every matching pair and the accumulated value starts at 1.0.

        \throw TypeError if f or other is not a Factor.

        \return tuple whose first element is the resulting factor and second
        element is the accumulated product. The resulting factor has a new
        uuid4 identifier and the union of both scopes. Its function returns
        None for an argument that matches no stored row.
        """
        if not isinstance(f, Factor):
            raise TypeError("f argument needs to be a factor")

        if not isinstance(other, Factor):
            raise TypeError("other needs to be a factor")

        svar = f.scope_vars()
        ovar = other.scope_vars()
        shared_vars = list(svar.intersection(ovar))
        # One tuple per joint assignment of the shared variables; a single
        # empty tuple when the scopes are disjoint, which matches every row.
        inter_products = product(*[v.value_set() for v in shared_vars])

        # Build each row set once instead of once per visited pair.
        s_rows = [set(s) for s in f.factor_domain()]
        o_rows = [set(o) for o in other.factor_domain()]

        prod = 1.0
        # Joined row -> pair value, keyed by frozenset so that the lookup does
        # not depend on the order of the assignments.
        table = {}
        for iproduct in inter_products:
            shared = set(iproduct)
            s_hits = [ss for ss in s_rows if shared.issubset(ss)]
            for ost in o_rows:
                if not shared.issubset(ost):
                    continue
                for ss in s_hits:
                    multi = product_fn(f.factor_fn(ss),
                                       other.factor_fn(ost))
                    table[frozenset(ss.union(ost))] = multi
                    prod = accumulator(multi, prod)

        def fx(scope_product: Set[Tuple[str, NumericValue]]):
            """Value stored for scope_product, None when it is unknown."""
            return table.get(frozenset(scope_product))

        result = Factor(gid=str(uuid4()),
                        scope_vars=svar.union(ovar),
                        factor_fn=fx)
        return result, prod