예제 #1
0
    def test_invalid_implication_values(self, triples):
        """Dataset must raise ValueError for implication values of 1.001 and -1.001."""

        def constant_implication(value):
            # Build an implication function that ignores its inputs and
            # always answers with the given value.
            def implication(var, val1, val2):
                return value

            return implication

        for out_of_range in (1.001, -1.001):
            bad_function = constant_implication(out_of_range)
            with pytest.raises(ValueError):
                Dataset(triples, implication_function=bad_function)
예제 #2
0
    def test_source_multiple_claims_for_a_single_variable(self):
        """Check handling of a source claiming two values for one variable."""
        # Without allow_multiple, the repeated ("s1", "x", ...) claim is an error
        conflicting_claims = (("s1", "x", 4), ("s2", "x", 5), ("s1", "x", 5))
        with pytest.raises(ValueError) as excinfo:
            Dataset(conflicting_claims)
        assert (
            "Source 's1' claimed more than one value for variable 'x'"
            in str(excinfo.value)
        )

        # Allow multiple claims; first one should be used
        repeated_claims = (
            ("s1", "x", 5),
            ("s2", "x", 6),
            ("s3", "x", 5),
            ("s3", "x", 6),
        )
        lenient = Dataset(repeated_claims, allow_multiple=True)
        expected_source_claims = np.array([[1, 0], [0, 1], [1, 0]])
        assert np.array_equal(lenient.sc.toarray(), expected_source_claims)
예제 #3
0
    def test_uniform_priors(self, data, triples):
        """Uniform prior beliefs must be the same with or without an extra claim."""
        algorithm = BaseIterativeAlgorithm(priors=PriorBelief.UNIFORM)
        uniform_beliefs = [1, 0.5, 1, 0.5]

        assert np.array_equal(algorithm.get_prior_beliefs(data), uniform_beliefs)

        # Same secondary test as for voted, but beliefs should not change here
        extended = Dataset(triples + [("s3", "y", "eight")])
        assert np.array_equal(
            algorithm.get_prior_beliefs(extended), uniform_beliefs
        )
예제 #4
0
    def data(self):
        """Return a Dataset of string-valued claims about wind, rain and water."""
        wind_claims = (
            ("john", "wind", "very windy"),
            ("paul", "wind", "not very windy"),
            ("george", "wind", "very windy"),
            ("ringo", "wind", "not very windy at all"),
        )
        first_rain_claims = (
            ("john", "rain", "dry"),
            ("george", "rain", "wet"),
        )
        water_claims = (
            ("john", "water", "wet"),  # re-use value
            ("paul", "water", "drink"),
            ("george", "water", "drink"),
        )
        # Mix up the order of variables: this "rain" claim is deliberately
        # placed after the "water" claims
        late_rain_claims = (("ringo", "rain", "dry"),)

        return Dataset(
            wind_claims + first_rain_claims + water_claims + late_rain_claims
        )
예제 #5
0
    def test_implications(self, triples):
        """Check Dataset.imp against a hand-written implication function."""
        # Manually crafted values, keyed by (variable, first value, second value).
        # Any combination not listed here yields None.
        crafted_values = {
            ("x", 1, 2): 0.85,
            ("x", 2, 1): -0.5,
            # Note: this value should not be used
            ("x", 1, 1): 10000000,
            ("y", 2, 3): 1,
            ("y", 3, 2): 0.0001,
            ("z", 3, 4): -0.3,
            ("z", 4, 3): None,
            ("w", 4, 5): 0.7,
            ("w", 5, 4): -0.654,
        }

        def imp_func(var, val1, val2):
            return crafted_values.get((var, val1, val2))

        data = Dataset(triples, implication_function=imp_func)

        # Claims are:
        # 0: x=1
        # 1: y=2
        # 2: z=3
        # 3: w=4
        # 4: x=2
        # 5: z=4
        # 6: w=5
        # 7: y=3
        #
        # Non-zero cells of the expected matrix, keyed by (row, column).
        # Row 5 (z=4) stays all zero because its function value is None.
        nonzero_cells = {
            (0, 4): 0.85,
            (1, 7): 1,
            (2, 5): -0.3,
            (3, 6): 0.7,
            (4, 0): -0.5,
            (6, 3): -0.654,
            (7, 1): 0.0001,
        }
        expected_imp = np.zeros((8, 8))
        for (row, col), value in nonzero_cells.items():
            expected_imp[row, col] = value

        assert data.imp.shape == (8, 8)
        assert np.array_equal(data.imp.toarray(), expected_imp)
예제 #6
0
    def test_num_connected_components(self):
        """Check the component count for one-variable and three-variable datasets."""
        # All three sources make claims about the same variable "x"
        one_variable_claims = [("s1", "x", "a"), ("s2", "x", "b"), ("s3", "x", "a")]
        one_variable = Dataset(one_variable_claims)
        assert one_variable.num_connected_components() == 1

        # Variables "x", "y" and "z" are claimed by disjoint sets of sources
        three_variable_claims = [
            ("s1", "x", "a"),
            ("s2", "x", "b"),
            ("s3", "x", "a"),
            ("s4", "y", "a"),
            ("s5", "z", "a"),
            ("s6", "z", "a"),
        ]
        three_variables = Dataset(three_variable_claims)
        assert three_variables.num_connected_components() == 3
예제 #7
0
 def test_not_enough_claimed_values(self):
     """get_accuracy must raise ValueError when no variable has a second value.

     If all variables have only one claimed value, the accuracy
     calculation cannot be performed: an exception should be raised.
     """
     sources = ("s1", "s2", "s3")
     # Every source claims x=4 and y=1 (all "x" claims first, then all "y")
     claims = [
         (source, var, value)
         for var, value in (("x", 4), ("y", 1))
         for source in sources
     ]
     supervised = SupervisedData(Dataset(claims), {"x": 4, "y": 2})

     trust_scores = {"s1": 0.3, "s2": 0.4, "s3": 0.4}
     beliefs = {"x": {4: 1}, "y": {1: 1}}
     result = Result(trust=trust_scores, belief=beliefs, time_taken=None)

     with pytest.raises(ValueError):
         supervised.get_accuracy(result)
예제 #8
0
 def dataset(self):
     """Return a Dataset in which each variable has a single claimed value."""
     claimed_value = {"x": 1, "y": 2, "z": 3, "w": 4}
     # Which variables each source makes a claim about, in claim order
     variables_by_source = (
         ("s1", ("x", "y", "z", "w")),
         ("s2", ("x", "z", "w")),
         ("s3", ("y", "z", "w")),
     )
     triples = tuple(
         (source, var, claimed_value[var])
         for source, variables in variables_by_source
         for var in variables
     )
     return Dataset(triples)