def test_specify_hypothesis_by_attribute(self):
    """Check single-attribute specialization against a mixed hypothesis.

    The base hypothesis holds a concrete value, ``None`` and ``All``. Each
    case pairs an (attribute value, index) call with the hypothesis the
    helper is expected to return.
    """
    base = Hypothesis(["val1", None, All])
    cases = [
        # (attribute value, index, expected result)
        ("val1", 0, base),  # same concrete value: unchanged
        ("val1", 1, base),  # None constraint: unchanged
        ("val1", 2, Hypothesis(["val1", None, "val1"])),  # All narrows to the value
        ("val2", 0, Hypothesis([None, None, All])),  # differing value becomes None
        ("val2", 100, base),  # index past the end: unchanged
        ("val2", -1, base),  # negative index: unchanged
    ]
    for value, index, expected in cases:
        assert _specify_hypothesis_by_attribute(base, value, index) == expected
def test_specify_hypothesis(self):
    """Check the specializations produced for a hypothesis with two ``All`` slots.

    For every ``All`` position the expected set contains one hypothesis per
    known attribute value other than the value the instance has there.
    """
    general = Hypothesis(["val1", All, All])
    example = Instance(["val1", "val2", "val3", True])
    values_by_attribute = {
        0: {"val1", "val2"},
        1: {"val1", "val2"},
        2: {"val1", "val2", "val3"},
    }
    expected = {
        Hypothesis(["val1", "val1", All]),
        Hypothesis(["val1", All, "val1"]),
        Hypothesis(["val1", All, "val2"]),
    }
    assert _specify_hypothesis(general, example, values_by_attribute) == expected
def test_generalize_hypothesis_by_attribute(self):
    """Check single-attribute generalization against a mixed hypothesis.

    The base hypothesis holds a concrete value, ``None`` and ``All``. Each
    case pairs an (attribute value, index) call with the hypothesis the
    helper is expected to return.
    """
    base = Hypothesis(["val1", None, All])
    cases = [
        # (attribute value, index, expected result)
        ("val1", 0, base),  # same concrete value: unchanged
        ("val1", 1, Hypothesis(["val1", "val1", All])),  # None takes the value
        ("val1", 2, base),  # All stays All
        ("val2", 0, Hypothesis([All, None, All])),  # differing value widens to All
        ("val2", 100, base),  # index past the end: unchanged
        ("val2", -1, base),  # negative index: unchanged
    ]
    for value, index, expected in cases:
        assert _generalize_hypothesis_by_attribute(base, value, index) == expected
def test_candidate_elimination(self, specify_hypothesis_mock, generalize_hypothesis_mock, is_consistent_mock):
    """Train on one positive and one negative example that are always consistent.

    ``is_consistent_mock`` answers True for every call, so no hypothesis is
    removed or replaced and both boundaries are expected to keep their
    initial single member.
    """
    instances = [
        Instance(["val1", "val2", True]),
        Instance(["val2", "val3", False]),
    ]
    is_consistent_mock.side_effect = [True, True, True, True]

    candidate_elimination = CandidateElimination(3)
    candidate_elimination.train(instances)
    S, G = candidate_elimination.model

    # The Candidate-Elimination train() in this file builds S and G as sets
    # of hypotheses, so the boundaries are compared against singleton sets
    # rather than against a bare Hypothesis.
    assert S == {Hypothesis([None] * 2)}
    assert G == {Hypothesis([All] * 2)}
def train(self, instances: Iterable[Instance]):
    """Compute the specific (S) and general (G) boundary sets from examples.

    Follows the Candidate-Elimination procedure: positive examples prune G
    and minimally generalize S; negative examples prune S and minimally
    specialize G. The result is stored in ``self.model`` as the tuple
    ``(S, G)``, each a set of ``Hypothesis`` objects.

    :param instances: training examples; the value at
        ``Instance.target_attribute_idx`` marks an example as positive
        (truthy) or negative (falsy).
    """
    # The examples are traversed twice (value map, then the main loop).
    # A one-shot iterator would be exhausted by the first pass, so it is
    # materialized; re-iterable containers are passed through untouched.
    if iter(instances) is instances:
        instances = list(instances)

    # Initialize G to the set of maximally general hypotheses in H
    G = {Hypothesis([All] * (self.len_attributes - 1))}
    # Initialize S to the set of maximally specific hypotheses in H
    S = {Hypothesis([None] * (self.len_attributes - 1))}
    attribute_values_map = get_attribute_values_map(instances)

    # For each training example d, do
    for d in instances:
        if d[Instance.target_attribute_idx]:
            # d is a positive example.
            # Remove from G any hypothesis inconsistent with d
            G = {g for g in G if _is_hypothesis_consistent(g, d)}
            S_new = set(S)
            # For each hypothesis s in S that is not consistent with d
            for s in S:
                if not _is_hypothesis_consistent(s, d):
                    # Remove s from S
                    S_new.remove(s)
                    # Add to S all minimal generalizations h of s such that
                    # h is consistent with d, and some member of G is more general than h
                    for gen_h in _generalize_hypothesis(s, d):
                        if _is_hypothesis_consistent(gen_h, d) and any(g > gen_h for g in G):
                            S_new.add(gen_h)
            # Remove from S any hypothesis that is more general than another
            # hypothesis in S. A hypothesis is never compared with itself,
            # so the result does not depend on whether ``>`` is reflexive.
            S = {
                s for s in S_new
                if not any(s != other and s > other for other in S_new)
            }
        else:
            # d is a negative example.
            # Remove from S any hypothesis inconsistent with d
            S = {s for s in S if _is_hypothesis_consistent(s, d)}
            G_new = set(G)
            # For each hypothesis g in G that is not consistent with d
            for g in G:
                if not _is_hypothesis_consistent(g, d):
                    # Remove g from G
                    G_new.remove(g)
                    # Add to G all minimal specializations h of g such that
                    # h is consistent with d, and some member of S is more specific than h
                    for h in _specify_hypothesis(g, d, attribute_values_map):
                        if _is_hypothesis_consistent(h, d) and any(s < h for s in S):
                            G_new.add(h)
            # Remove from G any hypothesis that is less general than another
            # hypothesis in G (again skipping the self-comparison).
            G = {
                g for g in G_new
                if not any(g != other and g < other for other in G_new)
            }
    self.model = S, G
def test_candidate_elimination(self, specify_hypothesis_mock, generalize_hypothesis_mock, is_consistent_mock):
    """Train with every consistency check answering False.

    With the consistency mock always returning False, both boundary sets
    stored in the model are expected to end up empty.
    """
    is_consistent_mock.side_effect = [False] * 4
    generalize_hypothesis_mock.return_value = {Hypothesis(["val1", "val2"])}
    specify_hypothesis_mock.return_value = {
        Hypothesis(["val2", All]),
        Hypothesis([All, "val3"]),
    }

    learner = CandidateElimination(3)
    learner.train([
        Instance(["val1", "val2", True]),
        Instance(["val2", "val3", False]),
    ])

    specific_boundary, general_boundary = learner.model
    assert specific_boundary == set()
    assert general_boundary == set()
def test_specify_hypothesis_empty(self):
    """A hypothesis without any ``All`` constraint yields no specializations."""
    fully_bound = Hypothesis(["val1", "val2"])
    example = Instance(["val1", "val2", True])
    values_by_attribute = {
        0: {"val1", "val2"},
        1: {"val2"},
    }
    result = _specify_hypothesis(fully_bound, example, values_by_attribute)
    assert result == set()
def _generalize_hypothesis(h: Hypothesis, d: Instance):
    """Generalize ``h`` position by position using the values of ``d``.

    A ``None`` constraint takes the value seen in ``d``; a constraint that
    differs from that value becomes ``All``; a matching constraint is kept.

    :return: a set containing the single generalized copy of ``h``.
    """
    widened = Hypothesis(h)
    for position, observed in enumerate(d):
        constraint = h[position]
        if constraint is None:
            widened[position] = observed
            continue
        if constraint != observed:
            widened[position] = All
    return {widened}
def _generalize_hypothesis_by_attribute(h: Hypothesis, a: object, idx: int):
    """Return a copy of ``h`` generalized at position ``idx`` for value ``a``.

    A ``None`` constraint becomes ``a``; a constraint different from ``a``
    becomes ``All``; an equal constraint is left alone. An ``idx`` outside
    ``0 <= idx < len(h)`` returns an unchanged copy.
    """
    widened = Hypothesis(h)
    if not 0 <= idx < len(h):
        return widened

    constraint = h[idx]
    if constraint is None:
        widened[idx] = a
    elif constraint != a:
        widened[idx] = All
    return widened
def _specify_hypothesis_by_attribute(h: Hypothesis, a: object, idx: int):
    """Return a copy of ``h`` specialized at position ``idx`` for value ``a``.

    An ``All`` constraint becomes ``a``; a constraint different from ``a``
    becomes ``None``; an equal constraint is left alone. An ``idx`` outside
    ``0 <= idx < len(h)`` returns an unchanged copy.
    """
    narrowed = Hypothesis(h)
    if not 0 <= idx < len(h):
        return narrowed

    constraint = h[idx]
    if constraint is All:
        narrowed[idx] = a
    elif constraint != a:
        narrowed[idx] = None
    return narrowed
def test_find_s_negative_examples(self, generalize_hypothesis_mock, is_satisfied_mock):
    """Only the positive example may trigger a generalization.

    The first two mocked satisfaction answers are False then True, so one
    generalization is expected overall; the trailing answers are spare
    values in case the second (negative) example were examined.
    """
    is_satisfied_mock.side_effect = [False, True, False, False]
    generalize_hypothesis_mock.return_value = Hypothesis([None] * 2)

    learner = FindS(3)
    learner.train([
        Instance(["val1", "val2", True]),
        Instance(["val1", "val3", False]),
    ])

    assert generalize_hypothesis_mock.call_count == 1
def _specify_hypothesis(h: Hypothesis, d: Instance, attribute_values_map: Dict[int, object]):
    """Build specializations of ``h`` that differ from ``d`` in one ``All`` slot.

    For each position where ``h`` holds ``All``, one copy of ``h`` is made
    per value in ``attribute_values_map[position]`` that is not the value
    ``d`` has at that position, with the slot set to that value.

    :return: the set of specialized hypotheses (empty if none were produced).
    """
    specializations = set()
    for position, observed in enumerate(d):
        if not h[position] == All:
            continue
        # Skip the value seen in d: only the other known values are used.
        alternatives = (
            value for value in attribute_values_map[position] if value != observed
        )
        for value in alternatives:
            narrowed = Hypothesis(h)
            narrowed[position] = value
            specializations.add(narrowed)
    return specializations
def test_compare_attribute_constraints_less(self):
    """Check ``Hypothesis.LESS`` comparisons between pairs of constraints."""
    cases = [
        # (left, right, expected truthiness)
        (None, "val1", True),
        ("val1", None, False),
        (None, None, True),
        (All, All, True),
        ("val1", "val1", True),
        ("val1", All, True),
        (All, "val1", False),
        ("val1", "val2", False),
    ]
    for left, right, expected in cases:
        outcome = Hypothesis._compare_attribute_constraints(left, right, Hypothesis.LESS)
        assert bool(outcome) == expected
def train(self, instances: Iterable[Instance]):
    """Fit the most specific hypothesis covering the positive examples (Find-S).

    Starts from an all-``None`` hypothesis and, for each positive example,
    generalizes every attribute constraint the example does not satisfy.
    Negative examples are skipped. The result is stored in ``self.model``.

    :param instances: training examples; the value at
        ``Instance.target_attribute_idx`` marks an example as positive.
    """
    # 1. Start from the most specific hypothesis in H
    hypothesis = Hypothesis([None] * (self.len_attributes - 1))

    # 2. Visit the positive training instances only
    positives = (x for x in instances if x[Instance.target_attribute_idx])
    for example in positives:
        # enumerate() keeps walking the hypothesis object it was given, even
        # though the name is rebound to each generalized hypothesis below.
        for position, constraint in enumerate(hypothesis):
            if _is_attribute_constraint_satisfied(example, constraint, position):
                # Constraint already satisfied by the example: nothing to do
                continue
            # Otherwise move to the next more general constraint satisfied by the example
            hypothesis = _generalize_hypothesis_by_attribute(
                hypothesis, example[position], position
            )

    # 3. Output the hypothesis
    self.model = hypothesis
def test_compare_attribute_greater(self):
    """Check ``Hypothesis.GREATER`` comparisons between pairs of constraints."""
    cases = [
        # (left, right, expected truthiness)
        (None, "val1", False),
        ("val1", None, True),
        (None, None, True),
        (All, All, True),
        ("val1", "val1", True),
        ("val1", All, False),
        (All, "val1", True),
        ("val1", "val2", False),
    ]
    for left, right, expected in cases:
        outcome = Hypothesis._compare_attribute_constraints(left, right, Hypothesis.GREATER)
        assert bool(outcome) == expected
def test_compare(self):
    """Check ``<`` between hypotheses: an ordered chain plus unordered pairs."""
    h1 = Hypothesis([None, None])
    h2 = Hypothesis(["val1", None])
    h3 = Hypothesis(["val1", "val2"])
    h4 = Hypothesis(["val1", All])
    h5 = Hypothesis([All, All])
    h6 = Hypothesis(["val2", All])

    # Each hypothesis in the chain must be less than its successor.
    ascending = [h1, h2, h3, h4, h5]
    assert all(lower < higher for lower, higher in zip(ascending, ascending[1:]))

    # h6 must not be ordered below or above h3 and h4.
    unordered_pairs = [(h3, h6), (h6, h3), (h6, h4), (h4, h6)]
    for left, right in unordered_pairs:
        assert not left < right
def test_is_hypothesis_consistent_negative_true(self, is_satisfied_mock):
    """A negative instance with one unsatisfied constraint counts as consistent.

    The mocked satisfaction answers are True, False, True.
    """
    is_satisfied_mock.side_effect = [True, False, True]
    candidate = Hypothesis(["val1", None, "val3"])
    negative_example = Instance(["val1", "val2", "val3", False])
    assert _is_hypothesis_consistent(candidate, negative_example)
def test_generalize_hypothesis(self):
    """Check generalization over a differing, a ``None``, an equal and an ``All`` slot."""
    start = Hypothesis(["val1", None, "val3", All])
    example = Instance(["val2", "val2", "val3", "val4", True])
    expected = {Hypothesis([All, "val2", "val3", All])}
    assert _generalize_hypothesis(start, example) == expected