def test_summary(self):
    """Check that StateTransitionsDataset.summary() prints the expected text.

    Two cases per random trial: an empty dataset (no transitions) and a
    non-empty one. The expected output is rebuilt here line by line and
    compared byte-for-byte against captured stdout.
    """
    print(">> pylfit.datasets.StateTransitionsDataset.summary()")

    for i in range(self._nb_tests):
        # Empty dataset: summary must show domains but an empty data section.
        dataset = random_StateTransitionsDataset(
            nb_transitions=0,
            nb_features=random.randint(1,self._nb_features),
            nb_targets=random.randint(1,self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        # Rebuild the exact expected text.
        expected_print = "StateTransitionsDataset summary:\n"
        expected_print += " Features: \n"
        for var, vals in dataset.features:
            expected_print += " " + var + ": " + str(vals) + "\n"
        expected_print += " Targets: \n"
        for var, vals in dataset.targets:
            expected_print += " " + var + ": " + str(vals) + "\n"
        expected_print += " Data: []\n"

        # Capture stdout while summary() prints.
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        dataset.summary()
        sys.stdout = old_stdout

        self.assertEqual(mystdout.getvalue(), expected_print)

        # Usual dataset: at least one transition, data section lists each pair.
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1,self._nb_features),
            nb_targets=random.randint(1,self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        expected_print = "StateTransitionsDataset summary:\n"
        expected_print += " Features: \n"
        for var, vals in dataset.features:
            expected_print += " " + var + ": " + str(vals) + "\n"
        expected_print += " Targets: \n"
        for var, vals in dataset.targets:
            expected_print += " " + var + ": " + str(vals) + "\n"
        expected_print += " Data:\n"
        # Each transition is rendered as a (list, list) pair.
        for s1, s2 in dataset.data:
            expected_print += " " + str((list(s1), list(s2))) + "\n"

        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        dataset.summary()
        sys.stdout = old_stdout

        self.assertEqual(mystdout.getvalue(), expected_print)
def test_compile(self):
    """Verify CDMVLP.compile(): default algorithm, explicit selection, and rejection of bad names."""
    print(">> CDMVLP.compile(algorithm)")

    for _ in range(self._nb_tests):
        for algorithm in self._SUPPORTED_ALGORITHMS:
            dataset = random_StateTransitionsDataset(
                nb_transitions=random.randint(1, self._nb_transitions),
                nb_features=random.randint(1, self._nb_features),
                nb_targets=random.randint(1, self._nb_targets),
                max_feature_values=self._nb_feature_values,
                max_target_values=self._nb_target_values)

            model = CDMVLP(features=dataset.features, targets=dataset.targets)

            # With no argument, compile() falls back to the default algorithm.
            model.compile()
            self.assertEqual(model.algorithm, "synchronizer")

            # Explicit selection must be recorded as-is.
            model.compile(algorithm=algorithm)
            self.assertEqual(model.algorithm, algorithm)

            # Unknown or unsupported algorithm names are rejected.
            self.assertRaises(ValueError, model.compile, "lol")
            self.assertRaises(ValueError, model.compile, "gula")
            #self.assertRaises(NotImplementedError, model.compile, "pride")
            #self.assertRaises(NotImplementedError, model.compile, "synchronizer-pride")

            # Temporarily declare "gula" as a known algorithm: compiling it
            # must then fail with NotImplementedError instead of ValueError.
            saved_algorithms = CDMVLP._ALGORITHMS.copy()
            CDMVLP._ALGORITHMS = ["gula"]
            self.assertRaises(NotImplementedError, model.compile, "gula")
            CDMVLP._ALGORITHMS = saved_algorithms
def test_to_csv(self):
    """Check StateTransitionsDataset.to_csv(): file exists and round-trips header + rows.

    NOTE(review): assumes a "tmp/" directory already exists in the working
    directory — confirm it is created by the test setup.
    """
    print(
        ">> pylfit.datasets.StateTransitionsDataset.to_csv(path_to_file)")
    for i in range(self._nb_tests):
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1,self._nb_features),
            nb_targets=random.randint(1,self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        path = "tmp/StateTransitionsDataset_test.csv"
        dataset.to_csv(path)

        self.assertTrue(os.path.isfile(path))

        # Read the file back and compare against the dataset contents.
        with open(path, newline='') as f:
            reader = csv.reader(f)
            data = list(reader)

        # First row is the header: feature names then target names.
        self.assertEqual(
            data[0],
            [var for var, vals in dataset.features + dataset.targets])
        # Each following row is one transition: s1 values then s2 values.
        for id, line in enumerate(data[1:]):
            self.assertEqual(line, [
                val for val in list(dataset.data[id][0]) +
                list(dataset.data[id][1])
            ])
def test_interprete(self):
    """Check PRIDE.interprete(): negatives are real negatives and cover all uncovered init states.

    A random dataset is value-encoded, transitions are grouped by initial
    state, and for each (target variable, value) pair the negatives returned
    by PRIDE.interprete are validated against the grouped transitions.
    """
    print(">> PRIDE.interprete(transitions, variable, value)")

    for i in range(self._nb_tests):
        # Generate transitions
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1,self._nb_features),
            nb_targets=random.randint(1,self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        # Encode data with StateTransitionsDataset: replace each symbolic
        # value by its index in the variable's domain.
        data_encoded = []
        for (s1,s2) in dataset.data:
            s1_encoded = [domain.index(s1[var_id]) for var_id, (var,domain) in enumerate(dataset.features)]
            s2_encoded = [domain.index(s2[var_id]) for var_id, (var,domain) in enumerate(dataset.targets)]
            data_encoded.append((s1_encoded,s2_encoded))

        #dataset.summary()

        # Group transitions by initial state: list of (s1, [s2, ...]) with
        # duplicate successors removed.
        data_grouped_by_init_state = []
        for (s1,s2) in data_encoded:
            added = False
            for (s1_,S) in data_grouped_by_init_state:
                if s1_ == s1:
                    if s2 not in S:
                        S.append(s2)
                    added = True
                    break
            if not added:
                data_grouped_by_init_state.append((s1,[s2])) # new init state

        #eprint(data_encoded)
        #eprint()
        #eprint(data_grouped_by_init_state)

        # each pos/neg interpretation: one check per target variable/value.
        for var_id, (var,vals) in enumerate(dataset.targets):
            for val_id, val in enumerate(vals):
                #eprint("var_id: ", var_id)
                #eprint("val_id: ", val_id)
                pos, neg = PRIDE.interprete(data_encoded, var_id, val_id)

                # All neg are valid: no transition from a negative state may
                # produce this value for this variable.
                # NOTE(review): if neg holds tuples while data_encoded holds
                # lists, s1 == s never matches and this loop is vacuous —
                # confirm the element type returned by interprete.
                for s in neg:
                    for s1, s2 in data_encoded:
                        if s1 == s:
                            self.assertTrue(s2[var_id] != val_id)

                # All transitions are interpreted: any init state with no
                # successor taking this value must appear in neg.
                for s1, S2 in data_grouped_by_init_state:
                    if len([s2 for s2 in S2 if s2[var_id] == val_id]) == 0:
                        self.assertTrue(tuple(s1) in neg)
def test_to_string(self):
    """Check that StateTransitionsDataset.to_string() follows the expected layout."""
    print(">> pylfit.datasets.StateTransitionsDataset.to_string()")

    for _ in range(self._nb_tests):
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1, self._nb_features),
            nb_targets=random.randint(1, self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        # Expected form: "{Features: ...\nTargets: ...\nData: ...}"
        sections = [
            "Features: " + str(dataset.features),
            "Targets: " + str(dataset.targets),
            "Data: " + str(dataset.data),
        ]
        expected = "{" + "\n".join(sections) + "}"

        self.assertEqual(dataset.to_string(), expected)
def test_predict(self):
    """Check CDMVLP.predict() against SynchronousConstrained.next() and its input validation.

    For every supported algorithm: fit on a random dataset, compare each
    prediction with a reference computed directly from the learned rules and
    constraints, repeat with some rules removed, then exercise the
    TypeError/ValueError paths for malformed feature states.
    """
    print(">> CDMVLP.predict()")

    for i in range(self._nb_tests):
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1,self._nb_features),
            nb_targets=random.randint(1,self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        for algorithm in self._SUPPORTED_ALGORITHMS:
            model = CDMVLP(features=dataset.features, targets=dataset.targets)
            model.compile(algorithm=algorithm)
            model.fit(dataset=dataset)

            # Deduplicated initial states observed in the data.
            feature_states = list(set(
                tuple(s1) for s1, s2 in dataset.data))
            prediction = model.predict(feature_states)

            for state_id, s1 in enumerate(feature_states):
                # Encode the state as domain-value indexes.
                feature_state_encoded = []
                for var_id, val in enumerate(s1):
                    val_id = model.features[var_id][1].index(str(val))
                    feature_state_encoded.append(val_id)
                #eprint(feature_state_encoded)

                # Reference successors computed straight from the model.
                target_states = SynchronousConstrained.next(
                    feature_state_encoded, model.targets, model.rules,
                    model.constraints)

                # Decode successors back to symbolic values; -1 means the
                # value is unknown and is rendered as "?".
                output = []
                for s in target_states:
                    target_state = []
                    for var_id, val_id in enumerate(s):
                        #eprint(var_id, val_id)
                        if val_id == -1:
                            target_state.append("?")
                        else:
                            target_state.append(
                                model.targets[var_id][1][val_id])
                    output.append(target_state)

                self.assertEqual(prediction[state_id][0], list(s1))
                self.assertEqual(prediction[state_id][1], output)

            # Force missing value: drop the rules of one target variable.
            # NOTE(review): random.randint is re-evaluated for every rule in
            # this comprehension, so each rule is compared against a possibly
            # different variable id — confirm whether a single draw hoisted
            # out of the comprehension was intended.
            model.rules = [
                r for r in model.rules
                if r.head_variable != random.randint(0, len(model.targets))
            ]

            prediction = model.predict(feature_states)

            for state_id, s1 in enumerate(feature_states):
                feature_state_encoded = []
                for var_id, val in enumerate(s1):
                    val_id = model.features[var_id][1].index(str(val))
                    feature_state_encoded.append(val_id)
                #eprint(feature_state_encoded)

                target_states = SynchronousConstrained.next(
                    feature_state_encoded, model.targets, model.rules,
                    model.constraints)

                output = []
                for s in target_states:
                    target_state = []
                    for var_id, val_id in enumerate(s):
                        #eprint(var_id, val_id)
                        if val_id == -1:
                            target_state.append("?")
                        else:
                            target_state.append(
                                model.targets[var_id][1][val_id])
                    output.append(target_state)

                self.assertEqual(prediction[state_id][1], output)

            # Exceptions:
            self.assertRaises(
                TypeError, model.predict,
                "")  # Feature_states bad format: is not a list
            self.assertRaises(
                TypeError, model.predict,
                [["0", "1"], 0, 10
                 ])  # Feature_states bad format: is not a list of list
            self.assertRaises(
                TypeError, model.predict, [["0", "1"], [0, 10]]
            )  # Feature_states bad format: is not a list of list of string

            feature_states = [
                list(s) for s in set(tuple(s1) for s1, s2 in dataset.data)
            ]
            state_id = random.randint(0, len(feature_states) - 1)
            original = feature_states[state_id].copy()

            # Truncated state: too few values.
            feature_states[state_id] = feature_states[
                state_id][:-random.randint(1, len(dataset.features))]
            self.assertRaises(
                TypeError, model.predict, feature_states
            )  # Feature_states bad format: size of state not correspond to model features <
            feature_states[state_id] = original.copy()

            # Padded state: too many values.
            feature_states[state_id].extend(
                ["0" for i in range(random.randint(1, 10))])
            self.assertRaises(
                TypeError, model.predict, feature_states
            )  # Feature_states bad format: size of state not correspond to model features >
            feature_states[state_id] = original.copy()

            # Out-of-domain value for one variable.
            var_id = random.randint(0, len(dataset.features) - 1)
            feature_states[state_id][var_id] = "bad_value"
            self.assertRaises(
                ValueError, model.predict,
                feature_states)  # Feature_states bad format: value out of domain
            feature_states[state_id] = original.copy()
def test_fit(self):
    """Check CDMVLP.fit(): learned rules/constraints match Synchronizer.fit, plus error paths.

    Class attributes CDMVLP._COMPATIBLE_DATASETS and CDMVLP._ALGORITHMS are
    temporarily patched to trigger the unsupported-dataset and
    not-implemented branches; each patch is restored immediately after.
    """
    print(">> CDMVLP.fit(dataset)")

    for i in range(self._nb_tests):
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1,self._nb_features),
            nb_targets=random.randint(1,self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        for algorithm in self._SUPPORTED_ALGORITHMS:
            for verbose in [0, 1]:
                model = CDMVLP(features=dataset.features,
                               targets=dataset.targets)
                model.compile(algorithm=algorithm)

                # Silence the learner's progress output on stderr.
                f = io.StringIO()
                with contextlib.redirect_stderr(f):
                    model.fit(dataset=dataset, verbose=verbose)

                # Reference result computed directly by the algorithm;
                # "synchronizer" uses the complete search.
                expected_rules, expected_constraints = Synchronizer.fit(
                    dataset, complete=(algorithm == "synchronizer"))
                self.assertEqual(expected_rules, model.rules)
                self.assertEqual(expected_constraints, model.constraints)

                # Exceptions
                #------------
                model = CDMVLP(features=dataset.features,
                               targets=dataset.targets)
                model.compile(algorithm=algorithm)

                self.assertRaises(ValueError, model.fit, [],
                                  verbose)  # dataset is not of valid type

                model.algorithm = "bad_value"
                self.assertRaises(ValueError, model.fit, dataset,
                                  verbose)  # algorithm not supported
                model.algorithm = algorithm

                # Patch the compatible-dataset list so the real dataset type
                # is no longer accepted.
                original = CDMVLP._COMPATIBLE_DATASETS.copy()

                class newdataset(Dataset):
                    # Minimal stand-in dataset; body intentionally does nothing.
                    def __init__(self, data, features, targets):
                        x = ""

                CDMVLP._COMPATIBLE_DATASETS = [newdataset]
                self.assertRaises(
                    ValueError, model.fit, newdataset([], [], []),
                    verbose)  # dataset not supported by the algo
                CDMVLP._COMPATIBLE_DATASETS = original

                # Patch the algorithm list so "gula" is declared but its
                # fit path is not implemented.
                model.algorithm = "gula"

                original = CDMVLP._ALGORITHMS.copy()

                class newdataset(Dataset):
                    # Redefined stand-in (unused here); kept to mirror the
                    # previous patching block.
                    def __init__(self, data, features, targets):
                        x = ""

                CDMVLP._ALGORITHMS = ["gula"]
                self.assertRaises(NotImplementedError, model.fit, dataset,
                                  verbose)  # dataset not supported yet
                CDMVLP._ALGORITHMS = original
def test_transitions_dataset_from_array(self):
    """Check pylfit.preprocessing.transitions_dataset_from_array().

    Covers: a fixed unit case, every TypeError/ValueError validation branch
    of data/features/targets/feature_names/target_names, and random
    round-trip tests for each combination of supplied domains and names.

    Positional argument order used throughout:
    (data, feature_domains, target_domains, feature_names, target_names).
    """
    print(
        ">> pylfit.preprocessing.tabular_dataset.transitions_dataset_from_csv(path, feature_names, target_names)"
    )

    # unit tests
    data = [
        ([0,0,0],[0,0,1]),
        ([0,0,0],[1,0,0]),
        ([1,0,0],[0,0,0]),
        ([0,1,0],[1,0,1]),
        ([0,0,1],[0,0,1]),
        ([1,1,0],[1,0,0]),
        ([1,0,1],[0,1,0]),
        ([0,1,1],[1,0,1]),
        ([1,1,1],[1,1,0])]
    feature_names = ["p_t-1", "q_t-1", "r_t-1"]
    target_names = ["p_t", "q_t", "r_t"]

    dataset = pylfit.preprocessing.transitions_dataset_from_array(
        data=data, feature_names=feature_names, target_names=target_names)

    # Values are stored as strings in numpy arrays.
    data = [(np.array([str(i) for i in s1]), np.array([str(i)
                                                       for i in s2]))
            for (s1, s2) in data]

    self.assertEqual(dataset.features, [("p_t-1", ["0", "1"]),
                                        ("q_t-1", ["0", "1"]),
                                        ("r_t-1", ["0", "1"])])
    self.assertEqual(dataset.targets, [("p_t", ["0", "1"]),
                                       ("q_t", ["0", "1"]),
                                       ("r_t", ["0", "1"])])

    data = [(np.array([str(i) for i in s1]), np.array([str(i)
                                                       for i in s2]))
            for (s1, s2) in data]

    self.assertEqual(len(data), len(dataset.data))
    for i in range(len(data)):
        self.assertTrue((dataset.data[i][0] == data[i][0]).all())
        self.assertTrue((dataset.data[i][1] == data[i][1]).all())

    # exceptions
    #------------

    # data is not list
    data = "[ \
    ([0,0,0],[0.1,0,1]), \
    ([0,0.6,0],[1,0,0]), \
    ([1,0,0],[0,0,0])]"
    # NOTE(review): the next three calls pass feature_names/target_names
    # positionally, i.e. into the feature_domains/target_domains slots —
    # the data check fires first so the tests pass, but confirm this was
    # intended rather than (data, None, None, feature_names, target_names).
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, feature_names, target_names)

    # data is not list of tuples
    data = [
        ([0,0,0],[0,0,1],[0,0,0],[1,0,0]),
        [[1,0,0],[0,0,0]]]
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, feature_names, target_names)

    # data is not list of pairs
    data = [
        ([0,0,0],[0,0,1],[0,0,0],[1,0,0]),
        ([1,0,0],[0,0,0])]
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, feature_names, target_names)

    # Not same size for features
    data = [
        ([0,0,0],[0,0,1]),
        ([0,0,0],[1,0,0]),
        ([1,0],[0,0,0])]
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, None, feature_names, target_names)

    # Not same size for targets
    data = [
        ([0,0,0],[0,0,1]),
        ([0,0,0],[1,0]),
        ([1,0,0],[0,0,0])]
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, None, feature_names, target_names)

    # Not only int/string in features
    data = [
        ([0,0,0],[0,0,1]),
        ([0,0.3,0],[1,0,0]),
        ([1,0,0],[0,0,0])]
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, None, feature_names, target_names)

    # Not only int/string in targets
    data = [
        ([0,0,0],[0,0.11,1]),
        ([0,0,0],[1,0,0]),
        ([1,0,0],[0,0,0])]
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, None, feature_names, target_names)

    # features is not a list of (string, list of string)
    data = [
        ([0,0,0],[0,0,1]),
        ([0,0,0],[1,0,0])]

    features = ""  # not list
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, None, None, None)
    features = [1, (1, 2)]  # not list of tuples
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, None, None, None)
    features = [(1, 1), (1, 2, 4), (1, 2)]  # not list of pair
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, None, None, None)
    features = [("p_t", ["1", "2"]), (1, ["0"]),
                ("r_t", ["1", "3"])]  # not list of pair (string,_)
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, None, None, None)
    features = [("p_t", ["1", "2"]), ("q_t", "0"),
                ("r_t", ["1", "2"])]  # not list of pair (string,list of _)
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, None, None, None)
    features = [("p_t", ["1", "2"]), ("q_t", ["0"]),
                ("r_t", ["1", 2])]  # not list of pair (string,list of string)
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, None, None, None)
    features = [("p_t", ["1", "2"]), ("q_t", ["0", "1"]),
                ("p_t", ["1", "3"])]  # not all different variables
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, None, None, None)
    features = [("p_t", ["1", "2"]), ("q_t", ["0", "0"]),
                ("r_t", ["1", "3"])]  # not all different values
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, None, None, None)

    # targets is not a list of (string, list of string)
    targets = ""  # not list
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, targets, None, None)
    targets = [1, (1, 2)]  # not list of tuples
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, targets, None, None)
    targets = [(1, 1), (1, 2, 4), (1, 2)]  # not list of pair
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, targets, None, None)
    targets = [("p_t", ["1", "2"]), (1, ["0"]),
               ("r_t", ["1", "3"])]  # not list of pair (string,_)
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, targets, None, None)
    targets = [("p_t", ["1", "2"]), ("q_t", "0"),
               ("r_t", ["1", "2"])]  # not list of pair (string,list of _)
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, targets, None, None)
    targets = [("p_t", ["1", "2"]), ("q_t", ["0"]),
               ("r_t", ["1", 2])]  # not list of pair (string,list of string)
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, targets, None, None)
    targets = [("p_t", ["1", "2"]), ("q_t", ["0", "1"]),
               ("p_t", ["1", "3"])]  # not all different values
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, targets, None, None)
    targets = [("p_t", ["1", "2"]), ("q_t", ["0", "0"]),
               ("r_t", ["1", "3"])]  # not all different values
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, targets, None, None)

    # Both features/feature_names or targets/target_names given
    features = [("p_t", ["1", "2"]), ("q_t", ["0", "1"]),
                ("r_t", ["1", "3"])]
    targets = [("p_t", ["1", "2"]), ("q_t", ["0", "1"]),
               ("r_t", ["1", "2"])]
    feature_names = ["p_t-1", "q_t-1", "r_t-1"]
    target_names = ["p_t", "q_t", "r_t"]

    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, None, feature_names, None)
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, targets, None, target_names)
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, features, targets, feature_names, target_names)

    # feature_names is not list of string
    data = [
        ([0,0,0],[0,0,1]),
        ([0,0,0],[1,0,0])]
    feature_names = ["p_t-1", "q_t-1", "r_t-1"]
    target_names = ["p_t", "q_t", "r_t"]

    feature_names = ""
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, None, feature_names, target_names)
    feature_names = [1, 0.5, "lol"]
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, None, feature_names, target_names)

    # target_names is not list of string
    data = [
        ([0,0,0],[0,0,1]),
        ([0,0,0],[1,0,0])]
    feature_names = ["p_t-1", "q_t-1", "r_t-1"]
    target_names = ["p_t", "q_t", "r_t"]

    target_names = ""
    self.assertRaises(TypeError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, None, feature_names, target_names)
    target_names = [1, 0.5, "lol"]
    self.assertRaises(ValueError,
                      pylfit.preprocessing.transitions_dataset_from_array,
                      data, None, None, feature_names, target_names)

    # Random tests
    for i in range(self._nb_random_tests):
        original_dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1,self._nb_features),
            nb_targets=random.randint(1,self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        data = original_dataset.data
        features = original_dataset.features
        targets = original_dataset.targets
        feature_names = [var for var, vals in features]
        target_names = [var for var, vals in targets]

        # empty dataset
        self.assertEqual(
            transitions_dataset_from_array(data=[],
                                           feature_domains=features,
                                           target_domains=targets),
            StateTransitionsDataset(data=[],
                                    features=features,
                                    targets=targets))

        # Only data given
        dataset = pylfit.preprocessing.transitions_dataset_from_array(
            data=data)

        # Only names given
        dataset = pylfit.preprocessing.transitions_dataset_from_array(
            data=data,
            feature_names=feature_names,
            target_names=target_names)

        self.assertEqual([var for var, vals in dataset.features],
                         feature_names)
        self.assertEqual([var for var, vals in dataset.targets],
                         target_names)

        # all domain value appear in data
        for var_id, (var, vals) in enumerate(dataset.features):
            for val_id, val in enumerate(vals):
                appear = False
                for s1, s2 in data:
                    if s1[var_id] == val:
                        appear = True
                self.assertTrue(appear)
        for var_id, (var, vals) in enumerate(dataset.targets):
            for val_id, val in enumerate(vals):
                appear = False
                for s1, s2 in data:
                    if s2[var_id] == val:
                        appear = True
                self.assertTrue(appear)

        #data = [(np.array([str(i) for i in s1]), np.array([str(i) for i in s2])) for (s1,s2) in data]

        self.assertEqual(len(dataset.data), len(data))
        for i in range(len(data)):
            self.assertTrue((dataset.data[i][0] == data[i][0]).all())
            self.assertTrue((dataset.data[i][1] == data[i][1]).all())

        # Domains given
        dataset = pylfit.preprocessing.transitions_dataset_from_array(
            data=data, feature_domains=features, target_domains=targets)

        self.assertEqual(dataset.features, features)
        self.assertEqual(dataset.targets, targets)
        self.assertEqual(len(dataset.data), len(data))
        for i in range(len(data)):
            self.assertTrue((dataset.data[i][0] == data[i][0]).all())
            self.assertTrue((dataset.data[i][1] == data[i][1]).all())

        # feature domains only
        dataset = pylfit.preprocessing.transitions_dataset_from_array(
            data=data, feature_domains=features, target_names=target_names)

        self.assertEqual(dataset.features, features)
        self.assertEqual(len(dataset.data), len(data))
        for i in range(len(data)):
            self.assertTrue((dataset.data[i][0] == data[i][0]).all())
            self.assertTrue((dataset.data[i][1] == data[i][1]).all())

        # target domains only
        dataset = pylfit.preprocessing.transitions_dataset_from_array(
            data=data, target_domains=targets, feature_names=feature_names)

        self.assertEqual(dataset.targets, targets)
        self.assertEqual(len(dataset.data), len(data))
        for i in range(len(data)):
            self.assertTrue((dataset.data[i][0] == data[i][0]).all())
            self.assertTrue((dataset.data[i][1] == data[i][1]).all())

        # Exceptions
        # empty dataset: domains for one side only are not enough.
        self.assertRaises(ValueError, transitions_dataset_from_array, [],
                          None, targets)
        self.assertRaises(ValueError, transitions_dataset_from_array, [],
                          features, None)

        # Wrong data format: state sizes inconsistent with the domains.
        data = [(list(s1) + [0], list(s2))
                for s1, s2 in original_dataset.data]
        self.assertRaises(ValueError, transitions_dataset_from_array, data,
                          features, targets)
        data = [(list(s1), list(s2) + [0])
                for s1, s2 in original_dataset.data]
        self.assertRaises(ValueError, transitions_dataset_from_array, data,
                          features, targets)
def test_predict(self):
    """Check DMVLP.predict() for all semantics, against the semantics' next() function.

    First a fixed unit case pins exact successor sets for the synchronous,
    asynchronous and general semantics. Then random trials compare every
    prediction against a reference computed from the learned rules, repeat
    with rules removed, and exercise the input-validation error paths.
    """
    print(">> DMVLP.predict()")

    # Fixed Boolean network example with known successor sets.
    data = [
        ([0,0,0],[0,0,1]),
        ([0,0,0],[1,0,0]),
        ([1,0,0],[0,0,0]),
        ([0,1,0],[1,0,1]),
        ([0,0,1],[0,0,1]),
        ([1,1,0],[1,0,0]),
        ([1,0,1],[0,1,0]),
        ([0,1,1],[1,0,1]),
        ([1,1,1],[1,1,0])]
    feature_names = ["p_t-1", "q_t-1", "r_t-1"]
    target_names = ["p_t", "q_t", "r_t"]

    dataset = pylfit.preprocessing.transitions_dataset_from_array(
        data=data, feature_names=feature_names, target_names=target_names)

    model = DMVLP(features=dataset.features, targets=dataset.targets)
    model.compile(algorithm="gula")
    model.fit(dataset=dataset)

    self.assertEqual(
        set([
            tuple(s)
            for s in model.predict([["0", "0", "0"]])[("0", "0", "0")]
        ]),
        set([('1', '0', '0'), ('0', '0', '0'), ('0', '0', '1'),
             ('1', '0', '1')]))

    self.assertEqual(
        set(
            tuple(s)
            for s in model.predict([["1", "1", "1"]])[("1", "1", "1")]),
        set([('1', '1', '0')]))

    self.assertEqual(
        set(
            tuple(s)
            for s in model.predict([["0", "0", "0"]],
                                   semantics="asynchronous")[("0", "0",
                                                              "0")]),
        set([('1', '0', '0'), ('0', '0', '1')]))

    self.assertEqual(
        set([
            tuple(s) for s in model.predict(
                [['1', '1', '1']], semantics="general")[("1", "1", "1")]
        ]), set([('1', '1', '0'), ('1', '1', '1')]))

    self.assertEqual(
        set([
            tuple(s) for s in model.predict(
                [["0", "0", "0"]], semantics="general")[("0", "0", "0")]
        ]),
        set([('1', '0', '0'), ('0', '0', '0'), ('0', '0', '1'),
             ('1', '0', '1')]))

    for i in range(self._nb_tests):
        for semantics in [None, "synchronous", "asynchronous", "general"]:
            # Pick the reference semantics implementation; None defaults to
            # synchronous.
            semantics_class = Synchronous
            if semantics == "asynchronous":
                semantics_class = Asynchronous
            if semantics == "general":
                semantics_class = General

            dataset = random_StateTransitionsDataset(
                nb_transitions=random.randint(1, self._nb_transitions),
                nb_features=random.randint(1,self._nb_features),
                nb_targets=random.randint(1,self._nb_targets),
                max_feature_values=self._nb_feature_values,
                max_target_values=self._nb_target_values)

            # Need same features/targets for some semantics
            if semantics == "asynchronous" or semantics == "general":
                dataset = random_symmetric_StateTransitionsDataset(
                    nb_transitions=random.randint(1, self._nb_transitions),
                    nb_variables=random.randint(1,self._nb_features),
                    max_variable_values=self._nb_feature_values)

            for algorithm in self._SUPPORTED_ALGORITHMS:
                model = DMVLP(features=dataset.features,
                              targets=dataset.targets)
                model.compile(algorithm=algorithm)
                model.fit(dataset=dataset)

                feature_states = [
                    list(s)
                    for s in set(tuple(s1) for s1, s2 in dataset.data)
                ]

                if semantics is None:
                    prediction = model.predict(feature_states)
                else:
                    prediction = model.predict(feature_states,
                                               semantics=semantics)

                for state_id, s1 in enumerate(feature_states):
                    # Encode the state as domain-value indexes.
                    feature_state_encoded = []
                    for var_id, val in enumerate(s1):
                        val_id = model.features[var_id][1].index(str(val))
                        feature_state_encoded.append(val_id)
                    #eprint(feature_state_encoded)

                    target_states = semantics_class.next(
                        feature_state_encoded, model.targets, model.rules)

                    # Decode successors; -1 (unknown value) renders as "?".
                    output = dict()
                    for s in target_states:
                        target_state = []
                        for var_id, val_id in enumerate(s):
                            #eprint(var_id, val_id)
                            if val_id == -1:
                                target_state.append("?")
                            else:
                                target_state.append(
                                    model.targets[var_id][1][val_id])
                        output[tuple(target_state)] = target_states[s]

                    self.assertEqual(prediction[tuple(s1)], output)

                # Force missing value: drop the rules of one target variable.
                # NOTE(review): random.randint is re-evaluated per rule in the
                # comprehension below — confirm a single hoisted draw was not
                # intended.
                rules = model.rules
                model.rules = [
                    r for r in model.rules if
                    r.head_variable != random.randint(0, len(model.targets))
                ]

                prediction = model.predict(feature_states,
                                           semantics=semantics)

                for state_id, s1 in enumerate(feature_states):
                    feature_state_encoded = []
                    for var_id, val in enumerate(s1):
                        val_id = model.features[var_id][1].index(str(val))
                        feature_state_encoded.append(val_id)
                    #eprint(feature_state_encoded)

                    target_states = semantics_class.next(
                        feature_state_encoded, model.targets, model.rules)

                    output = dict()
                    for s in target_states:
                        target_state = []
                        for var_id, val_id in enumerate(s):
                            #eprint(var_id, val_id)
                            if val_id == -1:
                                target_state.append("?")
                            else:
                                target_state.append(
                                    model.targets[var_id][1][val_id])
                        output[tuple(target_state)] = target_states[s]

                    self.assertEqual(prediction[tuple(s1)], output)

                model.rules = rules

                # Exceptions:
                self.assertRaises(
                    TypeError, model.predict,
                    "")  # Feature_states bad format: is not a list
                self.assertRaises(
                    TypeError, model.predict,
                    [["0", "1"], 0, 10
                     ])  # Feature_states bad format: is not a list of list
                self.assertRaises(
                    TypeError, model.predict, [["0", "1"], [0, 10]]
                )  # Feature_states bad format: is not a list of list of string

                feature_states = [
                    list(s)
                    for s in set(tuple(s1) for s1, s2 in dataset.data)
                ]
                state_id = random.randint(0, len(feature_states) - 1)
                original = feature_states[state_id].copy()

                # Truncated state: too few values.
                feature_states[state_id] = feature_states[
                    state_id][:-random.randint(1, len(dataset.features))]
                self.assertRaises(
                    TypeError, model.predict, feature_states
                )  # Feature_states bad format: size of state not correspond to model features <
                feature_states[state_id] = original.copy()

                # Padded state: too many values.
                feature_states[state_id].extend(
                    ["0" for i in range(random.randint(1, 10))])
                self.assertRaises(
                    TypeError, model.predict, feature_states
                )  # Feature_states bad format: size of state not correspond to model features >
                feature_states[state_id] = original.copy()

                # Out-of-domain value for one variable.
                var_id = random.randint(0, len(dataset.features) - 1)
                feature_states[state_id][var_id] = "bad_value"
                self.assertRaises(
                    ValueError, model.predict, feature_states
                )  # Feature_states bad format: value out of domain
                feature_states[state_id] = original.copy()

                # Semantics restriction: asynchronous/general require
                # matching feature/target variables; also reject unknown
                # semantics names.
                model_2 = model.copy()
                model_2.targets = model_2.targets + model_2.targets
                self.assertRaises(ValueError, model_2.predict,
                                  feature_states, "asynchronous")
                self.assertRaises(ValueError, model_2.predict,
                                  feature_states, "general")

                self.assertRaises(ValueError, model_2.predict,
                                  feature_states, "badvalue")
def test_explanation_score_from_predictions(self):
    """Test pylfit.postprocessing.explanation_score_from_predictions(predictions, expected_model, dataset).

    First a unit test on a fixed hand-made Boolean network where the expected
    scores are known regression values, then random tests checking properties
    that must always hold (empty program scores 0.0, model evaluated against
    itself scores 1.0), the expected exceptions, and a by-hand recomputation
    of the score on a train/test split.
    """
    print(">> pylfit.postprocessing.explanation_score_from_predictions(predictions, expected_model, dataset)")

    # Unit test: hand-made 3-variable Boolean network transitions.
    # Commented-out transitions are deliberately withheld observations.
    data = [
        (["0","0","0"],["0","0","1"]),
        (["0","0","0"],["1","0","0"]),
        (["1","0","0"],["0","0","0"]),
        (["0","1","0"],["1","0","1"]),
        (["0","0","1"],["0","0","1"]),
        (["1","1","0"],["1","0","0"]),
        #(["1","0","1"],["0","1","0"]),
        (["0","1","1"],["1","0","1"]),
        (["1","1","1"],["1","1","0"])]
    dataset_perfect = pylfit.preprocessing.transitions_dataset_from_array(data=data, feature_names=["p_t_1","q_t_1","r_t_1"], target_names=["p_t","q_t","r_t"])

    # Reference (optimal) model learned from the full observation set.
    optimal_model = pylfit.models.WDMVLP(features=dataset_perfect.features, targets=dataset_perfect.targets)
    optimal_model.compile(algorithm="gula") # model.compile(algorithm="pride")
    optimal_model.fit(dataset=dataset_perfect)

    # Model trained on 3 transitions only: low explanation score expected.
    train_data = [
        (["0","0","0"],["0","0","1"]),
        (["0","0","0"],["1","0","0"]),
        #(["1","0","0"],["0","0","0"]),
        #(["0","1","0"],["1","0","1"]),
        #(["0","0","1"],["0","0","1"]),
        #(["1","1","0"],["1","0","0"]),
        #(["1","0","1"],["0","1","0"]),
        #(["0","1","1"],["1","0","1"]),
        (["1","1","1"],["1","1","0"])]
    train_dataset = pylfit.preprocessing.transitions_dataset_from_array(data=train_data, feature_names=["p_t_1","q_t_1","r_t_1"], target_names=["p_t","q_t","r_t"])

    model = pylfit.models.WDMVLP(features=train_dataset.features, targets=train_dataset.targets)
    model.compile(algorithm="gula")
    model.fit(dataset=train_dataset)

    init_states = [list(s) for s in set(tuple(s1) for s1,s2 in dataset_perfect.data)]
    predictions = model.predict(feature_states=init_states, raw_rules=True)
    # NOTE(review): 0.28 / 0.87 / 0.98 below are regression values for this
    # exact dataset — confirm against the scoring implementation if they drift.
    self.assertEqual(round(explanation_score_from_predictions(predictions=predictions, expected_model=optimal_model, dataset=dataset_perfect),2), 0.28)

    # More training data: score improves.
    train_data = [
        (["0","0","0"],["0","0","1"]),
        (["0","0","0"],["1","0","0"]),
        (["1","0","0"],["0","0","0"]),
        (["0","1","0"],["1","0","1"]),
        #(["0","0","1"],["0","0","1"]),
        #(["1","1","0"],["1","0","0"]),
        #(["1","0","1"],["0","1","0"]),
        #(["0","1","1"],["1","0","1"]),
        (["1","1","1"],["1","1","0"])]
    train_dataset = pylfit.preprocessing.transitions_dataset_from_array(data=train_data, feature_names=["p_t_1","q_t_1","r_t_1"], target_names=["p_t","q_t","r_t"])

    model = pylfit.models.WDMVLP(features=train_dataset.features, targets=train_dataset.targets)
    model.compile(algorithm="gula")
    model.fit(dataset=train_dataset)

    init_states = [list(s) for s in set(tuple(s1) for s1,s2 in dataset_perfect.data)]
    predictions = model.predict(feature_states=init_states, raw_rules=True)
    self.assertEqual(round(explanation_score_from_predictions(predictions=predictions, expected_model=optimal_model, dataset=dataset_perfect),2), 0.87)

    # Almost all training data: near-perfect score.
    train_data = [
        (["0","0","0"],["0","0","1"]),
        (["0","0","0"],["1","0","0"]),
        (["1","0","0"],["0","0","0"]),
        (["0","1","0"],["1","0","1"]),
        (["0","0","1"],["0","0","1"]),
        (["1","1","0"],["1","0","0"]),
        (["1","0","1"],["0","1","0"]),
        #(["0","1","1"],["1","0","1"]),
        (["1","1","1"],["1","1","0"])]
    train_dataset = pylfit.preprocessing.transitions_dataset_from_array(data=train_data, feature_names=["p_t_1","q_t_1","r_t_1"], target_names=["p_t","q_t","r_t"])

    model = pylfit.models.WDMVLP(features=train_dataset.features, targets=train_dataset.targets)
    model.compile(algorithm="gula")
    model.fit(dataset=train_dataset)

    init_states = [list(s) for s in set(tuple(s1) for s1,s2 in dataset_perfect.data)]
    predictions = model.predict(feature_states=init_states, raw_rules=True)
    self.assertEqual(round(explanation_score_from_predictions(predictions=predictions, expected_model=optimal_model, dataset=dataset_perfect),2), 0.98)

    # None explanation: random probabilities with no explanation rules
    # (None in place of each rule) must give a zero score.
    predictions = {tuple(s1): {variable: {value: (proba,
        (int(proba*100), None),
        (100 - int(proba*100), None) )
        for val_id, value in enumerate(values) for proba in [round(random.uniform(0.0,1.0),2)]}
        for var_id, (variable, values) in enumerate(dataset_perfect.targets)}
        for s1 in init_states}

    self.assertEqual(explanation_score_from_predictions(predictions=predictions, expected_model=optimal_model, dataset=dataset_perfect), 0.0)

    # Random tests
    for i in range(self._nb_random_tests):
        nb_features = random.randint(1,self._nb_features)
        nb_targets = random.randint(1,self._nb_targets)
        max_feature_values = random.randint(1,self._nb_values)
        max_target_values = random.randint(1,self._nb_values)
        nb_transitions = random.randint(2,self._nb_transitions)

        dataset = random_StateTransitionsDataset(nb_transitions, nb_features, nb_targets, max_feature_values, max_target_values)

        optimal_model = WDMVLP(dataset.features, dataset.targets)
        optimal_model.compile(algorithm="gula")
        optimal_model.fit(dataset=dataset)

        # Empty program (compiled but never fitted): score must be 0.0.
        model = WDMVLP(dataset.features, dataset.targets)
        model.compile(algorithm="gula")

        init_states = [list(s) for s in set(tuple(s1) for s1,s2 in dataset.data)]
        predictions = model.predict(feature_states=init_states, raw_rules=True)
        self.assertEqual(round(explanation_score_from_predictions(predictions=predictions, expected_model=optimal_model, dataset=dataset),2), 0.0)

        # Program fitted on an empty dataset: also scores 0.0.
        model = WDMVLP(dataset.features, dataset.targets)
        model.compile(algorithm="gula")
        model.fit(StateTransitionsDataset([], dataset.features, dataset.targets))

        init_states = [list(s) for s in set(tuple(s1) for s1,s2 in dataset.data)]
        predictions = model.predict(feature_states=init_states, raw_rules=True)
        self.assertEqual(round(explanation_score_from_predictions(predictions=predictions, expected_model=optimal_model, dataset=dataset),2), 0.0)

        # Train != test: 50/50 split of the transitions.
        train_data = dataset.data[0:int(0.5*len(dataset.data))]
        test_data = dataset.data[int(0.5*len(dataset.data)):]
        train_dataset = StateTransitionsDataset(train_data, dataset.features, dataset.targets)
        test_dataset = StateTransitionsDataset(test_data, dataset.features, dataset.targets)

        model = WDMVLP(train_dataset.features, train_dataset.targets)
        model.compile(algorithm="gula")
        model.fit(dataset=train_dataset)

        # model = optimal -> 100% explanation score on its own training data.
        init_states = [list(s) for s in set(tuple(s1) for s1,s2 in train_dataset.data)]
        predictions = model.predict(feature_states=init_states, raw_rules=True)
        self.assertEqual(explanation_score_from_predictions(predictions=predictions, expected_model=model, dataset=train_dataset), 1.0)

        # Exception: empty dataset.
        self.assertRaises(ValueError, explanation_score_from_predictions, predictions, optimal_model, StateTransitionsDataset([], dataset.features, dataset.targets))

        init_states = [list(s) for s in set(tuple(s1) for s1,s2 in test_dataset.data)]
        predictions = model.predict(feature_states=init_states, raw_rules=True)

        # Exception: an init state of the predictions is missing from the dataset.
        remove_s1, s2 = random.choice(test_dataset.data)
        self.assertRaises(ValueError, explanation_score_from_predictions, predictions, optimal_model, StateTransitionsDataset([(s1,s2) for (s1,s2) in test_dataset.data if list(s1) != list(remove_s1)], dataset.features, dataset.targets))

        # Exception: an init state of the dataset is missing from the predictions.
        remove_s1 = random.choice(list(predictions.keys()))
        predictions_ = predictions.copy()
        predictions_.pop(remove_s1, None)
        self.assertRaises(ValueError, explanation_score_from_predictions, predictions_, optimal_model, test_dataset)

        # Exception: bad target domain.
        test_dataset_ = test_dataset.copy()
        test_dataset_.targets = [("a",["0"])]
        self.assertRaises(ValueError, explanation_score_from_predictions, predictions, optimal_model, test_dataset_)

        # train != test: recompute the expected score by hand.
        grouped_transitions = {tuple(s1) : set(tuple(s2_) for s1_,s2_ in test_dataset.data if tuple(s1) == tuple(s1_)) for s1,s2 in test_dataset.data}

        # Expected output: kind of one-hot encoding of value occurrences —
        # occurs[(var,val)] is 1.0 iff some observed successor takes that value.
        expected = {}
        count = 0
        for s1, successors in grouped_transitions.items():
            count += 1
            occurs = {}
            for var in range(len(test_dataset.targets)):
                for val in range(len(test_dataset.targets[var][1])):
                    occurs[(var,val)] = 0.0
                    for s2 in successors:
                        if s2[var] == test_dataset.targets[var][1][val]:
                            occurs[(var,val)] = 1.0
                            break
            expected[s1] = occurs

        sum_explanation_score = 0.0
        prediction = model.predict(feature_states=[s1 for s1 in expected], raw_rules=True)

        for feature_state, actual in expected.items():
            #eprint("Feature state: ", feature_state)
            #eprint(">> prediction: ",prediction[feature_state])
            sum_score = 0.0
            nb_targets = 0
            for var_id, (variable, values) in enumerate(model.targets):
                for val_id, (value, (proba, (w1, r1), (w2, r2))) in enumerate(prediction[feature_state][variable].items()):
                    #eprint("    "+value+" "+str(round(proba*100.0,2))+"%")

                    # No decision or bad prediction implies wrong explanation.
                    if proba == 0.5 or (proba > 0.5 and actual[(var_id,val_id)] == 0.0) or (proba < 0.5 and actual[(var_id,val_id)] == 1.0):
                        score = 0.0
                        sum_score += score
                        nb_targets += 1
                        continue

                    encoded_feature_state = pylfit.algorithms.GULA.encode_state(feature_state, model.features)

                    # Predicted likely: evaluate the likeliness explanation rule r1
                    # against the matching rules of the optimal model.
                    if proba > 0.5:
                        expected_rules = [r for (w,r) in optimal_model.rules
                            if r.head_variable == var_id and r.head_value == val_id and r.matches(encoded_feature_state)]
                        explanation_rule = r1

                    # Predicted unlikely: same with the unlikeliness rule r2.
                    if proba < 0.5:
                        expected_rules = [r for (w,r) in optimal_model.unlikeliness_rules
                            if r.head_variable == var_id and r.head_value == val_id and r.matches(encoded_feature_state)]
                        explanation_rule = r2

                    # Score = 1 - normalized Hamming distance to the nearest optimal rule.
                    min_distance = len(model.features)
                    nearest_expected = None
                    for r in expected_rules:
                        distance = pylfit.postprocessing.hamming_distance(explanation_rule,r)
                        if distance <= min_distance:
                            min_distance = distance
                            nearest_expected = r

                    score = 1.0 - (min_distance / len(model.features))

                    sum_score += score
                    nb_targets += 1

            sum_explanation_score += sum_score / nb_targets

        expected_score = sum_explanation_score / len(expected)
        self.assertEqual(explanation_score_from_predictions(predictions=prediction, expected_model=optimal_model, dataset=test_dataset), expected_score)
def test_extend(self):
    """Test WDMVLP.extend(dataset, feature_states).

    Checks that extending a fitted model with extra feature states: keeps all
    previously learned rules, adds at most one rule per requested state for
    each target value, only fails to match a state when no consistent optimal
    rule exists, produces correct and minimal new rules, and assigns each
    rule a weight equal to the number of observed initial states it matches.
    """
    print(">> WDMVLP.extend(dataset, feature_states)")

    for test in range(0, self._nb_tests):
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1, self._nb_features),
            nb_targets=random.randint(1, self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        for algo in self._ALGORITHMS:
            for verbose in [0, 1]:
                model = WDMVLP(features=dataset.features, targets=dataset.targets)
                model.compile(algorithm=algo)
                f = io.StringIO()
                with contextlib.redirect_stderr(f):  # silence fit() verbose output
                    model.fit(dataset=dataset, verbose=verbose)

                original_rules = model.rules.copy()
                original_unlikeliness_rules = model.unlikeliness_rules.copy()

                # Encode data with StateTransitionsDataset (value -> domain index).
                data_encoded = []
                for (s1, s2) in dataset.data:
                    s1_encoded = [domain.index(s1[var_id]) for var_id, (var, domain) in enumerate(dataset.features)]
                    s2_encoded = [domain.index(s2[var_id]) for var_id, (var, domain) in enumerate(dataset.targets)]
                    data_encoded.append((s1_encoded, s2_encoded))

                # All possible (string-valued) feature states; pick 10 at random to extend with.
                values_ids = [[j for j in dataset.features[i][1]] for i in range(0, len(dataset.features))]
                feature_states = [list(i) for i in list(itertools.product(*values_ids))]
                feature_states_to_match = [random.choice(feature_states) for i in range(10)]

                model.extend(dataset, feature_states_to_match)

                # No rule disappears when extending.
                for (w, r) in original_rules:
                    self.assertTrue((w, r) in model.rules)
                for (w, r) in original_unlikeliness_rules:
                    self.assertTrue((w, r) in model.unlikeliness_rules)

                # At most one additional rule per feature state for each var/val.
                for var_id, (var, vals) in enumerate(dataset.targets):
                    for val_id, val in enumerate(vals):
                        self.assertTrue(
                            len([(w, r) for (w, r) in model.rules
                                 if r.head_variable == var_id
                                 if r.head_value == val_id
                                 if (w, r) not in original_rules]) <= len(feature_states))
                        self.assertTrue(
                            len([(w, r) for (w, r) in model.unlikeliness_rules
                                 if r.head_variable == var_id
                                 if r.head_value == val_id
                                 if (w, r) not in original_unlikeliness_rules]) <= len(feature_states))

                for feature_state in feature_states_to_match:
                    encoded_feature_state = Algorithm.encode_state(feature_state, dataset.features)
                    for var_id, (var, vals) in enumerate(dataset.targets):
                        for val_id, val in enumerate(vals):
                            pos, neg = PRIDE.interprete(data_encoded, var_id, val_id)

                            # Only way for a state to stay unmatched is that no rule can be found.
                            new_rule = PRIDE.find_one_optimal_rule_of(var_id, val_id, len(dataset.features), pos, neg, encoded_feature_state, 0)
                            matched = False
                            for w, r in model.rules:
                                if r.head_variable == var_id and r.head_value == val_id and r.matches(encoded_feature_state):
                                    matched = True
                                    break
                            if not matched:
                                self.assertTrue(new_rule is None)

                            # Same for unlikeliness rules (pos/neg roles swapped).
                            new_unlikeliness_rule = PRIDE.find_one_optimal_rule_of(var_id, val_id, len(dataset.features), neg, pos, encoded_feature_state, 0)
                            matched = False
                            for w, r in model.unlikeliness_rules:
                                if r.head_variable == var_id and r.head_value == val_id and r.matches(encoded_feature_state):
                                    matched = True
                                    break
                            if not matched:
                                self.assertTrue(new_unlikeliness_rule is None)

                # Check the newly added rules: correct and minimal.
                for var_id, (var, vals) in enumerate(dataset.targets):
                    for val_id, val in enumerate(vals):
                        pos, neg = PRIDE.interprete(data_encoded, var_id, val_id)

                        new_likely_rules = [x for x in model.rules if x not in original_rules]
                        new_unlikeliness_rules = [x for x in model.unlikeliness_rules if x not in original_unlikeliness_rules]
                        # First pass checks likeliness rules; second pass swaps
                        # pos/neg and checks unlikeliness rules.
                        unlikely_check = False
                        for new_rules in [new_likely_rules, new_unlikeliness_rules]:
                            if unlikely_check:
                                pos_ = pos
                                pos = neg
                                neg = pos_
                            for w, r in [(w, r) for (w, r) in new_rules if r.head_variable == var_id if r.head_value == val_id]:
                                # Covers at least a positive example.
                                cover = False
                                for s in pos:
                                    if r.matches(s):
                                        cover = True
                                        break
                                self.assertTrue(cover)

                                # No negative example is covered.
                                cover = False
                                for s in neg:
                                    if r.matches(s):
                                        cover = True
                                        break
                                self.assertFalse(cover)

                                # Rule is minimal: every condition is necessary.
                                for (var_id_, val_id_) in r.body:
                                    r.remove_condition(var_id_)  # Try remove condition
                                    conflict = False
                                    for s in neg:
                                        if r.matches(s):  # Covers a negative example
                                            conflict = True
                                            break
                                    self.assertTrue(conflict)
                                    r.add_condition(var_id_, val_id_)  # Cancel removal
                            unlikely_check = True

                # Check weights: a rule weight is the number of distinct
                # observed initial states it matches.
                feature_states = set(tuple(s1) for s1, s2 in data_encoded)
                for w, r in model.rules:
                    expected_weight = 0
                    for s in feature_states:
                        if r.matches(s):
                            expected_weight += 1
                    self.assertEqual(w, expected_weight)
                for w, r in model.unlikeliness_rules:
                    expected_weight = 0
                    for s in feature_states:
                        if r.matches(s):
                            expected_weight += 1
                    self.assertEqual(w, expected_weight)

                # Check extending with a feature state that cannot be matched
                # (a known negative example) does not crash.
                for var_id, (var, vals) in enumerate(dataset.targets):
                    for val_id, val in enumerate(vals):
                        pos, neg = PRIDE.interprete(data_encoded, var_id, val_id)
                        if len(neg) > 0:
                            state_raw = neg[0]
                            state_string = []
                            for var_id_, val_id_ in enumerate(state_raw):
                                state_string.append(model.features[var_id_][1][val_id_])
                            f = io.StringIO()
                            with contextlib.redirect_stderr(f):
                                model.extend(dataset, [state_string], verbose)

                # Exceptions: malformed feature_states argument.
                self.assertRaises(TypeError, model.extend, dataset, "", verbose)
                self.assertRaises(TypeError, model.extend, dataset, [""], verbose)
                self.assertRaises(TypeError, model.extend, dataset, [["0", "1", "0"], [0, "0"]], verbose)
                self.assertRaises(TypeError, model.extend, dataset, [["0", "1", "0"], ["0", "0"]], verbose)
def test_fit_var_val(self):
    """Test PRIDE.fit_var_val(variable, value, nb_features, positives, negatives).

    For each target value of a random dataset, checks the learned rules have
    the right head, explain every positive example, cover no negative example,
    and are minimal (every body condition is necessary).
    """
    print(">> PRIDE.fit_var_val(variable, value, nb_features, positives, negatives)")

    for i in range(self._nb_tests):
        # Generate transitions
        dataset = random_StateTransitionsDataset( \
            nb_transitions=random.randint(1, self._nb_transitions), \
            nb_features=random.randint(1, self._nb_features), \
            nb_targets=random.randint(1, self._nb_targets), \
            max_feature_values=self._nb_feature_values, \
            max_target_values=self._nb_target_values)

        # Encode data with StateTransitionsDataset (value -> domain index).
        data_encoded = []
        for (s1, s2) in dataset.data:
            s1_encoded = [domain.index(s1[var_id]) for var_id, (var, domain) in enumerate(dataset.features)]
            s2_encoded = [domain.index(s2[var_id]) for var_id, (var, domain) in enumerate(dataset.targets)]
            data_encoded.append((s1_encoded, s2_encoded))

        # Group transitions by initial state (unused below but mirrors the
        # interpretation step — NOTE(review): candidate for removal, confirm).
        data_grouped_by_init_state = []
        for (s1, s2) in data_encoded:
            added = False
            for (s1_, S) in data_grouped_by_init_state:
                if s1_ == s1:
                    if s2 not in S:
                        S.append(s2)
                    added = True
                    break
            if not added:
                data_grouped_by_init_state.append((s1, [s2]))  # new init state

        # Each target value
        for var_id, (var, vals) in enumerate(dataset.targets):
            for val_id, val in enumerate(vals):
                pos, neg = PRIDE.interprete(data_encoded, var_id, val_id)

                f = io.StringIO()
                with contextlib.redirect_stderr(f):  # silence verbose output
                    output = PRIDE.fit_var_val(var_id, val_id, len(dataset.features), pos, neg)

                # Check head
                for r in output:
                    self.assertEqual(r.head_variable, var_id)
                    self.assertEqual(r.head_value, val_id)

                # Each positive example is explained by at least one rule.
                pos = [s1 for s1, s2 in data_encoded if s2[var_id] == val_id]
                for s in pos:
                    cover = False
                    for r in output:
                        if r.matches(s):
                            cover = True
                    self.assertTrue(cover)  # One rule covers the example

                # No negative example is covered by any rule.
                for s in neg:
                    cover = False
                    for r in output:
                        if r.matches(s):
                            cover = True
                    self.assertFalse(cover)  # no rule covers the example

                # All rules are minimal: each condition is necessary.
                for r in output:
                    for (var_id_, val_id_) in r.body:
                        r.remove_condition(var_id_)  # Try remove condition

                        conflict = False
                        for s in neg:
                            if r.matches(s):  # Covers a negative example
                                conflict = True
                                break
                        self.assertTrue(conflict)
                        r.add_condition(var_id_, val_id_)  # Cancel removal
def test_fit__targets_to_learn(self):
    """Test PRIDE.fit(dataset, targets_to_learn).

    Checks argument validation, the empty-dataset case, and — on random
    observations — that learned rules are correct (realize every observed
    requested target value, generate no spurious ones), minimal, and only
    concern the requested target values.
    """
    print(">> PRIDE.fit(dataset, targets_to_learn):")

    for test_id in range(self._nb_tests):

        # 0) Exceptions
        # -------------

        # Dataset must be a StateTransitionsDataset.
        dataset = ""
        self.assertRaises(ValueError, PRIDE.fit, dataset, dict())

        # targets_to_learn must be a dict of str -> list of str with
        # keys/values taken from the dataset target declarations.
        dataset = random_StateTransitionsDataset( \
            nb_transitions=0, \
            nb_features=random.randint(1, self._nb_features), \
            nb_targets=random.randint(1, self._nb_targets), \
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        targets_to_learn = ""  # not a dict
        self.assertRaises(ValueError, PRIDE.fit, dataset, targets_to_learn)

        targets_to_learn = {"1": ["1", "2"], 2: ["1", "2"]}  # bad key (not string)
        self.assertRaises(ValueError, PRIDE.fit, dataset, targets_to_learn)

        targets_to_learn = {"1": "1,2", "2": ["1", "2"]}  # bad values (not list)
        self.assertRaises(ValueError, PRIDE.fit, dataset, targets_to_learn)

        targets_to_learn = {"1": ["1", 2], "2": [1, "2"]}  # bad values (not string)
        self.assertRaises(ValueError, PRIDE.fit, dataset, targets_to_learn)

        targets_to_learn = {"y0": ["val_0", "val_2"], "lool": ["val_0", "val_1"]}  # bad values (not in targets)
        self.assertRaises(ValueError, PRIDE.fit, dataset, targets_to_learn)

        targets_to_learn = {"y0": ["lool", "val_2"]}  # bad values (not in domain)
        self.assertRaises(ValueError, PRIDE.fit, dataset, targets_to_learn)

        # 1) No transitions
        # -----------------
        dataset = random_StateTransitionsDataset( \
            nb_transitions=0, \
            nb_features=random.randint(1, self._nb_features), \
            nb_targets=random.randint(1, self._nb_targets), \
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        f = io.StringIO()
        with contextlib.redirect_stderr(f):  # silence verbose output
            output = PRIDE.fit(dataset=dataset)

        # Without observations nothing can be learned.
        self.assertEqual(output, [])

        # 2) Random observations
        # ----------------------
        dataset = random_StateTransitionsDataset( \
            nb_transitions=random.randint(1, self._nb_transitions), \
            nb_features=random.randint(1, self._nb_features), \
            nb_targets=random.randint(1, self._nb_targets), \
            max_feature_values=self._nb_feature_values, \
            max_target_values=self._nb_target_values)

        # Empty target list: nothing requested, nothing learned.
        self.assertEqual(PRIDE.fit(dataset=dataset, targets_to_learn=dict()), [])

        # Request a random subset of values for a random subset of targets.
        targets_to_learn = dict()
        for a, b in dataset.targets:
            if random.choice([True, False]):
                b_ = random.sample(b, random.randint(0, len(b)))
                targets_to_learn[a] = b_

        f = io.StringIO()
        with contextlib.redirect_stderr(f):
            output = PRIDE.fit(dataset=dataset, targets_to_learn=targets_to_learn)

        # Encode data to check PRIDE output rules (value -> domain index).
        data_encoded = []
        for (s1, s2) in dataset.data:
            s1_encoded = [domain.index(s1[var_id]) for var_id, (var, domain) in enumerate(dataset.features)]
            s2_encoded = [domain.index(s2[var_id]) for var_id, (var, domain) in enumerate(dataset.targets)]
            data_encoded.append((s1_encoded, s2_encoded))

        # 2.1.1) Correctness (explain all)
        # --------------------------------
        # Every requested target value observed in a transition is realized
        # by at least one rule matching the transition's initial state.
        # BUGFIX: this used to test s2_encoded/s1_encoded — leftover loop
        # variables holding only the LAST encoded transition — instead of the
        # iterated s1/s2, and set 'realises_target' while asserting the
        # differently-spelled 'realizes_target'.
        for (s1, s2) in data_encoded:
            for target_id in range(len(dataset.targets)):
                expected_value = s2[target_id]

                # In partial mode only requested target values are expected.
                target_name = dataset.targets[target_id][0]
                target_value_name = dataset.targets[target_id][1][expected_value]
                if target_name not in targets_to_learn:
                    continue
                if target_value_name not in targets_to_learn[target_name]:
                    continue

                realizes_target = False
                for r in output:
                    if r.head_variable == target_id and r.head_value == expected_value and r.matches(s1):
                        realizes_target = True
                        break
                self.assertTrue(realizes_target)

        # 2.1.2) Correctness (no spurious observation)
        # --------------------------------------------
        # No rule generates an unobserved target value from an observed state.
        for r in output:
            for (s1, s2) in data_encoded:
                if r.matches(s1):
                    observed = False
                    for (s1_, s2_) in data_encoded:
                        # The head value must appear in some successor of s1.
                        if s1_ == s1 and s2_[r.head_variable] == r.head_value:
                            observed = True
                            break
                    self.assertTrue(observed)

        # 2.2) Minimality
        # ---------------
        # Every condition is necessary: removing it makes the rule realize an
        # unobserved target value from some observed state.
        for r in output:
            for (var_id, val_id) in r.body:
                r.remove_condition(var_id)  # Try remove condition

                conflict = False
                for (s1, s2) in data_encoded:
                    if r.matches(s1):
                        observed = False
                        for (s1_, s2_) in data_encoded:
                            if s1_ == s1 and s2_[r.head_variable] == r.head_value:
                                observed = True
                                break
                        if not observed:
                            conflict = True
                            break

                r.add_condition(var_id, val_id)  # Cancel removal

                # DEBUG — BUGFIX: was '"not minimal "+r' (str + Rule raises
                # TypeError); use the rule's string form instead.
                if not conflict:
                    eprint("not minimal " + r.to_string())
                self.assertTrue(conflict)

        # 2.3) Only requested target values appear in rule heads
        # ------------------------------------------------------
        for r in output:
            target_name = dataset.targets[r.head_variable][0]
            target_value = dataset.targets[r.head_variable][1][r.head_value]
            self.assertTrue(target_name in targets_to_learn)
            self.assertTrue(target_value in targets_to_learn[target_name])
def test_fit(self):
    """Test GULA.fit(dataset, targets_to_learn, verbose).

    Checks argument validation, the empty-dataset case (one bodyless rule per
    target value), and — on random observations, in both regular and
    impossibility mode — correctness (explain all observed target values,
    no spurious realization), completeness (every possible feature state is
    matched for every target) and minimality of the learned rules.
    """
    print(">> GULA.fit(dataset, targets_to_learn, verbose):")

    for test_id in range(self._nb_tests):

        # 0) Exceptions
        # -------------
        dataset = ""  # not a StateTransitionsDataset
        self.assertRaises(ValueError, GULA.fit, dataset)

        # 1) No transitions
        # -----------------
        dataset = random_StateTransitionsDataset( \
            nb_transitions=0, \
            nb_features=random.randint(1, self._nb_features), \
            nb_targets=random.randint(1, self._nb_targets), \
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        output = GULA.fit(dataset=dataset)

        # Output must be one empty (bodyless) rule for each target value.
        self.assertEqual(len(output), len([val for (var, vals) in dataset.targets for val in vals]))
        expected = [Rule(var_id, val_id, len(dataset.features))
                    for var_id, (var, vals) in enumerate(dataset.targets)
                    for val_id, val in enumerate(vals)]
        for r in expected:
            self.assertTrue(r in output)

        # 2) Random observations
        # ----------------------
        for impossibility_mode in [False, True]:
            for verbose in [0, 1]:
                # Generate transitions
                dataset = random_StateTransitionsDataset( \
                    nb_transitions=random.randint(1, self._nb_transitions), \
                    nb_features=random.randint(1, self._nb_features), \
                    nb_targets=random.randint(1, self._nb_targets), \
                    max_feature_values=self._nb_feature_values, \
                    max_target_values=self._nb_target_values)

                # Empty target list: nothing requested, nothing learned.
                self.assertEqual(GULA.fit(dataset=dataset, targets_to_learn=dict()), [])

                f = io.StringIO()
                with contextlib.redirect_stderr(f):  # silence verbose output
                    output = GULA.fit(dataset=dataset, impossibility_mode=impossibility_mode, verbose=verbose)

                # Encode data to check GULA output rules (value -> domain index).
                data_encoded = []
                for (s1, s2) in dataset.data:
                    s1_encoded = [domain.index(s1[var_id]) for var_id, (var, domain) in enumerate(dataset.features)]
                    s2_encoded = [domain.index(s2[var_id]) for var_id, (var, domain) in enumerate(dataset.targets)]
                    data_encoded.append((s1_encoded, s2_encoded))

                # 2.1.1) Correctness (explain all)
                # --------------------------------
                # Every observed target value is realized by at least one rule
                # matching the transition's initial state.
                # BUGFIX: this used to test s2_encoded/s1_encoded — leftover
                # loop variables holding only the LAST encoded transition —
                # instead of the iterated s1/s2, and set 'realises_target'
                # while asserting the differently-spelled 'realizes_target'.
                if impossibility_mode == False:
                    for (s1, s2) in data_encoded:
                        for target_id in range(len(dataset.targets)):
                            expected_value = s2[target_id]
                            realizes_target = False
                            for r in output:
                                if r.head_variable == target_id and r.head_value == expected_value and r.matches(s1):
                                    realizes_target = True
                                    break
                            self.assertTrue(realizes_target)

                # 2.1.2) Correctness (no spurious observation)
                # --------------------------------------------
                # No rule generates an unobserved target value from an
                # observed state (the opposite in impossibility mode).
                for r in output:
                    for (s1, s2) in data_encoded:
                        if r.matches(s1):
                            observed = False
                            for (s1_, s2_) in data_encoded:
                                # The head value must appear in some successor of s1.
                                if s1_ == s1 and s2_[r.head_variable] == r.head_value:
                                    observed = True
                                    break
                            if impossibility_mode:
                                self.assertFalse(observed)
                            else:
                                self.assertTrue(observed)

                # 2.2) Completeness
                # -----------------
                # Every possible initial state is matched by a rule of each target.
                if impossibility_mode == False:
                    encoded_domains = [set(range(len(domain))) for (var, domain) in dataset.features]
                    init_states_encoded = set(itertools.product(*encoded_domains))
                    for s in init_states_encoded:
                        for target_id in range(len(dataset.targets)):
                            realizes_target = False
                            for r in output:
                                if r.head_variable == target_id and r.matches(s):
                                    realizes_target = True
                                    break
                            self.assertTrue(realizes_target)

                # 2.3) Minimality
                # ---------------
                # All rule conditions are necessary: removing one makes the
                # rule cover a negative example.
                # (The redundant re-encoding of data_encoded that was here has
                # been removed: data_encoded is unchanged since it was built.)

                # Group transitions by initial state.
                data_grouped_by_init_state = []
                for (s1, s2) in data_encoded:
                    added = False
                    for (s1_, S) in data_grouped_by_init_state:
                        if s1_ == s1:
                            if s2 not in S:
                                S.append(s2)
                            added = True
                            break
                    if not added:
                        data_grouped_by_init_state.append((s1, [s2]))  # new init state

                for r in output:
                    neg, pos = GULA.interprete(data_grouped_by_init_state, r.head_variable, r.head_value, True)
                    if impossibility_mode:
                        # Roles are reversed in impossibility mode.
                        pos, neg = neg, pos

                    for (var_id, val_id) in r.body:
                        r.remove_condition(var_id)  # Try remove condition

                        conflict = False
                        for s in neg:
                            if r.matches(s):
                                conflict = True
                                break

                        r.add_condition(var_id, val_id)  # Cancel removal

                        # DEBUG:
                        if not conflict:
                            eprint("not minimal " + r.to_string())
                        self.assertTrue(conflict)
def test_constructor(self):
    """Check CDMVLP construction: attribute initialization on valid input and
    the exception raised for each malformed features/targets argument."""
    print(">> CDMVLP(features, targets, rules)")

    for _ in range(self._nb_tests):
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1, self._nb_features),
            nb_targets=random.randint(1, self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        model = CDMVLP(features=dataset.features, targets=dataset.targets)

        # A fresh model exposes its domains and starts empty and uncompiled.
        self.assertEqual(model.features, dataset.features)
        self.assertEqual(model.targets, dataset.targets)
        self.assertEqual(model.rules, [])
        self.assertEqual(model.constraints, [])
        self.assertEqual(model.algorithm, None)

        # Exceptions:
        #-------------
        good = [("x0", ["0", "1"]), ("x1", ["0", "1"]), ("x2", ["0", "1"])]

        # Each malformed variable declaration and the exception it must raise.
        bad_cases = [
            ('[("x0", ["0","1"]), ("x1", ["0","1"]), ("x2", ["0","1"])]', TypeError),   # not a list
            ([["x0", ["0", "1"]], ("x1", ["0", "1"]), ("x2", ["0", "1"])], TypeError),  # entry not a tuple
            ([("x0", "0", "1"), ("x1", "0", "1"), ("x2", ["0", "1"])], TypeError),      # tuple not of size 2
            ([("x0", ["0", "1"]), ("x1", '0","1"'), ("x2", ["0", "1"])], TypeError),    # domain not a list
            ([("x0", ["0", "1"]), ("x1", [0, "1"]), ("x2", ["0", "1"])], ValueError),   # domain value not a string
        ]

        # Features format
        for bad, exception in bad_cases:
            self.assertRaises(exception, CDMVLP, bad, dataset.targets)

        # Targets format (same malformed shapes, valid features)
        for bad, exception in bad_cases:
            self.assertRaises(exception, CDMVLP, good, bad)
    def test_find_one_optimal_rule_of(self):
        """Check PRIDE.find_one_optimal_rule_of(): head, consistency, minimality.

        For random datasets and each target value with at least one positive
        example, ask PRIDE for one optimal rule matching a randomly chosen
        feature state, then verify the returned rule (or justify a None result).
        """
        print(">> PRIDE.find_one_optimal_rule_of(variable, value, nb_features, positives, negatives, feature_state_to_match, verbose=0)")

        for i in range(self._nb_tests):
            for verbose in [0,1]:
                # Generate transitions
                dataset = random_StateTransitionsDataset( \
                nb_transitions=random.randint(1, self._nb_transitions), \
                nb_features=random.randint(1,self._nb_features), \
                nb_targets=random.randint(1,self._nb_targets), \
                max_feature_values=self._nb_feature_values, \
                max_target_values=self._nb_target_values)

                #dataset.summary()

                # Encode data with StateTransitionsDataset:
                # each state value is replaced by its index in the variable domain.
                data_encoded = []
                for (s1,s2) in dataset.data:
                    s1_encoded = [domain.index(s1[var_id]) for var_id, (var,domain) in enumerate(dataset.features)]
                    s2_encoded = [domain.index(s2[var_id]) for var_id, (var,domain) in enumerate(dataset.targets)]
                    data_encoded.append((s1_encoded,s2_encoded))

                # All possible encoded feature states (cartesian product of domains).
                values_ids = [[j for j in range(0,len(dataset.features[i][1]))] for i in range(0,len(dataset.features))]
                feature_states = [list(i) for i in list(itertools.product(*values_ids))]

                # each target value
                for var_id, (var,vals) in enumerate(dataset.targets):
                    for val_id, val in enumerate(vals):
                        #eprint("var: ", var_id)
                        #eprint("val: ", val_id)
                        pos, neg = PRIDE.interprete(data_encoded, var_id, val_id)

                        # No positive example: nothing to learn for this value.
                        if len(pos) == 0:
                            continue

                        feature_state_to_match = random.choice([s for s in feature_states])

                        #eprint("neg: ", neg)
                        # Silence the algorithm's stderr output during the call.
                        f = io.StringIO()
                        with contextlib.redirect_stderr(f):
                            output = PRIDE.find_one_optimal_rule_of(var_id, val_id, len(dataset.features), pos, neg, feature_state_to_match, verbose)
                        #eprint()
                        #eprint("rules: ", output)

                        # Check no consistent rule exists:
                        # when None is returned, for every positive the most specific
                        # rule compatible with the requested feature state must cover
                        # some negative (otherwise a consistent rule existed).
                        if output is None:
                            for s in pos:
                                # Most specific rule that match both the pos and request feature state
                                r = Rule(var_id, val_id, len(dataset.features))
                                for var in range(len(dataset.features)):
                                    if feature_state_to_match[var] == s[var]:
                                        r.add_condition(var,s[var])

                                # Must match atleast a neg
                                if len(neg) > 0:
                                    cover = False
                                    for s in neg:
                                        if r.matches(s):
                                            cover = True
                                            break
                                    if not cover:
                                        eprint(feature_state_to_match)
                                        eprint(s)
                                        eprint(r.to_string())
                                    self.assertTrue(cover)
                            continue

                        # Check head
                        self.assertEqual(output.head_variable, var_id)
                        self.assertEqual(output.head_value, val_id)

                        # Cover at least a positive
                        cover = False
                        for s in pos:
                            if output.matches(s):
                                cover = True
                                break
                        self.assertTrue(cover)

                        # No negative is covered
                        cover = False
                        for s in neg:
                            if output.matches(s):
                                cover = True
                                break
                        self.assertFalse(cover)

                        # Rules is minimal:
                        # removing any condition must make the rule cover a negative.
                        for (var_id_, val_id_) in output.body:
                            output.remove_condition(var_id_) # Try remove condition

                            conflict = False
                            for s in neg:
                                if output.matches(s): # Cover a negative example
                                    conflict = True
                                    break
                            self.assertTrue(conflict)
                            output.add_condition(var_id_,val_id_) # Cancel removal
def test_fit(self): print(">> WDMVLP.fit(dataset)") for test in range(0, self._nb_tests): dataset = random_StateTransitionsDataset( \ nb_transitions=random.randint(1, self._nb_transitions), \ nb_features=random.randint(1,self._nb_features), \ nb_targets=random.randint(1,self._nb_targets), \ max_feature_values=self._nb_feature_values, \ max_target_values=self._nb_target_values) for algo in self._ALGORITHMS: for verbose in [0, 1]: model = WDMVLP(features=dataset.features, targets=dataset.targets) model.compile(algorithm=algo) f = io.StringIO() with contextlib.redirect_stderr(f): model.fit(dataset=dataset, verbose=verbose) weighted_rules = {} train_init = set( tuple(Algorithm.encode_state(s1, dataset.features)) for s1, s2 in dataset.data) for w, r in model.rules: weight = 0 for s1 in train_init: if r.matches(s1): weight += 1 self.assertEqual(w, weight) for w, r in model.unlikeliness_rules: weight = 0 for s1 in train_init: if r.matches(s1): weight += 1 self.assertEqual(w, weight) # TODO: check no missing rules #model = WDMVLP(features=dataset.features, targets=dataset.targets) #model.compile(algorithm="pride") #model.fit(dataset=dataset) #expected_rules = PRIDE.fit(dataset) #self.assertEqual(expected_rules, model.rules)s # Exceptions #------------ self.assertRaises(ValueError, model.fit, []) # dataset is not of valid type original = WDMVLP._COMPATIBLE_DATASETS.copy() class newdataset(Dataset): def __init__(self, data, features, targets): x = "" WDMVLP._COMPATIBLE_DATASETS = [newdataset] self.assertRaises( ValueError, model.fit, newdataset([], [], []), verbose) # dataset not supported by the algo WDMVLP._COMPATIBLE_DATASETS = original model.algorithm = "lf1t" self.assertRaises(NotImplementedError, model.fit, dataset, verbose) # algorithm is not of valid)
def test_fit(self): print(">> PRIDE.fit(dataset, targets_to_learn, verbose):") for i in range(self._nb_tests): # Datatset type dataset = "" # not a StateTransitionsDataset self.assertRaises(ValueError, PRIDE.fit, dataset) # 1) No transitions #-------------------- dataset = random_StateTransitionsDataset( \ nb_transitions=0, \ nb_features=random.randint(1,self._nb_features), \ nb_targets=random.randint(1,self._nb_targets), \ max_feature_values=self._nb_feature_values, max_target_values=self._nb_target_values) f = io.StringIO() with contextlib.redirect_stderr(f): output = PRIDE.fit(dataset=dataset) # Output must be empty self.assertTrue(output == []) # 2) Random observations # ------------------------ for impossibility_mode in [False,True]: for verbose in [0,1]: # Generate transitions dataset = random_StateTransitionsDataset( \ nb_transitions=random.randint(1, self._nb_transitions), \ nb_features=random.randint(1,self._nb_features), \ nb_targets=random.randint(1,self._nb_targets), \ max_feature_values=self._nb_feature_values, \ max_target_values=self._nb_target_values) #dataset.summary() f = io.StringIO() with contextlib.redirect_stderr(f): output = PRIDE.fit(dataset=dataset, impossibility_mode=impossibility_mode, verbose=verbose) # Encode data to check PRIDE output rules data_encoded = [] for (s1,s2) in dataset.data: s1_encoded = [domain.index(s1[var_id]) for var_id, (var,domain) in enumerate(dataset.features)] s2_encoded = [domain.index(s2[var_id]) for var_id, (var,domain) in enumerate(dataset.targets)] data_encoded.append((s1_encoded,s2_encoded)) # 2.1.1) Correctness (explain all) # ----------------- # all transitions are fully explained, i.e. 
each target value is explained by atleast one rule for (s1,s2) in data_encoded: for target_id in range(len(dataset.targets)): expected_value = s2_encoded[target_id] realizes_target = False for r in output: if r.head_variable == target_id and r.head_value == expected_value and r.matches(s1_encoded): realises_target = True #eprint(s1_encoded, " => ", target_id,"=",expected_value, " by ", r) break self.assertTrue(realises_target) #eprint("-------------------") #eprint(data_encoded) # 2.1.2) Correctness (no spurious observation) # ----------------- # No rules generate a unobserved target value from an observed state for r in output: for (s1,s2) in data_encoded: if r.matches(s1): observed = False for (s1_,s2_) in data_encoded: # Must be in a target state after s1 if s1_ == s1 and s2_[r.head_variable] == r.head_value: observed = True #eprint(r, " => ", s1_, s2_) break if impossibility_mode: self.assertFalse(observed) else: self.assertTrue(observed) # 2.2) minimality # ----------------- # All rules conditions are necessary, i.e. removing a condition makes realizes unobserved target value from observation for r in output: pos, neg = PRIDE.interprete(data_encoded, r.head_variable, r.head_value) if impossibility_mode: pos_ = pos pos = neg neg = pos_ for (var_id, val_id) in r.body: r.remove_condition(var_id) # Try remove condition conflict = False for s in neg: if r.matches(s): conflict = True break r.add_condition(var_id,val_id) # Cancel removal # # DEBUG: if not conflict: eprint("not minimal "+r.to_string()) self.assertTrue(conflict)
    def test_predict(self):
        """Check WDMVLP.predict(): for a random feature state, the predicted
        (probability, best rule, best unlikeliness rule) triple for every
        target value must match a reference computation over the model rules.
        """
        print(">> WDMVLP.predict()")

        # TODO: unit tests
        for test in range(0, self._nb_tests):
            dataset = random_StateTransitionsDataset( \
            nb_transitions=random.randint(1, self._nb_transitions), \
            nb_features=random.randint(1,self._nb_features), \
            nb_targets=random.randint(1,self._nb_targets), \
            max_feature_values=self._nb_feature_values, \
            max_target_values=self._nb_target_values)

            for algo in self._ALGORITHMS:
                for raw_rules in [True, False]:
                    model = WDMVLP(features=dataset.features, targets=dataset.targets)
                    model.compile(algorithm=algo)
                    # Silence the algorithm's stderr output during training.
                    f = io.StringIO()
                    with contextlib.redirect_stderr(f):
                        model.fit(dataset=dataset)

                    feature_state = random.choice(model.feature_states())
                    output = model.predict([list(feature_state)], raw_rules)[tuple(feature_state)]
                    # One entry per target variable.
                    self.assertEqual(len(output.items()), len(model.targets))

                    feature_state = GULA.encode_state(feature_state, model.features)

                    for var_id, (var, vals) in enumerate(model.targets):
                        # One entry per value of the target variable.
                        self.assertEqual(len(output[var]), len(model.targets[var_id][1]))
                        for val_id, val in enumerate(vals):
                            # Reference computation: best (max weight, then
                            # smallest body) matching rule for this target value.
                            best_rule = None
                            max_rule_weight = 0
                            for w, r in model.rules:
                                if r.head_variable == var_id and r.head_value == val_id:
                                    if w > max_rule_weight and r.matches(feature_state):
                                        max_rule_weight = w
                                        best_rule = r
                                    elif w == max_rule_weight and r.matches(feature_state):
                                        # Tie on weight: prefer the smaller rule.
                                        if best_rule == None or r.size() < best_rule.size():
                                            max_rule_weight = w
                                            best_rule = r

                            # Same selection among unlikeliness rules.
                            best_anti_rule = None
                            max_anti_rule_weight = 0
                            for w, r in model.unlikeliness_rules:
                                if r.head_variable == var_id and r.head_value == val_id:
                                    if w > max_anti_rule_weight and r.matches(feature_state):
                                        max_anti_rule_weight = w
                                        best_anti_rule = r
                                    elif w == max_anti_rule_weight and r.matches(feature_state):
                                        if best_anti_rule == None or r.size() < best_anti_rule.size():
                                            max_anti_rule_weight = w
                                            best_anti_rule = r

                            # When not requesting raw rules, predict() returns
                            # the logic-form string of each rule.
                            if not raw_rules:
                                if best_rule is not None:
                                    best_rule = best_rule.logic_form(model.features, model.targets)
                                if best_anti_rule is not None:
                                    best_anti_rule = best_anti_rule.logic_form(model.features, model.targets)

                            # Likeliness score in [0,1] from the two weights.
                            prediction = round(0.5 + 0.5 * (max_rule_weight - max_anti_rule_weight) / max(1, (max_rule_weight + max_anti_rule_weight)), 3)

                            self.assertEqual(output[var][val], (prediction, (max_rule_weight, best_rule), (max_anti_rule_weight, best_anti_rule)))

            # exceptions
            self.assertRaises(TypeError, model.predict, "")
            self.assertRaises(TypeError, model.predict, [""])
            self.assertRaises(TypeError, model.predict, [["0", "1", "0"], [0, "0"]])
            self.assertRaises(TypeError, model.predict, [["0", "1", "0"], ["0", "0"]])
    def test_fit(self):
        """Check Synchronizer.fit(): input validation, empty dataset output,
        correctness/completeness of learned rules + constraints, minimality,
        and applicability of every learned constraint.
        """
        print(">> Synchronizer.fit(dataset, complete, verbose)")

        for test_id in range(self._nb_tests):
            for complete in [True, False]:
                for verbose in [0, 1]:
                    # 0) exceptions
                    #---------------
                    # Datatset type
                    dataset = "" # not a StateTransitionsDataset
                    self.assertRaises(ValueError, Synchronizer.fit, dataset)

                    # 1) No transitions
                    #--------------------
                    dataset = random_StateTransitionsDataset( \
                    nb_transitions=0, \
                    nb_features=random.randint(1,self._nb_features), \
                    nb_targets=random.randint(1,self._nb_targets), \
                    max_feature_values=self._nb_feature_values,
                    max_target_values=self._nb_target_values)

                    f = io.StringIO()
                    with contextlib.redirect_stderr(f):
                        rules, constraints = Synchronizer.fit(dataset=dataset, complete=True, verbose=verbose)

                    # Output must be one empty rule for each target value and the empty constraint
                    self.assertEqual(
                        len(rules),
                        len([
                            val for (var, vals) in dataset.targets
                            for val in vals
                        ]))
                    self.assertEqual(len(constraints), 1)

                    expected = [
                        Rule(var_id, val_id, len(dataset.features))
                        for var_id, (var, vals) in enumerate(dataset.targets)
                        for val_id, val in enumerate(vals)
                    ]
                    #eprint(expected)
                    #eprint(output)
                    for r in expected:
                        self.assertTrue(r in rules)

                    # 2) Random observations
                    # ------------------------
                    for heuristic_partial in [True, False]:
                        # Toggle the class-level heuristic before learning.
                        Synchronizer.HEURISTIC_PARTIAL_IMPOSSIBLE_STATE = heuristic_partial

                        # Generate transitions
                        dataset = random_StateTransitionsDataset( \
                        nb_transitions=random.randint(1, self._nb_transitions), \
                        nb_features=random.randint(1,self._nb_features), \
                        nb_targets=random.randint(1,self._nb_targets), \
                        max_feature_values=self._nb_feature_values, \
                        max_target_values=self._nb_target_values)

                        #dataset.summary()

                        f = io.StringIO()
                        with contextlib.redirect_stderr(f):
                            rules, constraints = Synchronizer.fit(
                                dataset=dataset, complete=complete, verbose=verbose)

                        # Encode data to check Synchronizer output rules
                        # (each state value replaced by its domain index).
                        data_encoded = []
                        for (s1, s2) in dataset.data:
                            s1_encoded = [
                                domain.index(s1[var_id]) for var_id, (
                                    var, domain) in enumerate(dataset.features)
                            ]
                            s2_encoded = [
                                domain.index(s2[var_id]) for var_id, (
                                    var, domain) in enumerate(dataset.targets)
                            ]
                            data_encoded.append((s1_encoded, s2_encoded))

                        # 2.1) Correctness (explain all and no spurious observation)
                        # -----------------
                        # all transitions are fully explained, i.e. each target state are reproduce
                        for (s1, s2) in data_encoded:
                            next_states = SynchronousConstrained.next(
                                s1, dataset.targets, rules, constraints)
                            #eprint("rules: ", rules)
                            #eprint("constraints: ", constraints)
                            #eprint("s1: ", s1)
                            #eprint("s2: ", s2)
                            #eprint("next: ", next_states)
                            self.assertTrue(tuple(s2) in next_states)
                            # ... and nothing else is generated from observed states.
                            for s3 in next_states:
                                self.assertTrue((s1, list(s3)) in data_encoded)

                        #eprint("-------------------")
                        #eprint(data_encoded)

                        # 2.2) Completness
                        # -----------------
                        # all non observed initial state has no next state under synchronous constrainted semantics

                        # generate all combination of domains
                        encoded_domains = [
                            set([i for i in range(len(domain))])
                            for (var, domain) in dataset.features
                        ]
                        init_states_encoded = [
                            list(i)
                            for i in list(itertools.product(*encoded_domains))
                        ]
                        observed_init_states = [
                            s1 for (s1, s2) in data_encoded
                        ]
                        for s in init_states_encoded:
                            next_states = SynchronousConstrained.next(
                                s, dataset.targets, rules, constraints)
                            if s not in observed_init_states:
                                #eprint(s)
                                if complete == True:
                                    self.assertEqual(len(next_states), 0)

                        # 2.3) minimality
                        # -----------------
                        # All rules conditions are necessary, i.e. removing a condition makes realizes unobserved target value from observation
                        for r in rules:
                            for (var_id, val_id) in r.body:
                                r.remove_condition(
                                    var_id)  # Try remove condition

                                conflict = False
                                for (s1, s2) in data_encoded:
                                    if r.matches(s1):
                                        observed = False
                                        for (
                                                s1_, s2_
                                        ) in data_encoded:  # Must be in a target state after s1
                                            if s1_ == s1 and s2_[
                                                    r.head_variable] == r.head_value:
                                                observed = True
                                                #eprint(r, " => ", s1_, s2_)
                                                break
                                        if not observed:
                                            conflict = True
                                            break

                                r.add_condition(var_id, val_id)  # Cancel removal

                                # DEBUG:
                                if not conflict:
                                    eprint("not minimal " + r)

                                self.assertTrue(conflict)

                        # 2.4) Constraints are minimals
                        #--------------------------------
                        # All constraints conditions are necessary, i.e. removing a condition makes some observed transitions impossible
                        for r in constraints:
                            for (var_id, val_id) in r.body:
                                r.remove_condition(
                                    var_id)  # Try remove condition

                                conflict = False
                                # A constraint matches over the concatenated
                                # (feature state + target state) vector.
                                for (s1, s2) in data_encoded:
                                    if r.matches(s1 + s2):
                                        conflict = True
                                        break

                                r.add_condition(var_id, val_id)  # Cancel removal

                                # DEBUG:
                                if not conflict:
                                    eprint("not minimal " + r)

                                self.assertTrue(conflict)

                        # 2.5) Constraints are all applicable
                        #-------------------------------------
                        for constraint in constraints:
                            applicable = True
                            for (var, val) in constraint.body:
                                # Each condition on targets must be achievable by a rule head
                                if var >= len(dataset.features):
                                    head_var = var - len(dataset.features)
                                    matching_rule = False
                                    # The conditions of the rule must be in the constraint
                                    for rule in rules:
                                        #eprint(rule)
                                        if rule.head_variable == head_var and rule.head_value == val:
                                            matching_conditions = True
                                            for (cond_var, cond_val) in rule.body:
                                                if constraint.has_condition(
                                                        cond_var
                                                ) and constraint.get_condition(
                                                        cond_var) != cond_val:
                                                    matching_conditions = False
                                                    break
                                            if matching_conditions:
                                                matching_rule = True
                                                break
                                    if not matching_rule:
                                        applicable = False
                                        break
                            self.assertTrue(applicable)

                            # Get applicables rules: for each target condition of
                            # the constraint, collect all rules whose head achieves
                            # it and whose body is compatible with the constraint.
                            compatible_rules = []
                            for (var, val) in constraint.body:
                                #eprint(var)
                                # Each condition on targets must be achievable by a rule head
                                if var >= len(dataset.features):
                                    compatible_rules.append([])
                                    head_var = var - len(dataset.features)
                                    #eprint(var," ",val)
                                    # The conditions of the rule must be in the constraint
                                    for rule in rules:
                                        #eprint(rule)
                                        if rule.head_variable == head_var and rule.head_value == val:
                                            matching_conditions = True
                                            for (cond_var, cond_val) in rule.body:
                                                if constraint.has_condition(
                                                        cond_var
                                                ) and constraint.get_condition(
                                                        cond_var) != cond_val:
                                                    matching_conditions = False
                                                    #eprint("conflict on: ",cond_var,"=",cond_val)
                                                    break
                                            if matching_conditions:
                                                compatible_rules[-1].append(
                                                    rule)

                            nb_combinations = np.prod(
                                [len(l) for l in compatible_rules])
                            done = 0
                            applicable = False
                            # There must exist one combination of compatible rules
                            # (one per target condition) with non-conflicting bodies.
                            for combination in itertools.product(
                                    *compatible_rules):
                                done += 1
                                #eprint(done,"/",nb_combinations)
                                condition_variables = set()
                                conditions = set()
                                valid_combo = True
                                for r in combination:
                                    for var, val in r.body:
                                        if var not in condition_variables:
                                            condition_variables.add(var)
                                            conditions.add((var, val))
                                        elif (var, val) not in conditions:
                                            valid_combo = False
                                            break
                                    if not valid_combo:
                                        break
                                if valid_combo:
                                    #eprint("valid combo: ", combination)
                                    applicable = True
                                    break

                            self.assertTrue(applicable)
    def test_accuracy_score(self):
        """Check pylfit.postprocessing.accuracy_score(model, dataset).

        Fixed unit cases first (known accuracy values for growing training
        sets), then random datasets where the score is recomputed by hand
        from the model's predictions.
        """
        print(">> pylfit.postprocessing.accuracy_score(model, dataset)")

        # unit test
        test_data = [ \
        (["0","0","0"],["0","0","1"]), \
        (["0","0","0"],["1","0","0"]), \
        (["1","0","0"],["0","0","0"]), \
        (["0","1","0"],["1","0","1"]), \
        (["0","0","1"],["0","0","1"]), \
        (["1","1","0"],["1","0","0"]), \
        #(["1","0","1"],["0","1","0"]), \
        (["0","1","1"],["1","0","1"]), \
        (["1","1","1"],["1","1","0"])]
        test_dataset = pylfit.preprocessing.transitions_dataset_from_array(data=test_data, feature_names=["p_t_1","q_t_1","r_t_1"], target_names=["p_t","q_t","r_t"])

        # Small training set: low expected accuracy.
        train_data = [ \
        (["0","0","0"],["0","0","1"]), \
        (["0","0","0"],["1","0","0"]), \
        #(["1","0","0"],["0","0","0"]), \
        #(["0","1","0"],["1","0","1"]), \
        #(["0","0","1"],["0","0","1"]), \
        #(["1","1","0"],["1","0","0"]), \
        #(["1","0","1"],["0","1","0"]), \
        #(["0","1","1"],["1","0","1"]), \
        (["1","1","1"],["1","1","0"])]
        train_dataset = pylfit.preprocessing.transitions_dataset_from_array(data=train_data, feature_names=["p_t_1","q_t_1","r_t_1"], target_names=["p_t","q_t","r_t"])

        model = pylfit.models.WDMVLP(features=train_dataset.features, targets=train_dataset.targets)
        model.compile(algorithm="gula")
        model.fit(dataset=train_dataset)

        self.assertEqual(round(accuracy_score(model=model, dataset=test_dataset),2), 0.64)

        # More training data: accuracy improves.
        train_data = [ \
        (["0","0","0"],["0","0","1"]), \
        (["0","0","0"],["1","0","0"]), \
        (["1","0","0"],["0","0","0"]), \
        (["0","1","0"],["1","0","1"]), \
        #(["0","0","1"],["0","0","1"]), \
        #(["1","1","0"],["1","0","0"]), \
        #(["1","0","1"],["0","1","0"]), \
        #(["0","1","1"],["1","0","1"]), \
        (["1","1","1"],["1","1","0"])]
        train_dataset = pylfit.preprocessing.transitions_dataset_from_array(data=train_data, feature_names=["p_t_1","q_t_1","r_t_1"], target_names=["p_t","q_t","r_t"])

        model = pylfit.models.WDMVLP(features=train_dataset.features, targets=train_dataset.targets)
        model.compile(algorithm="gula")
        model.fit(dataset=train_dataset)

        self.assertEqual(round(accuracy_score(model=model, dataset=test_dataset),2), 0.84)

        # Even more training data: accuracy improves further.
        train_data = [ \
        (["0","0","0"],["0","0","1"]), \
        (["0","0","0"],["1","0","0"]), \
        (["1","0","0"],["0","0","0"]), \
        (["0","1","0"],["1","0","1"]), \
        (["0","0","1"],["0","0","1"]), \
        #(["1","1","0"],["1","0","0"]), \
        (["1","0","1"],["0","1","0"]), \
        #(["0","1","1"],["1","0","1"]), \
        (["1","1","1"],["1","1","0"])]
        train_dataset = pylfit.preprocessing.transitions_dataset_from_array(data=train_data, feature_names=["p_t_1","q_t_1","r_t_1"], target_names=["p_t","q_t","r_t"])

        model = pylfit.models.WDMVLP(features=train_dataset.features, targets=train_dataset.targets)
        model.compile(algorithm="gula")
        model.fit(dataset=train_dataset)

        self.assertEqual(round(accuracy_score(model=model, dataset=test_dataset),2), 0.91)

        # random tests
        for i in range(self._nb_random_tests):
            nb_features = random.randint(1,self._nb_features)
            nb_targets = random.randint(1,self._nb_targets)
            max_feature_values = random.randint(1,self._nb_values)
            max_target_values = random.randint(1,self._nb_values)
            nb_transitions = random.randint(2,self._nb_transitions)

            dataset = random_StateTransitionsDataset(nb_transitions, nb_features, nb_targets, max_feature_values, max_target_values)

            # Empty program: uninformed model scores exactly 0.5.
            model = WDMVLP(dataset.features, dataset.targets)
            model.compile(algorithm="gula")

            self.assertEqual(accuracy_score(model=model, dataset=dataset), 0.5)

            # Empty rule program: trained on no transitions also scores 0.5.
            model = WDMVLP(dataset.features, dataset.targets)
            model.compile(algorithm="gula")
            model.fit(StateTransitionsDataset([], dataset.features, dataset.targets))

            self.assertEqual(accuracy_score(model=model, dataset=dataset), 0.5)

            # Train != test: split the data in two halves.
            train_data = dataset.data[0:int(0.5*len(dataset.data))]
            test_data = dataset.data[int(0.5*len(dataset.data)):]
            train_dataset = StateTransitionsDataset(train_data, dataset.features, dataset.targets)
            test_dataset = StateTransitionsDataset(test_data, dataset.features, dataset.targets)

            model = WDMVLP(train_dataset.features, train_dataset.targets)
            model.compile(algorithm="gula")
            model.fit(dataset=train_dataset)

            # Train = Test -> 100 accuracy
            self.assertEqual(accuracy_score(model=model, dataset=train_dataset), 1.0)

            # Group test transitions by initial state.
            grouped_transitions = {tuple(s1) : set(tuple(s2_) for s1_,s2_ in test_dataset.data if tuple(s1) == tuple(s1_)) for s1,s2 in test_dataset.data}

            # expected output: 1.0 when the target value occurs in some
            # successor of the initial state, 0.0 otherwise.
            expected = {}
            count = 0
            for s1, successors in grouped_transitions.items():
                count += 1
                occurs = {}
                for var in range(len(test_dataset.targets)):
                    for val in range(len(test_dataset.targets[var][1])):
                        occurs[(var,val)] = 0.0
                        for s2 in successors:
                            if s2[var] == test_dataset.targets[var][1][val]:
                                occurs[(var,val)] = 1.0
                                break
                expected[s1] = occurs

            # predictions: model likeliness score per target value.
            predicted = {}
            count = 0
            for s1, successors in grouped_transitions.items():
                count += 1
                occurs = {}
                prediction = model.predict([list(s1)])[s1]
                for var_id, (var,vals) in enumerate(test_dataset.targets):
                    for val_id, val in enumerate(test_dataset.targets[var_id][1]):
                        occurs[(var_id,val_id)] = prediction[var][val][0]
                predicted[s1] = occurs

            # compute average accuracy: mean absolute error between expected
            # occurrences and predictions, averaged over states.
            global_error = 0
            for s1, actual in expected.items():
                state_error = 0
                for var in range(len(test_dataset.targets)):
                    for val in range(len(test_dataset.targets[var][1])):
                        forecast = predicted[s1]
                        state_error += abs(actual[(var,val)] - forecast[(var,val)])
                global_error += state_error / len(actual.items())
            global_error = global_error / len(expected.items())

            accuracy = 1.0 - global_error

            self.assertEqual(accuracy_score(model=model,dataset=test_dataset), accuracy)

            # Exception: empty dataset is rejected.
            self.assertRaises(ValueError, accuracy_score, model, StateTransitionsDataset([],dataset.features, dataset.targets))
def test_fit(self): print(">> DMVLP.fit(dataset, verbose)") for test in range(0, self._nb_tests): for verbose in [0, 1]: dataset = random_StateTransitionsDataset( \ nb_transitions=random.randint(1, self._nb_transitions), \ nb_features=random.randint(1,self._nb_features), \ nb_targets=random.randint(1,self._nb_targets), \ max_feature_values=self._nb_feature_values, \ max_target_values=self._nb_target_values) model = DMVLP(features=dataset.features, targets=dataset.targets) model.compile(algorithm="gula") f = io.StringIO() with contextlib.redirect_stderr(f): model.fit(dataset=dataset, verbose=verbose) expected_rules = GULA.fit(dataset) self.assertEqual(expected_rules, model.rules) model = DMVLP(features=dataset.features, targets=dataset.targets) model.compile(algorithm="pride") f = io.StringIO() with contextlib.redirect_stderr(f): model.fit(dataset=dataset, verbose=verbose) expected_rules = PRIDE.fit(dataset) self.assertEqual(expected_rules, model.rules) # Exceptions #------------ for algorithm in self._SUPPORTED_ALGORITHMS: model = DMVLP(features=dataset.features, targets=dataset.targets) model.compile(algorithm=algorithm) self.assertRaises(ValueError, model.fit, []) # dataset is not of valid type model.algorithm = "gulaaaaa" self.assertRaises(ValueError, model.fit, dataset, verbose) # algorithm is not of valid model.algorithm = algorithm original = DMVLP._COMPATIBLE_DATASETS.copy() class newdataset(Dataset): def __init__(self, data, features, targets): x = "" DMVLP._COMPATIBLE_DATASETS = [newdataset] self.assertRaises( ValueError, model.fit, newdataset([], [], []), verbose) # dataset not supported by the algo DMVLP._COMPATIBLE_DATASETS = original #self.assertRaises(ValueError, model.fit, dataset, verbose) # algorithm is not of valid model.algorithm = "lf1t" self.assertRaises(NotImplementedError, model.fit, dataset, verbose) # algorithm is not of valid