def test_compile(self):
    """Check which algorithm names ``CDMVLP.compile`` accepts and rejects.

    For each supported algorithm, on a model built from a random dataset:
      * ``compile()`` without argument must select "synchronizer";
      * ``compile(algorithm=...)`` must store the requested algorithm;
      * "lol" and "gula" must be rejected with ``ValueError``;
      * once "gula" is the only entry of ``CDMVLP._ALGORITHMS``, compiling
        with it must raise ``NotImplementedError``.
    """
    print(">> CDMVLP.compile(algorithm)")

    for i in range(self._nb_tests):
        for algorithm in self._SUPPORTED_ALGORITHMS:
            dataset = random_StateTransitionsDataset(
                nb_transitions=random.randint(1, self._nb_transitions),
                nb_features=random.randint(1, self._nb_features),
                nb_targets=random.randint(1, self._nb_targets),
                max_feature_values=self._nb_feature_values,
                max_target_values=self._nb_target_values)

            model = CDMVLP(features=dataset.features, targets=dataset.targets)

            model.compile()
            self.assertEqual(model.algorithm, "synchronizer")  # default algorithm

            model.compile(algorithm=algorithm)
            self.assertEqual(model.algorithm, algorithm)

            self.assertRaises(ValueError, model.compile, "lol")
            self.assertRaises(ValueError, model.compile, "gula")
            #self.assertRaises(NotImplementedError, model.compile, "pride")
            #self.assertRaises(NotImplementedError, model.compile, "synchronizer-pride")

            # Temporarily declare "gula" as the only known algorithm.
            # The class attribute is shared by every test, so it is restored
            # in a ``finally`` clause: a failing assertion must not leak the
            # patched value into the tests that run afterwards.
            original = CDMVLP._ALGORITHMS.copy()
            CDMVLP._ALGORITHMS = ["gula"]
            try:
                # Declared but not handled by compile
                self.assertRaises(NotImplementedError, model.compile, "gula")
            finally:
                CDMVLP._ALGORITHMS = original
def random_CDMVLP(nb_features, nb_targets, max_feature_values, max_target_values, algorithm, nb_transitions=100):
    """Build a CDMVLP fitted on a freshly generated random dataset.

    Args:
        nb_features: forwarded to ``random_StateTransitionsDataset``.
        nb_targets: forwarded to ``random_StateTransitionsDataset``.
        max_feature_values: forwarded to ``random_StateTransitionsDataset``.
        max_target_values: forwarded to ``random_StateTransitionsDataset``.
        algorithm: algorithm name handed to ``CDMVLP.compile``.
        nb_transitions: first argument of ``random_StateTransitionsDataset``;
            defaults to 100, the value that used to be hard-coded here.

    Returns:
        The CDMVLP after ``compile`` and ``fit`` were called on it.
    """
    dataset = random_StateTransitionsDataset(
        nb_transitions, nb_features, nb_targets,
        max_feature_values, max_target_values)

    model = CDMVLP(features=dataset.features, targets=dataset.targets)
    model.compile(algorithm=algorithm)
    model.fit(dataset=dataset)

    return model
def test_next(self):
    """Check ``SynchronousConstrained.next`` on fixed and random models.

    Part 1 fits a "synchronizer" CDMVLP on a small hand-written dataset and
    compares the successors of three feature states with the expected sets.

    Part 2 fits random models and checks that the result equals the output
    of ``Synchronous.next`` from which every target state matched (together
    with the feature state) by a model constraint has been removed.
    """
    print(">> pylfit.semantics.SynchronousConstrained.next(feature_state, targets, rules)")

    # Unit test
    data = [
        ([0, 0, 0], [0, 0, 1]),
        ([0, 0, 0], [1, 0, 0]),
        ([1, 0, 0], [0, 0, 0]),
        ([0, 1, 0], [1, 0, 1]),
        ([0, 0, 1], [0, 0, 1]),
        ([1, 1, 0], [1, 0, 0]),
        ([1, 0, 1], [0, 1, 0]),
        ([0, 1, 1], [1, 0, 1]),
        ([1, 1, 1], [1, 1, 0]),
    ]
    feature_names = ["p_t-1", "q_t-1", "r_t-1"]
    target_names = ["p_t", "q_t", "r_t"]

    dataset = pylfit.preprocessing.transitions_dataset_from_array(
        data=data, feature_names=feature_names, target_names=target_names)

    model = CDMVLP(features=dataset.features, targets=dataset.targets)
    model.compile(algorithm="synchronizer")
    model.fit(dataset=dataset)

    # (raw feature state, expected set of successor target states)
    unit_cases = [
        ([0, 0, 0], {(1, 0, 0), (0, 0, 1)}),
        ([1, 1, 1], {(1, 1, 0)}),
        ([0, 1, 0], {(1, 0, 1)}),
    ]
    for raw_state, expected_successors in unit_cases:
        feature_state = Algorithm.encode_state(raw_state, model.features)
        successors = SynchronousConstrained.next(
            feature_state, model.targets, model.rules, model.constraints)
        self.assertEqual({tuple(s) for s in successors}, expected_successors)

    # Random tests
    for i in range(self._nb_tests):
        # Apply CDMVLP correctly
        model = random_CDMVLP(
            nb_features=random.randint(1, self._nb_features),
            nb_targets=random.randint(1, self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values,
            algorithm="synchronizer")

        feature_state = random.choice(model.feature_states())
        feature_state = Algorithm.encode_state(feature_state, model.features)

        target_states = SynchronousConstrained.next(
            feature_state, model.targets, model.rules, model.constraints)

        # Apply synchronous semantics
        candidates = Synchronous.next(feature_state, model.targets, model.rules)

        # Apply constraints: keep the candidates no constraint matches
        expected = []
        for s in candidates:
            joint_state = list(feature_state) + list(s)
            if not any(c.matches(joint_state) for c in model.constraints):
                expected.append(s)

        # Both collections must contain exactly the same states
        for s2 in target_states:
            self.assertIn(s2, expected)

        for s2 in expected:
            self.assertIn(s2, target_states)
def _check_rules_and_predictions(self, dataset, expected_string_rules, expected_string_constraints):
    """Compare ``Synchronizer.fit(dataset)`` with an expected program.

    Args:
        dataset: dataset given to ``Synchronizer.fit``; its ``features``,
            ``targets`` and ``data`` attributes are read.
        expected_string_rules: newline-separated rules, each parsed with
            ``Rule.from_string``. Blank lines are ignored.
        expected_string_constraints: newline-separated constraints, parsed
            the same way.

    Asserts that learned rules and constraints are exactly the expected ones
    (both inclusions), then that the transitions predicted by a CDMVLP built
    from the learned program are exactly the transitions of ``dataset.data``.
    """
    # Strip first, filter second: a whitespace-only line must not reach the
    # parser as an empty rule string.
    expected_string_rules = [
        s.strip() for s in expected_string_rules.strip().split("\n")
        if len(s.strip()) > 0
    ]
    expected_string_constraints = [
        s.strip() for s in expected_string_constraints.strip().split("\n")
        if len(s.strip()) > 0
    ]

    expected_rules = [
        Rule.from_string(string_rule, dataset.features, dataset.targets)
        for string_rule in expected_string_rules
    ]
    expected_constraints = [
        Rule.from_string(string_constraint, dataset.features, dataset.targets)
        for string_constraint in expected_string_constraints
    ]

    #eprint(expected_rules)

    rules, constraints = Synchronizer.fit(dataset)

    #eprint(output)

    # The offending item is printed before asserting so that the failure is
    # easy to diagnose from the test log.
    for r in expected_rules:
        if r not in rules:
            eprint("Missing rule: ", r)
        self.assertIn(r, rules)

    for r in rules:
        if r not in expected_rules:
            eprint("Additional rule: ", r)
        self.assertIn(r, expected_rules)

    for r in expected_constraints:
        if r not in constraints:
            eprint("Missing constraint: ", r)
        self.assertIn(r, constraints)

    for r in constraints:
        if r not in expected_constraints:
            eprint("Additional constraint: ", r)
        # Must be checked against the expected constraints: testing
        # membership in ``constraints`` itself could never fail.
        self.assertIn(r, expected_constraints)

    model = CDMVLP(dataset.features, dataset.targets, rules, constraints)

    #model.compile("synchronizer")
    #model.summary()

    expected = set((tuple(s1), tuple(s2)) for s1, s2 in dataset.data)

    predicted = model.predict(model.feature_states())
    predicted = set(
        (tuple(s1), tuple(s2)) for (s1, S2) in predicted for s2 in S2)

    eprint()
    for done, (s1, s2) in enumerate(expected, 1):
        eprint("\rChecking transitions ", done, "/", len(expected), end='')
        self.assertIn((s1, s2), predicted)

    for done, (s1, s2) in enumerate(predicted, 1):
        eprint("\rChecking transitions ", done, "/", len(predicted), end='')
        self.assertIn((s1, s2), expected)
def test_constructor(self):
    """Exercise the ``CDMVLP`` constructor on valid and malformed input.

    A model built from a random dataset must expose the dataset features and
    targets, empty rules and constraints, and no algorithm. Malformed
    variable declarations must be rejected, first as features, then as
    targets, with the exception type listed for each case.
    """
    print(">> CDMVLP(features, targets, rules)")

    def malformed_variables():
        # Fresh objects on each call: (bad declaration, expected exception)
        return [
            # not list
            ('[("x0", ["0","1"]), ("x1", ["0","1"]), ("x2", ["0","1"])]', TypeError),
            # not tuple
            ([["x0", ["0", "1"]], ("x1", ["0", "1"]), ("x2", ["0", "1"])], TypeError),
            # not tuple of size 2
            ([("x0", "0", "1"), ("x1", "0", "1"), ("x2", ["0", "1"])], TypeError),
            # domain is not list
            ([("x0", ["0", "1"]), ("x1", '0","1"'), ("x2", ["0", "1"])], TypeError),
            # domain values are not string
            ([("x0", ["0", "1"]), ("x1", [0, "1"]), ("x2", ["0", "1"])], ValueError),
        ]

    for _ in range(self._nb_tests):
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1, self._nb_features),
            nb_targets=random.randint(1, self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        model = CDMVLP(features=dataset.features, targets=dataset.targets)

        features = dataset.features
        targets = dataset.targets

        self.assertEqual(model.features, features)
        self.assertEqual(model.targets, targets)
        self.assertEqual(model.rules, [])
        self.assertEqual(model.constraints, [])
        self.assertEqual(model.algorithm, None)

        # Exceptions:
        #-------------

        # Features format (targets stay those of the dataset)
        for bad_features, expected_error in malformed_variables():
            self.assertRaises(expected_error, CDMVLP, bad_features, targets)

        # Targets format (features are a fixed, well-formed declaration)
        features = [("x0", ["0", "1"]), ("x1", ["0", "1"]), ("x2", ["0", "1"])]
        for bad_targets, expected_error in malformed_variables():
            self.assertRaises(expected_error, CDMVLP, features, bad_targets)
def test_summary(self):
    """Check the text printed by ``CDMVLP.summary``.

    For each supported algorithm the printed summary is compared with a
    locally built expected text, once for a model whose rules and
    constraints were emptied and once for a fitted model. Finally, calling
    ``summary`` on a model that was never compiled must raise ``ValueError``.
    """
    print(">> CDMVLP.summary()")

    def rule_section(title, rules, model):
        # One section of the summary: "[]" marker when empty, otherwise one
        # logic-form line per rule.
        if len(rules) == 0:
            return " " + title + ": []\n"
        text = " " + title + ":\n"
        for r in rules:
            text += " " + r.logic_form(model.features, model.targets) + "\n"
        return text

    def expected_summary(model, algorithm):
        # Full expected output of model.summary()
        text = "CDMVLP summary:\n" + " Algorithm: " + algorithm + "\n"
        text += " Features: \n"
        for var, vals in model.features:
            text += " " + var + ": " + str(vals) + "\n"
        text += " Targets: \n"
        for var, vals in model.targets:
            text += " " + var + ": " + str(vals) + "\n"
        text += rule_section("Rules", model.rules, model)
        text += rule_section("Constraints", model.constraints, model)
        return text

    def printed_summary(model):
        # Capture what model.summary() writes to stdout. stdout is restored
        # in ``finally`` so that an exception raised by summary() cannot
        # leave the interpreter writing into the capture buffer.
        old_stdout = sys.stdout
        sys.stdout = mystdout = StringIO()
        try:
            model.summary()
        finally:
            sys.stdout = old_stdout
        return mystdout.getvalue()

    for i in range(self._nb_tests):
        for algorithm in self._SUPPORTED_ALGORITHMS:
            # Empty CDMVLP
            model = random_CDMVLP(
                nb_features=random.randint(1, self._nb_features),
                nb_targets=random.randint(1, self._nb_targets),
                max_feature_values=self._nb_feature_values,
                max_target_values=self._nb_target_values,
                algorithm=algorithm)

            model.rules = []
            model.constraints = []

            expected_print = expected_summary(model, algorithm)
            self.assertEqual(printed_summary(model), expected_print)

            # Usual CDMVLP
            model = random_CDMVLP(
                nb_features=random.randint(2, self._nb_features),
                nb_targets=random.randint(2, self._nb_targets),
                max_feature_values=self._nb_feature_values,
                max_target_values=self._nb_target_values,
                algorithm=algorithm)

            expected_print = expected_summary(model, algorithm)
            self.assertEqual(printed_summary(model), expected_print)

            # Exceptions
            #------------

            model = CDMVLP(features=model.features, targets=model.targets)
            self.assertRaises(ValueError, model.summary)  # compile not called
def test_predict(self):
    """Check ``CDMVLP.predict`` against ``SynchronousConstrained.next``.

    For each supported algorithm, a model fitted on a random dataset must
    predict, for every distinct feature state of the dataset, the target
    states obtained by encoding the state, applying
    ``SynchronousConstrained.next`` and decoding the result ("?" stands for
    the value id -1). The comparison is repeated after removing every rule
    of one randomly chosen target variable. Malformed inputs must raise
    ``TypeError`` or ``ValueError``.
    """
    print(">> CDMVLP.predict()")

    def expected_target_states(model, s1):
        # Encode the feature state with domain value ids
        feature_state_encoded = []
        for var_id, val in enumerate(s1):
            val_id = model.features[var_id][1].index(str(val))
            feature_state_encoded.append(val_id)

        #eprint(feature_state_encoded)

        target_states = SynchronousConstrained.next(
            feature_state_encoded, model.targets, model.rules, model.constraints)

        # Decode each target state back to domain values
        output = []
        for s in target_states:
            target_state = []
            for var_id, val_id in enumerate(s):
                #eprint(var_id, val_id)
                if val_id == -1:
                    target_state.append("?")
                else:
                    target_state.append(model.targets[var_id][1][val_id])
            output.append(target_state)
        return output

    for i in range(self._nb_tests):
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1, self._nb_features),
            nb_targets=random.randint(1, self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        for algorithm in self._SUPPORTED_ALGORITHMS:
            model = CDMVLP(features=dataset.features, targets=dataset.targets)
            model.compile(algorithm=algorithm)
            model.fit(dataset=dataset)

            feature_states = list(set(tuple(s1) for s1, s2 in dataset.data))

            prediction = model.predict(feature_states)

            for state_id, s1 in enumerate(feature_states):
                output = expected_target_states(model, s1)
                self.assertEqual(prediction[state_id][0], list(s1))
                self.assertEqual(prediction[state_id][1], output)

            # Force missing value: drop every rule of ONE target variable.
            # The index is drawn once and stays within the valid range
            # (random.randint includes its upper bound); drawing it per rule
            # removed an arbitrary subset of rules instead.
            removed_variable = random.randint(0, len(model.targets) - 1)
            model.rules = [
                r for r in model.rules if r.head_variable != removed_variable
            ]

            prediction = model.predict(feature_states)
            for state_id, s1 in enumerate(feature_states):
                output = expected_target_states(model, s1)
                self.assertEqual(prediction[state_id][1], output)

            # Exceptions:
            # Feature_states bad format: is not a list
            self.assertRaises(TypeError, model.predict, "")
            # Feature_states bad format: is not a list of list
            self.assertRaises(TypeError, model.predict, [["0", "1"], 0, 10])
            # Feature_states bad format: is not a list of list of string
            self.assertRaises(TypeError, model.predict, [["0", "1"], [0, 10]])

            feature_states = [
                list(s) for s in set(tuple(s1) for s1, s2 in dataset.data)
            ]
            state_id = random.randint(0, len(feature_states) - 1)
            original = feature_states[state_id].copy()

            # Feature_states bad format: size of state not correspond to model features <
            feature_states[state_id] = feature_states[state_id][
                :-random.randint(1, len(dataset.features))]
            self.assertRaises(TypeError, model.predict, feature_states)

            # Feature_states bad format: size of state not correspond to model features >
            feature_states[state_id] = original.copy()
            feature_states[state_id].extend(
                ["0" for i in range(random.randint(1, 10))])
            self.assertRaises(TypeError, model.predict, feature_states)

            # Feature_states bad format: value out of domain
            feature_states[state_id] = original.copy()
            var_id = random.randint(0, len(dataset.features) - 1)
            feature_states[state_id][var_id] = "bad_value"
            self.assertRaises(ValueError, model.predict, feature_states)
            feature_states[state_id] = original.copy()
def test_fit(self):
    """Check ``CDMVLP.fit`` against a direct call to ``Synchronizer.fit``.

    For each supported algorithm and each verbosity level, the rules and
    constraints of the fitted model must equal the output of
    ``Synchronizer.fit(dataset, complete=(algorithm == "synchronizer"))``.
    Invalid datasets and unsupported algorithm values must be rejected.
    """
    print(">> CDMVLP.fit(dataset)")

    for i in range(self._nb_tests):
        dataset = random_StateTransitionsDataset(
            nb_transitions=random.randint(1, self._nb_transitions),
            nb_features=random.randint(1, self._nb_features),
            nb_targets=random.randint(1, self._nb_targets),
            max_feature_values=self._nb_feature_values,
            max_target_values=self._nb_target_values)

        for algorithm in self._SUPPORTED_ALGORITHMS:
            for verbose in [0, 1]:
                model = CDMVLP(features=dataset.features, targets=dataset.targets)
                model.compile(algorithm=algorithm)

                # Keep whatever fit writes to stderr out of the test output
                f = io.StringIO()
                with contextlib.redirect_stderr(f):
                    model.fit(dataset=dataset, verbose=verbose)

                expected_rules, expected_constraints = Synchronizer.fit(
                    dataset, complete=(algorithm == "synchronizer"))
                self.assertEqual(expected_rules, model.rules)
                self.assertEqual(expected_constraints, model.constraints)

                # Exceptions
                #------------

                model = CDMVLP(features=dataset.features, targets=dataset.targets)
                model.compile(algorithm=algorithm)

                # dataset is not of valid type
                self.assertRaises(ValueError, model.fit, [], verbose)

                # algorithm not supported
                model.algorithm = "bad_value"
                self.assertRaises(ValueError, model.fit, dataset, verbose)

                model.algorithm = algorithm

                class newdataset(Dataset):
                    def __init__(self, data, features, targets):
                        x = ""

                # The class attributes patched below are shared by all tests:
                # they are restored in ``finally`` so a failing assertion
                # cannot leak the patched values into later tests.
                original = CDMVLP._COMPATIBLE_DATASETS.copy()
                CDMVLP._COMPATIBLE_DATASETS = [newdataset]
                try:
                    # dataset not supported by the algo
                    self.assertRaises(ValueError, model.fit,
                                      newdataset([], [], []), verbose)
                finally:
                    CDMVLP._COMPATIBLE_DATASETS = original

                model.algorithm = "gula"
                original = CDMVLP._ALGORITHMS.copy()
                CDMVLP._ALGORITHMS = ["gula"]
                try:
                    # algorithm declared but fit does not handle it
                    self.assertRaises(NotImplementedError, model.fit,
                                      dataset, verbose)
                finally:
                    CDMVLP._ALGORITHMS = original
def evaluate_scalability_on_bn_benchmark(algorithm, benchmark, benchmark_name, semantics, run_tests, train_size=None, full_transitions=None):
    """
    Measure the training run time of an algorithm on the transitions of a
    benchmark, averaged over several runs.

    One CSV line per run is printed on stdout (settings followed by the run
    time, or -1 on time out); progress messages go through ``eprint``.

    Args:
        algorithm: String
            Name of the algorithm: "gula", "pride" or "brute-force" (a WDMVLP
            is trained) or "synchronizer" (a CDMVLP is trained).
            Any other value prints an error and exits.
        benchmark: model object
            Model of the benchmark; its ``features``, ``targets``,
            ``feature_states()`` and ``predict()`` members are used.
        benchmark_name: String
            Label of the benchmark, only written in the CSV output.
        semantics: String
            Semantics passed to ``benchmark.predict`` and written in the CSV
            output.
        run_tests: int
            Number of runs to average over; must be at least 1.
        train_size: float or int
            Size of the training set as a proportion (float) or as an
            explicit number of transitions (otherwise).
            None means all transitions are used for training.
        full_transitions: list of (feature state, target state)
            Transitions to learn from, generated from the benchmark when
            None. NOTE: a list given here is shuffled in place at each run.

    Returns:
        (number of training transitions of the last run, average run time),
        or (number of training transitions, -1) as soon as a run times out.
    """
    # 1) Generate transitions
    #-------------------------------------

    # Boolean network benchmarks only have rules for value 1, if none match next value is 0
    if full_transitions is None:
        eprint("Generating benchmark transitions ...")
        full_transitions = [
            (np.array(feature_state),
             np.array(["0" if x == "?" else "1" for x in target_state]))
            for feature_state in benchmark.feature_states()
            for target_state in benchmark.predict(
                [feature_state], semantics)[tuple(feature_state)]
        ]

    #eprint(full_transitions)

    # 2) Prepare scores containers
    #---------------------------
    results_time = []

    # 3) Average over several tests
    #-----------------------------
    for run in range(run_tests):

        # 3.1 Split train/test sets
        #-----------------------
        random.shuffle(full_transitions)
        train = full_transitions
        test = []

        # Complete, Proportion or explicit?
        if train_size is not None:
            if isinstance(train_size, float):  # percentage
                last_obs = max(int(train_size * len(full_transitions)), 1)
            else:  # exact number of transitions
                last_obs = train_size
            train = full_transitions[:last_obs]
            test = full_transitions[last_obs:]

        # DBG
        if run == 0:
            eprint(">>> Start Training on " + str(len(train)) + "/" +
                   str(len(full_transitions)) + " transitions (" +
                   str(round(100 * len(train) / len(full_transitions), 2)) +
                   "%)")

        eprint(">>>> run: " + str(run + 1) + "/" + str(run_tests), end='')

        dataset = StateTransitionsDataset(train, benchmark.features,
                                          benchmark.targets)

        # csv format of results
        if train_size is not None:
            expected_train_size = train_size
        else:
            expected_train_size = 1.0
        real_train_size = round(len(train) / (len(full_transitions)), 2)

        common_settings = ",".join([
            algorithm,
            semantics,
            benchmark_name,
            str(len(benchmark.features)),
            str(len(full_transitions)),
            "random_transitions",
            str(expected_train_size),
            str(real_train_size),
            str(len(train)),
        ])

        # 3.2) Learn from training set
        #-------------------------

        # Define a timeout
        signal.signal(signal.SIGALRM, handler)
        signal.alarm(TIME_OUT)
        run_time = -2
        try:
            start = time.time()

            if algorithm in ["gula", "pride", "brute-force"]:
                model = WDMVLP(features=benchmark.features,
                               targets=benchmark.targets)
            elif algorithm in ["synchronizer"]:
                model = CDMVLP(features=benchmark.features,
                               targets=benchmark.targets)
            else:
                eprint("Error, algorithm not accepted: " + algorithm)
                exit()

            model.compile(algorithm=algorithm)
            model.fit(dataset)

            # Stop the timer right after training so it cannot fire while
            # the results are being recorded
            signal.alarm(0)
            end = time.time()
            run_time = end - start
            results_time.append(run_time)
        except TimeoutException:
            eprint(" TIME OUT")
            print(common_settings + "," + "-1")
            return len(train), -1
        finally:
            # Whatever the outcome (success, time out or another error raised
            # by training), never leave a pending alarm behind
            signal.alarm(0)

        print(common_settings + "," + str(run_time))
        eprint(" " + str(round(run_time, 3)) + "s")

    # 4) Average scores
    #-------------------
    avg_run_time = sum(results_time) / run_tests

    eprint(">> AVG Run time: " + str(round(avg_run_time, 3)) + "s")

    return len(train), avg_run_time