def test___init__(self):
    """Check that the LogicProgram constructor stores its arguments as given.

    Each trial draws a random number of variables, one value domain per
    variable and a random number of rules, builds a LogicProgram from them
    and compares the three getters against the inputs.
    """
    print(">> LogicProgram.__init__(self, variables, values, rules)")

    for _ in range(self.__nb_unit_test):
        nb_vars = random.randint(1, self.__nb_variables)
        variables = ["x" + str(idx) for idx in range(nb_vars)]

        # One domain 0..k-1 per variable, k drawn independently each time
        values = [
            list(range(0, random.randint(2, self.__nb_values)))
            for _ in variables
        ]

        nb_rules = random.randint(0, self.__nb_rules)
        rules = [
            self.random_rule(variables, values, self.__body_size)
            for _ in range(nb_rules)
        ]

        program = LogicProgram(variables, values, rules)

        self.assertEqual(program.get_variables(), variables)
        self.assertEqual(program.get_values(), values)
        self.assertEqual(program.get_rules(), rules)
def test_precision(self):
    """Check LogicProgram.precision against a locally recomputed value.

    Random (origin, target) pairs are generated for the expected and the
    predicted sets; the number of differing target positions is counted by
    hand and compared with the value returned by LogicProgram.precision.
    Finally an origin state is truncated to check that a ValueError is raised.
    """
    print(">> LogicProgram.precision(expected, predicted)")

    self.assertEqual(LogicProgram.precision([], []), 1.0)

    for _ in range(self.__nb_unit_test):
        nb_var = random.randint(1, self.__nb_variables)
        nb_values = random.randint(2, self.__nb_values)
        nb_states = random.randint(1, 100)

        expected = []
        predicted = []
        for _ in range(nb_states):
            origin = [random.randint(0, nb_values) for _ in range(nb_var)]
            target = [random.randint(0, nb_values) for _ in range(nb_var)]
            guess = [random.randint(0, nb_values) for _ in range(nb_var)]

            expected.append((origin, target))
            predicted.append((origin, guess))

        precision = LogicProgram.precision(expected, predicted)

        # Recount the errors: each expected pair is compared with the FIRST
        # predicted pair sharing its origin state, position by position
        error = 0
        for origin, target in expected:
            for origin_, guess in predicted:
                if origin == origin_:
                    error += sum(
                        1 for got, wanted in zip(guess, target)
                        if got != wanted
                    )
                    break

        total = nb_states * nb_var
        self.assertEqual(precision, 1.0 - (error / total))

        # Size error: shorten one origin state of the expected set
        state_id = random.randint(0, len(expected) - 1)
        modif = random.randint(1, len(expected[state_id]))
        origin, target = expected[state_id]
        expected[state_id] = (origin[:-modif], target)

        self.assertRaises(ValueError, LogicProgram.precision, expected,
                          predicted)
def fit(variables, values, time_series):
    """
    Preprocess time series and learn rules for every variable/value pair.

    For each pair, LFkT.interprete provides positives, negatives and a delay;
    the variables/values are then extended with one suffixed copy per extra
    delay step before calling GULA.fit_var_val.

    Args:
        variables: list of string
            variables of the system
        values: list of list of string
            possible value of each variable
        time_series: list of list (list of int, list of int)
            sequences of state transitions of the system

    Returns:
        LogicProgram
            A logic program built from the original variables/values and all
            the learned rules (no rule when time_series is empty)
    """
    # Nothing to learn
    if len(time_series) == 0:
        return LogicProgram(variables, values, [])

    learned = []

    for var in range(len(variables)):
        for val in range(len(values[var])):
            positives, negatives, delay = LFkT.interprete(
                variables, values, time_series, var, val)

            # Extend the Herbrand base: one "<name>_<d>" copy of every
            # variable (with its values) for each delay step d in 1..delay-1
            extended_variables = variables.copy()
            extended_values = values.copy()
            for step in range(1, delay):
                suffix = "_" + str(step)
                extended_variables.extend(
                    [name + suffix for name in variables])
                extended_values.extend(values)

            learned.extend(
                GULA.fit_var_val(extended_variables, extended_values, var,
                                 val, positives, negatives))

    # The output program keeps the original (non-extended) variables/values
    return LogicProgram(variables, values, learned)
def fit(variables, values, transitions):
    """
    Learn rules for every variable/value pair with LF1T.fit_var_val.

    Assume deterministic transitions: only one future for each state.

    Args:
        variables: list of string
            variables of the system
        values: list of list of string
            possible value of each variable
        transitions: list of tuple (list of int, list of int)
            state transitions of the system

    Returns:
        LogicProgram
            A logic program gathering the rules learned for each pair
    """
    # Pairs are visited variable by variable, value index by value index
    learned = [
        rule
        for var in range(len(variables))
        for val in range(len(values[var]))
        for rule in LF1T.fit_var_val(variables, values, transitions, var, val)
    ]

    return LogicProgram(variables, values, learned)
def fit(variables, values, transitions, program=None):
    """
    Learn rules for every variable/value pair with GULA.fit_var_val.

    Args:
        variables: list of string
            variables of the system
        values: list of list of string
            possible value of each variable
        transitions: list of tuple (list of int, list of int)
            state transitions of dynamic system
        program: LogicProgram
            A logic program to be fitted (kind of background knowledge),
            forwarded unchanged to GULA.fit_var_val

    Returns:
        LogicProgram
            A logic program gathering the rules learned for each pair
    """
    learned = []

    for var in range(len(variables)):
        for val in range(len(values[var])):
            # Split the transitions for this pair, then learn its rules
            positives, negatives = GULA.interprete(transitions, var, val)
            learned.extend(
                GULA.fit_var_val(variables, values, var, val, positives,
                                 negatives, program))

    return LogicProgram(variables, values, learned)
def fit(variables, values, transitions):
    """
    Learn rules for every observed variable/value with PRIDE.fit_var_val.

    Note: when transitions is not empty, the `variables` and `values`
    arguments are not used. The output program is built from integer
    variable indices and from the values observed in the target states.

    Args:
        variables: list
            variables of the system (only used when transitions is empty)
        values: list of list
            possible value of each variable (only used when transitions is
            empty)
        transitions: list of tuple (list of int, list of int)
            state transitions of dynamic system

    Returns:
        LogicProgram
            A logic program gathering the rules learned for each observed
            variable/value pair
    """
    # Nothing to learn
    if len(transitions) == 0:
        return LogicProgram(variables, values, [])

    # Number of variables is read from the first target state
    nb_variables = len(transitions[0][1])

    # Extract observed values, in order of first appearance in target states
    values = []
    for var in range(nb_variables):
        observed = []
        for _, target in transitions:
            if target[var] not in observed:
                observed.append(target[var])
        values.append(observed)

    # Learn rules for each observed variable/value
    rules = []
    for var, observed in enumerate(values):
        for val in observed:
            positives, negatives = PRIDE.interprete(transitions, var, val)
            rules.extend(PRIDE.fit_var_val(var, val, positives, negatives))

    # Variables of the output program are their integer indices
    variables = list(range(nb_variables))
    return LogicProgram(variables, values, rules)
def test_compare(self):
    """Check LogicProgram.compare on identical and on random programs.

    Identical programs must share all their rules (in both call directions);
    for random programs every rule must land in the right set among
    common / missing / over.
    """
    print(">> LogicProgram.compare(other)")

    # Equal programs: first p1.compare(p2), then the reverse call
    for reverse_call in (False, True):
        for _ in range(self.__nb_unit_test):
            p1 = self.random_program(self.__nb_variables, self.__nb_values,
                                     self.__body_size)
            p2 = LogicProgram(p1.get_variables(), p1.get_values(),
                              p1.get_rules())

            if reverse_call:
                common, missing, over = p2.compare(p1)
            else:
                common, missing, over = p1.compare(p2)

            self.assertEqual(len(common), len(p1.get_rules()))
            self.assertEqual(len(missing), 0)
            self.assertEqual(len(over), 0)

    # Random programs
    for _ in range(self.__nb_unit_test):
        p1 = self.random_program(self.__nb_variables, self.__nb_values,
                                 self.__body_size)
        p2 = self.random_program(self.__nb_variables, self.__nb_values,
                                 self.__body_size)

        common, missing, over = p1.compare(p2)

        rules_1 = p1.get_rules()
        rules_2 = p2.get_rules()

        # Every rule appears in one of the sets
        for rule in rules_1:
            self.assertTrue(rule in common or rule in missing)
        for rule in rules_2:
            self.assertTrue(rule in common or rule in over)

        # Every rule is correctly placed
        for rule in common:
            self.assertTrue(rule in rules_1 and rule in rules_2)
        for rule in missing:
            self.assertTrue(rule in rules_1 and rule not in rules_2)
        for rule in over:
            self.assertTrue(rule not in rules_1 and rule in rules_2)
def random_program(self, nb_variables, nb_values, body_size):
    """Build a LogicProgram with random variables, domains and rules.

    Args:
        nb_variables: int
            upper bound on the number of variables (at least one is drawn)
        nb_values: int
            upper bound on the size of each value domain (at least two)
        body_size: forwarded to self.random_rule

    Returns:
        LogicProgram
            program made of between 0 and 100 rules from self.random_rule
    """
    names = ["x" + str(idx) for idx in range(random.randint(1, nb_variables))]

    # One domain 0..k-1 per variable
    domains = [list(range(0, random.randint(2, nb_values))) for _ in names]

    nb_rules = random.randint(0, 100)
    rules = [
        self.random_rule(names, domains, body_size) for _ in range(nb_rules)
    ]

    return LogicProgram(names, domains, rules)
def fit(variables, values, transitions):
    """
    Preprocess transitions and learn one logic program per deterministic set.

    LUST.interprete splits the transitions into a first part (named
    deterministic core here) and a list of transition sets. A common program
    is learned from the core with GULA.fit; each set is then learned with
    GULA.fit using the common program as its fourth argument.

    Args:
        variables: list of string
            variables of the system
        values: list of list of string
            possible value of each variable
        transitions: list of (list of int, list of int)
            state transitions of a dynamic system

    Returns:
        list of LogicProgram
            - a single rule-less program when transitions is empty
            - only the common program when LUST.interprete returns no set
            - otherwise one program per set returned by LUST.interprete
    """
    # Nothing to learn
    if len(transitions) == 0:
        return [LogicProgram(variables, values, [])]

    # Extract strictly deterministic states and separate the others
    deterministic_core, deterministic_sets = LUST.interprete(
        variables, values, transitions)

    common = GULA.fit(variables, values, deterministic_core)

    # Deterministic input: the common program is the whole answer
    if len(deterministic_sets) == 0:
        return [common]

    return [
        GULA.fit(variables, values, transition_set, common)
        for transition_set in deterministic_sets
    ]
# Make the project modules under src/objects importable
sys.path.insert(0, 'src/objects')

from utils import eprint
from logicProgram import LogicProgram
from pride import PRIDE

# 1: Main
#------------
if __name__ == '__main__':

    # 0) Example from text file representing a logic program
    #--------------------------------------------------------
    eprint("Example using logic program definition file:")
    eprint("----------------------------------------------")

    # Load the reference program from its definition file
    benchmark = LogicProgram.load_from_file("benchmarks/logic_programs/repressilator.lp")

    eprint("Original logic program: \n", benchmark.logic_form())

    eprint("Generating transitions...")

    # NOTE(review): `input` shadows the builtin of the same name
    input = benchmark.generate_all_transitions()

    eprint("PRIDE input: \n", input)

    # Learn a model from the transitions of the reference program
    model = PRIDE.fit(benchmark.get_variables(), benchmark.get_values(), input)

    eprint("PRIDE output: \n", model.logic_form())

    # Transitions of the reference program and of the learned model
    expected = benchmark.generate_all_transitions()
    predicted = model.generate_all_transitions()
def test_load_from_file(self):
    """Check that LogicProgram.load_from_file reads back a written program.

    Each trial writes a random program (variable declarations followed by
    rules, with random blank lines) to self.__tmp_file_path, loads it and
    compares variables, values and rules with what was written.

    The temporary file is written through a context manager and removed in a
    `finally` clause, so it is cleaned up even when an assertion fails.
    """
    print(">> LogicProgram.load_from_file(self, file_path)")

    try:
        for _ in range(self.__nb_unit_test):
            variables = [
                "x" + str(idx)
                for idx in range(random.randint(1, self.__nb_variables))
            ]
            # Values are strings here, as they are read back from text
            values = [
                [str(val)
                 for val in range(0, random.randint(2, self.__nb_values))]
                for _ in variables
            ]
            rules = []

            lines = []

            # Variables: "VAR x<index> <value> <value> ..."
            for var in range(len(variables)):
                lines.append(
                    "VAR x" + str(var) + " " + " ".join(values[var]) + "\n")
            lines.append("\n")

            # Rules: "x<h>(<v>,T) :- x<a>(<b>,T-1), ... ." or "x<h>(<v>,T)."
            for _ in range(random.randint(0, 100)):
                r = self.random_rule(variables, values, self.__body_size)
                rules.append(r)

                head = ("x" + str(r.get_head_variable()) + "(" +
                        str(r.get_head_value()) + ",T)")
                conditions = ", ".join(
                    "x" + str(var) + "(" + str(val) + ",T-1)"
                    for var, val in r.get_body())

                if conditions:
                    lines.append(head + " :- " + conditions + ".\n")
                else:
                    lines.append(head + ".\n")

                # Random empty line
                if random.randint(0, 1):
                    lines.append("\n")

            with open(self.__tmp_file_path, "w") as f:
                f.write("".join(lines))

            p = LogicProgram.load_from_file(self.__tmp_file_path)

            self.assertEqual(p.get_variables(), variables)
            self.assertEqual(p.get_values(), values)

            for r in rules:
                if r not in p.get_rules():
                    # Show what is missing before the assertion fails
                    eprint(r.to_string())
                    eprint(p.to_string())
                self.assertTrue(r in p.get_rules())

            for r in p.get_rules():
                self.assertTrue(r in rules)
    finally:
        if os.path.exists(self.__tmp_file_path):
            os.remove(self.__tmp_file_path)
def test_random(self):
    """Check the programs produced by LogicProgram.random, with and without delay.

    Verified properties: variables/values are kept, rule bodies respect the
    requested sizes, every state is matched by at least one rule per variable
    and all matching rules agree on the conclusion. Without delay, rules with
    the same head variable must also not cross-match.
    """
    print(">> LogicProgram.random(variables, values, rule_min_size, rule_max_size, delay=1)")

    # No delay
    for _ in range(self.__nb_unit_test):
        variables = [
            "x" + str(idx)
            for idx in range(random.randint(1, self.__nb_variables))
        ]
        values = [
            list(range(random.randint(2, self.__nb_values)))
            for _ in variables
        ]

        min_body_size = 0
        max_body_size = random.randint(min_body_size, len(variables))

        p = LogicProgram.random(variables, values, min_body_size,
                                max_body_size)

        self.assertEqual(p.get_variables(), variables)
        self.assertEqual(p.get_values(), values)

        for rule in p.get_rules():
            self.assertTrue(len(rule.get_body()) >= min_body_size)
            self.assertTrue(len(rule.get_body()) <= max_body_size)

        for state in p.states():
            for var in range(len(state)):
                matched = False
                conclusion = -1  # -1 means no conclusion stored yet
                for rule in p.get_rules():
                    if rule.get_head_variable() != var or not rule.matches(state):
                        continue
                    matched = True
                    if conclusion == -1:
                        # Store first conclusion
                        conclusion = rule.get_head_value()
                    else:
                        # Check conflict
                        self.assertEqual(conclusion, rule.get_head_value())
                self.assertTrue(matched)

        # No cross-matching between distinct rules of the same variable
        for r1 in p.get_rules():
            for r2 in p.get_rules():
                if r1 == r2 or r1.get_head_variable() != r2.get_head_variable():
                    continue
                self.assertFalse(r1.cross_matches(r2))

    # Delay
    for _ in range(self.__nb_unit_test):
        variables = [
            "x" + str(idx)
            for idx in range(random.randint(1, self.__nb_variables))
        ]
        values = [
            list(range(random.randint(2, self.__nb_values)))
            for _ in variables
        ]

        min_body_size = 0
        max_body_size = random.randint(min_body_size, len(variables))
        delay = random.randint(1, self.__max_delay)

        p = LogicProgram.random(variables, values, min_body_size,
                                max_body_size, delay)

        # One "<name>_<d>" copy of every variable per extra delay step
        extended_variables = variables.copy()
        extended_values = values.copy()
        for step in range(1, delay):
            extended_variables.extend(
                [name + "_" + str(step) for name in variables])
            extended_values.extend(values)

        self.assertEqual(p.get_variables(), variables)
        self.assertEqual(p.get_values(), values)

        # Only the lower bound of the body size is checked here
        for rule in p.get_rules():
            self.assertTrue(len(rule.get_body()) >= min_body_size)

        # States are enumerated over the extended variables
        p_ = LogicProgram(extended_variables, extended_values, [])

        for state in p_.states():
            for var in range(len(variables)):
                matched = False
                conclusion = -1  # -1 means no conclusion stored yet
                for rule in p.get_rules():
                    if rule.get_head_variable() != var or not rule.matches(state):
                        continue
                    matched = True
                    if conclusion == -1:
                        # Store first conclusion
                        conclusion = rule.get_head_value()
                    else:
                        # Check conflict
                        self.assertEqual(conclusion, rule.get_head_value())
                self.assertTrue(matched)
def test_random_LP(algorithm, nb_variables, nb_values, max_body_size, delay=1, train_size=None):
    """
    Evaluate an algorithm on randomly generated logic programs.

    For each of the `run_tests` runs (module-level setting), a random program
    is generated, its time series are shuffled and split into train/test
    sets, the algorithm is fitted on the TRAIN set only, and the learned
    model is compared with the original program (rules and predictions).

    Args:
        algorithm: object exposing fit(variables, values, time_series)
        nb_variables: int
            number of variables of the random programs
        nb_values: int
            number of values of each variable
        max_body_size: int
            maximal rule body size (negative values are treated as 0)
        delay: int
            delay given to LogicProgram.random
        train_size: float or int or None
            Size of the training set in proportion (float) or explicit
            (int); None means all time series are used for training

    Returns:
        float
            average prediction precision over all runs, in percent
    """
    max_body_size = max(0, max_body_size)

    results_time = []
    results_common = []
    results_missing = []
    results_over = []
    results_precision = []

    for run in range(run_tests):

        variables = ["x" + str(i) for i in range(nb_variables)]
        values = [list(range(nb_values)) for _ in variables]

        min_body_size = 0

        p = LogicProgram.random(variables, values, min_body_size,
                                max_body_size, delay)
        serie_size = delay + random.randint(delay, 10)
        time_series = p.generate_all_time_series(serie_size)

        random.shuffle(time_series)
        train = time_series
        test = []

        # Complete, proportion or explicit?
        if train_size is not None:
            if isinstance(train_size, float):  # percentage
                last_obs = max(int(train_size * len(time_series)), 1)
            else:  # exact number of time series
                last_obs = train_size
            train = time_series[:last_obs]
            test = time_series[last_obs:]

        if run == 0:
            eprint(">>> Start Training on ", len(train), "/",
                   len(time_series), " time series of size ",
                   len(time_series[0]), " (",
                   round(100 * len(train) / len(time_series), 2), "%)")

        eprint("\r>>> run: ", run + 1, "/", run_tests, end='')

        # Learn from the training set only, so that the test set stays unseen
        start = time.time()
        model = algorithm.fit(p.get_variables(), p.get_values(), train)
        end = time.time()

        results_time.append(round(end - start, 3))

        common, missing, over = p.compare(model)

        results_common.append(len(common))
        results_missing.append(len(missing))
        results_over.append(len(over))

        # Perfect case: evaluate over all the training time series
        if len(test) == 0:
            test = train

        # The last state of each series is the one to predict
        pred = [(s[:-1], model.next_state(s[:-1])) for s in test]
        test = [(s[:-1], s[-1]) for s in test]
        precision = round(LogicProgram.precision(test, pred), 2)

        results_precision.append(precision)

    # Average scores
    run_time = round(sum(results_time) / run_tests, 3)
    common = sum(results_common) / run_tests
    missing = sum(results_missing) / run_tests
    over = sum(results_over) / run_tests
    precision = sum(results_precision) / run_tests

    # Denominators below come from the program/model of the last run
    eprint()
    eprint(">>> Run time: " + str(run_time) + "s")
    eprint(">>> Logic Program comparaison:")
    eprint(">>>> AVG Common: " + str(common) + "/" + str(len(p.get_rules())) +
           "(" + str(round(100 * common / len(p.get_rules()), 2)) + "%)")
    eprint(">>>> AVG Missing: " + str(missing) + "/" +
           str(len(p.get_rules())) + "(" +
           str(round(100 * missing / len(p.get_rules()), 2)) + "%)")
    eprint(">>>> AVG Over: " + str(over) + "/" + str(len(p.get_rules())) +
           "(" + str(round(100 * over / len(model.get_rules()), 2)) + "%)")
    eprint(">>> Prediction precision")
    eprint(">>>> AVG accuracy: " + str(round(precision * 100, 2)) + "%")

    return round(precision * 100, 2)
def test_fit(self):
    """Check the program learned by LFkT.fit from random delayed programs.

    Without time series the output must have no rule. Otherwise, for every
    variable/value pair: each positive example is matched by a rule of that
    head, no negative example is, and removing any single condition of a
    rule makes it match a negative example (minimality).
    """
    print(">> LFkT.fit(variables, values, time_series)")

    # No transitions
    variables = [
        "x" + str(idx)
        for idx in range(random.randint(1, self.__nb_variables))
    ]
    values = [
        list(range(random.randint(2, self.__nb_values))) for _ in variables
    ]

    min_body_size = 0
    max_body_size = random.randint(min_body_size, len(variables))
    delay_original = random.randint(2, self.__max_delay)

    p = LogicProgram.random(variables, values, min_body_size, max_body_size,
                            delay_original)
    p_ = LFkT.fit(p.get_variables(), p.get_values(), [])

    self.assertEqual(p_.get_variables(), p.get_variables())
    self.assertEqual(p_.get_values(), p.get_values())
    self.assertEqual(p_.get_rules(), [])

    for _ in range(self.__nb_unit_test):
        # Generate transitions
        variables = [
            "x" + str(idx)
            for idx in range(random.randint(1, self.__nb_variables))
        ]
        values = [
            list(range(random.randint(2, self.__nb_values)))
            for _ in variables
        ]

        min_body_size = 0
        max_body_size = random.randint(min_body_size, len(variables))
        delay_original = random.randint(2, self.__max_delay)

        p = LogicProgram.random(variables, values, min_body_size,
                                max_body_size, delay_original)
        time_series = p.generate_all_time_series(delay_original * 10)

        p_ = LFkT.fit(p.get_variables(), p.get_values(), time_series)
        rules = p_.get_rules()

        for variable in range(len(p.get_variables())):
            for value in range(len(p.get_values()[variable])):
                pos, neg, delay = LFkT.interprete(p.get_variables(),
                                                  p.get_values(), time_series,
                                                  variable, value)

                # Learned rules concluding on this variable/value
                candidates = [
                    rule for rule in rules
                    if rule.get_head_variable() == variable
                    and rule.get_head_value() == value
                ]

                # Each positive is explained: one rule covers the example
                for example in pos:
                    self.assertTrue(
                        any(rule.matches(example) for rule in candidates))

                # No negative is covered: no rule covers the example
                for example in neg:
                    self.assertFalse(
                        any(rule.matches(example) for rule in candidates))

                # All rules are minimals
                for rule in candidates:
                    for (var, val) in rule.get_body():
                        rule.remove_condition(var)  # Try remove condition

                        # The weakened rule must cover a negative example
                        conflict = any(rule.matches(example)
                                       for example in neg)

                        # DEBUG:
                        if not conflict:
                            eprint("not minimal " + rule.to_string())
                            eprint(neg)

                        self.assertTrue(conflict)
                        rule.add_condition(var, val)  # Cancel removal
def test_interprete(self):
    """Check the output of LFkT.interprete on random delayed programs.

    For a random variable/value pair, verifies that the positives/negatives
    are consistent with the time series, that every window of `delay`
    states is classified, and that the returned delay equals the one
    recomputed here from the non-deterministic transitions.
    """
    print(">> LFkT.interprete(transitions, variable, value)")

    for i in range(self.__nb_unit_test):
        # Generate transitions
        variables = ["x" + str(i) for i in range(random.randint(1, self.__nb_variables))]
        values = []

        for var in range(len(variables)):
            values.append([val for val in range(random.randint(2, self.__nb_values))])

        min_body_size = 0
        max_body_size = random.randint(min_body_size, len(variables))
        delay_original = random.randint(1, self.__max_delay)

        p = LogicProgram.random(variables, values, min_body_size, max_body_size, delay_original)
        time_series = p.generate_all_time_series(delay_original)

        # Random variable/value pair to interpret
        var = random.randint(0, len(p.get_variables()) - 1)
        val = random.randint(0, len(p.get_values()[var]) - 1)

        pos, neg, delay = LFkT.interprete(p.get_variables(), p.get_values(), time_series, var, val)

        # All pos are valid
        for s in pos:
            for serie in time_series:
                # NOTE(review): `id` shadows the builtin of the same name
                for id in range(len(serie) - delay):
                    # Window of `delay` states, most recent first, flattened
                    s1 = serie[id:id + delay].copy()
                    s1.reverse()
                    s1 = [y for x in s1 for y in x]
                    s2 = serie[id + delay]  # state following the window

                    if s1 == s:
                        self.assertEqual(s2[var], val)
                        break

        # All neg are valid
        for s in neg:
            for serie in time_series:
                for id in range(len(serie) - delay):
                    # Window of `delay` states, most recent first, flattened
                    s1 = serie[id:id + delay].copy()
                    s1.reverse()
                    s1 = [y for x in s1 for y in x]
                    s2 = serie[id + delay]  # state following the window

                    if s1 == s:
                        self.assertTrue(s2[var] != val)
                        break

        # All transitions are interpreted
        for serie in time_series:
            for id in range(delay, len(serie)):
                # Window of the `delay` states preceding serie[id]
                s1 = serie[id - delay:id].copy()
                s1.reverse()
                s1 = [y for x in s1 for y in x]
                s2 = serie[id]

                # The window is in exactly one of pos/neg
                if s2[var] == val:
                    self.assertTrue(s1 in pos)
                    self.assertFalse(s1 in neg)
                else:
                    self.assertFalse(s1 in pos)
                    self.assertTrue(s1 in neg)

        # delay valid
        global_delay = 1
        for serie_1 in time_series:
            for id_state_1 in range(len(serie_1) - 1):
                state_1 = serie_1[id_state_1]
                next_1 = serie_1[id_state_1 + 1]

                # search duplicate with different future
                for serie_2 in time_series:
                    for id_state_2 in range(len(serie_2) - 1):
                        state_2 = serie_2[id_state_2]
                        next_2 = serie_2[id_state_2 + 1]

                        # Non-determinism detected
                        if state_1 == state_2 and next_1[var] != next_2[var]:
                            local_delay = 2
                            id_1 = id_state_1
                            id_2 = id_state_2

                            # Walk both series backwards while their
                            # previous states are identical
                            while id_1 > 0 and id_2 > 0:
                                previous_1 = serie_1[id_1 - 1]
                                previous_2 = serie_2[id_2 - 1]
                                if previous_1 != previous_2:
                                    break
                                local_delay += 1
                                id_1 -= 1
                                id_2 -= 1

                            global_delay = max(global_delay, local_delay)
                            self.assertTrue(local_delay <= delay)

        # The returned delay is the largest one recomputed above
        self.assertEqual(delay, global_delay)
# Make the project modules under src/objects importable
sys.path.insert(0, 'src/objects')

from utils import eprint
from logicProgram import LogicProgram
from lfkt import LFkT

# 1: Main
#------------
if __name__ == '__main__':

    # 0) Example from text file representing a logic program
    #--------------------------------------------------------
    eprint("Example using logic program definition file:")
    eprint("----------------------------------------------")

    # Load the reference program from its definition file
    benchmark = LogicProgram.load_from_file(
        "benchmarks/logic_programs/repressilator_delayed.lp")

    eprint("Original logic program: \n", benchmark.logic_form())

    # Size passed to generate_all_time_series
    time_serie_size = 10

    eprint("Generating time series of size ", time_serie_size)

    # NOTE(review): `input` shadows the builtin of the same name
    input = benchmark.generate_all_time_series(time_serie_size)

    # Print one time series per line
    eprint("LFkT input:")
    for s in input:
        eprint(s)

    # Learn a model from the time series of the reference program
    model = LFkT.fit(benchmark.get_variables(), benchmark.get_values(), input)
def test_interprete(self):
    """Check the output (DC, DS) of LUST.interprete.

    With no transitions both outputs must be empty. With the transitions of
    several random programs, the transitions are re-split here into
    deterministic (D) and non-deterministic (ND) ones and compared with DC
    and with each set of DS.
    """
    print(">> LUST.interprete(variables, values, transitions)")

    for _ in range(self.__nb_unit_test):

        # No transitions
        variables = [
            "x" + str(idx)
            for idx in range(random.randint(1, self.__nb_variables))
        ]
        values = [
            list(range(random.randint(2, self.__nb_values)))
            for _ in variables
        ]

        min_body_size = 0
        max_body_size = random.randint(min_body_size, len(variables))
        p = LogicProgram.random(variables, values, min_body_size,
                                max_body_size)

        # Drawn but not used by the checks below
        var = random.randint(0, len(p.get_variables()) - 1)
        val = random.randint(0, len(p.get_values()[var]) - 1)

        DC, DS = LUST.interprete(p.get_variables(), p.get_values(), [])
        self.assertEqual(DC, [])
        self.assertEqual(DS, [])

        # Regular case
        variables = [
            "x" + str(idx)
            for idx in range(random.randint(1, self.__nb_variables))
        ]
        values = [
            list(range(random.randint(2, self.__nb_values)))
            for _ in variables
        ]

        nb_programs = random.randint(1, self.__max_programs)
        transitions = []

        for _ in range(nb_programs):
            # Generate transitions
            min_body_size = 0
            max_body_size = random.randint(min_body_size, len(variables))
            p = LogicProgram.random(variables, values, min_body_size,
                                    max_body_size)
            transitions += p.generate_all_transitions()

        # Drawn but not used by the checks below
        var = random.randint(0, len(p.get_variables()) - 1)
        val = random.randint(0, len(p.get_values()[var]) - 1)

        DC, DS = LUST.interprete(p.get_variables(), p.get_values(),
                                 transitions)

        # A transition is non-deterministic when another transition has the
        # same origin state but a different target state
        D = []
        ND = []
        for origin, target in transitions:
            conflicting = any(
                origin == other_origin and target != other_target
                for other_origin, other_target in transitions)
            if conflicting:
                ND.append([origin, target])
            else:
                D.append([origin, target])

        # All deterministic are only in DC
        for origin, target in D:
            self.assertTrue([origin, target] in DC)
            for subset in DS:
                self.assertTrue([origin, target] not in subset)

        # All DC are deterministic
        for origin, target in DC:
            self.assertTrue([origin, target] in D)

        # All non deterministic sets are set (no duplicated transition)
        for subset in DS:
            for origin, target in subset:
                occ = sum(
                    1 for other_origin, other_target in subset
                    if origin == other_origin and target == other_target)
                self.assertEqual(occ, 1)

        # All input origin state appears in each DS TODO
        for origin, _ in ND:
            for subset in DS:
                occurs = any(origin == other_origin
                             for other_origin, _ in subset)
                self.assertTrue(occurs)

        # All DS are deterministic
        for subset in DS:
            for origin, target in subset:
                for other_origin, other_target in subset:
                    if origin == other_origin:
                        self.assertTrue(target == other_target)
def evaluate_on_benchmark(algorithm, benchmark, train_size=None):
    """
    Evaluate accuracy and explainability of an algorithm over a given benchmark
    with a given number/proportion of training samples.

    The evaluation is repeated `run_tests` times (module-level setting) on
    reshuffled transitions and the scores are averaged.

    Args:
        algorithm: object exposing fit(variables, values, transitions)
            Algorithm to be evaluated. When fit returns a list of models,
            only the first one is evaluated.
        benchmark: String
            Label of the benchmark to be tested: key of the module-level
            `benchmarks` mapping, whose value is given to
            LogicProgram.load_from_file
        train_size: float in [0,1] or int
            Size of the training set in proportion (float in [0,1])
            or explicit (int); None means all transitions are used

    Returns:
        float
            average prediction precision over all runs, in percent
    """

    # 0) Extract logic program
    #-----------------------
    benchmark = benchmarks[benchmark]
    P = LogicProgram.load_from_file(benchmark)

    # 1) Generate transitions
    #-------------------------------------
    full_transitions = P.generate_all_transitions()

    # 2) Prepare scores containers
    #---------------------------
    results_time = []
    results_common = []
    results_missing = []
    results_over = []
    results_precision = []

    # 3) Average over several tests
    #-----------------------------
    for run in range(run_tests):

        # 3.1 Split train/test sets
        #-----------------------
        random.shuffle(full_transitions)
        train = full_transitions
        test = []

        # Complete, Proportion or explicit?
        if train_size is not None:
            if isinstance(train_size, float):  # percentage
                last_obs = max(int(train_size * len(full_transitions)), 1)
            else:  # exact number of transitions
                last_obs = train_size
            train = full_transitions[:last_obs]
            test = full_transitions[last_obs:]

        # DBG
        if run == 0:
            eprint(">>> Start Training on " + str(len(train)) + "/" +
                   str(len(full_transitions)) + " transitions (" +
                   str(round(100 * len(train) / len(full_transitions), 2)) +
                   "%)")

        eprint("\r>>> run: " + str(run + 1) + "/" + str(run_tests), end='')

        # 3.2) Learn from training set
        #-------------------------
        start = time.time()
        model = algorithm.fit(P.get_variables(), P.get_values(), train)
        end = time.time()
        results_time.append(round(end - start, 3))

        # 3.3) Evaluate model against originals rules
        #-----------------------------------------------

        # LUST special case: a list of models is returned, keep the first
        if isinstance(model, list):
            model = model[0]

        common, missing, over = P.compare(model)

        # Collect scores
        results_common.append(len(common))
        results_missing.append(len(missing))
        results_over.append(len(over))

        # Perfect case: evaluate over all transitions
        if len(test) == 0:
            test = train

        # 3.4) Evaluate accuracy prediction over unseen states
        #-------------------------------------------------
        pred = [(s1, model.next(s1)) for s1, s2 in test]
        precision = round(LogicProgram.precision(test, pred), 2)

        results_precision.append(precision)

    # 4) Average scores
    #-------------------
    run_time = sum(results_time) / run_tests
    common = sum(results_common) / run_tests
    missing = sum(results_missing) / run_tests
    over = sum(results_over) / run_tests
    precision = sum(results_precision) / run_tests

    # The "Over" percentage is relative to the model of the last run
    eprint()
    eprint(">>> Run time: " + str(run_time) + "s")
    eprint(">>> Logic Program comparaison:")
    eprint(">>>> AVG Common: " + str(common) + "/" + str(len(P.get_rules())) +
           "(" + str(round(100 * common / len(P.get_rules()), 2)) + "%)")
    eprint(">>>> AVG Missing: " + str(missing) + "/" +
           str(len(P.get_rules())) + "(" +
           str(round(100 * missing / len(P.get_rules()), 2)) + "%)")
    eprint(">>>> AVG Over: " + str(over) + "/" + str(len(P.get_rules())) +
           "(" + str(round(100 * over / len(model.get_rules()), 2)) + "%)")
    eprint(">>> Prediction precision")
    eprint(">>>> AVG accuracy: " + str(round(precision * 100, 2)) + "%")

    return round(precision * 100, 2)
def _percentage(part, total):
    """Return `part` as a percentage of `total`, rounded to 2 decimals (0.0 if `total` is 0)."""
    if total == 0:
        return 0.0
    return round(100 * part / total, 2)


def _nn_binary_transitions(NN, states):
    """
    Predict the successor of every state of `states` with `NN` in a single batch.

    Each network output is thresholded at 0.5, so predicted states only
    contain 0/1 values.

    Returns:
        list of (state, predicted_state) pairs, in the order of `states`
    """
    if len(states) == 0:
        # Nothing to predict; avoids feeding an empty batch to the network
        return []

    outputs = NN.predict(np.array(states))
    return [(state, [int(value > 0.5) for value in output])
            for state, output in zip(states, outputs)]


def evaluate_on_benchmark_with_NN(algorithm, benchmark, train_size, artificial_size=None):
    """
    Evaluate accuracy and explainability of an algorithm over a given benchmark
    with a given number/proportion of training samples.
    The training set is extended with artificial transitions predicted by a
    neural network trained on the observed transitions.

    Args:
        algorithm: learner exposing fit(variables, values, transitions)
            Its result is compared to the original program and used to
            predict next states. If fit returns a list of programs, only the
            first one is evaluated.
        benchmark: key of `benchmarks`
            Label of the benchmark to be tested
        train_size: None, float or int
            Size of the training set: all transitions (None),
            a proportion (float) or an explicit number of transitions (int)
        artificial_size: None or int
            Number of artificial transitions requested from the neural network.
            When None, or not smaller than the number of unseen transitions,
            every unobserved state is predicted instead.

    Returns:
        float: accuracy of the learned model in percent,
        averaged over `run_tests` runs
    """

    # 0) Extract logic program
    #-----------------------
    P = LogicProgram.load_from_file(benchmarks[benchmark])

    # 1) Generate transitions
    #-------------------------------------
    full_transitions = P.generate_all_transitions()

    # 2) Prepare scores containers
    #---------------------------
    results_time = []
    results_common = []
    results_missing = []
    results_over = []
    results_precision_NN = []
    results_precision_algo = []

    # 3) Average over several tests
    #-----------------------------
    for run in range(run_tests):

        # 3.1 Split train/test sets
        #-----------------------
        random.shuffle(full_transitions)
        train = full_transitions
        test = []

        # Complete, Proportion or explicit?
        if train_size is not None:
            if isinstance(train_size, float):  # percentage
                last_obs = max(int(train_size * len(full_transitions)), 1)
            else:  # exact number of transitions
                last_obs = train_size
            train = full_transitions[:last_obs]
            test = full_transitions[last_obs:]

        # Same condition drives both the log messages and the generation below
        generate_all = artificial_size is None or artificial_size >= len(test)

        # DBG
        if run == 0:
            eprint(">>> Start Training on " + str(len(train)) + "/" +
                   str(len(full_transitions)) + " transitions (" +
                   str(_percentage(len(train), len(full_transitions))) + "%)")
            if generate_all:
                if artificial_size is not None and artificial_size > len(test):
                    eprint(
                        ">>>> Warning given artificial training set size is greater than total unseen transitions: "
                        + str(artificial_size) + "/" + str(len(test)))
                eprint(">>>> Generating all " + str(len(test)) +
                       " test transitions from NN")
            else:
                eprint(">>>> Generating " + str(artificial_size) +
                       " random artificial transitions from NN")

        eprint("\r>>> run: " + str(run + 1) + "/" + str(run_tests), end='')

        # 3.2) Train Neural Network
        #---------------------------
        start = time.time()
        train_X = np.array([s1 for s1, s2 in train])
        train_y = np.array([s2 for s1, s2 in train])

        NN = Sequential()
        NN.add(Dense(128, activation='relu', input_dim=train_X.shape[1]))
        NN.add(Dense(64, activation='relu'))
        NN.add(Dense(32, activation='relu'))
        NN.add(Dense(train_y.shape[1], activation='sigmoid'))
        NN.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

        # Train the model, iterating on the data in batches of 32 samples
        NN.fit(train_X, train_y, epochs=100, batch_size=32, verbose=0)

        # 3.3) Generate artificial data
        #------------------------------

        # Observed states, hashed once: only unknown states are predicted
        observed = {tuple(s1) for s1, s2 in train}

        if generate_all:
            # All unobserved transitions will be predicted by the NN
            unknown_states = [s1 for s1, s2 in full_transitions
                              if tuple(s1) not in observed]
        else:
            # Draw the requested number of random unobserved states
            domains = P.get_values()
            nb_variables = len(P.get_variables())
            unknown_states = []
            while len(unknown_states) < artificial_size:
                s1 = [random.randint(0, len(domains[var]) - 1)
                      for var in range(nb_variables)]  # random state
                if tuple(s1) not in observed:
                    unknown_states.append(s1)

        generated = _nn_binary_transitions(NN, unknown_states)

        # Merge raw data + artificial
        train = train + generated

        # 3.4) Learn from extended training set
        #---------------------------------------
        model = algorithm.fit(P.get_variables(), P.get_values(), train)
        end = time.time()
        results_time.append(round(end - start, 3))

        # LUST special case: fit returns a list of programs, evaluate the first
        if isinstance(model, list):
            model = model[0]

        # 3.5) Evaluate model against originals rules
        #-----------------------------------------------
        common, missing, over = P.compare(model)

        # Collect scores
        results_common.append(len(common))
        results_missing.append(len(missing))
        results_over.append(len(over))

        # Perfect case: evaluate over all transitions
        if len(test) == 0:
            test = train

        # 3.6) Evaluate accuracy prediction over unseen states
        #------------------------------------------------------

        # NN accuracy
        pred = _nn_binary_transitions(NN, [s1 for s1, s2 in test])
        precision_NN = round(LogicProgram.precision(test, pred), 2)

        # Algorithm accuracy
        pred = [(s1, model.next(s1)) for s1, s2 in test]
        precision_algo = round(LogicProgram.precision(test, pred), 2)

        results_precision_NN.append(precision_NN)
        results_precision_algo.append(precision_algo)

    # 4) Average scores
    #-------------------
    run_time = sum(results_time) / run_tests
    common = sum(results_common) / run_tests
    missing = sum(results_missing) / run_tests
    over = sum(results_over) / run_tests
    precision_NN = sum(results_precision_NN) / run_tests
    precision_algo = sum(results_precision_algo) / run_tests

    nb_rules = len(P.get_rules())

    eprint()
    eprint(">>> Scores over " + str(len(test)) + " test samples:")
    eprint(">>> Run time: " + str(run_time) + "s")
    eprint(">>>> Logic Program comparaison:")
    eprint(">>>>> AVG Common: " + str(common) + "/" + str(nb_rules) + "(" +
           str(_percentage(common, nb_rules)) + "%)")
    eprint(">>>>> AVG Missing: " + str(missing) + "/" + str(nb_rules) + "(" +
           str(_percentage(missing, nb_rules)) + "%)")
    # NOTE(review): the ratio is taken over the rules of the model learned in
    # the last run while the printed denominator is the original program size.
    eprint(">>>>> AVG Over: " + str(over) + "/" + str(nb_rules) + "(" +
           str(_percentage(over, len(model.get_rules()))) + "%)")
    eprint(">>>> Prediction precision")
    eprint(">>>>> AVG accuracy NN: " + str(round(precision_NN * 100, 2)) + "%")
    eprint(">>>>> AVG accuracy algorithm: " +
           str(round(precision_algo * 100, 2)) + "%")

    return round(precision_algo * 100, 2)
def test_fit(self):
    """
    Check LUST.fit on an empty set of transitions and on the union of the
    transitions of several random logic programs.

    With no transitions, a single program without rules over the given
    variables/values is expected. Otherwise the transitions generated by the
    learned programs must be exactly the transitions given as input.
    """
    print(">> LUST.fit(variables, values, transitions)")

    # No transitions
    variables = [
        "x" + str(i)
        for i in range(random.randint(1, self.__nb_variables))
    ]
    values = [
        list(range(random.randint(2, self.__nb_values)))
        for _ in variables
    ]

    min_body_size = 0
    max_body_size = random.randint(min_body_size, len(variables))
    source = LogicProgram.random(variables, values, min_body_size,
                                 max_body_size)

    learned = LUST.fit(source.get_variables(), source.get_values(), [])
    self.assertEqual(len(learned), 1)

    empty_program = learned[0]
    self.assertEqual(empty_program.get_variables(), source.get_variables())
    self.assertEqual(empty_program.get_values(), source.get_values())
    self.assertEqual(empty_program.get_rules(), [])

    for _ in range(self.__nb_unit_test):
        variables = [
            "x" + str(i)
            for i in range(random.randint(1, self.__nb_variables))
        ]
        values = [
            list(range(random.randint(2, self.__nb_values)))
            for _ in variables
        ]

        # Observations are the union of the transitions of several programs
        nb_programs = random.randint(1, self.__max_programs)
        transitions = []
        for _ in range(nb_programs):
            min_body_size = 0
            max_body_size = random.randint(min_body_size, len(variables))
            source = LogicProgram.random(variables, values, min_body_size,
                                         max_body_size)
            transitions += source.generate_all_transitions()

        # nb_programs >= 1, so `source` is the last generated program
        learned = LUST.fit(source.get_variables(), source.get_values(),
                           transitions)

        # Transitions realized by the learned programs
        predictions = []
        for program in learned:
            predictions += program.generate_all_transitions()

        # All original transitions are predicted
        for s1, s2 in transitions:
            self.assertIn([s1, s2], predictions)

        # All predictions are in original transitions
        for s1, s2 in predictions:
            self.assertIn([s1, s2], transitions)