def longest_prefix_cex_processing(s_union_s_dot_a: list, cex: tuple, closedness='suffix'):
    """
    Suffix processing strategy found in Shahbaz-Groz paper 'Inferring Mealy Machines'.
    It splits the counterexample into prefix and suffix. The prefix is the longest element of the S union S.A that
    matches the beginning of the counterexample. By removing such prefixes from counterexample, no consistency check
    is needed.

    Args:

        s_union_s_dot_a: list of all prefixes found in observation table, expected to be sorted from shortest
            to longest. It is scanned from its last element towards its first and the first element that is a
            prefix of ``cex`` is used. The list itself is NOT modified.
        cex: counterexample
        closedness: either 'suffix' or 'prefix'. With 'suffix' the result is built with ``all_suffixes``,
            with any other value it is built with ``all_prefixes``. (Default value = 'suffix')

    Returns:

        suffixes to add to the E set

    """
    trimmed_suffix = None
    # reversed() walks the list back-to-front without reversing the caller's list in place
    # (an in-place reverse() would flip the scan order on every subsequent call with the same list).
    for p in reversed(s_union_s_dot_a):
        if p == cex[:len(p)]:
            trimmed_suffix = cex[len(p):]
            break

    # If nothing matched, or the match consumed the whole counterexample (empty remainder),
    # fall back to the complete counterexample.
    trimmed_suffix = trimmed_suffix if trimmed_suffix else cex

    suffixes = all_suffixes(trimmed_suffix) if closedness == 'suffix' else all_prefixes(trimmed_suffix)
    suffixes.reverse()
    return suffixes
def rs_cex_processing(sul: SUL, cex: tuple, hypothesis, suffix_closedness=True, closedness='suffix'):
    """Rivest-Schapire counterexample processing.

    Performs a binary search over the split position of the counterexample. For every candidate position the
    hypothesis is run on the inputs before the split, the ``prefix`` of the state it reaches is concatenated with
    the remaining inputs, and that sequence is queried on the system under learning. The last output of this query
    is compared with the last output obtained for the original counterexample to decide in which half the search
    continues.

    Args:

        sul: system under learning
        cex: found counterexample
        hypothesis: hypothesis on which counterexample was found
        suffix_closedness: If true all suffixes will be added, else just one (Default value = True)
        closedness: either 'suffix' or 'prefix'. Only used when ``suffix_closedness`` is true.
            (Default value = 'suffix')

    Returns:

        suffixes to be added to the E set

    """
    reference_output = sul.query(cex)
    inputs = list(cex)

    low, high = 1, len(inputs) - 2

    while True:
        hypothesis.reset_to_initial()
        split = (low + high) // 2

        # Drive the hypothesis with the first `split` inputs of the counterexample
        for symbol in inputs[:split]:
            hypothesis.step(symbol)
        access_sequence = hypothesis.current_state.prefix

        remainder = tuple(inputs[split:])
        observed = sul.query(access_sequence + remainder)

        # Only the last output symbol is compared with the counterexample's last output
        if observed[-1] == reference_output[-1]:
            low = split + 1
            if high < low:
                suffix = tuple(remainder[1:])
                break
        else:
            high = split - 1
            if high < low:
                suffix = remainder
                break

    if not suffix_closedness:
        return [suffix]

    if closedness == 'suffix':
        suffixes = all_suffixes(suffix)
    else:
        suffixes = all_prefixes(suffix)
    suffixes.reverse()
    return suffixes
def rpni_mealy_example():
    """Learn a Mealy machine with RPNI from randomly sampled traces of a model loaded from a dot file.

    1000 random input sequences of length 1 to 10 are drawn over the model's input alphabet. For each prefix of
    each sequence, the pair (prefix, last output of the model on that prefix) is added to the data set, which is
    then passed to ``run_RPNI``.

    Returns:

        model returned by ``run_RPNI``

    """
    import random
    from aalpy.learning_algs import run_RPNI
    from aalpy.utils import generate_random_mealy_machine, load_automaton_from_file
    from aalpy.utils.HelperFunctions import all_prefixes

    random.seed(1)

    # NOTE(review): the random model is immediately replaced by the loaded one. The call is kept because it
    # presumably consumes random numbers and therefore influences the sequences sampled below - confirm.
    model = generate_random_mealy_machine(num_states=5, input_alphabet=[1, 2, 3], output_alphabet=['a', 'b'])
    model = load_automaton_from_file('DotModels/Bluetooth/bluetooth_model.dot', automaton_type='mealy')

    alphabet = model.get_input_alphabet()
    sample_count = 1000

    samples = []
    for _ in range(sample_count):
        length = random.randint(1, 10)
        word = random.choices(alphabet, k=length)

        # every prefix of the sampled word is labelled and stored as well
        samples.extend(
            (prefix, model.compute_output_seq(model.initial_state, prefix)[-1])
            for prefix in all_prefixes(word)
        )

    learned_model = run_RPNI(samples, automaton_type='mealy', print_info=True)
    return learned_model
def run_Lstar(alphabet: list, sul: SUL, eq_oracle: Oracle, automaton_type, closing_strategy='longest_first',
              cex_processing='rs', suffix_closedness=True, closedness_type='suffix', max_learning_rounds=None,
              cache_and_non_det_check=True, return_data=False, print_level=2):
    """Executes L* algorithm with Rivest-Schapire counter example processing.

    Args:

        alphabet: input alphabet

        sul: system under learning

        eq_oracle: equivalence oracle

        automaton_type: type of automaton to be learned. Either 'dfa', 'mealy' or 'moore'.

        closing_strategy: closing strategy used in the close method. Either 'longest_first', 'shortest_first' or
            'single' (Default value = 'longest_first')

        cex_processing: Counterexample processing strategy. Either None, 'rs' (Rivest-Schapire) or 'longest_prefix'.
            (Default value = 'rs')

        suffix_closedness: if True E set will be suffix closed. If False, just a single suffix will be added.
            Only forwarded to the 'rs' counterexample processing. (Default value = True)

        closedness_type: either 'suffix' or 'prefix'. If suffix, E set will be suffix closed, prefix closed
            otherwise meaning that all prefixes of the suffix will be added. (Default value = 'suffix')

        max_learning_rounds: number of learning rounds after which learning will terminate (Default value = None)

        cache_and_non_det_check: Use caching and non-determinism checks (Default value = True)

        return_data: if True, a map containing all information(runtime/#queries/#steps) will be returned
            (Default value = False)

        print_level: 0 - None, 1 - just results, 2 - current round and hypothesis size, 3 - educational/debug
            (Default value = 2)

    Returns:

        automaton of type automaton_type (a tuple of the automaton and a dict containing all information about
        learning if 'return_data' is True)

    """

    assert cex_processing in counterexample_processing_strategy
    assert closedness_type in closedness_options
    assert print_level in print_options

    if cache_and_non_det_check:
        # Wrap the sul in the CacheSUL, so that all steps/queries are cached
        sul = CacheSUL(sul)
        eq_oracle.sul = sul

    start_time = time.time()
    eq_query_time = 0
    learning_rounds = 0
    hypothesis = None

    observation_table = ObservationTable(alphabet, sul, automaton_type)

    # Initial update of observation table, for empty row
    observation_table.update_obs_table()

    while True:
        # Check the round limit BEFORE counting a new round, so that 'learning_rounds' always equals the
        # number of rounds that were actually executed (previously it was one too high when the limit hit).
        if max_learning_rounds and learning_rounds == max_learning_rounds:
            break
        learning_rounds += 1

        # Make observation table consistent (iff there is no counterexample processing)
        if not cex_processing:
            inconsistent_rows = observation_table.get_causes_of_inconsistency()
            while inconsistent_rows is not None:
                extend_set(observation_table.E, inconsistent_rows)
                observation_table.update_obs_table(e_set=inconsistent_rows)
                inconsistent_rows = observation_table.get_causes_of_inconsistency()

        # Close observation table
        rows_to_close = observation_table.get_rows_to_close(closing_strategy)
        while rows_to_close is not None:
            rows_to_query = []
            for row in rows_to_close:
                observation_table.S.append(row)
                rows_to_query.extend([row + (a,) for a in alphabet])
            observation_table.update_obs_table(s_set=rows_to_query)
            rows_to_close = observation_table.get_rows_to_close(closing_strategy)

        # Generate hypothesis
        hypothesis = observation_table.gen_hypothesis(check_for_duplicate_rows=cex_processing is None)

        if print_level > 1:
            print(f'Hypothesis {learning_rounds}: {len(hypothesis.states)} states.')

        if print_level == 3:
            print_observation_table(observation_table, 'det')

        # Find counterexample (time spent in the equivalence oracle is tracked separately)
        eq_query_start = time.time()
        cex = eq_oracle.find_cex(hypothesis)
        eq_query_time += time.time() - eq_query_start

        # If no counterexample is found, return the hypothesis
        if cex is None:
            break

        if print_level == 3:
            print('Counterexample', cex)

        # Process counterexample and ask membership queries
        if not cex_processing:
            # Without counterexample processing, all prefixes of the counterexample are added to S
            # together with their one-letter extensions.
            s_to_update = []
            added_rows = extend_set(observation_table.S, all_prefixes(cex))
            s_to_update.extend(added_rows)
            for p in added_rows:
                s_to_update.extend([p + (a,) for a in alphabet])

            observation_table.update_obs_table(s_set=s_to_update)
            continue
        elif cex_processing == 'longest_prefix':
            cex_suffixes = longest_prefix_cex_processing(observation_table.S + list(observation_table.s_dot_a()),
                                                         cex, closedness_type)
        else:
            cex_suffixes = rs_cex_processing(sul, cex, hypothesis, suffix_closedness, closedness_type)

        added_suffixes = extend_set(observation_table.E, cex_suffixes)
        observation_table.update_obs_table(e_set=added_suffixes)

    total_time = round(time.time() - start_time, 2)
    eq_query_time = round(eq_query_time, 2)
    learning_time = round(total_time - eq_query_time, 2)

    info = {
        'learning_rounds': learning_rounds,
        'automaton_size': len(hypothesis.states),
        'queries_learning': sul.num_queries,
        'steps_learning': sul.num_steps,
        'queries_eq_oracle': eq_oracle.num_queries,
        'steps_eq_oracle': eq_oracle.num_steps,
        'learning_time': learning_time,
        'eq_oracle_time': eq_query_time,
        'total_time': total_time,
        'characterization set': observation_table.E
    }
    if cache_and_non_det_check:
        info['cache_saved'] = sul.num_cached_queries

    if print_level > 0:
        print_learning_info(info)

    if return_data:
        return hypothesis, info

    return hypothesis