def generate_data_based_on_characterization_set(automaton, automaton_type='mealy'):
    """
    Generate labeled training sequences for an RNN from an automaton, guided by
    the characterization set obtained via L*.

    Re-learns the given automaton with L* (return_data=True) to obtain its
    characterization set, then builds input sequences by combining state access
    prefixes, single inputs, and characterization-set suffixes. Each sequence is
    labeled with the automaton's last output for that sequence.

    :param automaton: automaton to generate data from (must support
        get_input_alphabet(); its states expose .prefix and .output_fun)
    :param automaton_type: 'mealy' or 'dfa' — selects the SUL wrapper
    :return: (train_seq, train_labels) — token-encoded inputs and labels
    """
    from aalpy.SULs import MealySUL, DfaSUL
    from aalpy.oracles import RandomWalkEqOracle
    from aalpy.learning_algs import run_Lstar

    # automaton = load_automaton_from_file(path_to_automaton, automaton_type)

    alphabet = automaton.get_input_alphabet()

    # NOTE(review): the oracle is given the automaton itself rather than the SUL
    # created below — presumably aalpy accepts an automaton here; confirm this
    # matches RandomWalkEqOracle's expected second argument.
    eq_oracle = RandomWalkEqOracle(alphabet, automaton, num_steps=5000, reset_prob=0.09,
                                   reset_after_cex=True)

    sul = DfaSUL(automaton) if automaton_type == 'dfa' else MealySUL(automaton)

    # Re-run L* to recover the characterization set; rebinds `automaton` to the
    # learned hypothesis (whose states carry access prefixes).
    automaton, data = run_Lstar(alphabet, sul, eq_oracle, automaton_type=automaton_type,
                                print_level=0, return_data=True, suffix_closedness=True)

    characterization_set = data['characterization set']
    prefixes = [state.prefix for state in automaton.states]

    # Base set: every access prefix followed by every distinguishing suffix.
    sequences = [p + e for e in characterization_set for p in prefixes]
    # Extend with prefix + one input letter + suffix combinations.
    sequences.extend([p + tuple([i]) + e for p in prefixes
                      for i in automaton.get_input_alphabet()
                      for e in characterization_set])

    # sequences.extend([p + e for p in sequences for e in characterization_set])

    # One round of growing sequences by appending letter + suffix to the
    # sequences built so far (iterates over a snapshot of `sequences`).
    for _ in range(1):
        sequences.extend([p + tuple([i]) + e for p in sequences
                          for i in automaton.get_input_alphabet()
                          for e in characterization_set])

    # Duplicate the data set 3 times (8x total) — presumably deliberate
    # oversampling for training; TODO confirm this is intended.
    for _ in range(3):
        sequences.extend(sequences)

    # Label each sequence with the automaton's final output on that input word.
    labels = [sul.query(s)[-1] for s in sequences]

    sequences = [list(s) for s in sequences]

    input_al = automaton.get_input_alphabet()
    output_al = {output for state in automaton.states for output in state.output_fun.values()}

    # tokenized_dict / seq_to_tokens are defined elsewhere in this module.
    input_dict = tokenized_dict(input_al)
    out_dict = tokenized_dict(output_al)

    train_seq = [seq_to_tokens(word, input_dict) for word in sequences]
    train_labels = [seq_to_tokens(word, out_dict) for word in labels]

    return train_seq, train_labels
def verify_cex(aalpy_model, white_box_model, rnn, cex_set):
    """
    Verify that counterexamples are not spurious and find which model classified correctly

    :param aalpy_model: model obtained by our approach
    :param white_box_model: model obtained by refinement-based learning
    :param rnn: RNN that serves as system under learning
    :param cex_set: found cases of non-conformance between two models
    :return: True if every counterexample consistently favors one model, False otherwise
    """
    winner = None

    for cex in cex_set:
        black_box_sul = DfaSUL(aalpy_model)
        white_box_sul = DfaSUL(white_box_model)
        bb_out = black_box_sul.query(cex)[-1]
        wb_out = white_box_sul.query(cex)[-1]

        # Query the RNN on a fresh internal state for the same word.
        rnn.renew()
        rnn_out = RNN_BinarySUL_for_Weiss_Framework(rnn).query(cex)[-1]

        # Spurious counterexample: the two learned models actually agree.
        if bb_out == wb_out:
            return False
        # Neither model matches the RNN — cannot attribute correctness.
        if bb_out != rnn_out and wb_out != rnn_out:
            return False

        if bb_out == rnn_out:
            # Black-box model matches the RNN; all cexs must favor the same model.
            if winner and winner == 'White-Box':
                assert False
            winner = 'Black-Box'
        else:
            print(bb_out)
            print(rnn_out)
            if winner and winner == 'Black-Box':
                assert False
            winner = 'White-Box'

    print(f'All examples were classified correctly by the {winner} model and misclassified by the other.')
    return True
def accuracy_test():
    """
    Compare random sampling against coverage-guided sampling at finding inputs
    on which a trained RNN disagrees with the ground-truth DFA.

    Trains a GRU classifier on data generated from the ground-truth automaton,
    learns a model of the RNN with L*, then generates 1000 test cases with each
    strategy and prints the percentage of adversarial (misclassified) inputs
    found by each.
    """
    ground_truth_model = load_automaton_from_file(
        'TrainingDataAndAutomata/bp_depth4.dot', automaton_type='dfa')
    input_al = ground_truth_model.get_input_alphabet()
    output_al = [1, 0]

    train_seq, train_labels = generate_data_from_automaton(
        ground_truth_model, input_al, num_examples=10000,
        lens=(1, 2, 3, 5, 8, 10, 12, 15, 20, 25, 30))
    x_train, y_train, x_test, y_test = split_train_validation(
        train_seq, train_labels, 0.8, uniform=True)

    # Train all neural networks with same parameters, this can be configured to
    # train with different parameters
    rnn = RNNClassifier(input_al, output_dim=len(output_al), num_layers=2,
                        hidden_dim=50, x_train=x_train, y_train=y_train,
                        x_test=x_test, y_test=y_test, batch_size=32,
                        nn_type='GRU')
    rnn.train(epochs=150, stop_acc=1.0, stop_epochs=2, verbose=1)

    sul = RnnBinarySUL(rnn)
    gt_sul = DfaSUL(ground_truth_model)

    # Alternative equivalence oracles kept for experimentation; learning below
    # uses only the random-word oracle.
    random_walk_eq_oracle = RandomWalkEqOracle(input_al, sul, num_steps=10000, reset_prob=0.05)
    random_word_eq_oracle = RandomWordEqOracle(input_al, sul, min_walk_len=5, max_walk_len=25, num_walks=1000)
    random_w_eq_oracle = RandomWMethodEqOracle(input_al, sul, walks_per_state=200, walk_len=25)

    learned_model = run_Lstar(input_al, sul, random_word_eq_oracle,
                              automaton_type='dfa', max_learning_rounds=5)

    from random import choice, randint

    num_tc = 1000
    random_tc, coverage_guided_tc = [], []
    for _ in range(num_tc):
        # Purely random word of length 10-25.
        random_tc.append(tuple(choice(input_al) for _ in range(randint(10, 25))))
        # Guided word: state access prefix + random middle + distinguishing suffix.
        prefix = choice(learned_model.states).prefix
        middle = tuple(choice(input_al) for _ in range(20))
        suffix = choice(learned_model.characterization_set)
        coverage_guided_tc.append(prefix + middle + suffix)

    # An input is adversarial when the RNN's outputs differ from ground truth.
    num_adv_random = sum(1 for tc in random_tc
                         if gt_sul.query(tc) != sul.query(tc))
    num_adv_guided = sum(1 for tc in coverage_guided_tc
                         if gt_sul.query(tc) != sul.query(tc))

    print(f'Random sampling: {round((num_adv_random/num_tc)*100,2)}')
    print(f'Guided sampling: {round((num_adv_guided/num_tc)*100,2)}')