# Example no. 1
# 0
def generate_data_based_on_characterization_set(automaton,
                                                automaton_type='mealy'):
    """
    Build a tokenized training set from an automaton's state prefixes and characterization set.

    The automaton is wrapped in a SUL and re-learned with L*. The prefixes of the learned
    states and the characterization set reported by L* are then combined into input
    sequences, each sequence is labelled with the last output the SUL produces for it,
    and the whole data set is replicated 8 times before tokenization.

    :param automaton: automaton to generate data from
    :param automaton_type: 'dfa' wraps the automaton in a DfaSUL, any other value in a MealySUL
    :return: tuple (train_seq, train_labels) of tokenized input sequences and tokenized labels
    """
    from aalpy.SULs import MealySUL, DfaSUL
    from aalpy.oracles import RandomWalkEqOracle
    from aalpy.learning_algs import run_Lstar

    alphabet = automaton.get_input_alphabet()
    sul = DfaSUL(automaton) if automaton_type == 'dfa' else MealySUL(automaton)

    # The oracle is handed the SUL wrapper (not the bare automaton), the same way the
    # learning algorithm is.
    eq_oracle = RandomWalkEqOracle(alphabet,
                                   sul,
                                   num_steps=5000,
                                   reset_prob=0.09,
                                   reset_after_cex=True)

    learned_model, data = run_Lstar(alphabet,
                                    sul,
                                    eq_oracle,
                                    automaton_type=automaton_type,
                                    print_level=0,
                                    return_data=True,
                                    suffix_closedness=True)

    characterization_set = data['characterization set']
    prefixes = [state.prefix for state in learned_model.states]
    input_al = learned_model.get_input_alphabet()

    # prefix + suffix
    sequences = [p + e for e in characterization_set for p in prefixes]

    # prefix + single input + suffix
    sequences.extend([
        p + (i,) + e for p in prefixes
        for i in input_al for e in characterization_set
    ])

    # One further round over everything collected so far. The list is materialized
    # before extending, so the comprehension never iterates over its own output.
    sequences.extend([
        p + (i,) + e for p in sequences
        for i in input_al
        for e in characterization_set
    ])

    # Label every distinct sequence once, then replicate sequences and labels in
    # lockstep. Three doublings give 8 copies; querying after replication would
    # repeat each query 8 times for the same answer.
    copies = 2 ** 3
    labels = [sul.query(s)[-1] for s in sequences] * copies
    sequences = [list(s) for s in sequences * copies]

    if automaton_type == 'dfa':
        # NOTE(review): relies on AALpy DFA states exposing acceptance as
        # `is_accepting` and having no `output_fun` - confirm against the installed version.
        output_al = {state.is_accepting for state in learned_model.states}
    else:
        output_al = {
            output
            for state in learned_model.states
            for output in state.output_fun.values()
        }

    input_dict = tokenized_dict(input_al)
    out_dict = tokenized_dict(output_al)

    train_seq = [seq_to_tokens(word, input_dict) for word in sequences]
    train_labels = [seq_to_tokens(word, out_dict) for word in labels]

    return train_seq, train_labels
def verify_cex(aalpy_model, white_box_model, rnn, cex_set):
    """
    Verify that counterexamples are not spurious and find which model classified correctly.

    :param aalpy_model: model obtained by our approach (reported as 'Black-Box')
    :param white_box_model: model obtained by refinement-based learning (reported as 'White-Box')
    :param rnn: RNN that serves as system under learning
    :param cex_set: found cases of non-conformance between two models
    :return: False as soon as a counterexample is spurious (both models give the same output)
        or neither model agrees with the RNN; True if every counterexample was classified
        correctly by one and the same model (also True when cex_set is empty)
    :raises AssertionError: if some counterexamples are classified correctly by the black-box
        model and others by the white-box model
    """
    correct_model = None
    for cex in cex_set:
        sul1, sul2 = DfaSUL(aalpy_model), DfaSUL(white_box_model)
        output_black_box = sul1.query(cex)[-1]
        output_white_box = sul2.query(cex)[-1]

        rnn.renew()
        rnn_sul = RNN_BinarySUL_for_Weiss_Framework(rnn)
        rnn_output = rnn_sul.query(cex)[-1]

        # Both models agree on this input, so it is not a real counterexample.
        if output_black_box == output_white_box:
            return False
        # Neither model reproduces the RNN's answer.
        if output_black_box != rnn_output and output_white_box != rnn_output:
            return False

        if output_black_box == rnn_output:
            winner = 'Black-Box'
        else:
            print(output_black_box)
            print(rnn_output)
            winner = 'White-Box'

        # Raised explicitly instead of `assert False`, so the consistency check
        # is not stripped when Python runs with -O.
        if correct_model is not None and correct_model != winner:
            raise AssertionError(
                f'Counterexamples disagree on the correct model: {correct_model} and {winner}.')
        correct_model = winner

    print(f'All examples were classified correctly by the {correct_model} model and misclassified by the other.')
    return True
# Example no. 3
# 0
def accuracy_test():
    """
    Compare random and model-guided test generation against a trained RNN.

    Loads a ground-truth DFA, trains a GRU classifier on data generated from it, learns a
    DFA from the network with L*, and then builds 1000 random and 1000 guided test cases
    (learned-state prefix + 20 random inputs + characterization-set suffix). For each test
    suite it prints the percentage of test cases on which the ground-truth SUL and the RNN
    SUL return different query results.
    """
    from random import choice, randint

    dfa = load_automaton_from_file('TrainingDataAndAutomata/bp_depth4.dot',
                                   automaton_type='dfa')
    inputs = dfa.get_input_alphabet()
    outputs = [1, 0]

    sample_lengths = (1, 2, 3, 5, 8, 10, 12, 15, 20, 25, 30)
    train_seq, train_labels = generate_data_from_automaton(dfa,
                                                           inputs,
                                                           num_examples=10000,
                                                           lens=sample_lengths)

    x_train, y_train, x_test, y_test = split_train_validation(
        train_seq, train_labels, 0.8, uniform=True)

    # Network hyper-parameters are fixed here; adjust them in this call to experiment.
    rnn = RNNClassifier(inputs,
                        output_dim=len(outputs),
                        num_layers=2,
                        hidden_dim=50,
                        x_train=x_train,
                        y_train=y_train,
                        x_test=x_test,
                        y_test=y_test,
                        batch_size=32,
                        nn_type='GRU')
    rnn.train(epochs=150, stop_acc=1.0, stop_epochs=2, verbose=1)

    rnn_sul = RnnBinarySUL(rnn)
    reference_sul = DfaSUL(dfa)

    # Three oracles are constructed; only the random-word oracle is passed to L* below.
    random_walk_eq_oracle = RandomWalkEqOracle(
        inputs, rnn_sul, num_steps=10000, reset_prob=0.05)
    random_word_eq_oracle = RandomWordEqOracle(
        inputs, rnn_sul, min_walk_len=5, max_walk_len=25, num_walks=1000)
    random_w_eq_oracle = RandomWMethodEqOracle(
        inputs, rnn_sul, walks_per_state=200, walk_len=25)

    hypothesis = run_Lstar(inputs,
                           rnn_sul,
                           random_word_eq_oracle,
                           automaton_type='dfa',
                           max_learning_rounds=5)

    num_tc = 1000
    random_tc, coverage_guided_tc = [], []
    for _ in range(num_tc):
        # Purely random word of length 10..25.
        word_len = randint(10, 25)
        random_tc.append(tuple(choice(inputs) for _ in range(word_len)))

        # Guided word: reach a learned state, wander 20 steps, finish with a suffix
        # from the characterization set.
        access = choice(hypothesis.states).prefix
        infix = tuple(choice(inputs) for _ in range(20))
        distinguishing = choice(hypothesis.characterization_set)
        coverage_guided_tc.append(access + infix + distinguishing)

    def count_mismatches(test_cases):
        # Number of test cases where ground truth and RNN query results differ.
        mismatches = 0
        for tc in test_cases:
            expected = reference_sul.query(tc)
            observed = rnn_sul.query(tc)
            if expected != observed:
                mismatches += 1
        return mismatches

    random_mismatches = count_mismatches(random_tc)
    guided_mismatches = count_mismatches(coverage_guided_tc)

    print(f'Random sampling: {round((random_mismatches/num_tc)*100,2)}')
    print(f'Guided sampling: {round((guided_mismatches/num_tc)*100,2)}')