Example #1
0
def train_RNN_on_mealy_machine(mealy_machine: MealyMachine,
                               ex_name,
                               num_hidden_dim=2,
                               hidden_dim_size=50,
                               nn_type='GRU',
                               batch_size=32,
                               lens=(2, 8, 10, 12, 15),
                               stopping_acc=1.0,
                               num_train_samples=15000,
                               load=False):
    """
    Train (or load) an RNN classifier on data sampled from a Mealy machine.

    :param mealy_machine: ground-truth automaton used to generate labeled data
    :param ex_name: experiment name; model is saved to / loaded from
                    'RNN_Models/<ex_name>.rnn'
    :param num_hidden_dim: number of hidden layers of the RNN
    :param hidden_dim_size: number of units per hidden layer
    :param nn_type: 'GRU' or 'LSTM'
    :param batch_size: training batch size
    :param lens: lengths of the randomly sampled input sequences
    :param stopping_acc: accuracy at which training stops early
    :param num_train_samples: number of sequences sampled from the automaton
    :param load: if True, load a previously saved model instead of training
    :return: trained (or loaded) RNNClassifier
    """
    assert nn_type in ['GRU', 'LSTM']

    input_al = mealy_machine.get_input_alphabet()
    # Output alphabet = every output that appears on any transition.
    output_al = {
        output
        for state in mealy_machine.states
        for output in state.output_fun.values()
    }

    # Alternative data generation: generate_data_based_on_characterization_set(mealy_machine)
    train_seq, train_labels = generate_data_from_automaton(
        mealy_machine, input_al, num_examples=num_train_samples, lens=lens)

    # 80/20 train/validation split with uniform label distribution.
    x_train, y_train, x_test, y_test = split_train_validation(train_seq,
                                                              train_labels,
                                                              0.8,
                                                              uniform=True)

    rnn = RNNClassifier(input_al,
                        output_dim=len(output_al),
                        num_layers=num_hidden_dim,
                        hidden_dim=hidden_dim_size,
                        x_train=x_train,
                        y_train=y_train,
                        x_test=x_test,
                        y_test=y_test,
                        batch_size=batch_size,
                        nn_type=nn_type)

    if not load:
        rnn.train(epochs=150, stop_acc=stopping_acc, stop_epochs=3)
        rnn.save(f'RNN_Models/{ex_name}.rnn')
    else:
        rnn.load(f'RNN_Models/{ex_name}.rnn')

    return rnn
Example #2
0
def conformance_check_2_RNNs(experiment='coffee'):
    """
    Show how learning-based testing can find differences between 2 trained RNNs.
    Both RNNs are trained with the same configuration, but it can be different.
    :param experiment: either 'coffee' or 'mqtt'
    :return: set of counterexamples (cases of non-conformance) between trained RNNs
    """
    if experiment == 'coffee':
        mm, exp = get_coffee_machine(), experiment
    else:
        mm, exp = get_mqtt_mealy(), experiment

    input_al = mm.get_input_alphabet()
    # Output alphabet = every output that appears on any transition.
    output_al = {
        output
        for state in mm.states for output in state.output_fun.values()
    }

    train_seq, train_labels = generate_data_from_automaton(mm,
                                                           input_al,
                                                           num_examples=10000,
                                                           lens=(2, 5, 8, 10))

    # Both RNNs are trained on the same data set.
    training_data = (train_seq, train_labels)

    rnn_1 = train_RNN_on_mealy_data(mm, data=training_data, ex_name=f'{exp}_1')
    rnn_2 = train_RNN_on_mealy_data(mm, data=training_data, ex_name=f'{exp}_2')

    learned_automaton_1 = extract_finite_state_transducer(
        rnn_1, input_al, output_al, max_learning_rounds=25)
    learned_automaton_2 = extract_finite_state_transducer(
        rnn_2, input_al, output_al, max_learning_rounds=25)

    sul = MealySUL(learned_automaton_1)
    sul2 = MealySUL(learned_automaton_2)

    # Alternative oracle: LongCexEqOracle(input_al, sul, num_walks=500,
    #                                     min_walk_len=1, max_walk_len=30,
    #                                     reset_after_cex=True)
    eq_oracle = StatePrefixEqOracle(input_al,
                                    sul,
                                    walks_per_state=100,
                                    walk_len=20)

    cex_set = set()
    for i in range(200):
        # Conformance-check automaton 2 against automaton 1 (used by the oracle).
        cex = eq_oracle.find_cex(learned_automaton_2)
        if cex:
            if tuple(cex) not in cex_set:
                print(
                    '--------------------------------------------------------------------------'
                )
                print('Case of Non-Conformance between Automata: ', cex)
                print('Model 1  : ', sul.query(cex))
                print('Model 2  : ', sul2.query(cex))
            cex_set.add(tuple(cex))

    return cex_set
Example #3
0
def accuracy_test():
    """
    Compare random test-case sampling with coverage-guided sampling when
    searching for adversarial inputs (disagreements between a trained RNN and
    the ground-truth DFA it was trained on). Prints the percentage of
    adversarial inputs found by each sampling strategy.
    """
    ground_truth_model = load_automaton_from_file(
        'TrainingDataAndAutomata/bp_depth4.dot', automaton_type='dfa')
    input_al = ground_truth_model.get_input_alphabet()
    output_al = [1, 0]

    train_seq, train_labels = generate_data_from_automaton(ground_truth_model,
                                                           input_al,
                                                           num_examples=10000,
                                                           lens=(1, 2, 3, 5, 8,
                                                                 10, 12, 15,
                                                                 20, 25, 30))

    # 80/20 train/validation split with uniform label distribution.
    x_train, y_train, x_test, y_test = split_train_validation(train_seq,
                                                              train_labels,
                                                              0.8,
                                                              uniform=True)

    rnn = RNNClassifier(input_al,
                        output_dim=len(output_al),
                        num_layers=2,
                        hidden_dim=50,
                        x_train=x_train,
                        y_train=y_train,
                        x_test=x_test,
                        y_test=y_test,
                        batch_size=32,
                        nn_type='GRU')

    rnn.train(epochs=150, stop_acc=1.0, stop_epochs=2, verbose=1)

    sul = RnnBinarySUL(rnn)
    gt_sul = DfaSUL(ground_truth_model)

    # Alternative oracles:
    #   RandomWalkEqOracle(input_al, sul, num_steps=10000, reset_prob=0.05)
    #   RandomWMethodEqOracle(input_al, sul, walks_per_state=200, walk_len=25)
    random_word_eq_oracle = RandomWordEqOracle(input_al,
                                               sul,
                                               min_walk_len=5,
                                               max_walk_len=25,
                                               num_walks=1000)

    learned_model = run_Lstar(input_al,
                              sul,
                              random_word_eq_oracle,
                              automaton_type='dfa',
                              max_learning_rounds=5)

    from random import choice, randint
    random_tc = []
    coverage_guided_tc = []
    num_tc = 1000
    for _ in range(num_tc):
        # Purely random test case.
        random_tc.append(
            tuple(choice(input_al) for _ in range(randint(10, 25))))

        # Coverage-guided test case: state-access prefix + random middle +
        # distinguishing suffix from the characterization set.
        prefix = choice(learned_model.states).prefix
        middle = tuple(choice(input_al) for _ in range(20))
        suffix = choice(learned_model.characterization_set)
        coverage_guided_tc.append(prefix + middle + suffix)

    # Count adversarial inputs (ground truth and RNN disagree) per strategy.
    num_adv_random = 0
    for tc in random_tc:
        correct = gt_sul.query(tc)
        trained = sul.query(tc)
        if correct != trained:
            num_adv_random += 1

    num_adv_guided = 0
    for tc in coverage_guided_tc:
        correct = gt_sul.query(tc)
        trained = sul.query(tc)
        if correct != trained:
            num_adv_guided += 1

    print(f'Random sampling: {round((num_adv_random/num_tc)*100,2)}')
    print(f'Guided sampling: {round((num_adv_guided/num_tc)*100,2)}')
Example #4
0
def retraining_based_on_non_conformance(ground_truth_model=None,
                                        num_rnns=2,
                                        num_training_samples=5000,
                                        samples_lens=(3, 6, 9, 12)):
    """
    Repeatedly train several RNNs on the same data, extract an automaton from
    each, and conformance-check the extracted automata against each other.
    Cases of non-conformance are labeled by the ground-truth model and added
    to the training data, until all extracted automata agree.

    :param ground_truth_model: correct model used for labeling cases of
           non-conformance; defaults to a fresh coffee-machine Mealy machine
    :param num_rnns: number of RNNs to be trained and learned
    :param num_training_samples: initial number of training samples in the training data set
    :param samples_lens: lengths of initial training data set samples
    :return: one RNN obtained after active retraining
    """
    # NOTE: the default used to be `ground_truth_model=get_coffee_machine()`,
    # which is evaluated once at import time and shared across all calls.
    # Using a None sentinel creates a fresh machine per call instead.
    if ground_truth_model is None:
        ground_truth_model = get_coffee_machine()

    assert num_rnns >= 2 and num_training_samples > 0

    input_al = ground_truth_model.get_input_alphabet()

    if isinstance(ground_truth_model, MealyMachine):
        # Output alphabet = every output that appears on any transition.
        output_al = {
            output
            for state in ground_truth_model.states
            for output in state.output_fun.values()
        }
    else:
        output_al = [False, True]

    # Create initial training data
    train_seq, train_labels = generate_data_from_automaton(
        ground_truth_model,
        input_al,
        num_examples=num_training_samples,
        lens=samples_lens)

    # While the input-output behaviour of all trained neural networks is different
    iteration = 0
    while True:
        iteration += 1
        print(f'Learning/extraction round: {iteration}')

        trained_networks = []

        x_train, y_train, x_test, y_test = split_train_validation(train_seq,
                                                                  train_labels,
                                                                  0.8,
                                                                  uniform=True)

        # Train all neural networks with same parameters
        for i in range(num_rnns):
            rnn = RNNClassifier(input_al,
                                output_dim=len(output_al),
                                num_layers=2,
                                hidden_dim=40,
                                x_train=x_train,
                                y_train=y_train,
                                x_test=x_test,
                                y_test=y_test,
                                batch_size=32,
                                nn_type='GRU')
            print(f'Starting training of RNN {i}')
            rnn.train(epochs=150, stop_acc=1.0, stop_epochs=3, verbose=False)
            trained_networks.append(rnn)

        learned_automatons = []

        # Extract automaton for each neural network
        for i, rnn in enumerate(trained_networks):
            print(f'Starting extraction of the automaton from RNN {i}')
            learned_automaton = extract_finite_state_transducer(
                rnn, input_al, output_al, max_learning_rounds=8, print_level=0)
            learned_automatons.append(learned_automaton)

        # Largest automaton first; it serves as the conformance-check basis.
        learned_automatons.sort(key=lambda x: len(x.states), reverse=True)

        # Select one automaton as a basis for conformance-checking. You can also do conformance checking with all pairs
        # of learned automata.
        base_sul = MealySUL(learned_automatons[0])

        # Alternative oracle: LongCexEqOracle(input_al, base_sul, num_walks=500,
        #                                     min_walk_len=1, max_walk_len=30,
        #                                     reset_after_cex=True)
        eq_oracle = StatePrefixEqOracle(input_al,
                                        base_sul,
                                        walks_per_state=100,
                                        walk_len=50)

        cex_set = set()

        # Try to find cases of non-conformance between learned automatons.
        for la in learned_automatons[1:]:
            for i in range(200):
                cex = eq_oracle.find_cex(la)
                if cex:
                    cex_set.add(tuple(cex))

        # If there were no counterexamples between any learned automata, we end the procedure
        if not cex_set:
            for i, la in enumerate(learned_automatons):
                print(f'Size of automata {i}: {len(la.states)}')
            print(learned_automatons[-1])
            print('No counterexamples between extracted automata found.')
            # Return one of the (now mutually conforming) RNNs, as promised by
            # the docstring; previously the loop just broke and returned None.
            return trained_networks[0]

        # Ask ground truth model for correct labels
        new_x, new_y = label_sequences_with_correct_model(
            ground_truth_model, cex_set)

        print(f'Adding {len(cex_set)} new examples to training data.')
        new_x = tokenize(new_x, input_al)
        new_y = tokenize(new_y, output_al)

        train_seq.extend(new_x)
        train_labels.extend(new_y)
        print(f'Size of training data: {len(train_seq)}')
Example #5
0
def retraining_based_on_ground_truth(ground_truth_model=None,
                                     num_train_samples=5000,
                                     lens=(3, 8, 10, 12, 15)):
    """
    Train an RNN, conformance-check it against the ground-truth model, and add
    every counterexample (labeled by the ground truth) to the training data.
    Repeat until no counterexamples are found.

    :param ground_truth_model: correct model used for data generation and conformance checking;
           defaults to a fresh coffee-machine Mealy machine
    :param num_train_samples: num of training samples for the initial data generation
    :param lens: lengths of counterexample
    :return: trained RNN that conforms to the ground truth model
    """
    # NOTE: the default used to be `ground_truth_model=get_coffee_machine()`,
    # which is evaluated once at import time and shared across all calls.
    # Using a None sentinel creates a fresh machine per call instead.
    if ground_truth_model is None:
        ground_truth_model = get_coffee_machine()

    input_al = ground_truth_model.get_input_alphabet()

    if isinstance(ground_truth_model, MealyMachine):
        # Output alphabet = every output that appears on any transition.
        output_al = {
            output
            for state in ground_truth_model.states
            for output in state.output_fun.values()
        }
    else:
        output_al = [False, True]

    # Create initial training data
    train_seq, train_labels = generate_data_from_automaton(
        ground_truth_model,
        input_al,
        num_examples=num_train_samples,
        lens=lens)

    # While the input-output behaviour of all trained neural networks is different
    iteration = 0
    while True:
        iteration += 1

        # split dataset into training and verification
        x_train, y_train, x_test, y_test = split_train_validation(train_seq,
                                                                  train_labels,
                                                                  0.8,
                                                                  uniform=True)

        # Train all neural networks with same parameters, this can be configured to train with different parameters
        rnn = RNNClassifier(input_al,
                            output_dim=len(output_al),
                            num_layers=2,
                            hidden_dim=40,
                            x_train=x_train,
                            y_train=y_train,
                            x_test=x_test,
                            y_test=y_test,
                            batch_size=32,
                            nn_type='GRU')

        print(
            f"Starting training of the neural network for the {iteration} time"
        )
        # Train the NN
        rnn.train(epochs=150, stop_acc=1.0, stop_epochs=3, verbose=0)

        # Inverse of tokenized_dict(output_al): maps token integers back to
        # outputs (renamed from the misleading 'outputs_2_ints').
        ints_2_outputs = {
            integer: output
            for output, integer in tokenized_dict(output_al).items()
        }

        # use RNN as SUL
        sul = RnnMealySUL(rnn, ints_2_outputs)

        # Alternative oracle: LongCexEqOracle(input_al, sul, num_walks=500,
        #                                     min_walk_len=1, max_walk_len=30,
        #                                     reset_after_cex=True)
        eq_oracle = StatePrefixEqOracle(input_al,
                                        sul,
                                        walks_per_state=200,
                                        walk_len=20)

        cex_set = set()

        # Try to find cases of non-conformance between learned automatons.
        print('Searching for counterexample.')
        for i in range(200):
            # Conformance check ground truth model and trained RNN
            # Alternatively, one can extract automaton from RNN and then model check against GT
            cex = eq_oracle.find_cex(ground_truth_model)
            if cex:
                cex_set.add(tuple(cex))

        # if there were no counterexamples between any learned automata, we end the procedure
        if not cex_set:
            print(
                'No counterexamples found between extracted automaton and neural network.'
            )
            # Extract automaton from rnn and print it
            final_model = run_Lstar(input_al,
                                    sul,
                                    eq_oracle,
                                    automaton_type='mealy',
                                    max_learning_rounds=15)
            print(final_model)
            return rnn

        # Ask ground truth model for correct labels
        new_x, new_y = label_sequences_with_correct_model(
            ground_truth_model, cex_set)

        print(f'Adding {len(cex_set)} new examples to training data.')
        new_x = tokenize(new_x, input_al)
        new_y = tokenize(new_y, output_al)

        train_seq.extend(new_x)
        train_labels.extend(new_y)
        print(f'Size of training data: {len(train_seq)}')