Esempio n. 1
0
    def test_saving_loading(self):
        """Round-trip every supported automaton type through save/load.

        Each model is saved, loaded, saved again and loaded once more. The
        twice-loaded model must expose the same input alphabet as the original
        (this check is skipped for the Markov chain) and, for DFA, Moore and
        Mealy models, the same characterization set.

        Raises:
            AssertionError: if a comparison fails, or if any step raises an
                exception; in the latter case the original exception is
                chained as the cause so the traceback stays visible.
        """
        try:
            dfa = get_Angluin_dfa()
            mealy = load_automaton_from_file('../DotModels/Angluin_Mealy.dot',
                                             automaton_type='mealy')
            moore = load_automaton_from_file('../DotModels/Angluin_Moore.dot',
                                             automaton_type='moore')
            onfsm = get_benchmark_ONFSM()
            mdp = get_small_pomdp()
            smm = get_faulty_coffee_machine_SMM()
            mc = generate_random_markov_chain(num_states=10)

            models = [dfa, mealy, moore, onfsm, mc, mdp, smm]
            types = ['dfa', 'mealy', 'moore', 'onfsm', 'mc', 'mdp', 'smm']

            for model, automaton_type in zip(models, types):
                # save() is called without a path; the file is then read back
                # from 'LearnedModel.dot'.
                model.save()
                loaded_model = load_automaton_from_file(
                    'LearnedModel.dot', automaton_type)
                loaded_model.save()
                loaded_model2 = load_automaton_from_file(
                    'LearnedModel.dot', automaton_type)

                if automaton_type != 'mc':
                    ia = model.get_input_alphabet()
                    ia2 = loaded_model2.get_input_alphabet()
                    assert set(ia) == set(ia2), \
                        f'{automaton_type}: input alphabet changed after save/load'
                if automaton_type in {'dfa', 'moore', 'mealy'}:
                    assert model.compute_characterization_set() == \
                        loaded_model2.compute_characterization_set(), \
                        f'{automaton_type}: characterization set changed after save/load'
        except AssertionError:
            # A failed comparison already carries its own message.
            raise
        except Exception as exc:
            # Keep the "test fails with AssertionError" contract, but do not
            # hide what actually went wrong.
            raise AssertionError(f'save/load round trip raised {exc!r}') from exc
Esempio n. 2
0
 def get_test_automata(self):
     """Return the reference automata keyed by a short name.

     The Angluin DFA comes from ``get_Angluin_dfa``; every other entry is
     loaded from a dot file under ``../DotModels``.
     """
     # (name, dot file, automaton type) in the order the entries are inserted.
     dot_models = (
         ("angluin_mealy", '../DotModels/Angluin_Mealy.dot', 'mealy'),
         ("angluin_moore", '../DotModels/Angluin_Moore.dot', 'moore'),
         ("mqtt", '../DotModels/MQTT/emqtt__two_client_will_retain.dot', 'mealy'),
         ("openssl", '../DotModels/TLS/OpenSSL_1.0.2_server_regular.dot', 'mealy'),
         ("tcp_server", '../DotModels/TCP/TCP_Linux_Server.dot', 'mealy'),
     )

     automata = {"angluin_dfa": get_Angluin_dfa()}
     for name, dot_path, kind in dot_models:
         automata[name] = load_automaton_from_file(dot_path, automaton_type=kind)
     return automata
Esempio n. 3
0
def active_alergia_example(example='first_grid'):
    """Run active ALERGIA on an MDP, seeded with randomly sampled traces.

    50000 random input words (6 to 14 symbols long) are queried on the MDP and
    passed, together with a random word sampler, to ``run_active_Alergia``.
    The learned model is printed.

    :param example: name (without ``.dot``) of the MDP in ``./DotModels/MDPs/``
    """
    from random import choice, randint
    from aalpy.SULs import MdpSUL
    from aalpy.utils import load_automaton_from_file
    from aalpy.learning_algs import run_active_Alergia
    from aalpy.learning_algs.stochastic_passive.ActiveAleriga import RandomWordSampler

    mdp = load_automaton_from_file(f'./DotModels/MDPs/{example}.dot', automaton_type='mdp')
    input_alphabet = mdp.get_input_alphabet()

    sul = MdpSUL(mdp)

    def sample_trace():
        # One trace in the form [O, (I, O), (I, O), ...]
        word_len = randint(6, 14)
        inputs = tuple(choice(input_alphabet) for _ in range(word_len))
        observed = sul.query(inputs)
        return [observed[0], *zip(inputs, observed[1:])]

    data = [sample_trace() for _ in range(50000)]

    sampler = RandomWordSampler(num_walks=1000, min_walk_len=8, max_walk_len=20)
    model = run_active_Alergia(data, sul, sampler, n_iter=10)

    print(model)
Esempio n. 4
0
    def test_learning_based_on_accuracy_based_stopping(self):
        """Check property-based stopping of stochastic L* on the 'first_grid' MDP.

        For every combination of automaton type, similarity strategy,
        counterexample processing and counterexample sampling strategy, a model
        is learned from the ground-truth MDP and model checked. The test fails
        if any property value differs from the correct one by more than the
        error bound used for stopping.

        Raises:
            AssertionError: if a learned model exceeds the error bound.
        """
        from itertools import product

        from aalpy.automata import StochasticMealyMachine
        from aalpy.utils import model_check_experiment, get_properties_file, \
            get_correct_prop_values
        from aalpy.automata.StochasticMealyMachine import smm_to_mdp_conversion

        example = 'first_grid'
        mdp = load_automaton_from_file(f'../DotModels/MDPs/{example}.dot', automaton_type='mdp')

        min_rounds = 10
        max_rounds = 500

        aalpy.paths.path_to_prism = "C:/Program Files/prism-4.6/bin/prism.bat"
        aalpy.paths.path_to_properties = "../Benchmarking/prism_eval_props/"

        stopping_based_on_prop = (get_properties_file(example), get_correct_prop_values(example), 0.02)
        error_bound = stopping_based_on_prop[2]

        input_alphabet = mdp.get_input_alphabet()

        automaton_type = ['mdp', 'smm']
        similarity_strategy = ['classic', 'normal', 'chi2']
        cex_processing = [None, 'longest_prefix']
        samples_cex_strategy = [None, 'bfs', 'random:200:0.3']

        # product() iterates in the same order as the equivalent nested loops.
        for aut_type, strategy, cex, sample_cex in product(automaton_type, similarity_strategy,
                                                           cex_processing, samples_cex_strategy):

            # Always learn from the ground-truth MDP. The learned model gets
            # its own name below so it never replaces `mdp` for later rounds.
            sul = StochasticMealySUL(mdp) if aut_type == 'smm' else MdpSUL(mdp)

            eq_oracle = UnseenOutputRandomWalkEqOracle(input_alphabet, sul=sul, num_steps=200,
                                                       reset_prob=0.25,
                                                       reset_after_cex=True)

            learned_model = run_stochastic_Lstar(input_alphabet=input_alphabet, eq_oracle=eq_oracle,
                                                 sul=sul, n_c=20,
                                                 n_resample=1000, min_rounds=min_rounds,
                                                 max_rounds=max_rounds,
                                                 automaton_type=aut_type, strategy=strategy,
                                                 cex_processing=cex,
                                                 samples_cex_strategy=sample_cex, target_unambiguity=0.99,
                                                 property_based_stopping=stopping_based_on_prop,
                                                 print_level=0)

            if isinstance(learned_model, StochasticMealyMachine):
                learned_mdp = smm_to_mdp_conversion(learned_model)
            else:
                learned_mdp = learned_model

            results, diff = model_check_experiment(get_properties_file(example),
                                                   get_correct_prop_values(example), learned_mdp)

            for d in diff.values():
                if d > error_bound:
                    raise AssertionError(
                        f'{aut_type}/{strategy}/{cex}/{sample_cex}: '
                        f'property error {d} exceeds bound {error_bound}')
Esempio n. 5
0
        def __init__(self):
            """Load the five-client MQTT model and initialise client bookkeeping."""
            super().__init__()

            # The dot file is loaded as a Mealy machine and wrapped in a SUL.
            mqtt_model = load_automaton_from_file(
                'DotModels/five_clients_mqtt_abstracted_onfsm.dot',
                automaton_type='mealy',
            )
            self.five_client_mqtt = MealySUL(mqtt_model)

            # Both sets start empty.
            self.connected_clients = set()
            self.subscribed_clients = set()

            self.clients = ('c0', 'c1', 'c2', 'c3', 'c4')
def find_bp_cex():
    """
    Show how the transition focus equivalence oracle can be used to efficiently find counterexamples.

    The oracle is run up to 10 times against the RNN-backed SUL. Every
    counterexample not seen before is printed together with the time (in
    seconds, rounded to two decimals) the oracle needed to find it.
    """
    rnn, alphabet, _ = train_or_load_rnn('bp_2', num_layers=2, hidden_dim=50,
                                         rnn_class=GRUNetwork, train=False)

    model = load_automaton_from_file('TrainingDataAndAutomata/bp_depth4.dot', automaton_type='dfa')
    sul = RNN_BinarySUL_for_Weiss_Framework(rnn)
    eq_oracle = TransitionFocusOracle(alphabet, sul, num_random_walks=1000, walk_len=20)

    seen_cex = set()
    for _ in range(10):
        start_time = time.time()
        cex = eq_oracle.find_cex(model)
        # Time only the oracle call, not the bookkeeping below.
        elapsed = time.time() - start_time

        if cex is None:
            # AALpy oracles return None when no counterexample was found;
            # tuple(None) would raise a TypeError, so skip this round.
            continue

        cex = tuple(cex)
        if cex in seen_cex:
            continue
        seen_cex.add(cex)
        print(round(elapsed, 2), "".join(cex))
Esempio n. 7
0
def benchmark_stochastic_example(example, automaton_type='smm', n_c=20, n_resample=1000, min_rounds=10, max_rounds=500,
                                 strategy='normal', cex_processing='longest_prefix', stopping_based_on_prop=None,
                                 samples_cex_strategy=None):
    """
    Learn a stochastic model of one of the benchmarking examples
    found in Chapter 7 of Martin Tappler's PhD thesis.

    :param example: One of ['first_grid', 'second_grid', 'shared_coin', 'slot_machine']
    :param automaton_type: either smm (stochastic mealy machine) or mdp (Markov decision process)
    :param n_c: cutoff for a state to be considered complete
    :param n_resample: resampling size
    :param min_rounds: minimum number of learning rounds
    :param max_rounds: maximum number of learning rounds
    :param strategy: normal, classic or chi2
    :param cex_processing: counterexample processing strategy
    :param stopping_based_on_prop: a tuple (path to properties, correct values, error bound)
    :param samples_cex_strategy: strategy to sample cex in the trace tree
    :return: learned model, as returned by run_stochastic_Lstar for the given automaton_type

    """
    from aalpy.SULs import MdpSUL
    from aalpy.oracles import RandomWalkEqOracle
    from aalpy.learning_algs import run_stochastic_Lstar
    from aalpy.utils import load_automaton_from_file

    # Specify the path to the dot file containing a MDP
    mdp = load_automaton_from_file(f'./DotModels/MDPs/{example}.dot', automaton_type='mdp')
    input_alphabet = mdp.get_input_alphabet()

    sul = MdpSUL(mdp)
    # A RandomWordEqOracle used to be constructed here and immediately
    # overwritten; only the random walk oracle was ever used for learning.
    eq_oracle = RandomWalkEqOracle(input_alphabet, sul=sul, num_steps=2000, reset_prob=0.25,
                                   reset_after_cex=True)

    learned_mdp = run_stochastic_Lstar(input_alphabet=input_alphabet, eq_oracle=eq_oracle, sul=sul, n_c=n_c,
                                       n_resample=n_resample, min_rounds=min_rounds, max_rounds=max_rounds,
                                       automaton_type=automaton_type, strategy=strategy, cex_processing=cex_processing,
                                       samples_cex_strategy=samples_cex_strategy, target_unambiguity=0.99,
                                       property_based_stopping=stopping_based_on_prop)

    return learned_mdp
Esempio n. 8
0
def rpni_mealy_example():
    """Learn a Mealy machine with RPNI from prefix-closed random samples.

    1000 random input sequences (1 to 10 symbols long) are drawn over the
    alphabet of the Bluetooth model; for every prefix of every sequence the
    last output of the model is recorded as a training sample.

    :return: model learned by run_RPNI
    """
    import random
    from aalpy.learning_algs import run_RPNI
    from aalpy.utils import generate_random_mealy_machine, load_automaton_from_file
    from aalpy.utils.HelperFunctions import all_prefixes

    random.seed(1)

    # NOTE(review): this random machine is replaced on the next line. The call
    # is kept because it presumably advances the seeded RNG and therefore
    # affects the sampled data - confirm before removing it.
    model = generate_random_mealy_machine(num_states=5, input_alphabet=[1, 2, 3], output_alphabet=['a', 'b'])
    model = load_automaton_from_file('DotModels/Bluetooth/bluetooth_model.dot', automaton_type='mealy')

    input_al = model.get_input_alphabet()
    num_sequences = 1000

    data = []
    for _ in range(num_sequences):
        random_seq = random.choices(input_al, k=random.randint(1, 10))
        # make sure that all prefixes are included in the dataset
        data.extend(
            (prefix, model.compute_output_seq(model.initial_state, prefix)[-1])
            for prefix in all_prefixes(random_seq)
        )

    return run_RPNI(data, automaton_type='mealy', print_info=True)
Esempio n. 9
0
def _count_disagreements(reference_sul, candidate_sul, test_cases):
    """Count the test cases on which the two SULs answer a query differently."""
    return sum(1 for tc in test_cases
               if reference_sul.query(tc) != candidate_sul.query(tc))


def accuracy_test():
    """Compare random and model-guided test cases at exposing RNN errors.

    A GRU classifier is trained on data generated from the ground-truth DFA,
    a DFA is learned from the trained network with L*, and two test suites of
    equal size are built: purely random words, and words composed of a learned
    state's prefix, a random middle part and a characterization-set suffix.
    For each suite the percentage of test cases on which the network and the
    ground truth disagree is printed.
    """
    from random import choice, randint

    ground_truth_model = load_automaton_from_file(
        'TrainingDataAndAutomata/bp_depth4.dot', automaton_type='dfa')
    input_al = ground_truth_model.get_input_alphabet()
    output_al = [1, 0]

    train_seq, train_labels = generate_data_from_automaton(ground_truth_model,
                                                           input_al,
                                                           num_examples=10000,
                                                           lens=(1, 2, 3, 5, 8,
                                                                 10, 12, 15,
                                                                 20, 25, 30))

    x_train, y_train, x_test, y_test = split_train_validation(train_seq,
                                                              train_labels,
                                                              0.8,
                                                              uniform=True)

    # A single GRU network is trained here; adjust the parameters as needed.
    rnn = RNNClassifier(input_al,
                        output_dim=len(output_al),
                        num_layers=2,
                        hidden_dim=50,
                        x_train=x_train,
                        y_train=y_train,
                        x_test=x_test,
                        y_test=y_test,
                        batch_size=32,
                        nn_type='GRU')

    rnn.train(epochs=150, stop_acc=1.0, stop_epochs=2, verbose=1)

    sul = RnnBinarySUL(rnn)
    gt_sul = DfaSUL(ground_truth_model)

    # Only the random word oracle is used for learning; the random walk and
    # random W-method oracles that used to be built here were never used.
    random_word_eq_oracle = RandomWordEqOracle(input_al,
                                               sul,
                                               min_walk_len=5,
                                               max_walk_len=25,
                                               num_walks=1000)

    learned_model = run_Lstar(input_al,
                              sul,
                              random_word_eq_oracle,
                              automaton_type='dfa',
                              max_learning_rounds=5)

    random_tc = []
    coverage_guided_tc = []
    num_tc = 1000
    for _ in range(num_tc):
        random_tc.append(
            tuple(choice(input_al) for _ in range(randint(10, 25))))

        # Guided test case: reach a learned state, wander, then distinguish.
        prefix = choice(learned_model.states).prefix
        middle = tuple(choice(input_al) for _ in range(20))
        suffix = choice(learned_model.characterization_set)
        coverage_guided_tc.append(prefix + middle + suffix)

    num_adv_random = _count_disagreements(gt_sul, sul, random_tc)
    num_adv_guided = _count_disagreements(gt_sul, sul, coverage_guided_tc)

    print(f'Random sampling: {round((num_adv_random/num_tc)*100,2)}')
    print(f'Guided sampling: {round((num_adv_guided/num_tc)*100,2)}')
Esempio n. 10
0
def get_tomita(tomita_num):
    """Load ``tomita_<tomita_num>.dot`` as a DFA with prefixes computed."""
    dot_path = f'TrainingDataAndAutomata/tomita_{tomita_num}.dot'
    return load_automaton_from_file(dot_path, automaton_type='dfa', compute_prefixes=True)
Esempio n. 11
0
def get_ssh():
    """Load ``OpenSSH.dot`` as a Mealy machine with prefixes computed."""
    dot_path = 'TrainingDataAndAutomata/OpenSSH.dot'
    return load_automaton_from_file(dot_path, automaton_type='mealy', compute_prefixes=True)
Esempio n. 12
0
def get_tcp():
    """Load ``TCP_Linux_Client.dot`` as a Mealy machine with prefixes computed."""
    dot_path = 'TrainingDataAndAutomata/TCP_Linux_Client.dot'
    return load_automaton_from_file(dot_path, automaton_type='mealy', compute_prefixes=True)
Esempio n. 13
0
def get_coffee_machine():
    """Load ``Coffee_machine.dot`` as a Mealy machine with prefixes computed."""
    dot_path = 'TrainingDataAndAutomata/Coffee_machine.dot'
    return load_automaton_from_file(dot_path, automaton_type='mealy', compute_prefixes=True)
Esempio n. 14
0
def get_mqtt_mealy():
    """Load ``MQTT.dot`` as a Mealy machine with prefixes computed."""
    dot_path = 'TrainingDataAndAutomata/MQTT.dot'
    return load_automaton_from_file(dot_path, automaton_type='mealy', compute_prefixes=True)
Esempio n. 15
0
def generate_concrete_data_MQTT(num_examples,
                                num_rand_topics=5,
                                lens=(1, 2, 4, 6, 10),
                                uniform_concretion=False):
    """Generate tokenized training data with concrete MQTT inputs.

    Abstract input sequences are sampled and labelled with the last output of
    the Mealy machine loaded from ``MQTT.dot``. Each abstract sequence is then
    turned into concrete input strings using random topic names, and both the
    sequences and the labels are mapped to integer tokens.

    :param num_examples: approximate number of abstract sequences; it is split
        over ``lens`` proportionally to each length
    :param num_rand_topics: number of random topic strings to generate
    :param lens: lengths of the abstract sequences
    :param uniform_concretion: if True, every abstract sequence is concretized
        once per topic (and ``num_examples`` is divided by the number of
        topics); otherwise a single random topic is picked per sequence
    :return: (tokenized sequences, tokenized labels, list of concrete inputs,
        list of concrete outputs)
    """
    mealy_machine = load_automaton_from_file(
        'TrainingDataAndAutomata/MQTT.dot', automaton_type='mealy')
    input_al = mealy_machine.get_input_alphabet()

    sum_lens = sum(lens)

    if uniform_concretion:
        num_examples //= num_rand_topics

    # key is length, value is number of examples for said length
    ex_per_len = {
        length: int(num_examples * (length / sum_lens)) + 1 for length in lens
    }

    abstract_train_seq = []
    train_labels = []

    for length, count in ex_per_len.items():
        for _ in range(count):
            if random.random() <= 0.15 and length > 1:
                # fully random sequence
                seq = [choice(input_al) for _ in range(length)]
            else:
                # sequence that starts with a connect
                possible_inputs = ['subscribe', 'unsubscribe', 'publish']
                if random.random() <= 0.2:
                    possible_inputs += ['disconnect', 'connect', 'invalid']
                seq = ['connect'] + [choice(possible_inputs) for _ in range(length - 1)]

            # the label is the output produced by the last input
            mealy_machine.reset_to_initial()
            out = None
            for inp in seq:
                out = mealy_machine.step(inp)

            abstract_train_seq.append(seq)
            train_labels.append(out)

    random_topics = [gen_random_str() for _ in range(num_rand_topics)]
    concrete_train_seq = []
    concrete_input_set = set()
    concrete_labels = []

    for seq, label in zip(abstract_train_seq, train_labels):
        topics = random_topics if uniform_concretion else [choice(random_topics)]

        for topic in topics:
            # concrete string for every known abstract input
            concrete_of = {
                'connect': 'connect_Client1_ping=False',
                'disconnect': 'disconnect_Client1_var',
                'subscribe': f'subscribe_Client1_topic="{topic}"_retain=False',
                'unsubscribe': f'unsubscribe_Client1_topic="{topic}"',
                'publish': f'publish_Client1_global_topic="{topic}"',
                'invalid': 'invalid=Client1_opt=NULL',
            }

            concrete_seq = []
            for abstract_input in seq:
                if abstract_input in concrete_of:
                    concrete_seq.append(concrete_of[abstract_input])
                else:
                    assert False

            # connection-level outputs carry no topic
            if label in ('CONNACK', 'CONCLOSED'):
                concrete_labels.append(label + '_User1')
            else:
                concrete_labels.append(label + f'_User1_topic:{topic}')

            concrete_train_seq.append(concrete_seq)
            concrete_input_set.update(concrete_seq)

    concrete_input_set = list(concrete_input_set)
    output_al = list(set(concrete_labels))

    # map to integers
    input_dict = tokenized_dict(concrete_input_set)
    out_dict = tokenized_dict(output_al)

    train_seq = [seq_to_tokens(word, input_dict) for word in concrete_train_seq]
    train_labels = [seq_to_tokens(word, out_dict) for word in concrete_labels]

    return train_seq, train_labels, concrete_input_set, output_al
Esempio n. 16
0
                n_c, n_resample = n_c, n_resample
            elif exp_name == 'shared_coin':
                n_c, n_resample = n_c, n_resample
            elif exp_name == 'slot_machine':
                n_c, n_resample = n_c, n_resample
        else:
            if exp_name == 'first_grid':
                n_c, n_resample = 20, 1000
            elif exp_name == 'second_grid':
                n_c, n_resample = 20, 1000
            elif exp_name == 'shared_coin':
                n_c, n_resample = 50, 5000
            elif exp_name == 'slot_machine':
                n_c, n_resample = 100, 10000

        original_mdp = load_automaton_from_file(path_to_dir + file,
                                                automaton_type='mdp')
        input_alphabet = original_mdp.get_input_alphabet()

        original_prism_file_name = f'{benchmark_dir}/original_{exp_name}.prism'
        mdp_2_prism_format(original_mdp,
                           name=exp_name,
                           output_path=f'{original_prism_file_name}')

        mdp_sul = MdpSUL(original_mdp)

        eq_oracle = UnseenOutputRandomWalkEqOracle(input_alphabet,
                                                   mdp_sul,
                                                   num_steps=n_resample *
                                                   (1 / 0.25),
                                                   reset_after_cex=True,
                                                   reset_prob=0.25)
Esempio n. 17
0
# Configuration for a benchmark script that learns DFAs from a folder of models.
run_times = []

# change on which folder to perform experiments
# (first element: folder with the benchmark files, second: SUL class wrapping each loaded model)
exp, sul = dfa_2000_states_10_inputs, DfaSUL

# NOTE(review): os.listdir returns entries in arbitrary order, so the ten
# benchmarks selected below may differ between machines/runs.
benchmarks = os.listdir(exp)
benchmarks = benchmarks[:10]

# Candidate option values; presumably intended as run_Lstar settings to be
# compared - TODO confirm against the rest of the script.
caching_opt = [True, False]
closing_options = ['shortest_first', 'longest_first', 'single']
suffix_processing = ['all', 'single']
counter_example_processing = ['rs', 'longest_prefix', None]
e_closedness = ['prefix', 'suffix']

for b in benchmarks:
    automaton = load_automaton_from_file(f'{exp}/{b}', automaton_type='dfa')
    input_al = automaton.get_input_alphabet()

    sul_dfa = sul(automaton)

    state_origin_eq_oracle = StatePrefixEqOracle(input_al,
                                                 sul_dfa,
                                                 walks_per_state=5,
                                                 walk_len=25)

    learned_dfa, data = run_Lstar(input_al,
                                  sul_dfa,
                                  state_origin_eq_oracle,
                                  automaton_type='dfa',
                                  cache_and_non_det_check=False,
                                  cex_processing='rs',
Esempio n. 18
0
import random

from aalpy.SULs import MdpSUL, StochasticMealySUL
from aalpy.base import SUL
from aalpy.automata import Mdp, MdpState, StochasticMealyState, StochasticMealyMachine
from aalpy.learning_algs import run_Lstar, run_stochastic_Lstar
from aalpy.oracles import RandomWMethodEqOracle, RandomWordEqOracle
from aalpy.utils import load_automaton_from_file

# Load the Mealy machine stored in CYW43455.dot (with prefixes computed)
# and print its input alphabet.
model = load_automaton_from_file('CYW43455.dot',
                                 automaton_type='mealy',
                                 compute_prefixes=True)
alphabet = model.get_input_alphabet()
print(alphabet)


class ModelSUL(SUL):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.last_output = 'init'

    def pre(self):
        self.model.reset_to_initial()
        self.last_output = 'init'

    def post(self):
        pass

    def step(self, letter):
        if not letter:
Esempio n. 19
0
# Experiment configuration for the stochastic learning benchmarks.
n_c = 10
n_resample = 1000
min_rounds = 25
max_rounds = 500
# number of repetitions (one seed per repetition) for every configuration
experiment_repetition = 10

uniform_parameters = False
strategy = ["normal", "chi2"]  # "chi2" is the chi-square strategy
cex_sampling = [
    'bfs',
]  # alternative value: 'random:100:0.15'
cex_processing = [None, 'longest_prefix']  # None or 'longest_prefix'
# wall-clock start of the whole experiment
start = time.time()

# Maps each file name, cut at its first '.', to the MDP loaded from that file.
model_dict = {
    m.split('.')[0]: load_automaton_from_file(path_to_dir + m,
                                              automaton_type='mdp')
    for m in files
}

for strat in strategy:
    for cex_stat in cex_sampling:
        for cex_proc in cex_processing:
            print(strat, cex_stat, cex_proc)
            benchmark_dir = f'FM_mdp_smm/benchmark_22_04_{strat}_{cex_proc}/'
            for seed in range(experiment_repetition):
                print(seed)
                random.seed(seeds[seed])
                import os

                if not os.path.exists(benchmark_dir):
                    os.makedirs(benchmark_dir)
Esempio n. 20
0
import unittest

from aalpy.SULs import DfaSUL, MealySUL, MooreSUL
from aalpy.automata import Dfa, MealyMachine, MooreMachine
from aalpy.learning_algs import run_Lstar
from aalpy.oracles import WMethodEqOracle, RandomWalkEqOracle, StatePrefixEqOracle, TransitionFocusOracle, \
    RandomWMethodEqOracle, BreadthFirstExplorationEqOracle, RandomWordEqOracle, CacheBasedEqOracle, \
    KWayStateCoverageEqOracle
from aalpy.utils import get_Angluin_dfa, load_automaton_from_file

# Reference ("correct") automaton for every automaton class, keyed by the class itself.
# Built at import time; the dot paths are relative to the working directory.
correct_automata = {Dfa: get_Angluin_dfa(),
                    MealyMachine: load_automaton_from_file('../DotModels/Angluin_Mealy.dot', automaton_type='mealy'),
                    MooreMachine: load_automaton_from_file('../DotModels/Angluin_Moore.dot', automaton_type='moore')}

# SUL class to use for each automaton class.
suls = {Dfa: DfaSUL,
        MealyMachine: MealySUL,
        MooreMachine: MooreSUL}


class DeterministicTest(unittest.TestCase):

    def prove_equivalence(self, learned_automaton):

        correct_automaton = correct_automata[learned_automaton.__class__]

        # only work if correct automaton is already minimal
        if len(learned_automaton.states) != len(correct_automaton.states):
            print(len(learned_automaton.states), len(correct_automaton.states))
            return False

        alphabet = learned_automaton.get_input_alphabet()