Ejemplo n.º 1
0
    def __init__(self, path, reset_cost, default_reward, value_expert):
        """Create the world model, prepare the observation table and run the learning.

        All the work happens in the constructor: the ``Model`` is built from
        ``path``, ``reset_cost`` and ``default_reward``; an ``ObservationTable``
        is created over one ``Letter`` per observation of the model and
        initialized; the statistics counters are reset; then ``learn()`` is
        repeated until ``check()`` returns a truthy value. Finally the summary
        is printed with ``endPrints()`` and ``TMP_MODEL_PATH`` is removed.
        """
        self.world = Model(path, reset_cost, default_reward)
        self.value_expert = value_expert

        # The input alphabet is made of the observations exposed by the model.
        alphabet = [Letter(observation) for observation in self.world.mrm.observations]
        self.kbase = MRMActiveKnowledgeBase(self.world)
        self.OT = ObservationTable(
            input_letters=alphabet,
            knowledge_base=self.kbase,
        )

        print('Initializing OT')
        self.OT.initialize()
        print('OT initialized')

        # Statistics counters, all starting from zero.
        self.total_learning_time = 0
        self.total_exploring_time = 0
        self.rewards = 0
        self.iteration4Explor = 0
        self.iteration4OT = 0
        self.nuof_counter_examples = 0

        # Learn at least once, then keep learning until check() is satisfied.
        while True:
            self.learn()
            if self.check():
                break

        # Final report, then cleanup of the temporary model file.
        self.endPrints()
        remove(TMP_MODEL_PATH)
Ejemplo n.º 2
0
    def __init__(self, input_vocabulary, knowledge_base, max_states, tmp_dir=None, eqtests=None):
        """Set up a learner implementing the LSTAR algorithm.

        Equivalence tests rely on WPMethod by default. To use a
        RandomWalkMethod instead, provide it through the 'eqtests' parameter:

        eqtests = RandomWalkMethod(self.knowledge_base, self.input_letters, 10000, 0.7)

        """
        # Each symbol of the input vocabulary is wrapped in a Letter.
        self.input_letters = list(map(Letter, input_vocabulary))
        self.knowledge_base = knowledge_base
        self.tmp_dir = tmp_dir

        # The observation table works on the letters and the knowledge base
        # assigned just above.
        self.observation_table = ObservationTable(self.input_letters, self.knowledge_base)

        self.max_states = max_states
        self.eqtests = eqtests

        # Stop flag, initially cleared.
        self.__f_stop = False
Ejemplo n.º 3
0
    def __init__(self, input_vocabulary, knowledge_base, max_states, tmp_dir=None, eqtests=None):
        """Set up a learner implementing the LSTAR algorithm.

        Equivalence tests rely on WPMethod by default. To use a
        RandomWalkMethod instead, provide it through the 'eqtests' parameter:

        eqtests = RandomWalkMethod(self.knowledge_base, self.input_letters, 10000, 0.7)

        """
        # Each symbol of the input vocabulary is wrapped in a Letter.
        self.input_letters = list(map(Letter, input_vocabulary))
        self.knowledge_base = knowledge_base
        self.tmp_dir = tmp_dir

        # The observation table works on the letters and the knowledge base
        # assigned just above.
        self.observation_table = ObservationTable(self.input_letters, self.knowledge_base)

        self.max_states = max_states
        self.eqtests = eqtests
Ejemplo n.º 4
0
class LSTAR(object):
    """TODO : Describe here the inner working of the LSTAR Algorithm


    >>> from pylstar import LSTAR
    >>> from pylstar import State
    >>> from pylstar import Transition
    >>> from pylstar import Automata
    >>> from pylstar import Letter
    >>> from pylstar import FakeActiveKnowledgeBase
    >>> symbol_a = "a"
    >>> symbol_b = "b"
    >>> symbol_c = "c"
    >>> symbol_1 = 1
    >>> symbol_2 = 2
    >>> symbol_3 = 3
    >>> l_a = Letter(symbol_a)
    >>> l_b = Letter(symbol_b)
    >>> l_c = Letter(symbol_c)
    >>> l_1 = Letter(symbol_1)
    >>> l_2 = Letter(symbol_2)
    >>> l_3 = Letter(symbol_3)
    >>> s0 = State("S0")
    >>> s1 = State("S1")
    >>> s2 = State("S2")
    >>> t1 = Transition("t1", output_state=s0, input_letter=l_a, output_letter=l_1)
    >>> t2 = Transition("t2", output_state=s1, input_letter=l_b, output_letter=l_2)
    >>> t3 = Transition("t3", output_state=s2, input_letter=l_c, output_letter=l_3)
    >>> s0.transitions = [t1, t2, t3]
    >>> t4 = Transition("t4", output_state=s1, input_letter=l_a, output_letter=l_2)
    >>> t5 = Transition("t5", output_state=s1, input_letter=l_b, output_letter=l_3)
    >>> t6 = Transition("t6", output_state=s0, input_letter=l_c, output_letter=l_1)
    >>> s1.transitions = [t4, t5, t6]
    >>> t7 = Transition("t7", output_state=s2, input_letter=l_a, output_letter=l_2)
    >>> t8 = Transition("t8", output_state=s2, input_letter=l_b, output_letter=l_3)
    >>> t9 = Transition("t9", output_state=s1, input_letter=l_c, output_letter=l_1)
    >>> s2.transitions = [t7, t8, t9]
    >>> automata = Automata(s0)
    >>> kbase = FakeActiveKnowledgeBase(automata)
    >>> input_vocabulary = [symbol_a, symbol_b, symbol_c]
    >>> lstar = LSTAR(input_vocabulary, kbase, max_states = 5)
    >>> infered_automata = lstar.learn()
    >>> print(infered_automata.build_dot_code())
    digraph "Automata" {
    "0" [shape=doubleoctagon, style=filled, fillcolor=white, URL="0"];
    "2" [shape=ellipse, style=filled, fillcolor=white, URL="2"];
    "1" [shape=ellipse, style=filled, fillcolor=white, URL="1"];
    "0" -> "0" [fontsize=5, label="a / 1", URL="t0"];
    "0" -> "1" [fontsize=5, label="b / 2", URL="t1"];
    "0" -> "2" [fontsize=5, label="c / 3", URL="t2"];
    "2" -> "2" [fontsize=5, label="a / 2", URL="t6"];
    "2" -> "2" [fontsize=5, label="b / 3", URL="t7"];
    "2" -> "1" [fontsize=5, label="c / 1", URL="t8"];
    "1" -> "1" [fontsize=5, label="a / 2", URL="t3"];
    "1" -> "1" [fontsize=5, label="b / 3", URL="t4"];
    "1" -> "0" [fontsize=5, label="c / 1", URL="t5"];
    }

    >>> from pylstar import LSTAR
    >>> from pylstar import State
    >>> from pylstar import Transition
    >>> from pylstar import Automata
    >>> from pylstar import Letter
    >>> from pylstar import FakeActiveKnowledgeBase
    >>> # input symbols
    >>> symbol_hello = "hello"
    >>> symbol_bye = "bye"
    >>> symbol_pass_valid = "pass valid"
    >>> symbol_pass_invalid = "pass invalid"
    >>> symbol_cmd1 = "cmd1"
    >>> symbol_cmd2 = "cmd2"
    >>> # output symbols
    >>> symbol_pass_request = "pass?"
    >>> symbol_ack = "ack"
    >>> symbol_welcome = "welcome"
    >>> symbol_error = "error"
    >>> # create a letter for each symbol
    >>> l_hello = Letter(symbol_hello)
    >>> l_bye = Letter(symbol_bye)
    >>> l_pass_valid = Letter(symbol_pass_valid)
    >>> l_pass_invalid = Letter("pass invalid")
    >>> l_cmd1 = Letter(symbol_cmd1)
    >>> l_cmd2 = Letter(symbol_cmd2)
    >>> l_welcome = Letter(symbol_welcome)
    >>> l_ack = Letter(symbol_ack)
    >>> l_pass_request = Letter(symbol_pass_request)
    >>> l_error = Letter(symbol_error)
    >>> # create the infered automata
    >>> s0 = State("S0")
    >>> s1 = State("S1")
    >>> s2 = State("S2")
    >>> t1 = Transition("t1", output_state=s1, input_letter=l_hello, output_letter=l_pass_request)
    >>> t2 = Transition("t2", output_state=s0, input_letter=l_bye, output_letter=l_ack)
    >>> t3 = Transition("t3", output_state=s0, input_letter=l_pass_valid, output_letter=l_error)
    >>> t4 = Transition("t4", output_state=s0, input_letter=l_pass_invalid, output_letter=l_error)
    >>> t5 = Transition("t5", output_state=s0, input_letter=l_cmd1, output_letter=l_error)
    >>> t6 = Transition("t6", output_state=s0, input_letter=l_cmd2, output_letter=l_error)
    >>> s0.transitions = [t1, t2, t3, t4, t5, t6]
    >>> t7 = Transition("t7", output_state=s1, input_letter=l_hello, output_letter=l_error)
    >>> t8 = Transition("t8", output_state=s0, input_letter=l_bye, output_letter=l_ack)
    >>> t9 = Transition("t9", output_state=s2, input_letter=l_pass_valid, output_letter=l_welcome)
    >>> t10 = Transition("t10", output_state=s1, input_letter=l_pass_invalid, output_letter=l_error)
    >>> t11 = Transition("t11", output_state=s1, input_letter=l_cmd1, output_letter=l_error)
    >>> t12 = Transition("t12", output_state=s1, input_letter=l_cmd2, output_letter=l_error)
    >>> s1.transitions = [t7, t8, t9, t10, t11, t12]
    >>> t13 = Transition("t13", output_state=s2, input_letter=l_hello, output_letter=l_error)
    >>> t14 = Transition("t14", output_state=s0, input_letter=l_bye, output_letter=l_ack)
    >>> t15 = Transition("t15", output_state=s2, input_letter=l_pass_valid, output_letter=l_error)
    >>> t16 = Transition("t16", output_state=s2, input_letter=l_pass_invalid, output_letter=l_error)
    >>> t17 = Transition("t17", output_state=s2, input_letter=l_cmd1, output_letter=l_ack)
    >>> t18 = Transition("t18", output_state=s2, input_letter=l_cmd2, output_letter=l_ack)
    >>> s2.transitions = [t13, t14, t15, t16, t17, t18]
    >>> automata = Automata(s0)
    >>> kbase = FakeActiveKnowledgeBase(automata)
    >>> input_vocabulary = [symbol_hello, symbol_bye, symbol_pass_valid, symbol_pass_invalid, symbol_cmd1, symbol_cmd2]
    >>> lstar = LSTAR(input_vocabulary, kbase, max_states = 5)
    >>> infered_automata = lstar.learn()
    >>> print(infered_automata.build_dot_code())
    digraph "Automata" {
    "0" [shape=doubleoctagon, style=filled, fillcolor=white, URL="0"];
    "1" [shape=ellipse, style=filled, fillcolor=white, URL="1"];
    "2" [shape=ellipse, style=filled, fillcolor=white, URL="2"];
    "0" -> "1" [fontsize=5, label="hello / pass?", URL="t0"];
    "0" -> "0" [fontsize=5, label="bye / ack", URL="t1"];
    "0" -> "0" [fontsize=5, label="pass valid / error", URL="t2"];
    "0" -> "0" [fontsize=5, label="pass invalid / error", URL="t3"];
    "0" -> "0" [fontsize=5, label="cmd1 / error", URL="t4"];
    "0" -> "0" [fontsize=5, label="cmd2 / error", URL="t5"];
    "1" -> "1" [fontsize=5, label="hello / error", URL="t6"];
    "1" -> "0" [fontsize=5, label="bye / ack", URL="t7"];
    "1" -> "2" [fontsize=5, label="pass valid / welcome", URL="t8"];
    "1" -> "1" [fontsize=5, label="pass invalid / error", URL="t9"];
    "1" -> "1" [fontsize=5, label="cmd1 / error", URL="t10"];
    "1" -> "1" [fontsize=5, label="cmd2 / error", URL="t11"];
    "2" -> "2" [fontsize=5, label="hello / error", URL="t12"];
    "2" -> "0" [fontsize=5, label="bye / ack", URL="t13"];
    "2" -> "2" [fontsize=5, label="pass valid / error", URL="t14"];
    "2" -> "2" [fontsize=5, label="pass invalid / error", URL="t15"];
    "2" -> "2" [fontsize=5, label="cmd1 / ack", URL="t16"];
    "2" -> "2" [fontsize=5, label="cmd2 / ack", URL="t17"];
    }


    >>> from pylstar import LSTAR
    >>> from pylstar import State
    >>> from pylstar import Transition
    >>> from pylstar import Automata
    >>> from pylstar import Letter
    >>> from pylstar import FakeActiveKnowledgeBase
    >>> symbol_a = "a"
    >>> symbol_b = "b"
    >>> symbol_c = "c"
    >>> symbol_1 = 1
    >>> symbol_2 = 2
    >>> symbol_3 = 3
    >>> l_a = Letter(symbol_a)
    >>> l_b = Letter(symbol_b)
    >>> l_c = Letter(symbol_c)
    >>> l_1 = Letter(symbol_1)
    >>> l_2 = Letter(symbol_2)
    >>> l_3 = Letter(symbol_3)
    >>> s0 = State("S0")
    >>> s1 = State("S1")
    >>> s2 = State("S2")
    >>> t1 = Transition("t1", output_state=s0, input_letter=l_a, output_letter=l_1)
    >>> t2 = Transition("t2", output_state=s1, input_letter=l_b, output_letter=l_2)
    >>> t3 = Transition("t3", output_state=s2, input_letter=l_c, output_letter=l_3)
    >>> s0.transitions = [t1, t2, t3]
    >>> t4 = Transition("t4", output_state=s2, input_letter=l_a, output_letter=l_3)
    >>> t5 = Transition("t5", output_state=s0, input_letter=l_b, output_letter=l_1)
    >>> t6 = Transition("t6", output_state=s1, input_letter=l_c, output_letter=l_2)
    >>> s1.transitions = [t4, t5, t6]
    >>> t7 = Transition("t7", output_state=s1, input_letter=l_a, output_letter=l_2)
    >>> t8 = Transition("t8", output_state=s2, input_letter=l_b, output_letter=l_3)
    >>> t9 = Transition("t9", output_state=s0, input_letter=l_c, output_letter=l_1)
    >>> s2.transitions = [t7, t8, t9]
    >>> automata = Automata(s0)
    >>> kbase = FakeActiveKnowledgeBase(automata)
    >>> input_vocabulary = [symbol_a, symbol_b, symbol_c]
    >>> lstar = LSTAR(input_vocabulary, kbase, max_states = 5)
    >>> infered_automata = lstar.learn()
    >>> print(infered_automata.build_dot_code())
    digraph "Automata" {
    "0" [shape=doubleoctagon, style=filled, fillcolor=white, URL="0"];
    "2" [shape=ellipse, style=filled, fillcolor=white, URL="2"];
    "1" [shape=ellipse, style=filled, fillcolor=white, URL="1"];
    "0" -> "0" [fontsize=5, label="a / 1", URL="t0"];
    "0" -> "1" [fontsize=5, label="b / 2", URL="t1"];
    "0" -> "2" [fontsize=5, label="c / 3", URL="t2"];
    "2" -> "1" [fontsize=5, label="a / 2", URL="t6"];
    "2" -> "2" [fontsize=5, label="b / 3", URL="t7"];
    "2" -> "0" [fontsize=5, label="c / 1", URL="t8"];
    "1" -> "2" [fontsize=5, label="a / 3", URL="t3"];
    "1" -> "0" [fontsize=5, label="b / 1", URL="t4"];
    "1" -> "1" [fontsize=5, label="c / 2", URL="t5"];
    }

    
    
    """

    def __init__(self, input_vocabulary, knowledge_base, max_states, tmp_dir=None, eqtests=None):
        """Implementation of the LSTAR algorithm.

        Per default, WPMethod is used for equivalence tests. However, one can prefer a RandomWalkMethod
        by specifying the following 'eqtests' parameter:

        eqtests = RandomWalkMethod(self.knowledge_base, self.input_letters, 10000, 0.7)

        :param input_vocabulary: iterable of input symbols; each symbol is wrapped in a Letter.
        :param knowledge_base: knowledge base given to the observation table; the
            property setter raises ValueError if it is None.
        :param max_states: value forwarded to WpMethodEQ when no 'eqtests' is provided.
        :param tmp_dir: directory receiving the serialized hypotheses and observation
            tables; when None, a fresh one is created with tempfile.mkdtemp.
        :param eqtests: equivalence-test object; when None, a WpMethodEQ is built.
        """
        # Keep the raw vocabulary so that the `input_vocabulary` property can be
        # read (it was never assigned before, which made the getter raise
        # AttributeError). The value is stored directly, without the validating
        # setter, so that every vocabulary accepted so far is still accepted.
        self.__input_vocabulary = input_vocabulary

        self.input_letters = [Letter(symbol) for symbol in input_vocabulary]
        self.knowledge_base = knowledge_base
        self.tmp_dir = tmp_dir
        self.observation_table = ObservationTable(self.input_letters, self.knowledge_base)
        # max_states must be assigned before eqtests: the eqtests setter reads
        # it (together with knowledge_base and input_letters) to build the
        # default WpMethodEQ.
        self.max_states = max_states
        self.eqtests = eqtests
        self.__f_stop = False

    def stop(self):
        """This method can be use to trigger the end of the learning process"""
        
        self._logger.info("Stopping the LSTAR learning process.")
        self.__f_stop = True
        
    def learn(self):
        self._logger.info("Starting the LSTAR learning process.")

        # intialization
        self.__initialize()

        f_hypothesis_is_valid = False
        i_round = 1
        
        while not f_hypothesis_is_valid and not self.__f_stop:
        
            hypothesis = self.build_hypothesis(i_round)

            self.__serialize_hypothesis(i_round, hypothesis)

            counterexample = self.eqtests.find_counterexample(hypothesis)
            if counterexample is not None:
                self._logger.info("Counterexample '{}' found.".format(counterexample))
                self.fix_hypothesis(counterexample)
            else:
                f_hypothesis_is_valid = True

            i_round += 1

        self.__serialize_observation_table(i_round)

        self._logger.info("Automata successfully computed")
        return hypothesis

    def __serialize_hypothesis(self, i_round, hypothesis):
        if i_round is None:
            raise Exception("i_round cannot be None")
        if hypothesis is None:
            raise Exception("Hypothesis cannot be None")

        dot_code = hypothesis.build_dot_code()
        filepath = os.path.join(self.tmp_dir, "hypothesis_{}.dot".format(i_round))
        with open(filepath, 'w') as fd:
            fd.write(dot_code)

        self._logger.info("Hypothesis produced on round '{}' stored in '{}'".format(i_round, filepath))

    def __serialize_observation_table(self, i_round):
        if self.observation_table is None:
            raise Exception("Observation table cannot ne Bone")
        
        serialized_table = self.observation_table.serialize()
        str_date = datetime.strftime(datetime.now(), "%Y%m%d_%H%M%S")
        filepath = os.path.join(self.tmp_dir, "observation_table_{}_{}.raw".format(i_round, str_date))
        with open(filepath, 'w') as fd:
            fd.write(serialized_table)

        self._logger.info("Observation table serialized in '{}'".format(filepath))
        
    def fix_hypothesis(self, counterexample):
        if counterexample is None:
            raise Exception("counterexample cannot be None")
        self._logger.debug("fix hypothesis with counterexample '{}'".format(counterexample))

        input_word = counterexample.input_word
        output_word = counterexample.output_word        
        self.observation_table.add_counterexample(input_word, output_word)

    def build_hypothesis(self, i_round):
        if i_round is None:
            raise Exception("i_round cannot be None")

        f_consistent = False
        f_closed = False
        self._logger.info("Building the hypothesis ({} round)".format(i_round))
        while not f_consistent or not f_closed:

            if not self.observation_table.is_closed():
                self._logger.info("Observation table is not closed.")
                self.observation_table.close_table()
                f_closed = False
            else:
                self._logger.info("Observation table is closed")
                f_closed = True

            inconsistency = self.observation_table.find_inconsistency()
            if inconsistency is not None:
                self._logger.info("Observation table is not consistent.")
                self.observation_table.make_consistent(inconsistency)
                f_consistent = False
            else:
                self._logger.info("Observation table is consistent")
                f_consistent = True

            self.__serialize_observation_table(i_round)
                                
        self._logger.info("Hypothesis computed")
        return self.observation_table.build_hypothesis()
            

    def __initialize(self):
        """Initialization of the observation table"""
        
        self.observation_table.initialize()

        self._logger.info("Observation table is initialized")
        self._logger.info("\n"+str(self.observation_table))        

    @property
    def input_vocabulary(self):
        """Input_vocabulary to use  """
        return self.__input_vocabulary
    
    @input_vocabulary.setter
    def input_vocabulary(self, input_vocabulary):
        if input_vocabulary is None:
            raise ValueError("Input_vocabulary cannot be None")
        if len(input_vocabulary) == 0:
            raise ValueError("Input vocabulary cannot be empty")
        self.__input_vocabulary = input_vocabulary

    @property
    def knowledge_base(self):
        """Membership Knowledge_base"""
        return self.__knowledge_base
    
    @knowledge_base.setter
    def knowledge_base(self, knowledge_base):
        if knowledge_base is None:
            raise ValueError("Knowledge_base cannot be None")
        self.__knowledge_base = knowledge_base

    @property
    def tmp_dir(self):
        """Temporary directory that host serialized observation tables and hypothesis"""
        return self.__tmp_dir

    @tmp_dir.setter
    def tmp_dir(self, value):
        if value is None:
            self.__tmp_dir = tempfile.mkdtemp(prefix='pylstar_')
        else:
            self.__tmp_dir = value

    @property
    def eqtests(self):
        return self.__eqtests

    @eqtests.setter
    def eqtests(self, eqtests):
        if eqtests is None:
            self.__eqtests = WpMethodEQ(self.knowledge_base, self.max_states, self.input_letters)
        else:
            self.__eqtests = eqtests
Ejemplo n.º 5
0
class LSTAR(object):
    """TODO : Describe here the inner working of the LSTAR Algorithm


    >>> from pylstar.LSTAR import LSTAR
    >>> from pylstar.automata.State import State
    >>> from pylstar.automata.Transition import Transition
    >>> from pylstar.automata.Automata import Automata
    >>> from pylstar.Letter import Letter
    >>> from pylstar.FakeActiveKnowledgeBase import FakeActiveKnowledgeBase
    >>> symbol_a = "a"
    >>> symbol_b = "b"
    >>> symbol_c = "c"
    >>> symbol_1 = 1
    >>> symbol_2 = 2
    >>> symbol_3 = 3
    >>> l_a = Letter(symbol_a)
    >>> l_b = Letter(symbol_b)
    >>> l_c = Letter(symbol_c)
    >>> l_1 = Letter(symbol_1)
    >>> l_2 = Letter(symbol_2)
    >>> l_3 = Letter(symbol_3)
    >>> s0 = State("S0")
    >>> s1 = State("S1")
    >>> s2 = State("S2")
    >>> t1 = Transition("t1", output_state=s0, input_letter=l_a, output_letter=l_1)
    >>> t2 = Transition("t2", output_state=s1, input_letter=l_b, output_letter=l_2)
    >>> t3 = Transition("t3", output_state=s2, input_letter=l_c, output_letter=l_3)
    >>> s0.transitions = [t1, t2, t3]
    >>> t4 = Transition("t4", output_state=s1, input_letter=l_a, output_letter=l_2)
    >>> t5 = Transition("t5", output_state=s1, input_letter=l_b, output_letter=l_3)
    >>> t6 = Transition("t6", output_state=s0, input_letter=l_c, output_letter=l_1)
    >>> s1.transitions = [t4, t5, t6]
    >>> t7 = Transition("t7", output_state=s2, input_letter=l_a, output_letter=l_2)
    >>> t8 = Transition("t8", output_state=s2, input_letter=l_b, output_letter=l_3)
    >>> t9 = Transition("t9", output_state=s1, input_letter=l_c, output_letter=l_1)
    >>> s2.transitions = [t7, t8, t9]
    >>> automata = Automata(s0)
    >>> kbase = FakeActiveKnowledgeBase(automata)
    >>> input_vocabulary = [symbol_a, symbol_b, symbol_c]
    >>> lstar = LSTAR(input_vocabulary, kbase, max_states = 5)
    >>> infered_automata = lstar.learn()
    >>> print infered_automata.build_dot_code()
    digraph G {
    "1,2,3,2" [shape=doubleoctagon, style=filled, fillcolor=white, URL="1,2,3,2"];
    "2,3,1,2" [shape=ellipse, style=filled, fillcolor=white, URL="2,3,1,2"];
    "2,3,1,1" [shape=ellipse, style=filled, fillcolor=white, URL="2,3,1,1"];
    "1,2,3,2" -> "1,2,3,2" [fontsize=5, label="I='Letter('a')' / O='Letter(1)'", URL="t0"];
    "1,2,3,2" -> "2,3,1,1" [fontsize=5, label="I='Letter('b')' / O='Letter(2)'", URL="t1"];
    "1,2,3,2" -> "2,3,1,2" [fontsize=5, label="I='Letter('c')' / O='Letter(3)'", URL="t2"];
    "2,3,1,2" -> "2,3,1,2" [fontsize=5, label="I='Letter('a')' / O='Letter(2)'", URL="t6"];
    "2,3,1,2" -> "2,3,1,2" [fontsize=5, label="I='Letter('b')' / O='Letter(3)'", URL="t7"];
    "2,3,1,2" -> "2,3,1,1" [fontsize=5, label="I='Letter('c')' / O='Letter(1)'", URL="t8"];
    "2,3,1,1" -> "2,3,1,1" [fontsize=5, label="I='Letter('a')' / O='Letter(2)'", URL="t3"];
    "2,3,1,1" -> "2,3,1,1" [fontsize=5, label="I='Letter('b')' / O='Letter(3)'", URL="t4"];
    "2,3,1,1" -> "1,2,3,2" [fontsize=5, label="I='Letter('c')' / O='Letter(1)'", URL="t5"];
    }
    

    >>> from pylstar.LSTAR import LSTAR
    >>> from pylstar.automata.State import State
    >>> from pylstar.automata.Transition import Transition
    >>> from pylstar.automata.Automata import Automata
    >>> from pylstar.Letter import Letter
    >>> from pylstar.FakeActiveKnowledgeBase import FakeActiveKnowledgeBase
    >>> # input symbols
    >>> symbol_hello = "hello"
    >>> symbol_bye = "bye"
    >>> symbol_pass_valid = "pass valid"
    >>> symbol_pass_invalid = "pass invalid"
    >>> symbol_cmd1 = "cmd1"
    >>> symbol_cmd2 = "cmd2"
    >>> # output symbols
    >>> symbol_pass_request = "pass?"
    >>> symbol_ack = "ack"
    >>> symbol_welcome = "welcome"
    >>> symbol_error = "error"
    >>> # create a letter for each symbol
    >>> l_hello = Letter(symbol_hello)
    >>> l_bye = Letter(symbol_bye)
    >>> l_pass_valid = Letter(symbol_pass_valid)
    >>> l_pass_invalid = Letter("pass invalid")
    >>> l_cmd1 = Letter(symbol_cmd1)
    >>> l_cmd2 = Letter(symbol_cmd2)
    >>> l_welcome = Letter(symbol_welcome)
    >>> l_ack = Letter(symbol_ack)
    >>> l_pass_request = Letter(symbol_pass_request)
    >>> l_error = Letter(symbol_error)
    >>> # create the infered automata
    >>> s0 = State("S0")
    >>> s1 = State("S1")
    >>> s2 = State("S2")
    >>> t1 = Transition("t1", output_state=s1, input_letter=l_hello, output_letter=l_pass_request)
    >>> t2 = Transition("t2", output_state=s0, input_letter=l_bye, output_letter=l_ack)
    >>> t3 = Transition("t3", output_state=s0, input_letter=l_pass_valid, output_letter=l_error)
    >>> t4 = Transition("t4", output_state=s0, input_letter=l_pass_invalid, output_letter=l_error)
    >>> t5 = Transition("t5", output_state=s0, input_letter=l_cmd1, output_letter=l_error)
    >>> t6 = Transition("t6", output_state=s0, input_letter=l_cmd2, output_letter=l_error)
    >>> s0.transitions = [t1, t2, t3, t4, t5, t6]
    >>> t7 = Transition("t7", output_state=s1, input_letter=l_hello, output_letter=l_error)
    >>> t8 = Transition("t8", output_state=s0, input_letter=l_bye, output_letter=l_ack)
    >>> t9 = Transition("t9", output_state=s2, input_letter=l_pass_valid, output_letter=l_welcome)
    >>> t10 = Transition("t10", output_state=s1, input_letter=l_pass_invalid, output_letter=l_error)
    >>> t11 = Transition("t11", output_state=s1, input_letter=l_cmd1, output_letter=l_error)
    >>> t12 = Transition("t12", output_state=s1, input_letter=l_cmd2, output_letter=l_error)
    >>> s1.transitions = [t7, t8, t9, t10, t11, t12]
    >>> t13 = Transition("t13", output_state=s2, input_letter=l_hello, output_letter=l_error)
    >>> t14 = Transition("t14", output_state=s0, input_letter=l_bye, output_letter=l_ack)
    >>> t15 = Transition("t15", output_state=s2, input_letter=l_pass_valid, output_letter=l_error)
    >>> t16 = Transition("t16", output_state=s2, input_letter=l_pass_invalid, output_letter=l_error)
    >>> t17 = Transition("t17", output_state=s2, input_letter=l_cmd1, output_letter=l_ack)
    >>> t18 = Transition("t18", output_state=s2, input_letter=l_cmd2, output_letter=l_ack)
    >>> s2.transitions = [t13, t14, t15, t16, t17, t18]
    >>> automata = Automata(s0)
    >>> kbase = FakeActiveKnowledgeBase(automata)
    >>> input_vocabulary = [symbol_hello, symbol_bye, symbol_pass_valid, symbol_pass_invalid, symbol_cmd1, symbol_cmd2]
    >>> lstar = LSTAR(input_vocabulary, kbase, max_states = 5)
    >>> infered_automata = lstar.learn()
    >>> print infered_automata.build_dot_code()
    digraph G {
    "'pass?','ack','error','error','error','error'" [shape=doubleoctagon, style=filled, fillcolor=white, URL="'pass?','ack','error','error','error','error'"];
    "'error','ack','welcome','error','error','error'" [shape=ellipse, style=filled, fillcolor=white, URL="'error','ack','welcome','error','error','error'"];
    "'error','ack','error','error','ack','ack'" [shape=ellipse, style=filled, fillcolor=white, URL="'error','ack','error','error','ack','ack'"];
    "'pass?','ack','error','error','error','error'" -> "'error','ack','welcome','error','error','error'" [fontsize=5, label="I='Letter('hello')' / O='Letter('pass?')'", URL="t0"];
    "'pass?','ack','error','error','error','error'" -> "'pass?','ack','error','error','error','error'" [fontsize=5, label="I='Letter('bye')' / O='Letter('ack')'", URL="t1"];
    "'pass?','ack','error','error','error','error'" -> "'pass?','ack','error','error','error','error'" [fontsize=5, label="I='Letter('pass valid')' / O='Letter('error')'", URL="t2"];
    "'pass?','ack','error','error','error','error'" -> "'pass?','ack','error','error','error','error'" [fontsize=5, label="I='Letter('pass invalid')' / O='Letter('error')'", URL="t3"];
    "'pass?','ack','error','error','error','error'" -> "'pass?','ack','error','error','error','error'" [fontsize=5, label="I='Letter('cmd1')' / O='Letter('error')'", URL="t4"];
    "'pass?','ack','error','error','error','error'" -> "'pass?','ack','error','error','error','error'" [fontsize=5, label="I='Letter('cmd2')' / O='Letter('error')'", URL="t5"];
    "'error','ack','welcome','error','error','error'" -> "'error','ack','welcome','error','error','error'" [fontsize=5, label="I='Letter('hello')' / O='Letter('error')'", URL="t6"];
    "'error','ack','welcome','error','error','error'" -> "'pass?','ack','error','error','error','error'" [fontsize=5, label="I='Letter('bye')' / O='Letter('ack')'", URL="t7"];
    "'error','ack','welcome','error','error','error'" -> "'error','ack','error','error','ack','ack'" [fontsize=5, label="I='Letter('pass valid')' / O='Letter('welcome')'", URL="t8"];
    "'error','ack','welcome','error','error','error'" -> "'error','ack','welcome','error','error','error'" [fontsize=5, label="I='Letter('pass invalid')' / O='Letter('error')'", URL="t9"];
    "'error','ack','welcome','error','error','error'" -> "'error','ack','welcome','error','error','error'" [fontsize=5, label="I='Letter('cmd1')' / O='Letter('error')'", URL="t10"];
    "'error','ack','welcome','error','error','error'" -> "'error','ack','welcome','error','error','error'" [fontsize=5, label="I='Letter('cmd2')' / O='Letter('error')'", URL="t11"];
    "'error','ack','error','error','ack','ack'" -> "'error','ack','error','error','ack','ack'" [fontsize=5, label="I='Letter('hello')' / O='Letter('error')'", URL="t12"];
    "'error','ack','error','error','ack','ack'" -> "'pass?','ack','error','error','error','error'" [fontsize=5, label="I='Letter('bye')' / O='Letter('ack')'", URL="t13"];
    "'error','ack','error','error','ack','ack'" -> "'error','ack','error','error','ack','ack'" [fontsize=5, label="I='Letter('pass valid')' / O='Letter('error')'", URL="t14"];
    "'error','ack','error','error','ack','ack'" -> "'error','ack','error','error','ack','ack'" [fontsize=5, label="I='Letter('pass invalid')' / O='Letter('error')'", URL="t15"];
    "'error','ack','error','error','ack','ack'" -> "'error','ack','error','error','ack','ack'" [fontsize=5, label="I='Letter('cmd1')' / O='Letter('ack')'", URL="t16"];
    "'error','ack','error','error','ack','ack'" -> "'error','ack','error','error','ack','ack'" [fontsize=5, label="I='Letter('cmd2')' / O='Letter('ack')'", URL="t17"];
    }


    >>> from pylstar.LSTAR import LSTAR
    >>> from pylstar.automata.State import State
    >>> from pylstar.automata.Transition import Transition
    >>> from pylstar.automata.Automata import Automata
    >>> from pylstar.Letter import Letter
    >>> from pylstar.FakeActiveKnowledgeBase import FakeActiveKnowledgeBase
    >>> symbol_a = "a"
    >>> symbol_b = "b"
    >>> symbol_c = "c"
    >>> symbol_1 = 1
    >>> symbol_2 = 2
    >>> symbol_3 = 3
    >>> l_a = Letter(symbol_a)
    >>> l_b = Letter(symbol_b)
    >>> l_c = Letter(symbol_c)
    >>> l_1 = Letter(symbol_1)
    >>> l_2 = Letter(symbol_2)
    >>> l_3 = Letter(symbol_3)
    >>> s0 = State("S0")
    >>> s1 = State("S1")
    >>> s2 = State("S2")
    >>> t1 = Transition("t1", output_state=s0, input_letter=l_a, output_letter=l_1)
    >>> t2 = Transition("t2", output_state=s1, input_letter=l_b, output_letter=l_2)
    >>> t3 = Transition("t3", output_state=s2, input_letter=l_c, output_letter=l_3)
    >>> s0.transitions = [t1, t2, t3]
    >>> t4 = Transition("t4", output_state=s2, input_letter=l_a, output_letter=l_3)
    >>> t5 = Transition("t5", output_state=s0, input_letter=l_b, output_letter=l_1)
    >>> t6 = Transition("t6", output_state=s1, input_letter=l_c, output_letter=l_2)
    >>> s1.transitions = [t4, t5, t6]
    >>> t7 = Transition("t7", output_state=s1, input_letter=l_a, output_letter=l_2)
    >>> t8 = Transition("t8", output_state=s2, input_letter=l_b, output_letter=l_3)
    >>> t9 = Transition("t9", output_state=s0, input_letter=l_c, output_letter=l_1)
    >>> s2.transitions = [t7, t8, t9]
    >>> automata = Automata(s0)
    >>> kbase = FakeActiveKnowledgeBase(automata)
    >>> input_vocabulary = [symbol_a, symbol_b, symbol_c]
    >>> lstar = LSTAR(input_vocabulary, kbase, max_states = 5)
    >>> infered_automata = lstar.learn()
    >>> print infered_automata.build_dot_code()
    digraph G {
    "1,2,3" [shape=doubleoctagon, style=filled, fillcolor=white, URL="1,2,3"];
    "2,3,1" [shape=ellipse, style=filled, fillcolor=white, URL="2,3,1"];
    "3,1,2" [shape=ellipse, style=filled, fillcolor=white, URL="3,1,2"];
    "1,2,3" -> "1,2,3" [fontsize=5, label="I='Letter('a')' / O='Letter(1)'", URL="t6"];
    "1,2,3" -> "3,1,2" [fontsize=5, label="I='Letter('b')' / O='Letter(2)'", URL="t7"];
    "1,2,3" -> "2,3,1" [fontsize=5, label="I='Letter('c')' / O='Letter(3)'", URL="t8"];
    "2,3,1" -> "3,1,2" [fontsize=5, label="I='Letter('a')' / O='Letter(2)'", URL="t3"];
    "2,3,1" -> "2,3,1" [fontsize=5, label="I='Letter('b')' / O='Letter(3)'", URL="t4"];
    "2,3,1" -> "1,2,3" [fontsize=5, label="I='Letter('c')' / O='Letter(1)'", URL="t5"];
    "3,1,2" -> "2,3,1" [fontsize=5, label="I='Letter('a')' / O='Letter(3)'", URL="t0"];
    "3,1,2" -> "1,2,3" [fontsize=5, label="I='Letter('b')' / O='Letter(1)'", URL="t1"];
    "3,1,2" -> "3,1,2" [fontsize=5, label="I='Letter('c')' / O='Letter(2)'", URL="t2"];
    }

    
    
    """

    def __init__(self, input_vocabulary, knowledge_base, max_states, tmp_dir=None, eqtests=None):
        """Set up a learner implementing the LSTAR algorithm.

        Equivalence tests rely on WPMethod by default. To use a
        RandomWalkMethod instead, provide it through the 'eqtests' parameter:

        eqtests = RandomWalkMethod(self.knowledge_base, self.input_letters, 10000, 0.7)

        """
        # Each symbol of the input vocabulary is wrapped in a Letter.
        self.input_letters = list(map(Letter, input_vocabulary))
        self.knowledge_base = knowledge_base
        self.tmp_dir = tmp_dir

        # The observation table works on the letters and the knowledge base
        # assigned just above.
        self.observation_table = ObservationTable(self.input_letters, self.knowledge_base)

        self.max_states = max_states
        self.eqtests = eqtests

    def learn(self):
        """Run the learning loop and return the accepted hypothesis.

        The observation table is initialized first. Each round then builds a
        hypothesis, stores its DOT code in the temporary directory and submits
        it to the equivalence tests. A counterexample, when one is found, is
        fed back into the observation table and a new round starts; the loop
        ends with the first hypothesis for which no counterexample is found.
        """
        self._logger.info("Starting the LSTAR learning process.")

        self.__initialize()

        i_round = 1
        while True:
            hypothesis = self.build_hypothesis(i_round)
            self.__serialize_hypothesis(i_round, hypothesis)

            counterexample = self.eqtests.find_counterexample(hypothesis)
            if counterexample is None:
                # The equivalence tests could not distinguish the hypothesis
                break

            self._logger.info("Counterexample '{}' found.".format(counterexample))
            self.fix_hypothesis(counterexample)
            i_round += 1

        self._logger.info("Automata successfully computed")
        return hypothesis

    def __serialize_hypothesis(self, i_round, hypothesis):
        """Store the DOT code of ``hypothesis`` in the temporary directory.

        The file is named ``hypothesis_<i_round>.dot``. A plain ``Exception``
        is raised when ``i_round`` or ``hypothesis`` is None.
        """
        checks = (
            (i_round, "i_round cannot be None"),
            (hypothesis, "Hypothesis cannot be None"),
        )
        for value, message in checks:
            if value is None:
                raise Exception(message)

        # Render before touching the file system, so a rendering failure
        # leaves no file behind.
        dot_code = hypothesis.build_dot_code()
        filename = "hypothesis_{}.dot".format(i_round)
        filepath = os.path.join(self.tmp_dir, filename)
        with open(filepath, 'w') as fd:
            fd.write(dot_code)

        self._logger.info("Hypothesis produced on round '{}' stored in '{}'".format(i_round, filepath))

    def __serialize_observation_table(self, i_round):
        """Dump the serialized observation table into the temporary directory.

        The file is named ``observation_table_<i_round>_<timestamp>.raw``,
        where the timestamp is the current local time formatted as
        ``%Y%m%d_%H%M%S``.

        A plain ``Exception`` is raised when no observation table is set.
        """
        if self.observation_table is None:
            raise Exception("Observation table cannot be None")

        serialized_table = self.observation_table.serialize()
        str_date = datetime.strftime(datetime.now(), "%Y%m%d_%H%M%S")
        filepath = os.path.join(self.tmp_dir, "observation_table_{}_{}.raw".format(i_round, str_date))
        with open(filepath, 'w') as fd:
            fd.write(serialized_table)

        self._logger.info("Observation table serialized in '{}'".format(filepath))
        
    def fix_hypothesis(self, counterexample):
        """Feed a counterexample back into the observation table.

        The input and output words of ``counterexample`` are passed to
        ``ObservationTable.add_counterexample``. A plain ``Exception`` is
        raised when ``counterexample`` is None.
        """
        if counterexample is None:
            raise Exception("counterexample cannot be None")
        self._logger.debug("fix hypothesis with counterexample '{}'".format(counterexample))

        words = (counterexample.input_word, counterexample.output_word)
        self.observation_table.add_counterexample(*words)

    def build_hypothesis(self, i_round):
        """Close and fix the observation table, then build a hypothesis from it.

        Each pass checks closedness (closing the table when needed), then
        looks for an inconsistency (fixing it when found), and serializes the
        table. Passes repeat until one of them finds the table both closed
        and consistent without having to modify it.

        A plain ``Exception`` is raised when ``i_round`` is None.
        """
        if i_round is None:
            raise Exception("i_round cannot be None")

        table = self.observation_table
        while True:
            f_closed = table.is_closed()
            if f_closed:
                self._logger.info("Observation table is closed")
            else:
                self._logger.info("Observation table is not closed.")
                table.close_table()

            inconsistency = table.find_inconsistency()
            f_consistent = inconsistency is None
            if f_consistent:
                self._logger.info("Observation table is consistent")
            else:
                self._logger.info("Observation table is not consistent.")
                table.make_consistent(inconsistency)

            self.__serialize_observation_table(i_round)

            # A pass that had to repair the table must be followed by another
            # one: the repair may have invalidated the other property.
            if f_closed and f_consistent:
                break

        self._logger.info("Hypothesis computed")
        return table.build_hypothesis()
            

    def __initialize(self):
        """Initialize the observation table and log its content."""
        table = self.observation_table
        table.initialize()

        log_info = self._logger.info
        log_info("Observation table is initialized")
        log_info("\n" + str(table))

    @property
    def input_vocabulary(self):
        """Input vocabulary, as stored by the setter."""
        return self.__input_vocabulary

    @input_vocabulary.setter
    def input_vocabulary(self, input_vocabulary):
        # Work out why the value is unacceptable, if it is, before storing it
        problem = None
        if input_vocabulary is None:
            problem = "Input_vocabulary cannot be None"
        elif len(input_vocabulary) == 0:
            problem = "Input vocabulary cannot be empty"

        if problem is not None:
            raise ValueError(problem)
        self.__input_vocabulary = input_vocabulary

    @property
    def knowledge_base(self):
        """Membership knowledge base, as stored by the setter."""
        return self.__knowledge_base

    @knowledge_base.setter
    def knowledge_base(self, knowledge_base):
        # Only a non-None knowledge base is accepted
        if knowledge_base is not None:
            self.__knowledge_base = knowledge_base
            return
        raise ValueError("Knowledge_base cannot be None")

    @property
    def tmp_dir(self):
        """Temporary directory hosting the serialized observation tables and hypotheses"""
        return self.__tmp_dir

    @tmp_dir.setter
    def tmp_dir(self, value):
        # None means "create a fresh directory"; its name starts with 'pylstar_'
        self.__tmp_dir = tempfile.mkdtemp(prefix='pylstar_') if value is None else value

    @property
    def eqtests(self):
        """Equivalence-test strategy used to look for counterexamples."""
        return self.__eqtests

    @eqtests.setter
    def eqtests(self, eqtests):
        # Without an explicit strategy, a WpMethodEQ is built from the
        # knowledge base, the maximum number of states and the input letters.
        self.__eqtests = (
            WpMethodEQ(self.knowledge_base, self.max_states, self.input_letters)
            if eqtests is None
            else eqtests
        )
Ejemplo n.º 6
0
class LearningMRM:
    """Main class of the framework"""
    def __init__(self, path, reset_cost, default_reward, value_expert):
        """Build the world model and observation table, then learn to completion.

        The constructor does all the work: it initializes the observation
        table, alternates ``learn()`` and ``check()`` until ``check()``
        succeeds, prints the final statistics and removes the temporary model
        file at ``TMP_MODEL_PATH``.
        """
        self.world = Model(path, reset_cost, default_reward)
        self.value_expert = value_expert

        # One input letter per observation of the MRM
        in_letters = list(map(Letter, self.world.mrm.observations))
        self.kbase = MRMActiveKnowledgeBase(self.world)
        self.OT = ObservationTable(input_letters=in_letters,
                                   knowledge_base=self.kbase)

        print('Initializing OT')
        self.OT.initialize()
        print('OT initialized')

        # Counters, all starting at zero
        for counter in ('total_learning_time', 'total_exploring_time',
                        'rewards', 'iteration4Explor', 'iteration4OT',
                        'nuof_counter_examples'):
            setattr(self, counter, 0)

        # Execution: learn at least once, then again after every failed check
        while True:
            self.learn()
            if self.check():
                break

        # Final report and clean-up
        self.endPrints()
        remove(TMP_MODEL_PATH)

    def learn(self):
        """Use L*_M algorithm to learn the MRM"""
        StartTime = time.time()
        iteration4OT = 0
        closed = self.OT.is_closed()
        inconsistency = self.OT.find_inconsistency()

        while not closed or inconsistency is not None:
            iteration4OT += 1
            print('Building the OT;', 'iteration', iteration4OT)

            if not closed:
                self.OT.close_table()

            if inconsistency is not None:
                self.OT.make_consistent(inconsistency)

            closed = self.OT.is_closed()
            inconsistency = self.OT.find_inconsistency()

        EndTime = time.time()
        self.total_learning_time += EndTime - StartTime

    def check(self):
        """Check if the hypothesis is correct.

        A hypothesis is built first. It is accepted only when it passes the
        expert value check and then the exploitation check; the second check
        is not run when the first one fails. The elapsed wall-clock time is
        added to ``total_exploring_time``.
        """
        started = time.time()
        self.createHypothesis()

        accepted = bool(self.passedFirstCheck() and self.passedSecondCheck())

        self.total_exploring_time += time.time() - started
        return accepted

    def endPrints(self):
        print()
        print('Optimization problem:', ["MIN", "MAX"][MODE])
        print('# learning actions:', self.kbase.actionsForLearning)
        print()
        print('rewards:', self.rewards)
        print('nuof_MQs:', self.kbase.nuof_MQs)
        print('Exploration iterations:', self.iteration4Explor)
        print('nuof_counter_examples:', self.nuof_counter_examples)
        print('total_learning_time:', self.total_learning_time)
        print('total_exploring_time:', self.total_exploring_time)

    def createHypothesis(self):
        """Build the hypothesis MRM and model-check its product with the map.

        The hypothesis built from the observation table is written as a PRISM
        model to ``TMP_MODEL_PATH`` (see ``buildProductAutomaton``), parsed
        back with stormpy and checked against ``Rmax=? [ LRA ]``.

        Sets:
            self.h         -- the sparse model built by stormpy
            self.result_h  -- the model-checking result at state 0
                              (presumably the initial state -- TODO confirm)
            self.scheduler -- the scheduler extracted by the model checker
        """
        print()
        print('Building hypothesis MRM...')
        RM = self.OT.build_hypothesis()
        print('Hypothesis MRM built !')
        # Write the prism file with the hypothesis
        self.buildProductAutomaton(RM)

        program = stormpy.parse_prism_program(TMP_MODEL_PATH)
        properties = stormpy.parse_properties_for_prism_program(
            "Rmax=? [ LRA ]", program)
        # To keep rewards and labels
        options = stormpy.BuilderOptions(True, True)
        self.h = stormpy.build_sparse_model_with_options(program, options)

        # One model-checking run provides both the value and the scheduler;
        # running it once per field only repeats the same computation.
        result = stormpy.model_checking(self.h,
                                        properties[0],
                                        extract_scheduler=True)
        self.result_h = result.at(0)
        self.scheduler = result.scheduler

    def passedFirstCheck(self):
        # Expert value check
        if self.result_h < self.value_expert:
            print(self.result_h, " < Value_expert:", self.value_expert,
                  ", looking for counter-example...")
            self.findCounterExample()
            return False
        else:
            print(self.result_h, " >= Value_expert:", self.value_expert)
            return True

    def passedSecondCheck(self):
        # Exploitation check. Check if the hypothesis is correct by executing the strategy
        # After STERPS_PER_EPISODE we reset the system
        # We end if we observe a counter example or if we have already performed ACTIONTOTEXECUTE actions
        actionsExecuted = 0
        while actionsExecuted < ACTIONTOEXECUTE:
            self.iteration4Explor += 1
            print()
            print('Exploration iteration', self.iteration4Explor)

            # Initialize agent's state to a rand position and
            current_state_h = self.resetExploration()

            for step in range(STEPS_PER_EPISODE):
                a = self.scheduler.get_choice(current_state_h)
                if self.isResetAction(a):
                    next_state_h = self.resetExploration()
                    obs = "null"
                    r_h = self.world.mrm.reset_cost
                    r_m = self.world.mrm.reset_cost
                    next_state_m = self.world.map.current

                else:
                    (r_h, r_m, next_state_h, next_state_m,
                     obs) = self.executeOneStepExploration(
                         current_state_h, a.get_deterministic_choice())

                self.updateTraces(obs, r_m)

                actionsExecuted += 1
                if self.isCounterExample(r_h, r_m):
                    self.nuof_counter_examples += 1
                    return False
                else:
                    current_state_h = next_state_h
                    self.world.map.current = next_state_m
        return True

    def getStateInHypothesis(self, states_h, state):
        for i in states_h:
            if int(i.name) == int(state):
                return i

    def buildProductAutomaton(self, h):
        """Given a hypothesis of the angluin algo, build the product between the gird and this hypothesis and write it in a PRISM file.
		The init state is {'c1','r1','null'} with no obs already made"""

        rewards = "rewards\n"
        labels = ''
        out_file = open(TMP_MODEL_PATH, 'w')
        #module
        out_file.write("mdp\n\nmodule tmp\n\n")

        #number of state and initial state
        new_states = []
        for s in self.world.map.states:
            for o in range(len(h.get_states())):
                labels += 'label "' + s + '_' + str(o) + '" = s=' + str(
                    len(new_states)) + ' ;\n'
                new_states.append((s, o))

        out_file.write("\ts : [0.." + str(len(new_states) - 1) + "] init " +
                       str(new_states.index((self.world.map.initiales[0],
                                             0))) + ";\n\n")

        #transitions
        for s in new_states:
            state_id = self.world.map.getIdState(s[0])
            for a in self.world.map.availableActions(s[0]):
                action_id = self.world.map.getIdAction(a)
                obs = self.world.map.labelling[state_id][action_id]

                #if len(self.world.map.transitions[state_id][action_id]) > 0:
                out_file.write("\t[" + a + "] s=" + str(new_states.index(s)) +
                               "-> ")
                temp_list = []

                if obs == 'null':
                    rewards += "\t[" + a + "] (s=" + str(
                        new_states.index(s)) + ") : " + str(
                            self.world.mrm.default_reward) + ";\n"
                    for [dest, prob
                         ] in self.world.map.transitions[state_id][action_id]:
                        index_dest = str(
                            new_states.index(
                                (self.world.map.getStateFromId(dest), s[1])))
                        temp_list.append(
                            str(prob) + " : (s'=" + index_dest + ")")
                else:
                    tr_val = h.play_word(
                        Word([Letter(obs)]),
                        self.getStateInHypothesis(h.get_states(), s[1]))
                    state_in_h = int(tr_val[1][-1].name)
                    rewards += "\t[" + a + "] (s=" + str(
                        new_states.index(s)) + ") : " + str(
                            tr_val[0].last_letter().name) + ";\n"
                    for [dest, prob
                         ] in self.world.map.transitions[state_id][action_id]:
                        index_dest = str(
                            new_states.index(
                                (self.world.map.getStateFromId(dest),
                                 state_in_h)))
                        temp_list.append(
                            str(prob) + " : (s'=" + index_dest + ")")

                out_file.write(" + ".join(temp_list))
                out_file.write(";\n")

            a = "reset"
            out_file.write(
                "\t[" + a + "] s=" + str(new_states.index(s)) +
                "-> 1.0 : (s'=" +
                str(new_states.index((self.world.map.initiales[0], 0))) +
                ");\n")
            rewards += "\t[" + a + "] (s=" + str(
                new_states.index(s)) + ") : " + str(
                    self.world.mrm.reset_cost) + ";\n"

        out_file.write("\nendmodule\n\n")
        out_file.write(labels)

        rewards += "endrewards\n"
        out_file.write(rewards)
        out_file.close()

    def resetH(self):
        for s in range(len(self.h.states)):
            if {str(self.world.map.current) + '_0'
                }.issubset(self.h.states[s].labels):
                return s

    def getNextSateH(self, state, action):
        action = state.actions[action]
        r = random.random()
        c = 0
        for transition in action.transitions:
            c += transition.value()
            if r < c:
                break
        return transition.column

    def getRewardH(self, state, action):
        c = 0
        for i in range(state):  # i: id in h => state in h => state in m
            c += len(self.h.states[i].actions)
        return self.h.reward_models[''].state_action_rewards[c + int(
            action.__str__())]  # +1 because we have the reset action

    def fromIdStateHToIdStateM(self, sh):
        pattern = re.compile("s[0-9]+_[0-9]+")
        for i in self.h.states[sh].labels:
            if pattern.match(i):
                return i[:i.index('_')]

    def executeOneStepExploration(self, current_state_h, action):
        next_state_h = self.getNextSateH(self.h.states[current_state_h],
                                         action)
        r_h = self.getRewardH(current_state_h, action)
        obs = self.world.map.labelling[self.world.map.getIdState(
            self.world.map.current)][int(action.__str__())]
        next_state_m = self.fromIdStateHToIdStateM(next_state_h)
        #[next_state_m,obs] = self.world.map.moveFrom(self.world.map.current,self.world.map.actions[int(action.__str__())])
        r_m = None
        if obs == 'null':
            r_m = self.world.mrm.default_reward
        else:
            r_m = self.world.mrm.move(obs)

        return (r_h, r_m, next_state_h, next_state_m, obs)

    def isCounterExample(self, r_h, r_m):
        """Return True if the two rewards r_h and r_m are different and add the counter example at the OT."""
        if r_m != r_h:
            print("CE", r_m, r_h, self.observation_seq)
            input_word = Word(
                [Letter(symbol) for symbol in self.observation_seq])
            output_word = Word(
                [Letter(symbol) for symbol in self.reward_trace])
            self.OT.add_counterexample(input_word, output_word)
            return True
        return False

    def findCounterExample(self):
        """Execute actions uniformly at random until we get a counter example"""
        while True:
            current_state_h = self.resetExploration()

            for ep in range(STEPS_PER_EPISODE):
                a = int(random.random() // (1 / (len(
                    self.world.map.availableActions(self.world.map.current)))))
                (r_h, r_m, next_state_h, next_state_m,
                 obs) = self.executeOneStepExploration(current_state_h, a)

                if obs != 'null':
                    self.observation_seq.append(obs)
                    self.reward_trace.append(r_m)

                if self.isCounterExample(r_h, r_m):
                    self.nuof_counter_examples += 1
                    return None
                else:
                    current_state_h = next_state_h
                    self.world.map.current = next_state_m

    def isResetAction(self, a):
        return int(a.__str__()) == len(self.world.map.actions)

    def resetExploration(self):
        self.observation_seq = []
        self.reward_trace = []

        self.world.map.reset()
        self.world.mrm.reset()
        return self.resetH()

    def updateTraces(self, obs, r_m):
        if obs != 'null':
            self.observation_seq.append(obs)
            self.reward_trace.append(r_m)
        self.rewards += r_m