Example #1
from src.automata.ldba import LDBA

# an example automaton for "iron then wood then work_bench" or
# "F (iron & XF (wood & XF (work_bench)))"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
minecraft_6 = LDBA(accepting_sets=[[3]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'iron' in label:
            self.automaton_state = 1
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if 'wood' in label:
            self.automaton_state = 2
        else:
            self.automaton_state = 1
    # state 2
    elif self.automaton_state == 2:
        if 'work_bench' in label:
            self.automaton_state = 3
        else:
            self.automaton_state = 2
    # state 3
    elif self.automaton_state == 3:
        self.automaton_state = 3
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(minecraft_6, LDBA)
Example #2
from src.automata.ldba import LDBA

# an example automaton for "visiting goal1 and goal2 infinitely often" or
# "GF goal1 & GF goal2"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
surveillance = LDBA(accepting_sets=[[0]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'goal1' in label:
            self.automaton_state = 2
        else:
            self.automaton_state = 1
    # state 1
    elif self.automaton_state == 1:
        if 'goal1' in label:
            self.automaton_state = 2
        else:
            self.automaton_state = 1
    # state 2
    elif self.automaton_state == 2:
        if 'goal2' in label:
            self.automaton_state = 0
        else:
            self.automaton_state = 2
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(surveillance, LDBA)
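
A quick sanity check, as a sketch: reset the automaton and feed it a short, hypothetical trace of labels. reset() is used the same way in the train.py test loop below and is assumed here to return the automaton to state 0.

surveillance.reset()
for label in [['goal1'], [], ['goal2'], ['goal1']]:
    print(surveillance.step(label))  # prints 2, 2, 0, 2 -- each visit to state 0 is an accepting visit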
Example #3
from src.automata.ldba import LDBA

# an example automaton for "goal1 or goal2 while avoiding unsafe" or "(FG goal1 | FG goal2) & G !unsafe"
# automaton image is available in "./assets" or "https://i.imgur.com/gyDED4O.png"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
goal1_or_goal2 = LDBA(accepting_sets=[[1, 2]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'epsilon_1' in label:
            self.automaton_state = 1
        elif 'epsilon_2' in label:
            self.automaton_state = 2
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if 'goal1' in label and 'unsafe' not in label:
            self.automaton_state = 1
        else:
            self.automaton_state = -1  # un-accepting sink state
    # state 2
    elif self.automaton_state == 2:
        if 'goal2' in label and 'unsafe' not in label:
            self.automaton_state = 2
        else:
            self.automaton_state = -1  # un-accepting sink state
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(goal1_or_goal2, LDBA)

# state 0 has two outgoing epsilon-transitions, declared in the format described in the slp_easy example (Example #10)
goal1_or_goal2.epsilon_transitions = {0: ['epsilon_1', 'epsilon_2']}
Example #4
from src.automata.ldba import LDBA

# an example automaton for "wood then iron then work_bench then gold" or
# "F (wood & XF (iron & XF (work_bench & XF gold)))"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
minecraft_7 = LDBA(accepting_sets=[[4]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'wood' in label:
            self.automaton_state = 1
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if 'iron' in label:
            self.automaton_state = 2
        else:
            self.automaton_state = 1
    # state 2
    elif self.automaton_state == 2:
        if 'work_bench' in label:
            self.automaton_state = 3
        else:
            self.automaton_state = 2
    # state 3
    elif self.automaton_state == 3:
        if 'gold' in label:
            self.automaton_state = 4
        else:
            self.automaton_state = 3
    # state 4
    elif self.automaton_state == 4:
        self.automaton_state = 4
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(minecraft_7, LDBA)
Example #5
from src.automata.ldba import LDBA

# an example automaton for "goal1 while avoiding unsafe" or "F goal1 & G !unsafe"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
mars_rover_1_and_3 = LDBA(accepting_sets=[[1]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'goal1' in label and 'unsafe' not in label:
            self.automaton_state = 1
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 2
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(mars_rover_1_and_3, LDBA)
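# note: step.__get__(mars_rover_1_and_3, LDBA) produces a method already bound to this
# automaton instance, so subsequent LDBA.step(...) calls run this transition logic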
Example #6
from src.automata.ldba import LDBA

# an example automaton for "grass then tool_shed" or "F (wood & XF (tool_shed))"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
minecraft_2 = LDBA(accepting_sets=[[2]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'grass' in label:
            self.automaton_state = 1
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if 'tool_shed' in label:
            self.automaton_state = 2
        else:
            self.automaton_state = 1
    # state 2
    elif self.automaton_state == 2:
        self.automaton_state = 2
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(minecraft_2, LDBA)
Example #7
from src.automata.ldba import LDBA

# an example automaton for "(food1 then food2) or (food2 then food1) while avoiding ghost" or
# "(F (food1 & F food2) || F (food2 & F food1)) & G !ghost"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
pacman_foods = LDBA(accepting_sets=[[1], [2], [3]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if label is not None and 'food1' in label and 'ghost' not in label:
            self.automaton_state = 1
        elif label is not None and 'food2' in label and 'ghost' not in label:
            self.automaton_state = 2
        elif label is not None and 'ghost' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if label is not None and 'food2' in label and 'ghost' not in label:
            self.automaton_state = 3
        elif label is not None and 'ghost' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 1
    # state 2
    elif self.automaton_state == 2:
        if label is not None and 'food1' in label and 'ghost' not in label:
            self.automaton_state = 3
        elif label is not None and 'ghost' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 2
    # state 3
    elif self.automaton_state == 3:
        if label is not None and 'ghost' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 3
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(pacman_foods, LDBA)
Example #8
from src.automata.ldba import LDBA

# an example automaton for "goal1 then goal2 while avoiding unsafe" or "F (goal1 & XF (goal2)) & G !unsafe"
# automaton image is available in "./assets" or "https://i.imgur.com/R7Zg5mw.png"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
goal1_then_goal2 = LDBA(accepting_sets=[[2]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'goal1' in label and 'unsafe' not in label:
            self.automaton_state = 1
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if 'goal2' in label and 'unsafe' not in label:
            self.automaton_state = 2
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 1
    # state 2
    elif self.automaton_state == 2:
        if 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 3
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(goal1_then_goal2, LDBA)
Example #9
File: train.py  Project: grockious/lcrl

# imports used by this function; the project-local paths are assumed from the repo layout
import os
import random
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import colors

from src.lcrl_core import LCRL  # assumed path (the error message below mentions lcrl_core.py)
from src.environments.slippery_grid import SlipperyGrid  # assumed path
from src.animator import animate  # assumed path

def train(
        MDP,
        LDBA,
        algorithm='ql',
        episode_num=2500,
        iteration_num_max=4000,
        discount_factor=0.95,
        learning_rate=0.9,
        nfq_replay_buffer_size=100,
        ddpg_replay_buffer_size=50000,
        decaying_learning_rate=False,
        epsilon=0.1,
        save_dir='./results',
        test=True,
        average_window=-1,
):
    learning_task = LCRL(MDP, LDBA, discount_factor, learning_rate, decaying_learning_rate, epsilon)

    if algorithm == 'ql':
        learning_task.train_ql(episode_num, iteration_num_max)
        import dill
        from src.environments.mars_rover_discrete_action import MarsRover
    elif algorithm == 'nfq':
        learning_task.train_nfq(episode_num, iteration_num_max, nfq_replay_buffer_size)
        import dill
        from src.environments.mars_rover_discrete_action import MarsRover
    elif algorithm == 'ddpg':
        learning_task.train_ddpg(episode_num, iteration_num_max, ddpg_replay_buffer_size)
        import dill
        import tensorflow as tf
        from src.environments.mars_rover_continuous_action import MarsRover
    else:
        raise NotImplementedError('New learning algorithms will be added to lcrl_core.py soon.')

    if average_window == -1:
        average_window = int(0.03 * episode_num)

    plt.plot(learning_task.q_at_initial_state, c="royalblue")
    plt.xlabel('Episode Number')
    plt.ylabel('Value Function at The Initial State')
    plt.grid(True)
    if average_window > 0:
        avg = np.convolve(learning_task.q_at_initial_state, np.ones((average_window,)) / average_window, mode='valid')
        plt.plot(avg, c='darkblue')

    # saving the results
    results_path = os.path.join(os.getcwd(), save_dir[2:])
    dt_string = datetime.now().strftime("%d.%m.%Y_%H.%M.%S")
    results_sub_path = os.path.join(os.getcwd(), save_dir[2:], dt_string)
    if not os.path.exists(results_path):
        os.mkdir(results_path)
    os.mkdir(results_sub_path)
    plt.savefig(os.path.join(results_sub_path, 'convergence.png'))

    plt.show()

    if test:
        print('testing...')
        number_of_tests = 100
        number_of_successes = 0
        for tt in range(number_of_tests):
            learning_task.MDP.reset()
            learning_task.LDBA.reset()
            # check if MDP current_state is a list or ndarray:
            if type(learning_task.MDP.current_state) == np.ndarray:
                ndarray = True
                test_path = [learning_task.MDP.current_state.tolist()]
            else:
                ndarray = False
                test_path = [learning_task.MDP.current_state]
            iteration_num = 0
            while learning_task.LDBA.accepting_frontier_set and iteration_num < iteration_num_max \
                    and learning_task.LDBA.automaton_state != -1:
                iteration_num += 1
                if ndarray:
                    if algorithm == "nfq":
                        current_state = MDP.current_state.tolist() + [LDBA.automaton_state]
                    if algorithm == "ddpg":
                        current_state = MDP.current_state.tolist() + [LDBA.automaton_state]
                        prev_state = np.array(current_state[0:2].copy())
                else:
                    current_state = learning_task.MDP.current_state + [learning_task.LDBA.automaton_state]

                if learning_task.epsilon_transitions_exists:
                    product_MDP_action_space = learning_task.action_space_augmentation()
                else:
                    product_MDP_action_space = MDP.action_space

                if not algorithm == "ddpg":
                    Qs = []
                    if (not ndarray) and (str(current_state) in learning_task.Q.keys()):
                        for action_index in range(len(product_MDP_action_space)):
                            Qs.append(learning_task.Q[str(current_state)][product_MDP_action_space[action_index]])
                    elif ndarray:
                        for action_index in range(len(product_MDP_action_space)):
                            Qs.append(learning_task.Q[current_state[-1]].predict(
                                [MDP.current_state.tolist() + [action_index]]))
                    else:
                        Qs.append(0)
                    maxQ_action_index = random.choice(np.where(Qs == np.max(Qs))[0])
                    maxQ_action = product_MDP_action_space[maxQ_action_index]
                    # check if an epsilon-transition is taken
                    if learning_task.epsilon_transitions_exists and \
                            maxQ_action_index > len(learning_task.MDP.action_space) - 1:
                        epsilon_transition_taken = True
                    else:
                        epsilon_transition_taken = False
                    if epsilon_transition_taken:
                        next_MDP_state = learning_task.MDP.current_state if not ndarray else learning_task.MDP.current_state.tolist()
                        next_automaton_state = learning_task.LDBA.step(maxQ_action)
                    else:
                        next_MDP_state = learning_task.MDP.step(maxQ_action)
                        next_automaton_state = learning_task.LDBA.step(learning_task.MDP.state_label(next_MDP_state))
                        if ndarray:
                            next_MDP_state = next_MDP_state.tolist()
                else:
                    # action space bounds
                    lower_bound = -1
                    upper_bound = 1
                    tf_prev_state = tf.expand_dims(tf.convert_to_tensor(prev_state), 0)
                    sampled_actions = tf.squeeze(learning_task.Q[current_state[-1]](tf_prev_state))
                    sampled_actions = sampled_actions.numpy()
                    legal_action = np.clip(sampled_actions, lower_bound, upper_bound)
                    action = np.squeeze(legal_action)
                    if learning_task.epsilon_transitions_exists and \
                            LDBA.automaton_state in LDBA.epsilon_transitions.keys() and \
                            random.random() > 0.5:
                        epsilon_action = random.choice(product_MDP_action_space[2:])
                        action = [np.squeeze(
                            int(epsilon_action[-1]) + learning_task.upper_bound
                        )]
                        epsilon_transition_taken = True
                    else:
                        epsilon_transition_taken = False
                    # product MDP modification (for more details refer to https://bit.ly/LCRL_CDC_2019)
                    if epsilon_transition_taken:
                        next_MDP_state = MDP.current_state.tolist()
                        next_automaton_state = LDBA.step(epsilon_action)
                    else:
                        next_MDP_state = MDP.step(action).tolist()
                        next_automaton_state = LDBA.step(MDP.state_label(next_MDP_state))

                    state = np.array(next_MDP_state.copy())

                    # product MDP: synchronise the automaton with MDP
                    current_state = next_MDP_state + [next_automaton_state]

                test_path.append(next_MDP_state)
                if not epsilon_transition_taken:
                    learning_task.LDBA.accepting_frontier_function(next_automaton_state)

                if not learning_task.LDBA.accepting_frontier_set:
                    number_of_successes += 1

        print('success rate in testing: ' + str(100 * number_of_successes / number_of_tests) + '%')

    if isinstance(MDP, SlipperyGrid) and test:
        # plt.plot(learning_task.path_length, c='royalblue')
        # plt.xlabel('Episode Number')
        # plt.ylabel('Agent Traversed Distance from The Initial State')
        # plt.grid(True)
        # if average_window > 0:
        #     avg = np.convolve(learning_task.path_length, np.ones((average_window,)) / average_window, mode='valid')
        #     plt.plot(avg, c='darkblue')
        # plt.savefig(os.path.join(results_sub_path, 'traversed distance in the grid.png'))
        # plt.show()

        distinct_labels = np.unique(learning_task.MDP.labels)
        labels_dic = {}
        label_indx = 0
        bounds = [-0.9]
        cmap = plt.get_cmap('gist_rainbow')
        for label in distinct_labels:
            labels_dic[label] = label_indx
            bounds.append(bounds[-1] + 1)
            label_indx += 1
        color_map = cmap(np.linspace(0, 1, len(distinct_labels)))
        cmap = colors.ListedColormap(color_map)
        norm = colors.BoundaryNorm(bounds, cmap.N)
        labels_value = np.zeros([learning_task.MDP.shape[0], learning_task.MDP.shape[1]])
        for i in range(learning_task.MDP.shape[0]):
            for j in range(learning_task.MDP.shape[1]):
                labels_value[i][j] = labels_dic[learning_task.MDP.state_label([i, j])]
        patches = [mpatches.Patch(color=color_map[i], label=list(distinct_labels)[i]) for i in
                   range(len(distinct_labels))]
        plt.imshow(labels_value, interpolation='nearest', cmap=cmap, norm=norm)
        plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        path_x, path_y = np.array(test_path).T
        plt.scatter(path_y, path_x, c='lime', edgecolors='teal')
        plt.scatter(path_y[0], path_x[0], c='red', edgecolors='black')
        plt.annotate('s_0', (path_y[0], path_x[0]), fontsize=15, xytext=(20, 20), textcoords="offset points",
                     va="center", ha="left",
                     bbox=dict(boxstyle="round", fc="w"),
                     arrowprops=dict(arrowstyle="->"))
        plt.title('This policy is synthesised by the trained agent')
        plt.savefig(
            os.path.join(results_sub_path, 'tested_policy.png'), bbox_inches="tight")
        plt.show()
        is_gif = input(
            'Would you like to create a gif for the control policy? '
            'If so, type in "y", otherwise, type in "n". ')
        if is_gif == 'y' or is_gif == 'Y':
            animate(learning_task.MDP, test_path, results_sub_path, labels_value, cmap, norm, patches)
        print('\n---------------------------------\n')
        print('The results have been saved here:\n')
        print(results_sub_path)
        return learning_task

    if isinstance(MDP, MarsRover) and test:
        plt.imshow(MDP.background)
        path_x, path_y = np.array(test_path).T
        plt.scatter(path_y, path_x, c='lime', edgecolors='teal')
        plt.scatter(path_y[0], path_x[0], c='red', edgecolors='black')
        plt.annotate('s_0', (path_y[0], path_x[0]), fontsize=15, xytext=(20, 20), textcoords="offset points",
                     va="center", ha="left",
                     bbox=dict(boxstyle="round", fc="w"),
                     arrowprops=dict(arrowstyle="->"))
        plt.title('This policy is synthesised by the trained agent')
        plt.savefig(
            os.path.join(results_sub_path, 'tested_policy.png'), bbox_inches="tight")
        plt.show()
        is_gif = input(
            'Would you like to create a gif for the control policy? '
            'If so, type in "y", otherwise, type in "n". ')
        if is_gif == 'y' or is_gif == 'Y':
            animate(learning_task.MDP, test_path, results_sub_path, labels_value, cmap, norm, patches)
        print('\n---------------------------------\n')
        print('The results have been saved here:\n')
        print(results_sub_path)
        return learning_task

    if algorithm == 'ql':
        with open(os.path.join(results_sub_path, 'learned_model.pkl'), 'wb') as learning_file:
            dill.dump(learning_task, learning_file)
        if test:
            with open(os.path.join(results_sub_path, 'test_results.pkl'), 'wb') as test_file:
                dill.dump(test_path, test_file)
        print('In order to load the learning results use the following command in Python console:')
        print('import dill')
        print("learned_model = dill.load(open('" + os.path.join(results_sub_path, 'learned_model.pkl') + "', 'rb'))")
        if test:
            print("tested_trace = dill.load(open('" + os.path.join(results_sub_path, 'test_results.pkl') + "', 'rb'))")
        print('\n---------------------------------\n')
        if learning_task.early_interruption == 0:
            print("Training finished successfully!")
        else:
            print("Training results have been saved successfully! [Note: training was interrupted by user]")
        return learning_task
    # TODO: change the save method and add nfq & ddpg
    return learning_task
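
For reference, a minimal sketch of the environment interface train() relies on, inferred from the test loop above: current_state, reset, step, state_label, and action_space. The ToyGrid class, its layout, and the call at the bottom are hypothetical, not part of the repo.

class ToyGrid:
    # a 3x3 grid whose bottom-right cell carries the label 'goal1'
    def __init__(self):
        self.shape = [3, 3]
        self.current_state = [0, 0]
        self.action_space = ['up', 'down', 'left', 'right']

    def reset(self):
        self.current_state = [0, 0]

    def step(self, action):
        # deterministic moves, clipped at the grid boundary
        moves = {'up': [-1, 0], 'down': [1, 0], 'left': [0, -1], 'right': [0, 1]}
        row = min(max(self.current_state[0] + moves[action][0], 0), self.shape[0] - 1)
        col = min(max(self.current_state[1] + moves[action][1], 0), self.shape[1] - 1)
        self.current_state = [row, col]
        return self.current_state

    def state_label(self, state):
        return ['goal1'] if state == [self.shape[0] - 1, self.shape[1] - 1] else []


# hypothetical call, commented out because train_ql lives in lcrl_core and may expect more of the MDP:
# learned = train(ToyGrid(), slp_easy, algorithm='ql', episode_num=500, iteration_num_max=100)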
Example #10
File: slp_easy.py  Project: grockious/lcrl
from src.automata.ldba import LDBA

# an example automaton for "goal1" or "F goal1"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
slp_easy = LDBA(accepting_sets=[[1]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'goal1' in label:
            self.automaton_state = 1
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        self.automaton_state = 1

    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(slp_easy, LDBA)

# finally, does the LDBA contain an epsilon-transition? if so, then
# for each state with an outgoing epsilon-transition define a distinct epsilon label
# example: <LDBA_object>.epsilon_transitions = {0: ['epsilon_0'], 4: ['epsilon_1']}
# "0" and "4" are automaton_states
Example #11
from src.automata.ldba import LDBA

# an example automaton for "goal1 while avoiding unsafe" or "F goal1 & G !unsafe"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
frozenlake_reach_avoid = LDBA(accepting_sets=[[1]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'goal1' in label and 'unsafe' not in label:
            self.automaton_state = 1
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 2
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(frozenlake_reach_avoid, LDBA)
Example #12
from src.automata.ldba import LDBA

# an example automaton for "goal1 then goal2 then goal3 then goal4" or
# "F (goal1 & XF (goal2 & XF (goal3 & XF goal4)))"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
slp_hard = LDBA(accepting_sets=[[4]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'goal1' in label:
            self.automaton_state = 1
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if 'goal2' in label:
            self.automaton_state = 2
        else:
            self.automaton_state = 1
    # state 2
    elif self.automaton_state == 2:
        if 'goal3' in label:
            self.automaton_state = 3
        else:
            self.automaton_state = 2
    # state 3
    elif self.automaton_state == 3:
        if 'goal4' in label:
            self.automaton_state = 4
        else:
            self.automaton_state = 3
    # state 4
    elif self.automaton_state == 4:
        self.automaton_state = 4
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(slp_hard, LDBA)
Example #13
from src.automata.ldba import LDBA

# an example automaton for "goal2 then goal1 while avoiding unsafe" or "F (goal2 & XF (goal1)) & G !unsafe"
# only the automaton "step" function and the "accepting_sets" attribute need to be specified
# "accepting_sets" for Generalised Büchi Accepting (more details here https://bit.ly/ldba_paper)
mars_rover_2_and_4 = LDBA(accepting_sets=[[2]])


# "step" function for the automaton transitions (input: label, output: automaton_state, un-accepting sink state is "-1")
def step(self, label):
    # state 0
    if self.automaton_state == 0:
        if 'goal2' in label and 'unsafe' not in label:
            self.automaton_state = 1
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 0
    # state 1
    elif self.automaton_state == 1:
        if 'goal1' in label and 'unsafe' not in label:
            self.automaton_state = 2
        elif 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 1
    # state 2
    elif self.automaton_state == 2:
        if 'unsafe' in label:
            self.automaton_state = -1  # un-accepting sink state
        else:
            self.automaton_state = 3
    # step function returns the new automaton state
    return self.automaton_state


# now override the step function
LDBA.step = step.__get__(mars_rover_2_and_4, LDBA)