    def set_action_values(self, action_values: Dict[FarmState, List[float]]):
        """Redraw one colored arrow per non-terminal (state, action) pair, colored by its action value."""
        grid_dim_x, grid_dim_y = self.env.grid_shape

        for grid_arrows_row in self.grid_arrows:
            for grid_arrow in grid_arrows_row:
                self.board.delete(grid_arrow)

        self.grid_arrows: List[List[List]] = []
        for pos_i in range(grid_dim_x):
            grid_arrows_row: List = []
            for pos_j in range(grid_dim_y):
                state: FarmState = FarmState((pos_i, pos_j), self.goal_idx,
                                             self.plant_idxs, self.rocks_idxs)
                if self.env.is_terminal(state):
                    continue

                for action, action_value in enumerate(action_values[state]):
                    color = _get_color(action_value, self.val_min,
                                       self.val_max)
                    # color = hsl_interp((action_value - self.val_min) / (self.val_max - self.val_min))
                    grid_arrow = self._create_arrow(action, pos_i, pos_j,
                                                    color)
                    grid_arrows_row.append(grid_arrow)

            self.grid_arrows.append(grid_arrows_row)
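
# Several of the examples call a module-level helper _get_color(val, val_min, val_max) that is
# not shown in this listing. A minimal sketch, assuming a simple linear red-to-green interpolation
# returning a Tk-style hex color string (an illustration, not the original implementation):
def _get_color(val: float, val_min: float, val_max: float) -> str:
    if val_max <= val_min:
        frac = 0.0
    else:
        frac = (val - val_min) / (val_max - val_min)
    frac = min(max(frac, 0.0), 1.0)  # clamp to [0, 1]
    red = int(255 * (1.0 - frac))    # low values -> red
    green = int(255 * frac)          # high values -> green
    return "#%02x%02x00" % (red, green)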
Example #2
    def _update_policy(self, policy: Dict[FarmState, List[float]]):
        """Redraw a gray-shaded arrow for each non-terminal (state, action) pair with nonzero probability."""
        grid_dim_x, grid_dim_y = self.env.grid_shape

        for grid_arrows_row in self.grid_arrows:
            for grid_arrow in grid_arrows_row:
                self.board.delete(grid_arrow)

        self.grid_arrows: List[List[List]] = []
        for pos_i in range(grid_dim_x):
            grid_arrows_row: List = []
            for pos_j in range(grid_dim_y):
                state: FarmState = FarmState((pos_i, pos_j), self.goal_idx,
                                             self.plant_idxs, self.rocks_idxs)
                if self.env.is_terminal(state):
                    continue

                for action, policy_prob in enumerate(policy[state]):
                    if policy_prob == 0.0:
                        continue
                    color: str = "gray%i" % (100 - 100 * policy_prob)
                    grid_arrow = self._create_arrow(action, pos_i, pos_j,
                                                    color)
                    grid_arrows_row.append(grid_arrow)

            self.grid_arrows.append(grid_arrows_row)
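
# The gray shading in _update_policy maps higher action probabilities to darker arrows.
# A small standalone illustration with made-up probabilities (Tk accepts "gray0" = black
# through "gray100" = white):
policy_probs = [0.7, 0.2, 0.1, 0.0]
for action, prob in enumerate(policy_probs):
    if prob == 0.0:
        continue  # zero-probability actions are not drawn
    print(action, "gray%i" % (100 - 100 * prob))
# prints: 0 gray30 / 1 gray80 / 2 gray90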
Example #3
    def _update_action_vals_color_dqn(self, dqn, device):
        dqn.eval()
        cell_score_max: float = self.val_max
        cell_score_min: float = self.val_min

        grid_dim_x, grid_dim_y = self.env.grid_shape

        for pos_i in range(grid_dim_x):
            for pos_j in range(grid_dim_y):
                state: FarmState = FarmState((pos_i, pos_j), self.goal_idx,
                                             self.plant_idxs, self.rocks_idxs)

                if self.env.is_terminal(state):
                    continue

                state_tens = torch.tensor(self.env.state_to_nnet_input(state),
                                          device=device)
                action_vals_state = dqn(
                    state_tens.float()).cpu().data.numpy()[0, :]

                for action in range(self.num_actions):
                    action_val: float = action_vals_state[action]
                    color = _get_color(action_val, cell_score_min,
                                       cell_score_max)
                    self.board.itemconfigure(
                        self.action_val_arrows[pos_i][pos_j][action],
                        fill=color)
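
# The [0, :] indexing above assumes state_to_nnet_input returns a batch of one row and that the
# DQN outputs one value per action. A minimal standalone sketch of those shapes, using a
# hypothetical one-hot position encoding on a 5x5 grid and a throwaway network:
import numpy as np
import torch
import torch.nn as nn

grid_dim_x, grid_dim_y = 5, 5
agent_idx = (2, 3)
nnet_input_np = np.zeros((1, grid_dim_x * grid_dim_y), dtype=np.float32)  # shape (1, 25)
nnet_input_np[0, agent_idx[0] * grid_dim_y + agent_idx[1]] = 1.0

dqn = nn.Sequential(nn.Linear(grid_dim_x * grid_dim_y, 32), nn.ReLU(), nn.Linear(32, 4))
dqn.eval()
with torch.no_grad():
    action_vals_state = dqn(torch.tensor(nnet_input_np)).numpy()[0, :]  # shape (4,): one value per action
print(action_vals_state)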
Example #4
    def greedy_policy_vis(self, num_steps: int):
        """Animate the policy that is greedy with respect to self.action_vals for up to num_steps steps."""
        def _update():
            self.window.update()

        curr_state = FarmState(self.start_idx, self.goal_idx, self.plant_idxs,
                               self.rocks_idxs)

        self.board.delete(self.agent_img)
        self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                          [curr_state.agent_idx])[0]
        _update()
        time.sleep(self.wait)

        print("Step: ", end='', flush=True)
        for itr in range(num_steps):
            print("%i..." % itr, end='', flush=True)

            if self.env.is_terminal(curr_state):
                break

            action: int = int(np.argmax(self.action_vals[curr_state]))
            curr_state, _ = self.env.sample_transition(curr_state, action)

            self.board.delete(self.agent_img)
            self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                              [curr_state.agent_idx])[0]

            _update()
            time.sleep(self.wait)

        print("")
Example #5
    def q_learning(self, epsilon: float, learning_rate: float,
                   wait_step: float):
        """Tabular Q-learning with epsilon-greedy exploration, visualized on the board."""
        state: FarmState = FarmState(self.start_idx, self.goal_idx,
                                     self.plant_idxs, self.rocks_idxs)

        def _update():
            self._update_action_vals_color()
            self.window.update()

        episode_num: int = 0
        print("Q-learning, episode %i" % episode_num)
        while episode_num < 1000:
            if self.env.is_terminal(state):
                episode_num = episode_num + 1
                if episode_num % 100 == 0:
                    print("Visualizing greedy policy")
                    _update()
                    self.greedy_policy_vis(40)
                state = FarmState(self.start_idx, self.goal_idx,
                                  self.plant_idxs, self.rocks_idxs)

                print("Q-learning, episode %i" % episode_num)

            state, self.action_vals = q_learning_step(self.env, state,
                                                      self.action_vals,
                                                      epsilon, learning_rate,
                                                      self.discount)

            if wait_step > 0.0:
                self.board.delete(self.agent_img)
                self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                                  [state.agent_idx])[0]

                _update()
                time.sleep(wait_step)

        _update()

        print("DONE")
Example #6
    def _init_action_vals_color(self):
        grid_dim_x, grid_dim_y = self.env.grid_shape

        self.action_val_arrows = []
        for pos_i in range(grid_dim_x):
            grid_arrows_row: List = []
            for pos_j in range(grid_dim_y):

                state_action_val_arrows: List = []
                state = FarmState((pos_i, pos_j), self.goal_idx,
                                  self.plant_idxs, self.rocks_idxs)
                if not self.env.is_terminal(state):
                    for action in range(self.num_actions):
                        grid_arrow = self._create_arrow(
                            action, pos_i, pos_j, "white")
                        state_action_val_arrows.append(grid_arrow)

                grid_arrows_row.append(state_action_val_arrows)

            self.action_val_arrows.append(grid_arrows_row)
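
# _create_arrow is defined elsewhere in the class and is not shown in this listing. A rough
# sketch of what such a helper could look like, assuming it draws a short Tk canvas line with
# an arrowhead pointing in the action's direction (0: up, 1: down, 2: left, 3: right):
def _create_arrow_sketch(board, action, pos_i, pos_j, color, width=70):
    cx = pos_i * width + width / 2
    cy = pos_j * width + width / 2
    dx, dy = [(0, -1), (0, 1), (-1, 0), (1, 0)][action]
    return board.create_line(cx, cy, cx + dx * width / 3, cy + dy * width / 3,
                             arrow="last", fill=color)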
Example #7
    def _update_action_vals_color(self):
        cell_score_max: float = self.val_max
        if self.val_min is None:
            cell_score_min: float = min(self.state_vals.values()) - 1E-9
        else:
            cell_score_min: float = self.val_min

        grid_dim_x, grid_dim_y = self.env.grid_shape

        for pos_i in range(grid_dim_x):
            for pos_j in range(grid_dim_y):
                state: FarmState = FarmState((pos_i, pos_j), self.goal_idx,
                                             self.plant_idxs, self.rocks_idxs)
                if self.env.is_terminal(state):
                    continue

                for action in range(self.num_actions):
                    action_val: float = self.action_vals[state][action]
                    color = _get_color(action_val, cell_score_min,
                                       cell_score_max)
                    self.board.itemconfigure(
                        self.action_val_arrows[pos_i][pos_j][action],
                        fill=color)
Example #8
def get_environment(env_name: str):
    """Construct an environment, its visualizer (if any), and its list of states from a name string."""
    env_name = env_name.lower()
    farm_regex = re.search(r"aifarm(_(\S+))?", env_name)
    env: Environment

    if farm_regex is not None:
        from environments.farm_grid_world import FarmGridWorld, FarmState
        from visualizer.farm_visualizer import InteractiveFarm

        grid = np.loadtxt("maps/map1.txt")
        grid = np.transpose(grid)

        assert np.sum(grid == 1) == 1, "Only one agent allowed"
        assert np.sum(grid == 2) == 1, "Only one goal allowed"

        env: FarmGridWorld = FarmGridWorld(grid.shape, float(farm_regex.group(2)), grid)
        viz = InteractiveFarm(env, grid)

        # get states
        states: List[FarmState] = []

        for pos_i in range(grid.shape[0]):
            for pos_j in range(grid.shape[1]):
                state: FarmState = FarmState((pos_i, pos_j), viz.goal_idx, viz.plant_idxs, viz.rocks_idxs)
                states.append(state)
    elif env_name == "puzzle8":
        from environments.n_puzzle import NPuzzle
        env = NPuzzle(3)

        with open("data/puzzle8.pkl", "rb") as puzzle_file:
            states = pickle.load(puzzle_file)['states']

        viz = None
    else:
        raise ValueError('No known environment %s' % env_name)

    return env, viz, states
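
# Hypothetical usage, assuming the suffix after "aifarm_" encodes the float passed to
# FarmGridWorld (the float conversion above requires it to be present) and that
# maps/map1.txt exists relative to the working directory:
env, viz, states = get_environment("aifarm_0.1")
print(type(env).__name__, len(states))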
Example #9
    def greedy_policy_vis_dqn(self, num_steps: int, dqn: nn.Module, device):
        def _update():
            self.window.update()

        curr_state = FarmState(self.start_idx, self.goal_idx, self.plant_idxs,
                               self.rocks_idxs)

        self.board.delete(self.agent_img)
        self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                          [curr_state.agent_idx])[0]
        _update()
        time.sleep(self.wait)

        print("Step: ", end='', flush=True)
        for itr in range(num_steps):
            print("%i..." % itr, end='', flush=True)

            if self.env.is_terminal(curr_state):
                break

            state_tens = torch.tensor(self.env.state_to_nnet_input(curr_state),
                                      device=device)
            action_vals_state = dqn(
                state_tens.float()).cpu().data.numpy()[0, :]

            action: int = int(np.argmax(action_vals_state))
            curr_state, _ = self.env.sample_transition(curr_state, action)

            self.board.delete(self.agent_img)
            self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                              [curr_state.agent_idx])[0]

            _update()
            time.sleep(self.wait)

        print("")
Example #10
    def astar(self, weight: float):
        """Weighted A* search guided by a learned state-value heuristic, visualized on the board."""
        # get nnet
        torch.set_num_threads(1)
        device: torch.device = torch.device("cpu")

        nnet = self.env.get_state_value_nnet()
        state_dict = torch.load(
            "saved_models/supervised_small/model_state_dict.pt")
        nnet.load_state_dict(state_dict)
        nnet.eval()

        # get heuristic function
        def heuristic_fn(states):
            # return np.zeros(len(states))
            nnet_inputs_np_l = [
                self.env.state_to_nnet_input(state_i) for state_i in states
            ]
            nnet_input_np = np.concatenate(nnet_inputs_np_l, axis=0)
            nnet_input = torch.tensor(nnet_input_np, device=device)
            state_vals: np.ndarray = nnet(
                nnet_input.float()).cpu().data.numpy()[:, 0]

            return -state_vals

        state: FarmState = FarmState(self.start_idx, self.goal_idx,
                                     self.plant_idxs, self.rocks_idxs)
        astar = AStar(state, self.env, heuristic_fn, weight)

        grid_dim_x, grid_dim_y = self.env.grid_shape
        grid_text_astar: List[List[List]] = []
        for pos_i in range(grid_dim_x):
            grid_text_rows: List = []
            for pos_j in range(grid_dim_y):
                txt_i = (pos_i + 0.5) * self.width
                txt_j = pos_j * self.width + self.text_offset

                txt1 = self.board.create_text(txt_i,
                                              txt_j,
                                              text="",
                                              fill="black")
                txt2 = self.board.create_text(txt_i,
                                              txt_j + 20,
                                              text="",
                                              fill="black")
                txt3 = self.board.create_text(txt_i,
                                              txt_j + 40,
                                              text="",
                                              fill="black")

                grid_text_rows.append([txt1, txt2, txt3])
            grid_text_astar.append(grid_text_rows)

        def _update():
            for node in astar.instance.closed_dict.keys():
                pos_i_up, pos_j_up = node.state.agent_idx
                self.board.itemconfigure(self.grid_squares[pos_i_up][pos_j_up],
                                         fill="red")

            for node in astar.instance.open_set:
                pos_i_up, pos_j_up = node.state.agent_idx
                self.board.itemconfigure(self.grid_squares[pos_i_up][pos_j_up],
                                         fill="grey")
                self.board.itemconfigure(
                    grid_text_astar[pos_i_up][pos_j_up][0],
                    text='g=%.1f' % node.path_cost)
                self.board.itemconfigure(
                    grid_text_astar[pos_i_up][pos_j_up][1],
                    text='h=%.1f' % node.heuristic)
                self.board.itemconfigure(
                    grid_text_astar[pos_i_up][pos_j_up][2],
                    text='f=%.1f' % node.cost)

            self.window.update()

        while not astar.is_solved():
            if self.wait > 0:
                _update()
                time.sleep(self.wait)

            astar.step(heuristic_fn)

        if self.wait > 0:
            _update()
            time.sleep(self.wait)

        actions = astar.get_soln_actions()

        for action in actions:
            state = self.env.sample_transition(state, action)[0]
            self.board.delete(self.agent_img)
            self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                              [state.agent_idx])[0]

            self.window.update()
            time.sleep(0.1)
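
# The g/h/f values displayed above come from the search nodes (path_cost, heuristic, cost).
# A small illustration of how a weighted A* node cost is typically formed, assuming
# cost = path_cost + weight * heuristic (the convention the weight argument suggests):
def node_cost_sketch(path_cost: float, heuristic: float, weight: float) -> float:
    return path_cost + weight * heuristic

print(node_cost_sketch(4.0, 2.5, 1.0))  # 6.5: plain A*
print(node_cost_sketch(4.0, 2.5, 2.0))  # 9.0: weighted A*, greedier toward the heuristic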
Example #11
    def policy_gradient(self, learning_rate: float, wait_step: float):
        """REINFORCE-style policy gradient with undiscounted returns-to-go, visualized on the board."""
        torch.set_num_threads(1)
        device: torch.device = torch.device("cpu")
        nnet = self.env.get_policy_nnet()

        optimizer: Optimizer = optim.Adam(nnet.parameters(), lr=learning_rate)

        def _update():
            nnet.eval()
            policy: Dict[FarmState, List[float]] = {}
            for state_up in self.states:
                nnet_input_np_state_up = self.env.state_to_nnet_input(state_up)
                nnet_input_up = torch.tensor(nnet_input_np_state_up,
                                             device=device)
                policy[state_up] = list(
                    nnet(nnet_input_up.float()).cpu().data.numpy()[0, :])

            self._update_policy(policy)
            self.window.update()

        episode_num: int = 0
        max_steps: int = 100
        while episode_num < 1000:
            state = FarmState(self.start_idx, self.goal_idx, self.plant_idxs,
                              self.rocks_idxs)
            print("Policy gradient, episode %i" % episode_num)

            episode_num = episode_num + 1

            if episode_num % 10 == 0:
                print("Visualizing greedy policy")
                _update()
                time.sleep(self.wait)

            actions: List[int] = []
            rewards: List[float] = []
            nnet_inputs_np_l = []
            nnet.eval()
            for episode_step in range(max_steps):
                nnet_input_np_state = self.env.state_to_nnet_input(state)
                nnet_input = torch.tensor(nnet_input_np_state, device=device)
                action_probs = nnet(
                    nnet_input.float()).cpu().data.numpy()[0, :]

                nnet_inputs_np_l.append(nnet_input_np_state)

                action = np.random.choice(4, p=action_probs)
                state, reward = self.env.sample_transition(state, action)

                actions.append(action)
                rewards.append(reward)

                if self.env.is_terminal(state):
                    break

            nnet_inputs_np = np.concatenate(nnet_inputs_np_l, axis=0)

            nnet.train()
            optimizer.zero_grad()

            nnet_inputs = torch.tensor(nnet_inputs_np, device=device)
            nnet_outputs = nnet(nnet_inputs.float())

            actions_t = torch.tensor(np.array(actions),
                                     device=device).long().unsqueeze(1)
            log_prob = torch.log(nnet_outputs)
            log_prob_actions = log_prob.gather(1, actions_t)[:, 0]

            returns = np.cumsum(np.array(rewards)[::-1])[::-1]

            returns_t = torch.tensor(returns.astype(np.float32), device=device)
            loss = torch.mean(-returns_t * log_prob_actions)

            loss.backward()
            optimizer.step()

            if wait_step > 0.0:
                self.board.delete(self.agent_img)
                self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                                  [state.agent_idx])[0]

                _update()
                time.sleep(wait_step)

        _update()

        print("DONE")
Example #12
    def deep_q_learning(self, epsilon: float, learning_rate: float,
                        batch_size: int, wait_step: float):
        """Deep Q-learning with a replay buffer and a periodically synced target network, visualized on the board."""
        state: FarmState = FarmState(self.start_idx, self.goal_idx,
                                     self.plant_idxs, self.rocks_idxs)

        torch.set_num_threads(1)
        device: torch.device = torch.device("cpu")
        dqn: nn.Module = get_dqn()
        optimizer: Optimizer = optim.Adam(dqn.parameters(), lr=learning_rate)

        dqn_target: nn.Module = get_dqn()
        dqn_target.eval()

        replay_buffer: List = []

        def _update():
            self._update_action_vals_color_dqn(dqn, device)
            self.window.update()

        _update()

        episode_num: int = 0
        step_num: int = 0
        update_num: int = 100
        total_steps: int = 0
        print("Q-learning, episode %i" % episode_num)
        while episode_num < 1000:
            dqn.eval()
            if self.env.is_terminal(state) or (step_num >= 50):
                episode_num = episode_num + 1
                if episode_num % 100 == 0:
                    print("Visualizing greedy policy")
                    _update()
                    self.greedy_policy_vis_dqn(40, dqn, device)

                # state = np.random.choice(self.states)
                state = FarmState(self.start_idx, self.goal_idx,
                                  self.plant_idxs, self.rocks_idxs)

                step_num: int = 0
                print("Q-learning, episode %i" % episode_num)

            state, dqn, replay_buffer = deep_q_learning_step(
                self.env, state, dqn, dqn_target, epsilon, self.discount,
                batch_size, optimizer, device, replay_buffer)

            if total_steps % update_num == 0:
                dqn_target.load_state_dict(dqn.state_dict())
                dqn_target.eval()

            if len(replay_buffer) > 10000:
                replay_buffer.pop(0)

            if wait_step > 0.0:
                self.board.delete(self.agent_img)
                self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                                  [state.agent_idx])[0]

                _update()
                time.sleep(wait_step)

            step_num += 1
            total_steps += 1

        _update()

        print("DONE")
Example #13
    def __init__(self,
                 env: FarmGridWorld,
                 grid: np.ndarray,
                 discount: float,
                 val_type: str,
                 show_policy: bool = True,
                 wait: float = 0.1,
                 val_min: Optional[float] = None):
        # 0: up, 1: down, 2: left, 3: right

        super().__init__()
        # initialize environment
        self.wait: float = wait
        self.val_type: str = val_type.upper()
        self.show_policy: bool = show_policy
        self.val_min: Optional[float] = val_min
        self.val_max: float = 0

        self.env: FarmGridWorld = env
        self.discount: float = discount

        self.num_actions: int = 4

        self.agent_idx: Tuple[int, int] = mask_to_idxs(grid, 1)[0]
        self.start_idx = self.agent_idx

        self.goal_idx: Tuple[int, int] = mask_to_idxs(grid, 2)[0]
        self.plant_idxs: List[Tuple[int, int]] = mask_to_idxs(grid, 3)
        self.rocks_idxs: List[Tuple[int, int]] = mask_to_idxs(grid, 4)

        # enumerate states
        self.states: List[FarmState] = []

        for pos_i in range(grid.shape[0]):
            for pos_j in range(grid.shape[1]):
                state: FarmState = FarmState((pos_i, pos_j), self.goal_idx,
                                             self.plant_idxs, self.rocks_idxs)
                self.states.append(state)

        # enumerate value funcs
        self.state_vals: Dict[FarmState, float] = dict()
        self.action_vals: Dict[FarmState, List[float]] = dict()
        self.state_visits: Dict[FarmState, int] = dict()
        for state in self.states:
            self.state_visits[state] = 0
            self.state_vals[state] = 0.0

            # action values start at zero for every state, terminal or not
            self.action_vals[state] = [0] * self.num_actions

        # initialize board
        self.window = tkinter.Tk()
        self.window.wm_title("CSE 790 Farm")

        self.width: int = 70
        self.width_half: int = int(self.width / 2)
        self.text_offset: int = 17

        # load pictures
        path = os.getcwd() + "/images/"
        self.goal_pic = ImageTk.PhotoImage(file=path + 'goal.png')
        self.plant_pic = ImageTk.PhotoImage(file=path + 'plant.png')
        self.robot_pic = ImageTk.PhotoImage(file=path + 'robot.png')
        self.rocks_pic = ImageTk.PhotoImage(file=path + 'rocks.png')

        grid_dim_x, grid_dim_y = env.grid_shape

        self.board: Canvas = Canvas(self.window,
                                    width=grid_dim_y * self.width + 2,
                                    height=grid_dim_x * self.width + 2)

        # create initial grid squares
        self.grid_squares: List[List] = []
        for pos_i in range(grid_dim_x):
            grid_squares_row: List = []
            for pos_j in range(grid_dim_y):
                square = self.board.create_rectangle(
                    pos_i * self.width + 4,
                    pos_j * self.width + 4, (pos_i + 1) * self.width + 4,
                    (pos_j + 1) * self.width + 4,
                    fill="white",
                    width=1)

                grid_squares_row.append(square)
            self.grid_squares.append(grid_squares_row)

        # create figures
        self._place_imgs(self.board, self.goal_pic, [self.goal_idx])
        self._place_imgs(self.board, self.plant_pic, self.plant_idxs)
        self._place_imgs(self.board, self.rocks_pic, self.rocks_idxs)
        self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                          [self.agent_idx])[0]

        # create grid arrows
        self.grid_arrows: List[List[List]] = []

        if self.val_type == "STATE":
            # create initial grid values
            self.grid_text: List[List] = []
            for pos_i in range(grid_dim_x):
                grid_text_rows: List = []
                for pos_j in range(grid_dim_y):
                    val = self.board.create_text(
                        pos_i * self.width + self.width_half,
                        pos_j * self.width + self.width_half,
                        text="",
                        fill="black")
                    grid_text_rows.append(val)
                self.grid_text.append(grid_text_rows)

        self.board.pack(side=LEFT)

        do_buttons: bool = False
        if do_buttons:
            # make control buttons
            panel = Frame(self.window)
            panel.pack(side=RIGHT)
            Label(text="Buttons\n", font="Verdana 12 bold").pack()

            value_itr_frame = Frame(self.window)
            value_itr_frame.pack()

            b1 = Button(text="Save Figure")
            b1.bind("<Button-1>", self.save_board)
            b1.pack()

            vi_button = Button(text="Value Iteration")
            vi_button.bind("<Button-1>", self.value_iteration)
            vi_button.pack()

        if self.val_type == "ACTION":
            self._init_action_vals_color()

        # self.update()

        # self.monte_carlo_policy_evaluation()
        # self.td_policy_evaluation(5)
        # self.td_lambda_policy_evaluation(0.5)
        # self.policy_evaluation()
        # self.q_learning()

        self.window.update()
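
# Hypothetical usage of this constructor, assuming maps/map1.txt encodes 1 = agent, 2 = goal,
# 3 = plant, 4 = rocks (as the mask_to_idxs calls above imply) and using the signature shown here:
import numpy as np

grid = np.transpose(np.loadtxt("maps/map1.txt"))
env = FarmGridWorld(grid.shape, 0.0, grid)
viz = InteractiveFarm(env, grid, discount=0.9, val_type="action")
viz.q_learning(epsilon=0.1, learning_rate=0.5, wait_step=0.0)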
Example #14
    def __init__(self, env: FarmGridWorld, grid: np.ndarray):
        # 0: up, 1: down, 2: left, 3: right

        super().__init__()
        # initialize environment
        self.val_max: float = 0

        self.env: FarmGridWorld = env

        self.num_actions: int = 4

        self.agent_idx: Tuple[int, int] = mask_to_idxs(grid, 1)[0]
        self.start_idx = self.agent_idx

        self.goal_idx: Tuple[int, int] = mask_to_idxs(grid, 2)[0]
        self.plant_idxs: List[Tuple[int, int]] = mask_to_idxs(grid, 3)
        self.rocks_idxs: List[Tuple[int, int]] = mask_to_idxs(grid, 4)

        # enumerate states
        self.states: List[FarmState] = []

        for pos_i in range(grid.shape[0]):
            for pos_j in range(grid.shape[1]):
                state: FarmState = FarmState((pos_i, pos_j), self.goal_idx,
                                             self.plant_idxs, self.rocks_idxs)
                self.states.append(state)

        # enumerate value funcs
        self.state_vals: Dict[FarmState, float] = dict()
        self.action_vals: Dict[FarmState, List[float]] = dict()
        self.state_visits: Dict[FarmState, int] = dict()
        for state in self.states:
            self.state_visits[state] = 0
            self.state_vals[state] = 0.0

            # action values start at zero for every state, terminal or not
            self.action_vals[state] = [0] * self.num_actions

        # initialize board
        self.window = tkinter.Tk()
        self.window.wm_title("AI Farm")

        self.width: int = 70
        self.width_half: int = int(self.width / 2)
        self.text_offset: int = 17

        # load pictures
        path = os.getcwd() + "/images/"
        self.goal_pic = ImageTk.PhotoImage(file=path + 'goal.png')
        self.plant_pic = ImageTk.PhotoImage(file=path + 'plant.png')
        self.robot_pic = ImageTk.PhotoImage(file=path + 'robot.png')
        self.rocks_pic = ImageTk.PhotoImage(file=path + 'rocks.png')

        grid_dim_x, grid_dim_y = env.grid_shape

        self.board: Canvas = Canvas(self.window,
                                    width=grid_dim_y * self.width + 2,
                                    height=grid_dim_x * self.width + 2)

        # create initial grid squares
        self.grid_squares: List[List] = []
        for pos_i in range(grid_dim_x):
            grid_squares_row: List = []
            for pos_j in range(grid_dim_y):
                square = self.board.create_rectangle(
                    pos_i * self.width + 4,
                    pos_j * self.width + 4, (pos_i + 1) * self.width + 4,
                    (pos_j + 1) * self.width + 4,
                    fill="white",
                    width=1)

                grid_squares_row.append(square)
            self.grid_squares.append(grid_squares_row)

        # create figures
        self._place_imgs(self.board, self.goal_pic, [self.goal_idx])
        self._place_imgs(self.board, self.plant_pic, self.plant_idxs)
        self._place_imgs(self.board, self.rocks_pic, self.rocks_idxs)
        self.agent_img = self._place_imgs(self.board, self.robot_pic,
                                          [self.agent_idx])[0]

        # create grid arrows
        self.grid_arrows: List[List[List]] = []

        self.board.pack(side=LEFT)

        self.window.update()