Esempio n. 1
0
    def __init__(self, master, grid_top, grid_bottom, grid_left, grid_right,
                 controller_row, controller_column, width, height, video_path,
                 update_requested, update_frame=None):
        """Build the video frame and its play/pause controller.

        The frame is a child of ``master`` and is gridded over the inclusive
        cell range ``grid_top..grid_bottom`` by ``grid_left..grid_right``.
        A second frame holding the play and pause images is also created as
        a child of ``master`` and gridded at
        ``(controller_row, controller_column)``.

        ``width`` and ``height`` are forwarded to ``tk.Frame``; ``width`` is
        additionally stored as ``video_width``. ``video_path``,
        ``update_requested`` and ``update_frame`` are only stored here.
        """
        tk.Frame.__init__(self, master, width=width, height=height)

        # values kept for later use by the rest of the class
        self.master = master
        self.update_requested = update_requested
        self.update_frame = update_frame
        self.video_path = video_path
        self.video_width = width
        self.current_frame = 0

        # label occupying the single cell of this frame
        self.video_label = tk.Label(self)
        self.video_label.grid(row=0, column=0)

        # controller frame holding the play/pause images
        self.controller = tk.Frame(self.master)
        icon_scale = 0.2

        def load_icon(path):
            # open, scale and wrap an image file for use in a Tk label
            return ImageTk.PhotoImage(
                im.scale_image(Image.open(path), icon_scale))

        self.play_image = load_icon("media/video_animation/play.png")
        self.play_button = tk.Label(self.controller, image=self.play_image)
        self.pause_image = load_icon("media/video_animation/pause.png")
        self.pause_button = tk.Label(self.controller, image=self.pause_image)

        # both labels share one grid cell; raise "play" above "pause"
        self.play_button.lift(self.pause_button)

        self.play_button.bind("<Button-1>", self.play)
        self.pause_button.bind("<Button-1>", self.pause)

        fill_cell = tk.NW + tk.SE
        self.play_button.grid(row=0, column=0, sticky=fill_cell)
        self.pause_button.grid(row=0, column=0, sticky=fill_cell)

        # place this frame and the controller inside master's grid
        row_count = grid_bottom - grid_top + 1
        column_count = grid_right - grid_left + 1
        self.grid(row=grid_top,
                  column=grid_left,
                  rowspan=row_count,
                  columnspan=column_count)
        self.controller.grid(row=controller_row, column=controller_column)

        self.playing = False
    def create_widgets(self):
        """Create and lay out the title page.

        The page consists of a large heading, the logo image, an author
        line and a button that asks the controller to show the roadmap frame.
        """
        # grid weights: three columns (2/6/2) and two rows (7/1)
        for column, weight in enumerate((2, 6, 2)):
            self.columnconfigure(column, weight=weight)
        for row, weight in enumerate((7, 1)):
            self.rowconfigure(row, weight=weight)

        # logo, scaled to half size; the PhotoImage is stored on self
        logo_scale = 0.5
        self.logo_image_path = 'media/module_prelude/title/airi_logo.png'
        logo_source = Image.open(self.logo_image_path)
        self.logo_image = ImageTk.PhotoImage(
            im.scale_image(logo_source, logo_scale))

        # widgets
        small_font = ("Courier", 14)
        self.title = tk.Label(
            self,
            text="Reinforcement Learning\n&\nDeep Q-Learning\n101",
            font=("Courier", 48))
        self.logo_image_label = tk.Label(self, image=self.logo_image)
        self.author = tk.Label(self,
                               text="Author: Channy Hong",
                               font=small_font)
        self.button_to_roadmap = tk.Button(
            self,
            command=lambda: self.controller.show_frame(roadmap_frame),
            text="To Roadmap",
            font=small_font)

        # layout: heading on the top row, everything else on the bottom row
        self.title.grid(row=0, column=1)
        self.logo_image_label.grid(row=1, column=0)
        self.author.grid(row=1, column=1)
        self.button_to_roadmap.grid(row=1, column=2, padx=40, sticky=tk.E)
    def create_widgets(self):
        """Create and lay out the roadmap page.

        The page has a title, five section images (columns 1-5 of row 1),
        a column of topic buttons under each image (starting at row 2), and
        "Previous"/"Next" buttons on the bottom row. Each topic button asks
        the controller to show the matching frame; three of them also call
        ``video_animation.open_video()`` on the frame they switch to.
        """
        # grid weights: 7 columns and 9 rows
        for column, weight in enumerate((1, 2, 2, 2, 2, 2, 1)):
            self.columnconfigure(column, weight=weight)
        for row, weight in enumerate((3, 3, 1, 1, 1, 1, 1, 1, 3)):
            self.rowconfigure(row, weight=weight)

        # section images
        section_image_scale = 1

        def scaled_photo(path):
            # open, scale and wrap an image file for use in a Tk label
            return ImageTk.PhotoImage(
                im.scale_image(Image.open(path), section_image_scale))

        self.intro_image_path = 'media/module_prelude/roadmap/intro.png'
        self.intro_image = scaled_photo(self.intro_image_path)
        self.play_image_path = 'media/module_prelude/roadmap/play.png'
        self.play_image = scaled_photo(self.play_image_path)
        self.q_image_path = 'media/module_prelude/roadmap/q.png'
        self.q_image = scaled_photo(self.q_image_path)
        self.dqn_image_path = 'media/module_prelude/roadmap/dqn.png'
        self.dqn_image = scaled_photo(self.dqn_image_path)
        self.challenge_image_path = 'media/module_prelude/roadmap/challenge.png'
        self.challenge_image = scaled_photo(self.challenge_image_path)

        # appearance shared by every topic button
        button_wraplength = 200
        button_width_text = 24
        button_height_text = 2
        button_font_size = 12

        def topic_button(label, on_click, **extra):
            # one roadmap entry; `extra` carries per-button Tk options
            return tk.Button(self,
                             command=on_click,
                             text=label,
                             font=("Courier", button_font_size),
                             width=button_width_text,
                             height=button_height_text,
                             wraplength=button_wraplength,
                             **extra)

        # title and section image labels
        self.title = tk.Label(self, text="Roadmap", font=("Courier", 48))

        self.intro_image_label = tk.Label(self, image=self.intro_image)
        self.play_image_label = tk.Label(self, image=self.play_image)
        self.q_image_label = tk.Label(self, image=self.q_image)
        self.dqn_image_label = tk.Label(self, image=self.dqn_image)
        self.challenge_image_label = tk.Label(self, image=self.challenge_image)

        # "intro" column
        self.intro_tutorial_overview_frame_button = topic_button(
            "Tutorial Overview",
            lambda: self.controller.show_frame(intro.tutorial_overview_frame),
            borderwidth=1)
        self.intro_artificial_intelligence_frame_button = topic_button(
            "Artificial Intelligence",
            lambda: self.controller.show_frame(
                intro.artificial_intelligence_frame))
        self.intro_ai_in_the_21st_century_frame_button = topic_button(
            "AI In The 21st Century",
            lambda: self.controller.show_frame(
                intro.ai_in_the_21st_century_frame))
        self.intro_briefly_on_q_learning_and_deep_q_network_frame_button = topic_button(
            "Briefly On Q-Learning & Deep Q-Network",
            lambda: self.controller.show_frame(
                intro.briefly_on_q_learning_and_deep_q_network_frame))

        # "play" column
        self.play_pong_gameplay_frame_button = topic_button(
            "Pong Gameplay",
            lambda: self.controller.show_frame(play.pong_gameplay_frame))
        self.play_reflection_on_pong_gameplay_frame_button = topic_button(
            "Reflection On Pong Gameplay",
            lambda: self.controller.show_frame(
                play.reflection_on_pong_gameplay_frame))
        self.play_terminology_definition_frame_button = topic_button(
            "Terminology: Definition",
            lambda: self.controller.show_frame(
                play.terminology_definition_frame))
        self.play_terminology_pong_mix_and_match_frame_button = topic_button(
            "Terminology: Pong Mix & Match",
            lambda: self.controller.show_frame(
                play.terminology_pong_mix_and_match_frame))
        self.play_strategy_from_pong_frame_button = topic_button(
            "Strategy From Pong",
            lambda: self.controller.show_frame(play.strategy_from_pong_frame))

        # "q" column
        self.q_action_value_function_reward_frame_button = topic_button(
            "Action-Value Function: Reward",
            lambda: self.controller.show_frame(
                q.action_value_function_reward_frame))
        self.q_action_value_function_max_q_frame_button = topic_button(
            "Action-Value Function: Max Q",
            lambda: self.controller.show_frame(
                q.action_value_function_max_q_frame))
        self.q_q_learning_algorithm_initialization_frame_button = topic_button(
            "Q-Learning Algorithm: Initialization",
            lambda: self.controller.show_frame(
                q.q_learning_algorithm_initialization_frame))
        self.q_q_learning_algorithm_training_sequence_frame_button = topic_button(
            "Q-Learning Algorithm: Training Sequence",
            lambda: [
                self.controller.show_frame(
                    q.q_learning_algorithm_training_sequence_frame),
                self.controller.frames[
                    q.q_learning_algorithm_training_sequence_frame
                ].video_animation.open_video(),
            ])
        self.q_q_learning_algorithm_exploration_v_exploitation_frame_button = topic_button(
            "Q-Learning Algorithm: Exploration v. Exploitation",
            lambda: [
                self.controller.show_frame(
                    q.q_learning_algorithm_exploration_v_exploitation_frame),
                self.controller.frames[
                    q.q_learning_algorithm_exploration_v_exploitation_frame
                ].video_animation.open_video(),
            ])
        self.q_action_value_function_revisited_discount_factor_frame_button = topic_button(
            "Action-Value Function Revisited: Discount Factor",
            lambda: self.controller.show_frame(
                q.action_value_function_revisited_discount_factor_frame))

        # "dqn" column
        self.dqn_the_large_problem_with_pixel_input_frame_button = topic_button(
            "The Large Problem With Pixel Input",
            lambda: self.controller.show_frame(
                dqn.the_large_problem_with_pixel_input_frame))
        self.dqn_q_network_frame_button = topic_button(
            "Q-Network",
            lambda: self.controller.show_frame(dqn.q_network_frame))
        self.dqn_q_network_training_sequence_frame_button = topic_button(
            "Q-Network: Training Sequence",
            lambda: [
                self.controller.show_frame(
                    dqn.q_network_training_sequence_frame),
                self.controller.frames[
                    dqn.q_network_training_sequence_frame
                ].video_animation.open_video(),
            ])
        self.dqn_replay_memory_frame_button = topic_button(
            "Replay Memory",
            lambda: self.controller.show_frame(dqn.replay_memory_frame))

        # "challenge" column
        self.challenge_challenge_dqn_player_frame_button = topic_button(
            "Challenge: DQN Player!",
            lambda: self.controller.show_frame(
                challenge.challenge_dqn_player_frame))
        self.challenge_reflection_on_evolution_of_dqn_player_frame_button = topic_button(
            "Reflection On Evolution Of DQN Player",
            lambda: self.controller.show_frame(
                challenge.reflection_on_evolution_of_dqn_player_frame))

        # page navigation
        nav_font = ("Courier", 14)
        self.previous_frame_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(title_frame),
            text="Previous",
            font=nav_font)
        self.next_frame_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(
                intro.tutorial_overview_frame),
            text="Next",
            font=nav_font)

        # layout: title across the top, images on row 1 in columns 1..5
        fill_cell = tk.NW + tk.SE
        self.title.grid(row=0, column=0, columnspan=7)

        section_labels = (
            self.intro_image_label,
            self.play_image_label,
            self.q_image_label,
            self.dqn_image_label,
            self.challenge_image_label,
        )
        for column, label in enumerate(section_labels, start=1):
            label.grid(row=1, column=column, sticky=fill_cell)

        # each tuple is one column of buttons, stacked downwards from row 2
        button_columns = (
            (
                self.intro_tutorial_overview_frame_button,
                self.intro_artificial_intelligence_frame_button,
                self.intro_ai_in_the_21st_century_frame_button,
                self.intro_briefly_on_q_learning_and_deep_q_network_frame_button,
            ),
            (
                self.play_pong_gameplay_frame_button,
                self.play_reflection_on_pong_gameplay_frame_button,
                self.play_terminology_definition_frame_button,
                self.play_terminology_pong_mix_and_match_frame_button,
                self.play_strategy_from_pong_frame_button,
            ),
            (
                self.q_action_value_function_reward_frame_button,
                self.q_action_value_function_max_q_frame_button,
                self.q_q_learning_algorithm_initialization_frame_button,
                self.q_q_learning_algorithm_training_sequence_frame_button,
                self.q_q_learning_algorithm_exploration_v_exploitation_frame_button,
                self.q_action_value_function_revisited_discount_factor_frame_button,
            ),
            (
                self.dqn_the_large_problem_with_pixel_input_frame_button,
                self.dqn_q_network_frame_button,
                self.dqn_q_network_training_sequence_frame_button,
                self.dqn_replay_memory_frame_button,
            ),
            (
                self.challenge_challenge_dqn_player_frame_button,
                self.challenge_reflection_on_evolution_of_dqn_player_frame_button,
            ),
        )
        for column, buttons in enumerate(button_columns, start=1):
            for row, button in enumerate(buttons, start=2):
                button.grid(row=row, column=column, sticky=fill_cell)

        self.previous_frame_button.grid(row=8, column=0, padx=40, sticky=tk.W)
        self.next_frame_button.grid(row=8, column=6, padx=40, sticky=tk.E)
    def create_widgets(self):
        """Create and lay out the "Q-Learning Algorithm: Initialization" page.

        Builds the title, an introductory label, the three navigation
        buttons, and three toggle elements (two texts and one image) created
        through the ``eh`` helpers. "Next" shows the training-sequence frame
        and then calls ``video_animation.open_video()`` on it.
        """
        # equal weights on a 3-column by 6-row grid
        for column in range(3):
            self.columnconfigure(column, weight=1)
        for row in range(6):
            self.rowconfigure(row, weight=1)

        nav_font = ("Courier", 14)

        # static widgets
        self.title = tk.Label(self,
                              text="Q-Learning Algorithm: Initialization",
                              font=("Courier", 48))
        self.text1 = tk.Label(
            self,
            text="Now, let's see how our Q-Learning Agent equipped with the 'Q-Learning' algorithm actually plays 'Pong'!",
            font=("Courier", 16),
            wraplength=1000,
            justify=tk.LEFT)

        # navigation buttons
        self.previous_frame_button = tk.Button(
            self,
            text="Previous",
            font=nav_font,
            command=lambda: self.controller.show_frame(
                action_value_function_max_q_frame))
        self.to_roadmap_button = tk.Button(
            self,
            text="To Roadmap",
            font=nav_font,
            command=lambda: self.controller.show_frame(prelude.roadmap_frame))
        self.next_frame_button = tk.Button(
            self,
            text="Next",
            font=nav_font,
            command=lambda: [
                self.controller.show_frame(
                    q_learning_algorithm_training_sequence_frame),
                self.controller.frames[
                    q_learning_algorithm_training_sequence_frame
                ].video_animation.open_video(),
            ])

        # toggle elements; each is given its own row/column here and is not
        # gridded again below (presumably the eh helpers place them -- confirm)
        picture_scale = 0.5
        picture_path = 'media/module_q/q_learning_algorithm_initialization_frame/initialization.png'
        self.init_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(picture_path), picture_scale))

        self.text2 = eh.toggle_label(
            self,
            "First thing's first, our Q-Learning Agent needs to iterate through every possible (state, action) pairs and initialize their action-values to 0.",
            wraplength=1000,
            row=2,
            column=1)
        self.initialization_image = eh.toggle_image(
            self,
            image=self.init_image,
            row=3,
            column=1,
            width=1000,
            height=200)
        self.text4 = eh.toggle_label(
            self,
            "Then, the game will start and our computer player will observe a state in the form of pixel data. It will compare all available action-values (3 of them in our case: up, down, or staying still) at the current state, and simply choose to take the action with the largest action-value.",
            wraplength=1000,
            row=4,
            column=1)

        # layout of the static widgets and navigation row
        self.title.grid(row=0, column=0, columnspan=3)
        self.text1.grid(row=1, column=1)

        self.previous_frame_button.grid(row=5, column=0, padx=10, sticky=tk.W)
        self.to_roadmap_button.grid(row=5, column=1, columnspan=1)
        self.next_frame_button.grid(row=5, column=2, padx=10, sticky=tk.E)
    def create_widgets(self):
        """Create and lay out the "Discount Factor" page.

        The page uses a 3-column by 7-row grid (column weights 1/8/1). Rows
        1, 2 and 5 of the middle column hold sub-frames that pair a text
        with an image; rows 3 and 4 hold toggle texts; row 6 holds the
        "Previous" / "To Roadmap" / "Next" navigation buttons. "Previous"
        shows the exploration-v-exploitation frame and then calls
        ``video_animation.open_video()`` on it.
        """
        # set column and row spacing for the frame
        for column, weight in enumerate((1, 8, 1)):
            self.columnconfigure(column, weight=weight)
        for row in range(7):
            self.rowconfigure(row, weight=1)

        nav_font = ("Courier", 14)

        # add widgets
        self.title = tk.Label(
            self,
            text="Action-Value Function Revisited: Discount Factor",
            font=("Courier", 36))

        # sub-frames for the rows that place a text next to an image
        self.row_one_frame = tk.Frame(self)
        self.row_two_frame = tk.Frame(self)
        self.row_five_frame = tk.Frame(self)

        self.text1 = tk.Label(
            self.row_one_frame,
            font=("Courier", 14),
            wraplength=500,
            justify=tk.LEFT,
            text="Now, let's go back to our action-value function!")

        self.previous_frame_button = tk.Button(
            self,
            command=lambda: [
                self.controller.show_frame(
                    q_learning_algorithm_exploration_v_exploitation_frame),
                self.controller.frames[
                    q_learning_algorithm_exploration_v_exploitation_frame
                ].video_animation.open_video(),
            ],
            text="Previous",
            font=nav_font)
        self.to_roadmap_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(prelude.roadmap_frame),
            text="To Roadmap",
            font=nav_font)
        self.next_frame_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(
                dqn.the_large_problem_with_pixel_input_frame),
            text="Next",
            font=nav_font)

        # toggle elements; each is given its own row/column here and is not
        # gridded again below (presumably the eh helpers place them -- confirm)
        q_function_original_image_scale = 0.5
        q_function_original_image_path = 'media/module_q/action_value_function_revisited_discount_factor_frame/original.png'
        self.q_function_original_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(q_function_original_image_path),
                           q_function_original_image_scale))
        self.image_one = eh.toggle_image(self.row_one_frame,
                                         image=self.q_function_original_image,
                                         row=0,
                                         column=1,
                                         width=600,
                                         height=100)

        states_image_scale = 0.5
        states_image_path = 'media/module_q/action_value_function_revisited_discount_factor_frame/states.png'
        self.states_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(states_image_path), states_image_scale))
        self.image_two = eh.toggle_image(self.row_two_frame,
                                         image=self.states_image,
                                         row=0,
                                         column=0,
                                         width=600,
                                         height=200)
        self.text2 = eh.toggle_label(
            self.row_two_frame,
            "As you can see, the action-value for certain (action, state) pairs are all equal to one another at 10, regardless of how far away it may be in time to the actual reward-reaping instance!",
            wraplength=300,
            font_size=14,
            row=0,
            column=1,
            width=40)

        self.text3 = eh.toggle_label(
            self,
            "Indeed, with certain degrees of uncertainty as introduced with the epsilon variable from before, this can become problematic. What's more, we also have to take into account the various uncertainties derived from our adversary and the environment itself.",
            wraplength=1000,
            font_size=14,
            row=3,
            column=1,
            height=2)
        self.text4 = eh.toggle_label(
            self,
            "The idea is that future rewards that are far away should be counted less, since there is a higher uncertainty of reaching that reward-reaping state.",
            wraplength=1000,
            font_size=14,
            row=4,
            column=1,
            height=2)

        q_function_updated_image_scale = 0.5
        q_function_updated_image_path = 'media/module_q/action_value_function_revisited_discount_factor_frame/updated.png'
        self.q_function_updated_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(q_function_updated_image_path),
                           q_function_updated_image_scale))
        self.image_three = eh.toggle_image(self.row_five_frame,
                                           image=self.q_function_updated_image,
                                           row=0,
                                           column=1,
                                           width=600,
                                           height=100)
        self.text5 = eh.toggle_label(
            self.row_five_frame,
            "And we can do this by casting a bit of doubt to the 'forward-planning' aspect of our action-value function, by multiplying with a gamma constant between 0 and 1 representing what's known as the 'discount factor.'",
            wraplength=300,
            font_size=14,
            width=40,
            row=0,
            column=0)

        # lay widgets
        self.title.grid(row=0, column=0, columnspan=3)

        self.row_one_frame.grid(row=1, column=1)
        self.row_two_frame.grid(row=2, column=1)
        self.row_five_frame.grid(row=5, column=1)

        # text1 lives inside row_one_frame, so these are that frame's cells
        self.text1.grid(row=0, column=0)

        # Navigation row. Only columns 0-2 are configured for this frame, so
        # "Next" goes in column 2, the last weighted column; this keeps the
        # 1/8/1 weighting symmetric around the centre column.
        self.previous_frame_button.grid(row=6, column=0, padx=0, sticky=tk.W)
        self.to_roadmap_button.grid(row=6, column=1, columnspan=1)
        self.next_frame_button.grid(row=6, column=2, padx=0, sticky=tk.E)
    def create_widgets(self):

        # set column and row spacing for the frame
        self.columnconfigure(0, weight=1)
        self.columnconfigure(1, weight=4)
        self.columnconfigure(2, weight=4)
        self.columnconfigure(3, weight=1)
        self.rowconfigure(0, weight=1)
        self.rowconfigure(1, weight=1)
        self.rowconfigure(2, weight=1)
        self.rowconfigure(3, weight=1)
        self.rowconfigure(4, weight=1)
        self.rowconfigure(5, weight=1)
        self.rowconfigure(6, weight=1)
        self.rowconfigure(7, weight=1)
        explanation_width = 50
        explanation_height = 2
        explanation_wraplength = 400

        # add widgets
        self.title = tk.Label(self,
                              text="Action-Value Function: Reward",
                              font=("Courier", 48))

        self.text1 = tk.Label(
            self,
            font=("Courier", 16),
            wraplength=1200,
            justify=tk.LEFT,
            text=
            "Okay, let's assume we are on the current (time = t) 'state'. We have three choices of action: go up, down, or stay still. According to our strategy [pop up], what action should we take?"
        )

        self.previous_frame_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(
                play.strategy_from_pong_frame),
            text="Previous",
            font=("Courier", 14))
        self.to_roadmap_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(prelude.roadmap_frame),
            text="To Roadmap",
            font=("Courier", 14))
        self.next_frame_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(
                action_value_function_max_q_frame),
            text="Next",
            font=("Courier", 14))

        # define scheme frame
        self.scheme_frame = tk.Frame(self)
        self.scheme_frame.columnconfigure(0, weight=1)
        self.scheme_frame.columnconfigure(1, weight=1)
        self.scheme_frame.columnconfigure(2, weight=1)
        self.scheme_frame.rowconfigure(0, weight=1)
        self.scheme_frame.rowconfigure(1, weight=1)
        self.scheme_frame.rowconfigure(2, weight=1)
        self.scheme_frame.rowconfigure(3, weight=1)

        self.time_now = tk.Label(self.scheme_frame,
                                 text="time = t",
                                 font=("Courier", 14))
        self.time_plus_one = tk.Label(self.scheme_frame,
                                      text="time = t + 1",
                                      font=("Courier", 14))

        state_width = 210
        state_height = 150

        state_image_scale = 0.1
        original_image_path = 'media/module_q/action_value_function_reward_frame/original.png'
        self.original_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(original_image_path), state_image_scale))
        self.original_state = tk.Label(self.scheme_frame,
                                       width=state_width,
                                       height=state_height,
                                       image=self.original_image)

        self.time_now.grid(row=0, column=0)
        self.time_plus_one.grid(row=0, column=2)
        self.original_state.grid(row=2, column=0, sticky=tk.E)

        # add best_action_scheme (args: self, master, frame_id, scheme_frame, state_image_scale, button_one_row, button_one_column, button_two_row, button_two_column, button_three_row, button_three_column, state_one_row, state_one_column, state_two_row, state_two_column, state_three_row, state_three_column, state_width, state_height, explanation_row, explanation_column, explanation_width, explanation_height, explanation_wraplength, explanation_string)
        self.best_action_scheme = eh.best_action_scheme(
            self, "reward", self.scheme_frame, state_image_scale, 1, 1, 2, 1,
            3, 1, 1, 2, 2, 2, 3, 2, state_width, state_height, 2, 2,
            explanation_width, explanation_height, explanation_wraplength,
            "According to our strategy, it made sense to press up, because it resulted in a state that awarded a reward of 10."
        )

        # add toggle texts (args: self, master, text_string, width=1, height=1, wraplength=None, justify=LEFT, row=0, column=0, rowspan=1, columnspan=1, font_type="Courier", font_size=16)
        function_image_scale = 0.5
        function_image_path = 'media/module_q/action_value_function_reward_frame/function.png'
        self.function_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(function_image_path),
                           function_image_scale))

        self.explanation_two = eh.toggle_label(
            self,
            "Then, for our Q-Learning algorithm, how about we assign a value to an action, or an action-value (otherwise known as a Q-value) at each possible state?",
            wraplength=explanation_wraplength,
            row=3,
            column=2,
            width=explanation_width,
            height=explanation_height)
        self.explanation_three = eh.toggle_label(
            self,
            "How so? Well, the simplest way to define such an action-value might be to let it be equal to the reward at the resulting state?:",
            wraplength=explanation_wraplength,
            row=4,
            column=2,
            width=explanation_width,
            height=explanation_height)
        self.explanation_four = eh.toggle_image(self,
                                                image=self.function_image,
                                                row=5,
                                                column=2,
                                                width=400,
                                                height=50)
        self.explanation_five = eh.toggle_label(
            self,
            "And now, we simply choose to take the action with the largest action-value! We now have an algorithm: a way of deciding which action to take in a given state.",
            wraplength=explanation_wraplength,
            row=6,
            column=2,
            width=explanation_width,
            height=explanation_height)

        # lay widgets
        self.title.grid(row=0, column=0, columnspan=4)

        self.text1.grid(row=1, column=0, columnspan=4)
        self.scheme_frame.grid(row=2, column=1, rowspan=5, padx=40)

        self.previous_frame_button.grid(row=7, column=0, padx=10, sticky=tk.W)
        self.to_roadmap_button.grid(row=7, column=1, columnspan=2)
        self.next_frame_button.grid(row=7, column=3, padx=10, sticky=tk.E)
    def create_widgets(self):
        """Build the "Action-Value Function: Max Q" page and grid its widgets.

        Creates the title, the intro text, the three navigation buttons, the
        state/action scheme frame and the toggleable explanations, then
        places the top-level widgets on this frame's grid.
        """
        # frame grid: narrow outer columns, wide inner columns, nine equal rows
        for column_index, column_weight in enumerate((1, 4, 4, 1)):
            self.columnconfigure(column_index, weight=column_weight)
        for row_index in range(9):
            self.rowconfigure(row_index, weight=1)

        explanation_width = 50
        explanation_height = 2
        explanation_wraplength = 400
        small_font = ("Courier", 14)

        # heading and intro text
        self.title = tk.Label(self,
                              text="Action-Value Function: Max Q",
                              font=("Courier", 48))

        intro_text = "How about now, a little further away? Again we have three choices: according to our strategy [pop up], which action should we take?"
        self.text1 = tk.Label(self,
                              font=("Courier", 16),
                              wraplength=1200,
                              justify=tk.LEFT,
                              text=intro_text)

        # navigation callbacks; the target frame names are resolved only when
        # a button is actually pressed, exactly as with a lambda
        def show_previous():
            return self.controller.show_frame(
                action_value_function_reward_frame)

        def show_roadmap():
            return self.controller.show_frame(prelude.roadmap_frame)

        def show_next():
            return self.controller.show_frame(
                q_learning_algorithm_initialization_frame)

        self.previous_frame_button = tk.Button(self,
                                               command=show_previous,
                                               text="Previous",
                                               font=small_font)
        self.to_roadmap_button = tk.Button(self,
                                           command=show_roadmap,
                                           text="To Roadmap",
                                           font=small_font)
        self.next_frame_button = tk.Button(self,
                                           command=show_next,
                                           text="Next",
                                           font=small_font)

        # scheme frame: 3 columns x 4 rows, all equally weighted
        self.scheme_frame = tk.Frame(self)
        for column_index in range(3):
            self.scheme_frame.columnconfigure(column_index, weight=1)
        for row_index in range(4):
            self.scheme_frame.rowconfigure(row_index, weight=1)

        self.time_now = tk.Label(self.scheme_frame,
                                 text="time = t",
                                 font=small_font)
        self.time_plus_one = tk.Label(self.scheme_frame,
                                      text="time = t + 1",
                                      font=small_font)

        state_width = 210
        state_height = 150
        state_image_scale = 0.1

        # the PhotoImage is stored on self so a reference to it stays alive
        original_image_path = 'media/module_q/action_value_function_max_q_frame/original.png'
        scaled_original = im.scale_image(Image.open(original_image_path),
                                         state_image_scale)
        self.original_image = ImageTk.PhotoImage(scaled_original)
        self.original_state = tk.Label(self.scheme_frame,
                                       width=state_width,
                                       height=state_height,
                                       image=self.original_image)

        self.time_now.grid(row=0, column=0)
        self.time_plus_one.grid(row=0, column=2)
        self.original_state.grid(row=2, column=0, sticky=tk.E)

        # action buttons, resulting states and the first explanation
        self.best_action_scheme = eh.best_action_scheme(
            self,  # master
            "max_q",  # frame_id
            self.scheme_frame,
            state_image_scale,
            1, 1,  # button one (row, column)
            2, 1,  # button two (row, column)
            3, 1,  # button three (row, column)
            1, 2,  # state one (row, column)
            2, 2,  # state two (row, column)
            3, 2,  # state three (row, column)
            state_width,
            state_height,
            2, 2,  # explanation (row, column)
            explanation_width,
            explanation_height,
            explanation_wraplength,
            "According to our strategy, it made sense to press up, because the resulting state gets us closer to the state that would give us a positive reward. Note that this requires some forward thinking."
        )

        # formula image shown among the toggle explanations
        function_image_scale = 0.5
        function_image_path = 'media/module_q/action_value_function_max_q_frame/function.png'
        scaled_function = im.scale_image(Image.open(function_image_path),
                                         function_image_scale)
        self.function_image = ImageTk.PhotoImage(scaled_function)

        # keyword arguments shared by every text explanation in column 2
        explanation_layout = {
            "wraplength": explanation_wraplength,
            "column": 2,
            "width": explanation_width,
        }

        self.explanation_two = eh.toggle_label(
            self,
            "Unfortunately, our current action-value (Q-value) function does not work here because every reward of our immediate resulting states are all the same at 0.",
            row=3,
            height=explanation_height,
            **explanation_layout)
        self.explanation_three = eh.toggle_label(
            self,
            "Indeed, our current Q-value function does not have any notion of forward-planning. Then, how do we give our function this necessary sense of directionality?",
            row=4,
            height=explanation_height,
            **explanation_layout)
        # this one is given a height of 3 rather than explanation_height
        self.explanation_four = eh.toggle_label(
            self,
            "The answer is simple: have our action-value function take into account any potential future reward! Then on top of what we already have, let's add the maximum available action-value at the resulting state to our function:",
            row=5,
            height=3,
            **explanation_layout)
        self.explanation_five = eh.toggle_image(self,
                                                image=self.function_image,
                                                row=6,
                                                column=2,
                                                width=400,
                                                height=50)
        self.explanation_six = eh.toggle_label(
            self,
            "We now have our 'Q-Learning' algorithm!",
            row=7,
            height=explanation_height,
            **explanation_layout)

        # place the top-level widgets
        self.title.grid(row=0, column=0, columnspan=4)
        self.text1.grid(row=1, column=0, columnspan=4)
        # NOTE(review): the explanations are given rows 2-7 of column 2, but
        # the scheme frame spans rows 2-6 only -- confirm rowspan=5 is intended
        self.scheme_frame.grid(row=2, column=1, rowspan=5, padx=40)

        navigation_row = 8
        self.previous_frame_button.grid(row=navigation_row,
                                        column=0,
                                        padx=10,
                                        sticky=tk.W)
        self.to_roadmap_button.grid(row=navigation_row,
                                    column=1,
                                    columnspan=2)
        self.next_frame_button.grid(row=navigation_row,
                                    column=3,
                                    padx=10,
                                    sticky=tk.E)
Esempio n. 8
0
    def create_widgets(self):
        """Build "The Large Problem With Pixel Input" page and grid its widgets.

        Creates the title, the intro text, the navigation buttons and the
        toggleable texts/image, then places the top-level widgets on this
        frame's 3-column grid.
        """
        # set column and row spacing for the frame: 3 columns, 6 rows
        for column_index in range(3):
            self.columnconfigure(column_index, weight=1)
        for row_index in range(6):
            self.rowconfigure(row_index, weight=1)

        # add widgets
        self.title = tk.Label(self,
                              text="The Large Problem With Pixel Input",
                              font=("Courier", 48))

        # The state count is written 2^(600 x 400): one binary pixel per screen
        # position, using the same "^" notation as the 20 x 20 example below.
        self.text1 = tk.Label(
            self,
            font=("Courier", 16),
            wraplength=1000,
            justify=tk.LEFT,
            text=
            "But right from the beginning, we encounter a very large problem. Literally. Think about the input that our computer player receives. The information from the game that our 'Q-Learning' player receives is a 600x400 pixel gameplay screen. Assuming that each pixel can be either white or black, that leaves our computer player with a total of 2^(600 x 400) state spaces to work with."
        )

        # navigation buttons; the target frames are looked up on click
        self.previous_frame_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(
                q.action_value_function_revisited_discount_factor_frame),
            text="Previous",
            font=("Courier", 14))
        self.to_roadmap_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(prelude.roadmap_frame),
            text="To Roadmap",
            font=("Courier", 14))
        self.next_frame_button = tk.Button(
            self,
            command=lambda: self.controller.show_frame(q_network_frame),
            text="Next",
            font=("Courier", 14))

        # toggle widgets receive their grid row/column as arguments
        self.text2 = eh.toggle_label(
            self,
            "So, when our Q-Learning Agent iterates through every possible (state, action) pair and initialize each action-value as 0, it would have to do it 2^(600 x 400) times!",
            wraplength=1000,
            font_size=16,
            row=2,
            column=1)

        # the PhotoImage is stored on self so a reference to it stays alive
        input_problem_image_scale = 0.64
        input_problem_image_path = 'media/module_dqn/the_large_problem_with_pixel_input_frame/input_problem.png'
        self.input_problem_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(input_problem_image_path),
                           input_problem_image_scale))
        self.image_one = eh.toggle_image(self,
                                         image=self.input_problem_image,
                                         row=3,
                                         column=1,
                                         width=600,
                                         height=200)

        self.text3 = eh.toggle_label(
            self,
            "Storing pixel data state by state becomes problematic very quickly. For example, if we assume black and white binary pixels, even a 20 x 20 screen would leave us with 2^400 = 2.58 x 10^120 states. To give you some perspective, there are estimated to be around 10^80 atoms in the entire universe.",
            wraplength=1000,
            font_size=16,
            row=4,
            column=1)

        # lay widgets
        self.title.grid(row=0, column=0, columnspan=3)

        self.text1.grid(row=1, column=1)

        self.previous_frame_button.grid(row=5, column=0, padx=10, sticky=tk.W)
        self.to_roadmap_button.grid(row=5, column=1, columnspan=1)
        # "Next" belongs in the last configured column (2); it used to be
        # gridded at column 7, outside the weighted 3-column layout.
        self.next_frame_button.grid(row=5, column=2, padx=10, sticky=tk.E)
Esempio n. 9
0
    def create_widgets(self):
        """Build the "Q-Network" page and grid its widgets.

        Creates the title, a sub-frame holding the intro text and its toggle
        companion, the navigation buttons, two further toggle texts and the
        network image, then places the top-level widgets on this frame's grid.
        """
        # frame grid: 3 columns x 6 rows, all equally weighted
        for column_index in range(3):
            self.columnconfigure(column_index, weight=1)
        for row_index in range(6):
            self.rowconfigure(row_index, weight=1)

        small_font = ("Courier", 14)

        # heading
        self.title = tk.Label(self, text="Q-Network", font=("Courier", 48))

        # row one is its own frame so the intro text and its toggle text can
        # sit side by side
        self.row_one_frame = tk.Frame(self)
        intro_text = "Ok, then. Let's change our strategy and go back to how we think. Remember our strategy? Let's take a look at its language."
        self.text1 = tk.Label(self.row_one_frame,
                              font=("Courier", 16),
                              wraplength=500,
                              justify=tk.LEFT,
                              text=intro_text)

        # navigation callbacks; the target frame names are resolved only when
        # a button is actually pressed, exactly as with a lambda
        def show_previous():
            return self.controller.show_frame(
                the_large_problem_with_pixel_input_frame)

        def show_roadmap():
            return self.controller.show_frame(prelude.roadmap_frame)

        def show_next():
            # show the next page first, then call open_video() on its
            # video_animation; both results are returned as a list
            return [
                self.controller.show_frame(q_network_training_sequence_frame),
                self.controller.frames[q_network_training_sequence_frame
                                       ].video_animation.open_video()
            ]

        self.previous_frame_button = tk.Button(self,
                                               command=show_previous,
                                               text="Previous",
                                               font=small_font)
        self.to_roadmap_button = tk.Button(self,
                                           command=show_roadmap,
                                           text="To Roadmap",
                                           font=small_font)
        self.next_frame_button = tk.Button(self,
                                           command=show_next,
                                           text="Next",
                                           font=small_font)

        # toggle texts; each receives its grid row/column as arguments
        self.text2 = eh.toggle_label(
            self.row_one_frame,
            "- Where the ball is going to end up when it gets to my paddle, based on current trajectory\n- Where my paddle is currently at",
            row=0,
            column=1,
            width=50,
            wraplength=500,
            font_size=16)

        wide_text_options = {"wraplength": 1000, "font_size": 16, "column": 1}
        self.text3 = eh.toggle_label(
            self,
            "So, we were never thinking about the pixel input of each frame to begin with. Rather, we were identifying the characteristics of various key objects in the screen and calculating what that meant for our ability to gain reward.",
            row=2,
            width=80,
            **wide_text_options)
        self.text4 = eh.toggle_label(
            self,
            "And this is exactly where neural networks come in for the rescue! Neural network is meant to predict values from an input.",
            row=3,
            **wide_text_options)

        # network diagram; the PhotoImage is stored on self so a reference
        # to it stays alive
        q_network_image_scale = 0.5
        q_network_image_path = 'media/module_dqn/q_network_frame/q_network.png'
        scaled_network = im.scale_image(Image.open(q_network_image_path),
                                        q_network_image_scale)
        self.q_network_image = ImageTk.PhotoImage(scaled_network)
        self.q_network = eh.toggle_image(self,
                                         image=self.q_network_image,
                                         row=4,
                                         column=1,
                                         width=800,
                                         height=300)

        # place the top-level widgets
        self.title.grid(row=0, column=0, columnspan=3)

        self.row_one_frame.grid(row=1, column=1)
        self.text1.grid(row=0, column=0)

        navigation_row = 5
        self.previous_frame_button.grid(row=navigation_row,
                                        column=0,
                                        padx=0,
                                        sticky=tk.W)
        self.to_roadmap_button.grid(row=navigation_row,
                                    column=1,
                                    columnspan=1)
        self.next_frame_button.grid(row=navigation_row,
                                    column=2,
                                    padx=0,
                                    sticky=tk.E)
Esempio n. 10
0
    def __init__(self, master, frame_id, scheme_frame, state_image_scale,
                 button_one_row, button_one_column, button_two_row,
                 button_two_column, button_three_row, button_three_column,
                 state_one_row, state_one_column, state_two_row,
                 state_two_column, state_three_row, state_three_column,
                 state_width, state_height, explanation_row,
                 explanation_column, explanation_width, explanation_height,
                 explanation_wraplength, explanation_string):
        """Create and grid the three action buttons, three states and explanation.

        Inside ``scheme_frame`` this builds three buttons (up image, a text
        button, down image) and three state image labels, each paired with a
        Canvas of the same size gridded in the same cell. Inside ``master`` it
        builds the explanation label, likewise paired with a Canvas in the
        same cell.

        Args:
            master: parent widget of the explanation label and its canvas.
            frame_id: inserted into the state image directory name
                ("media/event_handler/best_action_scheme_<frame_id>/").
            scheme_frame: parent widget of the buttons, state labels and
                state canvases.
            state_image_scale: scale passed to ``im.scale_image`` for the
                three state images.
            button_*_row, button_*_column: grid cell of each button.
            state_*_row, state_*_column: grid cell shared by each state label
                and its canvas.
            state_width, state_height: width/height given to every state
                label and state canvas.
            explanation_row, explanation_column: grid cell shared by the
                explanation label and its canvas.
            explanation_width, explanation_height: width/height given to both
                the explanation label and its canvas.
            explanation_wraplength: wraplength of the explanation label.
            explanation_string: text of the explanation label.
        """

        # state hidden booleans: everything starts out flagged as hidden
        self.state_one_hidden = True
        self.state_two_hidden = True
        self.state_three_hidden = True
        self.explanation_hidden = True

        # specify and add components
        # arrow images for the up/down buttons; the PhotoImages are stored on
        # self so references to them stay alive
        button_image_scale = 0.3
        up_image_path = "media/event_handler/shared/up.png"
        self.up_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(up_image_path), button_image_scale))
        down_image_path = "media/event_handler/shared/down.png"
        self.down_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(down_image_path), button_image_scale))

        # each button calls the matching show_state_* method, which is not
        # defined in this block
        self.button_one = tk.Button(scheme_frame,
                                    command=lambda: self.show_state_one(),
                                    image=self.up_image)
        # NOTE(review): the odd label text ("Nothingg ") looks like part of
        # the sizing hack mentioned below -- check the rendering before
        # "fixing" the spelling
        self.button_two = tk.Button(
            scheme_frame,
            command=lambda: self.show_state_two(),
            text="Press\nNothingg ",
            font=("Courier", 14),
            wraplength=64)  # little hack to make the button look as it does
        self.button_three = tk.Button(scheme_frame,
                                      command=lambda: self.show_state_three(),
                                      image=self.down_image)

        # state one: image label (green background) plus a same-size canvas
        # created after it
        # presumably the canvas acts as a cover until the state is revealed;
        # verify against show_state_one
        state_one_image_path = "media/event_handler/best_action_scheme_" + frame_id + "/one.png"
        self.state_one_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(state_one_image_path),
                           state_image_scale))
        self.state_one = tk.Label(scheme_frame,
                                  width=state_width,
                                  height=state_height,
                                  image=self.state_one_image,
                                  bg='green')
        self.state_one_screen = tk.Canvas(scheme_frame,
                                          width=state_width,
                                          height=state_height)

        # state two: same construction, red background
        state_two_image_path = "media/event_handler/best_action_scheme_" + frame_id + "/two.png"
        self.state_two_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(state_two_image_path),
                           state_image_scale))
        self.state_two = tk.Label(scheme_frame,
                                  width=state_width,
                                  height=state_height,
                                  image=self.state_two_image,
                                  bg='red')
        self.state_two_screen = tk.Canvas(scheme_frame,
                                          width=state_width,
                                          height=state_height)

        # state three: same construction, red background
        state_three_image_path = "media/event_handler/best_action_scheme_" + frame_id + "/three.png"
        self.state_three_image = ImageTk.PhotoImage(
            im.scale_image(Image.open(state_three_image_path),
                           state_image_scale))
        self.state_three = tk.Label(scheme_frame,
                                    width=state_width,
                                    height=state_height,
                                    image=self.state_three_image,
                                    bg='red')
        self.state_three_screen = tk.Canvas(scheme_frame,
                                            width=state_width,
                                            height=state_height)

        # explanation text and its canvas live in master, not scheme_frame
        # NOTE(review): the same width/height values go to a text Label and to
        # a Canvas, whose size units may differ -- both are gridded with a
        # stretching sticky below; confirm the sizes are as intended
        self.explanation = tk.Label(master,
                                    wraplength=explanation_wraplength,
                                    width=explanation_width,
                                    height=explanation_height,
                                    justify=tk.LEFT,
                                    text=explanation_string,
                                    borderwidth=3,
                                    relief="groove",
                                    font=("Courier", 16))
        self.explanation_screen = tk.Canvas(master,
                                            width=explanation_width,
                                            height=explanation_height,
                                            borderwidth=3,
                                            relief="groove")
        # clicking the label calls hide_explanation, clicking the canvas calls
        # show_explanation (both handlers are defined outside this block)
        self.explanation.bind("<Button-1>", self.hide_explanation)
        self.explanation_screen.bind("<Button-1>", self.show_explanation)

        # lay components
        self.button_one.grid(row=button_one_row, column=button_one_column)
        self.button_two.grid(row=button_two_row, column=button_two_column)
        self.button_three.grid(row=button_three_row,
                               column=button_three_column)

        # each state label and its canvas share one cell and stretch to
        # fill it
        self.state_one.grid(row=state_one_row,
                            column=state_one_column,
                            sticky=tk.NW + tk.SE)
        self.state_one_screen.grid(row=state_one_row,
                                   column=state_one_column,
                                   sticky=tk.NW + tk.SE)
        self.state_two.grid(row=state_two_row,
                            column=state_two_column,
                            sticky=tk.NW + tk.SE)
        self.state_two_screen.grid(row=state_two_row,
                                   column=state_two_column,
                                   sticky=tk.NW + tk.SE)
        self.state_three.grid(row=state_three_row,
                              column=state_three_column,
                              sticky=tk.NW + tk.SE)
        self.state_three_screen.grid(row=state_three_row,
                                     column=state_three_column,
                                     sticky=tk.NW + tk.SE)

        # the explanation label and its canvas likewise share one cell
        self.explanation.grid(row=explanation_row,
                              column=explanation_column,
                              sticky=tk.NW + tk.SE)
        self.explanation_screen.grid(row=explanation_row,
                                     column=explanation_column,
                                     sticky=tk.NW + tk.SE)