from my_dueling_dqn import DuelingDQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
import tkinter as tk

# --- Tk window hosting an embedded matplotlib canvas for live plotting ---
root = tk.Tk()
root.title("matplotlib in TK")
f = Figure(figsize=(6, 6), dpi=100)
canvas = FigureCanvasTkAgg(f, master=root)
canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)

# NOTE(review): freq_env is not imported in the visible part of this file —
# presumably defined or imported elsewhere; verify before running.
env = freq_env()
MEMORY_SIZE = 5000  # replay-memory capacity passed to DuelingDQN
ACTION_SPACE = 10   # number of discrete actions the agent can take

# TF1-style session, shared by the agents built below.
sess = tf.Session()
with tf.variable_scope('natural'):
    # Baseline agent: dueling=False disables the dueling (V/A) head,
    # giving a plain DQN for comparison against the dueling variant.
    natural_DQN = DuelingDQN(n_actions=ACTION_SPACE,
                             n_features=160,
                             memory_size=MEMORY_SIZE,
                             e_greedy_increment=0.001,
                             sess=sess,
                             dueling=False)

with tf.variable_scope('dueling'):
    # NOTE(review): this call was truncated in the source (the argument list
    # was cut off mid-statement, a syntax error). Reconstructed to mirror the
    # 'natural' agent above, with dueling=True to enable the dueling head —
    # confirm against the original file.
    dueling_DQN = DuelingDQN(n_actions=ACTION_SPACE,
                             n_features=160,
                             memory_size=MEMORY_SIZE,
                             e_greedy_increment=0.001,
                             sess=sess,
                             dueling=True)
def main():
    """Run the frequency-environment agent in train or test mode.

    Builds a Tk window with an embedded matplotlib canvas (used for the
    live waterfall display in test mode), then dispatches on ``env.mode``
    to select the matching reset/step pair of the environment.

    Raises:
        ValueError: if ``env.mode`` is not one of the supported modes 0-3
            (the original code silently did nothing in that case).
    """
    # env = gym.make(ENV_NAME)
    env = freq_env()
    agent = Agent(num_actions=env.actions)

    root = tk.Tk()
    root.title("matplotlib in TK")
    fig = Figure(figsize=(6, 6), dpi=100)
    canvas = FigureCanvasTkAgg(fig, master=root)
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=1)
    waterfall_figure = fig.add_subplot(111)

    # mode -> (reset method, step method). This replaces four copy-pasted
    # loops that differed only in which env methods they called.
    mode_table = {
        0: (env.waterfall_reset, env.step),
        1: (env.waterfall_reset_combo, env.step_combo),
        2: (env.waterfall_reset_dynamic, env.step_dynamic),
        3: (env.waterfall_reset_intelligent, env.step_intelligent),
    }
    try:
        reset_fn, step_fn = mode_table[env.mode]
    except KeyError:
        raise ValueError("unsupported env.mode: {!r}".format(env.mode))

    if TRAIN:  # Train mode
        _run_training(agent, reset_fn, step_fn)
    else:  # Test mode
        _run_test(agent, reset_fn, step_fn, waterfall_figure, canvas)


def _run_training(agent, reset_fn, step_fn):
    """Train for NUM_EPISODES episodes, feeding transitions to agent.run."""
    for _ in range(NUM_EPISODES):
        terminal = False
        observation = reset_fn()
        state = agent.get_initial_state(observation)
        while not terminal:
            last_observation = observation
            action = agent.get_action(state)
            observation, reward, terminal = step_fn(action, last_observation)
            # waterfall_figure.imshow(observation[0, :, :, 0])
            # canvas.draw()
            processed_observation = preprocess(observation)
            state = agent.run(state, action, reward, terminal,
                              processed_observation)


def _run_test(agent, reset_fn, step_fn, waterfall_figure, canvas):
    """Evaluate greedily for NUM_EPISODES_AT_TEST episodes with live display.

    Unlike training, the next state is simply the preprocessed observation
    (no agent.run update), and each step renders the waterfall image.
    """
    for _ in range(NUM_EPISODES_AT_TEST):
        terminal = False
        observation = reset_fn()
        state = agent.get_initial_state(observation)
        while not terminal:
            last_observation = observation
            action = agent.get_action_at_test(state)
            observation, reward, terminal = step_fn(action, last_observation)
            # assumes observation is (1, H, W, C)-shaped — TODO confirm
            # against the env's actual output.
            waterfall_figure.imshow(observation[0, :, :, 0])
            canvas.draw()
            state = preprocess(observation)