# Train a Q-learning (DQN) agent on BreakoutDeterministic-v4 from scratch.
# Python 2 script (uses xrange). NOTE(review): this chunk is truncated — the
# episode loop body continues past the visible end of this fragment.
import gym
from QL_agents import QLAgent

# Constants (trailing comments preserve the original/default values)
MAX_EPISODES = 10000000  # 10000000
MAX_EPISODE_TIME = 100000
RANDOM_STATES = 50000  # 50000 — presumably the number of initial random-policy states; confirm in QLAgent
MAX_STATES = 10000000  # 10000000
SAVE_AFTER = 500000  # 1000000
SAVE_TARGET_MODEL_AFTER = 10000  # 10000
MAX_NOOP = 30

# initialize gym environment and the agent
env = gym.make('BreakoutDeterministic-v4')
agent = QLAgent('breakout', recurrent=False)

# setup to continue training (left disabled: fresh training run)
# agent.epsilon = 0.1
# agent.model.load_weights('DQN_breakout_weights14000000.hdf5')
# agent.target_model.load_weights('DQN_breakout_weights14000000.hdf5')

# Per-run accumulators / counters
episode_return = 0
state_counter = 1
random_counter = 0
act_list = []
training_returns = []

# Iterate the game
for e in xrange(MAX_EPISODES):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())
# Train a Q-learning (DQN) agent on Pong-v0, warm-started from weights that
# were pre-trained (supervised, random frame skip) on Breakout — i.e. a
# transfer-learning training run. Python 2 script (uses xrange).
# NOTE(review): this chunk is truncated — the episode loop body continues past
# the visible end of this fragment.
import gym
from QL_agents import QLAgent

# Constants (trailing comments preserve the original/default values)
MAX_EPISODES = 10000000  # 10000000
MAX_EPISODE_TIME = 100000
RANDOM_STATES = 5000  # 50000 — reduced from the default, presumably because training resumes from pre-trained weights; confirm
MAX_STATES = 10000000  # 10000000
SAVE_AFTER = 1000000  # 1000000
SAVE_TARGET_MODEL_AFTER = 10000  # 10000
MAX_NOOP = 30

# initialize gym environment and the agent
env = gym.make('Pong-v0')
agent = QLAgent('pong', recurrent=False)
# Start with a low exploration rate and load the Breakout-pretrained network.
agent.epsilon = 0.1
agent.model.load_weights('supervised_randframskip_breakout_weights_huber_target10000_hidden_150_100_4000000.hdf5')

# counters
episode_return = 0
state_counter = 1
random_counter = 0
act_list = []
training_returns = []

# Iterate the game
for e in xrange(MAX_EPISODES):
    # Observe reward and initialize first state
    obs = agent.preprocessor.get_center_objects(env.reset())
# agent.model.load_weights('QN_pong_weights_6000000.hdf5') # To test supervised fixed frame skipping load the supervised weights # agent.model.load_weights('supervised_fixed_breakout_weights_4000000.hdf5') # To test supervised random frame skipping change the env and load the supervised weights # env = gym.make('Breakout-v0') # agent = QLAgent('breakout', recurrent=False) # agent.model.load_weights('supervised_random_breakout_weights_4000000.hdf5') # To test supervised fixed transfer load supervised fixed pong weights # agent.model.load_weights('supervised_fixed_pong_weights_4000000.hdf5') # To test supervised random frame skipping transfer change the env and load the supervised weights env = gym.make('Breakout-v0') agent = QLAgent('breakout', recurrent=False) agent.model.load_weights('supervised_random_pong_weights_4000000.hdf5') returns = [] episode_return = 0 for episode in xrange(100): # Observe reward and initialize first state obs = agent.preprocessor.get_center_objects(env.reset()) # Initialize the first state with the same 4 images current_state = np.concatenate((obs, obs, obs, obs)) for time_step in xrange(20000): # print "episode:", e, "time_step:", time_step
# agent.model.load_weights('QN_breakout_weights_6000000.hdf5') # To test supervised fixed frame skipping load the supervised weights # agent.model.load_weights('supervised_fixed_pong_weights_4000000.hdf5') # To test supervised random frame skipping change the env and load the supervised weights # env = gym.make('Pong-v0') # agent = QLAgent('pong', recurrent=False) # agent.model.load_weights('supervised_random_pong_weights_4000000.hdf5') # To test supervised fixed transfer load supervised fixed pong weights # agent.model.load_weights('supervised_fixed_breakout_weights_4000000.hdf5') # To test supervised random frame skipping transfer change the env and load the supervised weights env = gym.make('Pong-v0') agent = QLAgent('pong', recurrent=False) agent.model.load_weights('supervised_random_breakout_weights__4000000.hdf5') returns = [] episode_return = 0 for episode in xrange(100): # Observe reward and initialize first state obs = agent.preprocessor.get_center_objects(env.reset()) # Initialize the first state with the same 4 images current_state = np.concatenate((obs, obs, obs, obs)) for time_step in xrange(20000): # print "episode:", e, "time_step:", time_step