# NOTE(review): this fragment reached review with all newlines collapsed onto
# one line; the tokens below are unchanged, only re-indented for readability.
# Confirm the reconstruction against the original file.

# Tail of a configuration dict/call whose opening brace lies outside this
# chunk — presumably an exploration (epsilon) setting for the action chooser;
# verify against the enclosing statement.
'exploration': 0 })

# --- training hyperparameters -------------------------------------------
num_episodes = 10 * 1000              # total training episodes
max_step_in_one_episode = 1000000000  # effectively unbounded episode length
update_freq = 4                       # presumably steps between network updates — confirm usage
num_pre_train = 1000                  # experience-collection steps before training begins
save_mode_per_episode = 1000          # checkpoint interval in episodes (name suggests "save model"; confirm)

# --- environment and agent wiring ---------------------------------------
tf.reset_default_graph()              # TF1-style global graph reset
env = bird.GameState()                # bird game environment (frame_step API used below)
training_net = Agent.TrainingQNetwork(act_num=2)  # online Q-network, 2 actions
frozen_net = Agent.FrozenQNetwork(act_num=2)      # target Q-network, 2 actions
memory = Agent.ExperienceMemory()     # replay buffer
model = Agent.Model()
chooser = Agent.Chooser(act_num=2, num_pre_train=num_pre_train)  # action selector
updater = Agent.Updater()


def next_step(a):
    """Execute action index *a* against the environment for 4 frames.

    Builds a one-hot action vector, steps the environment 4 times with it,
    and accumulates the per-frame reward into ``reward_sum``.

    NOTE(review): the body is truncated at the end of this chunk — it stops
    inside the frame loop. The remainder (presumably preprocessing each
    ``next_image`` into ``nextObservation`` and a return statement) is
    outside this view; do not rely on this reconstruction being complete.
    """
    # One-hot action vector expected by env.frame_step.
    action = np.zeros(shape=[ 2, ])
    action[a] = 1
    # Pre-allocated 84x84x4 observation buffer — presumably 4 stacked
    # grayscale frames (classic DQN input); confirm downstream usage.
    nextObservation = np.zeros(shape=[84, 84, 4], dtype=np.uint8)
    reward = 0
    reward_sum = 0
    terminal = False
    for i in range(4):
        next_image, reward, terminal = env.frame_step(action)
        reward_sum += reward
# NOTE(review): this fragment reached review with all newlines collapsed onto
# one line; the tokens below are unchanged, only re-indented for readability.
# It is a near-duplicate of the adjacent variant of this script with
# different hyperparameter values — confirm which version is current.

# Closing of a call/dict whose opening lies outside this chunk.
})

# --- training hyperparameters -------------------------------------------
num_episodes = 10 * 1000         # total training episodes
max_step_in_one_episode = 100    # hard cap on steps per episode
train_freq = 1                   # presumably steps between training steps — confirm usage
update_freq = 1000               # presumably steps between target-network syncs — confirm usage
num_pre_train = 1000             # experience-collection steps before training begins
save_mode_every = 1000           # checkpoint interval (name suggests "save model"; confirm)

# --- environment and agent wiring ---------------------------------------
tf.reset_default_graph()         # TF1-style global graph reset
env = bird.GameState()           # bird game environment (frame_step API used below)
training_net = Agent.TrainingQNetwork(act_num=2)  # online Q-network, 2 actions
frozen_net = Agent.FrozenQNetwork(act_num=2)      # target Q-network, 2 actions
memory = Agent.ExperienceMemory()  # replay buffer
model = Agent.Model()
chooser = Agent.Chooser(2, num_pre_train=num_pre_train)  # action selector, 2 actions
updater = Agent.Updater()


def next_step(a):
    """Execute action index *a* against the environment for 4 frames.

    Builds a one-hot action vector, steps the environment 4 times with it,
    and accumulates the per-frame reward into ``reward_sum``.

    NOTE(review): the body is truncated at the end of this chunk — it stops
    inside the frame loop. The remainder (presumably preprocessing each
    ``next_image`` into ``nextObservation`` and a return statement) is
    outside this view; do not rely on this reconstruction being complete.
    """
    # One-hot action vector expected by env.frame_step.
    action = np.zeros(shape=[ 2, ])
    action[a] = 1
    # Pre-allocated 84x84x4 observation buffer — presumably 4 stacked
    # grayscale frames (classic DQN input); confirm downstream usage.
    nextObservation = np.zeros(shape=[84, 84, 4], dtype=np.uint8)
    reward = 0
    reward_sum = 0
    terminal = False
    for i in range(4):
        next_image, reward, terminal = env.frame_step(action)
        reward_sum += reward