Example no. 1
0
    'exploration': 0
})
# Training hyperparameters for the DQN loop.
num_episodes = 10 * 1000  # total training episodes (10,000)
max_step_in_one_episode = 1000000000  # effectively unbounded step cap per episode
update_freq = 4  # how often (in steps) a training/update step is triggered
num_pre_train = 1000  # warm-up steps before learning starts (fills replay memory)
save_mode_per_episode = 1000  # checkpoint interval in episodes
# NOTE(review): "save_mode" is presumably a typo for "save_model" — confirm
# against the code that reads this constant before renaming.

# Clear any graph left over from a previous cell run so the networks below
# are built into a fresh default graph (TensorFlow 1.x API).
tf.reset_default_graph()

# Game environment (flappy-bird style; exposes frame_step(action)).
env = bird.GameState()
# Online Q-network that is trained, and a frozen target network used for
# stable TD targets; both output Q-values over 2 actions.
training_net = Agent.TrainingQNetwork(act_num=2)
frozen_net = Agent.FrozenQNetwork(act_num=2)
memory = Agent.ExperienceMemory()  # replay buffer
model = Agent.Model()  # presumably handles checkpoint save/restore — TODO confirm
# Epsilon-greedy action selector; stays fully random for the first
# num_pre_train steps while the replay buffer is being filled.
chooser = Agent.Chooser(act_num=2, num_pre_train=num_pre_train)
updater = Agent.Updater()  # copies training-net weights into the frozen net


def next_step(a):
    action = np.zeros(shape=[
        2,
    ])
    action[a] = 1
    nextObservation = np.zeros(shape=[84, 84, 4], dtype=np.uint8)
    reward = 0
    reward_sum = 0
    terminal = False
    for i in range(4):
        next_image, reward, terminal = env.frame_step(action)
        reward_sum += reward
Example no. 2
0
})
# Training hyperparameters for the DQN loop.
num_episodes = 10 * 1000  # total training episodes (10,000)
max_step_in_one_episode = 100  # hard step cap per episode
train_freq = 1  # train the online network every step
update_freq = 1000  # sync frozen/target network every 1000 steps
num_pre_train = 1000  # warm-up steps before learning starts (fills replay memory)
save_mode_every = 1000  # checkpoint interval in episodes
# NOTE(review): "save_mode" is presumably a typo for "save_model" — confirm
# against the code that reads this constant before renaming.

# Clear any graph left over from a previous cell run so the networks below
# are built into a fresh default graph (TensorFlow 1.x API).
tf.reset_default_graph()

# Game environment (flappy-bird style; exposes frame_step(action)).
env = bird.GameState()
# Online Q-network that is trained, and a frozen target network used for
# stable TD targets; both output Q-values over 2 actions.
training_net = Agent.TrainingQNetwork(act_num=2)
frozen_net = Agent.FrozenQNetwork(act_num=2)
memory = Agent.ExperienceMemory()  # replay buffer
model = Agent.Model()  # presumably handles checkpoint save/restore — TODO confirm
# Epsilon-greedy action selector; stays fully random for the first
# num_pre_train steps while the replay buffer is being filled.
chooser = Agent.Chooser(2, num_pre_train=num_pre_train)
updater = Agent.Updater()  # copies training-net weights into the frozen net


def next_step(a):
    action = np.zeros(shape=[
        2,
    ])
    action[a] = 1
    nextObservation = np.zeros(shape=[84, 84, 4], dtype=np.uint8)
    reward = 0
    reward_sum = 0
    terminal = False
    for i in range(4):
        next_image, reward, terminal = env.frame_step(action)
        reward_sum += reward