def __init__(self, name):
    """Discretizing agent: 15-point bin edges over each raw state feature."""
    FlappyAgent.__init__(self, name)
    # Evenly spaced cut points spanning each feature's raw range
    # (screen is 512 px tall, 288 px wide; velocity spans roughly -8..10).
    self.player_y_bins = np.linspace(0, 512, 15)
    self.player_vel_bins = np.linspace(-8, 10, 15)
    self.next_pipe_dist_bins = np.linspace(0, 288, 15)
    self.next_pipe_top_bins = np.linspace(0, 512, 15)
def __init__(self, name):
    """Linear function-approximation agent: one weight vector per action.

    Args:
        name: Agent name, forwarded to the FlappyAgent base initializer.
    """
    FlappyAgent.__init__(self, name)
    self.lr = 0.00001  # gradient step size
    self.episode = []  # transition history for the episode in progress
    # One 4-feature float64 weight vector per action: index 0 = flap, 1 = noop.
    # np.zeros(4) replaces np.array([0] * 4, dtype=np.float64) — same values,
    # same dtype (float64 is NumPy's default), clearer intent.
    self.weights = [np.zeros(4), np.zeros(4)]
def __init__(self, name):
    """Coarse agent: a single 3-point binning of the bird-to-pipe offset."""
    FlappyAgent.__init__(self, name)
    # Three cut points over the vertical difference between the player and
    # the pipe gap (raw range presumably 13..76 px — confirm against env).
    self.player_pipe_difference_bins = np.linspace(13, 76, 3)
def __init__(self, name):
    """Linear function-approximation agent: one weight vector per action.

    Args:
        name: Agent name, forwarded to the FlappyAgent base initializer.
    """
    FlappyAgent.__init__(self, name)
    # NOTE(review): a sibling agent calls this attribute `lr` — the two should
    # probably be unified, but renaming would break external readers of
    # `learning_rate`, so it is kept as-is.
    self.learning_rate = 0.00001
    self.episode = []  # transition history for the episode in progress
    # One 4-feature float64 weight vector per action (float64 is NumPy's
    # default dtype, so np.zeros(4) matches np.array([0]*4, dtype=np.float64)).
    self.weights = [np.zeros(4), np.zeros(4)]
# NOTE(review): whitespace-mangled fragment. It appears to be (a) the tail of a
# GUI update-loop method — epoch bookkeeping, an end-of-episode reset guarded by
# `done`, a redraw, and Tk-style `self.window.after(...)` rescheduling — followed
# by (b) a `__main__` entry point constructing Environment, FlappyAgent and
# FlappyGUI. The enclosing `def` is not visible in this chunk and the original
# newlines/indentation (e.g. which statements sit inside `if done:`) cannot be
# reconstructed safely, so the code is left byte-identical.
# TODO: restore the original formatting from version control.
self.epoch_iteration += 1 self.epoch_reward_sum += reward if done: print('\nAggregate test reward:', self.epoch_reward_sum) self.state = self.env.reset() self.epoch_reward_sum = 0 self.epoch_iteration = 0 self.draw() call_latency = self.test_update_interval self.window.after(call_latency, self.loop) if __name__ == '__main__': env_scaler = 10 env = Environment() n_iterations = int(2e5) agent = FlappyAgent(observation_space_size=env.observation_space_size, action_space=env.action_space, n_iterations=n_iterations) gui = FlappyGUI(agent=agent, n_iterations=n_iterations, env=env, env_scaler=10, train_update_interval=25, test_update_interval=50)
def __init__(self, name):
    """Agent binning both the bird-to-pipe and pipe-to-pipe vertical offsets."""
    FlappyAgent.__init__(self, name)
    # 11 cut points over the bird/pipe vertical offset; 5 over the vertical
    # offset between the current and the next pipe gap.
    self.player_pipe_difference_bins = np.linspace(13, 76, 11)
    self.pipe_next_pipe_difference_bins = np.linspace(-158, 158, 5)
def __init__(self, name):
    """Agent with three binned features: pipe gap delta, distance, bird offset."""
    FlappyAgent.__init__(self, name)
    # Cut points for: vertical offset between consecutive pipe gaps,
    # horizontal distance to the next pipe, and the bird-to-pipe offset.
    self.pipe_next_pipe_difference_bins = np.linspace(-158, 158, 5)
    self.distance_to_pipe_bins = np.linspace(0, 65, 4)
    self.bird_pipe_difference_bins = np.linspace(0, 100, 11)
def __init__(self, name):
    """Counting agent: a step counter plus a fixed split factor."""
    FlappyAgent.__init__(self, name)
    self.count = 0  # incremented elsewhere — presumably steps seen; confirm at call sites
    self.split = 5  # partition factor used elsewhere; exact meaning not visible here
# NOTE(review): whitespace-mangled fragment of a training script: it sets up
# metric accumulators, builds Environment and FlappyAgent, then runs episodes
# (agent.step -> env.step -> agent.learn) inside an outer iteration-budget loop.
# The fragment is truncated — `iteration` is never incremented in the visible
# text, so the outer `while` body clearly continues beyond this chunk — and the
# original newlines/indentation cannot be reconstructed safely, so the code is
# left byte-identical. TODO: restore the original formatting from version control.
# For plotting metrics epoch_iterations_list = [] epoch_rewards_list = [] best_epoch_reward_sum = -1 best_epoch = 0 n_iterations = int(2e5) iteration = 0 epoch = 0 print_epoch_interval = 500 env = Environment() agent = FlappyAgent(observation_space_size=env.observation_space_size, action_space=env.action_space, n_iterations=n_iterations) while iteration < n_iterations: state = env.reset() epoch_iteration, epoch_reward_sum = 0, 0 done = False while not done: action = agent.step(state) next_state, reward, done, info = env.step(action) agent.learn(state, action, next_state, reward) state = next_state