Example #1
File: q_learning.py Project: Miscon/Flappy
    def __init__(self, name):
        FlappyAgent.__init__(self, name)

        # Bin edges for discretizing each continuous observation (15 points each)
        self.player_y_bins = np.linspace(0, 512, 15)        # bird's vertical position
        self.player_vel_bins = np.linspace(-8, 10, 15)      # bird's vertical velocity
        self.next_pipe_dist_bins = np.linspace(0, 288, 15)  # horizontal distance to the next pipe
        self.next_pipe_top_bins = np.linspace(0, 512, 15)   # y-coordinate of the next pipe's top
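
These linspace boundaries presumably serve to map the game's continuous state onto a discrete grid that can index a Q-table. A minimal sketch of that mapping with np.digitize, assuming a state dict with the keys named below (the key names and the discretize helper are illustrative assumptions, not taken from the Miscon/Flappy project):

import numpy as np

player_y_bins = np.linspace(0, 512, 15)
player_vel_bins = np.linspace(-8, 10, 15)
next_pipe_dist_bins = np.linspace(0, 288, 15)
next_pipe_top_bins = np.linspace(0, 512, 15)

def discretize(state):
    # np.digitize returns the index of the bin each value falls into,
    # giving a hashable tuple that can key a Q-table dictionary
    return (
        int(np.digitize(state['player_y'], player_y_bins)),
        int(np.digitize(state['player_vel'], player_vel_bins)),
        int(np.digitize(state['next_pipe_dist'], next_pipe_dist_bins)),
        int(np.digitize(state['next_pipe_top'], next_pipe_top_bins)),
    )

# example: discretize({'player_y': 256, 'player_vel': 0,
#                      'next_pipe_dist': 100, 'next_pipe_top': 192})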
Example #2
    def __init__(self, name):
        FlappyAgent.__init__(self, name)
        self.lr = 0.00001  # learning rate
        self.episode = []
        # one 4-feature weight vector per action: [flap, noop]
        self.weights = [
            np.array([0] * 4, dtype=np.float64),
            np.array([0] * 4, dtype=np.float64)
        ]
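
The per-action weight vectors and the small learning rate suggest a linear function approximator: Q(s, a) is the dot product of a 4-dimensional feature vector with the weights for action a, nudged toward a target by gradient descent. A minimal sketch under that assumption (the q_value and sgd_update helpers are illustrative, not from the original agent):

import numpy as np

lr = 0.00001
weights = [np.zeros(4), np.zeros(4)]  # one weight vector per action: [flap, noop]

def q_value(features, action):
    # linear value estimate: dot product of the action's weights and the features
    return float(np.dot(weights[action], features))

def sgd_update(features, action, target):
    # move the action's weights toward the target in proportion to the error
    error = target - q_value(features, action)
    weights[action] += lr * error * features

# example: sgd_update(np.array([0.5, 0.1, 0.3, 1.0]), action=0, target=1.0)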
Example #3
    def __init__(self, name):
        FlappyAgent.__init__(self, name)

        # Bin edges for the bird-to-pipe vertical difference (3 boundary points)
        self.player_pipe_difference_bins = np.linspace(13, 76, 3)
Example #4
File: la.py Project: emilnewel/flappy-itml
    def __init__(self, name):
        FlappyAgent.__init__(self, name)
        self.learning_rate = 0.00001
        self.episode = []
        self.weights = [
            np.array([0] * 4, dtype=np.float64),
            np.array([0] * 4, dtype=np.float64)
        ]
Example #5
            self.epoch_iteration += 1
            self.epoch_reward_sum += reward

            if done:
                print('\nAggregate test reward:', self.epoch_reward_sum)
                self.state = self.env.reset()
                self.epoch_reward_sum = 0
                self.epoch_iteration = 0

            self.draw()
            call_latency = self.test_update_interval

        self.window.after(call_latency, self.loop)


if __name__ == '__main__':
    env_scaler = 10
    env = Environment()

    n_iterations = int(2e5)

    agent = FlappyAgent(observation_space_size=env.observation_space_size,
                        action_space=env.action_space,
                        n_iterations=n_iterations)
    gui = FlappyGUI(agent=agent,
                    n_iterations=n_iterations,
                    env=env,
                    env_scaler=env_scaler,
                    train_update_interval=25,
                    test_update_interval=50)
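
Rather than a blocking while loop, the GUI above appears to drive updates through Tkinter's after() timer: loop() does one step of work, then re-registers itself. A minimal sketch of that scheduling pattern (the class below is illustrative, not the project's FlappyGUI):

import tkinter as tk

class TimerLoop:
    def __init__(self, interval_ms=25):
        self.window = tk.Tk()
        self.interval_ms = interval_ms

    def loop(self):
        # ...one step of agent/environment work and drawing goes here...
        # after() schedules the next call without blocking the GUI event loop
        self.window.after(self.interval_ms, self.loop)

    def run(self):
        self.loop()
        self.window.mainloop()

# TimerLoop(interval_ms=25).run()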
Example #6
    def __init__(self, name):
        FlappyAgent.__init__(self, name)

        # Bin edges: bird-to-pipe vertical difference (11 points) and the
        # vertical offset between consecutive pipes (5 points)
        self.player_pipe_difference_bins = np.linspace(13, 76, 11)
        self.pipe_next_pipe_difference_bins = np.linspace(-158, 158, 5)
Example #7
    def __init__(self, name):
        FlappyAgent.__init__(self, name)

        # Bin edges for the three state features used by this agent
        self.pipe_next_pipe_difference_bins = np.linspace(-158, 158, 5)
        self.distance_to_pipe_bins = np.linspace(0, 65, 4)
        self.bird_pipe_difference_bins = np.linspace(0, 100, 11)
Example #8
    def __init__(self, name):
        FlappyAgent.__init__(self, name)
        self.count = 0
        self.split = 5
Example #9
# For plotting metrics
epoch_iterations_list = []
epoch_rewards_list = []
best_epoch_reward_sum = -1
best_epoch = 0

n_iterations = int(2e5)
iteration = 0
epoch = 0

print_epoch_interval = 500

env = Environment()
agent = FlappyAgent(observation_space_size=env.observation_space_size,
                    action_space=env.action_space,
                    n_iterations=n_iterations)

while iteration < n_iterations:
    state = env.reset()

    epoch_iteration, epoch_reward_sum = 0, 0
    done = False

    while not done:
        action = agent.step(state)                         # choose an action
        next_state, reward, done, info = env.step(action)  # advance the environment
        agent.learn(state, action, next_state, reward)     # update from the transition

        state = next_state
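
The excerpt cuts off inside the episode loop, so the bookkeeping for the metric lists declared at the top is not shown. Under the usual pattern for loops like this one, the counters would be advanced roughly as follows (a hedged sketch of the conventional structure, not the project's actual code):

        epoch_iteration += 1
        epoch_reward_sum += reward
        iteration += 1

    # episode done: record metrics and remember the best epoch so far
    epoch_iterations_list.append(epoch_iteration)
    epoch_rewards_list.append(epoch_reward_sum)
    if epoch_reward_sum > best_epoch_reward_sum:
        best_epoch_reward_sum = epoch_reward_sum
        best_epoch = epoch
    epoch += 1

    if epoch % print_epoch_interval == 0:
        print('epoch', epoch, 'reward sum', epoch_reward_sum)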