def train(FRAME_TRAIN=1000005):
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=True)
    p.init()
    ob = game.getGameState()
    state = ob
    state = np.reshape(np.asarray(list(state.values())), [1, 8])
    total_reward = 0
    agent = DDQN_Agent.DeepQAgent()
    agent.load('model95000')
    batch_size = 32
    my_timer = time.time()
    prev_frame = 0
    data = []
    for i in range(FRAME_TRAIN):
        if p.game_over():
            data.append(total_reward)
            p.reset_game()
            print(
                "Total reward = {}, Frame = {}, epsilon = {}, frame/second = {}"
                .format(total_reward, i, agent.epsilon,
                        (i - prev_frame) / (time.time() - my_timer)))
            total_reward = 0
            prev_frame = i
            my_timer = time.time()

        # get action from agent
        action = agent.act(state)

        # take action
        reward = p.act(p.getActionSet()[action])

        # making the reward space less sparse
        if reward < 0:
            reward = -1

        total_reward += reward
        next_state = np.asarray(list(game.getGameState().values()))
        next_state = np.reshape(next_state, [1, 8])

        # remember and replay
        agent.remember(state, action, reward, next_state, p.game_over())
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

        state = next_state

        # save Model
        if i % 5000 == 0:
            print("Updating weights")
            agent.save('newmodel' + str(i))
            agent.target_model.set_weights(agent.model.get_weights())

        # Plot score
        if i % 1000 == 0:
            plot(data)
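
The loop above assumes a DDQN_Agent.DeepQAgent exposing act, remember, replay, load, save, epsilon, memory, model and target_model (plus a plot helper for the score curve). A minimal double-DQN sketch with that interface follows; the layer sizes, hyperparameters and the Keras backend are assumptions, not the original module:

import random
from collections import deque

import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


class DeepQAgent:
    def __init__(self, state_size=8, action_size=2):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=50000)
        self.gamma = 0.99            # discount factor
        self.epsilon = 0.1           # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.999
        self.model = self._build_model()           # online network
        self.target_model = self._build_model()    # target network

    def _build_model(self):
        model = Sequential([
            Dense(64, activation='relu', input_shape=(self.state_size,)),
            Dense(64, activation='relu'),
            Dense(self.action_size, activation='linear'),
        ])
        model.compile(loss='mse', optimizer=Adam(learning_rate=1e-4))
        return model

    def act(self, state):
        # epsilon-greedy action selection on the online network
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_size)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        # double DQN: the online net picks the next action, the target net evaluates it
        batch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in batch:
            target = reward
            if not done:
                best = int(np.argmax(self.model.predict(next_state, verbose=0)[0]))
                target += self.gamma * self.target_model.predict(next_state, verbose=0)[0][best]
            q_values = self.model.predict(state, verbose=0)
            q_values[0][action] = target
            self.model.fit(state, q_values, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)
        self.target_model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)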
Example #2
def main(argv):
    try:
        opts, _ = getopt.getopt(argv, "hr")
    except getopt.GetoptError:
        print("birdML.py [-h | -r]")
        sys.exit(2)

    record = False
    for opt, arg in opts:
        if opt == '-h':
            print("-h to help")
            print("-r record")
        elif opt == '-r':
            record = True

    netb = netBrain()
    netb.summary()
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=True, force_fps=True)
    p.init()
    actions = p.getActionSet()

    out = 1

    epochs = 50
    for i in range(epochs):
        lstates = []
        rewards = []
        if record:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter('Videos/test_' + str(i) + '.mov', fourcc,
                                  30.0, (288, 512))
        for d in range(10):
            while not p.game_over():
                if record:
                    obs = p.getScreenRGB()
                    obs = cv2.transpose(obs)
                    obs = cv2.cvtColor(obs, cv2.COLOR_RGB2BGR)
                    out.write(obs)
                st = game.getGameState()
                gstate = list(st.values())
                gstate = np.array([np.array(gstate)])
                lstates.append(gstate[0])
                pred = netb.predict(gstate)[0]
                a = pred.argmax()
                p.act(actions[a])
                if st['next_pipe_bottom_y'] < st['player_y']:
                    pred[0] = 1.0
                    pred[1] = 0.0
                elif st['next_pipe_top_y'] > st['player_y']:
                    pred[0] = 0.0
                    pred[1] = 1.0
                rewards.append(pred)
            p.reset_game()
        netb.fit(np.array(lstates),
                 np.array(rewards),
                 batch_size=10,
                 epochs=10)
        if record:
            out.release()
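
netBrain() is not shown here; it presumably builds a small network mapping the 8 state values to two action scores, where index 0 is taken to mean "flap" (matching the labelling logic above). A plausible sketch, purely as an assumption:

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


def netBrain():
    # 8 state features in, 2 action scores out; index 0 is assumed to mean "flap"
    model = Sequential([
        Dense(32, activation='relu', input_shape=(8,)),
        Dense(32, activation='relu'),
        Dense(2, activation='softmax'),
    ])
    model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=1e-3))
    return model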
Example #3
def train(FRAME_TRAIN=1000005):
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=True)
    p.init()
    ob = game.getGameState()
    state = ob
    state = np.reshape(np.asarray(list(state.values())), [1, 8])
    total_reward = 0
    agent = DDQN_Agent.DeepQAgent()
    agent.load("model95000")
    batch_size = 32
    my_timer = time.time()
    prev_frame = 0
    data = []
    for i in range(FRAME_TRAIN):
        if p.game_over():
            data.append(total_reward)
            p.reset_game()
            print(
                "Total reward = {}, Frame = {}, epsilon = {}, frame/second = {}"
                .format(total_reward, i, agent.epsilon,
                        (i - prev_frame) / (time.time() - my_timer)))
            total_reward = 0
            prev_frame = i
            my_timer = time.time()

        # get action from agent
        action = agent.act(state)

        # take action
        reward = p.act(p.getActionSet()[action])

        # making the reward space less sparse
        if reward < 0:
            reward = -1

        total_reward += reward
        next_state = np.asarray(list(game.getGameState().values()))
        next_state = np.reshape(next_state, [1, 8])

        state = next_state
        # time.sleep(0.3)
        # Plot score
        if i % 1000 == 0:
            plot(data)
Example #4
    def play(self, fast=True):
        """Use athlete to play.
        Args:
            fast <bool>: set to True if the screen should be hidden and speed
            enhanced
        """
        game = FlappyBird()
        env = PLE(game,
                  fps=30,
                  frame_skip=1,
                  num_steps=1,
                  force_fps=fast,
                  display_screen=not fast)
        env.init()
        pipes = []
        i = 0
        while i < 100:
            env.reset_game()
            pipes.append(0)
            while not env.game_over():
                A = self.act(game.getGameState())
                r = env.act(ACTIONS[A])
                if r == 1.:
                    pipes[-1] += 1
            if not fast:
                print('\n- Score: {} pipes'.format(pipes[-1]))
                print('- Played {} games'.format(len(pipes)))
                print('- Average score: {} pipes'.format(np.round(np.mean(pipes), decimals=1)))
            else:
                i += 1

        print('\n- Max score: {} pipes'.format(np.max(pipes)))
        print('- Games < 15 pipes: {}'.format(
            len(tuple(filter(lambda x: x < 15, pipes)))
        ))
        print('- Played {} games'.format(100))
        print('- Average score: {} pipes'.format(
            np.round(np.mean(pipes), decimals=1))
        )
Example #5
def test_model_G(nb_games, model):
    game = FlappyBird(
        graphics="fixed"
    )  # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
    p = PLE(game,
            fps=30,
            frame_skip=1,
            num_steps=1,
            force_fps=True,
            display_screen=False)
    p.init()
    reward = 0.0

    cumulated = np.zeros((nb_games))
    list_actions = [0, 119]

    for i in range(nb_games):
        p.reset_game()

        while (not p.game_over()):
            state = game.getGameState()

            screen_x = process_screen(p.getScreenRGB())
            stacked_x = deque([screen_x, screen_x, screen_x, screen_x],
                              maxlen=4)
            x = np.stack(stacked_x, axis=-1)
            action = list_actions[np.argmax(
                model.predict(np.expand_dims(x, axis=0)))]

            reward = p.act(action)

            cumulated[i] = cumulated[i] + reward

    avg_score = np.mean(cumulated)
    print('Average : ' + str(avg_score))
    mx_score = np.max(cumulated)
    print('Max : ' + str(mx_score))
    return avg_score, mx_score
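
process_screen is another helper that is not shown; it presumably grayscales and downscales the raw frame before the four-frame stack is built. A typical sketch (the 80x80 size and the [0, 1] scaling are assumptions):

import cv2
import numpy as np


def process_screen(screen_rgb):
    # grayscale, downscale to 80x80 and rescale to [0, 1]
    gray = cv2.cvtColor(screen_rgb, cv2.COLOR_RGB2GRAY)
    small = cv2.resize(gray, (80, 80))
    return small.astype(np.float32) / 255.0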
Example #6
def test():
    game2 = FlappyBird()
    p2 = PLE(game2,
             fps=30,
             frame_skip=1,
             num_steps=1,
             force_fps=True,
             display_screen=False)
    p2.init()
    reward = 0.0

    nb_games = 10
    cumulated = np.zeros((nb_games))
    for i in range(nb_games):
        p2.reset_game()

        while (not p2.game_over()):
            state = game2.getGameState()
            screen = p2.getScreenRGB()
            action = FlappyPolicy(state, screen)

            reward = p2.act(action)
            cumulated[i] = cumulated[i] + reward
    return np.mean(cumulated)
Example #7
# You're not allowed to change this file
from ple.games.flappybird import FlappyBird
from ple import PLE
import numpy as np
from FlappyAgent import FlappyPolicy

game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True)
# Note: if you want to see you agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.

p.init()
reward = 0.0

nb_games = 100
cumulated = np.zeros((nb_games))

for i in range(nb_games):
    p.reset_game()
    
    while(not p.game_over()):
        state = game.getGameState()
        screen = p.getScreenRGB()
        action=FlappyPolicy(state, screen) ### Your job is to define this function.
        
        reward = p.act(action)
        cumulated[i] = cumulated[i] + reward

average_score = np.mean(cumulated)
max_score = np.max(cumulated)
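
The harness only requires FlappyPolicy(state, screen) to return a member of the action set (119 to flap, None to do nothing). A trivial hand-crafted baseline that satisfies that interface, not a learned policy:

def FlappyPolicy(state, screen):
    # flap whenever the bird sits below the centre of the upcoming gap
    gap_center = (state['next_pipe_top_y'] + state['next_pipe_bottom_y']) / 2.0
    if state['player_y'] > gap_center:
        return 119   # press "up"
    return None      # do nothing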
Example #8
        self.Q_values[game_current_state[0], game_current_state[1],
                      game_current_state[2], action] = (
            1 - self._alpha
        ) * self.Q_values[game_current_state[0], game_current_state[1],
                          game_current_state[2],
                          action] + self._alpha * (reward + self._gamma * np.max(
                              self.Q_values[game_next_state[0], game_next_state[1],
                                            game_next_state[2]]))


if __name__ == "__main__":
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=True)
    # create a QLAgent object
    agent = QLAgent(flappy_actions=p.getActionSet(), grid_size=10)

    p.init()

    # get the current state values (state array)
    game_current_state = agent.get_current_state(game.getGameState())
    # initialize the episode counter to 0
    number_of_episods = 0
    # initialize the maximum score to 0
    maximum_score = 0

    # iterate through the episodes
    while True:
        # get the optimal action for the current state
        maximum_action = agent.get_action(game_current_state)
        #get the score in the current episode
        current_score = p.score()
        # update the maximum score by comparing with the current score
        maximum_score = max(current_score, maximum_score)
        # get the reward by performing the chosen action (the reward is either 1 or -1000)
        reward = agent.perform_action(p, maximum_action)
Example #9
        for ea in partie:

            old_state, action, reward, futur_state = ea
            # Offline update
            # Course formula:  Q(s,a) = Q(s,a) + alpha*(R + gamma*max(Q(s',a)) - Q(s,a))
            Q_function[old_state[0]][old_state[1]][
                old_state[2]][action] = Q_function[old_state[0]][old_state[1]][
                    old_state[2]][action] + alpha * (reward + gamma * max(
                        Q_function[futur_state[0]][futur_state[1]][
                            futur_state[2]]) - Q_function[old_state[0]][
                                old_state[1]][old_state[2]][action])

        partie = []
        p.reset_game()
        state = game.getGameState()
        RS = reduce_state(state)

    else:  # For the first game

        partie = []
        p.reset_game()
        state = game.getGameState()
        RS = reduce_state(state)

    while (not p.game_over()):

        epsilon = np.random.uniform(0, 101)
        if epsilon > epsilon_act:  #Q-greedy

            qval = Q_function[RS[0]][RS[1]][RS[2]]
Example #10
    def update_Q(self, s, s_prime, reward, action):
        self.Q[s[0], s[1], s[2], action] = (1 - self._alpha) * self.Q[
            s[0], s[1], s[2], action] + self._alpha * (
                reward + self._lambda *
                np.max(self.Q[s_prime[0], s_prime[1], s_prime[2]]))


if __name__ == "__main__":
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=True)
    agent = Agent(action_space=p.getActionSet(), grid_size=10)

    p.init()

    s = agent.get_current_state(game.getGameState())
    episodes = 0
    max_score = 0

    while True:
        # Find the optimal action based on the current state
        max_action = agent.optimal_action(s)

        current_score = p.score()
        max_score = max(current_score, max_score)

        # Perform the optimal action and return the reward
        reward = agent.act(p, max_action)

        # Get the next game state after performing the optimal action
        s_prime = agent.get_current_state(game.getGameState())
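
Both tabular agents above index their Q array by a 3-component discretized state plus the action. A sketch of the kind of get_current_state discretization they rely on; the feature choice, bucket counts and clipping bounds are guesses, not the original code:

import numpy as np


def get_current_state(game_state, grid_size=10):
    # Hypothetical discretization: vertical offset to the next gap, horizontal
    # distance to the next pipe and vertical velocity, each reduced to a small
    # non-negative bucket index so they can index a dense Q array.
    dy = game_state['player_y'] - game_state['next_pipe_bottom_y']
    dx = game_state['next_pipe_dist_to_player']
    vel = game_state['player_vel']
    dy_bucket = int(np.clip(dy, -150, 149) // grid_size) + 15   # 0..29
    dx_bucket = int(np.clip(dx, 0, 289) // grid_size)           # 0..28
    vel_bucket = int(np.clip(vel, -10, 9)) + 10                 # 0..19
    return (dy_bucket, dx_bucket, vel_bucket)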
Example #11
class GymFlappy(gym.Env, EzPickle):
    def __init__(self, config=None):
        EzPickle.__init__(self)

        # Aid options
        self.pre_play = True
        self.force_calm = False
        self.positive_counts = 0

        self.display_screen = False
        if config:
            self.display_screen = config['display_screen']

        self.observation_space = spaces.Box(0,
                                            1,
                                            shape=(8, ),
                                            dtype=np.float32)
        self.action_space = weightedDiscrete(2)  #spaces.Discrete(2)

        self.vel_max = 15
        self.vel_min = -15
        self.dist_max = 500
        self.dist_min = 0
        self.y_max = 500
        self.y_min = 0

        self.game = FlappyBird(graphics="fancy")
        self.p = PLE(self.game,
                     fps=30,
                     frame_skip=1,
                     num_steps=1,
                     force_fps=True,
                     display_screen=self.display_screen,
                     rng=0)
        self.p.rng = self.game.rng
        self.game.player.rng = self.game.rng

        self.p.init()

        self.current_t = 0
        self.max_t = 1000

    def _get_obs(self):
        state = self.game.getGameState()
        obs = np.empty((8, ))
        obs[0] = (state["player_y"] - self.y_min) / (self.y_max - self.y_min)
        obs[1] = (state["next_pipe_dist_to_player"] -
                  self.dist_min) / (self.dist_max - self.dist_min)
        obs[2] = (state["next_pipe_top_y"] - self.y_min) / (self.y_max -
                                                            self.y_min)
        obs[3] = (state["next_pipe_bottom_y"] - self.y_min) / (self.y_max -
                                                               self.y_min)
        obs[4] = (state["next_next_pipe_dist_to_player"] -
                  self.dist_min) / (self.dist_max - self.dist_min)
        obs[5] = (state["next_next_pipe_top_y"] - self.y_min) / (self.y_max -
                                                                 self.y_min)
        obs[6] = (state["next_next_pipe_bottom_y"] -
                  self.y_min) / (self.y_max - self.y_min)
        obs[7] = (state["player_vel"] - self.vel_min) / (self.vel_max -
                                                         self.vel_min)
        return obs

    def reset(self):
        self.current_t = 0
        self.p.reset_game()

        if self.pre_play:  # Get rid of the first second of game
            ini_fc = self.force_calm
            self.force_calm = False
            for i in range(25):
                a = 0
                if i % 10 == 0:
                    a = 1
                self.step(np.array([a]))
            self.force_calm = ini_fc

        return self._get_obs()

    def step(self, action):
        self.current_t += 1
        reward = self.p.act(119 if action == 1 else 0)

        if self.force_calm:  # ensures each action is followed by no action
            for i in range(1):
                r = self.p.act(0)
            reward += r

        done = self.current_t >= self.max_t or self.p.game_over()

        done = done or self._double_check_done()

        info = {}
        return self._get_obs(), reward, done, info

    def __getstate__(self):

        dc = lambda x: copy.deepcopy(x)

        # get all game attributes
        _game_state = self.game.__dict__
        _player_state = self.game.player.__dict__
        _pipe_state = self.game.pipe_group.__dict__
        pipe_sprites = self.game.pipe_group.spritedict
        pipe_xs = []
        pipe_ys = []
        pipe_rects = []
        for _, sprite in enumerate(pipe_sprites):
            pipe_xs.append(dc(sprite.x))
            pipe_ys.append(dc(sprite.gap_start))
            pipe_rects.append(dc(pipe_sprites[sprite]))
        lives = dc(self.game.lives)
        score = dc(self.game.getScore())
        pscore = dc(self.p.previous_score)

        # remove images (heavy and require additional serialization):
        __game_state = {}
        __player_state = {}
        for attr in _game_state:
            if attr in [
                    'screen', 'images', 'clock', 'player', 'backdrop',
                    "pipe_group"
            ]:
                pass
            else:
                __game_state[attr] = _game_state[attr]
        for attr in _player_state:
            if attr in ['image', 'image_assets']:
                pass
            else:
                __player_state[attr] = _player_state[attr]

        # accommodate multiple envs in parallel
        game_state = dc(__game_state)
        player_state = dc(__player_state)
        pipe_state = _pipe_state

        # this is a non-PLE parameter that needs to be reset too
        envtime = dc(self.current_t)
        rng_state = self.game.rng.get_state()

        stategroup = (game_state, player_state, pipe_state,
                      (pipe_xs, pipe_rects,
                       pipe_ys), lives, envtime, rng_state, score, pscore)
        return stategroup

    def __setstate__(self, stategroup):
        '''
        Stategroup required (ugly yet somewhat functional):

        0   game_state dictionary (game.__dict__)
        1   player_state dictionary (game.player.__dict__)
        2   pipe_state dictionary (game.pipe_group.__dict__)
        3   (pipe x positions, pipe rects, pipe gap starts) tuple
        4   lives (game.lives, used in game.game_over())
        5   current time (self.current_t)
        6   rng state
        7   score (game.getScore())
        8   previous score (p.previous_score)
        '''
        # use update to preserve images we didn't save
        self.game.__dict__.update(stategroup[0])
        self.game.player.__dict__.update(stategroup[1])
        #self.game.pipe_group.__dict__.update(stategroup[2]) # was introducing reference crossing
        pipe_sprites = self.game.pipe_group.spritedict
        for i, sprite in enumerate(pipe_sprites):
            sprite.x = stategroup[3][0][i]
            pipe_sprites[sprite] = stategroup[3][1][i]
            sprite.gap_start = stategroup[3][2][i]

        self.game.lives = stategroup[4]

        # prevent the Gym env from returning false dones
        self.current_t = stategroup[5]
        self.game.rng.set_state(stategroup[6])

        # fix stupid reward
        self.game.score = stategroup[7]
        self.p.previous_score = stategroup[8]
        return self._get_obs()

    def get_state(self):
        return self.__getstate__()

    def set_state(self, state):
        return self.__setstate__(state)

    def reset_counts(self):
        self.positive_counts = 0

    def _double_check_done(self):
        '''
        Manually inspects game to detect collisions
        Worthy of suicide but necessary...
        '''

        # Check pipe collisions
        for p in self.game.pipe_group:
            hit = pygame.sprite.spritecollide(self.game.player,
                                              self.game.pipe_group, False)
            is_in_pipe = (p.x - p.width / 2 -
                          20) <= self.game.player.pos_x < (p.x + p.width / 2)
            for h in hit:  # do check to see if its within the gap.
                top_pipe_check = (
                    (self.game.player.pos_y - self.game.player.height / 2 + 12)
                    <= h.gap_start) and is_in_pipe
                bot_pipe_check = (
                    (self.game.player.pos_y + self.game.player.height) >
                    h.gap_start + self.game.pipe_gap) and is_in_pipe
                boom = bot_pipe_check or top_pipe_check
                if boom:
                    return True

        # floor limit
        if self.game.player.pos_y >= 0.79 * self.game.height - self.game.player.height:
            return True

        # went above the screen
        if self.game.player.pos_y <= 0: return True

        return False
Example #12
batchSize = 256  # mini batch size

jeu = FlappyBird()
p = PLE(jeu,
        fps=30,
        frame_skip=1,
        num_steps=1,
        force_fps=True,
        display_screen=True)
p.init()

i = 0

while (True):
    p.reset_game()
    state = jeu.getGameState()
    state = np.array(list(state.values()))
    while (not jeu.game_over()):

        qval = model.predict(
            state.reshape(1, len(state)), batch_size=batchSize
        )  # Q-values from the Q-network (model is a neural network initialized earlier)
        if (random.random() < epsilon):  # exploration exploitation strategy
            action = np.random.randint(0, 2)
        else:  #choose best action from Q(s,a) values
            qval_av_action = [-9999] * 2

            for ac in range(0, 2):
                qval_av_action[ac] = qval[0][ac]
            action = (np.argmax(qval_av_action))
        #Take action, observe new state S'
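
The snippet is cut right after the action is chosen. A sketched continuation of the inner loop follows; the 0/1-to-PLE action mapping, the gamma value and the fit call are assumptions, not the original code:

        # --- sketched continuation, not the original code ---
        reward = p.act(119 if action == 1 else None)   # assumed 0/1 -> PLE action mapping
        new_state = np.array(list(jeu.getGameState().values()))

        # Q-learning target: r + gamma * max_a' Q(s', a'); gamma assumed to be 0.99
        target = qval.copy()
        if jeu.game_over():
            target[0][action] = reward
        else:
            new_qval = model.predict(new_state.reshape(1, len(new_state)),
                                     batch_size=batchSize)
            target[0][action] = reward + 0.99 * np.max(new_qval[0])

        model.fit(state.reshape(1, len(state)), target,
                  batch_size=batchSize, verbose=0)
        state = new_state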
Example #13
 def train(self, episodes=1000):
     """Train the athlete.
     Args:
         episodes <int>: number of episodes to iterate over
     """
     # Initialize exploration only data
     epsilon = 1
     epsilon_decay = 1 / episodes
     jumprate = 0.1
     # For log purposes only
     self.print_data = dict({
         'hits': 0,
         'games_played': 1,
         'below_15': 0,
         'pipes_below_15': 0,
         'ep': 0,
         'pipes': 0,
         'episodes': episodes
     })
     # Start game
     game = FlappyBird()
     env = PLE(game,
               fps=30,
               frame_skip=1,
               num_steps=1,
               force_fps=True,
               display_screen=False)
     env.init()
     for _ in range(episodes):
         self.print_data['ep'] += 1
         self.print_data['pipes'] = 0
         # Reset game
         env.reset_game()
         S = self.state2coord(game.getGameState())
         while not env.game_over():
             # Has the state been visited already ?
             if self.Q.get(S) is None:
                 self.Q[S] = [0, 0]
             # Exploration
             if rd() < epsilon:
                 # Using a jump rate to orient exploration
                 A = UP if rd() < jumprate else DOWN
             else:
                 # Reinforcement
                 A = np.argmax(self.Q.get(S))
             # Perform action and get reward
             r = env.act(ACTIONS[A])
             if r == 1.0:
                 # For log purposes only
                 self.print_data['pipes'] += 1
             # Bias the reward to orient exploration
             R = self.biase_reward(r)
             S_ = self.state2coord(game.getGameState())
             # Perform Q update
             self.update_q(S, A, R, S_)
             # Change state
             S = S_
         # Decrease exploration rate
         epsilon -= epsilon_decay
         # For log purposes only
         self.print_status()
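
train() relies on self.update_q(S, A, R, S_) to apply the usual Q-learning rule to the dict-backed table. One plausible implementation of that method; the alpha and gamma defaults are assumptions:

    def update_q(self, s, a, r, s_, alpha=0.1, gamma=0.9):
        # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        if self.Q.get(s_) is None:
            self.Q[s_] = [0, 0]
        self.Q[s][a] += alpha * (r + gamma * max(self.Q[s_]) - self.Q[s][a])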
Example #14
qvalues = json.load(fil)
fil.close()  

#Loads the FlappyBird game
game = FlappyBird(graphics="fixed") 
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=True, display_screen=True)

p.init()

#Repeat the game nb_games times
for i in range(nb_games):
            
    p.reset_game()
   
    while(not p.game_over()):
        state = game.getGameState()
        bucle += 1
        current_state = etat(state)
         
        #As games are played, epsilon decreases 
        if bucle % 100 == 0:
            epsilon = epsilon * 0.9
        
        # Two options: if epsilon < random, the next action is chosen from the qvalues
        if (epsilon < rd.random()):

            action_index = np.argmax(qvalues[current_state])
            action = action_index*119
        
        #Otherwise: action is random. Therefore, as epsilon decreases, qvalues are more often selected to decide the action
        else:
Example #15
	def update(self, action, reward, observation, episode_over):
		if episode_over:
			future = -5
		else:
			future = np.max(self.q[observation])
		# print "Old Q value [", self.state, action, "] = ",  self.q[self.state][action]
		self.q[self.state][action] += self.config["learning_rate"] * (reward + self.config["discount"] * future - self.q[self.state][action])
		# print "New Q value [", self.state, action, "] = ",  self.q[self.state][action]
		self.state = observation

game = FlappyBird()
p = PLE(game, fps=30, display_screen=True)
agent = TabularQAgent(action_space=p.getActionSet())
# print "action set = ", p.getActionSet()
p.init()
observation = game.getGameState()
observation = ((int(observation["player_y"]) - int(observation["next_pipe_bottom_y"])), int(observation["next_pipe_dist_to_player"]), int(observation["player_vel"]))
agent.state = observation
max_score = -10
episode_count = 0
output = open("out.txt", "w")
frame_count = 0
batch_sum = 0
# print "Initial State: ", observation
while True:
	frame_count += 1

	episode_over = False
	action = agent.pickAction()	
	# print "Action = ", action
	reward = p.act(p.getActionSet()[action])
Example #16
myAgent = NaiveAgent(p.getActionSet())
Q = myAgent.createStateActionPolicy(game)

# print(Q)

# myAgent.setTrainedQTable(Q)

starting_episode = 1004
episodes = 5000
# episodes = 1

alpha = 0.1
discount_factor = 0.9

obs = game.getGameState()
# print(obs)


def sarsa():

    # gp = None
    max_reward = -1000

    # with open("output.txt", "a") as file:

    gp = None

    for episode in range(starting_episode, episodes):

        p.reset_game()
        else:
            r = -1000
        return r


if __name__ == "__main__":
    episodes = 2000_000000
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=False)
    p.init()
    agent = Agent(p.getActionSet())
    max_score = 0

    for episode in range(episodes):
        p.reset_game()
        state = agent.get_state(game.getGameState())
        agent.update_greedy()
        while True:
            action = agent.get_best_action(state)
            reward = agent.act(p, action)
            next_state = agent.get_state(game.getGameState())
            agent.update_q_table(state, action, next_state, reward)
            current_score = p.score()
            state = next_state
            if p.game_over():
                max_score = max(current_score, max_score)
                print('Episodes: %s, Current score: %s, Max score: %s' % (episode, current_score, max_score))
                if current_score > 300:
                    np.save("{}_{}.npy".format(current_score, episode), agent.q_table)
                break
Example #18
import DFS.DFS as dfs

game = FlappyBird()
game.pipe_gap = 150
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()

print(p.getActionSet())

flappyVariables = {
    "player_height": game.player.height,
    "pipe_gap": game.pipe_gap,
    "game_max_drop": game.player.MAX_DROP_SPEED,
    "game_gravity": game.player.GRAVITY,
    "game_flap_power": game.player.FLAP_POWER
}
myAgent = SimpleAgent(flappyVariables)

nb_frames = 1000

for f in range(nb_frames):
    if p.game_over():  #check if the game is over
        exit()
        p.reset_game()
    obs = p.getScreenRGB()
    # if f == 1 :
    # 	steps = dfs.get_steps_by_frame(game.getGameState());
    # 	print("\n STEPS",steps)
    action = myAgent.chooseAction(game.getGameState())
    p.act(action)
#	p.act(None)
Example #19
scoreMC = np.zeros((nb_epochs))

# Saving the neural network
filename = "dqn_3_"
"""-----------------"""
""" Deep Q-Learning """
"""-----------------"""

for id_game in range(total_games):
    if id_game % evaluation_period == 0:
        epoch += 1
        scoreMC[epoch] = MCeval(dqn, 50, gamma)
        dqn.save(filename + str(epoch) + ".dqf")
        print(">>> Eval n°%d | score = %f" % (epoch, scoreMC[epoch]))
    p.reset_game()  # New game
    state_x = process_state(game.getGameState())
    id_frame = 0
    score = 0
    alea = 0
    while not game.game_over():
        id_frame += 1
        step += 1
        ## Choose the action to perform: 0 or 1
        if np.random.rand() < epsilon(step):  # Random action
            alea += 1
            action = np.random.choice([0, 1])
        else:  # Best possible action
            action = greedy_action(dqn, state_x)
        ## Play the action and observe the reward and the next state
        reward = p.act(actions[action])
        reward = clip_reward(reward)
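
This last loop leans on several helpers that are not shown: process_state, clip_reward, greedy_action, the epsilon schedule and MCeval (a Monte Carlo evaluation over 50 games, left out here). Plausible sketches for the simpler ones, assuming dqn behaves like a Keras model; none of this is the original code:

import numpy as np


def process_state(game_state):
    # flatten the 8-value state dict into a feature vector
    return np.array(list(game_state.values()), dtype=np.float32)


def clip_reward(r):
    # keep rewards in [-1, 1] so the Q targets stay well scaled
    return float(np.clip(r, -1.0, 1.0))


def greedy_action(dqn, state_x):
    # pick the action with the highest predicted Q value
    return int(np.argmax(dqn.predict(state_x.reshape(1, -1), verbose=0)[0]))


def epsilon(step, eps_start=1.0, eps_end=0.05, decay_steps=100000):
    # linear exploration schedule, called as epsilon(step) in the loop above
    frac = min(step / decay_steps, 1.0)
    return eps_start + frac * (eps_end - eps_start)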