Example No. 1
def playFlappyBird():
        actions = 2
        brain = DeepQN(actions)
        flappyBird = game.GameState()
        action0 = np.array([1,0])  # do nothing
        observation0, reward0, terminal = flappyBird.frame_step(action0)
        observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
        ret, observation0 = cv2.threshold(observation0,1,255,cv2.THRESH_BINARY)
        brain.setInitState(observation0)

        score = 0
        time = 0
        x = []
        y = []
        
        while True:
                action = brain.getAction()
                nextObservation,reward,terminal = flappyBird.frame_step(action)
                nextObservation = preprocess(nextObservation)
                brain.setPerception(nextObservation,action,reward,terminal)
                
                if reward != 0.1:  # skip the 0.1 per-frame survival reward; log pipes passed and crashes
                    time += 1
                    x.append(time)
                    score += reward
                    y.append(score)

                if time == 1000:
                    break
                
        plt.plot(x,y)
        plt.show()
Example No. 2
def play():
    actions = 2
    agent = DQN(actions)
    flappyBird = game.GameState()
    # play game
    # obtain init state
    action0 = np.array([1, 0])
    observation0, reward0, is_terminal = flappyBird.frame_step(action0)

    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    observation0 = np.asarray(observation0, dtype=np.float64)
    agent.setInitState(observation0)
    """
    m = np.max(observation0)
    observation0 = 1.0 / m * observation0
    agent.setInitState(observation0)
    """

    while 1 != 0:
        action = agent.getAction()
        print(agent.timeStep)
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        agent.setPerception(nextObservation, action, reward, terminal)
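
setInitState belongs to the agent class and is not shown here; judging from the later examples that start the state as four copies of the first frame, a minimal sketch might look like this (the attribute name currentState is an assumption):

import numpy as np

def setInitState(self, observation):
    # Hypothetical: replicate the first 80x80 frame four times to form the
    # initial (4, 80, 80) state the convolutional network expects.
    self.currentState = np.stack(
        (observation, observation, observation, observation), axis=0)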
Example No. 3
def TrainFlappyBird():

    # Train AI for the Flappy Bird game    
    # Initialize Flappy Bird game
    flappybird = game.GameState()
    
    # Initialize AI for training
    num_actions = 2
    AI_player = DQN_AI(num_actions = num_actions, mode = 'train')
	
    # AI training
    # Initialize the first state of AI with the first observation from the game
    action = np.array([1,0])  # idle
    observation, reward, terminal = flappybird.frame_step(action)
    observation = Preprocess(observation)
    AI_player.Current_State_Initialze(observation = observation)
    
    # AI starts training
    while True:
        # Keep training until hitting 'ctrl + c'
        print('time step: %d' % AI_player.time_step)
        action = AI_player.AI_Action()
        next_observation, reward, terminal = flappybird.frame_step(action)
        next_observation = Preprocess(next_observation)
        AI_player.Q_CNN_Train(action = action, reward = reward, observation = next_observation, terminal = terminal)
Example No. 4
    def trainNet(self):
        game_state = game.GameState()
        # get the first state by doing nothing and preprocess the image to 80x80x4
        do_nothing = np.zeros(ACTIONS)
        do_nothing[0] = 1
        x_t, r_0, terminal = game_state.frame_step(do_nothing)
        x_t = cv2.cvtColor(cv2.resize(x_t, (HEIGHT, WIDTH)), cv2.COLOR_BGR2GRAY)
        ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
        s_t = np.zeros([FRAME, HEIGHT, WIDTH])
        for i in range(FRAME):
            s_t[i, :, :] = x_t
        epsilon = INITIAL_EPSILON
        if not args.mode == "train":
            epsilon = 0
        while "flappy bird" != "angry bird":
            input_frame = np.reshape(s_t, (1, FRAME, HEIGHT, WIDTH))
            self.tg_net.forward(mx.io.DataBatch([mx.nd.array(input_frame, self.ctx)], []))
            qvalue = np.squeeze(self.tg_net.get_outputs()[0].asnumpy())
            a_t = np.zeros([ACTIONS])
            action_index = 0
            if self.timestep % FRAME_PER_ACTION == 0:
                if random.random() <= epsilon:
                    # print("----------Random Action----------")
                    action_index = random.randrange(ACTIONS)
                    a_t[action_index] = 1
                else:
                    action_index = np.argmax(qvalue)
                    a_t[action_index] = 1
                    # print "----------Net Action----------", action_index
            else:
                a_t[0] = 1 # do nothing

            # run the selected action and observe next state and reward
            x_t1_colored, r_t, terminal = game_state.frame_step(a_t)
            x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (HEIGHT, WIDTH)), cv2.COLOR_BGR2GRAY)
            ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
            x_t1 = np.reshape(x_t1, (1, HEIGHT, WIDTH))
            s_t1 = np.vstack((x_t1, s_t[:(FRAME-1), :, :]))

            if args.mode == "train":
                # store the transition in replay memory
                self.replayMemory.append((s_t, a_t, r_t, s_t1, terminal))

                # scale down epsilon
                if epsilon > FINAL_EPSILON and self.timestep > OBSERVE:
                    epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
                if len(self.replayMemory) > REPLAY_MEMORY:
                    self.replayMemory.popleft()
                # only train if done observing
                if self.timestep > OBSERVE:
                    self.trainStep()
                if self.timestep <= OBSERVE:
                    state = "observe"
                elif self.timestep > OBSERVE and self.timestep <= OBSERVE + EXPLORE:
                    state = "explore"
                else:
                    state = "train"
                print("TIMESTEP", self.timestep, "/ STATE", state, "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, "/ Q ", qvalue)
            s_t = s_t1
            self.timestep += 1
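
self.trainStep() is where the actual Q-learning update happens; the MXNet plumbing is repo-specific, but the target it fits toward reduces to the standard Bellman backup. A minimal NumPy sketch of that computation (GAMMA and the argument layout are assumptions, not code from this repository):

import numpy as np

GAMMA = 0.99  # assumed discount factor

def bellman_targets(minibatch, q_next):
    # minibatch: list of (s_t, a_t, r_t, s_t1, terminal) tuples as stored above;
    # q_next[i]: the target network's Q-values for s_t1 of sample i.
    targets = np.zeros(len(minibatch))
    for i, (_, _, r_t, _, terminal) in enumerate(minibatch):
        # terminal transitions keep the raw reward, otherwise bootstrap
        targets[i] = r_t if terminal else r_t + GAMMA * np.max(q_next[i])
    return targets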
Example No. 5
def playFlappyBird():
    episodeMemory = []
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)

    # Step 3.2: run the game
    while 1 != 0:
        action = np.array([1, 0])
        for event in pygame.event.get():
            if event.type == KEYDOWN and event.key == K_UP:
                action = np.array([0, 1])
            if event.type == QUIT or (event.type == KEYDOWN
                                      and event.key == K_ESCAPE):
                pygame.quit()
                sys.exit()
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        if terminal:
            episodeMemory.append([nextObservation, action, reward, terminal])
            save2file(episodeMemory)
            episodeMemory = []
        else:
            episodeMemory.append([nextObservation, action, reward, terminal])
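
save2file is not included in the snippet; a minimal sketch, assuming it simply appends the finished human-played episode to a pickle file for later offline use:

import pickle

def save2file(episodeMemory, path='episodes.pkl'):
    # Hypothetical helper: append one finished episode (a list of
    # [observation, action, reward, terminal] entries) to a pickle file.
    with open(path, 'ab') as f:
        pickle.dump(episodeMemory, f)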
Example No. 6
def test_DQN(dqn, episode):
    """
    test DQN model
    param dqn: dqn model
    param episode: current episode
    """
    #test on 5 games
    case_num = 5

    dqn.close_train()
    ave_step = 0
    for i in range(case_num):
        dqn.time_step = 0
        flappyBird = game.GameState()
        o, r, terminal = flappyBird.frame_step([1,0])
        o = preprocess(o)
        dqn.reset_state()
        #play game until game end
        while True:
            action = dqn.get_action_optim()
            o, r, terminal = flappyBird.frame_step(action)
            if terminal:
                break #game over
            o = preprocess(o)
            dqn.currt_state = np.append(dqn.currt_state[1:,:,:], o, axis=0)
            dqn.increase_step()
        ave_step += dqn.time_step
    ave_step = ave_step / case_num
    print("episode:{}, average game steps:{}".format(episode, ave_step))
    return ave_step
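
One hypothetical way to use this during training is to evaluate every few hundred episodes and keep the checkpoint with the best average step count (train_one_episode and save_model are stand-ins for the repository's own routines, not part of the snippet):

def evaluate_periodically(dqn, train_one_episode, save_model,
                          num_episodes=10000, eval_every=200):
    # Hypothetical driver: alternate training and evaluation, keeping the
    # weights that survive the longest on average.
    best_steps = 0
    for episode in range(num_episodes):
        train_one_episode(dqn)
        if episode % eval_every == 0:
            steps = test_DQN(dqn, episode)
            if steps > best_steps:
                best_steps = steps
                save_model(dqn)
    return best_steps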
Example No. 7
def play_game(model_file_name, cuda=False, best=True):
    """Play flappy bird with pretrained dqn model

       model_file_name -- file holding the trained dqn weights
       best -- whether the model is the best checkpoint
    """
    print('load pretrained model file: ' + model_file_name)
    model = BrainDQN(epsilon=0., mem_size=0, cuda=cuda)
    load_checkpoint(model_file_name, model)

    model.set_eval()
    bird_game = game.GameState()
    model.set_initial_state()
    if cuda:
        model = model.cuda()
    while True:
        action = model.get_optim_action()
        o, r, terminal = bird_game.frame_step(action)
        if terminal:
            break
        o = preprocess(o)

        model.current_state = np.append(model.current_state[1:, :, :],
                                        o.reshape((1, ) + o.shape),
                                        axis=0)

        model.increase_time_step()
    print('total time step is {}'.format(model.time_step))
Example No. 8
def play_flappybird():
    brain_config = {
        'network_type': 'cnn',
        'learning': False,
        'num_actions': 2,
        'lookback_window': 3
    }

    brain = TFBrain(brain_config)
    brain.show_configs()

    bird_env = game.GameState()
    frame_per_action = 1

    action = np.array([1, 0])
    observation, reward, chain_end = bird_env.frame_step(action)
    observation = preprocess(observation)
    observation = np.reshape(observation,
                             (observation.shape[0], observation.shape[1]))
    state = init_state(observation)

    i = 0
    while 1 != 0:
        if i % frame_per_action == 0:
            action = brain.decide(state, determistic=True)
        else:
            # Do nothing
            action = np.array([1, 0])
        i += 1
        next_observation, reward, chain_end = bird_env.frame_step(action)
        next_observation = preprocess(next_observation)
        next_state = proceed_state(state, next_observation)
        experience = Experience(state, action, reward, next_state, chain_end)
        state = next_state
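
init_state and proceed_state are not shown; a minimal sketch, assuming the same sliding four-frame window that Examples 7, 12 and 13 maintain with np.append(state[1:, :, :], ...):

import numpy as np

def init_state(observation):
    # Hypothetical: start the window as four copies of the first frame.
    return np.stack((observation, observation, observation, observation), axis=0)

def proceed_state(state, observation):
    # Hypothetical: drop the oldest frame and append the newest one.
    return np.append(state[1:, :, :],
                     observation.reshape((1,) + observation.shape), axis=0)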
Example No. 9
 def __init__(
     self,
     name,
     sess,
     ac_parms,
     globalAC,
     game_name,
 ):
     super(Worker, self).__init__()
     self.name = name
     self.sess = sess
     self.ac_parms = ac_parms
     self.globalAC = globalAC
     self.env = gym.make(game_name).unwrapped
     self.AC = ACNet4CartPole(
         n_actions=self.ac_parms['n_actions'],
         n_features=self.ac_parms['n_features'],
         sess=self.sess,
         globalAC=globalAC,
         scope=self.name,
         OPT_A=self.ac_parms['OPT_A'],
         OPT_C=self.ac_parms['OPT_C'],
     )
     self.memory = A3CMemory()
     self.GameState = game.GameState()
Example No. 10
def test():
    model = load_model('model')
    game_state = game.GameState()

    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, terminal = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t, 1, 255, cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=0)

    t = 0
    while True:
        predict_batch = s_t[np.newaxis, :]
        readout_t = model.predict(predict_batch)[0]
        a_t = np.zeros([ACTIONS])
        action_index = np.argmax(readout_t)
        q_value = np.max(readout_t)
        print(q_value)
        a_t[action_index] = 1

        x_t1_colored, r_t, terminal = game_state.frame_step(a_t)
        x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)),
                            cv2.COLOR_BGR2GRAY)
        ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
        x_t1 = np.reshape(x_t1, (1, 80, 80))
        s_t1 = np.append(x_t1, s_t[:3, :, :], axis=0)

        s_t = s_t1
        t += 1
        if terminal:
            break
Example No. 11
def playFlappyBird(mode):

    env = game.GameState()
    Deep_Q_Network = DQN(ACTIONS)

    do_nothing = np.zeros(ACTIONS)
    do_nothing[1] = 1
    observation0, reward0, terminal = env.frame_step(do_nothing)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0,1,255,cv2.THRESH_BINARY)
    Deep_Q_Network.setInitState(observation0)

    while 1 != 0:
        action = Deep_Q_Network.getAction()
        nextObservation,reward,terminal = env.frame_step(action)
        nextObservation = preprocess(nextObservation)
        # Train
        if mode == "train":
            INITIAL_EPSILON = 1.0
            Deep_Q_Network.setPerception(nextObservation,action,reward,terminal)
        # Inference (test mode)
        if mode == "test":
            Deep_Q_Network.interfere(nextObservation)
        cv2.imshow("", nextObservation )
        if cv2.waitKey(1) == 27:
            break
Example No. 12
def play(model_file_name, config):
    print('load pretrained model file: ' + model_file_name)

    agent = Agent(config)
    load_checkpoint(model_file_name, agent.model)
    bird_game = game.GameState()

    total_reward = 0.
    time_count = 0.

    # 1.init S
    action = [1, 0]  # do nothing
    state = init_state()
    obs, reward, terminal = bird_game.frame_step(action)
    obs = preprocess(obs)
    state = np.append(state[1:, :, :], obs.reshape((1, ) + obs.shape), axis=0)

    while not terminal:
        action = agent.optimal_action(state)

        next_obs, reward, terminal = bird_game.frame_step(action)
        next_obs = preprocess(next_obs)
        next_state = np.append(state[1:, :, :],
                               next_obs.reshape((1, ) + next_obs.shape),
                               axis=0)

        state = next_state

        total_reward += reward
        time_count += 1

    print('total time step is {}'.format(time_count))
Example No. 13
def test_dqn(model, episode):
    ave_time = 0.
    for test_case in range(5):
        time_step = 0
        flappyBird = game.GameState()
        o, r, terminal = flappyBird.frame_step([1, 0])
        obs = preprocess(o)
        state = init_state()
        state = np.append(state[1:, :, :],
                          obs.reshape((1, ) + obs.shape),
                          axis=0)

        while True:
            action = model.optimal_action(state)
            o, r, terminal = flappyBird.frame_step(action)
            if terminal: break
            o = preprocess(o)
            state = np.append(state[1:, :, :],
                              o.reshape((1, ) + o.shape),
                              axis=0)
            time_step += 1
        ave_time += time_step
    ave_time /= 5
    print('testing: episode: {}, average time: {}'.format(episode, ave_time))
    return ave_time
Example No. 14
def play():
    network = DQN(2) # init network with 2 actions

    # init flappybird game and the first state
    flappy_bird = game.GameState()
    action = np.array([1, 0])
    state, reward, done, _ = flappy_bird.frame_step(action)
    state = preprocess(state)
    state = state.reshape((80, 80))
    network.current_state = np.stack((state, state, state, state), axis=2)

    # play game
    _score = 0
    total = 0.0
    num_game = 1
    while True:
        action = network.get_action()
        next_state, reward, done, score = flappy_bird.frame_step(action)
        if score != 0:
            _score = score
        if done:
            print "TIME", network.time_in_game, "GAME NUM", num_game, "EPSILON", network.epsilon, "SCORE", score
            total += _score
            num_game += 1
            score = 0

            if num_game % 10 == 0:
                print "\nlast 10 game avg score", total / num_game, "\n"
                total = 0.0

        next_state = preprocess(next_state)
        network.process(next_state, action, reward, done)
Example No. 15
def run_network(s, readout, h_fc1, sess):
    
    game_state = game.GameState()
    # get the first state by doing nothing and preprocess the image to 80x80x4
    do_nothing = np.zeros(ACTIONS)
    do_nothing[0] = 1
    x_t, r_0, terminal = game_state.frame_step(do_nothing)
    x_t = cv2.cvtColor(cv2.resize(x_t, (80, 80)), cv2.COLOR_BGR2GRAY)
    ret, x_t = cv2.threshold(x_t,1,255,cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)

    while True:
        # choose an action epsilon greedily
        readout_t = readout.eval(feed_dict={s : [s_t]})[0]
        a_t = np.zeros([ACTIONS])
        
        action_index = np.argmax(readout_t)
        a_t[action_index] = 1
        
        # run the selected action and observe next state and reward
        x_t1_colored, r_t, terminal = game_state.frame_step(a_t)
        x_t1 = cv2.cvtColor(cv2.resize(x_t1_colored, (80, 80)), cv2.COLOR_BGR2GRAY)
        ret, x_t1 = cv2.threshold(x_t1, 1, 255, cv2.THRESH_BINARY)
        x_t1 = np.reshape(x_t1, (80, 80, 1))
        
        s_t = np.append(x_t1, s_t[:, :, :3], axis=2)
Example No. 16
def playFlappyBird():
    # Step 1: init BrainDQN
    brain = BrainDQN()
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing (I think it actually takes a random act from 0 or 1)

    # get all the returned values
    observation0, reward0, terminal = flappyBird.frame_step(action0)

    # convert to grayscale
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)

    # Initialize the state by the first observation
    brain.setInitState(observation0)

    # Step 3.2: run the game
    while 1 != 0:
        action = brain.getAction()

        # pass action in and get the return value
        nextObservation, reward, terminal = flappyBird.frame_step(action)

        # get grayscale
        nextObservation = preprocess(nextObservation)

    # Record this state, put it into experience to be available for the batch
        # and train the model by Q-learning
        brain.setPerception(nextObservation, action, reward, terminal)
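
setPerception does the bookkeeping the comment above describes. A minimal sketch, assuming the observe-then-train schedule spelled out in Example No. 4 and a channel-first (4, 80, 80) state; the attribute names are assumptions:

import numpy as np

def setPerception(self, nextObservation, action, reward, terminal):
    # Hypothetical outline: slide the frame window, store the transition,
    # and train the Q-network once enough frames have been observed.
    newState = np.append(self.currentState[1:, :, :],
                         nextObservation.reshape((1, 80, 80)), axis=0)
    self.replayMemory.append((self.currentState, action, reward, newState, terminal))
    if len(self.replayMemory) > self.memorySize:
        self.replayMemory.popleft()      # drop the oldest transition
    if self.timeStep > self.observe:
        self.trainQNetwork()             # assumed minibatch training routine
    self.currentState = newState
    self.timeStep += 1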
Example No. 17
def main():
    begin_time = datetime.datetime.now()

    env = game.GameState()
    brain = rl_brain_pytorch.DeepQNetwork()

    step = 0
    for episode in range(rl_brain_pytorch.MAX_EPISODE):
        # do nothing
        observation, _, _ = env.frame_step([1,0])
        observation = preprocess(observation, False)
        brain.reset(observation)
        score = 0.0
        while True:
            action = brain.choose_action(observation)
            observation_, reward, done = env.frame_step(action)
            if reward == 1: score+=1
            observation_ = preprocess(observation_, True)
            if TRAINING:
                brain.store_transition(observation, action, reward, done, observation_)
            # once some experience has accumulated, learning can start
            if step > 200:
                if TRAINING:
                    brain.learn()

            if done:
                break

            observation = observation_
            step += 1
            
        end_time = datetime.datetime.now()
        print("episode {} over. exec time:{} step:{} score:{}".format(episode, end_time - begin_time, step,score))
    brain.saveNet()
    env.exit("game over")
Example No. 18
def test_dqn(model, episode):
    """Test the behavor of dqn when training

       model -- dqn model
       episode -- current training episode
    """
    model.set_trainable(False)
    ave_time = 0.
    for test_case in range(5):
        model.time_step = 0
        flappyBird = game.GameState()
        o, r, terminal = flappyBird.frame_step([1, 0])
        o = preprocess(o)
        while True:
            action = model.optimal_action()
            o, r, terminal = flappyBird.frame_step(action)
            if terminal:
                break
            o = preprocess(o)
            model.current_state = np.append(model.current_state[1:, :, :],
                                            o.reshape((1, ) + o.shape),
                                            axis=0)
            model.increase_timestep()
        ave_time += model.time_step
    ave_time /= 5
    print('testing: episode: {}, average time: {}'.format(episode, ave_time))
    return ave_time
Example No. 19
    def launch_game(self):
        self.game_state = game.GameState()
        print(self.game_state)
        actions = np.zeros(self.number_of_actions, dtype='int32')
        print(actions)
        actions[0] = 1
        self.initial_state, reward, done = self.game_state.frame_step(actions)
Example No. 20
def trainAgentEpisodic(agent):
    # open up a game state to communicate with emulator
    game_state = game.GameState()
    #agent = agent_class('model.h5', memory_size=REPLAY_MEMORY,
    #              Epsilon=rangefloat(INITIAL_EPSILON,FINAL_EPSILON,EXPLORE),
    #              K=FRAME_PER_ACTION)

    fillMemory(game_state, agent, OBSERVATION)

    episode_scores = []
    with trange(0, NUM_OF_EPISODES) as episodes:
        episodes.set_description('Training...')
        steps = 0
        for episode in episodes:
            _, stp = runEpisode(game_state, agent, training=True)
            steps += stp
            if (episode + 1) % 20 == 0:
                episodes.set_description('Testing...')
                score, _ = runEpisode(game_state, agent, training=False)
                episode_scores.append(score)
                episodes.set_description(
                    'Reward {:.2f} | Epsilon: {:.6f} | Steps {!s} | Training...'
                    .format(np.mean(episode_scores), agent.get_epsilon(),
                            steps))

    print('\nMean: {:.3f} Std: {:.3}'.format(np.mean(episode_scores),
                                             np.std(episode_scores)))
    plt.plot(range(0, NUM_OF_EPISODES, 20), episode_scores, 'ro')
    plt.ylabel('Score')
    plt.show()
    print("Episode finished!")
    print("************************")
Example No. 21
def playGame():
    # open up a game state to communicate with emulator
    game_state = game.GameState()

    # threading.Timer(0.1, processFrames).start()
    # threading.Timer(1.5, saveFrame).start()
    while True:
        t_start = time.time()
        keys = pygame.key.get_pressed()
        if (keys[K_ESCAPE]):
            cap.release()
            videoSaver.release()
            cv2.destroyAllWindows()
            return
        if (keys[K_f]):
            APP_CONFIG['save'] = False
            print('off')
        if (keys[K_n]):
            APP_CONFIG['save'] = True
            print('on')
        frame = processFrames()
        #drawFitRectangle(frame)
        faceFrame = excludeFace(frame)
        shouldJump = isMouth(faceFrame)
        saveFrame(frame)
        t_end = time.time()
        t_delta = int((t_end - t_start) * 1000)
        print(t_delta)
        while (t_delta >= 0):
            game_state.frame_step(getAction(shouldJump))
            t_delta -= 8
        #game_state.frame_step(getAction(shouldJump))
        drawFrame(frame)
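
getAction turns the mouth-open detection into the game's two-element action vector. A minimal sketch, assuming the one-hot convention used throughout these examples ([1, 0] = do nothing, [0, 1] = flap):

import numpy as np

def getAction(shouldJump):
    # Hypothetical mapping from the face detector's boolean to a one-hot action.
    return np.array([0, 1]) if shouldJump else np.array([1, 0])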
Example No. 22
def TrainFlappyBirdResume():

    # Resume training in case of break 
    # Initialize Flappy Bird game
    flappybird = game.GameState()
    
    # Initialize AI for training
    num_actions = 2
    AI_player = DQN_AI(num_actions = num_actions, mode = 'train')
    
    # Set AI parameters to resume
    AI_player.Load_Model()
    AI_player.epsilon = 0 # user could adjust epsilon for the training after resume
    	
    # AI training
    # Initialize the first state of AI with the first observation from the game
    action = np.array([1,0])  # idle
    observation, reward, terminal = flappybird.frame_step(action)
    observation = Preprocess(observation)
    AI_player.Current_State_Initialze(observation = observation)
    
    # AI starts training
    while True:
        # Keep training until hitting 'ctrl + c'
        print('time step: %d' % AI_player.time_step)
        action = AI_player.AI_Action()
        next_observation, reward, terminal = flappybird.frame_step(action)
        next_observation = Preprocess(next_observation)
        AI_player.Q_CNN_Train(action = action, reward = reward, observation = next_observation, terminal = terminal)
Example No. 23
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    print(observation0.shape)
    brain.setInitState(observation0)

    # Step 3.2: run the game
    while 1 != 0:
        action = brain.getAction()
        print(action)
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
        cv2.imshow('Video', nextObservation)  # display the preprocessed frame
        if cv2.waitKey(1) & 0xFF == 27:
            break
Example No. 24
def playFlappyBird():
    # Step 1: init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)

    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)

    brain.setInitState(observation0)

    # Step 3.2: run the game
    while 1 != 0:
        action = brain.getAction()  # the agent returns an action a_t for the current state
        nextObservation, reward, terminal = flappyBird.frame_step(
            action)  # execute the action; get its reward r(t) and the resulting observation o(t+1)
        nextObservation = preprocess(nextObservation)

        tmp_img = showThreshImg(nextObservation)  # the frame returned by flappyBird is rotated and mirrored
        cv2.imshow("process", tmp_img)
        cv2.waitKey(1)

        brain.setPerception(nextObservation, action, reward, terminal)
Example No. 25
    def func_Train_Network(var_Build_Models_, parm_Args_):

        # We will open the game now on the emulator
        var_State_Game_ = imp_Game_.GameState()

        # We will store the old observations into the replay memory
        var_D_ = imp_DQ()
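
imp_DQ is not shown; given how Example No. 4 appends to and pops from its replay memory, it is presumably a deque-backed buffer. A minimal sketch under that assumption (the capacity is made up):

from collections import deque

REPLAY_MEMORY = 50000  # assumed capacity

def imp_DQ(maxlen=REPLAY_MEMORY):
    # Hypothetical factory for the replay buffer of (s, a, r, s', terminal) tuples.
    return deque(maxlen=maxlen)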
Example No. 26
def playFlappyBird():
    #init BrainDQN
    actions = 2
    brain = BrainDQN(actions)
    #play the game forever

    flappyBird = game.GameState()
    while True:
        #init Flappy Bird Game
        action = np.array([1, 0])
        observation, reward, terminal, screenCap = flappyBird.frame_step(
            action)

        observation = cv2.cvtColor(cv2.resize(observation, (80, 80)),
                                   cv2.COLOR_BGR2GRAY)
        ret, observation = cv2.threshold(observation, 1, 255,
                                         cv2.THRESH_BINARY)
        brain.setInitState(observation)

        #run the game
        while True:
            action = brain.getAction()
            nextObservation, reward, terminal, screenCap = flappyBird.frame_step(
                action)
            nextObservation = preprocess(nextObservation)
            brain.setPerception(nextObservation, action, reward, terminal)
Example No. 27
def playFlappyBird(pretrained):
    if not os.path.exists('saved_networks'):
        os.makedirs('saved_networks')
    # Step 1: init BrainDQN
    actions = 2
    #brain = BrainDQN(actions,param_file='saved_networks/network-dqn_gluon34900.params')
    if pretrained != "":
        brain = BrainDQN(actions, param_file=pretrained)
    else:
        brain = BrainDQN(actions)
    # Step 2: init Flappy Bird Game
    flappyBird = game.GameState()
    # Step 3: play game
    # Step 3.1: obtain init state
    action0 = np.array([1, 0])  # do nothing
    observation0, reward0, terminal = flappyBird.frame_step(action0)
    observation0 = cv2.cvtColor(cv2.resize(observation0, (80, 80)),
                                cv2.COLOR_BGR2GRAY)
    ret, observation0 = cv2.threshold(observation0, 1, 255, cv2.THRESH_BINARY)
    brain.setInitState(observation0)

    # Step 3.2: run the game
    while 1 != 0:
        action = brain.getAction()
        nextObservation, reward, terminal = flappyBird.frame_step(action)
        nextObservation = preprocess(nextObservation)
        brain.setPerception(nextObservation, action, reward, terminal)
Example No. 28
    def test(self, num_actions):
        self.saver.restore(self.session, FLAGS.checkpoint_path)
        print("Restored model weights from ", FLAGS.checkpoint_path)

        game_state = game.GameState()
        do_nothing = np.zeros(ACTIONS)
        do_nothing[0] = 1
        x_t1_colored, r_0, terminal = game_state.frame_step(do_nothing)
        x_t1 = skimage.color.rgb2gray(x_t1_colored)
        x_t1 = skimage.transform.resize(x_t1, (80, 80))
        state = np.stack((x_t1, x_t1, x_t1, x_t1), axis=2)
        state = state.reshape(state.shape[0], state.shape[1],
                              state.shape[2])  # shape (80, 80, 4)

        for i_episode in range(FLAGS.num_eval_episodes):
            episode_reward = 0
            done = False
            while not done:
                q_values = self.q_values.eval(session=self.session,
                                              feed_dict={self.state: [state]})
                action_index = np.argmax(q_values)
                action = np.zeros([num_actions])
                action[action_index] = 1
                x_t1_colored, reward, done = game_state.frame_step(action)
                x_t1 = skimage.color.rgb2gray(x_t1_colored)
                x_t1 = skimage.transform.resize(x_t1, (80, 80))
                x_t1 = x_t1.reshape(x_t1.shape[0], x_t1.shape[1],
                                    1)  # shape (80, 80, 1)
                new_state = np.append(x_t1, state[:, :, :3], axis=2)

                state = new_state
                episode_reward += reward
            print("Finished episode " + str(i_episode + 1) + " with score " +
                  str(episode_reward))
Example No. 29
    def train(self, num_actions):
        # Initialize target network weights
        # Initialize all variables
        init_op = tf.global_variables_initializer()
        self.session.run(init_op)
        self.session.run(self.update_target)

        # Inititalize learning rate
        self.lr = FLAGS.learning_rate

        self.threadLock = threading.Lock()
        game_states = [game.GameState() for i in range(FLAGS.num_concurrent)]

        if not os.path.exists(FLAGS.checkpoint_dir):
            os.makedirs(FLAGS.checkpoint_dir)
        # Initialize variables
        self.session.run(tf.initialize_all_variables())

        # Start num_concurrent actor-learner training threads
        actor_learner_threads = [
            threading.Thread(target=self.actor_learner_thread,
                             args=(game_states[thread_id], thread_id,
                                   num_actions))
            for thread_id in range(FLAGS.num_concurrent)
        ]
        for t in actor_learner_threads:
            t.start()

        for t in actor_learner_threads:
            t.join()
Example No. 30
def trainModel(predict_model,actual_model,mode):
	fb = game.GameState()
	replay_mem = []
	i_0, r_0, isDead = fb.frame_step(0)
	i_0 = processImage(i_0)
	state_0 = np.stack((i_0,i_0,i_0,i_0), axis=2)
	state_0 = state_0.reshape(1,rows,cols,stack)
	state_t = state_0
	t = 0
	if mode == 'Train':
		log = open(strftime("%Y-%m-%d-%H:%M:%S", gmtime()), 'w')
	while True:
		loss = 0
		q_max = 0
		if random.random() < ep and mode == 'Train':
			print('taking random action')
			flap = random.randint(0,1)
		else:
			q = predict_model.predict(state_t)
			flap = np.argmax(q[0])
			q_max = max(q[0])
		i_t, r_t, isDead = fb.frame_step(flap)
		i_t = processImage(i_t)
		i_t = i_t.reshape(1, rows, cols ,1)
		state_t1 = np.append(i_t, state_t[:, :, :, :3], axis=3)

		replay_mem += [(state_t,state_t1,flap,r_t,isDead)]
		if t > 50000:
			replay_mem = replay_mem[1:]

		if t > 3000 and mode == 'Train':
			batch = random.sample(replay_mem,batch_size)
			X =  np.zeros((batch_size,84,84,4))
			Y = np.zeros((batch_size,2))
			for i in range(len(batch)):
				X[i:i+1] = batch[i][0]
				q = predict_model.predict(batch[i][0])
				Y[i] = q[0]
				action = batch[i][2]
				reward = batch[i][3]
				
				if batch[i][4] == True:
					Y[i,action] = reward
				else:
					max_a = np.argmax(predict_model.predict(batch[i][1])[0])
					q = actual_model.predict(batch[i][1])[0][max_a]
					Y[i,action] = reward + discount_factor*q
			loss = predict_model.train_on_batch(X,Y)
		if t%1000 == 0 and mode == 'Train':
			print('sync two models...')
			actual_model.set_weights(predict_model.get_weights())
			predict_model.save_weights("model-ddqn.h5", overwrite=True)
			# Do I need to re-compile the model here ?  
		print('T = ' + str(t) + ' Loss = ' + str(loss) + ' q-max = ' + str(q_max) + ' reward = ' + str(r_t) + ' action = ' + str(flap))
		if mode == 'Train':
			log.write('T = ' + str(t) + ' Loss = ' + str(loss) + ' q-max = ' + str(q_max) + ' reward = ' + str(r_t) + ' action = ' + str(flap) + '\n')

		t += 1
		state_t = state_t1
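
predict_model and actual_model (the online and target networks) are built elsewhere; a minimal Keras sketch that matches the (84, 84, 4) input and the two Q-value outputs used above. The layer sizes are assumptions, not the repository's architecture:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense

def build_model():
    # Hypothetical DQN head: four stacked 84x84 frames in, one Q-value per action out.
    model = Sequential([
        Conv2D(32, 8, strides=4, activation='relu', input_shape=(84, 84, 4)),
        Conv2D(64, 4, strides=2, activation='relu'),
        Flatten(),
        Dense(256, activation='relu'),
        Dense(2),                  # Q-values for [no-flap, flap]
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

predict_model = build_model()      # online network
actual_model = build_model()       # target network
actual_model.set_weights(predict_model.get_weights())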