class Env:
  def __init__(self):
    self.game = FlappyBird(pipe_gap=125)
    self.env = PLE(self.game, fps=30, display_screen=True)
    self.env.init()
    self.env.getGameState = self.game.getGameState # maybe not necessary

    # by convention we want to use (0,1)
    # but the game uses (None, 119)
    self.action_map = self.env.getActionSet() #[None, 119]

  def step(self, action):
    action = self.action_map[action]
    reward = self.env.act(action)
    done = self.env.game_over()
    obs = self.get_observation()
    # don't bother returning an info dictionary like gym
    return obs, reward, done

  def reset(self):
    self.env.reset_game()
    return self.get_observation()

  def get_observation(self):
    # game state returns a dictionary which describes
    # the meaning of each value
    # we only want the values
    obs = self.env.getGameState()
    return np.array(list(obs.values()))

  def set_display(self, boolean_value):
    self.env.display_screen = boolean_value
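
A minimal driver sketch for the wrapper above (not part of the original example; it assumes the same imports the class relies on, i.e. numpy as np, FlappyBird and PLE, and picks random actions):

env = Env()
obs = env.reset()
done = False
while not done:
    action = np.random.randint(2)  # index into action_map, i.e. (None, 119)
    obs, reward, done = env.step(action)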
Example #2
def run_game(nb_episodes, agent):
    """ Runs nb_episodes episodes of the game with agent picking the moves.
        An episode of FlappyBird ends with the bird crashing into a pipe or going off screen.
    """

    reward_values = {
        "positive": 1.0,
        "negative": 0.0,
        "tick": 0.0,
        "loss": 0.0,
        "win": 0.0
    }
    # TODO: when training use the following instead:
    # reward_values = agent.reward_values

    env = PLE(FlappyBird(),
              fps=30,
              display_screen=False,
              force_fps=True,
              rng=None,
              reward_values=reward_values)
    # TODO: to speed up training change parameters of PLE as follows:
    # display_screen=False, force_fps=True
    env.init()
    totalscore = 0
    score = 0
    count = nb_episodes
    while nb_episodes > 0:
        # pick an action
        # TODO: for training, use agent.training_policy instead
        action = agent.policy(agent.state_binner(env.game.getGameState()))

        # step the environment
        reward = env.act(env.getActionSet()[action])
        #print("reward=%d" % reward)

        # TODO: for training let the agent observe the current state transition

        score += reward

        # reset the environment if the game is over
        if env.game_over():
            totalscore += score
            print("score for this episode: %d" % score)
            env.reset_game()
            nb_episodes -= 1
            score = 0
    print("average for this run is: %.2f" % (totalscore / count))
Example #3
def test():
    # create the environment
    game = FlappyBird()
    env = PLE(game, fps=30, display_screen=True)
    obs_dim = len(env.getGameState())
    act_dim = len(env.getActionSet())
    print('action set:', env.getActionSet())
    logger.info('obs_dim {}, act_dim {}'.format(obs_dim, act_dim))

    # create the replay memory
    rpm = ReplayMemory(MEMORY_SIZE)  # DQN experience replay buffer

    # build the agent with the PARL framework
    model = Model(act_dim=act_dim)
    algorithm = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(algorithm,
                  obs_dim=obs_dim,
                  act_dim=act_dim,
                  e_greed=0.3,
                  e_greed_decrement=1e-6)

    # load the model
    save_path = './DQN/checkpoints/episode_V14600.ckpt'
    print('checkpoints:', save_path)
    if os.path.exists(save_path):
        logger.info('load ckpt success!')
        agent.restore(save_path)
    else:
        logger.error('load ckpt error!')

    action_set = env.getActionSet()
    env.init()
    episode_reward = 0
    steps = 0
    while not env.game_over():
        steps += 1
        if (steps == 1):
            continue
        obs = list(env.getGameState().values())
        action_idx = agent.predict(obs)  # predict the action; always pick the greedy one
        act = action_set[action_idx]
        reward = env.act(act)
        episode_reward += reward
        reward_str = str(int(episode_reward))
        drawText(env.game.screen, reward_str, 288, 0, 48, (255, 0, 0),
                 (255, 255, 255))
    env.reset_game()
    logger.info('[Test] steps:{}, reward:{}'.format(steps, episode_reward))
Example #4
def main_naive():
    game = FlappyBird()
    env = PLE(game, fps=30, display_screen=True)
    my_agent = naive.NaiveAgent(allowed_actions=env.getActionSet())

    env.init()
    reward = 0.0
    nb_frames = 10000

    for i in range(nb_frames):
        if env.game_over():
            env.reset_game()

        observation = env.getScreenRGB()
        action = my_agent.pickAction(reward, observation)
        reward = env.act(action)
Example #6
class Game:
    def __init__(self, game="pixelcopter", fps=30):
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        self.game_name = game
        if game == "flappy":
            engine = FlappyBird()
        elif game == "pixelcopter":
            engine = Pixelcopter()
        else:
            assert False, "This game is not available"
        engine.rewards["loss"] = -5  # reward at terminal state
        self.reward_terminal = -5
        self.game = PLE(engine, fps=fps, display_screen=False)
        self.game.init()
        self.game.act(0)  # Start the game by providing arbitrary key as input
        self.key_input = self.game.getActionSet()
        self.reward = 0

    def game_over(self):
        return self.game.game_over()

    def reset_game(self):
        self.game.reset_game()
        self.game.act(0)  # Start the game

    def get_image(self):
        return self.game.getScreenRGB()

    def get_torch_image(self):
        image = self.game.getScreenRGB()
        if self.game_name == "flappy":
            image = image[:, :-96, :]  # Remove ground
            image = cv2.cvtColor(cv2.resize(image, (84, 84)),
                                 cv2.COLOR_BGR2GRAY)
            image = np.reshape(image, (84, 84, 1))
        elif self.game_name == "pixelcopter":
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            image = np.reshape(image, (48, 48, 1))
        image[image > 0] = 1
        image = image.transpose(2, 0, 1)  #CHW
        image = image.astype(np.float32)
        image = torch.from_numpy(image)
        return image

    def act(self, action_idx):
        self.reward = self.game.act(self.key_input[action_idx])
        return self.reward
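
A short, hypothetical driver for the Game class above (assumes the class and its imports; action index 0 simply selects the first key returned by getActionSet()):

game = Game(game="pixelcopter")
frame = game.get_torch_image()  # initial observation as a CHW float tensor
while not game.game_over():
    reward = game.act(0)        # press the first key in key_input
    frame = game.get_torch_image()
game.reset_game()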
Example #7
def test():
    game = FlappyBird()
    env = PLE(game, fps=30, display_screen=True)
    obs_dim = len(env.getGameState())
    action_dim = 2  # only the up key plus "do nothing", so 2 actions
    model = Model(act_dim=action_dim)
    algorithm = parl.algorithms.DQN(model,
                                    act_dim=action_dim,
                                    gamma=GAMMA,
                                    lr=LEARNING_RATE)
    agent = Agent(
        algorithm,
        obs_dim=obs_dim,
        act_dim=action_dim,
        e_greed=0.2,  # explore
        e_greed_decrement=1e-6)
    if os.path.exists('./model_dir'):
        agent.restore('./model_dir')
    # test part, run 5 episodes and average
    eval_reward = []
    for i in range(5):
        env.init()
        episode_reward = 0
        isOver = False
        step = 0
        while not isOver:
            if step == 0:
                reward = env.act(None)
                done = False
            else:
                time.sleep(0.01)  # Windows runs too fast; slow it down
                obs = list(env.getGameState().values())
                action = agent.predict(obs)
                if action == 1:
                    act = actions["up"]
                else:
                    act = None
                reward = env.act(act)
                isOver = env.game_over()
                episode_reward += reward
            step += 1
            if step > MAX_STEP:
                break
        eval_reward.append(episode_reward)
        env.reset_game()
    return np.mean(eval_reward)
Example #8
def run():
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=True)
    #agent = myAgentHere(allowed_actions=p.getActionSet())

    p.init()
    reward = 0.0

    for i in range(150):
        if p.game_over():
            p.reset_game()

        observation = p.getScreenRGB()
        new_image = convert_image(observation)
        cv.imwrite("Imagenes/Gray_Image" + str(i) + ".jpg", new_image)
        action = None
        reward = p.act(action)
Example #9
class SnakeEnv():
	def __init__(self, height=32, width=32, fps=15, frame_history_size=4):
		# create the game environment and initialize the attribute values

		self.game = Snake(height=height,width=width,init_length=4)
		reward_dict = {"positive": 1.0, "negative": -1.0, "tick": 0.0, "loss": -1.0, "win": 1.0}
		self.environment = PLE(self.game, fps=fps, reward_values=reward_dict, num_steps=2)
		self.init_env() # initialize the game
		self.allowed_actions = self.environment.getActionSet() # the list of allowed actions to be taken by an agent
		self.num_actions = len(self.allowed_actions) - 1  # number of actions that are allowed in this env
		self.frame_hist = frame_history(height=height,width=width,frame_history_size=frame_history_size,num_channels=3);
		self.input_shape = self.frame_hist.get_history().shape  # shape of the game input screen

	def init_env(self):
		# initialize the variables and screen of the game
		self.environment.init()


	def get_current_state(self):
		# get the current state in the game. Returns the current screen of the game with snake and food positions,
		# together with a sequence of past frames.
		cur_frame = np.transpose(self.environment.getScreenRGB(),(2,0,1))
		#cur_frame = np.transpose(np.expand_dims(self.environment.getScreenGrayscale(),axis=0), (2, 0, 1))
		self.frame_hist.push(cur_frame)
		return self.frame_hist.get_history()


	def check_game_over(self):
		# check if the game has terminated
		return self.environment.game_over()

	def reset(self):
		# resets the game to initial values and refreshes the screen with a new small snake and random food position.
		self.environment.reset_game()
		_ = self.environment.act(None)
		self.frame_hist.reset(np.transpose(self.environment.getScreenRGB(),(2,0,1)))
		#self.frame_hist.reset(np.transpose(np.expand_dims(self.environment.getScreenGrayscale(),axis=0), (2, 0, 1)))
		return self.frame_hist.get_history()

	def take_action(self, action):
		# lets the snake take the chosen action of moving in some direction
		reward = self.environment.act(self.allowed_actions[action])
		next_state = self.get_current_state()
		done = self.check_game_over()
		return next_state, reward, done, 0
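
A hedged usage sketch for SnakeEnv (random actions; assumes the frame_history helper imported by the class is available in the surrounding project):

env = SnakeEnv(height=32, width=32)
state = env.reset()
done = False
while not done:
    action = np.random.randint(env.num_actions)
    next_state, reward, done, _ = env.take_action(action)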
Example #10
def train(FRAME_TRAIN=1000005):
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=True)
    p.init()
    ob = game.getGameState()
    state = ob
    state = np.reshape(np.asarray(list(state.values())), [1, 8])
    total_reward = 0
    agent = DDQN_Agent.DeepQAgent()
    agent.load("model95000")
    batch_size = 32
    my_timer = time.time()
    prev_frame = 0
    data = []
    for i in range(FRAME_TRAIN):
        if p.game_over():
            data.append(total_reward)
            p.reset_game()
            print(
                "Total reward = {}, Frame = {}, epsilon = {}, frame/second = {}"
                .format(total_reward, i, agent.epsilon,
                        (i - prev_frame) / (time.time() - my_timer)))
            total_reward = 0
            prev_frame = i
            my_timer = time.time()

        # get action from agent
        action = agent.act(state)

        # take action
        reward = p.act(p.getActionSet()[action])

        # making the reward space less sparse
        if reward < 0:
            reward = -1

        total_reward += reward
        next_state = np.asarray(list(game.getGameState().values()))
        next_state = np.reshape(next_state, [1, 8])

        state = next_state
        # time.sleep(0.3)
        # Plot score
        if i % 1000 == 0:
            plot(data)
Example #11
def play(file_name, number_of_games=1):
    game = FlappyBird(width=game_width,
                      height=game_height,
                      pipe_gap=game_pipe_gap)

    p = PLE(game, display_screen=True, force_fps=False, frame_skip=6)
    p.init()

    network = Network()
    network.load(file_name, rename=False)

    for i in range(number_of_games):
        if i > 0:
            p.reset_game()
        while not p.game_over():
            state = p.getGameState()
            actions_q_values = network.Q(state).tolist()
            action_taken_index = np.argmax(actions_q_values)

            p.act(None if action_taken_index == 0 else 119)
Example #12
def evaluate_step(agent, seed, sess):
    game = FlappyBird()
    env = PLE(game, fps=30, display_screen=False, rng=np.random.RandomState(seed))
    env.reset_game()
    env.act(0) # dummy input
    # grayscale input screen for this episode  
    input_screens = [agent.preprocess(env.getScreenGrayscale())]
    t = 0
    while not env.game_over():
        # feed the four previous screens, select an action
        action = agent.select_action(input_screens, sess)
        # execute the action and get reward
        reward = env.act(env.getActionSet()[action])  # reward = +1 when pass a pipe, -5 when die       
        # observe the result
        screen_plum = env.getScreenGrayscale()  # get next screen
        # append grayscale screen for this episode
        input_screens.append(agent.preprocess(screen_plum))
        t+=1
        if t >= 1000: # cap the number of frames so the episode cannot run forever
            break
    return t
Example #13
class FlappyBirdEnv(gym.Env):
    def __init__(self):
        self.resize_factor = 0.125
        self.width = 288
        self.height = 512
        self.ple = PLE(game=FlappyBird(), fps=30, frame_skip=8)
        self.action_set = self.ple.getActionSet()
        self.action_space = spaces.Discrete(len(self.action_set))
        self.observation_space = spaces.Box(
            low=0.0,
            high=255.0,
            shape=(
                int(self.width * self.resize_factor),
                int(self.height * self.resize_factor),
                1,
            ),
            dtype=np.uint32,
        )
        self._steps = 0

    def reset(self):
        self._steps = 0
        self.ple.display_screen = False
        self.ple.reset_game()
        return self._get_state()

    def step(self, action):
        self._steps += 1
        reward = self.ple.act(self.action_set[action])
        next_state = self._get_state()
        terminal = self.ple.game_over()
        return next_state, reward, terminal, {}

    def render(self, mode="human"):
        self.ple.display_screen = True

    def _get_state(self):
        return np.expand_dims(imresize(self.ple.getScreenGrayscale(),
                                       self.resize_factor),
                              axis=-1)
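
A brief sketch of how this gym-style environment might be exercised (assumed usage, not from the original example; actions are sampled at random from the action space):

env = FlappyBirdEnv()
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())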
Example #14
def play_with_saved_agent(agent_file_path, agent_file_name, test_rounds=20):
    game = FlappyBird()
    env = PLE(game, fps=30, display_screen=True, force_fps=True, state_preprocessor=process_state)
    my_agent = load_agent(env, agent_file_path, agent_file_name)
    env.init()

    print "Testing model:", agent_file_name

    total_reward = 0.0
    for _ in range(test_rounds):
        my_agent.start_episode()
        episode_reward = 0.0
        while env.game_over() == False:
            state = env.getGameState()
            reward, action = my_agent.act(state, epsilon=0.05)
            episode_reward += reward

        print "Agent score {:0.1f} reward for episode.".format(episode_reward)
        total_reward += episode_reward
        my_agent.end_episode()

    return total_reward/test_rounds
Example #15
def play_with_saved_agent(agent_file_path, agent_file_name, test_rounds=20):
    game = RunningMinion()
    env = PLE(game, fps=30, display_screen=True, force_fps=True, state_preprocessor=process_state)
    my_agent = load_agent(env, agent_file_path, agent_file_name)
    env.init()

    print "Testing model:", agent_file_name

    total_reward = 0.0
    for _ in range(test_rounds):
        my_agent.start_episode()
        episode_reward = 0.0
        while env.game_over() == False:
            state = env.getGameState()
            reward, action = my_agent.act(state, epsilon=0.00)
            episode_reward += reward

        print "Agent score {:0.1f} reward for episode.".format(episode_reward)
        total_reward += episode_reward
        my_agent.end_episode()

    return total_reward/test_rounds
Example #16
class PLEEnvRam(PLEEnv):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='FlappyBird', display_screen=True):
        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        self.game_state = PLE(game, fps=30, display_screen=display_screen)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.observation_space = spaces.Box(low=-np.inf,
                                            high=np.inf,
                                            shape=self._get_game_state().shape)
        self.viewer = None

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_game_state()
        terminal = self.game_state.game_over()
        return state, reward, terminal, self.game_state.game.getGameState()

    def _get_game_state(self):
        gs = self.game_state.game.getGameState()
        names = sorted(gs.keys())
        state = np.array([gs[n] for n in names], dtype=np.float64)
        return state

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.game_state.reset_game()
        state = self._get_game_state()
        return state
Example #17
    def play(self):
        print('Playing {} agent after training for {} episodes or {} frames'.
              format(self.name, self.num_of_episodes, self.num_of_frames))
        reward_values = {
            'positive': 1.0,
            'negative': 0.0,
            'tick': 0.0,
            'loss': 0.0,
            'win': 0.0
        }

        env = PLE(FlappyBird(),
                  fps=30,
                  display_screen=True,
                  force_fps=False,
                  rng=None,
                  reward_values=reward_values)
        env.init()

        score = 0
        last_print = 0

        nb_episodes = 50
        while nb_episodes > 0:
            # pick an action
            state = env.game.getGameState()
            action = self.policy(state)

            # step the environment
            reward = env.act(env.getActionSet()[action])

            score += reward

            # reset the environment if the game is over
            if env.game_over():
                print('Score: {}'.format(score))
                env.reset_game()
                nb_episodes -= 1
                score = 0
Example #18
    def play(self, fast=True):
        """Use athlete to play.
        Args:
            fast <bool>: set to True if the screen should be hidden and speed
            enhanced
        """
        game = FlappyBird()
        env = PLE(game,
                  fps=30,
                  frame_skip=1,
                  num_steps=1,
                  force_fps=fast,
                  display_screen=not fast)
        env.init()
        pipes = []
        i = 0
        while i < 100:
            env.reset_game()
            pipes.append(0)
            while not env.game_over():
                A = self.act(game.getGameState())
                r = env.act(ACTIONS[A])
                if r == 1.:
                    pipes[-1] += 1
            if not fast:
                print('\n- Score: {} pipes'.format(pipes[-1]))
                print('- Played {} games'.format(len(pipes)))
                print('- Average score: {} pipes'.format(np.round(np.mean(pipes), decimals=1)))
            else:
                i += 1

        print('\n- Max score: {} pipes'.format(np.max(pipes)))
        print('- Games < 15 pipes: {}'.format(
            len(tuple(filter(lambda x: x < 15, pipes)))
        ))
        print('- Played {} games'.format(100))
        print('- Average score: {} pipes'.format(
            np.round(np.mean(pipes), decimals=1))
        )
Example #19
def main(w, seed=SEED, headless=False):
    """
    Let an agent play flappy bird
    """
    if headless:
        display_screen = False
        force_fps = True
    else:
        display_screen = True
        force_fps = False

    game = PLE(FLAPPYBIRD,
               display_screen=display_screen,
               force_fps=force_fps,
               rng=seed)
    game.init()
    game.reset_game()
    FLAPPYBIRD.rng.seed(seed)

    agent_score = 0
    num_frames = 0

    while True:
        if game.game_over():
            break

        obs = game.getGameState()
        x = normalize(obs)
        action = agent(x, w)

        reward = game.act(ACTION_MAP[action])

        if reward > 0:
            agent_score += 1

        num_frames += 1

    print('Frames  :', num_frames)
    print('Score   :', agent_score)
Example #20
class FlappyBirdWrapper(Env):
    # to render the game screen, pass display_screen=True
    def __init__(self, **kwargs):
        self.game = FlappyBird()
        self.p = PLE(self.game, **kwargs)
        self.action_set = self.p.getActionSet()

        # 3 input features: see self._get_obs
        self.observation_space = spaces.Discrete(3)
        # 2 output actions: flap or do nothing
        self.action_space = spaces.Discrete(2)

    def _get_obs(self):
        # get the game state
        state = self.game.getGameState()
        # horizontal distance from the bird to the next pipe pair
        dist_to_pipe_horz = state["next_pipe_dist_to_player"]
        # vertical distance from the bird to the top of the next pipe gap
        dist_to_pipe_bottom = state["player_y"] - state["next_pipe_top_y"]
        # the bird's vertical velocity
        velocity = state['player_vel']
        # pack these values into an array and return it
        return np.array([dist_to_pipe_horz, dist_to_pipe_bottom, velocity])

    def reset(self):
        self.p.reset_game()
        return self._get_obs()

    def step(self, action):
        reward = self.p.act(self.action_set[action])
        obs = self._get_obs()
        done = self.p.game_over()
        return obs, reward, done, dict()

    def seed(self, *args, **kwargs):
        pass

    def render(self, *args, **kwargs):
        pass
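
A minimal sketch of plugging FlappyBirdWrapper into a standard reset/step loop (assumed usage with random actions; note the wrapper itself never calls PLE's init(), which PLE normally expects before acting):

env = FlappyBirdWrapper(display_screen=False)
env.p.init()  # PLE usually needs init() before the first act()
obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())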
Example #21
def run_game(nb_episodes, agent):
    """ Runs nb_episodes episodes of the game with agent picking the moves.
        An episode of FlappyBird ends with the bird crashing into a pipe or going off screen.
    """
    reward_values = {
        "positive": 1.0,
        "negative": 0.0,
        "tick": 0.0,
        "loss": 0.0,
        "win": 0.0
    }

    env = PLE(FlappyBird(),
              fps=30,
              display_screen=True,
              force_fps=False,
              rng=None,
              reward_values=reward_values)
    env.init()

    score = 0
    while nb_episodes > 0:
        # pick an action
        action = agent.policy(env.game.getGameState())

        # step the environment
        reward = env.act(env.getActionSet()[action])

        score += reward
        # reset the environment if the game is over
        if env.game_over():
            print(score, nb_episodes)
            env.reset_game()
            nb_episodes -= 1
            if score > agent.highestScore:
                agent.highestScore = score
            agent.totalScore += score
            score = 0
Example #22
def test_model_G(nb_games, model):
    game = FlappyBird(
        graphics="fixed"
    )  # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
    p = PLE(game,
            fps=30,
            frame_skip=1,
            num_steps=1,
            force_fps=True,
            display_screen=False)
    p.init()
    reward = 0.0

    cumulated = np.zeros((nb_games))
    list_actions = [0, 119]

    for i in range(nb_games):
        p.reset_game()

        while (not p.game_over()):
            state = game.getGameState()

            screen_x = process_screen(p.getScreenRGB())
            stacked_x = deque([screen_x, screen_x, screen_x, screen_x],
                              maxlen=4)
            x = np.stack(stacked_x, axis=-1)
            action = list_actions[np.argmax(
                model.predict(np.expand_dims(x, axis=0)))]

            reward = p.act(action)

            cumulated[i] = cumulated[i] + reward

    avg_score = np.mean(cumulated)
    print('Average : ' + str(avg_score))
    mx_score = np.max(cumulated)
    print('Max : ' + str(mx_score))
    return avg_score, mx_score
Example #23
def test():
    game2 = FlappyBird()
    p2 = PLE(game2,
             fps=30,
             frame_skip=1,
             num_steps=1,
             force_fps=True,
             display_screen=False)
    p2.init()
    reward = 0.0

    nb_games = 10
    cumulated = np.zeros((nb_games))
    for i in range(nb_games):
        p2.reset_game()

        while (not p2.game_over()):
            state = game2.getGameState()
            screen = p2.getScreenRGB()
            action = FlappyPolicy(state, screen)

            reward = p2.act(action)
            cumulated[i] = cumulated[i] + reward
    return np.mean(cumulated)
Example #24
def random_play(episodes = 100):
    # Initialize game and agent
    game = FlappyBird()
    p = PLE(game, display_screen=True, state_preprocessor=process_state)
    p.init()
    agent = Agent(p)
    total_reward = []

    # Run given number of episodes
    for _ in range(episodes):
        # Initialize episode
        p.reset_game()
        total_episode_reward = 0

        # Episode loop
        while not p.game_over():
            action = agent.choose_action()
            reward = p.act(action)
            total_episode_reward += reward

        # Save episode reward and return
        total_reward.append(total_episode_reward)

    return total_reward
Example #25
    print("loaded")
else:
    agent = RLAgent(.3, .9, actions)

game = frogger_new.Frogger()
fps = 30
p = PLE(game, fps=fps, force_fps=False)
reward = 0.0

p.init()
count = 0
tr = 0
try:
    while True:
        count += 1
        if p.game_over():
            #print("{} REWARD".format(tr))
            tr = 0
            p.reset_game()

        obs = game.getGameState()
        #print obs
        cur = State(obs)
        action = agent.pickAction(cur)[0]
        #action = None
        reward = p.act(action)
        newState = State(obs)

        #disincentivizing no move
        #if cur == newState:
        reward -= .1
Example #26
from ple.games.flappybird import FlappyBird
from ple import PLE
import numpy as np

import FlappyPolicy

game = FlappyBird()
p = PLE(game,
        fps=30,
        frame_skip=1,
        num_steps=1,
        force_fps=False,
        display_screen=True)

p.init()
reward = 0.0

nb_games = 100
cumulated = np.zeros((nb_games))

for i in range(nb_games):
    p.reset_game()

    while (not p.game_over()):
        state = game.getGameState()
        screen = p.getScreenRGB()
        action = FlappyPolicy(state,
                              screen)  ### Your job is to define this function.

        reward = p.act(action)
        cumulated[i] = cumulated[i] + reward

average_score = np.mean(cumulated)
max_score = np.max(cumulated)
Example #27
import numpy as np
from ple import PLE
from ple.games.snake import Snake

agent = Snake(width=256, height=256)

env = PLE(agent, fps=15, force_fps=False, display_screen=True)

env.init()
actions = env.getActionSet()
for i in range(1000):
    if env.game_over():
        env.reset_game()

    action = actions[np.random.randint(0, len(actions))]
    env.act(action)
Example #28
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, game=None, frame_skip=4, 
            ple_options={"display_screen": True, "force_fps":True, "fps":30}):

        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
       
        if game is None:
            raise ValueError("Game must be provided")

        self._ple = PLE(game, **ple_options)
        self._ple.init()

        w, h = self._ple.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((48, 48), dtype=np.uint8)
        self._actions = self._ple.getActionSet()

                
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
        elif self._mode != -1: # and thus mode == -1
            self._mode = -1

        self._ple.reset_game()
        for _ in range(self._random_state.randint(15)):
            self._ple.act(self._ple.NOOP)
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
        
        return [4 * [48 * [48 * [0]]]]
        
        
    def act(self, action):
        action = self._actions[action]
        
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break
            
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
  
        self._mode_score += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if self.inTerminalState() == False:
            self._mode_episode_count += 1
        print("== Mean score per episode is {} over {} episodes ==".format(self._mode_score / self._mode_episode_count, self._mode_episode_count))


    def inputDimensions(self):
        return [(4, 48, 48)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ple.game_over()
Example #29
class Agent:

    LEARNING_RATE = 0.003
    BATCH_SIZE = 32
    INPUT_SIZE = 8
    LAYER_SIZE = 500
    OUTPUT_SIZE = 2
    EPSILON = 1
    DECAY_RATE = 0.005
    MIN_EPSILON = 0.1
    GAMMA = 0.99
    INITIAL_FEATURES = np.zeros((4, INPUT_SIZE))
    MEMORIES = deque()
    MEMORY_SIZE = 300
    COPY = 1000
    T_COPY = 0
    # according to the PLE documentation, the game state has 8 features
    # the output has 2 dimensions: 0 = do nothing, 1 = flap

    def __init__(self, screen=False, forcefps=True):
        self.game = FlappyBird(pipe_gap=125)
        self.env = PLE(self.game, fps=30, display_screen=screen, force_fps=forcefps)
        self.env.init()
        self.env.getGameState = self.game.getGameState
        self.actor = Actor('actor', self.INPUT_SIZE, self.OUTPUT_SIZE, self.LAYER_SIZE)
        self.actor_target = Actor('actor-target', self.INPUT_SIZE, self.OUTPUT_SIZE, self.LAYER_SIZE)
        self.critic = Critic('critic', self.INPUT_SIZE, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
        self.critic_target = Critic('critic-target', self.INPUT_SIZE, self.OUTPUT_SIZE, self.LAYER_SIZE, self.LEARNING_RATE)
        self.grad_critic = tf.gradients(self.critic.logits, self.critic.Y)
        self.actor_critic_grad = tf.placeholder(tf.float32, [None, self.OUTPUT_SIZE])
        weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='actor')
        self.grad_actor = tf.gradients(self.actor.logits, weights_actor, -self.actor_critic_grad)
        grads = zip(self.grad_actor, weights_actor)
        self.optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE).apply_gradients(grads)
        self.sess = tf.InteractiveSession()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver(tf.global_variables())
        self.rewards = []

    def _assign(self, from_name, to_name):
        from_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=from_name)
        to_w = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=to_name)
        for i in range(len(from_w)):
            assign_op = to_w[i].assign(from_w[i])
            self.sess.run(assign_op)

    def _memorize(self, state, action, reward, new_state, dead, rnn_state):
        self.MEMORIES.append((state, action, reward, new_state, dead, rnn_state))
        if len(self.MEMORIES) > self.MEMORY_SIZE:
            self.MEMORIES.popleft()

    def _construct_memories_and_train(self, replay):
        # state_r, action_r, reward_r, new_state_r, dead_r = replay
        # train actor
        states = np.array([a[0] for a in replay])
        new_states = np.array([a[3] for a in replay])
        init_values = np.array([a[-1] for a in replay])
        Q = self.sess.run(self.actor.logits, feed_dict={self.actor.X: states, self.actor.hidden_layer:init_values})
        Q_target = self.sess.run(self.actor_target.logits, feed_dict={self.actor_target.X: states, self.actor_target.hidden_layer:init_values})
        grads = self.sess.run(self.grad_critic, feed_dict={self.critic.X:states, self.critic.hidden_layer: init_values, self.critic.Y:Q})
        self.sess.run(self.optimizer, feed_dict={self.actor.X:states, self.actor.hidden_layer:init_values, self.actor_critic_grad:grads})

        # train critic
        rewards = np.array([a[2] for a in replay]).reshape((-1, 1))
        rewards_target = self.sess.run(self.critic_target.logits, feed_dict={self.critic_target.X:new_states, self.critic_target.hidden_layer: init_values,
                                                                             self.critic_target.Y:Q_target})
        for i in range(len(replay)):
            if not replay[i][4]:  # only bootstrap when the transition is not terminal
                rewards[i, 0] += self.GAMMA * rewards_target[i, 0]
        cost, _ = self.sess.run([self.critic.cost, self.critic.optimizer], feed_dict={self.critic.X: states, self.critic.hidden_layer: init_values,
                                                                                      self.critic.Y: Q, self.critic.REWARD: rewards})
        return cost

    def save(self, checkpoint_name):
        self.saver.save(self.sess, os.getcwd() + "/%s.ckpt" %(checkpoint_name))
        with open('%s-acc.p'%(checkpoint_name), 'wb') as fopen:
            pickle.dump(self.rewards, fopen)

    def load(self, checkpoint_name):
        self.saver.restore(self.sess, os.getcwd() + "/%s.ckpt" %(checkpoint_name))
        with open('%s-acc.p'%(checkpoint_name), 'rb') as fopen:
            self.rewards = pickle.load(fopen)

    def get_state(self):
        state = self.env.getGameState()
        return np.array(list(state.values()))

    def get_reward(self, iterations, checkpoint):
        for i in range(iterations):
            total_reward = 0
            self.env.reset_game()
            dead = False
            init_value = np.zeros((1, 2 * 512))
            state = self.get_state()
            for k in range(self.INITIAL_FEATURES.shape[0]):
                self.INITIAL_FEATURES[k, :] = state
            while not dead:
                if (self.T_COPY + 1) % self.COPY == 0:
                    self._assign('actor', 'actor-target')
                    self._assign('critic', 'critic-target')
                if np.random.rand() < self.EPSILON:
                    action = np.random.randint(self.OUTPUT_SIZE)
                else:
                    action, last_state = self.sess.run([self.actor.logits,
                                                        self.actor.last_state],
                                                       feed_dict={self.actor.X: [self.INITIAL_FEATURES],
                                                                  self.actor.hidden_layer: init_value})
                    action, init_value = np.argmax(action[0]), last_state[0]
                real_action = 119 if action == 1 else None
                reward = self.env.act(real_action)
                total_reward += reward
                new_state = np.append(self.get_state().reshape((1, -1)), self.INITIAL_FEATURES[:3, :], axis=0)
                dead = self.env.game_over()
                self._memorize(state, action, reward, new_state, dead, init_value)
                batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
                replay = random.sample(self.MEMORIES, batch_size)
                cost = self._construct_memories_and_train(replay)
                self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
                self.T_COPY += 1
            self.rewards.append(total_reward)
            if (i+1) % checkpoint == 0:
                print('epoch:', i + 1, 'total rewards:', total_reward)
                print('epoch:', i + 1, 'cost:', cost)

    def fit(self, iterations, checkpoint):
        self.get_reward(iterations, checkpoint)
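
A possible entry point for the agent above (assumed, not part of the original snippet):

agent = Agent(screen=False, forcefps=True)
agent.fit(iterations=500, checkpoint=10)
agent.save('flappy-actor-critic')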
Example #30
    memory = ReplayMemory(max_memory_size, min_memory_size)

    env.init()

    for epoch in range(1, num_epochs + 1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = False

        # training loop
        while steps < num_steps_train:
            episode_reward = 0.0
            agent.start_episode()

            while env.game_over() == False and steps < num_steps_train:
                state = env.getGameState()
                reward, action = agent.act(state, epsilon=epsilon)
                memory.add([state, action, reward, env.game_over()])

                if steps % update_frequency == 0:
                    loss = memory.train_agent_batch(agent)

                    if loss is not None:
                        losses.append(loss)
                        epsilon = max(epsilon_min, epsilon - epsilon_rate)

                episode_reward += reward
                steps += 1

            if num_episodes % 5 == 0:
Example #31
class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='FlappyBird', display_screen=True):
        # open up a game state to communicate with emulator
        import importlib
        game_module_name = ('ple.games.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()
        self.game_state = PLE(game, fps=30, display_screen=display_screen)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.viewer = None

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_image()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.game_state.reset_game()
        state = self._get_image()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
Example #32
def trainNetwork(s, readout, h_fc1, sess):
    # define the cost function
    a = tf.placeholder("float", [None, ACTIONS])
    y = tf.placeholder("float", [None])
    readout_action = tf.reduce_sum(tf.multiply(readout, a), reduction_indices = 1)
    cost = tf.reduce_mean(tf.square(y - readout_action))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    # open up a game state to communicate with emulator
    #setupGame()
    gameClass = FlappyBird(width=288, height=512, pipe_gap=100)
    
    fps = 30
    frame_skip = 2
    num_steps = 1
    force_fps = False
    display_screen = True
    reward = 0.0
    nb_frames = 15000

    game = PLE(gameClass, fps=fps, frame_skip=frame_skip, num_steps=num_steps,
            force_fps=force_fps, display_screen=display_screen)

    game.init()

    # store the previous observations in replay memory
    D = deque()

    # printing
    logdir = "logs_" + GAME
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    a_file = open(logdir + "/readout.txt", 'w')
    h_file = open(logdir + "/hidden.txt", 'w')

    # get the first state by doing nothing and preprocess the image to 80x80x4
    r_0 = game.act(game.NOOP)
    x_t = game.getScreenGrayscale()
    terminal = game.game_over()
    if terminal:
        print "NOOOO"
        game.reset_game()
    
    x_t = cv2.resize(x_t, (80, 80))
    ret, x_t = cv2.threshold(x_t,1,255,cv2.THRESH_BINARY)
    s_t = np.stack((x_t, x_t, x_t, x_t), axis = 2)

    # saving and loading networks
    saver = tf.train.Saver()
    sess.run(tf.initialize_all_variables())
    '''
    checkpoint = tf.train.get_checkpoint_state("saved_networks")
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print "Successfully loaded:", checkpoint.model_checkpoint_path
    else:
        print "Could not find old network weights"
    '''
    epsilon = INITIAL_EPSILON
    t = 0
    while True:
        # choose an action epsilon greedily
        readout_t = readout.eval(feed_dict = {s : [s_t]})[0]
        a_t = np.zeros([ACTIONS])
        action_index = 0
        if random.random() <= epsilon or t <= OBSERVE:
            action_index = random.randrange(ACTIONS)
            a_t[action_index] = 1
        else:
            action_index = np.argmax(readout_t)
            a_t[action_index] = 1

        # scale down epsilon
        if epsilon > FINAL_EPSILON and t > OBSERVE:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE

        for i in range(0, K):
            # run the selected action and observe next state and reward
            r_t = game.act(game.getActionSet()[np.argmax(a_t)])
            x_t1 = game.getScreenGrayscale()
            terminal = game.game_over()
            if terminal:
                print "NOOO2"
                game.reset_game()

            x_t1 = cv2.resize(x_t1, (80, 80))
            ret, x_t1 = cv2.threshold(x_t1,1,255,cv2.THRESH_BINARY)
            x_t1 = np.reshape(x_t1, (80, 80, 1))
            s_t1 = np.append(x_t1, s_t[:,:,1:], axis = 2)

            # store the transition in D
            D.append((s_t, a_t, r_t, s_t1, terminal))
            if len(D) > REPLAY_MEMORY:
                D.popleft()

        # only train if done observing
        if t > OBSERVE:
            # sample a minibatch to train on
            minibatch = random.sample(D, BATCH)

            # get the batch variables
            s_j_batch = [d[0] for d in minibatch]
            a_batch = [d[1] for d in minibatch]
            r_batch = [d[2] for d in minibatch]
            s_j1_batch = [d[3] for d in minibatch]

            y_batch = []
            readout_j1_batch = readout.eval(feed_dict = {s : s_j1_batch})
            for i in range(0, len(minibatch)):
                # if terminal only equals reward
                if minibatch[i][4]:
                    y_batch.append(r_batch[i])
                else:
                    y_batch.append(r_batch[i] + GAMMA * np.max(readout_j1_batch[i]))

            # perform gradient step
            train_step.run(feed_dict = {
                y : y_batch,
                a : a_batch,
                s : s_j_batch})

        # update the old values
        s_t = s_t1
        t += 1

        # save progress every 10000 iterations
        if t % 10000 == 0:
            saver.save(sess, 'saved_networks/' + GAME + '-dqn', global_step = t)

        # print info
        state = ""
        if t <= OBSERVE:
            state = "observe"
        elif t > OBSERVE and t <= OBSERVE + EXPLORE:
            state = "explore"
        else:
            state = "train"
        print "TIMESTEP", t, "/ STATE", state, "/ EPSILON", epsilon, "/ ACTION", action_index, "/ REWARD", r_t, "/ Q_MAX %e" % np.max(readout_t)

        # write info to files
        '''
Example #33
def agent_training(agent_file_path, agent_file_name, fig_path, num_steps_train_total = 5000):
    # training parameters
    num_epochs = 5
    num_steps_train_epoch = num_steps_train_total/num_epochs  # steps per epoch of training
    num_steps_test = 100
    update_frequency = 10  # step frequency of model training/updates

    epsilon = 0.15  # percentage of time we perform a random action, help exploration.
    epsilon_steps = 1000  # decay steps
    epsilon_min = 0.1
    epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

    # memory settings
    max_memory_size = 10000
    min_memory_size = 60  # number needed before model training starts

    game = RunningMinion()
    env = PLE(game, fps=30, display_screen=True, force_fps=True, state_preprocessor=process_state)
    my_agent = init_agent(env)

    memory = utils.ReplayMemory(max_memory_size, min_memory_size)
    env.init()

    # Logging configuration and figure plotting
    logging.basicConfig(filename='../learning.log', filemode='w',
                        level=logging.DEBUG, format='%(levelname)s:%(message)s')
    logging.info('========================================================')
    logging.info('Training started for total training steps: '+str(num_steps_train_total)+'.\n')
    learning_rewards = [0]
    testing_rewards = [0]

    for epoch in range(1, num_epochs + 1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = False

        # training loop
        while steps < num_steps_train_epoch:
            episode_reward = 0.0
            my_agent.start_episode()

            while env.game_over() == False and steps < num_steps_train_epoch:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=epsilon)
                memory.add([state, action, reward, env.game_over()])

                if steps % update_frequency == 0:
                    loss = memory.train_agent_batch(my_agent)

                    if loss is not None:
                        losses.append(loss)
                        epsilon = max(epsilon_min, epsilon - epsilon_rate)

                episode_reward += reward
                steps += 1

            if steps < num_steps_train_epoch:
                learning_rewards.append(episode_reward)

            if num_episodes % 5 == 0:
                # print "Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward)
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))

            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        logging.info("Train Epoch {:02d}: Epsilon {:0.4f} | Avg. Loss {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, epsilon, np.mean(losses), np.sum(rewards) / num_episodes))

        steps, num_episodes = 0, 0
        losses, rewards = [], []

        # testing loop
        while steps < num_steps_test:
            episode_reward = 0.0
            my_agent.start_episode()

            while env.game_over() == False and steps < num_steps_test:
                state = env.getGameState()
                reward, action = my_agent.act(state, epsilon=0.05)

                episode_reward += reward
                testing_rewards.append(testing_rewards[-1]+reward)
                steps += 1

                # done watching after 500 steps.
                if steps > 500:
                    env.display_screen = False

            if num_episodes % 5 == 0:
                logging.info("Episode {:01d}: Reward {:0.1f}".format(num_episodes, episode_reward))

            if steps < num_steps_test:
                testing_rewards.append(episode_reward)

            rewards.append(episode_reward)
            num_episodes += 1
            my_agent.end_episode()

        logging.info("Test Epoch {:02d}: Best Reward {:0.3f} | Avg. Reward {:0.3f}\n"
                     .format(epoch, np.max(rewards), np.sum(rewards) / num_episodes))

    logging.info("Training complete.\n\n")
    plot_figure(fig_path, learning_rewards, 'reward', 'reward_in_training', num_steps_train_total)
    plot_figure(fig_path, testing_rewards, 'reward', 'reward_in_testing', num_steps_train_total)

    save_agent(my_agent, agent_file_path, agent_file_name)
Example #34
# You're not allowed to change this file
from ple.games.flappybird import FlappyBird
from ple import PLE
import numpy as np
from FlappyAgent import FlappyPolicy

game = FlappyBird(graphics="fixed") # use "fancy" for full background, random bird color and random pipe color, use "fixed" (default) for black background and constant bird and pipe colors.
p = PLE(game, fps=30, frame_skip=1, num_steps=1, force_fps=False, display_screen=True)
# Note: if you want to see you agent act in real time, set force_fps to False. But don't use this setting for learning, just for display purposes.

p.init()
reward = 0.0

nb_games = 100
cumulated = np.zeros((nb_games))

for i in range(nb_games):
    p.reset_game()
    
    while(not p.game_over()):
        state = game.getGameState()
        screen = p.getScreenRGB()
        action=FlappyPolicy(state, screen) ### Your job is to define this function.
        
        reward = p.act(action)
        cumulated[i] = cumulated[i] + reward

average_score = np.mean(cumulated)
max_score = np.max(cumulated)
Example #35
            epoch = step // params.EVALUATION_PERIOD
            mean_score, max_score, min_score = evaluation(p, network=dqn, epoch=epoch, trials=20, logger=logger_eval)
            logger_eval.info('Score min/max ({}/{}) and mean ({})'.format(min_score, max_score, mean_score))

        # action selection
        if np.random.rand() < epsilon(step):
            a = np.random.randint(0, 2)
        else:
            a = greedy_action(dqn, x)     # 0 or 1

        # step
        r = p.act(params.LIST_ACTIONS[a])
        rr = clip_reward(r)

        screen_y = process_screen(p.getScreenRGB())
        d = p.game_over()
        replay_memory.append(screen_x, a, rr, screen_y, d)

        # print some info
        if d:
            print("##################################################################################################")
            print("#################################################################### DEAD ########################")
            print("##################################################################################################")
            logger_train.info("Step {}, score before death: {}".format(step, training_score))
            training_score = 0      # Restart game
        if r > 0:
            print("--------------------------------------------------------------------------------------------------")
            print("-------------------------------------------------------------------- Pipe passed ! ---------------")
            print("--------------------------------------------------------------------------------------------------")
            training_score += 1     # One pipe passed
Example #36
import numpy as np
from ple import PLE
from ple.games.waterworld import WaterWorld


# let's adjust the rewards our agent receives
rewards = {
    "tick": -0.01,  # each time the game steps forward in time the agent gets -0.01
    "positive": 1.0,  # each time the agent collects a green circle
    "negative": -5.0,  # each time the agent bumps into a red circle
}

# make a PLE instance.
# use a lower fps so we can see what's happening a little more easily
game = WaterWorld(width=256, height=256, num_creeps=8)
p = PLE(game, fps=15, force_fps=False, display_screen=True,
        reward_values=rewards)
# we pass in the rewards and PLE will adjust the game for us

p.init()
actions = p.getActionSet()
for i in range(1000):
    if p.game_over():
        p.reset_game()

    action = actions[np.random.randint(0, len(actions))]  # random actions
    reward = p.act(action)

    print "Score: {:0.3f} | Reward: {:0.3f} ".format(p.score(), reward)
Example #37
from ple.games.flappybird import FlappyBird
from ple import PLE
import random

game = FlappyBird()
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()

nb_frames = 1000
reward = 0.0

for f in range(nb_frames):
    if p.game_over():  #check if the game is over
        p.reset_game()

    obs = p.getScreenRGB()
    action = random.sample(p.getActionSet(), 1)[0]
    reward = p.act(action)
    print(action, reward)
Example #38
    memory = ReplayMemory(max_memory_size, min_memory_size)

    env.init()
    
    for epoch in range(1, num_epochs+1):
        steps, num_episodes = 0, 0
        losses, rewards = [], []
        env.display_screen = False
       
        #training loop
        while steps < num_steps_train:
            episode_reward = 0.0
            agent.start_episode()

            while env.game_over() == False and steps < num_steps_train:
                state = env.getGameState()
                reward, action = agent.act(state, epsilon=epsilon)
                memory.add([ state, action, reward, env.game_over() ])

                if steps % update_frequency == 0:
                    loss = memory.train_agent_batch(agent)
                    
                    if loss is not None:
                        losses.append(loss)
                        epsilon = max(epsilon_min, epsilon - epsilon_rate)

                episode_reward += reward
                steps += 1

Example #39
        env.reset_game()
        state = env.getGameState()
        state = np.array([list(state[0])])
        score = 0
        for time_t in range(20000):
            action = agent.act(state)

            reward = env.act(action)  # take the selected action
            score += reward

            next_state = env.getGameState()
            next_state = np.array([list(next_state[0])])

            action = [K_a, None, K_d].index(action)

            agent.remember(state, action, reward, next_state, env.game_over())
            state = next_state

            if env.game_over() or time_t == 19999:
                # print when the episode ends
                print(
                    "episode: {}/{}, score: {}, memory size: {}, e: {}".format(
                        e, EPISODES, score, len(agent.memory), agent.epsilon))

                # code for plotting the reward
                scores.append(score)
                time.append(e + 1)
                if e % 10 == 0:
                    pylab.plot(time, scores, 'b')
                    pylab.savefig("./save/catcher_dqn.png")
                break
	"""
	def __init__(self, actions):
		self.actions = actions

	def pickAction(self, reward, obs):
		return self.actions[np.random.randint(0, len(self.actions))]

###################################
game = Doom(scenario="take_cover")

env = PLE(game)
agent = NaiveAgent(env.getActionSet())
env.init()

reward = 0.0
for f in range(15000):
        # if the game is over
        if env.game_over():
            env.reset_game()
            
        action = agent.pickAction(reward, env.getScreenRGB())
        reward = env.act(action)

        if f > 2000:
            env.display_screen = True 
            env.force_fps = False
        
        if f > 2250:
            env.display_screen = True 
            env.force_fps = True