class Env:
  def __init__(self):
    self.game = FlappyBird(pipe_gap=125)
    self.env = PLE(self.game, fps=30, display_screen=True)
    self.env.init()
    self.env.getGameState = self.game.getGameState # maybe not necessary

    # by convention we want to use (0,1)
    # but the game uses (None, 119)
    self.action_map = self.env.getActionSet() #[None, 119]

  def step(self, action):
    action = self.action_map[action]
    reward = self.env.act(action)
    done = self.env.game_over()
    obs = self.get_observation()
    # don't bother returning an info dictionary like gym
    return obs, reward, done

  def reset(self):
    self.env.reset_game()
    return self.get_observation()

  def get_observation(self):
    # game state returns a dictionary which describes
    # the meaning of each value
    # we only want the values
    obs = self.env.getGameState()
    return np.array(list(obs.values()))

  def set_display(self, boolean_value):
    self.env.display_screen = boolean_value
  def run_a_game(self, game):
    # drive an arbitrary PLE game with a NaiveAgent for NUM_STEPS frames
    # (NaiveAgent and NUM_STEPS are assumed to be defined elsewhere)
    from ple import PLE
    p = PLE(game, display_screen=True)
    agent = NaiveAgent(p.getActionSet())
    p.init()
    reward = p.act(p.NOOP)
    for i in range(NUM_STEPS):
      obs = p.getScreenRGB()
      reward = p.act(agent.pickAction(reward, obs))
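
A minimal way to exercise the Env wrapper above (hypothetical snippet, not part of the original example; it takes random actions until the first game over):

import numpy as np

env = Env()
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    # per the action_map comment above: index 0 = None (no-op), index 1 = 119 (flap)
    action = np.random.randint(2)
    obs, reward, done = env.step(action)
    total_reward += reward
print("episode reward:", total_reward)
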
def main_naive():
    game = FlappyBird()
    env = PLE(game, fps=30, display_screen=True)
    my_agent = naive.NaiveAgent(allowed_actions=env.getActionSet())

    env.init()
    reward = 0.0
    nb_frames = 10000

    for i in range(nb_frames):
        if env.game_over():
            env.reset_game()

        observation = env.getScreenRGB()
        action = my_agent.pickAction(reward, observation)
        reward = env.act(action)
Example #4
class PLECatcherEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self, game_name='Catcher', display_screen=True, ple_game=True, obs_type="Image", reward_type = 1):
        '''
        For Catcher:
            getGameState() returns [player x position, player velocity, fruits x position, fruits y position]
        @Params:
            obs_type :
                "RAM" : getGameState()
                "Image" : (64, 64, 3)
            reward_type :
                0 : means [reward1, reward2]
                1 : means raw reward
                2 : means change of x-axis distance from fruit
        '''
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'
        
        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()

        ##################################################################
        # old one
        #self.game_state = PLE(game, fps=30, display_screen=display_screen)

        # use arg state_preprocessor to support self.game_state.getGameState()
        self.game_state = PLE(game, fps=30, display_screen=display_screen, state_preprocessor = self.process_state)
        ##################################################################

        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype = np.uint8)
        self.viewer = None

        ############################################
        self.obs_type = obs_type
        self.reward_type = reward_type

        # every reward type's max-abs value
        self.rewards_ths = [1.0, 2.0]

        # change observation space:
        self.img_width = 84
        self.img_height = 84
        self.img_shape = (self.img_width, self.img_height, 3)
        if self.obs_type == "Image":
            self.observation_space = spaces.Box(low = 0, high = 255, shape = self.img_shape, dtype = np.uint8)
        elif self.obs_type == "RAM":
            self.observation_space = spaces.Box(low = -100.0, high = 100.0, shape = (4, ), dtype = np.float32)
        ############################################


    #############################################
    # Add state processor: wraps the game-state dict's values view in a
    # one-element object array; callers unpack it with list(state[0])
    def process_state(self, state):
        return np.array([state.values()])
    #############################################

    def _step(self, a, gamma = 0.99):
        #############################################
        if isinstance(a,np.ndarray):
            a = a[0]
        # old observation
        old_ram = self.game_state.getGameState()
        #############################################

        reward = self.game_state.act(self._action_set[a])

        #############################################
        #state = self._get_image()
        if self.obs_type == "Image":
            state = self._get_image()
        #############################################

        terminal = self.game_state.game_over()

        #############################################
        # new observation
        ram = self.game_state.getGameState()
        #############################################

        #############################################
        if self.reward_type == 1:
            reward = reward / self.rewards_ths[0]

        # reward 2
        if self.reward_type == 2:
            reward = self.get_reward(reward, old_ram, ram, terminal, 2, gamma)

        # reward 0
        if self.reward_type == 0:
            reward1 = reward / self.rewards_ths[0]
            reward2 = self.get_reward(reward, old_ram, ram, terminal, 2, gamma)
            reward = np.array([reward1, reward2])
        ##############################################

        ############################################################
        '''
        # reward scaling
        if self.reward_type == 0:
            for rt in range(len(reward)):
                reward[rt] = reward[rt] / self.rewards_ths[rt]
        else:
            reward = reward / self.rewards_ths[self.reward_type - 1]
        '''
        ############################################################

        ##############################################
        # obs
        if self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        ##############################################

        return state, reward, terminal, {}

    #############################################
    # Add for reward
    #############################################
    def get_reward(self, src_reward, old_ram, ram, done, reward_type, gamma):
        ''' 
        @Params:
            old_ram, ram : numpy.array, [dict_values([x, y, z, w])]
            reward_type : 2 , distance of x-axis change
        '''
        old_ram = list(old_ram[0])
        ram = list(ram[0])
        reward = src_reward
        if not done:
            if reward_type == 2:
                old_px, old_fx = old_ram[0], old_ram[2]
                px, fx = ram[0], ram[2]
                old_dis = abs(old_px - old_fx)
                dis = abs(px - fx)
                reward = old_dis - gamma * dis

                # a new epoch
                old_fy, fy = old_ram[3], ram[3]
                if old_fy > fy:
                    reward = 0.0

                reward = min(reward, 2.0)
                reward = max(reward, -2.0)

                reward = src_reward / self.rewards_ths[0] + reward / self.rewards_ths[1]
        return reward
    #############################################
    #############################################

    def _get_image(self):
        image_rotated = np.fliplr(np.rot90(self.game_state.getScreenRGB(),3)) # Hack to fix the rotated image returned by ple
        ##########################################
        # resize image
        img = Image.fromarray(image_rotated)
        img = img.resize((self.img_width, self.img_height), Image.LANCZOS)  # LANCZOS is the current name for ANTIALIAS
        image_resized = np.array(img).astype(np.uint8)
        ##########################################
        return image_resized

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.screen_width, self.screen_height, 3), dtype = np.uint8)
        self.game_state.reset_game()
        #######################################
        if self.obs_type == "Image":
            state = self._get_image()
        elif self.obs_type == "RAM":
            state = self.game_state.getGameState()
            state = np.array(list(state[0]))
        #######################################
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)


    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
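
A quick usage sketch for the PLECatcherEnv wrapper above (hypothetical, not part of the original example; it uses the old-style _reset/_step entry points defined by the class):

env = PLECatcherEnv(display_screen=False, obs_type="RAM", reward_type=0)
obs = env._reset()                                   # 4-dim game-state vector
obs, reward, done, info = env._step(env.action_space.sample())
# with reward_type=0, reward is np.array([scaled_raw_reward, distance_shaping_reward])
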
Example #5
display_screen = True

reward = 0.0
max_noops = 20
nb_frames = 15000

#make a PLE instance.
p = PLE(game,
        fps=fps,
        frame_skip=frame_skip,
        num_steps=num_steps,
        force_fps=force_fps,
        display_screen=display_screen)

#our Naive agent!
agent = NaiveAgent(p.getActionSet())

#init agent and game.
p.init()

#lets do a random number of NOOP's
for i in range(np.random.randint(0, max_noops)):
    reward = p.act(p.NOOP)

#start our training loop
for f in range(nb_frames):
    #if the game is over
    if p.game_over():
        p.reset_game()

    obs = p.getScreenRGB()
Example #6
         actionIndex = -1
         actionIndex0 = -1
     else:
         actionIndex0=softMax(np.abs(output))#np.argmax(output)+1
     print(actionIndex0)
     if actionIndex0==0:
         if np.sign(actionIndex0)==1:
             actionIndex = 0
         else:
             actionIndex = 3
     elif actionIndex0==1:
         if np.sign(actionIndex0)==1:
             actionIndex = 1
         else:
             actionIndex = 2
     action = p.getActionSet()[actionIndex]#ulrdn
     #[115, 100, 119, 97, None]
         # myAgent.pickAction(reward, obs)
     reward = p.act(action)
     print('reward: %f'%(reward))
     #points += (reward - rewardLast)-0.0001*dt*points
     points = (10+reward)/10+(reward - rewardLast) if reward>-10 else reward - rewardLast
     print('points: %f'%(points))
     ModulatorAmount = points
     if ModulatorAmount<0:
         ModulatorAmount=0
         
     ADSA.StepSynapseDynamics(dt, ModulatorAmount) 
     rewardLast=reward
 NeuonNumber=1
 newSlice= [slice(None)]*3
Example #7
from ple.games.waterworld import WaterWorld

# let's adjust the rewards our agent receives
rewards = {
    "tick": -0.01,  # each time the game steps forward in time the agent gets -0.01
    "positive": 1.0,  # each time the agent collects a green circle
    "negative": -5.0,  # each time the agent bumps into a red circle
}

# make a PLE instance.
# use lower fps so we can see what's happening a little more easily
game = WaterWorld(width=256, height=256, num_creeps=8)
p = PLE(game,
        fps=15,
        force_fps=False,
        display_screen=True,
        reward_values=rewards)
# we pass in the rewards and PLE will adjust the game for us

p.init()
actions = p.getActionSet()
for i in range(1000):
    if p.game_over():
        p.reset_game()

    action = actions[np.random.randint(0, len(actions))]  # random actions
    reward = p.act(action)

    print("Score: {:0.3f} | Reward: {:0.3f}".format(p.score(), reward))
def train_agent(number_of_episodes):
    game = FlappyBird()

    rewards = {
        "positive": 1.0,
        "negative": 0.0,
        "tick": 0.0,
        "loss": -5.0,
        "win": 0.0
    }

    env = PLE(game=game, fps=30, display_screen=False, reward_values=rewards)

    # Reset environment at the beginning
    env.reset_game()

    training_score = 0
    max_training_score = 0
    episode_number = 1

    state_action_reward = ()

    every_100th = 1
    results = []

    while number_of_episodes > 0:

        if episode_number == 50000:
            f = open("monte_50000.txt", "w")
            f.write(str(monte_carlo_q_agent.Q_matrix))
            f.close()
            f = open("results_50000.txt", "w")
            f.write(str(results))
            f.close()

        # Get current state
        state = MonteCarloQLearningAgent.get_state(env.game.getGameState())

        # Select action in state "state"
        action = monte_carlo_q_agent.compute_action_from_q_values(state)

        if action is None:
            raise IllegalActionException("Illegal action occurred.")
        """
        After choosing action, get reward.
        PLE environment method act() returns the reward that the agent has accumulated while performing the action.
        """
        reward = env.act(env.getActionSet()[action])
        training_score += reward

        max_training_score = max(training_score, max_training_score)

        game_over = env.game_over()

        # observe the result
        if state_action_reward:
            monte_carlo_q_agent.update(state_action_reward[0],
                                       state_action_reward[1], state,
                                       state_action_reward[2])

        state_action_reward = (state, action, reward)

        if game_over:
            print("===========================")
            print("Episode: " + str(episode_number))
            print("Training score: " + str(training_score))
            print("Max. training score: " + str(max_training_score))
            print("===========================\n")
            if every_100th == 100:
                results.append((episode_number, training_score))
                every_100th = 0
            episode_number += 1
            number_of_episodes -= 1
            training_score = 0
            env.reset_game()
Example #9
import numpy as np
from ple import PLE
from ple.games.snake import Snake

agent = Snake(width=256, height=256)

env = PLE(agent, fps=15, force_fps=False, display_screen=True)

env.init()
actions = env.getActionSet()

q_states = {((1, 1), 0): 0}

count_q = {(0, 0): 0}
w = np.random.rand(4)
alpha = 0.5
gama = 1
epsilon = 0.7
steps = 1


# checked: correct :)
def compute_sprim(state, action):
    new_state = (0, 0)
    if action == 119:
        new_state = (state[0], state[1] + 1)
    if action == 97:
        new_state = (state[0] + 1, state[1])
    if action == 100:
        new_state = (state[0] - 1, state[1])
    if action == 115:
Example #10
class CustomGameEnv(gym.Env):
    def __init__(self, task={}):
        self._task = task
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        import importlib
        game_module = importlib.import_module('ple.games.customgame')
        game = getattr(game_module, 'customgame')()

        self.game_state = PLE(game, fps=30, display_screen=False)
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_width, self.screen_height = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))

        self.num_actions = len(self._action_set)
        self.viewer = None
        self.reward_mult = 1.0

    def seed(self, seed=None):
        if not seed:
            seed = np.random.randint(2**31 - 1)
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng
        self.game_state.init()
        return [seed]

    def reset_task(self, task):
        pass

    def render(self, mode='human'):
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3))
        self.game_state.reset_game()
        state = self._get_image()
        return state

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    def step(self, action):
        reward = self.reward_mult * self.game_state.act(
            self._action_set[action])
        state = self._get_image()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    # reward shaping used by the Q-learning agent in the __main__ block below:
    # surviving a frame yields 1, passing a pipe yields 10, anything else (a crash) yields -1000
    def act(self, p, action):
        r = p.act(self.action_set[action])
        if r == 0:
            r = 1
        elif r == 1:
            r = 10
        else:
            r = -1000
        return r


if __name__ == "__main__":
    episodes = 2_000_000_000
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=False)
    p.init()
    agent = Agent(p.getActionSet())
    max_score = 0

    for episode in range(episodes):
        p.reset_game()
        state = agent.get_state(game.getGameState())
        agent.update_greedy()
        while True:
            action = agent.get_best_action(state)
            reward = agent.act(p, action)
            next_state = agent.get_state(game.getGameState())
            agent.update_q_table(state, action, next_state, reward)
            current_score = p.score()
            state = next_state
            if p.game_over():
                max_score = max(current_score, max_score)
Example #12
# create our game
force_fps = True  # run as fast as possible (not locked to real-time fps)
display_screen = True
state_preprocessor = nv_state_preprocessor
reward = 0.0
game = WaterWorld()

# make a PLE instance.
p = PLE(game,
        force_fps=force_fps,
        display_screen=display_screen,
        state_preprocessor=state_preprocessor)

# our Naive agent!
agent = SmartAgent(actions=p.getActionSet())

# init agent and game.
p.init()

# start our loop
score = 0.0
for i in range(10):
    # if the game is over
    if p.game_over():
        p.reset_game()
    while not p.game_over():
        obs = p.getGameState()
        action = agent.pickAction(reward, obs)
        reward = p.act(action)  # reward after an action
    score = game.getScore()
class FlappyBirdGame():
    def __init__(self,
                 reward_values={},
                 reward_discount=0.99,
                 pipe_gap=100,
                 display_screen=True,
                 fps=30,
                 force_fps=True):
        self.game = PLE(FlappyBird(pipe_gap=pipe_gap),
                        reward_values=reward_values,
                        fps=fps,
                        force_fps=force_fps,
                        display_screen=display_screen)
        self.game.init()
        self.actions = self.game.getActionSet()
        self.reward_discount = reward_discount

    @staticmethod
    def random_agent(*args, **kwargs):
        return torch.rand(1)

    def calculate_trial_reward(self, rewards_tensor):
        rewards_output = torch.empty(rewards_tensor.shape[0])
        for i in range(rewards_tensor.shape[0]):
            discount_vector = torch.Tensor([self.reward_discount] *
                                           (rewards_tensor.shape[0] - i))
            pv_rewards = sum(rewards_tensor[i:] *
                             discount_vector**torch.FloatTensor(
                                 range(rewards_tensor.shape[0] - i)))
            rewards_output[i] = pv_rewards

        rewards_output = rewards_output.reshape((-1, 1))
        return rewards_output

    @staticmethod
    def observation_to_torch_tensor(observation):
        obs_tensor = torch.FloatTensor([
            observation['player_y'], observation['player_vel'],
            observation['next_pipe_dist_to_player'],
            observation['next_pipe_top_y'], observation['next_pipe_bottom_y'],
            observation['next_next_pipe_dist_to_player'],
            observation['next_next_pipe_top_y'],
            observation['next_next_pipe_bottom_y']
        ])

        obs_tensor = obs_tensor.reshape((1, 8))
        return obs_tensor

    def run_trial(self, agent=None, sample=True, verbose=False):
        if agent is None:
            agent = self.random_agent
        if self.game.game_over():
            self.game.reset_game()
        rewards = torch.empty(0)
        observations = torch.empty((0, 8))
        agent_decisions = torch.empty((0, 1))
        actual_decisions = torch.empty((0, 1))
        while not self.game.game_over():
            observation = self.observation_to_torch_tensor(
                self.game.getGameState())
            agent_decision = agent(observation)

            if sample:
                actual_decision = torch.bernoulli(agent_decision)
            else:
                actual_decision = torch.FloatTensor(
                    [1]) if agent_decision > 0.5 else torch.FloatTensor([0])

            actual_decision = actual_decision.reshape((1, 1))
            agent_decision = agent_decision.reshape((1, 1))
            if actual_decision == 1:
                action = self.actions[1]
            else:
                action = self.actions[0]

            reward = torch.FloatTensor([self.game.act(action)])

            # reward shaping
            # if (observation[0][0] < observation[0][4]) and (observation[0][0] > observation[0][3]):
            #     reward = torch.add(reward, torch.tensor(0.2))
            # else:
            #     reward = torch.add(reward, torch.tensor(-0.2))

            rewards = torch.cat((rewards, reward))
            observations = torch.cat((observations, observation))
            agent_decisions = torch.cat((agent_decisions, agent_decision))
            actual_decisions = torch.cat((actual_decisions, actual_decision))
            if verbose:
                print(f'action: {action}')
                print(f'observation: {observation}')
                print(f'reward: {reward}')

        return {
            'observations': observations,
            'rewards': self.calculate_trial_reward(rewards),
            'agent_decisions': agent_decisions,
            'actual_decisions': actual_decisions
        }

    def run_n_trials(self, n_trials, agent=None, sample=True):
        out_results = {
            'observations': torch.empty(0),
            'rewards': torch.empty(0),
            'agent_decisions': torch.empty(0),
            'actual_decisions': torch.empty(0)
        }
        for i in range(n_trials):
            results = self.run_trial(agent, sample)
            out_results['observations'] = torch.cat(
                (out_results['observations'], results['observations']))
            out_results['rewards'] = torch.cat(
                (out_results['rewards'], results['rewards']))
            out_results['agent_decisions'] = torch.cat(
                (out_results['agent_decisions'], results['agent_decisions']))
            out_results['actual_decisions'] = torch.cat(
                (out_results['actual_decisions'], results['actual_decisions']))

        return out_results
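
For reference, a small sanity check of the calculate_trial_reward helper above (hypothetical values; assumes torch and the PLE imports used by the class are available):

fb = FlappyBirdGame(display_screen=False, force_fps=True)
returns = fb.calculate_trial_reward(torch.tensor([0.0, 0.0, 1.0]))
# with reward_discount=0.99 this is roughly [[0.9801], [0.9900], [1.0000]]
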
Example #14
        return self.qLearning.getAction(state)

    def incorporateFeedback(self, state, action, reward, newState):
        self.qLearning.incorporateFeedback(state, action, reward, newState)

    def printWeights(self):
        print(str(self.qLearning.getWeights()))
        print('num weights: %d' % len(self.qLearning.getWeights()))


############################################################
if __name__ == '__main__':
    game = Pixelcopter(width=200, height=200)
    env = PLE(game, fps=30, display_screen=displayScreen)

    agent = Bot(actions=env.getActionSet())
    env.init()

    total_reward = 0.0
    min_reward = float('inf')
    max_reward = float('-inf')

    all_episode_scores = []
    plot_episode_scores = []
    plotted_episodes = []

    for i in range(num_runs):  # should run until qvalues converge
        episode_reward = 0.0
        frames = []
        while not env.game_over():
            state = game.getGameState()
Example #15
            obs = list(ple_env.getGameState().values())
            episode_reward += reward
            # if render:
            #     ple_env.getScreenRGB()
            if ple_env.game_over():
                break
        eval_reward.append(episode_reward)
    return np.mean(eval_reward)


# create the environment
game = Pong(cpu_speed_ratio=0.3)
# game = Pong()
pong = PLE(game, display_screen=True, force_fps=True)
# build the agent with the PARL framework
print(pong.getActionSet())
action_dim = len(pong.getActionSet())
obs_shape = len(pong.getGameState())
print(pong.getGameState())
# create the experience replay pool
rpm = ReplayMemory(MEMORY_SIZE)  # DQN replay memory

model = Model(act_dim=action_dim)
algorithm = DQN(model, act_dim=action_dim, gamma=GAMMA, lr=LEARNING_RATE)
agent = Agent(
    algorithm,
    obs_dim=obs_shape,
    act_dim=action_dim,
    e_greed=0.1,  # probability of choosing a random action (exploration)
    e_greed_decrement=1e-6)  # gradually reduce exploration as training converges
Example #16
class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 prespecified_game=True,
                 game_name='MyCatcher',
                 display_screen=True,
                 rgb_state=False):
        # open up a game state to communicate with emulator
        import importlib
        if prespecified_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = ('domains.ple.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        self.game = getattr(game_module, game_name)()
        self.rgb_state = rgb_state
        if self.rgb_state:
            self.game_state = PLE(self.game,
                                  fps=30,
                                  display_screen=display_screen)
        else:
            if prespecified_game:
                self.game_state = PLE(
                    self.game,
                    fps=30,
                    display_screen=display_screen,
                    state_preprocessor=process_state_prespecified)
            else:
                self.game_state = PLE(self.game,
                                      fps=30,
                                      display_screen=display_screen,
                                      state_preprocessor=process_state)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        if self.rgb_state:
            self.state_width, self.state_height = self.game_state.getScreenDims()
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.state_width,
                                                       self.state_height, 3))
        else:
            self.state_dim = self.game_state.getGameStateDims()
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.state_dim)
        self.viewer = None
        self.feature_bins = []
        if hasattr(self.game, 'feature_bins'):
            self.feature_bins = self.game.feature_bins

    def get_source_state(self, state):
        if hasattr(self.game, 'get_source_state'):
            return self.game.get_source_state(state)
        return None

    def get_uniform_state_weights(self):
        if hasattr(self.game, 'get_uniform_state_weights'):
            return self.game.get_uniform_state_weights()
        else:
            states = self.get_states()
            weights = np.ones(len(states))
            weights = [float(i) / sum(weights) for i in weights]
            return states, weights

    def generate_training_subset(self, percent_sim_data):
        if hasattr(self.game, 'generate_training_subset'):
            return self.game.generate_training_subset(percent_sim_data)

    def set_to_training_set(self):
        if hasattr(self.game, 'set_to_training_set'):
            return self.game.set_to_training_set()

    def set_to_testing_set(self):
        if hasattr(self.game, 'set_to_testing_set'):
            return self.game.set_to_testing_set()

    def get_states(self):
        if hasattr(self.game, 'states'):
            return self.game.states

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_state()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def _get_image(self, game_state):
        image_rotated = np.fliplr(
            np.rot90(game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    def _get_state(self):
        if self.rgb_state:
            return self._get_image(self.game_state)
        else:
            return self.game_state.getGameState()

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        if self.rgb_state:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.state_width,
                                                       self.state_height, 3))
        else:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.state_dim)
        self.game_state.reset_game()
        state = self._get_state()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image(self.game_state)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
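
A quick usage sketch for the PLEEnv wrapper above (hypothetical; rgb_state=True avoids the external process_state helpers, which are not shown in this snippet):

env = PLEEnv(prespecified_game=True, game_name='Catcher',
             display_screen=False, rgb_state=True)
obs = env._reset()                                   # raw RGB screen
obs, reward, done, info = env._step(env.action_space.sample())
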
Example #17
import numpy as np
import pygame
from pygame.locals import *


class TestAgent():
	def __init__(self, actions):
		self.actions = actions
	def doAction(self,reward,obs):
		#print 'hello'
		for event in pygame.event.get():
			if event.type == KEYDOWN:
				return self.actions[0]
			return None

game = RunningMinion()
#game = WaterWorld()
p = PLE(game, fps=30, display_screen=True)
agent = TestAgent(p.getActionSet())

p.init()
reward = 0.0
nb_frames = 2000

for i in range(nb_frames):
	if p.game_over():
		p.reset_game()
	if i%1==0:
		obser = p.getScreenRGB()
		action = agent.doAction(reward,obser)
		reward = p.act(action)
Example #18
rewards = {
    "tick": -0.1,  # each time the game steps forward in time the agent gets -0.1
    "positive": 1,  # each time the agent collects a green circle
    "negative": -5.0,  # each time the agent bumps into a red circle
}

# make a PLE instance.
# use lower fps so we can see what's happening a little more easily
game = WaterWorld(width=100, height=100, num_creeps=15)

# p = PLE(game, reward_values=rewards)
p = PLE(game, fps=30, force_fps=False, display_screen=True,
        reward_values=rewards)

p.init()
actions = p.getActionSet()[:-1]
agent = Agent(len(actions))

epochs = 10000000
game_duration = 1000
for epoch in range(epochs):
    p.reset_game()

    for it in range(game_duration):
        if p.game_over():
            p.reset_game()
            print("Finished with score: " + str(p.score()))

        current_state = np.array(p.getScreenGrayscale()).reshape((10000, ))

        action = agent.act(np.array([current_state]))
Example #19
            return self.actions[1]
        elif fwd[1] < 0 and abs(fwd[1]) > abs(fwd[0]):
            return self.actions[2]
        elif fwd[0] < 0 and abs(fwd[0]) > abs(fwd[1]):
            return self.actions[3]
        else:
            return self.actions[4]

os.putenv('SDL_VIDEODRIVER', 'fbcon')
os.environ["SDL_VIDEODRIVER"] = "dummy"

# create our game
force_fps = True  # run as fast as possible (not locked to real-time fps)
display_screen = False
game = WaterWorld()

# make a PLE instance.
p = PLE(game,force_fps=force_fps)

# init agent and game.
p.init()
p.display_screen = True

reward = 0
agent = MyAgent(p.getActionSet())
while not p.game_over():
    state = p.getGameState()
    action = agent.pickAction(reward, state)
    reward = p.act(action)
print(p.score())
Example #20
catcher_dict['state_stds'] = [
    13.89457683, 2.04087944, 17.41686248, 23.38546788
]

game_params = {'cartpole': cartpole_dict, 'catcher': catcher_dict}

if __name__ == "__main__":
    # Initiate cartpole envs
    cartpole_env = gym.make('CartPole-v1')
    # Initiate catcher envs
    catcher_env = PLE(Catcher(init_lives=1),
                      state_preprocessor=process_state,
                      display_screen=False)
    catcher_env.init()

    game_params['catcher']['actions'] = catcher_env.getActionSet()

    envs = {'cartpole': cartpole_env, 'catcher': catcher_env}

    # Initialise the first task: cartpole
    curr_task = sim_params['first_task']

    env = envs[curr_task]

    # Multiple replay databases maintained if multitasking
    if train_params['multitask']:
        mem_length = train_params['replay_sizes']
    else:
        mem_length = game_params[curr_task]['memory_size']

    # Create agent
Example #21
            reward = ple_env.act(action)
            obs = list(ple_env.getGameState().values())
            episode_reward += reward
            if render:
                ple_env.getScreenRGB()
            if ple_env.game_over():
                break
        eval_reward.append(episode_reward)
    return np.mean(eval_reward)


# create the environment
game = FlappyBird()
p = PLE(game, fps=30, display_screen=True, force_fps=True)

act_dim = len(p.getActionSet())
states = len(p.getGameState())
rpm = ReplayMemory(MEMORY_SIZE)

model = Model(act_dim=act_dim)
alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
agent = Agent(alg,
              obs_dim=states,
              act_dim=act_dim,
              e_greed_decrement=0,
              e_greed=0)  # e_greed: probability of choosing a random action (exploration)

# load a saved model
# if os.path.exists('./model.ckpt'):
#     agent.restore('./model.ckpt')

game = FlappyBird()
p = PLE(game,
        fps=30,
        display_screen=True,
        force_fps=False,
        state_preprocessor=process_state)
p.init()
game.ple = p
p.init()

#print(p.getActionSet())

#agent
action_set = p.getActionSet()
agent = RandomSearch(len(action_set), p.getGameStateDims()[1])

# agent.load("flappy1_100.h5")

nb_games = 1
nb_frames = 0
flag_game_10 = False
flag_game_100 = False
flag_game_50 = False
score_game = 0

last_50_games_score = deque(maxlen=50)

EXPLORE = 5000000  #small is 300000, big is 5000000
FINAL_EPSILON = 0.0001
Example #23
class PygameLearningEnvironment(Environment):

    def __init__(self, game_name, rewards, state_as_image = True, fps = 30, force_fps=True, frame_skip=2,
                 hold_action=2, visualize=False, width=84, height=84, lives=1):
        """
        Initialize Pygame Learning Environment
        https://github.com/ntasfi/PyGame-Learning-Environment

        Args:
            env_name: PLE environment

            fps: frames per second
            force_fps: False for slower speeds
            frame_skip: number of env frames to skip
            hold_action: number of env frames to hold each action for
            isRGB: get color or greyscale version of statespace #isRGB = False,
            game_height,game_width: height and width of environment
            visualize: If set True, the program will visualize the trainings, will slow down training
            lives: number of lives in game. Game resets on game over (ie lives = 0). only in Catcher and Pong (score)

        """

        self.env_name = game_name
        self.rewards = rewards
        self.lives = lives
        self.state_as_image = state_as_image
        self.fps = fps #30  # frames per second
        self.force_fps = force_fps #True  # False for slower speeds
        self.frame_skip = frame_skip  # frames to skip
        self.ple_num_steps = hold_action  # frames to continue action for
        #self.isRGB = isRGB #always returns color, lets tensorforce due the processing
        self.visualize = visualize
        self.width = width
        self.height = height
        #testing
        self.reached_terminal = 0
        self.episode_time_steps = 0
        self.episode_reward = 0
        self.total_time_steps = 0

        if self.env_name == 'catcher':
            self.game = Catcher(width=self.width, height=self.height,init_lives=self.lives)
        elif self.env_name == 'pixelcopter':
            self.game = Pixelcopter(width=self.width, height=self.height)
        elif self.env_name == 'pong':
            self.game = Pong(width=self.width, height=self.height,MAX_SCORE=self.lives)
        elif self.env_name == 'puckworld':
            self.game = PuckWorld(width=self.width, height=self.height)
        elif self.env_name == 'raycastmaze':
            self.game = RaycastMaze(width=self.width, height=self.height)
        elif self.env_name == 'snake':
            self.game = Snake(width=self.width, height=self.height)
        elif self.env_name == 'waterworld':
            self.game = WaterWorld(width=self.width, height=self.height)
        elif self.env_name == 'monsterkong':
            self.game = MonsterKong()
        elif self.env_name == 'flappybird':
            self.game = FlappyBird(width=144, height=256)  # limitations on height and width for flappy bird
        else:
            raise TensorForceError('Unknown Game Environment.')

        if self.state_as_image:
            process_state = None
        else:
            #create a preprocessor to read the state dictionary as a numpy array
            def process_state(state):
                # ret_value = np.fromiter(state.values(),dtype=float,count=len(state))
                ret_value = np.array(list(state.values()), dtype=np.float32)
                return ret_value

        # make a PLE instance
        self.env = PLE(self.game,reward_values=self.rewards,fps=self.fps, frame_skip=self.frame_skip,
                       num_steps=self.ple_num_steps,force_fps=self.force_fps,display_screen=self.visualize,
                       state_preprocessor = process_state)
        #self.env.init()
        #self.env.act(self.env.NOOP) #game starts on black screen
        #self.env.reset_game()
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.reset_game()


        # setup gamescreen object
        if state_as_image:
            w, h = self.env.getScreenDims()
            self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)
        # if isRGB:
        #     self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        # else:
        #     self.gamescreen = np.empty((h, w), dtype=np.uint8)

        # setup action converter
        # PLE returns legal action indexes, convert these to just numbers
        self.action_list = self.env.getActionSet()
        self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))



    def __str__(self):
        return 'PygameLearningEnvironment({})'.format(self.env_name)

    def close(self):
        pygame.quit()
        self.env = None

    def reset(self):
        # if isinstance(self.gym, gym.wrappers.Monitor):
        #     self.gym.stats_recorder.done = True
        #env.act(env.NOOP) # need to take an action or screen is black
        # clear gamescreen
        if self.state_as_image:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.float32)
        self.env.reset_game()
        return self.current_state

    def execute(self, actions):

        #print("lives check in ple {}".format(self.env.lives()))
        #self.env.saveScreen("test_screen_capture_before_{}.png".format(self.total_time_steps))
        #lives_check = self.env.lives() #testing code

        ple_actions = self.action_list[actions]
        reward = self.env.act(ple_actions)
        state = self.current_state
        # testing code
        # self.env.saveScreen("test_screen_capture_after_{}.png".format(self.total_time_steps))
        # self.episode_time_steps += 1
        # self.episode_reward += reward
        # self.total_time_steps += 1
        # print("reward is {}".format(reward))
        # #if self.env.lives() != lives_check:
        # #    print('lives are different is game over? {}'.format(self.env.game_over()))
        # print('lives {}, game over {}, old lives {}'.format(self.env.lives(),self.env.game_over(),lives_check))

        if self.env.game_over():
            terminal = True
            # testing code
            self.reached_terminal += 1
            # print("GAME OVER reached terminal {}".format(self.reached_terminal))
            # print("episode time steps {}, episode reward {}".format(self.episode_time_steps,self.episode_reward))
            # self.episode_reward = 0
            # self.episode_time_steps = 0
            # print("total timesteps {}".format(self.total_time_steps))
        else:
            terminal = False

        return state, terminal, reward

    @property
    def actions(self):
        return dict(type='int', num_actions=len(self.action_list), names=self.action_list)

    # @property
    # def actions(self):
    #     return OpenAIGym.action_from_space(space=self.gym.action_space)

    #ALE implementation
    # @property
    # def actions(self):
    #     return dict(type='int', num_actions=len(self.action_inds), names=self.action_names)

    @property
    def states(self):
        return dict(shape=self.gamescreen.shape, type=float)

    @property
    def current_state(self):
        #returned state can either be an image or an np array of key components
        if self.state_as_image:
            self.gamescreen = self.env.getScreenRGB()
            # if isRGB:
            #     self.gamescreen = self.env.getScreenRGB()
            # else:
            #     self.gamescreen = self.env.getScreenGrayscale()
        else:
            self.gamescreen = self.env.getGameState()

        return np.copy(self.gamescreen)

    #ALE implementation
    # @property
    # def states(self):
    #     return dict(shape=self.gamescreen.shape, type=float)

    # @property
    # def current_state(self):
    #     self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
    #     return np.copy(self.gamescreen)

    # @property
    # def is_terminal(self):
    #     if self.loss_of_life_termination and self.life_lost:
    #         return True
    #     else:
    #         return self.ale.game_over()
Example #24
class PLEWaterWorldEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 game_name='WaterWorld',
                 display_screen=True,
                 ple_game=True,
                 obs_type="Image",
                 reward_type=1):
        '''
        For WaterWorld:
            getGameState() returns [player x position, player y position, player x velocity, player y velocity, player distance to each creep]
            player distance to each creep is a dict with "GOOD" : [], "BAD" : []
        @Params:
            obs_type :
                "RAM" : getGameState()
                "Image" : (48, 48, 3)
            reward_type :
                0 : means [reward1, reward2]
                1 : means raw reward
                2 : means change of dis = sum(distance_from_good) - sum(distance_from_bad)
        '''
        # set headless mode
        os.environ['SDL_VIDEODRIVER'] = 'dummy'

        # open up a game state to communicate with emulator
        import importlib
        if ple_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = game_name.lower()
        game_module = importlib.import_module(game_module_name)
        game = getattr(game_module, game_name)()

        ##################################################################
        # old one
        #self.game_state = PLE(game, fps=30, display_screen=display_screen)

        # use arg state_preprocessor to support self.game_state.getGameState()
        self.game_state = PLE(game,
                              fps=30,
                              display_screen=display_screen,
                              state_preprocessor=self.process_state)
        ##################################################################

        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        self.screen_height, self.screen_width = self.game_state.getScreenDims()
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.viewer = None

        ############################################
        self.obs_type = obs_type
        self.reward_type = reward_type

        # every reward type's max-abs value
        self.rewards_ths = [10.0, 5.0]

        # change observation space:
        self.img_width = 84
        self.img_height = 84
        self.img_shape = (self.img_width, self.img_height, 3)
        if self.obs_type == "Image":
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.img_shape,
                                                dtype=np.uint8)
        else:
            print("Water world only supports image observation!")
            sys.exit(0)
        ############################################

    #############################################
    # Add state processor
    def process_state(self, state):
        return np.array([state.values()])

    #############################################

    def _step(self, a, gamma=0.99):
        #############################################
        # old observation
        old_ram = self.game_state.getGameState()
        #############################################

        reward = self.game_state.act(self._action_set[a])
        state = self._get_image()
        terminal = self.game_state.game_over()

        #############################################
        # new observation
        ram = self.game_state.getGameState()
        #############################################

        #############################################
        # reward 2
        if self.reward_type == 2:
            reward = self.get_reward(old_ram, ram, terminal, 2, gamma)

        # reward 0
        if self.reward_type == 0:
            reward1 = reward
            reward2 = self.get_reward(old_ram, ram, terminal, 2, gamma)
            reward = np.array([reward1, reward2])
        ##############################################

        ############################################################
        # reward scaling
        if self.reward_type == 0:
            for rt in range(len(reward)):
                reward[rt] = reward[rt] / self.rewards_ths[rt]
        else:
            reward = reward / self.rewards_ths[self.reward_type - 1]
        ############################################################

        return state, reward, terminal, {}

    #############################################
    # Add for reward
    #############################################
    def get_reward(self, old_ram, ram, done, reward_type, gamma=0.99):
        ''' 
        @Params:
            old_ram, ram : numpy.array, [dict_values([x, y, z, w, {"GOOD" : [], "BAD" : []}])]
            reward_type : 2 , change of distance from good - bad
        '''
        old_ram = list(old_ram[0])
        ram = list(ram[0])

        reward = 0.0
        if not done:
            if reward_type == 2:
                old_goods = np.array(old_ram[4]["GOOD"])
                old_bads = np.array(old_ram[4]["BAD"])
                goods = np.array(ram[4]["GOOD"])
                bads = np.array(ram[4]["BAD"])

                mean_old_goods = np.mean(
                    old_goods) if len(old_goods) > 0 else 0.0
                mean_old_bads = np.mean(old_bads) if len(old_bads) > 0 else 0.0
                mean_goods = np.mean(goods) if len(goods) > 0 else 0.0
                mean_bads = np.mean(bads) if len(bads) > 0 else 0.0

                old_sum_dis = mean_old_goods - mean_old_bads
                sum_dis = mean_goods - mean_bads
                reward = old_sum_dis - gamma * sum_dis

                if reward > 5.0:
                    reward = 5.0
                elif reward < -5.0:
                    reward = -5.0
        return reward

    #############################################
    #############################################

    def _get_image(self):
        image_rotated = np.fliplr(
            np.rot90(self.game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        ##########################################
        # resize image
        img = Image.fromarray(image_rotated)
        img = img.resize((self.img_width, self.img_height), Image.LANCZOS)  # LANCZOS is the current name for ANTIALIAS
        image_resized = np.array(img).astype(np.uint8)
        ##########################################
        return image_resized

    @property
    def _n_actions(self):
        return len(self._action_set)

    # return: (states, observations)
    def _reset(self):
        self.observation_space = spaces.Box(low=0,
                                            high=255,
                                            shape=(self.screen_width,
                                                   self.screen_height, 3),
                                            dtype=np.uint8)
        self.game_state.reset_game()
        state = self._get_image()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image()
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
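
A similar usage sketch for the PLEWaterWorldEnv wrapper above (hypothetical; this wrapper only supports image observations):

env = PLEWaterWorldEnv(display_screen=False, obs_type="Image", reward_type=2)
obs = env._reset()                                   # (84, 84, 3) uint8 frame
obs, reward, done, info = env._step(env.action_space.sample())
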
Example #25
        if jump > not_jump:
            return 0
        else:
            return 1

    def update_Q(self, s, s_prime, reward, action):
        self.Q[s[0], s[1], s[2], action] = (1 - self._alpha) * self.Q[
            s[0], s[1], s[2], action] + self._alpha * (
                reward + self._lambda *
                np.max(self.Q[s_prime[0], s_prime[1], s_prime[2]]))
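
The update above is the standard Q-learning rule with learning rate α (self._alpha) and discount factor λ (self._lambda): Q(s, a) ← (1 − α)·Q(s, a) + α·(r + λ·max over a′ of Q(s′, a′)).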


if __name__ == "__main__":
    game = FlappyBird()
    p = PLE(game, fps=30, display_screen=True)
    agent = Agent(action_space=p.getActionSet(), grid_size=10)

    p.init()

    s = agent.get_current_state(game.getGameState())
    episodes = 0
    max_score = 0

    while True:
        # Find the optimal action based on the current state
        max_action = agent.optimal_action(s)

        current_score = p.score()
        max_score = max(current_score, max_score)

        # Perform the optimal action and return the reward
Example #26
class MyEnv(Environment):
    VALIDATION_MODE = 0
    # original size is 288x512 so dividing

    def __init__(self, rng, game=None, frame_skip=4,
            ple_options={"display_screen": True, "force_fps":True, "fps":30}):

        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0

        self._frame_skip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
        self._hist_size = 1

        if game is None:
            raise ValueError("Game must be provided")


        self._ple = PLE(game, **ple_options)
        self._ple.init()

        self._actions = self._ple.getActionSet()
        self._state_size = self._ple.getGameStateDims()[0]
        self._state_saved = np.zeros((self._state_size), dtype=np.float32)
        self.previous_score = 0.
        self.episode_scores = []


    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self.episode_scores = []
                self.previous_score = .0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
                self.episode_scores.append(self._mode_score - self.previous_score)
                self.previous_score = self._mode_score
        elif self._mode != -1: # and thus mode == -1
            self._mode = -1

        # print("Dead at score {}".format(self._ple.game.getScore()))
        self._ple.reset_game()
        for _ in range(self._random_state.randint(self._hist_size)):
            self._ple.act(self._ple.NOOP)

        return [[[0] * self._state_size] * self._hist_size]


    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frame_skip):
            reward += self._ple.act(action)

            if self.inTerminalState():
                break

        self._state_saved = self._ple.getGameState()
        self._mode_score += reward
        if self.inTerminalState():
            pass

        return reward #np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._mode_episode_count += 1
        maxscore = max(self.episode_scores) if len(self.episode_scores) else "N/A"
        print("== Max score of episode is {} over {} episodes ==".format(
            maxscore, self._mode_episode_count))


    def inputDimensions(self):
        return [(self._hist_size, self._state_size)]

    def observationType(self, subject):
        return np.float32

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._state_saved)]

    def inTerminalState(self):
        return self._ple.game_over()
Example #27
    def score(self, training=True, nb_episodes=10):
        reward_values = {
            "positive": 1.0,
            "negative": 0.0,
            "tick": 0.0,
            "loss": 0.0,
            "win": 0.0
        }

        env = PLE(FlappyBird(),
                  fps=30,
                  display_screen=False,
                  force_fps=True,
                  rng=None,
                  reward_values=reward_values)
        env.init()

        total_episodes = nb_episodes
        score = 0
        scores = []
        while nb_episodes > 0:
            # pick an action
            state = env.game.getGameState()
            action = self.policy(state)

            # step the environment
            reward = env.act(env.getActionSet()[action])

            score += reward

            # reset the environment if the game is over
            if env.game_over() or score >= 100:
                scores.append(score)
                env.reset_game()
                nb_episodes -= 1
                score = 0
                # print(nb_episodes)

        avg_score = sum(scores) / float(len(scores))
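        # "st" presumably refers to scipy.stats; compute a 95% Student-t confidence interval over episode scores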
        confidence_interval = st.t.interval(0.95,
                                            len(scores) - 1,
                                            loc=np.mean(scores),
                                            scale=st.sem(scores))
        if np.isnan(confidence_interval[0]):
            confidence_interval = (avg_score, avg_score)

        print("Games played: {}".format(total_episodes))
        print("Average score: {}".format(avg_score))
        print("95 confidence interval: {}".format(confidence_interval))

        if training:
            score_file = "{}/scores.csv".format(self.name)
            # If file doesn't exist, add the header
            if not os.path.isfile(score_file):
                with open(score_file, "a") as f:
                    f.write(
                        "avg_score,episode_count,frame_count,interval_lower,interval_upper,min,max\n"
                    )

            # Append scores to the file
            with open(score_file, "a") as f:
                f.write("{},{},{},{},{},{},{}\n".format(
                    avg_score, self.episode_count, self.frame_count,
                    confidence_interval[0], confidence_interval[1],
                    min(scores), max(scores)))

            count = 0
            for score in scores:
                if score >= 50:
                    count += 1
            if count >= len(scores) * 0.9:
                print("*** over 50 score in {} frames ***".format(
                    self.frame_count))
                with open("pass_50.csv", "ab") as f:
                    f.write("{},{}\n".format(self.name, self.frame_count))
        else:
            with open("scores.txt", "ab") as f:
                for score in scores:
                    f.write("{},{}\n".format(self.name, score))
Example #28
class MyEnv(Environment):
    VALIDATION_MODE = 0
    memSize = 4
    # original size is 288x512 so dividing
    dividing_factor = 8
    width = 288 // dividing_factor
    height = 512 // dividing_factor

    def __init__(self,
                 rng,
                 game=None,
                 frame_skip=4,
                 ple_options={
                     "display_screen": True,
                     "force_fps": True,
                     "fps": 30
                 }):

        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0

        self._frame_skip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng

        if game is None:
            raise ValueError("Game must be provided")

        self._ple = PLE(game, **ple_options)
        self._ple.init()

        w, h = self._ple.getScreenDims()
        self._screen = np.empty((w, h), dtype=np.uint8)
        self._reduced_screen = np.empty((self.width, self.height),
                                        dtype=np.uint8)
        self._actions = self._ple.getActionSet()

    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
        elif self._mode != -1:  # and thus mode == -1
            self._mode = -1

        print("Dead at score {}".format(self._ple.game.getScore()))
        self._ple.reset_game()
        # for _ in range(self._random_state.randint(15)):
        # self._ple.act(self._ple.NOOP)
        # self._screen = self._ple.getScreenGrayscale()
        # cv2.resize(self._screen, (48, 48),
        # self._reduced_screen,
        # interpolation=cv2.INTER_NEAREST)

        return [self.memSize * [self.width * [self.height * [0]]]]

    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frame_skip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break

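        # grab the current frame and downscale it by dividing_factor for the network input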
        self._screen = self._ple.getScreenGrayscale()
        self._reduced_screen = cv2.resize(self._screen,
                                          (self.height, self.width),
                                          interpolation=cv2.INTER_NEAREST)
        cv2.imshow("debug", self._reduced_screen.T)
        cv2.waitKey(1)
        self._mode_score += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._mode_episode_count += 1
        mean = (self._mode_score / self._mode_episode_count
                if self._mode_episode_count else "N/A")
        print("== Mean score per episode is {} over {} episodes ==".format(
            mean, self._mode_episode_count))

    def inputDimensions(self):
        return [(self.memSize, self.width, self.height)]

    def observationType(self, subject):
        return np.float32

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reduced_screen) / 256.]

    def inTerminalState(self):
        return self._ple.game_over()
fps = 30 #fps we want to run at
frame_skip = 2
num_steps = 1
force_fps = False #slower speed
display_screen = True

reward = 0.0
max_noops = 20
nb_frames = 15000

#make a PLE instance.
p = PLE(game, fps=fps, frame_skip=frame_skip, num_steps=num_steps, 
	force_fps=force_fps, display_screen=display_screen)

#our Naive agent!
agent = NaiveAgent(p.getActionSet())

#init agent and game.
p.init()

# let's do a random number of NOOPs
for i in range(np.random.randint(0, max_noops)):
    reward = p.act(p.NOOP)

# start our training loop
for f in range(nb_frames):
    # if the game is over, reset it
    if p.game_over():
        p.reset_game()

    obs = p.getScreenRGB()
    action = agent.pickAction(reward, obs)
    reward = p.act(action)
Example #30
EPSILON_DECAY = EPOCHS * STEPS_PER_EPOCHS
EPSILON_MIN = 0.01
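# per-step epsilon increment for a linear anneal from EPSILON_START down to EPSILON_MIN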
EPSILON_DECAY_V = (EPSILON_MIN - EPSILON_START) / EPSILON_DECAY
SEED = 123456
epsilon = EPSILON_START
rng = np.random.RandomState(SEED)
game = flappy.FlappyClone()
env = PLE(game,
          display_screen=True,
          force_fps=True,
          fps=30,
          state_preprocessor=preprocessor,
          rng=rng)
env.game.rewards["positive"] = 1
# env.game.rewards["tick"] = .01
qAgent = QAgent(env.getActionSet(), [s.size for s in scalers],
                discount=.99,
                learningRate=.2,
                gridSize=GRID_SIZE,
                epsilon=epsilon)
qAgent.jFilePath = os.path.join(folder, qAgent.jFilePath)

reward = 0.
clock = pygame.time.Clock()
laststate = None
lastticks = 0
periodJump = 0
action = None
nextTest = False
for e in range(EPOCHS):
    avgloss = 0.
Example #31
from ple.games.flappybird import FlappyBird
from ple import PLE
from humanagent import HumanAgent

game = FlappyBird()
p = PLE(game, fps=30, display_screen=True)
agent = HumanAgent(allowed_actions=p.getActionSet())

p.init()
reward = 0.0

nb_frames = 100
for i in range(nb_frames):
    if p.game_over():
        p.reset_game()
    observation = p.getScreenRGB()
    action = agent.pickAction(reward, observation)
    reward = p.act(action)
Example #32
def launch(args, defaults, description):
    """
    Execute a complete training run.
    """

    logging.basicConfig(level=logging.INFO)
    parameters = process_args(args, defaults, description)

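    # passing an empty dict keeps PLE's default reward values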
    rewards = {}
    
    try:
        module = importlib.import_module("ple.games.%s" % parameters.game.lower())
        game = getattr(module, parameters.game)
        if parameters.game == "FlappyBird":
            game = game()
        elif parameters.game == "WaterWorld":
            game = game(width=84, height=84, num_creeps=6)
        else:
            game = game(width=84, height=84)
    except (ImportError, AttributeError):
        raise ValueError("The game %s could not be found. Try using the classname, it is case sensitive." % parameters.game)
    
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    if parameters.cudnn_deterministic:
        theano.config.dnn.conv.algo_bwd = 'deterministic'

    env = PLE(
            game,
            fps=60,
            force_fps=parameters.force_fps, 
            display_screen=parameters.display_screen,
            reward_values=rewards,
            rng=rng
    )

    num_actions = len(env.getActionSet())

    if parameters.nn_file is None:
        network = q_network.DeepQLearner(defaults.RESIZED_WIDTH,
                                         defaults.RESIZED_HEIGHT,
                                         num_actions,
                                         parameters.phi_length,
                                         parameters.discount,
                                         parameters.learning_rate,
                                         parameters.rms_decay,
                                         parameters.rms_epsilon,
                                         parameters.momentum,
                                         parameters.clip_delta,
                                         parameters.freeze_interval,
                                         parameters.batch_size,
                                         parameters.network_type,
                                         parameters.update_rule,
                                         parameters.batch_accumulator,
                                         rng)
    else:
        with open(parameters.nn_file, 'rb') as handle:
            network = cPickle.load(handle)

    agent = ple_agent.NeuralAgent(network,
                                  parameters.epsilon_start,
                                  parameters.epsilon_min,
                                  parameters.epsilon_decay,
                                  parameters.replay_memory_size,
                                  parameters.experiment_prefix,
                                  parameters.replay_start_size,
                                  parameters.update_frequency,
                                  rng)

    experiment = ple_experiment.PLEExperiment(env, agent,
                                              defaults.RESIZED_WIDTH,
                                              defaults.RESIZED_HEIGHT,
                                              parameters.resize_method,
                                              parameters.epochs,
                                              parameters.steps_per_epoch,
                                              parameters.steps_per_test,
                                              parameters.frame_skip,
                                              parameters.death_ends_episode,
                                              parameters.max_start_nullops,
                                              rng)

    
    env.init()
    experiment.run()
import numpy as np
from ple import PLE
from ple.games.waterworld import WaterWorld


# let's adjust the rewards our agent receives
rewards = {
    "tick": -0.01,  # each time the game steps forward in time the agent gets -0.01
    "positive": 1.0,  # each time the agent collects a green circle
    "negative": -5.0,  # each time the agent bumps into a red circle
}

# make a PLE instance.
# use a lower fps so we can see what's happening a little more easily
game = WaterWorld(width=256, height=256, num_creeps=8)
p = PLE(game, fps=15, force_fps=False, display_screen=True,
        reward_values=rewards)
# we pass in the rewards and PLE will adjust the game for us

p.init()
actions = p.getActionSet()
for i in range(1000):
    if p.game_over():
        p.reset_game()

    action = actions[np.random.randint(0, len(actions))]  # random actions
    reward = p.act(action)

    print "Score: {:0.3f} | Reward: {:0.3f} ".format(p.score(), reward)
Example #34
class Bot():
    """
            This is our test agent. It picks actions after training!
    """
    def __init__(self, lr):

        self.lr = lr
        self.game = Pixelcopter(width=480, height=480)
        self.p = PLE(self.game, fps=60, display_screen=True)
        self.actions = self.p.getActionSet()

    #def pickAction(self, reward, obs):
    #   return random.choice(self.actions)

    def frame_step(self, act_inp):
        terminal = False
        reward = self.p.act(act_inp)
        if self.p.game_over():
            self.p.reset_game()
            terminal = True
            reward = -1
        else:
            reward = 1

        self.score = self.p.score()
        img = self.p.getScreenGrayscale()
        img = transform.resize(img, (80, 80))
        img = np.ravel(exposure.rescale_intensity(img, out_range=(0, 255)))

        return img, reward, terminal

    def build_model(self):
        print("Building the model..")
        model = Sequential()
        model.add(
            Convolution2D(32,
                          8,
                          8,
                          subsample=(4, 4),
                          border_mode='same',
                          input_shape=(img_rows, img_cols,
                                       img_channels)))  #80*80*4
        model.add(Activation('relu'))
        model.add(Convolution2D(64, 4, 4, subsample=(2, 2),
                                border_mode='same'))
        model.add(Activation('relu'))
        model.add(Convolution2D(64, 3, 3, subsample=(1, 1),
                                border_mode='same'))
        model.add(Activation('relu'))
        model.add(Flatten())
        model.add(Dense(512))
        model.add(Activation('relu'))
        model.add(Dense(2))

        adam = Adam(lr=self.lr)
        model.compile(loss='mse', optimizer=adam)
        self.model = model
        print("Finished building the model..")

    def trainNetwork(self, mode):
        D = deque()

        x_t, r_0, terminal = self.frame_step(self.actions[0])
        x_t = x_t / 255.0

        s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
        #print (s_t.shape)

        #need to reshape for keras
        s_t = s_t.reshape(1, s_t.shape[0], s_t.shape[1],
                          s_t.shape[2])  #1*80*80*4

        if mode == 'Run':
            OBSERVE = 999999999  #We keep observe, never train
            epsilon = FINAL_EPSILON
            print("Now we load weight")
            self.model.load_weights("model.h5")
            adam = Adam(lr=self.lr)
            self.model.compile(loss='mse', optimizer=adam)
            print("Weight load successfully")
        else:  #We go to training mode
            OBSERVE = OBSERVATION
            epsilon = INITIAL_EPSILON
Example #35
import numpy as np
from keras.models import load_model
from challenge_utils import process_screen
from collections import deque
from ple.games.flappybird import FlappyBird
from ple import PLE

deepQnet = load_model('model.h5')
game = FlappyBird(graphics="fixed")
p = PLE(game, fps=30, frame_skip=1, num_steps=1)
list_actions = p.getActionSet()
size_img = (80, 80)

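# rolling buffer of the last four processed frames (presumably stacked as the DQN input)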
frameDeque = deque([
    np.zeros(size_img),
    np.zeros(size_img),
    np.zeros(size_img),
    np.zeros(size_img)
],
                   maxlen=4)


def FlappyPolicy(state, screen):
    global deepQnet
    global frameDeque
    global list_actions

    x = process_screen(screen)
    # Reinitialize the deque if we start a new game
    if not np.any(x[10:, :]):  # if everything in front of Flappy is black
        frameDeque = deque([
class NaiveAgent():
	"""
		This is our naive agent. It picks actions at random!
	"""
	def __init__(self, actions):
		self.actions = actions

	def pickAction(self, reward, obs):
		return self.actions[np.random.randint(0, len(self.actions))]

###################################
game = Doom(scenario="take_cover")

env = PLE(game)
agent = NaiveAgent(env.getActionSet())
env.init()

reward = 0.0
for f in range(15000):
    # if the game is over, reset it
    if env.game_over():
        env.reset_game()

    action = agent.pickAction(reward, env.getScreenRGB())
    reward = env.act(action)

    if f > 2000:
        env.display_screen = True
        env.force_fps = False
Example #37
    def train(self):
        """Train."""
        logs_path = self.args.logs_path
        video_path = self.args.video_path
        restore = self.args.restore
        train = self.args.train

        # Initial PLE environment
        os.putenv('SDL_VIDEODRIVER', 'fbcon')
        os.environ["SDL_VIDEODRIVER"] = "dummy"

        # Design reward
        reward_values = {
            "positive": 1,
            "tick": 0.1,
            "loss": -1,
        }

        # Create FlappyBird game env
        env = PLE(FlappyBird(),
                  display_screen=False,
                  reward_values=reward_values)

        # Gets the actions FlappyBird supports
        action_set = env.getActionSet()

        replay_buffer = ReplayBuffer(self.hparams.replay_buffer_size)
        agent = Agent(action_set, self.hparams)

        # restore model
        if restore:
            agent.restore(restore)

        reward_logs = []
        loss_logs = []

        for episode in range(1, self.hparams.total_episode + 1):
            # reset env
            env.reset_game()
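            # take one initial step so a first frame is available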
            env.act(0)
            obs = convert(env.getScreenGrayscale())
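            # initial state: the first frame repeated four times (DQN-style frame stacking)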
            state = np.stack([[obs for _ in range(4)]], axis=0)
            t_alive = 0
            total_reward = 0

            if episode % self.hparams.save_video_frequency == 0 and episode > self.hparams.initial_observe_episode:
                agent.stop_epsilon()
                frames = [env.getScreenRGB()]

            while not env.game_over():
                action = agent.take_action(state)
                reward = env.act(action_set[action])

                if episode % self.hparams.save_video_frequency == 0 and episode > self.hparams.initial_observe_episode:
                    frames.append(env.getScreenRGB())
                obs = convert(env.getScreenGrayscale())
                obs = np.reshape(obs, [1, 1, obs.shape[0], obs.shape[1]])

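                # slide the 4-frame window: drop the oldest frame and append the newest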
                state_new = np.append(state[:, 1:, ...], obs, axis=1)
                action_onehot = np.zeros(len(action_set))
                action_onehot[action] = 1

                t_alive += 1
                total_reward += reward
                replay_buffer.append(
                    (state, action_onehot, reward, state_new, env.game_over()))
                state = state_new

            # save video
            if episode % self.hparams.save_video_frequency == 0 and episode > self.hparams.initial_observe_episode:
                os.makedirs(video_path, exist_ok=True)
                clip = make_video(frames, fps=60).rotate(-90)
                clip.write_videofile(os.path.join(
                    video_path, 'env_{}.mp4'.format(episode)),
                                     fps=60)
                agent.restore_epsilon()
                print('Episode: {} t: {} Reward: {:.3f}'.format(
                    episode, t_alive, total_reward))
                # danger
                mp4list = glob.glob('./video_XXX/*.mp4')
                if len(mp4list) > 0:
                    latest = mp4list[0]
                    latest_timestamp = os.path.getmtime(mp4list[0])
                    for mp4 in mp4list:
                        ts = os.path.getmtime(mp4)
                        if (ts > latest_timestamp):
                            latest_timestamp = ts
                            latest = mp4
                    video = io.open(latest, 'r+b').read()
                    encoded = base64.b64encode(video)
                    ipythondisplay.display(
                        HTML(data='''<video alt="test" autoplay 
                                    loop controls style="height: 400px;">
                                    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
                                 </video>'''.format(encoded.decode('ascii'))))
                #end danger
                else:
                    print("Could not find video")

            if episode > self.hparams.initial_observe_episode and train:
                # save model
                if episode % self.hparams.save_logs_frequency == 0:
                    agent.save(episode, logs_path)
                    np.save(os.path.join(logs_path, 'loss.npy'),
                            np.array(loss_logs))
                    np.save(os.path.join(logs_path, 'reward.npy'),
                            np.array(reward_logs))

                # update target network
                if episode % self.hparams.update_target_frequency == 0:
                    agent.update_target_network()

                # sample batch from replay buffer
                batch_state, batch_action, batch_reward, batch_state_new, batch_over = replay_buffer.sample(
                    self.hparams.batch_size)

                # update policy network
                loss = agent.update_Q_network(batch_state, batch_action,
                                              batch_reward, batch_state_new,
                                              batch_over)

                loss_logs.extend([[episode, loss]])
                reward_logs.extend([[episode, total_reward]])

                # print reward and loss
                if episode % self.hparams.show_loss_frequency == 0:
                    print(
                        'Episode: {} t: {} Reward: {:.3f} Loss: {:.3f}'.format(
                            episode, t_alive, total_reward, loss))

                agent.update_epsilon()
Example #38
class MyEnv(Environment):
    VALIDATION_MODE = 0

    def __init__(self, rng, game=None, frame_skip=4, 
            ple_options={"display_screen": True, "force_fps":True, "fps":30}):

        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0

        self._frameSkip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
       
        if game is None:
            raise ValueError("Game must be provided")

        self._ple = PLE(game, **ple_options)
        self._ple.init()

        w, h = self._ple.getScreenDims()
        self._screen = np.empty((h, w), dtype=np.uint8)
        self._reducedScreen = np.empty((48, 48), dtype=np.uint8)
        self._actions = self._ple.getActionSet()

                
    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
        elif self._mode != -1: # and thus mode == -1
            self._mode = -1

        self._ple.reset_game()
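        # start each episode with a random number of no-ops to decorrelate initial states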
        for _ in range(self._random_state.randint(15)):
            self._ple.act(self._ple.NOOP)
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
        
        return [4 * [48 * [48 * [0]]]]
        
        
    def act(self, action):
        action = self._actions[action]
        
        reward = 0
        for _ in range(self._frameSkip):
            reward += self._ple.act(action)
            if self.inTerminalState():
                break
            
        self._screen = self._ple.getScreenGrayscale()
        cv2.resize(self._screen, (48, 48), self._reducedScreen, interpolation=cv2.INTER_NEAREST)
  
        self._mode_score += reward
        return np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._mode_episode_count += 1
        mean = (self._mode_score / self._mode_episode_count
                if self._mode_episode_count else "N/A")
        print("== Mean score per episode is {} over {} episodes ==".format(mean, self._mode_episode_count))


    def inputDimensions(self):
        return [(4, 48, 48)]

    def observationType(self, subject):
        return np.uint8

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._reducedScreen)]

    def inTerminalState(self):
        return self._ple.game_over()
Example #39
#coding:utf-8
from ple.games.pong import Pong
from ple import PLE
import numpy as np


def get_obs(env):
    # game_state = env.getGameState()
    # obs = list(game_state.values())
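    # use the full grayscale screen, normalized to [0, 1] and flattened to a 1-D float vector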
    obs = env.getScreenGrayscale() / 255.0
    return obs.astype(np.float64).ravel()  # np.float was removed from NumPy; float64 matches the old behaviour


if __name__ == '__main__':
    game = Pong(width=128, height=96, MAX_SCORE=11)
    p = PLE(game, fps=30, display_screen=True, force_fps=True)
    # build the agent based on the parl framework
    print(p.getActionSet())
    act_dim = len(p.getActionSet())
    p.getScreenGrayscale()
    game_state = p.getGameState()
    print(game_state)