Example #1
    def __init__(self):
        self.game = Snake()
        self.p = PLE(self.game, fps=30, display_screen=True)

        # self.actions = self.p.getActionSet()
        # self._action_space = list(range(self.actions[0]))
        # self._action_space.append(self.actions[-1])
        self.action_space = self.p.getActionSet()
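The commented-out lines suggest trimming or remapping the action set returned by PLE. As a small, hedged sketch (the step method and its behavior are assumptions, not part of the original class), the wrapper could convert a discrete index into a PLE key press like this:

    # Hedged sketch: continue the wrapper above with a step helper.
    # Depending on the PLE version, the action set may or may not end with None (the no-op).
    def step(self, action_index):
        key = self.action_space[action_index]    # a pygame key code, or possibly None
        reward = self.p.act(key)                  # advance one frame and collect the reward
        done = self.p.game_over()
        return reward, done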
Example #2
def main():
    game = Snake()
    game = PLE(game, display_screen=True)
    game.init()
    action_space = game.getActionSet()
    game.act(0)
    agent = Agent(game, MEMORY_MAX_SIZE, EXPLORATION_RATE, DISCOUNT,
                  INITIALIZATION_SIZE)
    return game, agent
Example #3
	def __init__(self, height=32, width=32, fps=15, frame_history_size=4):
		# create the game environment and initialize the attribute values

		self.game = Snake(height=height,width=width,init_length=4)
		reward_dict = {"positive": 1.0, "negative": -1.0, "tick": 0.0, "loss": -1.0, "win": 1.0}
		self.environment = PLE(self.game, fps=fps, reward_values=reward_dict, num_steps=2)
		self.init_env() # initialize the game
		self.allowed_actions = self.environment.getActionSet() # the list of allowed actions to be taken by an agent
		self.num_actions = len(self.allowed_actions) - 1  # number of actions that are allowed in this env
		self.frame_hist = frame_history(height=height, width=width, frame_history_size=frame_history_size, num_channels=3)
		self.input_shape = self.frame_hist.get_history().shape  # shape of the game input screen
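The frame_history helper used here is not part of the snippet. A minimal sketch of a rolling frame buffer with the same constructor signature and get_history() accessor (all of this is an assumption about the real class) could be:

# Hedged sketch of frame_history: a rolling buffer of the last N RGB frames.
import numpy as np

class frame_history:
    def __init__(self, height, width, frame_history_size, num_channels=3):
        self.buffer = np.zeros((frame_history_size, height, width, num_channels), dtype=np.uint8)

    def add(self, frame):
        # drop the oldest frame and append the newest one
        self.buffer = np.roll(self.buffer, shift=-1, axis=0)
        self.buffer[-1] = frame

    def get_history(self):
        return self.buffer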
Example #4
def test():
    game = Snake(600, 600)
    p = PLE(game,
            fps=60,
            state_preprocessor=process_state,
            force_fps=True,
            display_screen=True,
            frame_skip=2,
            reward_values={
                "positive": 100.0,
                "negative": -50.0,
                "tick": -0.1,
                "loss": -70.0,
                "win": 5.0
            })
    agent = Agent(alpha=float(sys.argv[1]),
                  gamma=float(sys.argv[2]),
                  n_actions=3,
                  epsilon=0.01,
                  batch_size=100,
                  input_shape=6,
                  epsilon_dec=0.99999,
                  epsilon_end=0.001,
                  memory_size=500000,
                  file_name=sys.argv[3],
                  activations=[str(sys.argv[4]),
                               str(sys.argv[5])])
    p.init()
    agent.load_game()
    scores = []

    for _ in range(200):
        if p.game_over():
            p.reset_game()
        apples = 0
        initial_direction = "Right"
        while not p.game_over():
            old_state = np.array(
                vision(list(p.getGameState()[0]), initial_direction))

            action = agent.choose_action(old_state)
            possible_directions = prepare_corect_directions(initial_direction)
            possible_directions_tuples = list(
                zip(possible_directions.keys(), possible_directions.values()))
            direction = possible_directions_tuples[action]
            initial_direction = direction[1]

            reward = p.act(direction[0])
            if reward > 50.0:
                apples += reward

        scores.append(apples)
    return scores
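Both this test loop and the training loop in Example #16 call helpers that are not shown (vision and prepare_corect_directions). As a purely hypothetical sketch, prepare_corect_directions might map the current heading to the three headings that do not reverse the snake, keyed by pygame key codes (the concrete codes and ordering below are assumptions, not the author's code):

# Hypothetical sketch of prepare_corect_directions; the real helper is not shown.
def prepare_corect_directions(current_direction):
    # assumed key codes: w=119 (Up), a=97 (Left), s=115 (Down), d=100 (Right)
    all_directions = {119: "Up", 97: "Left", 115: "Down", 100: "Right"}
    opposite = {"Up": "Down", "Down": "Up", "Left": "Right", "Right": "Left"}
    # keep every direction except the one that would reverse the snake
    return {key: name for key, name in all_directions.items()
            if name != opposite[current_direction]}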
Example #5
def main():
    # create the environment
    game = Snake(width=200, height=200, init_length=5)
    p = PLE(game, fps=30, display_screen=False, force_fps=True)
    # build the agent with the PARL framework
    print(p.getActionSet())
    act_dim = len(p.getActionSet())

    obs_dim = 200 * 200

    rpm = ReplayMemory(MEMORY_SIZE)  # DQN experience replay buffer

    model = Model(act_dim=act_dim)
    alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(alg,
                  obs_dim=obs_dim,
                  act_dim=act_dim,
                  e_greed_decrement=1e-6,
                  e_greed=0.2)  # e_greed: probability of taking a random action, for exploration

    # load a saved model
    # if os.path.exists('./dqn_snake_400.ckpt'):
    #     agent.restore('./dqn_snake_400.ckpt')

    # pre-fill the replay buffer so the first training batches have enough diverse samples
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(p, agent, rpm)

    max_episode = 2000000
    # start training
    episode = 0
    best_reward = -float('inf')
    while episode < max_episode:  # train for max_episode episodes; the test part does not count toward the episode total
        # train part
        for i in range(0, 100):
            total_reward = run_episode(p, agent, rpm)
            episode += 1
        # test part
        eval_reward = evaluate(p, agent, render=True)  # render=True to watch the evaluation
        if eval_reward > best_reward:
            best_reward = eval_reward
            agent.save('model_dir/dqn_snake_{}.ckpt'.format(episode))
        logger.info('episode:{}    e_greed:{}   test_reward:{}'.format(
            episode, agent.e_greed, eval_reward))
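run_episode and evaluate are not included in this example. Below is a hedged sketch of run_episode for a PLE game with an experience-replay buffer; BATCH_SIZE, the replay-buffer append/sample API, and the agent's sample/learn methods are assumptions about the surrounding PARL code, not the original implementation.

# Hedged sketch of run_episode (not part of the original example).
import numpy as np

BATCH_SIZE = 32  # hypothetical value

def run_episode(p, agent, rpm):
    p.reset_game()
    actions = p.getActionSet()
    obs = p.getScreenGrayscale().astype(np.float32).flatten()
    total_reward = 0.0
    while not p.game_over():
        action = agent.sample(obs)                   # epsilon-greedy action index
        reward = p.act(actions[action])
        next_obs = p.getScreenGrayscale().astype(np.float32).flatten()
        done = p.game_over()
        rpm.append((obs, action, reward, next_obs, done))
        if len(rpm) > MEMORY_WARMUP_SIZE:            # constant defined in the surrounding script
            agent.learn(rpm.sample(BATCH_SIZE))      # assumed agent / replay-buffer API
        total_reward += reward
        obs = next_obs
    return total_reward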
Example #6
def main():
    # create the environment
    game = Snake(width=224, height=224, init_length=7)
    p = PLE(game, fps=30, display_screen=True, force_fps=True)
    # build the agent with the PARL framework
    print(p.getActionSet())
    act_dim = len(p.getActionSet())

    # rpm = ReplayMemory(MEMORY_SIZE)  # DQN experience replay buffer

    model = Model(act_dim=act_dim)
    alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = Agent(alg, act_dim=act_dim, e_greed_decrement=1e-6,
                  e_greed=0.2)  # e_greed: probability of taking a random action, for exploration

    # load a saved model
    if os.path.exists('./dqn_snake_7.ckpt'):
        agent.restore('./dqn_snake_7.ckpt')
    evaluate(p, agent, False)
Example #7
def get_envs():
    envs = [
        EnvWrapper('cartpole', gym.make('CartPole-v1'), 500, 550, 4000),
        EnvWrapper('catcher', PLE(Catcher(init_lives=1), display_screen=False, reward_values={
            "positive": 1,
            "negative": -1,
            "loss": -1,
        }), 100, 110, 3000),
        EnvWrapper('snake', PLE(Snake(height=256, width=256), display_screen=False, reward_values={
            "tick": -0.01,
            "positive": 5,
            "loss": -1,
        }), 100, 110, 3000),
        EnvWrapper('flappybird', PLE(FlappyBird(), display_screen=False, reward_values={
            "positive": 1,
            "tick": 0.1,
            "loss": -1,
        }), 100, 110, 3000),
    ]
    return envs
Example #8
def main():
    # create the environment
    game = Snake(width=256, height=256, init_length=10)
    p = PLE(game, fps=30, display_screen=True, force_fps=True)
    # build the agent with the PARL framework
    p.reset_game()
    print(p.getActionSet())
    act_dim = len(p.getActionSet())
    obs_dim = 256 * 256

    logger.info('obs_dim {}, act_dim {}'.format(obs_dim, act_dim))

    # build the agent with the PARL framework
    model = Model(act_dim=act_dim)
    alg = PolicyGradient(model, lr=LEARNING_RATE)
    agent = Agent(alg, obs_dim=obs_dim, act_dim=act_dim)

    # # load a saved model
    # if os.path.exists('model_dir/pg_pong_episode_19.ckpt'):
    #     agent.restore('model_dir/pg_pong_episode_19.ckpt')

    best_total_reward = -float('inf')
    for i in range(50000):
        obs_list, action_list, reward_list = run_episode(p, agent)
        if i % 10 == 0:
            logger.info("Episode {}, Reward Sum {}.".format(
                i, sum(reward_list)))

        batch_obs = np.array(obs_list)
        batch_action = np.array(action_list)
        batch_reward = calc_reward_to_go(reward_list)
        agent.learn(batch_obs, batch_action, batch_reward)
        if (i + 1) % 50 == 0:
            total_reward = evaluate(p, agent, render=True)
            if total_reward > best_total_reward:
                best_total_reward = total_reward
                agent.save(
                    'model_dir/pg_pong_episode_{}_reward_{}.ckpt'.format(
                        i, total_reward))
            logger.info('Test reward: {}'.format(total_reward))
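calc_reward_to_go is not shown; the standard policy-gradient reward-to-go (discounted cumulative rewards, usually normalized) is a reasonable sketch of it, with the normalization step being an assumption:

# Hedged sketch of calc_reward_to_go as the standard discounted reward-to-go.
import numpy as np

def calc_reward_to_go(reward_list, gamma=0.99):
    returns = np.zeros(len(reward_list), dtype=np.float32)
    running = 0.0
    for i in reversed(range(len(reward_list))):
        running = reward_list[i] + gamma * running
        returns[i] = running
    # normalize to stabilize the gradient estimate
    returns -= returns.mean()
    returns /= (returns.std() + 1e-8)
    return returns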
Example #9
    def __init__(self, game, display_screen=False):
        from ple import PLE
        assert game in [
            'catcher', 'monsterkong', 'flappybird', 'pixelcopter', 'pong',
            'puckworld', 'raycastmaze', 'snake', 'waterworld'
        ]
        if game == 'catcher':
            from ple.games.catcher import Catcher
            env = Catcher()
        elif game == 'monsterkong':
            from ple.games.monsterkong import MonsterKong
            env = MonsterKong()
        elif game == 'flappybird':
            from ple.games.flappybird import FlappyBird
            env = FlappyBird()
        elif game == 'pixelcopter':
            from ple.games.pixelcopter import Pixelcopter
            env = Pixelcopter()
        elif game == 'pong':
            from ple.games.pong import Pong
            env = Pong()
        elif game == 'puckworld':
            from ple.games.puckworld import PuckWorld
            env = PuckWorld()
        elif game == 'raycastmaze':
            from ple.games.raycastmaze import RaycastMaze
            env = RaycastMaze()
        elif game == 'snake':
            from ple.games.snake import Snake
            env = Snake()
        elif game == 'waterworld':
            from ple.games.waterworld import WaterWorld
            env = WaterWorld()

        self.p = PLE(env, fps=30, display_screen=display_screen)
        self.action_set = self.p.getActionSet()
        self.action_size = len(self.action_set)
        self.screen_dims = self.p.getScreenDims()
        self.p.init()
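Only the constructor of this wrapper appears in the snippet. A hedged sketch of reset/step methods built on the same PLE calls (the method names and return values are assumptions, not the original API) could follow it:

    # Hedged sketch: possible reset/step methods for the wrapper above.
    def reset(self):
        self.p.reset_game()
        return self.p.getScreenRGB()

    def step(self, action_index):
        reward = self.p.act(self.action_set[action_index])
        done = self.p.game_over()
        return self.p.getScreenRGB(), reward, done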
Example #10
                target = reward + self.gamma * \
                    np.amax(self.model.predict(np.array([next_state]))[0])
            target_f = self.model.predict(np.array([state]))

            target_f[0][self.actions.index(action)] = target

            self.model.fit(np.array([state]),
                           np.array(target_f),
                           epochs=1,
                           verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


game = Snake(width=320, height=320)
p = PLE(game, fps=30, display_screen=True)
agent = NaiveAgent(p.getActionSet())

p.init()
reward = 0.0

learning_time = 100000

actions = []
rewards = []
snake_head_x = []
snake_head_y = []
food_x = []
food_y = []
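The lists above suggest a logging loop over the game state. A hedged sketch using the Snake state keys shown in Example #11, and assuming the NaiveAgent exposes pickAction(reward, obs) as in the PLE quickstart, could be:

# Hedged sketch: let the agent play while logging state and rewards.
for _ in range(learning_time):
    if p.game_over():
        p.reset_game()
    state = p.getGameState()
    action = agent.pickAction(reward, state)   # assumed NaiveAgent interface
    reward = p.act(action)

    actions.append(action)
    rewards.append(reward)
    snake_head_x.append(state["snake_head_x"])
    snake_head_y.append(state["snake_head_y"])
    food_x.append(state["food_x"])
    food_y.append(state["food_y"])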
Example #11
    # obs = []
    # game_state = env.getGameState()
    # """
    # {'snake_head_x': 32.0, 'snake_head_y': 32.0, 'food_x': 24, 'food_y': 30,
    # 'snake_body': [0.0, 3.0, 6.0],
    #  'snake_body_pos': [[32.0, 32.0], [29.0, 32.0], [26.0, 32.0]]}
    #
    # {'snake_head_x': 33.93333333333334, 'snake_head_y': 32.0, 'food_x': 36, 'food_y': 30,
    # 'snake_body': [0.0, 1.0933333333333337, 2.5333333333333314, 5.233333333333334],
    # 'snake_body_pos': [[33.93333333333334, 32.0], [32.84, 32.0], [31.400000000000006, 32.0], [28.700000000000003, 32.0]]}
    # """
    # obs.append(game_state['snake_head_x'])
    # obs.append(game_state['snake_head_y'])
    # obs.append(game_state['food_x'])
    # obs.append(game_state['food_y'])
    # body_positions = []
    # obs.append(np.mean(np.array(game_state['snake_body'])))
    # for body_pos in game_state['snake_body_pos']:
    #     body_positions.extend(body_pos)
    # obs.append(np.mean(np.array(body_positions)))
    return obs

if __name__ == '__main__':
    game = Snake(width=64, height=64, init_length=20)
    p = PLE(game, fps=30, display_screen=True, force_fps=True)
    # build the agent with the PARL framework
    print(p.getActionSet())

    act_dim = len(p.getActionSet())
    game_state = p.getGameState()
    print(game_state)
Example #12
def a3c_main(save_path, shared_model,\
            model,\
            select_action,\
            perform_action,\
            save_model,\
            optimizer=None,\
            train=True,\
            display=False,\
            gamma =.99,\
            tau=1.):
    tresh = False
    fps = 30  # fps we want to run at
    frame_skip = 2
    num_steps = 1
    force_fps = False  # slower speed

    game = Snake(width=256, height=256)

    p = PLE(game,
            fps=fps,
            frame_skip=frame_skip,
            num_steps=num_steps,
            force_fps=force_fps,
            display_screen=display)

    p.init()

    def p_action(action):
        # reward, action
        return p.act(action)

    def main(lstm_shape, steps):

        reward_alive = 0
        values = []
        log_probs = []
        rewards = []
        entropies = []

        x_t = extract_image(p.getScreenRGB(), (80, 80), tresh=tresh)

        stack_x = np.stack((x_t, x_t, x_t, x_t), axis=0)
        model.load_state_dict(shared_model.state_dict())

        cx = Variable(torch.zeros(1, lstm_shape[-1]))
        hx = Variable(torch.zeros(1, lstm_shape[-1]))

        try:
            while not p.game_over() and steps > 0:
                steps -= 1

                x_t = extract_image(p.getScreenRGB(), (80, 80), tresh=tresh)

                x_t = np.reshape(x_t, (1, 80, 80))

                st = np.append(stack_x[1:4, :, :], x_t, axis=0)

                if train:
                    # print()
                    reward, action, hx, cx, info_dict = train_and_play(p_action, st,\
                                                        select_action, perform_action,\
                                                        possible_actions, opt_nothing, \
                                                        model, {"isTrain":True, "hx":hx,"cx":cx})
                    reward_alive += 0.1
                    reward += reward_alive
                    rewards.append(reward)
                    # reward += r

                    entropies.append(info_dict["entropies"])
                    values.append(info_dict["values"])
                    log_probs.append(info_dict["log_probs"])

                else:
                    _, _, hx, cx, _ = play(p_action, st, select_action,\
                        perform_action, possible_actions, model, {"hx":hx,"cx":cx, "isTrain":False})

                stack_x = st

            if train:
                state = torch.from_numpy(stack_x)
                R = torch.zeros(1, 1)
                if steps > 0:
                    value, _, _ = model(
                        (Variable(state.unsqueeze(0).float()), (hx, cx)))

                values.append(Variable(R))
                policy_loss = 0
                value_loss = 0
                R = Variable(R)
                gae = torch.zeros(1, 1)

                for i in reversed(range(len(rewards))):
                    R = gamma * R + rewards[i]
                    advantage = R - values[i]
                    value_loss = value_loss + 0.5 * advantage.pow(2)

                    # Generalized Advantage Estimataion
                    delta_t = rewards[i] + gamma * \
                        values[i + 1].data - values[i].data
                    gae = gae * gamma * tau + delta_t

                    policy_loss = policy_loss - \
                        log_probs[i] * Variable(gae) - 0.01 * entropies[i]

                optimizer.zero_grad()

                (policy_loss + 0.5 * value_loss).backward()
                torch.nn.utils.clip_grad_norm(model.parameters(), 40)

                ensure_shared_grads(model, shared_model)
                optimizer.step()

        except KeyboardInterrupt as e:
            print("KeyboardInterrupt >>", e)
            print("Saving model")
            if train:
                save_model(shared_model, save_path)
                print("Model saved")
            sys.exit()

        score = p.score()
        p.reset_game()
        if train: save_model(shared_model, save_path)
        return score

    return main
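ensure_shared_grads is referenced above but not shown; the common PyTorch A3C helper copies each worker's gradients into the shared model, for example (a sketch of the usual pattern, not necessarily the author's version):

# Hedged sketch of ensure_shared_grads, following the common PyTorch A3C pattern.
def ensure_shared_grads(model, shared_model):
    for param, shared_param in zip(model.parameters(), shared_model.parameters()):
        if shared_param.grad is not None:
            return  # another worker already populated the shared gradients
        shared_param._grad = param.grad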
Example #13
def init_main(save_path, model, train=True, display=False):
    """The application's entry point.

    If someone executes this module (instead of importing it, for
    example), this function is called.
    """
    push_to_memory, select_action, perform_action, optimize, save_model = model
    tresh = False
    fps = 30  # fps we want to run at
    frame_skip = 2
    num_steps = 1
    force_fps = False  # slower speed

    game = Snake(width=256, height=256)

    p = PLE(game,
            fps=fps,
            frame_skip=frame_skip,
            num_steps=num_steps,
            force_fps=force_fps,
            display_screen=display)

    p.init()

    def p_action(action):
        # reward, action
        return p.act(action)

    def main(steps):
        reward_alive = 0

        x_t = extract_image(p.getScreenRGB(), (80, 80), tresh=tresh)

        stack_x = np.stack((x_t, x_t, x_t, x_t), axis=0)
        try:
            while not p.game_over() and steps > 0:
                steps -= 1

                x_t = extract_image(p.getScreenRGB(), (80, 80), tresh=tresh)

                x_t = np.reshape(x_t, (1, 80, 80))

                st = np.append(stack_x[1:4, :, :], x_t, axis=0)

                if train:
                    r, action, _, _, _ = train_and_play(
                        p_action, st, select_action, perform_action,
                        possible_actions, optimize, None, {})

                    reward_alive += 0.1
                    r += reward_alive

                    push_to_memory(stack_x, action, st, r)

                else:
                    play(p_action, st, select_action, perform_action,
                         possible_actions, None, {})

                stack_x = st

        except KeyboardInterrupt as e:
            print("KeyboardInterrupt >>", e)
            print("Saving model")
            if train:
                save_model(save_path)
                print("Model saved")
            sys.exit()

        score = p.score()
        p.reset_game()
        if train: save_model(save_path)
        return score

    return main
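extract_image is used in both A3C examples but never defined. A hedged sketch that resizes the RGB screen, converts it to grayscale, and optionally thresholds it (matching how it is called above; OpenCV is an assumed dependency) might be:

# Hedged sketch of extract_image (the original helper is not shown).
import cv2
import numpy as np

def extract_image(screen_rgb, size, tresh=False):
    gray = cv2.cvtColor(screen_rgb, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, size)
    if tresh:
        # binarize the frame to a black-and-white image
        _, resized = cv2.threshold(resized, 1, 255, cv2.THRESH_BINARY)
    return resized.astype(np.float32) / 255.0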
Example #14
        fy = state['food_y']
        turns = individual.activate([sx,sy,fx,fy])
        best_turn = max(turns)
        if turns[0] == best_turn:
            fitness += snake_game.act(UP)
        elif turns[1] == best_turn:
            fitness += snake_game.act(LEFT)
        elif turns[2] == best_turn:
            fitness += snake_game.act(RIGHT)
        else:
            fitness += snake_game.act(DOWN)
    return fitness

if __name__ == '__main__':
    # set up the snake game
    game = Snake(width=400, height=400)
    # NOTE: if training, set force_fps=True; if testing, set force_fps=False
    snake_game = PLE(game, fps=30, display_screen=True, force_fps=False)
    snake_game.init()

    # uncomment this block to train a solution
    #model = neat.NEAT(config_file="snake.config")
    #best_genome = model.run(fitness_function=test_snake)
    #pickle.dump( best_genome, open( "snek", "wb" ) )

    # uncomment this block to test solution
    LEFT = 119
    DOWN = 97
    UP = 100
    RIGHT = 115
    individual = pickle.load(open("snek", "rb"))
Example #15
    def test_snake(self):
        from ple.games.snake import Snake
        game = Snake()
        self.run_a_game(game)
Example #16
def train():
    game = Snake(600, 600)
    p = PLE(game,
            fps=60,
            state_preprocessor=process_state,
            force_fps=True,
            display_screen=False,
            frame_skip=2,
            reward_values={
                "positive": 100.0,
                "negative": -50.0,
                "tick": -0.1,
                "loss": -110.0,
                "win": 5.0
            })
    agent = Agent(alpha=float(sys.argv[1]),
                  gamma=float(sys.argv[2]),
                  n_actions=3,
                  epsilon=0.99,
                  batch_size=100,
                  input_shape=6,
                  epsilon_dec=0.99999,
                  epsilon_end=0.001,
                  memory_size=500000,
                  file_name=sys.argv[3],
                  activations=[str(sys.argv[4]),
                               str(sys.argv[5])])
    p.init()
    # agent.load_game()

    scores = []

    for _ in range(100000):
        if p.game_over():
            p.reset_game()
        score = 0
        initial_direction = "Right"

        while not p.game_over():
            old_state = np.array(
                vision(list(p.getGameState()[0]), initial_direction))

            action = agent.choose_action(old_state)

            possible_directions = prepare_corect_directions(initial_direction)
            possible_directions_tuples = list(
                zip(possible_directions.keys(), possible_directions.values()))
            direction = possible_directions_tuples[action]
            initial_direction = direction[1]

            reward = p.act(direction[0])

            new_state = np.array(
                vision(list(p.getGameState()[0]), initial_direction))
            agent.add_experience(old_state, action, reward, new_state)
            agent.learn()
            score = p.score()
        scores.append(score)
        print(
            f"Score for model iteration {sys.argv[3]} with learning_rate {sys.argv[1]}, gamma {sys.argv[2]}, activations: {sys.argv[4], sys.argv[5]} is {score}. Epsilon is {agent.epsilon}"
        )
        agent.save_game()
Example #17
import numpy as np
from ple import PLE
from ple.games.snake import Snake
import random


def getReward(env, state, action):
    return env.act(action)

agent = Snake(width=360, height=360)
Q = {}
gama = 0.9
alpha = 0.1
explore = 0.75

env = PLE(agent, fps=15, force_fps=False, display_screen=True)

env.init()

for i in range(100000):

    if explore != 0.25:
        if i % 30000 == 0:
            explore -= 0.25

    if env.game_over():
        env.reset_game()

    state = env.getGameState()
    
    del state["snake_body_pos"]
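The snippet ends with the raw state dict; a tabular Q-learner needs a hashable, discretized key for Q. A hedged sketch of such a key function (the author's actual discretization is not shown; the cell size is an assumption) is:

# Hedged sketch: turn the state dict into a coarse, hashable Q-table key.
def state_to_key(state, cell=36):
    # 36-pixel cells give a 10x10 grid for the 360x360 board used above
    return (int(state["snake_head_x"] // cell),
            int(state["snake_head_y"] // cell),
            int(state["food_x"] // cell),
            int(state["food_y"] // cell))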
Example #18
    RMS_EPSILON = 0.0001
    MOMENTUM = 0
    CLIP_DELTA = 1.0
    EPSILON_START = 1.0
    EPSILON_MIN = .1
    EPSILON_DECAY = 100000
    UPDATE_FREQUENCY = 1
    REPLAY_MEMORY_SIZE = 1000000
    BATCH_SIZE = 32
    NETWORK_TYPE = "General_DQN_0"
    FREEZE_INTERVAL = 10000
    DETERMINISTIC = True


if __name__ == "__main__":
    game = Snake(width=64, height=64)
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = PLE_env(rng,
                  game=game,
                  frame_skip=parameters.frame_skip,
                  ple_options={
                      "display_screen": True,
Example #19
        return self.Q[fs[0], fs[1], fs[2], fs[3]]

    def pickAction(self, reward, obs):
        self.testenv(reward)
        #print(str(self.findState()))
        self.slightRandDic = {}
        for i in self.findQ(self.findState()):
            self.slightRandDic[i] = self.findQ(self.findState(
            ))[i] + self.pathr * np.random.randint(-10, 10) * self.steps
        #print(self.slightRandDic)
        self.act = max(self.slightRandDic, key=self.slightRandDic.get)
        self.rememberStep(self.act)
        return self.act  #highest value action


game = Snake(width=size, height=size, init_length=3)

fps = 30  # fps we want to run at
frame_skip = 2
num_steps = 1
force_fps = True  # True == MegaSpeed
display_screen = True

reward = 0.0
max_noops = 20
nb_frames = 1500000

# make a PLE instance.
p = PLE(game,
        fps=fps,
        frame_skip=frame_skip,
Example #20
def get_grille(game, snake_location, food_location):
    s_x, s_y = snake_location
    f_x, f_y = food_location
    grille = [[0] * int(game.width / 10) for i in range(int(game.height / 10))]
    grille[s_x][s_y] += 1
    grille[f_x][f_y] += 1
    return grille


# DO NOT CHANGE THIS VARIABLE
case_size = 20
size = 10

# Initialize the game
game = Snake(height=case_size * size, width=case_size * size)
p = PLE(game, fps=30, display_screen=True)

agent = Trainer(allowed_actions=p.getActionSet(),
                height=game.height,
                width=game.width)

p.init()
reward = 0.0
nb_frames = 10000000000000000
bestScore = 0

for i in range(nb_frames):

    if (p.score() > bestScore):
        bestScore = int(p.score())
Example #21
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, nb_epoch=1, verbose=0)
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)


if __name__ == "__main__":

    game = Snake(width=256, height=256)
    env = PLE(game,
              display_screen=True,
              fps=10,
              state_preprocessor=process_state)
    agent = DQNAgent(env)
    agent.load('./save/snake.h5')
    env.init()

    for e in range(EPISODE):
        env.reset_game()
        score = 0
        state = game.getGameState()

        state = sorted(state.items(), key=operator.itemgetter(0))
        for i in range(len(state)):
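process_state is passed as state_preprocessor here and in Examples #4 and #16 but is never shown. A common pattern, and a hedged guess at it, is to flatten the sorted state dict into a fixed-order numpy vector:

# Hedged sketch of process_state: sort the state dict by key and return a flat vector.
import numpy as np

def process_state(state):
    values = []
    for _, value in sorted(state.items()):
        if isinstance(value, (list, tuple)):
            values.extend(np.ravel(value))
        else:
            values.append(value)
    return np.array(values, dtype=np.float32)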
Example #22
    def __init__(self, game_name, rewards, state_as_image = True, fps = 30, force_fps=True, frame_skip=2,
                 hold_action=2, visualize=False, width=84, height=84, lives=1):
        """
        Initialize Pygame Learning Environment
        https://github.com/ntasfi/PyGame-Learning-Environment

        Args:
            game_name: name of the PLE game to load
            rewards: reward values dict passed to PLE
            state_as_image: if True, the state is the raw game screen; otherwise the state dict is preprocessed into a vector
            fps: frames per second
            force_fps: False for slower speeds
            frame_skip: number of env frames to skip
            hold_action: number of env frames to hold each action for
            width, height: width and height of the environment
            visualize: if True, render the game during training (slows training down)
            lives: number of lives in the game; the game resets on game over (i.e. lives == 0); only used by Catcher and Pong (as MAX_SCORE)

        """

        self.env_name = game_name
        self.rewards = rewards
        self.lives = lives
        self.state_as_image = state_as_image
        self.fps = fps #30  # frames per second
        self.force_fps = force_fps #True  # False for slower speeds
        self.frame_skip = frame_skip  # frames to skip
        self.ple_num_steps = hold_action  # frames to continue action for
        # self.isRGB = isRGB  # always returns color; let TensorForce do the processing
        self.visualize = visualize
        self.width = width
        self.height = height
        #testing
        self.reached_terminal = 0
        self.episode_time_steps = 0
        self.episode_reward = 0
        self.total_time_steps = 0

        if self.env_name == 'catcher':
            self.game = Catcher(width=self.width, height=self.height,init_lives=self.lives)
        elif self.env_name == 'pixelcopter':
            self.game = Pixelcopter(width=self.width, height=self.height)
        elif self.env_name == 'pong':
            self.game = Pong(width=self.width, height=self.height,MAX_SCORE=self.lives)
        elif self.env_name == 'puckworld':
            self.game = PuckWorld(width=self.width, height=self.height)
        elif self.env_name == 'raycastmaze':
            self.game = RaycastMaze(width=self.width, height=self.height)
        elif self.env_name == 'snake':
            self.game = Snake(width=self.width, height=self.height)
        elif self.env_name == 'waterworld':
            self.game = WaterWorld(width=self.width, height=self.height)
        elif self.env_name == 'monsterkong':
            self.game = MonsterKong()
        elif self.env_name == 'flappybird':
            self.game = FlappyBird(width=144, height=256)  # limitations on height and width for flappy bird
        else:
            raise TensorForceError('Unknown Game Environment.')

        if self.state_as_image:
            process_state = None
        else:
            #create a preprocessor to read the state dictionary as a numpy array
            def process_state(state):
                # ret_value = np.fromiter(state.values(),dtype=float,count=len(state))
                ret_value = np.array(list(state.values()), dtype=np.float32)
                return ret_value

        # make a PLE instance
        self.env = PLE(self.game,reward_values=self.rewards,fps=self.fps, frame_skip=self.frame_skip,
                       num_steps=self.ple_num_steps,force_fps=self.force_fps,display_screen=self.visualize,
                       state_preprocessor = process_state)
        #self.env.init()
        #self.env.act(self.env.NOOP) #game starts on black screen
        #self.env.reset_game()
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.reset_game()


        # setup gamescreen object
        if state_as_image:
            w, h = self.env.getScreenDims()
            self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)
        # if isRGB:
        #     self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        # else:
        #     self.gamescreen = np.empty((h, w), dtype=np.uint8)

        # setup action converter
        # PLE returns legal action indexes, convert these to just numbers
        self.action_list = self.env.getActionSet()
        self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))
Example #23
import numpy as np
from ple import PLE
from ple.games.snake import Snake

agent = Snake(width=256, height=256)

env = PLE(agent, fps=15, force_fps=False, display_screen=True)

env.init()
actions = env.getActionSet()
for i in range(1000):
    if env.game_over():
        env.reset_game()

    action = actions[np.random.randint(0, len(actions))]
    env.act(action)
Example #24

def discounted_rewards(rewards, gamma=0.99):
    res = []
    for r in reversed(rewards):
        cum_reward = res[0] if res else 0
        res.insert(0, gamma * cum_reward + r)

    return res


def train(env, agent):
    optimizer = torch.optim.Adam(agent.parameters())

    while True:
        agent.zero_grad()
        p, r = play_episode(env, agent)
        r = torch.tensor(discounted_rewards(r), device=agent.device)
        loss = -r * p
        loss = loss.mean()
        loss.backward()
        optimizer.step()


if __name__ == '__main__':
    env = PLE(Snake(), fps=30, display_screen=True)
    env.init()
    agent = Agent(env.getScreenDims(), 16, env.getActionSet())

    train(env, agent)
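play_episode and the Agent class are not included. A hedged sketch of play_episode for this REINFORCE-style loop, assuming the agent is callable on a flattened grayscale screen and returns log-probabilities over env.getActionSet() (both assumptions), could be:

# Hedged sketch of play_episode (not part of the original example).
import torch

def play_episode(env, agent):
    env.reset_game()
    actions = env.getActionSet()
    log_probs, rewards = [], []
    while not env.game_over():
        screen = torch.tensor(env.getScreenGrayscale(), dtype=torch.float32,
                              device=agent.device).flatten()
        logits = agent(screen)                          # assumed: log-probabilities over actions
        dist = torch.distributions.Categorical(logits=logits)
        idx = dist.sample()
        rewards.append(env.act(actions[idx.item()]))
        log_probs.append(dist.log_prob(idx))
    return torch.stack(log_probs), rewards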