def __init__(self, env_config):
    game = Catcher(width=screen_wh, height=screen_wh)
    fps = 30  # fps we want to run at
    frame_skip = 2
    num_steps = 2
    force_fps = False  # False for slower speed
    display_screen = True

    # make a PLE instance.
    self.env = PLE(game, fps=fps, frame_skip=frame_skip, num_steps=num_steps,
                   force_fps=force_fps, display_screen=display_screen)
    self.env.init()
    self.action_dict = {0: None, 1: 97, 2: 100}
    # PLE env starts with a black screen
    self.env.act(self.env.NOOP)
    self.action_space = Discrete(3)
    self.k = 4
    self.observation_space = spaces.Box(low=0, high=255,
                                        shape=(screen_wh, screen_wh, 1 * self.k))
    self.frames = deque([], maxlen=self.k)
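
# A minimal sketch of the matching reset()/step() pair for this frame-stacking
# wrapper. The method names follow the Gym convention and _get_obs is a
# hypothetical helper (neither appears in the source); np is numpy.
def reset(self):
    self.env.reset_game()
    frame = self.env.getScreenGrayscale()[..., None]  # (H, W, 1)
    for _ in range(self.k):
        self.frames.append(frame)
    return self._get_obs()

def step(self, action):
    reward = self.env.act(self.action_dict[action])
    frame = self.env.getScreenGrayscale()[..., None]
    self.frames.append(frame)
    return self._get_obs(), reward, self.env.game_over(), {}

def _get_obs(self):
    # concatenate the k most recent frames into an (H, W, k) observation
    return np.concatenate(list(self.frames), axis=-1)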
def __init__(self, random_seed=0, init_lives=3, normalise=True, display=False):
    self._random_seed = random_seed
    self._game = Catcher(init_lives=init_lives)
    self._normalise = normalise
    self._display = display

    if not self._display:
        # run headless: do not open a pygame window
        os.putenv('SDL_VIDEODRIVER', 'fbcon')
        os.environ["SDL_VIDEODRIVER"] = "dummy"

    if self._normalise:
        self._env = PLE(self._game, fps=30, state_preprocessor=self._normalise_ob,
                        display_screen=display)
    else:
        self._env = PLE(self._game, fps=30, state_preprocessor=self._ob,
                        display_screen=display)
    self._env.init()
    self._actions = self._env.getActionSet()
    self._env.rng.seed(random_seed)

    # Tracker
    self._cum_reward = 0
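
# A sketch of the two state preprocessors referenced above, which are not shown
# in the source. It assumes the standard Catcher state dict
# (player_x, player_vel, fruit_x, fruit_y) and normalises by the screen size;
# the exact scaling is an assumption, not the repo's implementation.
def _ob(self, state):
    # raw state vector in game units
    return np.array([state['player_x'], state['player_vel'],
                     state['fruit_x'], state['fruit_y']], dtype=np.float64)

def _normalise_ob(self, state):
    width, height = self._game.width, self._game.height
    return np.array([state['player_x'] / width,
                     state['player_vel'] / width,
                     state['fruit_x'] / width,
                     state['fruit_y'] / height], dtype=np.float64)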
def __init__(self, width, lives=1):
    '''
    @width : width of game window
    @lives : number of deaths before the episode terminates
             (death = paddle does not catch the ball)
    '''
    self.width = width
    self.game = None
    self.actions = None
    self.max_game_len = 150
    self.visitation_map = {}
    self.timer = 0
    self.coordinates = (0, 0)

    # Create game env
    catcher = Catcher(width=width, height=width, init_lives=lives)
    self.game = self.set_catcher_game_setup(catcher)
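
# set_catcher_game_setup is not shown in the source; a plausible sketch,
# mirroring the manual pygame setup (rng, screen, clock) used elsewhere in
# this collection — assumes numpy and pygame are imported:
def set_catcher_game_setup(self, game):
    game.rng = np.random.RandomState(24)
    game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32)
    game.clock = pygame.time.Clock()
    game.init()
    return game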
def get_envs():
    envs = [
        EnvWrapper('cartpole', gym.make('CartPole-v1'), 500, 550, 4000),
        EnvWrapper('catcher',
                   PLE(Catcher(init_lives=1), display_screen=False,
                       reward_values={
                           "positive": 1,
                           "negative": -1,
                           "loss": -1,
                       }),
                   100, 110, 3000),
        EnvWrapper('snake',
                   PLE(Snake(height=256, width=256), display_screen=False,
                       reward_values={
                           "tick": -0.01,
                           "positive": 5,
                           "loss": -1,
                       }),
                   100, 110, 3000),
        EnvWrapper('flappybird',
                   PLE(FlappyBird(), display_screen=False,
                       reward_values={
                           "positive": 1,
                           "tick": 0.1,
                           "loss": -1,
                       }),
                   100, 110, 3000),
    ]
    return envs
def __init__(self, game, display_screen=False):
    from ple import PLE
    assert game in [
        'catcher', 'monsterkong', 'flappybird', 'pixelcopter', 'pong',
        'puckworld', 'raycastmaze', 'snake', 'waterworld'
    ]
    if game == 'catcher':
        from ple.games.catcher import Catcher
        env = Catcher()
    elif game == 'monsterkong':
        from ple.games.monsterkong import MonsterKong
        env = MonsterKong()
    elif game == 'flappybird':
        from ple.games.flappybird import FlappyBird
        env = FlappyBird()
    elif game == 'pixelcopter':
        from ple.games.pixelcopter import Pixelcopter
        env = Pixelcopter()
    elif game == 'pong':
        from ple.games.pong import Pong
        env = Pong()
    elif game == 'puckworld':
        from ple.games.puckworld import PuckWorld
        env = PuckWorld()
    elif game == 'raycastmaze':
        from ple.games.raycastmaze import RaycastMaze
        env = RaycastMaze()
    elif game == 'snake':
        from ple.games.snake import Snake
        env = Snake()
    elif game == 'waterworld':
        from ple.games.waterworld import WaterWorld
        env = WaterWorld()
    self.p = PLE(env, fps=30, display_screen=display_screen)
    self.action_set = self.p.getActionSet()
    self.action_size = len(self.action_set)
    self.screen_dims = self.p.getScreenDims()
    self.p.init()
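
# A short usage sketch for the wrapper above. The class name GameEnv is an
# assumption (the enclosing class is not shown in the source): run a few
# random actions and read back the screen.
import numpy as np

env = GameEnv('catcher', display_screen=False)
for _ in range(100):
    if env.p.game_over():
        env.p.reset_game()
    action = env.action_set[np.random.randint(env.action_size)]
    reward = env.p.act(action)
    screen = env.p.getScreenRGB()  # current frame as a uint8 numpy array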
def a3c_main(save_path, shared_model, model, select_action, perform_action,
             save_model, optimizer=None, train=True, display=False,
             gamma=.99, tau=1.):
    fps = 30  # fps we want to run at
    frame_skip = 2
    num_steps = 1
    force_fps = False  # slower speed
    game = Catcher(width=256, height=256)
    p = PLE(game, fps=fps, frame_skip=frame_skip, num_steps=num_steps,
            force_fps=force_fps, display_screen=display)
    p.init()

    def p_action(action):
        # perform the action and return its reward
        return p.act(action)

    def main(lstm_shape, steps):
        values = []
        log_probs = []
        rewards = []
        entropies = []

        x_t = extract_image(p.getScreenRGB(), (80, 80))
        stack_x = np.stack((x_t, x_t, x_t, x_t), axis=0)
        model.load_state_dict(shared_model.state_dict())
        cx = Variable(torch.zeros(1, lstm_shape[-1]))
        hx = Variable(torch.zeros(1, lstm_shape[-1]))

        try:
            while not p.game_over() and steps > 0:
                steps -= 1
                x_t = extract_image(p.getScreenRGB(), (80, 80))
                x_t = np.reshape(x_t, (1, 80, 80))
                st = np.append(stack_x[1:4, :, :], x_t, axis=0)

                if train:
                    reward, action, hx, cx, info_dict = train_and_play(
                        p_action, st, select_action, perform_action,
                        possible_actions, opt_nothing, model,
                        {"isTrain": True, "hx": hx, "cx": cx})
                    rewards.append(reward)
                    entropies.append(info_dict["entropies"])
                    values.append(info_dict["values"])
                    log_probs.append(info_dict["log_probs"])
                else:
                    _, _, hx, cx, _ = play(
                        p_action, st, select_action, perform_action,
                        possible_actions, model,
                        {"hx": hx, "cx": cx, "isTrain": False})
                stack_x = st

            if train:
                state = torch.from_numpy(stack_x)
                R = torch.zeros(1, 1)
                if steps > 0:
                    value, _, _ = model(
                        (Variable(state.unsqueeze(0).float()), (hx, cx)))
                    R = value.data  # bootstrap from the critic's value estimate
                values.append(Variable(R))

                policy_loss = 0
                value_loss = 0
                R = Variable(R)
                gae = torch.zeros(1, 1)
                for i in reversed(range(len(rewards))):
                    R = gamma * R + rewards[i]
                    advantage = R - values[i]
                    value_loss = value_loss + 0.5 * advantage.pow(2)

                    # Generalized Advantage Estimation
                    delta_t = rewards[i] + gamma * \
                        values[i + 1].data - values[i].data
                    gae = gae * gamma * tau + delta_t

                    policy_loss = policy_loss - \
                        log_probs[i] * Variable(gae) - 0.01 * entropies[i]

                optimizer.zero_grad()
                (policy_loss + 0.5 * value_loss).backward()
                torch.nn.utils.clip_grad_norm(model.parameters(), 40)
                ensure_shared_grads(model, shared_model)
                optimizer.step()
        except Exception as e:
            print("Exception >>", e)
            print("Saving model")
            if train:
                save_model(shared_model, save_path)

        score = p.score()
        p.reset_game()
        if train:
            save_model(shared_model, save_path)
        return score

    return main
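
# ensure_shared_grads is called above but not shown. The common A3C helper
# (as found in the widely used pytorch-a3c implementations) copies each
# worker's local gradients into the shared model exactly once per step:
def ensure_shared_grads(model, shared_model):
    for param, shared_param in zip(model.parameters(),
                                   shared_model.parameters()):
        if shared_param.grad is not None:
            return  # another worker already populated the shared gradients
        shared_param._grad = param.grad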
        episode_reward = 0
        while True:
            action = agent.predict(obs)  # pick the greedy (best) action only
            action = env.getActionSet()[action]
            reward = env.act(action)
            obs = list(env.getGameState().values())
            episode_reward += reward
            if render:
                env.getScreenRGB()
            if env.game_over():
                break
        eval_reward.append(episode_reward)
    return np.mean(eval_reward)


env = Catcher(500, 500)
env = PLE(env, fps=10, display_screen=True, force_fps=False)
act_dim = len(env.getActionSet())
obs_dim = len(env.getGameState())

rpm = ReplayMemory(MEMORY_SIZE)
model = Model(act_dim=act_dim)
alg = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
# epsilon-greedy exploration: start at 0.1, decay by 1e-6 per step
agent = Agent(alg, obs_dim=obs_dim, act_dim=act_dim,
              e_greed=0.1, e_greed_decrement=1e-6)

"""
# fill the replay memory with initial experience
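
# A sketch of the replay-memory warm-up that the trailing comment hints at,
# in the style of the DQN tutorials this snippet appears to follow;
# run_episode and MEMORY_WARMUP_SIZE are illustrative names, not from the source:
while len(rpm) < MEMORY_WARMUP_SIZE:
    run_episode(env, agent, rpm)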
        obs = obs / norm_coeff
        arr = np.hstack((np.identity(3), np.tile(obs, (3, 1))))
        inputs = torch.FloatTensor(arr)
        outputs = self.model(inputs)
        _, action_index = outputs.max(0)
        action_index = int(action_index)
        action = self.actions[action_index]
        return action


# load trained neural network
model = torch.load('../model/neural_network.pt')

# initialize game
game = Catcher(width=100, height=100, init_lives=1)
p = PLE(game, fps=30, frame_skip=3, num_steps=1, force_fps=False,
        display_screen=True)
p.init()

# initialize agent
agent = PlayingAgent(p.getActionSet(), model)

# run episodes
episodes = 10
max_timestamps = 300
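
# A sketch of the playing loop implied by episodes/max_timestamps above.
# The loop body and the agent method name (pick_action) are assumptions;
# only the episode/timestamp structure comes from the snippet:
for episode in range(episodes):
    p.reset_game()
    for t in range(max_timestamps):
        obs = np.array(list(game.getGameState().values()), dtype=np.float32)
        action = agent.pick_action(obs)
        p.act(action)
        if p.game_over():
            break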
catcher_dict['pygame'] = True
catcher_dict['state_means'] = [29.88745927, 0.15930137, 22.5392288, 24.73781436]
catcher_dict['state_stds'] = [13.89457683, 2.04087944, 17.41686248, 23.38546788]
game_params = {'cartpole': cartpole_dict, 'catcher': catcher_dict}

# save all params
if args.save:
    all_params = {'sim': sim_params, 'train': train_params, 'arch': arch_params,
                  'bf': bf_params, 'game': game_params}
    pickle.dump(all_params, open(save_file + "_params.pkl", "wb"))


if __name__ == "__main__":
    # Initiate cartpole env
    cartpole_env = gym.make('CartPole-v1')

    # Initiate catcher env
    catcher_env = PLE(Catcher(init_lives=1), state_preprocessor=process_state,
                      display_screen=False)
    catcher_env.init()
    game_params['catcher']['actions'] = catcher_env.getActionSet()
    envs = {'cartpole': cartpole_env, 'catcher': catcher_env}

    # Initialise the first task: cartpole
    curr_task = sim_params['first_task']
    env = envs[curr_task]

    # Multiple replay databases maintained if multitasking
    if train_params['multitask']:
        mem_length = train_params['replay_sizes']
    else:
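
# process_state is referenced above but not shown; the conventional PLE
# state_preprocessor simply flattens the game-state dict into a vector
# (a sketch, not necessarily this repo's exact implementation):
def process_state(state):
    return np.array(list(state.values()), dtype=np.float32)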
def test_catcher(self):
    from ple.games.catcher import Catcher
    game = Catcher()
    self.run_a_game(game)
from ple.games.catcher import Catcher
from ple import PLE
import pygame
import numpy as np
from NaiveAgent import NaiveAgent

if __name__ == '__main__':
    pygame.init()
    game = Catcher(width=256, height=256)
    game.rng = np.random.RandomState(24)
    game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32)
    game.clock = pygame.time.Clock()
    game.init()

    ''' create learning environment '''
    p = PLE(game, fps=30, display_screen=True, force_fps=False)
    p.init()

    ''' set my agent actions and rewards '''
    myAgent = NaiveAgent(p.getActionSet())
    reward = 0.0

    # step the game manually (the agent is created but not yet acting here)
    while True:
        dt = game.clock.tick_busy_loop(30)
        if game.game_over():
            game.reset()
        game.step(dt)
        pygame.display.update()
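
# NaiveAgent itself is not shown here; PLE's own documentation uses an agent
# of this name that simply samples a random legal action, e.g. (a sketch):
import numpy as np

class NaiveAgent:
    def __init__(self, actions):
        self.actions = actions

    def pickAction(self, reward, obs):
        # ignore reward and observation; act uniformly at random
        return self.actions[np.random.randint(0, len(self.actions))]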
# Catcher
from ple import PLE
from ple.games.catcher import Catcher
import time
import sys
import random

game = Catcher(256, 256, 1)
p = PLE(game, display_screen=True)
p.init()

print(p.getActionSet())
action_set = p.getActionSet()

nb_frames = 1000
for f in range(nb_frames):
    p.act(random.choice(action_set))
    time.sleep(.01)
    if p.game_over():
        sys.exit()
MOMENTUM = 0
CLIP_DELTA = 1.0
EPSILON_START = 1.0
EPSILON_MIN = .1
EPSILON_DECAY = 10000
UPDATE_FREQUENCY = 1
REPLAY_MEMORY_SIZE = 1000000
BATCH_SIZE = 32
FREEZE_INTERVAL = 1000
DETERMINISTIC = True


if __name__ == "__main__":
    game = Catcher(width=64, height=64)
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = PLE_env(rng, game=game, frame_skip=parameters.frame_skip,
                  ple_options={"display_screen": True, "force_fps": True, "fps": 30})

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(
def init_main(save_path, model, train=True, display=False):
    push_to_memory, select_action, perform_action, optimize, save_model = model
    fps = 30  # fps we want to run at
    frame_skip = 2
    num_steps = 1
    force_fps = False  # slower speed
    game = Catcher(width=256, height=256)
    p = PLE(game, fps=fps, frame_skip=frame_skip, num_steps=num_steps,
            force_fps=force_fps, display_screen=display)
    p.init()

    def p_action(action):
        # perform the action and return its reward
        return p.act(action)

    def main(steps):
        x_t = extract_image(p.getScreenRGB(), (80, 80))
        stack_x = np.stack((x_t, x_t, x_t, x_t), axis=0)

        while not p.game_over() and steps > 0:
            try:
                steps -= 1
                x_t = extract_image(p.getScreenRGB(), (80, 80))
                x_t = np.reshape(x_t, (1, 80, 80))
                st = np.append(stack_x[1:4, :, :], x_t, axis=0)

                if train:
                    reward, action, _, _, _ = train_and_play(
                        p_action, st, select_action, perform_action,
                        possible_actions, optimize, None, {})
                    push_to_memory(stack_x, action, st, reward)
                else:
                    play(p_action, st, select_action, perform_action,
                         possible_actions, None, {})
                stack_x = st
            except Exception as e:
                print("Exception >>", e)
                print("Saving model")
                if train:
                    save_model(save_path)
                break

        score = p.score()
        p.reset_game()
        if train:
            save_model(save_path)
        return score

    return main
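
# extract_image is used by both training entry points above but is not shown.
# A plausible sketch that matches its usage (RGB screen in, 80x80 grayscale
# array out) using OpenCV — the cv2 calls and /255 scaling are assumptions:
import cv2

def extract_image(screen_rgb, size):
    gray = cv2.cvtColor(screen_rgb, cv2.COLOR_RGB2GRAY)
    resized = cv2.resize(gray, size)
    return resized.astype(np.float32) / 255.0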
        return random.choice(self.actions)


'''
State Format:
{
    'player_x': int,
    'player_vel': float,
    'fruit_x': int,
    'fruit_y': int
}

Actions: [97, 100, None]
'''

game = Catcher(width=256, height=256, init_lives=3)
p = PLE(game, fps=30, display_screen=True, force_fps=False)
p.init()
agent = RandomAgent(p.getActionSet())

nb_frames = 1000
reward = 0.0

print(game.getGameState())
print(p.getActionSet())

for f in range(nb_frames):
    if p.game_over():  # check if the game is over
        p.reset_game()
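    # (the loop body is cut off above; a sketch of the usual PLE agent-loop
    # continuation follows — the method name pickAction follows PLE's example
    # agents and is an assumption about this RandomAgent)
    obs = p.getScreenRGB()
    action = agent.pickAction(reward, obs)
    reward = p.act(action)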
def __init__(self, game_name, rewards, state_as_image=True, fps=30, force_fps=True,
             frame_skip=2, hold_action=2, visualize=False, width=84, height=84,
             lives=1):
    """
    Initialize Pygame Learning Environment
    https://github.com/ntasfi/PyGame-Learning-Environment

    Args:
        game_name: PLE environment name
        rewards: dict of custom reward values
        state_as_image: if True, the state is the screen; otherwise the
            game-state dict converted to a numpy array
        fps: frames per second
        force_fps: False for slower speeds
        frame_skip: number of env frames to skip
        hold_action: number of env frames to hold each action for
        visualize: if True, display the game window (slows down training)
        width, height: height and width of the game window
        lives: number of lives in game; the game resets on game over
            (i.e. lives == 0). Only used by Catcher and by Pong (as MAX_SCORE).
    """
    self.env_name = game_name
    self.rewards = rewards
    self.lives = lives
    self.state_as_image = state_as_image
    self.fps = fps
    self.force_fps = force_fps
    self.frame_skip = frame_skip
    self.ple_num_steps = hold_action  # frames to continue each action for
    self.visualize = visualize
    self.width = width
    self.height = height

    # testing / bookkeeping
    self.reached_terminal = 0
    self.episode_time_steps = 0
    self.episode_reward = 0
    self.total_time_steps = 0

    if self.env_name == 'catcher':
        self.game = Catcher(width=self.width, height=self.height, init_lives=self.lives)
    elif self.env_name == 'pixelcopter':
        self.game = Pixelcopter(width=self.width, height=self.height)
    elif self.env_name == 'pong':
        self.game = Pong(width=self.width, height=self.height, MAX_SCORE=self.lives)
    elif self.env_name == 'puckworld':
        self.game = PuckWorld(width=self.width, height=self.height)
    elif self.env_name == 'raycastmaze':
        self.game = RaycastMaze(width=self.width, height=self.height)
    elif self.env_name == 'snake':
        self.game = Snake(width=self.width, height=self.height)
    elif self.env_name == 'waterworld':
        self.game = WaterWorld(width=self.width, height=self.height)
    elif self.env_name == 'monsterkong':
        self.game = MonsterKong()
    elif self.env_name == 'flappybird':
        # FlappyBird has fixed limitations on height and width
        self.game = FlappyBird(width=144, height=256)
    else:
        raise TensorForceError('Unknown Game Environment.')

    if self.state_as_image:
        process_state = None
    else:
        # preprocessor that reads the state dictionary as a numpy array
        def process_state(state):
            return np.array(list(state.values()), dtype=np.float32)

    # make a PLE instance
    self.env = PLE(self.game, reward_values=self.rewards, fps=self.fps,
                   frame_skip=self.frame_skip, num_steps=self.ple_num_steps,
                   force_fps=self.force_fps, display_screen=self.visualize,
                   state_preprocessor=process_state)

    # set up the gamescreen object; the screen is always returned in color,
    # and TensorForce does any grayscale processing
    if state_as_image:
        w, h = self.env.getScreenDims()
        self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
    else:
        self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)

    # set up the action converter: PLE returns legal action keys; sort them
    # so indices map to a stable order (the key puts None last, since Python 3
    # cannot compare None with ints directly)
    self.action_list = self.env.getActionSet()
    self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))
def __init__(self, config, summary=None):
    assert isinstance(config, Config)
    """
    Parameters:
    Name:                Type    Default:  Description (omitted when self-explanatory):
    max_episode_length   int     500000    Max number of steps executed in an episode
                                           before forcing a timeout
    norm_state           bool    True      Normalize the state to [-1, 1]
    display              bool    False     Whether to display the screen of the game
    init_lives           int     3         Number of lives at the start of the game
    store_summary        bool    False     Whether to store the summary of the environment
    number_of_steps      int     500000    Total number of environment steps
    """
    check_attribute(config, 'current_step', 0)
    self.config = config

    # environment parameters
    self.max_episode_length = check_attribute(config, 'max_episode_length', default_value=500000)
    self.norm_state = check_attribute(config, 'norm_state', default_value=True)
    self.display = False
    self.init_lives = 3
    # self.display = check_attribute(config, 'display', default_value=False)
    # self.init_lives = check_attribute(config, 'init_lives', default_value=3)

    # summary parameters
    self.store_summary = check_attribute(config, 'store_summary', default_value=False)
    self.summary = summary
    self.number_of_steps = check_attribute(config, 'number_of_steps', 500000)

    if self.store_summary:
        assert isinstance(self.summary, dict)
        self.reward_per_step = np.zeros(self.number_of_steps, dtype=np.float64)
        check_dict_else_default(self.summary, "steps_per_episode", [])
        check_dict_else_default(self.summary, "reward_per_step", self.reward_per_step)

    # set up the original catcher environment with the specified parameters
    self.catcherOb = Catcher(init_lives=self.init_lives)
    if not self.display:
        # do not open a pygame window
        os.putenv('SDL_VIDEODRIVER', 'fbcon')
        os.environ["SDL_VIDEODRIVER"] = "dummy"
    if self.norm_state:
        self.pOb = PLE(self.catcherOb, fps=30, state_preprocessor=get_ob_normalize,
                       display_screen=self.display)
    else:
        self.pOb = PLE(self.catcherOb, fps=30, state_preprocessor=get_ob,
                       display_screen=self.display)
    self.pOb.init()

    # environment internal state
    # self.pOb.getActionSet() returns (left = 97, do nothing = None, right = 100)
    self.actions = [97, None, 100]
    self.num_action = 3
    self.num_state = 4
    self.episode_step_count = 0
    self.pOb.reset_game()
    self.current_state = self.pOb.getGameState()
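
# get_ob and get_ob_normalize are referenced above but not shown; a sketch
# consistent with the 4-dimensional Catcher state and the [-1, 1] target range
# mentioned in the docstring. The scaling constants (default 64x64 screen,
# velocity scale) are assumptions, not the repo's exact values:
def get_ob(state):
    return np.array([state['player_x'], state['player_vel'],
                     state['fruit_x'], state['fruit_y']], dtype=np.float64)

def get_ob_normalize(state):
    ob = get_ob(state)
    ob[0] = (ob[0] / 64.0) * 2 - 1   # player_x in [0, 64] -> [-1, 1]
    ob[1] = ob[1] / 10.0             # player_vel (scale is a guess)
    ob[2] = (ob[2] / 64.0) * 2 - 1   # fruit_x
    ob[3] = (ob[3] / 64.0) * 2 - 1   # fruit_y
    return ob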
epsilon = 0.15
epsilon_steps = 30000  # decay steps
epsilon_min = 0.1
lr = 0.01
discount = 0.95  # discount factor
rng = np.random.RandomState(24)

# memory settings
max_memory_size = 100000
min_memory_size = 1000  # number needed before model training starts

epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

# PLE takes our game and the state_preprocessor. It will process the state
# for our agent.
game = Catcher(width=128, height=128)
env = PLE(game, fps=60, state_preprocessor=nv_state_preprocessor)

agent = Agent(env, batch_size, num_frames, frame_skip, lr, discount, rng,
              optimizer="sgd_nesterov")
agent.build_model()

memory = ReplayMemory(max_memory_size, min_memory_size)

env.init()
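
# nv_state_preprocessor comes from PLE's example support code and is not
# reproduced here; a sketch of a compatible preprocessor (flatten the state
# dict to a vector — the scaling by screen size is an assumption):
def nv_state_preprocessor(state):
    obs = np.array(list(state.values()), dtype=np.float32)
    return obs / 128.0  # scale roughly into [0, 1] for a 128x128 screen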
            targets.append(target_f[0])
        states = np.array(states)
        targets = np.array(targets)
        self.model.fit(states, targets, nb_epoch=1, verbose=0)  # train the network
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):  # load trained network weights
        self.model.load_weights(name)

    def save(self, name):  # save network weights
        self.model.save_weights(name)


if __name__ == "__main__":
    game = Catcher(width=320, height=320)
    env = PLE(game, display_screen=True, state_preprocessor=process_state)
    agent = DQNAgent(env)
    agent.load("./save/catcher.h5")

    # initialization
    # pylab.title("reward")
    # pylab.xlabel("episodes")
    # pylab.ylabel("rewards")
    env.init()

    scores, time = [], []
    for e in range(EPISODES):
        env.reset_game()
        state = env.getGameState()
        state = np.array([list(state[0])])
max_memory_size = 100000
min_memory_size = 1000  # number needed before model training starts

epsilon_rate = (epsilon - epsilon_min) / epsilon_steps

rewardsVals = {
    "positive": 1.0,
    "negative": -0.01,
    "tick": -0.0,
    "loss": -5.0,
    "win": 5.0
}

# PLE takes our game and the state_preprocessor. It will process the state
# for our agent.
game = Catcher(128, 128)
# game = FlappyBird()
# game = RaycastMaze()
env = PLE(game, fps=60, state_preprocessor=nv_state_preprocessor,
          reward_values=rewardsVals)

agent = Agent(env, batch_size, num_frames, frame_skip, lr, discount,