def main(arguments: argparse.Namespace) -> None:
    """
    Main training loop.

    :param arguments: Parsed command-line arguments.
    :return: None
    """
    n_steps = arguments.steps
    n_agents = arguments.envs

    print(f'Training {arguments.game} using {"cpu" if arguments.cpu else "gpu"}')
    print(f'Number of concurrent environments {arguments.envs}')
    print(f'Number of steps per batch {arguments.steps}')
    if arguments.model:
        print(f'Using existing model {arguments.model}')

    env = SubprocVecEnv(
        [make_env(env_id=arguments.game, rank=i) for i in range(n_agents)])

    agent = DeepLearningAgent(observation_space=env.observation_space,
                              action_space=int(env.action_space.n),
                              n_envs=n_agents,
                              n_steps=n_steps,
                              model_path=arguments.model,
                              use_cpu=arguments.cpu)

    # The current state (observation) for every concurrent environment.
    observations = reshape_observations(env.reset())
    actions = agent.get_action(observations)

    initial_training_time = time.time()
    for ep in range(EPISODES):
        # Collect and train on one batch of n_steps transitions per environment.
        for i in range(n_steps):
            new_observations, rewards, done, info = env.step(
                actions.cpu().numpy())
            new_observations = reshape_observations(new_observations)
            agent.train(s=observations,
                        r=rewards,
                        s_next=new_observations,
                        a=actions,
                        done=done,
                        step=i)
            actions = agent.get_action(new_observations)
            observations = new_observations

        if ep % 100 == 0:
            fps = ((ep + 1) * n_steps * n_agents) / (time.time() - initial_training_time)
            print(f'FPS {fps}')

    env.close()
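# --- Illustrative only: a minimal sketch of a CLI entry point for main(),
# --- assuming the flag names match the attributes consumed above
# --- (game, envs, steps, cpu, model). The defaults are placeholders, not the
# --- project's real values, and _parse_args is a hypothetical helper.
def _parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Train an agent on a retro game.")
    parser.add_argument('--game', type=str, default='SonicTheHedgehog-Genesis',
                        help='Environment id to train on.')
    parser.add_argument('--envs', type=int, default=8,
                        help='Number of concurrent environments.')
    parser.add_argument('--steps', type=int, default=128,
                        help='Number of steps per training batch.')
    parser.add_argument('--cpu', action='store_true',
                        help='Force training on the CPU instead of the GPU.')
    parser.add_argument('--model', type=str, default=None,
                        help='Path to an existing model to continue training from.')
    return parser.parse_args()


# The module presumably has its own entry point; this shows how main() would
# be invoked with the sketch above:
# if __name__ == '__main__':
#     main(_parse_args())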
def main():
    # Reward shaping is configured in scenario.json
    # (C:\Users\Fergus\Anaconda3\envs\AIGym\Lib\site-packages\retro\data\stable\SonicTheHedgehog-Genesis)
    env = SubprocVecEnv([make_env_3])
    obs = env.reset()

    print(env.observation_space)
    print(env.action_space.n)
    print(obs.shape)
    print(obs[0].shape)

    rew_mb = []
    dones_mb = []
    obs_mb = []
    step = 0
    while True:
        # Step with a fixed action index; swap in env.action_space.sample()
        # for random behaviour.
        obs, rew, done, info = env.step([0])
        print(f"Step {step} Reward: {rew}, Done: {done}")
        rew_mb.append(rew)
        dones_mb.append(done)
        obs_mb.append(obs)
        env.render()
        step += 1

        # Optional per-frame debug view of the stacked grayscale frames:
        # obs = obs[1] / 255.
        # for i in range(4):
        #     cv2.imshow('GrayScale' + str(i), np.squeeze(obs[:, :, i]))
        #     cv2.waitKey(1)

        if done[0]:
            env.close()
            break

    rew_mb = np.array(rew_mb)
    dones_mb = np.array(dones_mb)
    obs_mb = np.array(obs_mb)
    print("Rewards: ", rew_mb)
    print(rew_mb.shape)
    print(dones_mb)
    print(dones_mb.shape)
    print(obs_mb.shape)
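# --- Illustrative only: make_env_3 is defined elsewhere in this project.
# --- The sketch below shows the kind of factory SubprocVecEnv expects (a
# --- no-argument callable returning a fresh env); the specific wrappers and
# --- frame size are assumptions, not the project's actual configuration.
def make_env_3_sketch():
    env = retro.make(game='SonicTheHedgehog-Genesis')
    # Downscale to 84x84 grayscale and stack four frames, as is common for
    # pixel-based agents.
    env = WarpFrame(env, width=84, height=84, grayscale=True)
    env = FrameStack(env, 4)
    return env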
def main7():
    retro.data.add_custom_integration("custom")

    def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1, grayscale=False):
        env = MaxAndSkipEnv(env, skip=4)
        env = WarpFrame(env, width=150, height=100, grayscale=grayscale)
        env = FrameStack(env, frame_stack)
        env = ScaledFloatFrame(env)
        env = RewardScaler(env, scale=reward_scale)
        return env

    def make_env():
        retro.data.add_custom_integration("custom")
        env = retro.n64_env.N64Env(game="SuperSmashBros-N64",
                                   use_restricted_actions=retro.Actions.MULTI_DISCRETE,
                                   inttype=retro.data.Integrations.CUSTOM,
                                   obs_type=retro.Observations.IMAGE)
        env = wrap_deepmind_n64(env)
        return env

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    nenvs = 2
    # env = DummyVecEnv([make_env] * nenvs)
    env = SubprocVecEnv([make_env] * nenvs)

    network_name = "impala_cnn_lstm"
    policy = build_policy(env, network_name)
    recurrent = "lstm" in network_name

    ob_space = env.observation_space
    ac_space = env.action_space
    nsteps = 10
    nminibatches = 2
    nbatch = nenvs * nsteps
    nbatch_train = nbatch // nminibatches
    model = Model(policy=policy,
                  ob_space=ob_space,
                  ac_space=ac_space,
                  nbatch_act=nenvs,
                  nbatch_train=nbatch_train,
                  nsteps=nsteps,
                  ent_coef=0.01,
                  vf_coef=0.5,
                  max_grad_norm=0.5,
                  comm=None,
                  mpi_rank_weight=1)
    runner = Runner(env=env, model=model, nsteps=nsteps, gamma=.99, lam=.95)

    env.reset()
    num_steps = 20000
    start = time.time()
    for i in range(num_steps):
        sys.stdout.write(f"\r{i + 1} / {num_steps}")
        action = [env.action_space.sample() for _ in range(nenvs)]
        obs, reward, dones, info = env.step(action)
        # env.render()

        if i % 50 == 0:
            if recurrent:
                # One (stacked) frame per environment.
                fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(20, 12))
            else:
                # Four stacked frames per environment, two per row.
                fig, axs = plt.subplots(nrows=4, ncols=2, figsize=(20, 12))
            for env_index in range(nenvs):
                if recurrent:
                    axs[env_index].imshow(obs[env_index, :, :, :])
                else:
                    for j in range(4):
                        row = env_index * 2 + j // 2
                        col = j % 2
                        axs[row, col].imshow(obs[env_index, :, :, j])
            plt.show()
            plt.close()

    end = time.time()
    print(end - start)
    return env
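# --- Illustrative only: main7 builds a ppo2 Model/Runner pair but only steps
# --- the environment with random actions. Assuming the OpenAI baselines ppo2
# --- interfaces, a single policy update on a freshly collected rollout could
# --- look roughly like the sketch below. The learning rate, clip range, and
# --- single full-batch update are placeholder choices; the real ppo2.learn()
# --- additionally shuffles minibatches and trains for several epochs.
def run_one_ppo_update(runner, model, lr=2.5e-4, cliprange=0.1):
    # Collect nsteps of experience from every environment.
    obs, returns, masks, actions, values, neglogpacs, states, epinfos = runner.run()
    # One gradient update over the whole batch.
    loss_values = model.train(lr, cliprange, obs, returns, masks,
                              actions, values, neglogpacs, states)
    return loss_values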
def main6():
    retro.data.add_custom_integration("custom")

    def wrap_deepmind_n64(env, reward_scale=1 / 100.0, frame_stack=1):
        env = MaxAndSkipEnv(env, skip=4)
        env = WarpFrame(env, width=450, height=300, grayscale=False)
        env = FrameStack(env, frame_stack)
        env = ScaledFloatFrame(env)
        env = RewardScaler(env, scale=reward_scale)
        return env

    def make_env():
        retro.data.add_custom_integration("custom")
        state = "ssb64.pikachu.level9dk.dreamland.state"
        env = retro.n64_env.N64Env(game="SuperSmashBros-N64",
                                   use_restricted_actions=retro.Actions.MULTI_DISCRETE,
                                   inttype=retro.data.Integrations.CUSTOM,
                                   obs_type=retro.Observations.IMAGE,
                                   state=state)
        env = wrap_deepmind_n64(env)
        return env

    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # env = DummyVecEnv([make_env] * 1)
    env = SubprocVecEnv([make_env] * 1)
    env.reset()

    num_steps = 20000
    start = time.time()
    for i in range(num_steps):
        sys.stdout.write(f"\r{i + 1} / {num_steps}")
        action = [env.action_space.sample() for _ in range(1)]
        obs, reward, done, info = env.step(action)
        print(f"\nreward: {reward} done: {done}")
        # SubprocVecEnv returns `done` as an array; reset once every env is done.
        if np.all(done):
            env.reset()
        # env.render()

        if i % 50 == 0:
            image = Image.fromarray((obs[0] * 255).astype(np.uint8))
            image.save("/home/wulfebw/Desktop/color.png")
            plt.imshow(obs[0, :, :, 0])
            plt.show()
            plt.close()

    end = time.time()
    print(end - start)
    return env
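# --- Illustrative only: RewardScaler is used by main6/main7 but defined
# --- elsewhere in this project. A typical implementation is a small
# --- gym.RewardWrapper that multiplies every reward by a constant, which is
# --- what this sketch assumes; the class name and default scale are
# --- placeholders.
import gym  # normally imported at the top of the module


class RewardScalerSketch(gym.RewardWrapper):
    def __init__(self, env, scale=0.01):
        super().__init__(env)
        self.scale = scale

    def reward(self, reward):
        # Scaling rewards keeps value and advantage magnitudes in a range
        # that PPO-style optimisation handles well.
        return reward * self.scale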