def main(exp_traj_fn, rep_as_str, from_scratch):
    env_name = f"zelda-{rep_as_str}-v0"
    log_dir = f'runs/{rep_as_str}'
    kwargs_dict = {'resume': False, 'render': True}
    # The wide representation uses the fully convolutional policy; others use the custom policy.
    if rep_as_str == 'wide':
        policy = FullyConvPolicyBigMap
    else:
        policy = CustomPolicyBigMap
    env = make_vec_envs(env_name, rep_as_str, log_dir, n_cpu=1, **kwargs_dict)
    model = PPO2(policy, env, verbose=1, tensorboard_log=f"./runs/{rep_as_str}")
    if not from_scratch:
        # PPO2.load is a classmethod that returns a new model, so rebind the result.
        model = PPO2.load(f'models/{rep_as_str}/zelda_{rep_as_str}', env=env)
    dataset = ExpertDataset(
        expert_path=f'expert_trajectories/{rep_as_str}/{exp_traj_fn}.npz',
        traj_limitation=-1,
        batch_size=15)
    start_time = time.process_time()
    model.set_env(env)
    model.pretrain(dataset, n_epochs=15)
    end_time = time.process_time()
    print(f"pretraining took {end_time - start_time} seconds")
    model.save(f'models/{rep_as_str}/zelda_{rep_as_str}')
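# Minimal usage sketch (not part of the original script): the trajectory filename
# 'expert_wide' and the flag values are hypothetical examples; adjust them to match
# the .npz files under expert_trajectories/<representation>/.
if __name__ == '__main__':
    # Pretrain the existing wide-representation Zelda model on one trajectory file.
    main(exp_traj_fn='expert_wide', rep_as_str='wide', from_scratch=False)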
def infer(game, representation, model_path, **kwargs):
    """
    - trials: The number of trials to roll out.
    - verbose: Whether to print the per-step info dict.
    Remaining kwargs are passed to the environment.
    """
    env_name = '{}-{}-v0'.format(game, representation)
    if game == "binary":
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        model.FullyConvPolicy = model.FullyConvPolicySmallMap
        kwargs['cropped_size'] = 10
    kwargs['render'] = True
    agent = PPO2.load(model_path)
    env = make_vec_envs(env_name, representation, None, 1, **kwargs)
    for i in range(kwargs.get('trials', 1)):
        # Reset the observation and the done flag at the start of every trial.
        obs = env.reset()
        dones = False
        while not dones:
            action, _ = agent.predict(obs)
            obs, _, dones, info = env.step(action)
            if kwargs.get('verbose', False):
                print(info[0])
            if dones:
                break
            time.sleep(0.2)
def infer(game, representation, model_path, **kwargs):
    """
    - trials: The number of suggestion roll-outs to run.
    - tiles: Fixed tiles that must be kept in the generated map.
    Remaining kwargs are passed to the environment.
    """
    env_name = '{}-{}-v0'.format(game, representation)
    if "small" in game:
        model.FullyConvPolicy = model.FullyConvPolicySmallMap
        kwargs['cropped_size'] = 8
    elif "medium" in game:
        model.FullyConvPolicy = model.FullyConvPolicySmallMap
        kwargs['cropped_size'] = 12
    elif "large" in game:
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 16
    kwargs['render'] = False
    # agent = PPO2.load(model_path)
    agent = getattr(settings, model_path, None)
    fixed_tiles = process(kwargs.get('tiles', []))
    initial_map = createMap(kwargs['cropped_size'], fixed_tiles)
    kwargs['old_map'] = initial_map
    change_limit = kwargs.get('change_limit', 5000)
    # if not canCreateMap(fixed_tiles, game.split("_")[0], game.split("_")[1]):
    #     return False
    sug_info = {}
    for i in range(kwargs.get('trials', 1)):
        sug_info[i] = {}
        env = make_vec_envs(env_name, representation, None, 1, **kwargs)
        info = None
        obs = env.reset()
        dones = False
        cur_pos = {'x': None, 'y': None}
        while not dones:
            if i == 0:
                action, _ = agent.predict(obs)
                obs, _, dones, info = env.step(action)
            else:
                obs, _, dones, info = step(cur_pos, fixed_tiles, representation, env, agent, obs)
            cur_pos['x'] = info[0]['pos'][0]
            cur_pos['y'] = info[0]['pos'][1]
            if kwargs.get('verbose', False):
                print(info[0])
            if dones:
                break
            # if info[0]['changes'] > change_limit:
            #     return False
        sug_info[i]["info"] = info[0]
    sug_info["range"] = get_range(game.split("_")[0], game.split("_")[1])
    return sug_info
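# Illustrative call (assumptions, not from the original code): the game id
# 'zelda_small', the representation 'turtle', and the key 'model_small' under which
# a pretrained agent is registered on the settings module are all hypothetical.
suggestions = infer('zelda_small', 'turtle', 'model_small',
                    tiles=[], trials=2, verbose=False)
print(suggestions.get('range'))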
def main(game, representation, experiment, steps, n_cpu, render, logging, **kwargs):
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    resume = kwargs.get('resume', False)
    if representation == 'wide':
        policy = FullyConvPolicyBigMap
        if game == "sokoban":
            policy = FullyConvPolicySmallMap
    else:
        policy = CustomPolicyBigMap
        if game == "sokoban":
            policy = CustomPolicySmallMap
    if game == "binary":
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        kwargs['cropped_size'] = 10
    n = max_exp_idx(exp_name)
    global log_dir
    if not resume:
        n = n + 1
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    if not resume:
        os.mkdir(log_dir)
    else:
        model = load_model(log_dir)
    kwargs = {
        **kwargs,
        'render_rank': 0,
        'render': render,
    }
    # Only pass a monitor directory to the environments when logging is enabled.
    used_dir = log_dir
    if not logging:
        used_dir = None
    env = make_vec_envs(env_name, representation, used_dir, n_cpu, **kwargs)
    if not resume or model is None:
        model = PPO2(policy, env, verbose=1, tensorboard_log="./runs")
    else:
        model.set_env(env)
    if not logging:
        model.learn(total_timesteps=int(steps), tb_log_name=exp_name)
    else:
        model.learn(total_timesteps=int(steps), tb_log_name=exp_name, callback=callback)
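# Hypothetical invocation (the experiment name and hyperparameter values are
# examples only); mirrors how this training entry point is typically driven from
# module-level variables.
if __name__ == '__main__':
    main('zelda', 'wide', 'baseline', steps=1e6, n_cpu=12,
         render=False, logging=True, resume=False)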
def infer(game, representation, model_path, **kwargs):
    """
    - trials: The number of trials to roll out.
    - verbose: Whether to print the per-step info dict.
    Remaining kwargs are passed to the environment.
    """
    env_name = '{}-{}-v0'.format(game, representation)
    if game == "binary":
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 28
    elif game == "zelda":
        model.FullyConvPolicy = model.FullyConvPolicyBigMap
        kwargs['cropped_size'] = 22
    elif game == "sokoban":
        model.FullyConvPolicy = model.FullyConvPolicySmallMap
        kwargs['cropped_size'] = 10
    env = make_vec_envs(env_name, representation, None, 1, **kwargs)
    agent = PPO2.load(model_path, env=env)
    successful_levels = 0.0
    total_iterations = 0.0
    for i in range(kwargs.get('trials', 1)):
        # Reset the observation and the done flag at the start of every trial.
        obs = env.reset()
        dones = False
        while not dones:
            total_iterations += 1
            action, _ = agent.predict(obs)
            obs, _, dones, info = env.step(action)
            if kwargs.get('verbose', False):
                # print(info[0])
                pass
            if info[0]['solved']:
                successful_levels += 1
                dones = True
            if dones:
                break
    # Ratio of solved levels to environment steps taken across all trials.
    return successful_levels / total_iterations
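# Example call (the model path reuses the save location from the pretraining script
# above; the trial count is a hypothetical value): prints the ratio returned by
# infer, i.e. solved levels divided by environment steps.
success_ratio = infer('zelda', 'wide', 'models/wide/zelda_wide', trials=50, verbose=False)
print(f"success ratio: {success_ratio}")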
from model import FullyConvPolicyBigMap, CustomPolicyBigMap
from utils import make_vec_envs
from stable_baselines.gail import generate_expert_traj, ExpertDataset
from stable_baselines import PPO2
import time
import numpy as np

# THIS SECTION IS FOR GENERATING EXPERT TRAJECTORIES
kwargs_dict = {'resume': False, 'render': False}
log_dir = 'runs/wide'
env_name = "zelda-wide-v0"
policy = FullyConvPolicyBigMap
env = make_vec_envs(env_name, "wide", log_dir, n_cpu=1, **kwargs_dict)
model = PPO2(policy, env, verbose=1, tensorboard_log="./runs/wide")
a_dict = generate_expert_traj(model, 'expert_wide', n_timesteps=int(0), n_episodes=1)
print(a_dict)

numpy_dict = np.load('expert_wide.npz')
print(type(numpy_dict))
print(list(numpy_dict.keys()))
# ['actions', 'obs', 'rewards', 'episode_returns', 'episode_starts']
print("ACTIONS")
print("=============================")
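# Sketch of inspecting the recorded arrays (illustrative only, not the original
# continuation of the script): the loaded .npz archive behaves like a dict of
# numpy arrays keyed by the names printed above.
for key in numpy_dict.keys():
    print(key, numpy_dict[key].shape)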
def bootstrap_envs_and_buffer(args: Namespace):
    """Method to bootstrap the envs, buffer and related objects"""
    logbook = make_logbook(args=args)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    utils.make_dir(args.work_dir)
    with open(os.path.join(args.work_dir, "args.json"), "w") as f:
        json.dump(vars(args), f, sort_keys=True, indent=4)
    dummy_env = utils.make_dummy_env(args=args)
    pixel_space_obs = dummy_env.env.env._get_observation_space_for_pixel_space(
        args.image_size, args.image_size
    )
    state_space_obs = dummy_env.env.env._get_observation_space_for_state_space()
    action_size = dummy_env.action_space.shape[0]
    train_replay_buffer = create_multi_env_replay_buffer(
        args=args, env=dummy_env, device=device, num_envs=args.num_train_envs
    )
    eval_replay_buffer = create_multi_env_replay_buffer(
        args=args, env=dummy_env, device=device, num_envs=args.num_eval_envs
    )
    (
        fns_to_make_train_envs,
        fns_to_make_eval_envs,
    ) = make_fns_to_make_train_and_eval_envs(args=args)
    max_episode_steps = dummy_env._max_episode_steps
    vec_train_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_train_envs,
        device=None,
    )
    vec_eval_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_eval_envs,
        device=None,
    )
    logging_dict = {
        "steps": [],
        "model_error_in_latent_state": [],
        "model_error_in_eta_state": [],
        "reward_error": [],
        "decoding_error": [],
        "test_model_error_in_latent_state": [],
        "test_model_error_in_eta_state": [],
        "test_reward_error": [],
        "test_decoding_error": [],
        "discriminator_loss": [],
        "encoder_discriminator_loss": [],
        "test_encoder_discriminator_loss": [],
    }
    return (
        logbook,
        device,
        vec_train_envs,
        vec_eval_envs,
        state_space_obs,
        pixel_space_obs,
        action_size,
        train_replay_buffer,
        eval_replay_buffer,
        logging_dict,
        max_episode_steps,
    )
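# Hypothetical call site (the argparse parser itself is not part of this snippet;
# `args` must provide the fields read above, e.g. work_dir, image_size,
# num_train_envs, num_eval_envs):
def run_training(args: Namespace):
    (
        logbook, device, vec_train_envs, vec_eval_envs,
        state_space_obs, pixel_space_obs, action_size,
        train_replay_buffer, eval_replay_buffer,
        logging_dict, max_episode_steps,
    ) = bootstrap_envs_and_buffer(args=args)
    # ... a training loop would consume these objects ...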
def bootstrap_expert(args: Namespace):
    utils.set_seed_everywhere(args.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    fns_to_make_train_envs = [
        utils.fn_to_make_env(args=args, seed=seed, resource_files=None, camera_id=0)
        for seed in range(args.num_train_envs)
    ]
    fns_to_make_eval_envs = [
        utils.fn_to_make_env(args=args, seed=seed, resource_files=None, camera_id=0)
        for seed in range(args.num_eval_envs)
    ]
    vec_train_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_train_envs, device=None)
    vec_eval_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_eval_envs, device=None)
    dummy_env = utils.make_env(args, 0, resource_files=None, camera_id=0)
    video_dir, model_dir, buffer_dir, video = make_dirs_and_recorders(args=args)
    validate_env(dummy_env)
    replay_buffer = utils.MultiEnvReplayBuffer(
        obs_shape=dummy_env.observation_space.shape,
        action_shape=dummy_env.action_space.shape,
        capacity=args.replay_buffer_capacity,
        batch_size=args.batch_size,
        device=device,
        num_envs=args.num_train_envs,
    )
    agent = make_expert(
        obs_shape=dummy_env.observation_space.shape,
        action_shape=dummy_env.action_space.shape,
        args=args,
        device=device,
    )
    L = VecLogger(args.work_dir, use_tb=args.save_tb, num_envs=args.num_train_envs)
    max_episode_steps = dummy_env._max_episode_steps
    return (
        vec_train_envs,
        vec_eval_envs,
        max_episode_steps,
        video_dir,
        model_dir,
        buffer_dir,
        video,
        device,
        replay_buffer,
        agent,
        L,
    )
def infer(game, representation, experiment, infer_kwargs, **kwargs):
    """
    - max_trials: The number of trials per evaluation.
    - infer_kwargs: Args to pass to the environment.
    """
    infer_kwargs = {
        **infer_kwargs,
        'inference': True,
        'render': True,
    }
    max_trials = kwargs.get('max_trials', -1)
    n = kwargs.get('n', None)
    env_name = '{}-{}-v0'.format(game, representation)
    exp_name = get_exp_name(game, representation, experiment, **kwargs)
    if n is None:
        n = max_exp_idx(exp_name)
    if n == 0:
        raise Exception('Did not find ranked saved model of experiment: {}'.format(exp_name))
    log_dir = 'runs/{}_{}_{}'.format(exp_name, n, 'log')
    model = load_model(log_dir)
    # No log dir; run n_cpu parallel environments.
    n_cpu = infer_kwargs.get('n_cpu', 12)
    env = make_vec_envs(env_name, representation, None, n_cpu, **infer_kwargs)
    obs = env.reset()
    # Record final values of each trial.
    if 'binary' in env_name:
        path_lengths = []
        changes = []
        regions = []
        infer_info = {
            'path_lengths': [],
            'changes': [],
            'regions': [],
        }
    n_trials = 0
    while n_trials != max_trials:
        # action = get_action(obs, env, model)
        action, _ = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        reward = rewards[0]
        n_regions = info[0]['regions']
        readouts = []
        if 'binary' in env_name:
            curr_path_length = info[0]['path-length']
            readouts.append('path length: {}'.format(curr_path_length))
            path_lengths.append(curr_path_length)
            changes.append(info[0]['changes'])
            regions.append(info[0]['regions'])
        readouts += ['regions: {}'.format(n_regions), 'reward: {}'.format(reward)]
        stringexec = ""
        m = 0
        y0, dy = 50, 40
        img = np.zeros((256, 512, 3), np.uint8)
        scale_percent = 60  # percent of original size
        width = int(img.shape[1] * scale_percent / 100)
        height = int(img.shape[0] * scale_percent / 100)
        dim = (width, height)
        # Draw each readout line onto the image.
        for i, line in enumerate(readouts):
            y = y0 + i * dy
            cv2.putText(img, line, (50, y), font, fontScale, fontColor, lineType)
            # stringexec = "cv2.putText(img, TextList[" + str(TextList.index(i)) + "], (100, 100+" + str(m) + "), cv2.FONT_HERSHEY_COMPLEX, 0.5, (200, 100, 100), 1, cv2.LINE_AA)\n"
            # m += 100
        # cv2.putText(
        #     img, readout,
        #     topLeftCornerOfText,
        #     font,
        #     fontScale,
        #     fontColor,
        #     lineType)
        # Resize and display the image.
        resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
        cv2.imshow("img", resized)
        cv2.waitKey(1)
        # for p, v in model.get_parameters().items():
        #     print(p, v.shape)
        if dones:
            # show_state(env, path_lengths, changes, regions, n_step)
            if 'binary' in env_name:
                infer_info['path_lengths'] = path_lengths[-1]
                infer_info['changes'] = changes[-1]
                infer_info['regions'] = regions[-1]
            n_trials += 1
    return infer_info
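# Hypothetical invocation (the experiment name and kwargs are examples): runs ten
# trials of a binary-narrow model, rendering the readouts with OpenCV, then prints
# the final per-trial statistics.
results = infer('binary', 'narrow', 'baseline', {'n_cpu': 1}, max_trials=10)
print(results)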
def bootstrap_setup_for_rl(args: argparse.Namespace):
    """Method to bootstrap the setup"""
    utils.set_seed_everywhere(args.seed)
    (
        logbook,
        device,
        train_envs,
        eval_envs,
        obs_shape,
        action_size,
        train_replay_buffer,
        eval_replay_buffer,
        logging_dict,
    ) = bootstrap_setup(args)
    args.video_dir = utils.make_dir(os.path.join(args.work_dir, "video"))
    args.model_dir = utils.make_dir(os.path.join(args.work_dir, "model"))
    args.buffer_dir = utils.make_dir(os.path.join(args.work_dir, "buffer"))
    # video = VideoRecorder(video_dir if args.save_video else None)
    logging_dict = {
        "steps": [],
        "model_error_in_latent_state": [],
        "model_error_in_eta_state": [],
        "reward_error": [],
        "decoding_error": [],
        "test_model_error_in_latent_state": [],
        "test_model_error_in_eta_state": [],
        "test_reward_error": [],
        "test_decoding_error": [],
    }
    logger = Logger(args.work_dir, use_tb=args.save_tb, logbook=logbook)
    # train_envs = utils.make_vec_envs(envs=train_envs,
    #                                  device=None,
    #                                  num_frame_stack=args.frame_stack)
    # eval_envs = utils.make_vec_envs(envs=eval_envs,
    #                                 device=None,
    #                                 num_frame_stack=args.frame_stack)
    (
        fns_to_make_train_envs,
        fns_to_make_eval_envs,
    ) = make_fns_to_make_train_and_eval_envs(args=args)
    max_episode_steps = train_envs[0]._max_episode_steps
    train_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_train_envs,
        device=None,
        num_frame_stack=args.frame_stack,
    )
    eval_envs = utils.make_vec_envs(
        fns_to_make_envs=fns_to_make_eval_envs,
        device=None,
        num_frame_stack=args.frame_stack,
    )
    return (
        logbook,
        device,
        train_envs,
        eval_envs,
        obs_shape,
        action_size,
        train_replay_buffer,
        eval_replay_buffer,
        logging_dict,
        logger,
        max_episode_steps,
    )