def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)
    if env_type == 'atari':
        if alg == 'acer':
            env = make_vec_env(env_id, env_type, nenv, seed)
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_vec_env(env_id, env_type, nenv, seed), frame_stack_size)
    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=args.env, state=gamestate, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)
    else:
        get_session(tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1,
                                   inter_op_parallelism_threads=1))
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)
        if env_type == 'mujoco':
            env = VecNormalize(env)
    return env
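# A minimal usage sketch for build_env (an assumption, not part of the original
# module): it fills in only the Namespace fields that build_env reads -- env,
# alg, num_env, seed, reward_scale, and gamestate -- instead of the full CLI.
def _example_build_env():
    from argparse import Namespace
    args = Namespace(env='PongNoFrameskip-v4', alg='ppo2', num_env=4,
                     seed=0, reward_scale=1.0, gamestate=None)
    env = build_env(args)  # atari + ppo2 takes the VecFrameStack path
    print(env.observation_space, env.action_space)
    env.close()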
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)
    if env_type == 'atari':
        if alg == 'acer':
            env = make_vec_env(env_id, env_type, nenv, seed)
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True, scale=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_vec_env(env_id, env_type, nenv, seed), frame_stack_size)
    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or 'Level1-1'
        env = retro_wrappers.make_retro(game=args.env, state=gamestate, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)
    else:
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           steps_until_done=args.env_steps,
                           cont=args.env_cont,
                           norm=args.env_norm,
                           start_index=args.start_index)
        if env_type == 'mujoco':
            env = VecNormalize(env)
    return env
def main(env_name, mode, episodes, random_sample, save_path, concrete=False,
         expert_first=False, save_model=True, dropout=0.05, lr=0.001, ls=5e-7,
         train_epochs=10, density=0.0, hetero_loss=False, budget=1):
    """
    env_name - gym environment [LunarLander-v2, CartPole-v1]
    mode - learning type [pool, stream, classic]
    save_path - where the model and tf logging data should be saved to
    """
    seed = random.randint(0, int(1e6))  # randint needs int bounds, not the float 1e6

    isSpace = env_name[:5] == 'Space'
    if isSpace:
        wrapper_kwargs = {'episode_life': False}
        env = VecFrameStack(make_atari_env(env_name, 1, 0, wrapper_kwargs=wrapper_kwargs), 4)
    else:
        env = gym.make(env_name)
        env.seed(seed)

    isFetch = env_name[:5] == 'Fetch'
    if isFetch:
        # That's so fetch
        from active_imitation.agents.mujoco_robot import DEFAULT_PARAMS
        action_size = env.action_space.shape[0]
        observation_size = env.observation_space.spaces['observation'].shape
        goal_size = env.observation_space.spaces['desired_goal'].shape[0]
        env_dims = {'observation': observation_size, 'goal': goal_size,
                    'action': action_size}
    else:
        from active_imitation.agents.classic_gym import DEFAULT_PARAMS
        # Need the spaces' dimensions to initialize the NN agent
        action_size = 1  # single, discrete actions
        action_space = env.action_space.n
        observation_size = env.observation_space.shape
        env_dims = {'observation': observation_size, 'action': action_size,
                    'action_space': action_space}

    # Change the dimensions of the nn layers
    params = DEFAULT_PARAMS  # NOTE: aliases the imported dict; mutations below persist across calls
    # params['layers'] = [64, 64, 64]
    params['dropout_rate'] = dropout  # [0.05, 0.1, 0.15, 0.2]
    params['filepath'] = save_path
    params['lr'] = lr
    params['hetero_loss'] = hetero_loss
    if isFetch or isSpace:
        params['layers'] = [256, 256, 256]  # [512, 512, 512]
        # params['concrete'] = concrete
        params['ls'] = ls
    else:
        params['layers'] = [16, 16, 16]
        params['concrete'] = concrete

    if expert_first:
        mixing = 0.0
        mixing_decay = 1.0
    else:
        mixing = 1.0
        mixing_decay = 1.0

    param_mods = {'random_sample': random_sample, 'mixing': mixing,
                  'density_weight': density, 'budget': budget}

    if isFetch:
        agent = GymRobotAgent(env_dims, **params)
        expert = RoboticEnv_Expert(policy_files[env_name])
        continuous = True
    elif isSpace:
        expert = SpaceInvadersExpert({'observation': env.observation_space,
                                      'action': env.action_space})
        agent = AtariGymAgent(env_dims, **params)
        continuous = False
        param_mods['isSpace'] = True
    else:
        agent = GymAgent(env_dims, **params)
        expert = experts[env_name](env.unwrapped)
        continuous = False

    learning_mode = configure.configure_robot(env, env_dims, agent, expert, mode,
                                              continuous=continuous, concrete=concrete,
                                              param_mods=param_mods)

    ## Save the training parameters
    # learning rate, dropout, is_concrete, is_continuous, env_name, mode, ...
    parameter_savefile = os.path.join(save_path, 'parameters.txt')
    with open(parameter_savefile, 'w') as f:
        f.write('Environment Name: {} \n'.format(env_name))
        f.write('Learning Mode: {} \n'.format(mode))
        f.write('# of Episodes: {} \n'.format(episodes))
        f.write('Learning Rate: {} \n'.format(lr))
        f.write('Concrete Length Scale: {} \n'.format(ls))
        f.write('Training Epochs: {}\n'.format(train_epochs))
        f.write('Continuous: {}\n'.format(continuous))
        f.write('Concrete: {}\n'.format(concrete))
        f.write('Random Sample: {}\n'.format(random_sample))
        f.write('Mixing: {}\n'.format(mixing))
        f.write('Mixing Decay: {}\n'.format(mixing_decay))
        f.write('Density Weighting: {}\n'.format(density))
        f.write('Budget: {}\n'.format(budget))
        for label, value in params.items():
            f.write('{}: {}\n'.format(label, value))
        f.write('Random Seed: {}\n'.format(seed))

    if isSpace:
        save_rate = 5000
        valid_runs = 1
    else:
        save_rate = 100
        valid_runs = 5

    rewards, stats = learning_mode.train(episodes=episodes,
                                         mixing_decay=mixing_decay,
                                         train_epochs=train_epochs,
                                         save_images=False,
                                         image_filepath=save_path + 'images/',
                                         save_rate=save_rate,
                                         valid_runs=valid_runs)

    if save_model:
        agent.save_model()
    if isSpace:
        expert.close()
    agent.sess.close()
    env.close()
    tf.reset_default_graph()
    return rewards, stats
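# Hypothetical driver for main() (names and values below are illustrative,
# not from the original source): a short pool-mode run on CartPole-v1 with
# an expert-first warm start, writing parameters.txt and the model under
# save_path.
if __name__ == '__main__':
    save_path = './runs/cartpole/'
    os.makedirs(save_path, exist_ok=True)  # main() writes parameters.txt here
    rewards, stats = main('CartPole-v1', mode='pool', episodes=10,
                          random_sample=False, save_path=save_path,
                          expert_first=True)
    print('episodes run: {}, final reward: {}'.format(len(rewards), rewards[-1]))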
def build_env(args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = args.seed

    env_type, env_id = get_env_type(args.env)
    if env_type == 'atari':
        if alg == 'acer':
            env = make_vec_env(env_id, env_type, nenv, seed)
        elif alg == 'deepq':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir())
            env = atari_wrappers.wrap_deepmind(env, frame_stack=True, scale=True)
        elif alg == 'trpo_mpi':
            env = atari_wrappers.make_atari(env_id)
            env.seed(seed)
            env = bench.Monitor(env, logger.get_dir() and osp.join(logger.get_dir(), str(rank)))
            env = atari_wrappers.wrap_deepmind(env)
            # TODO check if the second seeding is necessary, and eventually remove
            env.seed(seed)
        else:
            frame_stack_size = 4
            env = VecFrameStack(make_vec_env(env_id, env_type, nenv, seed), frame_stack_size)
    elif env_type == 'retro':
        import retro
        gamestate = args.gamestate or 'Level1-1'
        env = retro_wrappers.make_retro(game=args.env, state=gamestate, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE)
        env.seed(args.seed)
        env = bench.Monitor(env, logger.get_dir())
        env = retro_wrappers.wrap_deepmind_retro(env)
    elif env_type == 'AirHockey':
        from gym_airhockey.configuration import configure_env
        from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
        version_list = [x for x in args.versions if x is not None]
        version = version_list[MPI.COMM_WORLD.Get_rank() % len(version_list)]  # each rank gets its own version
        # setup the environment
        env = gym.make(env_id)
        env.seed(args.seed)
        configure_env(env, version=version)
        # wrap the environment; DummyVecEnv calls its thunks during construction,
        # so the lambda still sees the Monitor-wrapped env here, not the vec env
        env = bench.Monitor(env, logger.get_dir(), allow_early_resets=True)
        env = DummyVecEnv([lambda: env])
        env.render()
    else:
        get_session(tf.ConfigProto(allow_soft_placement=True,
                                   intra_op_parallelism_threads=1,
                                   inter_op_parallelism_threads=1))
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale)
        if env_type == 'mujoco':
            env = VecNormalize(env)
    return env
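# Sketch of the rank-to-version mapping used by the AirHockey branch above:
# valid versions are spread round-robin over MPI ranks, so with more workers
# than versions the assignment simply wraps around. _version_for_rank is a
# hypothetical helper added here for illustration only.
def _version_for_rank(rank, versions):
    version_list = [x for x in versions if x is not None]  # drop unset slots
    return version_list[rank % len(version_list)]

# e.g. ranks 0..3 over ['v0', 'v1', 'v2', None] -> 'v0', 'v1', 'v2', 'v0'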