def train(self, **kwargs):
    """Run the training algorithm to optimize model parameters for the
    environment provided.

    Keyword arguments override the algorithm's preset defaults.  The extra
    key ``render_saves`` (bool, default False) is consumed here: when True,
    every intermediate checkpoint saved during training is rendered to an
    mp4 after training finishes.
    """
    # Take a *copy* of the preset kwargs before applying user overrides.
    # Updating PRESETS[...] in place (as the original code did) leaks the
    # overrides -- and the 'logger_kwargs' entry added below -- into the
    # shared module-level PRESETS dict, corrupting later train() calls.
    preset_kwargs = dict(PRESETS[self.training_alg])
    preset_kwargs.update(kwargs)

    # Consume 'render_saves' so it is not forwarded to the algorithm.
    render_saves = preset_kwargs.pop('render_saves', False)

    # Dynamically import the training entry point,
    # e.g. from algos.vpg.vpg import vpg.
    mod = import_module("algos.{}.{}".format(self.training_alg,
                                             self.training_alg))
    method = getattr(mod, self.training_alg)

    if self.actorCritic is None:
        # Fall back to the algorithm's default actor-critic class,
        # e.g. algos.vpg.core.MLPActorCritic.
        core = import_module("algos.{}.core".format(self.training_alg))
        self.actorCritic = getattr(core,
                                   DEFAULT_ACTOR_CRITIC[self.training_alg])

    # Prepare MPI if self.ncpu > 1 (and supported by chosen RL algorithm).
    mpi_fork(self.ncpu)  # run parallel code with mpi

    # Route experiment output through the standard logger directory layout.
    logger_kwargs = setup_logger_kwargs(self.exp_name, preset_kwargs['seed'])
    preset_kwargs['logger_kwargs'] = logger_kwargs

    # Begin training.
    method(self.env, actor_critic=self.actorCritic, **preset_kwargs)

    # Render all checkpoints the user requested via 'render_saves'.
    if render_saves:
        log_dir = logger_kwargs['output_dir'] + os.sep + 'pyt_save' + os.sep
        # Sort the checkpoint list: the final checkpoint carries no itr in
        # its file name and then reliably sorts first, so [1:] skips it.
        # Plain glob.glob returns files in arbitrary filesystem order, which
        # made the original [1:] skip an arbitrary checkpoint instead.
        fnames = sorted(glob.glob(log_dir + 'model*.pt'))[1:]
        for checkpoint in fnames:
            # Epoch number embedded in the file name, e.g. 'model42.pt'.
            itr = re.search('model(.*).pt', checkpoint).group(1)
            render_kwargs = {
                'filename': '/gym_animation_' + str(itr) + '.mp4',
                'model_itr': itr,
            }
            self.render(save=True, show=False, seed=self.seed,
                        **render_kwargs)
# Script setup: build the hurdle HalfCheetah environment, load a pretrained
# skill network, and construct the meta-controller with a standard logger.
env_name = 'HalfCheetah_hurdle-v2'
meta_Skill = Meta_skill('./Skill/ica_skill/model_1211/IcaNet.115.pt')
output_dir = './network_reserve/model_0322/'

# gpu_init: prefer CUDA when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Wrap the raw gym env so actions are normalized before being applied.
env = NormalizedActions(gym.make(env_name))
params.update({
    'env': env,
    'output_dir': output_dir,
    'device': device,
})
logger_kwargs = setup_logger_kwargs(exp_name=env_name, seed=0, data_dir=output_dir)
meta_Control = Meta_control(**params)
meta_Control.logger_setup(logger_kwargs, **params)

# Training-loop bounds.
MAX_EPISODE = 100  # 100 #
MAX_EPOCH = 10
MAX_STEP = 500  # 500 #

for episode_idx in range(MAX_EPISODE):
    episode_reward = 0
    episode_success = 0
    for epoch in range(MAX_EPOCH):
        state = env.reset()
        for step in range(MAX_STEP):
            # env.render()
            # NOTE(review): chunk truncated here -- the step-loop body
            # continues outside this view.
# NOTE(review): truncated head of a call -- the callee (most likely a
# scalar-logging call taking the mean episode reward) starts outside this
# view; the surrounding method header is also not visible here.
reward / episode, episode)
# Emit the per-epoch tabular diagnostics and flush the row to disk.
self.logger.log_tabular("reward", with_min_and_max=True)
self.logger.log_tabular("step", with_min_and_max=True)
self.logger.log_tabular("reward_test", with_min_and_max=True)
self.logger.log_tabular("step_test", with_min_and_max=True)
self.logger.dump_tabular()


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    # Carla simulator connection and experiment settings.
    parser.add_argument('--env', type=str, default='Carla')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--port', type=int, default=2000)
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--batch', type=int, default=32)
    parser.add_argument('--exp_name', type=str,
                        default='dqn_carla_random_pos_replaybuffer10e5')
    args = parser.parse_args()

    # Pin the process to the requested GPU before any CUDA context exists.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    from utils.run_utils import setup_logger_kwargs
    logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)
    dqn = Dqn(args.env, args.port, args.gpu, logger_kwargs=logger_kwargs)
    dqn.train_test()
# plt.legend(loc = 'lower right', # 默认在左上角即 upper left 可以通过loc进行修改 # fancybox = True, # 边框 # framealpha = 0.5, # 透明度 # shadow = True, # 阴影 # borderpad = 1) # 边框宽度 if not os.path.exists(DEFAULT_IMG_DIR): os.mkdir(DEFAULT_IMG_DIR) out_file = os.path.join(DEFAULT_IMG_DIR, output_name + ".png") plt.savefig(out_file) plt.clf() else: from utils.run_utils import setup_logger_kwargs logger_kwargs = setup_logger_kwargs(args.plot_name, args.seed) data_file = os.path.join(logger_kwargs["output_dir"], "progress.txt") pd_data = pd.read_table(data_file) mean_name = "Averagetest_reward" std_name = "Stdtest_reward" mean = pd_data[mean_name] std = pd_data[std_name] x = pd_data["Epoch"] plt.plot(x, mean) plt.fill_between(x, mean+std, mean-std) output_name = args.output_name if args.output_name is None: output_name = args.plot_name + "_s" + str(args.seed)
# NOTE(review): these three calls are the tail of a training routine whose
# definition starts outside this view -- fit the learned dynamics and cost
# models, then release the environment.
dynamic_model.fit(use_data_buf=True, normalize=True)
cost_model.fit()
env.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Task selection.
    parser.add_argument('--robot', type=str, default='point', help="robot model, selected from `point` or `car` ")
    parser.add_argument('--level', type=int, default=1, help="environment difficulty, selected from `1` or `2`, where `2` would be more difficult than `1`")
    # Training budget.
    parser.add_argument('--epoch', type=int, default=60, help="maximum epochs to train")
    parser.add_argument('--episode', type=int, default=10, help="determines how many episodes data to collect for each epoch")
    # Runtime toggles.
    parser.add_argument('--render', '-r', action='store_true', help="render the environment")
    parser.add_argument('--test', '-t', action='store_true', help="test the performance of pretrained models without training")
    parser.add_argument('--seed', '-s', type=int, default=1, help="seed for Gym, PyTorch and Numpy")
    # Logging / checkpointing.
    parser.add_argument('--dir', '-d', type=str, default='./data/', help="directory to save the logging information")
    parser.add_argument('--name', '-n', type=str, default='test', help="name of the experiment, used to save data in a folder named by this parameter")
    parser.add_argument('--save', action='store_true', help="save the trained dynamic model, data buffer, and cost model")
    parser.add_argument('--load', type=str, default=None, help="load the trained dynamic model, data buffer, and cost model from a specified directory")
    # Model / optimizer configuration.
    parser.add_argument('--ensemble', type=int, default=0, help="number of model ensembles, if this argument is greater than 0, then it will replace the default ensembles number in config.yml")  # number of ensembles
    parser.add_argument('--optimizer', '-o', type=str, default="rce", help=" determine the optimizer, selected from `rce`, `cem`, or `random` ")  # random, cem or CCE
    parser.add_argument('--config', '-c', type=str, default='./config.yml', help="specify the path to the configuation file of the models")
    args = parser.parse_args()
# Resolve the logging output directory from the experiment name, seed, and
# base directory given on the command line, build the epoch logger, load
# the model configuration, and hand control to the main training routine.
logger_kwargs = setup_logger_kwargs(args.name, args.seed, args.dir)
logger = EpochLogger(**logger_kwargs)
config = load_config(args.config)
run(logger, config, args)
# NOTE(review): continuation of an argparse setup whose parser is created
# outside this view.
parser.add_argument('--anneal_lr', action="store_true")
# NOTE(review): store_false means --debug defaults to True and passing the
# flag turns debugging OFF -- confirm this inversion is intended.
parser.add_argument('--debug', action="store_false")
parser.add_argument('--log_every', default=10, type=int)
parser.add_argument('--network', default="cnn")
parser.add_argument('--feature_dim', default=50, type=int)
parser.add_argument('--target_kl', default=0.03, type=float)
parser.add_argument('--encoder_dir', default="vae_2")
parser.add_argument('--encoder_check', default=300, type=int)
parser.add_argument('--test_epoch', default=10, type=int)
args = parser.parse_args()

# Select the requested GPU if CUDA is available, else fall back to CPU.
device = torch.device(
    "cuda:" + str(args.gpu) if torch.cuda.is_available() else "cpu")

from utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)
logger = EpochLogger(**logger_kwargs)

# Persist the full argument set alongside the run for reproducibility.
with open(os.path.join(logger.output_dir, 'args.json'), 'w') as f:
    json.dump(vars(args), f, sort_keys=True, indent=4)

# TensorBoard writer rooted in the run's output directory.
writer = SummaryWriter(os.path.join(logger.output_dir, "logs"))

# Pixel observations are requested only when the encoder consumes pixels.
env = dmc2gym.make(domain_name=args.domain_name,
                   task_name=args.task_name,
                   seed=args.seed,
                   visualize_reward=False,
                   from_pixels=(args.encoder_type == 'pixel'),
                   height=args.image_size,
                   width=args.image_size,
                   frame_skip=args.action_repeat)
test_env = dmc2gym.make(domain_name=args.domain_name,
                        task_name=args.task_name,
# NOTE(review): chunk truncated mid-call -- the remaining test_env
# arguments lie outside this view.
# NOTE(review): continuation of an argparse setup whose parser is created
# outside this view.
parser.add_argument('--log', type=str, default="logs")
parser.add_argument('--steps', type=int, default=1000)
# parser.add_argument('--env', type=str, default="CartPole-v0")
# NOTE(review): no type/action given, so any value passed on the command
# line arrives as a (truthy) string -- confirm this is intended.
parser.add_argument('--v_gae_clip', default=False)
parser.add_argument('--env', type=str, default="HalfCheetah-v2")
parser.add_argument('--exp_name', type=str, default="orthogonal")
parser.add_argument('--seed', type=int, default=0)
args = parser.parse_args()

# Let TensorFlow grow GPU memory on demand instead of reserving it all.
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

from utils.run_utils import setup_logger_kwargs
file_name = "ppo_" + args.env + "_" + args.exp_name
logger_kwargs = setup_logger_kwargs(file_name, args.seed)
logger = EpochLogger(**logger_kwargs)

env = gym.make(args.env)
# Seed TF, NumPy and the environment for reproducibility.
tf.random.set_seed(args.seed)
np.random.seed(args.seed)
env.seed(args.seed)

# Pick the PPO constructor form matching the action-space type.
action_space = env.action_space
if isinstance(action_space, Discrete):
    ppo = core.PPO(action_space.n, 0.2, lr_a=args.lr_a, lr_c=args.lr_c)
else:
    ppo = core.PPO(action_space.shape[0], 0.2, False, action_space.high[0],
# NOTE(review): chunk truncated mid-call -- the remaining core.PPO
# arguments lie outside this view.
# Restore hyperparameters and progress counters saved by a previous run
# (from an already-open JSON file handle) so training can resume.
d = json.load(outfile)
minibatch_size = d.get('minibatch_size')
A_learningRate = d.get('A_learningRate')
C_learningRate = d.get('C_learningRate')
discountFactor = d.get('discountFactor')
explorationRate = d.get('explorationRate')
learnStart = d.get('learnStart')
memorySize = d.get('memorySize')
current_epoch = d.get('current_epoch')
stepCounter = d.get('stepCounter')
loadsim_seconds = d.get('loadsim_seconds')

# Reset the gym monitor output and carry over prior monitor recordings,
# then re-attach the Monitor in resume mode.
clear_monitor_files(outdir)
copy_tree(monitor_path, outdir)
env = gym.wrappers.Monitor(env, outdir, resume=True)

# seed=None: the logger directory name carries no seed suffix.
logger_kwargs = setup_logger_kwargs('PelicanAttControllerEnv', None)
# NOTE(review): LR=I_learningRate -- this name is not restored above (only
# A_/C_learningRate are); confirm it is defined elsewhere and is not a
# typo for one of those.
policy_net = policy_core.Policy_Net(S_DIM=S_DIM,
                                    A_DIM=A_DIM,
                                    EP_MAX=epochs,
                                    EP_LEN=episode_steps,
                                    GAMMA=discountFactor,
                                    LR=I_learningRate,
                                    BATCH=minibatch_size,
                                    logger_kwargs=logger_kwargs)

# Rolling window of the last 100 episode rewards for progress reporting.
last100Rewards = [0] * 100
last100RewardsIndex = 0
last100Filled = False
all_ep_r = []
start_time = time.time()
if __name__ == '__main__':
    # Stand-alone evaluation: roll out a saved policy in the Pelican
    # attitude-controller environment while a gym Monitor records episodes.
    rospy.init_node('pelican_attitude_controller_policy_test',
                    anonymous=True, log_level=rospy.WARN)

    parser = argparse.ArgumentParser()
    # Default checkpoint location, resolved relative to this file.
    checkpoint_dir = osp.join(
        osp.abspath(osp.dirname(__file__)),
        'data/Pelican_position_controller_dagger_for_ppo/Pelican_position_controller_dagger_for_ppo_s3'
    )
    parser.add_argument('--exp', type=str, default="PelicanAttControllerEnv-v0")
    parser.add_argument('--fpath', type=str, default=checkpoint_dir)
    parser.add_argument('--len', '-l', type=int, default=500)
    parser.add_argument('--episodes', '-n', type=int, default=100)
    parser.add_argument('--itr', '-i', type=int, default=-1)
    parser.add_argument('--seed', '-s', type=int, default=None)
    parser.add_argument('--deterministic', '-d', action='store_true')
    args = parser.parse_args()

    # Wrap the environment in a Monitor; force=True wipes stale recordings.
    env = gym.make(args.exp)
    outdir = '/tmp/openai_ros_experiments/'
    env = gym.wrappers.Monitor(env, outdir, force=True)

    # A negative --itr means "use the most recent checkpoint".
    which_itr = 'last' if args.itr < 0 else args.itr
    get_action = load_policy(args.fpath, which_itr, args.deterministic)

    logger_kwargs = setup_logger_kwargs(args.exp + '_test', args.seed)
    run_policy(env, get_action, args.len, args.episodes,
               logger_kwargs=logger_kwargs)
# NOTE(review): continuation of a dmc2gym.make(...) call whose head lies
# outside this view.
task_name=args.task_name,
seed=args.seed,
visualize_reward=False,
from_pixels=(args.encoder_type == 'pixel'),
height=args.image_size,
width=args.image_size,
frame_skip=args.action_repeat)

# Stack frames only when observations are raw pixels.
if args.encoder_type == 'pixel':
    env = DMCFrameStack(env, k=args.frame_stack)

# Seed Torch, NumPy and the environment for reproducibility.
torch.manual_seed(args.seed)
np.random.seed(args.seed)
env.seed(args.seed)
state_dim = env.observation_space.shape

from utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)

# Expert demonstrations live under <output_dir>/experts, keyed by domain,
# task and the expert checkpoint epoch.
# NOTE(review): pickle.load on an external file executes arbitrary code if
# the file is untrusted -- only load expert data you produced yourself.
expert_data_file = os.path.join(logger_kwargs["output_dir"], "experts")
with open(
        os.path.join(
            expert_data_file, args.domain_name + "_" + args.task_name +
            "_epoch" + str(args.expert_num) + ".pkl"), "rb") as f:
    expert_data = pickle.load(f)

# Separate output logger/writer for this run, plus a checkpoints folder.
out_kwargs = setup_logger_kwargs(args.out_dir, args.seed)
logger = EpochLogger(**out_kwargs)
writer = SummaryWriter(os.path.join(logger.output_dir, "logs"))
with open(os.path.join(logger.output_dir, 'args.json'), 'w') as f:
    json.dump(vars(args), f, sort_keys=True, indent=4)
if not os.path.exists(os.path.join(logger.output_dir, "checkpoints")):
    os.makedirs(os.path.join(logger.output_dir, "checkpoints"))