def get_args(do_just_test=False):  # this parameter is just used for the name
    parser = get_arg_parser()

    parser.add_argument('--tag', help='terminal tag in logger', type=str, default='')
    parser.add_argument('--alg', help='backend algorithm', type=str, default='ddpg', choices=['ddpg', 'ddpg2'])
    parser.add_argument('--learn', help='type of training method', type=str, default='hgg', choices=learner_collection.keys())
    parser.add_argument('--env', help='gym env id', type=str, default='FetchReach-v1', choices=Robotics_envs_id)
    parser.add_argument('--extra_sec', help='whether to use extra distance around obstacle', type=str2bool, default=False)
    args, _ = parser.parse_known_args()

    if args.env == 'HandReach-v0':
        parser.add_argument('--goal', help='method of goal generation', type=str, default='reach', choices=['vanilla', 'reach'])
    else:
        parser.add_argument('--goal', help='method of goal generation', type=str, default='interval',
                            choices=[
                                'vanilla', 'fixobj', 'interval', 'custom', 'intervalTest', 'intervalExt',
                                'intervalColl', 'intervalRewSub', 'intervalRewVec', 'intervalTestExtendedBbox',
                                'intervalCollStop', 'intervalRewMod', 'intervalCollStopRegion', 'intervalRewModStop',
                                'intervalRewModRegion', 'intervalRewModRegionStop', 'intervalCollMinDist',
                                'intervalMinDistRewMod', 'intervalMinDistRewModStop', 'intervalTestExtendedMinDist',
                                'intervalCollPAV', 'intervalP', 'intervalPRewMod', 'intervalPRewModStop',
                                'intervalTestExtendedP', 'intervalPAV', 'intervalPAVRewMod', 'intervalPAVRewModStop',
                                'intervalTestExtendedPAV', 'intervalPRel', 'intervalPRelRewMod', 'intervalPRelRewModStop',
                                'intervalTestExtendedPRel', 'intervalPAVRel', 'intervalPAVRelRewMod',
                                'intervalPAVRelRewModStop', 'intervalTestExtendedPAVRel'
                            ])

    if args.env[:5] == 'Fetch':
        parser.add_argument('--init_offset', help='initial offset in fetch environments', type=np.float32, default=1.0)
    elif args.env[:4] == 'Hand':
        parser.add_argument('--init_rotation', help='initial rotation in hand environments', type=np.float32, default=0.25)

    args, _ = parser.parse_known_args()
    if 'RewMod' in args.goal:
        parser.add_argument('--rew_mod_val', help='value to subtract on collision', type=np.float32, default=-5.)
    if args.extra_sec:
        parser.add_argument('--sec_dist', help='security distance around obstacle', type=np.float32, default=None)

    parser.add_argument('--graph', help='g-hgg yes or no', type=str2bool, default=False)
    parser.add_argument('--show_goals', help='number of goals to show', type=np.int32, default=0)
    parser.add_argument('--play_path', help='path to meta_file directory for play', type=str, default=None)
    parser.add_argument('--play_path_im_h', help='path to meta_file directory for play; this one is just used for the heatmap that will be compared', type=str, default=None)
    parser.add_argument('--play_epoch', help='epoch to play', type=str, default='latest')
    parser.add_argument('--stop_hgg_threshold', help='threshold of goals inside goal space, between 0 and 1; deactivated by default value 2', type=np.float32, default=2)
    parser.add_argument('--agent_device', help='the device to load the agent', type=str, default='cpu')
    parser.add_argument('--n_x', help='number of vertices in x-direction for g-hgg', type=int, default=31)
    parser.add_argument('--n_y', help='number of vertices in y-direction for g-hgg', type=int, default=31)
    parser.add_argument('--n_z', help='number of vertices in z-direction for g-hgg', type=int, default=11)
    parser.add_argument('--gamma', help='discount factor', type=np.float32, default=0.98)
    parser.add_argument('--clip_return', help='whether to clip return value', type=str2bool, default=True)
    # these two arguments might be helpful if using other than sparse reward (-1, 0)
    parser.add_argument('--reward_min', help='minimum reward', type=np.float32, default=-1.)
    parser.add_argument('--reward_max', help='maximum reward', type=np.float32, default=0.)
    parser.add_argument('--eps_act', help='percentage of epsilon greedy exploration', type=np.float32, default=0.3)
    parser.add_argument('--std_act', help='standard deviation of uncorrelated gaussian exploration', type=np.float32, default=0.2)
    parser.add_argument('--pi_lr', help='learning rate of policy network', type=np.float32, default=1e-3)
    parser.add_argument('--q_lr', help='learning rate of value network', type=np.float32, default=1e-3)
    parser.add_argument('--act_l2', help='quadratic penalty on actions', type=np.float32, default=1.0)
    parser.add_argument('--polyak', help='interpolation factor in polyak averaging for DDPG', type=np.float32, default=0.95)
    parser.add_argument('--epoches', help='number of epochs', type=np.int32, default=20)
    parser.add_argument('--cycles', help='number of cycles per epoch', type=np.int32, default=20)
    parser.add_argument('--episodes', help='number of episodes per cycle', type=np.int32, default=50)
    parser.add_argument('--timesteps', help='number of timesteps per episode', type=np.int32, default=(50 if args.env[:5] == 'Fetch' else 100))
    parser.add_argument('--train_batches', help='number of batches to train per episode', type=np.int32, default=20)
    parser.add_argument('--buffer_size', help='number of episodes in replay buffer', type=np.int32, default=10000)
    parser.add_argument('--buffer_type', help='type of replay buffer / whether to use Energy-Based Prioritization', type=str, default='energy', choices=['normal', 'energy'])
    parser.add_argument('--batch_size', help='size of sample batch', type=np.int32, default=256)
    parser.add_argument('--warmup', help='number of timesteps for buffer warmup', type=np.int32, default=10000)
    parser.add_argument('--her', help='type of hindsight experience replay', type=str, default='future', choices=['none', 'final', 'future'])
    parser.add_argument('--her_ratio', help='ratio of hindsight experience replay', type=np.float32, default=0.8)
    parser.add_argument('--pool_rule', help='rule of collecting achieved states', type=str, default='full', choices=['full', 'final'])
    parser.add_argument('--hgg_c', help='weight of initial distribution in flow learner', type=np.float32, default=3.0)
    parser.add_argument('--hgg_L', help='Lipschitz constant', type=np.float32, default=5.0)
    parser.add_argument('--hgg_pool_size', help='size of achieved trajectories pool', type=np.int32, default=1000)
    parser.add_argument('--save_acc', help='save success rate', type=str2bool, default=True)

    # arguments for VAEs and images
    parser.add_argument('--vae_dist_help', help='using vaes yes or no', type=str2bool, default=False)
    parser.add_argument('--img_size', help='size of image in pixels', type=np.int32, default=84)
    parser.add_argument('--img_vid_size', help='size of video image in pixels', type=np.int32, default=500)
    # type of VAE
    parser.add_argument('--vae_type', help='type of VAE', type=str, default=None, choices=['sb', 'mixed', 'monet', 'space', 'bbox', 'faster_rcnn'])
    # type of VAE for size; if mixed or monet then the representation is shared
    parser.add_argument('--vae_size_type', help='type of VAE used for size', type=str, default='all', choices=['normal', 'sb', 'mixed', 'monet'])
    # parameters for VAE
    parser.add_argument('--latent_size_obstacle', help='size of latent space of obstacle', type=np.int32, default=None)
    parser.add_argument('--latent_size_goal', help='size of latent space of goal', type=np.int32, default=None)
    parser.add_argument('--obstacle_ind_1', help='index of 1st component of latent vector', type=np.int32, default=None)
    parser.add_argument('--obstacle_ind_2', help='index of 2nd component of latent vector', type=np.int32, default=None)
    parser.add_argument('--goal_ind_1', help='index of 1st component of latent vector', type=np.int32, default=None)
    parser.add_argument('--goal_ind_2', help='index of 2nd component of latent vector', type=np.int32, default=None)
    parser.add_argument('--goal_slot', help='slot index of the goal', type=np.int32, default=None)
    parser.add_argument('--obstacle_slot', help='slot index of the obstacle', type=np.int32, default=None)
    # parameters for size VAE
    parser.add_argument('--size_ind', help='index of size component of latent vector', type=np.int32, default=None)
    parser.add_argument('--size_ind_2', help='index of 2nd size component of latent vector', type=np.int32, default=None)
    parser.add_argument('--dist_estimator_type', help='the type of dist estimator to use, or None if not using one', type=str, default=None,
                        choices=['noneType', 'noneTypeReal', 'normal', 'realCoords', 'multiple', 'multipleReal', 'subst', 'substReal'])

    # for imaginary obstacle interactions
    parser.add_argument('--imaginary_obstacle_transitions', help='whether to expand obstacle transitions', type=str2bool, default=False)
    args, _ = parser.parse_known_args()
    if args.imaginary_obstacle_transitions:
        parser.add_argument('--im_train_freq', help='how often the imaginary transitions are used', type=int, default=5)
        parser.add_argument('--im_buffer_size', help='size of the imaginary buffer', type=int, default=400)
        parser.add_argument('--im_warmup', help='minimum amount of transitions to start sampling', type=int, default=120)
        parser.add_argument('--im_n_per_type', help='amount of fake interactions per type of interaction', type=int, default=5)

    args = parser.parse_args()
    args.num_vertices = [args.n_x, args.n_y, args.n_z]
    args.goal_based = (args.env in Robotics_envs_id)
    args.clip_return_l, args.clip_return_r = clip_return_range(args)

    if args.extra_sec and args.sec_dist is None:
        if args.vae_dist_help:
            # distance for latent space
            args.sec_dist = 0.02
        else:
            args.sec_dist = 0.009

    if args.imaginary_obstacle_transitions:
        args.im_train_counter = 0
        args.im_norm_freq = copy.copy(args.im_train_freq)
        args.im_norm_counter = 0

    if do_just_test:
        args.epoches = 1
        args.cycles = 5

    base_name = args.alg + '-' + args.env + '-' + args.goal + '-' + args.learn
    if do_just_test:
        if args.play_path is not None:
            remaining, last = os.path.split(args.play_path)
            if args.env in last:
                logger_name = 'TEST-' + last
            else:
                remaining, last = os.path.split(remaining)
                if args.env in last:
                    logger_name = 'TEST-' + last
                else:
                    logger_name = 'TEST-' + base_name
            if 'secdist' in args.play_path and not args.extra_sec:
                raise Exception('using agent trained with security distance, but test not using it. Add the same distance')
        else:
            logger_name = 'TEST-' + base_name
        if args.play_path is not None and remaining.startswith('log/'):
            rest_path = remaining[4:]
            if len(rest_path) > 0:
                logger_name = rest_path + '/' + logger_name
    else:
        logger_name = base_name

    if args.tag != '':
        logger_name = args.tag + '-' + logger_name
    if args.graph:
        logger_name = logger_name + '-graph'
    if args.stop_hgg_threshold < 1:
        logger_name = logger_name + '-stop'
    if args.dist_estimator_type is not None:
        logger_name = logger_name + '-' + args.dist_estimator_type
    if args.vae_type is not None:
        logger_name = logger_name + '-' + args.vae_type
    if 'RewMod' in args.goal:
        logger_name = logger_name + '-rewmodVal(' + str(args.rew_mod_val) + ')'
    if args.extra_sec:
        logger_name = logger_name + '-secdist({})'.format(args.sec_dist)
    if args.imaginary_obstacle_transitions:
        logger_name = logger_name + '-IMAGINARY'

    args.logger = get_logger(logger_name)
    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))

    cuda = torch.cuda.is_available()
    torch.manual_seed(1)
    device = torch.device("cuda" if cuda else "cpu")
    args.device = device

    # extensions from interval_ext
    if args.goal in ['intervalRewVec']:
        args.reward_dims = 2
    else:
        args.reward_dims = 1
    args.colls_test_check_envs = [
        'intervalTestExtendedBbox', 'intervalTestExtendedMinDist', 'intervalTestExtendedP', 'intervalTestExtendedPAV',
        'intervalTestExtendedPRel', 'intervalTestExtendedPAVRel', 'intervalTest'
    ]
    return args
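# Usage sketch (illustrative only; the entry-point script name is an assumption, not part of
# this file). The conditionally registered flags above only exist when their gating flag is
# set on the command line: --rew_mod_val requires a 'RewMod' goal, --sec_dist requires
# --extra_sec True, and the --im_* flags require --imaginary_obstacle_transitions True.
#
#   # hypothetical invocation of a training script built on get_args():
#   #   python train.py --env FetchPush-v1 --goal intervalRewMod --rew_mod_val -5.0 \
#   #                   --extra_sec True --sec_dist 0.02
#
#   # calling get_args(do_just_test=True) from a test script forces 1 epoch / 5 cycles
#   # and prefixes the logger name with 'TEST-'.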
def get_args():
    parser = get_arg_parser()

    # basic arguments
    parser.add_argument('--tag', help='terminal tag in logger', type=str, default='')
    parser.add_argument('--gpu', help='which gpu to use', type=int, default=0)
    parser.add_argument('--env', help='gym env id', type=str, default='Pong')
    parser.add_argument('--alg', help='backend algorithm', type=str, default='dqn', choices=algorithm_collection.keys())
    parser.add_argument('--learn', help='type of training method', type=str, default='normal', choices=learner_collection.keys())
    args, _ = parser.parse_known_args()

    # env arguments
    parser.add_argument('--gamma', help='discount factor', type=np.float32, default=0.99)

    def atari_args():
        parser.set_defaults(learn='atari')
        parser.add_argument('--sticky', help='whether to use sticky actions', type=str2bool, default=False)
        parser.add_argument('--xian', help='whether to use xian group', type=str2bool, default=False)
        parser.add_argument('--noop', help='number of noop actions while starting new episode', type=np.int32, default=30)
        parser.add_argument('--frames', help='number of stacked frames', type=np.int32, default=4)
        parser.add_argument('--rews_scale', help='scale of rewards', type=np.float32, default=1.0)
        parser.add_argument('--test_eps', help='random action noise in atari testing', type=np.float32, default=0.001)

    env_args_collection = {'atari': atari_args}
    env_args_collection[envs_collection[args.env]]()

    # training arguments
    parser.add_argument('--epoches', help='number of epochs', type=np.int32, default=20)
    parser.add_argument('--cycles', help='number of cycles per epoch', type=np.int32, default=20)
    parser.add_argument('--iterations', help='number of iterations per cycle', type=np.int32, default=100)
    parser.add_argument('--timesteps', help='number of timesteps per iteration', type=np.int32, default=500)

    # testing arguments
    parser.add_argument('--test_rollouts', help='number of rollouts to test per cycle', type=np.int32, default=5)
    parser.add_argument('--test_timesteps', help='number of timesteps per rollout', type=np.int32, default=27000)
    parser.add_argument('--save_rews', help='save cumulative rewards', type=str2bool, default=False)
    parser.add_argument('--save_Q', help='save Q estimation', type=str2bool, default=False)

    # buffer arguments
    parser.add_argument('--buffer', help='type of replay buffer', type=str, default='default', choices=buffer_collection)
    parser.add_argument('--buffer_size', help='number of transitions in replay buffer', type=np.int32, default=1000000)
    parser.add_argument('--batch_size', help='size of sample batch', type=np.int32, default=32)
    parser.add_argument('--warmup', help='number of timesteps for buffer warmup', type=np.int32, default=2000)  # modified

    # algorithm arguments
    def q_learning_args():
        parser.add_argument('--train_batches', help='number of batches to train per iteration', type=np.int32, default=25)
        parser.add_argument('--train_target', help='frequency of target network updating', type=np.int32, default=8000)
        parser.add_argument('--eps_l', help='beginning percentage of epsilon greedy exploration', type=np.float32, default=1.00)
        parser.add_argument('--eps_r', help='final percentage of epsilon greedy exploration', type=np.float32, default=0.01)
        parser.add_argument('--eps_decay', help='number of steps to decay epsilon', type=np.int32, default=250000)
        parser.add_argument('--optimizer', help='optimizer to use', type=str, default='adam', choices=['adam', 'rmsprop'])
        args, _ = parser.parse_known_args()
        if args.optimizer == 'adam':
            parser.add_argument('--q_lr', help='learning rate of value network', type=np.float32, default=0.625e-4)
            parser.add_argument('--Adam_eps', help='epsilon factor of Adam optimizer', type=np.float32, default=1.5e-4)
        elif args.optimizer == 'rmsprop':
            parser.add_argument('--q_lr', help='learning rate of value network', type=np.float32, default=2.5e-4)
            parser.add_argument('--RMSProp_decay', help='decay factor of RMSProp optimizer', type=np.float32, default=0.95)
            parser.add_argument('--RMSProp_eps', help='epsilon factor of RMSProp optimizer', type=np.float32, default=1e-2)
        parser.add_argument('--nstep', help='parameter for n-step bootstrapping', type=np.int32, default=1)

    def dqn_args():
        # q_learning_args()
        ddq_args()
        parser.add_argument('--double', help='whether to use double trick', type=str2bool, default=False)
        # parser.add_argument('--dueling', help='whether to use dueling trick', type=str2bool, default=False)

    def cddqn_args():
        q_learning_args()
        parser.add_argument('--dueling', help='whether to use dueling trick', type=str2bool, default=False)

    def mmdqn_args():
        q_learning_args()
        parser.add_argument('--dueling', help='whether to use dueling trick', type=str2bool, default=False)

    def lrdqn_args():
        q_learning_args()
        parser.add_argument('--double', help='whether to use double trick', type=str2bool, default=False)
        parser.add_argument('--rank', help='rank of value matrix', type=np.int32, default=3)
        parser.add_argument('--beta', help='weight of sparsity loss', type=np.float32, default=1.0)

    def ddq_args():
        q_learning_args()
        parser.add_argument('--inner_q_type', help='inner Q-target aggregation: min (TD3 trick), double (double-Q trick), mean (none)', type=str, default='min')
        # parser.add_argument('--td4', help='whether to use td3 trick', type=str2bool, default=False)
        parser.add_argument('--alpha', help='leaky relu parameter', type=float, default=1.)
        parser.add_argument('--tau', help='parameter for smooth target update', type=float, default=1.)
        parser.add_argument('--num_q', help='number of q to use', type=np.int32, default=4)
        parser.add_argument('--beta', help='if >0 use lambda return, else use max', type=np.float32, default=-1.)
        parser.add_argument('--state_dim', help='for representation, not used now', type=int, default=32)
        parser.add_argument('--dueling', help='whether to use dueling trick', type=str2bool, default=False)
        parser.add_argument('--max_step', help='max step to truncate', type=int, default=-1)

    algorithm_args_collection = {
        'dqn': dqn_args,
        'cddqn': cddqn_args,
        'mmdqn': mmdqn_args,
        'avedqn': dqn_args,
        'lrdqn': lrdqn_args,
        'ddq': ddq_args,
        'ddq6': ddq_args,
        'amc': ddq_args,
    }
    algorithm_args_collection[args.alg]()

    # learner arguments
    def lb_args():
        parser.add_argument('--lb_type', help='type of lower-bound objective', type=str, default='hard', choices=['hard', 'soft'])

    def hash_args():
        lb_args()
        parser.add_argument('--avg_n', help='number of trajectories for moving average', type=np.int32, default=5)

    learner_args_collection = {'atari_lb': lb_args, 'atari_hash_lb': hash_args, 'atari_vi_lb': hash_args}
    if args.learn in learner_args_collection.keys():
        learner_args_collection[args.learn]()

    args = parser.parse_args()
    get_policy_train_type(args)

    logger_name = args.alg + '-' + args.env + '-' + args.learn
    if args.tag != '':
        logger_name = args.tag + '-' + logger_name
    args.logger = get_logger(logger_name)
    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))

    return args
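# Note (sketch, not part of the original file): the per-algorithm helpers compose, so the
# flag set seen on the command line depends on --alg. `dqn_args` delegates to `ddq_args`,
# which in turn calls `q_learning_args`, so an `--alg dqn` run also accepts the ddq-specific
# flags (--inner_q_type, --num_q, --tau, ...). Hypothetical invocation of a launcher built
# on this parser (the script name `main.py` is illustrative):
#
#   python main.py --alg ddq --env Pong --num_q 4 --inner_q_type min --learn atari_lb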
def get_args():
    parser = get_arg_parser()

    parser.add_argument('--tag', help='terminal tag in logger', type=str, default='')
    parser.add_argument('--alg', help='backend algorithm', type=str, default='ddpg', choices=['ddpg', 'ddpg2'])
    parser.add_argument('--learn', help='type of training method', type=str, default='hgg', choices=learner_collection.keys())
    parser.add_argument('--env', help='gym env id', type=str, default='FetchReach-v1', choices=Robotics_envs_id + Kuka_envs_id)
    args, _ = parser.parse_known_args()

    if args.env == 'HandReach-v0':
        parser.add_argument('--goal', help='method of goal generation', type=str, default='reach', choices=['vanilla', 'reach'])
    else:
        parser.add_argument('--goal', help='method of goal generation', type=str, default='interval', choices=['vanilla', 'fixobj', 'interval', 'custom'])
    if args.env[:5] == 'Fetch':
        parser.add_argument('--init_offset', help='initial offset in fetch environments', type=np.float32, default=1.0)
    elif args.env[:4] == 'Hand':
        parser.add_argument('--init_rotation', help='initial rotation in hand environments', type=np.float32, default=0.25)

    parser.add_argument('--graph', help='g-hgg yes or no', type=str2bool, default=False)
    parser.add_argument('--route', help='use route to help hgg find target or not', type=str2bool, default=False)  # route only for testing
    parser.add_argument('--show_goals', help='number of goals to show', type=np.int32, default=0)
    parser.add_argument('--play_path', help='path to meta_file directory for play', type=str, default=None)
    parser.add_argument('--play_epoch', help='epoch to play', type=str, default='latest')
    parser.add_argument('--stop_hgg_threshold', help='threshold of goals inside goal space, between 0 and 1; deactivated by default value 2', type=np.float32, default=2)
    parser.add_argument('--n_x', help='number of vertices in x-direction for g-hgg', type=int, default=31)
    parser.add_argument('--n_y', help='number of vertices in y-direction for g-hgg', type=int, default=31)
    parser.add_argument('--n_z', help='number of vertices in z-direction for g-hgg', type=int, default=11)
    parser.add_argument('--gamma', help='discount factor', type=np.float32, default=0.98)
    parser.add_argument('--clip_return', help='whether to clip return value', type=str2bool, default=True)
    parser.add_argument('--eps_act', help='percentage of epsilon greedy exploration', type=np.float32, default=0.3)
    parser.add_argument('--std_act', help='standard deviation of uncorrelated gaussian exploration', type=np.float32, default=0.2)
    parser.add_argument('--pi_lr', help='learning rate of policy network', type=np.float32, default=1e-3)
    parser.add_argument('--q_lr', help='learning rate of value network', type=np.float32, default=1e-3)
    parser.add_argument('--act_l2', help='quadratic penalty on actions', type=np.float32, default=1.0)
    parser.add_argument('--polyak', help='interpolation factor in polyak averaging for DDPG', type=np.float32, default=0.95)
    parser.add_argument('--epoches', help='number of epochs', type=np.int32, default=20)
    parser.add_argument('--cycles', help='number of cycles per epoch', type=np.int32, default=20)
    parser.add_argument('--episodes', help='number of episodes per cycle', type=np.int32, default=50)
    parser.add_argument('--timesteps', help='number of timesteps per episode', type=np.int32, default=(50 if args.env[:5] == 'Fetch' else 100))
    parser.add_argument('--train_batches', help='number of batches to train per episode', type=np.int32, default=20)
    parser.add_argument('--curriculum', help='whether to use curriculum', type=str2bool, default=False)
    parser.add_argument('--buffer_size', help='number of episodes in replay buffer', type=np.int32, default=10000)
    parser.add_argument('--buffer_type', help='type of replay buffer / whether to use Energy-Based Prioritization', type=str, default='energy', choices=['normal', 'energy'])
    parser.add_argument('--rhg', help='record hindsight goals in different learning stages or not', type=str2bool, default=False)
    parser.add_argument('--batch_size', help='size of sample batch', type=np.int32, default=256)
    parser.add_argument('--warmup', help='number of timesteps for buffer warmup', type=np.int32, default=10000)
    parser.add_argument('--her', help='type of hindsight experience replay', type=str, default='future', choices=['none', 'final', 'future'])
    parser.add_argument('--her_ratio', help='ratio of hindsight experience replay', type=np.float32, default=0.8)
    parser.add_argument('--pool_rule', help='rule of collecting achieved states', type=str, default='full', choices=['full', 'final'])
    parser.add_argument('--hgg_c', help='weight of initial distribution in flow learner', type=np.float32, default=3.0)
    parser.add_argument('--hgg_L', help='Lipschitz constant', type=np.float32, default=5.0)
    parser.add_argument('--hgg_pool_size', help='size of achieved trajectories pool', type=np.int32, default=1000)
    parser.add_argument('--balance_sigma', help='balance parameter', type=np.float32, default=0.3)
    parser.add_argument('--balance_eta', help='balance parameter', type=np.float32, default=1000)
    parser.add_argument('--record', help='record videos', type=str2bool, default=False)
    parser.add_argument('--save_acc', help='save success rate', type=str2bool, default=True)

    args = parser.parse_args()
    args.num_vertices = [args.n_x, args.n_y, args.n_z]
    args.goal_based = (args.env in (Robotics_envs_id + Kuka_envs_id))
    args.clip_return_l, args.clip_return_r = clip_return_range(args)

    logger_name = args.alg + '-' + args.env + '-' + args.learn
    if args.tag != '':
        logger_name = args.tag + '-' + logger_name
    if args.graph:
        logger_name = logger_name + '-graph'
    if args.stop_hgg_threshold < 1:
        logger_name = logger_name + '-stop'
    if args.curriculum:
        logger_name = logger_name + '-curriculum'
    args.logger = get_logger(logger_name)
    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))

    return args
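# Sketch of the logger name composed above (illustrative values, assuming the defaults for
# --alg and --learn): with
#   --tag run1 --graph True --curriculum True --env FetchPush-v1
# the name becomes
#   run1-ddpg-FetchPush-v1-hgg-graph-curriculum
# (the log root and file layout depend on get_logger(), which is defined elsewhere).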
def get_args():
    parser = get_arg_parser()

    parser.add_argument('--tag', help='terminal tag in logger', type=str, default='')
    parser.add_argument('--alg', help='backend algorithm', type=str, default='ddpg', choices=['ddpg', 'ddpg2'])
    parser.add_argument('--learn', help='type of training method', type=str, default='hgg', choices=learner_collection.keys())
    parser.add_argument('--env', help='gym env id', type=str, default='FetchReach-v1', choices=Robotics_envs_id)
    args, _ = parser.parse_known_args()

    if args.env == 'HandReach-v0':
        parser.add_argument('--goal', help='method of goal generation', type=str, default='reach', choices=['vanilla', 'reach'])
    else:
        parser.add_argument('--goal', help='method of goal generation', type=str, default='interval', choices=['vanilla', 'fixobj', 'interval', 'obstacle'])
    if args.env[:5] == 'Fetch':
        parser.add_argument('--init_offset', help='initial offset in fetch environments', type=np.float32, default=1.0)
    elif args.env[:4] == 'Hand':
        parser.add_argument('--init_rotation', help='initial rotation in hand environments', type=np.float32, default=0.25)

    parser.add_argument('--gamma', help='discount factor', type=np.float32, default=0.98)
    parser.add_argument('--clip_return', help='whether to clip return value', type=str2bool, default=True)
    parser.add_argument('--eps_act', help='percentage of epsilon greedy exploration', type=np.float32, default=0.3)
    parser.add_argument('--std_act', help='standard deviation of uncorrelated gaussian exploration', type=np.float32, default=0.2)
    parser.add_argument('--pi_lr', help='learning rate of policy network', type=np.float32, default=1e-3)
    parser.add_argument('--q_lr', help='learning rate of value network', type=np.float32, default=1e-3)
    parser.add_argument('--act_l2', help='quadratic penalty on actions', type=np.float32, default=1.0)
    parser.add_argument('--polyak', help='interpolation factor in polyak averaging for DDPG', type=np.float32, default=0.95)
    parser.add_argument('--epochs', help='number of epochs', type=np.int32, default=20)
    parser.add_argument('--cycles', help='number of cycles per epoch', type=np.int32, default=20)
    parser.add_argument('--episodes', help='number of episodes per cycle', type=np.int32, default=50)
    parser.add_argument('--timesteps', help='number of timesteps per episode', type=np.int32, default=(50 if args.env[:5] == 'Fetch' else 100))
    parser.add_argument('--train_batches', help='number of batches to train per episode', type=np.int32, default=20)
    parser.add_argument('--buffer_size', help='number of episodes in replay buffer', type=np.int32, default=10000)
    parser.add_argument('--buffer_type', help='type of replay buffer / whether to use Energy-Based Prioritization', type=str, default='energy', choices=['normal', 'energy'])
    parser.add_argument('--batch_size', help='size of sample batch', type=np.int32, default=256)
    parser.add_argument('--warmup', help='number of timesteps for buffer warmup', type=np.int32, default=10000)
    parser.add_argument('--her', help='type of hindsight experience replay', type=str, default='future', choices=['none', 'final', 'future'])
    parser.add_argument('--her_ratio', help='ratio of hindsight experience replay', type=np.float32, default=0.8)
    parser.add_argument('--pool_rule', help='rule of collecting achieved states', type=str, default='full', choices=['full', 'final'])
    parser.add_argument('--hgg_c', help='weight of initial distribution in flow learner', type=np.float32, default=3.0)
    parser.add_argument('--hgg_L', help='Lipschitz constant', type=np.float32, default=5.0)
    parser.add_argument('--hgg_pool_size', help='size of achieved trajectories pool', type=np.int32, default=1000)
    parser.add_argument('--save_acc', help='save success rate', type=str2bool, default=True)

    args = parser.parse_args()
    args.goal_based = (args.env in Robotics_envs_id)
    args.clip_return_l, args.clip_return_r = clip_return_range(args)

    logger_name = args.alg + '-' + args.env + '-' + args.learn
    if args.tag != '':
        logger_name = args.tag + '-' + logger_name
    args.logger = get_logger(logger_name)
    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))

    return args
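# The two-stage pattern used throughout these parsers (sketch): the first
# parse_known_args() call reads only the flags registered so far (e.g. --env) and ignores
# the rest, so env-specific arguments can be registered conditionally before the final
# parse_args() call validates the full command line. For example (hypothetical script name):
#
#   python train.py --env HandReach-v0 --goal reach   # 'reach' is only a valid choice
#                                                     # because --env was inspected first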
def get_config():
    parser = get_arg_parser()

    parser.add_argument('--seed', help='random seed', type=int, default=0)

    # hardware configuration
    parser.add_argument('--gpu', help='which gpu the experiment is assigned to', type=str, default="0")

    # extension modules
    parser.add_argument('--sr', help='whether to use Sibling Rivalry', type=str2bool, default=False)
    parser.add_argument('--goalgan', help='whether to use goalgan', type=str2bool, default=False)
    parser.add_argument('--fgi', help='relabel the goal with foresight goal inference', type=str2bool, default=False)
    parser.add_argument('--foresight_length', help='foresight length', type=int, default=10)
    parser.add_argument('--goal_generator', help='whether to use the goal generator', type=str2bool, default=False)
    parser.add_argument('--model_based_training', help='whether to use model-based training', type=str2bool, default=False)
    parser.add_argument('--training_freq', help='training frequency', type=int, default=10)
    parser.add_argument('--extend_length', help='extend length', type=int, default=3)
    parser.add_argument('--her_before_fgi', help='apply her before fgi', type=str2bool, default=True)
    parser.add_argument('--test_last_step', help='judge success using only the last step', type=str2bool, default=False)

    # env model configs
    parser.add_argument('--fake', help='use env model', type=str2bool, default=False)
    parser.add_argument('--env_num_networks', help='number of networks in the env model', type=int, default=6)
    parser.add_argument('--env_elites', help='number of elite networks', type=int, default=3)
    parser.add_argument('--env_hidden', help='hidden size of the env model', type=int, default=200)
    parser.add_argument('--distance_threshold', help='distance threshold', type=float, default=0.05)

    parser.add_argument('--tag', help='terminal tag in logger', type=str, default='')
    parser.add_argument('--alg', help='backend algorithm', type=str, default='ddpg', choices=['ddpg', 'ddpg2'])
    parser.add_argument('--learn', help='type of training method', type=str, default='hgg', choices=learner_collection.keys())
    parser.add_argument('--env', help='gym env id', type=str, default='FetchReach-v1')  # choices removed here so that non-robotics envs (see env_alt below) can be used
    args, _ = parser.parse_known_args()

    if args.env == 'HandReach-v0':
        parser.add_argument('--goal', help='method of goal generation', type=str, default='reach', choices=['vanilla', 'reach'])
    else:
        parser.add_argument('--goal', help='method of goal generation', type=str, default='interval', choices=['vanilla', 'fixobj', 'interval'])
    if args.env[:5] == 'Fetch':
        parser.add_argument('--init_offset', help='initial offset in fetch environments', type=np.float32, default=1.0)
    elif args.env[:4] == 'Hand':
        parser.add_argument('--init_rotation', help='initial rotation in hand environments', type=np.float32, default=0.25)

    parser.add_argument('--gamma', help='discount factor', type=np.float32, default=0.98)
    parser.add_argument('--clip_return', help='whether to clip return value', type=str2bool, default=True)
    parser.add_argument('--eps_act', help='percentage of epsilon greedy exploration', type=np.float32, default=0.3)
    parser.add_argument('--std_act', help='standard deviation of uncorrelated gaussian exploration', type=np.float32, default=0.2)
    parser.add_argument('--pi_lr', help='learning rate of policy network', type=np.float32, default=1e-3)
    parser.add_argument('--q_lr', help='learning rate of value network', type=np.float32, default=1e-3)
    parser.add_argument('--act_l2', help='quadratic penalty on actions', type=np.float32, default=1.0)
    parser.add_argument('--polyak', help='interpolation factor in polyak averaging for DDPG', type=np.float32, default=0.95)
    parser.add_argument('--epoches', help='number of epochs', type=np.int32, default=20)
    parser.add_argument('--cycles', help='number of cycles per epoch', type=np.int32, default=15)
    parser.add_argument('--episodes', help='number of episodes per cycle', type=np.int32, default=50)
    parser.add_argument('--timesteps', help='number of timesteps per episode', type=np.int32, default=(50 if args.env[:5] == 'Fetch' else 100))
    parser.add_argument('--train_batches', help='number of batches to train per episode', type=np.int32, default=20)
    parser.add_argument('--buffer_size', help='number of episodes in replay buffer', type=np.int32, default=10000)
    parser.add_argument('--buffer_type', help='type of replay buffer / whether to use Energy-Based Prioritization', type=str, default='normal', choices=['normal', 'energy'])
    parser.add_argument('--batch_size', help='size of sample batch', type=np.int32, default=256)
    parser.add_argument('--warmup', help='number of timesteps for buffer warmup', type=np.int32, default=10000)
    parser.add_argument('--her', help='type of hindsight experience replay', type=str, default='future', choices=['none', 'final', 'future'])
    parser.add_argument('--her_ratio', help='ratio of hindsight experience replay', type=np.float32, default=0.8)
    parser.add_argument('--pool_rule', help='rule of collecting achieved states', type=str, default='full', choices=['full', 'final'])
    parser.add_argument('--hgg_c', help='weight of initial distribution in flow learner', type=np.float32, default=3.0)
    parser.add_argument('--hgg_L', help='Lipschitz constant', type=np.float32, default=5.0)
    parser.add_argument('--hgg_pool_size', help='size of achieved trajectories pool', type=np.int32, default=1000)
    parser.add_argument('--save_acc', help='save success rate', type=str2bool, default=True)

    args = parser.parse_args()

    # gpu visibility setting
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    args.goal_based = True
    args.clip_return_l, args.clip_return_r = clip_return_range(args)

    logger_name = args.alg + '-' + args.env + '-' + args.learn
    if args.tag != '':
        logger_name = args.tag + '-' + logger_name
    args.logger = get_logger(logger_name)
    for key, value in args.__dict__.items():
        if key != 'logger':
            args.logger.info('{}: {}'.format(key, value))

    # predefine the corresponding dims in different envs
    env_alt = {
        'Mountaincar-v0': {'start_in_obs': 0, 'end_in_obs': 1, 'desire_dim': 1, 'step_fake_param': 3, 'env_model_obs_dim': 2, 'env_model_act_dim': 1},
        'FetchPush-v1': {'start_in_obs': 3, 'end_in_obs': 6, 'desire_dim': 3, 'step_fake_param': 29, 'env_model_obs_dim': 25, 'env_model_act_dim': 4},
        'World-v0': {'start_in_obs': 0, 'end_in_obs': 2, 'desire_dim': 2, 'step_fake_param': 4, 'env_model_obs_dim': 2, 'env_model_act_dim': 2},
        'FetchReach-v1': {'start_in_obs': 0, 'end_in_obs': 3, 'desire_dim': 3, 'step_fake_param': 14, 'env_model_obs_dim': 10, 'env_model_act_dim': 4},
        'Pendulum-v0': {'start_in_obs': 1, 'end_in_obs': 3, 'desire_dim': 2, 'step_fake_param': 4, 'env_model_obs_dim': 3, 'env_model_act_dim': 1},
        'AntLocomotion-v0': {'start_in_obs': 0, 'end_in_obs': 2, 'desire_dim': 2, 'step_fake_param': 37, 'env_model_obs_dim': 29, 'env_model_act_dim': 8},
        'AntLocomotionDiverse-v0': {'start_in_obs': 0, 'end_in_obs': 2, 'desire_dim': 2, 'step_fake_param': 37, 'env_model_obs_dim': 29, 'env_model_act_dim': 8},
        'HalfCheetahGoal-v0': {'start_in_obs': 8, 'end_in_obs': 9, 'desire_dim': 1, 'step_fake_param': 23, 'env_model_obs_dim': 17, 'env_model_act_dim': 6},
        'Reacher-v0': {'start_in_obs': 9, 'end_in_obs': 11, 'desire_dim': 2, 'step_fake_param': 14, 'env_model_obs_dim': 11, 'env_model_act_dim': 3},
    }
    args.env_params = env_alt[args.env]
    args.model_loss_log_name = args.tag + time.strftime('-(%Y-%m-%d-%H:%M:%S)')

    return args
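# Observations derived from the env_alt table above (not stated explicitly in the code):
# for every entry, step_fake_param == env_model_obs_dim + env_model_act_dim, i.e. the
# learned env model takes the concatenated observation and action as input, and
# end_in_obs - start_in_obs == desire_dim, so [start_in_obs:end_in_obs] presumably selects
# the goal-relevant slice of the observation. An illustrative check of these relations:
#
#   for name, p in env_alt.items():
#       assert p['step_fake_param'] == p['env_model_obs_dim'] + p['env_model_act_dim']
#       assert p['end_in_obs'] - p['start_in_obs'] == p['desire_dim']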