def run_sac_experiment(main, mode, include_folders=None, log_dir=None,
                       exp_prefix="experiment", exp_name=None, **kwargs):
    """Fill in naming/logging defaults and hand off to ``run_experiment_lite``.

    Args:
        main: Callable forwarded to ``run_experiment_lite`` as
            ``stub_method_call``.
        mode: Execution mode, forwarded unchanged. When it equals ``'ec2'``
            the ``'sac'`` folder is added to the folders that get symlinked.
        include_folders: Optional iterable of folders; each one is passed to
            ``_create_symlink``. The caller's object is never modified.
        log_dir: Log directory. When ``None`` it is built as
            ``DEFAULT_LOG_DIR/local/<exp_prefix with '_' -> '-'>/<exp_name>``.
        exp_prefix: Experiment prefix, forwarded unchanged.
        exp_name: Experiment name; defaults to ``timestamp()``.
        **kwargs: Extra options forwarded to ``run_experiment_lite``. The
            ``added_project_directories`` key is always overwritten with the
            results of ``_create_symlink``.
    """
    if exp_name is None:
        exp_name = timestamp()

    if log_dir is None:
        log_dir = os.path.join(
            DEFAULT_LOG_DIR,
            "local",
            exp_prefix.replace("_", "-"),
            exp_name,
        )

    # Work on a private copy: appending 'sac' below must not leak into the
    # list object owned by the caller (and a tuple argument must also work).
    folders = [] if include_folders is None else list(include_folders)
    if mode == 'ec2':
        folders.append('sac')

    all_symlinks = [_create_symlink(folder) for folder in folders]
    kwargs.update(added_project_directories=all_symlinks)

    run_experiment_lite(
        stub_method_call=main,
        mode=mode,
        exp_prefix=exp_prefix,
        exp_name=exp_name,
        log_dir=log_dir,
        **kwargs
    )
def parse_args():
    """Parse the command line and derive ``env`` and ``log_dir``.

    Besides the parsed options, the returned namespace carries two computed
    attributes:

    * ``env``: ``ENVS[args.envn]``.
    * ``log_dir``: a path below ``/home/wisrl/Downloads/log_sac/Mujoco/``
      assembled from the environment name, the cost type, the task and the
      scale.

    Returns:
        argparse.Namespace: parsed options plus ``env`` and ``log_dir``.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    # NOTE: the default is the int 10, so it is rendered as '10' in the log
    # directory, whereas an explicit '--cost_type 10' is parsed to 10.0.
    add('--cost_type', type=float, default=10)
    add('--domain', type=str, choices=AVAILABLE_DOMAINS, default=None)
    add('--task', type=str, choices=AVAILABLE_TASKS, default='default')
    add('--policy', type=str, choices=('gaussian', 'gmm', 'lsp'),
        default='gaussian')
    add('--envn', type=int, default=10)
    add('--scale', type=float, default=1)
    add('--exp_name', type=str, default=timestamp())
    add('--mode', type=str, default='local')
    args = parser.parse_args()

    args.env = ENVS[args.envn]

    # Suffixes appended to the environment name, in order.
    env_suffixes = ['_c%s' % args.cost_type]
    if args.task == 'delayed':
        env_suffixes.append('_delayed%s' % str(DELAY_CONST))
    if 'cross' in args.task:
        env_suffixes.append('_cross')

    # The leaf directory only carries the scale when it differs from 1.
    if args.scale == 1.0:
        leaf_dir = '/SAC'
    else:
        leaf_dir = '/SAC' + '_s%s' % str(args.scale)

    args.log_dir = (
        '/home/wisrl/Downloads/log_sac/Mujoco/'
        + args.env
        + ''.join(env_suffixes)
        + leaf_dir
    )
    return args
def parse_args():
    """Parse the command-line options of the experiment launcher.

    The default of ``--exp_name`` is ``timestamp()``, evaluated when this
    function is called.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()

    option_table = (
        ('--domain', dict(type=str, choices=AVAILABLE_DOMAINS, default=None)),
        ('--task', dict(type=str, choices=AVAILABLE_TASKS, default='default')),
        ('--policy', dict(type=str, choices=('gaussian', 'gmm', 'lsp'),
                          default='gaussian')),
        ('--env', dict(type=str, default=DEFAULT_ENV)),
        ('--exp_name', dict(type=str, default=timestamp())),
        ('--mode', dict(type=str, default='local')),
        ('--log_dir', dict(type=str, default=None)),
        # RC: added to run time-complexity experiments.
        ('--n_train_repeat', dict(type=int, default=1)),
        ('--n_parallel', dict(type=int, default=1)),
        ('--n_epochs', dict(type=int, default=1000)),
        ('--gpu_fraction', dict(type=float, default=1.0)),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    return parser.parse_args()
def parse_args():
    """Parse the launcher's command-line options.

    Options: ``--env`` (one of ``AVAILABLE_ENVS``, default ``'swimmer'``),
    ``--exp_name`` (default ``timestamp()``), ``--mode`` (default
    ``'local'``) and ``--log_dir`` (default ``None``).

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()
    register = parser.add_argument

    register('--env', type=str, choices=AVAILABLE_ENVS, default='swimmer')
    register('--exp_name', type=str, default=timestamp())
    register('--mode', type=str, default='local')
    register('--log_dir', type=str, default=None)

    return parser.parse_args()
def parse_args():
    """Parse the command-line options of the regularization experiments.

    The boolean switches ``--batchnormpi`` and ``--batchnormvf`` accept an
    explicit value (``True``/``False``, ``1``/``0``, ``yes``/``no``, ...).
    They previously used ``type=bool``, which turns every non-empty string,
    including ``"False"``, into ``True``.

    Returns:
        argparse.Namespace: the parsed options.

    Raises:
        SystemExit: raised by argparse on invalid arguments, including an
            unrecognized boolean value.
    """

    def _str_to_bool(text):
        """Convert a command-line token to a real boolean."""
        token = text.strip().lower()
        if token in ('true', 't', 'yes', 'y', 'on', '1'):
            return True
        # The empty string stays falsy, as it was with ``bool('')``.
        if token in ('false', 'f', 'no', 'n', 'off', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got %r' % text)

    parser = argparse.ArgumentParser()
    parser.add_argument('--domain', type=str, choices=AVAILABLE_DOMAINS,
                        default=None)
    parser.add_argument('--task', type=str, choices=AVAILABLE_TASKS,
                        default='default')
    parser.add_argument('--policy', type=str,
                        choices=('gaussian', 'gmm', 'lsp'),
                        default='gaussian')
    parser.add_argument('--env', type=str, default=DEFAULT_ENV)
    parser.add_argument('--exp_name', type=str, default=timestamp())
    parser.add_argument('--mode', type=str, default='local')
    parser.add_argument('--tau', type=float, default=0.005)
    parser.add_argument('--log_dir', type=str, default=None)
    # NOTE(review): -1.0 looks like a "not set" sentinel -- confirm against
    # the code that consumes args.lr.
    parser.add_argument('--lr', type=float, default=-1.0)

    # L1 / L2 regularization of the policy.
    parser.add_argument('--l1regpi', type=float, default=0.0)
    parser.add_argument('--l2regpi', type=float, default=0.0)
    # L1 / L2 regularization of the value function (V only; the two Q
    # networks are not regularized, for simplicity).
    parser.add_argument('--l1regvf', type=float, default=0.0)
    parser.add_argument('--l2regvf', type=float, default=0.0)
    # Weight clipping for the policy / value networks.
    parser.add_argument('--wclippi', type=float, default=0.0)
    parser.add_argument('--wclipvf', type=float, default=0.0)
    # Dropout keep probability for the policy / value networks.
    parser.add_argument('--dropoutpi', type=float, default=1.0)
    parser.add_argument('--dropoutvf', type=float, default=1.0)
    # Entropy regularization coefficient. This is not the temperature of the
    # maximum-entropy formulation; in this SAC implementation that
    # temperature term is fixed to 1.
    parser.add_argument('--ent_coef', type=float, default=0.0)
    # Batch normalization for the policy / value networks (True/False).
    parser.add_argument('--batchnormpi', type=_str_to_bool, default=False)
    parser.add_argument('--batchnormvf', type=_str_to_bool, default=False)

    # parser.add_argument('--gaussianreg', type=float, default=1e-3)
    # This term appears in the original code release and regularizes the mu
    # and logsigma of the policy output. However, this regularization term is
    # not applied when a gaussian policy is used. Since all of our
    # experiments adopt the gaussian policy, this term does not affect our
    # results in any way.

    # NOTE(review): -1.0 looks like a "not set" sentinel -- confirm against
    # the code that consumes args.reward_scale.
    parser.add_argument('--reward_scale', type=float, default=-1.0)
    parser.add_argument('--num_hidden', type=int, default=256)
    parser.add_argument('--seed', type=int, default=1)
    # Save paths for the policy / value networks.
    parser.add_argument('--policypath', type=str, default='')
    parser.add_argument('--valuepath', type=str, default='')

    args = parser.parse_args()
    return args
def main():
    """Run the 'multigoal' experiment locally via ``run_experiment_lite``.

    The only value taken from the command line is ``policy_type``, which is
    passed on inside the ``variant`` dictionary.
    """
    cli_args = parse_args()

    launch_options = dict(
        exp_prefix='multigoal',
        variant={'policy_type': cli_args.policy_type},
        snapshot_mode='last',
        n_parallel=1,
        seed=1,
        mode='local',
    )
    # The experiment name is the timestamp taken right before launching.
    launch_options['exp_name'] = timestamp()

    run_experiment_lite(run, **launch_options)
def parse_args():
    """Parse the command-line options of the launcher.

    ``--no_graphics`` accepts an explicit boolean value (``True``/``False``,
    ``1``/``0``, ``yes``/``no``, ...). It previously used ``type=bool``,
    which turns every non-empty string, including ``"False"``, into ``True``.

    Returns:
        argparse.Namespace: the parsed options.

    Raises:
        SystemExit: raised by argparse on invalid arguments, including an
            unrecognized boolean value.
    """

    def _str_to_bool(text):
        """Convert a command-line token to a real boolean."""
        token = text.strip().lower()
        if token in ('true', 't', 'yes', 'y', 'on', '1'):
            return True
        # The empty string stays falsy, as it was with ``bool('')``.
        if token in ('false', 'f', 'no', 'n', 'off', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value, got %r' % text)

    parser = argparse.ArgumentParser()
    parser.add_argument('--env', type=str, choices=AVAILABLE_ENVS,
                        default='swimmer')
    parser.add_argument('--exp_name', type=str, default=timestamp())
    parser.add_argument('--mode', type=str, default='local')
    parser.add_argument('--log_dir', type=str, default='./logs/unity')
    parser.add_argument('--idx', type=int, default=0)
    parser.add_argument('--no_graphics', type=_str_to_bool, default=False)

    args = parser.parse_args()
    return args
def parse_args():
    """Parse the launcher's command-line options.

    ``--low_level_policy_path`` is also available under the short flag
    ``-p``; it defaults to ``None``.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()

    # Each entry: (tuple of option strings, keyword settings).
    option_table = (
        (('--env',), dict(type=str, choices=AVAILABLE_ENVS,
                          default=DEFAULT_ENV)),
        (('--exp_name',), dict(type=str, default=timestamp())),
        (('--mode',), dict(type=str, default='local')),
        (('--log_dir',), dict(type=str, default=None)),
        (('--low_level_policy_path', '-p'), dict(type=str, default=None)),
    )
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
def launch_experiments(args):
    """Launch the local SAC experiment(s) through ``run_sac_experiment``.

    Args:
        args: Object with a ``domain`` attribute; it selects the
            sub-directory of ``/root/code/log`` that receives the logs.
    """
    total_runs = 1
    print('Launching {} experiments.'.format(total_runs))

    for run_number in range(1, total_runs + 1):
        print("Experiment: {}/{}".format(run_number, total_runs))

        # One log directory per run, keyed by domain and launch time.
        output_dir = '/root/code/log/{0}/{1}'.format(args.domain, timestamp())

        run_sac_experiment(
            run_experiment,
            mode='local',
            n_parallel=1,
            terminate_machine=True,
            log_dir=output_dir,
            snapshot_mode='gap',
            snapshot_gap=100,
            sync_s3_pkl=True,
        )
def parse_args():
    """Parse the launcher's command-line options.

    ``--domain`` defaults to ``'ant-cross-maze'`` and ``--policy`` to
    ``'gaussian_ptr'``; ``--exp_name`` defaults to ``timestamp()``.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()
    define = parser.add_argument

    define('--domain', type=str, choices=AVAILABLE_DOMAINS,
           default='ant-cross-maze')
    define('--policy', type=str, choices=('gaussian', 'gaussian_ptr'),
           default='gaussian_ptr')
    define('--env', type=str, default=DEFAULT_ENV)
    define('--exp_name', type=str, default=timestamp())
    define('--mode', type=str, default='local')
    define('--log_dir', type=str, default=None)

    return parser.parse_args()
def parse_args():
    """Parse the launcher's command-line options.

    ``--xdir`` and ``--xname`` declare no ``type`` and therefore stay plain
    strings when given; both default to ``None``.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()

    option_table = (
        ('--env', dict(type=str, choices=AVAILABLE_ENVS, default='swimmer')),
        ('--exp_name', dict(type=str, default=timestamp())),
        ('--mode', dict(type=str, default='local')),
        ('--log_dir', dict(type=str, default=None)),
        ('--seed', dict(type=int, default=1)),
        ('--num_skills', dict(type=int, default=None)),
        ('--eval_freq', dict(type=int, default=None)),
        ('--xdir', dict(default=None)),
        ('--xname', dict(default=None)),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    return parser.parse_args()
def parse_args():
    """Parse the launcher's command-line options.

    ``--domain`` defaults to ``'Baxter'`` and ``--env`` to
    ``'BaxterReachNG'``; neither is restricted to a fixed set of choices.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()
    define = parser.add_argument

    define('--domain', type=str, default='Baxter')
    define('--task', type=str, default='default')
    define('--policy', type=str, choices=('gaussian', 'gmm', 'lsp'),
           default='gaussian')
    define('--env', type=str, default='BaxterReachNG')
    define('--exp_name', type=str, default=timestamp())
    define('--mode', type=str, default='local')
    define('--log_dir', type=str, default=None)

    return parser.parse_args()
def parse_args():
    """Parse the launcher's command-line options.

    ``--policy`` is limited to ``'lsp'`` or ``'gmm'`` and defaults to
    ``'lsp'``; ``--exp_name`` defaults to ``timestamp()``.

    Returns:
        argparse.Namespace: the parsed options.
    """
    parser = argparse.ArgumentParser()

    option_table = (
        ('--domain', dict(type=str, choices=AVAILABLE_DOMAINS, default=None)),
        ('--task', dict(type=str, choices=AVAILABLE_TASKS, default='default')),
        ('--policy', dict(type=str, choices=('lsp', 'gmm'), default='lsp')),
        ('--env', dict(type=str, default=DEFAULT_ENV)),
        ('--exp_name', dict(type=str, default=timestamp())),
        ('--mode', dict(type=str, default='local')),
        ('--log_dir', dict(type=str, default=None)),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    return parser.parse_args()
def launch_experiments():
    """Launch the local SAC experiment(s) through ``run_sac_experiment``.

    The options come from ``arg()``; its ``domain`` and ``exp_name``
    attributes are used to build the experiment prefix, the experiment name
    and the log directory.
    """
    args = arg()

    num_experiments = 1
    print('Launching {} experiments.'.format(num_experiments))

    for i in range(num_experiments):
        print("Experiment: {}/{}".format(i + 1, num_experiments))

        experiment_prefix = args.domain + '/' + args.exp_name
        # Number each run with the loop index. The index used to be the
        # constant 0, which would give every run the same name as soon as
        # num_experiments is raised above 1.
        experiment_name = '{prefix}-{exp_name}-{i:02}'.format(
            prefix=args.domain, exp_name=args.exp_name, i=i)

        run_sac_experiment(
            run_experiment,
            mode='local',
            exp_prefix=experiment_prefix,
            exp_name=experiment_name,
            n_parallel=1,
            terminate_machine=True,
            log_dir='/root/code/log/{0}/{1}'.format(args.domain, timestamp()),
            snapshot_mode='gap',
            snapshot_gap=100,
            sync_s3_pkl=True,
        )
env=env, policy=policy, initial_exploration_policy=initial_exploration_policy, pool=pool, qf1=qf1, qf2=qf2, vf=vf, lr=3e-4, scale_reward=20, discount=0.99, tau=0.005, reparameterize=True, target_update_interval=1, action_prior='uniform', save_full_state=False, ) algorithm._sess.run(tf.global_variables_initializer()) algorithm.train() if __name__ == "__main__": run_sac_experiment( run_experiment, mode='local', log_dir='/root/code/log/prim/reach/{0}'.format(timestamp()), snapshot_mode='gap', snapshot_gap=100, )
env=env, policy=policy, initial_exploration_policy=initial_exploration_policy, pool=pool, qf1=qf1, qf2=qf2, vf=vf, lr=3e-4, scale_reward=20, discount=0.99, tau=0.005, reparameterize=True, target_update_interval=1, action_prior='uniform', save_full_state=False, ) algorithm._sess.run(tf.global_variables_initializer()) algorithm.train() if __name__ == "__main__": run_sac_experiment( run_experiment, mode='local', log_dir='/root/code/log/prim/pick/{0}'.format(timestamp()), snapshot_mode='gap', snapshot_gap=100, )
# plotter=plotter, lr=1e-3, discount=0.99, #tau=1e-4, target_update_interval=20, # reparameterize=False, save_full_state=False ) algo.train() if __name__ == "__main__": exp_prefix = 'DQN-VoltVar34-exp2' exp_name = timestamp() log_dir = os.path.join( DEFAULT_LOG_DIR, "local", exp_prefix.replace("_", "-"), exp_name) os.makedirs(log_dir,exist_ok=True) logger.set_snapshot_dir(log_dir) logger.set_snapshot_mode('last') tabular_log_file = os.path.join(log_dir,'progress.csv') text_log_file = os.path.join(log_dir,'debug.log') logger.add_text_output(text_log_file) logger.add_tabular_output(tabular_log_file) run_experiment() # run_sac_experiment(