# NOTE(review): `trainer.train()` appears to be the tail of a training entry
# point whose enclosing definition is outside this view.
trainer.train()

if __name__ == '__main__':
    # Hyperparameter sweep: every key maps to a list of candidate values;
    # run_sweep launches run_experiment over their cartesian product.
    sweep_params = {
        'seed': [1, 2, 3],
        # Task distributions to sweep over: locomotion envs with randomized
        # direction / dynamics parameters.
        'env': [
            HalfCheetahRandDirecEnv,
            AntRandDirecEnv,
            HopperRandParamsEnv,
            Walker2DRandDirecEnv,
            HumanoidRandDirec2DEnv,
            Walker2DRandParamsEnv
        ],
        # Sampling
        'rollouts_per_meta_task': [20],
        'max_path_length': [100],
        'parallel': [True],
        # Advantage estimation
        'discount': [0.99],
        'normalize_adv': [True],
        'positive_adv': [False],
        # Policy network
        'hidden_sizes': [(64, 64)],
        'learn_std': [True],
        'hidden_nonlinearity': [tf.tanh],
        'output_nonlinearity': [None],
        # Inner-/outer-loop optimization
        'inner_lr': [0.1],
        'learning_rate': [1e-3],
        'n_itr': [1001],
        'meta_batch_size': [40],
        'num_inner_grad_steps': [1],
        'scope': [None],
    }
    run_sweep(run_experiment, sweep_params, EXP_NAME, INSTANCE_TYPE)
# Tail of a training loop (enclosing definition is outside this view):
# log iteration stats, then freeze the TF graph after the first iteration
# so later iterations cannot accidentally add new ops.
logger.logkv("iter", i)
logger.logkv("iter_time", timer.time() - start)
logger.dumpkvs()
if i == 0:
    sess.graph.finalize()

if __name__ == '__main__':
    # Hyperparameter sweep: each key maps to a list of candidate values.
    # FIX: the original dict literal repeated the 'env', 'obs_type' and
    # 'process_type' keys; Python keeps only the LAST occurrence, so the
    # earlier entries were silently dead. They are preserved as comments
    # below (uncomment to widen the sweep) — runtime behavior is unchanged.
    sweep_params = {
        'alg': ['her'],
        'seed': [399856203240],
        # 'env': ['HandManipulateEgg-v0', 'HandManipulatePen-v0',
        #         'HandManipulateBlock-v0'],
        'env': ['HandManipulateEgg-v0'],

        # Env Sampling
        'fixed_num_of_contact': [7],

        # Problem Conf
        # 'obs_type': ['contact', 'full_contact'],
        'obs_type': ['full_contact'],
        # 'process_type': ['max_pool', 'pointnet'],
        'process_type': ['pointnet'],
        'prediction': ['object'],
        'feature_dim': [128],
        'feature_layer': [0],
    }
    run_sweep(main, sweep_params, EXP_NAME, INSTANCE_TYPE)
], # Dummy; we don't time out episodes (they time out by themselves) "gae_lambda": [.99], "normalize_adv": [True], "positive_adv": [False], } DEFAULT = 'DEFAULT' parser = argparse.ArgumentParser() for key, value in sweep_params.items(): parser.add_argument(f'--{key}', default=DEFAULT) args = parser.parse_args() for k in vars(args): v = getattr(args, k) if not v == DEFAULT: arg_type = args_type(sweep_params[k][0]) sweep_params[k] = [arg_type(v)] # DEBUG HPARAMS if DEBUG: sweep_params['meta_batch_size'] = [2] sweep_params['success_threshold'] = [0] sweep_params['accuracy_threshold'] = [0] sweep_params['hidden_sizes'] = [(2, )] sweep_params['backprop_steps'] = [1] sweep_params['max_path_length'] = [3] sweep_params['parallel'] = [False] sweep_params["memory_dim"] = [3] # 2048 sweep_params["instr_dim"] = [4] # 256 run_sweep(run_experiment, sweep_params, sweep_params['prefix'][0], parser, INSTANCE_TYPE)
'use_cem': [False], 'num_cem_iters': [5], # Training 'dynamics_learning_rate': [5e-4, 0.001], 'valid_split_ratio': [0.1], 'initial_random_samples': [True], 'initial_sinusoid_samples': [False], # Dynamics Model 'recurrent': [False], 'num_models': [5], 'dynamics_hidden_nonlinearity': ['swish'], 'dynamics_output_nonlinearity': [None], 'dynamics_hidden_sizes': [(512, 512, 512)], 'dynamic_model_epochs': [50], # UNUSED 'dynamics_buffer_size': [25000], 'backprop_steps': [100], 'weight_normalization_model': [False], # FIXME: Doesn't work 'dynamics_batch_size': [64], 'cell_type': ['lstm'], # Other 'n_parallel': [1], 'exp_tag': ['parallel-mbmpc'] } assert config['n_candidates'][0] % config['num_models'][0] == 0 # FIXME: remove constraint run_sweep(run_experiment, config, EXP_NAME, INSTANCE_TYPE)
# Tail of the `config` sweep dict (opening is outside this view).
    'normalize': [False],
    'n_itr': [30],
    'discount': [0.99],

    # Policy
    'n_candidates': [128],
    'horizon': [5],
    'use_cem': [False],
    'num_cem_iters': [4],

    # Training
    'num_rollouts': [20],
    'learning_rate': [0.001],
    'valid_split_ratio': [0.1],
    # NOTE(review): key misspells "persistency"; downstream consumers may
    # read the misspelled key — confirm before renaming.
    'rolling_average_persitency': [0.99],
    'initial_random_samples': [True],

    # Dynamics Model
    'recurrent': [False],
    'num_models': [3],
    'hidden_nonlinearity_model': ['relu'],
    'hidden_sizes_model': [(512, 512)],
    'dynamic_model_epochs': [200],
    'backprop_steps': [100],
    'weight_normalization_model': [False],  # FIXME: Doesn't work
    'batch_size_model': [256],
    'cell_type': ['lstm'],
}
# NOTE(review): other launchers in this codebase pass INSTANCE_TYPE as a
# fourth argument; verify run_sweep's signature allows omitting it here.
run_sweep(run_experiment, config, EXP_NAME)
# Tail of a sweep-params dict (opening is outside this view).
    'num_models': [10],
    'dynamics_hidden_sizes': [(512, 512)],
    # NOTE(review): the next two keys misspell "dynamics" as "dyanmics";
    # the consumer presumably reads the same misspelled keys — confirm
    # before renaming (sibling configs spell them correctly).
    'dyanmics_hidden_nonlinearity': ['relu'],
    'dyanmics_output_nonlinearity': [None],
    'dynamics_max_epochs': [50],
    'dynamics_learning_rate': [1e-3],
    'dynamics_batch_size': [128],
    'dynamics_buffer_size': [5000],

    # Policy
    'policy_hidden_sizes': [(64, 64)],
    'policy_learn_std': [True],
    'policy_hidden_nonlinearity': [tf.tanh],
    'policy_output_nonlinearity': [None],

    # Meta-Algo
    'meta_batch_size': [10],  # Note: It has to be multiple of num_models
    'rollouts_per_meta_task': [50],
    'num_inner_grad_steps': [1],
    'inner_lr': [0.001],
    'inner_type': ['log_likelihood'],
    'step_size': [0.01],
    'exploration': [False],
    'scope': [None],
    'exp_tag': [''],  # For changes besides hyperparams
}
run_sweep(run_experiment, sweep_params, EXP_NAME)