import gym import argparse import os from common.agents.ddpg.ddpg import DDPG from common.agents.ddpg_actor import DDPGActor from common.utils.visualization import Visualizer from common.utils.sim_agent_helper import generate_simulator_agent from common.utils.logging import setup_experiment_logs, reshow_hyperparameters, StatsLogger from experiments.domainrand.args import get_args, check_args if __name__ == '__main__': args = get_args() paths = setup_experiment_logs(args) check_args(args) torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) np.random.seed(args.seed) stats_logger = StatsLogger(args) visualizer = Visualizer(randomized_env_id=args.randomized_eval_env_id, seed=args.seed) reference_env = gym.make(args.reference_env_id) if args.freeze_agent: # only need the actor agent_policy = DDPGActor( state_dim=reference_env.observation_space.shape[0],
# Experiment entry point: evaluates saved per-seed actors on a 3x3 grid of
# Pusher3DOF generalization environments, collecting rewards and final distances.

# FIX: glob, os, gym, torch, and numpy are all used below but were never
# imported in this chunk, which would raise NameError at startup.
import glob
import os

import gym
import torch
import numpy as np

from common.utils.visualization import Visualizer
from common.utils.logging import setup_experiment_logs, reshow_hyperparameters
from experiments.domainrand.args import get_args, check_args
from common.utils.rollout_evaluation import evaluate_policy
from common.envs.randomized_vecenv import make_vec_envs

NEVAL_EPISODES = 10
N_PROCESSES = 5
N_SEEDS = 5


if __name__ == '__main__':
    args = get_args()
    paths = setup_experiment_logs(experiment_name='unfreeze-policy', args=args)
    check_args(args, experiment_name='unfreeze-policy')

    reference_env = gym.make(args.reference_env_id)

    # Seed every RNG source (CPU torch, CUDA torch, numpy) for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)

    environment_prototype = 'Pusher3DOFGeneralization{}{}-v0'
    # Result grids indexed as [i, j, seed, episode].
    # CONSISTENCY FIX: the seed axis previously used a magic 5; use the
    # declared N_SEEDS constant (same value, no behavior change).
    # assumes i/j index the 3x3 generalization difficulty grid -- TODO confirm
    rewards_grid = np.zeros((3, 3, N_SEEDS, NEVAL_EPISODES))
    finaldists_grid = np.zeros((3, 3, N_SEEDS, NEVAL_EPISODES))

    for i in range(3):
        for j in range(3):
            # Offset the seed per cell so each environment grid cell differs.
            randomized_env = make_vec_envs(environment_prototype.format(i, j), args.seed + i + j, N_PROCESSES)
            # Collect the best checkpoint of every seed from the paper directory.
            # NOTE(review): chunk is truncated here in the provided view.
            actor_paths = glob.glob(os.path.join(os.getcwd(), paths['paper'], 'best-seed*_actor.pth'))
# NOTE(review): this chunk starts mid-function -- the enclosing `def` (apparently
# get_converged_modelpaths, judging by the caller below) and the definitions of
# `learning_curves_files`, `generalization_files`, `paper_path`, and
# `combinations` (presumably itertools.combinations -- TODO confirm) are outside
# this view, so the leading statements below belong to that function's body.
    # All size-5 subsets of the per-seed result files; currently unused in the
    # visible code (the consuming loop below is commented out).
    learning_curves_combinations = combinations(learning_curves_files, 5)
    generalization_combinations = combinations(generalization_files, 5)

    # Extract the agent name embedded in the path: the text between the 'v0'
    # marker (plus 3 chars to skip "v0-") and the '-exp' suffix.
    agent_name_start = paper_path.find('v0') + 3
    agent_name_end = paper_path.find('-exp')
    agent_name = paper_path[agent_name_start:agent_name_end]

    return agent_name, list(learning_curves_files), generalization_files


if __name__ == '__main__':
    args = get_args()

    # Bootstrapping runs log under a different experiment directory.
    experiment_name = 'unfreeze-policy' if not args.use_bootstrapping_results else 'bootstrapping'
    paths = setup_experiment_logs(experiment_name=experiment_name, args=args)
    check_args(args, experiment_name=experiment_name)

    agent_name, learning_curves_files, generalization_files = get_converged_modelpaths(paths)

    nseeds = len(learning_curves_files)
    # Each .npz archive stores one array per metric; count them from the first file.
    nmetrics = len(np.load(learning_curves_files[0]).files)

    # Learning curves: find the max length across seeds and resize each array
    # to that length before aggregating.
    # Commented-out variant iterated over all 5-file combinations instead:
    # for i, learning_curves_files in enumerate(learning_curves_combinations):
    #     print(i, learning_curves_files, '\n\n')

    max_length = 0
    # NOTE(review): chunk is truncated here -- the loop body is outside this view.
    for lc in learning_curves_files: