import gym
import argparse
import glob
import os

import numpy as np
import torch

from itertools import combinations

from common.agents.ddpg.ddpg import DDPG
from common.agents.ddpg_actor import DDPGActor
from common.utils.visualization import Visualizer
from common.utils.sim_agent_helper import generate_simulator_agent
from common.utils.logging import setup_experiment_logs, reshow_hyperparameters, StatsLogger

from experiments.domainrand.args import get_args, check_args
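
# Driver script: seeds the RNGs, sets up logging and visualization, and builds
# the DDPG agent (actor only, when the pretrained agent is frozen).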

if __name__ == '__main__':
    args = get_args()
    paths = setup_experiment_logs(args)
    check_args(args)

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)

    stats_logger = StatsLogger(args)
    visualizer = Visualizer(randomized_env_id=args.randomized_eval_env_id,
                            seed=args.seed)

    reference_env = gym.make(args.reference_env_id)
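    # reference_env supplies the observation/action space shapes used below.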

    if args.freeze_agent:
        # only need the actor
        agent_policy = DDPGActor(
            state_dim=reference_env.observation_space.shape[0],
            action_dim=reference_env.action_space.shape[0],  # assumed; the original call is truncated here
        )

from common.utils.rollout_evaluation import evaluate_policy
from common.envs.randomized_vecenv import make_vec_envs

NEVAL_EPISODES = 10
N_PROCESSES = 5
N_SEEDS = 5
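# Evaluation budget: NEVAL_EPISODES episodes per seed and generalization cell,
# over N_SEEDS converged actors, run on N_PROCESSES parallel environments.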

if __name__ == '__main__':
    args = get_args()
    paths = setup_experiment_logs(experiment_name='unfreeze-policy', args=args)
    check_args(args, experiment_name='unfreeze-policy')
    reference_env = gym.make(args.reference_env_id)

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)

    environment_prototype = 'Pusher3DOFGeneralization{}{}-v0'
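    # The two format slots pick a cell of the 3x3 generalization grid below.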

    rewards_grid = np.zeros((3, 3, N_SEEDS, NEVAL_EPISODES))
    finaldists_grid = np.zeros((3, 3, N_SEEDS, NEVAL_EPISODES))
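    # Grid axes: (env variant i, env variant j, seed, evaluation episode).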

    for i in range(3):
        for j in range(3):
            randomized_env = make_vec_envs(environment_prototype.format(i, j), args.seed + i + j, N_PROCESSES)
            actor_paths = glob.glob(os.path.join(os.getcwd(), paths['paper'], 'best-seed*_actor.pth'))

            # Evaluation body reconstructed (assumed): load each converged
            # seed's actor and roll it out on this generalization cell.
            for s, actor_path in enumerate(sorted(actor_paths)[:N_SEEDS]):
                agent_policy = DDPGActor(
                    state_dim=reference_env.observation_space.shape[0],
                    action_dim=reference_env.action_space.shape[0],
                    load_agent=actor_path,  # assumed keyword
                )
                rewards, final_dists = evaluate_policy(
                    randomized_env, agent_policy,
                    eval_episodes=NEVAL_EPISODES)  # assumed signature
                rewards_grid[i, j, s] = rewards
                finaldists_grid[i, j, s] = final_dists


def get_converged_modelpaths(paths):
    # Reconstructed (assumed) header: the function name and return values come
    # from the call in __main__ below; the glob patterns are guesses consistent
    # with the np.load(...).files usage on the learning-curve files.
    paper_path = paths['paper']
    learning_curves_files = glob.glob(os.path.join(paper_path, '*learning_curves*.npz'))  # assumed pattern
    generalization_files = glob.glob(os.path.join(paper_path, '*generalization*.npz'))  # assumed pattern

    # Every 5-seed subset supplies one bootstrap resample of the results.
    learning_curves_combinations = combinations(learning_curves_files, 5)
    generalization_combinations = combinations(generalization_files, 5)

    # The agent name is embedded in the results path between the env id's
    # 'v0' suffix and the '-exp' marker (presumably '...v0-<agent>-exp...').
    agent_name_start = paper_path.find('v0') + 3
    agent_name_end = paper_path.find('-exp')

    agent_name = paper_path[agent_name_start:agent_name_end]

    return agent_name, list(learning_curves_files), generalization_files


if __name__ == '__main__':
    args = get_args()
    experiment_name = 'bootstrapping' if args.use_bootstrapping_results else 'unfreeze-policy'
    paths = setup_experiment_logs(experiment_name=experiment_name, args=args)
    check_args(args, experiment_name=experiment_name)

    agent_name, learning_curves_files, generalization_files = get_converged_modelpaths(paths)
    nseeds = len(learning_curves_files)

    nmetrics = len(np.load(learning_curves_files[0]).files)

    # Learning curves: find the max length across seeds, then resize each
    # per-seed array to that length so they can be stacked.
    max_length = 0
    for lc in learning_curves_files:
        curves = np.load(lc)
        # Assumed body: track the longest metric array over all seeds/metrics.
        max_length = max(max_length, max(len(curves[key]) for key in curves.files))
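
    # Minimal sketch (an assumption, not the original code) of the resize step
    # described above: pad a curve to max_length by repeating its final value,
    # so curves from different seeds can be stacked into one array per metric.
    def pad_to_length(curve, length):
        return np.concatenate([curve, np.full(length - len(curve), curve[-1])])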