num_configurations, num_ace_runs, num_policies = policies_memmap['policies'].shape
parameters_dtype = policies_memmap['parameters'].dtype
performance_dtype = np.dtype([
    ('parameters', parameters_dtype),
    ('results', float, (num_ace_runs, num_policies, args.num_evaluation_runs))
])

# Create the memmapped array of results to be populated in parallel:
performance_memmap_path = str(experiment_path / '{}_performance.npy'.format(args.objective))
performance_memmap = np.lib.format.open_memmap(
    performance_memmap_path,
    shape=(num_configurations,),
    dtype=performance_dtype,
    mode='w+')

# Evaluate the learned policies in parallel:
with tqdm_joblib(tqdm(total=args.num_evaluation_runs * num_ace_runs * num_configurations * num_policies)) as progress_bar:
    Parallel(n_jobs=args.num_cpus, verbose=0)(
        delayed(evaluate_policies)(policies_memmap, performance_memmap, evaluation_run_num, ace_run_num, config_num, policy_num, random_seed)
        for evaluation_run_num, random_seed in enumerate(random_seeds)
        for ace_run_num in range(num_ace_runs)
        for config_num in range(num_configurations)
        for policy_num in range(num_policies))
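# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the script above): how a worker such as
# evaluate_policies is assumed to write into the shared structured memmap.
# joblib passes numpy memmaps to workers by filename and reopens them
# writable, so each job can fill its own (config, run, policy, eval) cell
# without locking. All names below (demo_*, evaluate_one, the temp path, and
# the dummy result value) are hypothetical and for illustration only.
# ---------------------------------------------------------------------------
import os
import tempfile
import numpy as np
from joblib import Parallel, delayed

demo_num_configs, demo_num_runs, demo_num_policies, demo_num_evals = 2, 3, 4, 5
demo_dtype = np.dtype([
    ('parameters', float, (2,)),  # placeholder parameter layout
    ('results', float, (demo_num_runs, demo_num_policies, demo_num_evals)),
])
demo_path = os.path.join(tempfile.gettempdir(), 'demo_performance.npy')
demo_memmap = np.lib.format.open_memmap(
    demo_path, shape=(demo_num_configs,), dtype=demo_dtype, mode='w+')

def evaluate_one(memmap, eval_num, run_num, config_num, policy_num):
    # Hypothetical stand-in for evaluate_policies: write a dummy result into
    # the single cell owned by this job.
    memmap[config_num]['results'][run_num, policy_num, eval_num] = eval_num + policy_num

Parallel(n_jobs=2)(
    delayed(evaluate_one)(demo_memmap, e, r, c, p)
    for e in range(demo_num_evals)
    for r in range(demo_num_runs)
    for c in range(demo_num_configs)
    for p in range(demo_num_policies))

demo_memmap.flush()
print(np.load(demo_path)['results'].sum())  # every cell was filled by a worker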
    mode='w+')

# Create the memmapped array of performance results for the learned policies:
performance_dtype = np.dtype([
    ('parameters', parameters_dtype),
    ('results', float, (num_runs, num_policies, args.num_evaluation_runs)),
    ('results_excursions', float, (num_runs, num_policies, num_test_eval))
])
performance_memmap_path = str(output_dir / 'performance.npy')
if os.path.isfile(performance_memmap_path):
    performance_memmap = np.lib.format.open_memmap(performance_memmap_path, mode='r+')
else:
    performance_memmap = np.lib.format.open_memmap(
        performance_memmap_path,
        shape=(len(args.parameters),),
        dtype=performance_dtype,
        mode='w+')

# Run ACE for each configuration in parallel:
with utils.tqdm_joblib(tqdm(total=num_runs * len(args.parameters), smoothing=0)) as progress_bar:
    Parallel(n_jobs=args.num_cpus, verbose=0)(
        delayed(run_ace)(experience_memmap, policies_memmap, performance_memmap, run_num, config_num, parameters, random_seed, experience_memmap_test, num_test_eval)
        for config_num, parameters in enumerate(args.parameters)
        for run_num, random_seed in enumerate(random_seeds))
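# ---------------------------------------------------------------------------
# The utils.tqdm_joblib context manager used above is assumed to be the common
# recipe that temporarily patches joblib's batch-completion callback so the
# tqdm bar advances as parallel jobs finish. A minimal sketch of one possible
# implementation under that assumption (not necessarily the project's own):
# ---------------------------------------------------------------------------
import contextlib
import joblib
from tqdm import tqdm

@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    # Temporarily replace joblib's BatchCompletionCallBack so every completed
    # batch of jobs advances the supplied tqdm progress bar.
    class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
        def __call__(self, *args, **kwargs):
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    old_callback = joblib.parallel.BatchCompletionCallBack
    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        joblib.parallel.BatchCompletionCallBack = old_callback
        tqdm_object.close()

# Example usage: wrap any Parallel(...) call to get a live progress bar.
# with tqdm_joblib(tqdm(total=100)) as progress_bar:
#     joblib.Parallel(n_jobs=2)(joblib.delayed(pow)(i, 2) for i in range(100))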
    transition_dtype = np.dtype([('s_t', float, env.observation_space.shape)])
    experience_memmap_path = str(output_dir / 'experience_test.npy')
    if os.path.isfile(experience_memmap_path):
        experience_memmap = np.lib.format.open_memmap(experience_memmap_path, mode='r+')
    else:
        experience_memmap = np.lib.format.open_memmap(
            experience_memmap_path,
            shape=(args.num_runs, args.num_timesteps),
            dtype=transition_dtype,
            mode='w+')

    # Generate the experience in parallel:
    with utils.tqdm_joblib(tqdm(total=args.num_runs)) as progress_bar:
        Parallel(n_jobs=args.num_cpus, verbose=0)(
            delayed(generate_experience_test)(experience_memmap, run_num, random_seed)
            for run_num, random_seed in enumerate(random_seeds))
else:
    utils.save_args_to_file(args, output_dir / 'experience.args')

    # Create the memmapped structured array of experience to be populated in parallel:
    if args.environment == 'pw':
        env = puddleworld()
    else:
        env = gym.make(args.environment).unwrapped  # Make a dummy env to get shape info for observations.
    transition_dtype = np.dtype([