Example #1
    num_configurations, num_ace_runs, num_policies = policies_memmap[
        'policies'].shape

    parameters_dtype = policies_memmap['parameters'].dtype
    performance_dtype = np.dtype([('parameters', parameters_dtype),
                                  ('results', float,
                                   (num_ace_runs, num_policies,
                                    args.num_evaluation_runs))])

    # Create the memmapped array of results to be populated in parallel:
    performance_memmap_path = str(experiment_path /
                                  '{}_performance.npy'.format(args.objective))
    performance_memmap = np.lib.format.open_memmap(
        performance_memmap_path,
        shape=(num_configurations, ),
        dtype=performance_dtype,
        mode='w+')

    # Evaluate the learned policies in parallel:
    with tqdm_joblib(
            tqdm(total=args.num_evaluation_runs * num_ace_runs *
                 num_configurations * num_policies)) as progress_bar:
        Parallel(n_jobs=args.num_cpus, verbose=0)(
            delayed(evaluate_policies)(policies_memmap, performance_memmap,
                                       evaluation_run_num, ace_run_num,
                                       config_num, policy_num, random_seed)
            for evaluation_run_num, random_seed in enumerate(random_seeds)
            for ace_run_num in range(num_ace_runs)
            for config_num in range(num_configurations)
            for policy_num in range(num_policies))
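
The snippet above builds a structured results file with np.lib.format.open_memmap, which writes a standard .npy header and returns a numpy.memmap that behaves like an ordinary structured array while living on disk. Below is a minimal, self-contained sketch of that pattern; the file name, field shapes, and values are placeholders chosen for illustration, not taken from the snippet.

import numpy as np

# Placeholder structured dtype: one record per configuration, with a
# 'parameters' field and a multi-dimensional 'results' field (shapes arbitrary).
record_dtype = np.dtype([('parameters', float, (3, )),
                         ('results', float, (2, 5, 10))])

# mode='w+' creates (or overwrites) the .npy file on disk; the returned object
# can be indexed and assigned like a normal structured array.
performance = np.lib.format.open_memmap('demo_performance.npy',
                                        shape=(8, ),
                                        dtype=record_dtype,
                                        mode='w+')

performance[0]['parameters'] = [0.1, 0.2, 0.3]
performance[0]['results'][:] = 0.0
performance.flush()

# Reopening later (e.g. in another process) with mode='r+' maps the same file
# without loading the whole array into memory:
reopened = np.lib.format.open_memmap('demo_performance.npy', mode='r+')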
Example #2

    # Create the memmapped array of performance results for the learned policies:
    performance_dtype = np.dtype([
        ('parameters', parameters_dtype),
        ('results', float, (num_runs, num_policies, args.num_evaluation_runs)),
        ('results_excursions', float, (num_runs, num_policies, num_test_eval))
    ])
    performance_memmap_path = str(output_dir / 'performance.npy')
    if os.path.isfile(performance_memmap_path):
        performance_memmap = np.lib.format.open_memmap(performance_memmap_path,
                                                       mode='r+')
    else:
        performance_memmap = np.lib.format.open_memmap(
            performance_memmap_path,
            shape=(len(args.parameters), ),
            dtype=performance_dtype,
            mode='w+')

    # Run ACE for each configuration in parallel:
    with utils.tqdm_joblib(
            tqdm(total=num_runs * len(args.parameters),
                 smoothing=0)) as progress_bar:
        Parallel(n_jobs=args.num_cpus, verbose=0)(
            delayed(run_ace)(experience_memmap, policies_memmap,
                             performance_memmap, run_num, config_num,
                             parameters, random_seed, experience_memmap_test,
                             num_test_eval)
            for config_num, parameters in enumerate(args.parameters)
            for run_num, random_seed in enumerate(random_seeds))
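
In both examples the Parallel call is wrapped in a tqdm_joblib context manager so that finished joblib tasks advance a tqdm progress bar. The helper itself is not shown in these snippets; the sketch below is an assumption about how the imported tqdm_joblib / utils.tqdm_joblib might work, based on the widely used recipe of temporarily patching joblib's batch-completion callback, not the repository's own code.

import contextlib
import joblib

@contextlib.contextmanager
def tqdm_joblib(tqdm_object):
    """Patch joblib so each completed batch updates the given tqdm bar."""
    class TqdmBatchCompletionCallback(joblib.parallel.BatchCompletionCallBack):
        def __call__(self, *args, **kwargs):
            tqdm_object.update(n=self.batch_size)
            return super().__call__(*args, **kwargs)

    old_callback = joblib.parallel.BatchCompletionCallBack
    joblib.parallel.BatchCompletionCallBack = TqdmBatchCompletionCallback
    try:
        yield tqdm_object
    finally:
        joblib.parallel.BatchCompletionCallBack = old_callback
        tqdm_object.close()

Used as `with tqdm_joblib(tqdm(total=...)) as progress_bar:` around the Parallel call, exactly as in the snippets above.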
Example #3
        transition_dtype = np.dtype([('s_t', float,
                                      env.observation_space.shape)])

        experience_memmap_path = str(output_dir / 'experience_test.npy')
        if os.path.isfile(experience_memmap_path):
            experience_memmap = np.lib.format.open_memmap(
                experience_memmap_path, mode='r+')
        else:
            experience_memmap = np.lib.format.open_memmap(
                experience_memmap_path,
                shape=(args.num_runs, args.num_timesteps),
                dtype=transition_dtype,
                mode='w+')

        # Generate the experience in parallel:
        with utils.tqdm_joblib(tqdm(total=args.num_runs)) as progress_bar:
            Parallel(n_jobs=args.num_cpus, verbose=0)(
                delayed(generate_experience_test)(experience_memmap, run_num,
                                                  random_seed)
                for run_num, random_seed in enumerate(random_seeds))
    else:
        utils.save_args_to_file(args, output_dir / 'experience.args')

        # Create the memmapped structured array of experience to be populated in parallel:
        if args.environment == 'pw':
            env = puddleworld()
        else:
            # Make a dummy env to get shape info for observations.
            env = gym.make(args.environment).unwrapped
        transition_dtype = np.dtype([
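
Each task scheduled with delayed above receives the shared memmap together with a run index and a random seed, and is responsible for filling in its own row. The worker below is a hypothetical illustration in the spirit of generate_experience_test; a real worker would step the environment rather than write random observations.

import numpy as np

def fill_experience_row(experience_memmap, run_num, random_seed):
    """Illustrative worker: fills one run's row of the shared experience memmap."""
    rng = np.random.default_rng(random_seed)
    num_timesteps = experience_memmap.shape[1]
    observation_shape = experience_memmap.dtype['s_t'].shape
    for t in range(num_timesteps):
        # Stand-in data; a real worker would record the environment's observation.
        experience_memmap[run_num, t]['s_t'] = rng.standard_normal(observation_shape)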