Example #1
    def __init__(self, curiosity_program, reward_combiner_program,
                 curiosity_data_structure_values, curiosity_optimizer_values,
                 reward_combiner_data_structure_values,
                 reward_combiner_optimizer_values, envs, policy):
        self.curiosity_program = curiosity_program
        self.reward_combiner_program = reward_combiner_program
        self.curiosity_data_structure_values = curiosity_data_structure_values
        self.curiosity_optimizer_values = curiosity_optimizer_values
        self.reward_combiner_data_structure_values = reward_combiner_data_structure_values
        self.reward_combiner_optimizer_values = reward_combiner_optimizer_values

        self.envs = envs

        self.internal_reward_normalizer_all = mlca.helpers.statistics.welfords_std.Welford(
        )
        self.internal_reward_normalizer_window: List[int] = []

        # From https://github.com/openai/baselines/blob/master/baselines/common/vec_env/vec_normalize.py

        self.ret_rms = RunningMeanStd(shape=())
        self.clipob = 10.
        self.cliprew = 10.
        self.ret = np.zeros(TspParams.current().NUM_ROLLOUTS_PER_TRIAL)
        self.gamma = TspParams.current().DECAY_RATE
        assert self.gamma == .99
        self.epsilon = 1e-8
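
The `RunningMeanStd` referenced here comes from the OpenAI baselines file linked in the comment and is not shown in this listing. For reference, a minimal numpy sketch consistent with the baselines parallel-variance update (an assumption, not this project's exact code):

import numpy as np

class RunningMeanStd:
    # Running mean/variance over batches via the parallel (Chan et al.) update
    def __init__(self, epsilon=1e-4, shape=()):
        self.mean = np.zeros(shape, 'float64')
        self.var = np.ones(shape, 'float64')
        self.count = epsilon

    def update(self, x):
        batch_mean = np.mean(x, axis=0)
        batch_var = np.var(x, axis=0)
        batch_count = x.shape[0]
        delta = batch_mean - self.mean
        tot_count = self.count + batch_count
        self.mean = self.mean + delta * batch_count / tot_count
        m2 = (self.var * self.count + batch_var * batch_count +
              np.square(delta) * self.count * batch_count / tot_count)
        self.var = m2 / tot_count
        self.count = tot_count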
Example #2
    def _normalize_combined_reward(self, combined_reward, dones):
        combined_reward = combined_reward.cpu().numpy()
        all_rews = []
        for timestep_i in range(
                TspParams.current().STEPS_PER_CURIOSITY_UPDATE):
            # ", TspParams.current().PPO_FRAMES_PER_PROC)):
            step_combined_reward = combined_reward[
                timestep_i *
                TspParams.current().NUM_ROLLOUTS_PER_TRIAL:(timestep_i + 1) *
                TspParams.current().NUM_ROLLOUTS_PER_TRIAL]

            self.ret = self.ret * self.gamma + step_combined_reward

            self.ret_rms.update(self.ret)
            rews = np.clip(
                step_combined_reward /
                np.sqrt(self.ret_rms.var + self.epsilon), -self.cliprew,
                self.cliprew)

            timestep_dones = dones[timestep_i *
                                   TspParams.current().NUM_ROLLOUTS_PER_TRIAL:
                                   (timestep_i + 1) *
                                   TspParams.current().NUM_ROLLOUTS_PER_TRIAL]
            # print(len(timestep_dones), timestep_dones, len(dones))
            # print(self.ret.shape)
            self.ret[timestep_dones] = 0.
            all_rews.append(torch.tensor(rews, device=DefaultDevice.current()))

        return torch.cat(all_rews)
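
To see what this method computes, here is a toy numpy-only rendition of the same loop (it reuses the `RunningMeanStd` sketch above; all sizes and values are made up). Each rollout keeps a discounted return; the reward is scaled by the standard deviation of that return, and returns are reset where an episode ended:

NUM_ROLLOUTS, STEPS = 4, 3
gamma, cliprew, epsilon = 0.99, 10., 1e-8

rewards = np.random.randn(STEPS * NUM_ROLLOUTS)    # flattened, timestep-major
dones = np.zeros(STEPS * NUM_ROLLOUTS, dtype=bool)

rms = RunningMeanStd(shape=())
ret = np.zeros(NUM_ROLLOUTS)
normalized = []
for t in range(STEPS):
    step = slice(t * NUM_ROLLOUTS, (t + 1) * NUM_ROLLOUTS)
    ret = ret * gamma + rewards[step]  # discounted return per rollout
    rms.update(ret)                    # variance of the return, not the reward
    normalized.append(np.clip(rewards[step] / np.sqrt(rms.var + epsilon),
                              -cliprew, cliprew))
    ret[dones[step]] = 0.              # reset finished episodes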
Example #3
    def _normalize_internal_reward(self, r):
        if TspParams.current().NORMALIZE_INTERNAL_REWARD == "ALL":
            for k in r.detach():
                self.internal_reward_normalizer_all.update(k)

            std = self.internal_reward_normalizer_all.std
            if std == 0:
                return torch.sign(r)
            else:
                return r / std
        elif TspParams.current().NORMALIZE_INTERNAL_REWARD == "100":
            REWARD_WINDOW = 5 * 100
            for k in r.detach():
                self.internal_reward_normalizer_window.append(k.item())
            self.internal_reward_normalizer_window = self.internal_reward_normalizer_window[
                -REWARD_WINDOW:]
            std = torch.tensor(self.internal_reward_normalizer_window,
                               device=DefaultDevice.current()).std()
            # print(r, std, r/std)
            # print(r[0], std, r[0]/std)
            if torch.isnan(std):
                return torch.sign(r)
            else:
                return r / std
        else:
            return r.detach()
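
The `Welford` accumulator used by the "ALL" branch lives in `mlca.helpers.statistics.welfords_std` and is not shown. A minimal sketch of the interface the code above relies on (an `update(x)` method plus a `std` property, implemented with Welford's online algorithm; the real helper may differ):

import math

class Welford:
    # Online mean/std via Welford's algorithm
    def __init__(self):
        self.n = 0
        self.mean = 0.0
        self.m2 = 0.0

    def update(self, x):
        self.n += 1
        delta = x - self.mean
        self.mean += delta / self.n
        self.m2 += delta * (x - self.mean)

    @property
    def std(self):
        return math.sqrt(self.m2 / self.n) if self.n > 1 else 0.0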
Example #4
    def extract_from_rollout_buffer(self, rollouts, cur_start_timestep):
        assert TspParams.current().REAL_BATCH_REWARD_COMPUTATION
        states = []
        prev_states = []
        actions = []
        extrinsic_rewards = []
        normalized_timesteps = []
        dones = []
        for timestep_i in range(TspParams.current().PPO_FRAMES_PER_PROC):
            for rollout in range(TspParams.current().NUM_ROLLOUTS_PER_TRIAL):
                i_episode = cur_start_timestep + timestep_i

                states.append(rollouts.obs[timestep_i + 1][rollout])
                prev_states.append(rollouts.obs[timestep_i][rollout])
                actions.append(rollouts.actions[timestep_i][rollout])
                extrinsic_rewards.append(
                    rollouts.rewards[timestep_i][rollout].detach())
                dones.append(not rollouts.masks[timestep_i][rollout])
                normalized_timesteps.append(
                    i_episode / TspParams.current().STEPS_PER_ROLLOUT)

        states_tensor = torch.stack(states)
        prev_states_tensor = torch.stack(prev_states)
        actions_tensor = self.remap_actions(self.envs, actions)
        extrinsic_rewards_tensor = torch.cat(extrinsic_rewards)
        normalized_timesteps_tensor = torch.tensor(
            normalized_timesteps,
            dtype=torch.float,
            device=DefaultDevice.current())

        return states_tensor, prev_states_tensor, actions_tensor, extrinsic_rewards_tensor, normalized_timesteps_tensor, dones
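
Note the flattening order: the buffer is walked timestep-major, then rollout, which is exactly what lets `_normalize_combined_reward` in Example #2 recover one timestep's rollouts with a single slice. A tiny illustration with made-up sizes:

STEPS, ROLLOUTS = 3, 4
flat = [(t, r) for t in range(STEPS) for r in range(ROLLOUTS)]
t = 1
assert flat[t * ROLLOUTS:(t + 1) * ROLLOUTS] == [(1, 0), (1, 1), (1, 2), (1, 3)]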
Example #5
    def create_empty(self, environment, policy):
        if TspParams.current().REAL_BATCH_REWARD_COMPUTATION:
            # Both the shared and unshared curiosity-module cases use the
            # same size here
            size = TspParams.current().STEPS_PER_CURIOSITY_UPDATE
        else:
            size = 1
        return torch.ones(size,
                          device=DefaultDevice.current()) * self.constant_value
Example #6
def plot_scatter_of_every_trial_best_performance(data: List[ProgramData], params):
    data = [d for d in data if not d.results.early_terminated]
    data = [d for d in data if d.stats]
    data = sorted(data, key=lambda p: p.stats["mean_performance"])

    print("Data w. index", len((data)))

    for trial in range(TspParams.current().NUM_TRIALS_PER_PROGRAM):
        trial_perf = [_best_episode_performance(d, trial) for d in data]
        plt.scatter(range(len(data)), trial_perf, s=1, c="black")

    plt.title(TspParams.current().__EXPERIMENT_ID__)
    plt.xlabel("Program index (by mean performance)")
    plt.ylabel("Performance of best episode, per trial")
    plt.show()
Example #7
def plot_scatter_of_every_trials(data: List[ProgramData], params):
    data = [d for d in data if not d.results.early_terminated]
    data = [d for d in data if d.stats]
    data = sorted(data, key=lambda p: p.stats["mean_performance"])

    for trial in range(TspParams.current().NUM_TRIALS_PER_PROGRAM):
        trial_perf = [
            np.array(d.results.trials_rollouts_mean_reward[trial]).mean()
            for d in data
        ]
        plt.scatter(range(len(data)), trial_perf, s=1, c="black")

    plt.title(TspParams.current().__EXPERIMENT_ID__)
    plt.xlabel("Program index (by mean performance)")
    plt.ylabel("Mean performance of finished episodes, per trial")
    plt.show()
Example #8
def _plot_deltas_and_performance(deltas: List[float], performance: List[float]):
  plt.title(TspParams.current()._experiment_id)
  # scatter_heatmap(performance, deltas, bins=500)
  # plt.scatter(performance, deltas, color='white')# alpha=.05)
  plt.scatter(performance, deltas, alpha=.5)
  plt.xlabel("Performance")
  plt.ylabel("Delta (Min distance to point with > performance)")
  plt.show()
Example #9
    def create_empty(self, environment, data_structure_values):
        torch_data_structures = [
            d for d in data_structure_values.values()
            if isinstance(d, nn.Module)
        ]
        nn_modules = nn.ModuleList(torch_data_structures)
        return optim.Adam(nn_modules.parameters(),
                          lr=TspParams.current().LEARNING_RATE)
Example #10
    def calc_remapped_rewards(self, rollouts, profiler, cur_start_timestep,
                              tensorboard_logger, trial_i):
        states, prev_states, actions, extrinsic_rewards, normalized_timesteps, dones = \
            self.extract_from_rollout_buffer(rollouts, cur_start_timestep)

        STEPS_PER_CURIOSITY_UPDATE = TspParams.current(
        ).STEPS_PER_CURIOSITY_UPDATE * TspParams.current(
        ).NUM_ROLLOUTS_PER_TRIAL
        assert TspParams.current().PPO_FRAMES_PER_PROC * TspParams.current(
        ).NUM_ROLLOUTS_PER_TRIAL % STEPS_PER_CURIOSITY_UPDATE == 0
        remapped_rewards = []
        for batch_start in range(
                0,
                TspParams.current().PPO_FRAMES_PER_PROC *
                TspParams.current().NUM_ROLLOUTS_PER_TRIAL,
                STEPS_PER_CURIOSITY_UPDATE):
            batch = slice(batch_start,
                          batch_start + STEPS_PER_CURIOSITY_UPDATE)
            remapped_rewards.append(
                self.calc(prev_states[batch],
                          actions[batch],
                          states[batch],
                          dones[batch],
                          extrinsic_rewards[batch],
                          normalized_timesteps[batch],
                          profiler,
                          cur_start_timestep,  # fills calc's i_episode slot
                          tensorboard_logger=tensorboard_logger,
                          cur_start_timestep=cur_start_timestep + batch_start,
                          trial=trial_i).view(
                              TspParams.current().STEPS_PER_CURIOSITY_UPDATE,
                              TspParams.current().NUM_ROLLOUTS_PER_TRIAL,
                          ).unsqueeze(2))

        remapped_rewards_tensor = torch.cat(remapped_rewards)
        assert remapped_rewards_tensor.shape == rollouts.rewards.shape, (
            remapped_rewards_tensor.shape, rollouts.rewards.shape)
        rollouts.rewards = remapped_rewards_tensor

        return remapped_rewards_tensor
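
The trailing `.view(...).unsqueeze(2)` converts each flat reward batch back into the `(steps, rollouts, 1)` layout of `rollouts.rewards`. A quick torch check of that reshape (sizes are made up):

import torch

STEPS, ROLLOUTS = 3, 4
flat = torch.arange(STEPS * ROLLOUTS, dtype=torch.float)  # timestep-major
per_step = flat.view(STEPS, ROLLOUTS).unsqueeze(2)
assert per_step.shape == (STEPS, ROLLOUTS, 1)
assert per_step[1, 0, 0] == flat[ROLLOUTS]  # row t holds timestep t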
Example #11
def plot_scatter_of_every_trial_mean_performance(data: List[ProgramData], params):
    data = [d for d in data if not d.results.early_terminated]
    data = [d for d in data if d.stats]
    data = sorted(data, key=lambda p: p.stats["mean_performance"])

    print("Data w. index", len((data)))

    for trial in range(TspParams.current().NUM_TRIALS_PER_PROGRAM):
        trial_perf = [
            np.array([
                np.array(r).mean()
                for r in d.results.trials_rollouts_episode_end_rewards[trial]
            ]).mean() for d in data
        ]
        plt.scatter(range(len(data)), trial_perf, s=1, c="black")

    # plt.ylim(top=200)
    plt.title(TspParams.current().__EXPERIMENT_ID__)
    plt.xlabel("Program index (by mean performance)")
    plt.ylabel("Mean episode performance, per trial")
    plt.show()
Example #12
    def _thunk():
        #print("Make envs", params)
        if env_id.startswith("dm"):
            _, domain, task = env_id.split('.')
            env = dm_control2gym.make(domain_name=domain, task_name=task)
        else:
            env = gym.make(env_id, **TspParams.current().ENVIRONMENT_KWARGS)

        is_atari = (hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv))
        if is_atari:
            env = make_atari(env_id)

        is_minigrid = "MiniGrid" in env_id

        if set_time_limit is not None:
            env = TimeLimit(env, set_time_limit)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if str(env.__class__.__name__).find(
                'TimeLimit') >= 0 or set_time_limit is not None:
            env = TimeLimitMask(env)

        if log_dir is not None:
            env = bench.Monitor(env,
                                os.path.join(log_dir, str(rank)),
                                allow_early_resets=allow_early_resets)

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif is_minigrid:
            pass
        elif len(env.observation_space.shape) == 3:
            raise NotImplementedError(
                "CNN models work only for atari,\n"
                "please use a custom wrapper for a custom pixel input env.\n"
                "See wrap_deepmind for an example.")

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env, op=[2, 0, 1])

        return env
Example #13
def _program_as_feature_vector(p: Program, fp: int, feature_input_output: bool) -> ProgramFeatureVector:
  assert type(fp) == int, fp

  operations_list_name = TspParams.current().CURIOSITY_PROGRAMS_NAME
  ops = OperationsSetList[operations_list_name].OPERATIONS

  FEATURE_VECTOR_CLASSES = [[o] for o in ops] + \
      (list(itertools.combinations(ops, 2)) if fp >= 2 else []) + \
      (list(itertools.combinations(ops, 3)) if fp >= 3 else [])
  FEATURE_VECTOR_INPUT_OUTPUT_PAIR_CLASSES = \
      (list(itertools.permutations(ops, 2))
      if feature_input_output else [])

  program_ops = p.forward_program + p.update_program
  classes = set([type(d) for d in program_ops])

  r1 = []
  for has_gradients in [True, False]:
    classes_and_grads = [(c, has_gradients) for c in classes]
    for op_combo in FEATURE_VECTOR_CLASSES:
      r1.append(all((o, has_gradients) in classes_and_grads for o in op_combo))

  r2 = []
  for in_class, out_class in FEATURE_VECTOR_INPUT_OUTPUT_PAIR_CLASSES:
    r2.append(
      any(
        type(o) == out_class
        and any(type(i) == in_class for i in o.inputs)
        for o in program_ops))

  r = r1 + r2

  assert max(r1) == 1
  assert not feature_input_output or max(r2) == 1
  assert max(r) == 1

  return np.array(r)
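
The feature vector's length follows from the class lists built above: singleton, pair, and triple operation combinations (doubled by the `has_gradients` flag) plus ordered input/output pairs. A quick dimensionality check with a hypothetical operation count:

import math

n_ops = 10  # hypothetical size of the operations set
n_combo_classes = n_ops + math.comb(n_ops, 2) + math.comb(n_ops, 3)  # fp >= 3
n_io_pairs = n_ops * (n_ops - 1)  # itertools.permutations(ops, 2)
feature_dim = 2 * n_combo_classes + n_io_pairs  # x2 for has_gradients
print(feature_dim)  # 2 * (10 + 45 + 120) + 90 = 440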
Example #14
def make_vec_envs(env_name,
                  seed,
                  num_processes,
                  gamma,
                  log_dir,
                  device,
                  allow_early_resets,
                  set_time_limit,
                  num_frame_stack=None):
    envs = [
        make_env(env_name,
                 seed,
                 i,
                 log_dir,
                 allow_early_resets,
                 set_time_limit=set_time_limit) for i in range(num_processes)
    ]

    # if len(envs) > 1:
    #     envs = ShmemVecEnv(envs, context='fork')
    # else:
    envs = DummyVecEnv(envs)

    if TspParams.current().PPO_NEW_ARGS["vec_normalize"]:
        if len(envs.observation_space.shape) == 1:
            if gamma is None:
                envs = VecNormalize(envs, ret=False)
            else:
                envs = VecNormalize(envs, gamma=gamma)

    envs = VecPyTorch(envs, device)

    if num_frame_stack is not None:
        envs = VecPyTorchFrameStack(envs, num_frame_stack, device)
    # elif len(envs.observation_space.shape) == 3:
    #    envs = VecPyTorchFrameStack(envs, 4, device)

    return envs
Example #15
    def calc(self,
             state,
             action,
             next_state,
             dones,
             extrinsic_reward,
             normalized_timestep,
             profiler=None,
             i_episode=None,
             tensorboard_logger=None,
             cur_start_timestep=None,
             trial=None):
        if TspParams.current().ONLY_EXTERNAL_REWARD:
            print("Only external")
            return extrinsic_reward
        else:
            intrinsic_reward = self._internal_reward(state,
                                                     action,
                                                     next_state,
                                                     profiler=None,
                                                     i_episode=None)

            combined_reward = self._combined_reward(intrinsic_reward,
                                                    extrinsic_reward,
                                                    normalized_timestep)

            if TspParams.current().NORMALIZE_COMBINED_REWARD:
                assert not TspParams.current().PPO_NEW_ARGS["vec_normalize"]
                normalized_combined_reward = self._normalize_combined_reward(
                    combined_reward, dones)
            else:
                normalized_combined_reward = combined_reward

            if tensorboard_logger is not None and tensorboard_logger.tensorboard_writer is not None:
                i = 0

                if TspParams.current().REAL_BATCH_REWARD_COMPUTATION:
                    batch_num_timesteps = TspParams.current(
                    ).STEPS_PER_CURIOSITY_UPDATE
                    #", TspParams.current().PPO_FRAMES_PER_PROC)
                else:
                    batch_num_timesteps = 1

                if TspParams.current().SHARE_CURIOSITY_MODULE_IN_TRIAL:
                    batch_num_rollouts = TspParams.current(
                    ).NUM_ROLLOUTS_PER_TRIAL
                else:
                    batch_num_rollouts = 1

                # for timestep_i in range(batch_num_timesteps):
                #         print("intrinsic_reward", intrinsic_reward)
                #         tensorboard_logger.add_scalar(f'intrinsic_reward_{trial}_{rollout}', intrinsic_reward[i].cpu().item(), cur_start_timestep + timestep_i)
                #         tensorboard_logger.add_scalar(f'extrinsic_reward_{trial}_{rollout}', extrinsic_reward[i].cpu().item(), cur_start_timestep + timestep_i)
                #         tensorboard_logger.add_scalar(f'combined_reward_{trial}_{rollout}', combined_reward[i].cpu().item(), cur_start_timestep + timestep_i)
                #         tensorboard_logger.add_scalar(f'normalized_combined_reward{trial}_{rollout}', normalized_combined_reward[i].cpu().item(), cur_start_timestep + timestep_i)
                #         tensorboard_logger.add_scalar(f'normalized_timestep_{trial}_{rollout}', normalized_timestep[i].cpu().item(), cur_start_timestep + timestep_i)
                #         tensorboard_logger.add_scalar(f'done_{trial}_{rollout}', dones[i], cur_start_timestep + timestep_i)

                #         i += 1

            return normalized_combined_reward
Example #16
def _plot_program_evaluations(evaluated_programs_data,
                              early_termination_batch_data):
    end_times = []
    end_rewards = []

    max_evaluation_index = max(d.results.selected_index
                               for d in evaluated_programs_data)

    random.shuffle(evaluated_programs_data)
    for d in tqdm(evaluated_programs_data, "Plotting program evaluations"):
        evaluation_index = d.results.selected_index
        # Loop through episode, keeping track of lengths and rewards
        reward = 0
        episode_rewards = []
        time = 0
        # TODO: Dedup this code
        for trial, (trial_rollouts_episode_lengths,
                    trial_episode_final_rewards) in enumerate(
                        zip(d.results.trials_rollouts_episode_lengths,
                            d.results.trials_rollouts_episode_end_rewards)):
            rewards = []
            times = []
            for timestep_cap in range(
                    SearchParams.current(
                    ).EARLY_TERMINATION_CHECKING_FREQUENCY,
                    TspParams.current().STEPS_PER_ROLLOUT,
                    SearchParams.current().EARLY_TERMINATION_CHECKING_FREQUENCY
            ):
                # Simulate the average episode ending rewards we would have seen at this timestep.
                rollouts_episode_end_rewards = []
                rollouts_episode_lengths = []
                reached_timestep = False

                for rollout, (rollout_final_rewards,
                              rollout_episode_lengths) in enumerate(
                                  zip(trial_episode_final_rewards,
                                      trial_rollouts_episode_lengths)):
                    timestep = 0
                    assert len(rollout_final_rewards) == len(
                        rollout_episode_lengths)
                    episode_end_rewards = []
                    episode_lengths = []
                    rollouts_episode_end_rewards.append(episode_end_rewards)
                    rollouts_episode_lengths.append(episode_lengths)

                    for final_reward, episode_length in zip(
                            rollout_final_rewards, rollout_episode_lengths):
                        timestep += episode_length
                        if timestep > timestep_cap:
                            reached_timestep = True
                            break
                        else:
                            episode_end_rewards.append(final_reward)
                            episode_lengths.append(episode_length)

                if reached_timestep:
                    if "MiniGrid" in TspParams.current().ENVIRONMENT:
                        mean_episode_end_reward = np.array([
                            np.array(r).max()
                            for r in rollouts_episode_end_rewards
                        ]).max()
                    else:
                        mean_episode_end_reward = np.array([
                            np.array(r).mean()
                            for r in rollouts_episode_end_rewards
                        ]).mean()
                else:
                    break

                times.append(trial * TspParams.current().STEPS_PER_ROLLOUT +
                             timestep_cap)
                rewards.append(mean_episode_end_reward)

            if d.results.get(
                    "had_early_termination_data", True
            ):  # evaluation_index > SearchParams.current().PROGRAMS_PER_BATCH):
                color = evaluation_index / max_evaluation_index

                plt.plot(times,
                         rewards,
                         alpha=.01,
                         color=(color, 1 - color, 0))

        # Note this intentionally happens outside of the for loop, after the last trial
        if len(times) > 0:
            end_times.append(times[-1])
            end_rewards.append(rewards[-1])
        # else:
        #   plt.plot(times, rewards, alpha=.05, color="gray")

    if early_termination_batch_data:
        cutoff_times = []
        cutoff_levels = []
        for key in early_termination_batch_data:
            trial, timestep = key
            cutoff_times.append(trial * TspParams.current().STEPS_PER_ROLLOUT +
                                timestep)
            cutoff_levels.append(early_termination_batch_data[key])
        plt.plot(cutoff_times, cutoff_levels, color='yellow', alpha=.5)

    plt.scatter(end_times, end_rewards, color='blue', s=1, alpha=.5, zorder=10)

    plt.xlabel("# steps evaluated")
    plt.ylabel("avg episode reward")
    plt.show()
Example #17
    def create_empty(self, environment, policy):
        return TorchKNN(TspParams.current().KNN_BUFFER_SIZE_LARGE, 32, 5)
Example #18
def main():
    parser = mlca.helpers.config.argparser()
    args = parser.parse_args()
    experiment_id = args.experiment_id

    search_params = SearchExperimentList[experiment_id]
    tsp_params = TspExperimentList[
        search_params.TEST_SYNTHESIZED_PROGRAMS_EXPERIMENT_ID]
    predict_performance_params = PredictPerformanceExperimentList[
        search_params.PREDICT_PERFORMANCE_EXPERIMENT_ID]

    with search_params:
        with tsp_params:
            with predict_performance_params:
                # =====================
                # Setup programs
                # =====================

                if TspParams.current(
                ).EXPERIMENT_TYPE == TspParams.ExperimentType.CURIOSITY_SEARCH:
                    data, _, _, _, _, _, _, _, _, _, _, = load_curiosity_program_data(
                        TspParams.current().CURIOSITY_PROGRAMS_NAME,
                        TspParams.current().REWARD_COMBINER_PROGRAMS_NAME,
                        SearchParams.current().
                        TEST_SYNTHESIZED_PROGRAMS_EXPERIMENT_ID,
                        TspParams.current().FIXED_REWARD_COMBINER_PROGRAM_ID)
                elif TspParams.current(
                ).EXPERIMENT_TYPE == TspParams.ExperimentType.REWARD_COMBINER_SEARCH:
                    data, _, _, _, _, _, _, _, _, _, _, = load_reward_combiner_program_data(
                        TspParams.current().CURIOSITY_PROGRAMS_NAME,
                        TspParams.current().REWARD_COMBINER_PROGRAMS_NAME,
                        SearchParams.current().
                        TEST_SYNTHESIZED_PROGRAMS_EXPERIMENT_ID,
                        TspParams.current().FIXED_CURIOSITY_PROGRAM_ID)

                data = [d for d in data if d.stats]
                programs = [d.curiosity_program for d in data]
                program_to_data = {
                    p.program_id: d
                    for p, d in zip(programs, data)
                }

                select_next_program_batch_fn = {
                    "RANDOM": _select_next_program_batch_random,
                    "SORT": _select_next_program_batch_regressor,
                    "DIVERSITY": _select_next_program_batch_diversity
                }[SearchParams.current().BATCH_SELECTION]

                select_next_program_preprocess_data_fn = {
                    "RANDOM": _none,
                    "SORT": _none,
                    "DIVERSITY": _select_next_program_preprocess_data_diversity
                }[SearchParams.current().BATCH_SELECTION]

                select_next_program_data_update_with_program_result_fn = {
                    "RANDOM":
                    _none,
                    "SORT":
                    _none,
                    "DIVERSITY":
                    _select_next_program_data_update_with_program_result_diversity
                }[SearchParams.current().BATCH_SELECTION]

                # =====================
                # Run NUM_SEARCHES searches
                # =====================

                most_possible_steps = TspParams.current().NUM_ROLLOUTS_PER_TRIAL * \
                    TspParams.current().STEPS_PER_ROLLOUT * \
                    TspParams.current().NUM_TRIALS_PER_PROGRAM * \
                    len(program_to_data)

                print(
                    f"Simulating search using {len(program_to_data)} pre-cached results."
                )
                search_scores = []
                all_intermediate_scores = []
                all_intermediate_num_programs_per_machine_evaluated = []
                for search_i in range(SearchParams.current().NUM_SEARCHES):

                    print("----------------------------")
                    print(
                        f"Start search {search_i}/{SearchParams.current().NUM_SEARCHES}"
                    )
                    print("----------------------------")

                    mlca.helpers.util.set_random_seed(search_i)
                    random_programs = list(programs)
                    random.shuffle(random_programs)

                    def get_pre_evaluated_programs_fn():
                        return [], []

                    intermediate_scores = []
                    intermediate_num_programs_per_machine_evaluated = []

                    def post_batch_hook_fn(evaluated_programs_data):
                        intermediate_scores.append(_num_evaluated_programs_in_top_n_percent(
                            evaluated_programs_data, program_to_data,  INTERMEDIATE_SCORE_TOP_N_FRACTION)  / \
                              math.floor(INTERMEDIATE_SCORE_TOP_N_FRACTION * len(program_to_data)))
                        intermediate_num_programs_per_machine_evaluated.append(
                            len(evaluated_programs_data))

                    target_num_jobs_running = 64

                    evaluate_program_fn_extra_args = (program_to_data,
                                                      search_params)
                    evaluated_programs_data = search_with_score_prediction(
                        random_programs,
                        get_pre_evaluated_programs_fn,
                        target_num_jobs_running,
                        simulate_evaluate_program_fn,
                        rollout_timestep_pruning_hook_fn,
                        select_next_program_batch_fn,
                        select_next_program_preprocess_data_fn,
                        select_next_program_data_update_with_program_result_fn,
                        post_batch_hook_fn,
                        get_early_termination_batch_data_fn,
                        search_params,
                        tsp_params,
                        evaluate_program_fn_extra_args,
                        use_threads=True)

                    evaluated_programs_order = [
                        d.curiosity_program for d in evaluated_programs_data
                    ]

                    score = _average_rank_of_top_n_percent_of_programs(
                        evaluated_programs_order, program_to_data)
                    search_scores.append(score)

                    all_intermediate_scores.append(intermediate_scores)
                    all_intermediate_num_programs_per_machine_evaluated.append(
                        intermediate_num_programs_per_machine_evaluated)

                    print(f"""
            _average_rank_of_top_n_percent_of_programs {score} 
            avg score {np.array(search_scores).mean()}
            Num programs ran {len(evaluated_programs_data)}
            Num programs pruned {len([d for d in evaluated_programs_data if d.results.early_terminated])}
            Num of steps run {_total_num_steps_run(evaluated_programs_data)}
            % of top {INTERMEDIATE_SCORE_TOP_N_FRACTION} programs found {intermediate_scores[-1]}
            time saved {1 - (_total_num_steps_run(evaluated_programs_data) / most_possible_steps)}
            time saved w. missing programs {(1 - (_total_num_steps_run(evaluated_programs_data) / (most_possible_steps  * intermediate_scores[-1])))}
            """)

                    evaluated_program_ids = [
                        d.curiosity_program.program_id
                        for d in evaluated_programs_data
                    ]
                    evaluated_programs = [
                        d.curiosity_program for d in evaluated_programs_data
                    ]
                    print(evaluated_program_ids)
                    print(
                        "num_removed_programs",
                        evaluate_diversity_selection_choices(
                            evaluated_programs, data))

                if SearchParams.current().ENABLE_EARLY_TERMINATION:
                    early_termination_batch_data = get_early_termination_batch_data_fn(
                        evaluated_programs_data)
                else:
                    early_termination_batch_data = None

                _plot_program_evaluations(evaluated_programs_data,
                                          early_termination_batch_data)

                # _plot_cutoffs(early_termination_batch_data)

                # _plot_pruning_distribution(evaluated_programs_data)

                _plot_all_intermediate_scores(
                    all_intermediate_num_programs_per_machine_evaluated,
                    all_intermediate_scores,
                    f"% of Top {INTERMEDIATE_SCORE_TOP_N_FRACTION} Programs Found vs # Evaluated",
                    f"% of top {INTERMEDIATE_SCORE_TOP_N_FRACTION} programs found"
                )
Example #19
def get_early_termination_batch_data_fn(
        evaluated_programs_data) -> EarlyTerminationBatchData:
    evaluated_programs_data = [
        d for d in evaluated_programs_data
        if d is not None and d.stats is not None
    ]

    if len(evaluated_programs_data) == 0:
        return None
    else:
        program_data_by_perf = sorted(
            evaluated_programs_data, key=lambda d: d.stats["mean_performance"])
        best_program_data = program_data_by_perf[-SearchParams.current().
                                                 NUM_BEST_PROGRAMS:]

        timestep_program_mean_performance: Dict[Timestep, List[Reward]] = {}
        timestep_program_stds_across_trials: Dict[Tuple[Trial, Timestep],
                                                  List[float]] = {}

        # Find the average end-of-episode reward at each (trial, timestep_cap), averaged over all trials and timesteps for the top NUM_BEST_PROGRAMS agents
        for data in tqdm(best_program_data,
                         "get_early_termination_batch_data_fn"):
            program_timestep_rewards: Dict[Timestep, List[Reward]] = {}
            for trial, (
                    trial_episode_final_rewards,
                    trial_rollouts_episode_lengths) in enumerate(
                        zip(data.results.trials_rollouts_episode_end_rewards,
                            data.results.trials_rollouts_episode_lengths)):
                for timestep_cap in range(
                        SearchParams.current(
                        ).EARLY_TERMINATION_CHECKING_FREQUENCY,
                        TspParams.current().STEPS_PER_ROLLOUT,
                        SearchParams.current().
                        EARLY_TERMINATION_CHECKING_FREQUENCY):
                    # Find average reward of finished episodes by this timestep
                    trial_avg_episode_end_rewards = []
                    for rollout, (rollout_final_rewards,
                                  rollout_episode_lengths) in enumerate(
                                      zip(trial_episode_final_rewards,
                                          trial_rollouts_episode_lengths)):
                        rollout_episode_end_rewards = []

                        timestep = 0
                        for final_reward, episode_length in zip(
                                rollout_final_rewards,
                                rollout_episode_lengths):
                            timestep += episode_length
                            if timestep > timestep_cap:
                                break
                            else:
                                rollout_episode_end_rewards.append(
                                    final_reward)

                        if len(rollout_episode_end_rewards) > 0:
                            if "MiniGrid" in TspParams.current().ENVIRONMENT:
                                trial_avg_episode_end_rewards.append(
                                    np.array(
                                        rollout_episode_end_rewards).max())
                            else:
                                trial_avg_episode_end_rewards.append(
                                    np.array(
                                        rollout_episode_end_rewards).mean())

                    if len(trial_avg_episode_end_rewards) > 0:
                        if "MiniGrid" in TspParams.current().ENVIRONMENT:
                            avg_episode_end_rewards = np.array(
                                trial_avg_episode_end_rewards).max()
                        else:
                            avg_episode_end_rewards = np.array(
                                trial_avg_episode_end_rewards).mean()
                    else:
                        avg_episode_end_rewards = np.nan

                    if not np.isnan(avg_episode_end_rewards):
                        if timestep_cap not in program_timestep_rewards:
                            program_timestep_rewards[timestep_cap] = []
                        program_timestep_rewards[timestep_cap].append(
                            avg_episode_end_rewards)

            for timestep_cap in program_timestep_rewards:
                # Track stdevs within programs (across trials) on each timestep
                for trial in range(TspParams.current().NUM_TRIALS_PER_PROGRAM):
                    if (trial, timestep_cap
                        ) not in timestep_program_stds_across_trials:
                        timestep_program_stds_across_trials[(
                            trial, timestep_cap)] = []

                    timestep_program_stds_across_trials[(
                        trial, timestep_cap
                    )].append(
                        np.array(program_timestep_rewards[timestep_cap]).std()
                        / math.sqrt(
                            min(trial + 1,
                                len(program_timestep_rewards[timestep_cap]))))

                # Track mean performance
                if timestep_cap not in timestep_program_mean_performance:
                    timestep_program_mean_performance[timestep_cap] = []
                timestep_program_mean_performance[timestep_cap].append(
                    np.array(program_timestep_rewards[timestep_cap]).mean())

        # Set the cap for each (trial, timestep) to the mean performance seen by
        # the best programs, minus NUM_STDEVS_DOWN stdevs across programs and the
        # average across-trial stdev within programs
        trial_timestep_caps = {
            (trial, timestep_cap):
                # Average program performance on this timestep
                np.array(timestep_program_mean_performance[timestep_cap]).mean()
                # Minus differences between programs
                - SearchParams.current().NUM_STDEVS_DOWN *
                np.array(timestep_program_mean_performance[timestep_cap]).std()
                # Minus the average difference across a program's trials,
                # normalized using the number of trials seen so far
                - np.array(timestep_program_stds_across_trials[(
                    trial, timestep_cap)]).mean()
            for trial in range(TspParams.current().NUM_TRIALS_PER_PROGRAM)
            for timestep_cap in timestep_program_mean_performance.keys()
        }

        return trial_timestep_caps
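
Per (trial, timestep) cell, the comprehension above reduces to a mean minus NUM_STDEVS_DOWN across-program stdevs minus the mean across-trial stdev. A worked toy example with made-up numbers for three "best" programs at one timestep cap:

import numpy as np

program_means = np.array([0.8, 0.9, 1.0])  # mean reward per best program
trial_stds = np.array([0.05, 0.10, 0.07])  # across-trial std per program
NUM_STDEVS_DOWN = 2                        # hypothetical setting

cap = (program_means.mean()
       - NUM_STDEVS_DOWN * program_means.std()
       - trial_stds.mean())
# 0.9 - 2 * 0.0816 - 0.0733 ≈ 0.663: runs scoring below this cap get pruned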
Example #20
def test_all_curiosity_programs(parser, args):
    NUM_WORKERS = args.workers_per_gpu * args.num_gpus

    evaluation_folder = f"pickles/{args.experiment_id}_evaluations/"
    curiosity_programs_name = TspParams.current().CURIOSITY_PROGRAMS_NAME
    reward_combiner_programs_name = TspParams.current(
    ).REWARD_COMBINER_PROGRAMS_NAME

    if not os.path.exists(evaluation_folder):
        os.mkdir(evaluation_folder)

    # Initialize the GPUs here to prevent multiprocessing problems
    print("Initialize GPUS")
    # https://github.com/pytorch/pytorch/issues/16559
    if torch.cuda.is_available():
        for i in range(args.num_gpus):
            with torch.cuda.device(i):
                torch.tensor([1.]).cuda()

    pre_evaluated_data, _, \
        curiosity_programs, curiosity_program_inputs, \
        curiosity_data_structures, curiosity_optimizers, \
        reward_combiner_programs, reward_combiner_program_inputs, \
        reward_combiner_data_structures, reward_combiner_optimizers, _ = load_curiosity_program_data(
            curiosity_programs_name, reward_combiner_programs_name, args.experiment_id,
            TspParams.current().FIXED_REWARD_COMBINER_PROGRAM_ID)

    restricted_programs_to_evaluate = TspParams.current(
    ).RESTRICTED_CURIOSITY_PROGRAMS_TO_EVALUATE

    if TspParams.current().SPLIT_ACROSS_MACHINES:
        assert args.machine_split_id >= 0 and args.machine_split_id < TspParams.current(
        ).SPLIT_ACROSS_MACHINES
        if restricted_programs_to_evaluate is None:
            random.seed(0)
            random_curiosity_programs = list(curiosity_programs)
            random.shuffle(random_curiosity_programs)
            restricted_programs_to_evaluate = [
                p.program_id for i, p in enumerate(random_curiosity_programs)
                if i % TspParams.current().SPLIT_ACROSS_MACHINES ==
                args.machine_split_id
            ]
        else:
            random.seed(0)
            random_curiosity_program_ids = list(
                restricted_programs_to_evaluate)
            random.shuffle(random_curiosity_program_ids)
            restricted_programs_to_evaluate = [
                p_id for i, p_id in enumerate(random_curiosity_program_ids)
                if i % TspParams.current().SPLIT_ACROSS_MACHINES ==
                args.machine_split_id
            ]
    else:
        assert args.machine_split_id is None

    reward_combiner_program = reward_combiner_programs[TspParams.current().FIXED_REWARD_COMBINER_PROGRAM_ID] \
        if TspParams.current().FIXED_REWARD_COMBINER_PROGRAM_ID else None

    print("reward_combiner_program", reward_combiner_program)

    if restricted_programs_to_evaluate:
        print("restricted_programs_to_evaluate",
              len(restricted_programs_to_evaluate),
              restricted_programs_to_evaluate[:10], "... etc ...")
        restricted_programs_to_evaluate_set = set(
            restricted_programs_to_evaluate)

    restricted_programs = [
        p for p in curiosity_programs
        if not restricted_programs_to_evaluate
        or p.program_id in restricted_programs_to_evaluate_set
    ]
    assert not restricted_programs_to_evaluate or len(
        restricted_programs) == len(restricted_programs_to_evaluate)

    id_to_program = {p.program_id: p for p in restricted_programs}

    def get_pre_evaluated_programs_fn():
        progs = [
            id_to_program[d.results.curiosity_program_id]
            for d in pre_evaluated_data
            if restricted_programs_to_evaluate is None \
                or d.results.curiosity_program_id in restricted_programs_to_evaluate]
        print("Get pre evaluated results. # Datapoints:", len(progs),
              len(pre_evaluated_data))
        return progs, pre_evaluated_data

    def post_batch_hook_fn(a):
        pass

    select_next_program_batch_fn = {
        "RANDOM": _select_next_program_batch_random,
        "SORT": _select_next_program_batch_regressor,
        "DIVERSITY": _select_next_program_batch_diversity
    }[SearchParams.current().BATCH_SELECTION]

    select_next_program_preprocess_data_fn = {
        "RANDOM": _none,
        "SORT": _none,
        "DIVERSITY": _select_next_program_preprocess_data_diversity
    }[SearchParams.current().BATCH_SELECTION]

    select_next_program_data_update_with_program_result_fn = {
        "RANDOM": _none,
        "SORT": _none,
        "DIVERSITY":
        _select_next_program_data_update_with_program_result_diversity
    }[SearchParams.current().BATCH_SELECTION]

    search_with_score_prediction(
        restricted_programs, get_pre_evaluated_programs_fn,
        args.num_gpus * args.workers_per_gpu, evaluate_program_in_environment,
        rollout_timestep_pruning_hook_fn, select_next_program_batch_fn,
        select_next_program_preprocess_data_fn,
        select_next_program_data_update_with_program_result_fn,
        post_batch_hook_fn, get_early_termination_batch_data_fn,
        simulate_params, params,
        (args.num_gpus, reward_combiner_program, evaluation_folder))
Example #21
def simulate_evaluate_program_fn(program, rollout_timestep_pruning_hook_fn,
                                 early_termination_batch_data, selected_index,
                                 tsp_params, extra_curiosity_programs,
                                 evaluate_program_fn_extra_args,
                                 result_pipe_connection):

    program_to_data, params = evaluate_program_fn_extra_args

    # Simulate scanning through each trial sequentially, and through each timestep in the trial.
    trials_rollouts_mean_reward: TrialList[RolloutList[Reward]] = []
    trials_rollouts_episode_lengths: TrialList[RolloutList[
        EpisodeList[int]]] = []
    trials_rollouts_episode_end_rewards: TrialList[RolloutList[
        EpisodeList[Reward]]] = []
    data = program_to_data[program.program_id]

    def create_results_program_data(early_terminated):
        trials_rollouts_mean_reward: TrialList[EpisodeList[Reward]] = []
        for t in trials_rollouts_episode_end_rewards:
            trial_final_rewards: EpisodeList[Reward] = []
            trials_rollouts_mean_reward.append(trial_final_rewards)
            for r in t:
                trial_final_rewards += r
        results = ProgramTestResultData(
            trials_rollouts_mean_reward,
            trials_rollouts_episode_lengths,
            trials_rollouts_episode_end_rewards,
            False,
            None,
            early_terminated,
            early_termination_batch_data is not None,
            selected_index,
            time.time(),
            time.time(),
            0,
            None,
            None,
            None,
            None,
        )
        return ProgramData(program.program_id, program, None, results,
                           _stats_for_program(results))

    for trial, (trial_episode_final_rewards,
                trial_rollouts_episode_lengths) in enumerate(
                    zip(data.results.trials_rollouts_episode_end_rewards,
                        data.results.trials_rollouts_episode_lengths)):

        if early_termination_batch_data is not None:
            for timestep_cap in range(
                    SearchParams.current(
                    ).EARLY_TERMINATION_CHECKING_FREQUENCY,
                    TspParams.current().STEPS_PER_ROLLOUT,
                    SearchParams.current().EARLY_TERMINATION_CHECKING_FREQUENCY
            ):
                # Simulate the average episode ending rewards we would have seen at this timestep.
                rollouts_episode_end_rewards: List[List[Reward]] = []
                rollouts_episode_lengths: List[List[int]] = []

                assert len(trial_episode_final_rewards) == len(
                    trial_rollouts_episode_lengths), (
                        len(trial_episode_final_rewards),
                        len(trial_rollouts_episode_lengths))

                for rollout, (rollout_final_rewards,
                              rollout_episode_lengths) in enumerate(
                                  zip(trial_episode_final_rewards,
                                      trial_rollouts_episode_lengths)):
                    timestep = 0
                    assert len(rollout_final_rewards) == len(
                        rollout_episode_lengths)
                    episode_end_rewards: List[Reward] = []
                    episode_lengths: List[int] = []
                    rollouts_episode_end_rewards.append(episode_end_rewards)
                    rollouts_episode_lengths.append(episode_lengths)

                    for final_reward, episode_length in zip(
                            rollout_final_rewards, rollout_episode_lengths):
                        timestep += episode_length
                        if timestep > timestep_cap:
                            break
                        else:
                            episode_end_rewards.append(final_reward)
                            episode_lengths.append(episode_length)

                if len(rollouts_episode_end_rewards) > 0:
                    if "MiniGrid" in TspParams.current().ENVIRONMENT:
                        mean_episode_end_reward = np.array([
                            np.array(r).max() if len(r) > 0 else math.nan
                            for r in rollouts_episode_end_rewards
                        ]).max()
                    else:
                        mean_episode_end_reward = np.array([
                            np.array(r).mean()
                            for r in rollouts_episode_end_rewards
                        ]).mean()
                else:
                    mean_episode_end_reward = math.nan

                prune = len(episode_end_rewards
                            ) > 0 and rollout_timestep_pruning_hook_fn(
                                trial, timestep_cap, mean_episode_end_reward,
                                early_termination_batch_data)

                if prune:
                    trials_rollouts_episode_lengths.append(
                        rollouts_episode_lengths)
                    trials_rollouts_episode_end_rewards.append(
                        rollouts_episode_end_rewards)

                    result_pipe_connection.send(
                        create_results_program_data(early_terminated=True))
                    return

        # Didn't prune, just add everything:
        trials_rollouts_episode_end_rewards.append(
            data.results.trials_rollouts_episode_end_rewards[trial])
        trials_rollouts_episode_lengths.append(
            data.results.trials_rollouts_episode_lengths[trial])

    result_pipe_connection.send(
        create_results_program_data(early_terminated=False))
    return
Example #22
    def create_empty(self, environment, policy):
        return TorchKNNRegressor(TspParams.current().KNN_BUFFER_SIZE_REGRESSOR,
                                 32, 5)
Example #23
#         batch_results = evaluate_program_batch_fn(
#             program_batch, rollout_timestep_pruning_hook_fn)

if __name__ == "__main__":
    parser = mlca.helpers.config.argparser()
    parser.add_argument('--num_gpus', default=1, type=int)
    parser.add_argument('--workers_per_gpu', default=4, type=int)
    parser.add_argument('--machine_split_id', default=None, type=int)
    args = parser.parse_args()

    params = TspExperimentList.get(args.experiment_id)
    simulate_params = SearchExperimentList.get(
        params.SEARCH_PROGRAMS_EXPERIMENT_ID)

    device = mlca.helpers.config.get_device_and_set_default()

    print("device", device)

    with params:
        with simulate_params:
            with mlca.helpers.config.DefaultDevice(device):
                if TspParams.current(
                ).EXPERIMENT_TYPE == TspParams.ExperimentType.CURIOSITY_SEARCH:
                    test_all_curiosity_programs(parser, args)
                elif TspParams.current(
                ).EXPERIMENT_TYPE == TspParams.ExperimentType.REWARD_COMBINER_SEARCH:
                    quit("Deprecated")
                    # test_all_reward_combiner_programs(parser, args)
                else:
                    quit(TspParams.current().EXPERIMENT_TYPE)
Example #24
    def _normalize_external_reward(self, external_reward):
        if TspParams.current().NORMALIZE_EXTERNAL_REWARD == "MANUAL":
            return external_reward / TspParams.current(
            ).NORMALIZE_EXTERNAL_REWARD_MANUAL_LEVEL
        else:
            return external_reward
Example #25
    def create_empty(self, environment, policy):
        return VariableBufferStruct(
            TspParams.current().MAX_VARIABLE_BUFFER_SIZE)
Example #26
def main():
    experiment_id = "2-96_15x15_new-ppo-real-batched-shared_1000-steps_5-trials-yes-share-yes-batch-1_steps_curiosity"

    params = TspExperimentList[experiment_id]
    with params:
        simulator_params = SearchExperimentList[
            TspParams.current().SEARCH_PROGRAMS_EXPERIMENT_ID]

        print(TspParams.current().EXPERIMENT_TYPE)
        if TspParams.current(
        ).EXPERIMENT_TYPE == TspParams.ExperimentType.CURIOSITY_SEARCH:
            data, curiosity_programs_with_results, \
                  curiosity_programs, curiosity_program_inputs, \
                  curiosity_data_structures, curiosity_optimizers, \
                  reward_combiner_programs, reward_combiner_program_inputs, \
                  reward_combiner_data_structures, reward_combiner_optimizers, program_results_data \
              = load_curiosity_program_data(
                TspParams.current().CURIOSITY_PROGRAMS_NAME,
                TspParams.current().REWARD_COMBINER_PROGRAMS_NAME,
                experiment_id,
                TspParams.current().FIXED_REWARD_COMBINER_PROGRAM_ID)
        elif TspParams.current(
        ).EXPERIMENT_TYPE == TspParams.ExperimentType.REWARD_COMBINER_SEARCH:
            data, curiosity_programs_with_results, \
                curiosity_programs, curiosity_program_inputs, \
                curiosity_data_structures, curiosity_optimizers, \
                reward_combiner_programs, reward_combiner_program_inputs, \
                reward_combiner_data_structures, reward_combiner_optimizers, program_results_data \
                = load_reward_combiner_program_data(
                    TspParams.current().CURIOSITY_PROGRAMS_NAME,
                    TspParams.current().REWARD_COMBINER_PROGRAMS_NAME,
                    experiment_id,
                    TspParams.current().FIXED_CURIOSITY_PROGRAM_ID)

        # print(data[0].results.trials_states)

        print("Done loading")

        programs_with_error = [
            p for p in data if p.results.execution_had_error
        ]

        # for p in programs_with_error:
        #   print(p.results.execution_had_error)
        #   print(str(p.results.error))
        #   print_program_data(p, experiment_id)

        # print(place_programs_in_buckets(data))

        # print("Num programs", len(programs))
        # print("Num evaluated programs", len(program_results_data))
        # print("Num programs with error", len(programs_with_error))

        # _view_programs(curiosity_programs)

        # _throughput(data)

        stdevs = np.array(
            [p.stats["mean_performance_std"] for p in data if p.stats])
        means = np.array(
            [p.stats["mean_performance"] for p in data if p.stats])
        print("Data points", len(data))
        print("means", len(means))
        print("n", len(stdevs), "avg std", stdevs.mean(), "mean performance",
              means.mean(), ".25 quantile", np.quantile(means, 0.25),
              ".75 quantile", np.quantile(means, 0.75), "total CPU time",
              sum([p.results.elapsed_time for p in data]), "# w. error",
              len(programs_with_error), "# w. had_early_termination_data",
              len([d for d in data if d.results.had_early_termination_data]))

        print("total amount of steps",
          sum(_total_steps(p.results.trials_rollouts_episode_lengths) for p in data if p.stats),
          "max amount of steps",
          TspParams.current().STEPS_PER_ROLLOUT * TspParams.current().NUM_ROLLOUTS_PER_TRIAL * \
          TspParams.current().NUM_TRIALS_PER_PROGRAM *  len([p for p in data if p.stats])
        )

        # print_profiler(data)
        # plot_scatter_of_every_trials(data, params)
        # plot_scatter_of_every_trial_best_performance(data, params)
        # plot_scatter_of_every_trial_last_performance(data, params)
        # plot_scatter_of_every_trial_mean_performance(data, params)

        # plot_scatter_of_mean_plus_minus_std(data, experiment_id)
        # plot_scatter_of_max_plus_minus_std(data, experiment_id)
        # print(ids_of_best_n_programs(data, 16))
        # quit()

        # from mlca.simulate_search import _plot_program_evaluations
        # _plot_program_evaluations(data, None, simulator_params, params)

        # plot_histogram_of_runtime(data)
        # plot_histogram_of_average_steps_to_reach_n_unique_states(data)
        # # compare_mean_with_avg_steps_to_reach_n_unique_states(data, 10)
        # plot_histogram_of_episode_length(data)
        # quit()

        # plot_histogram_of_mean_performances(data)
        #analyze_errors(
        #  programs_with_error, data, experiment_id)

        # plot_scatter_of_selection_index_vs_total_timesteps(data)

        # plot_violin_of_mean_performances(data)
        # plot_scatter_of_stdev_vs_mean(data)
        # plot_histogram_of_stdevs(data)

        # print(program_inputs[1])
        # for p in data:
        #   # if p.stats["mean_performance"] > 60:
        #   if program_inputs[1] in p.program.update_program[-1].input_set:
        #     print(p)

        # print("")
        # print("Smallest program above threshold")
        # for p in smallest_programs_above_threshold(data, 80):
        #   print_program_data(p)

        # print("Best programs"
        best = programs_by_mean_performance(data)
        print("REMOVE THIS HACK")
        best = best[-16:]
        print(len(best))
        print([d.curiosity_program.program_id for d in best])
        for i in range(13):
            print("------")
            print(i, "from top")
            print("------")
            d = best[-i - 1]
            # worst: # d = programs_by_mean_performance(data)[i]
            print_program_data(d, experiment_id)
            d.curiosity_program.visualize_as_graph(i)

            if d.reward_combiner_program is not None:
                d.reward_combiner_program.visualize_as_graph(
                    str(i) + "combiner")
Ejemplo n.º 27
0
def main():
  # experiment_id = "2-80_30x30_new-ppo-real-batched-shared_2500-steps_5-trials"
  # experiment_id = "temp-2-80_30x30_new-ppo-real-batched-shared_2500-steps_5-trials"
  # experiment_id = "2-84_15x15_new-ppo-real-batched-shared_2500-steps_5-trials"
  experiment_id = "2-96_15x15_new-ppo-real-batched-shared_1000-steps_5-trials-yes-share-yes-batch-1_steps_curiosity"

  NUM_PROGRAMS_TO_CLUSTER = 20000
  NUM_TOP_PROGRAMS = 512 # 128

  # Thresholds for meta-selecting
  DELTA_THRESHOLD = 3.5
  PERFORMANCE_THRESHOLD = 200

  # # Thresholds for 2-96
  # DELTA_THRESHOLD = 3.5
  # PERFORMANCE_THRESHOLD = 400

  # DELTA_THRESHOLD = 2.5
  # PERFORMANCE_THRESHOLD = 400
  # DELTA_THRESHOLD = 1
  # PERFORMANCE_THRESHOLD = 400

  # DELTA_THRESHOLD = 1
  # # DELTA_THRESHOLD = 2.2
  # PERFORMANCE_THRESHOLD = 200 # 150

  # Thresholds for vanilla pydc
  # DELTA_THRESHOLD = 250
  # DENSITY_THRESHOLD = 1

  # OUTLIER_DELTA_THRESHOLD = 2
  # OUTLIER_DENSITY_THRESHOLD = 200

  params = TspExperimentList[experiment_id]
  with params: 
    simulator_params = SearchExperimentList[TspParams.current().SEARCH_PROGRAMS_EXPERIMENT_ID]
    with simulator_params: 
      predict_performance_params = PredictPerformanceExperimentList[simulator_params.PREDICT_PERFORMANCE_EXPERIMENT_ID]
      with predict_performance_params: 
        operations_set = OperationsSetList[TspParams.current().CURIOSITY_PROGRAMS_NAME]
        with operations_set:

          print(TspParams.current().EXPERIMENT_TYPE)
          assert TspParams.current().EXPERIMENT_TYPE == TspParams.ExperimentType.CURIOSITY_SEARCH

          (data, curiosity_programs_with_results,
           curiosity_programs, curiosity_program_inputs,
           curiosity_data_structures, curiosity_optimizers,
           reward_combiner_programs, reward_combiner_program_inputs,
           reward_combiner_data_structures, reward_combiner_optimizers,
           program_results_data) = load_curiosity_program_data(
              TspParams.current().CURIOSITY_PROGRAMS_NAME,
              TspParams.current().REWARD_COMBINER_PROGRAMS_NAME,
              experiment_id,
              TspParams.current().FIXED_REWARD_COMBINER_PROGRAM_ID)

          data = [d for d in data if d.stats is not None]
          # data = data[:1000]

          program_features = np.array([
              program_as_feature_vector_diversity(d.curiosity_program) 
              for d in tqdm(data, "Compute program features")
          ]).astype(np.double)
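
          # `program_as_feature_vector_diversity` is defined elsewhere; as a
          # purely hypothetical sketch, a diversity feature vector could be a
          # bag-of-operations count over the program's operation nodes
          # (`OPERATION_NAMES` and `program.operations` are assumptions here,
          # and `collections` would need to be imported):
          #
          # def program_as_feature_vector_diversity(program):
          #     counts = collections.Counter(
          #         type(op).__name__ for op in program.operations)
          #     return np.array(
          #         [counts[name] for name in sorted(OPERATION_NAMES)],
          #         dtype=np.double)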

          # # ======================
          # # Test evaluate_diversity_selection_choices
          # # ======================
          # diversity_selected_program_ids = [1555, 4326, 29160, 50370, 5777, 1494, 46394, 27005, 10608, 43408, 30400, 40262, 45938, 48117, 43001, 36843, 13582, 27746, 9520, 48643, 22451, 31660, 9953, 30505, 47646, 37722, 13623, 6402, 17380, 12364, 41567, 11918, 39845, 47502, 823, 29921, 8274, 44384, 42196, 20883, 20152, 44937, 21881, 37669, 32002, 39249, 39669, 17839, 48524, 12740, 47770, 26513, 6101, 12647, 31889, 177, 32612, 35737, 14980, 42037, 48528, 47704, 18094, 41569, 37597, 13348, 700, 2777, 13677, 47701, 49403, 4330, 1427, 11427, 28323, 37042, 12029, 29790, 28746, 41534, 4427, 25664, 14473, 16294, 7471, 35744, 49895, 22834, 28892, 3617, 25471, 25482, 22636, 35262, 47781, 1776, 48124, 12819, 28209, 37806, 4423, 21932, 18596, 10678, 39880, 3360, 48055, 8144, 2011, 11487, 10205, 6115, 2945, 894, 30149, 37420, 47508, 1198, 34467, 20690, 37655, 31625, 7931, 18650, 38635, 47648, 18525, 2117, 10499, 868, 7117, 5868, 14831, 11619, 50482, 25367, 32046, 6315, 17256, 29366, 33685, 7269, 38086, 46739, 16617, 1001, 27228, 11653, 8236, 47248, 10165, 31891, 48983, 22654, 18288, 5669, 4119, 13712, 22339, 37098, 45453, 18617, 4286, 24797, 24364, 37781, 5966, 6501, 3027, 19732, 20096, 26782, 26520, 31299, 10929, 5038, 2627, 22673, 46680, 38364, 10587, 32209, 22494, 32351, 25020, 49956, 4466, 988, 35884, 20555, 7327, 38011, 37189, 19772, 17240, 32184, 20103, 4255, 3424, 7530, 38223, 3550, 43128, 12484, 824, 8557, 44612, 11095, 3313, 48930, 39423, 27214, 13658, 13493, 30770, 20819, 39274, 25570, 31976, 4219, 8273, 29095, 5412, 13298, 42043, 19924, 9790, 45515, 2516, 21626, 37426, 31521, 31989, 25686, 38385, 3524, 38596, 32840, 8096, 10411, 38494, 5817, 41705, 784, 31531, 42764, 2145, 45595, 27794, 5470, 8158, 11705, 18523, 10705, 46882, 31685, 6254, 5163, 37868, 37207, 3511, 44677, 21362, 43184, 10142, 7208, 1220, 26723, 36659, 39598, 8294, 34974, 38116, 24808, 17960, 12627, 4125, 11424, 8473, 7629, 6805, 40292, 9557, 36382, 27070, 11630, 19922, 45493, 24099, 49734, 38009, 48336, 20833, 2805, 10687, 37652, 9836, 31646, 22431, 12530, 47895, 30937, 8115, 35606, 6258, 11161, 19985, 8183, 5532, 3458, 30097, 45524, 10547, 37058, 6107, 32042, 5432, 16513, 39741, 12875, 2218, 7904, 15345, 41736, 39492, 40109, 32349, 35490, 37745, 18496, 6205, 28839, 25311, 7290, 13376, 4189, 42148, 41039, 38526, 36648, 9582, 4949, 45233, 31313, 31579, 25034, 972, 43888, 41316, 7323, 4752, 31890, 33365, 29200, 48187, 34196, 14913, 5103, 48053, 22658, 11732, 3707, 13340, 10583, 10544, 19515, 13432, 12500, 49593, 7472, 15344, 28972, 4388, 2230, 5076, 21817, 44584, 50790, 7794, 31741, 41506, 24057, 6372, 46973, 31607, 29591, 6156, 50030, 25169, 13512, 5199, 4361, 8309, 6117, 51138, 22628, 17799, 31460, 31307, 31888, 42135, 35685, 45218, 33216, 49197, 981, 31719, 5241, 35357, 13354, 37656, 47836, 27199, 9844, 2582, 20794, 7448, 31525, 48120, 36700, 36549, 13485, 9322, 11163, 44654, 12651, 8315, 29256, 47483, 10095, 48235, 2353, 50910, 21890, 12658, 27035, 6623, 25186, 13453, 36521, 3902, 40325, 38099, 29099, 25507, 6408, 7138, 6342, 38434, 38985, 6084, 30209, 41689, 47480, 35051, 20699, 34340, 7096, 27566, 27547, 4404, 31462, 49282, 40103, 35023, 20761, 46682, 43886, 10666, 29516, 6583, 31954, 47818, 47703, 20470, 1536, 4105, 38010, 30919, 12741, 10581, 16483, 821, 3604, 35362, 6307, 5140, 25047, 15270, 49179, 28747, 10869, 46818, 1229, 45191, 25720, 17697, 38101, 24356, 24556, 6360, 8429, 4509, 3240, 35302, 19741, 25207, 22311, 50501, 2094, 48519, 21735, 18337, 26445, 35802, 13499, 46006, 42091, 31524, 30501, 6606, 41483, 
          #   ... several thousand more program IDs ...]
          # data_ids = set(
          #   d.curiosity_program.program_id for d in data
          # )
          # diversity_selected_program_ids_set = set([
          #   i for i in diversity_selected_program_ids if i in data_ids
          # ])          


          # sub_selected_programs = [
          #   d.curiosity_program 
          #   for d in data 
          #   if d.curiosity_program.program_id in diversity_selected_program_ids_set]
          # # print("# sub_selected_programs", len(sub_selected_programs))
          # print("num_removed_programs", evaluate_diversity_selection_choices(
          #   sub_selected_programs,
          #   data
          # ))

          # # ==============
          # # Select programs that are far away from each other
          # # ==============
          # PERCENT_OF_DATA_TO_SAMPLE = .1
          # CLOSEST_THRESHOLD = 1
          # n = int(len(data) * PERCENT_OF_DATA_TO_SAMPLE)
          # sub_selected_program_ids: List[ProgramId] = []
          # selected_program_features: List[np.ndarray] = []
          # program_id_to_features = {
          #   d.curiosity_program.program_id: program_as_feature_vector_diversity(d.curiosity_program)
          #   for d in tqdm(data, "Make program features")
          # }

          # for i, d in enumerate(tqdm(data, "Generate diverse dataset")):
          #   program_id = d.curiosity_program.program_id
          #   pos = program_id_to_features[program_id]
          #   closest = min([np.linalg.norm(p - pos) for p in selected_program_features]) if len(selected_program_features) > 0 else math.inf
          #   if closest > CLOSEST_THRESHOLD:
          #     sub_selected_program_ids.append(program_id)
          #     selected_program_features.append(pos.astype(float))
          #     if len(sub_selected_program_ids) == n:
          #       break
          # print(i, len(sub_selected_program_ids))

          # sub_selected_programs = [
          #   d.curiosity_program 
          #   for d in data 
          #   if d.curiosity_program.program_id in sub_selected_program_ids]

          # print("# sub_selected_programs", len(sub_selected_programs))
          # print("num_removed_programs", evaluate_diversity_selection_choices(
          #   sub_selected_programs,
          #   data
          # ))


          # ==============
          # Select the best programs
          # ==============
          # PERCENT_OF_DATA_TO_SAMPLE = .1
          # CLOSEST_THRESHOLD = 1
          # n = int(len(data) * PERCENT_OF_DATA_TO_SAMPLE)
          # programs_by_performance = [ d.curiosity_program
          #     for d in reversed(sorted(data, key=lambda d: d.stats["mean_performance"]))]
          # sub_selected_programs = programs_by_performance[:n]

          # print("# sub_selected_programs", len(sub_selected_programs))
          # print("num_removed_programs", evaluate_diversity_selection_choices(
          #   sub_selected_programs,
          #   data
          # ))
          # ==============
          # Select programs randomly
          # ==============

          # PERCENT_OF_DATA_TO_SAMPLE = .1
          # for i in range(4):
          #   print(len(data) * PERCENT_OF_DATA_TO_SAMPLE)
          #   sub_selected_programs = random.sample(
          #     [d.curiosity_program for d in data],
          #     int(len(data) * PERCENT_OF_DATA_TO_SAMPLE)
          #   )
          #   print("num_removed_programs", evaluate_diversity_selection_choices(
          #     sub_selected_programs,
          #     data
          #   ))

          # ======================
          # Run clustering on top programs
          # ======================
          # best_program_data = programs_by_mean_performance(data)[-NUM_TOP_PROGRAMS:]
          # print(f"{len(best_program_data)} best programs loaded")

          # best_performances = [
          #   d.stats["mean_performance"] for d in best_program_data
          # ]
          # best_program_features = np.array([
          #     program_as_feature_vector_diversity(d.curiosity_program) for d in tqdm(best_program_data)
          # ]).astype(np.double)
          # qualifying_point_indices, deltas = performance_cluster(
          #   best_performances, best_program_features, 
          #   DELTA_THRESHOLD, PERFORMANCE_THRESHOLD, 
          #   visualize=True)

          # print("# qualifying_point_indices", len(qualifying_point_indices))

          # for index in qualifying_point_indices:
          #   best_program_data[index].curiosity_program.visualize_as_graph(f"{best_performances[index]}, {deltas[index]}")

          # print("# qualifying_point_indices", len(qualifying_point_indices))

          # ======================
          # Run clustering on all programs
          # ======================
          program_ids = [d.curiosity_program.program_id for d in data]
          performances = [
            d.stats["mean_performance"] for d in data
          ]
          start = time.time()
          qualifying_point_indices, deltas, parent_pointers = performance_cluster(
            program_ids, performances, program_features,
            DELTA_THRESHOLD, PERFORMANCE_THRESHOLD,
            # visualize=True
          )
          print("Elapsed", time.time() - start)

          print("# qualifying_point_indices", len(qualifying_point_indices))
          print(qualifying_point_indices)

          # for index in qualifying_point_indices:
          #   data[index].curiosity_program.visualize_as_graph(f"{performances[index]}, {deltas[index]}")

          print("# qualifying_point_indices", len(qualifying_point_indices))
Ejemplo n.º 28
0
    def remap_actions(self, envs, actions):
        # Note: the `envs` argument is unused; the method reads the stored
        # `self.envs` instead.
        if not TspParams.current().CONTINUOUS_ACTION_SPACE:
            # Discrete action space: concatenate the per-timestep action
            # tensors and one-hot encode them against the env's action count.
            return one_hot(torch.cat(actions), self.envs.action_space.n).to(
                DefaultDevice.current())
        else:
            # Continuous action space: stack the raw action tensors as-is.
            return torch.stack(actions).to(DefaultDevice.current())
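
    # Minimal usage sketch (assumptions: `trainer` is an instance of this
    # class and `actions` is a list of 1-D LongTensors, one tensor per
    # timestep with one action index per rollout):
    #
    #   actions = [torch.tensor([0, 2]), torch.tensor([1, 1])]
    #   encoded = trainer.remap_actions(trainer.envs, actions)
    #   # discrete case -> one one-hot row per (timestep, rollout) action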