Code example #1
    def check_constraint(self, new_agent):
        # Compute UBM, extract supervectors and compute KL
        new_policy_data = do_manual_rollouts(new_agent, self.env,
                                             self.n_rollouts)
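        # Add a small amount of noise to the new rollouts (presumably to
        # avoid degenerate, duplicated samples when fitting the UBM below)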
        new_policy_data += np.random.randn(*new_policy_data.shape) * 0.001
        all_data = np.concatenate((self.old_policy_data, new_policy_data),
                                  axis=0)
        # Avoid all the spam from "less unique centroids"
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            ubm = gmm_tools.train_ubm(all_data,
                                      n_components=self.n_centroids,
                                      verbose=0)
        old_supervector = gmm_tools.trajectories_to_supervector(
            self.old_policy_data, ubm)
        new_supervector = gmm_tools.trajectories_to_supervector(
            new_policy_data, ubm)
        # Supervectors are returned as raveled 1D vectors
        old_supervector = old_supervector.reshape(ubm.means_.shape)
        new_supervector = new_supervector.reshape(ubm.means_.shape)

        kl_distance = gmm_tools.adapted_gmm_distance(old_supervector,
                                                     new_supervector,
                                                     ubm.precisions_,
                                                     ubm.weights_)

        # True if the KL distance exceeds the allowed maximum
        return kl_distance >= self.max_kl_constraint
Code example #2
def compute_pivector_distance_matrices():
    # Compute distance matrices for each pivector file
    pivector_files = glob(PIVECTOR_TEMPLATE.format(
        env="*", num_traj="*", num_components="*",
        policy_name="*", repetition_num="*"))
    for pivector_file in tqdm(pivector_files, desc="distance"):
        # Skip if exists
        # Distance file name is the same as the pivector file, but with
        # "pivectors" replaced by "pivector_distances"
        distance_file = pivector_file.replace("pivectors", "pivector_distances")
        if os.path.isfile(distance_file):
            continue

        data = np.load(pivector_file)
        pivectors = data["pivectors"]
        covariances = data["covariances"]
        precisions = 1 / covariances
        weights = data["weights"]
        num_pivectors = len(pivectors)
        mean_shape = precisions.shape

        # Create with np.ones to allocate the space up front, so we
        # know immediately if we are running out of memory.
        distance_matrix = np.ones((num_pivectors, num_pivectors))
        for i in range(num_pivectors):
            # Cut ~half of the computation needed
            for j in range(i, num_pivectors):
                means1 = pivectors[i].reshape(mean_shape)
                means2 = pivectors[j].reshape(mean_shape)
                distance = adapted_gmm_distance(means1, means2, precisions, weights)
                distance_matrix[i, j] = distance
                distance_matrix[j, i] = distance

        np.savez(
            distance_file,
            distance_matrix=distance_matrix,
            average_episodic_rewards=data["average_episodic_rewards"]
        )
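The distance files above are written with np.savez, so loading one back for analysis is straightforward. A minimal sketch, assuming one of the produced files exists on disk (the path below is only a placeholder):

import numpy as np

# Load one distance file produced by the function above (placeholder path)
data = np.load("path/to/some_pivector_distances.npz")
distance_matrix = data["distance_matrix"]  # (num_pivectors, num_pivectors), symmetric
rewards = data["average_episodic_rewards"]
print(distance_matrix.shape, rewards.shape)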
Code example #3
def compute_novelty_vs_archive(archive,
                               novelty_vector,
                               k,
                               bc_type="terminal",
                               worker_dir=None):
    distances = []
    nov = novelty_vector.astype(np.float64)
    if bc_type == "supervector":
        ubm = None
        means = None
        stds = None
        # Guard against a race condition: if loading fails, retry a bit later
        while ubm is None:
            try:
                ubm, means, stds = gmm_tools.load_ubm(
                    os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load UBM file. Trying again...")
                time.sleep(0.1)
        # Normalize data
        normalized_states = (novelty_vector - means) / stds

        my_supervector = gmm_tools.trajectories_to_supervector(
            normalized_states, ubm)
        my_supervector = my_supervector.reshape(ubm.means_.shape)
        precisions = ubm.precisions_
        weights = ubm.weights_

        # Now load the supervectors stored in the same file (the file is
        # read again here; not the most efficient, but simple)
        archive_data = None
        while archive_data is None:
            try:
                archive_data = np.load(
                    os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load archive file. Trying again...")
                time.sleep(0.1)
        other_supervectors = archive_data["supervectors"]
        archive_data.close()

        for i in range(other_supervectors.shape[0]):
            kl_distance = gmm_tools.adapted_gmm_distance(
                my_supervector, other_supervectors[i], precisions, weights)
            distances.append(kl_distance)
    else:
        for point in archive:
            if bc_type == "terminal":
                distances.append(
                    euclidean_distance(point.astype(np.float64), nov))
            elif bc_type == "gaussian":
                midpoint = len(point) // 2
                if isinstance(nov, np.ndarray):
                    if nov.ndim == 2:
                        # Need to compute mean and cov
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(np.mean(nov, axis=0)).float(),
                            th.diag(th.from_numpy(np.var(nov, axis=0) +
                                                  1e-7)).float())
                    else:
                        # Already computed mean+var vector
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(nov[:midpoint]).float(),
                            th.diag(th.from_numpy(nov[midpoint:] +
                                                  1e-7)).float())
                point = th.distributions.MultivariateNormal(
                    th.from_numpy(point[:midpoint]).float(),
                    th.diag(th.from_numpy(point[midpoint:] + 1e-7)).float())
                with th.no_grad():
                    kl_distance = (th.distributions.kl_divergence(nov, point) +
                                   th.distributions.kl_divergence(point, nov))
                    distances.append(kl_distance.item())
            else:
                raise NotImplementedError(
                    "bc_type {} not implemented".format(bc_type))

    # Pick k nearest neighbors
    distances = np.array(distances)
    top_k_indices = distances.argsort()[:k]
    top_k = distances[top_k_indices]
    return top_k.mean()
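A toy usage sketch for the simpler "terminal" behaviour characterization branch above: the archive is a list of behaviour vectors, and the returned novelty score is the mean distance to the k nearest of them. The vectors below are made up purely for illustration, and the call relies on the module-level euclidean_distance helper referenced in the code:

# Hypothetical toy example (bc_type defaults to "terminal")
archive = [np.array([0.0, 0.0]), np.array([1.0, 0.0]), np.array([5.0, 5.0])]
novelty = compute_novelty_vs_archive(archive, np.array([0.5, 0.0]), k=2)
# novelty == 0.5: the two nearest archive points are both at distance 0.5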
Code example #4
def compute_checkpoint_distances(unparsed_args):
    parser = ArgumentParser(
        description="Compute distances between consecutive checkpoints and store them under the experiment dir"
    )
    parser.add_argument("--inputs",
                        type=str,
                        nargs="+",
                        required=True,
                        help="Paths to experiments to process.")
    args = parser.parse_args(unparsed_args)

    for path in tqdm(args.inputs, desc="experiment", leave=False):
        pivector_paths = glob.glob(os.path.join(path, PIVECTORS_DIR, "*"))

        # Sort them by the number of steps trained
        steps_trained = [
            int(re.findall("rl_model_([0-9]+)_steps", pivector_name)[0])
            for pivector_name in pivector_paths
        ]
        pivector_paths = list(
            zip(*sorted(zip(pivector_paths, steps_trained),
                        key=lambda x: x[1])))[0]

        means = []
        rewards = []
        shared_covariances = None
        shared_weights = None
        for pivector_path in pivector_paths:
            data = np.load(pivector_path)
            covariances = data["covariances"]
            weights = data["weights"]
            reward = data["average_episodic_reward"]
            # Use same covariance and weight
            # for all samples later
            if shared_covariances is None:
                shared_covariances = covariances
                shared_weights = weights
            # Sanity check to make sure all adapted GMMs
            # share same covariance
            if not np.allclose(shared_covariances, covariances):
                raise ValueError(
                    "Covariances did not match file {}".format(pivector_path))

            # Covariances are diagonal so they share same
            # shape with means.
            # Pivector is just raveled means.
            mean = data["pivector"].reshape(covariances.shape)
            means.append(mean)
            rewards.append(reward)

        precisions = 1 / shared_covariances

        distances = []
        aligned_rewards = []
        for i in range(len(means) - 1):
            distances.append(
                adapted_gmm_distance(means[i], means[i + 1], precisions,
                                     shared_weights))
            # Align to the previous policy
            aligned_rewards.append(rewards[i])

        distances = np.array(distances)
        aligned_rewards = np.array(aligned_rewards)

        # Store back on disk
        save_path = os.path.join(path, CHECKPOINT_DISTANCES_FILE)
        np.savez(save_path,
                 distances=distances,
                 average_episodic_rewards=aligned_rewards)
Code example #5
def distance_metric(pivector1, pivector2):
    # mean_shape, env_precisions and env_weights come from the enclosing scope
    means1 = pivector1.reshape(mean_shape)
    means2 = pivector2.reshape(mean_shape)
    return adapted_gmm_distance(means1, means2, env_precisions,
                                env_weights)
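Since distance_metric takes two flat vectors and returns a scalar, it can be passed directly to SciPy's pairwise-distance helpers. A minimal sketch, assuming pivectors is an array of raveled mean supervectors with shape (n_policies, dim) and that mean_shape, env_precisions and env_weights are defined in the enclosing scope as above:

from scipy.spatial.distance import pdist, squareform

# Condensed pairwise distances between all pivectors, expanded into a
# full symmetric distance matrix
condensed = pdist(pivectors, metric=distance_metric)
distance_matrix = squareform(condensed)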