Example #1
    def check_constraint(self, new_agent):
        # Compute UBM, extract supervectors and compute KL
        new_policy_data = do_manual_rollouts(new_agent, self.env,
                                             self.n_rollouts)
        new_policy_data += np.random.randn(*new_policy_data.shape) * 0.001
        all_data = np.concatenate((self.old_policy_data, new_policy_data),
                                  axis=0)
        # Avoid all the spam from "less unique centroids"
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            ubm = gmm_tools.train_ubm(all_data,
                                      n_components=self.n_centroids,
                                      verbose=0)
        old_supervector = gmm_tools.trajectories_to_supervector(
            self.old_policy_data, ubm)
        new_supervector = gmm_tools.trajectories_to_supervector(
            new_policy_data, ubm)
        # Supervectors are returned as raveled 1D vectors
        old_supervector = old_supervector.reshape(ubm.means_.shape)
        new_supervector = new_supervector.reshape(ubm.means_.shape)

        kl_distance = gmm_tools.adapted_gmm_distance(old_supervector,
                                                     new_supervector,
                                                     ubm.precisions_,
                                                     ubm.weights_)

        # True means the KL distance exceeded the allowed maximum
        return kl_distance >= self.max_kl_constraint
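For reference, a minimal sketch of the kind of distance adapted_gmm_distance presumably computes between two adapted GMMs that share weights and diagonal precisions: a weight-averaged Mahalanobis distance between the component means. The function name and exact scaling below are assumptions, not the project's gmm_tools implementation.

import numpy as np

def adapted_gmm_distance_sketch(means1, means2, precisions, weights):
    # means1, means2: (n_components, n_features) adapted component means
    # precisions:     (n_components, n_features) diagonal precisions
    # weights:        (n_components,) mixture weights
    diff = means1 - means2
    per_component = np.sum(diff * precisions * diff, axis=1)
    # Weight-averaged squared Mahalanobis distance between component means
    return float(np.sum(weights * per_component))

# Identical means give zero distance; shifting them increases it
rng = np.random.default_rng(0)
m1 = rng.normal(size=(4, 3))
prec = np.ones((4, 3))
w = np.full(4, 0.25)
assert adapted_gmm_distance_sketch(m1, m1, prec, w) == 0.0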
Example #2
def extract_pivector_worker(num_traj_index, num_traj, num_components, env):
    # Worker for the function below
    trained_ubms = glob(UBM_TEMPLATE.format(num_traj=num_traj, num_components=num_components, env=env, policy_name="*", repetition_num="*"))
    trained_ubm_dirs = [os.path.basename(os.path.dirname(x)) for x in trained_ubms]
    policy_names = ["_".join(x.split("_")[-4:-2]) for x in trained_ubm_dirs]
    policy_names = sorted(list(set(policy_names)))
    for policy_name in policy_names:
        for repetition in range(1, NUM_REPETITIONS + 1):
            pivector_path = PIVECTOR_TEMPLATE.format(num_traj=num_traj, num_components=num_components, env=env, policy_name=policy_name, repetition_num=repetition)
            # If the pivector file already exists, skip extraction for this repetition
            if os.path.isfile(pivector_path):
                continue
            # Load UBM
            ubm_path = UBM_TEMPLATE.format(num_traj=num_traj, num_components=num_components, env=env, policy_name=policy_name, repetition_num=repetition)
            ubm, means, stds = load_ubm(ubm_path)
            # Hacky thing to load the same trajectories as used in UBM training
            ubm_data = np.load(ubm_path)
            trajectory_indeces = ubm_data["trajectory_indeces"]
            ubm_data.close()
            # Load trajectory data
            trajectories_path = glob(os.path.join(TRAJECTORY_TEMPLATE.format(env=env, policy_name=policy_name), "*"))
            trajectories_path = sorted(trajectories_path)
            all_pivectors = []
            all_average_episodic_returns = []
            for trajectory_i, trajectory_path in enumerate(trajectories_path):
                data = np.load(trajectory_path)
                keys = sorted(list(data.keys()))
                all_average_episodic_returns.append(data["episodic_rewards"].mean())
                # Take trajectories at the same indices as used in UBM training.
                # First make sure they are in the same order as during UBM training
                datas = [data[key] for key in keys if "traj" in key]
                datas = [datas[i] for i in trajectory_indeces[trajectory_i]]

                data = np.concatenate(datas, axis=0)
                data = (data - means) / stds
                pivector = trajectories_to_supervector(data, ubm)
                all_pivectors.append(pivector)
            all_pivectors = np.array(all_pivectors)

            np.savez(
                pivector_path,
                pivectors=all_pivectors,
                average_episodic_rewards=all_average_episodic_returns,
                covariances=ubm.covariances_,
                weights=ubm.weights_,
            )
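The core of trajectories_to_supervector is relevance-MAP adaptation of the UBM means towards a policy's state data, with the adapted means flattened into one vector. A rough, self-contained sketch using scikit-learn (the relevance factor and exact update in gmm_tools may differ):

import numpy as np
from sklearn.mixture import GaussianMixture

def map_adapted_supervector(states, ubm, relevance=16.0):
    # Responsibilities of each UBM component for each state
    resp = ubm.predict_proba(states)              # (N, K)
    n_k = resp.sum(axis=0)                        # soft counts per component
    # Posterior-weighted mean of the data for each component
    ex_k = resp.T @ states / np.maximum(n_k[:, None], 1e-8)
    # Interpolate between UBM means and data means (relevance MAP)
    alpha = (n_k / (n_k + relevance))[:, None]
    adapted_means = alpha * ex_k + (1.0 - alpha) * ubm.means_
    return adapted_means.ravel()                  # raveled 1D "supervector"

# Example: fit a small UBM on background data, adapt to one policy's states
rng = np.random.default_rng(0)
background = rng.normal(size=(2000, 2))
ubm = GaussianMixture(n_components=4, covariance_type="diag",
                      random_state=0).fit(background)
policy_states = rng.normal(loc=0.5, size=(200, 2))
print(map_adapted_supervector(policy_states, ubm).shape)  # (8,)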
Example #3
def extract_pivectors(unparsed_args):
    parser = ArgumentParser("Extract pivectors for given experiments")
    parser.add_argument(
        "--inputs",
        type=str,
        nargs="+",
        required=True,
        help="Paths to experiments for which pivectors should be extracted.")
    parser.add_argument(
        "ubms",
        type=str,
        help="Directory where UBM models reside, one per environment.")
    args = parser.parse_args(unparsed_args)

    for experiment_path in tqdm(args.inputs):
        env = experiment_path.split("_")[1]
        os.makedirs(os.path.join(experiment_path, PIVECTORS_DIR),
                    exist_ok=True)
        ubm, means, stds = load_ubm(
            os.path.join(args.ubms, "{}_ubm.npz".format(env)))

        trajectory_paths = glob.glob(
            os.path.join(experiment_path, TRAJECTORIES_DIR, "*"))
        for trajectory_path in tqdm(trajectory_paths, leave=False):
            trajectory_name = os.path.basename(trajectory_path)
            data = np.load(trajectory_path)
            average_episodic_reward = data["episodic_rewards"].mean()
            states = np.concatenate(
                [data[key] for key in data.keys() if "traj" in key])

            # Normalize
            states = (states - means) / stds
            pivector = trajectories_to_supervector(states, ubm)
            new_path = os.path.join(experiment_path, PIVECTORS_DIR,
                                    trajectory_name)

            # Also store component weights and covariances for future reference
            np.savez(new_path,
                     pivector=pivector,
                     average_episodic_reward=average_episodic_reward,
                     covariances=ubm.covariances_,
                     weights=ubm.weights_)

        del ubm
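load_ubm above returns the UBM together with the normalisation statistics used when it was trained. A plausible save/load pair built on a single .npz file (field names here are illustrative; the project's actual format may differ):

import numpy as np
from sklearn.mixture import GaussianMixture

def save_ubm_sketch(path, ubm, means, stds, **extra):
    # Store the GMM parameters plus the data normalisation statistics
    np.savez(path, ubm_means=ubm.means_, ubm_weights=ubm.weights_,
             ubm_covariances=ubm.covariances_, means=means, stds=stds, **extra)

def load_ubm_sketch(path):
    data = np.load(path)
    ubm = GaussianMixture(n_components=data["ubm_weights"].shape[0],
                          covariance_type="diag")
    ubm.means_ = data["ubm_means"]
    ubm.weights_ = data["ubm_weights"]
    ubm.covariances_ = data["ubm_covariances"]
    # For diagonal covariances the Cholesky of the precision is elementwise
    ubm.precisions_cholesky_ = 1.0 / np.sqrt(data["ubm_covariances"])
    return ubm, data["means"], data["stds"]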
Example #4
def train_ubm_and_extract_pivectors(env, experiment_paths):
    """
    Train UBM for pivector extraction
    and adapt GMMs for the given experiments
    """
    ubm_path = UBM_PATH.format(env)
    os.makedirs(os.path.dirname(ubm_path), exist_ok=True)
    # Train UBM if one does not exist
    if not os.path.isfile(ubm_path):
        # Load GAIL and BC data, and final agent data as well
        all_data = []
        for experiment_path in tqdm(experiment_paths, desc="ubm-load"):
            traj_paths = glob(
                os.path.join(experiment_path, BC_TRAJECTORY_DIRECTORY, "*"))
            for traj_path in traj_paths:
                data = np.load(traj_path)
                data_trajs = [
                    data[key] for key in data.keys() if "traj" in key
                ]
                all_data.extend(data_trajs)
        # Load the data of the final model
        traj_path = os.path.join(experiment_path, FINAL_MODEL_TRAJECTORIES)
        data = np.load(traj_path)
        data_trajs = [data[key] for key in data.keys() if "traj" in key]
        all_data.extend(data_trajs)

        all_data = np.concatenate(all_data, axis=0)
        # Restrict amount of data
        if all_data.shape[0] > MAX_UBM_DATA:
            np.random.shuffle(all_data)
            all_data = all_data[:MAX_UBM_DATA]
        # Normalize
        means = all_data.mean(axis=0)
        stds = all_data.std(axis=0)
        all_data = (all_data - means) / stds

        ubm = train_ubm(all_data, n_components=NUM_COMPONENTS)
        save_ubm(ubm_path, ubm, means, stds)
    else:
        print("Skipping UBM training (found)")

    ubm, means, std = load_ubm(ubm_path)

    # Extract pivectors
    for experiment_path in experiment_paths:
        traj_dir = BC_TRAJECTORY_DIRECTORY
        pivec_dir = BC_PIVECTOR_DIRECTORY
        os.makedirs(os.path.join(experiment_path, pivec_dir), exist_ok=True)
        traj_paths = glob(os.path.join(experiment_path, traj_dir, "*"))
        for traj_path in traj_paths:
            pivec_path = os.path.join(experiment_path, pivec_dir,
                                      os.path.basename(traj_path))
            if os.path.isfile(pivec_path):
                continue
            data = np.load(traj_path)
            average_episodic_reward = data["episodic_rewards"].mean()
            data = [data[key] for key in data.keys() if "traj" in key]
            data = np.concatenate(data, axis=0)
            data = (data - means) / std

            pivec = trajectories_to_supervector(data, ubm)

            # Also store component weights and covariances for future reference
            np.savez(pivec_path,
                     pivector=pivec,
                     average_episodic_reward=average_episodic_reward,
                     covariances=ubm.covariances_,
                     weights=ubm.weights_)
        # Extract pivector for the final model as well
        pivec_path = os.path.join(experiment_path, FINAL_MODEL_PIVECTOR)
        traj_path = os.path.join(experiment_path, FINAL_MODEL_TRAJECTORIES)

        if not os.path.isfile(pivec_path):
            data = np.load(traj_path)
            average_episodic_reward = data["episodic_rewards"].mean()
            data = [data[key] for key in data.keys() if "traj" in key]
            data = np.concatenate(data, axis=0)
            data = (data - means) / std

            pivec = trajectories_to_supervector(data, ubm)

            # Also store component weights and covariances for future reference
            np.savez(pivec_path,
                     pivector=pivec,
                     average_episodic_reward=average_episodic_reward,
                     covariances=ubm.covariances_,
                     weights=ubm.weights_)
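train_ubm itself is presumably a thin wrapper around scikit-learn's GaussianMixture fitted on the pooled, normalised states; a stand-in under that assumption (the real covariance type, regularisation and init settings may differ):

from sklearn.mixture import GaussianMixture

def train_ubm_sketch(data, n_components, n_init=1, verbose=0):
    # Fit a diagonal-covariance GMM (the "universal background model")
    # on states pooled from many policies
    gmm = GaussianMixture(n_components=n_components,
                          covariance_type="diag",
                          n_init=n_init,
                          verbose=verbose)
    gmm.fit(data)
    return gmm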
Example #5
def compute_novelty_vs_archive(archive,
                               novelty_vector,
                               k,
                               bc_type="terminal",
                               worker_dir=None):
    distances = []
    nov = novelty_vector.astype(np.float64)
    if bc_type == "supervector":
        ubm = None
        means = None
        stds = None
        # Fight against a race condition: if loading fails, try again a bit later
        while ubm is None:
            try:
                ubm, means, stds = gmm_tools.load_ubm(
                    os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load UBM file. Trying again...")
                time.sleep(0.1)
        # Normalize data
        normalized_states = (novelty_vector - means) / stds

        my_supervector = gmm_tools.trajectories_to_supervector(
            normalized_states, ubm)
        my_supervector = my_supervector.reshape(ubm.means_.shape)
        precisions = ubm.precisions_
        weights = ubm.weights_

        # Now load supervectors that are stored in the same file (conveniently reading many times
        # for _optimal efficiency_...)
        archive_data = None
        while archive_data is None:
            try:
                archive_data = np.load(
                    os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load archive file. Trying again...")
                time.sleep(0.1)
        other_supervectors = archive_data["supervectors"]
        archive_data.close()

        for i in range(other_supervectors.shape[0]):
            kl_distance = gmm_tools.adapted_gmm_distance(
                my_supervector, other_supervectors[i], precisions, weights)
            distances.append(kl_distance)
    else:
        for point in archive:
            if bc_type == "terminal":
                distances.append(
                    euclidean_distance(point.astype(np.float64), nov))
            elif bc_type == "gaussian":
                midpoint = len(point) // 2
                if isinstance(nov, np.ndarray):
                    if nov.ndim == 2:
                        # Need to compute mean and cov
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(np.mean(nov, axis=0)).float(),
                            th.diag(th.from_numpy(np.var(nov, axis=0) +
                                                  1e-7)).float())
                    else:
                        # Already computed mean+var vector
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(nov[:midpoint]).float(),
                            th.diag(th.from_numpy(nov[midpoint:] +
                                                  1e-7)).float())
                point = th.distributions.MultivariateNormal(
                    th.from_numpy(point[:midpoint]).float(),
                    th.diag(th.from_numpy(point[midpoint:] + 1e-7)).float())
                with th.no_grad():
                    kl_distance = (th.distributions.kl_divergence(nov, point) +
                                   th.distributions.kl_divergence(point, nov))
                    distances.append(kl_distance.item())
            else:
                raise NotImplementedError(
                    "bc_type {} not implemented".format(bc_type))

    # Pick k nearest neighbors
    distances = np.array(distances)
    top_k_indices = distances.argsort()[:k]
    top_k = distances[top_k_indices]
    return top_k.mean()
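For the "gaussian" behaviour characterisation above, the symmetric KL has a simple closed form for diagonal Gaussians. A NumPy equivalent of the torch.distributions computation (modulo the 1e-7 variance floor):

import numpy as np

def symmetric_kl_diag(mu1, var1, mu2, var2):
    # KL(N(mu_a, diag(var_a)) || N(mu_b, diag(var_b))) for diagonal Gaussians
    def kl(mu_a, var_a, mu_b, var_b):
        return 0.5 * np.sum(np.log(var_b / var_a)
                            + (var_a + (mu_a - mu_b) ** 2) / var_b
                            - 1.0)
    return kl(mu1, var1, mu2, var2) + kl(mu2, var2, mu1, var1)

# Identical Gaussians have zero distance
mu = np.zeros(3)
var = np.ones(3)
assert symmetric_kl_diag(mu, var, mu, var) == 0.0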
Example #6
def get_mean_bc(env,
                policy,
                tslimit,
                num_rollouts=1,
                bc_type="terminal",
                for_archive=False,
                worker_dir=None):
    # for_archive tells us whether we are about to send this item to the archive.
    novelty_vector = []
    for n in range(num_rollouts):
        rew, t, nv = policy.rollout(
            env,
            timestep_limit=tslimit,
            bc_only_final_state=(bc_type == "terminal"))
        novelty_vector.append(nv)
    if bc_type == "terminal":
        return np.mean(novelty_vector, axis=0)
    elif bc_type == "gaussian":
        # Concatenate individual rollouts
        novelty_vector = np.concatenate(novelty_vector, axis=0)
        # Fit a single diagonal Gaussian (per-dimension mean and variance)
        return np.concatenate((
            np.mean(novelty_vector, axis=0),
            np.var(novelty_vector, axis=0),
        ),
                              axis=0)
    elif bc_type == "supervector":
        novelty_vector = np.concatenate(novelty_vector, axis=0)
        if for_archive:
            # Save the novelty vector for archival purposes
            raw_data_dirpath = os.path.join(worker_dir,
                                            NOVELTY_RAW_DATA_DIR_NAME)
            os.makedirs(raw_data_dirpath, exist_ok=True)

            # Store data and update the extractor
            num_items = len(os.listdir(raw_data_dirpath))
            # Create a new item
            save_path = os.path.join(raw_data_dirpath,
                                     "policy_{}".format(num_items))
            np.save(save_path, novelty_vector)

            # Read stored policies' data, train UBM and extract supervectors
            all_buffer_data = []
            buffer_filenames = os.listdir(raw_data_dirpath)
            for filename in buffer_filenames:
                full_path = os.path.join(raw_data_dirpath, filename)
                buffer_data = np.load(full_path)
                all_buffer_data.append(buffer_data)

            # Super-elegant hoarding of memory by having multiple copies
            concat_data = np.concatenate(all_buffer_data, axis=0)
            means = concat_data.mean(axis=0)
            stds = concat_data.std(axis=0)

            concat_data = (concat_data - means) / stds

            ubm = gmm_tools.train_ubm(
                concat_data,
                n_components=NOVELTY_SUPERVECTOR_COMPONENTS,
                verbose=0)

            # Extract supervectors
            supervectors = []
            mean_shape = ubm.means_.shape
            for buffer_data in all_buffer_data:
                buffer_data = (buffer_data - means) / stds
                supervector = gmm_tools.trajectories_to_supervector(
                    buffer_data, ubm)
                supervectors.append(supervector.reshape(mean_shape))

            gmm_tools.save_ubm(os.path.join(worker_dir,
                                            NOVELTY_ARCHIVE_FILE_NAME),
                               ubm,
                               means,
                               stds,
                               supervectors=supervectors)
            # Return something dumb. This goes to archive, but we are
            # never supposed to read it during supervector novelty search
            return np.array([42])
        else:
            # Return just the states visited. They are the "BC"
            return novelty_vector
    else:
        raise NotImplementedError("bc_type {} not implemented".format(bc_type))
Example #7
def main():
    # Illustrations for three different agents:
    # - Random
    # - A trained neural network
    # - Always high
    env = gym.make("Pendulum-v0")
    env = StateWrapper(env)

    # Always pick random action
    random_agent = SimpleAgentClass(lambda obs: env.action_space.sample())
    # Always pick high
    always_high_agent = SimpleAgentClass(lambda obs: env.action_space.high)

    # Trained stable-baselines agent
    def network_activation(obs):
        x = np.tanh((obs @ NETWORK_W1) + NETWORK_B1)
        x = (x @ NETWORK_W2) + NETWORK_B2
        return x

    network_agent = SimpleAgentClass(network_activation)

    # Gather observations
    print("Collecting random trajectories...")
    random_trajectories, random_rewards = collect_trajectories(
        env, random_agent, NUM_TRAJECTORIES)
    print("Average reward: {}".format(np.mean(random_rewards)))
    print("Collecting always-high trajectories...")
    always_high_trajectories, always_high_rewards = collect_trajectories(
        env, always_high_agent, NUM_TRAJECTORIES)
    print("Average reward: {}".format(np.mean(always_high_rewards)))
    print("Collecting network trajectories...")
    network_trajectories, network_rewards = collect_trajectories(
        env, network_agent, NUM_TRAJECTORIES)
    print("Average reward: {}".format(np.mean(network_rewards)))

    random_trajectories = np.concatenate(random_trajectories, axis=0)
    always_high_trajectories = np.concatenate(always_high_trajectories, axis=0)
    network_trajectories = np.concatenate(network_trajectories, axis=0)

    # Take theta and theta-velocity as the variables we want to study (for 2D plots)
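    # (Pendulum-v0 observations are [cos(theta), sin(theta), theta_dot];
    #  arccos of the first element recovers theta up to sign.)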
    random_trajectories = np.stack(
        (np.arccos(random_trajectories[:, 0]), random_trajectories[:, 2]),
        axis=1)
    always_high_trajectories = np.stack((np.arccos(
        always_high_trajectories[:, 0]), always_high_trajectories[:, 2]),
                                        axis=1)
    network_trajectories = np.stack(
        (np.arccos(network_trajectories[:, 0]), network_trajectories[:, 2]),
        axis=1)

    all_data = np.concatenate(
        (random_trajectories, always_high_trajectories, network_trajectories),
        axis=0)

    # Train the GMM-UBM.
    # Using multiple inits here for similar results across different runs
    ubm = train_ubm(all_data, n_components=N_COMPONENTS, n_init=5)

    # Extract policy supervectors (or adapted means)
    random_means = trajectories_to_supervector(random_trajectories,
                                               ubm).reshape(N_COMPONENTS, 2)
    always_high_means = trajectories_to_supervector(always_high_trajectories,
                                                    ubm).reshape(
                                                        N_COMPONENTS, 2)
    network_means = trajectories_to_supervector(network_trajectories,
                                                ubm).reshape(N_COMPONENTS, 2)

    # Compute a grid of locations for the contour plots
    mins, maxs = all_data.min(axis=0), all_data.max(axis=0)
    theta_space = np.linspace(mins[0], maxs[0], num=NUM_LINSPACE)
    thetavel_space = np.linspace(mins[1], maxs[1], num=NUM_LINSPACE)
    locations = np.array(np.meshgrid(theta_space,
                                     thetavel_space)).T.reshape(-1, 2)

    os.makedirs("figures", exist_ok=True)
    fig = pyplot.figure(figsize=FIG_SIZE)
    pyplot.axis("off")
    pyplot.scatter(all_data[:, 0], all_data[:, 1], alpha=0.003, s=MARKERSIZE)
    pyplot_remove_margins()
    pyplot.savefig("figures/method_all_data.png", **SAVEFIG_PARAMS)
    # Plot the components
    ax = pyplot.gca()
    for i in range(N_COMPONENTS):
        cov = np.diag(ubm.covariances_[i])
        mean_x = ubm.means_[i, 0]
        mean_y = ubm.means_[i, 1]
        _ = confidence_ellipse(mean_x,
                               mean_y,
                               cov,
                               ax,
                               n_std=1,
                               edgecolor="red",
                               linewidth=2)
        pyplot.scatter(mean_x, mean_y, marker="+", c="red")
    pyplot_remove_margins()
    pyplot.savefig("figures/method_ubm_all_data.png", **SAVEFIG_PARAMS)
    xlim = pyplot.xlim()
    ylim = pyplot.ylim()
    pyplot.close(fig)

    # Repeat the above for each of the three datasets
    for i in range(3):
        name = None
        ubm_name = None
        means = None
        # Plot data
        fig = pyplot.figure(figsize=FIG_SIZE)
        if i == 0:
            name = "figures/method_random_data.png"
            ubm_name = "figures/method_ubm_random_data.png"
            means = random_means
            pyplot.scatter(random_trajectories[:, 0],
                           random_trajectories[:, 1],
                           alpha=0.003,
                           s=MARKERSIZE)
        elif i == 1:
            name = "figures/method_always_high_data.png"
            ubm_name = "figures/method_ubm_always_high_data.png"
            means = always_high_means
            pyplot.scatter(always_high_trajectories[:, 0],
                           always_high_trajectories[:, 1],
                           alpha=0.003,
                           s=MARKERSIZE)
        else:
            name = "figures/method_network_data.png"
            ubm_name = "figures/method_ubm_network_data.png"
            means = network_means
            pyplot.scatter(network_trajectories[:, 0],
                           network_trajectories[:, 1],
                           alpha=0.003,
                           s=MARKERSIZE)
        pyplot.xlim(xlim)
        pyplot.ylim(ylim)
        pyplot.axis("off")
        pyplot_remove_margins()
        pyplot.savefig(name, **SAVEFIG_PARAMS)

        # Plot the adapted GMM:
        # the old (UBM) components and the new (adapted) components
        ax = pyplot.gca()
        for comp_i in range(N_COMPONENTS):
            cov = np.diag(ubm.covariances_[comp_i])
            mean_x = means[comp_i, 0]
            mean_y = means[comp_i, 1]
            _ = confidence_ellipse(mean_x,
                                   mean_y,
                                   cov,
                                   ax,
                                   n_std=1,
                                   edgecolor="red",
                                   linewidth=2)
            pyplot.scatter(mean_x, mean_y, marker="+", c="red")
            # Old component
            ubm_mean_x = ubm.means_[comp_i, 0]
            ubm_mean_y = ubm.means_[comp_i, 1]
            _ = confidence_ellipse(ubm_mean_x,
                                   ubm_mean_y,
                                   cov,
                                   ax,
                                   n_std=1,
                                   edgecolor="red",
                                   linewidth=2,
                                   alpha=0.3,
                                   linestyle="--")
            pyplot.scatter(ubm_mean_x,
                           ubm_mean_y,
                           marker="+",
                           c="red",
                           alpha=0.3)
            pyplot.arrow(ubm_mean_x,
                         ubm_mean_y,
                         mean_x - ubm_mean_x,
                         mean_y - ubm_mean_y,
                         color="red",
                         width=0.01,
                         linewidth=0.25)
        pyplot.xlim(xlim)
        pyplot.ylim(ylim)
        pyplot_remove_margins()
        pyplot.savefig(ubm_name, **SAVEFIG_PARAMS)
        pyplot.close(fig)
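The confidence_ellipse helper used above appears to take a mean and a covariance matrix directly (rather than raw samples, as the stock matplotlib-gallery helper does). One possible implementation under that assumed signature, purely for illustration:

import numpy as np
from matplotlib.patches import Ellipse

def confidence_ellipse(mean_x, mean_y, cov, ax, n_std=1.0, **kwargs):
    # Draw an n_std ellipse for a 2x2 covariance matrix centred on (mean_x, mean_y)
    eigvals, eigvecs = np.linalg.eigh(cov)
    # Orientation of the principal (largest-variance) axis, in degrees
    angle = np.degrees(np.arctan2(eigvecs[1, -1], eigvecs[0, -1]))
    # Full axis lengths: 2 * n_std standard deviations along each eigenvector
    width, height = 2.0 * n_std * np.sqrt(eigvals[::-1])
    ellipse = Ellipse((mean_x, mean_y), width=width, height=height,
                      angle=angle, facecolor="none", **kwargs)
    return ax.add_patch(ellipse)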