def extract_pivector_worker(num_traj_index, num_traj, num_components, env):
    # Worker used by the pivector-extraction dispatcher, which is not included in this excerpt (a sketch of one follows this example)
    trained_ubms = glob(UBM_TEMPLATE.format(num_traj=num_traj, num_components=num_components, env=env, policy_name="*", repetition_num="*"))
    trained_ubm_dirs = [os.path.basename(os.path.dirname(x)) for x in trained_ubms]
    policy_names = ["_".join(x.split("_")[-4:-2]) for x in trained_ubm_dirs]
    policy_names = sorted(list(set(policy_names)))
    for policy_name in policy_names:
        for repetition in range(1, NUM_REPETITIONS + 1):
            pivector_path = PIVECTOR_TEMPLATE.format(num_traj=num_traj, num_components=num_components, env=env, policy_name=policy_name, repetition_num=repetition)
            # If the pivector file already exists, skip extraction for this repetition
            if os.path.isfile(pivector_path):
                continue
            # Load UBM
            ubm_path = UBM_TEMPLATE.format(num_traj=num_traj, num_components=num_components, env=env, policy_name=policy_name, repetition_num=repetition)
            ubm, means, stds = load_ubm(ubm_path)
            # Hack: load the same trajectory indices that were used in UBM training
            ubm_data = np.load(ubm_path)
            trajectory_indeces = ubm_data["trajectory_indeces"]
            ubm_data.close()
            # Load trajectory data
            trajectory_paths = glob(os.path.join(TRAJECTORY_TEMPLATE.format(env=env, policy_name=policy_name), "*"))
            trajectory_paths = sorted(trajectory_paths)
            all_pivectors = []
            all_average_episodic_returns = []
            for trajectory_i, trajectory_path in enumerate(trajectory_paths):
                data = np.load(trajectory_path)
                keys = sorted(list(data.keys()))
                all_average_episodic_returns.append(data["episodic_rewards"].mean())
                # Take trajectories at the same indices as were used in UBM training.
                # First make sure they are in the same order as during UBM training
                datas = [data[key] for key in keys if "traj" in key]
                datas = [datas[i] for i in trajectory_indeces[trajectory_i]]

                data = np.concatenate(datas, axis=0)
                data = (data - means) / stds
                pivector = trajectories_to_supervector(data, ubm)
                all_pivectors.append(pivector)
            all_pivectors = np.array(all_pivectors)

            np.savez(
                pivector_path,
                pivectors=all_pivectors,
                average_episodic_rewards=all_average_episodic_returns,
                covariances=ubm.covariances_,
                weights=ubm.weights_,
            )
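
The dispatching function mentioned in the worker's comment is not included in this excerpt. Below is a minimal sketch of how such a worker might be fanned out with multiprocessing; NUM_TRAJS, NUM_COMPONENTS_LIST and ENVS are hypothetical module-level constants standing in for whatever the surrounding project defines.

from multiprocessing import Pool

def extract_pivectors_parallel():
    # Sketch only: enumerate every (num_traj, num_components, env) setting
    # and hand each combination to extract_pivector_worker in a process pool.
    jobs = [
        (num_traj_index, num_traj, num_components, env)
        for num_traj_index, num_traj in enumerate(NUM_TRAJS)
        for num_components in NUM_COMPONENTS_LIST
        for env in ENVS
    ]
    with Pool() as pool:
        pool.starmap(extract_pivector_worker, jobs)
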
Example #2
def extract_pivectors(unparsed_args):
    parser = ArgumentParser("Extract pivectors for given experiments")
    parser.add_argument(
        "--inputs",
        type=str,
        nargs="+",
        required=True,
        help="Paths to experiments for which pivectors should be extracted.")
    parser.add_argument(
        "ubms",
        type=str,
        help="Directory where UBM models reside, one per environment.")
    args = parser.parse_args(unparsed_args)

    for experiment_path in tqdm(args.inputs):
        env = experiment_path.split("_")[1]
        os.makedirs(os.path.join(experiment_path, PIVECTORS_DIR),
                    exist_ok=True)
        ubm, means, stds = load_ubm(
            os.path.join(args.ubms, "{}_ubm.npz".format(env)))

        trajectory_paths = glob.glob(
            os.path.join(experiment_path, TRAJECTORIES_DIR, "*"))
        for trajectory_path in tqdm(trajectory_paths, leave=False):
            trajectory_name = os.path.basename(trajectory_path)
            data = np.load(trajectory_path)
            average_episodic_reward = data["episodic_rewards"].mean()
            states = np.concatenate(
                [data[key] for key in data.keys() if "traj" in key])

            # Normalize
            states = (states - means) / stds
            pivector = trajectories_to_supervector(states, ubm)
            new_path = os.path.join(experiment_path, PIVECTORS_DIR,
                                    trajectory_name)

            # Also store component weights and covariances for future reference
            np.savez(new_path,
                     pivector=pivector,
                     average_episodic_reward=average_episodic_reward,
                     covariances=ubm.covariances_,
                     weights=ubm.weights_)

        del ubm
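
For reference, a hypothetical invocation of this entry point; the directory names below are illustrative only. Note that the positional ubms argument should come before --inputs, since --inputs consumes all remaining arguments (nargs="+").

extract_pivectors([
    "trained_ubms",                           # hypothetical UBM directory
    "--inputs",
    "experiments/ppo_BipedalWalker-v3_rep1",  # hypothetical experiment paths
    "experiments/ppo_BipedalWalker-v3_rep2",
])
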
Example #3
def train_ubm_and_extract_pivectors(env, experiment_paths):
    """
    Train a UBM for pivector extraction
    and adapt GMMs for the given experiments.
    """
    ubm_path = UBM_PATH.format(env)
    os.makedirs(os.path.dirname(ubm_path), exist_ok=True)
    # Train UBM if one does not exist
    if not os.path.isfile(ubm_path):
        # Load GAIL and BC data, and final agent data as well
        all_data = []
        for experiment_path in tqdm(experiment_paths, desc="ubm-load"):
            traj_paths = glob(
                os.path.join(experiment_path, BC_TRAJECTORY_DIRECTORY, "*"))
            for traj_path in traj_paths:
                data = np.load(traj_path)
                data_trajs = [
                    data[key] for key in data.keys() if "traj" in key
                ]
                all_data.extend(data_trajs)
        # Load the data of the final model
        traj_path = os.path.join(experiment_path, FINAL_MODEL_TRAJECTORIES)
        data = np.load(traj_path)
        data_trajs = [data[key] for key in data.keys() if "traj" in key]
        all_data.extend(data_trajs)

        all_data = np.concatenate(all_data, axis=0)
        # Restrict amount of data
        if all_data.shape[0] > MAX_UBM_DATA:
            np.random.shuffle(all_data)
            all_data = all_data[:MAX_UBM_DATA]
        # Normalize
        means = all_data.mean(axis=0)
        stds = all_data.std(axis=0)
        all_data = (all_data - means) / stds

        ubm = train_ubm(all_data, n_components=NUM_COMPONENTS)
        save_ubm(ubm_path, ubm, means, stds)
    else:
        print("Skipping UBM training (found)")

    ubm, means, stds = load_ubm(ubm_path)

    # Extract pivectors
    for experiment_path in experiment_paths:
        traj_dir = BC_TRAJECTORY_DIRECTORY
        pivec_dir = BC_PIVECTOR_DIRECTORY
        os.makedirs(os.path.join(experiment_path, pivec_dir), exist_ok=True)
        traj_paths = glob(os.path.join(experiment_path, traj_dir, "*"))
        for traj_path in traj_paths:
            pivec_path = os.path.join(experiment_path, pivec_dir,
                                      os.path.basename(traj_path))
            if os.path.isfile(pivec_path):
                continue
            data = np.load(traj_path)
            average_episodic_reward = data["episodic_rewards"].mean()
            data = [data[key] for key in data.keys() if "traj" in key]
            data = np.concatenate(data, axis=0)
            data = (data - means) / stds

            pivec = trajectories_to_supervector(data, ubm)

            # Also store component weights and covariances for future reference
            np.savez(pivec_path,
                     pivector=pivec,
                     average_episodic_reward=average_episodic_reward,
                     covariances=ubm.covariances_,
                     weights=ubm.weights_)
        # Extract pivector for the final model as well
        pivec_path = os.path.join(experiment_path, FINAL_MODEL_PIVECTOR)
        traj_path = os.path.join(experiment_path, FINAL_MODEL_TRAJECTORIES)

        if not os.path.isfile(pivec_path):
            data = np.load(traj_path)
            average_episodic_reward = data["episodic_rewards"].mean()
            data = [data[key] for key in data.keys() if "traj" in key]
            data = np.concatenate(data, axis=0)
            data = (data - means) / stds

            pivec = trajectories_to_supervector(data, ubm)

            # Also store component weights and covariances for future reference
            np.savez(pivec_path,
                     pivector=pivec,
                     average_episodic_reward=average_episodic_reward,
                     covariances=ubm.covariances_,
                     weights=ubm.weights_)
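
load_ubm and save_ubm come from the project's gmm_tools module and are not shown in this excerpt. Below is a minimal sketch of what loading could look like for a diagonal-covariance UBM built on sklearn's GaussianMixture; the archive keys (means, covariances, weights, norm_means, norm_stds) are assumptions, not the project's actual format.

import numpy as np
from sklearn.mixture import GaussianMixture

def load_ubm_sketch(path):
    # Rebuild a fitted-looking GaussianMixture from saved parameters
    data = np.load(path)
    weights = data["weights"]
    ubm = GaussianMixture(n_components=weights.shape[0], covariance_type="diag")
    ubm.weights_ = weights
    ubm.means_ = data["means"]
    ubm.covariances_ = data["covariances"]
    # For diagonal covariances, precisions are elementwise reciprocals
    ubm.precisions_ = 1.0 / ubm.covariances_
    ubm.precisions_cholesky_ = 1.0 / np.sqrt(ubm.covariances_)
    norm_means, norm_stds = data["norm_means"], data["norm_stds"]
    data.close()
    return ubm, norm_means, norm_stds
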
Example #4
def compute_novelty_vs_archive(archive,
                               novelty_vector,
                               k,
                               bc_type="terminal",
                               worker_dir=None):
    distances = []
    nov = novelty_vector.astype(float)  # np.float was removed from NumPy; use builtin float
    if bc_type == "supervector":
        ubm = None
        means = None
        stds = None
        # Guard against a race condition: if loading fails, retry a bit later
        while ubm is None:
            try:
                ubm, means, stds = gmm_tools.load_ubm(
                    os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load UBM file. Trying again...")
                time.sleep(0.1)
        # Normalize data
        normalized_states = (novelty_vector - means) / stds

        my_supervector = gmm_tools.trajectories_to_supervector(
            normalized_states, ubm)
        # Reshape the flat supervector back to per-component means, shape (n_components, n_features)
        my_supervector = my_supervector.reshape(ubm.means_.shape)
        precisions = ubm.precisions_
        weights = ubm.weights_

        # Now load the supervectors stored in the same archive file
        # (the file is re-read on every call, which is not optimal for efficiency)
        archive_data = None
        while archive_data is None:
            try:
                archive_data = np.load(
                    os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load archive file. Trying again...")
                time.sleep(0.1)
        other_supervectors = archive_data["supervectors"]
        archive_data.close()

        for i in range(other_supervectors.shape[0]):
            kl_distance = gmm_tools.adapted_gmm_distance(
                my_supervector, other_supervectors[i], precisions, weights)
            distances.append(kl_distance)
    else:
        for point in archive:
            if bc_type == "terminal":
                distances.append(
                    euclidean_distance(point.astype(float), nov))
            elif bc_type == "gaussian":
                midpoint = len(point) // 2
                if isinstance(nov, np.ndarray):
                    if nov.ndim == 2:
                        # Need to compute mean and cov
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(np.mean(nov, axis=0)).float(),
                            th.diag(th.from_numpy(np.var(nov, axis=0) +
                                                  1e-7)).float())
                    else:
                        # Already computed mean+var vector
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(nov[:midpoint]).float(),
                            th.diag(th.from_numpy(nov[midpoint:] +
                                                  1e-7)).float())
                point = th.distributions.MultivariateNormal(
                    th.from_numpy(point[:midpoint]).float(),
                    th.diag(th.from_numpy(point[midpoint:] + 1e-7)).float())
                with th.no_grad():
                    kl_distance = (th.distributions.kl_divergence(nov, point) +
                                   th.distributions.kl_divergence(point, nov))
                    distances.append(kl_distance.item())
            else:
                raise NotImplementedError(
                    "bc_type {} not implemented".format(bc_type))

    # Pick k nearest neighbors
    distances = np.array(distances)
    top_k_indices = distances.argsort()[:k]
    top_k = distances[top_k_indices]
    return top_k.mean()
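
gmm_tools.adapted_gmm_distance is not shown in this excerpt. For GMMs that were MAP-adapted from a shared UBM (so they differ only in their component means), a standard closed-form choice is the weighted Mahalanobis distance between per-component means, which upper-bounds the KL divergence between the two mixtures. A sketch under that assumption; the actual function may differ.

import numpy as np

def adapted_gmm_distance_sketch(means1, means2, precisions, weights):
    # means1, means2: (n_components, n_features) adapted mean matrices
    # precisions: (n_components, n_features) diagonal precisions of the shared UBM
    # weights: (n_components,) shared mixture weights
    diff = means1 - means2
    per_component = np.sum(diff * precisions * diff, axis=1)
    return 0.5 * np.sum(weights * per_component)
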