def extract_pivector_worker(num_traj_index, num_traj, num_components, env):
    # Worker for the function below
    trained_ubms = glob(UBM_TEMPLATE.format(num_traj=num_traj, num_components=num_components, env=env, policy_name="*", repetition_num="*"))
    trained_ubm_dirs = [os.path.basename(os.path.dirname(x)) for x in trained_ubms]
    policy_names = ["_".join(x.split("_")[-4:-2]) for x in trained_ubm_dirs]
    policy_names = sorted(list(set(policy_names)))
    for policy_name in policy_names:
        for repetition in range(1, NUM_REPETITIONS + 1):
            pivector_path = PIVECTOR_TEMPLATE.format(num_traj=num_traj, num_components=num_components, env=env, policy_name=policy_name, repetition_num=repetition)
            # If pivectors already exist, skip extracting them for this setting
            if os.path.isfile(pivector_path):
                continue
            # Load UBM
            ubm_path = UBM_TEMPLATE.format(num_traj=num_traj, num_components=num_components, env=env, policy_name=policy_name, repetition_num=repetition)
            ubm, means, stds = load_ubm(ubm_path)
            # Hacky way to load the same trajectories as were used in UBM training
            ubm_data = np.load(ubm_path)
            trajectory_indeces = ubm_data["trajectory_indeces"]
            ubm_data.close()
            # Load trajectory data
            trajectories_path = glob(os.path.join(TRAJECTORY_TEMPLATE.format(env=env, policy_name=policy_name), "*"))
            trajectories_path = sorted(trajectories_path)
            all_pivectors = []
            all_average_episodic_returns = []
            for trajectory_i, trajectory_path in enumerate(trajectories_path):
                data = np.load(trajectory_path)
                keys = sorted(list(data.keys()))
                all_average_episodic_returns.append(data["episodic_rewards"].mean())
                # Take trajectories at the same indices as used in UBM training.
                # First make sure they are in the same order as during UBM training.
                datas = [data[key] for key in keys if "traj" in key]
                datas = [datas[i] for i in trajectory_indeces[trajectory_i]]
                data = np.concatenate(datas, axis=0)
                data = (data - means) / stds
                pivector = trajectories_to_supervector(data, ubm)
                all_pivectors.append(pivector)
            all_pivectors = np.array(all_pivectors)
            np.savez(
                pivector_path,
                pivectors=all_pivectors,
                average_episodic_rewards=all_average_episodic_returns,
                covariances=ubm.covariances_,
                weights=ubm.weights_,
            )

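# The extraction above relies on `trajectories_to_supervector`, which is assumed to
# perform relevance MAP adaptation of the UBM means towards the policy's state data
# and stack them into a single "pi-vector". The sketch below is a minimal
# illustration of that idea, not the repository's exact implementation; the helper
# name and the relevance factor value are assumptions.
def _map_adapted_supervector_sketch(states, ubm, relevance_factor=16.0):
    # Posterior probability of each UBM component for every state, shape (N, K)
    posteriors = ubm.predict_proba(states)
    # Zeroth- and first-order Baum-Welch statistics
    n_k = posteriors.sum(axis=0)          # (K,)
    f_k = posteriors.T @ states           # (K, D)
    # Per-component data mean, guarded against empty components
    e_k = f_k / np.maximum(n_k[:, None], 1e-8)
    # Interpolate between UBM means and data means (relevance MAP adaptation)
    alpha = (n_k / (n_k + relevance_factor))[:, None]
    adapted_means = alpha * e_k + (1.0 - alpha) * ubm.means_
    # Stack the adapted means into one supervector
    return adapted_means.reshape(-1)
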
def extract_pivectors(unparsed_args):
    parser = ArgumentParser(description="Extract pivectors for given experiments")
    parser.add_argument(
        "--inputs", type=str, nargs="+", required=True,
        help="Paths to experiments for which pivectors should be extracted.")
    parser.add_argument(
        "ubms", type=str,
        help="Directory where UBM models reside, one per environment.")
    args = parser.parse_args(unparsed_args)

    for experiment_path in tqdm(args.inputs):
        env = experiment_path.split("_")[1]
        os.makedirs(os.path.join(experiment_path, PIVECTORS_DIR), exist_ok=True)
        ubm, means, stds = load_ubm(
            os.path.join(args.ubms, "{}_ubm.npz".format(env)))
        trajectory_paths = glob.glob(
            os.path.join(experiment_path, TRAJECTORIES_DIR, "*"))
        for trajectory_path in tqdm(trajectory_paths, leave=False):
            trajectory_name = os.path.basename(trajectory_path)
            data = np.load(trajectory_path)
            average_episodic_reward = data["episodic_rewards"].mean()
            states = np.concatenate(
                [data[key] for key in data.keys() if "traj" in key])
            # Normalize
            states = (states - means) / stds
            pivector = trajectories_to_supervector(states, ubm)
            new_path = os.path.join(experiment_path, PIVECTORS_DIR, trajectory_name)
            # Also store component weights and covariances for future reference
            np.savez(new_path,
                     pivector=pivector,
                     average_episodic_reward=average_episodic_reward,
                     covariances=ubm.covariances_,
                     weights=ubm.weights_)
        del ubm

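# Hypothetical invocation sketch (the paths below are made up). The positional
# `ubms` argument should come before `--inputs`, since `--inputs` greedily consumes
# the remaining arguments, and the environment name is parsed from the second
# underscore-separated token of each experiment path.
#
#   extract_pivectors(["path/to/ubms", "--inputs",
#                      "experiments/sac_Walker2d-v3_seed0",
#                      "experiments/sac_Walker2d-v3_seed1"])
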
def train_ubm_and_extract_pivectors(env, experiment_paths):
    """
    Train UBM for pivector extraction and adapt GMMs for the given experiments
    """
    ubm_path = UBM_PATH.format(env)
    os.makedirs(os.path.dirname(ubm_path), exist_ok=True)
    # Train UBM if one does not exist
    if not os.path.isfile(ubm_path):
        # Load GAIL and BC data, and final agent data as well
        all_data = []
        for experiment_path in tqdm(experiment_paths, desc="ubm-load"):
            traj_paths = glob(
                os.path.join(experiment_path, BC_TRAJECTORY_DIRECTORY, "*"))
            for traj_path in traj_paths:
                data = np.load(traj_path)
                data_trajs = [
                    data[key] for key in data.keys() if "traj" in key
                ]
                all_data.extend(data_trajs)
            # Load the data of the final model
            traj_path = os.path.join(experiment_path, FINAL_MODEL_TRAJECTORIES)
            data = np.load(traj_path)
            data_trajs = [data[key] for key in data.keys() if "traj" in key]
            all_data.extend(data_trajs)
        all_data = np.concatenate(all_data, axis=0)
        # Restrict amount of data
        if all_data.shape[0] > MAX_UBM_DATA:
            np.random.shuffle(all_data)
            all_data = all_data[:MAX_UBM_DATA]
        # Normalize
        means = all_data.mean(axis=0)
        stds = all_data.std(axis=0)
        all_data = (all_data - means) / stds
        ubm = train_ubm(all_data, n_components=NUM_COMPONENTS)
        save_ubm(ubm_path, ubm, means, stds)
    else:
        print("Skipping UBM training (found)")
        ubm, means, stds = load_ubm(ubm_path)

    # Extract pivectors
    for experiment_path in experiment_paths:
        traj_dir = BC_TRAJECTORY_DIRECTORY
        pivec_dir = BC_PIVECTOR_DIRECTORY
        os.makedirs(os.path.join(experiment_path, pivec_dir), exist_ok=True)
        traj_paths = glob(os.path.join(experiment_path, traj_dir, "*"))
        for traj_path in traj_paths:
            pivec_path = os.path.join(experiment_path, pivec_dir, os.path.basename(traj_path))
            if os.path.isfile(pivec_path):
                continue
            data = np.load(traj_path)
            average_episodic_reward = data["episodic_rewards"].mean()
            data = [data[key] for key in data.keys() if "traj" in key]
            data = np.concatenate(data, axis=0)
            data = (data - means) / stds
            pivec = trajectories_to_supervector(data, ubm)
            # Also store component weights and covariances for future reference
            np.savez(pivec_path,
                     pivector=pivec,
                     average_episodic_reward=average_episodic_reward,
                     covariances=ubm.covariances_,
                     weights=ubm.weights_)
        # Extract pivector for the final model as well
        pivec_path = os.path.join(experiment_path, FINAL_MODEL_PIVECTOR)
        traj_path = os.path.join(experiment_path, FINAL_MODEL_TRAJECTORIES)
        if not os.path.isfile(pivec_path):
            data = np.load(traj_path)
            average_episodic_reward = data["episodic_rewards"].mean()
            data = [data[key] for key in data.keys() if "traj" in key]
            data = np.concatenate(data, axis=0)
            data = (data - means) / stds
            pivec = trajectories_to_supervector(data, ubm)
            # Also store component weights and covariances for future reference
            np.savez(pivec_path,
                     pivector=pivec,
                     average_episodic_reward=average_episodic_reward,
                     covariances=ubm.covariances_,
                     weights=ubm.weights_)

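# The UBM helpers used above (`train_ubm`, `save_ubm`, `load_ubm`) come from the
# repository's gmm_tools. A minimal sketch of what they are assumed to do is given
# below, built on scikit-learn's diagonal-covariance GaussianMixture; the exact
# hyperparameters and stored keys are assumptions.
from sklearn.mixture import GaussianMixture

def _train_ubm_sketch(data, n_components):
    # Fit a diagonal-covariance GMM on the pooled, normalized state data
    ubm = GaussianMixture(n_components=n_components, covariance_type="diag")
    ubm.fit(data)
    return ubm

def _save_ubm_sketch(path, ubm, means, stds):
    # Store the GMM parameters together with the normalization statistics, so that
    # states can be normalized identically when pivectors are extracted later
    np.savez(path, weights=ubm.weights_, means=ubm.means_,
             covariances=ubm.covariances_, norm_means=means, norm_stds=stds)
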
def compute_novelty_vs_archive(archive, novelty_vector, k, bc_type="terminal", worker_dir=None):
    distances = []
    nov = novelty_vector.astype(np.float64)
    if bc_type == "supervector":
        ubm = None
        means = None
        stds = None
        # A fight against race conditions: if loading fails, try again a bit later
        while ubm is None:
            try:
                ubm, means, stds = gmm_tools.load_ubm(
                    os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load UBM file. Trying again...")
                time.sleep(0.1)
        # Normalize data
        normalized_states = (novelty_vector - means) / stds
        my_supervector = gmm_tools.trajectories_to_supervector(
            normalized_states, ubm)
        my_supervector = my_supervector.reshape(ubm.means_.shape)
        precisions = ubm.precisions_
        weights = ubm.weights_
        # Now load supervectors that are stored in the same file (conveniently reading many times
        # for _optimal efficiency_...)
        archive_data = None
        while archive_data is None:
            try:
                archive_data = np.load(
                    os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load archive file. Trying again...")
                time.sleep(0.1)
        other_supervectors = archive_data["supervectors"]
        archive_data.close()
        for i in range(other_supervectors.shape[0]):
            kl_distance = gmm_tools.adapted_gmm_distance(
                my_supervector, other_supervectors[i], precisions, weights)
            distances.append(kl_distance)
    else:
        for point in archive:
            if bc_type == "terminal":
                distances.append(
                    euclidean_distance(point.astype(np.float64), nov))
            elif bc_type == "gaussian":
                midpoint = len(point) // 2
                if isinstance(nov, np.ndarray):
                    if nov.ndim == 2:
                        # Need to compute mean and cov
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(np.mean(nov, axis=0)).float(),
                            th.diag(th.from_numpy(np.var(nov, axis=0) + 1e-7)).float())
                    else:
                        # Already computed mean+var vector
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(nov[:midpoint]).float(),
                            th.diag(th.from_numpy(nov[midpoint:] + 1e-7)).float())
                point = th.distributions.MultivariateNormal(
                    th.from_numpy(point[:midpoint]).float(),
                    th.diag(th.from_numpy(point[midpoint:] + 1e-7)).float())
                with th.no_grad():
                    # Symmetrize the KL divergence between the two diagonal Gaussians
                    kl_distance = (th.distributions.kl_divergence(nov, point)
                                   + th.distributions.kl_divergence(point, nov))
                distances.append(kl_distance.item())
            else:
                raise NotImplementedError(
                    "bc_type {} not implemented".format(bc_type))
    # Pick k nearest neighbors
    distances = np.array(distances)
    top_k_indices = distances.argsort()[:k]
    top_k = distances[top_k_indices]
    return top_k.mean()

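# `gmm_tools.adapted_gmm_distance` is assumed to compute the standard symmetric
# KL-divergence upper bound between two GMMs that share weights and diagonal
# covariances and differ only in their adapted means (as in the GMM supervector
# literature). A minimal sketch under that assumption:
def _adapted_gmm_distance_sketch(supervector_a, supervector_b, precisions, weights):
    # supervector_a/b and precisions have shape (K, D); weights has shape (K,)
    diff = supervector_a - supervector_b
    # Weighted, precision-scaled squared distance between the adapted means
    return 0.5 * float(np.sum(weights[:, None] * diff * precisions * diff))
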