def check_constraint(self, new_agent):
    # Compute UBM, extract supervectors and compute KL
    new_policy_data = do_manual_rollouts(new_agent, self.env, self.n_rollouts)
    new_policy_data += np.random.randn(*new_policy_data.shape) * 0.001
    all_data = np.concatenate((self.old_policy_data, new_policy_data), axis=0)
    # Avoid all the spam from "less unique centroids"
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        ubm = gmm_tools.train_ubm(all_data, n_components=self.n_centroids, verbose=0)
    old_supervector = gmm_tools.trajectories_to_supervector(self.old_policy_data, ubm)
    new_supervector = gmm_tools.trajectories_to_supervector(new_policy_data, ubm)
    # Supervectors are returned as raveled 1D vectors
    old_supervector = old_supervector.reshape(ubm.means_.shape)
    new_supervector = new_supervector.reshape(ubm.means_.shape)
    kl_distance = gmm_tools.adapted_gmm_distance(old_supervector, new_supervector, ubm.precisions_, ubm.weights_)
    if kl_distance >= self.max_kl_constraint:
        return True
    return False
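For reference, a minimal sketch of what an "adapted GMM distance" between two mean supervectors could look like, assuming the usual upper bound on the KL divergence between two GMMs that share weights and diagonal covariances and differ only in their means. This is an illustration of the idea, not necessarily the implementation of gmm_tools.adapted_gmm_distance.

# Sketch only: assumes the standard weighted-Mahalanobis KL upper bound
# for mean-adapted GMMs; gmm_tools.adapted_gmm_distance may differ.
import numpy as np

def adapted_gmm_distance_sketch(means1, means2, precisions, weights):
    # means1, means2, precisions: (n_components, n_features); weights: (n_components,)
    diff = means1 - means2
    # Per-component squared Mahalanobis distance under the shared diagonal covariance
    per_component = np.sum(diff * diff * precisions, axis=1)
    # Weighted sum over components, halved as in the usual KL upper bound
    return 0.5 * np.sum(weights * per_component)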
def compute_pivector_distance_matrices():
    # Compute distance matrices for each pivector file
    pivector_files = glob(PIVECTOR_TEMPLATE.format(env="*", num_traj="*", num_components="*", policy_name="*", repetition_num="*"))
    for pivector_file in tqdm(pivector_files, desc="distance"):
        # Skip if the distance file already exists.
        # Distance file name is the same as the pivector file, but with "pivectors" replaced by "pivector_distances"
        distance_file = pivector_file.replace("pivectors", "pivector_distances")
        if os.path.isfile(distance_file):
            continue
        data = np.load(pivector_file)
        pivectors = data["pivectors"]
        covariances = data["covariances"]
        precisions = 1 / covariances
        weights = data["weights"]
        num_pivectors = len(pivectors)
        mean_shape = precisions.shape
        # Create with np.ones to allocate the space upfront, so we
        # know immediately if we are running out of memory.
        distance_matrix = np.ones((num_pivectors, num_pivectors))
        for i in range(num_pivectors):
            # Start from i to cut ~half of the computation needed
            for j in range(i, num_pivectors):
                means1 = pivectors[i].reshape(mean_shape)
                means2 = pivectors[j].reshape(mean_shape)
                distance = adapted_gmm_distance(means1, means2, precisions, weights)
                distance_matrix[i, j] = distance
                distance_matrix[j, i] = distance
        np.savez(
            distance_file,
            distance_matrix=distance_matrix,
            average_episodic_rewards=data["average_episodic_rewards"]
        )
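A small usage sketch for the files saved above: load one distance file (the path here is hypothetical) and embed the policies into 2D from the precomputed distance matrix. Multidimensional scaling is just one possible consumer of such a matrix.

# Usage sketch, assuming a distance file produced by compute_pivector_distance_matrices()
import numpy as np
from sklearn.manifold import MDS

data = np.load("path/to/some_pivector_distances.npz")  # hypothetical path
distance_matrix = data["distance_matrix"]
rewards = data["average_episodic_rewards"]

# Embed policies in 2D using the precomputed pairwise distances
embedding = MDS(n_components=2, dissimilarity="precomputed").fit_transform(distance_matrix)
print(embedding.shape, rewards.shape)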
def compute_novelty_vs_archive(archive, novelty_vector, k, bc_type="terminal", worker_dir=None):
    distances = []
    nov = novelty_vector.astype(np.float64)
    if bc_type == "supervector":
        ubm = None
        means = None
        stds = None
        # A fight against a race condition: if loading fails, try again a bit later
        while ubm is None:
            try:
                ubm, means, stds = gmm_tools.load_ubm(os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load UBM file. Trying again...")
                time.sleep(0.1)
        # Normalize data
        normalized_states = (novelty_vector - means) / stds
        my_supervector = gmm_tools.trajectories_to_supervector(normalized_states, ubm)
        my_supervector = my_supervector.reshape(ubm.means_.shape)
        precisions = ubm.precisions_
        weights = ubm.weights_
        # Now load supervectors that are stored in the same file (conveniently reading it
        # many times for _optimal efficiency_...)
        archive_data = None
        while archive_data is None:
            try:
                archive_data = np.load(os.path.join(worker_dir, NOVELTY_ARCHIVE_FILE_NAME))
            except Exception:
                print("[Warning] Failed to load archive file. Trying again...")
                time.sleep(0.1)
        other_supervectors = archive_data["supervectors"]
        archive_data.close()
        for i in range(other_supervectors.shape[0]):
            kl_distance = gmm_tools.adapted_gmm_distance(my_supervector, other_supervectors[i], precisions, weights)
            distances.append(kl_distance)
    else:
        for point in archive:
            if bc_type == "terminal":
                distances.append(euclidean_distance(point.astype(np.float64), nov))
            elif bc_type == "gaussian":
                midpoint = len(point) // 2
                if isinstance(nov, np.ndarray):
                    if nov.ndim == 2:
                        # Need to compute mean and covariance from raw states
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(np.mean(nov, axis=0)).float(),
                            th.diag(th.from_numpy(np.var(nov, axis=0) + 1e-7)).float())
                    else:
                        # Already a concatenated mean+var vector
                        nov = th.distributions.MultivariateNormal(
                            th.from_numpy(nov[:midpoint]).float(),
                            th.diag(th.from_numpy(nov[midpoint:] + 1e-7)).float())
                point = th.distributions.MultivariateNormal(
                    th.from_numpy(point[:midpoint]).float(),
                    th.diag(th.from_numpy(point[midpoint:] + 1e-7)).float())
                with th.no_grad():
                    # Symmetrized KL divergence between the two Gaussians
                    kl_distance = (th.distributions.kl_divergence(nov, point)
                                   + th.distributions.kl_divergence(point, nov))
                distances.append(kl_distance.item())
            else:
                raise NotImplementedError("bc_type {} not implemented".format(bc_type))
    # Pick k nearest neighbours and return their mean distance
    distances = np.array(distances)
    top_k_indices = distances.argsort()[:k]
    top_k = distances[top_k_indices]
    return top_k.mean()
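The "gaussian" branch above is the symmetrized KL divergence between two diagonal-covariance Gaussians. The same quantity has a simple closed form in NumPy; the helper below is illustrative (it is not part of the original module), with the 1e-7 jitter mirroring the code above.

# Illustrative closed form of the symmetrized KL used in the "gaussian" branch
import numpy as np

def symmetric_kl_diag_gaussians(mean1, var1, mean2, var2, eps=1e-7):
    var1 = var1 + eps
    var2 = var2 + eps
    diff_sq = (mean1 - mean2) ** 2
    # KL(p||q) + KL(q||p); the log-determinant terms cancel out
    return 0.5 * np.sum(var1 / var2 + var2 / var1 - 2.0 + diff_sq * (1.0 / var1 + 1.0 / var2))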
def compute_checkpoint_distances(unparsed_args):
    parser = ArgumentParser("Compute distances between consecutive checkpoints and store them under the experiment dir")
    parser.add_argument("--inputs", type=str, nargs="+", required=True, help="Paths to experiments to process.")
    args = parser.parse_args(unparsed_args)
    for path in tqdm(args.inputs, desc="experiment", leave=False):
        pivector_paths = glob.glob(os.path.join(path, PIVECTORS_DIR, "*"))
        # Sort them by the number of steps trained
        steps_trained = [
            int(re.findall("rl_model_([0-9]+)_steps", pivector_name)[0])
            for pivector_name in pivector_paths
        ]
        pivector_paths = list(zip(*sorted(zip(pivector_paths, steps_trained), key=lambda x: x[1])))[0]
        means = []
        rewards = []
        shared_covariances = None
        shared_weights = None
        for pivector_path in pivector_paths:
            data = np.load(pivector_path)
            covariances = data["covariances"]
            weights = data["weights"]
            reward = data["average_episodic_reward"]
            # Use the same covariances and weights for all checkpoints later
            if shared_covariances is None:
                shared_covariances = covariances
                shared_weights = weights
            # Sanity check to make sure all adapted GMMs share the same covariances
            if not np.allclose(shared_covariances, covariances):
                raise ValueError("Covariances did not match in file {}".format(pivector_path))
            # Covariances are diagonal, so they share the same shape with the means.
            # A pivector is just the raveled means.
            mean = data["pivector"].reshape(covariances.shape)
            means.append(mean)
            rewards.append(reward)
        precisions = 1 / shared_covariances
        distances = []
        aligned_rewards = []
        for i in range(len(means) - 1):
            distances.append(adapted_gmm_distance(means[i], means[i + 1], precisions, shared_weights))
            # Align each distance to the earlier of the two checkpoints
            aligned_rewards.append(rewards[i])
        distances = np.array(distances)
        aligned_rewards = np.array(aligned_rewards)
        # Store back on disk
        save_path = os.path.join(path, CHECKPOINT_DISTANCES_FILE)
        np.savez(save_path, distances=distances, average_episodic_rewards=aligned_rewards)
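A usage sketch for the file saved above: read the per-experiment checkpoint distances back and plot how far each policy moved between consecutive checkpoints next to its reward. The path stands in for CHECKPOINT_DISTANCES_FILE under one experiment directory.

# Usage sketch, assuming a file produced by compute_checkpoint_distances()
import numpy as np
import matplotlib.pyplot as plt

data = np.load("path/to/experiment/checkpoint_distances.npz")  # hypothetical path
distances = data["distances"]
rewards = data["average_episodic_rewards"]

fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
ax1.plot(distances)
ax1.set_ylabel("Distance to next checkpoint")
ax2.plot(rewards)
ax2.set_ylabel("Average episodic reward")
ax2.set_xlabel("Checkpoint index")
plt.show()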
def distance_metric(pivector1, pivector2):
    means1 = pivector1.reshape(mean_shape)
    means2 = pivector2.reshape(mean_shape)
    return adapted_gmm_distance(means1, means2, env_precisions, env_weights)
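Since distance_metric closes over mean_shape, env_precisions and env_weights from its enclosing scope, it can be passed as a callable metric to SciPy to build a pairwise distance matrix over raveled pivectors. The `pivectors` array below is a stand-in for whatever the enclosing code holds.

# Usage sketch: pairwise distances over an (n_policies, n_features) array of pivectors
from scipy.spatial.distance import pdist, squareform

condensed = pdist(pivectors, metric=distance_metric)   # condensed pairwise distances
pairwise_distances = squareform(condensed)             # (n_policies, n_policies) symmetric matrix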