def experiment1(exp_name, env_name):
    """
    Run the two online PE methods ("absolute" and "comparisons") on the problem
    "env_name" for 500 runs (500 random users) per solver-call budget.

    Budgets 3 to 52 are swept. For every run a weight vector is drawn from a
    RandomState seeded with 42 and normalised to sum to 1. The generator is
    created once, so the sequence of draws continues from one budget to the
    next. Both methods are run through ``single_run`` with the same weights,
    seed and budget.

    The results are pickled to ``experiments/<exp_name>/results.pickle``; the
    directory is created if it does not exist.

    :param exp_name: name of the sub-directory of ``experiments/`` that
        receives ``results.pickle``
    :param env_name: problem name; used as key of ``N_OBJ`` and forwarded to
        the helper functions
    :return: dict mapping each budget to
        ``{"abs": {"success": pct, "distance": [...]}, "comp": {...}}`` where
        ``success`` is a percentage over the runs and ``distance`` holds the
        last utility-loss value of every run
    """
    import os

    seed = 42
    random_state = RandomState(seed)
    n_trials = 500
    results = {}

    for scb in range(3, 53):
        successes_abs = 0
        successes_comp = 0
        distances_abs = []
        distances_comp = []

        for step in range(n_trials):
            print(f"Solver call buget: {scb} | Run number {step}")

            # The user's hidden weights come from the seeded generator only,
            # which keeps the whole sweep reproducible.
            weights = random_state.uniform(0.0, 1, N_OBJ[env_name])
            weights /= np.sum(weights)

            optimal_returns = get_opt_return(env_name, weights)

            _, res_abs = single_run(env_name, weights, "absolute", seed,
                                    solver_calls_budget=scb)
            _, res_comp = single_run(env_name, weights, "comparisons", seed,
                                     num_virtual_comp=0,
                                     solver_calls_budget=scb)

            successes_abs += int(
                did_it_succeed(env_name, weights, optimal_returns, res_abs))
            successes_comp += int(
                did_it_succeed(env_name, weights, optimal_returns, res_comp))

            # Only the final utility loss of each run is kept.
            distances_abs.append(
                get_utility_loss(res_abs["returns"], optimal_returns,
                                 weights, env_name)[-1])
            distances_comp.append(
                get_utility_loss(res_comp["returns"], optimal_returns,
                                 weights, env_name)[-1])

        results[scb] = {
            "abs": {"success": (successes_abs / n_trials) * 100,
                    "distance": distances_abs},
            "comp": {"success": (successes_comp / n_trials) * 100,
                     "distance": distances_comp},
        }

    # Make sure the (long) sweep is not lost because the folder is missing.
    out_path = f'experiments/{exp_name}/results.pickle'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'wb') as handle:
        pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return results
def get_opt_return(env_name, weights):
    """
    Return the optimal solution of a problem for a given weight vector.

    For "synt" and "synt_20" the solution is selected by ``get_best_sol`` from
    the front built by ``create_3D_pareto_front`` (default size, respectively
    ``size=20``). Every other environment name is delegated to
    ``get_best_sol_BST``.

    :param env_name: name of the environment (i.e. problem)
    :param weights: weight vector the solution is optimal for
    :return: whatever the selected helper returns for these weights
    """
    # Anything that is not one of the two synthetic names uses the BST helper.
    if env_name not in ("synt", "synt_20"):
        return get_best_sol_BST(weights)

    if env_name == "synt":
        front = create_3D_pareto_front()
    else:
        front = create_3D_pareto_front(size=20)
    return get_best_sol(front, weights)
def comparisons_accuracy(num_virtual_comp, env_name, solver_calls_budget):
    """
    Run the PE method using relative feedback 100 times with
    ``num_virtual_comp`` virtual comparisons and measure its accuracy.

    Each trial draws a weight vector from a RandomState seeded with 1,
    normalises it to sum to 1, runs ``single_run`` in "comparisons" mode and
    counts the trial as a success when the last returned solution equals the
    optimal one element-wise.

    :param num_virtual_comp: forwarded to ``single_run``
    :param env_name: problem name; key of ``N_OBJ`` and forwarded to helpers
    :param solver_calls_budget: forwarded to ``single_run``
    :return: tuple ``(s_rate, distances, solver_calls)`` where ``s_rate`` is
        the success percentage, ``distances`` holds one ``get_utility_loss``
        result per trial and ``solver_calls`` is a list
    """
    seed = 1
    random_state = RandomState(seed)
    n_trials = 100

    successes = 0
    # NOTE(review): nothing is ever appended to this list, so callers always
    # receive it empty - confirm whether it should be filled from `res`.
    solver_calls = []
    distances = []

    for step in range(n_trials):
        print(step)
        weights = random_state.uniform(0.0, 1, N_OBJ[env_name])
        weights /= np.sum(weights)

        # Shared helper: also covers "synt_20", which previously fell through
        # to the BST solver here.
        optimal_result = get_opt_return(env_name, weights)

        _, res = single_run(env_name, weights, "comparisons", seed,
                            num_virtual_comp=num_virtual_comp,
                            solver_calls_budget=solver_calls_budget)

        # Check if it converged
        obtained_res = res["returns"][-1]
        if list(obtained_res) == list(optimal_result):
            successes += 1

        distances.append(
            get_utility_loss(res["returns"], optimal_result, weights, env_name))

    s_rate = (successes / n_trials) * 100
    return s_rate, distances, solver_calls
def abs_low_noise(exp_name, env_name):
    """
    Run the PE method using absolute feedback with a small noise level.

    500 weight vectors are drawn once from a RandomState seeded with 42,
    normalised row-wise to sum to 1, and reused for every solver-call budget
    in 3 to 19. Each run goes through ``single_run`` in "absolute" mode with
    ``low_noise=True``.

    The results are pickled to ``experiments/<exp_name>/results.pickle``; the
    directory is created if it does not exist.

    :param exp_name: name of the sub-directory of ``experiments/`` that
        receives ``results.pickle``
    :param env_name: problem name; key of ``N_OBJ`` and forwarded to helpers
    :return: dict mapping each budget to
        ``{"abs": {"success": pct, "distance": [...]}}`` where ``distance``
        holds the last utility-loss value of every run
    """
    import os

    seed = 42
    random_state = RandomState(seed)
    n_trials = 500

    all_weights = random_state.uniform(0, 1, (n_trials, N_OBJ[env_name]))
    all_weights /= all_weights.sum(axis=1)[:, None]

    results = {}
    for scb in range(3, 20):
        successes = 0
        distances = []

        for step, weights in enumerate(all_weights):
            print(f"Solver call budget: {scb} | Run number {step}")

            # Shared helper: also covers "synt_20", which previously fell
            # through to the BST solver here.
            optimal_returns = get_opt_return(env_name, weights)

            _, res = single_run(env_name, weights, "absolute", seed,
                                solver_calls_budget=scb, low_noise=True)

            successes += int(
                did_it_succeed(env_name, weights, optimal_returns, res))
            distances.append(
                get_utility_loss(res["returns"], optimal_returns,
                                 weights, env_name)[-1])

        results[scb] = {
            "abs": {"success": (successes / n_trials) * 100,
                    "distance": distances},
        }

    # Make sure the sweep is not lost because the folder is missing.
    out_path = f'experiments/{exp_name}/results.pickle'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'wb') as handle:
        pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return results
def elicitWithRelFeedback(user, env_name, seed, solver_calls_budget=100, metric="solver"):
    """
    Elicit a user's weights through pairwise comparisons (relative feedback).

    Two initial weight vectors are solved and compared by the user. Then,
    until the stop condition is met, the problem is solved for the user's
    current MAP weight estimate and the result is compared with the currently
    preferred solution. If the solver returns the already-preferred solution,
    new weight vectors are sampled from the user model (at most 100 attempts)
    to obtain a different one.

    :param user: user model; must provide ``hidden_weights``,
        ``num_objectives``, ``compare``, ``current_map`` and
        ``sample_weight_vector``
    :param env_name: problem name forwarded to the solver
    :param seed: seed of the RandomState handed to the solver
    :param solver_calls_budget: budget compared against the number of solver
        calls (``metric="solver"``) or of user queries (``metric="user"``)
    :param metric: "solver" or "user", selects what the budget counts
    :return: dict with the keys "returns", "weights", "solver_calls", "stds"
    :raises ValueError: if ``metric`` is unknown or the user does not have
        2 or 3 objectives
    """
    # Fail before any solver call or user query is spent on a bad argument.
    if metric not in ("user", "solver"):
        raise ValueError(
            f"Unknown metric {metric!r}; expected 'user' or 'solver'.")

    logging.info("\n \n")
    logs = {
        "returns": [],
        "weights": [],
        "solver_calls": [],
        "stds": [],
    }

    # Used to train and evaluate the agent on the env
    solver = SingleObjSolver()

    # Check expected solution for real weights (logging only)
    if env_name == "synt":
        real_sol = get_best_sol(create_3D_pareto_front(), user.hidden_weights)
    elif env_name == "synt_20":
        real_sol = get_best_sol(create_3D_pareto_front(size=20),
                                user.hidden_weights)
    elif env_name in ("synt_bst", "bst"):
        # A bare `or "bst"` is always truthy and would send every other
        # environment through the BST helper as well.
        real_sol = get_best_sol_BST(user.hidden_weights)
    else:
        real_sol = None
    logging.info(
        f"Expected solution for hidden weights of the user is: {real_sol}")
    logging.info("Hidden weights: " + str(user.hidden_weights) + "\n")

    random_state = RandomState(seed)
    num_obj = user.num_objectives

    # Initial weights to solve the problem
    if num_obj == 2:
        initial_weights = [
            np.array([0, 1]),
            np.array([1, 0]),
        ]
    elif num_obj == 3:
        # Two arbitrary weight vectors that are not too close to each other
        initial_weights = [
            np.array([0.65, 0.25, 0.10]),
            np.array([0.1, 0.5, 0.4]),
        ]
    else:
        raise ValueError(
            f"Unsupported number of objectives: {num_obj}; expected 2 or 3.")

    # Solve the problem for the initial weights
    initial_returns = [
        solver.solve(env_name, w, random_state=random_state)
        for w in initial_weights
    ]
    logs["weights"].extend(initial_weights)
    logs["returns"].extend(initial_returns)

    # Ask the user to compare the initial solutions
    logging.info("Comparison between: " + str(initial_returns[0]) + " and "
                 + str(initial_returns[1]))
    preferred = user.compare(initial_returns[0], initial_returns[1])[0]
    logging.info("User prefers: " + str(preferred))

    # Compute a first estimate of the user's weights
    weights, _, H_fit = user.current_map()
    std = 1 / np.sqrt(H_fit)

    # Keep track of the number of queries asked to the user
    n_queries = 2

    # The stop condition is either the number of queries to the user or the
    # number of solver calls. Both closures read the live counters.
    if metric == "user":
        def stopCondition():
            return n_queries >= solver_calls_budget
    else:
        def stopCondition():
            return solver.n_calls >= solver_calls_budget

    while not stopCondition():
        logging.info(f"Current weights: {weights} | Current std : {std}")
        logs["weights"].append(weights)
        logs["stds"].append(std)

        # Solve the problem for the current estimate of the user's weights
        returns = solver.solve(env_name, weights, random_state=random_state)
        logging.info("Current returns: " + str(returns))
        logs["returns"].append(returns)
        logs["solver_calls"].append(solver.n_calls)

        # While the solver keeps returning the preferred solution, sample
        # other weights; these extra solves are not added to the logs.
        n_try = 0
        while (list(returns) == list(preferred)) and (not stopCondition()) \
                and (n_try < 100):
            weights = user.sample_weight_vector()
            returns = solver.solve(env_name, weights,
                                   random_state=random_state)
            n_try += 1

        # Make a new comparison
        logging.info("Comparison between: " + str(returns) + " and "
                     + str(preferred))
        preferred = user.compare(returns, preferred)[0]
        logging.info("User prefers: " + str(preferred))
        logging.info("")
        n_queries += 1

        # Compute the new weights MAP (new estimate of the user's weights)
        weights, _, H_fit = user.current_map()
        std = 1 / np.sqrt(H_fit)

    return logs
def solve(self, env_name, weights, random_state=None):
    """
    Train an agent to solve a single obj. instance of a multi-objective
    problem for some weights, and return the resulting returns.

    Every call increments ``self.n_calls``. Depending on ``env_name``:

    * "bst": a Q-learning agent is trained on the weighted environment and
      evaluated once.
    * "synt_bst": the result of ``get_best_sol_BST`` is returned directly.
    * "synt" / "synt_<size>": the best solution of the front built by
      ``create_3D_pareto_front`` is returned directly ("synt" uses the
      default size).
    * "minecart": an agent saved for exactly these weights is loaded and
      evaluated if one exists; otherwise a new A2C agent is trained, saved
      under ``saved_agents/`` and evaluated over 50 runs.

    :param env_name: name of the problem to solve
    :param weights: weight vector defining the single-objective instance
    :param random_state: random state handed to the Q-learning agent
    :return: the returns obtained for these weights
    :raises ValueError: if ``env_name`` is not one of the supported problems
    """
    self.n_calls += 1

    def build_minecart_agent():
        # One place for the A2C configuration used by both minecart paths.
        vec_env = SubprocVecEnv([lambda i=i: build_SO_minecart(weights)
                                 for i in range(N_ENVS_A2C)])
        return A2C(MlpPolicy,
                   vec_env,
                   vf_coef=0.5,
                   ent_coef=0.01,
                   n_steps=500 // N_ENVS_A2C,
                   max_grad_norm=50,
                   learning_rate=3e-4,
                   gamma=0.99,
                   policy_kwargs={'net_arch': [
                       {'vf': A2C_ARCH, 'pi': A2C_ARCH}]},
                   )

    if env_name == "bst":
        n_eval_runs = 1
        env = MultiObjRewardWrapper(BountyfulSeaTreasureEnv(), weights)
        learning_steps = STEPS_BST
        agent = Qlearning(env, decay=0.999997, random_state=random_state)

    elif env_name == "synt_bst":
        return get_best_sol_BST(weights)

    elif env_name[0:4] == "synt":
        if env_name == "synt":
            # No size suffix to parse: int("synt") would raise ValueError.
            front = create_3D_pareto_front()
        else:
            front = create_3D_pareto_front(
                size=int(env_name.split('_')[-1]))
        return get_best_sol(front, weights)

    elif env_name == "minecart":
        n_eval_runs = 50
        trained_agents = get_pretrained_agents()
        learning_steps = STEPS_MINECART_COLD_START

        if len(trained_agents) == 0:
            # Train agent from scratch
            agent = build_minecart_agent()
        else:
            # Get the most similar already trained agent
            most_similar_weights, _ = get_most_similar_agent(
                weights, trained_agents)

            # If the most similar agent was trained for the same weights
            # we don't need to learn()
            if list(most_similar_weights) == list(weights):
                fully_trained_agent = A2C.load(
                    f'saved_agents/{most_similar_weights[0]}_{most_similar_weights[1]}_{most_similar_weights[2]}')
                return self.eval_agent(fully_trained_agent, env_name,
                                       weights, n_runs=n_eval_runs)

            # NOTE(review): the similar agent is not reused; a fresh agent
            # is trained with the cold-start step count, as before - confirm
            # whether a warm start was intended.
            agent = build_minecart_agent()

    else:
        raise ValueError(f"Unknown environment name: {env_name!r}")

    if env_name == "minecart":
        agent.learn(int(learning_steps))
        agent.save(f"saved_agents/{weights[0]}_{weights[1]}_{weights[2]}")
    else:
        agent.learn(learning_steps)

    returns = self.eval_agent(agent, env_name, weights, n_runs=n_eval_runs)
    return returns
def experimentNoise(experiment_id, method, env_name):
    """
    Run a PE method for multiple levels of noise in a single environment.

    For each noise level in (0, 5, 10, 20, 40, 60, 80, 100), 500 users are
    simulated. Each user gets a weight vector drawn from a RandomState seeded
    with 42 and normalised to sum to 1. The elicitation logs are turned into
    utility-loss values and distances to the true weights.

    The results are pickled to ``experiments/<experiment_id>/results.pickle``;
    the directory is created if it does not exist.

    :param experiment_id: name of the sub-directory of ``experiments/`` that
        receives ``results.pickle``
    :param method: "comparisons" or "absolute"
    :param env_name: "synt", "bst" or "synt_bst"
    :return: dict mapping each noise level to ``{"dist": [...], "w": [...]}``
        with one entry per run
    :raises ValueError: if ``method`` or ``env_name`` is not supported
    """
    import os

    # Validate the arguments before any work is done, so that a typo is
    # reported as an error and not via a zero exit code in mid-experiment.
    if method not in ("comparisons", "absolute"):
        raise ValueError("Incorrect method.")

    num_obj_by_env = {"synt": 3, "bst": 2, "synt_bst": 2}
    if env_name not in num_obj_by_env:
        raise ValueError(f"Unsupported environment: {env_name!r}")
    num_obj = num_obj_by_env[env_name]

    seed = 42
    random_state = RandomState(seed)
    n_runs = 500
    noise_values = [0, 5, 10, 20, 40, 60, 80, 100]

    results = {}
    for noise in noise_values:
        all_seed_distances = []
        all_seed_weightEstimates = []

        for run in range(n_runs):
            print(f"Noise = {noise}")
            weight_vector = random_state.uniform(0.0, 1, N_OBJ[env_name])
            weight_vector /= np.sum(weight_vector)

            user = User(
                num_objectives=num_obj,
                noise_pct=noise,
                random_state=random_state,
                weights=weight_vector
            )

            optimal_returns = get_opt_return(env_name, weight_vector)

            if method == "comparisons":
                logs = elicitWithRelFeedback(user, env_name, seed=seed)
            else:
                logs = elicitWithAbsFeedback(user, env_name, seed=seed,
                                             solver_calls_budget=19)

            returns = logs["returns"]
            weights = logs["weights"]

            all_seed_distances.append(
                get_utility_loss(returns, optimal_returns,
                                 weight_vector, env_name))
            all_seed_weightEstimates.append(
                get_distances_from_optimal_weights(weights, weight_vector))

        results[noise] = {"dist": all_seed_distances,
                          "w": all_seed_weightEstimates}

    # Make sure the experiment is not lost because the folder is missing.
    out_path = f'experiments/{experiment_id}/results.pickle'
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, 'wb') as handle:
        pickle.dump(results, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return results