def experiment_rhc_22(self):
    init_state = None
    max_attempts = np.array(
        [5, 10, 15, 30, 40, 50, 60, 80, 100, 200, 300, 350])
    result = np.zeros((len(self.rand_seeds), len(max_attempts)))
    best_score = None
    for i in range(len(self.rand_seeds)):
        restarts = 0
        rand_state = self.rand_seeds[i]
        for j in range(len(max_attempts)):
            prob_length = 20
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            max_attempt = max_attempts[j].item()
            max_iter = np.inf
            alg = RHC(problem, init_state, rand_state, max_attempt, max_iter,
                      restarts)
            best_score, best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('avg result ' + str(avg_result))
    print('best score')
    print(best_score)
    title = self.problem_type + ' with RHC - Max Attempts Variation - 0 restart'
    plot_curve(max_attempts, avg_result, title, 'Max Attempts', 'Best Score')
def experiment_rhc_3(self):
    init_state = None
    max_iters = np.arange(100, 5000, 100)
    result = np.zeros((len(self.rand_seeds), len(max_iters)))
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(max_iters)):
            prob_length = 20
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            max_attempt = 200
            restarts = 150
            max_iter = max_iters[j].item()
            alg = RHC(problem, init_state, rand_state, max_attempt, max_iter,
                      restarts)
            best_score, best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('avg result ' + str(avg_result))
    print('best score')
    print(best_score)
    title = self.problem_type + ' with RHC - Max Iterations Variation'
    plot_curve(max_iters, avg_result, title, 'Max Iterations', 'Best Score')
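# The RHC wrapper class is not shown in this section. Below is a minimal
# sketch of what it might look like, assuming it simply delegates to
# mlrose.random_hill_climb (original mlrose package); the real class may also
# record fitness curves or wall-clock time.
import mlrose


class RHC:
    """Thin wrapper around mlrose's randomized hill climbing (assumed)."""

    def __init__(self, problem, init_state, rand_state, max_attempt, max_iter,
                 restarts):
        self.problem = problem
        self.init_state = init_state
        self.rand_state = rand_state
        self.max_attempt = max_attempt
        self.max_iter = max_iter
        self.restarts = restarts

    def optimize(self):
        # Returns (best_state, best_fitness), matching how the experiments
        # above unpack alg.optimize().
        best_state, best_fitness = mlrose.random_hill_climb(
            self.problem,
            max_attempts=self.max_attempt,
            max_iters=self.max_iter,
            restarts=self.restarts,
            init_state=self.init_state,
            random_state=self.rand_state)
        return best_state, best_fitness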
def experiment_sa_11(self):
    init_state = None
    prob_lengths = np.arange(7, 30)
    schedule_var = 0
    best_state = None
    result = np.zeros((len(self.rand_seeds), len(prob_lengths)))
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(prob_lengths)):
            prob_length = prob_lengths[j]
            fl = CustomProblem(prob_length.item(), self.problem_type)
            problem = fl.create_problem()
            alg = SA(problem, init_state, rand_state, schedule_var, 10, 1000)
            best_state, best_fitness = alg.optimize()
            result[i][j] = best_fitness
    print(str(result))
    print('best_state')
    print(best_state)
    avg_result = np.mean(result, axis=0)
    print('avg result for varying input size ' + str(avg_result))
    title = self.problem_type + ' with SA - Input Size Variation'
    plot_curve(prob_lengths, avg_result, title, 'Input Size', 'Best Score')
def experiment_sa_7(self):
    init_state = None
    schedule_var = 2
    best_state = None
    max_iters = np.arange(100, 5000, 100)
    result = np.zeros((len(self.rand_seeds), len(max_iters)))
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(max_iters)):
            prob_length = 20
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            max_attempt = 200
            max_iter = max_iters[j].item()
            alg = SA(problem, init_state, rand_state, schedule_var, max_attempt,
                     max_iter)
            best_state, best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('best_state')
    print(best_state)
    print('avg result ' + str(avg_result))
    title = self.problem_type + ' with SA - Max Iter Variation - Arith'
    plot_curve(max_iters, avg_result, title, 'Max Iterations', 'Best Score')
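# The SA wrapper is also not shown here. The sketch below assumes it delegates
# to mlrose.simulated_annealing and uses schedule_var to pick a decay
# schedule; the exact mapping of 0/1/2 to Exp/Geom/Arith is a guess inferred
# from the plot titles ("... - Arith" with schedule_var = 2, "... - Geom" with
# schedule_var = 1).
import mlrose


class SA:
    """Thin wrapper around mlrose's simulated annealing (assumed)."""

    SCHEDULES = {0: mlrose.ExpDecay, 1: mlrose.GeomDecay, 2: mlrose.ArithDecay}

    def __init__(self, problem, init_state, rand_state, schedule_var,
                 max_attempt, max_iter):
        self.problem = problem
        self.init_state = init_state
        self.rand_state = rand_state
        self.schedule = self.SCHEDULES[schedule_var]()
        self.max_attempt = max_attempt
        self.max_iter = max_iter

    def optimize(self):
        # Returns (best_state, best_fitness), matching the callers above.
        return mlrose.simulated_annealing(
            self.problem,
            schedule=self.schedule,
            max_attempts=self.max_attempt,
            max_iters=self.max_iter,
            init_state=self.init_state,
            random_state=self.rand_state)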
def experiment_rhc_1(self):
    print('restart vary')
    init_state = None
    restart_lengths = np.arange(10, 800, 100)
    result = np.zeros((len(self.rand_seeds), len(restart_lengths)))
    #best_state = np.zeros((len(self.rand_seeds), len(restart_lengths)))
    print(self.problem_type)
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        prob_length = 20
        for j in range(len(restart_lengths)):
            restart_length = restart_lengths[j]
            max_iter = np.inf
            # max_attempts is tuned by trial and error
            max_attempts = 100
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            alg = RHC(problem, init_state, rand_state, max_attempts, max_iter,
                      restart_length.item())
            best_state, best_fitness = alg.optimize()
            result[i][j] = best_fitness
    print('best fitness')
    print(str(result))
    print('best state')
    print(best_state)
    avg_result = np.mean(result, axis=0)
    print('avg result for varying # of restarts ' + str(avg_result))
    title = self.problem_type + ' with RHC - # of Restarts Variation'
    plot_curve(restart_lengths, avg_result, title, '# of Restarts', 'Best Score')
def experiment_rhc_4(self):
    init_state = None
    t_pcts = np.arange(0.1, 1, 0.1)
    result = np.zeros((len(self.rand_seeds), len(t_pcts)))
    best_score = None
    max_iter = np.inf
    for i in range(len(self.rand_seeds)):
        restarts = 400
        rand_state = self.rand_seeds[i]
        for j in range(len(t_pcts)):
            prob_length = 20
            max_attempt = 50
            t_pct = t_pcts[j].item()
            fl = SixPeaks(prob_length, t_pct)
            problem = fl.create_problem()
            alg = RHC(problem, init_state, rand_state, max_attempt, max_iter,
                      restarts)
            best_score, best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('avg result ' + str(avg_result))
    print('best score')
    print(best_score)
    title = self.problem_type + ' with RHC - Threshold Variation'
    plot_curve(t_pcts, avg_result, title, 'Threshold', 'Best Score')
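# CustomProblem and SixPeaks are problem factories defined elsewhere. A
# minimal sketch of SixPeaks is shown below, assuming it wraps the
# mlrose.SixPeaks fitness function in a bit-string mlrose.DiscreteOpt problem
# with the given threshold; CustomProblem presumably does the same for
# whichever fitness function self.problem_type names.
import mlrose


class SixPeaks:
    """Builds a Six Peaks optimization problem of a given length (assumed)."""

    def __init__(self, prob_length, t_pct=0.1):
        self.prob_length = prob_length
        self.t_pct = t_pct

    def create_problem(self):
        fitness = mlrose.SixPeaks(t_pct=self.t_pct)
        return mlrose.DiscreteOpt(length=self.prob_length,
                                  fitness_fn=fitness,
                                  maximize=True,
                                  max_val=2)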
def experiment_sa_4(self):
    init_state = None
    schedule_var = 1
    best_state = None
    max_attempts = np.array([5, 10, 15, 40, 60, 80, 100, 150, 200])
    result = np.zeros((len(self.rand_seeds), len(max_attempts)))
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(max_attempts)):
            prob_length = 20
            max_iter = np.inf
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            max_attempt = max_attempts[j].item()
            alg = SA(problem, init_state, rand_state, schedule_var, max_attempt,
                     max_iter)
            best_state, best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('avg result ' + str(avg_result))
    print('best_state')
    print(best_state)
    title = self.problem_type + ' with SA - Max Attempts Variation - Geom'
    plot_curve(max_attempts, avg_result, title, 'Max Attempts', 'Best Score')
def experiment_cluster_size(self, dataX, prefix):
    bic_scores = []
    aic_scores = []
    K = range(3, 15)
    X = dataX
    for k in K:
        learner = EMLearner(n_components=k, random_state=self.random_state)
        kf = learner.fit(X)
        bic_score = kf.bic(X)
        bic_scores.append(bic_score)
        aic_score = kf.aic(X)
        aic_scores.append(aic_score)
    prefix = self.splitter.reader.dataset + '-' + prefix
    plot_curve(K, bic_scores, 'Finding Optimal n using BIC - EM',
               '# of Components', 'BIC Score', prefix)
    plot_curve(K, aic_scores, 'Finding Optimal n using AIC - EM',
               '# of Components', 'AIC Score', prefix)
    # visualizer = KElbowVisualizer(learner, k=(1, 15))
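# EMLearner is not defined in this section. Since the object returned by
# .fit(X) exposes .bic(X) and .aic(X), it is likely a thin wrapper over
# sklearn.mixture.GaussianMixture; a minimal sketch under that assumption:
from sklearn.mixture import GaussianMixture


class EMLearner:
    """Expectation-maximization clustering via GaussianMixture (assumed)."""

    def __init__(self, n_components, random_state=None):
        self.model = GaussianMixture(n_components=n_components,
                                     random_state=random_state)

    def fit(self, X):
        # Return the fitted GaussianMixture so callers can use .bic() / .aic().
        return self.model.fit(X)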
def experiment_mimic_1(self):
    prob_lengths = np.arange(7, 30)
    result = np.zeros((len(self.rand_seeds), len(prob_lengths)))
    pop_size = 200
    keep_pct = 0.1
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(prob_lengths)):
            prob_length = prob_lengths[j]
            fl = CustomProblem(prob_length.item(), self.problem_type)
            problem = fl.create_problem()
            alg = Mimic(problem, rand_state, 10, 1000, pop_size, keep_pct)
            best_fitness = alg.optimize()
            result[i][j] = best_fitness
    print(str(result))
    avg_result = np.mean(result, axis=0)
    print('avg result for varying input size ' + str(avg_result))
    title = self.problem_type + ' with Mimic - Input Size Variation'
    plot_curve(prob_lengths, avg_result, title, 'Input Size', 'Best Score')
def experiment_cluster_size(self, dataX, prefix):
    print('experiment cluster size KMeans: ' + prefix)
    sum_sq_dist = []
    distortions = []
    db_scores = []
    K = range(3, 15)
    X = dataX
    for k in K:
        learner = KMeansLearner(n_clusters=k, random_state=self.random_state)
        kf = learner.fit(X)
        sum_sq_dist.append(kf.inertia_)
        distortions.append(
            sum(np.min(cdist(X, kf.cluster_centers_, 'euclidean'), axis=1)) /
            X.shape[0])
        kl = learner.fit_predict(X)
        silhouette_avg = silhouette_score(X, kl)
        db_score = davies_bouldin_score(X, kf.labels_)
        db_scores.append(db_score)
        print("For n_clusters =", k, "the average silhouette_score is:",
              silhouette_avg)
        sample_silhouette_values = silhouette_samples(X, kl)
        # TODO: plot the per-sample silhouette values
    prefix = self.splitter.reader.dataset + '-' + prefix
    plot_curve(K, sum_sq_dist, 'Finding Optimal K using Elbow - KMeans',
               'K Value', 'Sum of Squared Distances', prefix)
    plot_curve(K, distortions, 'Finding Optimal K using Distortions - KMeans',
               'K Value', 'Distortions', prefix)
    plot_curve(K, db_scores, 'Finding Optimal K using DB Scores - KMeans',
               'K Value', 'DB Score', prefix)
    # visualizer = KElbowVisualizer(learner, k=(1, 15))
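# plot_curve is the shared plotting helper used by every experiment in this
# file but is not shown here. A minimal matplotlib sketch matching the call
# signature plot_curve(x, y, title, x_title, y_title, prefix='') is given
# below; the real helper may style, annotate, or save the figure differently.
import matplotlib.pyplot as plt


def plot_curve(x, y, title, x_title, y_title, prefix=''):
    """Plot y against x and save the figure under a prefixed file name (assumed)."""
    plt.figure()
    plt.plot(x, y, marker='o')
    plt.title(title)
    plt.xlabel(x_title)
    plt.ylabel(y_title)
    plt.grid(True)
    file_name = (prefix + '-' if prefix else '') + title.replace(' ', '_') + '.png'
    plt.savefig(file_name)
    plt.close()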
def experiment_mimic_5(self):
    keep_pcts = np.arange(0.1, 1, 0.1)
    result = np.zeros((len(self.rand_seeds), len(keep_pcts)))
    pop_size = 800
    max_iter = np.inf
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(keep_pcts)):
            prob_length = 20
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            max_attempt = 200
            keep_pct = keep_pcts[j].item()
            alg = Mimic(problem, rand_state, max_attempt, max_iter, pop_size,
                        keep_pct)
            best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('avg result ' + str(avg_result))
    title = self.problem_type + ' with Mimic - Keep PCT Variation'
    plot_curve(keep_pcts, avg_result, title, 'Keep PCT', 'Best Score')
def experiment_mimic_4(self):
    pop_sizes = np.arange(200, 1000, 200)
    result = np.zeros((len(self.rand_seeds), len(pop_sizes)))
    max_iter = np.inf
    keep_pct = 0.1
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(pop_sizes)):
            prob_length = 20
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            max_attempt = 200
            pop_size = pop_sizes[j].item()
            alg = Mimic(problem, rand_state, max_attempt, max_iter, pop_size,
                        keep_pct)
            best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('avg result ' + str(avg_result))
    title = self.problem_type + ' with Mimic - Population Size Variation'
    plot_curve(pop_sizes, avg_result, title, 'Population Size', 'Best Score')
def experiment_mimic_3(self):
    max_iters = np.arange(1000, 5000, 100)
    result = np.zeros((len(self.rand_seeds), len(max_iters)))
    pop_size = 800
    keep_pct = 0.6
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(max_iters)):
            prob_length = 20
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            max_attempt = 200
            max_iter = max_iters[j].item()
            alg = Mimic(problem, rand_state, max_attempt, max_iter, pop_size,
                        keep_pct)
            best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('avg result ' + str(avg_result))
    title = self.problem_type + ' with Mimic - Max Iterations Variation'
    plot_curve(max_iters, avg_result, title, 'Max Iterations', 'Best Score')
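# The Mimic wrapper is not shown. Unlike RHC/SA, the callers above unpack a
# single return value from optimize(), so the sketch below returns only the
# best fitness; it assumes delegation to mlrose.mimic.
import mlrose


class Mimic:
    """Thin wrapper around mlrose's MIMIC algorithm (assumed)."""

    def __init__(self, problem, rand_state, max_attempt, max_iter, pop_size,
                 keep_pct):
        self.problem = problem
        self.rand_state = rand_state
        self.max_attempt = max_attempt
        self.max_iter = max_iter
        self.pop_size = pop_size
        self.keep_pct = keep_pct

    def optimize(self):
        best_state, best_fitness = mlrose.mimic(
            self.problem,
            pop_size=self.pop_size,
            keep_pct=self.keep_pct,
            max_attempts=self.max_attempt,
            max_iters=self.max_iter,
            random_state=self.rand_state)
        return best_fitness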
def experiment_ga_5(self):
    mutation_probs = np.arange(0.1, 1, 0.1)
    result = np.zeros((len(self.rand_seeds), len(mutation_probs)))
    pop_size = 1000
    max_iter = np.inf
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(mutation_probs)):
            prob_length = 20
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            max_attempt = 60
            mutation_prob = mutation_probs[j].item()
            alg = GA(problem, rand_state, max_attempt, max_iter, pop_size,
                     mutation_prob)
            best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('avg result ' + str(avg_result))
    title = self.problem_type + ' with GA - Mutation Prob Variation'
    plot_curve(mutation_probs, avg_result, title, 'Mutation Prob', 'Best Score')
def experiment_ga_2(self):
    max_attempts = np.array([5, 10, 15, 30, 40, 50, 60, 80, 100])
    result = np.zeros((len(self.rand_seeds), len(max_attempts)))
    pop_size = 200
    mutation_prob = 0.1
    for i in range(len(self.rand_seeds)):
        rand_state = self.rand_seeds[i]
        for j in range(len(max_attempts)):
            prob_length = 20
            fl = CustomProblem(prob_length, self.problem_type)
            problem = fl.create_problem()
            max_attempt = max_attempts[j].item()
            max_iter = np.inf
            alg = GA(problem, rand_state, max_attempt, max_iter, pop_size,
                     mutation_prob)
            best_fitness = alg.optimize()
            result[i][j] = best_fitness
    avg_result = np.mean(result, axis=0)
    print('avg result ' + str(avg_result))
    title = self.problem_type + ' with GA - Max Attempts Variation'
    plot_curve(max_attempts, avg_result, title, 'Max Attempts', 'Best Score')
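# The GA wrapper is also not shown; like Mimic, the callers unpack a single
# value, so the sketch returns only the best fitness and assumes delegation to
# mlrose.genetic_alg.
import mlrose


class GA:
    """Thin wrapper around mlrose's genetic algorithm (assumed)."""

    def __init__(self, problem, rand_state, max_attempt, max_iter, pop_size,
                 mutation_prob):
        self.problem = problem
        self.rand_state = rand_state
        self.max_attempt = max_attempt
        self.max_iter = max_iter
        self.pop_size = pop_size
        self.mutation_prob = mutation_prob

    def optimize(self):
        best_state, best_fitness = mlrose.genetic_alg(
            self.problem,
            pop_size=self.pop_size,
            mutation_prob=self.mutation_prob,
            max_attempts=self.max_attempt,
            max_iters=self.max_iter,
            random_state=self.rand_state)
        return best_fitness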
def experiment_3(self):
    env_name = ''
    #fl = FrozenLake(self.num_states)
    fl = Taxi()
    alpha = 0.1
    epsilon = 0.1  #0.0000001
    num_episodes = 100000
    stopping_epsilon = 0.0008
    gamma_range = np.arange(0.1, 1.1, 0.1)
    avg_lengths = []
    avg_rewards = []
    times = []
    for gamma in gamma_range:
        #fl = FrozenLake(self.num_states)
        fl = Taxi()
        QL = QLearning(fl.get_env(), gamma, alpha, epsilon, num_episodes,
                       stopping_epsilon)
        start = time.time()
        rewards_all_episodes, episode_lengths, episode_rewards, q_values_first, q_values_last = QL.test_run()
        end = time.time()
        times.append(end - start)
        avg_length = np.mean(episode_lengths)
        avg_reward = np.mean(episode_rewards)
        avg_lengths.append(avg_length)
        avg_rewards.append(avg_reward)
        #print('rewards_all_episodes')
        #print(rewards_all_episodes)
    plot_curve(gamma_range, avg_lengths, 'Convergence Steps Vs Gamma', 'Gamma',
               'Convergence Steps', prefix='QL-Taxi')
    plot_curve(gamma_range, avg_rewards, 'Cumulative Rewards Vs Gamma', 'Gamma',
               'Cumulative Rewards', prefix='QL-Taxi')
    title = 'Time Complexity Vs Gamma'
    x_title = 'Gamma'
    y_title = 'Time (seconds)'
    plot_curve(gamma_range, times, title, x_title, y_title, prefix='QL-Taxi')
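# QLearning.test_run() is defined elsewhere. The core of any tabular
# Q-learning agent on a discrete environment like Taxi is the epsilon-greedy
# temporal-difference update sketched below. This is a simplified
# illustration, not the author's implementation: it assumes the pre-0.26 gym
# step/reset API and ignores the stopping_epsilon criterion and the extra
# statistics that test_run() returns.
import numpy as np


def q_learning_sketch(env, gamma, alpha, epsilon, num_episodes):
    """Tabular Q-learning with an epsilon-greedy policy (illustrative sketch)."""
    Q = np.zeros((env.observation_space.n, env.action_space.n))
    episode_rewards = []
    for _ in range(num_episodes):
        state = env.reset()
        done = False
        total_reward = 0
        while not done:
            # Epsilon-greedy action selection.
            if np.random.random() < epsilon:
                action = env.action_space.sample()
            else:
                action = int(np.argmax(Q[state]))
            next_state, reward, done, _ = env.step(action)
            # Q-learning temporal-difference update.
            Q[state, action] += alpha * (
                reward + gamma * np.max(Q[next_state]) - Q[state, action])
            state = next_state
            total_reward += reward
        episode_rewards.append(total_reward)
    return Q, episode_rewards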
def experiment_2(self):
    env_name = ''
    max_iter = 100000
    policy_scores = []
    converge_iters = []
    gamma_range = np.arange(0.1, 1.0, 0.1)
    print('here in experiment_2')
    eps = 1e-20
    df = pd.DataFrame(columns=['gamma', 'state', 'value'])
    times = []
    for gamma in gamma_range:
        #fl = FrozenLake(self.num_states)
        fl = Taxi()
        PI = ValueIter(fl.get_env(), gamma, max_iter, eps)
        start = time.time()
        policy_score, converge_iter, v_arr, V = PI.test_run()
        end = time.time()
        times.append(end - start)
        converge_iters.append(converge_iter)
        policy_scores.append(policy_score)
        # Accumulate the per-state values obtained for this gamma.
        df = pd.concat([
            df,
            pd.DataFrame({
                'gamma': [
                    gamma for i in range(0, fl.get_env().observation_space.n)
                ],
                'state':
                [i for i in range(0, fl.get_env().observation_space.n)],
                'value': V
            })
        ])
    df.state = df.state.astype(int)
    #plot_value_fn(df, 'Value Iteration - Values per gamma')
    title = 'Reward Vs Gamma'
    x_title = 'Gamma'
    y_title = 'Average Reward'
    plot_curve(gamma_range, policy_scores, title, x_title, y_title,
               prefix='ValueIter')
    title = 'Convergence Vs Gamma'
    x_title = 'Gamma'
    y_title = 'Convergence Step'
    plot_curve(gamma_range, converge_iters, title, x_title, y_title,
               prefix='ValueIter')
    title = 'Values per Iteration'
    #fl = FrozenLake(self.num_states)
    fl = Taxi()
    gamma = 0.5
    PI = ValueIter(fl.get_env(), gamma, max_iter, eps)
    policy_score, converge_iter, v_arr, V = PI.test_run()
    plot_curve_single(v_arr, title, 'Iteration', 'Value', prefix='ValueIter')
    title = 'Time Complexity Vs Gamma'
    x_title = 'Gamma'
    y_title = 'Time (seconds)'
    plot_curve(gamma_range, times, title, x_title, y_title, prefix='ValueIter')
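# ValueIter.test_run() is defined elsewhere. The core value-iteration sweep it
# presumably performs over Taxi's tabular transition model env.P is sketched
# below. This is illustrative only: it returns just the value function and the
# iteration at which the maximum change fell below eps, not the policy score
# or the per-iteration value history used above.
import numpy as np


def value_iteration_sketch(env, gamma, max_iter, eps):
    """Value iteration using the env.P transition model (illustrative sketch)."""
    n_states = env.observation_space.n
    n_actions = env.action_space.n
    V = np.zeros(n_states)
    for it in range(max_iter):
        delta = 0.0
        for s in range(n_states):
            # Bellman optimality backup: V(s) = max_a sum_s' p * (r + gamma * V(s')).
            q_values = [
                sum(p * (r + gamma * V[s_next])
                    for p, s_next, r, _ in env.P[s][a])
                for a in range(n_actions)
            ]
            best = max(q_values)
            delta = max(delta, abs(best - V[s]))
            V[s] = best
        if delta < eps:
            return V, it
    return V, max_iter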
def experiment_1(self):
    env_name = ''
    max_iter = 1000000
    gamma = 1.0
    avg_scores = []
    converge_iters = []
    policy_scores = []
    iter_arr = []
    gamma_range = np.arange(0.1, 1.0, 0.1)
    print('here')
    eps = 1e-25
    df = pd.DataFrame(columns=['gamma', 'state', 'value'])
    times = []
    for gamma in gamma_range:
        #fl = FrozenLake(self.num_states)
        fl = Taxi()
        print('gamma ' + str(gamma))
        PI = PolicyIter(fl.get_env(), gamma, max_iter, eps)
        start = time.time()
        optimal_policy, converge_iter, v_arr, avg_score = PI.test_run()
        end = time.time()
        times.append(end - start)
        print('avg reward ' + str(avg_score))
        print('converge_iters ' + str(converge_iter))
        avg_scores.append(avg_score)
        converge_iters.append(converge_iter)
        """df = df.append(pd.DataFrame({
            'gamma': [gamma for i in range(0, fl.get_env().observation_space.n)],
            'state': [i for i in range(0, fl.get_env().observation_space.n)],
            'value': V}))"""
    title = 'Reward Vs Gamma'
    x_title = 'Gamma'
    y_title = 'Average Reward'
    plot_curve(gamma_range, avg_scores, title, x_title, y_title,
               prefix='PolicyIter')
    title = 'Convergence Vs Gamma'
    x_title = 'Gamma'
    y_title = 'Convergence Step'
    plot_curve(gamma_range, converge_iters, title, x_title, y_title,
               prefix='PolicyIter')
    title = 'Values per Iteration'
    #fl = FrozenLake(self.num_states)
    fl = Taxi()
    gamma = 0.5
    PI = PolicyIter(fl.get_env(), gamma, max_iter, eps)
    optimal_policy, converge_iter, v_arr, avg_score = PI.test_run()
    plot_curve_single(v_arr, title, 'Iteration', 'Value', prefix='PolicyIter')
    title = 'Time Complexity Vs Gamma'
    x_title = 'Gamma'
    y_title = 'Time (seconds)'
    plot_curve(gamma_range, times, title, x_title, y_title, prefix='PolicyIter')
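# PolicyIter.test_run() is defined elsewhere. A compact illustration of the
# evaluate/improve cycle it presumably runs over env.P is given below.
# Illustrative only: the author's version also scores the policy by running
# episodes and records per-iteration values.
import numpy as np


def policy_iteration_sketch(env, gamma, max_iter, eps):
    """Iterative policy evaluation plus greedy improvement (illustrative sketch)."""
    n_states = env.observation_space.n
    n_actions = env.action_space.n
    policy = np.zeros(n_states, dtype=int)
    V = np.zeros(n_states)
    for it in range(max_iter):
        # Policy evaluation: sweep until the value of the current policy converges.
        while True:
            delta = 0.0
            for s in range(n_states):
                v_new = sum(p * (r + gamma * V[s_next])
                            for p, s_next, r, _ in env.P[s][policy[s]])
                delta = max(delta, abs(v_new - V[s]))
                V[s] = v_new
            if delta < eps:
                break
        # Policy improvement: act greedily with respect to the evaluated values.
        stable = True
        for s in range(n_states):
            q_values = [
                sum(p * (r + gamma * V[s_next])
                    for p, s_next, r, _ in env.P[s][a])
                for a in range(n_actions)
            ]
            best_action = int(np.argmax(q_values))
            if best_action != policy[s]:
                stable = False
            policy[s] = best_action
        if stable:
            return policy, V, it
    return policy, V, max_iter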