def rollout_MC_per_run(env, runtime, runtimes, episodes, target, gamma, Lambda, alpha, beta):
    """Run one Monte-Carlo rollout and return its final estimates.

    Prints a progress line, calls ``MC`` once and normalises the returned
    counts so that they sum to one.

    Args:
        env: Environment handed straight to ``MC``.
        runtime: Zero-based index of this run (printed one-based).
        runtimes: Total number of runs, used only in the progress line.
        episodes: Episode count forwarded to ``MC``.
        target: Policy forwarded to ``MC`` twice, as its third and fourth
            positional arguments (presumably behaviour and target policy --
            confirm against ``MC``'s signature).
        gamma: Discount argument forwarded to ``MC``.
        Lambda: Unused here; ``None`` is passed to ``MC`` in its place.
        alpha: Unused.
        beta: Unused.

    Returns:
        A 3-tuple ``(last expected-return entry, last variance entry,
        normalised return counts)``.
    """
    print('rolling out %d of %d for MC' % (runtime + 1, runtimes))

    mc_outputs = MC(env, episodes, target, target, None, gamma)
    mean_trace, var_trace, visit_counts = mc_outputs

    # Normalise the counts into a distribution over whatever MC counted.
    total_visits = np.sum(visit_counts)
    visit_dist = visit_counts / total_visits

    return (mean_trace[-1], var_trace[-1], visit_dist)
def main(self):
    """Repeat ``self.update`` until the log-likelihood gain is below a threshold.

    Builds every ordered index pair ``[i, j]`` with ``i`` and ``j`` in
    ``range(self.dim)``, obtains ``f`` from ``MC.MC().rtm(self.data)`` and
    runs at most 50 update rounds.  After each round the log-likelihood is
    recomputed as the sum, over every column ``i`` and every pair
    ``(i_1, i_2)``, of
    ``f[i_1 + i_2 * dim][i] * log(lambda1 * Q[i_1][i] + lambda2 * Q[i_2][i])``;
    terms whose logarithm argument is exactly zero are skipped.

    Returns:
        The log-likelihood of the first round whose gain over the previous
        round is below ``0.0001`` (returned immediately, nothing printed), or
        the last computed log-likelihood once all rounds are used up (after
        printing ``lambda1``, ``lambda2``, ``Q`` and that value).

    NOTE(review): the original indentation was not available; the final
    report is assumed to run once, after the loop -- confirm.
    """
    # Hyperparameters.
    threshold = 0.0001
    max_iter = 50
    # (dim + 1) entries, all starting from the same value.
    theta = [0.01] * (self.dim + 1)

    # All dim ** 2 ordered pairs, first index varying slowest.
    pairs = [[first, second]
             for first in range(self.dim)
             for second in range(self.dim)]

    model = MC.MC()
    f, _ = model.rtm(self.data)  # second return value is not used here

    log_likelihood = 0
    for _round in range(max_iter):
        theta = self.update(theta, pairs, f)

        # Recompute the log-likelihood with the freshly updated parameters.
        new_log_likelihood = 0
        for col in range(self.dim):
            for i_1, i_2 in pairs:
                mixture = (self.lambda1 * self.Q[i_1][col]
                           + self.lambda2 * self.Q[i_2][col])
                if mixture == 0:
                    # log(0) is undefined; such terms contribute nothing.
                    continue
                row = i_1 + i_2 * self.dim
                new_log_likelihood += f[row][col] * math.log(mixture)

        # A stored value of exactly 0 means "no previous round yet", so the
        # convergence test is skipped.  The difference is signed: a decrease
        # also ends the iteration.
        has_previous = log_likelihood != 0
        if has_previous and new_log_likelihood - log_likelihood < threshold:
            return new_log_likelihood
        log_likelihood = new_log_likelihood

    print()
    print("final:", self.lambda1, self.lambda2)
    print("final:", self.Q)
    print("log_likelihood:", log_likelihood)
    return log_likelihood
# Script: read positions, compute MC.MC over them, copy the results back onto
# the points, export them to CSV, then choose and display a conformal conic
# projection for the exported points.
import math

import MC
import parse_pos
import proj

list_pos = parse_pos.recup_pos()
# MC.MC returns a flat, indexable sequence; it is consumed three values at a
# time below (presumably a least-squares solution vector -- confirm in MC).
Xchap = MC.MC(list_pos)

j = 0
for pos in list_pos:
    if not pos.parent:
        pos.X_MC = Xchap[j]
        pos.Y_MC = Xchap[j + 1]
        # NOTE(review): `Z_mc` differs in case from `X_MC` / `Y_MC`.  It is
        # kept as-is because the attribute read by `add_to_file` is not
        # visible here -- confirm which spelling parse_pos expects.
        pos.Z_mc = Xchap[j + 2]
        j += 3
        # NOTE(review): the original indentation was not available; the
        # export is assumed to apply only to points that just received
        # coordinates -- confirm.
        pos.add_to_file("resultats.csv")

points = proj.lecture("resultats.csv")
pr = proj.choix_proj_cc(points)

# The original called an undefined `rad_to_deg`, which raised NameError;
# math.degrees is the standard-library radians-to-degrees conversion.
print(
    'Paramètres de la projection conique conforme minimisant le module linéaire:\n',
    'Phi0 =', math.degrees(pr.phi0), '\n',
    'Phi1 =', math.degrees(pr.phi1), '\n',
    'Phi2 =', math.degrees(pr.phi2), '\n',
    'X0 :', pr.X0, 'Y0 :', pr.Y0, '\n',
    "ellipsoide de référence WGS 84",
)

proj.affiche(points, pr)
lambda x: onehot(x, N), N, int(1e6), gamma=0.99) stationary_dist = return_counts / np.sum(return_counts) true_expectation, true_variance = E[-1], V[-1] np.savez(filename, true_expectation=true_expectation, true_variance=true_variance, stationary_dist=stationary_dist) j, v, s = iterative_policy_evaluation(env, target_policy, gamma=gamma) print('Iterative policy evaluation') Lambda = LAMBDA(env, lambda_type='constant', initial_value=np.ones(N)) mc_j, mc_v, mc_counts = MC(env, 10000, target_policy, target_policy, Lambda, gamma) # test both on-policy and off-policy Lambda = LAMBDA(env, lambda_type='constant', initial_value=np.ones(N)) off_mc_results, off_mc_var_results = eval_method_with_variance( MC, env, true_expectation, true_variance, stationary_dist, behavior_policy, target_policy, Lambda, gamma=gamma, alpha=0.05, beta=0.05,
# Script preamble: parse the episode budget, build the FrozenLake environment
# and a fixed target policy, then load the cached ground-truth statistics or
# compute and cache them on first use.
from utils import *
from joblib import Parallel, delayed
from MC import *
import numpy.matlib
import argparse

parser = argparse.ArgumentParser(description='')
parser.add_argument('--episodes', type=int, default=int(1e8), help='')
args = parser.parse_args()

env = gym.make('FrozenLake-v0')
env.reset()
N = env.observation_space.n
# Discount is supplied as a function of its argument; here it is constant.
gamma = lambda x: 0.95
# One identical row of four action probabilities per state.
target_policy = np.matlib.repmat(
    np.array([0.2, 0.3, 0.3, 0.2]).reshape(1, 4), N, 1)

# get ground truth expectation, variance and stationary distribution
filename = 'frozenlake_truths_heuristic_%g.npz' % args.episodes
try:
    # np.load on an .npz returns a lazily-read archive holding an open file
    # handle; the `with` block closes it once the arrays have been read.
    with np.load(filename) as loaded:
        true_expectation = loaded['true_expectation']
        true_variance = loaded['true_variance']
        stationary_dist = loaded['stationary_dist']
except FileNotFoundError:
    # NOTE(review): other MC call sites seen alongside this script pass six
    # positional arguments (a Lambda value before gamma) and keep only the
    # last element of the returned traces; this call passes five and stores
    # the results unindexed.  Confirm against the MC signature imported above.
    true_expectation, true_variance, return_counts = MC(
        env, args.episodes, target_policy, target_policy, gamma)
    stationary_dist = return_counts / np.sum(return_counts)
    np.savez(filename,
             true_expectation=true_expectation,
             true_variance=true_variance,
             stationary_dist=stationary_dist)