def grid_world1_trp(exp_id=4, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 20

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir, '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(x, [(7, 5)]),
        binary=True,
        noise=noise)
    # domain = Pinball(noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)

    a = TransitionStateClustering(window_size=2)
    for t in trajs:
        N = len(t)
        demo = np.zeros((N, 2))
        for i in range(0, N):
            demo[i, :] = t[i][0:2]
        a.addDemonstration(demo)
    a.fit(normalize=False, pruning=0.5)

    # use a separate loop variable so the clustering object `a` is not shadowed
    ac = [(round(m.means_[0][0]), round(m.means_[0][1])) for m in a.model]
    print(ac)

    # reinitialize
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.statePassageEncoding(x, ac, 5),
        noise=noise)
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.3)

    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    opt["domain"] = domain

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(experiment.result["return"])
def make_experiment(exp_id=1, path="./Results/Experiments/HalfReward/" + getTimeStr() + "/"): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ opt = {} opt["exp_id"] = exp_id opt["path"] = path ## Domain: maze = os.path.join(GridWorldInter.default_map_dir, '11x11-RoomsSeg.txt') domain = GridWorldInter(maze, noise=0.01) opt["domain"] = domain ## Representation # discretization only needed for continuous state spaces, discarded otherwise representation = Tabular(domain, discretization=20) ## Policy policy = eGreedy(representation, epsilon=0.1) ## Need to change this back, limiting noise ATM ## Agent opt["agent"] = Q_Learning(representation=representation, policy=policy, discount_factor=domain.discount_factor, initial_learn_rate=0.3) opt["checks_per_policy"] = 50 opt["max_steps"] = 12000 opt["num_policy_checks"] = 20 # experiment = ExperimentDelayed(**opt) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", lambda_=0., boyan_N0=10.09, initial_learn_rate=.47): opt = {} opt["exp_id"] = exp_id opt["path"] = path opt["max_steps"] = 100000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 5 sparsify = 1 ifddeps = 1e-7 domain = BlocksWorld(blocks=6, noise=0.3) opt["domain"] = domain representation = IndependentDiscretization(domain) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Greedy_GQ(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(self, exp_id=1, path="results/"): opt = {} opt["exp_id"] = exp_id opt["path"] = path domain = NDomain(self.browser) opt["domain"] = domain representation = RBF(opt["domain"], num_rbfs=int(206,)) self.representation = self._pickle(representation, attrs='r', action='l') policy = eGreedy(representation, epsilon=0.3) agent = SARSA( representation=representation, policy=policy, discount_factor=domain.discount_factor, initial_learn_rate=0.1, learn_rate_decay_mode="boyan", boyan_N0=100, lambda_=0.4 ) self.agent = self._pickle(agent, attrs='a', action='l') opt["agent"] = self.agent opt["checks_per_policy"] = 10 opt["max_steps"] = 5000 opt["num_policy_checks"] = 10 experiment = Experiment(**opt) return(experiment)
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=136, lambda_=0.0985, initial_learn_rate=0.090564, resolution=13., num_rbfs=9019): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 150000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 1 domain = HIVTreatment() opt["domain"] = domain representation = RBF(domain, num_rbfs=int(num_rbfs), resolution_max=resolution, resolution_min=resolution, const_feature=False, normalize=True, seed=exp_id) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_LEARNING(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def runTIRL(self, N=5, w=2, pruning=0.5):
    opt = deepcopy(self.opt_template)
    dist = self.getIRLDist(N=N)
    ac = self.getTSCWaypoints(N, w, pruning)
    domain = self.createStateDomain(
        waypoints=ac,
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
            x, y, z, w, dist))
    opt["domain"] = domain
    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=self.createStateDomain(
                       waypoints=self.env_template["consumable"]),
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(experiment.result["return"])
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=0.012695, lambda_=0.2, boyan_N0=80.798, initial_learn_rate=0.402807): opt = {} opt["exp_id"] = exp_id opt["path"] = path opt["max_steps"] = 100000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 1 sparsify = 1 domain = BlocksWorld(blocks=6, noise=0.3) opt["domain"] = domain initial_rep = IndependentDiscretization(domain) representation = iFDDK(domain, discover_threshold, initial_rep, sparsify=sparsify, useCache=True, lazy=True, lambda_=lambda_) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Greedy_GQ( policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=2120, initial_learn_rate=.26, lambda_=0.9, resolution=8, num_rbfs=4958): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 30000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 10 domain = FiniteCartPoleBalanceOriginal(good_reward=0.) opt["domain"] = domain representation = RBF(domain, num_rbfs=int(num_rbfs), resolution_max=resolution, resolution_min=resolution, const_feature=False, normalize=True, seed=exp_id) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_LEARNING(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=136, lambda_=0.0985, initial_learn_rate=0.090564, resolution=13., num_rbfs=9019): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 150000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 1 domain = HIVTreatment() opt["domain"] = domain representation = NonparametricLocalBases(domain, kernel=linf_triangle_kernel, resolution=resolution, normalization=True) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_LEARNING( policy, representation,discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=1204., lambda_=0., boyan_N0=7353.2, initial_learn_rate=.9712): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 500000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 10 sparsify = 1 kappa = 1e-7 domain = PST(NUM_UAV=4) opt["domain"] = domain initial_rep = IndependentDiscretization(domain) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, useCache=True, iFDDPlus=1 - kappa) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def solve(self): """Solve the domain MDP.""" self.start_time = clock() # Used to track the total time for solving self.bellmanUpdates = 0 converged = False PI_iteration = 0 # The policy is maintained as separate copy of the representation. # This way as the representation is updated the policy remains intact policy = eGreedy(deepcopy(self.representation), epsilon=0, forcedDeterministicAmongBestActions=True) while self.hasTime() and not converged: self.trajectoryBasedPolicyEvaluation(policy) # Policy Improvement (Updating the representation of the value # function will automatically improve the policy PI_iteration += 1 # Theta can increase in size if the representation is expanded hence padding the weight vector with zeros paddedTheta = padZeros(policy.representation.weight_vec, len(self.representation.weight_vec)) # Calculate the change in the weight_vec as L2-norm delta_weight_vec = np.linalg.norm(paddedTheta - self.representation.weight_vec) converged = delta_weight_vec < self.convergence_threshold # Update the underlying value function of the policy policy.representation = deepcopy( self.representation) # self.representation performance_return, performance_steps, performance_term, performance_discounted_return = self.performanceRun( ) self.logger.info( 'PI #%d [%s]: BellmanUpdates=%d, ||delta-weight_vec||=%0.4f, Return=%0.3f, steps=%d, features=%d' % (PI_iteration, hhmmss(deltaT(self.start_time)), self.bellmanUpdates, delta_weight_vec, performance_return, performance_steps, self.representation.features_num)) if self.show: self.domain.show(a, representation=self.representation, s=s) # store stats self.result["bellman_updates"].append(self.bellmanUpdates) self.result["return"].append(performance_return) self.result["planning_time"].append(deltaT(self.start_time)) self.result["num_features"].append( self.representation.features_num) self.result["steps"].append(performance_steps) self.result["terminated"].append(performance_term) self.result["discounted_return"].append( performance_discounted_return) self.result["policy_improvemnt_iteration"].append(PI_iteration) if converged: self.logger.info('Converged!') super(TrajectoryBasedPolicyIteration, self).solve()
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=136, lambda_=0.0985, initial_learn_rate=0.090564, resolution=13., num_rbfs=9019): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 150000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 1 domain = HIVTreatment() opt["domain"] = domain representation = NonparametricLocalBases(domain, kernel=linf_triangle_kernel, resolution=resolution, normalization=True) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_LEARNING(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def solve(self): """Solve the domain MDP.""" self.bellmanUpdates = 0 self.policy_improvement_iteration = 0 self.start_time = clock() # Check for Tabular Representation if not self.IsTabularRepresentation(): self.logger.error("Policy Iteration works only with a tabular representation.") return 0 # Initialize the policy policy = eGreedy( deepcopy(self.representation), epsilon=0, forcedDeterministicAmongBestActions=True) # Copy the representation so that the weight change during the evaluation does not change the policy # Setup the number of policy changes to 1 so the while loop starts policyChanges = True while policyChanges and deltaT(self.start_time) < self.planning_time: # Evaluate the policy converged = self.policyEvaluation(policy) # Improve the policy self.policy_improvement_iteration += 1 policy, policyChanges = self.policyImprovement(policy) super(PolicyIteration, self).solve()
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=738.59, lambda_=.8927, boyan_N0=2804., initial_learn_rate=.2706): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 1000000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 10 sparsify = 1 ifddeps = 1e-7 beta_coef = 1e-6 domain = PST(NUM_UAV=4) opt["domain"] = domain initial_rep = IndependentDiscretization(domain) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, # discretization=discretization, useCache=True, iFDDPlus=1 - ifddeps) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Greedy_GQ(policy, representation, discount_factor=domain.discount_factor, BetaCoef=beta_coef, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=8.63917, lambda_=0.42, boyan_N0=202., initial_learn_rate=.7442, discretization=18.): opt = {} opt["exp_id"] = exp_id opt["max_steps"] = 40000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 100 sparsify = True domain = PuddleWorld() opt["domain"] = domain initial_rep = IndependentDiscretization( domain, discretization=discretization) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, discretization=discretization, useCache=True, iFDDPlus=True) policy = eGreedy(representation, epsilon=0.1) # agent = SARSA(representation,policy,domain,initial_learn_rate=1., # lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100) opt["agent"] = Q_LEARNING(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(self, exp_id=1, path="results/"): opt = {} opt["exp_id"] = exp_id opt["path"] = path domain = NDomain(self.browser) opt["domain"] = domain representation = RBF(opt["domain"], num_rbfs=int(206, )) self.representation = self._pickle(representation, attrs='r', action='l') policy = eGreedy(representation, epsilon=0.3) agent = SARSA(representation=representation, policy=policy, discount_factor=domain.discount_factor, initial_learn_rate=0.1, learn_rate_decay_mode="boyan", boyan_N0=100, lambda_=0.4) self.agent = self._pickle(agent, attrs='a', action='l') opt["agent"] = self.agent opt["checks_per_policy"] = 10 opt["max_steps"] = 5000 opt["num_policy_checks"] = 10 experiment = Experiment(**opt) return (experiment)
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=107091, lambda_=0.245, boyan_N0=514, initial_learn_rate=.327, discretization=18): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 150000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 1 sparsify = 1 domain = HIVTreatment() opt["domain"] = domain initial_rep = IndependentDiscretization( domain, discretization=discretization) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, discretization=discretization, useCache=True, iFDDPlus=True) #representation.PRINT_MAX_RELEVANCE = True policy = eGreedy(representation, epsilon=0.1) # agent = SARSA(representation,policy,domain,initial_learn_rate=initial_learn_rate, # lambda_=.0, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) opt["agent"] = Q_LEARNING( policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=2120, initial_learn_rate=.26, lambda_=0.9, resolution=8, num_rbfs=4958): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 30000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 10 domain = FiniteCartPoleBalanceOriginal(good_reward=0.) opt["domain"] = domain representation = RBF(domain, num_rbfs=int(num_rbfs), resolution_max=resolution, resolution_min=resolution, const_feature=False, normalize=True, seed=exp_id) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_LEARNING( policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", lambda_=0., boyan_N0=116.7025, initial_learn_rate=0.01402, discretization=6.): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 50000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 10 domain = InfCartPoleBalance() opt["domain"] = domain representation = IndependentDiscretization(domain, discretization=discretization) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = SARSA(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", lambda_=0.9, boyan_N0=22.36, initial_learn_rate=.068, discretization=9): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 150000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 1 domain = Pacman() opt["domain"] = domain representation = IncrementalTabular( domain, discretization=discretization) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning( policy, representation, discount_factor=domain.discount_factor, lambda_=0.9, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=119, initial_learn_rate=.06, discretization=34): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 30000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 1 domain = FiniteCartPoleBalanceOriginal(good_reward=0.) opt["domain"] = domain representation = IncrementalTabular( domain, discretization=discretization) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning( policy, representation, discount_factor=domain.discount_factor, lambda_=0.9, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def _make_experiment(exp_id=1, path="./Results/Tmp/test_FiftyChain"): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ ## Domain: domain = FiftyChain() ## Representation # discretization only needed for continuous state spaces, discarded otherwise representation = Tabular(domain) ## Policy policy = eGreedy(representation, epsilon=0.2) ## Agent agent = SARSA(representation=representation, policy=policy, discount_factor=domain.discount_factor, learn_rate=0.1) checks_per_policy = 3 max_steps = 50 num_policy_checks = 3 experiment = Experiment(**locals()) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=0.013461679, lambda_=0., boyan_N0=484.78006, initial_learn_rate=0.5651405, discretization=23.): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 50000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 10 sparsify = True kappa = 1e-7 domain = InfCartPoleBalance() opt["domain"] = domain initial_rep = IndependentDiscretization( domain, discretization=discretization) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, discretization=discretization, useCache=True, iFDDPlus=1. - kappa) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Greedy_GQ(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, BetaCoef=1e-6, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=389.56, lambda_=0.52738, initial_learn_rate=.424409, discretization=30): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 400000 opt["num_policy_checks"] = 10 opt["checks_per_policy"] = 100 domain = PuddleGapWorld() opt["domain"] = domain representation = Tabular(domain, discretization=discretization) policy = eGreedy(representation, epsilon=0.1) # agent = SARSA(representation,policy,domain,initial_learn_rate=1., # lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100) opt["agent"] = Q_Learning( policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=77., boyan_N0=11, lambda_=0.9, initial_learn_rate=.05, discretization=47): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 30000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 10 sparsify = 1 domain = FiniteCartPoleBalanceOriginal(good_reward=0.) opt["domain"] = domain initial_rep = IndependentDiscretization( domain, discretization=discretization) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, discretization=discretization, useCache=True, iFDDPlus=True) policy = eGreedy(representation, epsilon=0.1) # agent = SARSA(representation,policy,domain,initial_learn_rate=initial_learn_rate, # lambda_=.0, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) opt["agent"] = Q_LEARNING( policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning"): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ opt = {} opt["exp_id"] = exp_id opt["path"] = path # Domain: maze = os.path.join(GridWorld.default_map_dir, '4x5.txt') domain = GridWorld(maze, noise=0.3) opt["domain"] = domain # Representation representation = Tabular(domain, discretization=20) # Policy policy = eGreedy(representation, epsilon=0.2) # Agent opt["agent"] = Q_Learning(representation=representation, policy=policy, discount_factor=domain.discount_factor, initial_learn_rate=0.1, learn_rate_decay_mode="boyan", boyan_N0=100, lambda_=0.) opt["checks_per_policy"] = 100 opt["max_steps"] = 2000 opt["num_policy_checks"] = 10 experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-sarsa0"): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ opt = {} opt["exp_id"] = exp_id opt["path"] = path ## Domain: maze = os.path.join(GridWorld.default_map_dir, '4x5.txt') domain = GridWorld(maze, noise=0.3) opt["domain"] = domain ## Representation # discretization only needed for continuous state spaces, discarded otherwise representation = Tabular(domain, discretization=20) ## Policy policy = eGreedy(representation, epsilon=0.2) ## Agent opt["agent"] = SARSA0(representation=representation, policy=policy, discount_factor=domain.discount_factor, learn_rate=0.1) opt["checks_per_policy"] = 100 opt["max_steps"] = 2000 opt["num_policy_checks"] = 10 experiment = Experiment(**opt) return experiment
def solve(self): """Solve the domain MDP.""" self.bellmanUpdates = 0 self.policy_improvement_iteration = 0 self.start_time = clock() # Check for Tabular Representation if not self.IsTabularRepresentation(): self.logger.error( "Policy Iteration works only with a tabular representation.") return 0 # Initialize the policy policy = eGreedy( deepcopy(self.representation), epsilon=0, forcedDeterministicAmongBestActions=True ) # Copy the representation so that the weight change during the evaluation does not change the policy # Setup the number of policy changes to 1 so the while loop starts policyChanges = True while policyChanges and deltaT(self.start_time) < self.planning_time: # Evaluate the policy converged = self.policyEvaluation(policy) # Improve the policy self.policy_improvement_iteration += 1 policy, policyChanges = self.policyImprovement(policy) if converged: self.logger.log('Converged!') super(PolicyIteration, self).solve()
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=119, initial_learn_rate=.06, discretization=34): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 30000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 1 domain = FiniteCartPoleBalanceOriginal(good_reward=0.) opt["domain"] = domain representation = IncrementalTabular(domain, discretization=discretization) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning(policy, representation, discount_factor=domain.discount_factor, lambda_=0.9, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=1204., lambda_=0., boyan_N0=7353.2, initial_learn_rate=.9712): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 500000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 10 sparsify = 1 kappa = 1e-7 domain = PST(NUM_UAV=4) opt["domain"] = domain initial_rep = IndependentDiscretization(domain) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, useCache=True, iFDDPlus=1 - kappa) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=0.012695, lambda_=0.2, boyan_N0=80.798, initial_learn_rate=0.402807): opt = {} opt["exp_id"] = exp_id opt["path"] = path opt["max_steps"] = 100000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 1 sparsify = 1 domain = BlocksWorld(blocks=6, noise=0.3) opt["domain"] = domain initial_rep = IndependentDiscretization(domain) representation = iFDDK(domain, discover_threshold, initial_rep, sparsify=sparsify, useCache=True, lazy=True, lambda_=lambda_) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Greedy_GQ(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", lambda_=0., boyan_N0=116.7025, initial_learn_rate=0.01402, discretization=6.): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 50000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 10 domain = InfCartPoleBalance() opt["domain"] = domain representation = IndependentDiscretization( domain, discretization=discretization) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = SARSA(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def runRewardIRL(self, N=5):
    opt = deepcopy(self.opt_template)
    dist = self.getIRLTDist(self.env_template["consumable"], N=N)
    bdist = self.getIRLDist(N=N, rand=True)
    dist = [d - bdist for d in dist]
    print(dist)
    domain = self.createStateDomain(
        waypoints=self.env_template["consumable"],
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
            x, y, z, w, dist, self.env_template["consumable"]))
    opt["domain"] = domain
    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=self.createStateDomain(
                       self.env_template["consumable"]),
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(experiment.result["return"])
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=753, initial_learn_rate=.7, resolution=25., num_rbfs=206., lambda_=0.75): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 10000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 10 domain = InfCartPoleBalance() opt["domain"] = domain representation = RBF(domain, num_rbfs=int(num_rbfs), resolution_max=resolution, resolution_min=resolution, const_feature=False, normalize=True, seed=exp_id) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning( policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=1.0, lambda_=0., boyan_N0=20.1, initial_learn_rate=0.330): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 100000 opt["num_policy_checks"] = 10 opt["checks_per_policy"] = 1 sparsify = 1 ifddeps = 1e-7 domain = IntruderMonitoring() opt["domain"] = domain initial_rep = IndependentDiscretization(domain) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, useCache=True, iFDDPlus=1 - ifddeps) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = SARSA( policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Tutorial/ChainMDPTut-SARSA"): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ opt = {} opt["exp_id"] = exp_id opt["path"] = path ## Domain: chainSize = 50 domain = ChainMDPTut(chainSize=chainSize) opt["domain"] = domain ## Representation # discretization only needed for continuous state spaces, discarded otherwise representation = Tabular(domain) ## Policy policy = eGreedy(representation, epsilon=0.2) ## Agent opt["agent"] = SARSA(policy=policy, representation=representation, discount_factor=domain.discount_factor, initial_learn_rate=0.1) opt["checks_per_policy"] = 100 opt["max_steps"] = 2000 opt["num_policy_checks"] = 10 experiment = Experiment(**opt) return experiment
def runIRL(self, N=5):
    opt = deepcopy(self.opt_template)
    dist = self.getIRLDist(N=N)
    bdist = self.getIRLDist(N=N, rand=True)
    # print dist - bdist
    domain = self.createMarkovDomain(
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.maxEntReward(
            x, y, z, w, dist - bdist))
    opt["domain"] = domain
    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    performance_domain = self.createMarkovDomain()
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=performance_domain,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(experiment.result["return"])
def _make_experiment(domain, exp_id=1, path="./Results/Tmp/test_FiniteTrackCartPole"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param domain: the domain object to be used in the experiment
    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = IncrementalTabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    agent = SARSA(representation=representation, policy=policy,
                  discount_factor=domain.discount_factor, learn_rate=0.1)

    checks_per_policy = 3
    max_steps = 50
    num_policy_checks = 3

    experiment = Experiment(**locals())
    return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=753, initial_learn_rate=.7, discretization=20., lambda_=0.75): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 5000 opt["num_policy_checks"] = 10 opt["checks_per_policy"] = 10 domain = InfCartPoleBalance(episodeCap=1000) opt["domain"] = domain representation = Tabular(domain, discretization=discretization) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=0.03613232738, lambda_=0., boyan_N0=12335.665, initial_learn_rate=0.037282, discretization=6.): opt = {} opt["exp_id"] = exp_id opt["max_steps"] = 50000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 10 sparsify = True kappa = 1e-7 domain = InfCartPoleBalance() opt["domain"] = domain initial_rep = IndependentDiscretization( domain, discretization=discretization) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, discretization=discretization, useCache=True, iFDDPlus=1 - kappa) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = SARSA(policy, representation, lambda_=lambda_, discount_factor=domain.discount_factor, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Tutorial/ChainMDPTut-SARSA"): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ opt = {} opt["exp_id"] = exp_id ## Domain: chainSize = 50 domain = ChainMDPTut(chainSize=chainSize) opt["domain"] = domain ## Representation # discretization only needed for continuous state spaces, discarded otherwise representation = Tabular(domain) ## Policy policy = eGreedy(representation, epsilon=0.2) ## Agent opt["agent"] = SARSA(representation=representation, policy=policy, disount_factor=domain.discount_factor, learn_rate=0.1) opt["checks_per_policy"] = 100 opt["max_steps"] = 2000 opt["num_policy_checks"] = 10 experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=0.03104970, lambda_=0., boyan_N0=1220.247254, initial_learn_rate=0.27986823): opt = {} opt["exp_id"] = exp_id opt["max_steps"] = 100000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 1 sparsify = 1 ifddeps = 1e-7 domain = BlocksWorld(blocks=6, noise=0.3, ) opt["domain"] = domain initial_rep = IndependentDiscretization(domain) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, useCache=True, iFDDPlus=1 - ifddeps) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning( policy, representation,discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=136, lambda_=0.0985, initial_learn_rate=0.090564, resolution=13., num_rbfs=9019): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 150000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 1 domain = HIVTreatment() opt["domain"] = domain representation = RBF(domain, num_rbfs=int(num_rbfs), resolution_max=resolution, resolution_min=resolution, const_feature=False, normalize=True, seed=exp_id) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_LEARNING( policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=120, initial_learn_rate=.06, discretization=50): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 40000 opt["num_policy_checks"] = 10 opt["checks_per_policy"] = 1 domain = FiftyChain() opt["domain"] = domain representation = Tabular(domain) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning(policy, representation, discount_factor=domain.discount_factor, lambda_=0.9, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(
        exp_id=1,
        # Path needs to have this format or hypersearch breaks
        path=get_log_path(),
        boyan_N0=330.65,
        initial_learn_rate=0.219,
        lambda_=0.5547,
        resolution=7.0,
        num_rbfs=86.0,
        epsilon=0.4645,
        inv_discount_factor=3.186e-5,
        checks_per_policy=1):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 300000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = checks_per_policy
    opt["path"] = path

    discount_factor = 1.0 - inv_discount_factor
    domain = DomainType()
    domain.discount_factor = discount_factor
    opt["domain"] = domain

    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=epsilon)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
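# Illustrative sketch (not in the original): because exp_id seeds both the
# random number generators and the RBF placement above, averaging performance
# means building the experiment once per seed. The helper below and its result
# handling are assumptions, not part of the source.
def run_seeds(n_runs=5):
    final_returns = []
    for run_id in range(1, n_runs + 1):
        exp = make_experiment(exp_id=run_id)
        exp.run(visualize_steps=False,
                visualize_learning=False,
                visualize_performance=0)
        exp.save()
        # Experiment.result["return"] holds one entry per policy check
        final_returns.append(exp.result["return"][-1])
    return final_returns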
def solve(self): """Solve the domain MDP.""" self.start_time = clock() # Used to track the total time for solving self.bellmanUpdates = 0 converged = False PI_iteration = 0 # The policy is maintained as separate copy of the representation. # This way as the representation is updated the policy remains intact policy = eGreedy( deepcopy(self.representation), epsilon=0, forcedDeterministicAmongBestActions=True) while self.hasTime() and not converged: self.trajectoryBasedPolicyEvaluation(policy) # Policy Improvement (Updating the representation of the value # function will automatically improve the policy PI_iteration += 1 # Theta can increase in size if the representation is expanded hence padding the weight vector with zeros paddedTheta = padZeros(policy.representation.weight_vec, len(self.representation.weight_vec)) # Calculate the change in the weight_vec as L2-norm delta_weight_vec = np.linalg.norm(paddedTheta - self.representation.weight_vec) converged = delta_weight_vec < self.convergence_threshold # Update the underlying value function of the policy policy.representation = deepcopy(self.representation) # self.representation performance_return, performance_steps, performance_term, performance_discounted_return = self.performanceRun() self.logger.info( 'PI #%d [%s]: BellmanUpdates=%d, ||delta-weight_vec||=%0.4f, Return=%0.3f, steps=%d, features=%d' % (PI_iteration, hhmmss( deltaT( self.start_time)), self.bellmanUpdates, delta_weight_vec, performance_return, performance_steps, self.representation.features_num)) if self.show: self.domain.show(a, representation=self.representation, s=s) # store stats self.result["bellman_updates"].append(self.bellmanUpdates) self.result["return"].append(performance_return) self.result["planning_time"].append(deltaT(self.start_time)) self.result["num_features"].append(self.representation.features_num) self.result["steps"].append(performance_steps) self.result["terminated"].append(performance_term) self.result["discounted_return"].append(performance_discounted_return) self.result["policy_improvemnt_iteration"].append(PI_iteration) if converged: self.logger.info('Converged!') super(TrajectoryBasedPolicyIteration, self).solve()
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", lambda_=0.9, boyan_N0=22.36, initial_learn_rate=.068, discretization=9): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 150000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 1 domain = HIVTreatment() opt["domain"] = domain representation = IndependentDiscretization(domain, discretization=discretization) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_Learning(policy, representation, discount_factor=domain.discount_factor, lambda_=0.9, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", lambda_=0., boyan_N0=3019.313, initial_learn_rate=0.965830): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 500000 opt["num_policy_checks"] = 30 opt["checks_per_policy"] = 10 beta_coef = 1e-6 domain = PST(NUM_UAV=4) opt["domain"] = domain representation = IndependentDiscretization(domain) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Greedy_GQ(policy, representation, discount_factor=domain.discount_factor, BetaCoef=beta_coef, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", boyan_N0=389.56, lambda_=0.52738, initial_learn_rate=.424409, discretization=30): opt = {} opt["exp_id"] = exp_id opt["max_steps"] = 400000 opt["num_policy_checks"] = 10 opt["checks_per_policy"] = 100 domain = PuddleGapWorld() opt["domain"] = domain representation = Tabular(domain, discretization=discretization) policy = eGreedy(representation, epsilon=0.1) # agent = SARSA(representation,policy,domain,initial_learn_rate=1., # lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100) opt["agent"] = Q_Learning(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", Rmax=10**10, lipschitz_constant=10**3, epsilon_d=0.01, knn = 1): opt = {} opt["exp_id"] = exp_id opt["path"] = path #opt["max_steps"] = 150000 opt["max_steps"] = 8000 #opt["num_policy_checks"] = 30 opt["num_policy_checks"] = 4 opt["checks_per_policy"] = 2 epsilon_d = 0.9 knn = 1 domain = HIVTreatment() opt["domain"] = domain representation = RMAX_repr( domain, Rmax, lipschitz_constant, epsilon_d=epsilon_d, k=knn) policy = eGreedy(representation, epsilon=0.0) opt["agent"] = RMAX( policy, representation,discount_factor=domain.discount_factor, lambda_=0, initial_learn_rate=0) experiment = Experiment(**opt) return experiment
def make_experiment( exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=.0807, boyan_N0=389.56, lambda_=0.52738, initial_learn_rate=.424409, kernel_resolution=8.567677): opt = {} opt["exp_id"] = exp_id opt["max_steps"] = 40000 opt["num_policy_checks"] = 20 opt["checks_per_policy"] = 100 active_threshold = 0.01 max_base_feat_sim = 0.5 sparsify = 1 domain = PuddleWorld() opt["domain"] = domain kernel_width = ( domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]) / kernel_resolution representation = KernelizediFDD(domain, sparsify=sparsify, kernel=gaussian_kernel, kernel_args=[kernel_width], active_threshold=active_threshold, discover_threshold=discover_threshold, normalization=True, max_active_base_feat=10, max_base_feat_sim=max_base_feat_sim) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = Q_LEARNING(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/", discover_threshold=1.0, lambda_=0., boyan_N0=20.1, initial_learn_rate=0.330): opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 100000 opt["num_policy_checks"] = 10 opt["checks_per_policy"] = 1 sparsify = 1 ifddeps = 1e-7 domain = IntruderMonitoring() opt["domain"] = domain initial_rep = IndependentDiscretization(domain) representation = iFDD(domain, discover_threshold, initial_rep, sparsify=sparsify, useCache=True, iFDDPlus=1 - ifddeps) policy = eGreedy(representation, epsilon=0.1) opt["agent"] = SARSA(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-IncrTabularTut"): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ opt = {} opt["exp_id"] = exp_id ## Domain: maze = os.path.join(GridWorld.default_map_dir, '4x5.txt') domain = GridWorld(maze, noise=0.3) opt["domain"] = domain ## Representation # discretization only needed for continuous state spaces, discarded otherwise representation = IncrTabularTut(domain) ## Policy policy = eGreedy(representation, epsilon=0.2) ## Agent opt["agent"] = SARSA(representation=representation, policy=policy, discount_factor=domain.discount_factor, learn_rate=0.1) opt["checks_per_policy"] = 100 opt["max_steps"] = 2000 opt["num_policy_checks"] = 10 experiment = Experiment(**opt) return experiment
def select_agent(name: Optional[str], _seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'nac':
        return NaturalActorCritic(GibbsPolicy(tabular),
                                  tabular,
                                  DOMAIN.discount_factor,
                                  forgetting_rate=0.3,
                                  min_steps_between_updates=100,
                                  max_steps_between_updates=1000,
                                  lambda_=0.7,
                                  learn_rate=0.1)
    elif name == 'tabular-q':
        return Q_Learning(
            eGreedy(tabular, epsilon=0.1),
            tabular,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.3,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    elif name == 'ifddk-q':
        lambda_ = 0.3
        ifddk = iFDDK(
            DOMAIN,
            discovery_threshold=1.0,
            initial_representation=IndependentDiscretization(DOMAIN),
            sparsify=True,
            useCache=True,
            lazy=True,
            lambda_=lambda_,
        )
        return Q_Learning(
            eGreedy(ifddk, epsilon=0.1),
            ifddk,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    else:
        raise NotImplementedError()
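# Illustrative wiring (not in the original): how select_agent might be combined
# with the module-level DOMAIN and MAX_STEPS it already references to build a
# runnable experiment. The option names follow the other factories in this
# collection; the helper itself and its defaults are assumptions.
def run_selected(name=None, seed=1, path="./Results/Selected/"):
    opt = {
        "exp_id": seed,
        "path": path,
        "domain": DOMAIN,          # module-level domain used by select_agent
        "agent": select_agent(name, seed),
        "max_steps": MAX_STEPS,    # module-level step budget used by select_agent
        "num_policy_checks": 10,
        "checks_per_policy": 10,
    }
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    return experiment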
def make_experiment(exp_id=1, path="./Results/Temp", initial_learn_rate=.40, lambda_=0., resolution=25, num_rbfs=300): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ # import sys # import os # cur_dir = os.path.expanduser("~/work/clipper/models/rl/") # sys.path.append(cur_dir) # from Domains import RCCarModified # from Policies import RCCarGreedy # Experiment variables opt = {} opt["path"] = path opt["exp_id"] = exp_id opt["max_steps"] = 200000 opt["num_policy_checks"] = 15 opt["checks_per_policy"] = 2 # Logging domain = RCCarLeftTurn(noise=0.) opt["domain"] = domain # Representation kernel = gaussian_kernel representation = RandomLocalBases(domain, gaussian_kernel, num=int(num_rbfs), normalization=True, resolution_max=resolution, seed=exp_id) policy = eGreedy(representation, epsilon=0.15) # if biasedaction > -1: # print "No Random starts with biasing {}".format(i % 4) # policy = BiasedGreedy(representation, epsilon=0.5, biasedaction=biasedaction) # Agent opt["agent"] = Q_Learning(policy, representation, domain.discount_factor, initial_learn_rate=initial_learn_rate, lambda_=lambda_, learn_rate_decay_mode="const") experiment = Experiment(**opt) return experiment
def make_experiment(exp_id=1, path="./Results/Experiments/", domain_class="GridWorld", mapf='9x9-2Path0.txt', max_steps=5000, num_policy_checks=50, agent_eps=0.1, env_noise=0.1, seg_goal=0.8, step_reward=-0.001, weights=None): """ Each file specifying an experimental setup should contain a make_experiment function which returns an instance of the Experiment class with everything set up. @param id: number used to seed the random number generators @param path: output directory where logs and results are stored """ opt = {} opt["exp_id"] = exp_id opt["path"] = path maze = os.path.join(GridWorldInter.default_map_dir, mapf) ## Domain: if domain_class == "GridWorld": domain = GridWorld(maze, noise=env_noise, step_reward=step_reward) elif domain_class == "GridWorldInter": domain = GridWorldInter(maze, noise=env_noise, new_goal=seg_goal) opt["domain"] = domain ## Representation # discretization only needed for continuous state spaces, discarded otherwise representation = Tabular(domain, discretization=20) if weights is not None: assert domain_class == "GridWorld" ## ensure that we are transferring to right class representation.weight_vec = weights ## Policy policy = eGreedy( representation, epsilon=agent_eps) ## Need to change this back, limiting noise ATM ## Agent opt["agent"] = Q_Learning(representation=representation, policy=policy, discount_factor=domain.discount_factor, initial_learn_rate=0.3) opt["checks_per_policy"] = 50 opt["max_steps"] = max_steps opt["num_policy_checks"] = num_policy_checks experiment = ExperimentSegment(**opt) return experiment