def createMarkovDomain(self, k=1, rewardFunction=None):
    """Build a consumable grid-world domain with a k-step Markov encoding."""
    return ConsumableGridWorldIRL(
        self.env_template["consumable"],
        mapname=self.env_template["map"],
        encodingFunction=lambda x: ConsumableGridWorldIRL.allMarkovEncoding(x, k),
        rewardFunction=rewardFunction,
        noise=self.env_template["noise"],
        binary=True)
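# Usage sketch for createMarkovDomain (assumption: the enclosing class keeps
# an env_template dict with "consumable", "map", and "noise" entries, as the
# method reads above; `factory` is a hypothetical instance of that class):
#
#     domain = factory.createMarkovDomain(k=2)
#     shaped = factory.createMarkovDomain(k=2, rewardFunction=my_reward)
#     # my_reward is a hypothetical reward callable matching the domain's
#     # rewardFunction signature.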
def grid_world1_markov(exp_id=1, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20

    noise = 0.1
    exp = 0.3
    discretization = 400

    # Domain
    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.allMarkovEncoding(x),
        noise=noise)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=exp)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return (np.max(experiment.result["return"]),
            np.sum(experiment.result["return"]))
def gridworld1_irl(exp_id=5, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20

    noise = 0.1
    exp = 0.3
    discretization = 400

    # Demonstration domain: binary state-visit encoding for the first goal.
    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(x, [(7, 5)]),
        noise=noise,
        binary=True)

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=exp)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    # Generate demonstration trajectories and estimate the empirical
    # state-visitation distribution over the 10x7 grid.
    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    dist = calculateStateDist((10, 7), trajs)

    # Reset for the learning run: the learning domain shapes its reward with
    # the MaxEnt state distribution, while the performance domain keeps the
    # original reward for evaluation. The representation is rebuilt on the
    # new domain and the policy on the new representation.
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.allMarkovEncoding(x),
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.maxEntReward(x, y, z, w, dist),
        noise=noise)
    pdomain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.allMarkovEncoding(x),
        noise=noise)
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=exp)
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)
    opt["domain"] = domain

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=pdomain,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return (np.max(experiment.result["return"]),
            np.sum(experiment.result["return"]))
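if __name__ == "__main__":
    # Minimal driver sketch, assuming the file-level imports for os, numpy,
    # and the rlpy/ConsumableGridWorldIRL classes used above are in scope:
    # run the Markov baseline and the MaxEnt-IRL variant, then print the
    # best and cumulative returns each experiment reports.
    markov_best, markov_total = grid_world1_markov(exp_id=1)
    irl_best, irl_total = gridworld1_irl(exp_id=5)
    print("grid_world1_markov: best=%s total=%s" % (markov_best, markov_total))
    print("gridworld1_irl: best=%s total=%s" % (irl_best, irl_total))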