def _make_experiment(exp_id=1, path="./Results/Tmp/test_SystemAdministrator"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    ## Domain:
    domain = SystemAdministrator()

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = IncrementalTabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    agent = SARSA(representation=representation,
                  policy=policy,
                  discount_factor=domain.discount_factor,
                  initial_learn_rate=0.1)
    checks_per_policy = 2
    max_steps = 20
    num_policy_checks = 2

    experiment = Experiment(**locals())
    return experiment
def make_experiment(exp_id=1, path="."):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    # resolve the 4x5 map shipped with rlpy's GridWorld domain
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = IncrementalTabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(representation=representation,
                         policy=policy,
                         discount_factor=domain.discount_factor,
                         initial_learn_rate=0.1)

    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10

    experiment = Experiment(**opt)
    return experiment
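# Usage sketch (not part of the original module): assuming rlpy and the names
# used above (GridWorld, IncrementalTabular, eGreedy, SARSA, Experiment) are
# importable, an experiment built by make_experiment is typically run and
# saved as follows. The visualize_* flags mirror the run() calls that appear
# in the other experiment scripts in this section.
if __name__ == '__main__':
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,      # no per-step domain rendering
                   visualize_learning=False,   # no learning visualization
                   visualize_performance=0)    # no performance-run rendering
    experiment.save()                          # write results under opt["path"]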
def runTIRL(self, N=5, w=2, pruning=0.5):
    opt = deepcopy(self.opt_template)

    dist = self.getIRLDist(N=N)
    ac = self.getTSCWaypoints(N, w, pruning)

    domain = self.createStateDomain(
        waypoints=ac,
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
            x, y, z, w, dist))
    opt["domain"] = domain

    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=self.createStateDomain(
                       waypoints=self.env_template["consumable"]),
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
def runIRL(self, N=5):
    opt = deepcopy(self.opt_template)

    dist = self.getIRLDist(N=N)
    bdist = self.getIRLDist(N=N, rand=True)
    # print(dist - bdist)

    domain = self.createMarkovDomain(
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.maxEntReward(
            x, y, z, w, dist - bdist))
    opt["domain"] = domain

    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    performance_domain = self.createMarkovDomain()

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=performance_domain,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=120,
                    initial_learn_rate=.06,
                    discretization=50):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1

    domain = IntruderMonitoring()
    opt["domain"] = domain
    # discretization is discarded for this discrete domain, but pass the
    # argument through so the signature stays meaningful
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
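# Hedged sketch (not part of the original file): make_experiment above exposes
# boyan_N0 and initial_learn_rate so callers can vary them; a simple manual
# sweep might look like the helper below. The function name and the rate
# values are illustrative, and each run uses a distinct exp_id so the random
# number generators are seeded independently, as described in the docstrings
# of the other experiment scripts.
def sweep_learn_rates(rates=(0.03, 0.06, 0.12)):
    for i, lr in enumerate(rates, start=1):
        exp = make_experiment(exp_id=i, initial_learn_rate=lr)
        exp.run(visualize_steps=False,
                visualize_learning=False,
                visualize_performance=0)
        exp.save()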
def test_cell_expansion():
    """ Ensure start with 0 cells, add one for each state uniquely. """
    mapDir = os.path.join(__rlpy_location__, "Domains", "GridWorldMaps")
    mapname = os.path.join(mapDir, "4x5.txt")  # expect 4*5 = 20 states
    domain = GridWorld(mapname=mapname)

    rep = IncrementalTabular(domain, discretization=100)
    assert rep.features_num == 0  # start with 0 cells
    sOrigin = np.array([0, 0])
    s2 = np.array([1, 2])
    terminal = False  # nonterminal state
    a = 1  # arbitrary action

    # Expect to add feats for these newly seen states
    numAdded = rep.pre_discover(sOrigin, terminal, a, s2, terminal)
    assert numAdded == 2
    assert rep.features_num == 2
    phiVecOrigin = rep.phi(sOrigin, terminal)
    phiVec2 = rep.phi(s2, terminal)
    assert sum(phiVecOrigin) == 1
    assert sum(phiVec2) == 1

    phiVecOrigin2 = rep.phi(np.array([0, 0]), terminal=False)
    assert rep.features_num == 2  # didn't duplicate the feature
    assert sum(phiVecOrigin2) == 1

    # Make sure we don't duplicate feats anywhere
    numAdded = rep.pre_discover(np.array([0, 0]), terminal, a, s2, terminal)
    assert numAdded == 0
    assert rep.features_num == 2
def make_experiment(exp_id=1, path="./results/ITab"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    domain = hack_domain.HackDomain()
    opt["domain"] = domain

    # Representation
    global representation
    representation = IncrementalTabular(domain, discretization=20)
    representation = representation_pickle(representation, action=1)
    opt["path"] = "./results/ITab"  # overrides the path argument for this representation
    """
    representation = RBF(domain, num_rbfs=int(206.),
                         resolution_max=25., resolution_min=25.,
                         const_feature=False, normalize=True, seed=exp_id)
    opt["path"] = "./results/RBF"
    """

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    global agent
    agent = SARSA(representation=representation,
                  policy=policy,
                  discount_factor=domain.discount_factor,
                  initial_learn_rate=0.1,
                  learn_rate_decay_mode="boyan",
                  boyan_N0=100,
                  lambda_=0.4)
    agent = agent_pickle(agent, action=1)
    opt["agent"] = agent

    opt["checks_per_policy"] = 10
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
def getIRLTDist(self, waypoints, N=5, rand=False):
    sd = self.createStateDomain(self.env_template["consumable"])
    representation = IncrementalTabular(
        sd, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])

    if not rand:
        d = GoalPathPlanner(sd, representation, policy,
                            steps=self.opt_template["max_steps"])
    else:
        d = GoalPathPlanner(sd, representation, policy, steps=100)

    trajs = d.generateTrajectories(N=N)
    return calculateStateTemporalDist(np.shape(sd.map), trajs, waypoints)
def generate_multinomial_experiment(exp_id, agent_paths, path,
                                    unique=True, expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    opt["max_steps"] = 8000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=True)
    for a in agents:
        assert type(a.policy).__name__ == "DynamicsConfidenceGreedy"
        a.policy.epsilon = 0

    # `mapname` is expected to be defined at module level
    if expdomain:
        domain = expdomain(mapname=mapname, noise=0.1)
    else:
        domain = GridWorldMixed(mapname=mapname, noise=0.1)
    representation = IncrementalTabular(domain)
    policy = MAgentMultinomial(representation, agents)  # , tau=.1)
    print("$" * 10)
    print("You are currently running {}".format(policy.__class__))

    opt['agent'] = NoopAgent(representation=representation, policy=policy)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # Save a copy of this experiment script alongside its results
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment
def grid_world1_sliding(exp_id=3, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20

    noise = 0.1
    exp = 0.3
    discretization = 400

    maze = os.path.join(ConsumableGridWorld.default_map_dir, '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.slidingWindowEncoding(
            x, 3),
        noise=noise)
    opt["domain"] = domain

    # Representation
    representation = IncrementalTabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=exp)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
def make_experiment(agent_paths=["./"],
                    sublearning=False,
                    exp_id=3,
                    path="./Results/Confidence2/",
                    temp=0.10517212721767522,
                    discount_factor=0.7,
                    lambda_=0.0,
                    init_state=None):
    opt = {}
    opt["path"] = os.path.join(path, get_time_str())
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths, pretrained=True, load_confidence=True)
    for i, a in enumerate(agents):
        # import ipdb; ipdb.set_trace()
        a.policy.epsilon = i * 0.02 + 0.1
        # a.learn_rate_decay_mode = 'boyan'
        # a.learn_rate = a.initial_learn_rate = 0.9
        # a.boyan_N0 = 3000
        a.learn_rate_decay_mode = 'dabney'

    domain = RCCarModified(noise=0.1, init_state=(-2, 0.8, 0, 2.5))
    representation = IncrementalTabular(domain)
    policy = MultiAgentConfidence(representation, agents, tau=.1)
    print("$" * 10)
    print("You are currently running {}".format(policy.__class__))

    opt['agent'] = MetaAgent(representation=representation,
                             policy=policy,
                             learn_rate_decay_mode="const")
    opt['domain'] = domain
    experiment = Experiment(**opt)
    return experiment
def getTSCWaypoints(self, N=5, w=2, pruning=0.5):
    sd = self.createStateDomain(self.env_template["consumable"])
    representation = IncrementalTabular(
        sd, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    d = GoalPathPlanner(sd, representation, policy,
                        steps=self.opt_template["max_steps"])
    trajs = d.generateTrajectories(N=N)

    a = TransitionStateClustering(window_size=w)
    for t in trajs:
        # use the (x, y) position of each trajectory state as the demonstration
        n = len(t)
        demo = np.zeros((n, 2))
        for i in range(0, n):
            demo[i, :] = t[i][0:2]
        a.addDemonstration(demo)
    a.fit(normalize=False, pruning=pruning)

    dist = calculateStateDist(np.shape(sd.map), trajs)
    return discrete2DClustersToPoints(a.model, dist, radius=1)
def runSliding(self, k=3):
    opt = deepcopy(self.opt_template)

    domain = self.createSlidingDomain(k)
    opt["domain"] = domain

    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
def grid_world1_trb(exp_id=6, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20

    noise = 0.1
    exp = 0.3
    discretization = 20

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir, '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        binary=True,
        noise=noise)
    # domain = Pinball(noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    # Collect demonstration trajectories and cluster their transition states
    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    a = TransitionStateClustering(window_size=2)
    for t in trajs:
        n = len(t)
        demo = np.zeros((n, 2))
        for i in range(0, n):
            demo[i, :] = t[i][0:2]
        a.addDemonstration(demo)
    a.fit(normalize=False, pruning=0.5)

    dist = calculateStateDist((10, 7), trajs)
    ac = discrete2DClustersToPoints(a.model, dist, radius=1)
    # ac = [(round(a.means_[0][0]), round(a.means_[0][1])) for a in a.model]
    print(ac)

    # reinitialize the domain with the discovered waypoints
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, ac),
        noise=noise,
        binary=True)

    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.3)

    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)
    opt["domain"] = domain

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
def make_experiment(agent_paths="./",
                    pretrained=False,
                    sublearning=False,
                    yaml_file=None,
                    exp_id=3,
                    path="./Results/TestVoting/",
                    init_state=None):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 80000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 10
    # start_at = np.array([4, 6])

    # Logging

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    domain = RCCarModified(noise=0.1,
                           # random_start=True,
                           # init_state=init_state
                           )

    # agent_1 = loadagent("QL")  # most likely preloaded
    # agent_2 = loadagent("SARSA")
    # agent_3 = loadagent("NAC")
    # agents = [agent_1, agent_2, agent_3]
    agents = load_all_agents(agent_paths,
                             pretrained=pretrained,
                             yaml_file=yaml_file)
    for i, a in enumerate(agents):
        # import ipdb; ipdb.set_trace()
        a.policy.epsilon = i * 0.03 + 0.1

    representation = IncrementalTabular(domain)  # this doesn't matter
    policy = MultiAgentVoting(representation, agents, tau=.7)
    opt['agent'] = MetaAgent(representation=representation, policy=policy)
    opt['domain'] = domain

    experiment = Experiment(**opt)

    # Seeding to match the standard experiment
    for agent in agents:
        agent.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.init_randomization()
        # agent.representation.random_state = np.random.RandomState(
        #     experiment.randomSeeds[experiment.exp_id - 1])
        # agent.representation.init_randomization()
        # init_randomization is called on instantiation
        agent.policy.random_state = np.random.RandomState(
            experiment.randomSeeds[experiment.exp_id - 1])
        agent.policy.init_randomization()

    for i, a in enumerate(agents):
        print(a.policy.epsilon)

    # Save the parameter file and this script alongside the results
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    param_path = os.path.join(agent_paths[0], yaml_file)
    shutil.copy(param_path, path_join("params.yml"))
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment