def test_qlearn_valfun_chain():
    """
    Check if Q-Learning computes the value function of a simple Markov chain
    correctly. This only tests value function estimation; only one action is
    possible.
    """
    rep = MockRepresentation()
    pol = eGreedy(rep)
    agent = Q_Learning(pol, rep, 0.9, lambda_=0.)
    for i in xrange(1000):
        if i % 4 == 3:
            continue
        # learn(s, possible_actions, a, reward,
        #       ns, next_possible_actions, next_action, terminal)
        agent.learn(np.array([i % 4]), [0], 0, 1.,
                    np.array([(i + 1) % 4]), [0], 0, (i + 2) % 4 == 0)
    V_true = np.array([2.71, 1.9, 1, 0])
    np.testing.assert_allclose(rep.weight_vec, V_true)
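# Where V_true comes from (worked arithmetic for the 4-state chain above:
# reward 1 per transition, discount 0.9, state 3 terminal):
#   V(s3) = 0
#   V(s2) = 1 + 0.9 * V(s3) = 1
#   V(s1) = 1 + 0.9 * V(s2) = 1.9
#   V(s0) = 1 + 0.9 * V(s1) = 2.71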
def make_experiment(exp_id=1,
                    path="./Results/Experiments/HalfReward/" + getTimeStr() + "/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain
    maze = os.path.join(GridWorldInter.default_map_dir, '11x11-RoomsSeg.txt')
    domain = GridWorldInter(maze, noise=0.01)
    opt["domain"] = domain

    # Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.1)  # Need to change this back, limiting noise ATM

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = 12000
    opt["num_policy_checks"] = 20

    # experiment = ExperimentDelayed(**opt)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=753,
                    initial_learn_rate=.7,
                    resolution=25.,
                    num_rbfs=206.,
                    lambda_=0.75):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
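# Hedged note on the "boyan" learn-rate schedule used throughout these
# experiments (see Boyan (1999) and the rlpy Agent base class for the exact
# formula): the learning rate starts at initial_learn_rate and is annealed
# toward zero as episodes accumulate, with boyan_N0 controlling how long the
# rate stays near its initial value; larger N0 means slower decay.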
def runTIRL(self, N=5, w=2, pruning=0.5):
    opt = deepcopy(self.opt_template)
    dist = self.getIRLDist(N=N)
    ac = self.getTSCWaypoints(N, w, pruning)
    # NB: the lambda's `w` shadows the method argument `w`; `dist` is
    # captured from the enclosing scope.
    domain = self.createStateDomain(
        waypoints=ac,
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
            x, y, z, w, dist))
    opt["domain"] = domain
    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=self.createStateDomain(
                       waypoints=self.env_template["consumable"]),
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
def runIRL(self, N=5):
    opt = deepcopy(self.opt_template)
    dist = self.getIRLDist(N=N)
    bdist = self.getIRLDist(N=N, rand=True)
    # print dist - bdist
    domain = self.createMarkovDomain(
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.maxEntReward(
            x, y, z, w, dist - bdist))
    opt["domain"] = domain
    representation = IncrementalTabular(
        domain, discretization=self.env_template["discretization"])
    policy = eGreedy(representation, epsilon=self.env_template["exp"])
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    performance_domain = self.createMarkovDomain()
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=performance_domain,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=0.03104970,
                    lambda_=0.,
                    boyan_N0=1220.247254,
                    initial_learn_rate=0.27986823):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7

    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
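# Hedged note on iFDD (incremental Feature Dependency Discovery): starting
# from the independent-discretization features, it adds conjunctions of
# existing features once enough TD error accumulates on a candidate relative
# to discover_threshold; sparsify keeps only the most specific active
# features, and an iFDDPlus value near 1 (here 1 - ifddeps) selects the
# iFDD+ discovery criterion.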
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)
    # Policy
    policy = eGreedy(representation, epsilon=0.2)
    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
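# A minimal driver for the tutorial experiment above (sketch; the run()
# keyword arguments follow the pattern used elsewhere in this codebase):
if __name__ == '__main__':
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()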
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=389.56,
                    lambda_=0.52738,
                    initial_learn_rate=.424409,
                    discretization=30):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 400000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 100

    domain = PuddleGapWorld()
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain, initial_learn_rate=1.,
    #               lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=1240.89223,
                    initial_learn_rate=0.0063744503,
                    discretization=8.):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=119,
                    initial_learn_rate=.06,
                    discretization=34):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=753,
                    initial_learn_rate=.7,
                    discretization=20.,
                    lambda_=0.75):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance(episodeCap=1000)
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.9,
                    boyan_N0=22.36,
                    initial_learn_rate=.068,
                    discretization=9):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1204.,
                    lambda_=0.,
                    boyan_N0=7353.2,
                    initial_learn_rate=.9712):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    sparsify = 1
    kappa = 1e-7

    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=0.0148120884,
                    lambda_=0.,
                    boyan_N0=460.3858,
                    initial_learn_rate=0.8014120,
                    discretization=25.):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True
    kappa = 1e-7

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain,
                                            discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=120,
                    initial_learn_rate=.06,
                    discretization=50):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1

    domain = FiftyChain()
    opt["domain"] = domain
    # FiftyChain is fully discrete, so Tabular needs no discretization here
    # and the discretization parameter above is effectively unused.
    representation = Tabular(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def select_agent(name: Optional[str], _seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'nac':
        return NaturalActorCritic(GibbsPolicy(tabular),
                                  tabular,
                                  DOMAIN.discount_factor,
                                  forgetting_rate=0.3,
                                  min_steps_between_updates=100,
                                  max_steps_between_updates=1000,
                                  lambda_=0.7,
                                  learn_rate=0.1)
    elif name == 'tabular-q':
        return Q_Learning(
            eGreedy(tabular, epsilon=0.1),
            tabular,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.3,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    elif name == 'ifddk-q':
        lambda_ = 0.3
        ifddk = iFDDK(
            DOMAIN,
            discovery_threshold=1.0,
            initial_representation=IndependentDiscretization(DOMAIN),
            sparsify=True,
            useCache=True,
            lazy=True,
            lambda_=lambda_,
        )
        return Q_Learning(
            eGreedy(ifddk, epsilon=0.1),
            ifddk,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    else:
        raise NotImplementedError()
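# Example use (sketch; DOMAIN and MAX_STEPS are assumed to be module-level
# globals defined elsewhere in this file, as the function body implies):
#
#     agent = select_agent('tabular-q', _seed=1)
#     default_agent = select_agent(None, _seed=1)  # falls back to LSPI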
def make_experiment(exp_id=1, path="./Results/Temp",
                    initial_learn_rate=.40,
                    lambda_=0.,
                    resolution=25,
                    num_rbfs=300):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # import sys
    # import os
    # cur_dir = os.path.expanduser("~/work/clipper/models/rl/")
    # sys.path.append(cur_dir)
    # from Domains import RCCarModified
    # from Policies import RCCarGreedy

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 200000
    opt["num_policy_checks"] = 15
    opt["checks_per_policy"] = 2

    domain = RCCarLeftTurn(noise=0.)
    opt["domain"] = domain

    # Representation
    kernel = gaussian_kernel
    representation = RandomLocalBases(domain, kernel, num=int(num_rbfs),
                                      normalization=True,
                                      resolution_max=resolution,
                                      seed=exp_id)
    policy = eGreedy(representation, epsilon=0.15)
    # if biasedaction > -1:
    #     print "No Random starts with biasing {}".format(i % 4)
    #     policy = BiasedGreedy(representation, epsilon=0.5,
    #                           biasedaction=biasedaction)

    # Agent
    opt["agent"] = Q_Learning(policy, representation,
                              domain.discount_factor,
                              initial_learn_rate=initial_learn_rate,
                              lambda_=lambda_,
                              learn_rate_decay_mode="const")
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1, path="./Results/Experiments/",
                    domain_class="GridWorld",
                    mapf='9x9-2Path0.txt',
                    max_steps=5000,
                    num_policy_checks=50,
                    agent_eps=0.1,
                    env_noise=0.1,
                    seg_goal=0.8,
                    step_reward=-0.001,
                    weights=None):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    maze = os.path.join(GridWorldInter.default_map_dir, mapf)

    # Domain
    if domain_class == "GridWorld":
        domain = GridWorld(maze, noise=env_noise, step_reward=step_reward)
    elif domain_class == "GridWorldInter":
        domain = GridWorldInter(maze, noise=env_noise, new_goal=seg_goal)
    else:
        raise ValueError("unknown domain_class: %s" % domain_class)
    opt["domain"] = domain

    # Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)
    if weights is not None:
        # ensure that we are transferring to the right class
        assert domain_class == "GridWorld"
        representation.weight_vec = weights

    # Policy
    policy = eGreedy(representation, epsilon=agent_eps)  # Need to change this back, limiting noise ATM

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = max_steps
    opt["num_policy_checks"] = num_policy_checks
    experiment = ExperimentSegment(**opt)
    return experiment
def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000000
    opt["num_policy_checks"] = 50

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, 'large_state.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    discover_threshold = 1.
    lambda_ = 0.3
    initial_learn_rate = 0.11
    boyan_N0 = 100
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain, discover_threshold, initial_rep,
                           sparsify=True, useCache=True, lazy=True,
                           lambda_=lambda_)

    # Policy
    policy = eGreedyDecay(representation, epsilonInit=0.9)

    # Agent
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1, path="./Results/Experiments/",
                    mapf='9x9-2Path0.txt',
                    eval_map='9x9-2Path0.txt',
                    max_eps=10000,
                    num_policy_checks=50,
                    checks_per_policy=50,
                    agent_eps=0.2,
                    env_noise=0.1,
                    episodeCap=30,
                    step_reward=-0.1,
                    door_reward=0.1,
                    weights=None):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    :param exp_id: number used to seed the random number generators
    :param path: output directory where logs and results are stored
    :param max_eps: total number of episodes to roll out
    :param episodeCap: total number of steps to take within one episode
    """
    # from IPython.lib.pretty import pprint
    # print pprint(vars())
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    maze = os.path.join(GridWorldInter.default_map_dir, mapf)
    eval_maze = os.path.join(GridWorldInter.default_map_dir, eval_map)

    # Domain
    domain = GridWorldTime(maze, noise=env_noise, episodeCap=episodeCap,
                           door_reward=door_reward, step_reward=step_reward)
    eval_domain = GridWorldTime(eval_maze, noise=env_noise,
                                episodeCap=episodeCap,
                                step_reward=step_reward)
    opt["domain"] = domain
    opt["eval_domain"] = eval_domain
    # TODO: Can change this implementation to have Experiment take care of
    # running default maps

    # Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)
    if weights is not None:
        representation.weight_vec = weights

    # Policy
    policy = eGreedy(representation, epsilon=agent_eps)  # Need to change this back, limiting noise ATM

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.3,
                              learn_rate_decay_mode='const')
    opt["max_eps"] = max_eps
    opt["checks_per_policy"] = checks_per_policy
    opt["num_policy_checks"] = num_policy_checks
    experiment = ExperimentSegment(**opt)
    return experiment
def generate_meta_experiment(exp_id, agent_paths, path, unique=True,
                             expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    # NB: `mapname` is not a parameter; it is expected to be a module-level
    # global.
    agents = load_all_agents(agent_paths, pretrained=True,
                             load_confidence=True)
    for a in agents:
        a.policy.epsilon = 0
    if expdomain:
        actual_domain = expdomain(mapname=mapname,
                                  terrain_augmentation=False, noise=0.1)
    else:
        actual_domain = GridWorldMixed(mapname=mapname,
                                       terrain_augmentation=False, noise=0.1)
    domain = PolicyMixer(actual_domain, agents)
    representation = Tabular(domain)
    policy = eGreedy(representation)  # , tau=.1)
    opt['agent'] = Q_Learning(policy, representation,
                              discount_factor=0.9,
                              initial_learn_rate=0.8,
                              lambda_=0.5,
                              learn_rate_decay_mode='boyan',
                              boyan_N0=2380)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # Copy this source file into the results directory for reproducibility.
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment
def make_experiment(arm, exp_id=1, path="./Results/Tutorial/dvrk-planar"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param arm: arm object passed through to DVRKPlanarDomain
    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    # u = [{'x': 0.0381381038389, 'y': 0.0348028884984},
    #      {'x': 0.0553447503026, 'y': 0.0523395529395}]
    u = [{'x': 0.0193056007411, 'y': 0.0370999763421},
         {'x': 0.0393056007411, 'y': 0.0370999763421}]
    domain = DVRKPlanarDomain(arm, u[0], u[1])
    opt["domain"] = domain

    # Representation
    representation = RBF(domain, num_rbfs=1000,
                         resolution_max=10, resolution_min=10,
                         const_feature=False, normalize=True, seed=2)
    # Policy
    policy = eGreedy(representation, epsilon=0.2)
    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.875,
                              learn_rate_decay_mode="boyan", boyan_N0=1000,
                              lambda_=0.0)
    opt["checks_per_policy"] = 1
    opt["max_steps"] = 100
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment, domain, policy, representation
def get_demonstrations(demonstration_per_policy, max_policy_iter,
                       num_policy_demo_checks, agent):
    """Return demonstrations generated from the parallel-parking car rlpy
    simulator."""
    # NB: the four parameters above are currently unused; the experiment
    # configuration below is hard-coded.
    opt = {}
    opt["exp_id"] = 1
    # opt["path"] = "./Results/gridworld2"
    opt["checks_per_policy"] = 5
    opt["max_steps"] = 1000000
    opt["num_policy_checks"] = 1000
    exp = 0.3
    discretization = 20

    walls = [(-1, -0.3, 0.1, 0.3)]
    domain = RCIRL([(-0.1, -0.25)], wallArray=walls, noise=0,
                   rewardFunction=RCIRL.rcreward)
    domain.episodeCap = 200

    # Representation
    representation = RBF(domain, num_rbfs=1000,
                         resolution_max=25, resolution_min=25,
                         const_feature=False, normalize=True,
                         seed=1)  # discretization=discretization
    # Policy
    policy = eGreedy(representation, epsilon=exp)
    # Agent
    # opt["agent"] = agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.7,
                              learn_rate_decay_mode="boyan", boyan_N0=700,
                              lambda_=0.)
    opt["domain"] = domain

    pdomain = RCIRL([(-0.1, -0.25)], wallArray=walls, noise=0)
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=pdomain,
                   visualize_learning=False,
                   visualize_performance=1)
    # return experiment
    return map(lambda x: map(lambda y: np.array(y), x),
               experiment.all_experiment_list)
def make_experiment(exp_id=2, agent_paths=None,
                    mapname="12x12-Bridge.txt",
                    path="./Results/MetaRLSarsa",
                    boyan_N0=1000,
                    discount_factor=0.8286376417073243,
                    initial_learn_rate=0.5,
                    lambda_=0.2):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 5
    opt["checks_per_policy"] = 10
    # start_at = np.array([4, 5])

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    map_dir = os.path.expanduser("~/work/clipper/models/rl/GridworldMaps/")
    maze = os.path.join(map_dir, mapname)
    domain = GridWorldModified(maze,
                               # random_start=True,
                               noise=0.1,
                               # start_at=np.array([4, 6])
                               )
    # agent_1 = loadagent("QL")  # most likely preloaded
    # agent_2 = loadagent("SARSA")
    # agent_3 = loadagent("NAC")
    # agents = [agent_1, agent_2, agent_3]
    # agents = load_all_agents(agent_paths)
    # domain = PolicyMixer(actual_domain, agents, seed=exp_id)
    representation = Tabular(domain)
    policy = eGreedy(representation, epsilon=0.3)
    opt['agent'] = Q_Learning(representation=representation, policy=policy,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              discount_factor=discount_factor)
    opt['domain'] = domain
    experiment = Experiment(**opt)
    return experiment
def grid_world1_reward(exp_id=2, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise,
        binary=True)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=discretization)
    # Policy
    policy = eGreedy(representation, epsilon=exp)
    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=8948708.75,
                    boyan_N0=627.12,
                    lambda_=0.5433,
                    initial_learn_rate=0.59812,
                    kernel_resolution=24.340):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    # domain = FiniteCartPoleBalanceModern()
    # One kernel width per state dimension: the dimension's range divided by
    # the desired resolution.
    kernel_width = old_div(
        (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]),
        kernel_resolution)
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain,
    #               initial_learn_rate=initial_learn_rate,
    #               lambda_=.0, learn_rate_decay_mode="boyan",
    #               boyan_N0=boyan_N0)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=2, path="./Results/MetaRLSarsa",
                    boyan_N0=680.715,
                    discount_factor=0.9,
                    initial_learn_rate=1,
                    lambda_=0.106):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 50
    opt["checks_per_policy"] = 100
    # start_at = np.array([4, 5])

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    map_dir = os.path.expanduser("~/work/clipper/models/rl/GridworldMaps/")
    maze = os.path.join(map_dir, "12x12-Bridge.txt")
    print(maze)
    domain = GridWorld(maze,
                       # random_start=True,
                       noise=0.1,
                       # start_at=np.array([4, 6])
                       )
    representation = Tabular(domain)
    policy = eGreedy(representation, epsilon=0.3)
    opt['agent'] = Q_Learning(representation=representation, policy=policy,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              discount_factor=discount_factor)
    opt['domain'] = domain
    experiment = Experiment(**opt)
    # print opt
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1e6,
                    boyan_N0=5e5,
                    lambda_=0.5,
                    initial_learn_rate=0.9,
                    kernel_resolution=10):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = HelicopterHover()
    opt["domain"] = domain
    # domain = FiniteCartPoleBalanceModern()
    kernel_width = (domain.statespace_limits[:, 1]
                    - domain.statespace_limits[:, 0]) / kernel_resolution
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=linf_triangle_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def __init__(self, domain, representation, policy, steps=100000):
    opt = {}
    opt["domain"] = domain

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    opt["checks_per_policy"] = 10
    opt["max_steps"] = steps
    opt["num_policy_checks"] = 20

    experiment = Experiment(**opt)
    experiment.run()
    self.policy = opt["agent"].policy
    self.domain = domain
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=143.791,
                    initial_learn_rate=0.18696):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 5

    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment