def test_sarsa_valfun_chain():
    """
    Check if SARSA computes the value function of a simple Markov chain
    correctly.

    This only tests value function estimation; only one action is possible.
    The loop feeds the transitions 0 -> 1, 1 -> 2 and 2 -> 3 over and over,
    each with the value ``1.`` (presumably the reward) and with the last
    argument set to True only for the transition into state 3 (presumably
    the terminal flag).
    """
    rep = MockRepresentation()
    pol = eGreedy(rep)
    agent = SARSA(pol, rep, 0.9, lambda_=0.)
    # ``range`` rather than the Python-2-only ``xrange`` so that the test
    # runs unchanged on both Python 2 and Python 3.
    for i in range(1000):
        if i % 4 == 3:
            # No transition is generated out of the last state of the chain.
            continue
        agent.learn(np.array([i % 4]), [0], 0, 1.,
                    np.array([(i + 1) % 4]), [0, ], 0,
                    (i + 2) % 4 == 0)
    # Expected values for a factor of 0.9: 1 + 0.9 * 1.9, 1 + 0.9 * 1, 1, 0.
    V_true = np.array([2.71, 1.9, 1, 0])
    np.testing.assert_allclose(rep.weight_vec, V_true)
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1.0, lambda_=0., boyan_N0=20.1,
                    initial_learn_rate=0.330):
    """
    Assemble an Experiment that runs SARSA with an iFDD representation on
    the IntruderMonitoring domain.

    @param exp_id: identifier forwarded to the Experiment
    @param path: output location forwarded to the Experiment
    @param discover_threshold: second positional argument handed to iFDD
    @param lambda_: forwarded to SARSA
    @param boyan_N0: forwarded to SARSA (decay mode is "boyan")
    @param initial_learn_rate: forwarded to SARSA
    @return: the configured Experiment instance
    """
    settings = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 100000,
        "num_policy_checks": 10,
        "checks_per_policy": 1,
    }

    intruder_domain = IntruderMonitoring()
    settings["domain"] = intruder_domain

    # iFDD is seeded with an IndependentDiscretization of the same domain.
    base_features = IndependentDiscretization(intruder_domain)
    features = iFDD(
        intruder_domain,
        discover_threshold,
        base_features,
        sparsify=1,
        useCache=True,
        iFDDPlus=1 - 1e-7,
    )

    behaviour = eGreedy(features, epsilon=0.1)
    settings["agent"] = SARSA(
        behaviour,
        features,
        discount_factor=intruder_domain.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan",
        boyan_N0=boyan_N0,
    )

    return Experiment(**settings)
def test_sarsa_valfun_chain():
    """
    Check if SARSA computes the value function of a simple Markov chain
    correctly.

    This only tests value function estimation; only one action is possible.
    The loop feeds the transitions 0 -> 1, 1 -> 2 and 2 -> 3 over and over,
    each with the value ``1.`` (presumably the reward) and with the last
    argument set to True only for the transition into state 3 (presumably
    the terminal flag).
    """
    rep = MockRepresentation()
    pol = eGreedy(rep)
    agent = SARSA(pol, rep, 0.9, lambda_=0.)
    # ``range`` rather than the Python-2-only ``xrange`` so that the test
    # runs unchanged on both Python 2 and Python 3.
    for i in range(1000):
        if i % 4 == 3:
            # No transition is generated out of the last state of the chain.
            continue
        agent.learn(np.array([i % 4]), [0], 0, 1.,
                    np.array([(i + 1) % 4]), [0, ], 0,
                    (i + 2) % 4 == 0)
    # Expected values for a factor of 0.9: 1 + 0.9 * 1.9, 1 + 0.9 * 1, 1, 0.
    V_true = np.array([2.71, 1.9, 1, 0])
    np.testing.assert_allclose(rep.weight_vec, V_true)
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0., boyan_N0=116.7025,
                    initial_learn_rate=0.01402, discretization=6.):
    """
    Assemble an Experiment that runs SARSA with an IndependentDiscretization
    representation on the InfCartPoleBalance domain.

    @param exp_id: identifier forwarded to the Experiment
    @param path: output location forwarded to the Experiment
    @param lambda_: forwarded to SARSA
    @param boyan_N0: forwarded to SARSA (decay mode is "boyan")
    @param initial_learn_rate: forwarded to SARSA
    @param discretization: forwarded to IndependentDiscretization
    @return: the configured Experiment instance
    """
    settings = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 50000,
        "num_policy_checks": 20,
        "checks_per_policy": 10,
    }

    cartpole = InfCartPoleBalance()
    settings["domain"] = cartpole

    features = IndependentDiscretization(cartpole,
                                         discretization=discretization)
    behaviour = eGreedy(features, epsilon=0.1)
    settings["agent"] = SARSA(
        behaviour,
        features,
        discount_factor=cartpole.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan",
        boyan_N0=boyan_N0,
    )

    return Experiment(**settings)
def make_experiment(exp_id=1, path="./Results/Tutorial/ChainMDPTut-SARSA"):
    """
    Return a fully configured Experiment for the ChainMDPTut tutorial.

    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    @return: the configured Experiment instance
    """
    # Domain: a chain of 50 states.
    chain = ChainMDPTut(chainSize=50)

    # Representation: one tabular entry per state; no discretization argument
    # is passed here (it is only needed for continuous state spaces).
    table = Tabular(chain)

    # Policy
    behaviour = eGreedy(table, epsilon=0.2)

    # Agent
    learner = SARSA(policy=behaviour,
                    representation=table,
                    discount_factor=chain.discount_factor,
                    initial_learn_rate=0.1)

    return Experiment(exp_id=exp_id,
                      path=path,
                      domain=chain,
                      agent=learner,
                      checks_per_policy=100,
                      max_steps=2000,
                      num_policy_checks=10)
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03613232738,
        lambda_=0.,
        boyan_N0=12335.665,
        initial_learn_rate=0.037282,
        discretization=6.):
    """
    Assemble an Experiment that runs SARSA with an iFDD representation on
    the InfCartPoleBalance domain.

    @param exp_id: identifier forwarded to the Experiment
    @param path: output location forwarded to the Experiment
    @param discover_threshold: second positional argument handed to iFDD
    @param lambda_: forwarded to SARSA
    @param boyan_N0: forwarded to SARSA (decay mode is "boyan")
    @param initial_learn_rate: forwarded to SARSA
    @param discretization: forwarded to both IndependentDiscretization and
        iFDD
    @return: the configured Experiment instance
    """
    opt = {}
    opt["exp_id"] = exp_id
    # Forward ``path`` to the Experiment; without this entry the argument
    # would be accepted by this function but never used.
    opt["path"] = path
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True
    kappa = 1e-7

    domain = InfCartPoleBalance()
    opt["domain"] = domain

    # iFDD is seeded with an IndependentDiscretization of the same domain.
    initial_rep = IndependentDiscretization(
        domain,
        discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                         lambda_=lambda_,
                         discount_factor=domain.discount_factor,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=1, lambda_=0.3, initial_learn_rate=1.,
                    resolution=15., num_rbfs=5000):
    """
    Assemble an Experiment that runs SARSA with a ModifiedRBF representation
    and a Gibbs policy on the ClothCutter domain.

    @param exp_id: identifier forwarded to the Experiment; also used as the
        seed of the ModifiedRBF representation
    @param path: output location forwarded to the Experiment
    @param boyan_N0: forwarded to SARSA (decay mode is "const")
    @param lambda_: forwarded to SARSA
    @param initial_learn_rate: forwarded to SARSA
    @param resolution: used as both resolution_max and resolution_min of the
        representation
    @param num_rbfs: converted with int() and forwarded to the representation
    @return: the configured Experiment instance
    """
    settings = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 5000,
        "num_policy_checks": 1,
        "checks_per_policy": 1,
    }

    cutter = ClothCutter()
    settings["domain"] = cutter

    features = ModifiedRBF(
        cutter,
        num_rbfs=int(num_rbfs),
        resolution_max=resolution,
        resolution_min=resolution,
        const_feature=False,
        normalize=True,
        seed=exp_id,
    )
    behaviour = GibbsPolicy(features)
    settings["agent"] = SARSA(
        behaviour,
        features,
        discount_factor=cutter.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="const",
        boyan_N0=boyan_N0,
    )

    return Experiment(**settings)
def make_experiment(exp_id=1,
                    path="./Results/Tutorial/gridworld-IncrTabularTut"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    @return: the configured Experiment instance
    """
    opt = {}
    opt["exp_id"] = exp_id
    # Forward ``path`` to the Experiment; without this entry the argument
    # would be accepted by this function but never used.
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded
    # otherwise
    representation = IncrTabularTut(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    # NOTE(review): the keyword here is ``learn_rate``; confirm against the
    # SARSA signature whether ``initial_learn_rate`` is the intended name.
    opt["agent"] = SARSA(representation=representation, policy=policy,
                         discount_factor=domain.discount_factor,
                         learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
def make_experiment(self, exp_id=1, path="results/"):
    """
    Build an Experiment running SARSA with an RBF representation on an
    NDomain created from ``self.browser``.

    The representation and the agent are each passed through
    ``self._pickle`` and the results are stored on ``self.representation``
    and ``self.agent``.

    @param exp_id: identifier forwarded to the Experiment
    @param path: output location forwarded to the Experiment
    @return: the configured Experiment instance
    """
    domain = NDomain(self.browser)
    opt = {"exp_id": exp_id, "path": path, "domain": domain}

    representation = RBF(domain, num_rbfs=206)
    # presumably action='l' loads a previously stored object -- TODO confirm
    self.representation = self._pickle(representation, attrs='r', action='l')

    # NOTE(review): policy and agent are built from the local
    # ``representation``, not from the value returned by ``self._pickle``.
    policy = eGreedy(representation, epsilon=0.3)
    fresh_agent = SARSA(
        representation=representation,
        policy=policy,
        discount_factor=domain.discount_factor,
        initial_learn_rate=0.1,
        learn_rate_decay_mode="boyan",
        boyan_N0=100,
        lambda_=0.4,
    )
    self.agent = self._pickle(fresh_agent, attrs='a', action='l')

    # The Experiment receives whatever ``self._pickle`` returned.
    opt["agent"] = self.agent
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10

    return Experiment(**opt)
def make_experiment(exp_id=1, path="./results/ITab"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    As a side effect the module-level names ``representation`` and ``agent``
    are rebound to the objects used by the returned Experiment.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    @return: the configured Experiment instance
    """
    global representation
    global agent

    opt = {}
    opt["exp_id"] = exp_id
    # The caller-supplied ``path`` is used as-is; its default already points
    # at "./results/ITab", so no hard-coded value is assigned afterwards.
    opt["path"] = path

    # Domain:
    domain = hack_domain.HackDomain()
    opt["domain"] = domain

    # Representation
    representation = IncrementalTabular(domain, discretization=20)
    # presumably action=1 restores a stored representation -- TODO confirm
    representation = representation_pickle(representation, action=1)
    # Disabled alternative (would use the path "./results/RBF"):
    # representation = RBF(domain, num_rbfs=int(206.),
    #                      resolution_max=25., resolution_min=25.,
    #                      const_feature=False, normalize=True, seed=exp_id)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    agent = SARSA(representation=representation, policy=policy,
                  discount_factor=domain.discount_factor,
                  initial_learn_rate=0.1,
                  learn_rate_decay_mode="boyan", boyan_N0=100,
                  lambda_=0.4)
    # presumably action=1 restores a stored agent -- TODO confirm
    agent = agent_pickle(agent, action=1)
    opt["agent"] = agent

    opt["checks_per_policy"] = 10
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.05, boyan_N0=1885.42, lambda_=0.5879,
                    initial_learn_rate=0.1, kernel_resolution=10.7920):
    """
    Assemble an Experiment that runs SARSA with a KernelizediFDD
    representation and a SwimmerPolicy on the Swimmer domain.

    @param exp_id: identifier forwarded to the Experiment
    @param path: output location forwarded to the Experiment
    @param discover_threshold: forwarded to KernelizediFDD
    @param boyan_N0: forwarded to SARSA (decay mode is "boyan")
    @param lambda_: forwarded to SARSA
    @param initial_learn_rate: forwarded to SARSA
    @param kernel_resolution: divisor applied to the extent of every state
        dimension to obtain the kernel width
    @return: the configured Experiment instance
    """
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 1000000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1
    active_threshold = 0.05
    max_base_feat_sim = 0.5
    sparsify = 10

    domain = Swimmer()
    opt["domain"] = domain

    # Per-dimension width: (upper limit - lower limit) / kernel_resolution.
    kernel_width = (domain.statespace_limits[:, 1]
                    - domain.statespace_limits[:, 0]) / kernel_resolution
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=False,
                                    max_active_base_feat=100,
                                    max_base_feat_sim=max_base_feat_sim)
    # A domain-specific policy is used here rather than eGreedy.
    policy = SwimmerPolicy(representation)
    opt["agent"] = SARSA(policy, representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        lambda_=0.,
        boyan_N0=10.25,
        initial_learn_rate=.6102):
    """
    Assemble an Experiment that runs SARSA with an IndependentDiscretization
    representation on a BlocksWorld domain (6 blocks, noise 0.3).

    @param exp_id: identifier forwarded to the Experiment
    @param path: output location forwarded to the Experiment
    @param lambda_: forwarded to SARSA
    @param boyan_N0: forwarded to SARSA (decay mode is "boyan")
    @param initial_learn_rate: forwarded to SARSA
    @return: the configured Experiment instance
    """
    opt = {}
    opt["exp_id"] = exp_id
    # Forward ``path`` to the Experiment; without this entry the argument
    # would be accepted by this function but never used.
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 5

    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def select_agent(name: Optional[str], seed: int) -> Agent:
    """
    Build the agent identified by ``name`` for the module-level ``DOMAIN``.

    Recognised names are ``'tabular-lspi'`` (also chosen when ``name`` is
    None), ``'tabular-q'``, ``'tabular-sarsa'``, ``'ifdd-q'``, ``'kifdd-q'``
    and ``'rbfs-q'``. Every agent uses an eGreedy policy with epsilon 0.1.

    :param name: key of the agent configuration, or None for the default.
    :param seed: seed handed to the RBF representation (``'rbfs-q'`` only).
    :return: the constructed agent.
    :raises NotImplementedError: if ``name`` is not one of the known keys.
    """
    # The tabular representation is created up front for every request,
    # including those that end up not using it.
    table = Tabular(DOMAIN, discretization=20)

    if name in (None, 'tabular-lspi'):
        return LSPI(eGreedy(table, epsilon=0.1), table,
                    DOMAIN.discount_factor, MAX_STEPS, 1000)

    if name == 'tabular-q':
        return Q_Learning(eGreedy(table, epsilon=0.1), table,
                          DOMAIN.discount_factor, lambda_=0.3)

    if name == 'tabular-sarsa':
        return SARSA(eGreedy(table, epsilon=0.1), table,
                     DOMAIN.discount_factor, lambda_=0.3)

    if name == 'ifdd-q':
        base_rep = IndependentDiscretization(DOMAIN, discretization=18)
        ifdd_rep = iFDD(
            DOMAIN,
            discovery_threshold=8.63917,
            initial_representation=base_rep,
            useCache=True,
            iFDDPlus=True,
        )
        ifdd_policy = eGreedy(ifdd_rep, epsilon=0.1)
        return Q_Learning(
            ifdd_policy,
            ifdd_rep,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.42,
            initial_learn_rate=0.7422,
            learn_rate_decay_mode='boyan',
            boyan_N0=202,
        )

    if name == 'kifdd-q':
        # Per-dimension width: (upper limit - lower limit) / resolution.
        limits = DOMAIN.statespace_limits
        width = (limits[:, 1] - limits[:, 0]) / 8.567677
        kifdd_rep = KernelizediFDD(
            DOMAIN,
            sparsify=True,
            kernel=gaussian_kernel,
            kernel_args=[width],
            active_threshold=0.01,
            discover_threshold=0.0807,
            normalization=True,
            max_active_base_feat=10,
            max_base_feat_sim=0.5,
        )
        return Q_Learning(
            eGreedy(kifdd_rep, epsilon=0.1),
            kifdd_rep,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.52738,
            initial_learn_rate=0.4244,
            learn_rate_decay_mode='boyan',
            boyan_N0=389.56,
        )

    if name == 'rbfs-q':
        rbf_rep = RBF(
            DOMAIN,
            num_rbfs=96,
            resolution_max=21.0,
            resolution_min=21.0,
            const_feature=False,
            normalize=True,
            seed=seed,
        )
        return Q_Learning(
            eGreedy(rbf_rep, epsilon=0.1),
            rbf_rep,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.1953,
            initial_learn_rate=0.6633,
            learn_rate_decay_mode='boyan',
            boyan_N0=13444.0,
        )

    raise NotImplementedError()