/ kernel_resolution representation = KernelizediFDD(domain, sparsify=sparsify, kernel=linf_triangle_kernel, kernel_args=[kernel_width], active_threshold=active_threshold, discover_threshold=discover_threshold, normalization=True, max_active_base_feat=10, max_base_feat_sim=max_base_feat_sim) policy = eGreedy(representation, epsilon=0.1) # lambda_=.0, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) opt["agent"] = Q_Learning(policy, representation, discount_factor=domain.discount_factor, lambda_=lambda_, initial_learn_rate=initial_learn_rate, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0) experiment = Experiment(**opt) return experiment if __name__ == '__main__': from rlpy.Tools.run import run_profiled run_profiled(make_experiment) #experiment = make_experiment(1) # experiment.run(visualize_learning=True) # experiment.plot() # experiment.save()
def make_experiment(
        exp_id=1,
        path="./Results/Temp/{domain}/{agent}/{representation}/",
        lambda_=0.,
        boyan_N0=3571.6541,
        initial_learn_rate=0.62267772):
    """Create a Q-Learning experiment on the 4-UAV PST domain.

    The agent is tabular Q(lambda) over an ``IndependentDiscretization``
    representation, exploring with an epsilon-greedy policy (epsilon=0.1)
    and decaying its learning rate with the Boyan schedule.

    :param exp_id: identifier / seed of this experiment run.
    :param path: results directory template (placeholders are filled in
        by the rlpy ``Experiment`` machinery).
    :param lambda_: eligibility-trace decay for Q(lambda).
    :param boyan_N0: N0 constant of the Boyan learning-rate schedule.
    :param initial_learn_rate: learning rate at the start of training.
    :return: a fully configured ``Experiment`` instance (not yet run).
    """
    # Experiment bookkeeping / evaluation schedule.
    opt = {
        "path": path,
        "exp_id": exp_id,
        "max_steps": 500000,
        "num_policy_checks": 30,
        "checks_per_policy": 10,
    }

    # Domain: Persistent Search and Track with four UAVs.
    domain = PST(NUM_UAV=4)
    opt["domain"] = domain

    # Per-dimension tabular representation and exploration policy.
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)

    # Q(lambda) learner with Boyan learning-rate decay.
    opt["agent"] = Q_Learning(
        policy, representation,
        discount_factor=domain.discount_factor,
        lambda_=lambda_,
        initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan",
        boyan_N0=boyan_N0)

    return Experiment(**opt)


if __name__ == '__main__':
    from rlpy.Tools.run import run_profiled
    run_profiled(make_experiment)
opt = {} opt["exp_id"] = exp_id opt["path"] = path # Domain: maze = os.path.join(GridWorld.default_map_dir, '4x5.txt') domain = GridWorld(maze, noise=0.3) opt["domain"] = domain # Representation representation = Tabular(domain, discretization=20) # Policy policy = eGreedy(representation, epsilon=0.2) # Agent opt["agent"] = Q_Learning(representation=representation, policy=policy, discount_factor=domain.discount_factor, initial_learn_rate=0.1, learn_rate_decay_mode="boyan", boyan_N0=100, lambda_=0.) opt["checks_per_policy"] = 100 opt["max_steps"] = 2000 opt["num_policy_checks"] = 10 experiment = Experiment(**opt) return experiment if __name__ == '__main__': from rlpy.Tools.run import run_profiled run_profiled(make_experiment, '.', 'gridworld.pdf')
domain = GridWorld(maze, noise=0.3) opt["domain"] = domain # Representation representation = Tabular(domain, discretization=20) # Policy policy = eGreedy(representation, epsilon=0.2) # Agent opt["agent"] = Q_Learning( representation=representation, policy=policy, discount_factor=domain.discount_factor, initial_learn_rate=0.1, learn_rate_decay_mode="boyan", boyan_N0=100, lambda_=0.0, ) opt["checks_per_policy"] = 100 opt["max_steps"] = 2000 opt["num_policy_checks"] = 10 experiment = Experiment(**opt) return experiment if __name__ == "__main__": from rlpy.Tools.run import run_profiled run_profiled(make_experiment, ".", "gridworld.pdf")