# RLPy experiment factories collected from separate example scripts; each
# script defines its own make_experiment, which is why the name repeats
# below. Imports assume the rlpy package layout and may need adjusting to
# the installed version.
from past.utils import old_div

from rlpy.Agents import Q_LEARNING
from rlpy.Domains import (FiniteCartPoleBalanceModern,
                          FiniteCartPoleBalanceOriginal, HIVTreatment,
                          InfCartPoleBalance, PuddleGapWorld, PuddleWorld)
from rlpy.Experiments import Experiment
from rlpy.Policies import eGreedy
from rlpy.Representations import (RBF, IndependentDiscretization, iFDD,
                                  KernelizediFDD, NonparametricLocalBases,
                                  gaussian_kernel, linf_triangle_kernel)


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=2120, initial_learn_rate=.26, lambda_=0.9,
                    resolution=8, num_rbfs=4958):
    """Q-learning with an RBF representation on FiniteCartPoleBalanceModern."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = FiniteCartPoleBalanceModern()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
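
# A minimal sketch (an assumption, not rlpy's verbatim implementation) of the
# per-episode "boyan" learning-rate decay selected above through
# learn_rate_decay_mode="boyan": the rate starts near initial_learn_rate and
# decays roughly as (N0 + 1) / (N0 + t**1.1) in the episode count t, so larger
# boyan_N0 values hold the rate high for longer.
def boyan_learn_rate_sketch(initial_learn_rate, boyan_N0, episode_count):
    """Hypothetical helper; rlpy computes this schedule inside the agent."""
    return initial_learn_rate * (boyan_N0 + 1.) / (
        boyan_N0 + (episode_count + 1) ** 1.1)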
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136, lambda_=0.0985,
                    initial_learn_rate=0.090564, resolution=13.,
                    num_rbfs=9019):
    """Q-learning with nonparametric local bases on HIVTreatment.

    num_rbfs is accepted for sweep compatibility but unused by this
    representation.
    """
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = NonparametricLocalBases(domain,
                                             kernel=linf_triangle_kernel,
                                             resolution=resolution,
                                             normalization=True)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=235, initial_learn_rate=.05, discretization=5):
    """Q-learning with independent per-dimension discretization on
    FiniteCartPoleBalanceOriginal."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136, lambda_=0.0985,
                    initial_learn_rate=0.090564, resolution=13.,
                    num_rbfs=9019):
    """Q-learning with an RBF representation on HIVTreatment."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1e-20, boyan_N0=1589.56,
                    lambda_=0.52738, initial_learn_rate=.0124409,
                    kernel_resolution=8.5):
    """Q-learning with kernelized iFDD (Gaussian kernel) on PuddleGapWorld."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 100
    active_threshold = 0.01
    max_base_feat_sim = 0.7
    sparsify = 10

    domain = PuddleGapWorld()
    opt["domain"] = domain
    # Per-dimension kernel widths: state-space range divided by resolution.
    kernel_width = old_div(
        (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]),
        kernel_resolution)
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=False,
                                    max_active_base_feat=100,
                                    max_base_feat_sim=max_base_feat_sim)
    # policy = UniformRandom(representation)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain, initial_learn_rate=1.,
    #               lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan_const",
                              boyan_N0=boyan_N0)
    stat_bins_per_state_dim = 22  # unused; kept from the original script
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.21, boyan_N0=37., lambda_=.9,
                    initial_learn_rate=.07, kernel_resolution=13.14):
    """Q-learning with kernelized iFDD (Gaussian kernel) on
    FiniteCartPoleBalanceOriginal."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    # domain = FiniteCartPoleBalanceModern()
    kernel_width = old_div(
        (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]),
        kernel_resolution)
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain,
    #               initial_learn_rate=initial_learn_rate, lambda_=.0,
    #               learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=8.925, boyan_N0=840., lambda_=0.5203,
                    initial_learn_rate=.7512, kernel_resolution=26.4777):
    """Q-learning with kernelized iFDD (L-infinity triangle kernel) on
    PuddleWorld."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 100
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = PuddleWorld()
    opt["domain"] = domain
    kernel_width = old_div(
        (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]),
        kernel_resolution)
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=linf_triangle_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain, initial_learn_rate=1.,
    #               lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
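
# A rough sketch (an assumption about rlpy's kernels, not their verbatim
# code) of the two kernel families passed to KernelizediFDD in these
# experiments: gaussian_kernel places a Gaussian bump scaled by the
# per-dimension widths, while linf_triangle_kernel is a triangular bump
# under the width-scaled L-infinity distance. A hypothetical stand-in for
# the latter:
import numpy as np


def linf_triangle_kernel_sketch(s, center, widths):
    """Hypothetical approximation: 1 - max_i |s_i - c_i| / w_i, floored at 0."""
    scaled = np.abs(np.asarray(s) - np.asarray(center)) / np.asarray(widths)
    return max(0.0, 1.0 - float(np.max(scaled)))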
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.02208, lambda_=0.6756,
                    boyan_N0=480.72, initial_learn_rate=.2911,
                    kernel_resolution=18.435):
    """Q-learning with kernelized iFDD (L-infinity triangle kernel) on
    InfCartPoleBalance."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    kernel_width = (domain.statespace_limits[:, 1]
                    - domain.statespace_limits[:, 0]) / kernel_resolution
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=linf_triangle_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain, initial_learn_rate=1.,
    #               lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.01256, lambda_=0.81,
                    boyan_N0=9811.2337, initial_learn_rate=.15,
                    discretization=22.):
    """Q-learning with iFDD over an independent discretization on
    InfCartPoleBalance."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain,
                                            discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify, discretization=discretization,
                          useCache=True, iFDDPlus=True)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain, initial_learn_rate=1.,
    #               lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.0807, boyan_N0=389.56,
                    lambda_=0.52738, initial_learn_rate=.424409,
                    kernel_resolution=8.567677):
    """Q-learning with kernelized iFDD (Gaussian kernel) on PuddleWorld."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 100
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = PuddleWorld()
    opt["domain"] = domain
    kernel_width = (domain.statespace_limits[:, 1]
                    - domain.statespace_limits[:, 0]) / kernel_resolution
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=107091, lambda_=0.245, boyan_N0=514,
                    initial_learn_rate=.327, discretization=18):
    """Q-learning with iFDD over an independent discretization on
    HIVTreatment."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1
    sparsify = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain,
                                            discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify, discretization=discretization,
                          useCache=True, iFDDPlus=True)
    # representation.PRINT_MAX_RELEVANCE = True
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain,
    #               initial_learn_rate=initial_learn_rate, lambda_=.0,
    #               learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=77., boyan_N0=11, lambda_=0.9,
                    initial_learn_rate=.05, discretization=47):
    """Q-learning with iFDD over an independent discretization on
    FiniteCartPoleBalanceOriginal."""
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain,
                                            discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify, discretization=discretization,
                          useCache=True, iFDDPlus=True)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain,
    #               initial_learn_rate=initial_learn_rate, lambda_=.0,
    #               learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    opt["agent"] = Q_LEARNING(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
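
# Typical driver, following the pattern used throughout the rlpy example
# scripts (a sketch; the visualization flags are assumptions and can be
# dropped). Note that in this concatenated form only the last
# make_experiment definition above is still bound to the name.
if __name__ == '__main__':
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,      # show each learning step?
                   visualize_learning=False,   # show value function / policy?
                   visualize_performance=0)    # show performance runs?
    experiment.plot()
    experiment.save()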