def runExperiment(opt, visualize_steps, visualize_learning,
                  visualize_performance, q):
    # The Experiment must be created inside the child process; passing in an
    # already-constructed object would break the logger.
    exp = Experiment(**opt)
    # Attach a handler to the logger. Log records flow:
    # child-process log -> MemoryHandler -> OutputHandler -> queue
    #   <- ExpOutputDialog.receive -> QTextEdit
    # Records cross the process boundary via the queue; the main thread
    # picks up new messages from the queue on a worker thread.
    from logging.handlers import MemoryHandler
    handler = MemoryHandler(capacity=1024, flushLevel=logging.INFO,
                            target=OutputHandler(q))
    exp.logger.addHandler(handler)
    exp.run(visualize_steps=visualize_steps,            # should each learning step be shown?
            visualize_learning=visualize_learning,      # show policy / value function?
            visualize_performance=visualize_performance)  # show performance runs?
    exp.plot()

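# A minimal sketch (not from the original source) of how runExperiment
# might be launched from the GUI process; the options dict and visualize
# flags are placeholders, and only the Process/Queue wiring described in
# the comments above is illustrated.
from multiprocessing import Process, Queue

q = Queue()                        # carries log records to the main process
opt = {"exp_id": 1}                # placeholder Experiment options
p = Process(target=runExperiment, args=(opt, False, False, False, q))
p.start()                          # the Experiment is built inside the child
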
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=753,
                    initial_learn_rate=.7,
                    discretization=20.,
                    lambda_=0.75):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance(episodeCap=1000)
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

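# Tuned variants like the one above are usually run in batch over many
# seeds. A sketch assuming an rlpy-style batch runner
# (rlpy.Tools.run.run); the module path below is a placeholder.
from rlpy.Tools.run import run

run("examples/infcartpole_tabular.py", "./Results/InfCartPole",
    ids=range(1, 6), parallelization="joblib")
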
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03104970,
        lambda_=0.,
        boyan_N0=1220.247254,
        initial_learn_rate=0.27986823):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7

    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.9,
                    boyan_N0=22.36,
                    initial_learn_rate=.068,
                    discretization=9):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=10.09,
                    initial_learn_rate=.47):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 5

    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy, representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Experiments/HalfReward/" + getTimeStr() + "/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorldInter.default_map_dir, '11x11-RoomsSeg.txt')
    domain = GridWorldInter(maze, noise=0.01)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)

    ## Policy
    policy = eGreedy(representation, epsilon=0.1)
    ## Need to change this back, limiting noise ATM

    ## Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = 12000
    opt["num_policy_checks"] = 20

    # experiment = ExperimentDelayed(**opt)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # Experiment variables
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 10

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = GibbsPolicy(representation)

    # Agent (the positional arguments after discount_factor are presumably
    # forgetting_rate, min/max steps between updates, lambda_, learn_rate)
    opt["agent"] = NaturalActorCritic(policy, representation,
                                      domain.discount_factor,
                                      0.3, 100, 1000, .7, 0.1)

    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136,
                    lambda_=0.0985,
                    initial_learn_rate=0.090564,
                    resolution=13., num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    # num_rbfs is unused here; NonparametricLocalBases grows its own bases
    representation = NonparametricLocalBases(domain,
                                             kernel=linf_triangle_kernel,
                                             resolution=resolution,
                                             normalization=True)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(self, exp_id=1, path="results/"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    domain = NDomain(self.browser)
    opt["domain"] = domain
    representation = RBF(opt["domain"], num_rbfs=206)
    self.representation = self._pickle(representation, attrs='r', action='l')
    policy = eGreedy(representation, epsilon=0.3)
    agent = SARSA(representation=representation, policy=policy,
                  discount_factor=domain.discount_factor,
                  initial_learn_rate=0.1,
                  learn_rate_decay_mode="boyan", boyan_N0=100,
                  lambda_=0.4)
    self.agent = self._pickle(agent, attrs='a', action='l')
    opt["agent"] = self.agent
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10

    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1.0,
                    lambda_=0.,
                    boyan_N0=20.1,
                    initial_learn_rate=0.330):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7

    domain = IntruderMonitoring()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136,
                    lambda_=0.0985,
                    initial_learn_rate=0.090564,
                    resolution=13., num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-IncrTabularTut"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    ## Representation
    representation = IncrTabularTut(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(representation=representation, policy=policy,
                         discount_factor=domain.discount_factor,
                         learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10

    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=2120,
                    initial_learn_rate=.26,
                    lambda_=0.9,
                    resolution=8, num_rbfs=4958):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        boyan_N0=753,
        initial_learn_rate=.7,
        resolution=25., num_rbfs=206.,
        lambda_=0.75):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10

    experiment = Experiment(**opt)
    return experiment

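# A typical driver for the tutorial file above; a minimal sketch assuming
# the same Experiment API used in runExperiment earlier (run() and plot()
# appear there; save(), writing results to opt["path"], is assumed).
if __name__ == '__main__':
    experiment = make_experiment(1)
    experiment.run(visualize_steps=False,       # watch each learning step?
                   visualize_learning=False,    # show policy / value function?
                   visualize_performance=True)  # show performance runs?
    experiment.plot()
    experiment.save()
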
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=119,
                    initial_learn_rate=.06,
                    discretization=34):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=1,
                    lambda_=0.3,
                    initial_learn_rate=1.,
                    resolution=15., num_rbfs=5000):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 1
    opt["checks_per_policy"] = 1

    domain = ClothCutter()
    opt["domain"] = domain
    representation = ModifiedRBF(domain, num_rbfs=int(num_rbfs),
                                 resolution_max=resolution,
                                 resolution_min=resolution,
                                 const_feature=False, normalize=True,
                                 seed=exp_id)
    policy = GibbsPolicy(representation)
    opt["agent"] = SARSA(policy, representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="const", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1204.,
                    lambda_=0.,
                    boyan_N0=7353.2,
                    initial_learn_rate=.9712):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    sparsify = 1
    kappa = 1e-7

    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1, path="./Results/Tutorial/ChainMDPTut-SARSA"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    chainSize = 50
    domain = ChainMDPTut(chainSize=chainSize)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(policy=policy, representation=representation,
                         discount_factor=domain.discount_factor,
                         initial_learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10

    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=389.56,
                    lambda_=0.52738,
                    initial_learn_rate=.424409,
                    discretization=30):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 400000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 100

    domain = PuddleGapWorld()
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain, initial_learn_rate=1.,
    #               lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def _make_experiment(exp_id=1, path="./Results/Tmp/test_FiftyChain"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    ## Domain:
    domain = FiftyChain()

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    agent = SARSA(representation=representation, policy=policy,
                  discount_factor=domain.discount_factor, learn_rate=0.1)
    checks_per_policy = 3
    max_steps = 50
    num_policy_checks = 3

    # Every local name above (domain, agent, max_steps, ...) doubles as an
    # Experiment keyword argument here.
    experiment = Experiment(**locals())
    return experiment

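# A minimal sketch of exercising the test helper above; with max_steps=50
# and three policy checks, the run finishes almost instantly.
exp = _make_experiment(exp_id=1)
exp.run()
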
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=0.012695,
                    lambda_=0.2,
                    boyan_N0=80.798,
                    initial_learn_rate=0.402807):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1

    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain, discover_threshold, initial_rep,
                           sparsify=sparsify,
                           useCache=True, lazy=True,
                           lambda_=lambda_)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy, representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=3019.313,
                    initial_learn_rate=0.965830):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    beta_coef = 1e-6

    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy, representation,
                             discount_factor=domain.discount_factor,
                             BetaCoef=beta_coef,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=120,
                    initial_learn_rate=.06,
                    discretization=50):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1

    domain = FiftyChain()
    opt["domain"] = domain
    # FiftyChain is discrete, so the discretization argument is not needed
    representation = Tabular(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=116.7025,
                    initial_learn_rate=0.01402,
                    discretization=6.):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03613232738,
        lambda_=0.,
        boyan_N0=12335.665,
        initial_learn_rate=0.037282,
        discretization=6.):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True
    kappa = 1e-7

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain,
                                            discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                         lambda_=lambda_,
                         discount_factor=domain.discount_factor,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1, path="./Results/Temp",
                    initial_learn_rate=.40,
                    lambda_=0.,
                    resolution=25, num_rbfs=300):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # import sys
    # import os
    # cur_dir = os.path.expanduser("~/work/clipper/models/rl/")
    # sys.path.append(cur_dir)
    # from Domains import RCCarModified
    # from Policies import RCCarGreedy

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 200000
    opt["num_policy_checks"] = 15
    opt["checks_per_policy"] = 2

    # Domain
    domain = RCCarLeftTurn(noise=0.)
    opt["domain"] = domain

    # Representation
    kernel = gaussian_kernel
    representation = RandomLocalBases(domain, kernel, num=int(num_rbfs),
                                      normalization=True,
                                      resolution_max=resolution,
                                      seed=exp_id)

    # Policy
    policy = eGreedy(representation, epsilon=0.15)
    # if biasedaction > -1:
    #     print "No Random starts with biasing {}".format(i % 4)
    #     policy = BiasedGreedy(representation, epsilon=0.5, biasedaction=biasedaction)

    # Agent
    opt["agent"] = Q_Learning(policy, representation, domain.discount_factor,
                              initial_learn_rate=initial_learn_rate,
                              lambda_=lambda_,
                              learn_rate_decay_mode="const")

    experiment = Experiment(**opt)
    return experiment

def __init__(self, domain, representation, policy, steps=100000):
    opt = {}
    opt["domain"] = domain

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan", boyan_N0=100,
                              lambda_=0.)
    opt["checks_per_policy"] = 10
    opt["max_steps"] = steps
    opt["num_policy_checks"] = 20

    experiment = Experiment(**opt)
    experiment.run()
    self.policy = opt["agent"].policy
    self.domain = domain

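# A hypothetical usage sketch for the constructor above: the enclosing
# class is not shown, so the name PolicyTrainer is an assumption, as is
# the GridWorld setup used to instantiate it.
maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
domain = GridWorld(maze, noise=0.3)
representation = Tabular(domain)
policy = eGreedy(representation, epsilon=0.1)
trainer = PolicyTrainer(domain, representation, policy, steps=10000)
trained = trainer.policy  # the policy after 10000 training steps
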
def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000000
    opt["num_policy_checks"] = 50

    # Domain:
    # MAZE = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, 'large_state.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    discover_threshold = 1.
    lambda_ = 0.3
    initial_learn_rate = 0.11
    boyan_N0 = 100
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain, discover_threshold, initial_rep,
                           sparsify=True,
                           useCache=True, lazy=True,
                           lambda_=lambda_)

    # Policy
    policy = eGreedyDecay(representation, epsilonInit=0.9)

    # Agent
    opt["agent"] = Q_Learning(policy, representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=14.44946,
                    initial_learn_rate=0.240155681):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain

    # 20 rows, one per tiling; the nonzero entries in each row mark which
    # of the six state variables that tiling covers.
    mat = np.matrix("""1 1 1 0 0 0;
                       0 1 1 1 0 0;
                       0 0 1 1 1 0;
                       0 0 0 1 1 1;
                       0 0 1 0 1 1;
                       0 0 1 1 0 1;
                       1 0 1 1 0 0;
                       1 0 1 0 1 0;
                       1 0 0 1 1 0;
                       1 0 0 0 1 1;
                       1 0 1 0 0 1;
                       1 0 0 1 0 1;
                       1 1 0 1 0 0;
                       1 1 0 0 1 0;
                       1 1 0 0 0 1;
                       0 1 0 1 1 0;
                       0 1 0 0 1 1;
                       0 1 0 1 0 1;
                       0 1 1 0 1 0;
                       0 1 1 0 0 1""")
    # assert mat.shape[0] == 20
    representation = TileCoding(domain, memory=2000,
                                num_tilings=[1] * mat.shape[0],
                                resolution_matrix=mat * 6,
                                safety="none")
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy, representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment

def generate_meta_experiment(exp_id, agent_paths, path, unique=True,
                             expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths, pretrained=True,
                             load_confidence=True)
    for a in agents:
        a.policy.epsilon = 0  # make the pretrained policies fully greedy

    # mapname is read from the enclosing module scope
    if expdomain:
        actual_domain = expdomain(mapname=mapname,
                                  terrain_augmentation=False, noise=0.1)
    else:
        actual_domain = GridWorldMixed(mapname=mapname,
                                       terrain_augmentation=False, noise=0.1)
    domain = PolicyMixer(actual_domain, agents)
    representation = Tabular(domain)
    policy = eGreedy(representation)  # , tau=.1)
    opt['agent'] = Q_Learning(policy, representation,
                              discount_factor=0.9,
                              initial_learn_rate=0.8,
                              lambda_=0.5,
                              learn_rate_decay_mode='boyan', boyan_N0=2380)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    # Snapshot this experiment script alongside its results for reproducibility.
    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])
    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))
    return experiment

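# A hypothetical usage sketch: agent_paths is an assumption, and mapname
# must already exist at module scope because generate_meta_experiment
# reads it as a global.
mapname = "9x9-2Room.txt"                     # assumed map file name
agent_paths = ["./agents/a0", "./agents/a1"]  # assumed pretrained agent dirs
experiment = generate_meta_experiment(exp_id=1, agent_paths=agent_paths,
                                      path="./Results/Meta/")
experiment.run()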