def runExperiment(opt, visualize_steps, visualize_learning, visualize_performance, q):
    """Build an Experiment from ``opt``, run it, and forward its log output to ``q``.

    The Experiment must be created here, inside the child process, instead of
    being passed in ready-made: handing over an already-built object
    interferes with the logger (note carried over from the original author).

    Log path: child-process log -> MemoryHandler -> OutputHandler -> queue
    <- ExpOutputDialog.receive -> QTextEdit. Records travel between processes
    through the queue; the main process picks up new messages on a thread.

    Args:
        opt: Keyword arguments forwarded to ``Experiment``.
        visualize_steps: Should each learning step be shown?
        visualize_learning: Show policy / value function?
        visualize_performance: Show performance runs?
        q: Queue handed to ``OutputHandler`` as the log sink.
    """
    from logging.handlers import MemoryHandler

    exp = Experiment(**opt)

    # Attach a buffering handler that forwards to the queue-backed target.
    handler = MemoryHandler(capacity=1024,
                            flushLevel=logging.INFO,
                            target=OutputHandler(q))
    exp.logger.addHandler(handler)
    try:
        exp.run(visualize_steps=visualize_steps,
                visualize_learning=visualize_learning,
                visualize_performance=visualize_performance)
        exp.plot()
    finally:
        # Records below flushLevel stay buffered until the buffer fills or a
        # higher-level record arrives. Do not rely on interpreter shutdown to
        # deliver them: detach the handler and close it explicitly (close()
        # flushes the buffer to the target), even if run()/plot() raised.
        exp.logger.removeHandler(handler)
        handler.close()
def __init__(self, domain, representation, policy, steps=100000):
    """Run a Q-Learning experiment on ``domain`` and keep the agent's policy.

    Args:
        domain: Domain passed to the Experiment; also supplies
            ``discount_factor`` for the agent and is stored on ``self``.
        representation: Representation handed to ``Q_Learning``.
        policy: Policy handed to ``Q_Learning``.
        steps: Value used for the Experiment's ``max_steps`` option.
    """
    # Agent
    learner = Q_Learning(
        representation=representation,
        policy=policy,
        discount_factor=domain.discount_factor,
        initial_learn_rate=0.1,
        learn_rate_decay_mode="boyan",
        boyan_N0=100,
        lambda_=0.,
    )

    settings = {
        "domain": domain,
        "agent": learner,
        "checks_per_policy": 10,
        "max_steps": steps,
        "num_policy_checks": 20,
    }
    Experiment(**settings).run()

    # Expose the agent's policy and the domain once the run has finished.
    self.policy = learner.policy
    self.domain = domain
def runExperiment(opt, visualize_steps, visualize_learning, visualize_performance, q):
    """Create and run an Experiment, streaming its log records into ``q``.

    The Experiment has to be instantiated inside the child process; passing
    an already-constructed object would interfere with the logger (note
    carried over from the original author).

    Log path: child-process log -> MemoryHandler -> OutputHandler -> queue
    <- ExpOutputDialog.receive -> QTextEdit. The queue carries log records
    across processes and the main process reads new messages on a thread.

    Args:
        opt: Keyword arguments forwarded to ``Experiment``.
        visualize_steps: Should each learning step be shown?
        visualize_learning: Show policy / value function?
        visualize_performance: Show performance runs?
        q: Queue handed to ``OutputHandler`` as the log sink.
    """
    from logging.handlers import MemoryHandler

    exp = Experiment(**opt)

    # Add a buffering handler to the experiment's logger.
    handler = MemoryHandler(
        capacity=1024,
        flushLevel=logging.INFO,
        target=OutputHandler(q),
    )
    exp.logger.addHandler(handler)
    try:
        exp.run(
            visualize_steps=visualize_steps,
            visualize_learning=visualize_learning,
            visualize_performance=visualize_performance,
        )
        exp.plot()
    finally:
        # Buffered records below flushLevel would otherwise only be delivered
        # if something else triggers a flush. Detach and close the handler
        # (close() flushes to the target) so trailing records reach the queue
        # whether or not run()/plot() raised.
        exp.logger.removeHandler(handler)
        handler.close()
def __init__(self, domain, representation, policy, steps=100000):
    """Execute a Q-Learning Experiment on ``domain`` and store its policy.

    Args:
        domain: Domain given to the Experiment; its ``discount_factor`` is
            passed to the agent, and the domain itself is kept on ``self``.
        representation: Representation forwarded to ``Q_Learning``.
        policy: Policy forwarded to ``Q_Learning``.
        steps: Forwarded to the Experiment as ``max_steps``.
    """
    # Agent
    agent = Q_Learning(
        representation=representation,
        policy=policy,
        discount_factor=domain.discount_factor,
        initial_learn_rate=0.1,
        learn_rate_decay_mode="boyan",
        boyan_N0=100,
        lambda_=0.,
    )

    experiment_kwargs = dict(
        domain=domain,
        agent=agent,
        checks_per_policy=10,
        max_steps=steps,
        num_policy_checks=20,
    )
    run = Experiment(**experiment_kwargs)
    run.run()

    # Keep the agent's policy and the domain for later use.
    self.domain = domain
    self.policy = agent.policy