Example #1
def runExperiment(opt, visualize_steps, visualize_learning, visualize_performance, q):
    # The Experiment must be created inside the child process; passing in a pre-built object would break the logger.
    exp = Experiment(**opt)

    # Attach a handler to the child process's logger.
    # Child-process log -> MemoryHandler -> OutputHandler -> queue <- ExpOutputDialog.receive -> QTextEdit
    # Log records cross the process boundary via the queue; the main process reads new messages from the queue on a separate thread.
    import logging
    from logging.handlers import MemoryHandler
    handler = MemoryHandler(capacity=1024, flushLevel=logging.INFO, target=OutputHandler(q))
    exp.logger.addHandler(handler)

    exp.run(visualize_steps=visualize_steps,  # should each learning step be shown?
            visualize_learning=visualize_learning,  # show policy / value function?
            visualize_performance=visualize_performance)  # show performance runs?
    exp.plot()
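The comments above describe a queue-based pipeline that forwards log output from the experiment subprocess to a GUI widget. As a minimal sketch (not part of the original source), such a worker could be started from the main process roughly as follows; the helper name launch_experiment and the disabled visualization flags are illustrative assumptions, while runExperiment, OutputHandler and ExpOutputDialog are the names used in the snippet above.

import multiprocessing

def launch_experiment(opt):
    # Queue that carries log messages from the child process back to the GUI side.
    q = multiprocessing.Queue()
    p = multiprocessing.Process(
        target=runExperiment,
        args=(opt, False, False, False, q))  # visualization disabled in this sketch
    p.start()
    # A GUI thread (e.g. ExpOutputDialog.receive) would poll `q` for new messages
    # and append them to its QTextEdit.
    return p, q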
Example #2
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=753,
                    initial_learn_rate=.7,
                    discretization=20.,
                    lambda_=0.75):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance(episodeCap=1000)
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #3
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03104970,
        lambda_=0.,
        boyan_N0=1220.247254,
        initial_learn_rate=0.27986823):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7
    domain = BlocksWorld(blocks=6, noise=0.3, )
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #4
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.9,
                    boyan_N0=22.36,
                    initial_learn_rate=.068,
                    discretization=9):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #5
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=10.09,
                    initial_learn_rate=.47):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 5
    sparsify = 1
    ifddeps = 1e-7
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #6
def make_experiment(exp_id=1, path="./Results/Experiments/HalfReward/" + getTimeStr() + "/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorldInter.default_map_dir, '11x11-RoomsSeg.txt') 
    domain = GridWorldInter(maze, noise=0.01)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation  = Tabular(domain, discretization=20)

    ## Policy
    policy = eGreedy(representation, epsilon=0.1) ## Need to change this back, limiting noise ATM

    ## Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                   discount_factor=domain.discount_factor,
                       initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = 12000
    opt["num_policy_checks"] = 20
    # experiment = ExperimentDelayed(**opt)
    experiment = Experiment(**opt)
    return experiment
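As the docstring above explains, each setup file exposes a make_experiment factory returning a fully configured Experiment. A minimal driver for such a factory might look as follows; this sketch is not part of the original example and assumes rlpy's standard Experiment.run / plot / save interface.

if __name__ == "__main__":
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,        # show each learning step?
                   visualize_learning=False,     # show the policy / value function?
                   visualize_performance=False)  # show performance runs?
    experiment.plot()
    experiment.save()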
Example #7
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    # Experiment variables
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 10

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = GibbsPolicy(representation)

    # Agent
    opt["agent"] = NaturalActorCritic(policy, representation, domain.discount_factor,
                               0.3, 100, 1000, .7, 0.1)

    experiment = Experiment(**opt)
    return experiment
Example #8
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136,
                    lambda_=0.0985,
                    initial_learn_rate=0.090564,
                    resolution=13.,
                    num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = NonparametricLocalBases(domain,
                                             kernel=linf_triangle_kernel,
                                             resolution=resolution,
                                             normalization=True)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #9
    def make_experiment(self, exp_id=1, path="results/"):
        opt = {}
        opt["exp_id"] = exp_id
        opt["path"] = path

        domain = NDomain(self.browser)
        opt["domain"] = domain

        representation = RBF(opt["domain"], num_rbfs=206)
        self.representation = self._pickle(representation,
                                           attrs='r',
                                           action='l')
        policy = eGreedy(representation, epsilon=0.3)

        agent = SARSA(representation=representation,
                      policy=policy,
                      discount_factor=domain.discount_factor,
                      initial_learn_rate=0.1,
                      learn_rate_decay_mode="boyan",
                      boyan_N0=100,
                      lambda_=0.4)
        self.agent = self._pickle(agent, attrs='a', action='l')
        opt["agent"] = self.agent

        opt["checks_per_policy"] = 10
        opt["max_steps"] = 5000
        opt["num_policy_checks"] = 10
        experiment = Experiment(**opt)
        return experiment
Example #10
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1.0,
                    lambda_=0.,
                    boyan_N0=20.1,
                    initial_learn_rate=0.330):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7
    domain = IntruderMonitoring()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain,
                          discover_threshold,
                          initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy,
                         representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #11
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136,
                    lambda_=0.0985,
                    initial_learn_rate=0.090564,
                    resolution=13.,
                    num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = RBF(domain,
                         num_rbfs=int(num_rbfs),
                         resolution_max=resolution,
                         resolution_min=resolution,
                         const_feature=False,
                         normalize=True,
                         seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #12
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-IncrTabularTut"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id

    ## Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation  = IncrTabularTut(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(representation=representation, policy=policy,
                  discount_factor=domain.discount_factor,
                       learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Example #13
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=2120,
                    initial_learn_rate=.26,
                    lambda_=0.9,
                    resolution=8,
                    num_rbfs=4958):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = RBF(domain,
                         num_rbfs=int(num_rbfs),
                         resolution_max=resolution,
                         resolution_min=resolution,
                         const_feature=False,
                         normalize=True,
                         seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #14
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        boyan_N0=753,
        initial_learn_rate=.7,
        resolution=25.,
        num_rbfs=206.,
        lambda_=0.75):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #15
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                       discount_factor=domain.discount_factor,
                       initial_learn_rate=0.1,
                       learn_rate_decay_mode="boyan", boyan_N0=100,
                       lambda_=0.)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Example #16
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=119,
                    initial_learn_rate=.06,
                    discretization=34):

    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #17
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=1,
                    lambda_=0.3,
                    initial_learn_rate=1.,
                    resolution=15.,
                    num_rbfs=5000):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 1
    opt["checks_per_policy"] = 1

    domain = ClothCutter()
    opt["domain"] = domain
    representation = ModifiedRBF(domain,
                                 num_rbfs=int(num_rbfs),
                                 resolution_max=resolution,
                                 resolution_min=resolution,
                                 const_feature=False,
                                 normalize=True,
                                 seed=exp_id)
    policy = GibbsPolicy(representation)
    opt["agent"] = SARSA(policy,
                         representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="const",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #18
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1204.,
                    lambda_=0.,
                    boyan_N0=7353.2,
                    initial_learn_rate=.9712):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    sparsify = 1
    kappa = 1e-7
    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain,
                          discover_threshold,
                          initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #19
def make_experiment(exp_id=1, path="./Results/Tutorial/ChainMDPTut-SARSA"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    chainSize = 50
    domain = ChainMDPTut(chainSize=chainSize)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(policy=policy,
                         representation=representation,
                         discount_factor=domain.discount_factor,
                         initial_learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Example #20
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=389.56,
                    lambda_=0.52738,
                    initial_learn_rate=.424409,
                    discretization=30):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 400000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 100

    domain = PuddleGapWorld()
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    # agent           = SARSA(representation,policy,domain,initial_learn_rate=1.,
    # lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #21
def _make_experiment(exp_id=1, path="./Results/Tmp/test_FiftyChain"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    ## Domain:
    domain = FiftyChain()

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    agent = SARSA(representation=representation,
                  policy=policy,
                  discount_factor=domain.discount_factor,
                  learn_rate=0.1)
    checks_per_policy = 3
    max_steps = 50
    num_policy_checks = 3
    # Pass every local variable (domain, representation, policy, agent, max_steps, ...) to Experiment as keyword arguments.
    experiment = Experiment(**locals())
    return experiment
Example #22
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=0.012695,
                    lambda_=0.2,
                    boyan_N0=80.798,
                    initial_learn_rate=0.402807):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain,
                           discover_threshold,
                           initial_rep,
                           sparsify=sparsify,
                           useCache=True,
                           lazy=True,
                           lambda_=lambda_)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #23
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=3019.313,
                    initial_learn_rate=0.965830):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    beta_coef = 1e-6
    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             BetaCoef=beta_coef,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #24
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=120,
                    initial_learn_rate=.06,
                    discretization=50):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1

    domain = FiftyChain()
    opt["domain"] = domain
    representation = Tabular(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #25
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=116.7025,
                    initial_learn_rate=0.01402,
                    discretization=6.):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain

    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy,
                         representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #26
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03613232738,
        lambda_=0.,
        boyan_N0=12335.665,
        initial_learn_rate=0.037282,
        discretization=6.):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True
    kappa = 1e-7
    domain = InfCartPoleBalance()
    opt["domain"] = domain

    initial_rep = IndependentDiscretization(
        domain,
        discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                  lambda_=lambda_,
                  discount_factor=domain.discount_factor,
                  initial_learn_rate=initial_learn_rate,
                  learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #27
def make_experiment(exp_id=1,
                    path="./Results/Temp",
                    initial_learn_rate=.40,
                    lambda_=0.,
                    resolution=25,
                    num_rbfs=300):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.
    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # import sys
    # import os
    # cur_dir = os.path.expanduser("~/work/clipper/models/rl/")
    # sys.path.append(cur_dir)
    # from Domains import RCCarModified
    # from Policies import RCCarGreedy

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 200000
    opt["num_policy_checks"] = 15
    opt["checks_per_policy"] = 2
    # Logging

    domain = RCCarLeftTurn(noise=0.)
    opt["domain"] = domain

    # Representation
    kernel = gaussian_kernel
    representation = RandomLocalBases(domain,
                                      gaussian_kernel,
                                      num=int(num_rbfs),
                                      normalization=True,
                                      resolution_max=resolution,
                                      seed=exp_id)

    policy = eGreedy(representation, epsilon=0.15)
    # if biasedaction > -1:
    #     print "No Random starts with biasing {}".format(i % 4)
    #     policy = BiasedGreedy(representation, epsilon=0.5, biasedaction=biasedaction)

    # Agent

    opt["agent"] = Q_Learning(policy,
                              representation,
                              domain.discount_factor,
                              initial_learn_rate=initial_learn_rate,
                              lambda_=lambda_,
                              learn_rate_decay_mode="const")

    experiment = Experiment(**opt)

    return experiment
Example #28
    def __init__(self, domain, representation, policy, steps=100000):

        opt = {}
        opt["domain"] = domain
        # Agent
        opt["agent"] = Q_Learning(representation=representation, policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan", boyan_N0=100,
                                  lambda_=0.)

        opt["checks_per_policy"] = 10
        opt["max_steps"] = steps
        opt["num_policy_checks"] = 20
        experiment = Experiment(**opt)
        experiment.run()
        self.policy = opt["agent"].policy
        self.domain = domain
Example #29
def make_experiment(exp_id=1, path="./Results/Temp"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000000
    opt["num_policy_checks"] = 50

    # Logging

    # Domain:
    # MAZE                = '/Domains/GridWorldMaps/1x3.txt'
    maze = os.path.join(GridWorld.default_map_dir, 'large_state.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    discover_threshold = 1.
    lambda_ = 0.3
    initial_learn_rate = 0.11
    boyan_N0 = 100

    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain,
                           discover_threshold,
                           initial_rep,
                           sparsify=True,
                           useCache=True,
                           lazy=True,
                           lambda_=lambda_)

    # Policy
    policy = eGreedyDecay(representation, epsilonInit=0.9)

    # Agent
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)

    experiment = Experiment(**opt)
    return experiment
Example #30
    def __init__(self, domain, representation, policy, steps=100000):

        opt = {}
        opt["domain"] = domain
        # Agent
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        opt["checks_per_policy"] = 10
        opt["max_steps"] = steps
        opt["num_policy_checks"] = 20
        experiment = Experiment(**opt)
        experiment.run()
        self.policy = opt["agent"].policy
        self.domain = domain
Example #31
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=14.44946,
                    initial_learn_rate=0.240155681):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    domain = BlocksWorld(
        blocks=6,
        noise=0.3,
    )
    opt["domain"] = domain
    mat = np.matrix("""1 1 1 0 0 0;
                    0 1 1 1 0 0;
                    0 0 1 1 1 0;
                    0 0 0 1 1 1;

                    0 0 1 0 1 1;
                    0 0 1 1 0 1;
                    1 0 1 1 0 0;
                    1 0 1 0 1 0;
                    1 0 0 1 1 0;
                    1 0 0 0 1 1;
                    1 0 1 0 0 1;
                    1 0 0 1 0 1;
                    1 1 0 1 0 0;
                    1 1 0 0 1 0;
                    1 1 0 0 0 1;
                    0 1 0 1 1 0;
                    0 1 0 0 1 1;
                    0 1 0 1 0 1;
                    0 1 1 0 1 0;
                    0 1 1 0 0 1""")
    #assert(mat.shape[0] == 20)
    representation = TileCoding(domain,
                                memory=2000,
                                num_tilings=[1] * mat.shape[0],
                                resolution_matrix=mat * 6,
                                safety="none")
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #32
def generate_meta_experiment(exp_id,
                             agent_paths,
                             path,
                             unique=True,
                             expdomain=None):
    opt = {}
    if unique:
        opt["path"] = os.path.join(path, get_time_str())
    else:
        opt["path"] = path

    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 40
    opt["checks_per_policy"] = 20
    # start_at = np.array([4, 6])

    agents = load_all_agents(agent_paths,
                             pretrained=True,
                             load_confidence=True)
    for a in agents:
        a.policy.epsilon = 0
    # Note: `mapname` is not defined in this snippet; it is presumably a module-level constant in the original source.
    if expdomain:
        actual_domain = expdomain(mapname=mapname,
                                  terrain_augmentation=False,
                                  noise=0.1)
    else:
        actual_domain = GridWorldMixed(mapname=mapname,
                                       terrain_augmentation=False,
                                       noise=0.1)
    domain = PolicyMixer(actual_domain, agents)
    representation = Tabular(domain)
    policy = eGreedy(representation)  # , tau=.1)
    opt['agent'] = Q_Learning(policy,
                              representation,
                              discount_factor=0.9,
                              initial_learn_rate=0.8,
                              lambda_=0.5,
                              learn_rate_decay_mode='boyan',
                              boyan_N0=2380)
    opt['domain'] = domain
    experiment = Experiment(**opt)

    path_join = lambda s: os.path.join(opt["path"], s)
    if not os.path.exists(opt["path"]):
        os.makedirs(opt["path"])

    shutil.copy(inspect.getsourcefile(inspect.currentframe()),
                path_join("experiment.py"))

    return experiment