Example #1
# Imports assumed from rlpy 1.3.x (the snippet omits them); Q_LEARNING is
# aliased because rlpy's agent class is named Q_Learning.
from rlpy.Agents import Q_Learning as Q_LEARNING
from rlpy.Domains import FiniteCartPoleBalanceOriginal
from rlpy.Experiments import Experiment
from rlpy.Policies import eGreedy
from rlpy.Representations import RBF


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=2120,
                    initial_learn_rate=.26,
                    lambda_=0.9,
                    resolution=8,
                    num_rbfs=4958):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = RBF(domain,
                         num_rbfs=int(num_rbfs),
                         resolution_max=resolution,
                         resolution_min=resolution,
                         const_feature=False,
                         normalize=True,
                         seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
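To actually run one of these configurations, rlpy's Experiment exposes run, plot, and save. A minimal sketch, following rlpy's getting-started tutorial (the visualize_* flags are optional and default to off):

experiment = make_experiment(exp_id=1)
experiment.run(visualize_steps=False,     # no domain rendering during learning
               visualize_learning=False,  # no value-function plots during learning
               visualize_performance=0)   # no rendering during policy checks
experiment.plot()   # plot the learning curves
experiment.save()   # write results under opt["path"]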
Example #2
# Imports assumed from rlpy 1.3.x (the snippet omits them).
from rlpy.Agents import Q_Learning
from rlpy.Domains import FiniteCartPoleBalanceOriginal
from rlpy.Experiments import Experiment
from rlpy.Policies import eGreedy
from rlpy.Representations import IncrementalTabular


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=119,
                    initial_learn_rate=.06,
                    discretization=34):

    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
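In rlpy, exp_id both seeds the run and names the results file written under path, so sweeping it is the usual way to collect independent trials. A hypothetical loop over five seeds:

for seed in range(1, 6):
    exp = make_experiment(exp_id=seed)  # exp_id doubles as the random seed
    exp.run()
    exp.save()                          # one results file per exp_id under path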
Example #3
    # Qt handler from the surrounding GUI application: the sp* spin boxes and the
    # Representation/Policy/Agent factories are defined elsewhere in that app,
    # not in rlpy itself; numpy is assumed to be imported as np.
    def makeComponents(self):
        # noise = float(self.spNoise.value())
        position1 = float(self.spPosition1.value())
        position2 = float(self.spPosition2.value())

        velocity1 = float(self.spVelocity1.value())
        velocity2 = float(self.spVelocity2.value())
        availForce1 = float(self.spAvailForce1.value())
        availForce2 = float(self.spAvailForce2.value())

        domain = FiniteCartPoleBalanceOriginal(good_reward=0.)

        domain.POSITION_LIMITS = [position1, position2]

        #: m/s - Default limits on cart velocity [per RL Community CartPole]
        domain.VELOCITY_LIMITS = [velocity1, velocity2]

        #: Newtons, N - Force values available as actions
        domain.AVAIL_FORCE = np.array([availForce1, availForce2])

        #: kilograms, kg - Mass of the pendulum arm
        domain.MASS_PEND = float(self.spMassPend.value())
        #: kilograms, kg - Mass of cart
        domain.MASS_CART = float(self.spMassCart.value())
        #: meters, m - Physical length of the pendulum (the moment arm lies at half this distance)
        domain.LENGTH = float(self.spLength.value())
        # m - Length of moment-arm to center of mass (= half the pendulum length)
        domain.MOMENT_ARM = domain.LENGTH / 2.
        # 1/kg - used in dynamics computations; equal to 1 / (MASS_PEND + MASS_CART)
        domain._ALPHA_MASS = 1.0 / (domain.MASS_CART + domain.MASS_PEND)

        representation = RepresentationFactory.get(config=self.representationConfig,
            name=str(self.lstRepresentation.currentItem().text()),
            domain=domain)

        policy = PolicyFactory.get(config=self.policyConfig,
            name=str(self.lstPolicy.currentItem().text()),
            representation=representation)
        
        agent = AgentFactory.get(config=self.agentConfig,
            name=str(self.lstAgent.currentItem().text()),
            representation=representation,
            policy=policy)

        return domain, agent
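makeComponents only assembles the pieces. A sketch of how the returned pair could be handed to an rlpy Experiment; run_from_gui and its opt values are illustrative, not part of the GUI code:

    def run_from_gui(self, max_steps=10000):
        # Illustrative wrapper: wire the GUI-built components into an Experiment.
        domain, agent = self.makeComponents()
        opt = {"exp_id": 1,
               "path": "./Results/Temp/{domain}/{agent}/{representation}/",
               "domain": domain,
               "agent": agent,
               "max_steps": max_steps,
               "num_policy_checks": 10,
               "checks_per_policy": 1}
        experiment = Experiment(**opt)
        experiment.run()
        return experiment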
Example #4
# Imports assumed from rlpy 1.3.x (the snippet omits them); the exact module
# path for gaussian_kernel is an assumption based on rlpy's layout.
from past.utils import old_div
from rlpy.Agents import Q_Learning as Q_LEARNING
from rlpy.Domains import FiniteCartPoleBalanceOriginal
from rlpy.Experiments import Experiment
from rlpy.Policies import eGreedy
from rlpy.Representations import KernelizediFDD
from rlpy.Representations.kernels import gaussian_kernel


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=.21,
                    boyan_N0=37.,
                    lambda_=.9,
                    initial_learn_rate=.07,
                    kernel_resolution=13.14):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    # domain = FiniteCartPoleBalanceModern()
    kernel_width = old_div(
        (domain.statespace_limits[:, 1] - domain.statespace_limits[:, 0]),
        kernel_resolution)
    representation = KernelizediFDD(domain,
                                    sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain, initial_learn_rate=initial_learn_rate,
    #               lambda_=.0, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    opt["agent"] = Q_LEARNING(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
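The kernel_width line divides each state dimension's range by kernel_resolution to get one Gaussian width per dimension. old_div (from the past compatibility package) reproduces Python 2 division, which for these float arrays is just /. A standalone check with made-up limits; the real values come from domain.statespace_limits:

import numpy as np

# Illustrative limits only: [cart position, cart velocity, angle, angular velocity]
statespace_limits = np.array([[-2.4, 2.4],
                              [-6.0, 6.0],
                              [-0.21, 0.21],
                              [-6.0, 6.0]])
kernel_resolution = 13.14
kernel_width = (statespace_limits[:, 1] - statespace_limits[:, 0]) / kernel_resolution
print(kernel_width)  # one Gaussian width per state dimension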
Example #5
# Imports assumed from rlpy 1.3.x (the snippet omits them).
from rlpy.Agents import Q_Learning as Q_LEARNING
from rlpy.Domains import FiniteCartPoleBalanceOriginal
from rlpy.Experiments import Experiment
from rlpy.Policies import eGreedy
from rlpy.Representations import iFDD, IndependentDiscretization


def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=77.,
                    boyan_N0=11,
                    lambda_=0.9,
                    initial_learn_rate=.05,
                    discretization=47):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain,
                                            discretization=discretization)
    representation = iFDD(domain,
                          discover_threshold,
                          initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=True)
    policy = eGreedy(representation, epsilon=0.1)
    # agent = SARSA(representation, policy, domain, initial_learn_rate=initial_learn_rate,
    #               lambda_=.0, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    opt["agent"] = Q_LEARNING(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
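iFDD grows the representation online: it starts from IndependentDiscretization's per-dimension features and conjoins those whose accumulated TD error exceeds discover_threshold. A hedged way to inspect feature growth after a run, assuming features_num (rlpy's feature-count attribute on representations):

exp = make_experiment(exp_id=1)
exp.run()
print("features discovered:", exp.agent.representation.features_num)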