Example #1
File: BlocksWorld.py Project: zyc9012/sdrl
    def makeComponents(self):
        # Read the domain parameters from the GUI widgets.
        self.block_number = int(self.spinBoxBlocksNumber.value())
        noise = float(self.spNoise.value())
        domain = BlocksWorld(blocks=self.block_number,
                             towerSize=self.block_number,
                             noise=noise)
        domain.GOAL_REWARD = float(self.spGoalReward.value())
        domain.STEP_REWARD = float(self.spStepReward.value())

        # Resolve the names selected in the list widgets to concrete
        # representation, policy, and agent objects.
        representation = RepresentationFactory.get(
            config=self.representationConfig,
            name=str(self.lstRepresentation.currentItem().text()),
            domain=domain)

        policy = PolicyFactory.get(config=self.policyConfig,
                                   name=str(
                                       self.lstPolicy.currentItem().text()),
                                   representation=representation)

        agent = AgentFactory.get(config=self.agentConfig,
                                 name=str(self.lstAgent.currentItem().text()),
                                 representation=representation,
                                 policy=policy)

        return domain, agent
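
Each factory above maps the name chosen in a GUI list widget to a concrete component. The sdrl factory implementations are not shown on this page; the following is a minimal hypothetical sketch that matches only the get() signature used above, with the class names taken from rlpy:

from rlpy.Representations import IncrementalTabular, Tabular


class RepresentationFactory(object):
    """Hypothetical sketch; the real sdrl factory is not shown here."""

    @staticmethod
    def get(config, name, domain):
        # Dispatch on the list-widget text; extra constructor
        # parameters would come from the config object.
        if name == "Tabular":
            return Tabular(domain)
        if name == "IncrementalTabular":
            return IncrementalTabular(domain)
        raise ValueError("unknown representation: %s" % name)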
Example #2
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03104970,
        lambda_=0.,
        boyan_N0=1220.247254,
        initial_learn_rate=0.27986823):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    # iFDD grows the feature set online by discovering useful
    # conjunctions of the independent base features.
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation,discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
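
The make_experiment snippets on this page omit their imports and the driver that runs them. All the class names match rlpy's, so assuming the standard rlpy package layout (treat the exact module paths as an assumption), they would need roughly:

import numpy as np
from rlpy.Domains import BlocksWorld
from rlpy.Representations import (IndependentDiscretization, iFDD, iFDDK,
                                  TileCoding)
from rlpy.Policies import eGreedy
from rlpy.Agents import Q_Learning, Greedy_GQ
from rlpy.Experiments import Experiment

# Typical rlpy driver: build, run, and persist one experiment.
if __name__ == "__main__":
    experiment = make_experiment(exp_id=1)
    experiment.run()   # pass visualize_* flags to watch learning
    experiment.plot()
    experiment.save()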
Example #3
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=0.012695,
                    lambda_=0.2,
                    boyan_N0=80.798,
                    initial_learn_rate=0.402807):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain,
                           discover_threshold,
                           initial_rep,
                           sparsify=sparsify,
                           useCache=True,
                           lazy=True,
                           lambda_=lambda_)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #4
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=10.09,
                    initial_learn_rate=.47):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 5
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    # Baseline: fixed independent features, no feature discovery.
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
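
Because these make_experiment variants share the same domain (BlocksWorld with blocks=6, noise=0.3) and the same evaluation schedule, their learning curves are directly comparable. A minimal sketch of running one variant over several seeds, assuming the rlpy Experiment API shown above:

# In rlpy, exp_id seeds the run and indexes the result file
# written under opt["path"].
for exp_id in range(1, 6):
    experiment = make_experiment(exp_id=exp_id)
    experiment.run()
    experiment.save()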
Example #5
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=14.44946,
                    initial_learn_rate=0.240155681):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    mat = np.matrix("""1 1 1 0 0 0;
                    0 1 1 1 0 0;
                    0 0 1 1 1 0;
                    0 0 0 1 1 1;

                    0 0 1 0 1 1;
                    0 0 1 1 0 1;
                    1 0 1 1 0 0;
                    1 0 1 0 1 0;
                    1 0 0 1 1 0;
                    1 0 0 0 1 1;
                    1 0 1 0 0 1;
                    1 0 0 1 0 1;
                    1 1 0 1 0 0;
                    1 1 0 0 1 0;
                    1 1 0 0 0 1;
                    0 1 0 1 1 0;
                    0 1 0 0 1 1;
                    0 1 0 1 0 1;
                    0 1 1 0 1 0;
                    0 1 1 0 0 1""")
    assert mat.shape[0] == 20  # one tiling per row
    representation = TileCoding(domain,
                                memory=2000,
                                num_tilings=[1] * mat.shape[0],
                                resolution_matrix=mat * 6,
                                safety="none")
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
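
The resolution matrix has one row per tiling: each row marks which three of the six state dimensions that tiling joins, and multiplying by 6 gives every active dimension six bins. Its 20 rows enumerate all C(6,3) = 20 three-dimensional subsets, so every triple of dimensions is covered by exactly one tiling. A quick sanity check of that claim (assuming numpy is imported as np and mat is in scope):

from itertools import combinations

rows = np.asarray(mat)
assert rows.shape == (20, 6)          # 20 tilings over 6 dimensions
assert (rows.sum(axis=1) == 3).all()  # each tiling joins exactly 3 dims
# Every 3-of-6 dimension subset appears exactly once.
assert ({tuple(int(i) for i in np.flatnonzero(r)) for r in rows}
        == set(combinations(range(6), 3)))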