Example #1
def grid_world1_trp(exp_id=4, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 20

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir, '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL([(7, 5), (1, 2)],
                                    mapname=maze,
                                    encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(x, [(7, 5)]),
                                    binary=True,
                                    noise=noise)
    #domain = Pinball(noise=0.3)
    
    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    a = TransitionStateClustering(window_size=2)
    for t in trajs:
        N = len(t)
        demo = np.zeros((N, 2))
        for i in range(0, N):
            demo[i, :] = t[i][0:2]
        a.addDemonstration(demo)
    a.fit(normalize=False, pruning=0.5)
    # use a distinct loop variable so the clustering object `a` is not shadowed
    ac = [(round(m.means_[0][0]), round(m.means_[0][1])) for m in a.model]

    print(ac)

    #reinitialize
    domain = ConsumableGridWorldIRL([(7, 5), (1, 2)],
                                    mapname=maze,
                                    encodingFunction=lambda x: ConsumableGridWorldIRL.statePassageEncoding(x, ac, 5),
                                    noise=noise)
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.3)
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                       discount_factor=domain.discount_factor,
                       initial_learn_rate=0.1,
                       learn_rate_decay_mode="boyan", boyan_N0=100,
                       lambda_=0.)

    opt["domain"] = domain

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(experiment.result["return"])
Example #2
def make_experiment(exp_id=1, path="./Results/Experiments/HalfReward/" + getTimeStr() + "/"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorldInter.default_map_dir, '11x11-RoomsSeg.txt') 
    domain = GridWorldInter(maze, noise=0.01)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)

    ## Policy
    policy = eGreedy(representation, epsilon=0.1) ## Need to change this back, limiting noise ATM

    ## Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                   discount_factor=domain.discount_factor,
                       initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = 12000
    opt["num_policy_checks"] = 20
    # experiment = ExperimentDelayed(**opt)
    experiment = Experiment(**opt)
    return experiment
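The docstring above spells out the contract: make_experiment only assembles the Experiment; running and saving it is left to the caller. A minimal driver sketch, assuming this file is importable as a module (the run()/save() calls mirror those used inline in Example #1):

def main():
    experiment = make_experiment(exp_id=1)
    experiment.run(visualize_steps=False,      # do not render individual steps
                   visualize_learning=False,   # do not render learning progress
                   visualize_performance=0)    # render no performance checks
    experiment.save()                          # write logs/results under the path given above

if __name__ == "__main__":
    main()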
Example #3
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=10.09,
                    initial_learn_rate=.47):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 5
    sparsify = 1
    ifddeps = 1e-7
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #4
    def make_experiment(self, exp_id=1, path="results/"):
        opt = {}
        opt["exp_id"] = exp_id
        opt["path"] = path

        domain = NDomain(self.browser)
        opt["domain"] = domain

        representation = RBF(opt["domain"], num_rbfs=int(206))
        self.representation = self._pickle(representation, attrs='r', action='l')
        policy = eGreedy(representation, epsilon=0.3)

        agent = SARSA(
            representation=representation,
            policy=policy,
            discount_factor=domain.discount_factor,
            initial_learn_rate=0.1,
            learn_rate_decay_mode="boyan",
            boyan_N0=100,
            lambda_=0.4
        )
        self.agent = self._pickle(agent, attrs='a', action='l')
        opt["agent"] = self.agent

        opt["checks_per_policy"] = 10
        opt["max_steps"] = 5000
        opt["num_policy_checks"] = 10
        experiment = Experiment(**opt)
        return experiment
Example #5
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136,
                    lambda_=0.0985,
                    initial_learn_rate=0.090564,
                    resolution=13.,
                    num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = RBF(domain,
                         num_rbfs=int(num_rbfs),
                         resolution_max=resolution,
                         resolution_min=resolution,
                         const_feature=False,
                         normalize=True,
                         seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #6
    def runTIRL(self, N=5, w=2, pruning=0.5):
        opt = deepcopy(self.opt_template)
        dist = self.getIRLDist(N=N)
        ac = self.getTSCWaypoints(N, w, pruning)
        domain = self.createStateDomain(
            waypoints=ac,
            rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
                x, y, z, w, dist))
        opt["domain"] = domain
        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain=self.createStateDomain(
                           waypoints=self.env_template["consumable"]),
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
Example #7
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.012695,
        lambda_=0.2,
        boyan_N0=80.798,
        initial_learn_rate=0.402807):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True, lazy=True,
                          lambda_=lambda_)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #8
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=2120,
                    initial_learn_rate=.26,
                    lambda_=0.9,
                    resolution=8,
                    num_rbfs=4958):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = RBF(domain,
                         num_rbfs=int(num_rbfs),
                         resolution_max=resolution,
                         resolution_min=resolution,
                         const_feature=False,
                         normalize=True,
                         seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #9
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        boyan_N0=136,
        lambda_=0.0985,
        initial_learn_rate=0.090564,
        resolution=13., num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = NonparametricLocalBases(domain,
                                             kernel=linf_triangle_kernel,
                                             resolution=resolution,
                                             normalization=True)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #10
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=1204.,
        lambda_=0.,
        boyan_N0=7353.2,
        initial_learn_rate=.9712):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    sparsify = 1
    kappa = 1e-7
    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy, representation,
                       discount_factor=domain.discount_factor,
                       lambda_=lambda_, initial_learn_rate=initial_learn_rate,
                       learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #11
    def solve(self):
        """Solve the domain MDP."""

        self.start_time = clock()  # Used to track the total time for solving
        self.bellmanUpdates = 0
        converged = False
        PI_iteration = 0

        # The policy is maintained as a separate copy of the representation.
        # This way, as the representation is updated, the policy remains intact.
        policy = eGreedy(deepcopy(self.representation),
                         epsilon=0,
                         forcedDeterministicAmongBestActions=True)

        while self.hasTime() and not converged:

            self.trajectoryBasedPolicyEvaluation(policy)

            # Policy Improvement (updating the representation of the value
            # function will automatically improve the policy)
            PI_iteration += 1

            # Theta can grow if the representation is expanded, so pad the
            # weight vector with zeros before comparing
            paddedTheta = padZeros(policy.representation.weight_vec,
                                   len(self.representation.weight_vec))

            # Calculate the change in the weight_vec as L2-norm
            delta_weight_vec = np.linalg.norm(paddedTheta -
                                              self.representation.weight_vec)
            converged = delta_weight_vec < self.convergence_threshold

            # Update the underlying value function of the policy
            policy.representation = deepcopy(
                self.representation)  # self.representation

            (performance_return, performance_steps, performance_term,
             performance_discounted_return) = self.performanceRun()
            self.logger.info(
                'PI #%d [%s]: BellmanUpdates=%d, ||delta-weight_vec||=%0.4f, Return=%0.3f, steps=%d, features=%d'
                % (PI_iteration, hhmmss(deltaT(self.start_time)),
                   self.bellmanUpdates, delta_weight_vec, performance_return,
                   performance_steps, self.representation.features_num))
            if self.show:
                self.domain.show(a, representation=self.representation, s=s)

            # store stats
            self.result["bellman_updates"].append(self.bellmanUpdates)
            self.result["return"].append(performance_return)
            self.result["planning_time"].append(deltaT(self.start_time))
            self.result["num_features"].append(
                self.representation.features_num)
            self.result["steps"].append(performance_steps)
            self.result["terminated"].append(performance_term)
            self.result["discounted_return"].append(
                performance_discounted_return)
            self.result["policy_improvemnt_iteration"].append(PI_iteration)

        if converged:
            self.logger.info('Converged!')
        super(TrajectoryBasedPolicyIteration, self).solve()
Example #12
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=136,
                    lambda_=0.0985,
                    initial_learn_rate=0.090564,
                    resolution=13.,
                    num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = NonparametricLocalBases(domain,
                                             kernel=linf_triangle_kernel,
                                             resolution=resolution,
                                             normalization=True)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #13
    def solve(self):
        """Solve the domain MDP."""
        self.bellmanUpdates = 0
        self.policy_improvement_iteration = 0
        self.start_time = clock()

        # Check for Tabular Representation
        if not self.IsTabularRepresentation():
            self.logger.error("Policy Iteration works only with a tabular representation.")
            return 0

        # Initialize the policy
        policy = eGreedy(
            deepcopy(self.representation),
            epsilon=0,
            forcedDeterministicAmongBestActions=True)  # Copy the representation so that the weight change during the evaluation does not change the policy

        # Initialize policyChanges to True so the while loop starts
        policyChanges = True

        while policyChanges and deltaT(self.start_time) < self.planning_time:

            # Evaluate the policy
            converged = self.policyEvaluation(policy)

            # Improve the policy
            self.policy_improvement_iteration += 1
            policy, policyChanges = self.policyImprovement(policy)

        super(PolicyIteration, self).solve()
Example #14
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=738.59,
        lambda_=.8927,
        boyan_N0=2804.,
        initial_learn_rate=.2706):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 1000000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    sparsify = 1
    ifddeps = 1e-7
    beta_coef = 1e-6
    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          # discretization=discretization,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy, representation,
                      discount_factor=domain.discount_factor,
                      BetaCoef=beta_coef,
                      lambda_=lambda_, initial_learn_rate=initial_learn_rate,
                      learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #15
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=8.63917,
        lambda_=0.42,
        boyan_N0=202.,
        initial_learn_rate=.7442,
        discretization=18.):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 100
    sparsify = True

    domain = PuddleWorld()
    opt["domain"] = domain

    initial_rep = IndependentDiscretization(
        domain,
        discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=True)
    policy = eGreedy(representation, epsilon=0.1)
    # agent           = SARSA(representation,policy,domain,initial_learn_rate=1.,
    # lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_LEARNING(policy, representation, discount_factor=domain.discount_factor,
                       lambda_=lambda_, initial_learn_rate=initial_learn_rate,
                       learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #16
    def make_experiment(self, exp_id=1, path="results/"):
        opt = {}
        opt["exp_id"] = exp_id
        opt["path"] = path

        domain = NDomain(self.browser)
        opt["domain"] = domain

        representation = RBF(opt["domain"], num_rbfs=int(206))
        self.representation = self._pickle(representation,
                                           attrs='r',
                                           action='l')
        policy = eGreedy(representation, epsilon=0.3)

        agent = SARSA(representation=representation,
                      policy=policy,
                      discount_factor=domain.discount_factor,
                      initial_learn_rate=0.1,
                      learn_rate_decay_mode="boyan",
                      boyan_N0=100,
                      lambda_=0.4)
        self.agent = self._pickle(agent, attrs='a', action='l')
        opt["agent"] = self.agent

        opt["checks_per_policy"] = 10
        opt["max_steps"] = 5000
        opt["num_policy_checks"] = 10
        experiment = Experiment(**opt)
        return experiment
Example #17
File: ifdd.py Project: shengtu/rlpy
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=107091,
        lambda_=0.245,
        boyan_N0=514,
        initial_learn_rate=.327,
        discretization=18):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1
    sparsify = 1
    domain = HIVTreatment()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(
        domain,
        discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=True)
    #representation.PRINT_MAX_RELEVANCE = True
    policy = eGreedy(representation, epsilon=0.1)
    # agent           = SARSA(representation,policy,domain,initial_learn_rate=initial_learn_rate,
    # lambda_=.0, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    opt["agent"] = Q_LEARNING(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    
    experiment = Experiment(**opt)
    return experiment
Example #18
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        boyan_N0=2120,
        initial_learn_rate=.26,
        lambda_=0.9,
        resolution=8, num_rbfs=4958):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #19
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=116.7025,
                    initial_learn_rate=0.01402,
                    discretization=6.):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain

    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy,
                         representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #20
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        lambda_=0.9,
        boyan_N0=22.36,
        initial_learn_rate=.068,
        discretization=9):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = Pacman()
    opt["domain"] = domain
    representation = IncrementalTabular(
        domain,
        discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=0.9, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #21
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        boyan_N0=119,
        initial_learn_rate=.06,
        discretization=34):

    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = IncrementalTabular(
        domain,
        discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=0.9, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #22
def _make_experiment(exp_id=1, path="./Results/Tmp/test_FiftyChain"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    ## Domain:
    domain = FiftyChain()

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    agent = SARSA(representation=representation,
                  policy=policy,
                  discount_factor=domain.discount_factor,
                  learn_rate=0.1)
    checks_per_policy = 3
    max_steps = 50
    num_policy_checks = 3
    experiment = Experiment(**locals())
    return experiment
Example #23
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.013461679,
        lambda_=0.,
        boyan_N0=484.78006,
        initial_learn_rate=0.5651405,
        discretization=23.):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True
    kappa = 1e-7
    domain = InfCartPoleBalance()
    opt["domain"] = domain

    initial_rep = IndependentDiscretization(
        domain,
        discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=1. - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy, representation,
                      discount_factor=domain.discount_factor,
                      lambda_=lambda_,
                      BetaCoef=1e-6,
                      initial_learn_rate=initial_learn_rate,
                      learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #24
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        boyan_N0=389.56,
        lambda_=0.52738,
        initial_learn_rate=.424409,
        discretization=30):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 400000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 100

    domain = PuddleGapWorld()
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    # agent           = SARSA(representation,policy,domain,initial_learn_rate=1.,
    # lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #25
File: ifdd.py Project: okkhoy/rlpy
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=77.,
        boyan_N0=11,
        lambda_=0.9,
        initial_learn_rate=.05,
        discretization=47):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(
        domain,
        discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=True)
    policy = eGreedy(representation, epsilon=0.1)
    # agent           = SARSA(representation,policy,domain,initial_learn_rate=initial_learn_rate,
    # lambda_=.0, learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    opt["agent"] = Q_LEARNING(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #26
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                       discount_factor=domain.discount_factor,
                       initial_learn_rate=0.1,
                       learn_rate_decay_mode="boyan", boyan_N0=100,
                       lambda_=0.)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Example #27
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-sarsa0"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA0(representation=representation, policy=policy,
                   discount_factor=domain.discount_factor,
                       learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Example #28
    def solve(self):
        """Solve the domain MDP."""
        self.bellmanUpdates = 0
        self.policy_improvement_iteration = 0
        self.start_time = clock()

        # Check for Tabular Representation
        if not self.IsTabularRepresentation():
            self.logger.error(
                "Policy Iteration works only with a tabular representation.")
            return 0

        # Initialize the policy
        policy = eGreedy(
            deepcopy(self.representation),
            epsilon=0,
            forcedDeterministicAmongBestActions=True
        )  # Copy the representation so that the weight change during the evaluation does not change the policy

        # Initialize policyChanges to True so the while loop starts
        policyChanges = True

        while policyChanges and deltaT(self.start_time) < self.planning_time:

            # Evaluate the policy
            converged = self.policyEvaluation(policy)

            # Improve the policy
            self.policy_improvement_iteration += 1
            policy, policyChanges = self.policyImprovement(policy)

        if converged:
            self.logger.info('Converged!')
        super(PolicyIteration, self).solve()
Example #29
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=119,
                    initial_learn_rate=.06,
                    discretization=34):

    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 30000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1

    domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
    opt["domain"] = domain
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #30
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1204.,
                    lambda_=0.,
                    boyan_N0=7353.2,
                    initial_learn_rate=.9712):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    sparsify = 1
    kappa = 1e-7
    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain,
                          discover_threshold,
                          initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #31
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=0.012695,
                    lambda_=0.2,
                    boyan_N0=80.798,
                    initial_learn_rate=0.402807):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDDK(domain,
                           discover_threshold,
                           initial_rep,
                           sparsify=sparsify,
                           useCache=True,
                           lazy=True,
                           lambda_=lambda_)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #32
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-qlearning"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    # Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=20)

    # Policy
    policy = eGreedy(representation, epsilon=0.2)

    # Agent
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                       discount_factor=domain.discount_factor,
                       initial_learn_rate=0.1,
                       learn_rate_decay_mode="boyan", boyan_N0=100,
                       lambda_=0.)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Example #33
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        lambda_=0.,
        boyan_N0=116.7025,
        initial_learn_rate=0.01402,
        discretization=6.):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain

    representation = IndependentDiscretization(
        domain,
        discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                  discount_factor=domain.discount_factor,
                  lambda_=lambda_,
                  initial_learn_rate=initial_learn_rate,
                  learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #34
    def runRewardIRL(self, N=5):
        opt = deepcopy(self.opt_template)
        dist = self.getIRLTDist(self.env_template["consumable"], N=N)
        bdist = self.getIRLDist(N=N, rand=True)
        dist = [d - bdist for d in dist]

        print(dist)

        domain = self.createStateDomain(
            waypoints=self.env_template["consumable"],
            rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
                x, y, z, w, dist, self.env_template["consumable"]))

        opt["domain"] = domain
        representation = IncrementalTabular(domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation, policy=policy,
                       discount_factor=domain.discount_factor,
                       initial_learn_rate=0.1,
                       learn_rate_decay_mode="boyan", boyan_N0=100,
                       lambda_=0.)

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain=self.createStateDomain(self.env_template["consumable"]),
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        
        return np.max(experiment.result["return"]), np.sum(experiment.result["return"])
Example #35
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        boyan_N0=753,
        initial_learn_rate=.7,
        resolution=25.,
        num_rbfs=206.,
        lambda_=0.75):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 10000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #36
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=1.0,
        lambda_=0.,
        boyan_N0=20.1,
        initial_learn_rate=0.330):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7
    domain = IntruderMonitoring()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #37
def make_experiment(exp_id=1, path="./Results/Tutorial/ChainMDPTut-SARSA"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    chainSize = 50
    domain = ChainMDPTut(chainSize=chainSize)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(policy=policy,
                         representation=representation,
                         discount_factor=domain.discount_factor,
                         initial_learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
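As the docstring notes, exp_id seeds the random number generators, so independent trials are obtained simply by calling make_experiment with different ids. A brief sketch under that assumption, reusing the run()/save() pattern shown inline in the earlier examples:

for seed in range(1, 11):                      # ten independent runs, ids 1..10
    experiment = make_experiment(exp_id=seed)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()                          # results land in the shared path, distinguished by exp_id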
Example #38
    def runIRL(self, N=5):
        opt = deepcopy(self.opt_template)

        dist = self.getIRLDist(N=N)
        bdist = self.getIRLDist(N=N, rand=True)

        #print dist-bdist

        domain = self.createMarkovDomain(
            rewardFunction=lambda x, y, z, w:
                ConsumableGridWorldIRL.maxEntReward(x, y, z, w, dist - bdist))
        opt["domain"] = domain

        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        performance_domain = self.createMarkovDomain()

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain=performance_domain,
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
Example #39
def _make_experiment(domain, exp_id=1,
                     path="./Results/Tmp/test_FiniteTrackCartPole"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param domain: the domain object to be used in the experiment
    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = IncrementalTabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    agent = SARSA(representation=representation, policy=policy,
                  discount_factor=domain.discount_factor,
                  learn_rate=0.1)
    checks_per_policy = 3
    max_steps = 50
    num_policy_checks = 3
    experiment = Experiment(**locals())
    return experiment
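Because this helper builds its Experiment from locals(), the caller supplies the domain object directly, as the docstring notes. A quick usage sketch, assuming one of the finite-track cart-pole domains used elsewhere on this page:

domain = FiniteCartPoleBalanceOriginal(good_reward=0.)
experiment = _make_experiment(domain, exp_id=1)
experiment.run(visualize_steps=False,
               visualize_learning=False,
               visualize_performance=0)
experiment.save()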
Example #40
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=753,
                    initial_learn_rate=.7,
                    discretization=20.,
                    lambda_=0.75):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 5000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 10

    domain = InfCartPoleBalance(episodeCap=1000)
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #41
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03613232738,
        lambda_=0.,
        boyan_N0=12335.665,
        initial_learn_rate=0.037282,
        discretization=6.):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 50000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 10
    sparsify = True
    kappa = 1e-7
    domain = InfCartPoleBalance()
    opt["domain"] = domain

    initial_rep = IndependentDiscretization(
        domain,
        discretization=discretization)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          discretization=discretization,
                          useCache=True,
                          iFDDPlus=1 - kappa)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy, representation,
                  lambda_=lambda_,
                  discount_factor=domain.discount_factor,
                  initial_learn_rate=initial_learn_rate,
                  learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #42
def make_experiment(exp_id=1, path="./Results/Tutorial/ChainMDPTut-SARSA"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    ## Domain:
    chainSize = 50
    domain = ChainMDPTut(chainSize=chainSize)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(representation=representation, policy=policy,
                  disount_factor=domain.discount_factor,
                       learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Example #43
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=0.03104970,
        lambda_=0.,
        boyan_N0=1220.247254,
        initial_learn_rate=0.27986823):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7
    domain = BlocksWorld(blocks=6, noise=0.3)
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain, discover_threshold, initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #44
File: rbfs.py Project: okkhoy/rlpy
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        boyan_N0=136,
        lambda_=0.0985,
        initial_learn_rate=0.090564,
        resolution=13., num_rbfs=9019):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = RBF(domain, num_rbfs=int(num_rbfs),
                         resolution_max=resolution, resolution_min=resolution,
                         const_feature=False, normalize=True, seed=exp_id)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(
        policy, representation, discount_factor=domain.discount_factor,
        lambda_=lambda_, initial_learn_rate=initial_learn_rate,
        learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #45
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=120,
                    initial_learn_rate=.06,
                    discretization=50):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1

    domain = FiftyChain()
    opt["domain"] = domain
    representation = Tabular(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=0.9,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Example #46
    def make_experiment(
        # Path needs to have this format or hypersearch breaks
            exp_id=1, path=get_log_path(),
            boyan_N0=330.65,
            initial_learn_rate=0.219,
            lambda_=0.5547,
            resolution=7.0, num_rbfs=86.0,
            epsilon=0.4645,
            inv_discount_factor=3.186e-5,
            checks_per_policy=1):
        opt = {}
        opt["exp_id"] = exp_id
        opt["max_steps"] = 300000
        opt["num_policy_checks"] = 40
        opt["checks_per_policy"] = checks_per_policy
        opt["path"] = path

        discount_factor = 1.0 - inv_discount_factor

        domain = DomainType()
        domain.discount_factor = discount_factor
        opt["domain"] = domain
        representation = RBF(domain, num_rbfs=int(num_rbfs),
                             resolution_max=resolution, resolution_min=resolution,
                             const_feature=False, normalize=True, seed=exp_id)
        policy = eGreedy(representation, epsilon=epsilon)
        opt["agent"] = Q_LEARNING(
            policy, representation, discount_factor=domain.discount_factor,
            lambda_=lambda_, initial_learn_rate=initial_learn_rate,
            learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
        experiment = Experiment(**opt)
        return experiment
Example #47
    def solve(self):
        """Solve the domain MDP."""

        self.start_time = clock()  # Used to track the total time for solving
        self.bellmanUpdates = 0
        converged = False
        PI_iteration = 0

        # The policy is maintained as a separate copy of the representation.
        # This way, as the representation is updated, the policy remains intact.
        policy = eGreedy(
            deepcopy(self.representation),
            epsilon=0,
            forcedDeterministicAmongBestActions=True)

        while self.hasTime() and not converged:

            self.trajectoryBasedPolicyEvaluation(policy)

            # Policy improvement: updating the representation of the value
            # function automatically improves the greedy policy.
            PI_iteration += 1

            # The weight vector can grow when the representation is expanded,
            # so zero-pad the policy's (older) copy before comparing.
            paddedTheta = padZeros(policy.representation.weight_vec,
                                   len(self.representation.weight_vec))

            # Calculate the change in the weight_vec as L2-norm
            delta_weight_vec = np.linalg.norm(paddedTheta - self.representation.weight_vec)
            converged = delta_weight_vec < self.convergence_threshold

            # Update the underlying value function of the policy
            # Sync the policy's value function with the latest representation
            policy.representation = deepcopy(self.representation)

            (performance_return, performance_steps, performance_term,
             performance_discounted_return) = self.performanceRun()
            self.logger.info(
                'PI #%d [%s]: BellmanUpdates=%d, ||delta-weight_vec||=%0.4f, '
                'Return=%0.3f, steps=%d, features=%d' % (
                    PI_iteration,
                    hhmmss(deltaT(self.start_time)),
                    self.bellmanUpdates,
                    delta_weight_vec,
                    performance_return,
                    performance_steps,
                    self.representation.features_num))
            if self.show:
                # The original call referenced undefined locals `a` and `s`;
                # show only the learned value function instead.
                self.domain.showLearning(self.representation)

            # store stats
            self.result["bellman_updates"].append(self.bellmanUpdates)
            self.result["return"].append(performance_return)
            self.result["planning_time"].append(deltaT(self.start_time))
            self.result["num_features"].append(self.representation.features_num)
            self.result["steps"].append(performance_steps)
            self.result["terminated"].append(performance_term)
            self.result["discounted_return"].append(performance_discounted_return)
            self.result["policy_improvemnt_iteration"].append(PI_iteration)

        if converged:
            self.logger.info('Converged!')
        super(TrajectoryBasedPolicyIteration, self).solve()
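# padZeros (used in solve above) aligns weight vectors of different lengths
# before taking the norm. A minimal sketch of such a helper, as a hypothetical
# reconstruction; the actual rlpy Tools implementation may differ.
import numpy as np

def padZeros(vec, target_len):
    """Zero-extend a 1-D vector to target_len; return it unchanged if already long enough."""
    vec = np.asarray(vec)
    if vec.shape[0] >= target_len:
        return vec
    padded = np.zeros(target_len, dtype=vec.dtype)
    padded[:vec.shape[0]] = vec
    return padded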
Ejemplo n.º 48
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.9,
                    boyan_N0=22.36,
                    initial_learn_rate=.068,
                    discretization=9):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = IndependentDiscretization(domain,
                                               discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Ejemplo n.º 49
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    lambda_=0.,
                    boyan_N0=3019.313,
                    initial_learn_rate=0.965830):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 500000
    opt["num_policy_checks"] = 30
    opt["checks_per_policy"] = 10
    beta_coef = 1e-6
    domain = PST(NUM_UAV=4)
    opt["domain"] = domain
    representation = IndependentDiscretization(domain)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Greedy_GQ(policy,
                             representation,
                             discount_factor=domain.discount_factor,
                             BetaCoef=beta_coef,
                             lambda_=lambda_,
                             initial_learn_rate=initial_learn_rate,
                             learn_rate_decay_mode="boyan",
                             boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Ejemplo n.º 50
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    boyan_N0=389.56,
                    lambda_=0.52738,
                    initial_learn_rate=.424409,
                    discretization=30):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 400000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 100

    domain = PuddleGapWorld()
    opt["domain"] = domain
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.1)
    # agent           = SARSA(representation,policy,domain,initial_learn_rate=1.,
    # lambda_=0., learn_rate_decay_mode="boyan", boyan_N0=100)
    opt["agent"] = Q_Learning(policy,
                              representation,
                              discount_factor=domain.discount_factor,
                              lambda_=lambda_,
                              initial_learn_rate=initial_learn_rate,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Ejemplo n.º 51
0
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        Rmax=10**10, lipschitz_constant=10**3, epsilon_d=0.01, knn=1):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    #opt["max_steps"] = 150000
    opt["max_steps"] = 8000
    #opt["num_policy_checks"] = 30
    opt["num_policy_checks"] = 4
    opt["checks_per_policy"] = 2
    # NOTE: the two assignments below override the epsilon_d and knn arguments above.
    epsilon_d = 0.9
    knn = 1

    domain = HIVTreatment()
    opt["domain"] = domain
    representation = RMAX_repr(
        domain, Rmax, lipschitz_constant, epsilon_d=epsilon_d, k=knn)
    policy = eGreedy(representation, epsilon=0.0)

    opt["agent"] = RMAX(
        policy, representation,discount_factor=domain.discount_factor,
        lambda_=0, initial_learn_rate=0)

    experiment = Experiment(**opt)
    return experiment
Ejemplo n.º 52
0
def make_experiment(
        exp_id=1, path="./Results/Temp/{domain}/{agent}/{representation}/",
        discover_threshold=.0807,
        boyan_N0=389.56,
        lambda_=0.52738,
        initial_learn_rate=.424409,
        kernel_resolution=8.567677):
    opt = {}
    opt["exp_id"] = exp_id
    opt["max_steps"] = 40000
    opt["num_policy_checks"] = 20
    opt["checks_per_policy"] = 100
    active_threshold = 0.01
    max_base_feat_sim = 0.5
    sparsify = 1

    domain = PuddleWorld()
    opt["domain"] = domain
    kernel_width = (domain.statespace_limits[:, 1]
                    - domain.statespace_limits[:, 0]) / kernel_resolution
    representation = KernelizediFDD(domain, sparsify=sparsify,
                                    kernel=gaussian_kernel,
                                    kernel_args=[kernel_width],
                                    active_threshold=active_threshold,
                                    discover_threshold=discover_threshold,
                                    normalization=True,
                                    max_active_base_feat=10,
                                    max_base_feat_sim=max_base_feat_sim)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = Q_LEARNING(policy, representation, discount_factor=domain.discount_factor,
                       lambda_=lambda_, initial_learn_rate=initial_learn_rate,
                       learn_rate_decay_mode="boyan", boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
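# The kernel_width computation above is simply (state-space range) divided by
# kernel_resolution, per dimension. A quick numeric check, assuming PuddleWorld's
# usual unit-square limits (an assumption; consult the domain for exact values).
import numpy as np

statespace_limits = np.array([[0.0, 1.0],   # x: [min, max]
                              [0.0, 1.0]])  # y: [min, max]
kernel_resolution = 8.567677
kernel_width = (statespace_limits[:, 1] - statespace_limits[:, 0]) / kernel_resolution
print(kernel_width)  # ~[0.117, 0.117]: each Gaussian spans about 1/8.6 of each axis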
Ejemplo n.º 53
0
def make_experiment(exp_id=1,
                    path="./Results/Temp/{domain}/{agent}/{representation}/",
                    discover_threshold=1.0,
                    lambda_=0.,
                    boyan_N0=20.1,
                    initial_learn_rate=0.330):
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 100000
    opt["num_policy_checks"] = 10
    opt["checks_per_policy"] = 1
    sparsify = 1
    ifddeps = 1e-7
    domain = IntruderMonitoring()
    opt["domain"] = domain
    initial_rep = IndependentDiscretization(domain)
    representation = iFDD(domain,
                          discover_threshold,
                          initial_rep,
                          sparsify=sparsify,
                          useCache=True,
                          iFDDPlus=1 - ifddeps)
    policy = eGreedy(representation, epsilon=0.1)
    opt["agent"] = SARSA(policy,
                         representation,
                         discount_factor=domain.discount_factor,
                         lambda_=lambda_,
                         initial_learn_rate=initial_learn_rate,
                         learn_rate_decay_mode="boyan",
                         boyan_N0=boyan_N0)
    experiment = Experiment(**opt)
    return experiment
Ejemplo n.º 54
0
def make_experiment(exp_id=1, path="./Results/Tutorial/gridworld-IncrTabularTut"):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id

    ## Domain:
    maze = os.path.join(GridWorld.default_map_dir, '4x5.txt')
    domain = GridWorld(maze, noise=0.3)
    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = IncrTabularTut(domain)

    ## Policy
    policy = eGreedy(representation, epsilon=0.2)

    ## Agent
    opt["agent"] = SARSA(representation=representation, policy=policy,
                  discount_factor=domain.discount_factor,
                       learn_rate=0.1)
    opt["checks_per_policy"] = 100
    opt["max_steps"] = 2000
    opt["num_policy_checks"] = 10
    experiment = Experiment(**opt)
    return experiment
Ejemplo n.º 55
0
def select_agent(name: Optional[str], _seed: int) -> Agent:
    tabular = Tabular(DOMAIN, discretization=20)
    if name is None or name == 'lspi':
        policy = eGreedy(tabular, epsilon=0.1)
        return LSPI(policy, tabular, DOMAIN.discount_factor, MAX_STEPS, 1000)
    elif name == 'nac':
        return NaturalActorCritic(GibbsPolicy(tabular),
                                  tabular,
                                  DOMAIN.discount_factor,
                                  forgetting_rate=0.3,
                                  min_steps_between_updates=100,
                                  max_steps_between_updates=1000,
                                  lambda_=0.7,
                                  learn_rate=0.1)
    elif name == 'tabular-q':
        return Q_Learning(
            eGreedy(tabular, epsilon=0.1),
            tabular,
            discount_factor=DOMAIN.discount_factor,
            lambda_=0.3,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    elif name == 'ifddk-q':
        lambda_ = 0.3
        ifddk = iFDDK(
            DOMAIN,
            discovery_threshold=1.0,
            initial_representation=IndependentDiscretization(DOMAIN),
            sparsify=True,
            useCache=True,
            lazy=True,
            lambda_=lambda_,
        )
        return Q_Learning(
            eGreedy(ifddk, epsilon=0.1),
            ifddk,
            discount_factor=DOMAIN.discount_factor,
            lambda_=lambda_,
            initial_learn_rate=0.11,
            learn_rate_decay_mode='boyan',
            boyan_N0=100,
        )
    else:
        raise NotImplementedError("Unknown agent name: {}".format(name))
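# A sketch of how select_agent might be wired into a run, assuming the
# module-level DOMAIN and MAX_STEPS constants used above and the standard rlpy
# Experiment API; this is illustrative, not the original author's driver code.
from rlpy.Experiments import Experiment

def run_one(agent_name, seed=1, path="./Results/Temp"):
    agent = select_agent(agent_name, seed)
    experiment = Experiment(exp_id=seed, path=path, domain=DOMAIN, agent=agent,
                            max_steps=MAX_STEPS, num_policy_checks=10,
                            checks_per_policy=1)
    experiment.run(visualize_performance=0)
    experiment.save()
    return experiment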
Ejemplo n.º 56
0
def make_experiment(exp_id=1,
                    path="./Results/Temp",
                    initial_learn_rate=.40,
                    lambda_=0.,
                    resolution=25,
                    num_rbfs=300):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.
    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    # import sys
    # import os
    # cur_dir = os.path.expanduser("~/work/clipper/models/rl/")
    # sys.path.append(cur_dir)
    # from Domains import RCCarModified
    # from Policies import RCCarGreedy

    # Experiment variables
    opt = {}
    opt["path"] = path
    opt["exp_id"] = exp_id
    opt["max_steps"] = 200000
    opt["num_policy_checks"] = 15
    opt["checks_per_policy"] = 2
    # Logging

    domain = RCCarLeftTurn(noise=0.)
    opt["domain"] = domain

    # Representation
    kernel = gaussian_kernel
    representation = RandomLocalBases(domain,
                                      kernel,
                                      num=int(num_rbfs),
                                      normalization=True,
                                      resolution_max=resolution,
                                      seed=exp_id)

    policy = eGreedy(representation, epsilon=0.15)
    # if biasedaction > -1:
    #     print "No Random starts with biasing {}".format(i % 4)
    #     policy = BiasedGreedy(representation, epsilon=0.5, biasedaction=biasedaction)

    # Agent

    opt["agent"] = Q_Learning(policy,
                              representation,
                              domain.discount_factor,
                              initial_learn_rate=initial_learn_rate,
                              lambda_=lambda_,
                              learn_rate_decay_mode="const")

    experiment = Experiment(**opt)

    return experiment
Ejemplo n.º 57
0
def make_experiment(exp_id=1,
                    path="./Results/Experiments/",
                    domain_class="GridWorld",
                    mapf='9x9-2Path0.txt',
                    max_steps=5000,
                    num_policy_checks=50,
                    agent_eps=0.1,
                    env_noise=0.1,
                    seg_goal=0.8,
                    step_reward=-0.001,
                    weights=None):
    """
    Each file specifying an experimental setup should contain a
    make_experiment function which returns an instance of the Experiment
    class with everything set up.

    @param exp_id: number used to seed the random number generators
    @param path: output directory where logs and results are stored
    """
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path

    maze = os.path.join(GridWorldInter.default_map_dir, mapf)

    ## Domain:
    if domain_class == "GridWorld":
        domain = GridWorld(maze, noise=env_noise, step_reward=step_reward)
    elif domain_class == "GridWorldInter":
        domain = GridWorldInter(maze, noise=env_noise, new_goal=seg_goal)
    else:
        raise ValueError("Unsupported domain_class: %s" % domain_class)

    opt["domain"] = domain

    ## Representation
    # discretization only needed for continuous state spaces, discarded otherwise
    representation = Tabular(domain, discretization=20)
    if weights is not None:
        assert domain_class == "GridWorld"  ## ensure that we are transferring to the right class
        representation.weight_vec = weights

    ## Policy
    policy = eGreedy(
        representation,
        epsilon=agent_eps)  ## Need to change this back, limiting noise ATM

    ## Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.3)
    opt["checks_per_policy"] = 50
    opt["max_steps"] = max_steps
    opt["num_policy_checks"] = num_policy_checks

    experiment = ExperimentSegment(**opt)
    return experiment
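# The weights argument above suggests a two-stage transfer workflow: train on the
# segmented GridWorldInter task, then warm-start plain GridWorld with the learned
# weight vector. A sketch of that usage, assuming ExperimentSegment exposes its
# agent like the base rlpy Experiment does, and that both stages use the same map
# (so the Tabular weight vectors have matching lengths).
def run_transfer(exp_id=1):
    # Stage 1: learn on the segmented domain.
    seg_exp = make_experiment(exp_id=exp_id, domain_class="GridWorldInter")
    seg_exp.run(visualize_performance=0)
    learned_weights = seg_exp.agent.representation.weight_vec.copy()

    # Stage 2: transfer the learned weights into the plain GridWorld task.
    target_exp = make_experiment(exp_id=exp_id, domain_class="GridWorld",
                                 weights=learned_weights)
    target_exp.run(visualize_performance=0)
    target_exp.save()
    return target_exp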