Exemple #1
0
 def createSlidingDomain(self, k):
     return ConsumableGridWorldIRL(
         self.env_template["consumable"],
         mapname=self.env_template["map"],
         encodingFunction=lambda x: ConsumableGridWorldIRL.
         slidingWindowEncoding(x, k),
         noise=self.env_template["noise"])
Exemple #2
0
 def createPassageDomain(self, selfwaypoints, k=5):
     return ConsumableGridWorldIRL(
         self.env_template["consumable"],
         mapname=self.env_template["map"],
         encodingFunction=lambda x: ConsumableGridWorldIRL.
         statePassageEncoding(x, waypoints, k),
         noise=self.env_template["noise"],
         binary=True)
Exemple #3
0
 def createStateDomain(self, waypoints, rewardFunction=None):
     return ConsumableGridWorldIRL(
         self.env_template["consumable"],
         mapname=self.env_template["map"],
         encodingFunction=lambda x: ConsumableGridWorldIRL.
         stateVisitEncoding(x, waypoints),
         rewardFunction=rewardFunction,
         noise=self.env_template["noise"],
         binary=True)
Exemple #4
0
 def createMarkovDomain(self, k=1, rewardFunction=None):
     return ConsumableGridWorldIRL(
         self.env_template["consumable"],
         mapname=self.env_template["map"],
         encodingFunction=lambda x: ConsumableGridWorldIRL.
         allMarkovEncoding(x, k),
         rewardFunction=rewardFunction,
         noise=self.env_template["noise"],
         binary=True)
Exemple #5
0
def grid_world1_reward(exp_id=2, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise,
        binary=True)

    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=exp)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
Exemple #6
0
def gridworld1_tirl(exp_id=7, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise,
        binary=True)
    #domain = Pinball(noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20

    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    dist = calculateStateDist((10, 7), trajs)
    a = TransitionStateClustering(window_size=2)
    for t in trajs:
        N = len(t)
        demo = np.zeros((N, 2))
        for i in range(0, N):
            demo[i, :] = t[i][0:2]
        a.addDemonstration(demo)
    a.fit(normalize=False, pruning=0.5)
    dist = calculateStateDist((10, 7), trajs)
    ac = discrete2DClustersToPoints(a.model, dist, radius=1)

    # Policy reset
    policy = eGreedy(representation, epsilon=0.3)
    representation = Tabular(domain, discretization=discretization)

    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.tRewardIRL(
            x, y, z, w, dist, [(7, 5)]),
        noise=noise)

    pdomain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise)

    opt["domain"] = domain
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=pdomain,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
Exemple #7
0
def gridworld1_rirl(exp_id=6, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise,
        binary=True)
    #domain = Pinball(noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20

    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    dist = calculateStateDist((10, 7), trajs)

    # Policy reset
    policy = eGreedy(representation, epsilon=0.3)
    representation = Tabular(domain, discretization=discretization)

    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
            x, y, z, w, dist),
        noise=noise)

    pdomain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise)

    opt["domain"] = domain
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=pdomain,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
Exemple #8
0
def grid_world1_trp(exp_id=4, path="./Results/gridworld1"):
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 20

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        binary=True,
        noise=noise)
    #domain = Pinball(noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    a = TransitionStateClustering(window_size=2)
    for t in trajs:
        N = len(t)
        demo = np.zeros((N, 2))
        for i in range(0, N):
            demo[i, :] = t[i][0:2]
        a.addDemonstration(demo)
    a.fit(normalize=False, pruning=0.5)
    ac = [(round(a.means_[0][0]), round(a.means_[0][1])) for a in a.model]

    print ac

    #reinitialize
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.statePassageEncoding(
            x, ac, 5),
        noise=noise)
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.3)
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    opt["domain"] = domain

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])