    def runRewardIRL(self, N=5):
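        """Train tabular Q-learning on the waypoint state domain with a reward shaped
        by IRL state distributions estimated from N demonstrations, minus a
        random-rollout baseline (getIRLTDist and getIRLDist are helpers assumed to be
        defined elsewhere on this class). Returns (max return, total return)."""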
        opt = deepcopy(self.opt_template)
        dist = self.getIRLTDist(self.env_template["consumable"],N=N)
        bdist = self.getIRLDist(N=N, rand=True)
        dist = [d-bdist for d in dist]

        print(dist)

        domain = self.createStateDomain(waypoints=self.env_template["consumable"],
                                        rewardFunction=lambda x,y,z,w: ConsumableGridWorldIRL.rewardIRL(x,y,z,w,dist,self.env_template["consumable"]))
        
        opt["domain"] = domain
        representation = IncrementalTabular(domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation, policy=policy,
                       discount_factor=domain.discount_factor,
                       initial_learn_rate=0.1,
                       learn_rate_decay_mode="boyan", boyan_N0=100,
                       lambda_=0.)

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain = self.createStateDomain(self.env_template["consumable"]),
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        
        return np.max(experiment.result["return"]),np.sum(experiment.result["return"])
def grid_world1_trp(exp_id=4, path="./Results/gridworld1"):
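    """Gridworld experiment: generate planner demonstrations, recover candidate
    waypoints with transition state clustering, then train Q-learning with a
    passage-based state encoding. Returns (max return, total return)."""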
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 20

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir, '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL([(7,5), (1,2)],
                                    mapname=maze, 
                                    encodingFunction= lambda x: ConsumableGridWorldIRL.stateVisitEncoding(x,[(7,5)]),
                                    binary=True, 
                                    noise=noise)
    #domain = Pinball(noise=0.3)
    
    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

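    # Roll out N=5 demonstration trajectories with the seed policy and cluster their
    # transition states; GoalPathPlanner and TransitionStateClustering are
    # project-specific helpers.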
    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    a = TransitionStateClustering(window_size=2)
    for t in trajs:
        N = len(t)
        demo = np.zeros((N,2))
        for i in range(0,N):
            demo[i,:] = t[i][0:2]
        a.addDemonstration(demo)
    a.fit(normalize=False, pruning=0.5)
    ac = [(round(m.means_[0][0]), round(m.means_[0][1])) for m in a.model]

    print(ac)

    #reinitialize
    domain = ConsumableGridWorldIRL([(7,5), (1,2)],
                                    mapname=maze, 
                                    encodingFunction= lambda x: ConsumableGridWorldIRL.statePassageEncoding(x,ac,5), noise=noise)
    representation = IncrementalTabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.3)
    opt["agent"] = Q_Learning(representation=representation, policy=policy,
                       discount_factor=domain.discount_factor,
                       initial_learn_rate=0.1,
                       learn_rate_decay_mode="boyan", boyan_N0=100,
                       lambda_=0.)

    opt["domain"] = domain

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]),np.sum(experiment.result["return"])
Example #3
    def runIRL(self, N=5):
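        """Train Q-learning on the Markov domain with a maximum-entropy IRL reward
        (maxEntReward) driven by the demonstration state distribution minus a
        random-rollout baseline. Returns (max return, total return)."""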
        opt = deepcopy(self.opt_template)

        dist = self.getIRLDist(N=N)
        bdist = self.getIRLDist(N=N, rand=True)

        #print dist-bdist

        domain = self.createMarkovDomain(
            rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.
            maxEntReward(x, y, z, w, dist - bdist))
        opt["domain"] = domain

        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        performance_domain = self.createMarkovDomain()

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain=performance_domain,
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
Example #4
    def runTIRL(self, N=5, w=2, pruning=0.5):
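        """Train Q-learning on a state domain whose waypoints are recovered by
        transition state clustering (getTSCWaypoints) and whose reward is shaped by
        the demonstration IRL distribution; performance is evaluated on the original
        consumable-waypoint domain. Returns (max return, total return)."""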
        opt = deepcopy(self.opt_template)
        dist = self.getIRLDist(N=N)
        ac = self.getTSCWaypoints(N, w, pruning)
        domain = self.createStateDomain(
            waypoints=ac,
            rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
                x, y, z, w, dist))
        opt["domain"] = domain
        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       performance_domain=self.createStateDomain(
                           waypoints=self.env_template["consumable"]),
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
def get_demonstrations(demonstration_per_policy, max_policy_iter,
                       num_policy_demo_checks, agent):
    """return demonstrations generated from the parallel parking car rlpy simulator"""
    opt = {}
    opt["exp_id"] = 1
    #    opt["path"] = "./Results/gridworld2"
    opt["checks_per_policy"] = 5
    opt["max_steps"] = 1000000
    opt["num_policy_checks"] = 1000
    exp = 0.3
    discretization = 20
    walls = [(-1, -0.3, 0.1, 0.3)]
    domain = RCIRL([(-0.1, -0.25)],
                   wallArray=walls,
                   noise=0,
                   rewardFunction=RCIRL.rcreward)
    domain.episodeCap = 200
    # Representation 10
    representation = RBF(domain,
                         num_rbfs=1000,
                         resolution_max=25,
                         resolution_min=25,
                         const_feature=False,
                         normalize=True,
                         seed=1)  #discretization=discretization)
    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    # Agent
    # opt["agent"]=agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.7,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=700,
                              lambda_=0.)

    opt["domain"] = domain

    pdomain = RCIRL([(-0.1, -0.25)], wallArray=walls, noise=0)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=pdomain,
                   visualize_learning=False,
                   visualize_performance=1)
    # return experiment
    return [[np.array(y) for y in x] for x in experiment.all_experiment_list]
Example #6
def grid_world1_reward(exp_id=2, path="./Results/gridworld1"):
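    """Baseline gridworld experiment: tabular Q-learning on the 10x7 map with a
    binary state-visit encoding of the waypoint (7, 5).
    Returns (max return, total return)."""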
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise,
        binary=True)

    opt["domain"] = domain

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=exp)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()
    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
Example #10
    def runSliding(self, k=3):
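        """Train Q-learning on the domain returned by createSlidingDomain(k), a helper
        assumed to be defined elsewhere on this class.
        Returns (max return, total return)."""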
        opt = deepcopy(self.opt_template)
        domain = self.createSlidingDomain(k)
        opt["domain"] = domain
        representation = IncrementalTabular(
            domain, discretization=self.env_template["discretization"])
        policy = eGreedy(representation, epsilon=self.env_template["exp"])
        opt["agent"] = Q_Learning(representation=representation,
                                  policy=policy,
                                  discount_factor=domain.discount_factor,
                                  initial_learn_rate=0.1,
                                  learn_rate_decay_mode="boyan",
                                  boyan_N0=100,
                                  lambda_=0.)

        experiment = Experiment(**opt)
        experiment.run(visualize_steps=False,
                       visualize_learning=False,
                       visualize_performance=0)
        experiment.save()

        return np.max(experiment.result["return"]), np.sum(
            experiment.result["return"])
Example #11
def gridworld1_tirl(exp_id=7, path="./Results/gridworld1"):
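    """TSC + IRL gridworld experiment: segment planner demonstrations with transition
    state clustering, convert the clusters to grid waypoints, and train with the
    segmented reward tRewardIRL; performance is measured on the unshaped domain.
    Returns (max return, total return)."""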
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise,
        binary=True)
    #domain = Pinball(noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20

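    # Roll out demonstrations, estimate the state-visitation distribution over the
    # 10x7 grid, and cluster transition states into waypoints; the helpers below are
    # project-specific.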
    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    dist = calculateStateDist((10, 7), trajs)
    a = TransitionStateClustering(window_size=2)
    for t in trajs:
        N = len(t)
        demo = np.zeros((N, 2))
        for i in range(0, N):
            demo[i, :] = t[i][0:2]
        a.addDemonstration(demo)
    a.fit(normalize=False, pruning=0.5)
    dist = calculateStateDist((10, 7), trajs)
    ac = discrete2DClustersToPoints(a.model, dist, radius=1)

    # Policy reset: rebuild the representation first so the new policy wraps it
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.3)

    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.tRewardIRL(
            x, y, z, w, dist, [(7, 5)]),
        noise=noise)

    pdomain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise)

    opt["domain"] = domain
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=pdomain,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])
Example #12
def gridworld1_rirl(exp_id=6, path="./Results/gridworld1"):
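    """IRL-reward gridworld experiment: estimate the demonstration state-visitation
    distribution and train with the shaped reward rewardIRL; performance is measured
    on the unshaped domain. Returns (max return, total return)."""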
    opt = {}
    opt["exp_id"] = exp_id
    opt["path"] = path
    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20
    noise = 0.1
    exp = 0.3
    discretization = 400

    # Domain:
    maze = os.path.join(ConsumableGridWorld.default_map_dir,
                        '10x7-ACC2011.txt')
    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise,
        binary=True)
    #domain = Pinball(noise=0.3)

    # Representation
    representation = Tabular(domain, discretization=discretization)

    # Policy
    policy = eGreedy(representation, epsilon=0.3)

    # Agent
    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    opt["checks_per_policy"] = 10
    opt["max_steps"] = 150000
    opt["num_policy_checks"] = 20

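    # Roll out demonstrations and estimate the state-visitation distribution over the
    # 10x7 grid; this distribution drives the IRL-shaped reward below.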
    d = GoalPathPlanner(domain, representation, policy)
    trajs = d.generateTrajectories(N=5)
    dist = calculateStateDist((10, 7), trajs)

    # Policy reset: rebuild the representation first so the new policy wraps it
    representation = Tabular(domain, discretization=discretization)
    policy = eGreedy(representation, epsilon=0.3)

    opt["agent"] = Q_Learning(representation=representation,
                              policy=policy,
                              discount_factor=domain.discount_factor,
                              initial_learn_rate=0.1,
                              learn_rate_decay_mode="boyan",
                              boyan_N0=100,
                              lambda_=0.)

    domain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        rewardFunction=lambda x, y, z, w: ConsumableGridWorldIRL.rewardIRL(
            x, y, z, w, dist),
        noise=noise)

    pdomain = ConsumableGridWorldIRL(
        [(7, 5), (1, 2)],
        mapname=maze,
        encodingFunction=lambda x: ConsumableGridWorldIRL.stateVisitEncoding(
            x, [(7, 5)]),
        noise=noise)

    opt["domain"] = domain
    experiment = Experiment(**opt)
    experiment.run(visualize_steps=False,
                   performance_domain=pdomain,
                   visualize_learning=False,
                   visualize_performance=0)
    experiment.save()

    return np.max(experiment.result["return"]), np.sum(
        experiment.result["return"])