Example #1
def getPlotsForGridWorldQl(worlds, grid, starts, goals):
    lRates = [0.2, 0.7]
    epsilons = [0.1, 0.9]
    qlearningIter = [1000, 10000]
    worldCntr = 1

    for data in worlds:
        ql_rewards = []
        ql_error = []
        ql_time = []
        size = len(data)
        holesCoords = []
        for row in range(0, data.shape[0]):
            for col in range(0, data.shape[1]):
                if data[row, col] == 1:  # Obstacle
                    holesCoords.append((row, col))
                if data[row, col] == 2:  # Agent start
                    start = (row, col)
                if data[row, col] == 3:  # Goal
                    goal = (row, col)
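        # Build the MDP transition and reward models (and the lake helper
        # used for rendering) from the parsed grid.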
        transitions, reward, discount, lake = get_environement(
            data, size, holesCoords, start, goal)

        for lRate in lRates:
            for epsilon in epsilons:
                # Q-Learning
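                # alpha: learning rate, gamma: discount factor. rar/radr
                # appear to be the random-action (exploration) rate and its
                # decay, so epsilon here seeds the exploration schedule.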
                q_learning = QLearner.QLearningEx(
                    transitions,
                    reward,
                    grid=grid[worldCntr - 1],
                    start=starts[worldCntr - 1],
                    goals=goals[worldCntr - 1],
                    n_iter=qlearningIter[worldCntr - 1],
                    n_restarts=1000,
                    alpha=lRate,
                    gamma=0.9,
                    rar=epsilon,
                    radr=0.99)
                q_learning.run()
                print_as_grid(q_learning.policy, lake.lake, size)
                ql_rewards.append(q_learning.episode_reward)
                ql_time.append(q_learning.episode_times)
                ql_error.append(q_learning.episode_error)

        elCntr = 0

        print("First Combination reward mean: ", np.mean(ql_rewards[0]))
        print("Second Combination reward mean: ", np.mean(ql_rewards[1]))
        print("Third Combination reward mean: ", np.mean(ql_rewards[2]))
        print("Four Combination reward mean: ", np.mean(ql_rewards[3]))
        print("First Combination error mean: ", np.mean(ql_error[0]))
        print("Second Combination error mean: ", np.mean(ql_error[1]))
        print("Third Combination error mean: ", np.mean(ql_error[2]))
        print("Four Combination error mean: ", np.mean(ql_error[3]))

        plt.figure(figsize=(15, 8))
        plt.style.use('seaborn-whitegrid')
        for lRate in lRates:
            for epsilon in epsilons:
                linestyle = '-' if lRate == 0.2 else '--'
                plt.plot(range(0, 1000)[::10],
                         ql_error[elCntr][::10],
                         label='a: ' + str(lRate) + ', e: ' + str(epsilon),
                         linestyle=linestyle)
                elCntr += 1
        plt.ylabel('Convergence', fontsize=12)
        plt.xlabel('Iter. (x' + str(qlearningIter[worldCntr - 1]) + ')',
                   fontsize=12)
        plt.title('Convergence vs Iteration for Grid World no.' +
                  str(worldCntr),
                  fontsize=12,
                  y=1.03)
        plt.legend()
        plt.savefig(
            'Figures/Grid/Convergence vs Iteration for Grid World no.' +
            str(worldCntr) + ', QL.png')
        plt.close()

        elCntr = 0

        plt.figure(figsize=(15, 8))
        plt.style.use('seaborn-whitegrid')
        for lRate in lRates:
            for epsilon in epsilons:
                linestyle = '-' if lRate == 0.2 else '--'
                plt.plot(range(0, 1000)[::10],
                         ql_rewards[elCntr][::10],
                         label='a: ' + str(lRate) + ', e: ' + str(epsilon),
                         linestyle=linestyle)
                elCntr += 1
        plt.ylabel('Reward', fontsize=12)
        plt.xlabel('Iter. (x' + str(qlearningIter[worldCntr - 1]) + ')',
                   fontsize=12)
        plt.title('Reward vs Iteration for Grid World no.' + str(worldCntr),
                  fontsize=12,
                  y=1.03)
        plt.legend()
        plt.savefig('Figures/Grid/Reward vs Iteration for Grid World no.' +
                    str(worldCntr) + ', QL.png')
        plt.close()

        worldCntr += 1
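
A minimal call sketch (all inputs hypothetical; cells are encoded as 0=free, 1=hole, 2=start, 3=goal, and `grid`, `starts`, `goals` are given per world in the form QLearningEx expects):

import numpy as np

world = np.zeros((4, 4), dtype=int)
world[1, 2] = 1   # hole
world[0, 0] = 2   # agent start
world[3, 3] = 3   # goal

getPlotsForGridWorldQl(worlds=[world], grid=[world],
                       starts=[(0, 0)], goals=[[(3, 3)]])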
Example #2
def getPlotsForForestQl():
	lRates = [0.8, 0.9]
	epsilons = [0.8, 0.9]
	ql_rewards = []
	ql_error = []
	ql_time = []

	forest = ForestMng(states=1000, reward_wait=4, reward_cut=2, prob_fire=0.3)
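	# ForestMng is assumed to wrap pymdptoolbox's forest example: it exposes
	# transition (P) and reward (R) matrices for a 1000-state forest where
	# waiting yields reward 4, cutting yields 2, and fire occurs with
	# probability 0.3.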

	for lRate in lRates:
		for epsilon in epsilons:
			# Q-Learning
			q_learning = QLearner.QLearningEx(forest.P, forest.R, grid=np.zeros(shape=(15, 1)), start=0, goals=[14],
											  n_iter=1000, n_restarts=1000, alpha=lRate, gamma=0.9, rar=epsilon,
											  radr=0.999999)
			q_learning.run()
			ql_rewards.append(q_learning.episode_reward)
			ql_time.append(q_learning.episode_times)
			ql_error.append(q_learning.episode_error)
			print(q_learning.policy)

	elCntr = 0

	print("First Combination reward mean: ", np.mean(ql_rewards[0]))
	print("Second Combination reward mean: ", np.mean(ql_rewards[1]))
	print("Third Combination reward mean: ", np.mean(ql_rewards[2]))
	print("Four Combination reward mean: ", np.mean(ql_rewards[3]))
	print("First Combination error mean: ", np.mean(ql_error[0]))
	print("Second Combination error mean: ", np.mean(ql_error[1]))
	print("Third Combination error mean: ", np.mean(ql_error[2]))
	print("Four Combination error mean: ", np.mean(ql_error[3]))

	plt.figure(figsize=(15, 8))
	plt.style.use('seaborn-whitegrid')
	for lRate in lRates:
		for epsilon in epsilons:
			linestyle = '-' if lRate == 0.8 else '--'
			plt.plot(range(0, 1000)[::10], ql_error[elCntr][::10],
					 label='a: ' + str(lRate) + ', e: ' + str(epsilon),
					 linestyle=linestyle)
			elCntr += 1
	plt.ylabel('Convergence', fontsize=12)
	plt.xlabel('Iter.', fontsize=12)
	plt.title('Error Convergence vs Iteration for Forest Mng (1000 states, fire prob. 0.3)', fontsize=12, y=1.03)
	plt.legend()
	plt.savefig('Figures/Forest/Convergence vs Iteration for Forest Mng, QL State 1000.png')
	plt.close()

	elCntr = 0

	plt.figure(figsize=(15, 8))
	plt.style.use('seaborn-whitegrid')
	for lRate in lRates:
		for epsilon in epsilons:
			linestyle = '-' if lRate == 0.8 else '--'
			plt.plot(range(0, 1000)[::10], ql_rewards[elCntr][::10],
					 label='a: ' + str(lRate) + ', e: ' + str(epsilon),
					 linestyle=linestyle)
			elCntr += 1
	plt.ylabel('Reward', fontsize=12)
	plt.xlabel('Iter.', fontsize=12)
	plt.title('Reward vs Iteration for Forest Mng (1000 states)', fontsize=12, y=1.03)
	plt.legend()
	plt.savefig('Figures/Forest/Reward vs Iteration for Forest Mng, QL State 1000.png')
	plt.close()
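
For reference, a minimal sketch of the same MDP built directly with pymdptoolbox, assuming ForestMng mirrors mdptoolbox.example.forest:

from mdptoolbox.example import forest

P, R = forest(S=1000, r1=4, r2=2, p=0.3)  # transition and reward matrices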
Example #3
def findBestPolicyForGridWorlds(worlds, grid, starts, goals):
    qlearningIter = [1000, 10000]
    worldCntr = 1

    for data in worlds:
        size = len(data)
        holesCoords = []
        for row in range(0, data.shape[0]):
            for col in range(0, data.shape[1]):
                if data[row, col] == 1:  # Obstacle
                    holesCoords.append((row, col))
                if data[row, col] == 2:  # Agent start
                    start = (row, col)
                if data[row, col] == 3:  # Goal
                    goal = (row, col)
        transitions, reward, discount, lake = get_environement(
            data, size, holesCoords, start, goal)

        # Policy iteration
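        # eval_type=0 evaluates each candidate policy by solving the linear
        # Bellman system directly; eval_type=1 would evaluate iteratively.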
        policy_iteration = mdp.PolicyIteration(transitions,
                                               reward,
                                               discount,
                                               policy0=None,
                                               max_iter=1000,
                                               eval_type=0)
        policy_iteration.run()
        print_as_grid(policy_iteration.policy, lake.lake, size)
        print(policy_iteration.time)
        print(policy_iteration.iter)

        actions = getActions(policy_iteration.policy, start, goal, size)
        svg = gv.gridworld(n=size,
                           tile2classes=lake.tile2classes,
                           actions=actions,
                           extra_css='goal',
                           start=start,
                           policyList=policy_iteration.policy)
        svg.saveas("Figures/Grid/PI-Final-Path for World " + str(worldCntr) +
                   ".svg",
                   pretty=True)

        # Value iteration
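        # epsilon sets the stopping threshold: iteration halts once the
        # change in the value function is small enough, or at max_iter.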
        value_iteration = mdp.ValueIteration(transitions,
                                             reward,
                                             discount,
                                             epsilon=0.001,
                                             max_iter=1000,
                                             initial_value=0)
        value_iteration.run()
        print_as_grid(value_iteration.policy, lake.lake, size)
        print(value_iteration.time)
        print(value_iteration.iter)

        actions = getActions(value_iteration.policy, start, goal, size)
        svg = gv.gridworld(n=size,
                           tile2classes=lake.tile2classes,
                           actions=actions,
                           extra_css='goal',
                           start=start,
                           policyList=value_iteration.policy)
        svg.saveas("Figures/Grid/VI-Final-Path for World " + str(worldCntr) +
                   ".svg",
                   pretty=True)

        # Q-Learning
        q_learning = QLearner.QLearningEx(transitions,
                                          reward,
                                          grid=grid[worldCntr - 1],
                                          start=starts[worldCntr - 1],
                                          goals=goals[worldCntr - 1],
                                          n_iter=qlearningIter[worldCntr - 1],
                                          n_restarts=1000,
                                          alpha=0.2,
                                          gamma=0.9,
                                          rar=0.1,
                                          radr=0.99)
        q_learning.run()
        print_as_grid(q_learning.policy, lake.lake, size)
        #print(q_learning.time)

        actions = getActions(q_learning.policy, start, goal, size)
        svg = gv.gridworld(n=size,
                           tile2classes=lake.tile2classes,
                           actions=actions,
                           extra_css='goal',
                           start=start,
                           policyList=q_learning.policy)
        svg.saveas("Figures/Grid/QL-Final-Path for World " + str(worldCntr) +
                   ".svg",
                   pretty=True)

        worldCntr += 1
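
A small helper one might add to compare the resulting policies (hypothetical; assumes numpy is imported as np, as elsewhere in these examples):

def policyAgreement(p1, p2):
    # Fraction of states on which two policies choose the same action.
    return float(np.mean(np.array(p1) == np.array(p2)))

# e.g., inside the loop above:
#   print("PI vs VI agreement:",
#         policyAgreement(policy_iteration.policy, value_iteration.policy))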