import numpy as np
import matplotlib.pyplot as plt

# Project-local helpers (QLearner, mdp, gv, ForestMng, get_environement,
# print_as_grid, getActions) are assumed to be imported elsewhere in this module.


def getPlotsForGridWorldQl(worlds, grid, starts, goals):
    lRates = [0.2, 0.7]
    epsilons = [0.1, 0.9]
    qlearningIter = [1000, 10000]
    worldCntr = 1
    for data in worlds:
        ql_rewards = []
        ql_error = []
        ql_time = []
        size = len(data)
        holesCoords = []
        for row in range(data.shape[0]):
            for col in range(data.shape[1]):
                if data[row, col] == 1:  # Obstacle
                    holesCoords.append((row, col))
                if data[row, col] == 2:  # Start (robot)
                    start = (row, col)
                if data[row, col] == 3:  # Goal
                    goal = (row, col)
        transitions, reward, discount, lake = get_environement(
            data, size, holesCoords, start, goal)

        # Run Q-Learning once per (alpha, epsilon) combination: 2 x 2 = 4 runs.
        for lRate in lRates:
            for epsilon in epsilons:
                q_learning = QLearner.QLearningEx(
                    transitions, reward,
                    grid=grid[worldCntr - 1],
                    start=starts[worldCntr - 1],
                    goals=goals[worldCntr - 1],
                    n_iter=qlearningIter[worldCntr - 1],
                    n_restarts=1000,
                    alpha=lRate,
                    gamma=0.9,
                    rar=epsilon,
                    radr=0.99)
                q_learning.run()
                print_as_grid(q_learning.policy, lake.lake, size)
                ql_rewards.append(q_learning.episode_reward)
                ql_time.append(q_learning.episode_times)
                ql_error.append(q_learning.episode_error)

        print("First combination reward mean: ", np.mean(ql_rewards[0]))
        print("Second combination reward mean: ", np.mean(ql_rewards[1]))
        print("Third combination reward mean: ", np.mean(ql_rewards[2]))
        print("Fourth combination reward mean: ", np.mean(ql_rewards[3]))
        print("First combination error mean: ", np.mean(ql_error[0]))
        print("Second combination error mean: ", np.mean(ql_error[1]))
        print("Third combination error mean: ", np.mean(ql_error[2]))
        print("Fourth combination error mean: ", np.mean(ql_error[3]))

        # Convergence plot: solid lines for alpha=0.2, dashed for alpha=0.7.
        elCntr = 0
        plt.figure(figsize=(15, 8))
        plt.style.use('seaborn-whitegrid')
        for lRate in lRates:
            for epsilon in epsilons:
                linestyle = '-' if lRate == 0.2 else '--'
                plt.plot(range(0, 1000)[::10], ql_error[elCntr][::10],
                         label='a: ' + str(lRate) + ', e: ' + str(epsilon),
                         linestyle=linestyle)
                elCntr += 1
        plt.ylabel('Convergence', fontsize=12)
        plt.xlabel('Iter. (x' + str(qlearningIter[worldCntr - 1]) + ')',
                   fontsize=12)
        plt.title('Convergence vs Iteration for Grid World no.' + str(worldCntr),
                  fontsize=12, y=1.03)
        plt.legend()
        plt.savefig('Figures/Grid/Convergence vs Iteration for Grid World no.'
                    + str(worldCntr) + ', QL.png')
        plt.close()

        # Reward plot, same line-style convention.
        elCntr = 0
        plt.figure(figsize=(15, 8))
        plt.style.use('seaborn-whitegrid')
        for lRate in lRates:
            for epsilon in epsilons:
                linestyle = '-' if lRate == 0.2 else '--'
                plt.plot(range(0, 1000)[::10], ql_rewards[elCntr][::10],
                         label='a: ' + str(lRate) + ', e: ' + str(epsilon),
                         linestyle=linestyle)
                elCntr += 1
        plt.ylabel('Reward', fontsize=12)
        plt.xlabel('Iter. (x' + str(qlearningIter[worldCntr - 1]) + ')',
                   fontsize=12)
        plt.title('Reward vs Iteration for Grid World no.' + str(worldCntr),
                  fontsize=12, y=1.03)
        plt.legend()
        plt.savefig('Figures/Grid/Reward vs Iteration for Grid World no.'
                    + str(worldCntr) + ', QL.png')
        plt.close()

        worldCntr += 1
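
# --- Usage sketch (assumption, not part of the original module) ---
# A minimal driver for getPlotsForGridWorldQl, assuming the cell encoding
# used above (0 = free, 1 = obstacle, 2 = start, 3 = goal). The world layout
# and the shapes of the grid/starts/goals arguments are guesses at the
# expected calling convention, not the project's actual test data.
def _demo_grid_world_ql():
    small_world = np.array([[2, 0, 0, 0],
                            [0, 1, 0, 1],
                            [0, 0, 0, 1],
                            [1, 0, 0, 3]])
    worlds = [small_world]
    grids = [small_world]   # grid layout forwarded to QLearner.QLearningEx
    starts = [(0, 0)]       # must match the cell marked 2
    goals = [[(3, 3)]]      # must match the cell marked 3
    getPlotsForGridWorldQl(worlds, grids, starts, goals)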
def getPlotsForForestQl():
    lRates = [0.8, 0.9]
    epsilons = [0.8, 0.9]
    ql_rewards = []
    ql_error = []
    ql_time = []
    forest = ForestMng(states=1000, reward_wait=4, reward_cut=2,
                       prob_fire=0.3)

    # Run Q-Learning once per (alpha, epsilon) combination: 2 x 2 = 4 runs.
    for lRate in lRates:
        for epsilon in epsilons:
            q_learning = QLearner.QLearningEx(
                forest.P, forest.R,
                grid=np.zeros(shape=(15, 1)),
                start=0,
                goals=[14],
                n_iter=1000,
                n_restarts=1000,
                alpha=lRate,
                gamma=0.9,
                rar=epsilon,
                radr=0.999999)
            q_learning.run()
            ql_rewards.append(q_learning.episode_reward)
            ql_time.append(q_learning.episode_times)
            ql_error.append(q_learning.episode_error)
            print(q_learning.policy)

    print("First combination reward mean: ", np.mean(ql_rewards[0]))
    print("Second combination reward mean: ", np.mean(ql_rewards[1]))
    print("Third combination reward mean: ", np.mean(ql_rewards[2]))
    print("Fourth combination reward mean: ", np.mean(ql_rewards[3]))
    print("First combination error mean: ", np.mean(ql_error[0]))
    print("Second combination error mean: ", np.mean(ql_error[1]))
    print("Third combination error mean: ", np.mean(ql_error[2]))
    print("Fourth combination error mean: ", np.mean(ql_error[3]))

    # Convergence plot: solid lines for alpha=0.8, dashed for alpha=0.9.
    elCntr = 0
    plt.figure(figsize=(15, 8))
    plt.style.use('seaborn-whitegrid')
    for lRate in lRates:
        for epsilon in epsilons:
            linestyle = '-' if lRate == 0.8 else '--'
            plt.plot(range(0, 1000)[::10], ql_error[elCntr][::10],
                     label='a: ' + str(lRate) + ', e: ' + str(epsilon),
                     linestyle=linestyle)
            elCntr += 1
    plt.ylabel('Convergence', fontsize=12)
    plt.xlabel('Iter.', fontsize=12)
    plt.title('Error Convergence vs Iteration for Forest Mng, 1000 states, fire = 0.3',
              fontsize=12, y=1.03)
    plt.legend()
    plt.savefig('Figures/Forest/Convergence vs Iteration for Forest Mng, QL State 1000.png')
    plt.close()

    # Reward plot, same line-style convention.
    elCntr = 0
    plt.figure(figsize=(15, 8))
    plt.style.use('seaborn-whitegrid')
    for lRate in lRates:
        for epsilon in epsilons:
            linestyle = '-' if lRate == 0.8 else '--'
            plt.plot(range(0, 1000)[::10], ql_rewards[elCntr][::10],
                     label='a: ' + str(lRate) + ', e: ' + str(epsilon),
                     linestyle=linestyle)
            elCntr += 1
    plt.ylabel('Reward', fontsize=12)
    plt.xlabel('Iter.', fontsize=12)
    plt.title('Reward vs Iteration for Forest Mng, 1000 states',
              fontsize=12, y=1.03)
    plt.legend()
    plt.savefig('Figures/Forest/Reward vs Iteration for Forest Mng, QL State 1000.png')
    plt.close()
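
# --- ForestMng sketch (assumption) ---
# ForestMng is a project-local wrapper whose P and R presumably follow the
# classic forest-management MDP from mdptoolbox: r1 is the reward for waiting
# in the oldest state, r2 the reward for cutting, p the per-step fire
# probability. A minimal stand-in, assuming mdptoolbox is installed; the
# equivalence to ForestMng's internals is a guess.
def _demo_forest_mdp():
    from mdptoolbox.example import forest
    # 1000 states, reward 4 for waiting in the oldest state, 2 for cutting,
    # 30% chance of fire each step (mirrors the ForestMng(...) call above).
    P, R = forest(S=1000, r1=4, r2=2, p=0.3)
    return P, R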
def findBestPolicyForGridWorlds(worlds, grid, starts, goals):
    qlearningIter = [1000, 10000]
    worldCntr = 1
    for data in worlds:
        size = len(data)
        holesCoords = []
        for row in range(data.shape[0]):
            for col in range(data.shape[1]):
                if data[row, col] == 1:  # Obstacle
                    holesCoords.append((row, col))
                if data[row, col] == 2:  # Start (robot)
                    start = (row, col)
                if data[row, col] == 3:  # Goal
                    goal = (row, col)
        transitions, reward, discount, lake = get_environement(
            data, size, holesCoords, start, goal)

        # Policy iteration
        policy_iteration = mdp.PolicyIteration(transitions, reward, discount,
                                               policy0=None, max_iter=1000,
                                               eval_type=0)
        policy_iteration.run()
        print_as_grid(policy_iteration.policy, lake.lake, size)
        print(policy_iteration.time)
        print(policy_iteration.iter)
        actions = getActions(policy_iteration.policy, start, goal, size)
        svg = gv.gridworld(n=size, tile2classes=lake.tile2classes,
                           actions=actions, extra_css='goal', start=start,
                           policyList=policy_iteration.policy)
        svg.saveas("Figures/Grid/PI-Final-Path for World " + str(worldCntr)
                   + ".svg", pretty=True)

        # Value iteration
        value_iteration = mdp.ValueIteration(transitions, reward, discount,
                                             epsilon=0.001, max_iter=1000,
                                             initial_value=0)
        value_iteration.run()
        print_as_grid(value_iteration.policy, lake.lake, size)
        print(value_iteration.time)
        print(value_iteration.iter)
        actions = getActions(value_iteration.policy, start, goal, size)
        svg = gv.gridworld(n=size, tile2classes=lake.tile2classes,
                           actions=actions, extra_css='goal', start=start,
                           policyList=value_iteration.policy)
        svg.saveas("Figures/Grid/VI-Final-Path for World " + str(worldCntr)
                   + ".svg", pretty=True)

        # Q-Learning
        q_learning = QLearner.QLearningEx(transitions, reward,
                                          grid=grid[worldCntr - 1],
                                          start=starts[worldCntr - 1],
                                          goals=goals[worldCntr - 1],
                                          n_iter=qlearningIter[worldCntr - 1],
                                          n_restarts=1000,
                                          alpha=0.2, gamma=0.9,
                                          rar=0.1, radr=0.99)
        q_learning.run()
        print_as_grid(q_learning.policy, lake.lake, size)
        actions = getActions(q_learning.policy, start, goal, size)
        svg = gv.gridworld(n=size, tile2classes=lake.tile2classes,
                           actions=actions, extra_css='goal', start=start,
                           policyList=q_learning.policy)
        svg.saveas("Figures/Grid/QL-Final-Path for World " + str(worldCntr)
                   + ".svg", pretty=True)

        worldCntr += 1
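
# --- Value-iteration sketch (assumption) ---
# For reference, the Bellman backup that mdp.ValueIteration iterates until the
# value function changes by less than epsilon. This standalone version assumes
# transitions has shape (A, S, S) and reward shape (S, A), matching the
# mdptoolbox convention; it is an illustration, not the solver used above.
def _value_iteration_sketch(transitions, reward, discount, epsilon=0.001,
                            max_iter=1000):
    A, S, _ = transitions.shape
    V = np.zeros(S)
    for _ in range(max_iter):
        # Q[a, s] = R[s, a] + gamma * sum_s' P[a, s, s'] * V[s']
        Q = np.array([reward[:, a] + discount * transitions[a].dot(V)
                      for a in range(A)])
        V_new = Q.max(axis=0)
        if np.max(np.abs(V_new - V)) < epsilon:
            V = V_new
            break
        V = V_new
    return Q.argmax(axis=0), V  # greedy policy and value function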