def policy_iteration_exercise():
    pp = pprint.PrettyPrinter(indent=2)
    env = GridworldEnv()
    policy, v = policy_improvement(env)

    print("Policy Probability Distribution:")
    print(policy)
    print("")

    print("Reshaped Grid Policy (0=up, 1=right, 2=down, 3=left):")
    print(np.reshape(np.argmax(policy, axis=1), env.shape))
    print("")

    print("Value Function:")
    print(v)
    print("")

    print("Reshaped Grid Value Function:")
    print(v.reshape(env.shape))
    print("")

    # Test the value function
    expected_v = np.array(
        [0, -1, -2, -3, -1, -2, -3, -2, -2, -3, -2, -1, -3, -2, -1, 0])
    np.testing.assert_array_almost_equal(v, expected_v, decimal=2)
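# policy_iteration_exercise() above calls policy_improvement, which is not
# defined in this file. Below is a minimal sketch, assuming the standard policy
# iteration algorithm and the policy_eval helper defined later in this document;
# the one_step_lookahead name and the defaults are illustrative, not the repo's
# actual implementation.
def policy_improvement(env, discount_factor=1.0):
    def one_step_lookahead(state, V):
        """Return the expected value of each action in `state` under estimate V."""
        A = np.zeros(env.nA)
        for a in range(env.nA):
            for prob, next_state, reward, done in env.P[state][a]:
                A[a] += prob * (reward + discount_factor * V[next_state])
        return A

    # Start with a uniform random policy
    policy = np.ones([env.nS, env.nA]) / env.nA
    while True:
        # Evaluate the current policy, then act greedily against its value function
        V = policy_eval(policy, env, discount_factor)
        policy_stable = True
        for s in range(env.nS):
            chosen_a = np.argmax(policy[s])
            best_a = np.argmax(one_step_lookahead(s, V))
            if chosen_a != best_a:
                policy_stable = False
            policy[s] = np.eye(env.nA)[best_a]
        if policy_stable:
            return policy, V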
import gym

def getEnv(domain):
    if domain == "Blackjack":
        return BlackjackEnv()
    elif domain == "Gridworld":
        return GridworldEnv()
    elif domain == "CliffWalking":
        return CliffWalkingEnv()
    elif domain == "WindyGridworld":
        return WindyGridworldEnv()
    else:
        try:
            return gym.make(domain)
        except Exception:
            assert False, "Domain must be a valid (and installed) Gym environment"
def main():
    env = GridworldEnv()
    random_policy = np.ones([env.nS, env.nA]) / env.nA
    v = policy_eval(random_policy, env)

    print("Value Function:")
    print(v)
    print("")

    print("Reshaped Grid Value Function:")
    print(v.reshape(env.shape))
    print("")

    # Test: Make sure the evaluated policy is what we expected
    expected_v = np.array([
        0, -14, -20, -22, -14, -18, -20, -20, -20, -20, -18, -14, -22, -20, -14, 0
    ])
    np.testing.assert_array_almost_equal(v, expected_v, decimal=2)
def main():
    env = GridworldEnv()
    policy, v = value_iteration(env)

    print("Policy Probability Distribution:")
    print(policy)
    print("")

    print("Reshaped Grid Policy (0=up, 1=right, 2=down, 3=left):")
    print(np.reshape(np.argmax(policy, axis=1), env.shape))
    print("")

    print("Value Function:")
    print(v)
    print("")

    print("Reshaped Grid Value Function:")
    print(v.reshape(env.shape))
    print("")
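# main() above calls value_iteration, which is not defined in this file. Below
# is a minimal sketch, assuming the standard value iteration algorithm over
# env.P; the defaults and helper name are illustrative, not the repo's actual code.
def value_iteration(env, theta=0.0001, discount_factor=1.0):
    def one_step_lookahead(state, V):
        """Return the expected value of each action in `state` under estimate V."""
        A = np.zeros(env.nA)
        for a in range(env.nA):
            for prob, next_state, reward, done in env.P[state][a]:
                A[a] += prob * (reward + discount_factor * V[next_state])
        return A

    V = np.zeros(env.nS)
    while True:
        delta = 0
        for s in range(env.nS):
            # Back up each state with the best one-step lookahead value
            best_action_value = np.max(one_step_lookahead(s, V))
            delta = max(delta, np.abs(best_action_value - V[s]))
            V[s] = best_action_value
        if delta < theta:
            break

    # Extract a deterministic policy from the converged value function
    policy = np.zeros([env.nS, env.nA])
    for s in range(env.nS):
        best_action = np.argmax(one_step_lookahead(s, V))
        policy[s, best_action] = 1.0
    return policy, V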
from lib.envs.gridworld import GridworldEnv

# initialize
env = GridworldEnv()

# render env
env._render()

print('State space:', env.nS)
print('Action space:', env.nA)

# P[state][action] returns a list of (probability, next_state, reward, is_terminated) tuples
print('Transitions for state 14, action 3:', env.P[14][3])
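# A small illustrative example of unpacking those transition tuples, assuming
# the (prob, next_state, reward, done) layout shown above; the print labels
# are arbitrary:
for prob, next_state, reward, done in env.P[14][3]:
    print(f"p={prob}  s'={next_state}  r={reward}  terminal={done}")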
import numpy as np
import pandas as pd
import sys
import random
from collections import namedtuple, defaultdict
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

from lib.envs.gridworld import GridworldEnv
from lib.envs.windy_gridworld import WindyGridworldEnv
from lib.envs.cliff_walking import CliffWalkingEnv
from lib import plotting

env = GridworldEnv()

def make_epsilon_greedy_policy(Q, epsilon, nA):
    """Return a function mapping an observation to epsilon-greedy action probabilities."""
    def policy_fn(observation):
        # Spread epsilon uniformly, then give the greedy action the remaining mass
        A = np.ones(nA, dtype=float) * epsilon / nA
        best_action = np.argmax(Q[observation])
        A[best_action] += (1.0 - epsilon)
        return A
    return policy_fn

def chosen_action(Q):
    best_action = np.argmax(Q)
    return best_action
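# Illustrative usage of make_epsilon_greedy_policy, assuming a defaultdict Q
# table; state 0 and epsilon=0.1 are arbitrary example values.
Q = defaultdict(lambda: np.zeros(env.nA))
policy = make_epsilon_greedy_policy(Q, epsilon=0.1, nA=env.nA)
action_probs = policy(0)  # epsilon-greedy probability distribution over actions
action = np.random.choice(np.arange(env.nA), p=action_probs)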
import numpy as np
import pprint
import sys

if "./" not in sys.path:
    sys.path.append("./")

from lib.envs.gridworld import GridworldEnv

pp = pprint.PrettyPrinter(indent=2)
shape = [4, 4]
env = GridworldEnv(shape)
# env.render()

def policy_eval(policy, env, discount_factor=1.0, theta=0.00001):
    """
    Evaluate a policy given an environment and a full description of the
    environment's dynamics.

    Args:
        policy: [S, A] shaped matrix representing the policy.
        env: OpenAI env. env.P represents the transition probabilities of the
            environment. env.P[s][a] is a list of (prob, next_state, reward, done) tuples.
        discount_factor: gamma discount factor.
        theta: We stop evaluation once the value function change is less than
            theta for all states.

    Returns:
        Vector of length env.nS representing the value function.
    """
    V = np.zeros(env.nS)
    while True:
        delta = 0
        for s in np.arange(env.nS):
            v = 0
            # Look at all possible actions under the policy
            for a, action_prob in enumerate(policy[s]):
                # For each action, look at all possible next states
                for prob, next_state, reward, done in env.P[s][a]:
                    # Expected value per the Bellman expectation backup
                    v += action_prob * prob * (reward + discount_factor * V[next_state])
            delta = max(delta, np.abs(v - V[s]))
            V[s] = v
        if delta < theta:
            break
    return np.array(V)
def setUpModule():
    global env
    env = GridworldEnv()
def policy_evaluation_exercise():
    env = GridworldEnv()
    random_policy = np.ones([env.nS, env.nA]) / env.nA
    v = policy_eval(random_policy, env)
    print(v)