Exemple #1
0
 def __init__(self):
     """Create the index hash table and set the agent's numeric settings."""
     tiling_count = 8

     # Hash table constructed with a size argument of 2048.
     self.iht = IHT(2048)
     self.numTiling = tiling_count
     # The step size is 0.1 divided evenly by the number of tilings.
     self.stepSize = 0.1 / tiling_count
     # NOTE(review): presumably the trace-decay, exploration and discount
     # parameters respectively -- confirm against the methods that read them.
     self.lambdaValue = 0.9
     self.epsilon = 0
     self.gamma = 1
Exemple #2
0
 def __init__(self):
     """Set the agent's numeric settings, hash table and action list."""
     tiling_count = 8
     table_size = 2048

     self.numTilings = tiling_count
     # alpha is 0.5 divided evenly by the number of tilings.
     self.alpha = 0.5 / tiling_count
     self.gamma = 1
     # NOTE(review): the meaning of ``p`` (and of ``l`` below) is not evident
     # from this method -- confirm against the code that reads them.
     self.p = 0
     # The hash table is sized by the same value that is kept in max_size.
     self.max_size = table_size
     self.iht = IHT(table_size)
     self.l = 0.9
     # Three actions, labelled 0, 1 and 2.
     self.actions = list(range(3))
 def __init__(self):
     """Initialise the agent's bookkeeping fields and learned parameters.

     The trace vector, the weight vector and the index hash table are all
     sized by ``TILES``, a name defined outside this method.
     """
     # Nothing has been observed or selected yet.
     self.prevState = None
     self.action = None
     self.features = {}
     self.Xvector = None
     self.prevXvector = None

     # NOTE(review): presumably an eligibility-trace vector -- confirm.
     self.Ztrace = np.zeros(TILES)
     self.iht = IHT(TILES)

     # Weights start as small random values drawn from [-0.001, 0).
     self.weightVector = np.random.uniform(-0.001, 0, TILES)
     self.Q = {}
    def agent_init(self):
        """Reset the weight vector, the index hash table and the ``epis`` counter.

        The 2048 weights are initialised to small random values drawn with
        ``random.uniform(-0.001, 0)``.
        """
        # Build the weight vector directly from the float draws so it has a
        # floating-point dtype. Filling an integer array such as
        # np.array([0] * 2048) would truncate every small negative draw to 0,
        # leaving no random initialisation at all.
        self.w = np.array([random.uniform(-0.001, 0) for _ in range(2048)])

        # hash table
        self.iht = IHT(2048)
        self.epis = 0
Exemple #5
0
import gym
from gym import wrappers
from tile3 import IHT, tiles
import random
import numpy as np

# Tile-coding settings read by the helper functions in this module.
numTilings = 8
# NOTE(review): learningRate is not read by any code visible in this snippet.
learningRate = 0.0125
maxSize = 5000

# Index hash table of maxSize slots, plus one weight (initially 0) per slot.
iht = IHT(maxSize)
weights = [0 for _ in range(maxSize)]


def mytiles(position, velocity, action=None):
    """Return the tiles for a scaled (position, velocity) pair and an action.

    Args:
        position: Numeric position; multiplied by ``5 / 1.7`` before tiling.
        velocity: Numeric velocity; multiplied by ``5 / .14`` before tiling.
        action: Optional list forwarded as the integer argument of ``tiles``.
            Omitting it (or passing ``None``) is the same as passing ``[]``.

    Returns:
        Whatever ``tiles`` returns for the module-level ``iht`` and
        ``numTilings``; callers in this file use the values as indices into
        ``weights``.
    """
    # Use a fresh list per call instead of a shared mutable default argument.
    if action is None:
        action = []
    # NOTE(review): 1.7 and .14 are presumably the widths of the position and
    # velocity ranges, giving 5 tile widths across each -- confirm.
    scale_P = 5 / 1.7
    scale_V = 5 / .14
    scaled_state = [position * scale_P, velocity * scale_V]
    return tiles(iht, numTilings, scaled_state, action)


def Q_estimate(position, velocity, action=None):
    """Return the current value estimate for a state and action.

    The estimate is the sum of the entries of the module-level ``weights``
    list selected by ``mytiles(position, velocity, action)``.

    Args:
        position: Position forwarded to ``mytiles``.
        velocity: Velocity forwarded to ``mytiles``.
        action: Optional list forwarded to ``mytiles``. Omitting it (or
            passing ``None``) is the same as passing ``[]``.

    Returns:
        The summed weight of the selected tiles (``0`` if none are selected).
    """
    # Use a fresh list per call instead of a shared mutable default argument.
    if action is None:
        action = []
    # Named ``active_tiles`` so the imported ``tiles`` function is not shadowed.
    active_tiles = mytiles(position, velocity, action)
    return sum(weights[tile] for tile in active_tiles)


def Q_learn(position, velocity, q_td_target, action=[]):
    # NOTE(review): only the opening lines of this function are visible in
    # this snippet; the update that presumably uses q_td_target is cut off.
    # NOTE(review): ``action=[]`` is a mutable default shared across calls.
    # Tiles for this input; this local shadows the imported ``tiles`` function.
    tiles = mytiles(position, velocity, action)
    # Accumulator initialised to zero; the code that fills it is not visible.
    estimate = 0
Exemple #6
0
 def __init__(self):
     """Create the agent's index hash table and set ``offset_t``."""
     # Hash table constructed with a size argument of 2048.
     hash_table = IHT(2048)
     self.iht = hash_table
     # NOTE(review): the meaning of offset_t is not evident here -- confirm.
     self.offset_t = 8
Exemple #7
0
from __future__ import print_function
from tile3 import tiles, IHT
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import time

# Two identical (low, high) pairs spanning 0 to 2*pi.
# NOTE(review): presumably the ranges of the two inputs of target_ftn -- confirm.
lims = [(0, 2.0 * np.pi), (0, 2.0 * np.pi)]
def target_ftn(x, y, noise=True):
    """Return ``sin(x) + cos(y)``, optionally perturbed by Gaussian noise.

    Args:
        x: Input passed to ``np.sin``.
        y: Input passed to ``np.cos``.
        noise: Multiplier on the noise term; ``True`` adds one standard-normal
            sample scaled by 0.1, ``False`` zeroes that term.

    Returns:
        The (possibly noisy) function value.
    """
    clean_value = np.sin(x) + np.cos(y)
    # A sample is always drawn, so the random stream advances even when the
    # noise term is multiplied away.
    perturbation = noise * np.random.randn() * 0.1
    return clean_value + perturbation


# Tile-coding settings read by mytiles() and learn().
numTilings = 8
# The step size is 0.1 divided evenly by the number of tilings.
stepSize = 0.1 / numTilings
maxSize = 5000

# Index hash table of maxSize slots, plus one weight (initially 0) per slot.
iht = IHT(maxSize)
weights = [0 for _ in range(maxSize)]

def mytiles(x, y):
    """Return the tiles for the point (x, y) after scaling both coordinates.

    Each coordinate is multiplied by ``20 / (2 * pi)`` and the pair is handed
    to ``tiles`` with the module-level ``iht`` and ``numTilings``.
    """
    scale = 20 / (2 * np.pi)
    scaled_point = [x * scale, y * scale]
    return tiles(iht, numTilings, scaled_point)

def learn(x, y, z):
    """Move the estimate for (x, y) toward the target value z.

    The current estimate is the sum of the ``weights`` entries selected by
    ``mytiles(x, y)``. Each of those entries is then increased by
    ``stepSize`` times the error ``z - estimate``.
    """
    active_tiles = mytiles(x, y)

    # Form the estimate from the weights as they are before the update.
    estimate = sum(weights[index] for index in active_tiles)
    error = z - estimate

    # Every selected weight receives the same correction.
    correction = stepSize * error
    for index in active_tiles:
        weights[index] += correction