def __init__(self):
    """Create the index hash table and set the agent's learning constants."""
    table_size = 2048
    tiling_count = 8

    self.iht = IHT(table_size)
    self.numTiling = tiling_count
    # The base step size of 0.1 is divided by the number of tilings.
    self.stepSize = 0.1 / tiling_count
    self.lambdaValue = 0.9
    self.epsilon = 0
    self.gamma = 1
def __init__(self):
    """Initialise the tiling settings, learning constants and hash table."""
    tiling_count = 8
    table_capacity = 2048

    self.numTilings = tiling_count
    # The base rate of 0.5 is divided by the number of tilings.
    self.alpha = 0.5 / tiling_count
    self.gamma = 1
    self.p = 0
    self.max_size = table_capacity
    self.iht = IHT(table_capacity)
    self.l = 0.9
    self.actions = list(range(3))
def __init__(self):
    """Set up the agent's per-step bookkeeping and learned parameters.

    The trace vector, the weight vector and the index hash table are all
    sized by the module-level ``TILES`` constant.
    """
    # Per-step bookkeeping; nothing has been observed or chosen yet.
    self.prevState = None
    self.action = None
    self.features = {}
    self.Xvector = None
    self.prevXvector = None

    # The trace vector starts at zero, one entry per index.
    self.Ztrace = np.zeros(TILES)
    self.iht = IHT(TILES)

    # Weights start as small values drawn uniformly from [-0.001, 0).
    self.weightVector = np.random.uniform(-0.001, 0, TILES)
    self.Q = {}
def agent_init(self):
    """Initialise the weight vector, index hash table and episode counter.

    The weights are stored as a float array. Building the array from integer
    zeros (``np.array([0] * 2048)``) gives it an integer dtype, so every value
    drawn from ``uniform(-0.001, 0)`` would be truncated to 0 when stored,
    silently discarding the random initialisation.
    """
    size = 2048

    # One small random value between -0.001 and 0 per weight, kept as floats.
    self.w = np.array(
        [random.uniform(-0.001, 0) for _ in range(size)], dtype=float
    )

    # hash table
    self.iht = IHT(size)
    self.epis = 0
import gym
from gym import wrappers
from tile3 import IHT, tiles
import random
import numpy as np

# Size used for both the index hash table and the weight list.
maxSize = 5000
iht = IHT(maxSize)
# One weight per hash-table index, all starting at zero.
weights = [0] * maxSize
numTilings = 8
learningRate = 0.0125


# NOTE(review): ``action=[]`` is a mutable default; here it is only passed
# through to ``tiles`` and is not modified by this function.
def mytiles(position, velocity, action=[]):
    """Return the tile indices for a position/velocity pair and an action.

    The two floats are rescaled and handed to ``tiles`` together with the
    shared ``iht`` and ``numTilings``; ``action`` is forwarded unchanged as
    the fourth argument.
    """
    # NOTE(review): 1.7 and .14 look like the widths of the position and
    # velocity ranges -- confirm against the environment.
    scale_P = 5 / 1.7
    scale_V = 5 / .14
    return tiles(iht, numTilings, list(
        (position * scale_P, velocity * scale_V)), action)


def Q_estimate(position, velocity, action=[]):
    """Return the sum of the weights at the tile indices for the inputs."""
    # The local name ``tiles`` shadows the imported function inside this body.
    tiles = mytiles(position, velocity, action)
    estimate = 0
    for tile in tiles:
        estimate += weights[tile]
    return estimate


def Q_learn(position, velocity, q_td_target, action=[]):
    """Look up the tile indices for the inputs and start a zero estimate.

    NOTE(review): only these two statements are visible in the reviewed
    excerpt; ``q_td_target`` is not used by them.
    """
    tiles = mytiles(position, velocity, action)
    estimate = 0
def __init__(self):
    """Create the index hash table and set ``offset_t``."""
    table_size = 2048
    self.iht = IHT(table_size)
    self.offset_t = 8
from __future__ import print_function from tile3 import tiles, IHT import numpy as np import matplotlib.pyplot as plt from mpl_toolkits.mplot3d import Axes3D import time lims = [(0, 2.0 * np.pi)] * 2 def target_ftn(x, y, noise=True): return np.sin(x) + np.cos(y) + noise * np.random.randn() * 0.1 maxSize = 5000 iht = IHT(maxSize) weights = [0]*maxSize numTilings = 8 stepSize = 0.1/numTilings def mytiles(x, y): scaleFactor = 20/(2*np.pi) return tiles(iht, numTilings, list((x*scaleFactor,y*scaleFactor))) def learn(x, y, z): tiles = mytiles(x, y) estimate = 0 for tile in tiles: estimate += weights[tile] #form estimate error = z - estimate for tile in tiles: weights[tile] += stepSize * error #learn weights