Example #1
0
import numpy as np
from gym.envs.toy_text import gridworld_cont
from util.util_cgridworld import *
from util.policy_gaussian import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

# Continuous gridworld whose start position is drawn from one of two
# uniform boxes: posBounds1 with probability 0.5, posBounds2 otherwise
# (changeProbs). goalBounds1 bounds the goal region.
mdp = gridworld_cont.GridworldContEnv(
    changeProbs=[0.5, 1],
    posBounds1=[-1, -1, 2, 2],
    posBounds2=[-1, -1, -1, -1],
    goalBounds1=[-2.5, 2.5, 0, 2.5],
)
mdp.horizon = 50

# State-feature mask: the agent only observes the first 40 of 50 features.
sfMask = np.zeros(shape=50, dtype=bool)
sfMask[0:40] = True

# AGENT SPACE: Gaussian policy over the visible features, 2-D actions.
agent_policy = GaussianPolicy(nStateFeatures=np.count_nonzero(sfMask),
                              actionDim=2)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)

clearn(agent_learner,
       steps=0,
       nEpisodes=500,
       sfmask=sfMask,
       adamOptimizer=True,
       learningRate=0.03,
       loadFile=None,
       saveFile=None,
       autosave=True,
Example #2
0
import numpy as np
from gym.envs.toy_text import gridworld_cont
from util.util_cgridworld import *
from util.policy_gaussian import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

# Default continuous gridworld (uniform start/goal distributions).
mdp = gridworld_cont.GridworldContEnv()
mdp.horizon = 50

# State-feature mask: expose only the first 30 of the 50 features.
sfMask = np.ones(shape=50, dtype=bool)
sfMask[30:] = False

# AGENT SPACE: Gaussian policy over the visible features, 2-D actions.
agent_policy = GaussianPolicy(np.count_nonzero(sfMask), 2)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)

# Run 25 GPOMDP steps of 100 episodes each with Adam; set loadFile to a
# saved parameter file (e.g. "cparams30.npy") to warm-start.
clearn(agent_learner,
       steps=25,
       nEpisodes=100,
       sfmask=sfMask,
       adamOptimizer=True,
       learningRate=0.1,
       loadFile=None,
       saveFile=None,
       autosave=True,
       plotGradient=False)
Example #3
0
import scipy as sp
from gym.envs.toy_text import gridworld_cont_normal
from gym.envs.toy_text import gridworld_cont
from util.util_cgridworld import *
from util.policy_gaussian import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

# Continuous gridworld whose start/goal positions are drawn from a Gaussian
# model (per-coordinate mean and variance) instead of uniformly.
mean_model1 = [-2, -2, 2, 2]
var_model1 = [0.1, 0.1, 0.1, 0.1]
mdp = gridworld_cont_normal.GridworldContNormalEnv(mean=mean_model1, var=var_model1)
mdp.horizon = 50

# Uniform-start variant of the same environment, presumably kept for
# comparison runs — TODO confirm against the truncated code below.
mdp_uniform = gridworld_cont.GridworldContEnv()
mdp_uniform.horizon = 50

# State features mask: hide the last 5 of the 50 features from the agent.
# FIX: `np.bool` was a deprecated alias of the builtin `bool` and was
# removed in NumPy 1.24 (AttributeError on modern NumPy); use the builtin,
# matching the other examples in this file.
sfMask = np.ones(shape=50, dtype=bool)
sfMask[45:50] = False

# AGENT SPACE: Gaussian policy over the visible features, 2-D actions.
agent_policy = GaussianPolicy(np.count_nonzero(sfMask), 2)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)

clearn(
	agent_learner,
	steps=100,
	nEpisodes=250,
	sfmask=sfMask,
	adamOptimizer=True,