Example #1
    sfMask = np.random.choice(a=[False, True], size=16, p=[0.5, 0.5])
    sfMask = np.concatenate([sfMask, [True]])  # constant term

    sfTestMask = np.zeros(
        shape=16,
        dtype=bool)  # State features rejected (i.e., we think the agent has them)
    sfTestTrials = np.zeros(
        shape=16, dtype=np.int32)  # Number of trials for each state feature

    #
    # Initial model - First test
    #
    print("Using initial MDP =\n", initialModel, flush=True)

    mdp = gridworld.GridworldEnv(initialModel)
    mdp.horizon = MDP_HORIZON
    if SAVE_STATE_IMAGES:
        saveStateImage("stateImage_" + str(experiment_i) + "_0A.png", mdp,
                       sfTestMask)

    #agent_policy_initial_model = GaussianPolicy(nStateFeatures=np.count_nonzero(sfMask),actionDim=2)
    #agent_learner_initial_model = GpomdpLearner(mdp,agent_policy_initial_model,gamma=0.98)
    agent_policy_initial_model = BoltzmannPolicy(np.count_nonzero(sfMask), 4)
    agent_learner_initial_model = GpomdpLearner(mdp,
                                                agent_policy_initial_model,
                                                gamma=0.98)

    learn(learner=agent_learner_initial_model,
          steps=LEARNING_STEPS,
          nEpisodes=LEARNING_EPISODES,
          sfmask=sfMask,
          adamOptimizer=True,
          learningRate=0.3,
          plotGradient=False)
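Example #1 draws sfMask at random, so roughly half of the 16 state features stay visible to the agent, with the appended True keeping the constant term. A minimal NumPy illustration of how such a boolean mask restricts a feature vector (the phi name is hypothetical, not taken from the snippet above):

    import numpy as np

    phi = np.arange(17, dtype=np.float32)  # 16 state features + constant term
    mask = np.random.choice(a=[False, True], size=16, p=[0.5, 0.5])
    mask = np.concatenate([mask, [True]])  # always keep the constant term
    phi_agent = phi[mask]  # the features the agent's policy actually sees
    assert phi_agent.shape == (np.count_nonzero(mask),)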
Example #2
import numpy as np
from gym.envs.toy_text import gridworld
from util.util_gridworld import *
from util.policy_boltzmann import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

mdp = gridworld.GridworldEnv(changeProb=0.5, targetCol=2)
mdp.horizon = 50

sfMask = np.ones(shape=17, dtype=bool)  # state features mask
sfMask[14] = False

# AGENT SPACE
agent_policy = BoltzmannPolicy(np.count_nonzero(sfMask), 4)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)

learn(agent_learner,
      steps=0,
      nEpisodes=1000,
      sfmask=sfMask,
      adamOptimizer=True,
      learningRate=0.3,
      loadFile=None,
      saveFile=None,
      autosave=False,
      plotGradient=False)

# SUPERVISOR SPACE
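The agent space above uses a Boltzmann (softmax) policy over linear state features. The internals of BoltzmannPolicy are not shown in these snippets; a minimal sketch of what such a policy typically computes, assuming a parameter matrix of shape (nActions, nStateFeatures):

    import numpy as np

    def boltzmann_probs(theta, phi):
        # theta: (nActions, nStateFeatures) parameters; phi: (nStateFeatures,) features
        scores = theta @ phi
        scores -= scores.max()  # subtract the max to stabilize the exponentials
        expScores = np.exp(scores)
        return expScores / expScores.sum()  # action probabilities, summing to 1

    theta = np.zeros((4, 16))  # 4 actions, 16 active features as in Example #2
    phi = np.random.rand(16)
    print(boltzmann_probs(theta, phi))  # zero parameters give a uniform policy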
Example #3
import numpy as np
import sys
from gym.envs.toy_text import gridworld
from util.learner import *
from util.optimizer import *
from util.policy_boltzmann import *
from util.util_gridworld import *

w_row = [5, 1, 1, 1, 1]
w_col = [5, 1, 1, 1, 1]
w_grow = [5, 1, 1, 1, 1]
w_gcol = [1, 1, 1, 1, 5]
model = np.array([w_row, w_col, w_grow, w_gcol], dtype=np.float32)

mdp = gridworld.GridworldEnv(model)
mdp.horizon = 50

sfMask = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1],
                  dtype=bool)  # Agent features

sfTestMask = np.ones(shape=16, dtype=bool)  # Features not rejected yet

#
# Cycle ENVIRONMENT CONFIGURATION
#
for conf_index in range(1000):

    print("Using MDP ", model)
    mdp = gridworld.GridworldEnv(model)
    mdp.horizon = 50
    saveStateImage("stateImage" + str(conf_index) + "A.png", mdp, sfTestMask)
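GpomdpLearner presumably implements the GPOMDP policy-gradient estimator. A simplified single-trajectory sketch of that estimator, with the optimal baseline omitted for brevity (names are illustrative, not the util.learner API):

    import numpy as np

    def gpomdp_gradient(scores, rewards, gamma):
        # scores: (T, nParams) array of grad log pi(a_t | s_t) for one trajectory
        # rewards: (T,) array of rewards; gamma: discount factor
        nParams = scores.shape[1]
        grad = np.zeros(nParams)
        cumScore = np.zeros(nParams)
        for t, r in enumerate(rewards):
            cumScore += scores[t]  # sum of score functions up to step t
            grad += (gamma ** t) * r * cumScore  # discounted reward times causal scores
        return grad  # averaged over many trajectories in practice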
Example #4
import numpy as np
from gym.envs.toy_text import gridworld
from util.util_gridworld import *
from util.policy_boltzmann import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

mdp = gridworld.GridworldEnv()
mdp.horizon = 50

sfMask = np.ones(shape=16, dtype=bool)  # state features mask
sfMask[8:16] = False

# AGENT SPACE
agent_policy = BoltzmannPolicy(np.count_nonzero(sfMask), 4)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)


learn(
    agent_learner,
    steps=30,
    nEpisodes=250,
    sfmask=sfMask,
    adamOptimizer=True,
    learningRate=0.3,
    loadFile=None,  # e.g. "params8.npy"
    saveFile=None,
    autosave=True,
    plotGradient=False)
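Passing adamOptimizer=True suggests the gradient steps use Adam (util.optimizer is imported in Example #3). A minimal sketch of a single Adam update in the standard formulation, not the repository's actual class:

    import numpy as np

    class AdamSketch:

        def __init__(self, nParams, alpha=0.3, beta1=0.9, beta2=0.999, eps=1e-8):
            self.alpha, self.beta1, self.beta2, self.eps = alpha, beta1, beta2, eps
            self.m = np.zeros(nParams)  # first-moment (mean) estimate
            self.v = np.zeros(nParams)  # second-moment (uncentered variance) estimate
            self.t = 0

        def step(self, grad):
            self.t += 1
            self.m = self.beta1 * self.m + (1 - self.beta1) * grad
            self.v = self.beta2 * self.v + (1 - self.beta2) * grad ** 2
            mHat = self.m / (1 - self.beta1 ** self.t)  # bias-corrected moments
            vHat = self.v / (1 - self.beta2 ** self.t)
            return self.alpha * mHat / (np.sqrt(vHat) + self.eps)  # increment to add for ascent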