sfMask = np.random.choice(a=[False, True], size=16, p=[0.5, 0.5])
sfMask = np.concatenate([sfMask, [True]])  # constant term
sfTestMask = np.zeros(shape=16, dtype=bool)        # state features rejected so far (i.e., we think the agent has them)
sfTestTrials = np.zeros(shape=16, dtype=np.int32)  # number of trials run for each state feature

#
# Initial model - First test
#

print("Using initial MDP =\n", initialModel, flush=True)
mdp = gridworld.GridworldEnv(initialModel)
mdp.horizon = MDP_HORIZON
if SAVE_STATE_IMAGES:
    saveStateImage("stateImage_" + str(experiment_i) + "_0A.png", mdp, sfTestMask)

# agent_policy_initial_model = GaussianPolicy(nStateFeatures=np.count_nonzero(sfMask), actionDim=2)
# agent_learner_initial_model = GpomdpLearner(mdp, agent_policy_initial_model, gamma=0.98)
agent_policy_initial_model = BoltzmannPolicy(np.count_nonzero(sfMask), 4)
agent_learner_initial_model = GpomdpLearner(mdp, agent_policy_initial_model, gamma=0.98)

learn(
    learner=agent_learner_initial_model,
    steps=LEARNING_STEPS,
    nEpisodes=LEARNING_EPISODES,
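# The experiment loop that updates sfTestMask and sfTestTrials continues beyond
# this excerpt. A hypothetical sketch of the bookkeeping the two arrays imply,
# where feature_test is a stand-in for the actual identification test performed
# elsewhere in the script (both the helper and the update rule are assumptions):
for feature_i in range(16):
    if not sfTestMask[feature_i]:         # feature not yet identified
        sfTestTrials[feature_i] += 1      # count one more trial for this feature
        if feature_test(feature_i):       # hypothetical: True if the test rejects
            sfTestMask[feature_i] = True  # mark the feature as identified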
import numpy as np
from gym.envs.toy_text import gridworld
from util.util_gridworld import *
from util.policy_boltzmann import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

mdp = gridworld.GridworldEnv(changeProb=0.5, targetCol=2)
mdp.horizon = 50

sfMask = np.ones(shape=17, dtype=bool)  # state features mask
sfMask[14] = False

# AGENT SPACE
agent_policy = BoltzmannPolicy(np.count_nonzero(sfMask), 4)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)

learn(
    agent_learner,
    steps=0,
    nEpisodes=1000,
    sfmask=sfMask,
    adamOptimizer=True,
    learningRate=0.3,
    loadFile=None,
    saveFile=None,
    autosave=False,
    plotGradient=False,
)

# SUPERVISOR SPACE
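# BoltzmannPolicy is imported from util.policy_boltzmann; its internals are not
# shown in this listing. A minimal sketch, assuming a linear softmax
# parameterization, of how such a policy could map the masked state features of
# the agent space above to probabilities over the 4 actions:
import numpy as np

def boltzmann_probs(theta, features, mask):
    phi = features[mask]    # keep only the features the agent observes
    logits = theta @ phi    # theta: (nActions, nMaskedFeatures)
    logits -= logits.max()  # subtract the max for numerical stability
    expLogits = np.exp(logits)
    return expLogits / expLogits.sum()

rng = np.random.default_rng(0)
mask = np.ones(17, dtype=bool)
mask[14] = False  # same masking as in the script above
theta = rng.normal(size=(4, np.count_nonzero(mask)))
print(boltzmann_probs(theta, rng.normal(size=17), mask))  # sums to 1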
import numpy as np
import sys
from gym.envs.toy_text import gridworld
from util.learner import *
from util.optimizer import *
from util.policy_boltzmann import *
from util.util_gridworld import *

w_row = [5, 1, 1, 1, 1]
w_col = [5, 1, 1, 1, 1]
w_grow = [5, 1, 1, 1, 1]
w_gcol = [1, 1, 1, 1, 5]
model = np.array([w_row, w_col, w_grow, w_gcol], dtype=np.float32)

mdp = gridworld.GridworldEnv(model)
mdp.horizon = 50

sfMask = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1], dtype=bool)  # agent features
sfTestMask = np.ones(shape=16, dtype=bool)  # features not rejected yet

#
# Cycle ENVIRONMENT CONFIGURATION
#

for conf_index in range(1000):

    print("Using MDP ", model)
    mdp = gridworld.GridworldEnv(model)
    mdp.horizon = 50
    saveStateImage("stateImage" + str(conf_index) + "A.png", mdp, sfTestMask)
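# How GridworldEnv consumes the model matrix is defined in the repository's
# gym.envs.toy_text.gridworld module, which is not shown here. A plausible
# reading, stated as an assumption: each weight vector (w_row, w_col, w_grow,
# w_gcol) is normalized into a categorical distribution over the 5 grid
# rows/columns used to place the start and goal cells.
import numpy as np

def weights_to_probs(w):
    w = np.asarray(w, dtype=np.float64)
    return w / w.sum()  # normalize weights into a probability vector

print(weights_to_probs([5, 1, 1, 1, 1]))  # -> [0.5556 0.1111 0.1111 0.1111 0.1111]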
import numpy as np
from gym.envs.toy_text import gridworld
from util.util_gridworld import *
from util.policy_boltzmann import *
from util.learner import *

np.set_printoptions(precision=6)
np.set_printoptions(suppress=True)

mdp = gridworld.GridworldEnv()
mdp.horizon = 50

sfMask = np.ones(shape=16, dtype=bool)  # state features mask
sfMask[8:16] = False

# AGENT SPACE
agent_policy = BoltzmannPolicy(np.count_nonzero(sfMask), 4)
agent_learner = GpomdpLearner(mdp, agent_policy, gamma=0.98)

learn(
    agent_learner,
    steps=30,
    nEpisodes=250,
    sfmask=sfMask,
    adamOptimizer=True,
    learningRate=0.3,
    loadFile=None,  # "params8.npy"
    saveFile=None,
    autosave=True,
    plotGradient=False,
)
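# GpomdpLearner (util.learner) is the repository's GPOMDP policy-gradient
# learner; its internals are not shown in this listing. A self-contained sketch
# of the plain GPOMDP gradient estimate (no baseline), which the learner is
# assumed to refine with a baseline and the Adam optimizer enabled above:
import numpy as np

def gpomdp_gradient(scores, rewards, gamma=0.98):
    # scores:  (N, T, nParams) per-step gradients of log pi(a_t | s_t)
    # rewards: (N, T) per-step rewards from N episodes of horizon T
    N, T, nParams = scores.shape
    cumScores = np.cumsum(scores, axis=1)           # sum_{k<=t} grad log pi_k
    discounted = (gamma ** np.arange(T)) * rewards  # gamma^t * r_t, shape (N, T)
    perEpisode = (cumScores * discounted[:, :, None]).sum(axis=1)
    return perEpisode.mean(axis=0)                  # average over the N episodes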