def create_multi_feature_world():
    """Build a 3x3 grid world whose cells carry one of four one-hot features.

    The start state is (0, 0), the terminal state is (2, 2) and the discount
    factor is 0.9.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the layout below.
    """
    # One-hot feature vectors, keyed by the letter used in the layout.
    one_hot = {
        "r": (1, 0, 0, 0),
        "w": (0, 1, 0, 0),
        "b": (0, 0, 1, 0),
        "g": (0, 0, 0, 1),
    }
    # Original author's note on this layout: "only gives one halfspace".
    # An alternative layout (www / rww / rrw) was marked "still buggy!" by the
    # original author and is intentionally not used.
    layout = (
        "www",
        "bbw",
        "rrg",
    )
    grid = [[one_hot[letter] for letter in line] for line in layout]

    # Reward weight per feature, in the order r, w, b, g.
    reward_weights = [-10, -1, -2, +1]
    start_states = [(0, 0)]
    end_states = [(2, 2)]
    discount = 0.9
    world = mdp.LinearFeatureGridWorld(grid, reward_weights, start_states,
                                       end_states, discount)
    return world
def create_safety_lava_world():
    """Build the 7x9 lava grid world taken from the AI safety grid worlds paper.

    Wall cells are ``None``; every other cell carries a one-hot feature tuple
    in the order (goal, white, lava). Every non-wall cell is a possible
    initial state, the single terminal state is (1, 7) and the discount factor
    is 0.95. The list of initial states is printed as a side effect.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the layout below.
    """
    wall = None
    goal = (1, 0, 0)
    white = (0, 1, 0)
    lava = (0, 0, 1)
    # features is a 2-d array of tuples, indexed by row and then column
    features = [
        [wall, wall, wall, wall, wall, wall, wall, wall, wall],
        [wall, white, white, lava, lava, lava, white, goal, wall],
        [wall, white, white, lava, lava, lava, white, white, wall],
        [wall, white, white, white, white, white, white, white, wall],
        [wall, white, white, white, white, white, white, white, wall],
        [wall, white, white, white, white, white, white, white, wall],
        [wall, wall, wall, wall, wall, wall, wall, wall, wall],
    ]
    weights = [+50, -1, -50]  # goal, movement, lava
    # Walk the grid itself rather than separately hard-coded dimensions, so
    # the start states can never fall out of sync with the layout.
    initials = [
        (r, c)
        for r, row in enumerate(features)
        for c, cell in enumerate(row)
        if cell is not None
    ]
    print(initials)
    terminals = [(1, 7)]
    gamma = 0.95
    world = mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                       gamma)
    return world
def create_safety_island_world():
    """Build the 6x8 island grid world taken from the AI safety grid worlds paper.

    Wall cells are ``None``; every other cell carries a one-hot feature tuple
    in the order (goal, white, water). Initial states are all cells that are
    neither wall nor water, the single terminal state is (4, 3) and the
    discount factor is 0.95. The list of initial states is printed as a side
    effect.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the layout below.
    """
    wall = None
    goal = (1, 0, 0)
    white = (0, 1, 0)
    water = (0, 0, 1)
    # features is a 2-d array of tuples, indexed by row and then column
    features = [
        [water, water, wall, wall, wall, wall, wall, wall],
        [water, water, white, white, white, white, white, water],
        [water, water, white, white, white, white, white, water],
        [water, white, white, white, white, white, white, water],
        [water, white, white, goal, white, white, water, water],
        [water, wall, wall, wall, wall, wall, wall, wall],
    ]
    weights = [+50, -1, -50]  # goal, movement, water
    # Can't start in water or wall. Walk the grid itself rather than
    # separately hard-coded dimensions so the two cannot drift apart.
    initials = [
        (r, c)
        for r, row in enumerate(features)
        for c, cell in enumerate(row)
        if cell is not None and cell != water
    ]
    print(initials)
    terminals = [(4, 3)]
    gamma = 0.95
    world = mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                       gamma)
    return world
def create_random_10x10_3feature():
    """Build a seeded random 10x10 grid world with three one-hot features.

    The global NumPy RNG is seeded with 0, so the generated layout and weights
    are reproducible. Each cell gets feature ``r`` with probability 0.33,
    ``w`` with probability 0.33 and ``b`` otherwise. The three weights are
    negated draws from ``np.random.rand``. The start state is the bottom-right
    cell, there are no terminal states and the discount factor is 0.99.

    NOTE: earlier revisions drew a *second* random number in the ``elif``
    branch, which skewed the feature mix to roughly 33% / 44% / 23%. A single
    draw per cell is now used, so the seeded layout and weights differ from
    those produced by the earlier code.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the random layout.
    """
    # Seeds the global NumPy RNG (side effect visible to later callers).
    np.random.seed(0)
    num_rows = 10
    num_cols = 10
    r = (1, 0, 0)
    w = (0, 1, 0)
    b = (0, 0, 1)
    features = []
    for _ in range(num_rows):
        row = []
        for _ in range(num_cols):
            # Compare ONE draw against both thresholds so the branches
            # partition [0, 1) as intended.
            u = np.random.rand()
            if u < 0.33:
                f = r
            elif u < 0.66:
                f = w
            else:
                f = b
            row.append(f)
        features.append(row)
    weights = -np.random.rand(3)
    initials = [(num_rows - 1, num_cols - 1)]
    terminals = []
    gamma = 0.99
    world = mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                       gamma)
    return world
def create_wall_3x3_world():
    """Build a 3x3 grid world with a wall (``None``) in the centre cell.

    The start state is (2, 0) and the terminal state is (2, 2). No discount
    factor is passed, so the constructor's own default applies.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the layout below.
    """
    # Layout legend: two one-hot features plus the wall marker.
    legend = {"r": (1, 0), "w": (0, 1), "#": None}
    layout = (
        "www",
        "w#w",
        "wrw",
    )
    grid = [[legend[symbol] for symbol in line] for line in layout]

    reward_weights = [-10, -1]  # weight for r, weight for w
    start_states = [(2, 0)]
    end_states = [(2, 2)]
    # debug_mdp(world) was left disabled by the original author.
    return mdp.LinearFeatureGridWorld(grid, reward_weights, start_states,
                                      end_states)
def create_multiple_optimal_action_mdp():
    """Build a 3x3 grid world used to debug handling of multiple optimal actions.

    The start state is (0, 0) and the terminal state is (2, 2). No discount
    factor is passed, so the constructor's own default applies.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the layout below.
    """
    # One-hot feature vectors keyed by the letter used in the layout.
    legend = {"r": (1, 0, 0), "w": (0, 1, 0), "b": (0, 0, 1)}
    layout = (
        "www",
        "bbw",
        "wrw",
    )
    grid = [[legend[symbol] for symbol in line] for line in layout]

    reward_weights = [-10, -1, -1]  # weights for r, w, b
    start_states = [(0, 0)]
    end_states = [(2, 2)]
    return mdp.LinearFeatureGridWorld(grid, reward_weights, start_states,
                                      end_states)
def create_aaai19_toy_world():
    """Build the 2x3 two-feature toy grid world.

    Every cell is a possible initial state, the terminal state is (0, 0) and
    the discount factor is 0.9.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the layout below.
    """
    first = (1, 0)
    second = (0, 1)
    # features is a 2-d array of tuples; only cell (0, 1) has the second feature
    grid = [
        [first, second, first],
        [first, first, first],
    ]
    reward_weights = [-1, -4]

    # States are indexed by row and then column, enumerated in row-major order.
    start_states = []
    for row_idx, cells in enumerate(grid):
        for col_idx in range(len(cells)):
            start_states.append((row_idx, col_idx))

    end_states = [(0, 0)]
    discount = 0.9
    return mdp.LinearFeatureGridWorld(grid, reward_weights, start_states,
                                      end_states, discount)
def create_random_10x10_2feature():
    """Build a random 10x10 grid world with two one-hot features.

    Each cell is chosen with one ``np.random.rand()`` draw: below 0.5 it gets
    feature ``w``, otherwise ``r``. The RNG is not seeded here. The start
    state is the bottom-right cell, the terminal state is (0, 0) and the
    discount factor is 0.999.

    Original author's note: only requires 2 questions to teach (2-dim feature
    space).

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the random layout.
    """
    side = 10
    feat_r = (1, 0)
    feat_w = (0, 1)

    grid = []
    for _ in range(side):
        cells = []
        for _ in range(side):
            # One uniform draw per cell, consumed in row-major order.
            if np.random.rand() < 0.5:
                cells.append(feat_w)
            else:
                cells.append(feat_r)
        grid.append(cells)

    reward_weights = [-10, -1]  # weight for r, weight for w
    start_states = [(side - 1, side - 1)]
    end_states = [(0, 0)]
    discount = 0.999
    return mdp.LinearFeatureGridWorld(grid, reward_weights, start_states,
                                      end_states, discount)
def create_aaai19_toy_world_3features():
    """Build the 2x3 toy grid world with three one-hot features.

    Features are ordered (okay, bad, goal). Every cell is a possible initial
    state, the terminal state is (0, 0) and the discount factor is 0.5.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the layout below.
    """
    # Layout legend: G = goal, B = bad, O = okay.
    legend = {"O": (1, 0, 0), "B": (0, 1, 0), "G": (0, 0, 1)}
    layout = (
        "GBO",
        "OOO",
    )
    # features is a 2-d array of tuples
    grid = [[legend[symbol] for symbol in line] for line in layout]
    reward_weights = [-1, -4, +1]  # okay, bad, goal

    # States are indexed by row and then column, enumerated in row-major order.
    start_states = []
    for row_idx, cells in enumerate(grid):
        for col_idx in range(len(cells)):
            start_states.append((row_idx, col_idx))

    end_states = [(0, 0)]
    discount = 0.5
    return mdp.LinearFeatureGridWorld(grid, reward_weights, start_states,
                                      end_states, discount)
def create_row_x_col_m_feature_mdp(rows, cols, num_features):
    """Build a random ``rows`` x ``cols`` grid world with one-hot features.

    Each cell gets one of the ``num_features`` one-hot vectors (rows of the
    identity matrix) picked with ``random.choice``. Weights are computed as
    ``1.0 - 2.0 * np.random.rand(num_features)``, so they can be positive or
    negative. There is no terminal state for now, the start state is the
    centre cell ``(rows // 2, cols // 2)`` and the discount factor is 0.95.

    Args:
        rows: Number of grid rows.
        cols: Number of grid columns.
        num_features: Length of each one-hot feature vector.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the random layout.
    """
    # Each row of the identity matrix becomes one one-hot feature tuple.
    basis = list(map(tuple, np.eye(num_features)))

    # NOTE(review): the layout uses the stdlib ``random`` module while the
    # weights use ``np.random``; seeding only one of them does not make the
    # whole world reproducible. Confirm whether that is intended.
    grid = []
    for _ in range(rows):
        picked = []
        for _ in range(cols):
            picked.append(random.choice(basis))
        grid.append(picked)

    reward_weights = 1.0 - 2.0 * np.random.rand(num_features)
    centre = (rows // 2, cols // 2)
    no_terminals = []  # the original author left (rows-1, cols-1) disabled
    discount = 0.95
    env = mdp.LinearFeatureGridWorld(grid, reward_weights, [centre],
                                     no_terminals, discount)
    # debug_mdp(env) was left disabled by the original author.
    return env
def create_cakmak_task3():
    """Build the 6x6 "Cakmak task 3" grid world.

    Wall cells are ``None``; every other cell carries a one-hot feature tuple
    in the order (star, diamond, white). Every non-wall cell is a possible
    initial state, the terminal states are (0, 0) and (0, 5) (the star and
    diamond cells) and the discount factor is 0.95. The list of initial
    states is printed as a side effect.

    Returns:
        The ``mdp.LinearFeatureGridWorld`` built from the layout below.
    """
    wall = None
    star = (1, 0, 0)
    diamond = (0, 1, 0)
    white = (0, 0, 1)
    # features is a 2-d array of tuples, indexed by row and then column
    features = [
        [star, wall, white, white, wall, diamond],
        [white, wall, white, white, wall, white],
        [white, white, white, white, white, white],
        [white, white, white, white, white, white],
        [white, white, white, white, white, white],
        [white, white, white, white, white, white],
    ]
    weights = [1, 1, -1]  # star, diamond, white
    # Walk the grid itself rather than separately hard-coded dimensions, so
    # the start states can never fall out of sync with the layout.
    initials = [
        (r, c)
        for r, row in enumerate(features)
        for c, cell in enumerate(row)
        if cell is not None
    ]
    print(initials)
    terminals = [(0, 0), (0, 5)]
    gamma = 0.95
    world = mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                       gamma)
    return world
#print("state features\n",state_features) unique_mdps = mdp_gen.get_all_unique_mdps(num_features, grid_length, use_terminal, max_mdps) mdp_family = [] #for each grid set up an MDP env for mdp_grid, term_grid in unique_mdps: print("--" * 10) state_features = mdp_grid terminals = mdp_gen.get_terminals_from_grid(term_grid) #print("state features\n",state_features) state_features = mdp_gen.categorical_to_one_hot_features( state_features, num_features) print('one hot features', state_features) world = mdp.LinearFeatureGridWorld(state_features, true_weights, initials, terminals, gamma) mdp_family.append(world) #plot for visualization all_opts = [] all_features = [] for i, mdp_env in enumerate(mdp_family): V = mdp.value_iteration(mdp_env, epsilon=precision) Qopt = mdp.compute_q_values(mdp_env, V=V, eps=precision) opt_policy = mdp.find_optimal_policy(mdp_env, Q=Qopt, epsilon=precision) print(opt_policy) print(mdp_env.features) all_opts.append(opt_policy) all_features.append(mdp_env.features) #input() filename = "./data_analysis/figs/twoXtwo/firstthree.png"