コード例 #1
0
def create_multi_feature_world():
    """Build a fixed 3x3 grid world whose cells use four one-hot features.

    Every cell is tagged with one of the indicator vectors ``r``, ``w``, ``b``
    or ``g``; the weight vector assigns -10, -1, -2 and +1 to them in that
    order.  The only start state is (0, 0), the only terminal state is (2, 2)
    and the discount factor is 0.9.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals, gamma)``.
    """
    # One-hot feature vectors, listed in the same order as ``weights``.
    r = (1, 0, 0, 0)
    w = (0, 1, 0, 0)
    b = (0, 0, 1, 0)
    g = (0, 0, 0, 1)
    features = [
        [w, w, w],  # only gives one halfspace
        [b, b, w],
        [r, r, g],
    ]
    # NOTE: an earlier two-feature layout ([w, w, w], [r, w, w], [r, r, w])
    # was marked "still buggy!" by the original author and is not used.
    weights = [-10, -1, -2, +1]  # r, w, b, g
    initials = [(0, 0)]
    terminals = [(2, 2)]
    gamma = 0.9
    return mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                      gamma)
コード例 #2
0
def create_safety_lava_world():
    """Build the 7x9 lava grid world (from the AI safety grid worlds paper).

    Cells hold a one-hot feature tuple (goal, white, lava) or ``None`` for a
    wall.  Every non-wall cell is a start state, the terminal state is the
    goal cell at (1, 7) and the discount factor is 0.95.  The list of start
    states is printed to stdout as a side effect.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals, gamma)``.
    """
    wall = None  # walls carry no feature vector
    goal = (1, 0, 0)
    white = (0, 1, 0)
    lava = (0, 0, 1)
    # features is a 2-d array of tuples (None marks a wall cell)
    features = [[wall, wall, wall, wall, wall, wall, wall, wall, wall],
                [wall, white, white, lava, lava, lava, white, goal, wall],
                [wall, white, white, lava, lava, lava, white, white, wall],
                [wall, white, white, white, white, white, white, white, wall],
                [wall, white, white, white, white, white, white, white, wall],
                [wall, white, white, white, white, white, white, white, wall],
                [wall, wall, wall, wall, wall, wall, wall, wall, wall]]
    weights = [+50, -1, -50]  # goal, movement, lava
    # States are indexed by row and then column.  Walking the grid itself
    # keeps the start states in sync with the layout without separately
    # maintained row/column counts.
    initials = [(r, c)
                for r, row in enumerate(features)
                for c, cell in enumerate(row)
                if cell is not None]
    print(initials)
    terminals = [(1, 7)]
    gamma = 0.95
    world = mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                       gamma)
    return world
コード例 #3
0
def create_safety_island_world():
    """Build the 6x8 island grid world (from the AI safety grid worlds paper).

    Cells hold a one-hot feature tuple (goal, white, water) or ``None`` for a
    wall.  Start states are all cells that are neither wall nor water, the
    terminal state is the goal cell at (4, 3) and the discount factor is
    0.95.  The list of start states is printed to stdout as a side effect.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals, gamma)``.
    """
    wall = None  # walls carry no feature vector
    goal = (1, 0, 0)
    white = (0, 1, 0)
    water = (0, 0, 1)
    # features is a 2-d array of tuples (None marks a wall cell)
    features = [[water, water, wall, wall, wall, wall, wall, wall],
                [water, water, white, white, white, white, white, water],
                [water, water, white, white, white, white, white, water],
                [water, white, white, white, white, white, white, water],
                [water, white, white, goal, white, white, water, water],
                [water, wall, wall, wall, wall, wall, wall, wall]]
    weights = [+50, -1, -50]  # goal, movement, water
    # Can't start in water or wall.  States are indexed by row and then
    # column; iterating the grid directly avoids duplicated dimensions.
    initials = [(r, c)
                for r, row in enumerate(features)
                for c, cell in enumerate(row)
                if cell is not None and cell != water]
    print(initials)
    terminals = [(4, 3)]
    gamma = 0.95
    world = mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                       gamma)
    return world
コード例 #4
0
def create_random_10x10_3feature():
    """Build a seeded, randomly labelled 10x10 grid world with three features.

    NumPy's global random state is reseeded with 0 on every call, so the
    generated layout and weights are reproducible (and the global RNG is
    mutated as a side effect).  The start state is the bottom-right cell,
    there are no terminal states and the discount factor is 0.99.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals, gamma)``.
    """
    np.random.seed(0)
    side_rows = 10
    side_cols = 10
    r = (1, 0, 0)
    w = (0, 1, 0)
    b = (0, 0, 1)

    def _draw_feature():
        # NOTE(review): the second comparison uses a *fresh* random draw, so
        # the split is roughly 33% / 44% / 23% rather than three equal
        # thirds.  Kept as-is because changing it would alter the seeded
        # world; confirm whether a single draw was intended.
        if np.random.rand() < 0.33:
            return r
        if np.random.rand() < 0.66:
            return w
        return b

    # Row-major fill so the sequence of random draws matches cell order.
    features = [[_draw_feature() for _ in range(side_cols)]
                for _ in range(side_rows)]
    # Weights are drawn after the grid, one non-positive value per feature.
    weights = -np.random.rand(3)
    initials = [(side_rows - 1, side_cols - 1)]
    terminals = []
    gamma = 0.99
    return mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                      gamma)
コード例 #5
0
def create_wall_3x3_world():
    """Build a fixed 3x3 two-feature grid world with a wall in the centre.

    The centre cell is ``None`` (a wall); the bottom-middle cell carries
    feature ``r`` (weight -10) and every other cell carries ``w`` (weight
    -1).  The start state is (2, 0) and the terminal state is (2, 2).  No
    discount factor is passed, so the constructor's own default applies.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals)``.
    """
    r = (1, 0)
    w = (0, 1)
    features = [
        [w, w, w],
        [w, None, w],  # None marks the wall cell
        [w, r, w],
    ]
    weights = [-10, -1]  # r, w
    initials = [(2, 0)]
    terminals = [(2, 2)]
    world = mdp.LinearFeatureGridWorld(features, weights, initials, terminals)
    return world
コード例 #6
0
def create_multiple_optimal_action_mdp():
    """Build a fixed 3x3 three-feature grid world for debugging ties.

    The original author used this layout to debug handling of multiple
    optimal actions.  Start state is (0, 0), terminal state is (2, 2); no
    discount factor is passed, so the constructor's own default applies.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals)``.
    """
    r, w, b = (1, 0, 0), (0, 1, 0), (0, 0, 1)
    top_row = [w, w, w]
    middle_row = [b, b, w]
    bottom_row = [w, r, w]
    return mdp.LinearFeatureGridWorld(
        [top_row, middle_row, bottom_row],
        [-10, -1, -1],  # weights for r, w, b
        [(0, 0)],       # start states
        [(2, 2)],       # terminal states
    )
コード例 #7
0
def create_aaai19_toy_world():
    """Build the fixed 2x3 two-feature toy grid world.

    All cells carry feature (1, 0) except cell (0, 1), which carries (0, 1).
    Every cell is a start state, the terminal state is (0, 0) and the
    discount factor is 0.9.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals, gamma)``.
    """
    f0 = (1, 0)
    f1 = (0, 1)
    # features is a 2-d array of tuples
    features = [
        [f0, f1, f0],
        [f0, f0, f0],
    ]
    weights = [-1, -4]
    # Every cell is a start state; states are indexed by row and then column.
    initials = []
    for row_idx, row in enumerate(features):
        for col_idx, _ in enumerate(row):
            initials.append((row_idx, col_idx))
    terminals = [(0, 0)]
    gamma = 0.9
    return mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                      gamma)
コード例 #8
0
def create_random_10x10_2feature():
    """Build a randomly labelled 10x10 grid world with two features.

    Each cell independently gets feature ``w`` when a uniform draw is below
    0.5 and ``r`` otherwise.  The random state is not seeded here, so the
    layout depends on the caller's NumPy RNG state.  Start state is the
    bottom-right cell, terminal state is (0, 0), discount factor is 0.999.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals, gamma)``.
    """
    side_rows = 10
    side_cols = 10
    r = (1, 0)
    w = (0, 1)
    # Fill row by row so random draws are consumed in row-major order.
    features = []
    for _ in range(side_rows):
        row = []
        for _ in range(side_cols):
            row.append(w if np.random.rand() < 0.5 else r)
        features.append(row)
    weights = [-10, -1]  # r, w
    initials = [(side_rows - 1, side_cols - 1)]
    terminals = [(0, 0)]
    gamma = 0.999
    # Original author's note: only requires 2 questions to teach
    # (2-dim feature space).
    return mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                      gamma)
コード例 #9
0
def create_aaai19_toy_world_3features():
    """Build the fixed 2x3 toy grid world using three one-hot features.

    Cell (0, 0) is ``goal``, cell (0, 1) is ``bad`` and all remaining cells
    are ``okay``.  Every cell is a start state, the terminal state is (0, 0)
    and the discount factor is 0.5.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals, gamma)``.
    """
    okay, bad, goal = (1, 0, 0), (0, 1, 0), (0, 0, 1)
    # features is a 2-d array of tuples
    features = [
        [goal, bad, okay],
        [okay, okay, okay],
    ]
    weights = [-1, -4, +1]  # okay, bad, goal
    # Every cell is a start state; states are indexed by row and then column.
    initials = []
    for row_idx, row in enumerate(features):
        for col_idx, _ in enumerate(row):
            initials.append((row_idx, col_idx))
    terminals = [(0, 0)]
    gamma = 0.5
    return mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                      gamma)
コード例 #10
0
def create_row_x_col_m_feature_mdp(rows, cols, num_features):
    """Build a random ``rows`` x ``cols`` grid world with one-hot features.

    Each cell is assigned a one-hot vector of length ``num_features`` chosen
    with ``random.choice``; the weights are ``1.0 - 2.0 * np.random.rand(...)``,
    so they can be positive or negative.  The start state is the centre cell
    (integer division), there are no terminal states and the discount factor
    is 0.95.

    NOTE(review): cell features use the stdlib ``random`` module while the
    weights use ``np.random``; seeding only one of them will not make the
    result reproducible.

    Args:
        rows: Number of grid rows.
        cols: Number of grid columns.
        num_features: Length of each one-hot feature vector.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(state_features,
        weights, initials, terminals, gamma)``.
    """
    # Rows of the identity matrix are the available one-hot vectors.
    one_hot_vectors = list(map(tuple, np.eye(num_features)))
    # Fill row by row so random draws are consumed in row-major order.
    state_features = []
    for _ in range(rows):
        state_features.append(
            [random.choice(one_hot_vectors) for _ in range(cols)])

    weights = 1.0 - 2.0 * np.random.rand(num_features)
    centre_cell = (rows // 2, cols // 2)
    no_terminals = []  # no terminal state for now
    discount = 0.95
    return mdp.LinearFeatureGridWorld(state_features, weights, [centre_cell],
                                      no_terminals, discount)
コード例 #11
0
def create_cakmak_task3():
    """Build the fixed 6x6 "Cakmak task 3" grid world.

    Cells hold a one-hot feature tuple (star, diamond, white) or ``None`` for
    a wall.  Every non-wall cell is a start state; the terminal states are
    the star cell at (0, 0) and the diamond cell at (0, 5).  The discount
    factor is 0.95.  The list of start states is printed to stdout as a side
    effect.

    Returns:
        The object returned by ``mdp.LinearFeatureGridWorld(features, weights,
        initials, terminals, gamma)``.
    """
    wall = None  # walls carry no feature vector
    star = (1, 0, 0)
    diamond = (0, 1, 0)
    white = (0, 0, 1)
    # features is a 2-d array of tuples (None marks a wall cell)
    features = [[star, wall, white, white, wall, diamond],
                [white, wall, white, white, wall, white],
                [white, white, white, white, white, white],
                [white, white, white, white, white, white],
                [white, white, white, white, white, white],
                [white, white, white, white, white, white]]
    weights = [1, 1, -1]  # star, diamond, white
    # States are indexed by row and then column.  Walking the grid itself
    # keeps the start states in sync with the layout without separately
    # maintained row/column counts.
    initials = [(r, c)
                for r, row in enumerate(features)
                for c, cell in enumerate(row)
                if cell is not None]
    print(initials)
    terminals = [(0, 0), (0, 5)]
    gamma = 0.95
    world = mdp.LinearFeatureGridWorld(features, weights, initials, terminals,
                                       gamma)
    return world
#print("state features\n",state_features)

# Enumerate the candidate grids.  Each item unpacks below into a feature grid
# and a terminal grid.  num_features, grid_length, use_terminal and max_mdps
# (and true_weights, initials, gamma, precision used further down) are
# defined outside this excerpt.
unique_mdps = mdp_gen.get_all_unique_mdps(num_features, grid_length,
                                          use_terminal, max_mdps)
mdp_family = []
# For each grid, set up an MDP environment and collect it in mdp_family.
for mdp_grid, term_grid in unique_mdps:
    print("--" * 10)
    state_features = mdp_grid
    terminals = mdp_gen.get_terminals_from_grid(term_grid)
    #print("state features\n",state_features)
    # Convert the categorical grid to one-hot feature vectors before building
    # the world.
    state_features = mdp_gen.categorical_to_one_hot_features(
        state_features, num_features)
    print('one hot features', state_features)

    # All worlds share the same weights, start states and discount factor;
    # only the features and terminals differ per grid.
    world = mdp.LinearFeatureGridWorld(state_features, true_weights, initials,
                                       terminals, gamma)
    mdp_family.append(world)

# Plot for visualization: solve each MDP and gather its optimal policy and
# features.
all_opts = []
all_features = []
for i, mdp_env in enumerate(mdp_family):
    # Value function -> Q-values -> optimal policy, all using the same
    # `precision` tolerance.
    V = mdp.value_iteration(mdp_env, epsilon=precision)
    Qopt = mdp.compute_q_values(mdp_env, V=V, eps=precision)
    opt_policy = mdp.find_optimal_policy(mdp_env, Q=Qopt, epsilon=precision)
    print(opt_policy)
    print(mdp_env.features)
    all_opts.append(opt_policy)
    all_features.append(mdp_env.features)
    #input()
# Path for the figure; it is only assigned here, not used within this excerpt.
filename = "./data_analysis/figs/twoXtwo/firstthree.png"