Exemple #1
0
def random_gridworld(num_rows, num_cols, num_features):
    """Build a grid MDP with random one-hot state features and random weights.

    Every state receives one of ``num_features`` one-hot feature vectors,
    picked uniformly at random over *all* features (one
    ``np.random.randint`` draw per state).  The weight vector is whatever
    ``utils.sample_l2_ball(num_features)`` returns.  The start distribution
    is uniform over all states, the discount is 0.99 and no state is
    terminal.

    Args:
        num_rows: number of grid rows.
        num_cols: number of grid columns.
        num_features: length of each one-hot feature vector.

    Returns:
        The ``mdp.FeaturizedGridMDP`` constructed from these pieces.
    """
    cell_count = num_rows * num_cols

    # Rows of the identity matrix are the available one-hot feature vectors.
    one_hot = [tuple(row) for row in np.eye(num_features)]

    # One independent uniform draw per state, in state-index order.
    state_features = np.array(
        [one_hot[np.random.randint(num_features)] for _ in range(cell_count)])

    weights = utils.sample_l2_ball(num_features)
    print("weights", weights)

    discount = 0.99
    # Every state is an equally likely starting state.
    start_dist = np.ones(cell_count) / cell_count
    # No terminal states in this world.
    terminals = []

    return mdp.FeaturizedGridMDP(num_rows, num_cols, state_features, weights,
                                 discount, start_dist, terminals)
Exemple #2
0
def lava_ird_simplified_b():
    """Build a fixed 5x5 grid MDP with four one-hot terrain features.

    The hand-written layout is a simplified take on the ambiguous lava
    domain of figure 2(a) in Dylan's IRD paper.  Feature weights are
    ``[-1, -5, +1, -100]``, the discount is 0.95, the start distribution is
    uniform over all 25 states and there are no terminal states.

    Returns:
        The ``mdp.FeaturizedGridMDP`` for this layout.
    """
    num_rows = 5
    num_cols = 5
    num_states = num_rows * num_cols

    # One-hot feature per terrain key (presumably dirt, grass, target, lava).
    terrain = {
        "d": (1, 0, 0, 0),
        "g": (0, 1, 0, 0),
        "t": (0, 0, 1, 0),
        "l": (0, 0, 0, 1),
    }
    # One character per state, in state-index order; shown five per line to
    # mirror the grid width.
    layout = ("dgggd"
              "dgggd"
              "dlllt"
              "dllld"
              "ddgdd")
    state_features = np.array([terrain[key] for key in layout])

    weights = np.array([-1, -5, +1, -100])
    print(weights)

    discount = 0.95
    terminals = []
    # Uniform start distribution over every state.
    start_dist = np.ones(num_states) / num_states

    return mdp.FeaturizedGridMDP(num_rows, num_cols, state_features, weights,
                                 discount, start_dist, terminals)
Exemple #3
0
def lavaland_smaller(contains_lava=False):
    """Build a fixed 2x3 grid MDP with four one-hot terrain features.

    The six states carry the features ``[d, d, t, d, g, d]``; when
    ``contains_lava`` is truthy the first state is ``l`` instead of ``d``.
    Feature weights are ``[-1, -5, +1, -100]``, the discount is 0.95, the
    agent always starts in state 0 and there are no terminal states.

    Args:
        contains_lava: if truthy, state 0 gets the fourth (``l``) feature.

    Returns:
        The ``mdp.FeaturizedGridMDP`` for this layout.
    """
    num_rows = 2
    num_cols = 3
    num_states = num_rows * num_cols

    # One-hot terrain features (presumably dirt, grass, target, lava).
    dirt = (1, 0, 0, 0)
    grass = (0, 1, 0, 0)
    target = (0, 0, 1, 0)
    lava = (0, 0, 0, 1)

    # Only the first state differs between the two variants.
    first_cell = lava if contains_lava else dirt
    state_features = np.array([first_cell, dirt, target, dirt, grass, dirt])

    weights = np.array([-1, -5, +1, -100])
    print(weights)

    discount = 0.95
    # All of the starting probability mass sits on state 0.
    start_dist = np.zeros(num_states)
    start_dist[0] = 1.0
    # No terminal states.
    terminals = []

    return mdp.FeaturizedGridMDP(num_rows, num_cols, state_features, weights,
                                 discount, start_dist, terminals)
Exemple #4
0
def lava_ambiguous_corridor2():
    """Build a fixed 3x5 corridor grid MDP with three one-hot features.

    Features are ``white``, ``red`` and ``green`` with weights
    ``[-1, -9, +1]``.  The discount is 0.99, the start distribution is
    uniform over all 15 states and there are no terminal states.

    Returns:
        The ``mdp.FeaturizedGridMDP`` for this layout.
    """
    num_rows = 3
    num_cols = 5
    num_states = num_rows * num_cols

    white = (1, 0, 0)
    red = (0, 1, 0)
    green = (0, 0, 1)

    # Features in state-index order, grouped five at a time to mirror the
    # grid width.
    layout = ([white] * 5
              + [white] + [red] * 4
              + [white] * 4 + [green])
    state_features = np.array(layout)

    weights = np.array([-1, -9, +1])
    print(weights)

    discount = 0.99
    terminals = []
    # Uniform start distribution over every state.
    start_dist = np.ones(num_states) / num_states

    return mdp.FeaturizedGridMDP(num_rows, num_cols, state_features, weights,
                                 discount, start_dist, terminals)
Exemple #5
0
def lava_ambiguous_aaai18():
    """Build a fixed 5x5 grid MDP with two one-hot features.

    Features are ``white`` and ``red``; the weight vector ``[-0.18, -0.82]``
    is rescaled to unit L2 norm before use.  The discount is 0.95, state 12
    is the only terminal state and the start distribution is uniform over
    all 25 states.

    Returns:
        The ``mdp.FeaturizedGridMDP`` for this layout.
    """
    num_rows = 5
    num_cols = 5
    num_states = num_rows * num_cols

    white = (1, 0)
    red = (0, 1)
    # Features in state-index order, five per line to mirror the grid width.
    state_features = np.array([
        white, white, white, red, white,
        white, red, white, red, white,
        white, red, white, red, white,
        white, red, white, red, white,
        white, red, white, white, white,
    ])

    weights = np.array([-0.18, -0.82])
    weights = weights / np.linalg.norm(weights)
    print(weights)

    gamma = 0.95
    term_states = [12]
    # The start distribution is uniform.  Earlier code also filled in a
    # distribution over states 5 and 4, but it was overwritten by this
    # uniform one before use, so that dead computation has been removed.
    init_dist = 1 / (num_states) * np.ones(num_states)

    mdp_env = mdp.FeaturizedGridMDP(num_rows, num_cols, state_features,
                                    weights, gamma, init_dist, term_states)
    return mdp_env
Exemple #6
0
def windy_cliff_world_large(slip_prob):
    """Build a 6x7 cliff-world MDP with windy transitions.

    Three one-hot features are used (``d``, ``c``, ``t``) with weights
    ``[-1, -100, +1]``.  States 0-35 are ``d``; states 36-39 are ``c``
    (the cliff); state 40 is ``t`` and state 41 is ``d``.  The discount is
    0.99, the start distribution is uniform and there are no terminal
    states.

    After construction the transition matrices are replaced by
    ``mdp.get_windy_down_const_prob_transitions(mdp_env, slip_prob)``, and
    every cliff state is then rewired so that, under every action, it moves
    to state ``num_cols * (num_rows - 1)`` (state 35) with probability 1.

    Args:
        slip_prob: forwarded unchanged to
            ``mdp.get_windy_down_const_prob_transitions`` (presumably the
            probability of being blown off course -- confirm against that
            helper).

    Returns:
        The configured ``mdp.FeaturizedGridMDP``.
    """
    num_rows = 6
    num_cols = 7
    num_states = num_rows * num_cols

    # One-hot features (presumably dirt, cliff, target).
    d = (1, 0, 0)
    c = (0, 1, 0)
    t = (0, 0, 1)

    state_features = np.array([d] * 36 + [c, c, c, c, t, d])
    weights = np.array([-1, -100, +1])

    gamma = 0.99
    # State that every cliff state is sent back to.
    init_state = num_cols * (num_rows - 1)
    term_states = []
    init_dist = 1 / (num_states) * np.ones(num_states)
    mdp_env = mdp.FeaturizedGridMDP(num_rows, num_cols, state_features,
                                    weights, gamma, init_dist, term_states)

    # Replace the default dynamics with the windy ones.
    mdp_env.Ps = mdp.get_windy_down_const_prob_transitions(mdp_env, slip_prob)

    # Any action taken in a cliff state leads back to init_state.
    cliff_states = [
        s for s in range(num_states) if (state_features[s] == c).all()
    ]
    for P in mdp_env.Ps:
        for s in cliff_states:
            P[s, :] = np.zeros(num_states)
            P[s, init_state] = 1.

    return mdp_env
Exemple #7
0
def lavaland_small_somegrass(contains_lava=False):
    """Build a random 5x5 grid MDP with four one-hot terrain features.

    Each state's feature index is drawn with ``np.random.choice(4, p=...)``:
    probabilities ``[0.85, 0.15, 0, 0]`` without lava, or
    ``[0.7, 0.15, 0, 0.15]`` when ``contains_lava`` is truthy.  State 14 is
    then overwritten with the third (target) feature.  Feature weights are
    ``[-1, -5, +1, -100]``, the discount is 0.95, the start distribution is
    uniform and there are no terminal states.

    Args:
        contains_lava: if truthy, the fourth feature is sampled with
            probability 0.15.

    Returns:
        The ``mdp.FeaturizedGridMDP`` for the sampled layout.
    """
    num_rows = 5
    num_cols = 5
    num_states = num_rows * num_cols
    num_features = 4

    # Rows of the identity matrix are the one-hot terrain features.
    one_hot = [tuple(row) for row in np.eye(num_features)]
    target = (0, 0, 1, 0)

    # Sampling probabilities per feature index; the target feature (index 2)
    # is never sampled, it is placed explicitly below.
    if contains_lava:
        probs = [0.7, 0.15, 0, 0.15]
    else:
        probs = [0.85, 0.15, 0.0, 0.0]

    # One independent draw per state, in state-index order.
    cells = [one_hot[np.random.choice(4, p=probs)] for _ in range(num_states)]
    # State 14 is always the target.
    cells[14] = target
    state_features = np.array(cells)

    weights = np.array([-1, -5, +1, -100])
    print(weights)

    discount = 0.95
    terminals = []
    # Uniform start distribution over every state.
    start_dist = np.ones(num_states) / num_states

    return mdp.FeaturizedGridMDP(num_rows, num_cols, state_features, weights,
                                 discount, start_dist, terminals)
Exemple #8
0
def negative_sideeffects_goal(num_rows,
                              num_cols,
                              num_features,
                              unseen_feature=False):
    """Build a random grid MDP with a goal and a reserved side-effect feature.

    The last feature marks the goal and the second-to-last is the possibly
    unseen feature.  Each state gets a random one-hot feature drawn from the
    first ``num_features - 1`` features when ``unseen_feature`` is truthy
    (so the second-to-last feature can appear), otherwise from the first
    ``num_features - 2``.  One random state is then turned into the goal.

    Weights are ``-np.random.rand(num_features)`` with the last entry set to
    +2 and the second-to-last set to -2, rescaled to unit L2 norm.  The
    discount is 0.99, the start distribution is uniform and the goal state
    is the only terminal state.

    Args:
        num_rows: number of grid rows.
        num_cols: number of grid columns.
        num_features: length of each one-hot feature vector.  Asserted to be
            at least 3 when ``unseen_feature`` is truthy; otherwise the
            sampling bound ``num_features - 2`` must still be positive for
            ``np.random.randint`` to accept it.
        unseen_feature: whether the second-to-last feature may be assigned
            to states.

    Returns:
        The ``mdp.FeaturizedGridMDP`` for the sampled layout.
    """
    cell_count = num_rows * num_cols

    if unseen_feature:
        assert (num_features >= 3)

    # Rows of the identity matrix are the one-hot feature vectors.
    one_hot = [tuple(row) for row in np.eye(num_features)]

    # Number of leading features that ordinary states may be drawn from.
    sampled_features = num_features - 1 if unseen_feature else num_features - 2

    # One independent uniform draw per state, in state-index order.
    cells = [
        one_hot[np.random.randint(sampled_features)]
        for _ in range(cell_count)
    ]

    # Pick a random state and mark it with the goal (last) feature.
    goal_state = np.random.randint(cell_count)
    cells[goal_state] = one_hot[-1]
    state_features = np.array(cells)

    # Random non-positive weights, then fixed values for goal and unseen
    # features, all rescaled to unit norm.
    weights = -np.random.rand(num_features)
    weights[-1] = +2
    weights[-2] = -2
    weights = weights / np.linalg.norm(weights)
    print("weights", weights)

    discount = 0.99
    # Every state is an equally likely starting state.
    start_dist = np.ones(cell_count) / cell_count
    # The goal is terminal.
    terminals = [goal_state]

    return mdp.FeaturizedGridMDP(num_rows, num_cols, state_features, weights,
                                 discount, start_dist, terminals)
Exemple #9
0
def machine_teaching_toy_featurized():
    """Build a fixed 2x3 toy grid MDP with two one-hot features.

    All states are ``white`` except state 1, which is ``gray``.  Feature
    weights are ``[-1, -10]``, the discount is 0.9, the start distribution
    is uniform and state 0 is the only terminal state.

    Returns:
        The ``mdp.FeaturizedGridMDP`` for this layout.
    """
    num_rows = 2
    num_cols = 3
    num_states = num_rows * num_cols

    white = (1, 0)
    gray = (0, 1)
    # Start from an all-white grid and mark the single gray state.
    layout = [white] * num_states
    layout[1] = gray
    state_features = np.array(layout)

    weights = np.array([-1, -10])
    discount = 0.9
    start_dist = np.ones(num_states) / num_states
    terminal_states = [0]

    # The trailing positional False is passed through as-is; its meaning is
    # defined by FeaturizedGridMDP's signature (not visible here).
    return mdp.FeaturizedGridMDP(num_rows, num_cols, state_features, weights,
                                 discount, start_dist, terminal_states, False)
Exemple #10
0
def negative_sideeffects_small(contains_lava=False):
    """Build a random 6x6 grid MDP with two terrains, a goal and optional lava.

    Inspired by the ambiguous lava domain of figure 2(a) in Dylan's IRD
    paper.  Each state's feature index is drawn with
    ``np.random.choice(4, p=...)``: probabilities ``[0.5, 0.5, 0, 0]``
    without lava, or ``[0.4, 0.4, 0, 0.2]`` when ``contains_lava`` is
    truthy.  One random state is then overwritten with the third (target)
    feature and made terminal.

    Weights are ``[-rand, -rand, +5, -5]`` (two independent
    ``np.random.rand()`` draws) rescaled to unit L2 norm.  The discount is
    0.95 and the start distribution is uniform.

    Args:
        contains_lava: if truthy, the fourth feature is sampled with
            probability 0.2.

    Returns:
        The ``mdp.FeaturizedGridMDP`` for the sampled layout.
    """
    num_rows = 6
    num_cols = 6
    num_states = num_rows * num_cols
    num_features = 4

    # Rows of the identity matrix are the one-hot terrain features.
    one_hot = [tuple(row) for row in np.eye(num_features)]
    target = (0, 0, 1, 0)

    # Sampling probabilities per feature index; the target feature (index 2)
    # is never sampled, it is placed explicitly below.
    if contains_lava:
        probs = [0.4, 0.4, 0, 0.2]
    else:
        probs = [0.5, 0.5, 0.0, 0.0]

    # One independent draw per state, in state-index order.
    cells = [one_hot[np.random.choice(4, p=probs)] for _ in range(num_states)]

    # A random state becomes the target.
    goal_state = np.random.randint(num_states)
    cells[goal_state] = target
    state_features = np.array(cells)

    # Two random non-positive terrain weights, drawn in this order.
    first_terrain_weight = -np.random.rand()
    second_terrain_weight = -np.random.rand()
    weights = np.array([first_terrain_weight, second_terrain_weight, +5, -5])
    weights = weights / np.linalg.norm(weights)
    print(weights)

    discount = 0.95
    # The goal state is terminal.
    terminals = [goal_state]
    # Uniform start distribution over every state.
    start_dist = np.ones(num_states) / num_states

    return mdp.FeaturizedGridMDP(num_rows, num_cols, state_features, weights,
                                 discount, start_dist, terminals)
Exemple #11
0
def random_gridworld_corner_terminal(num_rows, num_cols, num_features):
    """Build a random grid MDP whose last state is a terminal target.

    Every state except the last gets a one-hot feature drawn uniformly from
    the first ``num_features - 1`` features; the last state gets the last
    (target) feature.  Weights come from
    ``utils.sample_l2_ball(num_features)`` and are then sign-adjusted so all
    entries before index ``num_features - 1`` are non-positive and the
    remaining ones are non-negative.  The discount is 0.99, the start
    distribution is uniform and the last state is the only terminal state.

    Args:
        num_rows: number of grid rows.
        num_cols: number of grid columns.
        num_features: length of each one-hot feature vector.

    Returns:
        The ``mdp.FeaturizedGridMDP`` for the sampled layout.
    """
    cell_count = num_rows * num_cols
    last_feature = num_features - 1

    # Rows of the identity matrix are the one-hot feature vectors.
    one_hot = [tuple(row) for row in np.eye(num_features)]

    # One independent uniform draw per non-final state, never using the
    # target feature.
    cells = [
        one_hot[np.random.randint(last_feature)]
        for _ in range(cell_count - 1)
    ]
    # The final state carries the target feature.
    cells.append(one_hot[-1])
    state_features = np.array(cells)

    weights = utils.sample_l2_ball(num_features)
    # Force the sign of each weight: penalties everywhere except the target.
    for idx in range(len(weights)):
        magnitude = np.abs(weights[idx])
        weights[idx] = -magnitude if idx < last_feature else magnitude
    print("weights", weights)

    discount = 0.99
    # Every state is an equally likely starting state.
    start_dist = np.ones(cell_count) / cell_count
    # The final state is terminal.
    terminals = [cell_count - 1]

    return mdp.FeaturizedGridMDP(num_rows, num_cols, state_features, weights,
                                 discount, start_dist, terminals)
Exemple #12
0
def lava_ambiguous_corridor():
    """Build a 10x15 corridor grid MDP starting from states 100 and 4.

    The weight vector ``[-100, -100]`` is rescaled to unit L2 norm and the
    discount is 0.99.  The start distribution puts equal probability on each
    state in ``init_states`` and zero elsewhere.

    The previous start-distribution code called ``len(num_states)`` on an
    int and tried to store a whole array into a single element, so it raised
    ``TypeError`` on every call.  It now follows the usual pattern of
    spreading mass evenly over ``init_states``.

    Returns:
        The ``mdp.FeaturizedGridMDP`` built from these pieces.
    """
    num_rows = 10
    num_cols = 15
    num_states = num_rows * num_cols

    # NOTE(review): only 8 feature rows are given for a 150-state grid, and
    # the rows are 3 wide while `weights` has 2 entries -- confirm the
    # intended layout against FeaturizedGridMDP's requirements.
    state_features = np.array([(0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1),
                               (1, 1, 0), (0, 1, 1), (1, 0, 1), (1, 1, 1)])

    weights = np.array([-100, -100])
    weights = weights / np.linalg.norm(weights)
    print(weights)
    gamma = 0.99

    # Equal probability on each designated start state, zero elsewhere.
    init_states = [100, 4]
    init_dist = np.zeros(num_states)
    for si in init_states:
        init_dist[si] = 1.0 / len(init_states)

    # NOTE(review): 200 is outside the 0..149 state range of a 10x15 grid --
    # confirm the intended terminal state.
    term_states = [200]
    mdp_env = mdp.FeaturizedGridMDP(num_rows, num_cols, state_features,
                                    weights, gamma, init_dist, term_states)
    return mdp_env