#    activations = []
    #    for i,x_i in enumerate(x):
    #        activations.append(fMap.map_features([x_i,y[i]]))
    #    print(activations)
    #    plt.plot(x, activations)
    #    plt.show()

    # Roll out `reps` episodes with the current value function, collect the
    # visited (state, action) pairs as demonstrations, and pass them to BIRL.
    # NOTE(review): the enclosing function header is outside this excerpt;
    # num_steps, step_size, confidence, seed, reps, skip_time, env,
    # valueFunction, n, EPSILON, fMap, discount, num_features and solve_mdp
    # are all bound elsewhere.
    demos = []
    # The output file name encodes the run parameters (steps, step size,
    # confidence, seed, number of demos).
    # NOTE(review): `writer` is neither written to nor closed in the visible
    # lines -- confirm it is closed further down, or wrap it in `with`.
    writer = open(
        "data/mcar_birl_steps" + str(num_steps) + "_size" + str(step_size) +
        "_conf" + str(confidence) + "_seed" + str(seed) + "_demos" + str(reps),
        "w")
    for i in range(reps):
        print(">>>>iteration", i)

        # With get_actions=True the result is unpacked into four values,
        # including the actions taken alongside the visited states.
        reward, states_visited, actions_taken, steps = run_episode(
            env, valueFunction, n, False, EPSILON, get_actions=True)
        # Collect (s, a) pairs; episodes with index below skip_time are
        # still run but contribute no demonstrations.
        if i >= skip_time:
            demos.extend(zip(states_visited, actions_taken))

        print("steps = ", steps)
    # presumably BIRL = Bayesian inverse reinforcement learning -- TODO confirm
    bayesirl = BIRL(solve_mdp, fMap, env, discount)
    # Unpacked as a (value function, reward) pair; time_limit is fixed at 200
    # here (units not visible in this excerpt).
    birl_value_fn, birl_reward = bayesirl.get_opt_policy(demos,
                                                         num_features,
                                                         confidence,
                                                         num_steps,
                                                         step_size,
                                                         time_limit=200)

    #pickle the controller (value function)
    #with open('mcar_maxent_policy_ss.pickle', 'wb') as f:
    #    x = np.linspace(0,1)
    #    y = np.ones(len(x))
    #    activations = []
    #    for i,x_i in enumerate(x):
    #        activations.append(fMap.map_features([x_i,y[i]]))
    #    print(activations)
    #    plt.plot(x, activations)
    #    plt.show()

    # Run `reps` episodes, record each episode's feature counts, estimate a
    # sign for every feature, and build a signed RBF feature map from them.
    # NOTE(review): the enclosing function header is outside this excerpt;
    # `features` must already be a list here, and seed, reps, env,
    # valueFunction, n, EPSILON, fMap, discount, centers and rbfun are bound
    # elsewhere.
    # NOTE(review): `writer` is neither written to nor closed in the visible
    # lines -- confirm it is closed further down, or wrap it in `with`.
    writer = open("data/mcar_mwal_seed" + str(seed) + "_demos" + str(reps),
                  "w")
    for i in range(reps):
        print(">>>>iteration", i)

        # Here run_episode is unpacked into three values (no actions).
        reward, states_visited, steps = run_episode(env, valueFunction, n,
                                                    False, EPSILON)
        # Compute this episode's feature counts from the visited states.
        fcounts = compute_feature_counts(fMap, states_visited, discount, env)
        print("steps = ", steps)
        #print("feature count = ", fcounts)
        features.append(fcounts)

    # Convert the list of per-episode counts into a single array
    # (presumably one row per episode -- TODO confirm fcounts shape).
    features = np.array(features)

    # Label each feature with the string form of its center.
    flabels = [str(c) for c in centers]
    sign_finder = FeatureSignExtractor(features, flabels)
    slopes = sign_finder.estimate_signs()
    # np.sign maps each slope to -1, 0 or +1.
    fsigns = np.sign(slopes)

    # Wrap the RBF function together with the estimated per-feature signs.
    signedfMap = rbf.SignedRbf_2D_Feature_Map(rbfun, fsigns)
    #    for f in range(len(features[0])):
Example #3
0
    # Run `reps` episodes, compute RBF position feature counts for each one,
    # and plot the counts against the episode number.
    # NOTE(review): the enclosing function header is outside this excerpt;
    # alpha, numOfTilings, reps and env are bound elsewhere.
    # EPSILON is passed straight to run_episode (presumably the exploration
    # rate, so 0 would mean fully greedy -- TODO confirm).
    EPSILON = 0
    discount = 1.0  #using no discount factor for now

    valueFunction = ValueFunction(alpha, numOfTilings)

    # One feature-count entry is appended per episode.
    features = []

    for i in range(reps):
        print(">>>>iteration", i)
        #pick feature map
        #fMap = Constant_Feature_Map()
        # Three RBFs centered at -1.2, -0.3 and 0.6, each given 0.7 as the
        # second argument (presumably the width -- TODO confirm).
        # NOTE(review): rbf and fMap are rebuilt with identical arguments on
        # every iteration; they look loop-invariant and could be hoisted if
        # the RBF constructor keeps no per-instance state -- confirm.
        rbf = RBF(np.array([[-1.2], [-0.3], [0.6]]), 0.7 * np.ones(3),
                  env.action_space.n)
        fMap = Rbf_Position_Feature_Map(rbf)

        # Here run_episode is unpacked into (steps, states_visited).
        steps, states_visited = run_episode(env, valueFunction, 1, False,
                                            EPSILON)

        #compute feature counts
        fcounts = compute_feature_counts(fMap, states_visited, discount)
        print("steps = ", steps)
        print("feature count = ", fcounts)

        features.append(fcounts)

    # x axis is the 1-based episode number; the legend labels are hard-coded
    # to match the three RBF centers above.
    plt.plot(range(1, reps + 1), features)
    plt.legend(['RBF(-1.2)', 'RBF(-0.3)', 'RBF(0.6)'])
    plt.xlabel("Number of episodes")
    plt.ylabel("Feature Counts")
    plt.show()