Ejemplo n.º 1
0
def imperfect_features_test():
    from src.env.Amatrix_task import Amatrix

    n = 10
    m = 2
    env = Amatrix(n, m)

    features = env.get_approx_A()  # first m features
    weights = np.zeros(m)

    config = Config()
    config.parameter_size = m
    config.theta = 0.001
    config.init_beta = 0
    idbd = SIDBD(config)

    sample_size = 50000
    for i in range(sample_size):
        rand_row = np.random.randint(n)
        target = env.sample_target(rand_row, noisy=True)

        pred_features = features[rand_row, :]
        prediction = np.dot(pred_features, weights)
        error = target - prediction
        gradient, new_stepsize, new_weight_vector = idbd.update_weight_vector(
            error, pred_features, weights)
        weights = new_weight_vector
        print("Sample number: {0}".format(i + 1))
        print("\tPrediction error:{0}".format(error))

    print("Theta star:\n{0}".format(env.theta_star))
    print("Estimated theta:\n{0}".format(weights))
Ejemplo n.º 2
0
def perfect_features_test():
    from src.env.Amatrix_task import Amatrix

    n = 3
    m = 2
    env = Amatrix(n, m)

    features = env.Amatrix  # perfect features
    weights = np.zeros(n)

    config = Config()
    config.parameter_size = n
    config.theta = 0.1
    config.init_beta = np.log(0.001)
    idbd = SIDBD(config)

    sample_size = 100000
    for i in range(sample_size):
        rand_row = np.random.randint(n)
        target = env.sample_target(rand_row, noisy=True)

        pred_features = features[rand_row, :]
        prediction = np.dot(pred_features, weights)
        error = target - prediction
        gradient, new_stepsize, new_weight_vector = idbd.update_weight_vector(
            error, pred_features, weights)
        weights = new_weight_vector
        if (i + 1) % 10000 == 0:
            print("Sample number: {0}".format(i + 1))
            print("\tPrediction error:{0}".format(error))

    print("Theta star:\n{0}".format(env.theta_star))
    print("Estimated theta:\n{0}".format(weights))
    difference = np.sqrt(np.sum(np.square(env.theta_star - weights)))
    print("L2 norm of difference:\n{0}".format(difference))
Ejemplo n.º 3
0
def adding_bad_features_test():
    from src.env.Amatrix_task import Amatrix

    n = 10
    m = 5
    env = Amatrix(n, m)

    features = env.get_approx_A()  # first m features
    weights = np.zeros(m)

    config = Config()
    config.parameter_size = m
    config.theta = 0.1
    config.init_beta = np.log(0.0001)
    idbd = SIDBD(config)

    sample_size = 50000
    additional_features = 30
    for k in range(additional_features + 1):
        print("Number of features in the representation: {0}".format(
            idbd.parameter_size))
        for i in range(sample_size):
            rand_row = np.random.randint(n)
            target = env.sample_target(rand_row, noisy=True)

            pred_features = features[rand_row, :]
            prediction = np.dot(pred_features, weights)
            error = target - prediction
            gradient, new_stepsize, new_weight_vector = idbd.update_weight_vector(
                error, pred_features, weights)
            weights = new_weight_vector
            if ((i + 1) % 25000) == 0:
                print("\tSample number: {0}".format(i + 1))
                print("\t\tPrediction error: {0}".format(error))

        print("Theta star:\n{0}".format(env.theta_star))
        print("Estimated theta:\n{0}".format(weights))

        if k < additional_features:
            print("Adding new feature...")
            new_feature = env.get_new_bad_features(1)
            features = np.hstack((features, new_feature))
            idbd.increase_size(1)

            new_weights = np.zeros(m + 1)
            new_weights[:m] = weights
            m += 1
            weights = new_weights
Ejemplo n.º 4
0
def boyan_chain_test(steps=50000):
    from src.env.BoyanChain import BoyanChain
    from src.env.RandomFeatures_task import LinearFunctionApproximator
    from src.util import Config
    import matplotlib.pyplot as plt

    config = Config()
    checkpoint = 100
    """ Environment Setup """
    config.init_noise_var = 0.1
    config.num_obs_features = 4
    config.max_num_features = 9
    """ AutoTIDBD Setup """
    config.parameter_size = 4
    config.theta = 0.001
    config.tau = 10000
    config.init_stepsize = 0.001
    # to keep track of learning progress
    run_avg_msve = np.zeros(steps // checkpoint, dtype=np.float64)
    current_checkpoint = 0
    avg_msve = 0

    env = BoyanChain(config)
    approximator = LinearFunctionApproximator(config)
    optimizer = AutoTIDBD(config)
    """ Start of Learning"""
    curr_obs_feats = env.get_observable_features()
    for s in range(steps):
        state_value = approximator.get_prediction(curr_obs_feats)
        optimal_value = env.compute_true_value()
        # step in the environment
        _, r, next_obs_feats, term = env.step()
        next_state_value = approximator.get_prediction(next_obs_feats)
        # compute td error
        td_error = r + (1 - term) * next_state_value - state_value
        # update weights
        _, _, new_weights = optimizer.update_weight_vector(
            td_error,
            features=curr_obs_feats,
            weights=approximator.get_weight_vector(),
            discounted_next_features=next_obs_feats)
        approximator.update_weight_vector(new_weights)
        # update features
        curr_obs_feats = next_obs_feats
        # keep track of progress
        avg_msve += np.square(state_value - optimal_value) / checkpoint
        # check if terminal state
        if term:
            env.reset()
            curr_obs_feats = env.get_observable_features()
        # store learning progress so far
        if (s + 1) % checkpoint == 0:
            run_avg_msve[current_checkpoint] += avg_msve
            avg_msve *= 0
            current_checkpoint += 1

        if (s + 1) == (steps // 2):
            env.add_feature(k=4, noise=0.0, fake_feature=False)
            approximator.increase_num_features(4)
            optimizer.increase_size(4)
            curr_obs_feats = env.get_observable_features()

    print("The average MSVE is: {0:0.4f}".format(np.average(run_avg_msve)))

    xaxis = np.arange(run_avg_msve.size) + 1
    plt.plot(xaxis, run_avg_msve)
    plt.show()
    plt.close()