import numpy as np


def adding_bad_features_test():
    # AutoStep is assumed to be defined earlier in this module.
    from src.env.Amatrix_task import Amatrix
    from src.util import Config

    """ Environment Setup """
    n = 10
    m = 5
    env = Amatrix(n, m)
    features = env.get_approx_A()  # first m features
    weights = np.zeros(m)

    """ AutoStep Setup """
    config = Config()
    config.parameter_size = m
    config.init_stepsize = 0.01
    autostep = AutoStep(config)

    sample_size = 50000
    additional_features = 30
    for k in range(additional_features + 1):
        print("Number of features in the representation: {0}".format(autostep.parameter_size))
        for i in range(sample_size):
            # sample a noisy target for a random row of the A matrix
            rand_row = np.random.randint(n)
            target = env.sample_target(rand_row, noisy=True)
            pred_features = features[rand_row, :]
            prediction = np.dot(pred_features, weights)
            error = target - prediction
            # the optimizer returns the gradient, the new step-sizes, and the new weights
            _, _, new_weight_vector = autostep.update_weight_vector(error, pred_features, weights)
            weights = new_weight_vector
            if ((i + 1) % 50000) == 0:
                print("\tSample number: {0}".format(i + 1))
                print("\t\tPrediction error: {0}".format(error))
        print("Theta star:\n{0}".format(env.theta_star))
        print("Estimated theta:\n{0}".format(weights))
        if k < additional_features:
            # append one irrelevant ("bad") feature and grow the learner to match
            print("Adding new feature...")
            new_feature = env.get_new_bad_features(1)
            features = np.hstack((features, new_feature))
            autostep.increase_size(1)
            new_weights = np.zeros(m + 1)
            new_weights[:m] = weights
            m += 1
            weights = new_weights
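
# The test above relies on three parts of the optimizer's interface:
# `parameter_size`, `update_weight_vector(error, features, weights)` returning a
# (gradient, step-sizes, new-weights) triple, and `increase_size(k)`. The class
# below is a hypothetical stand-in that satisfies that interface with plain
# per-weight LMS/SGD updates; it is NOT the AutoStep step-size adaptation, just
# a minimal sketch for exercising the test harness in isolation.
class SGDStandIn:
    def __init__(self, config):
        self.init_stepsize = config.init_stepsize
        self.parameter_size = config.parameter_size
        self.stepsizes = np.full(config.parameter_size, config.init_stepsize)

    def update_weight_vector(self, error, features, weights):
        gradient = error * features                        # LMS gradient
        new_weights = weights + self.stepsizes * gradient  # per-weight update
        return gradient, self.stepsizes, new_weights

    def increase_size(self, k):
        # grow the step-size vector to match the new number of features
        self.stepsizes = np.concatenate((self.stepsizes, np.full(k, self.init_stepsize)))
        self.parameter_size += k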

def boyan_chain_test(steps=50000):
    # AutoTIDBD is assumed to be defined earlier in this module.
    from src.env.BoyanChain import BoyanChain
    from src.env.RandomFeatures_task import LinearFunctionApproximator
    from src.util import Config
    import matplotlib.pyplot as plt

    config = Config()
    checkpoint = 100

    """ Environment Setup """
    config.init_noise_var = 0.1
    config.num_obs_features = 4
    config.max_num_features = 9

    """ AutoTIDBD Setup """
    config.parameter_size = 4
    config.theta = 0.001
    config.tau = 10000
    config.init_stepsize = 0.001

    # to keep track of learning progress
    run_avg_msve = np.zeros(steps // checkpoint, dtype=np.float64)
    current_checkpoint = 0
    avg_msve = 0

    env = BoyanChain(config)
    approximator = LinearFunctionApproximator(config)
    optimizer = AutoTIDBD(config)

    """ Start of Learning """
    curr_obs_feats = env.get_observable_features()
    for s in range(steps):
        state_value = approximator.get_prediction(curr_obs_feats)
        optimal_value = env.compute_true_value()
        # step in the environment
        _, r, next_obs_feats, term = env.step()
        next_state_value = approximator.get_prediction(next_obs_feats)
        # compute td error
        td_error = r + (1 - term) * next_state_value - state_value
        # update weights
        _, _, new_weights = optimizer.update_weight_vector(td_error,
                                                           features=curr_obs_feats,
                                                           weights=approximator.get_weight_vector(),
                                                           discounted_next_features=next_obs_feats)
        approximator.update_weight_vector(new_weights)
        # update features
        curr_obs_feats = next_obs_feats
        # keep track of progress
        avg_msve += np.square(state_value - optimal_value) / checkpoint
        # check if terminal state
        if term:
            env.reset()
            curr_obs_feats = env.get_observable_features()
        # store learning progress so far
        if (s + 1) % checkpoint == 0:
            run_avg_msve[current_checkpoint] += avg_msve
            avg_msve *= 0
            current_checkpoint += 1
        # halfway through training, add 4 real features and grow the learner to match
        if (s + 1) == (steps // 2):
            env.add_feature(k=4, noise=0.0, fake_feature=False)
            approximator.increase_num_features(4)
            optimizer.increase_size(4)
            curr_obs_feats = env.get_observable_features()

    print("The average MSVE is: {0:0.4f}".format(np.average(run_avg_msve)))
    xaxis = np.arange(run_avg_msve.size) + 1
    plt.plot(xaxis, run_avg_msve)
    plt.show()
    plt.close()
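
# Hypothetical entry point for running both demos above; it assumes AutoStep and
# AutoTIDBD are defined earlier in this module. The seed is only there to make
# runs repeatable and is not part of the original tests.
if __name__ == "__main__":
    np.random.seed(0)
    adding_bad_features_test()
    boyan_chain_test(steps=50000)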