def init_LWPR(self):
    # Create one scalar-input, scalar-output LWPR model per joint.
    self.lwpr = []
    for i in range(self.joints_num):
        lwpr = LWPR(1, 1)
        lwpr.init_D = 10000 * np.eye(1)
        lwpr.update_D = True
        lwpr.init_alpha = 10 * np.eye(1)
        lwpr.meta = False
        lwpr.penalty = 1e-9
        # Alternative settings, mirroring the C++ defaults
        # (w_gen=0.2, w_prune=0.8, update_D=true, init_alpha=0.1,
        #  penalty=0.005, init_D=200):
        # lwpr.init_D = 200 * np.eye(1)
        # lwpr.init_alpha = 0.1 * np.eye(1)
        # lwpr.penalty = 0.005
        # lwpr.w_gen = 0.2
        # lwpr.w_prune = 0.8
        self.lwpr.append(lwpr)
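# A minimal sketch (not part of the original class) of how the per-joint
# model list above might be driven online. The method name and the assumption
# of one scalar input/target per joint are illustrative only.
def update_joints(self, inputs, targets):
    predictions = []
    for model, x, y in zip(self.lwpr, inputs, targets):
        # update() trains the model incrementally and returns its
        # current prediction for x.
        predictions.append(model.update(np.array([x]), np.array([y])))
    return predictions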
def all(self):
    """The main method bringing together all functionalities."""
    data = self.get_data()

    # Split into train and test set, based on the configured percentage;
    # the first n shuffled rows become the test set.
    random.shuffle(data)
    n = int(len(data) * (self.percentage_of_trest_data / 100))
    train_set = data[n:]
    test_set = data[:n]

    print("Length full set: %2i" % data.shape[0])
    print("Length train set: %2i" % train_set.shape[0])
    print("Length test set: %2i" % test_set.shape[0])

    # The last column holds the target, the remaining columns the inputs.
    row, column = train_set.shape
    dim = column - 1
    Ntrain = row
    Xtr, Ytr = train_set[:, :dim], train_set[:, -1:]

    # Initialize the LWPR model.
    model = LWPR(dim, 1)
    model.init_D = 20 * eye(dim)
    model.update_D = True
    model.init_alpha = 40 * eye(dim)
    model.meta = False

    # Train the model.
    model = self.train_model(model, Ntrain, Xtr, Ytr)

    # Show data.
    # self.show_plot(Xtr, Ytr, test_set, model, dim)

    # Establish the UDP connection etc.
    self.networking(model)
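# The train_model helper called above is not shown. A sketch consistent with
# the call site; the number of passes over the data is an assumption.
def train_model(self, model, Ntrain, Xtr, Ytr, n_passes=20):
    for _ in range(n_passes):
        # Present the rows in a fresh random order on every pass;
        # each update() call trains the model on one (x, y) pair.
        ind = random.permutation(Ntrain)
        for i in range(Ntrain):
            model.update(Xtr[ind[i]], Ytr[ind[i]])
    return model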
def __init__(self, history_length, prediction_horizon, difference_learning,
             averaging, streaming, settings=None):
    super().__init__(history_length, prediction_horizon, difference_learning,
                     averaging=averaging, streaming=streaming)

    self.model_ = LWPR(self._get_input_dim(), self.observation_dimension)

    # Default values.
    init_D = 25
    init_alpha = 175
    self.time_threshold = np.inf
    if settings:
        init_D = settings['init_D']
        init_alpha = settings['init_alpha']
        self.time_threshold = settings.get('time_threshold', np.inf)

    self.model_.init_D = init_D * np.eye(self._get_input_dim())
    self.model_.init_alpha = init_alpha * np.eye(self._get_input_dim())
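# Hypothetical instantiation of the constructor above. The class name and all
# argument values are illustrative; only the signature is given in the source.
settings = {'init_D': 25, 'init_alpha': 175, 'time_threshold': 0.05}
predictor = LWPRPredictor(history_length=10, prediction_horizon=5,
                          difference_learning=True, averaging=False,
                          streaming=True, settings=settings)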
from numpy import *
from matplotlib import pyplot as plt
from lwpr import LWPR


def testfunc(x):
    return 10 * sin(7.8 * log(1 + x)) / (1 + 0.1 * x**2)


# 500 noisy samples of the test function on [0, 10]; the noise grows with x.
Ntr = 500
Xtr = 10 * random.random((Ntr, 1))
Ytr = 5 + testfunc(Xtr) + 0.1 * random.normal(0, 1, (Ntr, 1)) * Xtr

# Initialize the LWPR model.
model = LWPR(1, 1)
model.init_D = 20 * eye(1)
model.update_D = True
model.init_alpha = 40 * eye(1)
model.meta = False

print(Xtr)
print(Ytr)

# Train the model: 20 passes over the shuffled training data.
for k in range(20):
    ind = random.permutation(Ntr)
    mse = 0
    for i in range(Ntr):
        yp = model.update(Xtr[ind[i]], Ytr[ind[i]])
        mse = mse + (Ytr[ind[i], :] - yp)**2
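# The pass above accumulates the squared error but never reports it. A short
# continuation modeled on the official LWPR Python demo: print the normalized
# MSE per pass, then plot test predictions with one-standard-deviation
# confidence bounds from predict_conf().
    nMSE = mse / Ntr / var(Ytr)
    print("pass %2i  nMSE=%5.3f" % (k + 1, float(nMSE)))

Ntest = 500
Xtest = linspace(0, 10, Ntest)
Ytest = zeros((Ntest, 1))
Conf = zeros((Ntest, 1))
for j in range(Ntest):
    Ytest[j, :], Conf[j, :] = model.predict_conf(array([Xtest[j]]))

plt.plot(Xtr, Ytr, 'r.')
plt.plot(Xtest, Ytest, 'b-')
plt.plot(Xtest, Ytest + Conf, 'c-', linewidth=2)
plt.plot(Xtest, Ytest - Conf, 'c-', linewidth=2)
plt.show()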
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='AntBulletEnv-v0')
    parser.add_argument("--no_data", type=int, default=10000)
    args = parser.parse_args()

    state_action, state, reward, next_state = gather_data(args.no_data, args.environment)
    assert len(state_action) == len(next_state)
    assert len(state_action) == len(reward)
    # (Optionally, previously pickled transitions could be concatenated here.)
    no_data = len(state_action)

    # Dynamics model: maps a state-action pair to the next state.
    model_state = LWPR(state_action.shape[-1], next_state.shape[-1])
    model_state.init_D = 5. * np.eye(state_action.shape[-1])
    model_state.update_D = True
    model_state.init_alpha = 1. * np.eye(state_action.shape[-1])
    model_state.meta = True
    action_shape = state_action.shape[-1] - next_state.shape[-1]
    model_state.norm_in = np.array(([10.] * state.shape[-1]) + [2.] * action_shape)

    # Reward model: maps a state-action pair to the immediate reward.
    model_reward = LWPR(state_action.shape[-1], reward.shape[-1])
    model_reward.init_D = 1. * np.eye(state_action.shape[-1])
    model_reward.update_D = True
    model_reward.init_alpha = 20. * np.eye(state_action.shape[-1])
    model_reward.meta = True

    # A single pass over the shuffled data; raise the range for more passes.
    for k in range(1):
        ind = np.random.permutation(no_data)
        for i in range(no_data):
            print(k, i)
            model_state.update(state_action[ind[i]], next_state[ind[i]])
            # Alternative: learn state differences instead of absolute states,
            # model_state.update(state_action[ind[i]], next_state[ind[i]] - state[ind[i]])
            model_reward.update(state_action[ind[i]], reward[ind[i]])

    uid = str(uuid.uuid4())
    for k in range(10):
        state_action_test, state_test, reward_test, next_state_test = \
            gather_data_epoch(1, args.environment)

        # Predict the test trajectory with confidence bounds.
        Y, confs, Y_r, confs_r = [], [], [], []
        for i in range(len(state_action_test)):
            y, conf = model_state.predict_conf(state_action_test[i])
            Y.append(y)  # with difference learning: Y.append(y + state_test[i])
            confs.append(conf)
            y_r, conf_r = model_reward.predict_conf(state_action_test[i])
            Y_r.append(y_r)
            confs_r.append(conf_r)
        Y = np.stack(Y, axis=0)
        confs = np.stack(confs, axis=0)
        Y_r = np.stack(Y_r, axis=0)
        confs_r = np.stack(confs_r, axis=0)

        # Plot predicted vs. actual next state, one figure per state dimension.
        print('Here is the length of the trajectory:', len(next_state_test))
        for i in range(next_state.shape[-1]):
            plt.figure()
            assert len(next_state_test[:, i:i + 1]) == len(Y[:, i:i + 1])
            plt.plot(np.arange(len(next_state_test[:, i:i + 1])),
                     next_state_test[:, i:i + 1])
            plt.errorbar(np.arange(len(Y[:, i:i + 1])), Y[:, i:i + 1],
                         yerr=confs[:, i:i + 1], color='r', ecolor='y')
            plt.grid()
            # plt.savefig(args.environment + '_' + 'k:' + str(k) + '_' +
            #             'dim:' + str(i) + '_' + uid + '.pdf')

        # Plot predicted vs. actual reward.
        plt.figure()
        plt.plot(np.arange(len(reward_test)), reward_test)
        plt.errorbar(np.arange(len(Y_r)), Y_r, yerr=confs_r, color='r', ecolor='g')
        plt.grid()
        plt.show()
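    # If the trained models are to be reused across runs, the LWPR binding
    # can serialize them to XML (a sketch; the file names are illustrative,
    # and a saved model can be restored later with LWPR('<filename>.xml')):
    model_state.write_XML('model_state_' + uid + '.xml')
    model_reward.write_XML('model_reward_' + uid + '.xml')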
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='AntBulletEnv-v0')
    parser.add_argument("--no_data_start", type=int, default=10000)
    parser.add_argument("--train_policy_batch_size", type=int, default=30)
    parser.add_argument("--cma_maxiter", type=int, default=1000)
    parser.add_argument("--unroll_steps", type=int, default=200)
    args = parser.parse_args()
    print(args)

    env = gym.make(args.environment)
    input_dim = env.observation_space.shape[0] + env.action_space.shape[0]
    output_dim = env.observation_space.shape[0] + 1  # state difference plus reward

    model = LWPR(input_dim, output_dim)
    model.init_D = 1. * np.eye(input_dim)
    model.update_D = True
    model.init_alpha = 20. * np.eye(input_dim)
    model.meta = True

    agent = AGENT(env.observation_space.shape[0], env.action_space.shape[0],
                  action_space_low=env.action_space.low,
                  action_space_high=env.action_space.high,
                  unroll_steps=args.unroll_steps)
    init_states = np.stack([env.reset() for _ in range(args.train_policy_batch_size)], axis=0)

    # Train the dynamics model on the initial data.
    data_buffer = gather_data3(env, args.no_data_start)
    states, actions, rewards, next_states, _ = zip(*data_buffer)
    states = np.stack(states, axis=0)
    actions = np.stack(actions, axis=0)
    rewards = np.array(rewards)[..., np.newaxis]
    next_states = np.stack(next_states, axis=0)

    state_actions = np.concatenate([states, actions], axis=-1)
    state_diff = next_states - states
    targets = np.concatenate([state_diff, rewards], axis=-1)
    assert len(state_actions) == len(targets)

    ind = np.random.permutation(len(state_actions))
    for i in range(len(state_actions)):
        model.update(state_actions[ind[i]], targets[ind[i]])

    for epoch in range(1000):
        # Fit the policy against the current model, then run one real episode.
        agent._fit(model, init_states, args.cma_maxiter)

        total_rewards = 0.
        state = env.reset()
        while True:
            action = agent._forward(agent.thetas, state[np.newaxis, ...])[0]
            next_state, reward, done, _ = env.step(action)

            # Update the model online with the newly observed transition.
            state_action = np.concatenate([state, action])
            state_diff = next_state - state
            target = np.append(state_diff, reward)
            model.update(state_action, target)

            total_rewards += float(reward)
            state = next_state.copy()
            if done:
                print('epoch:', epoch, 'total_rewards:', total_rewards)
                break
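# For completeness: the model above packs [state difference, reward] into one
# output vector, so a predicted transition is recovered by undoing the
# difference encoding. An illustrative helper (not part of the original):
def predict_step(model, state, action):
    pred = model.predict(np.concatenate([state, action]))
    next_state_pred = state + pred[:-1]  # add the predicted difference back
    reward_pred = pred[-1]
    return next_state_pred, reward_pred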
# Mark the two training contexts on the demonstration plot.
context1 = [2.0, context[0]]
context2 = [3.6, context[1]]
circle1 = plt.Circle((context1[0], context1[1]), 0.1, color='b', fill=False)
circle2 = plt.Circle((context2[0], context2[1]), 0.1, color='b', fill=False)
ax = plt.gca()
ax.add_artist(circle1)
ax.add_artist(circle2)
plt.grid()
plt.savefig('/home/fmeccanici/Documents/thesis/thesis_workspace/src/promp_demo_2d/figures/lwpr/lwpr_demos.png')
plt.clf()

# Initialize the LWPR model.
model = LWPR(n_in, n_out)
model.init_D = 10 * eye(n_in)
model.init_alpha = 0.1 * eye(n_in)
# model.kernel = 'BiSquare'

# Train on the demonstrations: 10 passes over (context, output) pairs.
for i in range(10):
    for demonstration in demonstrations:
        output = np.asarray(demonstration[0])
        context = np.asarray(demonstration[1])
        model.update(context, output)

# generalize
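# The snippet ends at the generalization step. A minimal sketch of what it
# might look like: query the trained model at an unseen context. The context
# value is illustrative and assumes the same dimensionality used in training.
unseen_context = np.asarray([2.5, 1.0])
generalized_output = model.predict(unseen_context)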