def build_dataset(idims=9, odims=6, angi=[], f=test_func1, n_train=500,
                  n_test=50, input_noise=0.01, output_noise=0.01,
                  rand_seed=None):
    """Build a synthetic train/test regression dataset from ``f``.

    Output dimension ``i`` is ``(i + 1) * f(x)``; the training targets also
    get additive Gaussian noise scaled by ``output_noise``.

    Parameters
    ----------
    idims, odims : number of input / output dimensions.
    angi : indices of input dimensions passed to ``utils.gTrig_np`` /
        ``utils.gTrig2_np`` as angle dimensions.
    f : function evaluated on the sampled inputs.
    n_train, n_test : number of training / testing samples.
    input_noise : scale of the identity covariance attached to each test
        input.
    output_noise : standard deviation of the noise added to training targets.
    rand_seed : if not None, seeds numpy's global random generator.

    Returns
    -------
    ``(x_train, y_train), (x_test, y_test, s_test)``
    """
    if rand_seed is not None:
        np.random.seed(rand_seed)

    # ---------------- training split ----------------
    # inputs are uniform samples, shifted by 0.25 and scaled by 15
    x_train = 15 * (np.random.rand(n_train, idims) - 0.25)
    y_train = np.empty((n_train, odims))
    for dim in range(odims):
        clean = (dim + 1) * f(x_train)
        y_train[:, dim] = clean + output_noise * (np.random.randn(n_train))
    x_train = utils.gTrig_np(x_train, angi)

    # ---------------- testing split ----------------
    # inputs are uniform samples, centred and scaled by 120
    x_test = 120 * (np.random.rand(n_test, idims) - 0.5)
    y_test = np.empty((n_test, odims))
    for dim in range(odims):
        y_test[:, dim] = (dim + 1) * f(x_test)

    # one (idims x idims) input covariance per test point
    # (a smoothed alternative based on convolve2d was left disabled in the
    # original code)
    base_cov = input_noise * np.eye(idims)
    s_test = np.tile(base_cov, (n_test, 1)).reshape(n_test, idims, idims)

    if len(angi) > 0:
        x_test, s_test = utils.gTrig2_np(x_test, s_test, angi, idims)

    train_split = (x_train, y_train)
    test_split = (x_test, y_test, s_test)
    return train_split, test_split
def train_inverse_dynamics(self, deltas=True):
    """Append new experience to the inverse dynamics model and train it.

    For every episode not yet consumed (index >= ``self.next_episode_inv``)
    a dataset is built whose inputs are pairs of consecutive states and
    whose targets are the actions applied at the first state of each pair.
    The initial state distribution (``self.mx0``, ``self.Sx0``) is refreshed
    from the recorded initial states, or from the plant defaults when there
    is no experience.

    Parameters
    ----------
    deltas : if True the second half of each input is ``x_t - x_{t+1}``;
        otherwise it is ``x_{t+1}`` itself.
    """
    utils.print_with_stamp('Training inverse dynamics model', self.name)

    X = []
    Y = []
    n_episodes = len(self.experience.states)

    if n_episodes > 0:
        # construct training dataset from the episodes not seen before
        for i in range(self.next_episode_inv, n_episodes):
            x = np.array(self.experience.states[i])
            u = np.array(self.experience.actions[i])

            # inputs are pairs of consecutive states < x_{t}, x_{t+1} >
            x_ = utils.gTrig_np(x, self.angle_idims)
            if deltas:
                X.append(np.hstack((x_[:-1], x_[:-1] - x_[1:])))
            else:
                X.append(np.hstack((x_[:-1], x_[1:])))
            # outputs are the actions that produced the state transition
            Y.append(u[:-1])

        self.next_episode_inv = n_episodes

        # np.vstack raises on an empty list, which happens whenever this
        # method is called again before a new episode has been recorded
        has_new_data = len(X) > 0
        if has_new_data:
            X = np.vstack(X)
            Y = np.vstack(Y)

        # get distribution of initial states
        x0 = np.array([x[0] for x in self.experience.states])
        if n_episodes > 1:
            self.mx0.set_value(x0.mean(0).astype('float64'))
            self.Sx0.set_value(np.cov(x0.T).astype('float64'))
        else:
            # a single sample has no covariance; use a small isotropic one
            self.mx0.set_value(x0.astype('float64').flatten())
            self.Sx0.set_value(1e-2 * np.eye(x0.size).astype('float64'))

        # append data to the dynamics model
        if has_new_data:
            self.inverse_dynamics_model.append_dataset(X, Y)
    else:
        x0 = np.array(self.plant.x0, dtype='float64').squeeze()
        S0 = np.array(self.plant.S0, dtype='float64').squeeze()
        self.mx0.set_value(x0)
        self.Sx0.set_value(S0)

    utils.print_with_stamp(
        'Dataset size:: Inputs: [ %s ], Targets: [ %s ] ' %
        (self.inverse_dynamics_model.X.get_value().shape,
         self.inverse_dynamics_model.Y.get_value().shape),
        self.name)

    if self.inverse_dynamics_model.should_recompile:
        # reinitialize log likelihood
        self.inverse_dynamics_model.init_loss()

    self.inverse_dynamics_model.train()
    utils.print_with_stamp('Done training inverse dynamics model', self.name)
def get_state(self, noisy=True):
    """Return the current state measurement and the current time.

    Parameters
    ----------
    noisy : if True and ``self.noise_dist`` is set, a sample from
        ``self.noise_dist`` is added to the returned measurement.

    Returns
    -------
    (state, t) : the flattened measurement (with the dimensions listed in
        ``self.angle_dims`` converted through ``gTrig_np`` when that list is
        non-empty) and ``self.t``.
    """
    state = self.state
    if noisy and self.noise_dist is not None:
        # noisy state measurement; build a new array instead of using `+=`,
        # which would write the noise into self.state and let it accumulate
        # across calls
        state = state + self.noise_dist.sample(1).flatten()
    if self.angle_dims:
        # convert angle dimensions to complex representation
        state = gTrig_np(state, self.angle_dims).flatten()
    return state.flatten(), self.t
def distance_based_cost(mx, Sx, target, Q, cw=[1], expl=None, angle_dims=[],
                        loss_func=quadratic_saturating_loss,
                        *args, **kwargs):
    '''
    Cost built from ``loss_func`` evaluated at one or more widths.

    For every width ``c`` in ``cw`` the loss is evaluated with weight matrix
    ``Q / c**2`` and the results are averaged. When ``Sx`` is None only the
    averaged cost is returned; otherwise a ``(mean, variance)`` pair is
    returned, with the variances summed and divided by ``len(cw)**2``.
    If ``expl`` is neither None nor zero, ``expl * sqrt(variance)`` is added
    to each mean cost (UCB exploration term).

    Lists and tuples given as ``target`` are converted to numpy arrays, and
    the dimensions in ``angle_dims`` of ``target``, ``mx`` and ``Sx`` are
    converted before the loss is evaluated.
    '''
    if isinstance(target, (list, tuple)):
        target = np.array(target)

    # convert angle dimensions
    if angle_dims:
        if isinstance(target, np.ndarray):
            trig = utils.gTrig_np
        else:
            trig = utils.gTrig
        target = trig(target, angle_dims).flatten()
        mx, Sx = convert_angle_dimensions(mx, Sx, angle_dims)

    Q = Q.astype(theano.config.floatX)
    target = target.astype(theano.config.floatX)

    # a single width may be given as a bare scalar
    if not isinstance(cw, list):
        cw = [cw]
    n_widths = len(cw)

    # deterministic case: no input covariance
    # TODO this can be vectorized
    if Sx is None:
        per_width = [
            loss_func(mx, None, target, Q / width**2, *args, **kwargs)
            for width in cw
        ]
        return sum(per_width) / n_widths

    # stochastic case: collect a mean and a variance for every width
    # TODO this can be vectorized
    mean_terms = []
    var_terms = []
    for width in cw:
        m_cost, s_cost = loss_func(
            mx, Sx, target, Q / width**2, *args, **kwargs)
        # add UCB exploration term
        if expl is not None and expl != 0.0:
            m_cost += expl * tt.sqrt(s_cost)
        mean_terms.append(m_cost)
        var_terms.append(s_cost)

    return sum(mean_terms) / n_widths, sum(var_terms) / (n_widths**2)
def train_adjustment(self, deltas=True):
    """Train the policy adjustment model from recent source trajectories.

    For each of the last ``n_source_traj`` source trajectories, every state
    transition is passed to the inverse dynamics model to obtain an action
    that would produce that transition. The adjustment model is trained to
    map source states (optionally concatenated with the source actions) to
    the difference between the sampled action and the source action.

    Parameters
    ----------
    deltas : if True each transition is encoded as
        ``(x_t, x_t - x_{t+1})``; otherwise as ``(x_t, x_{t+1})``.

    Raises
    ------
    ValueError : if there are no source trajectories to learn from.
    """
    n_source_traj = 5
    total_trajectories = len(self.source_experience.states)
    if total_trajectories == 0:
        # np.vstack would fail below with a less helpful message
        raise ValueError(
            'train_adjustment requires at least one source trajectory')

    X = []
    Y = []
    Y_var = []

    # from source trajectories
    first = max(0, total_trajectories - n_source_traj)
    for i in range(first, total_trajectories):
        self.inverse_dynamics_model.update()

        # for every state transition in the source experience, use the
        # target inverse dynamics to find an action that would produce the
        # desired transition
        x = np.array(self.source_experience.states[i])
        x_s = utils.gTrig_np(x, self.angle_idims)

        # source transitions
        if deltas:
            t_s = np.hstack((x_s[:-1], x_s[:-1] - x_s[1:]))
        else:
            t_s = np.hstack((x_s[:-1], x_s[1:]))

        # source actions
        u_s = np.array(self.source_experience.actions[i])

        # target actions
        u_t = []
        Su_t = []
        for t_s_i in t_s:
            # get prediction from inverse dynamics model
            u, Su, _ = self.inverse_dynamics_model.predict(t_s_i)
            u_t.append(np.random.multivariate_normal(u, Su))
            # keep only the (floored) diagonal of the predictive covariance
            Su_t.append(np.diag(np.maximum(Su, 1e-9)))
        u_t = np.stack(u_t)
        Su_t = np.stack(Su_t)

        # Targets cover time steps 1 .. T-2 (u_t[1:] and u_s[1:-1]), so the
        # inputs must use the same steps in both branches; x_s[:-1] would
        # give one more row than the targets.
        if self.policy.use_control_input:
            X.append(np.hstack([x_s[1:-1], u_s[1:-1]]))
        else:
            X.append(x_s[1:-1])
        Y.append(u_t[1:] - u_s[1:-1])
        Y_var.append(Su_t[1:])

    X = np.vstack(X)
    Y = np.vstack(Y)
    Y_var = np.vstack(Y_var)

    self.policy.adjustment_model.set_dataset(X, Y, Y_var=Y_var)
    self.policy.adjustment_model.train()
    self.policy.adjustment_model.update()
def init_params(self, compile_funcs=False):
    """Initialize the inducing inputs, targets and hyperparameters.

    Inputs are sampled from ``self.state0_dist`` (angle dimensions converted
    through ``utils.gTrig_np``), targets are sampled from a zero-mean
    Gaussian with covariance ``0.1 * I``, and the unconstrained
    hyperparameters are set to the inverse softplus of the initial values.
    ``compile_funcs`` is accepted but not used by this method.
    """
    utils.print_with_stamp('Initializing parameters', self.name)

    # init inputs
    raw_inputs = self.state0_dist.sample(self.n_inducing)
    inputs = utils.gTrig_np(raw_inputs, self.angle_dims)

    # initial hyperparameters: 1 for linear dimensions, 0.7 for each of the
    # two components of an angular dimension, then 1 and 0.01 for the last
    # two entries (signal and noise terms)
    n_angles = len(self.angle_dims)
    n_linear = raw_inputs.shape[1] - n_angles
    hyp_row = np.hstack([
        np.ones(n_linear),
        0.7 * np.ones(2 * n_angles),
        1,
        0.01,
    ])
    l0 = np.tile(hyp_row, (self.maxU.size, 1)).astype(floatX)
    # inverse of softplus, so that softplus(l0) recovers the values above
    l0 = np.log(np.exp(l0, dtype=floatX) - 1.0)

    # init policy targets close to zero
    target_mean = np.zeros((self.maxU.size, ))
    target_cov = 0.1 * np.eye(self.maxU.size)
    target_dist = utils.distributions.Gaussian(target_mean, target_cov)
    targets = target_dist.sample(self.n_inducing)
    targets = targets.reshape((self.n_inducing, self.maxU.size))

    self.trained = False

    # set the parameters
    self.N, self.D = inputs.shape[0], inputs.shape[1]
    self.E = targets.shape[1]

    self.set_params({
        'X': inputs.astype(floatX),
        'Y': targets.astype(floatX)
    })
    self.set_params({'unconstrained_hyp': l0.astype(floatX)})

    eps = np.finfo(getattr(np, floatX)).eps
    self.hyp = tt.nnet.softplus(self.unconstrained_hyp) + eps

    # don't optimize the signal and noise variances
    trainable_part = self.hyp[:, :-2]
    fixed_part = theano.gradient.disconnected_grad(self.hyp[:, -2:])
    self.hyp = tt.concatenate(
        [trainable_part, fixed_part],
        axis=np.array(1, dtype='int64'))

    # call init loss to initialize the intermediate shared variables
    super(RBFGP, self).get_loss(cache_intermediate=False)

    # init the prediction function
    self.evaluate(np.zeros((self.D, )))
def preprocess_angles(x_t, angle_dims=[]):
    """Convert the angle dimensions of a single state vector.

    ``x_t`` is given a leading axis, passed through ``utils.gTrig_np`` with
    ``angle_dims``, and the result is returned flattened.
    """
    as_batch = x_t[None, :]
    converted = utils.gTrig_np(as_batch, angle_dims)
    return converted.flatten()
def get_dynmodel_dataset(self, deltas=True, filter_episodes=None,
                         angle_dims=None, x_steps=1, u_steps=1,
                         output_steps=1, return_costs=False, stack=False):
    r'''
    Returns a dataset where the inputs are state_actions and the outputs
    are next steps.

    Parameters:
    -----------
    deltas: whether to return changes in state
        (x_t - x_{t-1}, x_{t-1} - x_{t-2}, ...) or future states
        (x_t, x_{t-1}, x_{t-2}, ...), in the output
    filter_episodes: episode index, or list/tuple of episode indices, from
        which to extract data. If empty or undefined (equal to None),
        extracts data from all episodes
    angle_dims: indices of input state dimensions to linearize, by
        converting to complex representation
        \theta => (sin(\theta), cos(\theta))
    x_steps: how many steps in the past to concatenate as input
    u_steps: how many steps in the past to concatenate as input
        (ignored and set to x_steps when stack is True)
    output_steps: how many steps in the future to concatenate as output
    return_costs: whether to append the cost feedback to the output
    stack: whether to stack or concatenate the multi step data

    Returns:
    --------
    X: if stack is False, X is a numpy array of shape
        [n, x_steps*D + u_steps*U], where n is the number of data samples
        and D the input state dimensions.
        If stack is True, the shape of X is [n, x_steps, D + U]
    Y: the matching targets, built from the unconverted states (state
        changes if deltas is True, next states otherwise), with the costs
        appended along the last axis if return_costs is True
    '''
    # Only None means "no filter". Using `filter_episodes or []` would also
    # discard a request for episode 0 and silently return every episode.
    if filter_episodes is None:
        filter_episodes = []
    elif isinstance(filter_episodes, (list, tuple)):
        filter_episodes = list(filter_episodes)
    else:
        filter_episodes = [filter_episodes]
    angle_dims = angle_dims or []

    inputs, targets = [], []
    join = np.stack if stack else np.concatenate

    if stack:
        # ignore the u_steps parameter
        u_steps = x_steps
        # output steps
        output_steps = x_steps + output_steps - 1

    if len(filter_episodes) < 1:
        # use all data
        filter_episodes = list(range(self.n_episodes()))

    for epi in filter_episodes:
        if len(self.states[epi]) == 0:
            continue
        # get state action pairs for current episode
        states = np.array(self.states[epi])
        actions = np.array(self.actions[epi])

        # convert input angle dimensions to complex representation
        # (done on a copy; the targets below use the raw states)
        states_ = utils.gTrig_np(np.array(states), angle_dims)
        # pad with initial state for the first x_steps timesteps
        states_ = np.concatenate([states_[[0]*(x_steps-1)], states_])
        # get input states up to x_steps in the past.
        states_ = join(
            [states_[i:i-x_steps-(output_steps-1), :]
             for i in range(x_steps)],
            axis=1)

        # same for actions (u_steps in the past, pad with zeros for the
        # first u_steps)
        actions_ = np.concatenate(
            [np.zeros((u_steps-1, actions.shape[1])), actions])
        actions_ = join(
            [actions_[i:i-u_steps-(output_steps-1), :]
             for i in range(u_steps)],
            axis=1)

        # create input vector
        inp = np.concatenate([states_, actions_], axis=-1)

        # get output states up to output_steps in the future
        H = states.shape[0]
        ostates = join(
            [states[i:H-(output_steps-i-1), :]
             for i in range(output_steps)],
            axis=1)

        # create output vector
        tgt = (ostates[1:, :] - ostates[:-1, :]
               if deltas else ostates[1:, :])

        # append costs if requested
        if return_costs:
            costs = np.array(self.costs[epi])[:, None]
            ocosts = join(
                [costs[i:H-(output_steps-i-1), :]
                 for i in range(output_steps)],
                axis=1)
            tgt = np.concatenate([tgt, ocosts[1:, :]], axis=-1)

        inputs.append(inp)
        targets.append(tgt)

    ret = np.concatenate(inputs), np.concatenate(targets)
    return ret
def gTrig(state, angle_dims=[]):
    """Apply ``utils.gTrig_np`` to ``state`` and return the result flattened."""
    converted = utils.gTrig_np(state, angle_dims)
    return converted.flatten()