def evaluate_policy(env, pol, exp, params, n_tests=100, render=False):
    '''
    Runs every policy stored in an experience dataset on the environment.

    @param env environment handed to apply_controller (and rendered from
           the step callback when render is True)
    @param pol policy object; its parameters are overwritten via set_params
    @param exp experience dataset providing the policy_parameters list
    @param params dictionary with the 'min_steps' and 'angle_dims' entries
    @param n_tests number of calls to apply_controller per policy
    @param render if True, env.render() is called from the step callback
    @return list with one entry per non-empty parameter set; each entry is
            the list of the n_tests values returned by apply_controller
    '''
    horizon = params['min_steps']
    angle_dims = params['angle_dims']

    def gTrig(state):
        # preprocessing applied to the state by apply_controller
        return utils.gTrig_np(state, angle_dims).flatten()

    def step_cb(*args, **kwargs):
        if render:
            env.render()

    results = []
    for iteration, pol_params in enumerate(exp.policy_parameters):
        utils.print_with_stamp(
            'Evaluating policy at iteration %d' % iteration)
        if not pol_params:
            # no parameters were stored for this iteration
            continue
        pol.set_params(pol_params)
        results.append([
            apply_controller(env, pol, horizon, preprocess=gTrig,
                             callback=step_cb)
            for _ in range(n_tests)
        ])
    return results
def pretrain_full(self):
    '''
    Trains the model as a full GP, using a dedicated copy of the optimizer.

    The objective is the loss returned by GP.get_loss, optimized over all
    the symbolic parameters except the last one. If the dataset has more
    than 2048 rows, training runs on a random subsample and the complete
    dataset is restored afterwards, even if training raises an exception.
    '''
    if not hasattr(self, 'full_optimizer'):
        import copy
        self.full_optimizer = copy.copy(self.optimizer)
        self.full_optimizer.name = self.name+'_fullopt'
        self.full_optimizer.max_evals = self.optimizer.max_evals
        # forces the objective to be set in the next statement
        self.full_optimizer.loss_fn = None

    if self.full_optimizer.loss_fn is None or self.should_recompile:
        loss, inps, updts = GP.get_loss(self)
        self.full_optimizer.set_objective(
            loss, self.get_params(symbolic=True)[:-1], inps, updts)

    # train the full GP (if dataset too large, take a random subsample)
    X_full = None
    Y_full = None
    n_subsample = 2048
    X = self.X.get_value()
    if X.shape[0] > n_subsample:
        msg = 'Training full gp with random subsample of size %d'
        utils.print_with_stamp(msg % (n_subsample), self.name)
        idx = np.arange(X.shape[0])
        np.random.shuffle(idx)
        idx = idx[:n_subsample]
        X_full = X
        Y_full = self.Y.get_value()
        self.set_dataset(X_full[idx], Y_full[idx])

    try:
        super(SSGP, self).train(self.full_optimizer)
    finally:
        # never leave the model holding only the subsample, even when
        # training fails
        if X_full is not None:
            # restore full dataset for SSGP training
            utils.print_with_stamp('Restoring full dataset', self.name)
            self.set_dataset(X_full, Y_full)
def load(self, output_folder=None, output_filename=None):
    '''
    Loads the state of this object from disk.

    @param output_folder directory that contains the file. Defaults to
           utils.get_output_dir()
    @param output_filename name of the file; it is synchronized with
           self.filename through utils.sync_output_filename, and the
           '.zip' extension is appended when missing
    @return True if the state was loaded, False if an IOError occurred
    '''
    for attr in ('registered_types', 'registered_keys'):
        if not hasattr(self, attr):
            setattr(self, attr, set())

    if output_folder is None:
        output_folder = utils.get_output_dir()
    output_filename, self.filename = utils.sync_output_filename(
        output_filename, self.filename, '.zip')

    path = os.path.join(output_folder, output_filename)
    # append the zip extension
    if not path.endswith('.zip'):
        path += '.zip'

    try:
        with open(path, 'rb') as state_file:
            utils.print_with_stamp(
                'Loading state from %s' % (path), self.name)
            self.set_instance_state(t_load(state_file))
        self.state_changed = False
    except IOError as err:
        utils.print_with_stamp(
            'Unable to load state from %s' % (path), self.name)
        print(err)
        return False
    return True
def save(self, output_folder=None, output_filename=None):
    '''
    Serializes the class using the theano pickling utility function, and
    saves it to disk

    @param output_folder directory where the file is written. Defaults to
           utils.get_output_dir(); it is created if it does not exist
    @param output_filename name of the file; it is synchronized with
           self.filename through utils.sync_output_filename, and the
           '.zip' extension is appended when missing
    '''
    sys.setrecursionlimit(100000)
    output_folder = utils.get_output_dir(
        ) if output_folder is None else output_folder
    [output_filename, self.filename] = utils.sync_output_filename(
        output_filename, self.filename, '.zip')

    # NOTE(review): output_folder was already replaced by its default
    # above, so this condition looks like it is always true (unless
    # get_output_dir returns None). Confirm whether saving should be
    # skipped when the state has not changed.
    if (self.state_changed or output_folder is not None
            or output_filename is not None):
        # check if output_folder exists, create it if necessary.
        if not os.path.exists(output_folder):
            try:
                os.makedirs(output_folder)
            except OSError:
                utils.print_with_stamp(
                    'Unable to create the directory: %s' % (output_folder),
                    self.name)
                raise
        # construct file path
        path = os.path.join(output_folder, output_filename)
        # append the zip extension
        if not path.endswith('.zip'):
            path = path + '.zip'
        with open(path, 'wb') as f:
            utils.print_with_stamp('Saving state to %s' % (path), self.name)
            t_dump(self.get_instance_state(), f, 2)
        # make the file readable and writable by every user. os.chmod
        # avoids building a shell command from the path; the call stays
        # best effort, since it fails when the file is owned by someone else
        try:
            os.chmod(path, 0o666)
        except OSError:
            utils.print_with_stamp(
                'Unable to change permissions of %s' % (path), self.name)
        self.state_changed = False
def init_params(self):
    '''
    Initializes the hyperparameters from the statistics of the dataset.

    For every output dimension, the first self.D entries are set from the
    standard deviation of the inputs, the next one from the standard
    deviation of the targets, and the last one to 0.1 times the previous
    entry. The values are stored through the inverse of the softplus, so
    that self.hyp = softplus(unconstrained_hyp) + eps is always positive.
    '''
    utils.print_with_stamp('Initialising parameters', self.name)
    n_in, n_out = self.D, self.E

    # this code supports squared exponential only, at the moment
    inputs = self.X.get_value()
    targets = self.Y.get_value()
    init_hyp = np.zeros((n_out, n_in + 2))
    init_hyp[:, :n_in] = inputs.std(0, ddof=1)
    init_hyp[:, n_in] = targets.std(0, ddof=1)
    init_hyp[:, n_in + 1] = 0.1 * init_hyp[:, n_in]
    # inverse of the softplus applied below
    init_hyp = np.log(np.exp(init_hyp, dtype=floatX) - 1.0)

    # creates the unconstrained_hyp attribute, or updates its value
    self.set_params({'unconstrained_hyp': init_hyp})

    if self.hyp is None:
        # keep the hyperparameters strictly positive
        eps = np.finfo(getattr(np, floatX)).eps
        self.hyp = tt.nnet.softplus(self.unconstrained_hyp) + eps

    # sn is the last column of the hyperparameters (used in PILCO)
    if self.sn is None:
        self.sn = self.hyp[:, -1]
def nigp_updates(self):
    '''
    Builds the symbolic update for self.nigp, from the squared derivative
    of the mean function evaluated at every training input.

    @return the scan updates combined with the (self.nigp, nigp) pair
    '''
    idims = self.D
    msg = 'Compiling derivative of mean function at training inputs'
    utils.print_with_stamp(msg, self.name)

    # we need to evaluate the derivative of the mean function at the
    # training inputs
    def dM2_f_i(mx, beta, hyp, X):
        # squared derivative of the mean k(mx, X).dot(beta) w.r.t. mx
        hyps = (hyp[:idims + 1], hyp[idims + 1])
        kernel_func = partial(cov.Sum, hyps, self.covs)
        k = kernel_func(mx[None, :], X).flatten()
        mean = k.dot(beta)
        dmean = tt.jacobian(mean.flatten(), mx)
        return tt.square(dmean.flatten())

    def dM2_f(beta, hyp, X):
        # iterate over training inputs
        # (the updates returned by this inner scan are discarded)
        dM2_o, updts = theano.scan(fn=dM2_f_i, sequences=[X],
                                   non_sequences=[beta, hyp, X],
                                   allow_gc=False)
        return dM2_o

    # iterate over output dimensions
    dM2, updts = theano.scan(fn=dM2_f, sequences=[self.beta, self.hyp],
                             non_sequences=[self.X], allow_gc=False)

    # update the nigp parameter using the derivative of the mean function
    nigp = ((dM2[:, :, :, None] * self.X_cov[None]).sum(2) * dM2).sum(-1)
    # NOTE(review): this adds a plain (variable, expression) tuple to the
    # scan updates; confirm that the updates type accepts a single pair
    nigp_updts = updts + (self.nigp, nigp)
    return nigp_updts
def __init__(self, name='Experience', filename_prefix=None, filename=None):
    '''
    Creates an empty experience dataset, or loads one from disk.

    @param name name of this object, used for logging and for building
           the default filename
    @param filename_prefix prefix of the default filename
           (<prefix>_dataset). When None, name is used as the prefix
    @param filename if given, it is used as the filename and the
           dataset is loaded from it
    '''
    self.name = name
    # data containers, all initially empty
    for attr in ('time_stamps', 'states', 'actions', 'costs', 'info',
                 'policy_parameters'):
        setattr(self, attr, [])
    self.curr_episode = -1
    self.state_changed = True

    load_requested = filename is not None
    if load_requested:
        self.filename = filename
    else:
        prefix = self.name if filename_prefix is None else filename_prefix
        self.filename = prefix + '_dataset'
        utils.print_with_stamp(
            'Initialising new experience dataset', self.name)

    Loadable.__init__(self, name=name, filename=self.filename)

    # if a filename was passed, try loading it
    if load_requested:
        self.load()

    self.register_types([list])
    self.register(['curr_episode'])
def plot_rollout(rollout_fn, exp, *args, **kwargs):
    '''
    Plots the output of a rollout function next to recorded experience,
    using one subplot per state dimension.

    @param rollout_fn callable evaluated as rollout_fn(*args). It must
           return either (loss, costs, trajectories), with trajectories of
           shape (n_samples, T, dims), or
           (loss, m_costs, s_costs, m_states, s_states), with m_states of
           shape (T, dims) and s_states indexed as [t, d, d]
    @param exp experience dataset; exp.states is the list of episodes
    @param args positional arguments forwarded to rollout_fn
    @param kwargs optional settings: 'fig' and 'axarr' (figure and axes to
           reuse; created when either is missing), 'name' (figure name,
           default 'Rollout') and 'n_exp' (number of most recent episodes
           to draw and average; with 0 only the last episode is drawn)
    @return the tuple (fig, axarr)
    '''
    fig = kwargs.get('fig', None)
    axarr = kwargs.get('axarr', None)
    name = kwargs.get('name', 'Rollout')
    n_exp = kwargs.get('n_exp', 0)

    ret = rollout_fn(*args)
    trajectories = m_states = None
    if len(ret) == 3:
        loss, costs, trajectories = ret
        n_samples, T, dims = trajectories.shape
    else:
        loss, m_costs, s_costs, m_states, s_states = ret
        T, dims = m_states.shape

    if fig is None or axarr is None:
        utils.print_with_stamp("Creating fig and axes", "plot_rollout")
        fig, axarr = plt.subplots(dims, num=name, sharex=True)
        # plt.subplots returns a bare Axes (not an array) when dims == 1
        axarr = np.atleast_1d(axarr)

    time_steps = np.arange(T)
    # only the last n_exp episodes are drawn individually. Converting them
    # one by one avoids building a single array from all the episodes,
    # which fails when they have different lengths
    recent = ([np.asarray(ep) for ep in exp.states[-n_exp:]]
              if n_exp > 0 else [])

    for d in range(dims):
        axarr[d].clear()
        if trajectories is not None:
            st = trajectories[:, :, d]
            # plot predictive distribution
            for i in range(n_samples):
                # matplotlib rejects alpha values above 1
                axarr[d].plot(
                    time_steps, st[i, :], color='steelblue',
                    alpha=min(1.0, 10.0/n_samples))
            axarr[d].plot(
                time_steps, st[:, :].mean(0), color='blue', linewidth=2)
        if m_states is not None:
            axarr[d].plot(
                time_steps, m_states[:, d], color='steelblue', alpha=0.3)
            # 1.96 standard deviations around the mean
            axarr[d].errorbar(
                time_steps, m_states[:, d],
                1.96*np.sqrt(s_states[:, d, d]),
                color='steelblue', alpha=0.3)

        # plot experience
        if n_exp == 0:
            exp_i = np.array(exp.states[-1])[:, d]
        else:
            exp_i = np.array(exp.states[-n_exp:])[:, :, d].mean(0)
        T_exp = len(exp_i)
        for episode in recent:
            axarr[d].plot(
                np.arange(T_exp), episode[:, d], color='orange', alpha=0.3)
        axarr[d].plot(np.arange(T_exp), exp_i, color='red')

    if dims > 0:
        axarr[dims - 1].figure.canvas.draw()
    # block must be passed by keyword in recent matplotlib versions
    plt.show(block=False)
    plt.waitforbuttonpress(0.5)
    return fig, axarr
def stop(self):
    '''
    Clears the running flag and waits up to 10 seconds for each of the
    drawing and polling workers that is still alive.
    '''
    self.running.clear()
    for worker in (self.drawing_thread, self.polling_thread):
        if worker is None or not worker.is_alive():
            continue
        # wait until the worker stops
        worker.join(10)
    print_with_stamp('Stopped drawing loop', self.name)
def start(self):
    '''
    Creates the drawing process and the polling thread as daemons, sets
    the running flag and starts both (polling first, then drawing).
    '''
    print_with_stamp('Starting drawing loop', self.name)

    drawer = Process(target=self.drawing_loop,
                     args=(self.drawing_pipe, ))
    drawer.daemon = True
    self.drawing_thread = drawer

    poller = Thread(target=self.polling_loop,
                    args=(self.polling_pipe, ))
    poller.daemon = True
    self.polling_thread = poller

    self.running.set()
    self.polling_thread.start()
    self.drawing_thread.start()
def _reset(self):
    '''
    Asks the user to reset the plant by hand, sends the RESET_STATE
    command through the serial port and reads the state back.

    @return the state obtained from state_from_serial
    '''
    prompt = 'Please reset your plant to its initial state and hit Enter'
    print_with_stamp(prompt, self.name)
    # block until the user confirms
    input()

    port = self.serial
    if not port.isOpen():
        port.open()
    port.flushInput()
    port.flushOutput()
    reset_cmd = self.cmds['RESET_STATE'] + ";"
    port.write(reset_cmd.encode())
    sleep(self.dt)

    self.state, self.t = self.state_from_serial()
    # the time read from the plant is replaced by -1
    self.t = -1
    return self.state
def rollout(mx0, Sx0, H, gamma, policy, dynmodel, cost, angle_dims=None):
    '''
    Given some initial state distribution Normal(mx0,Sx0), and a prediction
    horizon H (number of timesteps), returns the predicted state
    distribution and discounted cost for every timestep. The discounted
    cost is returned as a distribution, since the state is uncertain.

    @param mx0 mean of the initial state distribution
    @param Sx0 covariance of the initial state distribution
    @param H number of timesteps to predict
    @param gamma discount factor; the cost at step i is scaled by gamma**i
    @param policy policy object passed to propagate_belief
    @param dynmodel dynamics model passed to propagate_belief
    @param cost callable evaluated as cost(mx, Sx), returning the mean and
           variance of the cost
    @param angle_dims passed through to propagate_belief
    @return the list [mean_costs, var_costs, mean_states, cov_states] and
            the updates returned by theano.scan
    '''
    msg = 'Building computation graph for belief state propagation'
    utils.print_with_stamp(msg, 'pilco.rollout')

    # define internal scan computations
    def step_rollout(i, mx, Sx, *args):
        '''
            Single step of rollout.
        '''
        # get next state distribution
        b_out, updates = propagate_belief(mx, Sx, policy, dynmodel,
                                          angle_dims)
        mx_next, Sx_next = b_out

        # get cost of applying action:
        mcost, Scost = cost(mx_next, Sx_next)
        # gamma is the first of the non_sequences (see nseq below)
        gamma = args[0]
        gamma_i = gamma**i
        # the variance is scaled by the square of the discount
        next_v = [
            gamma_i * mcost,
            tt.square(gamma_i) * Scost, mx_next, Sx_next
        ]
        return next_v, updates

    # these are the shared variables that will be used in the graph.
    # we need to pass them as non_sequences here
    # (see: http://deeplearning.net/software/theano/library/scan.html)
    nseq = [gamma]
    nseq.extend(dynmodel.get_intermediate_outputs())
    nseq.extend(policy.get_intermediate_outputs())

    # create the nodes that return the result from scan
    # (only the state mean and covariance are fed back into the next step)
    rollout_output, updts = theano.scan(fn=step_rollout,
                                        sequences=[theano.tensor.arange(H)],
                                        outputs_info=[None, None, mx0, Sx0],
                                        non_sequences=nseq,
                                        strict=True,
                                        allow_gc=False,
                                        name="pilco>rollout_scan")

    mean_costs, var_costs, mean_states, cov_states = rollout_output[:4]

    mean_costs.name = 'mc_list'
    var_costs.name = 'Sc_list'
    mean_states.name = 'mx_list'
    cov_states.name = 'Sx_list'

    return [mean_costs, var_costs, mean_states, cov_states], updts
def rollout(mx0, Sx0, Z_nom, U_nom, I, L, dynmodel, cost, D,
            angle_dims=None):
    '''
    Given some initial state distribution Normal(mx0,Sx0), propagates the
    belief forward under a time-varying local linear controller, with one
    step per row of the nominal trajectory.

    At every step the control is u = u_nom + I_t + L_t.dot(z - z_nom),
    where z is the current belief vector built by wrap_belief.

    @param mx0 mean of the initial state distribution
    @param Sx0 covariance of the initial state distribution
    @param Z_nom sequence of nominal belief vectors
    @param U_nom sequence of nominal controls
    @param I sequence of terms added to the nominal control
    @param L sequence of gain matrices applied to (z - z_nom)
    @param dynmodel dynamics model passed to propagate_belief; its shared
           variables are given to scan as non_sequences
    @param cost callable evaluated as cost(mx, Sx); its result is not
           part of the returned values at the moment
    @param D dimension passed to unwrap_belief, propagate_belief and
           np.triu_indices
    @param angle_dims passed through to propagate_belief
    @return the list [z, u] with the belief vectors and controls for
            every step, and the updates returned by theano.scan
    '''
    msg = 'Building computation graph for belief state propagation'
    utils.print_with_stamp(msg, 'pilco.rollout')

    # define internal scan computations.
    # scan calls the step function with the sequence slices first, then
    # the recurrent outputs and finally the non_sequences, so the belief
    # z comes after the four sequence arguments
    def forward_step(z_nom, u_nom, L_, I_, z, *args):
        '''
            Single step of rollout.
        '''
        # get controls from local linear policy
        u = u_nom + I_ + L_.dot(z - z_nom)

        # split z into the mean and covariance of the state
        mx, Sx, triu_indices = unwrap_belief(z, D)

        # get next state distribution
        b_out, updates = propagate_belief(mx, Sx, u, dynmodel, D,
                                          angle_dims)
        mx_next, Sx_next = b_out

        # build belief vector
        z_next = wrap_belief(mx_next, Sx_next, triu_indices)

        # get cost of applying action (computed but not returned yet)
        mcost, Scost = cost(mx_next, Sx_next)

        next_v = [z_next, u]
        return next_v, updates

    # these are the shared variables that will be used in the graph.
    # we need to pass them as non_sequences here
    # (see: http://deeplearning.net/software/theano/library/scan.html)
    shared_vars = dynmodel.get_all_shared_vars()
    z0 = wrap_belief(mx0, Sx0, np.triu_indices(D))

    # create the nodes that return the result from scan.
    # outputs_info needs one entry per output of the step function: the
    # belief is recurrent (initialized with z0), the control is not
    rollout_output, updts = theano.scan(fn=forward_step,
                                        sequences=[Z_nom, U_nom, L, I],
                                        outputs_info=[z0, None],
                                        non_sequences=shared_vars,
                                        strict=True,
                                        allow_gc=False,
                                        name="pddp>rollout_scan")

    z, u = rollout_output[:2]

    return [z, u], updts
def init_params(self, compile_funcs=False): utils.print_with_stamp('Initializing parameters', self.name) # init inputs inputs_ = self.state0_dist.sample(self.n_inducing) inputs = utils.gTrig_np(inputs_, self.angle_dims) # set the initial log hyperparameters (1 for linear dimensions, # 0.7 for angular) l0 = np.hstack([ np.ones(inputs_.shape[1] - len(self.angle_dims)), 0.7 * np.ones(2 * len(self.angle_dims)), 1, 0.01 ]) l0 = np.tile(l0, (self.maxU.size, 1)).astype(floatX) l0 = np.log(np.exp(l0, dtype=floatX) - 1.0) # init policy targets close to zero mu = np.zeros((self.maxU.size, )) Su = 0.1 * np.eye(self.maxU.size) targets = utils.distributions.Gaussian(mu, Su).sample(self.n_inducing) targets = targets.reshape((self.n_inducing, self.maxU.size)) self.trained = False # set the parameters self.N = inputs.shape[0] self.D = inputs.shape[1] self.E = targets.shape[1] self.set_params({ 'X': inputs.astype(floatX), 'Y': targets.astype(floatX) }) self.set_params({'unconstrained_hyp': l0.astype(floatX)}) eps = np.finfo(np.__dict__[floatX]).eps self.hyp = tt.nnet.softplus(self.unconstrained_hyp) + eps # don't optimize the signal and noise variances self.hyp = tt.concatenate([ self.hyp[:, :-2], theano.gradient.disconnected_grad(self.hyp[:, -2:]) ], axis=np.array(1, dtype='int64')) # call init loss to initialize the intermediate shared variables super(RBFGP, self).get_loss(cache_intermediate=False) # init the prediction function self.evaluate(np.zeros((self.D, )))
def reset(self):
    '''
    Empties the internal data structures.

    state_changed is set to False, so that the (now empty) dataset is not
    flagged as needing to be saved.
    '''
    # a space separates the two parts of the message, as in truncate
    fmt = 'Resetting experience dataset'
    fmt += ' (WARNING: data from %s will be overwritten)'
    utils.print_with_stamp(fmt % (self.filename), self.name)
    self.time_stamps = []
    self.states = []
    self.actions = []
    self.costs = []
    self.info = []
    self.policy_parameters = []
    self.curr_episode = -1
    # Let's give people a last chance of recovering their data. Also, we
    # don't want to save an empty experience dataset
    self.state_changed = False
def init_pseudo_inputs(self):
    '''
    Initializes the pseudo inputs X_sp by clustering the dataset inputs.

    The initial centers are chosen with utils.kmeanspp and refined with
    kmeans; the result is stored through set_params, and should_recompile
    is set to True.

    Raises an AssertionError if the dataset has fewer than n_inducing
    data points.
    '''
    # '%d' is required here: '%n' is not a valid conversion, and made the
    # formatting of this message fail with a ValueError
    msg = "Dataset must have at least n_inducing [ %d ] data points to"
    msg += " enable inference with sparse pseudo inputs"
    assert self.N >= self.n_inducing, msg % (self.n_inducing)
    self.should_recompile = True
    # pick initial cluster centers from dataset
    X = self.X.get_value()
    X_sp_ = utils.kmeanspp(X, self.n_inducing)

    # perform kmeans to get initial cluster centers
    utils.print_with_stamp('Initialising pseudo inputs', self.name)
    X_sp_, dist = kmeans(X, X_sp_, iter=200, thresh=1e-9)

    # initialize symbolic tensor variable if necessary
    # (this will create the self.X_sp attribute)
    self.set_params({'X_sp': X_sp_})
def truncate(self, episode):
    '''
    Keeps only the first `episode` episodes of the experience.

    Nothing happens unless 0 < episode <= self.curr_episode.

    @param episode number of episodes to keep; it also becomes the new
           value of curr_episode
    '''
    if not (0 < episode <= self.curr_episode):
        return

    # Let's give people a last chance of recovering their data. Also,
    # we don't want to save an empty experience dataset
    fmt = ('Resetting experience dataset to episode %d'
           ' (WARNING: data from %s will be overwritten)')
    utils.print_with_stamp(fmt % (episode, self.filename), self.name)

    self.curr_episode = episode
    for attr in ('time_stamps', 'states', 'actions', 'costs', 'info',
                 'policy_parameters'):
        setattr(self, attr, getattr(self, attr)[:episode])
    self.state_changed = True
def loss_wrapper(self, p, p_shapes, *inputs):
    '''
    Loss function wrapper compatible with scipy optimize

    @param p numpy array with the current evaluation point for the loss
    @param p_shapes array with the shapes of every parameter
    @param inputs values forwarded to the compiled loss+gradients function
    @return the loss and the flattened gradients, both as float64
    '''
    # rebuild the list of parameter values from the flat vector, and
    # write them into the parameters
    p = utils.unwrap_params(p, p_shapes)
    for idx, param in enumerate(self.params):
        param.set_value(p[idx])

    # compute value + derivatives
    outputs = self.grads_fn(*inputs)
    # fmin_l_bfgs_b requires double precision values; the gradients are
    # flattened before the cast
    loss = np.array(outputs[0]).astype(np.float64)
    dloss = np.array(utils.wrap_params(outputs[1:])).astype(np.float64)

    # update internal state variables
    self.n_evals += 1
    if loss < self.best_p[0]:
        self.best_p = [loss, p, self.n_evals]

    # running average of the time spent per call
    elapsed = time.time() - self.start_time
    self.iter_time += (elapsed - self.iter_time) / self.n_evals

    report = ('Current loss: %s, Total evaluations: %d'
              ', Avg. time per call: %f\t')
    utils.print_with_stamp(
        report % (str(loss), self.n_evals, self.iter_time),
        self.name, True)
    self.start_time = time.time()

    if callable(self.callback):
        self.callback(p, loss, dloss)

    # return loss+gradients
    return loss, dloss
def minimize(func, x0, alpha0=0.01, max_nfevals=1000):
    '''
    Minimizes a stochastic objective by repeated probabilistic line
    searches along the negative gradient.

    @param func callable evaluated as func(x); it returns loss_mean,
           grad_of_loss_mean, loss_variance, grad_of_loss_variance
    @param x0 initial point
    @param alpha0 initial step size; also used again whenever the step
           size returned by the line search is zero
    @param max_nfevals budget for the evaluation counter. The line search
           does not report how many times it evaluated func, so every
           line search is counted as a single evaluation
    @return the tuple (xt, f) with the last point and its mean loss
    '''
    f, df, var_f, var_df = func(x0)
    search_dir = -df
    xt = x0
    step_size = alpha0
    utils.print_with_stamp('new step_size: %f' % (step_size), 'ProbLS')

    nfevals = 1
    while nfevals < max_nfevals:
        step_size, xt, f, df, var_f, var_df = prob_line_search(
            func, xt, f, df, var_f, var_df, step_size, search_dir)
        # without this increment the loop would never terminate
        nfevals += 1
        utils.print_with_stamp('new step_size: %f' % (step_size), 'ProbLS')
        # the next line search starts with a step 1.3 times longer than
        # the previous one
        step_size *= 1.3
        if step_size == 0.0:
            step_size = alpha0
        search_dir = -df
    return xt, f
def set_dataset(self, X_dataset, Y_dataset):
    '''
    Sets the dataset on the parent class, and keeps the pseudo inputs
    consistent with its size.

    If the dataset has fewer than n_inducing data points, everything
    related to the pseudo inputs is cleared. If it is large enough and no
    pseudo inputs exist yet, they are initialized and should_recompile is
    set to True.

    @param X_dataset inputs, passed to the parent class
    @param Y_dataset targets, passed to the parent class
    '''
    # set the dataset on the parent class
    super(SPGP, self).set_dataset(X_dataset, Y_dataset)
    if self.N < self.n_inducing:
        # build the complete message: the second half used to be dropped
        # and the number of pseudo inputs was never filled in
        msg = "Dataset must have more than n_inducing [ %d ] to enable"
        msg += " inference with sparse pseudo inputs"
        utils.print_with_stamp(msg % (self.n_inducing), self.name)
        self.X_sp = None
        self.loss_sp_fn = None
        self.dloss_sp_fn = None
        self.beta_sp = None
        self.Lmm = None
        self.Amm = None
        self.should_recompile = False
    if self.N >= self.n_inducing and self.X_sp is None:
        msg = 'Dataset is large enough for using pseudo inputs. You should'
        msg += ' reinitiialise the training loss function and predictions.'
        utils.print_with_stamp(msg, self.name)
        # init the shared variable for the pseudo inputs
        self.init_pseudo_inputs()
        self.should_recompile = True
def train_inverse_dynamics(self, deltas=True):
    '''
    Appends the episodes not seen before to the dataset of the inverse
    dynamics model, updates the initial state distribution and trains
    the model.

    @param deltas if True, the inputs are [x_t, x_t - x_{t+1}]; otherwise
           they are [x_t, x_{t+1}]. States are passed through gTrig_np
           first. The targets are the actions of every step but the last
    '''
    utils.print_with_stamp('Training inverse dynamics model', self.name)

    X = []
    Y = []
    n_episodes = len(self.experience.states)

    if n_episodes > 0:
        # construct training dataset
        for i in range(self.next_episode_inv, n_episodes):
            x = np.array(self.experience.states[i])
            u = np.array(self.experience.actions[i])

            # inputs are pairs of consecutive states < x_{t}, x_{t+1} >
            x_ = utils.gTrig_np(x, self.angle_idims)
            if deltas:
                X.append(np.hstack((x_[:-1], x_[:-1] - x_[1:])))
            else:
                X.append(np.hstack((x_[:-1], x_[1:])))
            # outputs are the actions that produced the input state
            # transition
            Y.append(u[:-1])

        self.next_episode_inv = n_episodes

        # get distribution of initial states
        x0 = np.array([x[0] for x in self.experience.states])
        if n_episodes > 1:
            self.mx0.set_value(x0.mean(0).astype('float64'))
            self.Sx0.set_value(np.cov(x0.T).astype('float64'))
        else:
            # a single episode: use a small fixed variance
            self.mx0.set_value(x0.astype('float64').flatten())
            self.Sx0.set_value(1e-2 * np.eye(x0.size).astype('float64'))

        # append data to the dynamics model. When there are no new
        # episodes since the last call there is nothing to stack
        # (np.vstack raises on an empty list)
        if X:
            self.inverse_dynamics_model.append_dataset(
                np.vstack(X), np.vstack(Y))
    else:
        # no experience yet: use the initial distribution of the plant
        x0 = np.array(self.plant.x0, dtype='float64').squeeze()
        S0 = np.array(self.plant.S0, dtype='float64').squeeze()
        self.mx0.set_value(x0)
        self.Sx0.set_value(S0)

    utils.print_with_stamp(
        'Dataset size:: Inputs: [ %s ], Targets: [ %s ] ' %
        (self.inverse_dynamics_model.X.get_value().shape,
         self.inverse_dynamics_model.Y.get_value().shape), self.name)
    if self.inverse_dynamics_model.should_recompile:
        # reinitialize log likelihood
        self.inverse_dynamics_model.init_loss()

    self.inverse_dynamics_model.train()
    utils.print_with_stamp('Done training inverse dynamics model',
                           self.name)
def train_dynamics(dynmodel, data, angle_dims=None, init_episode=0,
                   max_episodes=None, max_dataset_size=0,
                   wrap_angles=False, append=False):
    '''
    Trains a dynamics model using the data dataset

    @param dynmodel model to train; it receives the dataset through
           append_dataset or set_dataset
    @param data experience dataset providing states and
           get_dynmodel_dataset
    @param angle_dims indices of the angle dimensions. Defaults to an
           empty list
    @param init_episode first episode to use. The dataset of the model is
           only modified if there are more than init_episode episodes
    @param max_episodes if not None and at least that many episodes are
           available, only the last max_episodes episodes are used
    @param max_dataset_size keeps only the last max_dataset_size rows;
           0 keeps all the rows
    @param wrap_angles if True, the targets in angle_dims are wrapped to
           [-pi, pi]
    @param append if True the data is appended to the dataset of the
           model, otherwise it replaces it
    @return the trained dynmodel
    '''
    # a list as default argument would be shared between calls
    if angle_dims is None:
        angle_dims = []

    utils.print_with_stamp('Training dynamics model', 'train_dynamics')

    X = []
    Y = []
    n_episodes = len(data.states)

    if n_episodes > init_episode:
        # get dataset for dynamics model
        episodes = list(range(init_episode, n_episodes))\
            if max_episodes is None or n_episodes < max_episodes\
            else list(range(max(0, n_episodes-max_episodes), n_episodes))

        X, Y = data.get_dynmodel_dataset(filter_episodes=episodes,
                                         angle_dims=angle_dims,
                                         deltas=True)
        # with max_dataset_size == 0 these slices keep everything
        X = X[-max_dataset_size:]
        Y = Y[-max_dataset_size:]

        # wrap angles if requested
        # (this might introduce error if the angular velocities are high)
        if wrap_angles:
            # wrap angle differences to [-pi,pi]
            Y[:, angle_dims] = (Y[:, angle_dims] + np.pi) % (2 * np.pi) - np.pi

        if append:
            # append data to the dynamics model
            dynmodel.append_dataset(X, Y)
        else:
            dynmodel.set_dataset(X, Y)

    i_shp = dynmodel.X.get_value(borrow=True).shape
    o_shp = dynmodel.Y.get_value(borrow=True).shape
    msg = 'Dataset size:: Inputs: [ %s ], Targets: [ %s ] ' % (i_shp, o_shp)
    utils.print_with_stamp(msg, 'train_dynamics')

    # finally, train the dynamics model
    dynmodel.train()
    utils.print_with_stamp('Done training dynamics model', 'train_dynamics')

    return dynmodel
def init_predict(self, input_covariance=False, input_ndim=1, *args,
                 **kwargs):
    '''
    Compiles a prediction function for the operation specified in
    self.predict_symbolic

    @param input_covariance if True, the compiled function takes the
           input covariance Sx as a second argument
    @param input_ndim number of dimensions of the input mean mx
    @param args extra positional arguments for predict_symbolic
    @param kwargs extra keyword arguments for predict_symbolic
    @return the compiled theano function
    '''
    # symbolic inputs
    mx = tt.TensorType(floatX, (False, ) * input_ndim)('mx')
    if input_covariance:
        Sx = tt.matrix('Sx')
        input_vars = [mx, Sx]
    else:
        Sx = None
        input_vars = [mx]

    # symbolic prediction
    utils.print_with_stamp('Initialising expression graph for prediction',
                           self.name)
    output_vars = self.predict_symbolic(mx, Sx, *args, **kwargs)

    # a single output is wrapped in a list; None outputs are dropped
    if not isinstance(output_vars, (tuple, list)):
        output_vars = [output_vars]
    prediction = [out for out in output_vars if out is not None]

    # compile prediction
    utils.print_with_stamp('Compiling mean and variance of prediction',
                           self.name)
    if input_covariance:
        fn_name = '%s>predict_ui' % (self.name)
    else:
        fn_name = '%s>predict' % (self.name)

    # with a single output, the function returns it directly
    if len(prediction) == 1:
        prediction = prediction[0]
    predict_fn = theano.function(input_vars,
                                 prediction,
                                 on_unused_input='ignore',
                                 name=fn_name,
                                 allow_input_downcast=True)
    utils.print_with_stamp('Done compiling', self.name)
    return predict_fn
def compile_loss_fn(losses, params, updates=None, callback=None):
    '''
    Compiles a function that returns the mean of the losses, the mean of
    their gradients, the variance of the losses and the variance of their
    gradients, in that order. The first axis of losses is assumed to be
    the batch index.

    TODO allow for various SGD methods (e.g. adam, nesterov, rmsprop)

    @param losses symbolic batch of losses
    @param params parameters with respect to which the jacobian is taken
    @param updates theano updates applied by the compiled function
    @param callback not used inside this function
    @return the compiled theano function (it takes no arguments)
    '''
    # mean and variance of loss (assuming first axis is the batch index)
    utils.print_with_stamp("Computing loss mean and variance", 'ProbLS')
    m_loss = losses.mean(0)
    S_loss = losses.var(0)

    # mean and variance of gradients
    # TODO compute the variance of gradients efficiently
    utils.print_with_stamp("Computing gradient mean and variance", 'ProbLS')
    grads = theano.tensor.jacobian(losses, params)
    stats = [(g.mean(0).flatten(), g.var(0).flatten()) for g in grads]
    m_parts, S_parts = list(zip(*stats))
    # a single flat vector for all the parameters
    m_grad = theano.tensor.concatenate(m_parts)
    S_grad = theano.tensor.concatenate(S_parts)

    loss_fn = theano.function([], [m_loss, m_grad, S_loss, S_grad],
                              updates=updates)

    utils.print_with_stamp("Done compiling.", 'ProbLS')
    return loss_fn
def set_objective(self, loss, params, inputs=None, updts=None, grads=None,
                  compilation_mode=None, **kwargs):
    '''
    Changes the objective function to be optimized

    @param loss theano graph representing the loss to be optimized
    @param params theano shared variables representing the parameters to
           be optimized
    @param inputs theano variables representing the inputs required to
           compute the loss, other than params
    @param updts dictionary of list of theano updates to be applied after
           every evaluation of the loss function
    @param grads gradients of the loss function. If not provided, will be
           computed here
    @param compilation_mode mode passed to theano.function
    '''
    inputs = [] if inputs is None else inputs
    updts = None if updts is None else OrderedUpdates(updts)

    if grads is None:
        utils.print_with_stamp('Building computation graph for gradients',
                               self.name)
        grads = theano.grad(loss, params)

    # options shared by the two compiled functions
    fn_options = dict(updates=updts, allow_input_downcast=True,
                      mode=compilation_mode)

    utils.print_with_stamp('Compiling function for loss', self.name)
    self.loss_fn = theano.function(inputs, loss, **fn_options)

    utils.print_with_stamp('Compiling function for loss+gradients',
                           self.name)
    self.grads_fn = theano.function(inputs, [loss] + grads, **fn_options)

    # reset the counters used while optimizing
    self.n_evals = 0
    self.start_time = 0
    self.iter_time = 0
    self.params = params
def set_objective(self, loss, params, inputs=None, updts=None, grads=None,
                  polyak_averaging=None, clip=None, trust_input=True,
                  compilation_mode=None, **kwargs):
    '''
    Changes the objective function to be optimized

    @param loss theano graph representing the loss to be optimized
    @param params theano shared variables representing the parameters to
           be optimized
    @param inputs theano variables representing the inputs required to
           compute the loss, other than params
    @param updts dictionary of list of theano updates to be applied after
           every evaluation of the loss function
    @param grads gradients of the loss function. If not provided, will be
           computed here
    @param polyak_averaging if greater than zero, it is used as the
           coefficient b of an average of the parameters, kept in
           self.params_avg
    @param clip if not None, the gradients are rescaled with
           lasagne.updates.total_norm_constraint using this value
    @param trust_input value assigned to the trust_input attribute of the
           compiled functions
    @param compilation_mode mode passed to theano.function
    @param kwargs arguments to pass to the lasagne.updates function
    '''
    if inputs is None:
        inputs = []

    if updts is not None:
        updts = OrderedUpdates(updts)

    if grads is None:
        utils.print_with_stamp('Building computation graph for gradients',
                               self.name)
        grads = theano.grad(loss, params)
        # NOTE: clipping only applies to the gradients computed here, not
        # to gradients passed in by the caller
        if clip is not None:
            utils.print_with_stamp(
                "Clipping gradients to norm %s" % (str(clip)), self.name)
            grads = lasagne.updates.total_norm_constraint(grads, clip)
        else:
            utils.print_with_stamp("No gradient clipping", self.name)

    utils.print_with_stamp("Computing parameter update rules", self.name)
    min_method_updt = LASAGNE_MIN_METHODS[self.min_method]
    grad_updates = min_method_updt(grads, params, **kwargs)

    outputs = [loss] + grads
    # NOTE(review): updts may still be None at this point; confirm that
    # adding None to the update dictionary is supported
    grad_updates = grad_updates + updts
    if polyak_averaging and polyak_averaging > 0.0:
        # create copy of params
        params_avg = [
            theano.shared(p.get_value(borrow=False,
                                      return_internal_type=True),
                          broadcastable=p.broadcastable,
                          name=p.name + '_copy') for p in params
        ]

        # prepare updates for polyak averaging
        # t counts the number of updates, starting from 1
        t = theano.shared(np.array(1, dtype=floatX))
        b = polyak_averaging
        replace_dict = OrderedDict()
        for p, pp in zip(params, params_avg):
            # average of the updated parameter values, divided by
            # (1 - b**t)
            grad_updates[pp] = ((b - b**t) * pp +
                                (1 - b) * grad_updates[p]) / (1 - b**t)
            replace_dict[p] = pp
        grad_updates[t] = t + 1

        # the reported loss is evaluated at the averaged parameters
        outputs[0] = theano.clone(loss, replace=replace_dict, strict=True)
        self.params_avg = params_avg
    else:
        # remove the averaged parameters of a previous objective
        if hasattr(self, 'params_avg'):
            delattr(self, 'params_avg')

    utils.print_with_stamp('Compiling function for loss', self.name)
    # converts inputs to shared variables to avoid repeated gpu transfers
    # (they are created with placeholder values; minimize sets the values)
    self.shared_inpts = [
        theano.shared(np.empty([1] * inp.ndim, dtype=inp.dtype),
                      name=inp.name) for inp in inputs
    ]

    givens_dict = dict(zip(inputs, self.shared_inpts))
    self.loss_fn = theano.function([],
                                   loss,
                                   updates=updts,
                                   on_unused_input='ignore',
                                   allow_input_downcast=True,
                                   givens=givens_dict,
                                   mode=compilation_mode)
    self.loss_fn.trust_input = trust_input

    utils.print_with_stamp("Compiling parameter updates", self.name)

    # returns the loss and gradients, and applies the parameter updates
    self.update_params_fn = theano.function([],
                                            outputs,
                                            updates=grad_updates,
                                            on_unused_input='ignore',
                                            allow_input_downcast=True,
                                            givens=givens_dict,
                                            mode=compilation_mode)
    self.update_params_fn.trust_input = trust_input

    self.n_evals = 0
    self.start_time = 0
    self.iter_time = 0
    self.params = params
    # every shared variable modified by the update function
    self.optimizer_state = [s for s in grad_updates.keys()]
def minimize(self, *inputs, **kwargs):
    '''
    Runs the parameter updates compiled by set_objective for up to
    self.max_evals iterations.

    @param inputs python variables to pass as inputs to the compiled
           theano functions for the loss and gradients
    @param kwargs optional settings: 'callback' (called as
           callback(loss, dloss) after every update) and 'return_best'
           (if True, the optimizer state with the lowest loss is restored
           at the end; default False)
    '''
    callback = kwargs.get('callback')
    return_best = kwargs.get('return_best', False)
    self.iter_time = 0
    self.start_time = time.time()
    self.n_evals = 0
    utils.print_with_stamp('Optimizing parameters', self.name)

    # set values for shared inputs
    for s, i in zip(self.shared_inpts, inputs):
        s.set_value(np.array(i).astype(s.dtype))

    # set initial loss and parameters
    # (the state is copied BEFORE the first update is applied, while
    # loss0 is evaluated after it)
    state0 = [
        s.get_value(return_internal_type=True, borrow=False)
        for s in self.optimizer_state
    ]
    ret = self.update_params_fn()
    loss0 = self.loss_fn()
    utils.print_with_stamp('Initial loss [%s]' % (loss0), self.name)
    # best_p holds [loss, optimizer state, iteration]
    self.best_p = [loss0, state0, 0]

    # training loop
    if return_best:
        out_str = 'Curr loss: %E [%d: %E], n_evals: %d, Avg. time per updt: %f'
    else:
        out_str = 'Curr loss: %E, n_evals: %d, Avg. time per updt: %f'
    for i in range(1, self.max_evals):
        start_time = time.time()
        # evaluate current policy and update parameters
        ret = self.update_params_fn()
        # the returned loss corresponds to the parameters BEFORE the update
        loss, dloss = ret[0], ret[1:]
        # NOTE: 'and' binds tighter than 'or': with return_best, the
        # first 9 iterations always overwrite the best state
        if loss < self.best_p[0] or i < 10 and return_best:
            # get current optimizer state
            state = [
                s.get_value(return_internal_type=True, borrow=False)
                for s in self.optimizer_state
            ]
            self.best_p = [loss, state, i]
        if callable(callback):
            callback(loss, dloss)
        self.n_evals += 1
        end_time = time.time()
        # running average of the time per update
        dt = end_time - start_time
        it_updt = (dt - self.iter_time) / self.n_evals
        self.iter_time += it_updt
        if return_best:
            str_params = (loss, self.best_p[2], self.best_p[0],
                          self.n_evals, self.iter_time)
        else:
            str_params = (loss, self.n_evals, self.iter_time)
        utils.print_with_stamp(out_str % str_params, self.name, True)

    print('')
    if return_best:
        # restore the optimizer state with the lowest loss
        v, s, i = self.best_p
        for s_i, st_i in zip(self.optimizer_state, s):
            s_i.set_value(st_i)

    if hasattr(self, 'params_avg'):
        # set the model parameters to be the ones found via
        # polyak averaging
        for p_i, pp_i in zip(self.params, self.params_avg):
            p_i.set_value(pp_i.get_value())

    v = self.loss_fn()
    # NOTE(review): i is only defined here if the loop above ran at least
    # once, or if return_best is True
    msg = 'Done training. New loss [%f] iter: [%d]'
    utils.print_with_stamp(msg % (v, i), self.name)
def minibatch_minimize(self, X, Y, *inputs, **kwargs):
    '''
    Runs the parameter updates compiled by set_objective on mini batches
    of (X, Y), until self.max_evals updates have been done.

    The first two shared inputs receive the batch inputs and targets; the
    remaining ones receive the values given in inputs.

    @param X dataset inputs
    @param Y dataset targets
    @param inputs values for the shared inputs after the first two
    @param kwargs optional settings: 'callback' (called as
           callback(loss, dloss) after every update), 'return_best' (if
           True, the optimizer state with the lowest loss is restored at
           the end; default False) and 'batch_size' (default 100, capped
           at the number of rows of X)
    '''
    callback = kwargs.get('callback', None)
    return_best = kwargs.get('return_best', False)
    batch_size = kwargs.get('batch_size', 100)
    batch_size = min(batch_size, X.shape[0])

    self.iter_time = 0
    self.start_time = time.time()
    self.n_evals = 0
    utils.print_with_stamp('Optimizing parameters via mini batches',
                           self.name)

    # set values for shared inputs
    # (the last batch_size rows are used for the initial evaluation)
    self.shared_inpts[0].set_value(X[-batch_size:])
    self.shared_inpts[1].set_value(Y[-batch_size:])
    for s, i in zip(self.shared_inpts[2:], inputs):
        s.set_value(np.array(i).astype(s.dtype))

    # set initial loss and parameters
    # (the state is copied BEFORE the first update is applied, while
    # loss0 is evaluated after it)
    state0 = [
        s.get_value(return_internal_type=True, borrow=False)
        for s in self.optimizer_state
    ]
    ret = self.update_params_fn()
    loss0 = self.loss_fn()
    utils.print_with_stamp('Initial loss [%s]' % (loss0), self.name)
    # best_p holds [loss, optimizer state, number of evaluations]
    self.best_p = [loss0, state0, 0]

    # go through the dataset
    out_str = 'Curr loss: %E [%d: %E], n_evals: %d, Avg. time per updt: %f'
    while True:
        start_time = time.time()
        should_exit = False
        b_iter = utils.iterate_minibatches(X, Y, batch_size, shuffle=True)
        for x, y in b_iter:
            start_time = time.time()
            # add small amount of noise for smoothing
            # NOTE(review): this modifies x in place; presumably the
            # shuffled batches are copies of X - confirm
            x += 1e-4 * (x.max() - x.min()) * np.random.randn(*x.shape)
            # mini batch update
            self.shared_inpts[0].set_value(x)
            self.shared_inpts[1].set_value(y)
            ret = self.update_params_fn()
            # the returned loss and gradients correspond to the parameters
            # BEFORE the update
            loss, dloss = ret[0], ret[1:]
            # the first 10 evaluations always overwrite the best state
            if loss < self.best_p[0] or self.n_evals < 10:
                # get current optimizer state
                state = [
                    s.get_value(return_internal_type=True, borrow=False)
                    for s in self.optimizer_state
                ]
                self.best_p = [loss, state, self.n_evals]
            if callable(callback):
                callback(loss, dloss)

            self.n_evals += 1
            if self.n_evals >= self.max_evals:
                should_exit = True
                break
            end_time = time.time()
            # running average of the time per update
            dt = end_time - start_time
            it_updt = (dt - self.iter_time) / self.n_evals
            self.iter_time += it_updt
            str_params = (loss, self.best_p[2], self.best_p[0],
                          self.n_evals, self.iter_time)
            utils.print_with_stamp(out_str % str_params, self.name, True)
        if should_exit:
            break

    print('')
    i = self.n_evals
    if return_best:
        # restore the optimizer state with the lowest loss
        v, s, i = self.best_p
        for s_i, st_i in zip(self.optimizer_state, s):
            s_i.set_value(st_i)

    if hasattr(self, 'params_avg'):
        # set the model parameters to be the ones found via
        # polyak averaging
        for p_i, pp_i in zip(self.params, self.params_avg):
            p_i.set_value(pp_i.get_value())

    v = self.loss_fn()
    msg = 'Done training. New loss [%f] iter: [%d]'
    utils.print_with_stamp(msg % (v, i), self.name)
# build the output folder for this experiment: <output_folder>/<name>_<id>,
# where the name comes from the arguments or, if missing, the environment
odir = args.output_folder
name = (args.name if args.name is not None else env.name) + '_' + str(e_id)
output_folder = os.path.join(odir, name)

try:
    os.makedirs(output_folder)
except OSError:
    # the folder could not be created: move the old stuff to a folder
    # tagged with its st_ctime, and start with a fresh one
    target_dir = '%s_%s' % (output_folder, os.stat(output_folder).st_ctime)
    os.rename(output_folder, target_dir)
    os.mkdir(output_folder)
    utils.print_with_stamp(
        'Moved old results from [%s] to [%s]' % (output_folder, target_dir))

utils.set_output_dir(output_folder)
utils.print_with_stamp('Results will be saved in [%s]' % (output_folder))

# write the initial configuration to disk
params_path = os.path.join(output_folder, 'initial_config.dill')
with open(params_path, 'wb+') as f:
    config_dict = {
        'params': params,
        'loss_kwargs': loss_kwargs,
        'polopt_kwargs': polopt_kwargs,
        'extra_inps': extra_inps,
    }
    dill.dump(config_dict, f)

scenario = partial(learner_setup, pol=pol, dyn=dyn)
def get_loss(self): ''' initializes the loss function for training ''' # build the network if self.network is None: params = self.network_params\ if self.network_params is not None\ else {} self.build_network(self.network_spec, params=params, name=self.name) utils.print_with_stamp('Initialising loss function', self.name) # Input variables input_lengthscale = tt.scalar('%s>input_lengthscale' % (self.name)) hidden_lengthscale = tt.scalar('%s>hidden_lengthscale' % (self.name)) train_inputs = tt.matrix('%s>train_inputs' % (self.name)) train_targets = tt.matrix('%s>train_targets' % (self.name)) # evaluate nework output for batch train_predictions, sn = self.predict_symbolic( train_inputs, None, deterministic=False, iid_per_eval=True, return_samples=True) # build the dropout loss function ( See Gal and Ghahramani 2015) M = train_targets.shape[0].astype(theano.config.floatX) N = self.X.shape[0].astype(theano.config.floatX) # compute negative log likelihood # note that if we have sn_std be a 1xD vector, broadcasting # rules apply lml = objectives.gaussian_log_likelihood( train_targets, train_predictions, sn) # compute regularization term # this is only for binary dropout layers input_ls = input_lengthscale hidden_ls = hidden_lengthscale reg = objectives.dropout_gp_kl( self.network, input_ls, hidden_ls) # this is only for gaussian dropout layers reg += objectives.gaussian_dropout_kl( self.network, input_ls, hidden_ls) # this is only for log normal dropout layers reg += objectives.log_normal_kl( self.network, input_ls, hidden_ls) loss = -lml/M + reg/N inputs = [train_inputs, train_targets, input_lengthscale, hidden_lengthscale] updates = theano.updates.OrderedUpdates() # get trainable network parameters params = lasagne.layers.get_all_params(self.network, trainable=True) # if we are learning the noise if not self.heteroscedastic: params.append(self.unconstrained_sn) self.set_params(dict([(p.name, p) for p in params])) return loss, inputs, updates