Esempio n. 1
0
def evaluate_policy(env, pol, exp, params, n_tests=100, render=False):
    """Roll out every policy stored in ``exp`` and collect the outcomes.

    For each truthy entry of ``exp.policy_parameters`` the parameters are
    loaded into ``pol`` and ``apply_controller`` is called ``n_tests`` times
    with ``params['min_steps']`` as its third argument. Falsy entries are
    skipped (the progress message is still printed for them).

    Args:
        env: environment handed to ``apply_controller``; ``env.render()`` is
            called from the step callback when ``render`` is true.
        pol: policy object; must provide ``set_params``.
        exp: object exposing a ``policy_parameters`` sequence.
        params: mapping with the keys ``'min_steps'`` and ``'angle_dims'``.
        n_tests: number of rollouts per policy.
        render: when true, render the environment from the step callback.

    Returns:
        A list with one entry per evaluated policy; each entry is the list
        of the ``n_tests`` values returned by ``apply_controller``.
    """
    n_steps = params['min_steps']
    angle_dims = params['angle_dims']

    def preprocess_state(state):
        # forward angle_dims to utils.gTrig_np and flatten the result
        return utils.gTrig_np(state, angle_dims).flatten()

    def on_step(*args, **kwargs):
        if render:
            env.render()

    all_results = []
    for iteration, pol_params in enumerate(exp.policy_parameters):
        utils.print_with_stamp(
            'Evaluating policy at iteration %d' % iteration)
        if not pol_params:
            continue
        pol.set_params(pol_params)
        all_results.append([
            apply_controller(env,
                             pol,
                             n_steps,
                             preprocess=preprocess_state,
                             callback=on_step)
            for _ in range(n_tests)
        ])

    return all_results
Esempio n. 2
0
    def pretrain_full(self):
        '''
        Trains the model with the full GP loss (GP.get_loss) using a
        dedicated optimizer.

        The optimizer is a shallow copy of self.optimizer, created on first
        use; its objective is (re)compiled when it has no loss function yet
        or when self.should_recompile is set. If the dataset has more than
        2048 rows, training runs on a random subsample of that size. The
        full dataset is always put back afterwards, even when training
        raises an exception.
        '''
        if not hasattr(self, 'full_optimizer'):
            import copy
            self.full_optimizer = copy.copy(self.optimizer)
            self.full_optimizer.name = self.name+'_fullopt'
            self.full_optimizer.max_evals = self.optimizer.max_evals
            # force the objective to be compiled below
            self.full_optimizer.loss_fn = None

        if self.full_optimizer.loss_fn is None or self.should_recompile:
            loss, inps, updts = GP.get_loss(self)
            # the last entry of the parameter list is excluded from the
            # full GP optimization
            self.full_optimizer.set_objective(
                loss, self.get_params(symbolic=True)[:-1], inps, updts)

        # train the full GP ( if dataset too large, take a random subsample)
        X_full = None
        Y_full = None
        n_subsample = 2048
        X = self.X.get_value()
        if X.shape[0] > n_subsample:
            msg = 'Training full gp with random subsample of size %d'
            utils.print_with_stamp(msg % (n_subsample),
                                   self.name)
            idx = np.arange(X.shape[0])
            np.random.shuffle(idx)
            idx = idx[:n_subsample]
            X_full = X
            Y_full = self.Y.get_value()
            self.set_dataset(X_full[idx], Y_full[idx])

        try:
            super(SSGP, self).train(self.full_optimizer)
        finally:
            # never leave the model holding only the subsample, even if
            # training failed
            if X_full is not None:
                # restore full dataset for SSGP training
                utils.print_with_stamp('Restoring full dataset', self.name)
                self.set_dataset(X_full, Y_full)
Esempio n. 3
0
    def load(self, output_folder=None, output_filename=None):
        '''
        Loads the state of this object from disk.

        @param output_folder directory that contains the file; when None,
                             utils.get_output_dir() is used
        @param output_filename file name; it is reconciled with
                               self.filename by utils.sync_output_filename
        @return True when the state was loaded, False when an IOError
                occurred while reading the file
        '''
        # make sure the registries exist before any state is restored
        for attr in ('registered_types', 'registered_keys'):
            if not hasattr(self, attr):
                setattr(self, attr, set())

        if output_folder is None:
            output_folder = utils.get_output_dir()
        output_filename, self.filename = utils.sync_output_filename(
            output_filename, self.filename, '.zip')

        # build the file path, making sure it carries the zip extension
        path = os.path.join(output_folder, output_filename)
        if not path.endswith('.zip'):
            path += '.zip'

        try:
            with open(path, 'rb') as state_file:
                utils.print_with_stamp('Loading state from %s' % (path),
                                       self.name)
                self.set_instance_state(t_load(state_file))
            # what is in memory now matches what is on disk
            self.state_changed = False
        except IOError as err:
            utils.print_with_stamp('Unable to load state from %s' % (path),
                                   self.name)
            print(err)
            return False
        return True
Esempio n. 4
0
    def save(self, output_folder=None, output_filename=None):
        '''
        Serializes the class using the theano pickling utility function, and
        saves it to disk.

        Nothing is written when self.state_changed is False, unless the
        caller explicitly passes an output folder or an output file name.

        @param output_folder destination directory (created if missing);
                             when None, utils.get_output_dir() is used
        @param output_filename destination file name; it is reconciled with
                               self.filename by utils.sync_output_filename
        @raise OSError if the destination directory cannot be created
        '''
        # NOTE(review): presumably needed because pickling the state recurses
        # deeply; this changes the limit for the whole interpreter
        sys.setrecursionlimit(100000)

        # record whether the caller asked for a specific destination BEFORE
        # the defaults are filled in; testing the arguments afterwards would
        # always succeed and make the state_changed check pointless
        explicit_target = (output_folder is not None
                           or output_filename is not None)

        if output_folder is None:
            output_folder = utils.get_output_dir()
        output_filename, self.filename = utils.sync_output_filename(
            output_filename, self.filename, '.zip')

        if not (self.state_changed or explicit_target):
            return

        # create the output folder if necessary. Attempting the creation and
        # inspecting the failure avoids the race between an existence check
        # and the creation itself.
        try:
            os.makedirs(output_folder)
        except OSError:
            if not os.path.isdir(output_folder):
                utils.print_with_stamp(
                    'Unable to create the directory: %s' % (output_folder),
                    self.name)
                raise

        # construct file path
        path = os.path.join(output_folder, output_filename)
        # append the zip extension
        if not path.endswith('.zip'):
            path = path + '.zip'

        with open(path, 'wb') as f:
            utils.print_with_stamp('Saving state to %s' % (path),
                                   self.name)
            t_dump(self.get_instance_state(), f, 2)

        # make the file readable and writable by everyone. os.chmod does not
        # go through a shell, so paths with spaces or shell metacharacters
        # are handled correctly. Failure stays non-fatal, as it was before.
        try:
            os.chmod(path, 0o666)
        except OSError as err:
            utils.print_with_stamp(
                'Unable to change permissions of %s: %s' % (path, err),
                self.name)
        self.state_changed = False
Esempio n. 5
0
    def init_params(self):
        '''
        Initialises the hyperparameters from the statistics of the dataset.

        One row is built per output dimension, laid out as
        [std of every input column, std of the target, 0.1 * std of the
        target]. The values are stored through set_params in unconstrained
        form; self.hyp (softplus of the unconstrained values plus machine
        epsilon) and self.sn (last column of self.hyp) are created only if
        they are still None.
        '''
        utils.print_with_stamp('Initialising parameters', self.name)
        n_in, n_out = self.D, self.E

        # this code supports squared exponential only, at the moment
        inputs = self.X.get_value()
        targets = self.Y.get_value()

        init_hyp = np.zeros((n_out, n_in + 2))
        init_hyp[:, :n_in] = inputs.std(0, ddof=1)
        init_hyp[:, n_in] = targets.std(0, ddof=1)
        init_hyp[:, n_in + 1] = 0.1 * init_hyp[:, n_in]

        # inverse of the softplus applied below, so that the constrained
        # hyperparameters start at the values computed above
        unconstrained = np.log(np.exp(init_hyp, dtype=floatX) - 1.0)

        # set_params either creates the shared variable or updates its value
        self.set_params({'unconstrained_hyp': unconstrained})

        if self.hyp is None:
            # softplus keeps the hyperparameters positive; eps keeps them
            # strictly away from zero
            eps = np.finfo(np.__dict__[floatX]).eps
            self.hyp = tt.nnet.softplus(self.unconstrained_hyp) + eps

        if self.sn is None:
            # sn (used in PILCO) is the last hyperparameter column
            self.sn = self.hyp[:, -1]
Esempio n. 6
0
    def nigp_updates(self):
        '''
        Builds the symbolic update for self.nigp from the squared derivative
        of the mean function evaluated at the training inputs.

        @return the updates produced by the outer scan combined with the
                pair (self.nigp, new nigp expression)
        '''
        idims = self.D
        msg = 'Compiling derivative of mean function at training inputs'
        utils.print_with_stamp(msg, self.name)

        # we need to evaluate the derivative of the mean function at the
        # training inputs
        def dM2_f_i(mx, beta, hyp, X):
            # inner scan step: mx is one element of the sequence X.
            # hyp is split into its first idims + 1 entries and the entry at
            # index idims + 1, which are handed to cov.Sum with self.covs
            hyps = (hyp[:idims + 1], hyp[idims + 1])
            kernel_func = partial(cov.Sum, hyps, self.covs)
            k = kernel_func(mx[None, :], X).flatten()
            mean = k.dot(beta)
            # derivative of the mean with respect to the input, squared
            # elementwise
            dmean = tt.jacobian(mean.flatten(), mx)
            return tt.square(dmean.flatten())

        def dM2_f(beta, hyp, X):
            # iterate over training inputs
            # NOTE(review): the updates returned by this inner scan are
            # discarded; confirm that none are expected here
            dM2_o, updts = theano.scan(fn=dM2_f_i,
                                       sequences=[X],
                                       non_sequences=[beta, hyp, X],
                                       allow_gc=False)
            return dM2_o

        # iterate over output dimensions
        dM2, updts = theano.scan(fn=dM2_f,
                                 sequences=[self.beta, self.hyp],
                                 non_sequences=[self.X],
                                 allow_gc=False)

        # update the nigp parameter using the derivative of the mean function
        nigp = ((dM2[:, :, :, None] * self.X_cov[None]).sum(2) * dM2).sum(-1)
        # NOTE(review): updts is combined with a bare (variable, expression)
        # tuple rather than a list of pairs or a dict -- confirm that the
        # updates object accepts this form
        nigp_updts = updts + (self.nigp, nigp)

        return nigp_updts
Esempio n. 7
0
    def __init__(self, name='Experience', filename_prefix=None, filename=None):
        '''
        Creates an empty experience dataset, or loads one from disk.

        @param name name of this object, used in log messages
        @param filename_prefix prefix of the default file name
                               ('<prefix>_dataset'); when None, name is used
        @param filename explicit file name; when given, filename_prefix is
                        ignored and the dataset is loaded from that file
        '''
        self.name = name

        # per-episode containers
        self.time_stamps = []
        self.states = []
        self.actions = []
        self.costs = []
        self.info = []
        self.policy_parameters = []
        self.curr_episode = -1
        self.state_changed = True

        load_requested = filename is not None
        if load_requested:
            self.filename = filename
        else:
            prefix = self.name if filename_prefix is None else filename_prefix
            self.filename = prefix+'_dataset'
            utils.print_with_stamp(
                'Initialising new experience dataset', self.name)

        Loadable.__init__(self, name=name, filename=self.filename)

        # if a filename was passed, try loading it
        if load_requested:
            self.load()

        self.register_types([list])
        self.register(['curr_episode'])
Esempio n. 8
0
def plot_rollout(rollout_fn, exp, *args, **kwargs):
    ''' Plots the output of a rollout function against recorded experience.

    rollout_fn(*args) must return either 3 values, the last being an array
    of sampled trajectories with shape (n_samples, T, dims), or 5 values,
    the last two being the state means with shape (T, dims) and the state
    covariances indexed as [t, d, d]. One subplot is drawn per state
    dimension.

    Keyword arguments:
        fig, axarr: existing figure and axes to draw on; new ones are
            created when either is None
        name: identifier of the figure created here (default 'Rollout')
        n_exp: number of most recent episodes of exp.states to overlay;
            their mean is drawn in red. When 0, only the last episode is
            drawn.

    Returns:
        (fig, axarr); axarr can always be indexed by state dimension.
    '''
    fig = kwargs.get('fig', None)
    axarr = kwargs.get('axarr', None)
    name = kwargs.get('name', 'Rollout')
    n_exp = kwargs.get('n_exp', 0)
    ret = rollout_fn(*args)
    trajectories = m_states = s_states = None
    n_samples = 0
    if len(ret) == 3:
        _, _, trajectories = ret
        n_samples, T, dims = trajectories.shape
    else:
        _, _, _, m_states, s_states = ret
        T, dims = m_states.shape

    if fig is None or axarr is None:
        utils.print_with_stamp("Creating fig and axes", "plot_rollout")
        fig, axarr = plt.subplots(dims, num=name, sharex=True)
    if not hasattr(axarr, '__getitem__'):
        # plt.subplots returns a bare Axes (not an array) for a single
        # subplot; wrap it so that axarr[d] works for dims == 1 as well
        axarr = np.array([axarr])

    # alpha must stay within [0, 1]; 10.0/n_samples exceeds 1 when there are
    # fewer than 10 samples
    sample_alpha = min(1.0, 10.0/n_samples) if n_samples else 1.0

    exp_states = np.array(exp.states)
    total_exp = len(exp_states)
    for d in range(dims):
        axarr[d].clear()
        if trajectories is not None:
            st = trajectories[:, :, d]
            # plot predictive distribution
            for i in range(n_samples):
                axarr[d].plot(
                    np.arange(T), st[i, :], color='steelblue',
                    alpha=sample_alpha)
            axarr[d].plot(
                np.arange(T), st[:, :].mean(0), color='blue', linewidth=2)
        if m_states is not None:
            axarr[d].plot(
                np.arange(T), m_states[:, d], color='steelblue',
                alpha=0.3)
            # error bars span 1.96 standard deviations
            axarr[d].errorbar(
                np.arange(T), m_states[:, d],
                1.96*np.sqrt(s_states[:, d, d]), color='steelblue', alpha=0.3)

        # plot experience
        if n_exp == 0:
            exp_i = np.array(exp.states[-1])[:, d]
        else:
            exp_i = np.array(exp.states[-n_exp:])[:, :, d].mean(0)
        T_exp = len(exp_i)
        for i in range(n_exp):
            axarr[d].plot(
                 np.arange(T_exp), exp_states[total_exp - n_exp + i][:, d],
                 color='orange', alpha=0.3)
        axarr[d].plot(
            np.arange(T_exp), exp_i, color='red')
        axarr[d].figure.canvas.draw()

    # pass `block` by keyword: matplotlib no longer accepts it positionally
    plt.show(block=False)
    plt.waitforbuttonpress(0.5)

    return fig, axarr
Esempio n. 9
0
    def stop(self):
        '''
        Clears the self.running flag and waits for the workers to finish.

        The drawing worker is joined first, then the polling worker; each
        join waits at most 10 seconds and is skipped when the worker is None
        or no longer alive.
        '''
        self.running.clear()

        for attr in ('drawing_thread', 'polling_thread'):
            worker = getattr(self, attr)
            if worker is None or not worker.is_alive():
                continue
            # bounded wait until the worker stops
            worker.join(10)

        print_with_stamp('Stopped drawing loop', self.name)
Esempio n. 10
0
 def start(self):
     '''
     Creates and starts the drawing and polling workers.

     The drawing worker is a Process running self.drawing_loop and the
     polling worker is a Thread running self.polling_loop; each receives its
     pipe as the only argument and both are marked as daemons. self.running
     is set before the workers are started (polling first, then drawing).
     '''
     print_with_stamp('Starting drawing loop', self.name)

     drawer = Process(target=self.drawing_loop,
                      args=(self.drawing_pipe, ))
     drawer.daemon = True
     self.drawing_thread = drawer

     poller = Thread(target=self.polling_loop,
                     args=(self.polling_pipe, ))
     poller.daemon = True
     self.polling_thread = poller

     self.running.set()
     self.polling_thread.start()
     self.drawing_thread.start()
Esempio n. 11
0
 def _reset(self):
     '''
     Asks the user to reset the plant, then resynchronises over serial.

     Blocks until Enter is pressed, opens the serial port if it is closed,
     flushes both serial buffers, sends the RESET_STATE command terminated
     by ';', waits self.dt and reads the state back. self.t is set to -1.

     Returns the state read from the serial port.
     '''
     prompt = 'Please reset your plant to its initial state and hit Enter'
     print_with_stamp(prompt, self.name)
     input()

     port = self.serial
     if not port.isOpen():
         port.open()
     # drop anything left over in the buffers
     port.flushInput()
     port.flushOutput()

     reset_cmd = self.cmds['RESET_STATE']+";"
     port.write(reset_cmd.encode())
     sleep(self.dt)

     self.state, self.t = self.state_from_serial()
     # the time reported by the plant is discarded
     self.t = -1
     return self.state
Esempio n. 12
0
def rollout(mx0, Sx0, H, gamma, policy, dynmodel, cost, angle_dims=None):
    ''' Builds the graph that propagates the initial state distribution
    Normal(mx0,Sx0) for H timesteps.

    At every timestep i the belief is propagated through the policy and the
    dynamics model, the cost of the resulting state distribution is
    evaluated, and its mean is scaled by gamma**i (its variance by the
    square of that factor).

    Returns the list [mean costs, cost variances, state means, state
    covariances], one entry per timestep, and the scan updates.'''
    utils.print_with_stamp(
        'Building computation graph for belief state propagation',
        'pilco.rollout')

    def step_rollout(i, mx, Sx, *args):
        '''
            Single step of rollout.
        '''
        # next state distribution under the policy and the dynamics model
        belief, updates = propagate_belief(
            mx, Sx, policy, dynmodel, angle_dims)
        mx_next, Sx_next = belief

        # cost of the new state distribution
        mcost, Scost = cost(mx_next, Sx_next)

        # the discount factor is the first non-sequence
        discount = args[0]**i
        step_outputs = [discount * mcost,
                        tt.square(discount) * Scost,
                        mx_next,
                        Sx_next]
        return step_outputs, updates

    # these are the shared variables that will be used in the graph.
    # we need to pass them as non_sequences here
    # (see: http://deeplearning.net/software/theano/library/scan.html)
    non_sequences = [gamma]
    non_sequences.extend(dynmodel.get_intermediate_outputs())
    non_sequences.extend(policy.get_intermediate_outputs())

    # only the state mean and covariance are fed back into the next step
    scan_outputs, updts = theano.scan(
        fn=step_rollout,
        sequences=[theano.tensor.arange(H)],
        outputs_info=[None, None, mx0, Sx0],
        non_sequences=non_sequences,
        strict=True,
        allow_gc=False,
        name="pilco>rollout_scan")

    mean_costs, var_costs, mean_states, cov_states = scan_outputs[:4]
    for variable, label in ((mean_costs, 'mc_list'),
                            (var_costs, 'Sc_list'),
                            (mean_states, 'mx_list'),
                            (cov_states, 'Sx_list')):
        variable.name = label

    return [mean_costs, var_costs, mean_states, cov_states], updts
Esempio n. 13
0
def rollout(mx0, Sx0, Z_nom, U_nom, I, L, dynmodel, cost, D, angle_dims=None):
    ''' Builds the graph that propagates the belief state forward under a
    time-varying local linear policy around a nominal trajectory.

    The belief Normal(mx0,Sx0) is packed into a vector z0 with wrap_belief.
    At every timestep the control is computed as
        u = u_nom + I_t + L_t.dot(z - z_nom)
    and the belief is pushed through the dynamics model.

    Z_nom, U_nom, L and I are iterated over together, one entry per
    timestep. D is passed to np.triu_indices, unwrap_belief and
    propagate_belief. cost is accepted for interface compatibility but is
    not part of the returned graph.

    Returns [z, u], the belief vectors and controls for every timestep, and
    the scan updates.'''
    msg = 'Building computation graph for belief state propagation'
    utils.print_with_stamp(msg, 'pilco.rollout')

    # define internal scan computations
    # scan calls the step function with the sequence slices first (in the
    # order given to `sequences`), then the recurrent outputs, then the
    # non-sequences; the signature has to follow that order
    def forward_step(z_nom, u_nom, L_, I_, z, *args):
        '''
            Single step of rollout.
        '''
        # get controls from local linear policy
        u = u_nom + I_ + L_.dot(z - z_nom)

        # split z into the mean and covariance of the state
        mx, Sx, triu_indices = unwrap_belief(z, D)

        # get next state distribution
        b_out, updates = propagate_belief(mx, Sx, u, dynmodel, D, angle_dims)
        mx_next, Sx_next = b_out

        # build belief vector
        z_next = wrap_belief(mx_next, Sx_next, triu_indices)

        return [z_next, u], updates

    # these are the shared variables that will be used in the graph.
    # we need to pass them as non_sequences here
    # (see: http://deeplearning.net/software/theano/library/scan.html)
    shared_vars = dynmodel.get_all_shared_vars()

    z0 = wrap_belief(mx0, Sx0, np.triu_indices(D))

    # create the nodes that return the result from scan.
    # The step returns two outputs: the belief is recurrent (initialised
    # with z0), the control is not fed back and therefore gets None
    rollout_output, updts = theano.scan(fn=forward_step,
                                        sequences=[Z_nom, U_nom, L, I],
                                        outputs_info=[z0, None],
                                        non_sequences=shared_vars,
                                        strict=True,
                                        allow_gc=False,
                                        name="pddp>rollout_scan")

    z, u = rollout_output[:2]

    return [z, u], updts
Esempio n. 14
0
    def init_params(self, compile_funcs=False):
        '''
        Initialises the inputs, targets and hyperparameters of the policy.

        self.n_inducing inputs are sampled from self.state0_dist and passed
        through utils.gTrig_np; the targets are sampled from a zero-mean
        Gaussian with covariance 0.1 * identity. The last two hyperparameter
        columns are excluded from optimisation. compile_funcs is not used by
        this method.
        '''
        utils.print_with_stamp('Initializing parameters', self.name)

        n_basis = self.n_inducing
        n_controls = self.maxU.size
        n_angles = len(self.angle_dims)

        # init inputs
        raw_inputs = self.state0_dist.sample(n_basis)
        inputs = utils.gTrig_np(raw_inputs, self.angle_dims)

        # initial hyperparameter values, one row per control dimension:
        # 1 for the non-angular dimensions, 0.7 for the two entries of every
        # angular dimension, followed by 1 and 0.01
        init_hyp = np.hstack([
            np.ones(raw_inputs.shape[1] - n_angles),
            0.7 * np.ones(2 * n_angles), 1, 0.01
        ])
        init_hyp = np.tile(init_hyp, (n_controls, 1)).astype(floatX)
        # inverse of the softplus applied below
        init_hyp = np.log(np.exp(init_hyp, dtype=floatX) - 1.0)

        # init policy targets close to zero
        target_dist = utils.distributions.Gaussian(
            np.zeros((n_controls, )), 0.1 * np.eye(n_controls))
        targets = target_dist.sample(n_basis)
        targets = targets.reshape((n_basis, n_controls))

        self.trained = False

        # dataset dimensions
        self.N, self.D = inputs.shape[0], inputs.shape[1]
        self.E = targets.shape[1]

        # set the parameters
        self.set_params({
            'X': inputs.astype(floatX),
            'Y': targets.astype(floatX)
        })
        self.set_params({'unconstrained_hyp': init_hyp.astype(floatX)})

        # softplus keeps the hyperparameters positive
        eps = np.finfo(np.__dict__[floatX]).eps
        constrained = tt.nnet.softplus(self.unconstrained_hyp) + eps

        # don't optimize the signal and noise variances: no gradient flows
        # through the last two columns
        self.hyp = constrained
        self.hyp = tt.concatenate(
            [
                self.hyp[:, :-2],
                theano.gradient.disconnected_grad(self.hyp[:, -2:])
            ],
            axis=np.array(1, dtype='int64'))

        # call init loss to initialize the intermediate shared variables
        super(RBFGP, self).get_loss(cache_intermediate=False)

        # init the prediction function
        self.evaluate(np.zeros((self.D, )))
Esempio n. 15
0
 def reset(self):
     ''' Empties the internal data structures.

     All per-episode lists are replaced by empty lists and curr_episode
     goes back to -1. state_changed is set to False on purpose, so the
     emptied dataset is not considered modified.
     '''
     fmt = 'Resetting experience dataset'
     # leading space keeps the warning separated from the first sentence
     fmt += ' (WARNING: data from %s will be overwritten)'
     utils.print_with_stamp(fmt % (self.filename), self.name)
     self.time_stamps = []
     self.states = []
     self.actions = []
     self.costs = []
     self.info = []
     self.policy_parameters = []
     self.curr_episode = -1
     # Let's give people a last chance of recovering their data. Also, we
     # don't want to save an empty experience dataset
     self.state_changed = False
Esempio n. 16
0
    def init_pseudo_inputs(self):
        '''
        Initialises the pseudo inputs X_sp from the current dataset.

        The initial centers are picked with utils.kmeanspp and refined with
        kmeans; the result is stored through set_params under the key
        'X_sp'. self.should_recompile is set to True.

        @raise AssertionError if the dataset has fewer than self.n_inducing
               samples
        '''
        msg = "Dataset must have at least n_inducing [ %d ] samples to enable"
        msg += " inference with sparse pseudo inputs"
        assert self.N >= self.n_inducing, msg % (self.n_inducing)
        self.should_recompile = True
        # pick initial cluster centers from dataset
        X = self.X.get_value()
        X_sp_ = utils.kmeanspp(X, self.n_inducing)

        # perform kmeans to get initial cluster centers
        utils.print_with_stamp('Initialising pseudo inputs', self.name)
        # the second return value of kmeans is not needed here
        X_sp_, _ = kmeans(X, X_sp_, iter=200, thresh=1e-9)
        # initialize symbolic tensor variable if necessary
        # (this will create the self.X_sp attribute)
        self.set_params({'X_sp': X_sp_})
Esempio n. 17
0
 def truncate(self, episode):
     ''' Cuts the experience down to the given episode number.

     Nothing happens unless 0 < episode <= self.curr_episode. Otherwise
     every per-episode list keeps only its first `episode` entries,
     curr_episode is set to `episode` and the dataset is flagged as changed.
     '''
     if not (episode <= self.curr_episode and episode > 0):
         return

     # Let's give people a last chance of recovering their data. Also,
     # we don't want to save an empty experience dataset
     fmt = 'Resetting experience dataset to episode %d'
     fmt += ' (WARNING: data from %s will be overwritten)'
     utils.print_with_stamp(fmt % (episode, self.filename), self.name)

     self.curr_episode = episode
     for field in ('time_stamps', 'states', 'actions', 'costs', 'info',
                   'policy_parameters'):
         setattr(self, field, getattr(self, field)[:episode])
     self.state_changed = True
Esempio n. 18
0
    def loss_wrapper(self, p, p_shapes, *inputs):
        '''
            Evaluates the loss and its gradients in the form expected by
            scipy optimize.
            @param p numpy array with the current evaluation point for the loss
            @param p_shapes array with the shapes of every parameter
            @param inputs extra arguments forwarded to self.grads_fn
            @return the loss and the flattened gradients, both as float64
        '''
        # split the flat vector into one value per parameter and load the
        # values into the parameters
        values = utils.unwrap_params(p, p_shapes)
        for i, param in enumerate(self.params):
            param.set_value(values[i])

        # first output is the loss, the remaining ones are the gradients
        outputs = self.grads_fn(*inputs)
        flat_grads = utils.wrap_params(outputs[1:])

        # double precision is required by fmin_l_bfgs_b
        loss = np.array(outputs[0]).astype(np.float64)
        dloss = np.array(flat_grads).astype(np.float64)

        # bookkeeping: evaluation counter and best point seen so far
        self.n_evals += 1
        if loss < self.best_p[0]:
            self.best_p = [loss, values, self.n_evals]

        # running average of the time between calls
        elapsed = time.time() - self.start_time
        self.iter_time += (elapsed - self.iter_time) / self.n_evals

        msg = 'Current loss: %s, Total evaluations: %d'
        msg += ', Avg. time per call: %f\t'
        utils.print_with_stamp(msg % (str(loss), self.n_evals, self.iter_time),
                               self.name, True)
        self.start_time = time.time()

        if callable(self.callback):
            self.callback(values, loss, dloss)

        # return loss+gradients
        return loss, dloss
Esempio n. 19
0
def minimize(func, x0, alpha0=0.01, max_nfevals=1000):
    ''' Minimizes func by repeated probabilistic line searches along the
    negative gradient.

    func(x) must return the tuple
    (loss_mean, grad_of_loss_mean, loss_variance, grad_of_loss_variance).

    Args:
        func: the objective, as described above.
        x0: starting point.
        alpha0: initial step size; also used again whenever a line search
            returns a step size of zero.
        max_nfevals: evaluation budget. prob_line_search does not report how
            often it evaluated func, so every line search is counted as one
            evaluation; the real number of calls to func can be higher.

    Returns:
        (xt, f): the last point accepted by the line search and the loss
        mean at that point.
    '''
    f, df, var_f, var_df = func(x0)

    search_dir = -df
    xt = x0
    step_size = alpha0
    utils.print_with_stamp('new step_size: %f' % (step_size), 'ProbLS')
    nfevals = 1

    while nfevals < max_nfevals:
        step_size, xt, f, df, var_f, var_df = prob_line_search(
            func, xt, f, df, var_f, var_df, step_size, search_dir)
        # without this update the loop condition never changes and the
        # optimization runs forever
        nfevals += 1
        utils.print_with_stamp('new step_size: %f' % (step_size), 'ProbLS')
        # set new initial line search step size to be 1.3 longer than previous
        step_size *= 1.3
        if step_size == 0.0:
            step_size = alpha0
        search_dir = -df

    return xt, f
Esempio n. 20
0
    def set_dataset(self, X_dataset, Y_dataset):
        '''
        Sets the dataset and keeps the sparse approximation in sync with
        its size.

        When the dataset has fewer than self.n_inducing samples, the pseudo
        inputs and everything derived from them are cleared. When it is
        large enough and no pseudo inputs exist yet, they are initialised
        and self.should_recompile is set.
        '''
        # set the dataset on the parent class
        super(SPGP, self).set_dataset(X_dataset, Y_dataset)
        if self.N < self.n_inducing:
            msg = "Dataset must have at least n_inducing [ %d ] samples to"
            msg += " enable inference with sparse pseudo inputs"
            utils.print_with_stamp(msg % (self.n_inducing), self.name)
            # drop everything that depends on the pseudo inputs
            self.X_sp = None
            self.loss_sp_fn = None
            self.dloss_sp_fn = None
            self.beta_sp = None
            self.Lmm = None
            self.Amm = None
            self.should_recompile = False
        elif self.X_sp is None:
            msg = 'Dataset is large enough for using pseudo inputs. You should'
            msg += ' reinitiialise the training loss function and predictions.'
            utils.print_with_stamp(msg, self.name)
            # init the shared variable for the pseudo inputs
            self.init_pseudo_inputs()
            self.should_recompile = True
Esempio n. 21
0
    def train_inverse_dynamics(self, deltas=True):
        '''
        Appends the episodes collected since the last call to the dataset
        of the inverse dynamics model and trains the model.

        Each training input pairs a state with either the difference to its
        successor (deltas=True, computed as x_t - x_{t+1}) or the successor
        itself (deltas=False); states go through utils.gTrig_np first. The
        target is the action recorded at the same timestep. The initial
        state distribution (self.mx0, self.Sx0) is refreshed from the first
        state of every episode, or from the plant when there is no
        experience at all.

        @param deltas whether to use state differences as second half of
                      the inputs
        '''
        utils.print_with_stamp('Training inverse dynamics model', self.name)

        X = []
        Y = []
        n_episodes = len(self.experience.states)

        if n_episodes > 0:
            # construct training dataset from the episodes not seen so far
            for i in range(self.next_episode_inv, n_episodes):
                x = np.array(self.experience.states[i])
                u = np.array(self.experience.actions[i])

                # inputs are pairs of consecutive states < x_{t}, x_{t+1} >
                x_ = utils.gTrig_np(x, self.angle_idims)
                if deltas:
                    X.append(np.hstack((x_[:-1], x_[:-1] - x_[1:])))
                else:
                    X.append(np.hstack((x_[:-1], x_[1:])))
                # outputs are the actions that produced the input state
                # transition
                Y.append(u[:-1])

            self.next_episode_inv = n_episodes

            # get distribution of initial states
            x0 = np.array([x[0] for x in self.experience.states])
            if n_episodes > 1:
                self.mx0.set_value(x0.mean(0).astype('float64'))
                self.Sx0.set_value(np.cov(x0.T).astype('float64'))
            else:
                # a single episode gives no covariance estimate; use a small
                # isotropic one
                self.mx0.set_value(x0.astype('float64').flatten())
                self.Sx0.set_value(1e-2 * np.eye(x0.size).astype('float64'))

            # append data to the dynamics model. When no episode was added
            # since the last call there is nothing to stack, and np.vstack
            # raises on an empty list
            if X:
                self.inverse_dynamics_model.append_dataset(
                    np.vstack(X), np.vstack(Y))
        else:
            x0 = np.array(self.plant.x0, dtype='float64').squeeze()
            S0 = np.array(self.plant.S0, dtype='float64').squeeze()
            self.mx0.set_value(x0)
            self.Sx0.set_value(S0)

        utils.print_with_stamp(
            'Dataset size:: Inputs: [ %s ], Targets: [ %s ]  ' %
            (self.inverse_dynamics_model.X.get_value().shape,
             self.inverse_dynamics_model.Y.get_value().shape), self.name)
        if self.inverse_dynamics_model.should_recompile:
            # reinitialize log likelihood
            self.inverse_dynamics_model.init_loss()

        self.inverse_dynamics_model.train()
        utils.print_with_stamp('Done training inverse dynamics model',
                               self.name)
Esempio n. 22
0
def train_dynamics(dynmodel,
                   data,
                   angle_dims=None,
                   init_episode=0,
                   max_episodes=None,
                   max_dataset_size=0,
                   wrap_angles=False,
                   append=False):
    ''' Trains a dynamics model using the data dataset.

    Args:
        dynmodel: model to train; its dataset is replaced, or extended when
            append is True.
        data: experience object providing `states` and
            `get_dynmodel_dataset`.
        angle_dims: indices of the angular state dimensions; None is treated
            as an empty list.
        init_episode: first episode to use. The dataset is left untouched
            unless there are more episodes than this.
        max_episodes: when set and not larger than the number of episodes,
            only the last max_episodes episodes are used (init_episode is
            ignored in that case).
        max_dataset_size: keep only the last max_dataset_size rows; 0 keeps
            everything.
        wrap_angles: wrap the targets at angle_dims to [-pi, pi].
        append: append to the dataset of the model instead of replacing it.

    Returns:
        The trained dynmodel.
    '''
    utils.print_with_stamp('Training dynamics model', 'train_dynamics')

    # avoid a shared mutable default argument
    angle_dims = [] if angle_dims is None else angle_dims

    n_episodes = len(data.states)
    if n_episodes > init_episode:
        # get dataset for dynamics model
        if max_episodes is None or n_episodes < max_episodes:
            episodes = list(range(init_episode, n_episodes))
        else:
            episodes = list(
                range(max(0, n_episodes-max_episodes), n_episodes))

        X, Y = data.get_dynmodel_dataset(filter_episodes=episodes,
                                         angle_dims=angle_dims,
                                         deltas=True)
        # 0 means "no limit"; make that explicit instead of relying on
        # X[-0:] being the whole array
        if max_dataset_size:
            X = X[-max_dataset_size:]
            Y = Y[-max_dataset_size:]
        # wrap angles if requested
        # (this might introduce error if the angular velocities are high)
        if wrap_angles:
            # wrap angle differences to [-pi,pi]
            Y[:, angle_dims] = (Y[:, angle_dims] + np.pi) % (2 * np.pi) - np.pi

        if append:
            # append data to the dynamics model
            dynmodel.append_dataset(X, Y)
        else:
            dynmodel.set_dataset(X, Y)

    i_shp = dynmodel.X.get_value(borrow=True).shape
    o_shp = dynmodel.Y.get_value(borrow=True).shape
    msg = 'Dataset size:: Inputs: [ %s ], Targets: [ %s ] ' % (i_shp, o_shp)
    utils.print_with_stamp(msg, 'train_dynamics')

    # finally, train the dynamics model
    dynmodel.train()
    utils.print_with_stamp('Done training dynamics model', 'train_dynamics')

    return dynmodel
Esempio n. 23
0
    def init_predict(self,
                     input_covariance=False,
                     input_ndim=1,
                     *args,
                     **kwargs):
        ''' Builds and compiles the function that evaluates
        self.predict_symbolic.

        @param input_covariance when True, the compiled function takes a
                                second, matrix-valued input Sx; otherwise
                                None is passed to predict_symbolic
        @param input_ndim number of dimensions of the first input
        @param args, kwargs forwarded to self.predict_symbolic
        @return the compiled theano function. It returns a single value when
                predict_symbolic yields exactly one output that is not None
        '''
        # symbolic inputs
        mx = tt.TensorType(floatX, (False, ) * input_ndim)('mx')
        if input_covariance:
            Sx = tt.matrix('Sx')
            input_vars = [mx, Sx]
        else:
            Sx = None
            input_vars = [mx]

        # get prediction
        utils.print_with_stamp('Initialising expression graph for prediction',
                               self.name)
        output_vars = self.predict_symbolic(mx, Sx, *args, **kwargs)

        # normalise to a list and drop the outputs that are None
        if not isinstance(output_vars, (tuple, list)):
            output_vars = [output_vars]
        prediction = [out for out in output_vars if out is not None]

        # compile prediction
        utils.print_with_stamp('Compiling mean and variance of prediction',
                               self.name)

        if input_covariance:
            fn_name = '%s>predict_ui' % (self.name)
        else:
            fn_name = '%s>predict' % (self.name)

        # a single output is returned bare instead of wrapped in a list
        if len(prediction) == 1:
            prediction = prediction[0]
        predict_fn = theano.function(input_vars,
                                     prediction,
                                     on_unused_input='ignore',
                                     name=fn_name,
                                     allow_input_downcast=True)

        utils.print_with_stamp('Done compiling', self.name)

        return predict_fn
Esempio n. 24
0
def compile_loss_fn(losses, params, updates=None, callback=None):
    '''
    Compiles a loss function compatible with the minimize_probls method.
    The compiled function takes no inputs and returns the mean and variance
    of the losses and of their gradients.
    TODO allow for various SGD methods (e.g. adam, nesterov, rmsprop)

    @param losses theano graph of the losses; the first axis is assumed to
                  be the batch index
    @param params variables with respect to which the jacobian of the
                  losses is computed
    @param updates theano updates passed to the compiled function
    @param callback not used by this function
    @return theano function returning [m_loss, m_grad, S_loss, S_grad]
    '''
    T = theano.tensor

    # mean and variance of loss (assuming first axis is the batch index)
    utils.print_with_stamp("Computing loss mean and variance", 'ProbLS')
    m_loss = losses.mean(0)
    S_loss = losses.var(0)

    # mean and variance of gradients
    # TODO compute the variance of gradients efficiently
    utils.print_with_stamp("Computing gradient mean and variance", 'ProbLS')
    jacobians = T.jacobian(losses, params)
    # one (flattened mean, flattened variance) pair per jacobian
    stats = [(jac.mean(0).flatten(), jac.var(0).flatten())
             for jac in jacobians]
    mean_parts, var_parts = zip(*stats)
    # stack the per-parameter statistics into two flat vectors
    m_grad = T.concatenate(mean_parts)
    S_grad = T.concatenate(var_parts)

    outputs = [m_loss, m_grad, S_loss, S_grad]
    loss_fn = theano.function([], outputs, updates=updates)

    utils.print_with_stamp("Done compiling.", 'ProbLS')
    return loss_fn
Esempio n. 25
0
    def set_objective(self, loss, params, inputs=None, updts=None, grads=None,
                      compilation_mode=None, **kwargs):
        '''
            Sets a new objective function for the optimizer, compiling one
            theano function for the loss (self.loss_fn) and one for the loss
            together with its gradients (self.grads_fn)
            @param loss theano graph representing the loss to be optimized
            @param params theano shared variables representing the parameters
                          to be optimized
            @param inputs theano variables representing the inputs required to
                          compute the loss, other than params
            @param updts dictionary or list of theano updates to be applied
                         after every evaluation of the loss function
            @param grads gradients of the loss function. If not provided, will
                         be computed here
            @param compilation_mode passed as the mode argument of
                                    theano.function
            @param kwargs not used by this method
        '''
        inputs = [] if inputs is None else inputs
        updts = None if updts is None else OrderedUpdates(updts)

        if grads is None:
            utils.print_with_stamp('Building computation graph for gradients',
                                   self.name)
            grads = theano.grad(loss, params)

        # options common to both compiled functions
        compile_opts = dict(updates=updts,
                            allow_input_downcast=True,
                            mode=compilation_mode)

        utils.print_with_stamp('Compiling function for loss', self.name)
        self.loss_fn = theano.function(inputs, loss, **compile_opts)

        utils.print_with_stamp('Compiling function for loss+gradients',
                               self.name)
        self.grads_fn = theano.function(inputs, [loss] + grads,
                                        **compile_opts)

        # reset the bookkeeping and remember what is being optimized
        self.n_evals = 0
        self.start_time = 0
        self.iter_time = 0
        self.params = params
Esempio n. 26
0
    def set_objective(self,
                      loss,
                      params,
                      inputs=None,
                      updts=None,
                      grads=None,
                      polyak_averaging=None,
                      clip=None,
                      trust_input=True,
                      compilation_mode=None,
                      **kwargs):
        '''
            Changes the objective function to be optimized. Compiles
            self.loss_fn (evaluates the loss) and self.update_params_fn
            (evaluates loss and gradients, then applies the update rule
            selected by self.min_method)
            @param loss theano graph representing the loss to be optimized
            @param params theano shared variables representing the parameters
                          to be optimized
            @param inputs theano variables representing the inputs required to
                          compute the loss, other than params
            @param updts dictionary or list of theano updates to be applied
                         after every evaluation of the loss function. If None,
                         no extra updates are applied
            @param grads gradients of the loss function. If not provided, will
                         be computed here. Gradient clipping is only applied
                         to gradients computed here
            @param polyak_averaging if set and greater than 0, coefficient of
                                    the running average of the parameters; the
                                    averaged copies are kept in
                                    self.params_avg
            @param clip if not None, maximum total norm of the gradients
                        computed here
            @param trust_input value assigned to the trust_input attribute of
                               the compiled functions
            @param compilation_mode passed as the mode argument of
                                    theano.function
            @param kwargs arguments to pass to the lasagne.updates function
        '''
        if inputs is None:
            inputs = []

        if updts is not None:
            updts = OrderedUpdates(updts)

        if grads is None:
            utils.print_with_stamp('Building computation graph for gradients',
                                   self.name)
            grads = theano.grad(loss, params)
            if clip is not None:
                utils.print_with_stamp(
                    "Clipping gradients to norm %s" % (str(clip)), self.name)
                grads = lasagne.updates.total_norm_constraint(grads, clip)
            else:
                utils.print_with_stamp("No gradient clipping", self.name)

        utils.print_with_stamp("Computing parameter update rules", self.name)
        min_method_updt = LASAGNE_MIN_METHODS[self.min_method]
        grad_updates = min_method_updt(grads, params, **kwargs)

        # list() so that gradients given as a tuple can be concatenated too
        outputs = [loss] + list(grads)
        # updts defaults to None; adding None to the update dictionary would
        # raise a TypeError, so only merge when extra updates were given
        if updts is not None:
            grad_updates = grad_updates + updts

        if polyak_averaging and polyak_averaging > 0.0:
            # create copy of params (unnamed parameters get unnamed copies)
            params_avg = [
                theano.shared(p.get_value(borrow=False,
                                          return_internal_type=True),
                              broadcastable=p.broadcastable,
                              name=(p.name + '_copy'
                                    if p.name is not None else None))
                for p in params
            ]

            # prepare updates for polyak averaging
            t = theano.shared(np.array(1, dtype=floatX))
            b = polyak_averaging

            replace_dict = OrderedDict()
            for p, pp in zip(params, params_avg):
                # bias corrected exponential moving average of the updated
                # parameter values
                grad_updates[pp] = ((b - b**t) * pp +
                                    (1 - b) * grad_updates[p]) / (1 - b**t)
                replace_dict[p] = pp
            grad_updates[t] = t + 1

            # the loss returned by update_params_fn is evaluated at the
            # averaged parameters
            outputs[0] = theano.clone(loss, replace=replace_dict, strict=True)
            self.params_avg = params_avg

        else:
            # drop the averaged parameters left by a previous objective
            if hasattr(self, 'params_avg'):
                delattr(self, 'params_avg')

        utils.print_with_stamp('Compiling function for loss', self.name)
        # converts inputs to shared variables to avoid repeated gpu transfers
        self.shared_inpts = [
            theano.shared(np.empty([1] * inp.ndim, dtype=inp.dtype),
                          name=inp.name) for inp in inputs
        ]

        givens_dict = dict(zip(inputs, self.shared_inpts))
        self.loss_fn = theano.function([],
                                       loss,
                                       updates=updts,
                                       on_unused_input='ignore',
                                       allow_input_downcast=True,
                                       givens=givens_dict,
                                       mode=compilation_mode)
        self.loss_fn.trust_input = trust_input

        utils.print_with_stamp("Compiling parameter updates", self.name)

        self.update_params_fn = theano.function([],
                                                outputs,
                                                updates=grad_updates,
                                                on_unused_input='ignore',
                                                allow_input_downcast=True,
                                                givens=givens_dict,
                                                mode=compilation_mode)
        self.update_params_fn.trust_input = trust_input

        self.n_evals = 0
        self.start_time = 0
        self.iter_time = 0
        self.params = params
        # every variable modified by update_params_fn; saved and restored
        # as a whole when tracking the best iterate
        self.optimizer_state = list(grad_updates.keys())
Esempio n. 27
0
    def minimize(self, *inputs, **kwargs):
        '''
            Repeatedly calls the compiled parameter update function
            (self.update_params_fn): once before the initial loss is
            measured, then self.max_evals - 1 times in the training loop.
            @param inputs python variables to pass as inputs to the compiled
                          theano functions for the loss and gradients
            @param kwargs optional keyword arguments:
                          callback: if callable, called as
                                    callback(loss, dloss) after every update
                          return_best: if True, the values of the variables
                                       in self.optimizer_state are reset, at
                                       the end, to the ones stored with the
                                       lowest loss (default False)
        '''
        callback = kwargs.get('callback')
        return_best = kwargs.get('return_best', False)
        self.iter_time = 0
        self.start_time = time.time()
        self.n_evals = 0
        utils.print_with_stamp('Optimizing parameters', self.name)
        # set values for shared inputs
        for shared, value in zip(self.shared_inpts, inputs):
            shared.set_value(np.array(value).astype(shared.dtype))
        # set initial loss and parameters; note that the state is stored
        # before the first update, while the loss is measured after it
        state0 = [
            s.get_value(return_internal_type=True, borrow=False)
            for s in self.optimizer_state
        ]
        self.update_params_fn()
        loss0 = self.loss_fn()
        utils.print_with_stamp('Initial loss [%s]' % (loss0), self.name)
        self.best_p = [loss0, state0, 0]

        # training loop
        if return_best:
            out_str = 'Curr loss: %E [%d: %E], n_evals: %d, Avg. time per updt: %f'
        else:
            out_str = 'Curr loss: %E, n_evals: %d, Avg. time per updt: %f'
        for i in range(1, self.max_evals):
            start_time = time.time()

            # evaluate current policy and update parameters
            ret = self.update_params_fn()
            # the returned loss corresponds to the parameters BEFORE the update
            loss, dloss = ret[0], ret[1:]

            # `and` binds tighter than `or`: the first iterations are only
            # forced to be stored when return_best is set
            if loss < self.best_p[0] or (i < 10 and return_best):
                # get current optimizer state
                state = [
                    s.get_value(return_internal_type=True, borrow=False)
                    for s in self.optimizer_state
                ]
                self.best_p = [loss, state, i]
            if callable(callback):
                callback(loss, dloss)
            self.n_evals += 1

            # running average of the time spent per update
            end_time = time.time()
            dt = end_time - start_time
            it_updt = (dt - self.iter_time) / self.n_evals
            self.iter_time += it_updt
            if return_best:
                str_params = (loss, self.best_p[2], self.best_p[0],
                              self.n_evals, self.iter_time)
            else:
                str_params = (loss, self.n_evals, self.iter_time)
            utils.print_with_stamp(out_str % str_params, self.name, True)

        print('')

        # iteration reported in the final message. n_evals equals the last
        # loop index when the loop ran, and is still defined (0) when it did
        # not run at all
        n_iter = self.n_evals
        if return_best:
            _, best_state, n_iter = self.best_p
            for s_i, st_i in zip(self.optimizer_state, best_state):
                s_i.set_value(st_i)

        if hasattr(self, 'params_avg'):
            # set the model parameters to be the ones found via
            # polyak averaging
            for p_i, pp_i in zip(self.params, self.params_avg):
                p_i.set_value(pp_i.get_value())

        v = self.loss_fn()

        msg = 'Done training. New loss [%f] iter: [%d]'
        utils.print_with_stamp(msg % (v, n_iter), self.name)
Esempio n. 28
0
    def minibatch_minimize(self, X, Y, *inputs, **kwargs):
        '''
            Optimizes the parameters by calling the compiled update function
            (self.update_params_fn) on shuffled mini batches of (X, Y), until
            self.max_evals mini batch updates have been done.
            @param X data for the first shared input
            @param Y data for the second shared input
            @param inputs python variables for the remaining shared inputs
            @param kwargs optional keyword arguments:
                          callback: if callable, called as
                                    callback(loss, dloss) after every update
                          return_best: if True, the values of the variables
                                       in self.optimizer_state are reset, at
                                       the end, to the ones stored with the
                                       lowest loss (default False)
                          batch_size: mini batch size (default 100), capped
                                      at X.shape[0]
        '''
        callback = kwargs.get('callback', None)
        return_best = kwargs.get('return_best', False)
        batch_size = min(kwargs.get('batch_size', 100), X.shape[0])

        # reset the bookkeeping
        self.iter_time = 0
        self.start_time = time.time()
        self.n_evals = 0
        utils.print_with_stamp('Optimizing parameters via mini batches',
                               self.name)

        def snapshot():
            # copies of the current values of the optimizer state variables
            return [
                var.get_value(return_internal_type=True, borrow=False)
                for var in self.optimizer_state
            ]

        # the first two shared inputs hold the current mini batch; they start
        # with the last batch_size rows of the dataset
        x_shared = self.shared_inpts[0]
        x_shared.set_value(X[-batch_size:])
        y_shared = self.shared_inpts[1]
        y_shared.set_value(Y[-batch_size:])
        for shared, value in zip(self.shared_inpts[2:], inputs):
            shared.set_value(np.array(value).astype(shared.dtype))

        # set initial loss and parameters
        state0 = snapshot()
        self.update_params_fn()
        loss0 = self.loss_fn()
        utils.print_with_stamp('Initial loss [%s]' % (loss0), self.name)
        self.best_p = [loss0, state0, 0]

        # sweep over the dataset until max_evals updates have been done
        out_str = 'Curr loss: %E [%d: %E], n_evals: %d, Avg. time per updt: %f'
        done = False
        while not done:
            batches = utils.iterate_minibatches(X, Y, batch_size, shuffle=True)
            for x, y in batches:
                tic = time.time()

                # add small amount of noise for smoothing (done in place on
                # the mini batch array)
                noise_scale = 1e-4 * (x.max() - x.min())
                x += noise_scale * np.random.randn(*x.shape)

                # mini batch update
                x_shared.set_value(x)
                y_shared.set_value(y)
                ret = self.update_params_fn()

                # the returned loss and gradients correspond to the parameters
                # BEFORE the update
                loss, dloss = ret[0], ret[1:]

                # the first ten evaluations are always stored
                if loss < self.best_p[0] or self.n_evals < 10:
                    self.best_p = [loss, snapshot(), self.n_evals]
                if callable(callback):
                    callback(loss, dloss)

                self.n_evals += 1
                if self.n_evals >= self.max_evals:
                    done = True
                    break

                # running average of the time spent per update
                elapsed = time.time() - tic
                self.iter_time += (elapsed - self.iter_time) / self.n_evals
                str_params = (loss, self.best_p[2], self.best_p[0],
                              self.n_evals, self.iter_time)
                utils.print_with_stamp(out_str % str_params, self.name, True)
        print('')

        # iteration reported in the final message
        n_iter = self.n_evals
        if return_best:
            _, best_state, n_iter = self.best_p
            for var, value in zip(self.optimizer_state, best_state):
                var.set_value(value)

        if hasattr(self, 'params_avg'):
            # set the model parameters to be the ones found via
            # polyak averaging
            for p_i, pp_i in zip(self.params, self.params_avg):
                p_i.set_value(pp_i.get_value())

        v = self.loss_fn()
        msg = 'Done training. New loss [%f] iter: [%d]'
        utils.print_with_stamp(msg % (v, n_iter), self.name)
Esempio n. 29
0
    # the results go to <args.output_folder>/<name>_<e_id>, where name is the
    # one given in args, or the name of the environment if none was given
    odir = args.output_folder
    if args.name is not None:
        name = args.name + '_' + str(e_id)
    else:
        name = env.name + '_' + str(e_id)

    output_folder = os.path.join(odir, name)

    try:
        os.makedirs(output_folder)
    except OSError:
        # NOTE(review): every OSError is handled as "the folder already
        # exists"; presumably other failures surface at os.stat -- confirm
        # move the old stuff (renamed with its st_ctime as a suffix)
        target_dir = output_folder + '_' + str(os.stat(output_folder).st_ctime)
        os.rename(output_folder, target_dir)
        os.mkdir(output_folder)
        utils.print_with_stamp('Moved old results from [%s] to [%s]' %
                               (output_folder, target_dir))
    utils.set_output_dir(output_folder)
    utils.print_with_stamp('Results will be saved in [%s]' % (output_folder))

    # write the initial configuration to disk
    params_path = os.path.join(output_folder, 'initial_config.dill')
    with open(params_path, 'wb+') as f:
        config_dict = dict(params=params,
                           loss_kwargs=loss_kwargs,
                           polopt_kwargs=polopt_kwargs,
                           extra_inps=extra_inps)

        dill.dump(config_dict, f)

    # learner_setup with the policy and dynamics model arguments already bound
    scenario = partial(learner_setup, pol=pol, dyn=dyn)
Esempio n. 30
0
    def get_loss(self):
        ''' Builds the symbolic loss used for training, creating the network
        first if it does not exist yet.

            @return tuple (loss, inputs, updates): the theano graph of the
                    loss, the list of symbolic inputs [train_inputs,
                    train_targets, input_lengthscale, hidden_lengthscale]
                    and an empty OrderedUpdates object
        '''
        # build the network on first use
        if self.network is None:
            net_params = {} if self.network_params is None\
                         else self.network_params
            self.build_network(self.network_spec,
                               params=net_params,
                               name=self.name)

        utils.print_with_stamp('Initialising loss function', self.name)

        def tagged(suffix):
            # symbolic variables are named '<model name>><suffix>'
            return '%s>%s' % (self.name, suffix)

        # Input variables
        input_lengthscale = tt.scalar(tagged('input_lengthscale'))
        hidden_lengthscale = tt.scalar(tagged('hidden_lengthscale'))
        train_inputs = tt.matrix(tagged('train_inputs'))
        train_targets = tt.matrix(tagged('train_targets'))

        # evaluate network output for batch
        train_predictions, sn = self.predict_symbolic(train_inputs,
                                                      None,
                                                      deterministic=False,
                                                      iid_per_eval=True,
                                                      return_samples=True)

        # build the dropout loss function ( See Gal and Ghahramani 2015)
        # n_batch: rows in the training targets, n_total: rows in self.X
        n_batch = train_targets.shape[0].astype(theano.config.floatX)
        n_total = self.X.shape[0].astype(theano.config.floatX)

        # compute negative log likelihood
        # note that if we have sn_std be a 1xD vector, broadcasting
        # rules apply
        lml = objectives.gaussian_log_likelihood(train_targets,
                                                 train_predictions, sn)

        # compute regularization term; each objective only accounts for one
        # kind of dropout layer
        kl_args = (self.network, input_lengthscale, hidden_lengthscale)
        # binary dropout layers
        reg = objectives.dropout_gp_kl(*kl_args)
        # gaussian dropout layers
        reg += objectives.gaussian_dropout_kl(*kl_args)
        # log normal dropout layers
        reg += objectives.log_normal_kl(*kl_args)

        loss = -lml/n_batch + reg/n_total

        inputs = [train_inputs, train_targets,
                  input_lengthscale, hidden_lengthscale]
        updates = theano.updates.OrderedUpdates()

        # get trainable network parameters
        trainable = lasagne.layers.get_all_params(self.network,
                                                  trainable=True)

        # if we are learning the noise
        if not self.heteroscedastic:
            trainable.append(self.unconstrained_sn)
        self.set_params({p.name: p for p in trainable})
        return loss, inputs, updates