Example #1
def pqr_gbrbm_perturb(to_perturb_Bp, to_perturb_Bq, dx=50, dh=10):
    """
    Get a Gaussian-Bernoulli RBM problem where the first entry of the B matrix
    (the matrix linking the latent and the observation) is perturbed.

    - to_perturb_Bp: constant to add to the entry of B in p to perturb it
    - to_perturb_Bq: constant to add to the entry of B in q to perturb it
    - dx: observed dimension
    - dh: latent dimension

    Return P (model.Model), Q (model.Model), data source (representing
        distribution R)
    """
    with util.NumpySeedContext(seed=11):
        B = np.random.randint(0, 2, (dx, dh)) * 2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        r = density.GaussBernRBM(B, b, c)

        # for p
        Bp_perturb = np.copy(B)
        Bp_perturb[0, 0] = Bp_perturb[0, 0] + to_perturb_Bp

        # for q
        Bq_perturb = np.copy(B)
        Bq_perturb[0, 0] = Bq_perturb[0, 0] + to_perturb_Bq

        p = density.GaussBernRBM(Bp_perturb, b, c)
        q = density.GaussBernRBM(Bq_perturb, b, c)
        ds = r.get_datasource(burnin=2000)

    return (model.ComposedModel(p=p), model.ComposedModel(p=q), ds)
 def __init__(self, train_x, train_y, shared_nn, non_shared_nns, max_iter = 100, l1 = 0, l2 = 0, debug=False): 
     self.train_x        = np.copy(train_x)
     self.train_y        = np.copy(train_y)
     self.dim            = self.train_x.shape[0]
     self.num_train      = self.train_x.shape[1]
     self.num_obj        = self.train_y.shape[1]
     self.means          = np.mean(self.train_y, axis=0)
     self.stds           = np.std(self.train_y, axis=0)
     self.train_y        = (self.train_y - self.means) / self.stds # standardize output
     self.debug          = debug
     self.max_iter       = max_iter # max iter for the L-BFGS optimization
     self.l1             = l1
     self.l2             = l2
     self.shared_nn      = shared_nn
     self.non_shared_nns = non_shared_nns
     self.num_param      = self.calc_num_params()
     if(train_x.ndim != 2 or train_y.ndim != 2):
         print("train_x.ndim != 2 or train_y.ndim != 2")
         sys.exit(1)
     if(train_x.shape[1] != train_y.shape[0]):
         print("train_x.shape[1] != train_y.shape[0]")
         sys.exit(1)
     if(len(non_shared_nns) != self.num_obj):
         print("len(non_shared_nns) != self.num_obj")
         sys.exit(1)
Example #3
 def __init__(self,
              train_x,
              train_y,
              layer_sizes,
              activations,
              bfgs_iter=100,
              l1=0,
              l2=0,
              debug=False):
     self.train_x = np.copy(train_x)
     self.train_y = np.copy(train_y)
     self.dim = train_x.shape[0]
     self.num_train = train_x.shape[1]
     self.nn = NN(layer_sizes, activations)
     self.num_param = 2 + self.dim + self.nn.num_param(self.dim)
     self.bfgs_iter = bfgs_iter
     self.l1 = l1
     self.l2 = l2
     self.debug = debug
     self.m = layer_sizes[-1]
     self.in_mean = np.mean(self.train_x, axis=1)
     self.in_std = np.std(self.train_x, axis=1)
     self.train_x = ((self.train_x.T - self.in_mean) / self.in_std).T
     self.out_mean = np.mean(self.train_y)
     self.out_std = np.std(self.train_y)
     self.train_y = (self.train_y - self.out_mean) / self.out_std
     self.loss = np.inf
Example #4
    def __init__(self, dataset, gamma, scale, bounds, bfgs_iter, debug=True):
        self.dataset = {}
        self.dataset['low_x'] = np.copy(dataset['low_x'])
        self.dataset['low_y'] = np.copy(dataset['low_y'])
        self.dataset['high_x'] = np.copy(dataset['high_x'])
        self.dataset['high_y'] = np.copy(dataset['high_y'])
        self.gamma = self.dataset['high_y'].shape[0] * gamma * (
            self.dataset['low_y'].max(axis=1) -
            self.dataset['low_y'].min(axis=1))
        self.scale = scale
        self.bounds = np.copy(bounds)
        self.bfgs_iter = bfgs_iter
        self.debug = debug
        self.dim = self.dataset['low_x'].shape[0]
        self.outdim = self.dataset['low_y'].shape[0]
        self.num_low = self.dataset['low_y'].shape[1]
        self.num_high = self.dataset['high_y'].shape[1]
        self.construct_model()

        self.best_constr = np.array([np.inf, np.inf])
        self.best_y = np.zeros((2, self.outdim))
        self.best_y[:, 0] = np.inf
        self.best_x = np.zeros((2, self.dim))
        self.get_best_y(self.dataset['low_x'],
                        self.dataset['low_y'],
                        is_high=0)
        self.get_best_y(self.dataset['high_x'],
                        self.dataset['high_y'],
                        is_high=1)
Example #5
def leapfrog(q, p, dVdq, path_len, step_size):
    """Leapfrog integrator for Hamiltonian Monte Carlo.

    Parameters
    ----------
    q : np.floatX
        Initial position
    p : np.floatX
        Initial momentum
    dVdq : callable
        Gradient of the potential energy with respect to position
    path_len : float
        How long to integrate for
    step_size : float
        How long each integration step should be

    Returns
    -------
    q, p : np.floatX, np.floatX
        New position and momentum
    """
    q, p = np.copy(q), np.copy(p)

    p -= step_size * dVdq(q) / 2  # half step
    for _ in range(int(path_len / step_size) - 1):
        q += step_size * p  # whole step
        p -= step_size * dVdq(q)  # whole step
    q += step_size * p  # whole step
    p -= step_size * dVdq(q) / 2  # half step

    # momentum flip at end
    return q, -p
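A minimal usage sketch, assuming a one-dimensional standard-normal target: then V(q) = q**2 / 2, dVdq(q) = q, and the dynamics are a unit-frequency harmonic oscillator with period 2*pi, so one full trajectory should land close to where it started.

import numpy as np

q0, p0 = np.array([1.0]), np.array([0.5])
q1, p1 = leapfrog(q0, p0, dVdq=lambda q: q, path_len=2 * np.pi, step_size=0.1)
print(q1, p1)  # q1 close to q0, p1 close to -p0 (note the momentum flip)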
Example #6
    def calc_update(self,
                    x,
                    p,
                    trust_radius,
                    trust_radius_max,
                    obj,
                    quality_required=0.2,
                    quality_low=0.25,
                    quality_high=0.75):
        # Parameter checks
        if not quality_required < quality_low < quality_high:
            raise ValueError(
                'Invalid quality parameters, must be: quality_required < quality_low < quality_high'
            )

        df = obj.function(x) - obj.function(x + p)
        dm = self.model(x, np.zeros_like(x), obj) - self.model(x, p, obj)
        quality = df / dm

        if quality < quality_low:
            trust_radius_new = quality_low * trust_radius
        else:
            if quality > quality_high and np.isclose(la.norm(p), trust_radius):
                trust_radius_new = min(2 * trust_radius, trust_radius_max)
            else:
                trust_radius_new = np.copy(trust_radius)

        if quality > quality_required:
            x_new = x + p
        else:
            x_new = np.copy(x)

        return x_new, trust_radius_new
Example #7
    def kinetic_fd(self, param, pos, eps=1E-6):
        """Compute the action of the kinetic operator on the we points.
        Args :
            pos : position of the electrons
            metod (string) : mehod to compute the derivatives
        Returns : value of K * psi
        """

        nwalk = pos.shape[0]
        ndim = pos.shape[1]
        out = np.zeros(nwalk)

        for icol in range(ndim):

            pos_tmp = np.copy(pos)
            feps = -2*self.values(param, pos_tmp)

            pos_tmp = np.copy(pos)
            pos_tmp[:, icol] += eps
            feps += self.values(param, pos_tmp)

            pos_tmp = np.copy(pos)
            pos_tmp[:, icol] -= eps
            feps += self.values(param, pos_tmp)

            out += feps/(eps**2)

        return out
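For reference, the loop above accumulates the central-difference Laplacian of the wave function, sum_i [psi(pos + eps*e_i) + psi(pos - eps*e_i) - 2*psi(pos)] / eps**2, one coordinate column at a time; the -1/2 prefactor of the usual kinetic operator is not applied inside this method.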
Example #8
    def fullSample( self, stabilize=False, **kwargs ):

        if( stabilize == True ):

            J11 = np.copy( self.J11 )
            J12 = np.copy( self.J12 )
            J22 = np.copy( self.J22 )

            A, sigma = Regression.natToStandard( -0.5 * self.J11, -0.5 * self.J22, -self.J12.T )
            A = stab( A )

            n1, n2, n3 = Regression.standardToNat( A, sigma )

            self.J11 = -2 * n1
            self.J12 = -n3.T
            self.J22 = -2 * n2
            self.log_Z = 0.5 * np.linalg.slogdet( np.linalg.inv( self.J11 ) )[ 1 ]

        ans = super( LDSState, self ).fullSample( **kwargs )

        if( stabilize == True ):

            self.J11 = J11
            self.J12 = J12
            self.J22 = J22
            self.log_Z = 0.5 * np.linalg.slogdet( np.linalg.inv( self.J11 ) )[ 1 ]

        return ans
    def loss_augmented_inference(self, x, y, w, relaxed=False, return_energy=False):
        """Loss-augmented Inference for x relative to y using parameters w.

        Finds (approximately)
        argmax_y_hat np.dot(w, joint_feature(x, y_hat)) + loss(y, y_hat)
        using self.inference_method.
        """
        self.inference_calls += 1
        self._check_size_w(w)  # check if size of w is equal to size of joint feature
        # need to compute the joint feature and hinge loss
        # return self.loss_augmented_inference_graph_iht(x, y, w, relaxed=relaxed)

        # y_hat = x
        # y_hat = np.random.rand(len(y))
        y_hat = np.zeros_like(y)
        yt = np.copy(y_hat)
        max_iter = 1000
        for iter in range(max_iter):
            print("iter {}".format(iter))
            print("yt {}\n{}".format(yt, np.nonzero(yt)))
            y_prev = np.copy(yt)
            gradient = self._get_cost_augment_grad(x, y, w, yt)
            print("gradient {}".format(gradient))

            yt = yt + 0.001 * gradient
            yt[yt <= 0.] = 0.
            yt[yt >= 1.0] = 1.0
            gap_y = np.linalg.norm(yt - y_prev) ** 2
            if gap_y < 1e-6:
                break
        return yt
Example #10
 def __init__(self,
              name,
              num_models,
              dataset,
              bfgs_iter=100,
              debug=False,
              scale=[],
              num_layers=[],
              layer_sizes=[],
              activations=[],
              l1=0,
              l2=0):
     self.name = name
     self.num_models = num_models
     self.dataset = dataset
     self.bfgs_iter = bfgs_iter
     self.debug = debug
     self.num_layers = np.copy(num_layers)
     self.layer_sizes = np.copy(layer_sizes)
     self.activations = np.copy(activations)
     self.l1 = l1
     self.l2 = l2
     self.scale = np.copy(scale)
     self.construct_model()
     self.best_constr = np.inf
     self.best_y = np.zeros(self.outdim)
     self.best_y[0] = np.inf
     # get best_y from the dataset
     if 'train_x' in self.dataset:
         self.get_best_y(self.dataset['train_x'], self.dataset['train_y'])
     else:
         self.get_best_y(self.dataset['high_x'], self.dataset['high_y'])
Example #11
    def _cost_batched(self,
                      inputs,
                      targets,
                      hprev,
                      Cprev,
                      weights,
                      disable_tqdm=True):
        W_1, b_1, W_f, b_f, W_i, b_i, W_c, b_c, W_o, b_o, W_2, b_2 = weights
        h = np.copy(hprev)
        C = np.copy(Cprev)
        h = h.reshape((self.batch_size, self.h_size, 1))
        C = C.reshape((self.batch_size, self.h_size, 1))
        loss = 0
        # W_sth_dropout = get_dropout_function((self.h_size, self.h_size + self.x_size), self.keep_prob)
        # b_sth_dropout = get_dropout_function((self.h_size,), self.keep_prob)
        # W_dropout = get_dropout_function((self.y_size, self.h_size), self.keep_prob)
        # b_dropout = get_dropout_function((self.y_size,), self.keep_prob)
        cell_dropout = get_dropout_function((self.batch_size, self.h_size, 1),
                                            self.keep_prob)
        y_dropout = get_dropout_function((self.batch_size, self.y_size, 1),
                                         self.keep_prob)
        for t in tqdm(range(len(inputs)), disable=disable_tqdm):
            x = np.array([char_to_one_hot(c) for c in inputs[:, t]])
            x = x.reshape((self.batch_size, -1, 1))

            x = np.matmul(W_1, x) + np.reshape(b_1, (-1, 1))
            x = cell_dropout(x)

            f = sigmoid(
                np.matmul(W_f, np.concatenate((h, x), axis=1)) +
                np.reshape(b_f, (-1, 1)))
            f = cell_dropout(f)
            i = sigmoid(
                np.matmul(W_i, np.concatenate((h, x), axis=1)) +
                np.reshape(b_i, (-1, 1)))
            i = cell_dropout(i)
            C_hat = np.tanh(
                np.matmul(W_c, np.concatenate((h, x), axis=1)) +
                np.reshape(b_c, (-1, 1)))
            C_hat = cell_dropout(C_hat)
            C = f * C + i * C_hat
            C = cell_dropout(C)
            o = sigmoid(
                np.matmul(W_o, np.concatenate((h, x), axis=1)) +
                np.reshape(b_o, (-1, 1)))
            o = cell_dropout(o)
            h = o * np.tanh(C)
            h = cell_dropout(h)
            ys = np.matmul(W_2, h) + np.reshape(b_2, (-1, 1))
            ys = y_dropout(ys)

            target_indices = np.array(
                [char_to_index[c] for c in targets[:, t]])
            # ps_target[t] = np.exp(ys[t][target_index])/np.sum(np.exp(ys[t]))  # probability for next chars being target
            # loss += -np.log(ps_target[t])
            loss += np.sum([
                -(y[target_index] - logsumexp(y))
                for y, target_index in zip(ys, target_indices)
            ]) / (self.number_of_steps * self.batch_size)
        return loss
Example #12
    def _cost(self, inputs, targets, hprev, Cprev, weights, disable_tqdm=True):
        W_1, b_1, W_f, b_f, W_i, b_i, W_c, b_c, W_o, b_o, W_2, b_2 = weights
        h = np.copy(hprev)
        C = np.copy(Cprev)

        loss = 0
        for t in tqdm(range(len(inputs)), disable=disable_tqdm):
            x = char_to_one_hot(inputs[t])

            x = np.matmul(W_1, x) + b_1

            f = sigmoid(np.matmul(W_f, np.concatenate((h, x))) + b_f)
            i = sigmoid(np.matmul(W_i, np.concatenate((h, x))) + b_i)
            C_hat = np.tanh(np.matmul(W_c, np.concatenate((h, x))) + b_c)
            C = f * C + i * C_hat
            o = sigmoid(np.matmul(W_o, np.concatenate((h, x))) + b_o)
            h = o * np.tanh(C)
            y = np.matmul(W_2, h) + b_2

            target_index = char_to_index[targets[t]]
            # ps_target[t] = np.exp(ys[t][target_index])/np.sum(np.exp(ys[t]))  # probability for next chars being target
            # loss += -np.log(ps_target[t])
            loss += -(y[target_index] - logsumexp(y))

        loss = loss / len(inputs)
        return loss
def validation_points_error(Xi, Xo, Hestimated):
    Xi = np.copy(Xi)
    Xo = np.copy(Xo)
    total = 0
    for i in range(Xo.shape[1]):
        total += geometric_distance(Xo[:, i], Xi[:, i], Hestimated)
    return total / Xo.shape[1]
    def train(self, scale=1.0):
        theta = self.rand_theta(scale)
        self.loss = np.inf
        theta0 = np.copy(theta)
        self.theta = np.copy(theta)

        def loss(theta):
            nlz = self.neg_likelihood(theta)
            return nlz

        gloss = grad(loss)
        
        try:
            fmin_l_bfgs_b(loss, theta0, gloss, maxiter=self.bfgs_iter, m=100, iprint=self.debug)
        except np.linalg.LinAlgError:
            print('Increasing noise term and re-optimizing')
            theta0 = np.copy(self.theta)
            theta0[1] += np.log(10)
            theta0[2] += np.log(10)
            try:
                fmin_l_bfgs_b(loss, theta0, gloss, maxiter=self.bfgs_iter, m=10, iprint=self.debug)
            except:
                print('Exception caught, L-BFGS early stopping...')
                if self.debug:
                    print(traceback.format_exc())
        except:
            print('Exception caught, L-BFGS early stopping...')
            if self.debug:
                print(traceback.format_exc())

        if(np.isnan(self.loss) or np.isinf(self.loss)):
            print('Fail to build GP model')
            sys.exit(1)

        self.alpha = chol_inv(self.L, self.y.T)
Example #15
    def __init__(self, dimension, inputs, obs, mini_batch=False):
        """These functions implement a standard multi-layer perceptron,
        vectorized over both training examples and weight samples."""
        self.dimension = dimension
        self.prior = FiniteDimensionalPrior(self.dimension)

        self.inputs = inputs
        self.inputs_size = len(inputs)
        self.obs = obs

        self.mini_batch = mini_batch
        if self.mini_batch:
            self.it = 0
            self.mini_batch_size = 32
            self.number_batchs = int(
                np.ceil(self.inputs_size / self.mini_batch_size))

            self.inputs_all = np.copy(inputs)
            self.obs_all = np.copy(obs)

            self.inputs = inputs[:self.mini_batch_size]
            self.obs = obs[:self.mini_batch_size]

        self.gx = grad(self.cost)
        self.J = jacobian(self.forward)

        self.hx = hessian_vector_product(self.cost)
        self.hvp = hvp(self.hx)
Example #16
def leapfrog(M, q, p, dVdq, path_len, step_size):
    """Leapfrog integrator for standard HMC and naive SGHMC

    Parameters
    ----------
    M : np.matrix
      Mass of the Euclidean-Gaussian kinetic energy of shape D x D
    q : np.floatX
      Initial position
    p : np.floatX
      Initial momentum
    dVdq : callable
      Gradient of the potential energy with respect to position
    path_len : float
      How long to integrate for
    step_size : float
      How long each integration step should be

    Returns
    -------
    q, p : np.floatX, np.floatX
      New position and momentum
    """
    q, p = np.copy(q), np.copy(p)
    Minv = np.linalg.inv(M)

    p -= step_size * dVdq(q) / 2  # half step
    for _ in range(int(path_len / step_size) - 1):
        q += step_size * np.dot(Minv, p)  # whole step
        p -= step_size * dVdq(q)  # whole step
    q += step_size * np.dot(Minv, p)  # whole step
    p -= step_size * dVdq(q) / 2  # half step

    # momentum flip at end
    return q, -p
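A minimal sketch, assuming a 2-D standard-normal target (dVdq(q) = q) and an identity mass matrix, in which case this reduces to the plain leapfrog shown earlier:

import numpy as np

q1, p1 = leapfrog(np.eye(2), np.zeros(2), np.ones(2), dVdq=lambda q: q,
                  path_len=1.0, step_size=0.1)
print(q1, p1)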
Example #17
    def drift_fd(self, param, pos, eps=1E-6):
        """Compute the drift force on the points.
        Args :
            pos : position of the electrons
            metod (string) : mehod to compute the derivatives
        Returns : value of Grad Psi
        """

        ndim = pos.shape[1]
        out = np.zeros_like(pos)

        for icol in range(ndim):

            pos_tmp = np.copy(pos)
            pos_tmp[:, icol] += eps
            feps = self.values(param, pos_tmp)

            pos_tmp = np.copy(pos)
            pos_tmp[:, icol] -= eps
            feps -= self.values(param, pos_tmp)

            out[:, icol] = feps.reshape(-1)/(2*eps)

        if self.ndim_tot == 1:
            return 2*out.reshape(-1, 1)/self.values(param, pos)
        else:
            return 2*out/self.values(param, pos)
Example #18
    def optimize_constr(self, x):
        x0 = np.copy(x).reshape(-1)
        best_x = np.copy(x)
        best_loss = np.inf
        tmp_loss = np.inf

        def loss(x0):
            nonlocal tmp_loss
            x0 = x0.reshape(self.dim, -1)
            py, ps2 = self.models[0].predict(x0)
            tmp_loss = py.sum()
            for i in range(1, self.outdim):
                py, ps2 = self.models[i].predict(x0)
                tmp_loss += np.maximum(0, py).sum()
            return tmp_loss

        def callback(x):
            nonlocal best_x
            nonlocal best_loss
            if tmp_loss < best_loss:
                best_loss = tmp_loss
                best_x = np.copy(x)

        gloss = value_and_grad(loss)
        try:
            #starting point for fmin_l_bfgs_b should be a one-dimensional vector
            fmin_l_bfgs_b(gloss,
                          x0,
                          bounds=[[-0.5, 0.5]] * x.size,
                          maxiter=2000,
                          m=100,
                          iprint=self.debug,
                          callback=callback)
        except np.linalg.LinAlgError:
            print(
                'Optimizing constraints. Increasing noise term and re-optimizing'
            )
            x0 = np.copy(best_x).reshape(-1)
            x0[0] += 0.01
            try:
                fmin_l_bfgs_b(gloss,
                              x0,
                              bounds=[[-0.5, 0.5]] * x.size,
                              maxiter=2000,
                              m=10,
                              iprint=self.debug,
                              callback=callback)
            except:
                print(
                    'Optimizing constraints. Exception caught, L-BFGS early stopping...'
                )
                print(traceback.format_exc())
        except:
            print(
                'Optimizing constraints. Exception caught, L-BFGS early stopping...'
            )
            print(traceback.format_exc())

        best_x = best_x.reshape(self.dim, -1)
        return best_x
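In the snippet above, gloss = value_and_grad(loss) returns the pair (value, gradient), which fmin_l_bfgs_b accepts directly as its objective when no separate fprime is passed. A minimal sketch of that pattern on a toy quadratic, assuming autograd and SciPy are available:

import autograd.numpy as np
from autograd import value_and_grad
from scipy.optimize import fmin_l_bfgs_b

def f(x):
    return np.sum((x - 1.0) ** 2)

x_opt, f_opt, info = fmin_l_bfgs_b(value_and_grad(f), np.zeros(3),
                                   bounds=[(-5.0, 5.0)] * 3)
print(x_opt)  # approximately [1. 1. 1.]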
Example #19
def backtracking(weights,
                 levels,
                 steps,
                 loss,
                 gradients,
                 l_rate,
                 x,
                 idx_to_skip,
                 learn_levels,
                 learn_steps,
                 variables,
                 n_layers_learn,
                 loss_init=None,
                 cst_mul=2.,
                 n_tries=30,
                 l_rate_min=1e-6):
    if loss_init is None:
        loss_init = loss(optim_vars, x)
    l_rate *= cst_mul
    for _ in range(n_tries):
        new_vars = change_params(deepcopy(weights), np.copy(levels),
                                 np.copy(steps), l_rate, gradients,
                                 idx_to_skip, learn_levels, learn_steps,
                                 variables, n_layers_learn)
        new_loss = loss(new_vars, x)
        if new_loss < loss_init or l_rate < l_rate_min:
            break
        l_rate /= cst_mul
    return new_vars, new_loss, l_rate
Example #20
    def sample(self, n_samples=2000, observed_states=None, random_state=None):
        """Generate random samples from the self.

        Parameters
        ----------
        n_samples : int
            Number of samples to generate.

        observed_states : array
            If provided, states are not sampled.

        random_state: RandomState or an int seed
            A random number generator instance. If None is given, the
            object's random_state is used

        Returns
        -------
        samples : array_like, length (``n_samples``)
                  List of samples

        states : array_like, shape (``n_samples``)
                 List of hidden states (accounting for tied states by giving
                 them the same index)
        """
        if random_state is None:
            random_state = self.random_state
        random_state = check_random_state(random_state)

        samples = np.zeros(n_samples)
        states = np.zeros(n_samples, dtype=int)

        if observed_states is None:
            startprob_pdf = np.exp(np.copy(self._log_startprob))
            startdist = stats.rv_discrete(name='custm',
                                      values=(np.arange(startprob_pdf.shape[0]),
                                                        startprob_pdf),
                                      seed=random_state)
            states[0] = startdist.rvs(size=1)[0]

            transmat_pdf = np.exp(np.copy(self._log_transmat))
            transmat_cdf = np.cumsum(transmat_pdf, 1)

            nrand = random_state.rand(n_samples)
            for idx in range(1,n_samples):
                newstate = (transmat_cdf[states[idx-1]] > nrand[idx-1]).argmax()
                states[idx] = newstate
        else:
            states = observed_states

        mu = np.copy(self._mu_)
        precision = np.copy(self._precision_)
        for idx in range(n_samples):
            mean_ = self._mu_[states[idx]]
            var_ = np.sqrt(1/precision[states[idx]])
            samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                    random_state=random_state)
        states = self._process_sequence(states)
        return samples, states
Example #21
    def optimize_wEI(self, x):
        x0 = np.copy(x).reshape(-1)
        best_x = np.copy(x)
        best_loss = np.inf
        tmp_loss = np.inf

        def loss(x0):
            nonlocal tmp_loss
            x0 = x0.reshape(self.dim, -1)
            tmp_loss = -self.calc_log_wEI_approx(x0)
            tmp_loss = tmp_loss.sum()
            return tmp_loss

        def callback(x):
            nonlocal best_x
            nonlocal best_loss
            if tmp_loss < best_loss:
                best_loss = tmp_loss
                best_x = np.copy(x)

        gloss = value_and_grad(loss)

        try:
            fmin_l_bfgs_b(gloss,
                          x0,
                          bounds=[[-0.5, 0.5]] * x.size,
                          maxiter=2000,
                          m=100,
                          iprint=self.debug,
                          callback=callback)
        except np.linalg.LinAlgError:
            print(
                'Acquisition function optimization error. Increasing noise term and re-optimizing'
            )
            x0 = np.copy(best_x).reshape(-1)
            x0[0] += 0.01
            try:
                fmin_l_bfgs_b(gloss,
                              x0,
                              bounds=[[-0.5, 0.5]] * x.size,
                              maxiter=2000,
                              m=10,
                              iprint=self.debug,
                              callback=callback)
            except:
                print(
                    'Optimizing acquisition function, Exception caught, L-BFGS early stopping...'
                )
                print(traceback.format_exc())
        except:
            print(
                'Optimizing acquisition function, Exception caught, L-BFGS early stopping...'
            )
            print(traceback.format_exc())

        best_x = best_x.reshape(self.dim, -1)
        return best_x
def transfer_error(Xo, Xi, H):
    """transfer error including normalization
  Xo: Object points in 2D Homogeneous Coordinates (3xn)
  Xi: Image points in 2D Homogeneous Coordinates (3xn)
  """
    Xo = np.copy(Xo)
    Xi = np.copy(Xi)
    H = np.copy(H)
    return d(Xi, np.dot(H, Xo))
Example #23
 def __init__(self, ll, lu, path):
     self.ll = np.copy(ll)
     self.lu = np.copy(lu)
     self.path1 = path
     self.num_layers = self.lu.size
     self.zeta = 0.1
     self.calc_lm()
     self.calc_path2()
     self.path_lengths()
Example #24
    def _init_params(self, data, lengths=None, params='stmp'):
        X = data['obs']

        if 's' in params:
            self.startprob_.fill(1.0 / self.n_components)

        if 't' in params or 'm' in params or 'p' in params:

            kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                   random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_

        if 't' in params:
            # TODO: estimate transitions from data (!) / consider n_tied=1
            if self.n_tied == 0:
                transmat = np.ones([self.n_components, self.n_components])
                np.fill_diagonal(transmat, 10.0)
                self.transmat_ = transmat  # .90 for self-transition

            else:
                transmat = np.zeros((self.n_components, self.n_components))
                transmat[range(self.n_components),
                         range(self.n_components)] = 100.0  # diagonal
                transmat[range(self.n_components - 1),
                         range(1, self.n_components)] = 1.0  # diagonal + 1
                transmat[[
                    r * (self.n_chain) - 1
                    for r in range(1, self.n_unique + 1)
                    for c in range(self.n_unique - 1)
                ], [
                    c * (self.n_chain) for r in range(self.n_unique)
                    for c in range(self.n_unique) if c != r
                ]] = 1.0

                self.transmat_ = np.copy(transmat)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    mu_init[u][f] = kmeans[u, f]

            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = np.zeros(
                (self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                if self.n_features == 1:
                    precision_init[u] = np.linalg.inv(
                        np.cov(X[kmmod.labels_ == u], bias=1))
                else:
                    precision_init[u] = np.linalg.inv(
                        np.cov(np.transpose(X[kmmod.labels_ == u])))

            self.precision_ = np.copy(precision_init)
Example #25
    def train(self):
        theta0 = self.get_default_theta()
        self.loss = np.inf
        self.theta = np.copy(theta0)

        nlz = self.neg_log_likelihood(theta0)

        def loss(theta):
            nlz = self.neg_log_likelihood(theta)
            return nlz

        def callback(theta):
            if self.nlz < self.loss:
                self.loss = self.nlz
                self.theta = np.copy(theta)

        gloss = value_and_grad(loss)
        try:
            fmin_l_bfgs_b(gloss,
                          theta0,
                          maxiter=self.bfgs_iter,
                          m=100,
                          iprint=self.debug,
                          callback=callback)
        except np.linalg.LinAlgError:
            print('GP. Increasing noise term and re-optimizing.')
            theta0 = np.copy(self.theta)
            theta0[0] += np.log(10)
            try:
                fmin_l_bfgs_b(gloss,
                              theta0,
                              maxiter=self.bfgs_iter,
                              m=10,
                              iprint=self.debug,
                              callback=callback)
            except:
                print('GP. Exception caught, L-BFGS early stopping...')
                if self.debug:
                    print(traceback.format_exc())
        except:
            print('GP. Exception caught, L-BFGS early stopping...')
            if self.debug:
                print(traceback.format_exc())

        sn2 = np.exp(self.theta[0])
        hyp = self.theta[1:]
        K = self.kernel(self.train_x, self.train_x, hyp) + sn2 * np.eye(
            self.num_train) + self.jitter * np.eye(self.num_train)
        self.L = np.linalg.cholesky(K)
        self.alpha = chol_inv(self.L, self.train_y.T)
        if self.k:
            self.for_diag = np.exp(self.theta[1]) * np.exp(
                self.theta[3]) + np.exp(self.theta[3 + self.dim])
        else:
            self.for_diag = np.exp(self.theta[1])
        print('GP. Finished training process.')
Example #26
def dlyap(A, Q):
    """
    Solve the discrete-time Lyapunov equation.
    Wrapper around scipy.linalg.solve_discrete_lyapunov.
    Pass a copy of input matrices to protect them from modification.
    """
    try:
        return solve_discrete_lyapunov(np.copy(A), np.copy(Q))
    except ValueError:
        return np.full_like(Q, np.inf)
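A quick sanity check, assuming a stable A: solve_discrete_lyapunov solves A X A^T - X + Q = 0, so for A = 0.5 * I and Q = I the solution is X = I / (1 - 0.25).

import numpy as np

print(dlyap(0.5 * np.eye(2), np.eye(2)))  # approximately 1.3333 * identity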
def sym_transfer_error(Xo, Xi, H):
    """Symetric transfer error
  Xo: Object points in 2D Homogeneous Coordinates (3xn)
  Xi: Image points in 2D Homogeneous Coordinates (3xn)
  """
    Xo = np.copy(Xo)
    Xi = np.copy(Xi)
    H = np.copy(H)
    error1 = d(Xi, np.dot(H, Xo))
    error2 = d(Xo, np.dot(np.linalg.inv(H), Xi))
    return error1 + error2
Example #28
def fit_new_py(x, model):
    x0 = np.copy(x).reshape(-1)
    best_x = np.copy(x)
    best_loss = np.inf
    tmp_loss = np.inf

    def loss(x0):
        nonlocal tmp_loss
        x0 = x0.reshape(model.dim, -1)
        py, ps2 = model.models[0].predict(x0)
        tmp_loss = py.sum()
        for i in range(1, model.outdim):
            py, ps2 = model.models[i].predict(x0)
            tmp_loss += np.maximum(0, py).sum()
        return tmp_loss

    def callback(x):
        nonlocal best_loss
        nonlocal best_x
        if tmp_loss < best_loss:
            best_loss = tmp_loss
            best_x = np.copy(x)

    gloss = value_and_grad(loss)

    try:
        fmin_l_bfgs_b(gloss,
                      x0,
                      bounds=[[-0.5, 0.5]] * x.size,
                      maxiter=2000,
                      m=100,
                      iprint=model.debug,
                      callback=callback)
    except np.linalg.LinAlgError:
        print('Fit_new_py. Increasing noise term and re-optimizing')
        x0 = np.copy(best_x).reshape(-1)
        x0[0] += 0.01
        try:
            fmin_l_bfgs_b(gloss,
                          x0,
                          bounds=[[-0.5, 0.5]] * x.size,
                          maxiter=2000,
                          m=10,
                          iprint=model.debug,
                          callback=callback)
        except:
            print('Fit_new_py. Exception caught, L-BFGS early stopping...')
            print(traceback.format_exc())
    except:
        print('Fit_new_py. Exception caught, L-BFGS early stopping...')
        print(traceback.format_exc())

    return best_x
Example #29
def fit_new_py(x, model):
    x0 = np.copy(x).reshape(-1)
    best_x = np.copy(x)
    best_loss = np.inf

    def loss(x0):
        nonlocal best_x
        nonlocal best_loss
        x0 = x0.reshape(model.dim, -1)
        py, ps2 = model.models[0].predict(x0)
        tmp_loss = py.sum()
        for i in range(1, model.outdim):
            py, ps2 = model.models[i].predict(x0)
            tmp_loss += np.maximum(0, py).sum()
        if tmp_loss < best_loss:
            best_loss = tmp_loss
            best_x = np.copy(x0)
        return tmp_loss

    gloss = grad(loss)

    try:
        fmin_l_bfgs_b(loss,
                      x0,
                      gloss,
                      bounds=[[-0.5, 0.5]] * x.size,
                      maxiter=2000,
                      m=100,
                      iprint=model.debug)
    except np.linalg.LinAlgError:
        print('Fit_new_py. Increasing noise term and re-optimizing')
        x0 = np.copy(best_x).reshape(-1)
        x0[0] += 0.01
        try:
            fmin_l_bfgs_b(loss,
                          x0,
                          gloss,
                          bounds=[[-0.5, 0.5]] * x.size,
                          maxiter=2000,
                          m=10,
                          iprint=model.debug)
        except:
            print('Fit_new_py. Exception caught, L-BFGS early stopping...')
            print(traceback.format_exc())
    except:
        print('Fit_new_py. Exception caught, L-BFGS early stopping...')
        print(traceback.format_exc())

    if (np.isnan(best_loss) or np.isinf(best_loss)):
        print('Fit_new_py. Fail to build GP model')
        sys.exit(1)

    return best_x
Example #30
 def get_best_y(self, x, y):
     for i in range(y.shape[1]):
         constr = np.maximum(y[1:, i], 0).sum()
         if constr < self.best_constr and self.best_constr > 0:
             self.best_constr = constr
             self.best_y = np.copy(y[:, i])
             self.best_x = np.copy(x[:, i])
         elif constr <= 0 and self.best_constr <= 0 and y[
                 0, i] < self.best_y[0]:
             self.best_constr = constr
             self.best_y = np.copy(y[:, i])
             self.best_x = np.copy(x[:, i])
Example #31
def geometric_distance(Xo,Xi,H):
  """
  Xi point measured in the image
  Xo real value of the model point
  H an estimated homography
  as defined in Multiple View Geometry in Computer vision
  """
  Xo = np.copy(Xo)
  Xi = np.copy(Xi)
  H = np.copy(H)
  Xio = np.dot(H,Xo)
  return np.sqrt((Xi[0]/Xi[2] - Xio[0]/Xio[2])**2+(Xi[1]/Xi[2] - Xio[1]/Xio[2])**2)
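A toy check, assuming homogeneous 2-D points: with the identity homography and a point compared against itself, the distance is zero.

import numpy as np

Xo = np.array([2.0, 4.0, 2.0])  # homogeneous (x, y, w)
print(geometric_distance(Xo, Xo, np.eye(3)))  # 0.0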
Example #32
    def _init_params(self, data, lengths=None, params='stmp'):
        X = data['obs']

        if 's' in params:
            self.startprob_.fill(1.0 / self.n_components)

        if 't' in params or 'm' in params or 'p' in params:

            kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                   random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_

        if 't' in params:
            # TODO: estimate transitions from data (!) / consider n_tied=1
            if self.n_tied == 0:
                transmat = np.ones([self.n_components, self.n_components])
                np.fill_diagonal(transmat, 10.0)
                self.transmat_ = transmat  # .90 for self-transition

            else:
                transmat = np.zeros((self.n_components, self.n_components))
                transmat[range(self.n_components),
                         range(self.n_components)] = 100.0  # diagonal
                transmat[range(self.n_components-1),
                         range(1, self.n_components)] = 1.0  # diagonal + 1
                transmat[[r * (self.n_chain) - 1
                          for r in range(1, self.n_unique+1)
                          for c in range(self.n_unique-1)],
                         [c * (self.n_chain)
                          for r in range(self.n_unique)
                          for c in range(self.n_unique) if c != r]] = 1.0

                self.transmat_ = np.copy(transmat)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    mu_init[u][f] = kmeans[u, f]

            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = np.zeros((self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                if self.n_features == 1:
                    precision_init[u] = np.linalg.inv(np.cov(X[kmmod.labels_ == u], bias = 1))
                else:
                    precision_init[u] = np.linalg.inv(np.cov(np.transpose(X[kmmod.labels_ == u])))

            self.precision_ = np.copy(precision_init)
Example #33
    def _set_startprob(self, startprob):

        if startprob is None:
            startprob = np.tile(1.0 / self.n_components, self.n_components)
        else:
            startprob = np.asarray(startprob, dtype=float)

            if not np.all(startprob <= 1.0):
                normalize(startprob)

            if len(startprob) != self.n_components:
                if len(startprob) == self.n_unique:
                    startprob_split = np.copy(startprob) / (1.0+self.n_tied)
                    startprob = np.zeros(self.n_components)
                    for u in range(self.n_unique):
                        for t in range(self.n_chain):
                            startprob[u*(self.n_chain)+t] = \
                                startprob_split[u].copy()
                else:
                    raise ValueError("cannot match shape of startprob")

        if not np.allclose(np.sum(startprob), 1.0):
            raise ValueError('startprob must sum to 1.0')

        self._log_startprob = np.log(np.asarray(startprob).copy())
Example #34
    def _obj_grad(self, wrt, m, p, a, xn, xln, gn, entries='all', **kwargs):
        m = m.reshape(self.n_unique, self.n_features, 1)  # tm

        if wrt == 'm':
            wrt_num = 0
        elif wrt == 'p':
            wrt_num = 1
        elif wrt == 'a':
            wrt_num = 2
        else:
            raise ValueError('unknown parameter')
        res = grad(self._obj, wrt_num)(m, p, a, xn, xln, gn)

        if wrt == 'p' and self.n_features > 1:
            if entries == 'diag':
                res_new = \
                np.zeros((self.n_unique, self.n_features, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        res_new[u,f,f] = res[u,f,f]
                res = np.copy(res_new)

            elif entries == 'offdiag':
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        res[u,f,f] = 0.

        res = np.array([res])
        return res
Example #35
def hard_thr(x, lambdaPar, lower=None, upper=None):
    out = np.copy(x)
    out[np.abs(x) < lambdaPar] = 0.0

    if lower is not None:
        out[out < lower] = 0.0
    if upper is not None:
        out[out > upper] = 0.0
    return out
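A small example of the thresholding behaviour, assuming a 1-D input:

import numpy as np

print(hard_thr(np.array([-2.0, 0.5, 3.0]), 1.0))             # [-2.  0.  3.]
print(hard_thr(np.array([-2.0, 0.5, 3.0]), 1.0, upper=2.5))  # [-2.  0.  0.]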
Example #36
def draw(linePoints, points):
    linePoints = np.copy(linePoints)
    points = np.copy(points)
    #assert False # not implemented
    canvasSize = (256, 256, 3)
    scale = 20
    offset = (5, 5)

    linePoints += offset
    linePoints *= scale

    p1 = linePoints[0]
    p2 = linePoints[1]

    p1 = (int(p1[0]), int(p1[1]))
    p2 = (int(p2[0]), int(p2[1]))

    canvas = np.zeros(canvasSize, dtype=np.uint8)
    print(p1, p2)
    #cv2.circle(canvas, p1, 2, (0, 255, 0), 2)
    #cv2.circle(canvas, p2, 2, (0, 255, 0), 2)
    cv2.line(canvas, p1, p2, (0, 255, 0))
    cv2.imshow('', canvas)
    cv2.waitKey()
Example #37
    def _set_transmat_prior(self, transmat_prior_val):
        # new val needs be n_unique x n_unique
        # internally, n_components x n_components
        # _ntied_transmat_prior is
        # called to get n_components x n_components
        transmat_prior_new = np.zeros((self.n_components, self.n_components))
        if transmat_prior_val is not None:

            if transmat_prior_val.shape == (self.n_unique, self.n_unique):
                transmat_prior_new = \
                np.copy(self._ntied_transmat_prior(transmat_prior_val))

            else:
                raise ValueError("cannot match shape of transmat_prior")


        self.transmat_prior = transmat_prior_new
Example #38
    def _set_startprob_prior(self, startprob_prior):
        if startprob_prior is None or startprob_prior == 1.0:
            startprob_prior = np.zeros(self.n_components)
        else:
            startprob_prior = np.asarray(startprob_prior, dtype=float)

            if len(startprob_prior) != self.n_components:
                if len(startprob_prior) == self.n_unique:
                    startprob_prior_split = np.copy(startprob_prior) / \
                        (1.0 + self.n_tied)
                    startprob_prior = np.zeros(self.n_components)
                    for u in range(self.n_unique):
                        for t in range(self.n_chain):
                            startprob_prior[u*(self.n_chain)+t] = \
                                startprob_prior_split[u].copy()
                else:
                    raise ValueError("cannot match shape of startprob")

        self.startprob_prior = np.asarray(startprob_prior).copy()
Example #39
    def _set_transmat(self, transmat_val):
        if transmat_val is None:
            transmat = np.tile(1.0 / self.n_components,
                               (self.n_components, self.n_components))
        else:
            transmat_val[np.isnan(transmat_val)] = 0.0
            normalize(transmat_val, axis=1)

            if (np.asarray(transmat_val).shape == (self.n_components,
                                                   self.n_components)):
                transmat = np.copy(transmat_val)
            elif transmat_val.shape[0] == self.n_unique:
                transmat = self._ntied_transmat(transmat_val)
            else:
                raise ValueError("cannot match shape of transmat")

        if not np.all(np.allclose(np.sum(transmat, axis=1), 1.0)):
            raise ValueError('Rows of transmat must sum to 1.0')
        self._log_transmat = np.log(np.asarray(transmat).copy())
        underflow_idx = np.isnan(self._log_transmat)
        self._log_transmat[underflow_idx] = NEGINF
Example #40
    def _do_mstep_grad(self, gn, data):

        wrt = [str(p) for p in self.wrt if str(p) in self.params]

        for update_idx in range(self.n_iter_update):
            for p in wrt:
                if p in 'm':
                    optim_x0 = self.mu_
                    newv = self._do_optim(p, optim_x0, gn, data)
                    self.mu_ = newv
                elif p in 'a':
                    optim_x0 = self.alpha_
                    newv = self._do_optim(p, optim_x0, gn, data)
                    self.alpha_ = newv
                elif p == 'p':
                    optim_x0 = self.precision_

                    # update just diagonal
                    newv = self._do_optim(p, optim_x0, gn, data, entries='diag')

                    template = np.copy(self.precision_)
                    for u in range(self.n_unique):
                        for f in range(self.n_features):
                            template[u,f,f] = newv[u,f,f]

                    self.precision_ = template

                    optim_x0 = self.precision_

                    # update just off diagonal
                    newv = self._do_optim(p, optim_x0, gn, data, entries='offdiag')
                    for u in range(self.n_unique):
                        for f in range(self.n_features):
                            newv[u,f,f] = self.precision_[u,f,f] + 0.
                    # ensure that precision matrix is symmetric
                    for u in range(self.n_unique):
                        newv[u,:,:] = (newv[u,:,:] + newv[u,:,:].T)/2.0

                    self.precision_ = newv
Example #41
    def finite_difference_second_order_(self, func, x):
        n_dim = len(x)
        func_x = func(x)
        hessian = np.zeros((n_dim, n_dim))
        for i in range(n_dim):
            for j in range(n_dim):
                x_copy = np.copy(x)
                x_copy[i] += self.finite_diff_eps
                x_copy[j] += self.finite_diff_eps
                fpp = func(x_copy)

                x_copy = np.copy(x)
                x_copy[i] += self.finite_diff_eps
                fp_ = func(x_copy)

                x_copy = np.copy(x)
                x_copy[j] += self.finite_diff_eps
                f_p = func(x_copy)

                x_copy = np.copy(x)
                x_copy[i] -= self.finite_diff_eps
                fn_ = func(x_copy)

                x_copy = np.copy(x)
                x_copy[j] -= self.finite_diff_eps
                f_n = func(x_copy)
        
                x_copy = np.copy(x)
                x_copy[i] -= self.finite_diff_eps
                x_copy[j] -= self.finite_diff_eps
                fnn = func(x_copy)

                hessian[i, j] = fpp - fp_ - f_p - f_n - fn_ + fnn

        hessian = (hessian + 2*func_x) / (2*self.finite_diff_eps**2)
        return hessian
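The double loop above evaluates the central-difference stencil hessian[i, j] ~ [f(x + h*e_i + h*e_j) - f(x + h*e_i) - f(x + h*e_j) + 2*f(x) - f(x - h*e_i) - f(x - h*e_j) + f(x - h*e_i - h*e_j)] / (2*h**2). A standalone sketch of the same stencil on a test function with a known Hessian, assuming eps = 1e-4:

import numpy as np

def fd_hessian(func, x, eps=1e-4):
    # same stencil as finite_difference_second_order_ above
    n = len(x)
    fx = func(x)
    hessian = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            def shifted(di, dj):
                x_copy = np.copy(x)
                x_copy[i] += di * eps
                x_copy[j] += dj * eps
                return func(x_copy)
            hessian[i, j] = (shifted(1, 1) - shifted(1, 0) - shifted(0, 1)
                             - shifted(-1, 0) - shifted(0, -1)
                             + shifted(-1, -1) + 2 * fx)
    return hessian / (2 * eps ** 2)

def f(x):
    return x[0] ** 2 * x[1]

print(fd_hessian(f, np.array([1.0, 2.0])))  # approximately [[4., 2.], [2., 0.]]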
Example #42
    def build_ilqr_tracking_solver(self, ref_pnts, weight_mats):
        #figure out dimension
        self.T_ = len(ref_pnts)
        self.n_dims_ = len(ref_pnts[0])

        self.ref_array = np.copy(ref_pnts)
        self.weight_array = [mat for mat in weight_mats]
        #clone weight mats if there are not enough weight mats
        for i in range(self.T_ - len(self.weight_array)):
            self.weight_array.append(self.weight_array[-1])

        #build dynamics, second-order linear dynamical system
        self.A_ = np.eye(self.n_dims_*2)
        self.A_[0:self.n_dims_, self.n_dims_:] = np.eye(self.n_dims_) * self.dt_
        self.B_ = np.zeros((self.n_dims_*2, self.n_dims_))
        self.B_[self.n_dims_:, :] = np.eye(self.n_dims_) * self.dt_

        self.plant_dyn_ = lambda x, u, t, aux: np.dot(self.A_, x) + np.dot(self.B_, u)

        #build cost functions, quadratic ones
        def tmp_cost_func(x, u, t, aux):
            err = x[0:self.n_dims_] - self.ref_array[t]
            #autograd does not allow A.dot(B)
            cost = np.dot(np.dot(err, self.weight_array[t]), err) + np.sum(u**2) * self.R_
            if t > self.T_-1:
                #regularize velocity for the termination point
                #autograd does not allow self increment
                cost = cost + np.sum(x[self.n_dims_:]**2)  * self.R_ * self.Q_vel_ratio_
            return cost
        
        self.cost_ = tmp_cost_func
        self.ilqr_ = pylqr.PyLQR_iLQRSolver(T=self.T_-1, plant_dyn=self.plant_dyn_, cost=self.cost_, use_autograd=self.use_autograd)
        if not self.use_autograd:
            self.plant_dyn_dx_ = lambda x, u, t, aux: self.A_
            self.plant_dyn_du_ = lambda x, u, t, aux: self.B_
            
            def tmp_cost_func_dx(x, u, t, aux):
                err = x[0:self.n_dims_] - self.ref_array[t]
                grad = np.concatenate([2*err.dot(self.weight_array[t]), np.zeros(self.n_dims_)])
                if t > self.T_-1:
                    grad[self.n_dims_:] = grad[self.n_dims_:] + 2 * self.R_ * self.Q_vel_ratio_ * x[self.n_dims_:]
                return grad

            self.cost_dx_ = tmp_cost_func_dx

            self.cost_du_ = lambda x, u, t, aux: 2 * self.R_ * u

            def tmp_cost_func_dxx(x, u, t, aux):
                hessian = np.zeros((2*self.n_dims_, 2*self.n_dims_))
                hessian[0:self.n_dims_, 0:self.n_dims_] = 2 * self.weight_array[t]

                if t > self.T_-1:
                    hessian[self.n_dims_:, self.n_dims_:] = 2 * np.eye(self.n_dims_) * self.R_ * self.Q_vel_ratio_
                return hessian

            self.cost_dxx_ = tmp_cost_func_dxx

            self.cost_duu_ = lambda x, u, t, aux: 2 * self.R_ * np.eye(self.n_dims_)
            self.cost_dux_ = lambda x, u, t, aux: np.zeros((self.n_dims_, 2*self.n_dims_))

            #build an iLQR solver based on given functions...
            self.ilqr_.plant_dyn_dx = self.plant_dyn_dx_
            self.ilqr_.plant_dyn_du = self.plant_dyn_du_
            self.ilqr_.cost_dx = self.cost_dx_
            self.ilqr_.cost_du = self.cost_du_
            self.ilqr_.cost_dxx = self.cost_dxx_
            self.ilqr_.cost_duu = self.cost_duu_
            self.ilqr_.cost_dux = self.cost_dux_

        return
Example #43
    def _init_params(self, data, lengths=None, params='stmpaw'):
        X = data['obs']

        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
        else:
            if 's' in params:
                super(ARTHMM, self)._init_params(data, lengths, 's')

            if 't' in params:
                super(ARTHMM, self)._init_params(data, lengths, 't')

            if 'm' in params or 'a' in params or 'p' in params:
                kmmod = cluster.KMeans(
                    n_clusters=self.n_unique,
                    random_state=self.random_state).fit(X)
                kmeans = kmmod.cluster_centers_
                ar_mod = []
                ar_alpha = []
                ar_resid = []

                if not self.shared_alpha:
                    count = 0
                    for u in range(self.n_unique):
                        for f in range(self.n_features):
                            ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                            u,f]).fit(self.n_lags))
                            ar_alpha.append(ar_mod[count].params[1:])
                            ar_resid.append(ar_mod[count].resid)
                            count += 1
                else:
                    # run one AR model on most part of time series
                    # that has most points assigned after clustering
                    mf = np.argmax(np.bincount(kmmod.labels_))
                    for f in range(self.n_features):
                        ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                                    mf,f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[f].params[1:])
                        ar_resid.append(ar_mod[f].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u,f] = kmeans[u, f] - np.dot(
                        np.repeat(kmeans[u, f], self.n_lags), ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:

                precision_init = \
                np.zeros((self.n_unique, self.n_features, self.n_features))

                for u in range(self.n_unique):
                    if self.n_features == 1:
                        precision_init[u] = 1.0/(np.var(X[kmmod.labels_ == u]))

                    else:
                        precision_init[u] = np.linalg.inv\
                        (np.cov(np.transpose(X[kmmod.labels_ == u])))

                        # Alternative: Initialization using ar_resid
                        #for f in range(self.n_features):
                        #    if not self.shared_alpha:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[count])
                        #        count += 1
                        #    else:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[f])

                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = np.zeros((1, self.n_lags))
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    for u in range(self.n_unique):
                        ar_idx = 0
                        alpha_init[u] = ar_alpha[ar_idx]
                        ar_idx += self.n_features
                self.alpha_ = np.copy(alpha_init)
Example #44
    def _accumulate_sufficient_statistics(self, stats, X, framelogprob,
                                          posteriors, fwdlattice, bwdlattice):
        """Updates sufficient statistics from a given sample.
        Parameters
        ----------
        stats : dict
            Sufficient statistics as returned by
            :meth:`~base._BaseHMM._initialize_sufficient_statistics`.
        X : array, shape (n_samples, n_features)
            Sample sequence.
        framelogprob : array, shape (n_samples, n_components)
            Log-probabilities of each sample under each of the model states.
        posteriors : array, shape (n_samples, n_components)
            Posterior probabilities of each sample being generated by each
            of the model states.
        fwdlattice, bwdlattice : array, shape (n_samples, n_components)
            Log-forward and log-backward probabilities.
        """

        # Based on hmmlearn's _BaseHMM
        safe_transmat = self.transmat_ + np.finfo(float).eps

        stats['nobs'] += 1
        if 's' in self.params:
            stats['start'] += posteriors[0]
        if 't' in self.params:
            n_samples, n_components = framelogprob.shape
            # when the sample is of length 1, it contains no transitions
            # so there is no reason to update our trans. matrix estimate
            if n_samples <= 1:
                return

            lneta = np.zeros((n_samples - 1, n_components, n_components))
            _hmmc._compute_lneta(n_samples, n_components, fwdlattice,
                                 np.log(safe_transmat),
                                 bwdlattice, framelogprob, lneta)

            stats['trans'] += np.exp(logsumexp(lneta, axis=0))
            # stats['trans'] = np.round(stats['trans'])
            # if np.sum(stats['trans']) != X.shape[0]-1:
            #     warnings.warn("transmat counts != n_samples", RuntimeWarning)
            #     import pdb; pdb.set_trace()

            template = np.zeros((self.n_components, self.n_components))
            for u in range(self.n_components):
                template[u,u] = stats['trans'][u,u] + 0.


            for l in range(self.n_components - 1):
                template[l, (l + 1)] = stats['trans'][l, (l + 1)] + 0.


            for b in range(self.n_unique):
                transition_index = \
                [i * self.n_chain for i in range(self.n_unique)]
                transition_index.remove(b * self.n_chain)

                block = \
                stats['trans'][self.n_chain * b : self.n_chain * (b + 1)][:] + 0.

                template_block = \
                template[self.n_chain * b : self.n_chain * (b + 1)][:] + 0.

                for i in transition_index:
                    template_block[(self.n_chain - 1), i] = \
                    block[(self.n_chain - 1), i]

                template[self.n_chain * b : self.n_chain * (b + 1)][:] = \
                template_block

            stats['trans'] = np.copy(template)
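For reference, the quantity accumulated into stats['trans'] is the expected transition count xi summed over time. A minimal, self-contained sketch of that computation in plain NumPy (not the compiled _hmmc routine used above; array names follow the docstring):

import numpy as np
from scipy.special import logsumexp

def expected_transitions(fwdlattice, bwdlattice, framelogprob, log_transmat):
    # lneta[t, i, j] = log P(state_t = i, state_{t+1} = j | observations)
    n_samples, n_components = framelogprob.shape
    lneta = np.zeros((n_samples - 1, n_components, n_components))
    for t in range(n_samples - 1):
        for i in range(n_components):
            for j in range(n_components):
                lneta[t, i, j] = (fwdlattice[t, i] + log_transmat[i, j]
                                  + framelogprob[t + 1, j] + bwdlattice[t + 1, j])
    lneta -= logsumexp(fwdlattice[-1])          # divide by P(observations)
    return np.exp(logsumexp(lneta, axis=0))     # summed over time, like stats['trans']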
Beispiel #45
0
    def _do_mstep(self, stats, params):  # M-Step for startprob and transmat
        if 's' in params:
            startprob_ = self.startprob_prior + stats['start']
            normalize(startprob_)
            self.startprob_ = np.where(self.startprob_ <= np.finfo(float).eps,
                                       self.startprob_, startprob_)
        if 't' in params:

            if self.n_tied == 0:
                transmat_ = self.transmat_prior + stats['trans']
                normalize(transmat_, axis=1)
                self.transmat_ = np.where(self.transmat_ <= np.finfo(float).eps,
                                          self.transmat_, transmat_)
            else:
                transmat_ = np.zeros((self.n_components, self.n_components))
                transitionCnts = stats['trans'] + self.transmat_prior
                transition_index = [i * self.n_chain for i in range(self.n_unique)]

                for b in range(self.n_unique):

                    block = \
                    transitionCnts[self.n_chain * b : self.n_chain * (b + 1)][:] + 0.

                    denominator_diagonal = np.sum(block)
                    diagonal = 0.0

                    index_line = range(0, self.n_chain)
                    index_row = range(self.n_chain * b, self.n_chain * (b + 1))

                    for l, r in zip(index_line, index_row):
                        diagonal += (block[l][r])

                    for l, r in zip(index_line, index_row):
                        block[l][r] = diagonal / denominator_diagonal

                    self_transition = block[0][self.n_chain * b]
                    denominator_off_diagonal = \
                    (np.sum(block[self.n_chain-1])) - self_transition
                    template = block[self.n_chain - 1] + 0.

                    for entry in range(len(template)):
                        template[entry] = (template[entry] * (1 - self_transition)) \
                        / float(denominator_off_diagonal)

                    template[(self.n_chain * (b + 1)) - 1] = 0.
                    line_value = 1 - self_transition

                    for entry in range(len(template)):
                        line_value = line_value - template[entry]

                    for index in transition_index:
                        if index != (b * self.n_chain):
                            block[self.n_chain - 1][index] = \
                            line_value + template[index]

                    line = range(self.n_chain - 1)
                    row = [b * self.n_chain + i for i in range(1, self.n_chain)]

                    for x, y in zip(line, row):
                        block[x][y] = 1 - self_transition


                    transmat_[self.n_chain * b : self.n_chain * (b + 1)][:] = block

                self.transmat_ = np.copy(transmat_)
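In the untied branch the M-step is simply prior pseudo-counts plus expected transition counts, followed by a row normalization. A standalone sketch of that update (a hypothetical helper, not part of the class above):

import numpy as np

def mstep_transmat(trans_counts, transmat_prior=1.0):
    # MAP-style update: prior pseudo-counts + expected counts, then row-normalize
    transmat = transmat_prior + trans_counts
    return transmat / transmat.sum(axis=1, keepdims=True)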
Beispiel #46
0
    def sample(self, n_samples=2000, observed_states=None,
               init_samples=None, init_state=None, random_state=None):
        """Generate random samples from the self.

        Parameters
        ----------
        n_samples : int, default: 2000
            Number of samples to generate.

        observed_states : array
            If provided, states are not sampled.

        random_state : RandomState or an int seed
            A random number generator instance. If None is given, the
            object's random_state is used.

        init_state : int
            If provided, the initial state is not sampled.

        init_samples : array, default: None
            If provided, initial samples (for the AR process) are not sampled.

        Returns
        -------
        samples : array_like, length (``n_samples``)
                  List of samples

        states : array_like, shape (``n_samples``)
                 List of hidden states (accounting for tied states by giving
                 them the same index)
        """
        if random_state is None:
            random_state = self.random_state
        random_state = check_random_state(random_state)

        samples = np.zeros(n_samples)
        states = np.zeros(n_samples)

        order = self.n_lags

        if init_state is None:
            startprob_pdf = np.exp(np.copy(self._log_startprob))
            start_dist = stats.rv_discrete(name='custm',
                                      values=(np.arange(startprob_pdf.shape[0]),
                                                        startprob_pdf),
                                      seed=random_state)
            start_state = start_dist.rvs(size=1)[0]
        else:
            start_state = init_state

        if self.n_lags > 0:
            if init_samples is None:
                """
                n_init_samples = order + 10
                noise = np.sqrt(1.0/self._precision_[start_state]) * \
                        random_state.randn(n_init_samples)

                pad_after = n_init_samples - order - 1
                col = np.pad(1*self._alpha_[start_state, :], (1, pad_after),
                             mode='constant')
                row = np.zeros(n_init_samples)
                col[0] = row[0] = 1

                A = toeplitz(col, row)
                init_samples = np.dot(pinv(A), noise + self._mu_[start_state])
                # TODO: fix bug with n_lags > 1, blows up
                """
                init_samples = 0.01*np.ones((self.n_lags, self.n_features))  # temporary fix

        if observed_states is None:
            transmat_pdf = np.exp(np.copy(self._log_transmat))
            transmat_cdf = np.cumsum(transmat_pdf, 1)

            states[0] = (transmat_cdf[start_state] >
                         random_state.rand()).argmax()

            transmat_pdf = np.exp(self._log_transmat)
            transmat_cdf = np.cumsum(transmat_pdf, 1)

            nrand = random_state.rand(n_samples)
            for idx in range(1,n_samples):
                newstate = (transmat_cdf[states[idx-1]] > nrand[idx-1]).argmax()
                states[idx] = newstate
        else:
            states = observed_states

        precision = np.copy(self._precision_)
        for idx in range(n_samples):
            state_ = int(states[idx])
            # scale of the emission: standard deviation, not variance
            std_ = np.sqrt(1.0 / precision[state_])

            mean_ = np.copy(self._mu_[state_])
            if self.n_lags > 0:
                for lag in range(1, order + 1):
                    if idx < lag:
                        prev_ = init_samples[len(init_samples) - lag]
                    else:
                        prev_ = samples[idx - lag]
                    mean_ += self._alpha_[state_, lag - 1] * prev_

            samples[idx] = norm.rvs(loc=mean_, scale=std_, size=1,
                                    random_state=random_state)

        states = self._process_sequence(states)
        return samples, states
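The state sequence above is produced by inverse-CDF sampling of each row of the transition matrix: take the cumulative sum of the row and pick the first index whose cumulative mass exceeds a uniform draw. The same step in isolation (hypothetical helper name):

import numpy as np

def sample_next_state(transmat, current_state, random_state):
    # first index where the cumulative probability exceeds the uniform draw
    cdf = np.cumsum(transmat[current_state])
    return int((cdf > random_state.rand()).argmax())

# usage: rng = np.random.RandomState(0); nxt = sample_next_state(A, 0, rng)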
Beispiel #47
0
    def manual_grads(self, params, targets):
      """
      Compute the gradients of the loss w.r.t. the parameters.
      The ordering of the operations is the reverse of that in fprop().
      """
      deltas = {}
      for key, val in params.items():
        deltas[key] = np.zeros_like(val)

      [loss, mems, ps, ys, os, zos, hs, zhs, xs, rs, w_rs,
       w_ws, adds, erases, k_rs, k_ws, g_rs, g_ws, wc_rs, wc_ws,
       zbeta_rs, zbeta_ws, zs_rs, zs_ws, wg_rs, wg_ws] = self.stats
      dd = {}
      drs = {}
      dzh = {}
      dmem = {} # might not need this, since we have dmemtilde
      dmemtilde = {}
      du_r = {}
      du_w = {}
      dwg_r = {}
      dwg_w = {}
      for t in reversed(range(len(targets))):

        dy = np.copy(ps[t])
        dy -= targets[t].T # backprop into y

        deltas['oy'] += np.dot(dy, os[t].T)
        deltas['by'] += dy

        if t < len(targets) - 1:
          # r[t] affects cost through zh[t+1] via Wrh
          drs[t] = np.dot(self.W['rh'].T, dzh[t + 1])

          # right now, mems[t] influences cost through rs[t+1], via w_rs[t+1]
          dmem[t] = np.dot( w_rs[t + 1], drs[t + 1].reshape((self.M,1)).T )
          # and also through mems at next step
          W = np.reshape(w_ws[t+1], (w_ws[t+1].shape[0], 1))
          E = np.reshape(erases[t+1], (erases[t+1].shape[0], 1))
          WTE = np.dot(W, E.T)
          KEEP = np.ones(mems[0].shape) - WTE
          dmem[t] += np.multiply(dmemtilde[t+1], KEEP)
          # and also through its influence on the content weighting next step
          dmem[t] += du_r[t+1] + du_w[t+1]

          dmemtilde[t] = dmem[t]

          # erases[t] affects cost through mems[t], via w_ws[t]
          derase = np.dot(np.multiply(dmemtilde[t], -mems[t-1]).T, w_ws[t])

          # zerase affects just erases through a sigmoid
          dzerase = derase * (erases[t] * (1 - erases[t]))

          # adds[t] affects costs through mems[t], via w_ws
          dadd = np.dot(dmem[t].T, w_ws[t])

          # zadds affects just adds through a tanh
          dzadd = dadd * (1 - adds[t] * adds[t])

          # dbadds is just dzadds
          deltas['badds'] += dzadd

          deltas['oadds'] += np.dot(dzadd, os[t].T)

          deltas['berases'] += dzerase

          deltas['oerases'] += np.dot(dzerase, os[t].T)

          # # read weights affect what is read, via what's in mems[t-1]
          # dwc_r = np.dot(mems[t-1], drs[t])

          # # write weights affect mem[t] through adding
          # dwc_w = np.dot(dmem[t], adds[t])
          # # they also affect memtilde[t] through erasing
          # dwc_w += np.dot(np.multiply(dmemtilde[t], -mems[t-1]), erases[t])

          dw_r = np.dot(mems[t-1], drs[t])
          dw_r += dwg_r[t+1] * (1 - g_rs[t+1])

          # write weights affect mem[t] through adding
          dw_w = np.dot(dmem[t], adds[t])
          # they also affect memtilde[t] through erasing
          dw_w += np.dot(np.multiply(dmemtilde[t], -mems[t-1]), erases[t])
          dw_w += dwg_w[t+1] * (1 - g_ws[t+1])

          sgwr = np.zeros((self.N, self.N))
          sgww = np.zeros((self.N, self.N))
          for i in range(self.N):
            sgwr[i,i] = softmax(zs_rs[t])[0]
            sgwr[i,(i+1) % self.N] = softmax(zs_rs[t])[2]
            sgwr[i,(i-1) % self.N] = softmax(zs_rs[t])[1]

            sgww[i,i] = softmax(zs_ws[t])[0]
            sgww[i,(i+1) % self.N] = softmax(zs_ws[t])[2]
            sgww[i,(i-1) % self.N] = softmax(zs_ws[t])[1]

          # right now, shifted weights are final weight
          dws_r = dw_r
          dws_w = dw_w

          dwg_r[t] = np.dot(sgwr.T, dws_r)
          dwg_w[t] = np.dot(sgww.T, dws_w)

          dwc_r = dwg_r[t] * g_rs[t]
          dwc_w = dwg_w[t] * g_ws[t]


          """
          We need dw/dK
          now w has N elts and K has N elts
          and we want, for every elt of W, the grad of that elt w.r.t. each
          of the N elts of K. that gives us N * N things
          """
          # first, we must build up the K values (should be taken from fprop)
          K_rs = []
          K_ws = []
          for i in range(self.N):
            K_rs.append(cosine_sim(mems[t-1][i, :], k_rs[t]))
            K_ws.append(cosine_sim(mems[t-1][i, :], k_ws[t]))

          # then, we populate the grads
          dwdK_r = np.zeros((self.N, self.N))
          dwdK_w = np.zeros((self.N, self.N))
          # for every row in the memory
          for i in range(self.N):
            # for every element in the weighting
            for j in range(self.N):
              dwdK_r[i,j] += softmax_grads(K_rs, softplus(zbeta_rs[t]), i, j)
              dwdK_w[i,j] += softmax_grads(K_ws, softplus(zbeta_ws[t]), i, j)

          # compute dK for all i in N
          # K is the evaluated cosine similarity for the i-th row of mem matrix
          dK_r = np.zeros_like(w_rs[0])
          dK_w = np.zeros_like(w_ws[0])

          # for all i in N (for every row that we've simmed)
          for i in range(self.N):
            # for every j in N (for every elt of the weighting)
            for j in range(self.N):
              # specifically, dwdK_r will change, and for write as well
              dK_r[i] += dwc_r[j] * dwdK_r[i,j] 
              dK_w[i] += dwc_w[j] * dwdK_w[i,j]

          """
          dK_r_dk_rs is a list of N things
          each elt of the list corresponds to grads of K_idx
          w.r.t. the key k_t
          so it should be a length N list of M by 1 vectors
          """

          dK_r_dk_rs = []
          dK_r_dmem = []
          for i in range(self.N):
            # let k_rs be u, Mem[i] be v
            u = np.reshape(k_rs[t], (self.M,))
            v = mems[t-1][i, :]
            dK_r_dk_rs.append( dKdu(u,v) )
            dK_r_dmem.append( dKdu(v,u))

          dK_w_dk_ws = []
          dK_w_dmem = []
          for i in range(self.N):
            # let k_ws be u, Mem[i] be v
            u = np.reshape(k_ws[t], (self.M,))
            v = mems[t-1][i, :]
            dK_w_dk_ws.append( dKdu(u,v) )
            dK_w_dmem.append( dKdu(v,u))

          # compute delta for keys
          dk_r = np.zeros_like(k_rs[0])
          dk_w = np.zeros_like(k_ws[0])
          # for every one of M elt of dk_r
          for i in range(self.M):
            # for every one of the N Ks
            for j in range(self.N):
              # add delta K_r[j] * dK_r[j] / dk_r[i]
              # add influence on through K_r[j]
              dk_r[i] += dK_r[j] * dK_r_dk_rs[j][i]
              dk_w[i] += dK_w[j] * dK_w_dk_ws[j][i]

          # these represent influence of mem on next K
          """
          Let's let du_r[t] represent the
          influence of mems[t-1] on the cost through the K values
          this is analogous to dk_w, but, k only every affects that
          whereas mems[t-1] will also affect what is read at time t+1
          and through memtilde at time t+1
          """
          du_r[t] = np.zeros_like(mems[0])
          du_w[t] = np.zeros_like(mems[0])
          # for every row in mems[t-1]
          for i in range(self.N):
            # for every elt of this row (one of M)
            for j in range(self.M):
              du_r[t][i,j] = dK_r[i] * dK_r_dmem[i][j]
              du_w[t][i,j] = dK_w[i] * dK_w_dmem[i][j]

          # key values are activated as tanh
          dzk_r = dk_r * (1 - k_rs[t] * k_rs[t])
          dzk_w = dk_w * (1 - k_ws[t] * k_ws[t])

          deltas['ok_r'] += np.dot(dzk_r, os[t].T)
          deltas['ok_w'] += np.dot(dzk_w, os[t].T)

          deltas['bk_r'] += dzk_r
          deltas['bk_w'] += dzk_w

          dg_r = np.dot(dwg_r[t].T, (wc_rs[t] - w_rs[t-1]) )
          dg_w = np.dot(dwg_w[t].T, (wc_ws[t] - w_ws[t-1]) )

          # compute dzg_r, dzg_w
          dzg_r = dg_r * (g_rs[t] * (1 - g_rs[t]))
          dzg_w = dg_w * (g_ws[t] * (1 - g_ws[t]))

          deltas['og_r'] += np.dot(dzg_r, os[t].T)
          deltas['og_w'] += np.dot(dzg_w, os[t].T)

          deltas['bg_r'] += dzg_r
          deltas['bg_w'] += dzg_w

          # compute dbeta, which affects w_content through interaction with Ks

          dwcdbeta_r = np.zeros_like(w_rs[0])
          dwcdbeta_w = np.zeros_like(w_ws[0])
          for i in range(self.N):
            dwcdbeta_r[i] = beta_grads(K_rs, softplus(zbeta_rs[t]), i)
            dwcdbeta_w[i] = beta_grads(K_ws, softplus(zbeta_ws[t]), i)

          dbeta_r = np.zeros_like(zbeta_rs[0])
          dbeta_w = np.zeros_like(zbeta_ws[0])
          for i in range(self.N):
            dbeta_r[0] += dwc_r[i] * dwcdbeta_r[i]
            dbeta_w[0] += dwc_w[i] * dwcdbeta_w[i]

          # beta is activated from zbeta by softplus, grad of which is sigmoid
          dzbeta_r = dbeta_r * sigmoid(zbeta_rs[t])
          dzbeta_w = dbeta_w * sigmoid(zbeta_ws[t])

          deltas['obeta_r'] += np.dot(dzbeta_r, os[t].T)
          deltas['obeta_w'] += np.dot(dzbeta_w, os[t].T)

          deltas['bbeta_r'] += dzbeta_r
          deltas['bbeta_w'] += dzbeta_w

          sgsr = np.zeros((self.N, 3))
          sgsw = np.zeros((self.N, 3))
          for i in range(self.N):
            sgsr[i,1] = wg_rs[t][(i - 1) % self.N]
            sgsr[i,0] = wg_rs[t][i]
            sgsr[i,2] = wg_rs[t][(i + 1) % self.N]

            sgsw[i,1] = wg_ws[t][(i - 1) % self.N]
            sgsw[i,0] = wg_ws[t][i]
            sgsw[i,2] = wg_ws[t][(i + 1) % self.N]

          ds_r = np.dot(sgsr.T, dws_r)
          ds_w = np.dot(sgsw.T, dws_w)

          shift_act_jac_r = np.zeros((3,3))
          shift_act_jac_w = np.zeros((3,3))
          bf = np.array([[1.0]])
          for i in range(3):
            for j in range(3):
              shift_act_jac_r[i,j] = softmax_grads(zs_rs[t], bf, i, j)
              shift_act_jac_w[i,j] = softmax_grads(zs_ws[t], bf, i, j)

          dzs_r = np.dot(shift_act_jac_r.T, ds_r)
          dzs_w = np.dot(shift_act_jac_w.T, ds_w)

          deltas['os_r'] += np.dot(dzs_r, os[t].T)
          deltas['os_w'] += np.dot(dzs_w, os[t].T)

          deltas['bs_r'] += dzs_r
          deltas['bs_w'] += dzs_w

        else:
          drs[t] = np.zeros_like(rs[0])
          dmemtilde[t] = np.zeros_like(mems[0])
          du_r[t] = np.zeros_like(mems[0])
          du_w[t] = np.zeros_like(mems[0])
          dwg_r[t] = np.zeros_like(w_rs[0])
          dwg_w[t] = np.zeros_like(w_ws[0])

        # o affects y through Woy
        do = np.dot(params['oy'].T, dy)
        if t < len(targets) - 1:
          # and also zadd through Woadds
          do += np.dot(params['oadds'].T, dzadd)
          do += np.dot(params['oerases'].T, dzerase)
          # and also through the keys
          do += np.dot(params['ok_r'].T, dzk_r)
          do += np.dot(params['ok_w'].T, dzk_w)
          # and also through the interpolators
          do += np.dot(params['og_r'].T, dzg_r)
          do += np.dot(params['og_w'].T, dzg_w)
          # and also through beta
          do += np.dot(params['obeta_r'].T, dzbeta_r)
          do += np.dot(params['obeta_w'].T, dzbeta_w)
          # and also through the shift values
          do += np.dot(params['os_r'].T, dzs_r)
          do += np.dot(params['os_w'].T, dzs_w)


        # compute deriv w.r.t. pre-activation of o
        dzo = do * (1 - os[t] * os[t])

        deltas['ho'] += np.dot(dzo, hs[t].T)
        deltas['bo'] += dzo

        # compute hidden dh
        dh = np.dot(params['ho'].T, dzo)

        # compute deriv w.r.t. pre-activation of h
        dzh[t] = dh * (1 - hs[t] * hs[t])

        deltas['xh'] += np.dot(dzh[t], xs[t].T)
        deltas['bh'] += dzh[t]

        # Wrh affects zh via rs[t-1]
        deltas['rh'] += np.dot(dzh[t], rs[t-1].reshape((self.M, 1)).T)

      return deltas
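Hand-derived backward passes like the one above are easy to get subtly wrong, so a common companion is a finite-difference gradient check. This is only a sketch: it assumes a scalar loss_fn(params) exists (it does not appear in the snippet) and that params holds NumPy arrays.

import numpy as np

def check_grads(params, deltas, loss_fn, eps=1e-5, n_checks=5, seed=0):
    """Compare a few analytic gradient entries against centered differences."""
    rng = np.random.RandomState(seed)
    for key in params:
        flat = params[key].ravel()          # view: edits touch params[key]
        for _ in range(n_checks):
            i = rng.randint(flat.size)
            old = flat[i]
            flat[i] = old + eps
            loss_plus = loss_fn(params)
            flat[i] = old - eps
            loss_minus = loss_fn(params)
            flat[i] = old                   # restore the original value
            numeric = (loss_plus - loss_minus) / (2 * eps)
            analytic = deltas[key].ravel()[i]
            denom = max(1e-8, abs(numeric) + abs(analytic))
            print('%s[%d]: relative error %.2e' % (key, i, abs(numeric - analytic) / denom))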
Beispiel #48
0
        def sample(self, n_samples=2000, observed_states=None,
                   init_samples=None, init_state=None, random_state=None):
            """Generate random samples from the self.

            Parameters
            ----------
            n_samples : int, default: 2000
                Number of samples to generate.

            observed_states : array
                If provided, states are not sampled.

            random_state : RandomState or an int seed
                A random number generator instance. If None is given, the
                object's random_state is used.

            init_state : int
                If provided, the initial state is not sampled.

            init_samples : array, default: None
                If provided, initial samples (for the AR process) are not sampled.

            Returns
            -------
            samples : array_like, shape (``n_samples``, ``n_features``)
                      Array of samples

            states : array_like, shape (``n_samples``)
                     List of hidden states (accounting for tied states by giving
                     them the same index)
            """
            if random_state is None:
                random_state = self.random_state
            random_state = check_random_state(random_state)


            samples = np.zeros((n_samples, self.n_features))
            states = np.zeros(n_samples)

            order = self.n_lags

            if init_state is None:
                startprob_pdf = np.exp(np.copy(self._log_startprob))
                start_dist = stats.rv_discrete(name='custm',
                                          values=(np.arange(startprob_pdf.shape[0]),
                                                            startprob_pdf),
                                          seed=random_state)
                start_state = start_dist.rvs(size=1)[0]

            else:
                start_state = init_state

            if self.n_lags > 0:
                if init_samples is None:
                    init_samples = 0.01*np.ones((self.n_lags, self.n_features))  # TODO: better init

            if observed_states is None:
                transmat_pdf = np.exp(np.copy(self._log_transmat))
                transmat_cdf = np.cumsum(transmat_pdf, 1)

                states[0] = (transmat_cdf[start_state] >
                             random_state.rand()).argmax()

                transmat_pdf = np.exp(self._log_transmat)
                transmat_cdf = np.cumsum(transmat_pdf, 1)

                nrand = random_state.rand(n_samples)
                for idx in range(1,n_samples):
                    newstate = (transmat_cdf[states[idx-1]] > nrand[idx-1]).argmax()
                    states[idx] = newstate

            else:
                states = observed_states
            precision = np.copy(self._precision_)
            for idx in range(n_samples):
                state_ = int(states[idx])


                covar_ = np.linalg.inv(precision[state_])

                mean_ = np.copy(self._mu_[state_])
                if self.n_lags > 0:
                    for lag in range(1, order + 1):
                        if idx < lag:
                            prev_ = init_samples[len(init_samples) - lag]
                        else:
                            prev_ = samples[idx - lag]
                        mean_ += self._alpha_[state_, lag - 1] * prev_


                samples[idx] = self.multivariate_t_rvs(mean_, covar_,
                                                       random_state)

            states = self._process_sequence(states)

            return samples, states
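self.multivariate_t_rvs is not shown in this snippet; a common way to draw a multivariate t variate (assumed here, including the degrees-of-freedom argument df, which the call above presumably takes from the model) is to scale a Gaussian draw by an inverse chi-square factor:

import numpy as np

def multivariate_t_rvs(mean, cov, df, random_state):
    # x = mean + z / sqrt(w), with z ~ N(0, cov) and w ~ chi2(df) / df
    d = len(mean)
    w = random_state.chisquare(df) / df
    z = random_state.multivariate_normal(np.zeros(d), cov)
    return mean + z / np.sqrt(w)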
Beispiel #49
0
def reporter(p):
    """Reporter function to capture intermediate states of optimization."""
    global ps
    #ps.append(p)
    ps.append(np.copy(p))
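The reporter is meant to be handed to an optimizer as a callback so that each accepted iterate is recorded; a minimal usage sketch with scipy.optimize.minimize (the quadratic objective is only an illustration):

import numpy as np
from scipy.optimize import minimize

ps = []  # history filled by reporter() via its `global ps`
x0 = np.array([1.5, -0.5])
res = minimize(lambda p: np.sum((p - 1.0) ** 2), x0,
               method='BFGS', callback=reporter)
trajectory = np.array(ps)  # one row per accepted iterate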
Beispiel #50
0
    def ilqr_iterate(self, x0, u_init, n_itrs=50, tol=1e-6, verbose=True):
        #initialize the regularization term
        self.reg = 1

        #derive the initial guess trajectory from the initial guess of u
        x_array = self.forward_propagation(x0, u_init)
        u_array = np.copy(u_init)
        #initialize current trajectory cost
        J_opt = self.evaluate_trajectory_cost(x_array, u_init)
        J_hist = [J_opt]
        #iterates...
        converged = False
        for i in range(n_itrs):
            k_array, K_array = self.back_propagation(x_array, u_array)
            norm_k = np.mean(np.linalg.norm(k_array, axis=1))
            #apply the control to update the trajectory by trying different alpha
            accept = False
            for alpha in self.alpha_array:
                x_array_new, u_array_new = self.apply_control(x_array, u_array, k_array, K_array, alpha)
                #evaluate the cost of this trial
                J_new = self.evaluate_trajectory_cost(x_array_new, u_array_new)

                if J_new < J_opt:
                    #see if it is converged
                    if np.abs((J_opt - J_new )/J_opt) < tol:
                        #replacement for the next iteration
                        J_opt = J_new
                        x_array = x_array_new
                        u_array = u_array_new
                        converged = True
                        break
                    else:
                        #replacement for the next iteration
                        J_opt = J_new
                        x_array = x_array_new
                        u_array = u_array_new
                        #successful step, decrease the regularization term
                        #momentum like adaptive regularization
                        self.reg = np.max([self.reg_min, self.reg / self.reg_factor])
                        accept = True
                        print('Iteration {0}:\tJ = {1};\tnorm_k = {2};\tlog10(reg) = {3}'.format(i+1, J_opt, norm_k, np.log10(self.reg)))
                        break
                else:
                    #don't accept this
                    accept = False
            
            J_hist.append(J_opt)

            #exit if converged...
            if converged:
                print('Converged at iteration {0}; J = {1}; reg = {2}'.format(i+1, J_opt, self.reg))
                break

            #see if all the trials are rejected
            if not accept:
                #need to increase regularization
                #check if the regularization term is too large
                if self.reg > self.reg_max:
                    print('Exceeds regularization limit at iteration {0}; terminating the iterations'.format(i+1))
                    break

                self.reg = self.reg * self.reg_factor
                if verbose:
                    print('Reject the control perturbation. Increase the regularization term.')


        #prepare result dictionary
        res_dict = {
        'J_hist':np.array(J_hist),
        'x_array_opt':np.array(x_array),
        'u_array_opt':np.array(u_array),
        'k_array_opt':np.array(k_array),
        'K_array_opt':np.array(K_array)
        }

        return res_dict
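The accept/reject bookkeeping above amounts to a Levenberg-Marquardt-style schedule for the regularization term: relax it after an accepted line-search step, stiffen it when every alpha is rejected. The schedule on its own, with hypothetical defaults mirroring the attributes used above:

def update_regularization(reg, accepted, reg_factor=10.0,
                          reg_min=1e-6, reg_max=1e10):
    # accepted step -> decrease reg; all step sizes rejected -> increase reg
    if accepted:
        return max(reg_min, reg / reg_factor)
    return min(reg_max, reg * reg_factor)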
Beispiel #51
0
    def _init_params(self, data, lengths=None, params='stmpaw'):
        X = data['obs']

        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
        else:
            if 's' in params:
                super(ARTHMM, self)._init_params(data, lengths, 's')

            if 't' in params:
                super(ARTHMM, self)._init_params(data, lengths, 't')

            if 'm' in params or 'a' in params or 'p' in params:
                kmmod = cluster.KMeans(
                    n_clusters=self.n_unique,
                    random_state=self.random_state).fit(X)
                kmeans = kmmod.cluster_centers_
                ar_mod = []
                ar_alpha = []
                ar_resid = []
                if not self.shared_alpha:
                    for u in range(self.n_unique):
                        ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                                u]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[u].params[1:])
                        ar_resid.append(ar_mod[u].resid)
                else:
                    # run one AR model on most part of time series
                    # that has most points assigned after clustering
                    mf = np.argmax(np.bincount(kmmod.labels_))
                    ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                              mf]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[0].params[1:])
                    ar_resid.append(ar_mod[0].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    mu_init[u] = kmeans[u, 0] - np.dot(
                            np.repeat(kmeans[u, 0], self.n_lags),
                            ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    if not self.shared_alpha:
                        maxVar = np.max([np.var(ar_resid[i]) for i in
                                        range(self.n_unique)])
                    else:
                        maxVar = np.var(ar_resid[0])
                    precision_init[u] = 1.0 / maxVar
                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                alpha_init = np.zeros((self.n_unique, self.n_lags))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    alpha_init[u, :] = ar_alpha[ar_idx]
                self.alpha_ = alpha_init
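The AR-based initialization above leans on the older statsmodels AR API (smapi.tsa.AR, since deprecated): a per-cluster AR(p) fit supplies the lag coefficients for alpha_ and the residual variance for precision_. A small sketch of that extraction for a single cluster (the series x is placeholder data, for illustration only):

import numpy as np
import statsmodels.api as smapi

x = np.random.randn(200)              # observations assigned to one cluster
n_lags = 1
ar_fit = smapi.tsa.AR(x).fit(n_lags)  # same (deprecated) API as the snippet above
alpha = ar_fit.params[1:]             # lag coefficients; params[0] is the intercept
precision = 1.0 / np.var(ar_fit.resid)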