Example #1
def kernelpdf(scale, sigma, dataset, datasetGen):

    #dataset is binned as eta1,eta2,mass,pt2,pt1

    maxR = np.full((100), 3.3)
    minR = np.full((100), 2.9)

    valsReco = np.linspace(minR[0], maxR[0], 100)
    valsGen = valsReco

    h = np.tensordot(
        scale, valsGen, axes=0
    )  # 5D array holding every combination of kinematic bin and gen-mass value
    h_ext = np.swapaxes(np.swapaxes(h, 2, 4), 3, 4)[:, :, np.newaxis, :, :, :]

    sigma_ext = sigma[:, :, np.newaxis, np.newaxis, :, :]

    xscale = np.sqrt(2.) * sigma_ext

    maxR_ext = maxR[np.newaxis, np.newaxis, :, np.newaxis, np.newaxis,
                    np.newaxis]
    minR_ext = minR[np.newaxis, np.newaxis, :, np.newaxis, np.newaxis,
                    np.newaxis]

    maxZ = ((maxR_ext - h_ext.astype('float64')) / xscale)
    minZ = ((minR_ext - h_ext.astype('float64')) / xscale)

    arg = np.sqrt(np.pi / 2.) * sigma_ext * (erf(maxZ) - erf(minZ))

    #take tensor product between mass and genMass dimensions and sum over gen masses
    #divide each bin by the sum of gen events in that bin
    den = np.where(
        np.sum(datasetGen, axis=2) > 1000., np.sum(datasetGen, axis=2),
        -1)[:, :, np.newaxis, :, :]

    I = np.sum(arg * datasetGen[:, :, np.newaxis, :, :, :], axis=3) / den

    #give vals the right shape -> add dimension for gen mass (axis = 3)
    vals_ext = valsReco[np.newaxis, np.newaxis, :, np.newaxis, np.newaxis,
                        np.newaxis]

    gaus = np.exp(-np.power(vals_ext - h_ext.astype('float64'), 2.) /
                  (2 * np.power(sigma_ext, 2.)))

    #take tensor product between mass and genMass dimensions and sum over gen masses
    #divide each bin by the sum of gen events in that bin
    den2 = np.where(
        np.sum(datasetGen, axis=2) > 1000., np.sum(datasetGen, axis=2),
        1)[:, :, np.newaxis, :, :]

    pdf = np.sum(gaus * datasetGen[:, :, np.newaxis, :, :, :],
                 axis=3) / den2 / np.where(I > 0., I, -1)

    pdf = np.where(pdf > 0., pdf, 0.)

    massbinwidth = (maxR[0] - minR[0]) / 100

    pdf = pdf * massbinwidth

    return pdf
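A minimal toy-shape sketch of calling the helper above (assumed shapes and values only; it presumes numpy as np and scipy.special.erf are already imported, and note that the dataset argument is never used inside the function):

# Hypothetical binning: 2 x 2 eta bins, 3 x 3 pt bins, 100 mass bins.
nEta, nPt, nMass = 2, 3, 100
scale = np.full((nEta, nEta, nPt, nPt), 1.0)       # per-bin scale parameters
sigma = np.full((nEta, nEta, nPt, nPt), 0.05)      # per-bin resolutions
datasetGen = np.full((nEta, nEta, nMass, nPt, nPt), 2000.)
pdf = kernelpdf(scale, sigma, None, datasetGen)
assert pdf.shape == (nEta, nEta, nMass, nPt, nPt)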
Example #2
    def forward_pass(self, inputs, param_vector):
        if inputs.shape[2] == 32492:
            pool_map = genfromtxt('../mesh/neighs_L1.csv', delimiter=',')
            coords_old = coords_0
            faces_old = faces_0
            coords = coords_1
            faces = faces_1
        elif inputs.shape[2] == 5356:
            pool_map = genfromtxt('../mesh/neighs_L2.csv', delimiter=',')
            coords_old = coords_1
            faces_old = faces_1
            coords = coords_2
            faces = faces_2

        adj_mtx_old, _, _ = mesh_traversal.create_adj_mtx(
            coords_old, faces_old)
        adj_mtx, _, _ = mesh_traversal.create_adj_mtx(coords, faces)
        pool_map = list(map(int, pool_map))

        patches = []
        for i in range(coords.shape[0]):
            org_vert = int(pool_map[i])
            neighs = mesh_traversal.get_neighs(adj_mtx_old, coords_old,
                                               org_vert, 1)
            patch = inputs[:, :, neighs]
            patch = np.mean(patch, axis=2)
            patches.append(patch)

        out = np.array(patches)
        out = np.swapaxes(out, 0, 1)
        out = np.swapaxes(out, 1, 2)

        return out
Example #3
    def emissionLikelihood( self, x, ys ):
        # Compute P( y | x, ϴ )
        if( x.ndim == 2 ):
            # Multiple time steps
            if( ys.ndim == 2 ):
                assert x.shape[ 0 ] == ys.shape[ 0 ]
            else:
                # There are multiple measurements per latent state
                assert ys.ndim == 3
                assert x.shape[ 0 ] == ys.shape[ 1 ]

                # Put the time index in front
                ys = np.swapaxes( ys, 0, 1 )

            assert x.shape[ 0 ] == ys.shape[ 0 ]

            ans = 0.0
            for t, ( _x, _ys ) in enumerate( zip( x, ys ) ):
                ans += Normal.log_likelihood( _ys, nat_params=( -0.5 * self.J1Emiss, self._hy.dot( _x ) ) )
            return ans

        else:
            # Only 1 example.  I don't think this code will ever be called
            assert x.ndim == 1
            if( ys.ndim == 1 ):
                pass
            else:
                assert ys.ndim == 2

            return Normal.log_likelihood( ys, nat_params=( -0.5 * self.J1Emiss, self._hy.dot( x ) ) )
Example #4
    def E_step(self, verbose=False):
        self.gaussian_states = self.laplace_approximation(verbose=verbose)

        # Compute normalizer and covariances with E step
        T, D = self.T, self.D_latent
        H_diag, H_upper_diag = self.sparse_hessian_log_joint(
            self.gaussian_states)
        J_init = J_11 = J_22 = np.zeros((D, D))
        h_init = h_1 = h_2 = np.zeros((D, ))

        # Negate the Hessian since precision is -H
        J_21 = np.swapaxes(-H_upper_diag, -1, -2)
        J_node = -H_diag
        h_node = np.zeros((T, D))

        logZ, _, self.smoothed_sigmas, E_xtp1_xtT = \
            info_E_step(J_init, h_init, 0,
                        J_11, J_21, J_22, h_1, h_2, np.zeros((T - 1)),
                        J_node, h_node, np.zeros(T))

        # Laplace approximation -- normalizer is the joint times
        # the normalizer from the Gaussian approx.
        self._normalizer = self.log_joint(self.gaussian_states) + logZ

        self._set_expected_stats(self.gaussian_states, self.smoothed_sigmas,
                                 E_xtp1_xtT)
Example #5
    def _ll(self, m, p, a, xn, xln, **kwargs):
        """Computation of log likelihood

            Dimensions
            ----------
            m :  n_unique x n_features
            p :  n_unique x n_features x n_features
            a :  n_unique x n_lags (shared_alpha=F)
                 OR     1 x n_lags (shared_alpha=T)
            xn:  N x n_features
            xln: N x n_features x n_lags
            """

        samples = xn.shape[0]
        xn = xn.reshape(samples, 1, self.n_features)
        m = m.reshape(1, self.n_unique, self.n_features)
        det = np.linalg.det(np.linalg.inv(p))
        det = det.reshape(1, self.n_unique)

        lagged = np.dot(xln, a.T)  # NFU
        lagged = np.swapaxes(lagged, 1, 2)  # NUF
        xm = xn - (lagged + m)
        tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)

        # TODO division in gamma function
        res = np.log(gamma((self.degree_freedom + self.n_features)/2)) - \
              np.log(gamma(self.degree_freedom/2)) - (self.n_features/2.0) * \
              np.log(self.degree_freedom) - \
              (self.n_features/2.0) * np.log(np.pi) - 0.5 * np.log(det) - \
              ((self.degree_freedom + self.n_features) / 2.0) * \
              np.log(1 + (1/self.degree_freedom) * tem)

        return res
Example #6
def test_blocks_to_banded(T=5, D=3):
    """
    Test blocks_to_banded correctness
    """
    Ad = np.zeros((T, D, D))
    Aod = np.zeros((T-1, D, D))

    M = np.arange(1, D+1)[:, None] * 10 + np.arange(1, D+1)
    for t in range(T):
        Ad[t, :, :] = 100 * ((t+1)*10 + (t+1)) + M

    for t in range(T-1):
        Aod[t, :, :] = 100 * ((t+2)*10 + (t+1)) + M

    # print("Lower")
    # L = blocks_to_bands(Ad, Aod, lower=True)
    # print(L)

    # print("Upper")
    # U = blocks_to_bands(Ad, Aod, lower=False)
    # print(U)

    # Check inverse with random symmetric matrices
    Ad = npr.randn(T, D, D)
    Ad = (Ad + np.swapaxes(Ad, -1, -2)) / 2
    Aod = npr.randn(T-1, D, D)

    Ad2, Aod2 = bands_to_blocks(blocks_to_bands(Ad, Aod, lower=True), lower=True)
    assert np.allclose(np.tril(Ad), np.tril(Ad2))
    assert np.allclose(Aod, Aod2)

    Ad3, Aod3 = bands_to_blocks(blocks_to_bands(Ad, Aod, lower=False), lower=False)
    assert np.allclose(np.triu(Ad), np.triu(Ad3))
    assert np.allclose(Aod, Aod3)
Example #7
        def _ll(self, m, p, a, xn, xln, **kwargs):
            """Computation of log likelihood

            Dimensions
            ----------
            m :  n_unique x n_features
            p :  n_unique x n_features x n_features
            a :  n_unique x n_lags (shared_alpha=F)
                 OR     1 x n_lags (shared_alpha=T)
            xn:  N x n_features
            xln: N x n_features x n_lags
            """

            samples = xn.shape[0]
            xn = xn.reshape(samples, 1, self.n_features)
            m = m.reshape(1, self.n_unique, self.n_features)
            det = np.linalg.det(np.linalg.inv(p))
            det = det.reshape(1, self.n_unique)

            lagged = np.dot(xln, a.T)  # NFU
            lagged = np.swapaxes(lagged, 1, 2)  # NUF
            xm = xn-(lagged + m)
            tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)

            # TODO division in gamma function
            res = np.log(gamma((self.degree_freedom + self.n_features)/2)) - \
                  np.log(gamma(self.degree_freedom/2)) - (self.n_features/2.0) * \
                  np.log(self.degree_freedom) - \
                  (self.n_features/2.0) * np.log(np.pi) - 0.5 * np.log(det) - \
                  ((self.degree_freedom + self.n_features) / 2.0) * \
                  np.log(1 + (1/self.degree_freedom) * tem)

            return res
Example #8
    def _continuous_entropy(self):
        negentropy = 0
        continuous_expectations = self.continuous_expectations
        for prms, (log_Z, Ex, smoothed_sigmas, ExxnT) in \
                zip(self.continuous_state_params, continuous_expectations):

            # The Kalman smoother outputs the smoothed covariance matrices. Add
            # back the outer product of the means to get E[x_t x_t^T].
            mumuT = np.swapaxes(Ex[:, None], 2, 1) @ Ex[:, None]
            ExxT = smoothed_sigmas + mumuT

            # Pairwise terms
            negentropy += np.sum(-0.5 * trace_product(prms["J_ini"], ExxT[0]))
            negentropy += np.sum(-0.5 * trace_product(prms["J_dyn_11"], ExxT[:-1]))
            negentropy += np.sum(-0.5 * trace_product(prms["J_dyn_22"], ExxT[1:]))
            negentropy += np.sum(-0.5 * trace_product(prms["J_obs"], ExxT))
            negentropy += np.sum(-1.0 * trace_product(prms["J_dyn_21"], ExxnT))

            # Unary terms
            negentropy += np.sum(prms["h_ini"] * Ex[0])
            negentropy += np.sum(prms["h_dyn_1"] * Ex[:-1])
            negentropy += np.sum(prms["h_dyn_2"] * Ex[1:])
            negentropy += np.sum(prms["h_obs"] * Ex)

            # Log normalizer
            negentropy -= log_Z
        return -negentropy
Example #9
    def _ll(self, m, p, a, xn, xln, **kwargs):
        """Computation of log likelihood

        Dimensions
        ----------
        m :  n_unique x n_features
        p :  n_unique x n_features x n_features
        a :  n_unique x n_lags (shared_alpha=F)
             OR     1 x n_lags (shared_alpha=T)
        xn:  N x n_features
        xln: N x n_features x n_lags
        """

        samples = xn.shape[0]
        xn = xn.reshape(samples, 1, self.n_features)
        m = m.reshape(1, self.n_unique, self.n_features)
        det = np.linalg.det(np.linalg.inv(p))
        det = det.reshape(1, self.n_unique)

        lagged = np.dot(xln, a.T)  # NFU
        lagged = np.swapaxes(lagged, 1, 2)  # NUF
        xm = xn - (lagged + m)
        tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)

        res = (-self.n_features / 2.0) * np.log(
            2 * np.pi) - 0.5 * tem - 0.5 * np.log(det)

        return res
Example #10
def test_multivariate_normal_logpdf_batches_and_states_masked(D=10):
    # Test broadcasting over B batches, N datapoints, and K parameters with masks
    B = 3
    N = 100
    K = 5
    x = npr.randn(B, N, D)
    mask = npr.rand(B, N, D) < .5
    mu = npr.randn(K, D)
    L = npr.randn(K, D, D)
    Sigma = np.matmul(L, np.swapaxes(L, -1, -2))

    ll1 = multivariate_normal_logpdf(x[:, :, None, :],
                                     mu,
                                     Sigma,
                                     mask=mask[:, :, None, :])
    assert ll1.shape == (B, N, K)

    ll2 = np.empty((B, N, K))
    for b in range(B):
        for n in range(N):
            m = mask[b, n]
            if m.sum() == 0:
                ll2[b, n] = 0
            else:
                for k in range(K):
                    ll2[b, n, k] = mvn.logpdf(x[b, n][m], mu[k][m],
                                              Sigma[k][np.ix_(m, m)])

    assert np.allclose(ll1, ll2)
Example #11
def tensorize_and_convolve_mesh(a, adj_mtx, vals_list, coords, r, stride):
    """
    Strides the mesh and applies the convolution operation. Prepares tensors within the function, so it is not as
    efficient as mesh_convolve_tensorized(). If operating on already strided data, use mesh_convolve_tensorized() or
    mesh_convolve_tensorized_dyn().
    :param a: list of filters
    :param adj_mtx: adjacency matrix
    :param coords: coordinates of each vertex
    :return: result of the convolution operation
    """

    strided_mesh = mesh_strider_batch(adj_mtx, vals_list, coords, r, stride,
                                      None)
    try:
        out = npo.einsum(a, [0, 1, 2], strided_mesh, [3, 4, 2])
    except:
        try:
            a = a._value
            out = npo.einsum(a, [0, 1, 2], strided_mesh, [3, 4, 2])
        except:
            strided_mesh = strided_mesh._value
            out = npo.einsum(a, [0, 1, 2], strided_mesh, [3, 4, 2])

    out = out[0]
    out = np.swapaxes(out, 0, 1)
    return out
Example #12
    def _ll(self, m, p, a, xn, xln, **kwargs):
        """Computation of log likelihood

        Dimensions
        ----------
        m :  n_unique x n_features
        p :  n_unique x n_features x n_features
        a :  n_unique x n_lags (shared_alpha=F)
             OR     1 x n_lags (shared_alpha=T)
        xn:  N x n_features
        xln: N x n_features x n_lags
        """

        samples = xn.shape[0]
        xn = xn.reshape(samples, 1, self.n_features)
        m = m.reshape(1, self.n_unique, self.n_features)
        det = np.linalg.det(np.linalg.inv(p))
        det = det.reshape(1, self.n_unique)

        lagged = np.dot(xln, a.T)  # NFU
        lagged = np.swapaxes(lagged, 1, 2)  # NUF
        xm = xn-(lagged + m)
        tem = np.einsum('NUF,UFX,NUX->NU', xm, p, xm)

        res = (-self.n_features/2.0)*np.log(2*np.pi) - 0.5*tem - 0.5*np.log(det)

        return res
Example #13
def test_solveh_banded_grad(T=10, D=4):
    """
    Test solveh_banded gradient
    """
    J_diag, J_lower_diag, J_full = make_block_tridiag(T, D)
    J_diag = np.tile(J_diag[None, :, :], (T, 1, 1))
    J_lower_diag = np.tile(J_lower_diag[None, :, :], (T - 1, 1, 1))
    b = npr.randn(T * D)

    J_banded = blocks_to_bands(J_diag, J_lower_diag, lower=True)
    check_grads(solveh_banded, argnum=0, modes=['rev'], order=1)(J_banded,
                                                                 b,
                                                                 lower=True)
    check_grads(solveh_banded, argnum=1, modes=['rev'], order=1)(J_banded,
                                                                 b,
                                                                 lower=True)

    J_banded = blocks_to_bands(J_diag,
                               np.swapaxes(J_lower_diag, -1, -2),
                               lower=False)
    check_grads(solveh_banded, argnum=0, modes=['rev'], order=1)(J_banded,
                                                                 b,
                                                                 lower=False)
    check_grads(solveh_banded, argnum=1, modes=['rev'], order=1)(J_banded,
                                                                 b,
                                                                 lower=False)
Example #14
def logdet_symm_block_tridiag(H_diag, H_upper_diag):
    """
    compute the log determinant of a positive definite,
    symmetric block tridiag matrix.  Use the Kalman
    info filter to do so.  Specifically, the KF computes
    the normalizer:

        log Z = 1/2 h^T J^{-1} h - 1/2 log |J| + n/2 log 2 \pi

    We set h=0 to get -1/2 log |J| + n/2 log 2 \pi and from
    this we solve for log |J|.
    """
    T, D, _ = H_diag.shape
    assert H_diag.ndim == 3 and H_diag.shape[2] == D
    assert H_upper_diag.shape == (T - 1, D, D)

    J_init = J_11 = J_22 = np.zeros((D, D))
    h_init = h_1 = h_2 = np.zeros((D, ))
    log_Z_init = 0

    J_21 = np.swapaxes(H_upper_diag, -1, -2)
    log_Z_pair = 0

    J_node = H_diag
    h_node = np.zeros((T, D))
    log_Z_node = 0

    logZ, _, _ = kalman_info_filter(J_init, h_init, log_Z_init, J_11, J_21,
                                    J_22, h_1, h_2, log_Z_pair, J_node, h_node,
                                    log_Z_node)

    # logZ = -1/2 log |J| + n/2 log 2 \pi
    logdetJ = -2 * (logZ - (T * D) / 2 * np.log(2 * np.pi))
    return logdetJ
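A quick consistency-check sketch for the function above, assuming numpy as np and that its kalman_info_filter dependency is importable; the diagonal blocks are made strongly dominant so the assembled matrix is positive definite:

import numpy as np
T_blk, D = 4, 2
H_upper_diag = 0.1 * np.random.randn(T_blk - 1, D, D)
H_diag = np.tile(10.0 * np.eye(D), (T_blk, 1, 1))
# Assemble the dense symmetric block-tridiagonal matrix as a reference.
J = np.zeros((T_blk * D, T_blk * D))
for t in range(T_blk):
    J[t*D:(t+1)*D, t*D:(t+1)*D] = H_diag[t]
for t in range(T_blk - 1):
    J[t*D:(t+1)*D, (t+1)*D:(t+2)*D] = H_upper_diag[t]
    J[(t+1)*D:(t+2)*D, t*D:(t+1)*D] = H_upper_diag[t].T
assert np.isclose(logdet_symm_block_tridiag(H_diag, H_upper_diag),
                  np.linalg.slogdet(J)[1])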
Example #15
def exppdf(slope):

    maxR = 3.3
    minR = 2.9

    valsReco = np.linspace(minR, maxR, 100)

    I = (np.exp(-slope * minR) - np.exp(-slope * maxR)) / slope

    massbinwidth = (maxR - minR) / 100

    h = np.tensordot(slope, valsReco, axes=0)
    h_ext = np.swapaxes(np.swapaxes(h, 2, 4), 3, 4)

    pdf = np.exp(-h_ext) / I[:, :, np.newaxis, :, :]  # broadcast I over the reco-mass axis

    return pdf * massbinwidth
Example #16
def convert_lds_to_block_tridiag(As, bs, Qi_sqrts, ms, Ri_sqrts):
    """
    Parameterize the LDS in terms of pairwise linear Gaussian dynamics
    and per-timestep Gaussian observations.

        p(x_{1:T}; theta) 
            = [prod_{t=1}^{T-1} N(x_{t+1} | A_t x_t + b_t, Q_t)] 
                * [prod_{t=1}^T N(x_t | m_t, R_t)]  

    We can rewrite this as a Gaussian with a block tridiagonal precision
    matrix J.  The blocks of this matrix are:

    J_{t,t} = A_t.T Q_t^{-1} A_t + Q_{t-1}^{-1} + R_t^{-1}

    J_{t,t+1} = -Q_t^{-1} A_t

    The linear term is h_t

    h_t = -A_t.T Q_t^{-1} b_t + Q_{t-1}^{-1} b_{t-1} + R_t^{-1} m_t 

    We parameterize the model in terms of 

    theta = {A_t, b_t, Q_t^{-1/2}}_{t=1}^{T-1},  {m_t, R_t^{-1/2}}_{t=1}^T
    """
    T, D = ms.shape
    assert As.shape == (T-1, D, D)
    assert bs.shape == (T-1, D)
    assert Qi_sqrts.shape == (T-1, D, D)
    assert Ri_sqrts.shape == (T, D, D)

    # Construct the inverse covariance matrices
    Qis = np.matmul(Qi_sqrts, np.swapaxes(Qi_sqrts, -1, -2))
    Ris = np.matmul(Ri_sqrts, np.swapaxes(Ri_sqrts, -1, -2))

    # Construct the joint, block-tridiagonal precision matrix
    J_lower_diag = -np.matmul(Qis, As)
    J_diag = np.concatenate([-np.matmul(np.swapaxes(As, -1, -2), J_lower_diag), np.zeros((1, D, D))]) \
           + np.concatenate([np.zeros((1, D, D)), Qis]) \
           + Ris

    # Construct the linear term
    h = np.concatenate([np.matmul(J_lower_diag, bs[:, :, None])[:, :, 0], np.zeros((1, D))]) \
      + np.concatenate([np.zeros((1, D)), np.matmul(Qis, bs[:, :, None])[:, :, 0]]) \
      + np.matmul(Ris, ms[:, :, None])[:, :, 0]

    return J_diag, J_lower_diag, h
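A shape-check sketch for the conversion above, assuming numpy as np and random LDS parameters with the shapes asserted in the function:

import numpy as np
T, D = 10, 3
As = np.random.randn(T - 1, D, D)
bs = np.random.randn(T - 1, D)
Qi_sqrts = np.random.randn(T - 1, D, D)
ms = np.random.randn(T, D)
Ri_sqrts = np.random.randn(T, D, D)
J_diag, J_lower_diag, h = convert_lds_to_block_tridiag(As, bs, Qi_sqrts, ms, Ri_sqrts)
assert J_diag.shape == (T, D, D)
assert J_lower_diag.shape == (T - 1, D, D)
assert h.shape == (T, D)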
Example #17
def build_batch(idxs, cache=None):
    try:
        with h5py.File('train_0.h5', 'r') as hf:
            zero_train = hf['train'][:, idxs]
        with h5py.File('train_1.h5', 'r') as hf:
            one_train = hf['train'][:, idxs]
        with h5py.File('train_2.h5', 'r') as hf:
            two_train = hf['train'][:, idxs]
        with h5py.File('train_3.h5', 'r') as hf:
            three_train = hf['train'][:, idxs]
    except:
        ct_train = train_images.shape[0]
        for i in range(4):

            print(
                datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d %H:%M:%S'))

            data = train_images[(i * int(ct_train / 4)):((i + 1) *
                                                         int(ct_train / 4))]
            train_batch = mesh_traversal.mesh_strider_batch(
                adj_mtx, data, coords, r, stride, cache)

            if i == 0:
                with h5py.File('train_0.h5', 'w') as hf:
                    hf.create_dataset("train", data=train_batch)
            elif i == 1:
                with h5py.File('train_1.h5', 'w') as hf:
                    hf.create_dataset("train", data=train_batch)
            elif i == 2:
                with h5py.File('train_2.h5', 'w') as hf:
                    hf.create_dataset("train", data=train_batch)
            elif i == 3:
                with h5py.File('train_3.h5', 'w') as hf:
                    hf.create_dataset("train", data=train_batch)

        # The cached files have now been written; retry so the reads above succeed.
        return build_batch(idxs, cache)

    tr_batch = np.concatenate((zero_train, one_train, two_train, three_train),
                              axis=1)

    tr_batch = np.swapaxes(tr_batch, 1, 0)
    tr_batch = np.swapaxes(tr_batch, 1, 5)
    tr_batch = np.swapaxes(tr_batch, 4, 5)
    tr_batch = np.squeeze(tr_batch, axis=(2, 3))

    return tr_batch
Example #18
def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses lower half of covariance matrix.
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1, 2))
    else:
        raise ArithmeticError
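A small sketch of why only the lower triangle and half the diagonal are kept: for a symmetric matrix, lower_half(M) + lower_half(M).T reconstructs M (assuming numpy as np):

import numpy as np
M = np.random.randn(4, 4)
M = M + M.T                      # make it symmetric
L = lower_half(M)
assert np.allclose(L + L.T, M)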
Example #19
def convolve_tensor(a, b):
    """
    Convolves two tensorized arrays; this is the most efficient convolution method.
    :param a: first array to be convolved
    :param b: second array to be convolved
    :return: convolved array
    """
    b = as_strided_seq(b, 5, 1)
    b = np.moveaxis(b, [0, 1, 2, 3, 4, 5], [0, 3, 4, 5, 1, 2])
    b = np.moveaxis(b, 5, 1)
    try:
        out = npo.einsum(a, [12, 1, 10, 11], b, [4, 12, 10, 11, 8, 9])
        out = np.swapaxes(out, 0, 1)
    except:
        a = a._value
        out = npo.einsum(a, [12, 1, 10, 11], b, [4, 12, 10, 11, 8, 9])
        out = np.swapaxes(out, 0, 1)
    return out
Example #20
def lower_half(mat):
    # Takes the lower half of the matrix, and half the diagonal.
    # Necessary since numpy only uses lower half of covariance matrix.
    if len(mat.shape) == 2:
        return 0.5 * (np.tril(mat) + np.triu(mat, 1).T)
    elif len(mat.shape) == 3:
        return 0.5 * (np.tril(mat) + np.swapaxes(np.triu(mat, 1), 1,2))
    else:
        raise ArithmeticError
Example #21
    def _laplace_neg_hessian_params_to_hs(self, x, J_ini, J_dyn_11, J_dyn_21,
                                          J_dyn_22, J_obs):
        h_ini = J_ini @ x[0]

        h_dyn_1 = (J_dyn_11 @ x[:-1][:, :, None])[:, :, 0]
        h_dyn_1 += (np.swapaxes(J_dyn_21, -1, -2) @ x[1:][:, :, None])[:, :, 0]

        h_dyn_2 = (J_dyn_22 @ x[1:][:, :, None])[:, :, 0]
        h_dyn_2 += (J_dyn_21 @ x[:-1][:, :, None])[:, :, 0]

        h_obs = (J_obs @ x[:, :, None])[:, :, 0]
        return h_ini, h_dyn_1, h_dyn_2, h_obs
Example #22
    def Cs(self):
        D = self.D
        T = lambda X: np.swapaxes(X, -1, -2)

        Bs = 0.5 * (self._Ms - T(self._Ms))  # Bs is skew symmetric
        Fs = np.matmul(T(self._As), self._As) - Bs
        trm1 = np.concatenate((np.eye(D) - Fs, 2 * self._As), axis=1)
        trm2 = np.eye(D) + Fs
        Cs = T(np.linalg.solve(T(trm2), T(trm1)))
        assert np.allclose(np.matmul(T(Cs), Cs),
                           np.tile(np.eye(D)[None, :, :], (Cs.shape[0], 1, 1)))
        return Cs
Example #23
    def _hessian(self, sigma, Y, K_X):
        by_dim = False
        n_y, d = Y.shape
        K = self._weighted_kernel(sigma, Y, None, K_X)
        hessian = np.zeros((d * d, n_y, n_y))
        Y = np.array(Y, order='F')
        for i in range(d):
            for j in range(d):
                c_start, c_end = j * n_y, j * n_y + n_y
                r_start, r_end = i * n_y, i * n_y + n_y
                tmp = self._hessian_bloc_dim(sigma, Y[:, i], Y[:, j], K, i, j)
                tmp = np.reshape(tmp, [1, tmp.shape[0], tmp.shape[0]])
                if i == 0 and j == 0:
                    hessian = 1. * tmp
                else:
                    hessian = np.concatenate([hessian, tmp], axis=0)
        hessian = np.reshape(hessian, [d, d, n_y, n_y])
        hessian = np.swapaxes(hessian, 0, 2)
        hessian = np.swapaxes(hessian, 2, 3)
        hessian = np.reshape(hessian, [d * n_y, d * n_y])

        return hessian
Example #24
    def sample(cls, params=None, nat_params=None, size=1):
        # Sample from P( x | Ѳ; α )
        assert (params is None) ^ (nat_params is None)

        (alpha, ) = params if params is not None else cls.natToStandard(
            *nat_params)

        ans = np.swapaxes(
            np.array(
                [Dirichlet.sample(params=(a, ), size=size) for a in alpha]), 0,
            1)
        cls.checkShape(ans)
        return ans
Example #25
def train_blstm(X_train,
                Y_train,
                X_test,
                Y_test,
                cap_train,
                cap_test,
                train_mask,
                test_mask,
                num_hiddens=100,
                batch_size=15,
                L1_REG=1e-5,
                step_size=0.001,
                num_iters=5000,
                init_params=None,
                one_hot=None):
    '''
        X_train and X_test are N x W x D arrays, where N is the number of
        sentences, W is the max number of words in a sentence, and D is the
        dimensionality of the word-vector representation.
        For Y_train and Y_test the third dimension D is the one-hot
        representation of the part of speech, described by the dict one_hot.
        For cap_train and cap_test the third dimension is the cap_vector.
        train_mask and test_mask are N-dimensional vectors where each item i
        is the number of words in sentence i.
    '''
    if one_hot is None:
        with open('storage/one_hot_list', 'rb') as f:
            one_hot = pickle.load(f)

    X_train = np.swapaxes(X_train, 0, 1)
    Y_train = np.swapaxes(Y_train, 0, 1)
    X_test = np.swapaxes(X_test, 0, 1)
    Y_test = np.swapaxes(Y_test, 0, 1)
    cap_train = np.swapaxes(cap_train, 0, 1)
    cap_test = np.swapaxes(cap_test, 0, 1)

    index_generator = batch_index_generator(X_train.shape[1],
                                            batch_size=batch_size)
    if init_params is None:
        init_params = init_blstm_params(input_size=X_train.shape[2],
                                        output_size=Y_train.shape[2],
                                        state_size=num_hiddens,
                                        param_scale=0.05)

    def training_loss(params, iter):
        sample_indices = next(index_generator)
        log_lik = -log_likelihood(params,
                                  X_train[:, sample_indices, :],
                                  Y_train[:, sample_indices, :],
                                  train_mask[sample_indices],
                                  cap_train[:, sample_indices, :])
        return log_lik + L1_REG * l1_norm(params)

    trained_params = adam(grad(training_loss),  # autograd's grad of the loss above (assumed imported)
                          init_params,
                          step_size=step_size,
                          num_iters=num_iters)
    return trained_params
Example #26
def test_multivariate_normal_logpdf_unique_params(D=10):
    # Test broadcasting over datapoints and corresponding parameters
    leading_ndim = npr.randint(1, 4)
    shp = npr.randint(1, 10, size=leading_ndim)
    x = npr.randn(*shp, D)
    mu = npr.randn(*shp, D)
    L = npr.randn(*shp, D, D)
    Sigma = np.matmul(L, np.swapaxes(L, -1, -2))

    ll1 = multivariate_normal_logpdf(x, mu, Sigma)
    ll2 = np.empty(shp)
    for inds in product(*[np.arange(s) for s in shp]):
        ll2[inds] = mvn.logpdf(x[inds], mu[inds], Sigma[inds])
    assert np.allclose(ll1, ll2)
Example #27
    def _m_step_ar(self, expectations, datas, inputs, masks, tags, num_em_iters):
        K, D, M, lags = self.K, self.D, self.M, self.lags

        # Collect data for this dimension
        xs, ys, Ezs = [], [], []
        for (Ez, _, _), data, input, mask, tag in zip(expectations, datas, inputs, masks, tags):
            # Only use data if it is complete
            if not np.all(mask):
                raise Exception("Encountered missing data in AutoRegressiveObservations!") 

            xs.append(
                np.hstack([data[self.lags-l-1:-l-1] for l in range(self.lags)] 
                          + [input[self.lags:, :self.M], np.ones((data.shape[0]-self.lags, 1))]))
            ys.append(data[self.lags:])
            Ezs.append(Ez[self.lags:])

        for itr in range(num_em_iters):
            # Compute expected precision for each data point given current parameters
            taus = []
            for x, y in zip(xs, ys):
                # mus = self._compute_mus(data, input, mask, tag)
                # sigmas = self._compute_sigmas(data, input, mask, tag)
                Afull = np.concatenate((self.As, self.Vs, self.bs[:, :, None]), axis=2)
                mus = np.matmul(Afull[None, :, :, :], x[:, None, :, None])[:, :, :, 0]
                sigmas = np.exp(self.inv_sigmas)

                # nu: (K,)  mus: (T, K, D)  sigmas: (K, D)  y: (T, D)  -> tau: (T, K, D)
                alpha = np.exp(self.inv_nus[:, None])/2 + 1/2
                beta = np.exp(self.inv_nus[:, None])/2 + 1/2 * (y[:, None, :] - mus)**2 / sigmas
                taus.append(alpha / beta)

            # Fit the weighted linear regressions for each K and D
            J = np.tile(np.eye(D * lags + M + 1)[None, None, :, :], (K, D, 1, 1))
            h = np.zeros((K, D,  D*lags + M + 1,))
            for x, y, Ez, tau in zip(xs, ys, Ezs, taus):
                robust_ar_statistics(Ez, tau, x, y, J, h)

            mus = np.linalg.solve(J, h)
            self.As = mus[:, :, :D*lags]
            self.Vs = mus[:, :, D*lags:D*lags+M]
            self.bs = mus[:, :, -1]

            # Fit the variance
            sqerr = 0
            weight = 0
            for x, y, Ez, tau in zip(xs, ys, Ezs, taus):
                yhat = np.matmul(x[None, :, :], np.swapaxes(mus, -1, -2))
                sqerr += np.einsum('tk, tkd, ktd -> kd', Ez, tau, (y - yhat)**2)
                weight += np.sum(Ez, axis=0)
            self.inv_sigmas = np.log(sqerr / weight[:, None] + 1e-16)
Example #28
 def vjp(g):
     ge, gu = g
     ge = _matrix_diag(ge)
     f = 1/(e[..., anp.newaxis, :] - e[..., :, anp.newaxis] + 1.e-20)
     f -= _diag(f)
     ut = anp.swapaxes(u, -1, -2)
     r1 = f * _dot(ut, gu)
     r2 = -f * (_dot(_dot(ut, anp.conj(u)), anp.real(_dot(ut, gu)) * anp.eye(n)))
     r = _dot(_dot(anp.linalg.inv(ut), ge + r1 + r2), ut)
     if not anp.iscomplexobj(x):
         r = anp.real(r)
         # For real input the derivative is complex in general (an imaginary delta is allowed),
         # but when imaginary deltas are forbidden the derivative of a real-input,
         # real-output function must itself be real.
     return r
Example #29
    def Cs(self):
        # See https://pubs.acs.org/doi/pdf/10.1021/acs.jpca.5b02015
        # for a derivation of the rational Cayley transform.
        D = self.D
        T = lambda X: np.swapaxes(X, -1, -2)

        Bs = 0.5 * (self._Ms - T(self._Ms))  # Bs is skew symmetric
        Fs = np.matmul(T(self._As), self._As) - Bs
        trm1 = np.concatenate((np.eye(D) - Fs, 2 * self._As), axis=1)
        trm2 = np.eye(D) + Fs
        Cs = T(np.linalg.solve(T(trm2), T(trm1)))
        assert np.allclose(np.matmul(T(Cs), Cs),
                           np.tile(np.eye(D)[None, :, :], (Cs.shape[0], 1, 1)))
        return Cs
Example #30
def tensorize_and_convolve_fmri(a, adj_mtx, vals_list, coords, r, stride):
    """
    Strides the mesh and applies the convolution operation. Prepares tensors within the function, so it is not as
    efficient as mesh_convolve_tensorized(). If operating on already strided data, use mesh_convolve_tensorized() or
    mesh_convolve_tensorized_dyn().
    :param a: list of filters
    :param adj_mtx: adjacency matrix
    :param coords: coordinates of each vertex
    :return: result of the convolution operation
    """

    vals_list = np.expand_dims(vals_list, axis=1)
    vals_list = np.swapaxes(vals_list, 2, 3)

    try:
        vals_list = vals_list._value
    except:
        pass

    strided_mesh = mesh_strider_batch(adj_mtx, vals_list, coords, r, stride,
                                      None)
    strided_vers = np.squeeze(np.array(strided_mesh))
    a = np.array([a])
    try:
        out = npo.einsum(a, [5, 3, 4, 2], strided_vers, [0, 1, 2, 3])
    except:
        try:
            a = a._value
            out = npo.einsum(a, [5, 3, 4, 2], strided_vers, [0, 1, 2, 3])
        except:
            strided_vers = strided_vers._value
            out = npo.einsum(a, [5, 3, 4, 2], strided_vers, [0, 1, 2, 3])

    #out = out[0]
    out = np.swapaxes(out, 0, 1)
    out = np.swapaxes(out, 1, 2)
    return out
Example #31
def conjugate_transpose(matrix):
    """
    Compute the conjugate transpose of a matrix.
    Args:
    matrix :: numpy.ndarray - the matrix to compute
        the conjugate transpose of
    Returns:
    conjugate_transpose_ :: numpy.ndarray - the conjugate transpose
        of matrix
    """
    conjugate_transpose_ = anp.conjugate(anp.swapaxes(matrix, -1, -2))

    return conjugate_transpose_
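A quick sketch checking the helper above against numpy's own conjugate transpose (assuming autograd.numpy is importable as anp, as in the snippet):

import autograd.numpy as anp
M = anp.array([[1.0 + 2.0j, 3.0], [4.0j, 5.0]])
assert anp.allclose(conjugate_transpose(M), M.conj().T)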
Example #32
def trace_product(A, B):
    """ Compute trace of the matrix product A*B efficiently.

    A, B can be 2D or 3D arrays, in which case the trace is computed along
    the last two axes. In this case, the function will return an array.
    Computed using the fact that tr(AB) = sum_{ij}A_{ij}B_{ji}.
    """
    ndimsA = A.ndim
    ndimsB = B.ndim
    assert ndimsA == ndimsB, "Both A and B must have same number of dimensions."
    assert ndimsA <= 3, "A and B must have 3 or fewer dimensions"

    # We'll take the trace along the last two dimensions.
    BT = np.swapaxes(B, -1, -2)
    return np.sum(A*BT, axis=(-1, -2))
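A sketch checking trace_product against an explicit trace for a single pair and for a stack of matrices (assuming numpy as np):

import numpy as np
A, B = np.random.randn(4, 4), np.random.randn(4, 4)
assert np.isclose(trace_product(A, B), np.trace(A @ B))
A3, B3 = np.random.randn(5, 4, 4), np.random.randn(5, 4, 4)
assert np.allclose(trace_product(A3, B3),
                   [np.trace(a @ b) for a, b in zip(A3, B3)])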
Example #33
def expm_eigh(h):
    """
    Compute the unitary operator of a hermitian matrix.
    U = expm(-1j * h)

    Arguments:
    h :: ndarray (N X N) - The matrix to exponentiate, which must be hermitian.
    
    Returns:
    expm_h :: ndarray(N x N) - The unitary operator of h.
    """
    eigvals, p = anp.linalg.eigh(h)
    p_dagger = anp.conjugate(anp.swapaxes(p, -1, -2))
    d = anp.exp(-1j * eigvals)
    return anp.matmul(p * d, p_dagger)
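A sketch comparing expm_eigh with a dense matrix exponential and checking unitarity (assuming autograd.numpy as anp and scipy are available; the input is a real symmetric, hence Hermitian, matrix):

import autograd.numpy as anp
import scipy.linalg
h = anp.arange(16.0).reshape(4, 4)
h = 0.5 * (h + h.T)                                   # real symmetric => Hermitian
U = expm_eigh(h)
assert anp.allclose(U @ U.conj().T, anp.eye(4))       # U is unitary
assert anp.allclose(U, scipy.linalg.expm(-1j * h))    # agrees with dense expm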
Example #34
 def T(X): return np.swapaxes(X, -1, -2) if np.ndim(X) > 1 else X
 def symmetrize(X): return 0.5 * (X + T(X))
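A sketch of the two helpers above on a stack of matrices and on a 1-D array (assuming numpy as np):

import numpy as np
A = np.arange(18.0).reshape(2, 3, 3)
S = symmetrize(A)
assert np.allclose(S, np.swapaxes(S, -1, -2))    # each matrix in the stack is symmetric
assert symmetrize(np.arange(4.0)).shape == (4,)  # 1-D input passes through T unchanged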
Example #35
def generalized_outer_product(x):
    if np.ndim(x) == 1:
        return np.outer(x, x)
    return np.matmul(x, np.swapaxes(x, -1, -2))
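A sketch of generalized_outer_product on a single vector and on a batch of column vectors (assuming numpy as np):

import numpy as np
v = np.array([1.0, 2.0, 3.0])
assert np.allclose(generalized_outer_product(v), np.outer(v, v))
X = np.ones((5, 3, 1))
assert generalized_outer_product(X).shape == (5, 3, 3)   # batched outer products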
Example #36
import autograd.numpy.random as npr
import autograd.scipy.linalg as spla
from autograd.util import *
from autograd import grad
from builtins import range

npr.seed(1)

def check_symmetric_matrix_grads(fun, *args):
    def symmetrize(A):
        L = np.tril(A)
        return (L + T(L))/2.
    new_fun = lambda *args: fun(symmetrize(args[0]), *args[1:])
    return check_grads(new_fun, *args)

T = lambda A : np.swapaxes(A, -1, -2)

def rand_psd(D):
    mat = npr.randn(D,D)
    return np.dot(mat, mat.T)

def test_inv():
    def fun(x): return to_scalar(np.linalg.inv(x))
    d_fun = lambda x : to_scalar(grad(fun)(x))
    D = 8
    mat = npr.randn(D, D)
    mat = np.dot(mat, mat) + 1.0 * np.eye(D)
    check_grads(fun, mat)
    check_grads(d_fun, mat)

def test_inv_3d():