Example #1
def alt_least_squares_prox_iter(A, first, size, fixed_vecs, factors, lambda_,
                                Lk, x_start):
    num_fixed = fixed_vecs.shape[0]
    YTY = fixed_vecs.T.dot(fixed_vecs)
    eye = np.eye(num_fixed)
    lambda_eye = lambda_ * np.eye(factors)
    half_L_eye = 0.5 * Lk * np.eye(factors)
    solve_vecs = np.zeros((size, factors))

    x0 = x_start.reshape((-1, factors))

    for i in range(size):
        if first:
            counts_i = A[i]
        else:
            counts_i = A[:, i].T
        CuI = np.eye(counts_i.shape[0])
        np.fill_diagonal(CuI, counts_i)

        pu = counts_i.copy()
        pu[np.where(pu != 0)] = 1.0
        YTCuIY = fixed_vecs.T.dot(CuI).dot(fixed_vecs)
        YTCupu = fixed_vecs.T.dot(CuI + eye).dot(pu.T)
        xu = spsolve(YTY + YTCuIY + lambda_eye + half_L_eye,
                     YTCupu + 0.5 * Lk * x0[i])
        solve_vecs[i] = xu

    return solve_vecs
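A minimal sketch of how this update might be called, assuming numpy is imported as np, spsolve comes from scipy.sparse.linalg, and the shapes below are purely illustrative (SciPy converts the dense system to sparse with an efficiency warning):

import numpy as np
from scipy.sparse.linalg import spsolve  # assumed import used by the function above

rng = np.random.default_rng(0)
n_users, n_items, factors = 6, 5, 3

A = rng.random((n_users, n_items))           # user-item confidence matrix
item_vecs = rng.random((n_items, factors))   # fixed item factors
x_start = rng.random(n_users * factors)      # previous user factors (proximal anchor)

user_vecs = alt_least_squares_prox_iter(A, True, n_users, item_vecs,
                                        factors, lambda_=0.1, Lk=1.0,
                                        x_start=x_start)
print(user_vecs.shape)  # (6, 3)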
Example #2
def rbk_relative_σ(db, X, Y=None):  # TODO: Y is currently ignored; this should accept two inputs
    D = sklearn.metrics.pairwise.pairwise_distances(X,
                                                    metric='euclidean',
                                                    n_jobs=1)
    K = np.exp(-(D * D * db['Σ']) / 2.0)
    np.fill_diagonal(K, 0)
    return K
Example #3
def distance_to_weights(D):
    """Compute the weight matrix W from the distance matrix D.

    The weight matrix corresponding to a distance matrix `D = [d_ij]` is
    given by `W = [w_ij]` with

        .. math:: w_{ij} = \frac{1}{\sqrt{1 - \cos^2(d_{ij})}}

    Since this is undefined when `d_ij = 0`, the diagonal entries of `W`
    are set to 1.

    Parameters
    ----------
    D : ndarray (n,n)
        Distance matrix. Must be square and contain no off-diagonal zeros.
    
    Returns
    -------
    W : ndarray (n,n)
        Weights matrix.

    """
    # TODO: no longer identical to pmds version. This version should
    # always be used.
    W_inv = (1 - np.cos(D)**2)
    # Adding the identity before inverting keeps the (zero) diagonal of W_inv
    # from causing a division by zero; the diagonal is then overwritten with 1.
    W = np.sqrt((W_inv + np.eye(D.shape[0]))**-1)
    np.fill_diagonal(W, 1)
    return W
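A quick check of the formula on a toy distance matrix (illustrative values, assuming numpy is imported as np as in the snippet above): the diagonal is forced to 1 and an off-diagonal entry matches 1 / sqrt(1 - cos^2(d_ij)).

import numpy as np

D = np.array([[0.0, 0.5, 1.2],
              [0.5, 0.0, 0.9],
              [1.2, 0.9, 0.0]])
W = distance_to_weights(D)

assert np.allclose(np.diag(W), 1.0)
assert np.isclose(W[0, 1], 1.0 / np.sqrt(1.0 - np.cos(0.5) ** 2))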
Example #4
def hess_lnpost(ws, fdensity, alpha, sig):
    print('hess')
    # print(ws)
    mo = np.exp(-4.)
    # hval = hfunc(ws)
    ws = ws.reshape((n_grid, n_grid))
    # calc l1
    lsis = np.array([-1 * np.sum(psi(index)**2) / sig_noise**2
                     for (index, w) in np.ndenumerate(ws)])
    lsis = lsis.reshape((n_grid, n_grid))
    l1 = lsis  # *np.sum((Psi(ws)-data)/2/sig_noise**2)
    xsi = ((1. - fdensity) * gaussian(np.log(ws), loc=np.log(mo), scale=sig) / ws
           + fdensity * (ws**alpha / w_norm))
    dxsi = (-1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (1. - fdensity) / ws**2
            - (1. - fdensity) * np.log(ws / mo) * np.exp(-np.log(ws / mo)**2 / 2 / sig**2)
            / np.sqrt(2 * np.pi) / ws**2 / sig**3
            + fdensity * alpha * ws**(alpha - 1) / w_norm)
    dxsi_st = (-1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (1. - fdensity) / ws**2
               - (1. - fdensity) * np.log(ws / mo) * np.exp(-np.log(ws / mo)**2 / 2 / sig**2)
               / np.sqrt(2 * np.pi) / ws**2 / sig**3)
    ddxsi_st = (-1 * dxsi_st / ws - dxsi_st * np.log(ws / mo) / ws / sig**2
                - (1. - fdensity) * (1 / np.sqrt(2 * np.pi) / sig)
                * np.exp(-np.log(ws / mo)**2 / 2 / sig**2)
                * (1 / sig**2 - np.log(ws / mo) / sig**2 - 1) / ws**3)
    ddxsi = ddxsi_st + fdensity * alpha * (alpha - 1) * ws**(alpha - 2) / w_norm
    l2 = -1 * (dxsi / xsi)**2 + ddxsi / np.absolute(xsi)
    l_tot = l1 + l2
    # those are the diagonal terms, now need to build the off-diagonal part
    hess_m = np.zeros((n_grid**2, n_grid**2))
    np.fill_diagonal(hess_m, l_tot)
    '''
    for i in range(0, n_grid**2):
        for j in range(i + 1, n_grid**2):
            ind1 = (int(i / n_grid), i % n_grid)
            ind2 = (int(j / n_grid), j % n_grid)
            hess_m[i, j] = -1 * np.sum(psi(ind1) * psi(ind2)) / sig_noise**2

    hess_m = symmetrize(hess_m)
    '''
    print('hess fin')
    # print(l_tot)
    # print('new it')
    # print(np.average(hval[0][:][:] - hess_m))
    return -1 * hess_m
Example #5
def corrmat_from_vec(v):
    """
    Convert a vector of correlations to a matrix.
    Elements of v are read out row-wise.
    """
    C = vec_to_U(v)
    C = C + C.T  # symmetrize
    np.fill_diagonal(C, 1)
    return C
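vec_to_U is not shown in this example; a minimal stand-in consistent with the docstring (elements of v fill the strict upper triangle row-wise) might look like the hypothetical helper below.

import numpy as np

def vec_to_U(v):
    """Hypothetical helper: place v into the strict upper triangle, row by row."""
    v = np.asarray(v, dtype=float)
    n = int((1 + np.sqrt(1 + 8 * v.size)) // 2)  # solve n*(n-1)/2 == len(v)
    U = np.zeros((n, n))
    U[np.triu_indices(n, k=1)] = v               # row-wise fill of the upper triangle
    return U

# corrmat_from_vec([0.3, -0.1, 0.5]) would then return a 3x3 correlation
# matrix with ones on the diagonal.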
Example #6
    def _init_params(self, data, lengths=None, params='stmp'):
        X = data['obs']

        if 's' in params:
            self.startprob_.fill(1.0 / self.n_components)

        if 't' in params or 'm' in params or 'p' in params:

            kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                   random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_

        if 't' in params:
            # TODO: estimate transitions from data (!) / consider n_tied=1
            if self.n_tied == 0:
                transmat = np.ones([self.n_components, self.n_components])
                np.fill_diagonal(transmat, 10.0)
                self.transmat_ = transmat  # .90 for self-transition

            else:
                transmat = np.zeros((self.n_components, self.n_components))
                transmat[range(self.n_components),
                         range(self.n_components)] = 100.0  # diagonal
                transmat[range(self.n_components - 1),
                         range(1, self.n_components)] = 1.0  # diagonal + 1
                transmat[[
                    r * (self.n_chain) - 1
                    for r in range(1, self.n_unique + 1)
                    for c in range(self.n_unique - 1)
                ], [
                    c * (self.n_chain) for r in range(self.n_unique)
                    for c in range(self.n_unique) if c != r
                ]] = 1.0

                self.transmat_ = np.copy(transmat)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    mu_init[u][f] = kmeans[u, f]

            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = np.zeros(
                (self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                if self.n_features == 1:
                    precision_init[u] = np.linalg.inv(
                        np.cov(X[kmmod.labels_ == u], bias=1))
                else:
                    precision_init[u] = np.linalg.inv(
                        np.cov(np.transpose(X[kmmod.labels_ == u])))

            self.precision_ = np.copy(precision_init)
Example #7
 def cost(X):
     Y = np.dot(X, X.T)
     # Shift the exponentials by the maximum value to reduce numerical
     # trouble due to possible overflows.
     s = np.triu(Y, 1).max()
     expY = np.exp((Y - s) / epsilon)
     # Zero out the diagonal
     np.fill_diagonal(expY, np.zeros(n))
     u = np.triu(expY, 1).sum()
     return s + epsilon * np.log(u)
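The shift-and-exponentiate pattern above is a numerically stabilised soft maximum over the strict upper triangle of Y = X X^T; assuming SciPy is available, the same value can be written with scipy.special.logsumexp (a sketch with illustrative shapes and a locally defined epsilon):

import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
n, epsilon = 5, 0.1
X = rng.standard_normal((n, 3))
Y = X @ X.T

upper = Y[np.triu_indices(n, k=1)]        # strict upper-triangular entries
smooth_max = epsilon * logsumexp(upper / epsilon)
# cost(X) above evaluates the same quantity, shifting by the maximum entry
# first so the exponentials cannot overflow.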
Example #9
def fit(kernel_x, kernel_y, base_density, X, Y, lmbda):
    n_y, d_y = Y.shape
    K_X = kernel_x.kernel(X)

    h = compute_h(kernel_y, base_density, Y, K_X)
    G = compute_G(kernel_y, Y, K_X)

    np.fill_diagonal(G, np.diag(G) + n_y * lmbda)
    cho_lower = lg.cho_factor(G)
    beta = lg.cho_solve(cho_lower, h / lmbda)
    return beta.reshape(n_y, d_y)
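The fill_diagonal call adds the ridge term n_y * lmbda to the Gram matrix in place; for a dense G this is equivalent to adding n_y * lmbda * np.eye(n_y) without allocating an identity matrix. A small check, assuming numpy as np:

import numpy as np

rng = np.random.default_rng(0)
n_y, lmbda = 4, 0.05
G = rng.standard_normal((n_y, n_y))
G = G @ G.T                                     # symmetric, PSD Gram-like matrix

G_ridge = G + n_y * lmbda * np.eye(n_y)         # out-of-place version
np.fill_diagonal(G, np.diag(G) + n_y * lmbda)   # in-place version used above
assert np.allclose(G, G_ridge)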
 def egrad(Y):
     """Derivative of the cost function."""
     # tmp = -1*(np.ones(D.shape) - (Y.T@Y)**2 + np.eye(D.shape[0]))**(-0.5)
     zero_tol = 1e-12
     ip = acos_validate(Y.T @ Y)
     tmp = np.ones(D.shape) - (ip)**2
     idx = np.where(np.abs(tmp) < zero_tol)  # Avoid division by zero.
     tmp[idx] = 1
     tmp = -1 * tmp**(-0.5)
     fill_val = np.min(tmp)  # All entries are negative.
     tmp[idx] = fill_val  # Make non-diagonal zeros large.
     np.fill_diagonal(tmp, 0)  # Ignore known zeros on diagonal.
     return 2 * Y @ ((np.arccos(np.abs(ip)) - D) * tmp * np.sign(ip))
Example #11
    def _init_params(self, data, lengths=None, params='stmp'):
        X = data['obs']

        if 's' in params:
            self.startprob_.fill(1.0 / self.n_components)

        if 't' in params or 'm' in params or 'p' in params:

            kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                   random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_

        if 't' in params:
            # TODO: estimate transitions from data (!) / consider n_tied=1
            if self.n_tied == 0:
                transmat = np.ones([self.n_components, self.n_components])
                np.fill_diagonal(transmat, 10.0)
                self.transmat_ = transmat  # .90 for self-transition

            else:
                transmat = np.zeros((self.n_components, self.n_components))
                transmat[range(self.n_components),
                         range(self.n_components)] = 100.0  # diagonal
                transmat[range(self.n_components-1),
                         range(1, self.n_components)] = 1.0  # diagonal + 1
                transmat[[r * (self.n_chain) - 1
                          for r in range(1, self.n_unique+1)
                          for c in range(self.n_unique-1)],
                         [c * (self.n_chain)
                          for r in range(self.n_unique)
                          for c in range(self.n_unique) if c != r]] = 1.0

                self.transmat_ = np.copy(transmat)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    mu_init[u][f] = kmeans[u, f]

            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = np.zeros((self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                if self.n_features == 1:
                    precision_init[u] = np.linalg.inv(np.cov(X[kmmod.labels_ == u], bias = 1))
                else:
                    precision_init[u] = np.linalg.inv(np.cov(np.transpose(X[kmmod.labels_ == u])))

            self.precision_ = np.copy(precision_init)
def hess_k(ws, fdensity, alpha, sig, psf_k):
    #print('hess_k begin');
    #mo = np.exp(-4.);
    #ws = real_to_complex(ws);
    #ws = ws.reshape((n_grid,n_grid));
    #ws = np.real(fft.ifft2(ws));
    #calc l1 we only get diagonals here
    l1 = -1 * (psf_k**2 / sig_noise**2 / n_grid**2).flatten()
    hess_l1 = np.zeros((2 * n_grid**2, 2 * n_grid**2), dtype=complex)
    np.fill_diagonal(hess_l1, complex_to_real(l1))
    l_tot = hess_l1
    #print('hess is:');
    print(l_tot)
    return l_tot
Example #13
    def m_step(self, expectations, datas, inputs, masks, tags, samples, **kwargs):
        # Update the transition matrix between super states
        P = sum([np.sum(Ezzp1, axis=0) for _, Ezzp1, _ in expectations]) + 1e-16
        np.fill_diagonal(P, 0)
        P /= P.sum(axis=-1, keepdims=True)
        self.Ps = P

        # Fit negative binomial models for each duration based on sampled states
        states, durations = map(np.concatenate, zip(*[rle(z_smpl) for z_smpl in samples]))
        for k in range(self.K):
            self.rs[k], self.ps[k] = \
                fit_negative_binomial_integer_r(durations[states == k], self.r_min, self.r_max)

        # Reset the transition matrix
        self._transition_matrix = None
Example #14
    def __init__(self, K, D, M=0):
        super(NegativeBinomialSemiMarkovTransitions, self).__init__(K, D, M=M)

        # Initialize the super state transition probabilities
        self.Ps = npr.rand(K, K)
        np.fill_diagonal(self.Ps, 0)
        self.Ps /= self.Ps.sum(axis=1, keepdims=True)

        # Initialize the negative binomial duration probabilities
        self.rs = npr.randint(1, 11, size=K)
        # self.rs = np.ones(K, dtype=int)
        # self.ps = npr.rand(K)
        self.ps = 0.5 * np.ones(K)

        # Initialize the transition matrix
        self._trans_matrix = None
Example #15
def fubinistudy(X):
    """Distance matrix of X using Fubini-Study metric.
    
    Parameters
    ----------
    X : ndarray (complex, d,n)
        Data.
    Returns
    -------
    D : ndarray (real, n,n)
        Distance matrix.
    
    """

    D = np.arccos(np.sqrt((X.conj().T @ X) * (X.conj().T @ X).conj().T))
    np.fill_diagonal(D, 0)  # Things work better if diagonal is exactly zero.
    return np.real(D)
Example #16
    def __init__(self, K, D, M=0, r_min=1, r_max=20):
        assert K > 1, "Explicit duration models only work if num states > 1."
        super(NegativeBinomialSemiMarkovTransitions, self).__init__(K, D, M=M)

        # Initialize the super state transition probabilities
        self.Ps = npr.rand(K, K)
        np.fill_diagonal(self.Ps, 0)
        self.Ps /= self.Ps.sum(axis=1, keepdims=True)

        # Initialize the negative binomial duration probabilities
        self.r_min, self.r_max = r_min, r_max
        self.rs = npr.randint(r_min, r_max + 1, size=K)
        # self.rs = np.ones(K, dtype=int)
        # self.ps = npr.rand(K)
        self.ps = 0.5 * np.ones(K)

        # Initialize the transition matrix
        self._transition_matrix = None
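The initialisation pattern used here (random off-diagonal mass, zero self-transitions, rows normalised to one) can be reproduced on its own; a short sketch assuming numpy as np and numpy.random as npr:

import numpy as np
import numpy.random as npr

K = 4
Ps = npr.rand(K, K)
np.fill_diagonal(Ps, 0)              # super states never transition to themselves
Ps /= Ps.sum(axis=1, keepdims=True)  # each row becomes a probability distribution

assert np.allclose(Ps.sum(axis=1), 1.0)
assert np.allclose(np.diag(Ps), 0.0)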
Example #17
def complex_as_matrix(z, n):
    """Represent a complex number as a matrix.
    
    Parameters
    ----------
    z : complex float
    n : int (even)
    
    Returns
    -------
    Z : ndarray (n,n)
        Real-valued n*n tri-diagonal matrix representing z in the ring of n*n matrices.
        
    """

    Z = np.zeros((n, n))
    ld = np.zeros(n - 1)
    ld[0::2] = np.imag(z)
    np.fill_diagonal(Z[1:], ld)
    Z = Z - Z.T
    np.fill_diagonal(Z, np.real(z))
    return Z
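A quick sanity check of the representation (a sketch, assuming numpy as np): each 2x2 block acts like a + bi, so matrix products and sums should agree with complex products and sums.

import numpy as np

z1, z2 = 1.0 + 2.0j, -0.5 + 0.25j
n = 4
Z1, Z2 = complex_as_matrix(z1, n), complex_as_matrix(z2, n)

assert np.allclose(Z1 @ Z2, complex_as_matrix(z1 * z2, n))
assert np.allclose(Z1 + Z2, complex_as_matrix(z1 + z2, n))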
Example #18
def Kx_D_given_W(db, setX=None, setW=None):
    if setX is None: X = db['data'].X
    else: X = setX

    if setW is None: W = db['W']
    else: W = setW
    outX = X.dot(W)
    #print(outX[0:5,0:5])

    if db['kernel_type'] == 'rbf':
        Kx = rbk_sklearn(outX, db['data'].σ)
    elif db['kernel_type'] == 'relative':
        Kx = rbk_relative_σ(db, outX)
    elif db['kernel_type'] == 'rbf_slow':
        Kx = rbk_sklearn(outX, db['data'].σ)
    elif db['kernel_type'] == 'linear':
        Kx = outX.dot(outX.T)
    elif db['kernel_type'] == 'polynomial':
        Kx = poly_sklearn(outX, db['poly_power'], db['poly_constant'])
    elif db['kernel_type'] == 'squared':
        Kx = squared_kernel(outX)
    elif db['kernel_type'] == 'multiquadratic':
        Kx = multiquadratic_kernel(outX)
    elif db['kernel_type'] == 'mkl':  # multiple kernel learning
        Kx = mkl_kernel(db)
    else:
        print(
            '\nError  in kernel_lib.py, within Kx_D_given_W, unrecognized kernel type : %s\n\n'
            % db['kernel_type'])
        sys.exit()

    np.fill_diagonal(Kx, 0)  #	Set diagonal of adjacency matrix to 0
    D = compute_inverted_Degree_matrix(Kx)

    #if np.isnan(D).any():
    #	Kx = Kx - np.min(Kx)
    #	D = compute_inverted_Degree_matrix(Kx)

    return [Kx, D]
Example #19
def doubleIntAI(simulation, iterations):
    # environment parameters
    x = np.zeros((hidden_states, temp_orders_states))  # position

    v = np.zeros((hidden_causes, temp_orders_states - 1))
    y = np.zeros((obs_states, temp_orders_states))
    eta = np.zeros((hidden_causes, temp_orders_states - 1))

    ### free energy variables
    # parameters for generative model
    if simulation == 0:
        alpha = np.exp(2)
        alpha2 = np.exp(1)
    elif simulation == 1:
        alpha = np.exp(1)
        alpha2 = np.exp(.5)
    elif simulation == 2:
        alpha = np.exp(-1)
        alpha2 = np.exp(0)
    elif simulation == 3:
        alpha = np.exp(2)
        alpha2 = np.exp(1)

    beta = np.exp(1)

    A_gm = np.array([[0, 1, 0], [-alpha, -alpha2, 0],
                     [0, 0, 0]])  # state transition matrix
    B_gm = np.array([[0, 0, 0], [0, beta, 0], [0, 0, 0]])  # input matrix
    H_gm = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 0]])  # measurement matrix

    # actions
    a = np.zeros((hidden_states, temp_orders_states - 1))

    # states
    mu_x = np.zeros((hidden_states, temp_orders_states))

    # inputs
    v = np.zeros((hidden_causes, temp_orders_causes - 1))

    # minimisation variables and parameters
    dFdmu_x = np.zeros((hidden_states, temp_orders_states))
    Dmu_x = np.zeros((hidden_states, temp_orders_states))

    k_mu_x = 1  # learning rate perception
    k_a = np.exp(14)  # learning rate action

    # noise on sensory input (world - generative process)
    gamma_z = 0 * np.ones((obs_states, obs_states))  # log-precisions
    #gamma_z[:,1] = gamma_z[:,0] - np.log(2 * gamma)
    pi_z = np.zeros((obs_states, obs_states))
    np.fill_diagonal(pi_z, np.exp(gamma_z))
    sigma_z = np.linalg.inv(splin.sqrtm(pi_z))
    z = np.random.randn(iterations, obs_states)

    # noise on motion of hidden states (world - generative process)
    gamma_w = 2  # log-precision
    pi_w = np.zeros((hidden_states, hidden_states))
    np.fill_diagonal(pi_w, np.exp(gamma_w))
    sigma_w = np.linalg.inv(splin.sqrtm(pi_w))
    w = np.random.randn(iterations, hidden_states)

    # agent's estimates of the noise (agent - generative model)
    mu_gamma_z = -8 * np.identity((obs_states))  # log-precisions
    mu_gamma_z[1, 1] = mu_gamma_z[0, 0] - np.log(2 * gamma)
    mu_gamma_z[2, 2] = mu_gamma_z[1, 1] - np.log(2 * gamma)
    mu_pi_z = np.exp(mu_gamma_z) * np.identity((obs_states))

    mu_gamma_w = -1 * np.identity((hidden_states))  # log-precision
    mu_gamma_w[1, 1] = mu_gamma_w[0, 0] - np.log(2 * gamma)
    mu_gamma_w[2, 2] = mu_gamma_w[1, 1] - np.log(2 * gamma)
    mu_pi_w = np.exp(mu_gamma_w) * np.identity((hidden_states))

    # history
    y_history = np.zeros((iterations, obs_states, temp_orders_states))
    psi_history = np.zeros((iterations, obs_states, temp_orders_states - 1))
    mu_x_history = np.zeros((iterations, hidden_states, temp_orders_states))
    a_history = np.zeros((iterations, obs_states, temp_orders_states))
    FE_history = np.zeros((iterations, ))
    v_history = np.zeros((iterations, hidden_causes, temp_orders_states - 1))

    x = 300 * np.random.rand(hidden_states, temp_orders_states) - 150
    x[1, 0] = x[0, 1]
    x[2, 0] = x[1, 1]
    x[2, 1] = 0.

    # if the initialisation is too random, then this agent becomes ``disillusioned''
    mu_x[0, 0] = x[0, 0] + .1 * np.random.randn()
    mu_x[1, 0] = x[0, 1] + .1 * np.random.randn()
    mu_x[0, 1] = mu_x[1, 0]

    # automatic differentiation
    dFdmu_states = grad(F, 1)

    for i in range(iterations - 1):
        if simulation == 3 and i >= iterations / 2:
            v[1, 0] = 50

        # save it at the very beginning since the first jump is rather quick
        mu_x_history[i, :, :] = mu_x

        y[:, :] = getObservation(x, v, a, np.dot(np.dot(C, sigma_w), w[i, :]))
        # manually assign the acceleration as observed by the agent
        y[2, 0] = y[1, 1]

        psi = y[:, :-1] + np.dot(np.dot(D, sigma_z), z[i, :, None])

        ### minimise free energy ###
        # perception
        dFdmu_x = dFdmu_states(psi, mu_x, eta, mu_pi_z, mu_pi_w, A_gm, B_gm,
                               H_gm)
        Dmu_x = mode_path(mu_x)

        # action
        dFdy = np.dot(mu_pi_z, (psi - mu_x[:, :-1]))
        dyda = np.ones((obs_states, temp_orders_states - 1))

        # save history
        y_history[i, :] = y
        psi_history[i, :] = psi
        mu_x_history[i, :, :] = mu_x
        a_history[i] = a
        v_history[i] = v

        FE_history[i] = F(psi, mu_x, eta, mu_gamma_z, mu_pi_w, A_gm, B_gm,
                          H_gm)

        # update equations
        mu_x += dt * k_mu_x * (Dmu_x - dFdmu_x)
        a[1, 0] += dt * -k_a * dyda.transpose().dot(dFdy)

    return psi_history, mu_x_history, a_history, v_history
Example #20
def rbk_sklearn(data, σ):
    gammaV = 1.0 / (2 * σ * σ)
    rbk = sklearn.metrics.pairwise.rbf_kernel(data, gamma=gammaV)
    np.fill_diagonal(rbk, 0)  #	Set diagonal of adjacency matrix to 0
    return rbk
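A short usage sketch with illustrative data, assuming numpy as np and sklearn.metrics imported as in the snippet: the result is an RBF affinity matrix whose diagonal has been zeroed so it can serve as a graph adjacency matrix.

import numpy as np
import sklearn.metrics

rng = np.random.default_rng(0)
data = rng.standard_normal((10, 3))
K = rbk_sklearn(data, 1.5)            # sigma = 1.5

assert np.allclose(np.diag(K), 0.0)   # no self-loops
assert np.allclose(K, K.T)            # symmetric affinity matrix
assert np.all(K >= 0) and np.all(K <= 1)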
Example #21
    for key in keys_del:
        try:
            del theta_dict_sv[key]
        except KeyError:
            print("key not found")

# TODO: test affinity shocks on nonzero other values
id = 5
b_test = np.array([0.3, 1., 1., 1., 0.1, 0.4])
epsilon = np.zeros((pecmy.N, pecmy.N))
wv_m = pecmy.war_vals(b_test, m, theta_dict, epsilon)  # calculate war values
ids_j = np.delete(np.arange(pecmy.N), id)
wv_m_i = wv_m[:, id][ids_j]

tau_hat_nft = 1.25 / pecmy.ecmy.tau
np.fill_diagonal(tau_hat_nft, 1)
ge_x_sv = np.ones(pecmy.x_len)
ge_dict = pecmy.ecmy.rewrap_ge_dict(ge_x_sv)
tau_hat_sv = ge_dict["tau_hat"]
tau_hat_sv[id] = tau_hat_nft[id]  # start slightly above free trade
ge_dict_sv = pecmy.ecmy.geq_solve(tau_hat_sv, np.ones(pecmy.N))
ge_x_sv = pecmy.ecmy.unwrap_ge_dict(ge_dict_sv)

test = pecmy.br(ge_x_sv, b_test, m, wv_m_i, id)
test
# wv_m
# wv_m_i

#
# m = pecmy.M / np.ones((pecmy.N, pecmy.N))
# m = m.T
Example #22
def MI2AMI(y, n_clusters, r, k, init, var_distrib, nj,\
          nan_mask, target_nb_pseudo_obs = 500, it = 50, \
          eps = 1E-05, maxstep = 100, seed = None, perform_selec = True,\
          dm = [], max_patience = 1): # dm: Hack to remove
    ''' Complete the missing values using a trained M1DGMM
    
    y (numobs x p ndarray): The observations containing mixed variables
    n_clusters (int): The number of clusters to look for in the data
    r (list): The dimension of latent variables through the first 2 layers
    k (list): The number of components of the latent Gaussian mixture layers
    init (dict): The initialisation parameters for the algorithm
    var_distrib (p 1darray): An array containing the types of the variables in y 
    nj (p 1darray): For binary/count data: The maximum values that the variable can take. 
                    For ordinal data: the number of different existing categories for each variable
    nan_mask (ndarray): A mask array equal to True where the observation value is missing, False otherwise
    target_nb_pseudo_obs (int): The number of pseudo-observations to generate         
    it (int): The maximum number of MCEM iterations of the algorithm
    eps (float): If the likelihood increases by less than eps, the algorithm stops
    maxstep (int): The maximum number of optimisation step for each variable
    seed (int): The random state seed to set (Only for numpy generated data for the moment)
    perform_selec (Bool): Whether to perform architecture selection or not
    dm (np array): The distance matrix of the observations. If not given M1DGMM computes it
    n_neighbors (int): The number of neighbors to use for NA imputation
    ------------------------------------------------------------------------------------------------
    returns (dict): The predicted classes, the likelihood through the EM steps
                    and a continuous representation of the data
    '''

    # !!! Hack
    cols = y.columns
    # Formatting
    if not isinstance(nan_mask, np.ndarray): nan_mask = np.asarray(nan_mask)
    if not isinstance(y, np.ndarray): y = np.asarray(y)

    assert len(k) < 2  # Not implemented for deeper MDGMM for the moment

    # Keep complete observations
    complete_y = y[~np.isnan(y.astype(float)).any(1)]
    completed_y = deepcopy(y)

    out = M1DGMM(complete_y, 'auto', r, k, init, var_distrib, nj, it,\
             eps, maxstep, seed, perform_selec = perform_selec,\
                 dm = dm, max_patience = max_patience, use_silhouette = True)

    # Compute the associations
    vc = vars_contributions(pd.DataFrame(complete_y, columns = cols), out['Ez.y'], assoc_thr = 0.0, \
                           title = 'Contribution of the variables to the latent dimensions',\
                           storage_path = None)

    # Unpacking the model from the M1DGMM output
    #p = y.shape[1]
    k = out['best_k']
    r = out['best_r']
    mu = out['mu'][0]
    lambda_bin = np.array(out['lambda_bin'])
    lambda_ord = out['lambda_ord']
    lambda_categ = out['lambda_categ']
    lambda_cont = np.array(out['lambda_cont'])

    nj_bin = nj[pd.Series(var_distrib).isin(['bernoulli',
                                             'binomial'])].astype(int)
    nj_ord = nj[var_distrib == 'ordinal'].astype(int)
    nj_categ = nj[var_distrib == 'categorical'].astype(int)

    nb_cont = np.sum(var_distrib == 'continuous')
    nb_bin = np.sum(var_distrib == 'binomial')

    y_std = complete_y[:,var_distrib == 'continuous'].astype(float).std(axis = 0,\
                                                                    keepdims = True)
    cat_features = var_distrib != 'categorical'

    # Compute the associations between variables and use them as weights for the optimisation
    assoc = cosine_similarity(vc, dense_output=True)
    np.fill_diagonal(assoc, 0.0)
    assoc = np.abs(assoc)
    weights = (assoc / assoc.sum(1, keepdims=True))

    #==============================================
    # Optimisation sandbox
    #==============================================

    # Define the observation generated by the center of each cluster
    cluster_obs = [impute(mu[kk,:,0], var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,\
                 lambda_ord, nj_ord, lambda_cont, y_std) for kk in range(k[0])]

    # Use only the observed variables as references
    types = {'bin': ['bernoulli', 'binomial'], 'categ': ['categorical'],\
             'cont': ['continuous'], 'ord': 'ordinal'}

    # Gradient optimisation
    nan_indices = np.where(nan_mask.any(1))[0]
    imputed_y = np.zeros_like(y)
    numobs = y.shape[0]

    #************************************
    # Linear constraint to stay in the support of continuous variables
    #************************************

    lb = np.array([])
    ub = np.array([])
    A = np.array([[]]).reshape((0, r[0]))

    if nb_bin > 0:
        ## Corrected Binomial bounds (ub is actually +inf)
        bin_indices = var_distrib[np.logical_or(var_distrib == 'bernoulli',
                                                var_distrib == 'binomial')]
        binomial_indices = bin_indices == 'binomial'

        lb_bin = np.nanmin(y[:, var_distrib == 'binomial'], 0)
        lb_bin = logit(
            lb_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices,
                                                            0]
        ub_bin = np.nanmax(y[:, var_distrib == 'binomial'], 0)
        ub_bin = logit(
            ub_bin / nj_bin[binomial_indices]) - lambda_bin[binomial_indices,
                                                            0]
        A_bin = lambda_bin[binomial_indices, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_bin])
        ub = np.concatenate([ub, ub_bin])
        A = np.concatenate([A, A_bin], axis=0)

    if nb_cont > 0:
        ## Corrected Gaussian bounds
        lb_cont = np.nanmin(y[:, var_distrib == 'continuous'],
                            0) / y_std[0] - lambda_cont[:, 0]
        ub_cont = np.nanmax(y[:, var_distrib == 'continuous'],
                            0) / y_std[0] - lambda_cont[:, 0]
        A_cont = lambda_cont[:, 1:]

        ## Concatenate the constraints
        lb = np.concatenate([lb, lb_cont])
        ub = np.concatenate([ub, ub_cont])
        A = np.concatenate([A, A_cont], axis=0)

    lc = LinearConstraint(A, lb, ub, keep_feasible=True)

    zz = []
    fun = []
    for i in range(numobs):
        if i in nan_indices:

            # Design the nan masks for the optimisation process
            nan_mask_i = nan_mask[i]
            weights_i = weights[nan_mask_i].mean(0)

            # Look for the best starting point
            cluster_dist = [error(y[i, ~nan_mask_i], obs[~nan_mask_i],\
                            cat_features[~nan_mask_i], weights_i)\
                            for obs in cluster_obs]
            z02 = mu[np.argmin(cluster_dist), :, 0]

            # Formatting
            vars_i = {type_alias: np.where(~nan_mask_i[np.isin(var_distrib, vartype)])[0] \
                             for type_alias, vartype in types.items()}

            complete_categ = [
                l for idx, l in enumerate(lambda_categ)
                if idx in vars_i['categ']
            ]
            complete_ord = [
                l for idx, l in enumerate(lambda_ord) if idx in vars_i['ord']
            ]

            opt = minimize(stat_all, z02, \
                   args = (y[i, ~nan_mask_i], var_distrib[~nan_mask_i],\
                   weights_i[~nan_mask_i],\
                   lambda_bin[vars_i['bin']], nj_bin[vars_i['bin']],\
                   complete_categ,\
                   nj_categ[vars_i['categ']],\
                   complete_ord,\
                   nj_ord[vars_i['ord']],\
                   lambda_cont[vars_i['cont']], y_std[:, vars_i['cont']]),
                   tol = eps, method='trust-constr', jac = grad_stat,\
                   constraints = lc,
                   options = {'maxiter': 1000})

            z = opt.x
            zz.append(z)
            fun.append(opt.fun)

            imputed_y[i] = impute(z, var_distrib, lambda_bin, nj_bin, lambda_categ, nj_categ,\
                         lambda_ord, nj_ord, lambda_cont, y_std)

        else:
            imputed_y[i] = y[i]

    completed_y = np.where(nan_mask, imputed_y, y)

    out['completed_y'] = completed_y
    out['zz'] = zz
    out['fun'] = fun
    return (out)
Example #23
# v = np.ones(N)
# v = np.array([1.08, 1.65, 1.61, 1.05, 1.05, 1.30])
# v = np.repeat(1.4, N)

# TODO: try just running the inner loop; the problem is that the values of v change with theta as well, so there is no reason to run theta to convergence rather than iterating on v first.

imp.reload(policies)
imp.reload(economy)
pecmy = policies.policies(data, params, ROWname)
pecmy.W
m_diag = np.diagonal(pecmy.m)
m_frac = pecmy.m / m_diag
m_frac[:, N - 1]

tau_min_mat = copy.deepcopy(pecmy.ecmy.tau)
np.fill_diagonal(tau_min_mat, 5)

theta_dict = dict()
theta_dict["eta"] = 1.
theta_dict["c_hat"] = 25.
theta_dict["alpha1"] = 0.
theta_dict["alpha2"] = 0.
theta_dict["gamma"] = 0.
theta_dict["C"] = np.repeat(25., pecmy.N)
theta_x = pecmy.unwrap_theta(theta_dict)

# opt.root(pecmy.pp_wrap_alpha, .5, args=(.99, ))['x']
# pecmy.W ** - .75
np.reshape(
    np.repeat(np.max(pecmy.ecmy.tau + pecmy.tau_buffer, axis=1), pecmy.N),
    (pecmy.N, pecmy.N)) / pecmy.ecmy.tau
Example #24
def ord_params_GLLVM(y_ord, nj_ord, lambda_ord_old, ps_y, pzl1_ys, zl1_s, AT,\
                     tol = 1E-5, maxstep = 100):
    ''' Determine the GLLVM coefficients related to the ordinal variables by
    optimizing each column's coefficients separately.
    y_ord (numobs x nb_ord nd-array): The ordinal data
    nj_ord (list of int): The number of modalities for each ord variable
    lambda_ord_old (list of nb_ord_j x (nj_ord + r1) elements): The ordinal coefficients
                                                        of the previous iteration
    ps_y ((numobs, S) nd-array): p(s | y) for all s in Omega
    pzl1_ys (nd-array): p(z1 | y, s)
    zl1_s ((M1, r1, s1) nd-array): z1 | s 
    AT ((r1 x r1) nd-array): Var(z1)^{-1/2}
    tol (float): Controls when to stop the optimisation process
    maxstep (int): The maximum number of optimization steps.
    ----------------------------------------------------------------------
    returns (list of nb_ord_j x (nj_ord + r1) elements): The new ordinal coefficients
    '''
    #****************************
    # Ordinal link parameters
    #****************************

    r0 = zl1_s.shape[1]
    S0 = zl1_s.shape[2]
    nb_ord = len(nj_ord)

    new_lambda_ord = []

    for j in range(nb_ord):
        enc = OneHotEncoder(categories='auto')
        y_oh = enc.fit_transform(y_ord[:, j][..., n_axis]).toarray()

        # Define the constraints such that the threshold coefficients are ordered
        nb_constraints = nj_ord[j] - 2
        nb_params = nj_ord[j] + r0 - 1

        lcs = np.full(nb_constraints, -1)
        lcs = np.diag(lcs, 1)
        np.fill_diagonal(lcs, 1)

        lcs = np.hstack([lcs[:nb_constraints, :], \
                np.zeros([nb_constraints, nb_params - (nb_constraints + 1)])])

        linear_constraint = LinearConstraint(lcs, np.full(nb_constraints, -np.inf), \
                            np.full(nb_constraints, 0), keep_feasible = True)

        opt = minimize(ord_loglik_j, lambda_ord_old[j] ,\
                args = (y_oh, zl1_s, S0, ps_y, pzl1_ys, nj_ord[j]),
                tol = tol, method='trust-constr',  jac = ord_grad_j, \
                constraints = linear_constraint, hess = '2-point',\
                    options = {'maxiter': maxstep})

        res = opt.x
        if not opt.success:  # If the optimisation fails, keep the old estimate as value
            res = lambda_ord_old[j]
            warnings.warn('One of the ordinal optimisations has failed',
                          RuntimeWarning)

        # Ensure identifiability for Lambda_j
        new_lambda_ord_j = (res[-r0:].reshape(1, r0) @ AT[0]).flatten()
        new_lambda_ord_j = np.hstack(
            [deepcopy(res[:nj_ord[j] - 1]), new_lambda_ord_j])
        new_lambda_ord.append(new_lambda_ord_j)

    return new_lambda_ord
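The few lines that build lcs encode the ordering of the threshold parameters: each row of the constraint matrix asks that one threshold minus the next be at most zero. For nj_ord[j] = 4 and r0 = 2 the matrix looks like this (a sketch, assuming numpy as np):

import numpy as np

nj_ord_j, r0 = 4, 2
nb_constraints = nj_ord_j - 2            # 2 ordering constraints
nb_params = nj_ord_j + r0 - 1            # 3 thresholds plus r0 loadings

lcs = np.full(nb_constraints, -1)
lcs = np.diag(lcs, 1)
np.fill_diagonal(lcs, 1)
lcs = np.hstack([lcs[:nb_constraints, :],
                 np.zeros([nb_constraints, nb_params - (nb_constraints + 1)])])

print(lcs)
# [[ 1. -1.  0.  0.  0.]
#  [ 0.  1. -1.  0.  0.]]
# Row i reads: threshold_i - threshold_{i+1} <= 0, so the thresholds stay ordered.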
Example #25
    def m_step(self,
               expectations,
               datas,
               inputs,
               masks,
               tags,
               optimizer="adam",
               num_iters=5,
               **kwargs):
        """
        to find most likely labels, ell_labels; coordinate descent
        
            {\label_k} = argmax E_{z~p(z|x)}[log p(z)]
            \likelihood(\theta) = E_{z~p(z|x)}[\sum_{t=1}^T-1 log p(z_{t+1} | z_t; \theta)]
            
        weights entries are E[z_t = k], E[z_t = k, z_{t+1}=k'], log p(x_{1:T})
        """
        K = self.K

        zzps = np.concatenate([Ezzp1
                               for _, Ezzp1, _ in expectations])  # T by K by K

        ell_labels, dist_norm, L, log_p = self.ell_labels, self.dist_norm, self.L, self.log_p

        for itr in range(num_iters):

            for kk in range(K):  # index kk
                ### create null matrix with all possible values for the k-th label
                ###    while fixing all other k-1 labels the same
                ### I. 'changing'
                k = ell_labels[kk]
                ell_labels_new = np.array([ell_labels] * K)
                ell_labels_new[:, kk] = ell_labels
                ### II. 'swapping': add a line
                np.fill_diagonal(ell_labels_new, np.repeat(k, K))

                log_L = np.zeros(K)
                ### for every possible swapping
                for l in range(K):  # row index
                    ell_labels_new_eg = ell_labels_new[l, :]
                    log_p_new = log_p[ell_labels_new_eg]

                    ### compute log_transition matrix
                    dist_labeled = np.zeros((K, K))
                    for i in range(K):
                        for j in range(K):
                            dist_labeled[i,
                                         j] = dist_norm[ell_labels_new_eg[i],
                                                        ell_labels_new_eg[j]]

                    log_Ps = -dist_labeled / L
                    log_Ps += np.diag(log_p_new)
                    log_Ps -= logsumexp(log_Ps, axis=1, keepdims=True)

                    ### compute log_likelihood
                    log_L[l] = np.sum(zzps * log_Ps[None, :, :])

                ### update the k-th label with the MLE
                ell_labels = ell_labels_new[np.argmax(log_L), :]

        self.ell_labels = ell_labels
def hess_k(ws, fdensity, alpha, sig, psf_k):
    print('hess_k begin')
    mo = np.exp(-4.)
    ws = real_to_complex(ws)
    ws = ws.reshape((n_grid, n_grid))
    ws = np.real(fft.ifft2(ws))
    #calc l1 we only get diagonals here
    l1 = -1 * (psf_k**2 / sig_noise**2 / n_grid**2).flatten()
    #calc l2, the hessian of the prior is messy
    xsi = (1. - fdensity) * gaussian(np.log(ws), loc=np.log(
        mo), scale=sig) / ws + fdensity * (ws**alpha / w_norm)
    dxsi = -1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (
        1. - fdensity) / ws**2 - (1. - fdensity) * np.log(ws / mo) * np.exp(
            -np.log(ws / mo)**2 / 2 / sig**2) / np.sqrt(
                2 * np.pi) / ws**2 / sig**3 + fdensity * alpha * ws**(
                    alpha - 1) / w_norm
    dxsi_st = -1 * gaussian(np.log(ws), loc=np.log(mo), scale=sig) * (
        1. - fdensity) / ws**2 - (1. - fdensity) * np.log(ws / mo) * np.exp(
            -np.log(ws / mo)**2 / 2 / sig**2) / np.sqrt(
                2 * np.pi) / ws**2 / sig**3
    ddxsi_st = -1 * dxsi_st / ws - dxsi_st * np.log(ws / mo) / ws / sig**2 - (
        1. - fdensity) * (1 / np.sqrt(2 * np.pi) / sig) * np.exp(
            -np.log(ws / mo)**2 / 2 /
            sig**2) * (1 / sig**2 - np.log(ws / mo) / sig**2 - 1) / ws**3
    ddxsi = ddxsi_st + fdensity * alpha * (alpha - 1) * ws**(alpha -
                                                             2) / w_norm
    l2 = -1 * (dxsi / xsi)**2 + ddxsi / np.absolute(xsi)
    #this is the hessian of the prior wrt m_x, not m_k
    l2_k = fft.ifft2(l2).flatten() / n_grid**2
    # we assume that the hessian of l2 is diagonal; under the assumption k = -k', only the zeroth element along the diagonal survives
    # let's fill the entire matrix and check
    hess_m = np.zeros((n_grid**2, n_grid**2), dtype=complex)
    hess_l1 = np.zeros((n_grid**2, n_grid**2), dtype=complex)
    np.fill_diagonal(hess_l1, l1)
    off = []
    #print(l2_k[0]);
    for i in range(0, n_grid**2):
        for j in range(0, n_grid**2):
            hess_m[i, j] = l2_k[int(np.absolute(i - j))]
            #check the off diagonals to make sure they are small
            if i != j:
                off.append(l2_k[int(np.absolute(i - j))])
    hess_m = hess_l1 + hess_m
    '''
    print('Sigma Real is:');
    print(np.std(np.real(off)));
    print('Simga Imag is:');
    print(np.std(np.imag(off)));
    fig, ax = plt.subplots(1,2)
    ax[0].imshow(np.real(hess_m));
    ax[0].set_title('Real Hessian')
    #ax[1].imshow(data3[:-4,:-4]);
    ax[1].imshow(np.imag(hess_m));
    ax[1].set_title('Imaginary Hessian')
    plt.show();
    '''
    l_tot = np.diagonal(hess_m)

    l_minr = min(np.real(l_tot))
    l_mini = min(np.imag(l_tot))
    #print(l_tot-l1);
    if l_minr < 0:
        l_tot = l_tot - l_minr + 0.1
    if l_mini < 0:
        l_tot = l_tot - 1j * (l_mini + 0.1)
    '''
    print('diag is:');
    print(l2_k[0]);
    print('other is:');
    print(l1);
    '''
    '''
    hess_m = np.zeros((n_grid**2,n_grid**2));
    np.fill_diagonal(hess_m,l_tot);
    return hess_m;
    '''
    #return l1,l2_k[0];
    l_tot = complex_to_real(l_tot)
    #print('hess is');
    #print(l_tot);
    return l_tot