def elbo(params, t):
        '''
        samples: [n_samples, D]
        u: [D,1]
        w: [D,1]
        b: [1]
        '''

        mean = params[0]
        log_std = params[1]
        u = params[2]
        w = params[3]
        b = params[4]

        samples = sample_diag_gaussian(mean, log_std, num_samples, rs)
        z_k = normalizing_flows(samples, u, w, b)

        logp_zk = logprob(z_k)
        logp_zk = np.reshape(logp_zk, [num_samples, 1])

        logq_zk = variational_log_density(params, samples)
        logq_zk = np.reshape(logq_zk, [num_samples, 1])

        elbo = logp_zk - logq_zk
  
        return np.mean(elbo) #over samples
Example #2
    def _hmc_log_probability(self, L, b, A, W):
        """
        Compute the log probability as a function of L.
        This allows us to take the gradients wrt L using autograd.
        :param L:
        :param A:
        :return:
        """
        assert self.B == 1
        import autograd.numpy as anp

        # Compute pairwise distance
        L1 = anp.reshape(L,(self.N,1,self.dim))
        L2 = anp.reshape(L,(1,self.N,self.dim))
        # Mu = a * anp.sqrt(anp.sum((L1-L2)**2, axis=2)) + b
        Mu = -anp.sum((L1-L2)**2, axis=2) + b

        Aoff = A * (1-anp.eye(self.N))
        X = (W - Mu[:,:,None]) * Aoff[:,:,None]

        # Get the covariance and precision
        Sig = self.cov.sigma[0,0]
        Lmb = 1./Sig

        lp = anp.sum(-0.5 * X**2 * Lmb)

        # Log prior of L under spherical Gaussian prior
        lp += -0.5 * anp.sum(L * L / self.eta)

        # Log prior of b under a standard Gaussian prior
        lp += -0.5 * b ** 2

        return lp
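Because the body uses only autograd.numpy operations, the gradient with respect to L can be taken directly with autograd. A minimal standalone sketch of the same pattern (hypothetical shapes and prior scales, not the class's actual configuration):

import autograd.numpy as anp
from autograd import grad

def hmc_logprob_sketch(L, b, A, W, N=5, dim=2, eta=1.0, sigma2=1.0):
    # same structure as _hmc_log_probability above, written as a free function
    L1 = anp.reshape(L, (N, 1, dim))
    L2 = anp.reshape(L, (1, N, dim))
    Mu = -anp.sum((L1 - L2)**2, axis=2) + b
    Aoff = A * (1 - anp.eye(N))
    X = (W - Mu[:, :, None]) * Aoff[:, :, None]
    lp = anp.sum(-0.5 * X**2 / sigma2)
    lp += -0.5 * anp.sum(L * L / eta)   # spherical Gaussian prior on L
    lp += -0.5 * b**2                   # standard Gaussian prior on b
    return lp

dlogp_dL = grad(hmc_logprob_sketch)     # gradient wrt L, as used inside an HMC step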
Example #3
def initParam(prior, X, N, D, G, M, K, dir_param, prng):
    """ initialize variational parameters with prior parameters
    """
    
    [tpM, tpG, lb, ub] = [np.ones(M), np.ones(G), 10., 10.]
    tpR = prng.rand(2*M)
    [tau_a1, tau_a2, tau_b1, tau_b2, tau_v1, tau_v2] = \
            [lb+(ub-lb)*tpR[0 : M], tpM,\
             lb+(ub-lb)*tpR[M : 2*M], tpM, \
             tpG, tpG]

    mu_w = prng.randn(G,D,K)/np.sqrt(D)
    sigma_w = np.ones(G*D*K) * 1e-3
    mu_b = prng.randn(G, K)/np.sqrt(D)
    sigma_b = np.ones(G*K) * 1e-3

    phi = np.reshape(prng.dirichlet(np.ones(G)*dir_param, M), M*G)
    
    mu_w = np.reshape(mu_w, G*D*K)
    mu_b = np.reshape(mu_b, G*K)

    param_init = np.concatenate((tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1,\
        tau_v2, mu_w, sigma_w, mu_b, sigma_b))
    
    return param_init
Example #4
    def _hmc_log_probability(self, L, mu_0, mu_self, A):
        """
        Compute the log probability as a function of L.
        This allows us to take the gradients wrt L using autograd.
        :param L:
        :param A:
        :return:
        """
        import autograd.numpy as anp
        # Compute pairwise distance
        L1 = anp.reshape(L,(self.N,1,self.dim))
        L2 = anp.reshape(L,(1,self.N,self.dim))
        D = - anp.sum((L1-L2)**2, axis=2)

        # Compute the logit probability
        logit_P = D + mu_0 + mu_self * np.eye(self.N)

        # Take the logistic of the negative distance
        P = 1.0 / (1+anp.exp(-logit_P))

        # Compute the log likelihood
        ll = anp.sum(A * anp.log(P) + (1-A) * anp.log(1-P))

        # Log prior of L under spherical Gaussian prior
        lp = -0.5 * anp.sum(L * L / self.sigma)

        # Log prior of mu0 under a standard Gaussian prior
        lp += -0.5 * mu_0**2

        lp += -0.5 * mu_self**2

        return ll + lp
Example #5
def unpackParam(param, N, D, G, M, K):
    """ This function unpack the vector-shaped parameter to separate variables,
    including those described in objective.py
    
    1) tau_a1: len(M), first parameter of q(alpha_m)
    2) tau_a2: len(M), second parameter of q(alpha_m)
    3) tau_b1: len(M), first parameter of q(beta_m)
    4) tau_b2: len(M), second parameter of q(beta_m)
    5) phi: shape(M, G), phi[m,:] is the parameter vector of q(c_m)
    6) tau_v1: len(G), first parameter of q(nu_g)
    7) tau_v2: len(G), second parameter of q(nu_g)
    8) mu_w: shape(G, D, K), mu_w[g,d,k] is the mean parameter of 
        q(W^g_{dk})
    9) sigma_w: shape(G, D, K), sigma_w[g,d,k] is the std parameter of 
        q(W^g_{dk})
    10) mu_b: shape(G, K), mu_b[g,k] is the mean parameter of q(b^g_k)
    11) sigma_b: shape(G, K), sigma_b[g,k] is the std parameter of q(b^g_k)
    """
    
    tp_1 = [0, M, 2*M, 3*M, 4*M, 4*M+M*G, 4*M+M*G+G, 
            4*M+M*G+2*G, 4*M+M*G+2*G+G*D*K, 4*M+M*G+2*G+G*(2*D)*K,
            4*M+M*G+2*G+G*(2*D+1)*K, 4*M+M*G+2*G+G*(2*D+2)*K]
    tp_2 = []
    for i in np.arange(len(tp_1)-1):
        tp_2.append(param[tp_1[i] : tp_1[i+1]])
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\
            mu_b, sigma_b] = tp_2
    phi = np.reshape(phi, (M,G))
    mu_w = np.reshape(mu_w, (G,D,K))
    sigma_w = np.reshape(sigma_w, (G,D,K))
    mu_b = np.reshape(mu_b, (G,K))
    sigma_b = np.reshape(sigma_b, (G,K))

    return(tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, \
            sigma_w, mu_b, sigma_b)
Example #6
def projectParam_vec(param, N, D, G, M, K, lb=1e-6):
    # unpack the input parameter vector
    tp_1 = [0, M, 2*M, 3*M, 4*M, 4*M+M*G, 4*M+M*G+G, 4*M+M*G+2*G, 
            4*M+M*G+2*G+G*N*K, 4*M+M*G+2*G+G*(N+D)*K, 4*M+M*G+2*G+G*(N+2*D)*K,
            4*M+M*G+2*G+G*(N+2*D+1)*K, 4*M+M*G+2*G+G*(N+2*D+2)*K]
    tp_2 = []
    for i in np.arange(len(tp_1)-1):
        tp_2.append(param[tp_1[i] : tp_1[i+1]])
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, eta, mu_w, sigma_w,\
            mu_b, sigma_b] = tp_2
    phi = np.reshape(phi, (M,G))
    eta = np.reshape(eta, (G,N,K))
    
    # apply projections
    w_tau_ab = projectLB(np.concatenate((tau_a1,tau_a2,tau_b1,tau_b2)), lb)
    w_phi = np.zeros((M,G))
    for m in np.arange(M):
        w_phi[m] = projectSimplex_vec(phi[m])
    w_tau_v = projectLB(np.concatenate((tau_v1,tau_v2)), lb)

    w_eta = np.zeros((G,N,K))
    for g in np.arange(G):
        for n in np.arange(N):
            w_eta[g,n] = projectSimplex_vec(eta[g,n])

    w = np.concatenate((w_tau_ab, w_phi.reshape(M*G), w_tau_v, \
            w_eta.reshape(G*N*K), mu_w, projectLB(sigma_w,lb), mu_b, \
            projectLB(sigma_b,lb)))
    return w
    def variational_log_density(params, samples):
        '''
        samples: [n_samples, D]
        u: [D,1]
        w: [D,1]
        b: [1]
        Returns: [num_samples]
        '''
        n_samples = len(samples)

        mean = params[0]
        log_std = params[1]
        u = params[2]
        w = params[3]
        b = params[4]

        # print (samples.shape)

        # samples = sample_diag_gaussian(mean, log_std, num_samples, rs)
        z_k = normalizing_flows(samples, u, w, b)

        logp_zk = logprob(z_k)
        logp_zk = np.reshape(logp_zk, [n_samples, 1])

        logq_z0 = diag_gaussian_log_density(samples, mean, log_std)
        logq_z0 = np.reshape(logq_z0, [n_samples, 1])

        # [n_samples, D]
        phi = np.dot((1.-np.tanh(np.dot(samples,w)+b)**2), w.T)

        # [n_samples, 1]
        sum_nf = np.log(abs(1+np.dot(phi, u)))

        # return logq_z0 - sum_nf
        return np.reshape(logq_z0 - sum_nf, [n_samples])
    def elbo(params, t):
        '''
        samples: [n_samples, D]
        u: [D,1]
        w: [D,1]
        b: [1]
        '''

        # anneal the weight on log p(z_k) from ~0 up to 1 over the first ~100 steps
        beta = t/100 + .001

        if beta > .99:
            beta = 1.

        # annealing currently disabled: the full objective is used from the start
        beta = 1

        mean = params[0]
        log_std = params[1]
        norm_flow_params = params[2]

        samples = sample_diag_gaussian(mean, log_std, n_samples, rs)
        z_k, all_zs = normalizing_flows(samples, norm_flow_params)

        logp_zk = logprob(z_k)
        logp_zk = np.reshape(logp_zk, [n_samples, 1])

        logq_zk = variational_log_density(params, samples)
        logq_zk = np.reshape(logq_zk, [n_samples, 1])

        elbo = (beta*logp_zk) - logq_zk 
  
        return np.mean(elbo) #over samples
 def mixture_lower_bound(params):
     """Provides a stochastic estimate of the variational lower bound."""
     samples = component_sample(params, num_samples, rs)
     log_qs = mixture_log_density(var_mixture_params, samples)
     log_ps = logprob(samples, t)
     log_ps = np.reshape(log_ps, (num_samples, -1))
     log_qs = np.reshape(log_qs, (num_samples, -1))
     return np.mean(log_ps - log_qs)
Example #10
def linear_decode(z, phi):
    C, d = phi
    z = z if z.ndim == 3 else z[:,None,:]  # ensure z.shape == (T, K, n)

    mu = np.dot(z, C.T)
    log_sigmasq = np.tile(d[None,None,:], mu.shape[:2] + (1,))

    shape = z.shape[:-1] + (-1,)
    return np.reshape(mu, shape), np.reshape(log_sigmasq, shape)
    def variational_log_density(params, datapoints):
        '''
        samples: [n_samples, D]
        u: [D,1]
        w: [D,1]
        b: [1]
        Returns: [num_samples]
        '''
        n_samples = len(datapoints)

        mean = params[0]
        log_std = params[1]
        norm_flow_params = params[2]



        # print (samples.shape)

        # z_k, all_zs = normalizing_flows(datapoints, norm_flow_params)

        # logp_zk = logprob(z_k)
        # logp_zk = np.reshape(logp_zk, [n_samples, 1])

        logq_z0 = diag_gaussian_log_density(datapoints, mean, log_std)

        # print (logq_z0.shape)
        logq_z0 = np.reshape(logq_z0, [n_samples, 1])

        log_qk = logq_z0 + np.log(.5)

        # sum_nf = np.zeros([n_samples,1])
        # for params_k in range(len(norm_flow_params)):
        #     u = norm_flow_params[params_k][0]
        #     w = norm_flow_params[params_k][1]
        #     b = norm_flow_params[params_k][2]

        #     # m_x = -1. + np.log(1.+np.exp(np.dot(w.T,u)))
        #     # u_k = u + (m_x - np.dot(w.T,u)) *  (w/np.linalg.norm(w))
        #     u_k = u

        #     # [n_samples, D]
        #     # phi = np.dot((1.-np.tanh(np.dot(all_zs[params_k],w)+b)**2), w.T)
        #     phi = np.reshape(w, [2])
        #     ones = np.ones([n_samples,2])
        #     phi = ones*phi


        #     # [n_samples, 1]
        #     sum_nf = np.log(np.abs(1+np.dot(phi, u_k)))
        #     sum_nf += sum_nf

        # return logq_z0 - sum_nf
        # log_qz = np.reshape(logq_z0 - sum_nf, [n_samples])

        log_qz = np.reshape(log_qk, [n_samples])

        return log_qz
    def variational_log_density(params, samples):
        '''
        samples: [n_samples, D]
        u: [D,1]
        w: [D,1]
        b: [1]
        Returns: [num_samples]
        '''
        n_samples = len(samples)
        d = len(samples[0])

        mean = params[0]
        log_std = params[1]
        norm_flow_params = params[2]

        # print (samples.shape)

        # samples = sample_diag_gaussian(mean, log_std, num_samples, rs)
        z_k, all_zs = normalizing_flows(samples, norm_flow_params)

        logp_zk = logprob(z_k)
        logp_zk = np.reshape(logp_zk, [n_samples, 1])

        logq_z0 = diag_gaussian_log_density(samples, mean, log_std)
        logq_z0 = np.reshape(logq_z0, [n_samples, 1])

        sum_nf = np.zeros([n_samples,1])
        for params_k in range(len(norm_flow_params)):
            z_0_mean = norm_flow_params[params_k][0]
            a = np.abs(norm_flow_params[params_k][1])
            b = norm_flow_params[params_k][2]

            # m_x = -1. + np.log(1.+np.exp(np.dot(w.T,u)))
            # u_k = u + (m_x - np.dot(w.T,u)) *  (w/np.linalg.norm(w))

            # [n_samples, D]
            # phi = np.dot((1.-np.tanh(np.dot(all_zs[params_k],w)+b)**2), w.T)
            # [n_samples, 1]
            current_z = all_zs[params_k]

            z_0_mean = np.reshape(z_0_mean, [len(current_z[0])])

            h = 1./(a + np.abs(current_z - z_0_mean))
            # derivative of h = 1/(a + r) with r = |z - z_0_mean|
            h_prime = -1./(a + np.abs(current_z - z_0_mean))**2 * np.sign(current_z - z_0_mean)

            term1 = (1 + b*h)**(d-1)
            term2 = 1 + b*h + b*h_prime*np.abs(current_z - z_0_mean)
            term3 = term1 * term2

            # accumulate this step's log-det-Jacobian contribution
            sum_nf = sum_nf + np.log(np.abs(term3))

        # return logq_z0 - sum_nf
        print (logq_z0.shape)
        log_qz = np.reshape(logq_z0 - sum_nf, [n_samples])
        return log_qz
def variational_lower_bound(params, t, logprob, sampler, log_density,
                            num_samples, rs):
    """Provides a stochastic estimate of the variational lower bound,
       for any variational family and model density."""
    samples = sampler(params, num_samples, rs)
    log_qs = log_density(params, samples)
    log_ps = logprob(samples, t)
    log_ps = np.reshape(log_ps, (num_samples, -1))
    log_qs = np.reshape(log_qs, (num_samples, -1))
    return np.mean(log_ps - log_qs)
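This is the generic black-box estimator: the sampler/log_density pair defines the variational family, and autograd can differentiate the Monte Carlo estimate directly. A minimal usage sketch (hypothetical diagonal-Gaussian family and standard-normal target, illustrative only):

import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import grad

D, num_samples = 2, 200

def sampler(params, n, rs):
    mean, log_std = params[:D], params[D:]
    return rs.randn(n, D) * np.exp(log_std) + mean   # reparameterized samples

def log_density(params, x):
    mean, log_std = params[:D], params[D:]
    return np.sum(-0.5 * ((x - mean) / np.exp(log_std))**2 - log_std, axis=1)

logprob = lambda x, t: np.sum(-0.5 * x**2, axis=1)   # unnormalized target density

objective = lambda params, t: -variational_lower_bound(
    params, t, logprob, sampler, log_density, num_samples, npr.RandomState(0))
gradient = grad(objective)(np.zeros(2 * D), 0)        # feed this to any optimizer step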
Example #14
def projectParam(param, N, D, G, M, K, lb=1e-6):
    """ project variational parameter vector onto the constraint set, including
    positive constraints for parameters of Beta distributions, simplex
    constraints for parameters of Categorical distributions
    
    Parameters
    ----------
    param: length (2M + 2M + MG + 2G + GDK + GDK + GK + GK) 
        variational parameters, including:
        1) tau_a1: len(M), first parameter of q(alpha_m)
        2) tau_a2: len(M), second parameter of q(alpha_m)
        3) tau_b1: len(M), first parameter of q(beta_m)
        4) tau_b2: len(M), second parameter of q(beta_m)
        5) phi: shape(M, G), phi[m,:] is the parameter vector of q(c_m)
        6) tau_v1: len(G), first parameter of q(nu_g)
        7) tau_v2: len(G), second parameter of q(nu_g)
        8) mu_w: shape(G, D, K), mu_w[g,d,k] is the mean parameter of 
            q(W^g_{dk})
        9) sigma_w: shape(G, D, K), sigma_w[g,d,k] is the std parameter of 
            q(W^g_{dk})
        10) mu_b: shape(G, K), mu_b[g,k] is the mean parameter of q(b^g_k)
        11) sigma_b: shape(G, K), sigma_b[g,k] is the std parameter of q(b^g_k)
    N,D,G,M,K: number of samples (N), features(D), groups(G), experts(M),
        clusters(K)
    lb: float, lower bound of positive constraints
     
    Returns
    -------
    w: length (2M + 2M + MG + 2G + GNK + GDK + GDK + GK + GK) 
    """
    # unpack the input parameter vector
    tp_1 = [0, M, 2*M, 3*M, 4*M, 4*M+M*G, 4*M+M*G+G, 
            4*M+M*G+2*G, 4*M+M*G+2*G+G*D*K, 4*M+M*G+2*G+G*(2*D)*K,
            4*M+M*G+2*G+G*(2*D+1)*K, 4*M+M*G+2*G+G*(2*D+2)*K]
    tp_2 = []
    for i in np.arange(len(tp_1)-1):
        tp_2.append(param[tp_1[i] : tp_1[i+1]])
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\
            mu_b, sigma_b] = tp_2
    phi = np.reshape(phi, (M,G))
     
    # apply projections
    w_tau_ab = projectLB(np.concatenate((tau_a1,tau_a2,tau_b1,tau_b2)), lb)
    
    w_phi_vec = np.reshape(projectSimplex(phi), M*G)

    w_tau_v = projectLB(np.concatenate((tau_v1,tau_v2)), lb)
    
    w = np.concatenate((w_tau_ab, w_phi_vec, w_tau_v, \
            mu_w, projectLB(sigma_w,lb), mu_b, projectLB(sigma_b,lb)))
    return w
Example #15
def pack_dense(A, b, *args):
    '''Used for packing Gaussian natural parameters and statistics into a dense
    ndarray so that we can use tensordot for all the linear contraction ops.'''
    # we don't use a symmetric embedding because factors of 1/2 on h are a pain
    leading_dim, N = b.shape[:-1], b.shape[-1]
    z1, z2 = np.zeros(leading_dim + (N, 1)), np.zeros(leading_dim + (1, 1))
    c, d = args if args else (z2, z2)

    A = A[...,None] * np.eye(N)[None,...] if A.ndim == b.ndim else A
    b = b[...,None]
    c, d = np.reshape(c, leading_dim + (1, 1)), np.reshape(d, leading_dim + (1, 1))

    return vs(( hs(( A,     b,  z1 )),
                hs(( T(z1), c,  z2 )),
                hs(( T(z1), z2, d  ))))
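For a single Gaussian (no leading batch dimensions) the packed array is the (N+2) x (N+2) block matrix [[A, b, 0], [0, c, 0], [0, 0, d]]. A self-contained sketch of that layout, using np.block instead of the hs/vs/T helpers (which are not shown in this excerpt):

import numpy as np

N = 2
A = np.eye(N)                         # (N, N) precision-like block
b = np.arange(1., N + 1.)[:, None]    # (N, 1) linear block
c = d = np.zeros((1, 1))              # scalar blocks
z1, z2 = np.zeros((N, 1)), np.zeros((1, 1))

packed = np.block([[A,     b,  z1],
                   [z1.T,  c,  z2],
                   [z1.T,  z2, d ]])
print(packed.shape)                   # (4, 4), i.e. (N + 2, N + 2)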
    def sample_variational_density(params):
        mean = params[0]
        log_std = params[1]
        norm_flow_params = params[2]

        samples = sample_diag_gaussian(mean, log_std, num_samples, rs) 

        logq_zk = variational_log_density(params, samples)
        logq_zk = np.reshape(logq_zk, [num_samples])

        z_k, all_zs = normalizing_flows(samples, norm_flow_params)

        # print (z_k.shape)


        # Need to resample because q0(z) != qk(z); the weights below are built
        # directly from the log-densities, then nudged down until they form a
        # valid probability vector for np.random.multinomial.

        normalized_ws = logq_zk / np.sum(logq_zk)

        while np.sum(normalized_ws[:-1]) > 1.:
            print (np.sum(normalized_ws))
            normalized_ws = normalized_ws - .0001
        # normalized_ws = normalized_ws - .0001

        sampled = np.random.multinomial(30, normalized_ws)#, size=1)

        weighted_samples = []
        for i in range(len(sampled)):
            for j in range(sampled[i]):
                weighted_samples.append(z_k[i])

        weighted_samples = np.array(weighted_samples)
        # print (weighted_samples.shape)

        return weighted_samples
Example #17
    def convolve_with_basis(self, signal):
        """
        Convolve each column of the event count matrix with this basis
        :param signal: array-like signal with T rows (one per time bin)
        :return: T x B array of the signal convolved with each basis function
        """
        (T,_) = signal.shape
        (R,B) = self.basis.shape


        # Initialize array for filtered stimulus
        F = np.empty((T,B))

        # Compute convolutions for each basis vector, one at a time
        for b in np.arange(B):
            F[:,b] = sig.fftconvolve(signal,
                                       np.reshape(self.basis[:,b],(R,1)),
                                       'full')[:T,:]

        # Check for positivity
        if np.amin(self.basis) >= 0 and np.amin(signal) >= 0:
            np.clip(F, 0, np.inf, out=F)
            assert np.amin(F) >= 0, "convolution should be >= 0"

        return F
    def normalizing_flows(z_0, norm_flow_params):
        '''
        z_0: [n_samples, D]
        u: [D,1]
        w: [D,1]
        b: [1]
        '''

        current_z = z_0
        all_zs = []
        all_zs.append(z_0)
        for params_k in norm_flow_params:

            z_0_mean = params_k[0]
            a = np.abs(params_k[1])
            b = params_k[2]

            # m_x = -1. + np.log(1.+np.exp(np.dot(w.T,u)))
            # u_k = u + (m_x - np.dot(w.T,u)) *  (w/np.linalg.norm(w))

            # print (a.shape)

            # print (current_z.shape)
            z_0_mean = np.reshape(z_0_mean, [len(current_z[0])])

            h = 1./(a + np.abs(current_z - z_0_mean))
            term1 = b * h * (current_z - z_0_mean)
            current_z = current_z + term1

            all_zs.append(current_z)

        return current_z, all_zs
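Each step applies z' = z + b * h * (z - z0) with h = 1/(a + |z - z0|), an elementwise radial-style update. A small usage sketch (parameter values are illustrative only):

import autograd.numpy as np
import autograd.numpy.random as npr

rs = npr.RandomState(0)
z_0 = rs.randn(5, 2)                                  # [n_samples, D]
norm_flow_params = [(np.zeros((2, 1)),  1.0,  0.5),   # (z_0_mean, a, b) per step
                    (np.ones((2, 1)),   2.0, -0.3)]
z_k, all_zs = normalizing_flows(z_0, norm_flow_params)
print(z_k.shape, len(all_zs))                         # (5, 2) and 3: z_0 plus two steps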
Example #19
 def gradient(params, X, y):
     # gradient of objective = (1/N) sum_n x(n,:)*yerr(n)   // row vector
     y_pred = LinregModel.prediction(params, X)
     N = y.shape[0]
     yerr = np.reshape((y_pred - y), (N, 1))
     gradient = np.sum(X * yerr, 0)/N # broadcast yerr along columns
     return gradient
    def callback(params):
        print("Log likelihood {}, Squared Error {}".format(-objective(params),squared_error(params,X,y,n_samples)))
        
        # Show posterior marginals.
        if dimensions[0] == 1:
            plot_xs = np.reshape(np.linspace(-5, 5, 300), (300,1))
            plot_deep_gp(ax_end_to_end, params, plot_xs)
            deep_map = create_deep_map(params)
            if dimensions == [1,1]:
                ax_end_to_end.plot(np.ndarray.flatten(deep_map[0][0]['x0']),deep_map[0][0]['y0'], 'ro')
            elif dimensions == [1,1,1]:
                plot_single_gp(ax_x_to_h,params,0,0,plot_xs)
                ax_x_to_h.set_title("Inputs to hiddens, pesudo data in red")

                plot_single_gp(ax_h_to_y,params,1,0,plot_xs)
                ax_h_to_y.set_title("Hiddens to outputs, pesudo data in red")
            elif dimensions == [1,1,1,1]:
                plot_single_gp(ax_x_to_h, params,0,0, plot_xs)
                ax_x_to_h.set_title("Inputs to Hidden 1, pesudo data in red")

                plot_single_gp(ax_h_to_h2, params,1,0,plot_xs)
                ax_h_to_h2.set_title("Hidden 1 to Hidden 2, pesudo data in red")

                plot_single_gp(ax_h2_to_y, params,2,0, plot_xs)
                ax_h2_to_y.set_title("Hidden 2 to Outputs, pesudo data in red")
        elif dimensions[0] == 2:
            plot_xs = np.array([np.array([a,b]) for a in np.linspace(-1,1,40) for b in np.linspace(-1,1,40)])
            plot_deep_gp_2d(ax, params, plot_xs)
        plt.draw()
        plt.pause(1.0/60.0)
Example #21
 def unpack_params(params):
     # Variational dist is a diagonal Gaussian.
     mean, log_std = params[:D], params[D:2*D]
     inputs=np.reshape(params[2*D:3*D],(D,1))
     len_sc, variance = params[3*D], params[3*D+1]
     meany=mean*inputs
     return mean, log_std, inputs, len_sc, variance
Example #22
    def callback(params):
        print("Log likelihood {}".format(-objective(params)))
        plt.cla()
        print(params)
        # Show posterior marginals.
        plot_xs = np.reshape(np.linspace(-7, 7, 300), (300,1))
        pred_mean, pred_cov = predict(params, X, y, plot_xs)
        marg_std = np.sqrt(np.diag(pred_cov))
        ax.plot(plot_xs, pred_mean, 'b')
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([pred_mean - 1.96 * marg_std,
                               (pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')

        # Show samples from posterior.
        rs = npr.RandomState(0)
        sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov, size=10)
        ax.plot(plot_xs, sampled_funcs.T)

        ax.plot(X, y, 'kx')
        ax.set_ylim([-1.5, 1.5])
        ax.set_xticks([])
        ax.set_yticks([])
        plt.draw()
        plt.pause(1.0/60.0)
Example #23
 def gradient(self, params, X, y):
     # gradient of objective = (1/N) sum_n x(n,:)*yerr(n)   // row vector
     y_pred = self.prediction(params, X)
     N = y.shape[0]
     yerr = np.reshape((y_pred - y), (N, 1))
     X1 = self.maybe_add_column_of_ones(X)
     gradient = np.sum(X1 * yerr, 0)/N # broadcast yerr along columns
     return gradient
Example #24
def simulator(theta,N):
    # draw N*M exponentials with mean 1/theta
    exponentials = np.random.exponential(1/theta,size=N*M)
    # reshape to N x M
    exponentials = np.reshape(exponentials,(N,M))
    #get means of the rows
    summaries = np.mean(exponentials,1)
    std = np.std(exponentials,1)
    return summaries, std
Example #25
def generate_data(beta,tau,n,num_times):
    num_features = len(beta)-1
    X = np.random.uniform(-2,2,(n,num_times,num_features))
    alpha = np.random.normal(0,tau,n)
    alpha = np.reshape(np.tile(alpha,num_times),(num_times,n))
    alpha = np.transpose(alpha)
    P = logistic(beta[0]+np.dot(X,beta[1:]))#+alpha)
    y = np.random.binomial(1,P)
    return X,y
Example #26
def convolve_im2col(img_cols, filt, block_size, skip, orig_img_shape):
    """ convolves an image already in the column representation
        with a filter bank (not in the column representation)
    """
    filtr = im2col(filt, block_size=block_size, skip=skip)
    out_num_rows = (orig_img_shape[0] - block_size[0]) // skip + 1  # integer division so reshape dims are ints
    out_num_cols = (orig_img_shape[1] - block_size[1]) // skip + 1
    outr = np.dot(filtr.T, img_cols)
    out  = np.reshape(outr, (out_num_rows, out_num_cols))
    return out
Example #27
def vec_node_loss_wrt_w(w):
    x = anp.reshape(w, W.shape)
    dp = anp.dot(m, x.T)
    e = anp.exp(eta * dp)
    P = e / e.sum(1)[:, anp.newaxis]
    wij = P@W
    aw = a[:, anp.newaxis] * wij
    nn = Bd.shape[0]
    v = U - ideg * anp.dot(Bd, aw)
    return (v**2).sum() / nn
    def variational_log_density(params, samples):
        '''
        samples: [n_samples, D]
        u: [D,1]
        w: [D,1]
        b: [1]
        Returns: [num_samples]
        '''
        n_samples = len(samples)

        mean = params[0]
        log_std = params[1]
        norm_flow_params = params[2]

        z_k, all_zs = normalizing_flows(samples, norm_flow_params)

        logp_zk = logprob(z_k)
        logp_zk = np.reshape(logp_zk, [n_samples, 1])

        logq_z0 = diag_gaussian_log_density(samples, mean, log_std)
        logq_z0 = np.reshape(logq_z0, [n_samples, 1])

        sum_nf = np.zeros([n_samples,1])
        for params_k in range(len(norm_flow_params)):
            u = norm_flow_params[params_k][0]
            w = norm_flow_params[params_k][1]
            b = norm_flow_params[params_k][2]

            # Appendix equations
            m_x = -1. + np.log(1.+np.exp(np.dot(w.T,u)))
            u_k = u + (m_x - np.dot(w.T,u)) *  (w/np.linalg.norm(w))
            # u_k = u

            # [n_samples, D]
            phi = np.dot((1.-np.tanh(np.dot(all_zs[params_k],w)+b)**2), w.T)
            # [n_samples, 1]; accumulate this step's log-det-Jacobian term
            sum_nf = sum_nf + np.log(np.abs(1+np.dot(phi, u_k)))

        # return logq_z0 - sum_nf
        log_qz = np.reshape(logq_z0 - sum_nf, [n_samples])
        return log_qz
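As in Rezende and Mohamed's planar flows, each term log|1 + u_k^T psi(z)| is the log-determinant of one step's Jacobian, with psi(z) = (1 - tanh(w^T z + b)^2) w. A small numerical check of that identity for a single step (hypothetical parameter values):

import autograd.numpy as np
import autograd.numpy.random as npr
from autograd import jacobian

D = 2
rs = npr.RandomState(0)
u, w, b = rs.randn(D, 1), rs.randn(D, 1), rs.randn(1)
m = -1. + np.log(1. + np.exp(np.dot(w.T, u)))
u_k = u + (m - np.dot(w.T, u)) * (w / np.linalg.norm(w))          # as in the loop above

f = lambda z: z + u_k[:, 0] * np.tanh(np.dot(z, w[:, 0]) + b[0])  # one planar step
z = rs.randn(D)
J = jacobian(f)(z)
psi = (1. - np.tanh(np.dot(z, w[:, 0]) + b[0])**2) * w[:, 0]
print(np.log(np.abs(np.linalg.det(J))),
      np.log(np.abs(1. + np.dot(psi, u_k[:, 0]))))                # the two values agree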
Example #29
def load_mnist():
    partial_flatten = lambda x : np.reshape(x, (x.shape[0], np.prod(x.shape[1:])))
    one_hot = lambda x, k: np.array(x[:,None] == np.arange(k)[None, :], dtype=int)
    train_images, train_labels, test_images, test_labels = data_mnist.mnist()
    train_images = partial_flatten(train_images) / 255.0
    test_images  = partial_flatten(test_images)  / 255.0
    train_labels = one_hot(train_labels, 10)
    test_labels = one_hot(test_labels, 10)
    N_data = train_images.shape[0]

    return N_data, train_images, train_labels, test_images, test_labels
Example #30
def initParams(num):

    mat = np.random.randn(m, m)
    return dict({num + 'z': np.reshape(np.linspace(0.0, 1.0, num=m), (m, 1)),
            num + 'u_mean': np.random.randn(m, 1),
            num + 'u_cov_fac': mat @ mat.T,
            num + 'h_mean': np.random.randn(n, 1),
            num + 'h_cov_fac': np.random.randn(n, 1),
            num + 'kernel_noise': np.ones((1, 1)),
            num + 'kernel_lenscale': np.ones((1, 1)),
            num + 'function_noise': np.ones((1, 1))})
Example #31
def neural_ode(thetas):
    weights_1, weights_2, bias_1, bias_2 = theta_reshape(
        thetas)  # Reshape once for utilization in entire iteration
    batch_state_array = np.zeros(shape=(num_batches, batch_tsteps, state_len),
                                 dtype='double')  #
    batch_rhs_array = np.zeros(shape=(num_batches, batch_tsteps, state_len),
                               dtype='double')  #
    batch_time_array = np.zeros(shape=(num_batches, batch_tsteps, 1),
                                dtype='double')  #

    augmented_state = np.zeros(shape=(1, state_len + num_wb + 1))
    batch_ids = np.random.choice(tsteps - batch_tsteps, num_batches)

    # Minibatching within sampled domain
    total_batch_loss = 0.0
    for j in range(num_batches):
        start_id = batch_ids[j]
        end_id = start_id + batch_tsteps

        batch_state_array[j, 0, :] = true_state_array[start_id, :]
        batch_time_array[j, :batch_tsteps] = time_array[start_id:end_id, None]

        # Calculate forward pass - saving results for state and rhs to array - batchwise
        temp_state = np.copy(batch_state_array[j, 0, :])
        for i in range(1, batch_tsteps):
            time = np.reshape(batch_time_array[j, i], newshape=(1, 1))
            output_state, output_rhs = euler_forward(temp_state, weights_1,
                                                     weights_2, bias_1, bias_2,
                                                     time)
            batch_state_array[j, i, :] = output_state[:]
            batch_rhs_array[j, i, :] = output_rhs[:]
            temp_state = np.copy(output_state)

        # Operations at final time step (setting up initial conditions for the adjoint)
        temp_state = np.copy(batch_state_array[j, -2, :])
        temp_state = np.reshape(temp_state,
                                (1, state_len))  # prefinal state vector
        time = np.reshape(batch_time_array[j, -2], (1, 1))  # prefinal time
        pvec = np.concatenate((temp_state, thetas, time), axis=1)

        # Calculate loss related gradients - dldz
        dldz = np.reshape(
            dldz_func(output_state, true_state_array[end_id - 1, :]),
            (1, state_len))
        # With respect to weights,bias and time
        dl = dl_func(pvec, true_state_array[end_id - 1, :])
        dldthetas = np.reshape(dl[:, state_len:-1], newshape=(1, num_wb))
        # Calculate dl/dt
        dldt = np.matmul(dldz, batch_rhs_array[j, -1, :])
        dldt = np.reshape(dldt, newshape=(1, 1))

        # Find batch loss
        total_batch_loss = total_batch_loss + np.sum(
            (output_state - true_state_array[end_id - 1, :])**2)

        # Reverse operation (adjoint evolution in backward time)
        _augmented_state = np.concatenate((dldz, dldthetas, dldt), axis=1)
        for i in range(1, batch_tsteps):
            time = np.reshape(batch_time_array[j, -1 - i], newshape=(1, 1))
            state_now = np.reshape(batch_state_array[j, -1 - i, :],
                                   newshape=(1, state_len))
            pvec = np.concatenate((state_now, thetas, time), axis=1)

            # Adjoint propagation backward in time
            i0 = _augmented_state + dt * adjoint_rhs(_augmented_state, pvec)
            sub_state = np.reshape(i0[0, :state_len], newshape=(1, state_len))

            _augmented_state[:, :] = i0[:, :]

        augmented_state = np.add(augmented_state, _augmented_state)

    return augmented_state, total_batch_loss
Example #32
def convolve_trailing_axes(A, B):
    A = np.reshape(A, list(A.shape) + [1])
    B = np.reshape(B, list(B.shape) + [1])
    return convolve_sum_axes(A, B)
Example #33
 def evaluate(self, x):
     if self.x_is_matrix:
         x = np.reshape(x, (self.xLen,1))
     result = self.costFunc(x)
     self.fevals += 1
     return result
Example #34
 def objective(params):
     cur_occlusion = np.reshape(params, (rows, cols))
     final_vx, final_vy = simulate(init_vx, init_vy, simulation_timesteps, cur_occlusion)
     return -lift(final_vy) / drag(final_vx)
Example #35
 def get(self, vect, name):
     idxs, shape = self.idxs_and_shapes[name]
     return np.reshape(vect[idxs], shape)
Example #36
    def optimize_locs_widths(
        p,
        dat,
        gwidth0,
        test_locs0,
        reg=1e-2,
        max_iter=100,
        tol_fun=1e-5,
        disp=False,
        locs_bounds_frac=100,
        gwidth_lb=None,
        gwidth_ub=None,
        use_2terms=False,
    ):
        """
        Optimize the test locations and the Gaussian kernel width by 
        maximizing a test power criterion. data should not be the same data as
        used in the actual test (i.e., should be a held-out set). 
        This function is deterministic.

        - data: a Data object
        - test_locs0: Jxd numpy array. Initial V.
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - gwidth0: initial value of the Gaussian width^2
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
            the box defined by coordinate-wise min-max by std of each coordinate
            multiplied by this number.
        - gwidth_lb: absolute lower bound on the Gaussian width^2
        - gwidth_ub: absolute upper bound on the Gaussian width^2
        - use_2terms: If True, then besides the signal-to-noise ratio
          criterion, the objective function will also include the first term
          that is dropped.

        #- If the lb, ub bounds are None, use fraction of the median heuristics 
        #    to automatically set the bounds.
        
        Return (V test_locs, gaussian width, optimization info log)
        """
        J = test_locs0.shape[0]
        X = dat.data()
        n, d = X.shape

        # Parameterize the Gaussian width with its square root (then square later)
        # to automatically enforce the positivity.
        def obj(sqrt_gwidth, V):
            return -GaussFSSD.power_criterion(
                p, dat, sqrt_gwidth**2, V, reg=reg, use_2terms=use_2terms)

        flatten = lambda gwidth, V: np.hstack((gwidth, V.reshape(-1)))

        def unflatten(x):
            sqrt_gwidth = x[0]
            V = np.reshape(x[1:], (J, d))
            return sqrt_gwidth, V

        def flat_obj(x):
            sqrt_gwidth, V = unflatten(x)
            return obj(sqrt_gwidth, V)

        # gradient
        #grad_obj = autograd.elementwise_grad(flat_obj)
        # Initial point
        x0 = flatten(np.sqrt(gwidth0), test_locs0)

        #make sure that the optimized gwidth is not too small or too large.
        fac_min = 1e-2
        fac_max = 1e2
        med2 = util.meddistance(X, subsample=1000)**2
        if gwidth_lb is None:
            gwidth_lb = max(fac_min * med2, 1e-3)
        if gwidth_ub is None:
            gwidth_ub = min(fac_max * med2, 1e5)

        # Make a box to bound test locations
        X_std = np.std(X, axis=0)
        # X_min: length-d array
        X_min = np.min(X, axis=0)
        X_max = np.max(X, axis=0)
        # V_lb: J x d
        V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
        V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))
        # (J*d+1) x 2. Take square root because we parameterize with the square
        # root
        x0_lb = np.hstack((np.sqrt(gwidth_lb), np.reshape(V_lb, -1)))
        x0_ub = np.hstack((np.sqrt(gwidth_ub), np.reshape(V_ub, -1)))
        x0_bounds = list(zip(x0_lb, x0_ub))

        # optimize. Time the optimization as well.
        # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
        grad_obj = autograd.elementwise_grad(flat_obj)
        with util.ContextTimer() as timer:
            opt_result = scipy.optimize.minimize(
                flat_obj,
                x0,
                method='L-BFGS-B',
                bounds=x0_bounds,
                tol=tol_fun,
                options={
                    'maxiter': max_iter,
                    'ftol': tol_fun,
                    'disp': disp,
                    'gtol': 1.0e-06,
                },
                jac=grad_obj,
            )

        opt_result = dict(opt_result)
        opt_result['time_secs'] = timer.secs
        x_opt = opt_result['x']
        sq_gw_opt, V_opt = unflatten(x_opt)
        gw_opt = sq_gw_opt**2

        assert util.is_real_num(
            gw_opt), 'gw_opt is not real. Was %s' % str(gw_opt)

        return V_opt, gw_opt, opt_result
Example #37
        def animate(k):
            # clear the panel
            ax1.cla()
            ax2.cla()

            # print rendering update
            if np.mod(k + 1, 5) == 0:
                print('rendering animation frame ' + str(k + 1) + ' of ' +
                      str(len(num_elements)))
            if k == len(num_elements) - 1:
                print('animation rendering complete!')
                time.sleep(1)
                clear_output()

            # loop over panels, produce plots
            self.D = num_elements[k]

            # fit to data
            F = 0
            predict = 0
            w = 0
            if basis == 'poly':
                w = weight_history[k]
                self.D = len(w) - 1
                ax1.set_title(str(self.D) + ' poly units', fontsize=14)
                self.predict = self.poly_predict

            elif basis == 'tanh':
                w = weight_history[k]
                self.D = len(w) - 1
                ax1.set_title(str(self.D) + ' tanh units', fontsize=14)
                self.predict = self.tanh_predict

            elif basis == 'tree':
                w = weight_history[self.D]
                ax1.set_title(str(np.count_nonzero(w)) + ' tree units',
                              fontsize=14)
                self.predict = self.tree_predict
                self.weight_history = weight_history

            ####### plot all and dress panel ######
            # produce learned predictor
            ind0 = np.argwhere(self.y == +1)
            ind0 = [e[0] for e in ind0]
            ax1.scatter(self.x[ind0, 0],
                        self.x[ind0, 1],
                        s=55,
                        color=self.colors[0],
                        edgecolor='k')

            ind1 = np.argwhere(self.y == -1)
            ind1 = [e[0] for e in ind1]
            ax1.scatter(self.x[ind1, 0],
                        self.x[ind1, 1],
                        s=55,
                        color=self.colors[1],
                        edgecolor='k')

            # plot decision boundary
            r1 = np.linspace(xmin1, xmax1, 100)
            r2 = np.linspace(xmin2, xmax2, 100)
            s, t = np.meshgrid(r1, r2)
            s = np.reshape(s, (np.size(s), 1))
            t = np.reshape(t, (np.size(t), 1))
            h = np.concatenate((s, t), axis=1)
            z = []
            for j in range(len(h)):
                a = self.predict(h[j, :], w)
                z.append(a)
            z = np.asarray(z)
            z = np.tanh(z)

            # reshape it
            s.shape = (np.size(r1), np.size(r2))
            t.shape = (np.size(r1), np.size(r2))
            z.shape = (np.size(r1), np.size(r2))

            #### plot contour, color regions ####
            ax1.contour(s,
                        t,
                        z,
                        colors='k',
                        linewidths=2.5,
                        levels=[0],
                        zorder=2)
            ax1.contourf(s,
                         t,
                         z,
                         colors=[self.colors[1], self.colors[0]],
                         alpha=0.15,
                         levels=range(-1, 2))

            # cleanup panel
            ax1.set_xlim([xmin1, xmax1])
            ax1.set_ylim([xmin2, xmax2])
            ax1.set_xlabel(r'$x_1$', fontsize=14, labelpad=10)
            ax1.set_ylabel(r'$x_2$', rotation=0, fontsize=14, labelpad=10)
            ax1.set_xticks(np.arange(round(xmin1), round(xmax1) + 1, 1.0))
            ax1.set_yticks(np.arange(round(xmin2), round(xmax2) + 1, 1.0))

            # cost function value
            ax2.plot([v - 1 for v in num_elements[:k + 1]],
                     cost_evals[:k + 1],
                     color='b',
                     linewidth=1.5,
                     zorder=1)
            ax2.scatter([v - 1 for v in num_elements[:k + 1]],
                        cost_evals[:k + 1],
                        color='b',
                        s=70,
                        edgecolor='w',
                        linewidth=1.5,
                        zorder=3)

            ax2.set_xlabel('iteration', fontsize=12)
            ax2.set_title('cost function plot', fontsize=12)

            # clean up panel
            ax2.set_xlim([minxc, maxxc])
            ax2.set_ylim([minc, maxc])
            ax2.xaxis.set_major_locator(MaxNLocator(integer=True))
Example #38
def flatten_to_dim(X, d):
    assert X.ndim >= d
    assert d > 0
    return np.reshape(X[None, ...], (-1, ) + X.shape[-d:])
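Usage sketch: all leading axes collapse into one, and the trailing d axes are kept.

import numpy as np

X = np.zeros((4, 5, 3, 2))
print(flatten_to_dim(X, 2).shape)   # (20, 3, 2)
print(flatten_to_dim(X, 1).shape)   # (60, 2)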
Example #39
 def unpack_params(params):
     gp_params = np.reshape(params[:total_gp_params],
                            (data_dimension, params_per_gp))
     latents = np.reshape(params[total_gp_params:],
                          (datalen, latent_dimension))
     return gp_params, latents
Example #40
grad_f = elementwise_grad(f)
grad_k = elementwise_grad(k)  # needed below for the momentum update

# Initialization
i = 0
x_i = np.array([2., 1.])
p_i = np.array([0. + 0j, 0. + 0j])

# Hyper parameters
gamma = 0.2
epsilon = 0.01
delta = 1/(1 + gamma*epsilon)

# Define arrays to save
F = np.array(f(x_i))
X = np.reshape(x_i, (1, -1))

print("Starting Optimization")
while True:  # until x converges
    p_ip1 = delta*p_i - epsilon*delta*grad_f(x_i)
    x_ip1 = x_i + epsilon*grad_k(p_ip1+0j)
    i += 1

    if np.all(abs(x_i - x_ip1) < 1e-6):
        break

    # Update x and p
    p_i = p_ip1
    x_i = x_ip1
    # Save results
    F = np.append(F, f(x_i))
Example #41
def fit_weights_and_save(
        weights_file,
        ca_data_file='rs_vm_denoise_200605.npy',
        opto_silencing_data_file='vip_halo_data_for_sim.npy',
        opto_activation_data_file='vip_chrimson_data_for_sim.npy',
        constrain_wts=None,
        allow_var=True,
        fit_s02=True,
        constrain_isn=True,
        tv=False,
        l2_penalty=0.01,
        init_noise=0.1,
        init_W_from_lsq=False,
        init_W_from_lbfgs=False,
        scale_init_by=1,
        init_W_from_file=False,
        init_file=None,
        correct_Eta=False,
        init_Eta_with_s02=False,
        init_Eta12_with_dYY=False,
        use_opto_transforms=False,
        share_residuals=False,
        stimwise=False,
        simulate1=True,
        simulate2=False,
        help_constrain_isn=True,
        ignore_halo_vip=False,
        verbose=True,
        free_amplitude=False,
        norm_opto_transforms=False,
        zero_extra_weights=None,
        allow_s2=True):

    nsize, ncontrast = 6, 6

    npfile = np.load(ca_data_file, allow_pickle=True)[(
    )]  #,{'rs':rs,'rs_denoise':rs_denoise},allow_pickle=True)
    rs = npfile['rs']
    #rs_denoise = npfile['rs_denoise']

    nsize, ncontrast, ndir = 6, 6, 8
    #ori_dirs = [[0,4],[2,6]] #[[0,4],[1,3,5,7],[2,6]]
    ori_dirs = [[0, 1, 2, 3, 4, 5, 6, 7]]
    nT = len(ori_dirs)
    nS = len(rs[0])

    def sum_to_1(r):
        R = r.reshape((r.shape[0], -1))
        #R = R/np.nansum(R[:,~np.isnan(R.sum(0))],axis=1)[:,np.newaxis]
        R = R / np.nansum(R, axis=1)[:, np.newaxis]  # changed 8/28
        return R

    def norm_to_mean(r):
        R = r.reshape((r.shape[0], -1))
        R = R / np.nanmean(R[:, ~np.isnan(R.sum(0))], axis=1)[:, np.newaxis]
        return R

    Rs = [[None, None] for i in range(len(rs))]
    Rso = [[[None for iT in range(nT)] for iS in range(nS)]
           for icelltype in range(len(rs))]
    rso = [[[None for iT in range(nT)] for iS in range(nS)]
           for icelltype in range(len(rs))]

    for iR, r in enumerate(rs):  #rs_denoise):
        #print(iR)
        for ialign in range(nS):
            #Rs[iR][ialign] = r[ialign][:,:nsize,:]
            #sm = np.nanmean(np.nansum(np.nansum(Rs[iR][ialign],1),1))
            #Rs[iR][ialign] = Rs[iR][ialign]/sm
            #print('frac isnan Rs %d,%d: %f'%(iR,ialign,np.isnan(r[ialign]).mean()))
            Rs[iR][ialign] = sum_to_1(r[ialign][:, :nsize, :])
    #         Rs[iR][ialign] = von_mises_denoise(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir)))

    kernel = np.ones((1, 2, 2))
    kernel = kernel / kernel.sum()

    for iR, r in enumerate(rs):
        for ialign in range(nS):
            for iori in range(nT):
                #print('this Rs shape: '+str(Rs[iR][ialign].shape))
                #print('this Rs reshaped shape: '+str(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]].shape))
                #print('this Rs max percent nan: '+str(np.isnan(Rs[iR][ialign].reshape((-1,nsize,ncontrast,ndir))[:,:,:,ori_dirs[iori]]).mean(-1).max()))
                Rso[iR][ialign][iori] = np.nanmean(
                    Rs[iR][ialign].reshape(
                        (-1, nsize, ncontrast, ndir))[:, :, :, ori_dirs[iori]],
                    -1)
                Rso[iR][ialign][iori][:, :, 0] = np.nanmean(
                    Rso[iR][ialign][iori][:, :, 0],
                    1)[:, np.newaxis]  # average 0 contrast values
                #print('frac isnan pre-conv Rso %d,%d,%d: %f'%(iR,ialign,iori,np.isnan(Rso[iR][ialign][iori]).mean()))
                Rso[iR][ialign][iori][:, 1:, 1:] = ssi.convolve(
                    Rso[iR][ialign][iori], kernel, 'valid')
                Rso[iR][ialign][iori] = Rso[iR][ialign][iori].reshape(
                    Rso[iR][ialign][iori].shape[0], -1)
                #print('frac isnan Rso %d,%d,%d: %f'%(iR,ialign,iori,np.isnan(Rso[iR][ialign][iori]).mean()))
                #print('sum of Rso isnan: '+str(np.isnan(Rso[iR][ialign][iori]).sum(1)))
                #Rso[iR][ialign][iori] = Rso[iR][ialign][iori]/np.nanmean(Rso[iR][ialign][iori],-1)[:,np.newaxis]

    def set_bound(bd, code, val=0):
        # set bounds to 0 where 0s occur in 'code'
        for iitem in range(len(bd)):
            bd[iitem][code[iitem]] = val

    nN = 36
    nS = 2
    nP = 2
    nT = 1
    nQ = 4

    # code for bounds: 0 , constrained to 0
    # +/-1 , constrained to +/-1
    # 1.5, constrained to [0,1]
    # 2 , constrained to [0,inf)
    # -2 , constrained to (-inf,0]
    # 3 , unconstrained

    Wmx_bounds = 3 * np.ones((nP, nQ), dtype=int)
    Wmx_bounds[0, :] = 2  # L4 PCs are excitatory
    Wmx_bounds[0, 1] = 0  # SSTs don't receive L4 input

    if allow_var:
        Wsx_bounds = 3 * np.ones(
            Wmx_bounds.shape)  #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)
        Wsx_bounds[0, 1] = 0
    else:
        Wsx_bounds = np.zeros(
            Wmx_bounds.shape)  #Wmx_bounds.copy()*0 #np.zeros_like(Wmx_bounds)

    Wmy_bounds = 3 * np.ones((nQ, nQ), dtype=int)
    Wmy_bounds[0, :] = 2  # PCs are excitatory
    Wmy_bounds[1:, :] = -2  # all the cell types except PCs are inhibitory
    Wmy_bounds[1, 1] = 0  # SSTs don't inhibit themselves
    # Wmy_bounds[3,1] = 0 # PVs are allowed to inhibit SSTs, consistent with Hillel's unpublished results, but not consistent with Pfeffer et al.
    Wmy_bounds[
        2,
        0] = 0  # VIPs don't inhibit L2/3 PCs. According to Pfeffer et al., only L5 PCs were found to get VIP inhibition

    if zero_extra_weights is not None:
        Wmx_bounds[zero_extra_weights[0]] = 0
        Wmy_bounds[zero_extra_weights[1]] = 0

    if allow_var:
        Wsy_bounds = 3 * np.ones(
            Wmy_bounds.shape)  #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)
        Wsy_bounds[1, 1] = 0
        Wsy_bounds[3, 1] = 0
        Wsy_bounds[2, 0] = 0
    else:
        Wsy_bounds = np.zeros(
            Wmy_bounds.shape)  #Wmy_bounds.copy()*0 #np.zeros_like(Wmy_bounds)

    if constrain_wts is not None:
        for wt in constrain_wts:
            Wmy_bounds[wt[0], wt[1]] = 0
            Wsy_bounds[wt[0], wt[1]] = 0

    def tile_nS_nT_nN(kernel):
        row = np.concatenate([kernel for idim in range(nS * nT)],
                             axis=0)[np.newaxis, :]
        tiled = np.concatenate([row for irow in range(nN)], axis=0)
        return tiled

    def set_bounds_by_code(lb, ub, bdlist):
        set_bound(lb, [bd == 0 for bd in bdlist], val=0)
        set_bound(ub, [bd == 0 for bd in bdlist], val=0)

        set_bound(lb, [bd == 2 for bd in bdlist], val=0)

        set_bound(ub, [bd == -2 for bd in bdlist], val=0)

        set_bound(lb, [bd == 1 for bd in bdlist], val=1)
        set_bound(ub, [bd == 1 for bd in bdlist], val=1)

        set_bound(lb, [bd == 1.5 for bd in bdlist], val=0)
        set_bound(ub, [bd == 1.5 for bd in bdlist], val=1)

        set_bound(lb, [bd == -1 for bd in bdlist], val=-1)
        set_bound(ub, [bd == -1 for bd in bdlist], val=-1)

    if allow_s2:
        if fit_s02:
            s02_bounds = 2 * np.ones(
                (nQ, ))  # permitting noise as a free parameter
        else:
            s02_bounds = np.ones((nQ, ))
    else:
        s02_bounds = np.zeros((nQ, ))

    k_bounds = 1.5 * np.ones((nQ * (nS - 1), ))

    #k_bounds[1] = 0 # temporary: spatial kernel constrained to 0 for SST
    #k_bounds[2] = 0 # temporary: spatial kernel constrained to 0 for VIP

    kappa_bounds = np.ones((1, ))
    # kappa_bounds = 2*np.ones((1,))

    T_bounds = 1.5 * np.ones((nQ * (nT - 1), ))

    X_bounds = tile_nS_nT_nN(np.array([2, 1]))
    # X_bounds = np.array([np.array([2,1,2,1])]*nN)

    Xp_bounds = tile_nS_nT_nN(np.array([3, 1]))
    # Xp_bounds = np.array([np.array([3,1,3,1])]*nN)

    # Y_bounds = tile_nS_nT_nN(2*np.ones((nQ,)))
    # # Y_bounds = 2*np.ones((nN,nT*nS*nQ))

    Eta_bounds = tile_nS_nT_nN(3 * np.ones((nQ, )))
    # Eta_bounds = 3*np.ones((nN,nT*nS*nQ))

    if allow_s2:
        if allow_var:
            Xi_bounds = tile_nS_nT_nN(3 * np.ones((nQ, )))
        else:
            Xi_bounds = tile_nS_nT_nN(np.zeros((nQ, )))
    else:
        Xi_bounds = tile_nS_nT_nN(np.zeros((nQ, )))

    # Xi_bounds = 3*np.ones((nN,nT*nS*nQ))

    h1_bounds = -2 * np.ones((1, ))

    h2_bounds = 2 * np.ones((1, ))

    bl_bounds = 3 * np.ones((nQ, ))

    if free_amplitude:
        amp_bounds = 2 * np.ones((nT * nS * nQ, ))
    else:
        amp_bounds = 1 * np.ones((nT * nS * nQ, ))

    # shapes = [(nP,nQ),(nQ,nQ),(nP,nQ),(nQ,nQ),(nQ,),(nQ,),(1,),(nN,nS*nP),(nN,nS*nQ),(nN,nS*nQ),(nN,nS*nQ)]
    shapes1 = [(nP, nQ), (nQ, nQ), (nP, nQ),
               (nQ, nQ), (nQ, ), (nQ * (nS - 1), ), (1, ), (nQ * (nT - 1), ),
               (1, ), (1, ), (nQ, ), (nQ * nS * nT, )]
    shapes2 = [(nN, nT * nS * nP), (nN, nT * nS * nP), (nN, nT * nS * nQ),
               (nN, nT * nS * nQ)]
    #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1])))
    #print('size of shapes2: '+str(np.sum([np.prod(shp) for shp in shapes2])))
    #         Wmx,    Wmy,    Wsx,    Wsy,    s02,  k,    kappa,T,   h1, h2
    #XX,            XXp,          Eta,          Xi

    #bdlist = [Wmx_bounds,Wmy_bounds,Wsx_bounds,Wsy_bounds,s02_bounds,k_bounds,kappa_bounds,T_bounds,X_bounds,Xp_bounds,Eta_bounds,Xi_bounds,h1_bounds,h2_bounds]
    bd1list = [
        Wmx_bounds, Wmy_bounds, Wsx_bounds, Wsy_bounds, s02_bounds, k_bounds,
        kappa_bounds, T_bounds, h1_bounds, h2_bounds, bl_bounds, amp_bounds
    ]
    bd2list = [X_bounds, Xp_bounds, Eta_bounds, Xi_bounds]

    lb1, ub1 = [[sgn * np.inf * np.ones(shp) for shp in shapes1]
                for sgn in [-1, 1]]
    set_bounds_by_code(lb1, ub1, bd1list)
    lb2, ub2 = [[sgn * np.inf * np.ones(shp) for shp in shapes2]
                for sgn in [-1, 1]]
    set_bounds_by_code(lb2, ub2, bd2list)

    #set_bound(lb,[bd==0 for bd in bdlist],val=0)
    #set_bound(ub,[bd==0 for bd in bdlist],val=0)
    #
    #set_bound(lb,[bd==2 for bd in bdlist],val=0)
    #
    #set_bound(ub,[bd==-2 for bd in bdlist],val=0)
    #
    #set_bound(lb,[bd==1 for bd in bdlist],val=1)
    #set_bound(ub,[bd==1 for bd in bdlist],val=1)
    #
    #set_bound(lb,[bd==1.5 for bd in bdlist],val=0)
    #set_bound(ub,[bd==1.5 for bd in bdlist],val=1)
    #
    #set_bound(lb,[bd==-1 for bd in bdlist],val=-1)
    #set_bound(ub,[bd==-1 for bd in bdlist],val=-1)

    # for bd in [lb,ub]:
    #     for ind in [2,3]:
    #         bd[ind][:,1] = 0

    # temporary for no variation expt.
    # lb[2] = np.zeros_like(lb[2])
    # lb[3] = np.zeros_like(lb[3])
    # lb[4] = np.ones_like(lb[4])
    # lb[5] = np.zeros_like(lb[5])
    # ub[2] = np.zeros_like(ub[2])
    # ub[3] = np.zeros_like(ub[3])
    # ub[4] = np.ones_like(ub[4])
    # ub[5] = np.ones_like(ub[5])
    # temporary for no variation expt.
    lb1 = np.concatenate([a.flatten() for a in lb1])
    ub1 = np.concatenate([b.flatten() for b in ub1])
    lb2 = np.concatenate([a.flatten() for a in lb2])
    ub2 = np.concatenate([b.flatten() for b in ub2])
    bounds1 = [(a, b) for a, b in zip(lb1, ub1)]
    bounds2 = [(a, b) for a, b in zip(lb2, ub2)]

    nS = 2
    #print('nT: '+str(nT))
    ndims = 5
    ncelltypes = 5
    Yhat = [[None for iT in range(nT)] for iS in range(nS)]
    Xhat = [[None for iT in range(nT)] for iS in range(nS)]
    Ypc_list = [[None for iT in range(nT)] for iS in range(nS)]
    Xpc_list = [[None for iT in range(nT)] for iS in range(nS)]
    mx = [None for iS in range(nS)]
    for iS in range(nS):
        mx[iS] = np.zeros((ncelltypes, ))
        yy = [None for icelltype in range(ncelltypes)]
        for icelltype in range(ncelltypes):
            yy[icelltype] = np.nanmean(Rso[icelltype][iS][0], 0)
            mx[iS][icelltype] = np.nanmax(yy[icelltype])
        for iT in range(nT):
            y = [
                np.nanmean(Rso[icelltype][iS][iT], axis=0)[:, np.newaxis] /
                mx[iS][icelltype] for icelltype in range(1, ncelltypes)
            ]
            Ypc_list[iS][iT] = [None for icelltype in range(1, ncelltypes)]
            for icelltype in range(1, ncelltypes):
                # as currently written, penalties involving (X,Y)pc_list are effectively artificially smaller by
                # a factor of mx[iS][icelltype] compared to what one would expect from the (X,Y)-penalty as defined
                # subsequently.
                rss = Rso[icelltype][iS][iT].copy(
                )  #/mx[iS][icelltype] #.reshape(Rs[icelltype][ialign].shape[0],-1)
                #print('sum of isnan: '+str(np.isnan(rss).sum(1)))
                #rss = Rso[icelltype][iS][iT].copy() #.reshape(Rs[icelltype][ialign].shape[0],-1)
                rss = rss[np.isnan(rss).sum(1) == 0]
                #         print(rss.max())
                #         rss[rss<0] = 0
                #         rss = rss[np.random.randn(rss.shape[0])>0]
                try:
                    u, s, v = np.linalg.svd(rss - np.mean(rss, 0)[np.newaxis])
                    Ypc_list[iS][iT][icelltype - 1] = [
                        (s[idim], v[idim]) for idim in range(ndims)
                    ]
    #                 print('yep on Y')
    #                 print(np.min(np.sum(rs[icelltype][iS][iT],axis=1)))
                except:
                    print('nope on Y')
                    #print('shape of rss: '+str(rss.shape))
                    #print('mean of rss: '+str(np.mean(np.isnan(rss))))
                    #print('min of this rs: '+str(np.min(np.sum(rs[icelltype][iS][iT],axis=1))))
            Yhat[iS][iT] = np.concatenate(y, axis=1)
            #         x = sim_utils.columnize(Rso[0][iS][iT])[:,np.newaxis]
            icelltype = 0
            #x = np.nanmean(Rso[icelltype][iS][iT],0)[:,np.newaxis]#/mx[iS][icelltype]
            x = np.nanmean(Rso[icelltype][iS][iT],
                           0)[:, np.newaxis] / mx[iS][icelltype]
            #         opto_column = np.concatenate((np.zeros((nN,)),np.zeros((nNO/2,)),np.ones((nNO/2,))),axis=0)[:,np.newaxis]
            Xhat[iS][iT] = np.concatenate((x, np.ones_like(x)), axis=1)
            #         Xhat[iS][iT] = np.concatenate((x,np.ones_like(x),opto_column),axis=1)
            icelltype = 0
            #rss = Rso[icelltype][iS][iT].copy()/mx[iS][icelltype]
            rss = Rso[icelltype][iS][iT].copy()
            rss = rss[np.isnan(rss).sum(1) == 0]
            #         try:
            u, s, v = np.linalg.svd(rss - rss.mean(0)[np.newaxis])
            Xpc_list[iS][iT] = [None for iinput in range(2)]
            Xpc_list[iS][iT][0] = [(s[idim], v[idim]) for idim in range(ndims)]
            Xpc_list[iS][iT][1] = [(0, np.zeros((Xhat[0][0].shape[0], )))
                                   for idim in range(ndims)]
    #         except:
    #             print('nope on X')
    #             print(np.mean(np.isnan(rss)))
    #             print(np.min(np.sum(Rso[icelltype][iS][iT],axis=1)))
    nN, nP = Xhat[0][0].shape
    #print('nP: '+str(nP))
    nQ = Yhat[0][0].shape[1]

    import sim_utils

    pop_rate_fn = sim_utils.f_miller_troyer
    pop_deriv_fn = sim_utils.fprime_miller_troyer

    def compute_f_(Eta, Xi, s02):
        return sim_utils.f_miller_troyer(
            Eta, Xi**2 + np.concatenate([s02 for ipixel in range(nS * nT)]))

    def compute_fprime_m_(Eta, Xi, s02):
        return sim_utils.fprime_miller_troyer(
            Eta, Xi**2 + np.concatenate([s02
                                         for ipixel in range(nS * nT)])) * Xi

    def compute_fprime_s_(Eta, Xi, s02):
        s2 = Xi**2 + np.concatenate((s02, s02), axis=0)
        return sim_utils.fprime_s_miller_troyer(Eta, s2) * (Xi / s2)

    def sorted_r_eigs(w):
        drW, prW = np.linalg.eig(w)
        srtinds = np.argsort(drW)
        return drW[srtinds], prW[:, srtinds]

    #         0.Wmx,  1.Wmy,  2.Wsx,  3.Wsy,  4.s02,5.K,  6.kappa,7.T,8.XX,        9.XXp,        10.Eta,       11.Xi,   12.h1,  13.h2

    shapes1 = [(nP, nQ), (nQ, nQ), (nP, nQ),
               (nQ, nQ), (nQ, ), (nQ * (nS - 1), ), (1, ), (nQ * (nT - 1), ),
               (1, ), (1, ), (nQ, ), (nT * nS * nQ, )]
    shapes2 = [(nN, nT * nS * nP), (nN, nT * nS * nP), (nN, nT * nS * nQ),
               (nN, nT * nS * nQ)]
    #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1])))
    #print('size of shapes2: '+str(np.sum([np.prod(shp) for shp in shapes2])))

    import calnet.fitting_spatial_feature

    YYhat = calnet.utils.flatten_nested_list_of_2d_arrays(Yhat)
    XXhat = calnet.utils.flatten_nested_list_of_2d_arrays(Xhat)

    opto_dict = np.load(opto_silencing_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    Yhat_opto = np.nanmean(np.reshape(Yhat_opto, (nN, 2, nS, 2, nQ)),
                           3).reshape((nN * 2, -1))
    Yhat_opto[0::12] = np.nanmean(Yhat_opto[0::12], axis=0)[np.newaxis]
    Yhat_opto[1::12] = np.nanmean(Yhat_opto[1::12], axis=0)[np.newaxis]
    Yhat_opto = Yhat_opto / np.nanmax(Yhat_opto[0::2], 0)[np.newaxis, :]
    #print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2]

    YYhat_halo = Yhat_opto.reshape((nN, 2, -1))
    opto_transform1 = calnet.utils.fit_opto_transform(
        YYhat_halo, norm01=norm_opto_transforms)

    opto_transform1.res[:, [0, 2, 3, 4, 6, 7]] = 0

    dYY1 = opto_transform1.transform(YYhat) - opto_transform1.preprocess(YYhat)

    #YYhat_halo_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_halo)
    #dYY1 = YYhat_halo_sim[:,1,:] - YYhat_halo_sim[:,0,:]

    def overwrite_plus_n(arr, to_overwrite, n):
        """Overwrite column `to_overwrite` of arr with column `to_overwrite + n` (in place)."""
        arr[:, to_overwrite] = arr[:, int(to_overwrite + n)]
        return arr

    for to_overwrite in [1, 2]:
        n = 4
        dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \
                = [overwrite_plus_n(x,to_overwrite,n) for x in \
                        [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]]
    for to_overwrite in [7]:
        n = -4
        dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res \
                = [overwrite_plus_n(x,to_overwrite,n) for x in \
                        [dYY1,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res]]

    if ignore_halo_vip:
        dYY1[:, 2::nQ] = np.nan

    #for to_overwrite in [1,2]:
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite+4]
    #for to_overwrite in [7]:
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite-4]

    #Yhat_opto = opto_dict['Yhat_opto']
    #for iS in range(nS):
    #    mx = np.zeros((nQ,))
    #    for iQ in range(nQ):
    #        slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ)
    #        mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer])
    #        Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ]
    ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    #print(Yhat_opto.shape)
    #h_opto = opto_dict['h_opto']
    #dYY1 = Yhat_opto[1::2]-Yhat_opto[0::2]
    #for to_overwrite in [1,2,5,6]: # overwrite sst and vip with off-centered values
    #    dYY1[:,to_overwrite] = dYY1[:,to_overwrite+8]
    #for to_overwrite in [11,15]:
    #    dYY1[:,to_overwrite] = np.nan #dYY1[:,to_overwrite-8]

    opto_dict = np.load(opto_activation_data_file, allow_pickle=True)[()]

    Yhat_opto = opto_dict['Yhat_opto']
    Yhat_opto = np.nanmean(np.reshape(Yhat_opto, (nN, 2, nS, 2, nQ)),
                           3).reshape((nN * 2, -1))
    Yhat_opto[0::12] = np.nanmean(Yhat_opto[0::12], axis=0)[np.newaxis]
    Yhat_opto[1::12] = np.nanmean(Yhat_opto[1::12], axis=0)[np.newaxis]
    Yhat_opto = Yhat_opto / Yhat_opto[0::2].max(0)[np.newaxis, :]
    #print(Yhat_opto.shape)
    h_opto = opto_dict['h_opto']
    #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2]

    YYhat_chrimson = Yhat_opto.reshape((nN, 2, -1))
    opto_transform2 = calnet.utils.fit_opto_transform(
        YYhat_chrimson, norm01=norm_opto_transforms)
    dYY2 = opto_transform2.transform(YYhat) - opto_transform2.preprocess(YYhat)
    #YYhat_chrimson_sim = calnet.utils.simulate_opto_effect(YYhat,YYhat_chrimson)
    #dYY2 = YYhat_chrimson_sim[:,1,:] - YYhat_chrimson_sim[:,0,:]

    #Yhat_opto = opto_dict['Yhat_opto']
    #for iS in range(nS):
    #    mx = np.zeros((nQ,))
    #    for iQ in range(nQ):
    #        slicer = slice(nQ*nT*iS+iQ,nQ*nT*(1+iS),nQ)
    #        mx[iQ] = np.nanmax(Yhat_opto[0::2][:,slicer])
    #        Yhat_opto[:,slicer] = Yhat_opto[:,slicer]/mx[iQ]
    ##Yhat_opto = Yhat_opto/Yhat_opto[0::2].max(0)[np.newaxis,:]
    #print(Yhat_opto.shape)
    #h_opto = opto_dict['h_opto']
    #dYY2 = Yhat_opto[1::2]-Yhat_opto[0::2]

    #print('dYY1 mean: %03f'%np.nanmean(np.abs(dYY1)))
    #print('dYY2 mean: %03f'%np.nanmean(np.abs(dYY2)))

    dYY = np.concatenate((dYY1, dYY2), axis=0)

    #titles = ['VIP silencing','VIP activation']
    #for itype in [0,1,2,3]:
    #    plt.figure(figsize=(5,2.5))
    #    for iyy,dyy in enumerate([dYY1,dYY2]):
    #        plt.subplot(1,2,iyy+1)
    #        if np.sum(np.isnan(dyy[:,itype]))==0:
    #            sca.scatter_size_contrast(YYhat[:,itype],YYhat[:,itype]+dyy[:,itype],nsize=6,ncontrast=6)#,mn=0)
    #        plt.title(titles[iyy])
    #        plt.xlabel('cell type %d event rate, \n light off'%itype)
    #        plt.ylabel('cell type %d event rate, \n light on'%itype)
    #        ut.erase_top_right()
    #    plt.tight_layout()
    #    ut.mkdir('figures')
    #    plt.savefig('figures/scatter_light_on_light_off_target_celltype_%d.eps'%itype)

    opto_mask = ~np.isnan(dYY)

    #dYY[nN:][~opto_mask[nN:]] = -dYY[:nN][~opto_mask[nN:]]

    #print('mean of opto_mask: '+str(opto_mask.mean()))

    #dYY[~opto_mask] = 0
    def zero_nans(arr):
        """Replace NaN entries with 0 (in place)."""
        arr[np.isnan(arr)] = 0
        return arr

    #dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,\
    #        opto_transform2.slope,opto_transform2.intercept,opto_transform2.res\
    #        = [zero_nans(x) for x in \
    #                [dYY,opto_transform1.slope,opto_transform1.intercept,opto_transform1.res,\
    #                opto_transform2.slope,opto_transform2.intercept,opto_transform2.res]]
    dYY = zero_nans(dYY)

    to_adjust = np.logical_or(np.isnan(opto_transform2.slope[0]),
                              np.isnan(opto_transform2.intercept[0]))

    opto_transform2.slope[:, to_adjust] = 1 / opto_transform1.slope[:, to_adjust]
    opto_transform2.intercept[:, to_adjust] = \
        -opto_transform1.intercept[:, to_adjust] / opto_transform1.slope[:, to_adjust]
    opto_transform2.res[:, to_adjust] = \
        -opto_transform1.res[:, to_adjust] / opto_transform1.slope[:, to_adjust]

    #np.save('/Users/dan/Documents/notebooks/mossing-PC/shared_data/calnet_data/dYY.npy',dYY)

    from importlib import reload
    reload(calnet)
    #reload(calnet.fitting_2step_spatial_feature_opto_tight_nonlinear)
    reload(sim_utils)
    # reload(calnet.fitting_spatial_feature)
    # W0list = [np.ones(shp) for shp in shapes]
    wt_dict = {}
    wt_dict['X'] = 3  #1
    wt_dict['Y'] = 3
    #wt_dict['Eta'] = 3 # 1 #
    wt_dict['Xi'] = 0.1
    wt_dict['stims'] = np.ones((nN, 1))  #(np.arange(30)/30)[:,np.newaxis]**1 #
    wt_dict['barrier'] = 0.  #30.0 #0.1
    wt_dict['opto'] = 1  #1e1
    wt_dict['isn'] = 0.3
    wt_dict['tv'] = 1
    spont_frac = 0.5
    pc_frac = 0.5
    wt_dict['stimsOpto'] = (1 - spont_frac) * 6 / 5 * np.ones((nN, 1))
    wt_dict['stimsOpto'][0::6] = spont_frac * 6
    wt_dict['celltypesOpto'] = (1 - pc_frac) * 4 / 3 * np.ones(
        (1, nQ * nS * nT))
    wt_dict['celltypesOpto'][0, 0::nQ] = pc_frac * 4
    wt_dict['dirOpto'] = np.array((1, 0.3))
    wt_dict['dYY'] = 10  #10
    wt_dict['coupling'] = 1e-3
    wt_dict['smi'] = 0.1
    wt_dict['smi_halo'] = 30
    wt_dict['smi_chrimson'] = 0.1

    ##temporary no_opto
    wt_dict['opto'] = 0
    wt_dict['dirOpto'] = np.array((1, 1))
    #wt_dict['stimsOpto'] = np.ones((nN,1))
    wt_dict['celltypesOpto'] = np.ones((1, nQ * nS * nT))
    wt_dict['smi'] = 0  #0.01 # 0
    wt_dict['smi_halo'] = 0  #1 # 0
    wt_dict['smi_chrimson'] = 0  #0.01 # 0
    wt_dict['isn'] = 0.1
    wt_dict['tv'] = 0.1
    wt_dict['X'] = 3
    wt_dict['Eta'] = 10  #3 # 1 #

    ## temporary opto from no_opto
    #wt_dict['opto'] = 0.01
    #wt_dict['tv'] = 0.3#0.1

    np.save(
        'XXYYhat.npy', {
            'YYhat': YYhat,
            'XXhat': XXhat,
            'rs': rs,
            'Rs': Rs,
            'Rso': Rso,
            'Ypc_list': Ypc_list,
            'Xpc_list': Xpc_list
        })
    if allow_s2:
        Eta0 = invert_f_mt(YYhat)
    else:
        Eta0 = invert_f_mt(YYhat, s02=0)

    #         Wmx,    Wmy,    Wsx,    Wsy,    s02,  k,    kappa,T,   h1, h2
    #XX,            XXp,          Eta,          Xi

    opt = fmc.gen_opt(nS=nS, nT=nT)
    opt['allow_s02'] = False
    opt['allow_A'] = False
    opt['allow_B'] = True

    ntries = 1
    nhyper = 1
    dt = 1e-1
    niter = int(np.round(10 / dt))  #int(1e4)
    perturbation_size = 5e-2
    # learning_rate = 1e-4 # 1e-5 #np.linspace(3e-4,1e-3,niter+1) # 1e-5
    #l2_penalty = 0.1
    W1t = [[None for itry in range(ntries)] for ihyper in range(nhyper)]
    W2t = [[None for itry in range(ntries)] for ihyper in range(nhyper)]
    loss = np.zeros((nhyper, ntries))
    is_neg = np.array([b[1] for b in bounds1]) == 0
    counter = 0
    negatize = [np.zeros(shp, dtype='bool') for shp in shapes1]
    #print(shapes1)
    for ishp, shp in enumerate(shapes1):
        nel = np.prod(shp)
        negatize[ishp][:][is_neg[counter:counter + nel].reshape(shp)] = True
        counter = counter + nel
    for ihyper in range(nhyper):
        for itry in range(ntries):
            #print((ihyper,itry))
            #[0.(nP,nQ),1.(nQ,nQ),2.(nP,nQ),3.(nQ,nQ),4.(nQ,),5.(nQ*(nS-1),),6.(1,),7.(nQ*(nT-1),),8.(1,),9.(1,),10.(nQ,),11.(nQ*nS*nT,)]
            W10list = [
                init_noise * (ihyper + 1) * np.random.rand(*shp)
                for shp in shapes1
            ]
            W20list = [
                init_noise * (ihyper + 1) * np.random.rand(*shp)
                for shp in shapes2
            ]
            #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1])))
            #print('size of w10: '+str(np.sum([np.size(x) for x in W10list])))
            #print('len(W10list) : '+str(len(W10list)))
            counter = 0
            for ishp, shp in enumerate(shapes1):
                W10list[ishp][negatize[ishp]] = -W10list[ishp][negatize[ishp]]
            W10list[4] = np.ones(shapes1[4])  # s02
            W10list[5] = np.ones(shapes1[5])  # K
            W10list[6] = np.ones(shapes1[6])  # kappa
            W10list[7] = np.ones(shapes1[7])  # T
            W10list[8] = np.zeros(shapes1[8])  # h1
            W10list[9] = np.zeros(shapes1[9])  # h2
            W10list[10] = np.zeros(shapes1[10])  # baseline
            W10list[11] = np.ones(shapes1[11])  # amplitude
            W20list[0] = np.concatenate(Xhat, axis=1)  #XX
            W20list[1] = np.zeros_like(W20list[1])  #XXp
            W20list[2] = Eta0.copy()  #np.zeros(shapes[10]) #Eta
            W20list[3] = np.zeros(shapes2[3])  #Xi
            #[Wmx,Wmy,Wsx,Wsy,s02,k,kappa,T,XX,XXp,Eta,Xi]
            if init_W_from_lsq:
                W10list[0], W10list[1] = initialize_W(Xhat,
                                                      Yhat,
                                                      scale_by=scale_init_by,
                                                      allow_s2=allow_s2)
                for ivar in range(0, 2):
                    W10list[ivar] = W10list[ivar] + init_noise * np.random.randn(*W10list[ivar].shape)
            if init_W_from_lbfgs:
                print(opt)
                opt_param, result, _, _, _, _, _, _, _, _, _, _, _ = fmc.initialize_params(
                    XXhat, YYhat, opt, wpcpc=5, wpvpv=-6)
                these_shapes = [(nP, nQ), (nQ, nQ), (nQ, ), (nQ, ), (nQ, ),
                                (nQ, )]
                Wmx0, Wmy0, K0, s020, amplitude0, baseline0 = calnet.utils.parse_thing(
                    opt_param, these_shapes)
                if init_Eta_with_s02:
                    #assert(True==False)
                    Eta0 = invert_f_mt_with_s02(YYhat -
                                                np.tile(baseline0, nS * nT),
                                                s020,
                                                nS=nS,
                                                nT=nT)
                    W20list[2] = Eta0.copy()
                #Wmx0 = opt_param[:nP]
                #Wmy0 = opt_param[nP:nP+nQ]
                #K0 = opt_param[nP+nQ]
                #s020 = opt_param[nP+nQ+1]
                #amplitude0 = opt_param[nP+nQ+2]
                #baseline0 = opt_param[nP+nQ+3]
                print((Wmx0, Wmy0, K0, s020, np.tile(amplitude0, 2), baseline0))
                W10list[0], W10list[1], W10list[5], W10list[4], W10list[-1], W10list[-2] = \
                    Wmx0, Wmy0, K0, s020, np.tile(amplitude0, 2), baseline0
                for ivar in range(0, 2):
                    W10list[ivar] = W10list[ivar] + init_noise * np.random.randn(*W10list[ivar].shape)
            elif constrain_isn:
                W10list[1][0, 0] = 3
                if help_constrain_isn:
                    W10list[1][0, 3] = 5
                    W10list[1][3, 0] = -5
                    W10list[1][3, 3] = -5
                else:
                    W10list[1][0, 1:4] = 5
                    W10list[1][1:4, 0] = -5

            if init_W_from_file:
                npyfile = np.load(init_file, allow_pickle=True)[()]

                #Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,h1,h2,bl,amp = parse_W1(W1)
                #XX,XXp,Eta,Xi = parse_W2(W2)
                #Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2,bl,amp = parse_W1(W1)
                W10list = [
                    npyfile['as_list'][ivar]
                    for ivar in [0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15]
                ]
                W20list = [npyfile['as_list'][ivar] for ivar in [8, 9, 10, 11]]
                if W20list[0].size == nN * nS * 2 * nP:
                    #assert(True==False)
                    W10list[7] = np.array(())
                    W10list[1][1, 0] = W10list[1][1, 0]
                    W20list[0] = np.nanmean(
                        W20list[0].reshape((nN, nS, 2, nP)), 2).flatten()  #XX
                    W20list[1] = np.nanmean(
                        W20list[1].reshape((nN, nS, 2, nP)), 2).flatten()  #XXp
                    W20list[2] = np.nanmean(
                        W20list[2].reshape((nN, nS, 2, nQ)), 2).flatten()  #Eta
                    W20list[3] = np.nanmean(
                        W20list[3].reshape((nN, nS, 2, nQ)), 2).flatten()  #Xi
                if correct_Eta:
                    #assert(True==False)
                    W20list[2] = Eta0.copy()
                if len(W10list) < len(shapes1):
                    #assert(True==False)
                    W10list = W10list + [
                        np.array(1),
                        np.zeros((nQ, )),
                        np.zeros((nT * nS * nQ, ))
                    ]  # add h2, bl, amp
                if init_Eta_with_s02:
                    #assert(True==False)
                    s02 = W10list[4].copy()
                    Eta0 = invert_f_mt_with_s02(YYhat, s02, nS=nS, nT=nT)
                    W20list[2] = Eta0.copy()
                #if init_Eta12_with_dYY:
                #    Eta0 = W20list[2].copy().reshape((nN,nQ*nS*nT))
                #    Xi0 = W20list[3].copy().reshape((nN,nQ*nS*nT))
                #    s020 = W10list[4].copy()
                #    YY0s = compute_f_(Eta0,Xi0,s020)
                #titles = ['VIP silencing','VIP activation']
                #for itype in [0,1,2,3]:
                #    plt.figure(figsize=(5,2.5))
                #    for iyy,yy in enumerate([YY10s,YY20s]):
                #        plt.subplot(1,2,iyy+1)
                #        if np.sum(np.isnan(yy[:,itype]))==0:
                #            sca.scatter_size_contrast(YY0s[:,itype],yy[:,itype],nsize=6,ncontrast=6)#,mn=0)
                #        plt.title(titles[iyy])
                #        plt.xlabel('cell type %d event rate, \n light off'%itype)
                #        plt.ylabel('cell type %d event rate, \n light on'%itype)
                #        ut.erase_top_right()
                #    plt.tight_layout()
                #    ut.mkdir('figures')
                #    plt.savefig('figures/scatter_light_on_light_off_init_celltype_%d.eps'%itype)
                for ivar in [0, 1, 4, 5]:  # Wmx, Wmy, s02, k
                    print(init_noise)
                    W10list[ivar] = W10list[ivar] + init_noise * np.random.randn(*W10list[ivar].shape)

            #print('size of bounds1: '+str(np.sum([np.size(x) for x in bd1list])))
            #print('size of w10: '+str(np.sum([np.size(x) for x in W10list])))
            #print('size of shapes1: '+str(np.sum([np.prod(shp) for shp in shapes1])))
            W1t[ihyper][itry], W2t[ihyper][itry], loss[ihyper][
                itry], gr, hess, result = calnet.fitting_2step_spatial_feature_opto_tight_nonlinear_baseline.fit_W_sim(
                    Xhat,
                    Xpc_list,
                    Yhat,
                    Ypc_list,
                    pop_rate_fn=pop_rate_fn,
                    pop_deriv_fn=pop_deriv_fn,
                    W10list=W10list.copy(),
                    W20list=W20list.copy(),
                    bounds1=bounds1,
                    bounds2=bounds2,
                    niter=niter,
                    wt_dict=wt_dict,
                    l2_penalty=l2_penalty,
                    compute_hessian=False,
                    dt=dt,
                    perturbation_size=perturbation_size,
                    dYY=dYY,
                    constrain_isn=constrain_isn,
                    tv=tv,
                    opto_mask=opto_mask,
                    use_opto_transforms=use_opto_transforms,
                    opto_transform1=opto_transform1,
                    opto_transform2=opto_transform2,
                    share_residuals=share_residuals,
                    stimwise=stimwise,
                    simulate1=simulate1,
                    simulate2=simulate2,
                    verbose=verbose)

    #def parse_W(W):
    #    Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2 = W
    #    return Wmx,Wmy,Wsx,Wsy,s02,K,kappa,T,XX,XXp,Eta,Xi,h1,h2
    def parse_W1(W):
        Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp = W
        return Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp

    def parse_W2(W):
        XX, XXp, Eta, Xi = W
        return XX, XXp, Eta, Xi

    itry = 0
    Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, h1, h2, bl, amp = parse_W1(W1t[0][0])
    XX, XXp, Eta, Xi = parse_W2(W2t[0][0])

    labels1 = [
        'Wmx', 'Wmy', 'Wsx', 'Wsy', 's02', 'K', 'kappa', 'T', 'h1', 'h2', 'bl',
        'amp'
    ]
    labels2 = ['XX', 'XXp', 'Eta', 'Xi']
    Wstar_dict = {}
    for i, label in enumerate(labels1):
        Wstar_dict[label] = W1t[0][0][i]
    for i, label in enumerate(labels2):
        Wstar_dict[label] = W2t[0][0][i]
    Wstar_dict['as_list'] = [
        Wmx, Wmy, Wsx, Wsy, s02, K, kappa, T, XX, XXp, Eta, Xi, h1, h2, bl, amp
    ]
    Wstar_dict['loss'] = loss[0][0]
    Wstar_dict['wt_dict'] = wt_dict
    np.save(weights_file, Wstar_dict, allow_pickle=True)
Example #42
0
    def load_data(self):
        '''Load all the necessary data:
        principal components, standard deviations, input image, triangle mesh data, and landmarks.'''
        
        #Expression parameters
        fileName='Dataset/Coarse_Dataset/Exp_Pca.bin'
        with open(fileName, mode='rb') as file: # b is important -> binary
        #     fileContent = file.read()
            dim_exp = np.fromfile(file, dtype=np.int32, count=1)
            mu_exp = np.zeros(self.no_of_ver*3)
            base_exp = np.zeros((self.no_of_ver*3,dim_exp[0]), dtype=float)
            mu_exp = np.fromfile(file, dtype=float, count=3*self.no_of_ver)
            base_exp = np.fromfile(file, dtype=float, count=3*self.no_of_ver*dim_exp[0])
        self.A_exp = np.array(np.resize(base_exp, (self.no_of_ver*3, dim_exp[0])))
        
        data = np.loadtxt('Dataset/Coarse_Dataset/std_exp.txt', delimiter=' ')
        data=data[:,np.newaxis]
        self.std_exp = np.array(data)
        
        #Triangle mesh data
        temp = loadmat('Dataset/3DDFA_Release/Matlab/ModelGeneration/model_info.mat')
        trimIndex = np.array(temp['trimIndex'][:,0], dtype=np.int32)
        # Interleave the x, y, z coordinate indices of each retained vertex (converted to 0-based)
        trim_ind = np.reshape(np.array([3*trimIndex-2,3*trimIndex-1,3*trimIndex])-1,(self.no_of_ver*3,),'F') #np.append(3*trimIndex-2,np.append( 3*trimIndex-1, 3*trimIndex))
        self.tri_mesh_data = temp['tri'].T - 1
        
        #3D and 2D landmarks data
        lmks_3d_ind = temp['keypoints']
        lmks_2d = np.array(self.load('Dataset/300W-Convert/300W-Original/afw/134212_1.pts'))
        self.no_of_lmks = len(lmks_2d)
        self.lmks['2d'] = lmks_2d-[700,144]
        self.lmks['3d'] = lmks_3d_ind
        self.no_of_face_pixels = len(lmks_3d_ind)
        
        #Identity and Albedo parameters
        morph_model = loadmat('Dataset/PublicMM1/01_MorphableModel.mat')
        shapePCA = morph_model['shapePC']
        shapeMU = morph_model['shapeMU']
        shapeSTD = morph_model['shapeEV']

        texPCA = morph_model['texPC']
        texMU = morph_model['texMU']
        texSTD = morph_model['texEV']
        
        self.p_mu = shapeMU[trim_ind]
        self.b_mu = texMU[trim_ind]
        self.A_alb = np.array(texPCA[trim_ind,:100])
        self.A_id = np.array(shapePCA[trim_ind,:100])
        self.std_id = np.array(shapeSTD[:100])
        self.std_alb = np.array(texSTD[:100])
        
        #Input image
        I_in = plt.imread('Dataset/300W-Convert/300W-Original/afw/134212_1.jpg')
        self.I_in=I_in[144:400,700:956,:]
        
        #Approximate estimation of number of face pixels using first 17 landmarks
        polygon = Polygon(self.lmks['2d'][:17,:])
        temp2 = np.empty((self.h,self.w))
        for i in range(self.h):
            for j in range(self.w):
                point = Point(i,j)
                temp2[i,j] = polygon.contains(point)
        self.no_of_face_pxls = np.sum(temp2==1)
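
The nested pixel loop above tests every (i, j) pixel against the landmark polygon one point at a time. A vectorized sketch of the same count, using matplotlib's Path.contains_points and mirroring the (i, j) convention of that loop (the helper name and standalone form are illustrative, not part of the original class):

import numpy as np
from matplotlib.path import Path

def count_face_pixels(lmks_2d_17, h, w):
    # Test all pixel centers against the 17-landmark polygon in one call.
    poly = Path(lmks_2d_17)
    ii, jj = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
    pts = np.column_stack([ii.ravel(), jj.ravel()])
    return int(poly.contains_points(pts).sum())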
 def jacfun(*args, **kwargs):
     vjp, ans = make_vjp(fun, argnum)(*args, **kwargs)
     ans_vspace = vspace(getval(ans))
     jacobian_shape = ans_vspace.shape + vspace(getval(args[argnum])).shape
     grads = map(vjp, ans_vspace.standard_basis())
     return np.reshape(np.stack(grads), jacobian_shape)
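
This is essentially how a dense Jacobian is assembled in reverse mode: one VJP per standard basis vector of the output space, stacked into an array of shape (output shape + input shape). A quick sanity check against autograd's built-in jacobian on a toy function (a sketch, not part of the original example):

import autograd.numpy as np
from autograd import jacobian

def f(x):
    # R^2 -> R^4
    return np.concatenate([np.sin(x), x ** 2])

x = np.array([0.5, -1.3])
J = jacobian(f)(x)                                        # shape (4, 2)
J_true = np.vstack([np.diag(np.cos(x)), np.diag(2 * x)])  # analytic Jacobian
assert np.allclose(J, J_true)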
Example #44
0
 def unflatten(x):
     sqrt_gwidth = x[0]
     V = np.reshape(x[1:], (J, d))
     return sqrt_gwidth, V
Example #45
0
def create_animation_interactive_diffusion(particles_paths,
                                           second_path,
                                           X,
                                           Y,
                                           Z,
                                           ratio,
                                           second_particle_type="particles",
                                           graph_type="heatmap",
                                           graph_details=None):
    """
    Assume all paths have the same length, and that the lengths of the first and second paths differ by an integer factor."""
    if not os.path.isdir("./tmp"):
        os.mkdir("./tmp")
    ani_path = "./tmp/interactive_{}".format(time.time())
    os.mkdir(ani_path)

    trail_length = 20

    # ratio = (particles_paths.shape[1] - 1) / (second_path.shape[0] - 1)
    # assert int(ratio) == ratio

    for i in range(len(particles_paths[0])):
        p_X = particles_paths[:, i, 0].T
        p_Y = particles_paths[:, i, 1].T

        fig, ax = plt.subplots()

        if graph_type == "contour":
            ax.contour(X, Y, Z, graph_details["lines"])
        else:
            ax.imshow(Z,
                      cmap=plt.cm.gist_earth_r,
                      extent=[X[0][0], X[0][-1], Y[-1][0], Y[0][0]],
                      interpolation=graph_details["interpolation"])

        trail_start = int(max(0, i / ratio - trail_length))
        trail_end = int(max(0, i / ratio + 1))

        if second_particle_type == "particles":
            ax.plot(p_X, p_Y, "o", color="orange")
        else:
            kernel = stats.gaussian_kde(np.vstack([p_X, p_Y]))
            x_min, x_max = -10, 10  # max(-15, min(min(X), min(second_path[trail_start:trail_end, 0]))), min(15, max(max(X), max(second_path[trail_start:trail_end, 0])))
            y_min, y_max = -10, 10  # max(-15, min(min(Y), min(second_path[trail_start:trail_end, 1]))), min(15, max(max(Y), max(second_path[trail_start:trail_end, 1])))

            positions = np.mgrid[x_min:x_max:0.2, y_min:y_max:0.2]

            Z = np.reshape(
                kernel(np.vstack([positions[0].ravel(),
                                  positions[1].ravel()])).T,
                positions[0].shape)
            ax.imshow(Z,
                      cmap=plt.cm.gist_earth_r,
                      extent=[x_min, x_max, y_min, y_max])

        # plot the second path
        for j in range(trail_start, trail_end):
            ax.plot(second_path[j - 1:j + 1, 0],
                    second_path[j - 1:j + 1, 1],
                    "--*",
                    color="red",
                    alpha=np.exp(-(trail_end - j - 1) / 5.))

        plt.savefig(ani_path + "/{}.png".format(i))
    return ani_path
Example #46
0
 def loss(W_flat):
     W = np.reshape(W_flat, (K, D))
     scores = np.dot(X, W.T) + bias
     lp = np.sum(y_oh * scores) - np.sum(logsumexp(scores, axis=1))
     prior = np.sum(-0.5 * (W - mu0)**2 / sigma0)
     return -(lp + prior) / N
Example #47
0
 def get(self, vect, name):
     """Takes in a vector and returns the subset indexed by name."""
     idxs, shape = self.idxs_and_shapes[name]
     return np.reshape(vect[idxs], shape)
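
The get method above is the flat-parameter-vector parser pattern used in several autograd examples: each named block maps to a slice and a shape. A self-contained sketch of such a parser (the class name and add method here are illustrative assumptions, not necessarily the original API):

import numpy as np

class ShapeParser:
    """Map named parameter blocks to slices of one flat vector."""
    def __init__(self):
        self.idxs_and_shapes = {}
        self.num_params = 0

    def add(self, name, shape):
        size = int(np.prod(shape))
        self.idxs_and_shapes[name] = (slice(self.num_params, self.num_params + size), shape)
        self.num_params += size

    def get(self, vect, name):
        """Takes in a vector and returns the subset indexed by name."""
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)

parser = ShapeParser()
parser.add('W', (3, 2))
parser.add('b', (2,))
theta = np.arange(parser.num_params, dtype=float)
W = parser.get(theta, 'W')   # first 6 entries, reshaped to (3, 2)
b = parser.get(theta, 'b')   # last 2 entries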
Example #48
0
def logistic_regression(X,
                        y,
                        bias=None,
                        K=None,
                        W0=None,
                        mu0=0,
                        sigma0=1,
                        verbose=False,
                        maxiter=1000):
    """
    Fit a multiclass logistic regression

        y_i ~ Cat(softmax(W x_i))

    Each y_i is a one-hot vector in {0, 1}^K,
    each x_i is a vector in R^D, and
    W is a matrix in R^{K x D}.

    The log likelihood is

        L(W) = sum_i [ sum_k y_ik * w_k^T x_i - logsumexp(W x_i) ]

    The prior is w_k ~ Norm(mu0, diag(sigma0)).
    """
    N, D = X.shape
    assert y.shape[0] == N

    # Make sure y is one hot
    if y.ndim == 1 or y.shape[1] == 1:
        assert y.dtype == int and y.min() >= 0
        K = y.max() + 1 if K is None else K
        y_oh = np.zeros((N, K), dtype=int)
        y_oh[np.arange(N), y] = 1

    else:
        K = y.shape[1]
        assert y.min() == 0 and y.max() == 1 and np.allclose(y.sum(1), 1)
        y_oh = y

    # Check that bias is correct shape
    if bias is not None:
        assert bias.shape == (K, ) or bias.shape == (N, K)
    else:
        bias = np.zeros((K, ))

    def loss(W_flat):
        W = np.reshape(W_flat, (K, D))
        scores = np.dot(X, W.T) + bias
        lp = np.sum(y_oh * scores) - np.sum(logsumexp(scores, axis=1))
        prior = np.sum(-0.5 * (W - mu0)**2 / sigma0)
        return -(lp + prior) / N

    W0 = W0 if W0 is not None else np.zeros((K, D))
    assert W0.shape == (K, D)

    itr = [0]

    def callback(W_flat):
        itr[0] += 1
        print("Iteration {} loss: {:.3f}".format(itr[0], loss(W_flat)))

    result = minimize(loss,
                      np.ravel(W0),
                      jac=grad(loss),
                      method="BFGS",
                      callback=callback if verbose else None,
                      options=dict(maxiter=maxiter, disp=verbose))

    W = np.reshape(result.x, (K, D))
    return W
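
A small usage sketch for the function above on synthetic data; it assumes the module-level imports the function relies on (scipy.optimize.minimize, autograd's grad, and logsumexp) are already in scope, and the variable names here are illustrative:

import autograd.numpy as np

rng = np.random.RandomState(0)
N, D, K = 300, 2, 3
W_true = rng.randn(K, D)
X = rng.randn(N, D)
y = np.argmax(X @ W_true.T + 0.1 * rng.randn(N, K), axis=1)   # integer labels in {0, ..., K-1}

W_fit = logistic_regression(X, y, K=K, maxiter=200)
train_acc = np.mean(np.argmax(X @ W_fit.T, axis=1) == y)
print("train accuracy: {:.2f}".format(train_acc))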
Example #49
0
 def callback(weights):
     cur_occlusion = np.reshape(weights, (rows, cols))
     simulate(init_vx, init_vy, simulation_timesteps, cur_occlusion, ax)
 def jacfun(*args, **kwargs):
     vjp, ans = make_vjp(fun, argnum)(*args, **kwargs)
     outshape = getshape(ans)
     grads = map(vjp, unit_vectors(outshape))
     jacobian_shape = outshape + getshape(args[argnum])
     return np.reshape(concatenate(grads), jacobian_shape)
Example #51
0
 def partial_flatten(x):
     return np.reshape(x, (x.shape[0], np.prod(x.shape[1:])))
Example #52
0
 def unflatten(x):
     sqrt_neg_b = x[0]
     c = x[1]
     V = np.reshape(x[2:], (J, d))
     return sqrt_neg_b, c, V
Example #53
0
    def objective(params):
        cur_occlusion = np.reshape(params, (rows, cols))
        final_vx, final_vy = simulate(init_vx, init_vy, simulation_timesteps, cur_occlusion)
        return -lift(final_vy) / drag(final_vx)

    # Specify gradient of objective function using autograd.
    objective_with_grad = value_and_grad(objective)

    fig = plt.figure(figsize=(8,8))
    ax = fig.add_subplot(111, frameon=False)

    def callback(weights):
        cur_occlusion = np.reshape(weights, (rows, cols))
        simulate(init_vx, init_vy, simulation_timesteps, cur_occlusion, ax)

    print("Rendering initial flow...")
    callback(init_occlusion)

    print("Optimizing initial conditions...")
    result = minimize(objective_with_grad, init_occlusion, jac=True, method='CG',
                      options={'maxiter':50, 'disp':True}, callback=callback)

    print("Rendering optimized flow...")
    final_occlusion = np.reshape(result.x, (rows, cols))
    simulate(init_vx, init_vy, simulation_timesteps, final_occlusion, ax, render=True)

    print("Converting frames to an animated GIF...")   # Using imagemagick.
    os.system("convert -delay 5 -loop 0 step*.png "
              "-delay 250 step{0:03d}.png wing.gif".format(simulation_timesteps))
    os.system("rm step*.png")
Example #54
0
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Define neural network parameters - simple 1-hidden-layer fully connected feed-forward NN - Xavier initialization
num_neurons = 82
weights_1 = np.random.randn(state_len, num_neurons) * np.sqrt(
    1.0 / (state_len + num_neurons))
weights_2 = np.random.randn(num_neurons, state_len) * np.sqrt(
    1.0 / (state_len + num_neurons))
bias_1 = np.random.randn(1, num_neurons) * np.sqrt(1.0 / (num_neurons))
bias_2 = np.random.randn(1, state_len) * np.sqrt(1.0 / (state_len))
# Flatten (and reshape) parameters for the purpose of autograd
thetas = np.concatenate((weights_1.flatten(), bias_1.flatten(),
                         weights_2.flatten(), bias_2.flatten()),
                        axis=0)
num_wb = np.shape(thetas)[0]
thetas = np.reshape(thetas, newshape=(1, num_wb))

# Reshaping indices
w1_idx_end = num_neurons * (state_len)  # not inclusive
b1_idx_start = num_neurons * (state_len)
b1_idx_end = num_neurons * (state_len) + num_neurons

w2_idx_start = num_neurons * (state_len) + num_neurons
w2_idx_end = num_neurons * (state_len) + num_neurons + num_neurons * state_len

b2_idx_start = num_neurons * (
    state_len) + num_neurons + num_neurons * state_len


# Reshaping function for parameters
def theta_reshape(thetas):
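
The body of theta_reshape is cut off in the listing above. A minimal sketch of what such a function could look like, given the index bookkeeping just defined (this body is an assumption, not the original code; it relies on num_neurons, state_len, and the *_idx_* variables from the example):

def theta_reshape_sketch(thetas):
    # Split the flat (1, num_wb) parameter vector back into weight and bias arrays,
    # in the same order used by the np.concatenate call above.
    flat = np.ravel(thetas)
    weights_1 = np.reshape(flat[:w1_idx_end], (state_len, num_neurons))
    bias_1 = np.reshape(flat[b1_idx_start:b1_idx_end], (1, num_neurons))
    weights_2 = np.reshape(flat[w2_idx_start:w2_idx_end], (num_neurons, state_len))
    bias_2 = np.reshape(flat[b2_idx_start:], (1, state_len))
    return weights_1, bias_1, weights_2, bias_2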
Example #55
0
    def optimize_locs(p,
                      dat,
                      b,
                      c,
                      test_locs0,
                      reg=1e-5,
                      max_iter=100,
                      tol_fun=1e-5,
                      disp=False,
                      locs_bounds_frac=100):
        """
        Optimize just the test locations by maximizing a test power criterion,
        keeping the kernel parameters b, c fixed to the specified values. The data
        should not be the same data as used in the actual test (i.e., it should be
        a held-out set). This function is deterministic.

        - p: an UnnormalizedDensity specifying the problem
        - dat: a Data object
        - b, c: kernel parameters of the IMQ kernel. Not optimized.
        - test_locs0: Jxd numpy array. Initial V.
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
            the box defined by coordinate-wise min-max by std of each coordinate
            multiplied by this number.
        
        Return (V test_locs, optimization info log)
        """
        J = test_locs0.shape[0]
        X = dat.data()
        n, d = X.shape

        def obj(V):
            return -IMQFSSD.power_criterion(p, dat, b, c, V, reg=reg)

        flatten = lambda V: np.reshape(V, -1)

        def unflatten(x):
            V = np.reshape(x, (J, d))
            return V

        def flat_obj(x):
            V = unflatten(x)
            return obj(V)

        # Initial point
        x0 = flatten(test_locs0)

        # Make a box to bound test locations
        X_std = np.std(X, axis=0)
        # X_min: length-d array
        X_min = np.min(X, axis=0)
        X_max = np.max(X, axis=0)
        # V_lb: J x d
        V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
        V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))
        # (J*d) x 2.
        x0_bounds = list(
            zip(
                V_lb.reshape(-1)[:, np.newaxis],
                V_ub.reshape(-1)[:, np.newaxis]))

        # optimize. Time the optimization as well.
        # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
        grad_obj = autograd.elementwise_grad(flat_obj)
        with util.ContextTimer() as timer:
            opt_result = scipy.optimize.minimize(
                flat_obj,
                x0,
                method='L-BFGS-B',
                bounds=x0_bounds,
                tol=tol_fun,
                options={
                    'maxiter': max_iter,
                    'ftol': tol_fun,
                    'disp': disp,
                    'gtol': 1.0e-06,
                },
                jac=grad_obj,
            )

        opt_result = dict(opt_result)
        opt_result['time_secs'] = timer.secs
        x_opt = opt_result['x']
        V_opt = unflatten(x_opt)
        return V_opt, opt_result
Example #56
0

obj.Gauss_Newton_optim()


# In[ ]:


al_id = obj.chi_final[0:100]
al_exp = obj.chi_final[100:179]
[s, pitch, yaw, roll] = obj.chi_final[179:183,0]
t = obj.chi_final[183:185]
lmks_2d = obj.lmks['2d']
lmks_3d_ind = obj.lmks['3d']

R = obj.rot_mat(pitch, yaw, roll)

p = obj.p_mu + obj.A_id@al_id + obj.A_exp@al_exp
obj.vertex2 = np.reshape(p, (obj.no_of_ver, 3))
q_world = s*R@obj.vertex2.T  # scaled rotation of the reconstructed mesh vertices
q_world[:2,:] = q_world[:2,:] + t
q_image = obj.world_to_image(q_world.T)


# In[ ]:


plt.scatter(lmks_2d[:,0],-lmks_2d[:,1])
plt.scatter(q_image[lmks_3d_ind[0,:],0],-q_image[lmks_3d_ind[0,:],1])

Example #57
0
 def unflatten(x):
     V = np.reshape(x, (J, d))
     return V
Example #58
0
    def optimize_locs_params(
        p,
        dat,
        b0,
        c0,
        test_locs0,
        reg=1e-2,
        max_iter=100,
        tol_fun=1e-5,
        disp=False,
        locs_bounds_frac=100,
        b_lb=-20.0,
        b_ub=-1e-4,
        c_lb=1e-6,
        c_ub=1e3,
    ):
        """
        Optimize the test locations and the two parameters (b and c) of the
        IMQ kernel by maximizing the test power criterion. 
             k(x,y) = (c^2 + ||x-y||^2)^b 
            where c > 0 and b < 0. 
        The data should not be the same data as used in the actual test (i.e., it
        should be a held-out set). This function is deterministic.

        - p: UnnormalizedDensity specifying the problem.
        - b0: initial parameter value for b (in the kernel)
        - c0: initial parameter value for c (in the kernel)
        - dat: a Data object (training set)
        - test_locs0: Jxd numpy array. Initial V.
        - reg: reg to add to the mean/sqrt(variance) criterion to become
            mean/sqrt(variance + reg)
        - max_iter: #gradient descent iterations
        - tol_fun: termination tolerance of the objective value
        - disp: True to print convergence messages
        - locs_bounds_frac: When making box bounds for the test_locs, extend
            the box defined by coordinate-wise min-max by std of each coordinate
            multiplied by this number.
        - b_lb: absolute lower bound on b. b is always < 0.
        - b_ub: absolute upper bound on b
        - c_lb: absolute lower bound on c. c is always > 0.
        - c_ub: absolute upper bound on c

        #- If the lb, ub bounds are None 
        
        Return (V test_locs, b, c, optimization info log)
        """
        """
        In the optimization, we parameterize b by the square root of its negation
        (sqrt(-b)); squaring and negating recovers b, so b stays negative. c is not
        parameterized in any special way since it enters the kernel as c^2; its
        absolute value is taken to make sure it stays positive.
        """
        J = test_locs0.shape[0]
        X = dat.data()
        n, d = X.shape

        def obj(sqrt_neg_b, c, V):
            b = -sqrt_neg_b**2
            return -IMQFSSD.power_criterion(p, dat, b, c, V, reg=reg)

        flatten = lambda sqrt_neg_b, c, V: np.hstack(
            (sqrt_neg_b, c, V.reshape(-1)))

        def unflatten(x):
            sqrt_neg_b = x[0]
            c = x[1]
            V = np.reshape(x[2:], (J, d))
            return sqrt_neg_b, c, V

        def flat_obj(x):
            sqrt_neg_b, c, V = unflatten(x)
            return obj(sqrt_neg_b, c, V)

        # gradient
        #grad_obj = autograd.elementwise_grad(flat_obj)
        # Initial point
        b02 = np.sqrt(-b0)
        x0 = flatten(b02, c0, test_locs0)

        # Make a box to bound test locations
        X_std = np.std(X, axis=0)
        # X_min: length-d array
        X_min = np.min(X, axis=0)
        X_max = np.max(X, axis=0)

        # V_lb: J x d
        V_lb = np.tile(X_min - locs_bounds_frac * X_std, (J, 1))
        V_ub = np.tile(X_max + locs_bounds_frac * X_std, (J, 1))

        # (J*d+2) x 2. Make sure to bound the reparameterized values (not the original)
        """
        For b, b2 := sqrt(-b)
            lb <= b <= ub < 0 means 

            sqrt(-ub) <= b2 <= sqrt(-lb)
            Note the positions of ub, lb.
        """
        x0_lb = np.hstack((np.sqrt(-b_ub), c_lb, np.reshape(V_lb, -1)))
        x0_ub = np.hstack((np.sqrt(-b_lb), c_ub, np.reshape(V_ub, -1)))
        x0_bounds = list(zip(x0_lb, x0_ub))

        # optimize. Time the optimization as well.
        # https://docs.scipy.org/doc/scipy/reference/optimize.minimize-lbfgsb.html
        grad_obj = autograd.elementwise_grad(flat_obj)
        with util.ContextTimer() as timer:
            opt_result = scipy.optimize.minimize(
                flat_obj,
                x0,
                method='L-BFGS-B',
                bounds=x0_bounds,
                tol=tol_fun,
                options={
                    'maxiter': max_iter,
                    'ftol': tol_fun,
                    'disp': disp,
                    'gtol': 1.0e-06,
                },
                jac=grad_obj,
            )

        opt_result = dict(opt_result)
        opt_result['time_secs'] = timer.secs
        x_opt = opt_result['x']
        sqrt_neg_b, c, V_opt = unflatten(x_opt)
        b = -sqrt_neg_b**2
        assert util.is_real_num(b), 'b is not real. Was {}'.format(b)
        assert b < 0
        assert util.is_real_num(c), 'c is not real. Was {}'.format(c)
        assert c > 0

        return V_opt, b, c, opt_result
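
The sign constraint described in the docstring (optimize over sqrt(-b) so that b = -sqrt_neg_b**2 is always negative) is easiest to see on a toy one-dimensional problem; the sketch below uses the same L-BFGS-B plus autograd gradient pattern but none of the kgof objects (the objective and bounds here are illustrative):

import autograd
import autograd.numpy as np
import scipy.optimize

def obj_b(b):
    # Toy objective with its minimum at b = -2.
    return (b + 2.0) ** 2

def flat_obj(x):
    s = x[0]               # s = sqrt(-b)
    return obj_b(-s ** 2)  # b = -s**2 < 0 by construction

grad_obj = autograd.elementwise_grad(flat_obj)
# Bounds on s corresponding to -20 <= b <= -1e-4, mirroring b_lb/b_ub above.
bounds = [(np.sqrt(1e-4), np.sqrt(20.0))]
res = scipy.optimize.minimize(flat_obj, np.array([1.0]), jac=grad_obj,
                              method='L-BFGS-B', bounds=bounds)
b_opt = -res.x[0] ** 2     # recovered b, guaranteed negative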
Example #59
0
 def cal_ver_alb(self, al_id, al_exp, al_alb):
     p = self.p_mu + self.A_id@al_id + self.A_exp@al_exp
     b = self.b_mu + self.A_alb@al_alb
     self.vertex = np.reshape(p, (self.no_of_ver, 3))
     self.albedo = np.reshape(b, (self.no_of_ver, 3))
Example #60
0
def solve_symm_block_tridiag(J_diag, J_lower_diag, v):
    J_banded = blocks_to_bands(J_diag, J_lower_diag, lower=True)
    x_flat = solveh_banded(J_banded, np.ravel(v), lower=True)
    return np.reshape(x_flat, v.shape)
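
For reference, the heavy lifting here is scipy.linalg.solveh_banded on the lower banded form of the symmetric positive-definite matrix; blocks_to_bands only rearranges the block-tridiagonal pieces into that banded layout. A self-contained scalar (1x1 block) sketch of the banded solve, independent of blocks_to_bands:

import numpy as np
from scipy.linalg import solveh_banded

# Symmetric tridiagonal SPD matrix: 2 on the diagonal, -1 on the off-diagonals.
T = 5
J = 2.0 * np.eye(T) - np.eye(T, k=1) - np.eye(T, k=-1)
v = np.arange(1.0, T + 1.0)

# Lower banded storage: row 0 holds the diagonal, row 1 the first subdiagonal.
J_banded = np.zeros((2, T))
J_banded[0] = np.diag(J)
J_banded[1, :-1] = np.diag(J, k=-1)

x = solveh_banded(J_banded, v, lower=True)
assert np.allclose(J @ x, v)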