Example #1
def reduce_data_and_connectivity(labels, n_labels, connectivity, data_matrix,
                                 thr):
    """
    """
    n_features = len(labels)

    incidence = coo_matrix(
        (np.ones(n_features), (labels, np.arange(n_features))),
        shape=(n_labels, n_features), dtype=np.float32).tocsc()

    inv_sum_col = dia_matrix(
        (np.array(1. / incidence.sum(axis=1)).squeeze(), 0),
        shape=(n_labels, n_labels))

    incidence = inv_sum_col * incidence

    # reduced data
    reduced_data_matrix = (incidence * data_matrix.T).T
    reduced_connectivity = (incidence * connectivity) * incidence.T

    reduced_connectivity = reduced_connectivity - dia_matrix(
        (reduced_connectivity.diagonal(), 0), shape=(reduced_connectivity.shape))

    i_idx, j_idx = reduced_connectivity.nonzero()

    data_matrix_ = np.maximum(thr, np.sum(
        (reduced_data_matrix[:, i_idx] - reduced_data_matrix[:, j_idx]) ** 2, 0))
    reduced_connectivity.data = data_matrix_

    return reduced_connectivity, reduced_data_matrix
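The dia_matrix of inverse row sums above is what turns the 0/1 incidence matrix into a row-averaging operator; a minimal standalone sketch of that step, with made-up labels:

import numpy as np
from scipy.sparse import coo_matrix, dia_matrix

labels = np.array([0, 0, 1, 2, 2, 2])          # hypothetical label per feature
n_labels, n_features = 3, len(labels)
incidence = coo_matrix((np.ones(n_features), (labels, np.arange(n_features))),
                       shape=(n_labels, n_features)).tocsc()
inv_sum_col = dia_matrix((np.array(1. / incidence.sum(axis=1)).squeeze(), 0),
                         shape=(n_labels, n_labels))
averaging = inv_sum_col * incidence            # each row now sums to 1
print(averaging.toarray())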
Example #2
 def divide(self, factor):
     """
     Scalar and matrix division. Scalar division is equivalent to 1/factor * matrix.
     Note: Division by zero will return zero, rather than NaN.
     :param factor: Denominator to use for division.
     :return: The resulting divided matrix.
     """
     if isinstance(factor, Number):
         mat = self.matrix.copy()
         mat.data = mat.data / factor
         return self._new_instance(mat)
     elif isinstance(factor, IndexMatrix):
         if factor.shape == (1,1):
             return self.divide(factor[0,0])
         elif factor.shape[0] == 1:
             inverted = 1/factor
             length = factor.shape[1]
             diag = self._new_instance(sp.dia_matrix((inverted.to_ndarray(), [0]), shape=(length, length)),
                                       row2word=self.col2word)
             return self.dot(diag)
         elif factor.shape[1] == 1:
             inverted = 1/factor.transpose()
             length = factor.shape[0]
             diag = self._new_instance(sp.dia_matrix((inverted.to_ndarray(), [0]), shape=(length, length)),
                                       col2word=self.row2word)
             return diag.dot(self)
         else:
             return self.multiply(1/factor)
     else:
         raise TypeError("Has to be either scalar or of type Matrix")
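Putting the reciprocals of a vector on the main diagonal of a dia_matrix turns element-wise division into a matrix product, which is what the row and column branches above rely on; a standalone sketch with plain scipy (no IndexMatrix):

import numpy as np
import scipy.sparse as sp

M = sp.csr_matrix(np.array([[2., 4., 8.], [1., 2., 4.]]))
col_factors = np.array([1., 2., 4.])
diag = sp.dia_matrix((1. / col_factors, [0]), shape=(3, 3))
print((M * diag).toarray())   # each column of M divided by its factor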
Example #3
 def __init__(self, dS, dt, ds, S, **kwargs):
     super(CrankNicolsonScheme, self).__init__(S)
     a, b, c, d = dS.fde(dt, ds, S, "explicit", **kwargs)
     self.Le = sparse.dia_matrix(([a, 2 + b, c], [-1, 0, 1]), shape=S.shape*2)
     a, b, c, d = dS.fde(dt, ds, S, "implicit", **kwargs)
     self.Li = sparse.dia_matrix(([-a, 2 - b, -c], [-1, 0, 1]), shape=S.shape*2).tocsr()
     self.d = 2 * d
Example #4
 def _laplace(self):
   '''
    construct the Laplace operator as a matrix
    based on the user-supplied filter.
    This matrix can actually be thought of
    as a convolution operator:
   f(x,z)*U(x,z)
   '''
   f=[-1.,2.,-1.]
   nx = nz = self.n
   nf = len(f)
   nonzero = np.ones((nf,nx))
   for i in range(nf):
     nonzero[i] *=f[i]
    offsets = array(range(nf//2, -nf//2, -1))  # integer division keeps offsets [1, 0, -1]
 
   m1 = dia_matrix((nonzero,offsets),shape=(nx,nx))
   m2 = identity(nz)
   k1 = kron(m1,m2)
   nonzero = np.ones((nf,nz))
   for i in range(nf):
     nonzero[i,:] *=f[i]
   m1 = dia_matrix((nonzero,offsets),shape=(nz,nz))
   m2 = identity(nx)
   k2 = kron(m2,m1)   
   return k1+ k2
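The stencil/Kronecker pattern above can be checked on a tiny grid; a sketch assuming the same [-1, 2, -1] filter and a made-up 4-point axis:

import numpy as np
from scipy.sparse import dia_matrix, identity, kron

f = np.array([-1., 2., -1.])
n = 4
nonzero = np.tile(f[:, None], (1, n))    # one constant row per stencil coefficient
offsets = np.array([1, 0, -1])
m1 = dia_matrix((nonzero, offsets), shape=(n, n))
lap2d = kron(m1, identity(n)) + kron(identity(n), m1)
print(m1.toarray())
print(lap2d.shape)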
Example #5
    def optimize_obs_beta(self, ins_seq, obs_seq, j, n_iter, threshold=1e-6):
        Y = obs_seq
        X = ins_seq

        obs_beta = np.matrix(self.obs_weight_mat[j]).T
        g = np.squeeze(self.state_posts[:, j])
        diag_g = dia_matrix(([g], 0), shape=(len(g), len(g)))
        difference = []

        log_g = np.matrix(self.log_state_posts[:, j]).T

        g_y = np.multiply(np.matrix(self.state_posts[:, j]).T, Y)

        for n in range(n_iter):
            #print n, "Before The innder obs_obj_cost", self.obj_obs_subnet(obs_beta, j)
            nu = X * obs_beta
            mu = np.exp(nu)
            w_data = np.squeeze(np.array(mu))
            W = dia_matrix(([w_data], 0), shape=(len(w_data), len(w_data)))
            grad = X.T * diag_g * (Y - mu) - 2 * self.gamma * obs_beta
            hess = -1 * X.T * diag_g * W * X - 2 * self.gamma * np.identity(self.input_dim)

            beta_old = obs_beta
            try:
                obs_beta = obs_beta - np.linalg.pinv(hess) * grad
            except Exception as e:
                print('grad', n, grad, log_g[1:4], nu[1:4])
                sys.exit()
            #print n, "After The innder obs_obj_cost", self.obj_obs_subnet(obs_beta, j)
            difference.append(np.max(beta_old - obs_beta))
            if difference[-1] <= threshold:
                break
        self.obs_weight_mat[j, :] = np.squeeze(np.array(obs_beta))
Example #6
def second_difference_matrix(N=3,bc='BothClamped'):
    """
    generates the second order difference operator in matrix form
    """
    checkInputArgs(N,bc)

    diag = 2*[None]

    if bc == 'BothFree':
        M = N + 1
    elif 'Free' in bc:
        M = N
    else:
        M = N - 1

    if 'Free' not in bc:
        diag[0] = -2 + np.zeros(M)
        diag[1] = np.ones(M)
        mat = dia_matrix(([diag[0],diag[1],diag[1]],[0,1,-1]),shape=(M,M))
    else:
        if bc == 'BothFree':
            diag[0] = np.concatenate((np.zeros(1),-2 + np.zeros(N - 1),np.zeros(1)))
            diag[1] = np.concatenate((np.ones(N - 1),np.zeros(2)))
            mat = dia_matrix(([diag[0],np.roll(diag[1],2),diag[1]],[0,1,-1]),shape=(M,M))
        elif bc in ('LeftClampedRightFree','LeftSimplySupportedRightFree'):
            diag[0] = np.concatenate((-2 + np.zeros(N - 1),np.zeros(1)))
            diag[1] = np.concatenate((np.ones(N - 1),np.zeros(1)))
            mat = dia_matrix(([diag[0],np.roll(diag[1],1),np.roll(diag[1],-1)],[0,1,-1]),shape=(M,M))
        elif bc in ('LeftFreeRightClamped','LeftFreeRightSimplySupported'):
            diag[0] = np.concatenate((np.zeros(1),-2 + np.zeros(N - 1)))
            diag[1] = np.concatenate((np.zeros(1),np.ones(N - 1)))
            mat = dia_matrix(([diag[0],np.roll(diag[1],1),np.roll(diag[1],-1)],[0,1,-1]),shape=(M,M))

    return mat
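For the clamped and simply supported cases ('Free' not in bc) the construction above is the familiar tridiagonal [1, -2, 1] operator; a quick standalone check with a made-up size:

import numpy as np
from scipy.sparse import dia_matrix

M = 4
main_diag = -2 + np.zeros(M)
off_diag = np.ones(M)
mat = dia_matrix(([main_diag, off_diag, off_diag], [0, 1, -1]), shape=(M, M))
print(mat.toarray())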
Example #7
    def optimize_trans_beta(self, ins_seq, obs_seq, j, n_iter, threshold=1e-3):
        trans_theta = np.matrix(self.trans_weight_mat[j])
        trans_post = np.matrix(self.trans_posts[:, j, :])
        X = ins_seq
        Y = obs_seq
        difference = []

        for n in range(n_iter):
            jac_array = np.zeros((self.n_components, self.input_dim))
            I = np.matrix(np.identity(self.n_components))
            nu = X * trans_theta.T
            prob_mu = extmath.safe_softmax(nu)
            for s in range(self.n_components):
                I_s = I[:, s]
                prob_mu_s = np.squeeze(np.array(prob_mu[:, s]))
                jac_s = np.squeeze(np.array(X.T * trans_post * I_s)) - \
                        np.squeeze(np.array(X.T *
                                            dia_matrix((prob_mu_s, 0), shape=(len(prob_mu_s), len(prob_mu_s))) *
                                            np.sum(trans_post, axis=1))) - 2 * self.gamma * np.squeeze(trans_theta[s,:])

                jac_array[s, :] = jac_s

                # check for the NAN in records
                if np.isnan(np.min(jac_s)):
                    print('Encounter NAN', jac_s, n, s, jac_array)
                    print('Debug: ')
                    print("trans_post: ", trans_post)
                    print("prob_mu_s", prob_mu_s)
                    print("I_s", I_s)

                    sys.exit()
            jac_vec = np.matrix(jac_array.reshape(self.input_dim * self.n_components, 1))
            hess_array = np.zeros((self.input_dim * self.n_components, self.input_dim * self.n_components))

            for s in range(self.n_components):
                sum_trans_post = np.squeeze(np.array(np.sum(trans_post, axis=1)))
                for p in range(self.n_components):
                    I_sp = I[s, p]
                    prob_s = prob_mu[:, s]
                    prob_p = prob_mu[:, p]
                    prob_item = np.squeeze(np.array(np.multiply(prob_s, prob_p) - I_sp * prob_s))
                    hess_item = X.T * dia_matrix((prob_item, 0), shape=(len(prob_item), len(prob_item))) * dia_matrix((sum_trans_post, 0), shape=(len(sum_trans_post), len(sum_trans_post))) * X - 2 * self.gamma * np.matrix(np.identity(self.input_dim))
                    hess_array[(s * self.input_dim):((s + 1) * self.input_dim), (p * self.input_dim):((p + 1) * self.input_dim)] = np.array(hess_item)

            hess_array = np.matrix(hess_array)
            trans_theta_old = trans_theta

            try:
                trans_theta = trans_theta - np.reshape(np.linalg.pinv(hess_array) * jac_vec, (self.n_components, self.input_dim))
            except Exception as e:
                print('Failed to Converge!')
                print('jac_vec', jac_vec)
                print(hess_array)
                sys.exit()
            #print "trans_theta_old ", trans_theta_old
            #print "trans_theta_new", trans_theta
            difference.append(np.max(trans_theta_old - trans_theta))
            if difference[-1] <= threshold:
                break
        self.trans_weight_mat[j, :, :] = np.array(trans_theta)
Example #8
def BuildLaPoisson():
    """
    pour l'etape de projection
    matrice de Laplacien phi
    avec CL Neumann pour phi

    BUT condition de Neumann pour phi 
    ==> non unicite de la solution

    besoin de fixer la pression en un point 
    pour lever la degenerescence: ici [0][1]
    
    ==> need to build a correction matrix

    """
    ### do not take the ghost points into account (-2)
    NXi = nx
    NYi = ny

    ###### Definition of the 1D Laplace operator

    ###### AXE X
    ### Diagonal terms
    dataNXi = [numpy.ones(NXi), -2*numpy.ones(NXi), numpy.ones(NXi)]   
    
    ### Boundary conditions: Neumann on the left, nothing on the right
    dataNXi[2][1]     = 2.  # SF left
    # dataNXi[0][NXi-2] = 2.  # SF right

    ###### AXE Y
    ### Diagonal terms
    dataNYi = [numpy.ones(NYi), -2*numpy.ones(NYi), numpy.ones(NYi)] 
   
    ### Boundary conditions: Neumann
    dataNYi[2][1]     = 2.  # SF low
    dataNYi[0][NYi-2] = 2.  # SF top

    ###### Their positions
    offsets = numpy.array([-1,0,1])                    
    DXX = sp.dia_matrix((dataNXi,offsets), shape=(NXi,NXi)) * dx_2
    DYY = sp.dia_matrix((dataNYi,offsets), shape=(NYi,NYi)) * dy_2
    
    ####### 2D Laplace operator
    LAP = sp.kron(sp.eye(NYi,NYi), DXX) + sp.kron(DYY, sp.eye(NXi,NXi))
    
    ####### BUILD CORRECTION MATRIX

    ### Upper Diagonal terms
    dataNYNXi = [numpy.zeros(NYi*NXi)]
    offset = numpy.array([1])

    ### Fix the coefficient: 2+(-1) = 1 ==> Dirichlet at one point (recovers the Laplacian)
    ### WARNING, multiplicative coefficient: dx_2 for M(j,i) with j over NY, i over NX
    dataNYNXi[0][1] = -1 * dx_2

    LAP0 = sp.dia_matrix((dataNYNXi,offset), shape=(NYi*NXi,NYi*NXi))
  
    return LAP + LAP0
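BuildLaPoisson depends on module-level nx, ny, dx_2 and dy_2; a self-contained sketch of just the 1-D operator with the left Neumann correction, assuming a small grid and unit scaling:

import numpy
import scipy.sparse as sp

NXi, dx_2 = 5, 1.0
dataNXi = [numpy.ones(NXi), -2 * numpy.ones(NXi), numpy.ones(NXi)]
dataNXi[2][1] = 2.                # Neumann condition on the left boundary
DXX = sp.dia_matrix((dataNXi, numpy.array([-1, 0, 1])), shape=(NXi, NXi)) * dx_2
print(DXX.toarray())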
Example #9
 def multiply(self, other):
     """Point-wise multiplication by another matrix, vector, or
     scalar.
     """
     # Scalar multiplication.
     if isscalarlike(other):
         return self._mul_scalar(other)
     # Sparse matrix or vector.
     if isspmatrix(other):
         if self.shape == other.shape:
             if not isinstance(other, fast_csr_matrix):
                 other = csr_matrix(other)
             return self._binopt(other, '_elmul_')
         # Single element.
         elif other.shape == (1,1):
             return self._mul_scalar(other.toarray()[0, 0])
         elif self.shape == (1,1):
             return other._mul_scalar(self.toarray()[0, 0])
         # A row times a column.
         elif self.shape[1] == other.shape[0] and self.shape[1] == 1:
             return self._mul_sparse_matrix(other.tocsc())
         elif self.shape[0] == other.shape[1] and self.shape[0] == 1:
             return other._mul_sparse_matrix(self.tocsc())
         # Row vector times matrix. other is a row.
         elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
             other = dia_matrix((other.toarray().ravel(), [0]),
                                 shape=(other.shape[1], other.shape[1]))
             return self._mul_sparse_matrix(other)
         # self is a row.
         elif self.shape[0] == 1 and self.shape[1] == other.shape[1]:
             copy = dia_matrix((self.toarray().ravel(), [0]),
                                 shape=(self.shape[1], self.shape[1]))
             return other._mul_sparse_matrix(copy)
         # Column vector times matrix. other is a column.
         elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
             other = dia_matrix((other.toarray().ravel(), [0]),
                                 shape=(other.shape[0], other.shape[0]))
             return other._mul_sparse_matrix(self)
         # self is a column.
         elif self.shape[1] == 1 and self.shape[0] == other.shape[0]:
             copy = dia_matrix((self.toarray().ravel(), [0]),
                                 shape=(self.shape[0], self.shape[0]))
             return copy._mul_sparse_matrix(other)
         else:
             raise ValueError("inconsistent shapes")
     # Dense matrix.
     if isdense(other):
         if self.shape == other.shape:
             ret = self.tocoo()
             ret.data = np.multiply(ret.data, other[ret.row, ret.col]
                                    ).view(np.ndarray).ravel()
             return ret
         # Single element.
         elif other.size == 1:
             return self._mul_scalar(other.flat[0])
     # Anything else.
     return np.multiply(self.todense(), other)
Example #10
def whfilter(a, weights=None, lamb=1600, p=3, ):
    """
    Generalized Whittaker-Henderson Graduation Method

    Parameters
    ----------
    a : array-like
          The input array, shape (n,)
    weights : array-like or None
          Weights
    lamb : float
          The relative importance between goodness of fit 
          and smoothness (smoothness increases with lamb).
    p : integer, default 3
          The degree of smoothness. We minimize the p-th 
          finite-differences of the graduated data. Examples:
          p=2 Hodrick-Prescott filter;
          p=3 Whittaker-Henderson method;
          Note: moments 0..p-1 will be conserved by graduation
    
    Returns
    -------
    out : array
          The smoothed data

    References
    ----------
    implementation of scikits.statsmodels.tsa.filters.hp_filter.py
    Alicja S. Nocon & William F. Scott (2012): "An extension of the 
       Whittaker-Henderson method of graduation", Scandinavian 
       Actuarial Journal, 2012:1, 70-79
    Whittaker, E. T. (1922). "On a new method of graduation", 
       Proceedings of the Edinburgh Mathematical Society 41,63-75.

    """
    # input data
    a = np.squeeze(a)
    if a.ndim > 1:
        raise ValueError("input array a must be 1d")
    n = a.size

    # weights
    W = np.squeeze(weights) if weights is not None else np.ones(n)
    if np.any(W == 0) or not np.all(np.isfinite(W)):
        raise ValueError("weights must be non-zero and finite.")
    W = sparse.dia_matrix((W, 0), shape=(n, n))

    # set up difference Matrix K, shape (n-p, n)
    # K_ij = k(j-i),  l=j-i
    # k(l) = (-1)^l Binomial(p,l) if 0<=l<=p else 0
    l = np.arange(p + 1)
    k = (-1)**l * comb(p, l)        # same as K_0j
    diags = np.tile(k, (n, 1)).T    # side-diagonal K_i,i+l; n copies of k(l)
    offsets = np.arange(p + 1)      # index of side-diagonals
    K = sparse.dia_matrix((diags, offsets), shape=(n - p, n))  # K_ij

    # solve quadratic optimization problem 
    return spsolve(W + lamb * K.T.dot(K), W.dot(a))
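The difference matrix K assembled above is the p-th order finite-difference operator stored diagonal by diagonal; a standalone check for p = 2 on six points:

import numpy as np
from scipy import sparse
from scipy.special import comb

n, p = 6, 2
l = np.arange(p + 1)
k = (-1)**l * comb(p, l)                 # [1., -2., 1.]
diags = np.tile(k, (n, 1)).T
K = sparse.dia_matrix((diags, np.arange(p + 1)), shape=(n - p, n))
print(K.toarray())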
Example #11
	def hittimes(self):
		lplus = self.lplus
		D = dia_matrix((self.degrees,0),shape = (self.nnodes,self.nnodes))
		diag = dia_matrix((lplus.diagonal(),0),shape = lplus.shape)
		sums = dia_matrix( ((lplus*D).sum(1),0),shape = lplus.shape)
		one = matrix(ones(lplus.shape))
		oneS = sums*one
		vol = self.volume
		return array(oneS - oneS.T - vol*lplus + vol*one*diag)
Example #12
def test_endianness():
    d = np.ones((3,4))
    offsets = [-1,0,1]

    a = dia_matrix((d.astype('<f8'), offsets), (4, 4))
    b = dia_matrix((d.astype('>f8'), offsets), (4, 4))
    v = np.arange(4)

    assert_allclose(a.dot(v), [1, 3, 6, 5])
    assert_allclose(b.dot(v), [1, 3, 6, 5])
Example #13
 def testExpLog(self):
     N = 10
     a = random(N)
     c = exp(a)
     discrepancy = c.diff(a) - sp.dia_matrix((np.exp(a._base), 0), (N,N))
     if discrepancy.nnz > 0:
         self.assertAlmostEqual(0, np.abs(discrepancy.data).max())
     c = log(a)
     discrepancy = c.diff(a) - sp.dia_matrix((1 / a._base, 0), (N,N))
     if discrepancy.nnz > 0:
         self.assertAlmostEqual(0, np.abs(discrepancy.data).max())
Example #14
 def testSinCos(self):
     N = 10
     a = random(N)
     b = sin(a)
     c = cos(a)
     discrepancy = b.diff(a) - sp.dia_matrix((np.cos(a._base), 0), (N,N))
     if discrepancy.nnz > 0:
         self.assertAlmostEqual(0, np.abs(discrepancy.data).max())
     discrepancy = c.diff(a) + sp.dia_matrix((np.sin(a._base), 0), (N,N))
     if discrepancy.nnz > 0:
         self.assertAlmostEqual(0, np.abs(discrepancy.data).max())
Example #15
 def testDiv(self):
     N = 10
     a = random(N)
     b = random(N)
     c = a / b / 2
     discrepancy = c.diff(a) - sp.dia_matrix((1. / b._base / 2., 0), (N,N))
     if discrepancy.nnz > 0:
         self.assertAlmostEqual(0, np.abs(discrepancy.data).max())
     discrepancy = c.diff(b) + sp.dia_matrix(((a / b**2)._base/2, 0), (N,N))
     if discrepancy.nnz > 0:
         self.assertAlmostEqual(0, np.abs(discrepancy.data).max())
Example #16
    def testPoisson1DResidual(self):
        N = 10000
        u = random(N)
        dx = 1. / (N * COMM_WORLD.Get_size() + 1)

        u_right = zeros(1)
        u_left = zeros(1)

        my_rank = COMM_WORLD.Get_rank()
        if my_rank > 0:
            COMM_WORLD.Send(u[:1], my_rank - 1)
        if my_rank < COMM_WORLD.Get_size() - 1:
            COMM_WORLD.Recv(u_right, my_rank + 1)

        if my_rank < COMM_WORLD.Get_size() - 1:
            COMM_WORLD.Send(u[-1:], my_rank + 1)
        if my_rank > 0:
            COMM_WORLD.Recv(u_left, my_rank - 1)

        u_ext = hstack([u_left, u, u_right])
        f = (u_ext[2:] + u_ext[:-2] - 2 * u_ext[1:-1]) / dx**2

        f_diff_u = diff_mpi(f, u, 'tangent')

        # check diagonal blocks
        lapl = -2 * sp.eye(N,N) \
             + sp.dia_matrix((np.ones(N), 1), (N,N)) \
             + sp.dia_matrix((np.ones(N), -1), (N,N))

        my_rank = COMM_WORLD.Get_rank()
        discrepancy = f_diff_u[my_rank] - lapl / dx**2
        if discrepancy.nnz > 0:
            self.assertAlmostEqual(0, np.abs(discrepancy.data).max())

        # lower diagonal blocks
        lapl_l = sp.csr_matrix(([1.], ([0], [N-1])), shape=(N,N))
        if my_rank > 0:
            discrepancy = f_diff_u[my_rank-1] - lapl_l / dx**2
            if discrepancy.nnz > 0:
                self.assertAlmostEqual(0, np.abs(discrepancy.data).max())

        # upper diagonal blocks
        lapl_u = lapl_l.T
        if my_rank < COMM_WORLD.Get_size() - 1:
            discrepancy = f_diff_u[my_rank+1] - lapl_u / dx**2
            if discrepancy.nnz > 0:
                self.assertAlmostEqual(0, np.abs(discrepancy.data).max())

        # other blocks are 0
        for rank in range(COMM_WORLD.Get_size()):
            if abs(rank - my_rank) > 1 and rank in f_diff_u:
                self.assertEqual(f_diff_u[rank], 0)
Example #17
def scale_normalize(X):
	row_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=1))).squeeze()
	col_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=0))).squeeze()
	row_diag = np.where(np.isnan(row_diag), 0, row_diag)
	col_diag = np.where(np.isnan(col_diag), 0, col_diag)
	if issparse(X):
		n_rows, n_cols = X.shape
		r = dia_matrix((row_diag, [0]), shape=(n_rows, n_rows))
		c = dia_matrix((col_diag, [0]), shape=(n_cols, n_cols))
		an = r * X * c
	else:
		an = row_diag[:, np.newaxis] * X * col_diag
	return an, row_diag, col_diag
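The sparse branch above is the usual r * X * c scaling by diagonal matrices; a small sketch on a toy matrix:

import numpy as np
from scipy.sparse import csr_matrix, dia_matrix

X = csr_matrix(np.array([[1., 3.], [3., 9.]]))
row_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=1))).squeeze()
col_diag = np.asarray(1.0 / np.sqrt(X.sum(axis=0))).squeeze()
r = dia_matrix((row_diag, [0]), shape=(2, 2))
c = dia_matrix((col_diag, [0]), shape=(2, 2))
print((r * X * c).toarray())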
Example #18
 def testMul(self):
     N = 1000
     a = random(N)
     b = random(N)
     c = a * b * 5
     self.assertEqual(0, (c.diff(a, 'tangent') - \
             5 * sp.dia_matrix((b._base, 0), (N,N))).nnz)
     self.assertEqual(0, (c.diff(a, 'adjoint') - \
             5 * sp.dia_matrix((b._base, 0), (N,N))).nnz)
     self.assertEqual(0, (c.diff(b, 'tangent') - \
             5 * sp.dia_matrix((a._base, 0), (N,N))).nnz)
     self.assertEqual(0, (c.diff(b, 'adjoint') - \
             5 * sp.dia_matrix((a._base, 0), (N,N))).nnz)
Example #19
 def _bh(self): 
   n = self.nx -1
   dp = ones(n)*2.
   dl = ones(n)*-1.
   ds = dl
 
   dh2 = 1./(self.dx*self.dx)
   D = [dp,dl,ds]
   
   bh = dia_matrix( (D,array([0,-1,1])),shape=(n,n))
   I  = dia_matrix( (-1.*dl,array([0])),shape=(n,n))
   
   A = I + self.a*self.dt*dh2*bh
   return A
Example #20
def geometry(Nr,Nz,parms):

    r = np.linspace(-parms.Lr, parms.Lr, Nr+1)
    hr= r[1]-r[0]
    r = r[::-1]
    e = np.ones(Nr)

    Dr = (np.diag(e,-1) - np.diag(e,1))/(2*hr)
    Dr[0,0:2] = [1,-1]/hr
    Dr[Nr,Nr-1:Nr+1] = [1,-1]/hr

    Dr2 = (np.diag(e,-1) - 2*np.diag(np.ones(Nr+1),0) + np.diag(e,1))/hr**2
    Dr2[0,0:3] = [1,-2,1]/hr**2
    Dr2[Nr,Nr-2:Nr+1] = [1,-2,1]/hr**2

    z = np.linspace(-parms.Lz, 0, Nz)
    hz=z[1]-z[0]
    z = z[::-1]
    e = np.ones(Nz-1)

    Dz = (np.diag(e,-1) - np.diag(e,1))/(2*hz)
    Dz[0,0:3] = [-3,4,-1]/(2*hz)
    Dz[Nz-1,Nz-3:Nz] = [1,-4,3]/(2*hz)

    Dz2 = (np.diag(e,-1) - 2*np.diag(np.ones(Nz),0) + np.diag(e,1))/hz**2
    Dz2[0,0:3] = [1,-2,1]/hz**2
    Dz2[Nz-1,Nz-3:Nz] = [1,-2,1]/hz**2

    sp.dia_matrix(Dr); sp.dia_matrix(Dr2)
    sp.dia_matrix(Dz); sp.dia_matrix(Dz2)

    return [Dr,Dr2,r,Dz,Dz2,z]
Example #21
def build_laplacian_related_matrices_sparse(W):
    """W: the sign matrix (sparse)
    """
    assert issparse(W)

    entries = list(indexed_entries(W))
    W_p = _make_matrix(list(filter(lambda e: e[2] > 0, entries)), W.shape)
    W_n = _make_matrix(list(filter(lambda e: e[2] < 0, entries)), W.shape)
    W_n = -W_n

    D_p = dia_matrix((np.transpose(W_p.sum(axis=1)), [0]), W.shape)
    D_n = dia_matrix((np.transpose(W_n.sum(axis=1)), [0]), W.shape)
    D_hat = D_p + D_n
    return W_p, W_n, D_p, D_n, D_hat
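The degree matrices D_p and D_n are simply diagonal matrices holding the row sums; a minimal check on a tiny, made-up positive part W_p:

import numpy as np
from scipy.sparse import csr_matrix, dia_matrix

W_p = csr_matrix(np.array([[0., 1., 1.], [1., 0., 0.], [1., 0., 0.]]))
D_p = dia_matrix((np.transpose(W_p.sum(axis=1)), [0]), W_p.shape)
print(D_p.toarray())   # diagonal of row degrees: 2, 1, 1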
Example #22
    def normalize(self, c=0):
        """ Normalize the graph according to the index c
        Normalization means that the sum of the edges values
        that go into or out each vertex must sum to 1

        Parameters
        ----------
        c=0 in {0, 1, 2}, optional: index that designates the way
            according to which D is normalized
            c == 0 => for each vertex a, sum{edge[e, 0]=a} D[e]=1
            c == 1 => for each vertex b, sum{edge[e, 1]=b} D[e]=1
            c == 2 => symmetric ('l2') normalization

        Notes
        -----
        Note that when sum_{edge[e, .] == a } D[e] = 0, nothing is performed
        """
        from scipy.sparse import dia_matrix
        c = int(c)
        if not c in [0, 1, 2]:
            raise ValueError('c must be equal to 0, 1 or 2')

        if self.E == 0:
            if c < 2:
                return np.zeros(self.V)
            else:
                return np.zeros(self.V), np.zeros(self.V)
        adj = self.to_coo_matrix().tocsr()
        s1 = adj.sum(0)
        s2 = adj.sum(1)
        if c == 1:
            s = dia_matrix((1. / s1, 0), shape=(self.V, self.V))
            adj = adj * s
            self.weights = wgraph_from_adjacency(adj).get_weights()
            return np.asarray(s1)
        if c == 0:
            s = dia_matrix((1. / s2.T, 0), shape=(self.V, self.V))
            adj = s * adj
            self.weights = wgraph_from_adjacency(adj).get_weights()
            return np.asarray(s2)
        if c == 2:
            s1 = dia_matrix((1. / np.sqrt(s1), 0),
                            shape=(self.V, self.V))
            s2 = dia_matrix((1. / np.sqrt(adj.sum(1)), 0),
                            shape=(self.V, self.V))
            adj = (s1 * adj) * s2
            self.weights = wgraph_from_adjacency(adj).get_weights()
            return np.asarray(s1), np.asarray(s2)
Example #23
 def get_subset_cpd(self, sub_idx):
     """ Get the cpd over a subset of the variables.
     :param np.ndarray[int]|np.ndarray[bool] sub_idx: indices of variables to keep
     :return: a new Gaussian CPD
     :rtype: GaussianCPD
     """
     if len(sub_idx) == 0 or (sub_idx.dtype == bool and not np.sum(sub_idx)):
         raise ValueError("sub_idx must not be empty")
     sub_mean = self.mean[sub_idx]
     sub_dim = len(sub_mean)
     if isinstance(self.precision, sp.dia_matrix):
         sub_precision = sp.dia_matrix((self.precision.diagonal()[sub_idx], np.zeros(1)),
                                       shape=(sub_dim, sub_dim))
     elif np.isscalar(self.precision):
         sub_precision = self.precision
     elif isinstance(self.precision, np.ndarray):
         if np.prod(self.precision.shape) == self.dim:
             sub_precision = self.precision[sub_idx]
         else:
             # We do the indexing this way for performance reasons.
             sub_precision = self.precision[sub_idx, :][:, sub_idx]
     else:
         # We do the indexing this way for performance reasons.
         sub_precision = self.precision.tocsr()[sub_idx, :][:, sub_idx]
     return GaussianCPD(dim=sub_dim, mean=sub_mean, precision=sub_precision,
                        mean_lin_op=get_subset_lin_op(self.mean_lin_op, sub_idx))
Example #24
def _solve_cholesky(X, y, alpha, sample_weight=None):
    # w = inv(X^t X + alpha*Id) * X.T y
    n_samples, n_features = X.shape
    n_targets = y.shape[1]

    has_sw = sample_weight is not None

    if has_sw:
        sample_weight = sample_weight * np.ones(n_samples)
        sample_weight_matrix = sparse.dia_matrix((sample_weight, 0),
            shape=(n_samples, n_samples))
        weighted_X = safe_sparse_dot(sample_weight_matrix, X)
        A = safe_sparse_dot(weighted_X.T, X, dense_output=True)
        Xy = safe_sparse_dot(weighted_X.T, y, dense_output=True)
    else:
        A = safe_sparse_dot(X.T, X, dense_output=True)
        Xy = safe_sparse_dot(X.T, y, dense_output=True)

    one_alpha = np.array_equal(alpha, len(alpha) * [alpha[0]])

    if one_alpha:
        A.flat[::n_features + 1] += alpha[0]
        return linalg.solve(A, Xy, sym_pos=True,
                            overwrite_a=True).T
    else:
        coefs = np.empty([n_targets, n_features])
        for coef, target, current_alpha in zip(coefs, Xy.T, alpha):
            A.flat[::n_features + 1] += current_alpha
            coef[:] = linalg.solve(A, target, sym_pos=True,
                                   overwrite_a=False).ravel()
            A.flat[::n_features + 1] -= current_alpha
        return coefs
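Weighting the rows of X through a diagonal sample-weight matrix, as in the sparse branch above, can be reproduced directly on toy data:

import numpy as np
from scipy import sparse

X = np.arange(6, dtype=float).reshape(3, 2)
sample_weight = np.array([1.0, 0.5, 2.0])
sample_weight_matrix = sparse.dia_matrix((sample_weight, 0), shape=(3, 3))
weighted_X = sample_weight_matrix * X    # each row of X scaled by its weight
print(weighted_X)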
Example #25
    def smooth(self, scalars, factor=1.0):
        """Smooth vertex-wise function given by `scalars` across the surface using
        mean curvature flow method (see http://brickisland.net/cs177fa12/?p=302).

        Amount of smoothing is controlled by `factor`.

        Parameters
        ----------
        scalars : 1D ndarray, shape (total_verts,)
            A scalar-valued function across the cortex, such as the curvature
            supplied by mean_curvature.
        factor : float, optional
            Amount of smoothing to perform, larger values smooth more.

        Returns
        -------
        smscalars : 1D ndarray, shape (total_verts,)
            Smoothed scalar values.
        """
        if factor == 0.0:
            return scalars
        
        B,D,W,V = self.laplace_operator
        npt = len(D)
        lfac = sparse.dia_matrix((D,[0]), (npt,npt)) - factor * (W-V)
        goodrows = np.nonzero(~np.array(lfac.sum(0) == 0).ravel())[0]
        lfac_solver = sparse.linalg.dsolve.factorized(lfac[goodrows][:,goodrows])
        goodsmscalars = lfac_solver((D * scalars)[goodrows])
        smscalars = np.zeros(scalars.shape)
        smscalars[goodrows] = goodsmscalars
        return smscalars
Example #26
    def dilation(self, nbiter=1, fast=True):
        """
        Morphological dilation of the field data. self.field is changed in place.

        Parameters
        ----------
        nbiter: int, optional, the number of iterations required

        fixme
        -----
        cython
        """
        nbiter = int(nbiter)
        if fast:
            from ._graph import dilation
            if self.E > 0:
                if (self.field.size == self.V):
                    self.field = self.field.reshape((self.V, 1))
                idx, neighb, _ = self.compact_neighb()
                for i in range(nbiter):
                    dilation(self.field, idx, neighb)
        else:
            from scipy.sparse import dia_matrix
            adj = self.to_coo_matrix() + dia_matrix(
                (np.ones(self.V), 0), (self.V, self.V))
            rows = adj.tolil().rows
            for i in range(nbiter):
                self.field = np.array([self.field[row].max(0) for row in rows])
Example #27
def smoothing_matrix(vertices, adj_mat, smoothing_steps=20, verbose=None):
    """Create a smoothing matrix which can be used to interpolate data defined
       for a subset of vertices onto a mesh with an adjacency matrix given by
       adj_mat.

       If smoothing_steps is None, smoothing steps are applied until
       the whole mesh is filled with non-zeros. Only use this option if
       the vertices correspond to a subsampled version of the mesh.

    Parameters
    ----------
    vertices : 1d array
        vertex indices
    adj_mat : sparse matrix
        N x N adjacency matrix of the full mesh
    smoothing_steps : int or None
        number of smoothing steps (Default: 20)
    verbose : bool, str, int, or None
        If not None, override default verbose level (see surfer.verbose).

    Returns
    -------
    smooth_mat : sparse matrix
        smoothing matrix with size N x len(vertices)
    """
    from scipy import sparse

    logger.info("Updating smoothing matrix, be patient..")

    e = adj_mat.copy()
    e.data[e.data == 2] = 1
    n_vertices = e.shape[0]
    e = e + sparse.eye(n_vertices, n_vertices)
    idx_use = vertices
    smooth_mat = 1.0
    n_iter = smoothing_steps if smoothing_steps is not None else 1000
    for k in range(n_iter):
        e_use = e[:, idx_use]

        data1 = e_use * np.ones(len(idx_use))
        idx_use = np.where(data1)[0]
        scale_mat = sparse.dia_matrix((1 / data1[idx_use], 0),
                                      shape=(len(idx_use), len(idx_use)))

        smooth_mat = scale_mat * e_use[idx_use, :] * smooth_mat

        logger.info("Smoothing matrix creation, step %d" % (k + 1))
        if smoothing_steps is None and len(idx_use) >= n_vertices:
            break

    # Make sure the smoothing matrix has the right number of rows
    # and is in COO format
    smooth_mat = smooth_mat.tocoo()
    smooth_mat = sparse.coo_matrix((smooth_mat.data,
                                    (idx_use[smooth_mat.row],
                                     smooth_mat.col)),
                                   shape=(n_vertices,
                                          len(vertices)))

    return smooth_mat
Example #28
def _randomized_logistic(X, y, weights, mask, C=1., verbose=False,
                         fit_intercept=True, tol=1e-3):
    X = X[safe_mask(X, mask)]
    y = y[mask]
    if issparse(X):
        size = len(weights)
        weight_dia = sparse.dia_matrix((1 - weights, 0), (size, size))
        X = X * weight_dia
    else:
        X *= (1 - weights)

    C = np.atleast_1d(np.asarray(C, dtype=np.float64))
    if C.ndim > 1:
        raise ValueError("C should be 1-dimensional array-like, "
                         "but got a {}-dimensional array-like instead: {}."
                         .format(C.ndim, C))

    scores = np.zeros((X.shape[1], len(C)), dtype=bool)

    for this_C, this_scores in zip(C, scores.T):
        # XXX : would be great to do it with a warm_start ...
        clf = LogisticRegression(C=this_C, tol=tol, penalty='l1', dual=False,
                                 fit_intercept=fit_intercept,
                                 solver='liblinear', multi_class='ovr')
        clf.fit(X, y)
        this_scores[:] = np.any(
            np.abs(clf.coef_) > 10 * np.finfo(float).eps, axis=0)
    return scores
Example #29
def to_sparse(D, format="csc"):
    """
    Transform a dense matrix into a sparse matrix of the requested format:
        bsr_matrix(arg1[, shape, dtype, copy, blocksize]) 	Block Sparse Row matrix
        coo_matrix(arg1[, shape, dtype, copy]) 	A sparse matrix in COOrdinate format.
        csc_matrix(arg1[, shape, dtype, copy]) 	Compressed Sparse Column matrix
        csr_matrix(arg1[, shape, dtype, copy]) 	Compressed Sparse Row matrix
        dia_matrix(arg1[, shape, dtype, copy]) 	Sparse matrix with DIAgonal storage
        dok_matrix(arg1[, shape, dtype, copy]) 	Dictionary Of Keys based sparse matrix.
        lil_matrix(arg1[, shape, dtype, copy]) 	Row-based linked list sparse matrix
    :param D: Dense matrix
    :param format: how to save the sparse matrix
    :return: sparse version
    """
    if format == "bsr":
        return sprs.bsr_matrix(D)
    elif format == "coo":
        return sprs.coo_matrix(D)
    elif format == "csc":
        return sprs.csc_matrix(D)
    elif format == "csr":
        return sprs.csr_matrix(D)
    elif format == "dia":
        return sprs.dia_matrix(D)
    elif format == "dok":
        return sprs.dok_matrix(D)
    elif format == "lil":
        return sprs.lil_matrix(D)
    else:
        return to_dense(D)
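The "dia" branch simply hands the dense array to dia_matrix, which keeps only the populated diagonals; a tiny standalone check of that conversion:

import numpy as np
import scipy.sparse as sprs

D = np.diag([1., 2., 3.])
S = sprs.dia_matrix(D)        # what to_sparse(D, format="dia") would return
print(S.offsets, S.data)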
Example #30
def compute_lrbt_transfos(zfc=None, zfo=None, mmat=None,
                          trunck=dict(threshh=1e-6)):
    """
    the transformation matrices for the BT MOR

    :param zfc:
        Factor of the controllability Gramian :math:`W_c = Z_cZ_c^H`
    :param zfo:
        Factor of the observability Gramian :math:`W_o = Z_oZ_o^H`
    :param mmat:
        mass matrix
    :param trunck:
        truncation parameters

    :return:
        the left and right transformation matrices `tl` and `tr` \
        for the balanced truncation

    """
    if mmat is None:
        mmat = sps.eye(zfo.shape[0])

    lsv_mat, sv, rsv_matt = np.linalg.svd(np.dot(zfc.T, mmat*zfo))

    k = np.where(sv > trunck['threshh'])[0].size
    lsvk, rsvk, svk = lsv_mat[:, :k], rsv_matt.T[:, :k], sv[:k]

    svsqri = 1./np.sqrt(svk)

    svsqri_mat = sps.dia_matrix((svsqri, np.array([0])), shape=(k, k))

    tl = np.dot(zfc, lsvk*svsqri_mat)
    tr = np.dot(zfo, rsvk*svsqri_mat)

    return tl, tr, sv
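The diag(1/sqrt(sv)) scaling can be checked in isolation; multiplying a dense array by the sparse diagonal on the right performs a matrix product that scales the columns, which is what `lsvk*svsqri_mat` relies on above (toy values):

import numpy as np
import scipy.sparse as sps

svk = np.array([4.0, 1.0])
svsqri_mat = sps.dia_matrix((1. / np.sqrt(svk), np.array([0])), shape=(2, 2))
lsvk = np.array([[1., 2.], [3., 4.]])
print(lsvk * svsqri_mat)      # columns scaled by 0.5 and 1.0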
Example #31
 def identity(self):
     """ Returns the identy matrix with same dimension as the adjacency matrix.
     """
     return dia_matrix((ones(self.nnodes), 0),
                       shape=(self.nnodes, self.nnodes))
Example #32
 def laplacian(self):
     if self._laplacian is None:
         self._laplacian = dia_matrix((self.degrees, 0),
                                      shape=(self.nnodes, self.nnodes))
         self._laplacian -= self.adj
     return self._laplacian
Example #33
def mask_test_edges(adj):
    # Function to build test set with 10% positive links
    # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.
    # TODO: Clean up.

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    adj_triu = sp.triu(adj)  # return the upper triangular part
    adj_tuple = sparse_to_tuple(adj_triu)  # convert to coo_matrix
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] / 10.))  # 10% test set
    num_val = int(np.floor(edges.shape[0] / 20.))  # 5% validation set

    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges,
                            np.hstack([test_edge_idx, val_edge_idx]),
                            axis=0)

    def ismember(a, b, tol=5):
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return np.any(rows_close)

    test_edges_false = []
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], edges_all):
            continue
        if test_edges_false:
            if ismember([idx_j, idx_i], np.array(test_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(test_edges_false)):
                continue
        test_edges_false.append([idx_i, idx_j])

    val_edges_false = []
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], train_edges):
            continue
        if ismember([idx_j, idx_i], train_edges):
            continue
        if ismember([idx_i, idx_j], val_edges):
            continue
        if ismember([idx_j, idx_i], val_edges):
            continue
        if val_edges_false:
            if ismember([idx_j, idx_i], np.array(val_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(val_edges_false)):
                continue
        val_edges_false.append([idx_i, idx_j])

    print(~ismember(val_edges_false, edges_all))
    # assert ~ismember(test_edges_false, edges_all)
    # assert ~ismember(val_edges_false, edges_all)
    # assert ~ismember(val_edges, train_edges)
    # assert ~ismember(test_edges, train_edges)
    # assert ~ismember(val_edges, test_edges)

    data = np.ones(train_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])),
                              shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
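Stripping the diagonal of a sparse adjacency matrix with a dia_matrix, as done at the top of this function, works on any square sparse matrix; a toy check:

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[1., 1.], [1., 1.]]))
adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
adj.eliminate_zeros()
print(adj.toarray())          # diagonal removed, off-diagonal entries kept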
Example #34
def hpfilter(X, lamb=1600):
    """
    Hodrick-Prescott filter

    Parameters
    ----------
    X : array-like
        The 1d ndarray timeseries to filter of length (nobs,) or (nobs,1)
    lamb : float
        The Hodrick-Prescott smoothing parameter. A value of 1600 is
        suggested for quarterly data. Ravn and Uhlig suggest using a value
        of 6.25 (1600/4**4) for annual data and 129600 (1600*3**4) for monthly
        data.

    Returns
    -------
    cycle : array
        The estimated cycle in the data given lamb.
    trend : array
        The estimated trend in the data given lamb.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> import pandas as pd
    >>> dta = sm.datasets.macrodata.load_pandas().data
    >>> index = pd.DatetimeIndex(start='1959Q1', end='2009Q4', freq='Q')
    >>> dta.set_index(index, inplace=True)

    >>> cycle, trend = sm.tsa.filters.hpfilter(dta.realgdp, 1600)
    >>> gdp_decomp = dta[['realgdp']]
    >>> gdp_decomp["cycle"] = cycle
    >>> gdp_decomp["trend"] = trend

    >>> import matplotlib.pyplot as plt
    >>> fig, ax = plt.subplots()
    >>> gdp_decomp[["realgdp", "trend"]]["2000-03-31":].plot(ax=ax,
    ...                                                      fontsize=16)
    >>> plt.show()

    .. plot:: plots/hpf_plot.py

    Notes
    -----
    The HP filter removes a smooth trend, `T`, from the data `X` by solving

    min sum((X[t] - T[t])**2 + lamb*((T[t+1] - T[t]) - (T[t] - T[t-1]))**2)
     T   t

    Here we implemented the HP filter as a ridge-regression rule using
    scipy.sparse. In this sense, the solution can be written as

    T = inv(I + lamb*K'K)X

    where I is a nobs x nobs identity matrix, and K is a (nobs-2) x nobs matrix
    such that

    K[i,j] = 1 if i == j or i == j + 2
    K[i,j] = -2 if i == j + 1
    K[i,j] = 0 otherwise

    See Also
    --------
    statsmodels.tsa.filters.bk_filter.bkfilter
    statsmodels.tsa.filters.cf_filter.cffilter
    statsmodels.tsa.seasonal.seasonal_decompose

    References
    ----------
    Hodrick, R.J, and E. C. Prescott. 1980. "Postwar U.S. Business Cycles: An
        Empricial Investigation." `Carnegie Mellon University discussion
        paper no. 451`.
    Ravn, M.O and H. Uhlig. 2002. "Notes On Adjusted the Hodrick-Prescott
        Filter for the Frequency of Observations." `The Review of Economics and
        Statistics`, 84(2), 371-80.
    """
    _pandas_wrapper = _maybe_get_pandas_wrapper(X)
    X = np.asarray(X, float)
    if X.ndim > 1:
        X = X.squeeze()
    nobs = len(X)
    I = sparse.eye(nobs, nobs)  # noqa:E741
    offsets = np.array([0, 1, 2])
    data = np.repeat([[1.], [-2.], [1.]], nobs, axis=1)
    K = sparse.dia_matrix((data, offsets), shape=(nobs - 2, nobs))

    use_umfpack = True
    trend = spsolve(I + lamb * K.T.dot(K), X, use_umfpack=use_umfpack)

    cycle = X - trend
    if _pandas_wrapper is not None:
        return _pandas_wrapper(cycle), _pandas_wrapper(trend)
    return cycle, trend
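The (nobs-2) x nobs second-difference matrix K described in the Notes can be inspected directly with the same dia_matrix call, here for a made-up nobs of 6:

import numpy as np
from scipy import sparse

nobs = 6
data = np.repeat([[1.], [-2.], [1.]], nobs, axis=1)
K = sparse.dia_matrix((data, np.array([0, 1, 2])), shape=(nobs - 2, nobs))
print(K.toarray())            # each row reads 1, -2, 1 along the diagonal band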
Example #35
def main():
    timer = Timer()
    timer.start()

    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    tf.set_random_seed(0)

    MAX_SENT_LENGTH = 20
    MAX_SENTS = 100
    EMBEDDING_DIM = 50
    POST_DIM = 10
    TEXT_DIM = 50
    VALIDATION_SPLIT = 0.2
    MIXTURES = 5
    Graph_DIM = 10
    TRAINING_EPOCHS = 50

    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_float('learning_rate', 0.0001, 'Initial learning rate.')
    flags.DEFINE_integer('hidden1', 32, 'Number of units in hidden layer 1.')
    flags.DEFINE_integer('hidden2', Graph_DIM,
                         'Number of units in hidden layer 2.')
    flags.DEFINE_integer('batch_size', 32, 'Size of a mini-batch')
    flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).')
    flags.DEFINE_float('lambda1', 1e-4, 'Parameter of energy.')
    flags.DEFINE_float('lambda2', 1e-9, 'lossSigma.')
    flags.DEFINE_float('lambda3', 0.01, 'GAE.')
    flags.DEFINE_string('model', 'gcn_ae', 'Model string.')
    model_str = FLAGS.model

    # variable to store evaluation results
    precision_list = []
    recall_list = []
    f1_list = []
    auc_list = []

    for t in range(10):
        with open('./data/instagram.pickle', 'rb') as handle:
            store_data = pickle.load(handle)

        labels = store_data['labels']
        df = store_data['df']
        data = store_data['data']
        postInfo = store_data['postInfo']
        timeInfo = store_data['timeInfo']
        embedding_matrix = store_data['embedding_matrix']
        word_index = store_data['word_index']

        num_session = data.shape[0]
        nb_validation_samples = int(VALIDATION_SPLIT * num_session)
        '''For Evaluation'''
        single_label = np.asarray(labels)
        labels = to_categorical(np.asarray(labels))
        print('Shape of data tensor:', data.shape)
        print('Shape of label tensor:', labels.shape)

        zeros = np.zeros(num_session)
        zeros = zeros.reshape((num_session, 1, 1))
        # FLAGS.learning_rate = lr
        '''Hierarchical Attention Network for text and other info'''
        placeholders = {
            'zero_input':
            tf.placeholder(tf.float32, shape=[None, 1, 1]),
            'review_input':
            tf.placeholder(tf.float32,
                           shape=[None, MAX_SENTS, MAX_SENT_LENGTH + 1]),
            'post_input':
            tf.placeholder(tf.float32, shape=[
                None,
                4,
            ]),
            'time_label':
            tf.placeholder(tf.float32, shape=[None, MAX_SENTS])
        }

        g = nx.Graph()
        edgelist = pd.read_csv('./data/source_target.csv')
        for i, elrow in edgelist.iterrows():
            g.add_edge(elrow[0].strip('\n'), elrow[1].strip('\n'))
        adj = nx.adjacency_matrix(g)
        user_attributes = pd.read_csv('./data/user_friend_follower.csv')
        user_attributes = user_attributes.set_index('user').T.to_dict('list')
        nodelist = list(g.nodes())
        features = []
        User_post = np.zeros(
            (len(nodelist), num_session))  # 2218 number of posts

        for id, node in enumerate(nodelist):
            posts_ID = df.loc[df['owner_id'] == node].index.values.tolist()
            for p_id in posts_ID:
                User_post[id][p_id] = 1
            features.append(user_attributes[node])

        # only keep the posts that are in the training data
        User_post_train = User_post[:, :-nb_validation_samples]
        User_post_test = User_post[:, -nb_validation_samples:]
        features = sparse.csr_matrix(features)
        features = normalize(features, norm='max', axis=0)
        adj_orig = adj
        adj_orig = adj_orig - sparse.dia_matrix(
            (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
        adj_orig.eliminate_zeros()
        adj_norm = preprocess_graph(adj)
        adj_label = adj + sparse.eye(adj.shape[0])
        adj_label = sparse_to_tuple(adj_label)

        # Define placeholders
        placeholders.setdefault('features', tf.sparse_placeholder(tf.float32))
        placeholders.setdefault('adj', tf.sparse_placeholder(tf.float32))
        placeholders.setdefault('adj_orig', tf.sparse_placeholder(tf.float32))
        placeholders.setdefault('dropout',
                                tf.placeholder_with_default(0., shape=()))
        placeholders.setdefault(
            'user_post', tf.placeholder(tf.int32, [len(nodelist), None]))
        d = {placeholders['dropout']: FLAGS.dropout}
        placeholders.update(d)
        num_nodes = adj.shape[0]

        features = sparse_to_tuple(features.tocoo())
        num_features = features[2][1]
        features_nonzero = features[1].shape[0]
        '''Graph AutoEncoder'''
        if model_str == 'gcn_ae':
            Graph_model = GCNModelAE(placeholders, num_features,
                                     features_nonzero)
        elif model_str == 'gcn_vae':
            Graph_model = GCNModelVAE(placeholders, num_features, num_nodes,
                                      features_nonzero)

        embedding_layer = Embedding(len(word_index) + 1,
                                    EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=MAX_SENT_LENGTH,
                                    trainable=True,
                                    mask_zero=True)

        all_input = Input(shape=(MAX_SENT_LENGTH + 1, ))
        sentence_input = crop(1, 0, MAX_SENT_LENGTH)(all_input)  # slice
        time_input = crop(1, MAX_SENT_LENGTH,
                          MAX_SENT_LENGTH + 1)(all_input)  # slice
        embedded_sequences = embedding_layer(sentence_input)
        # embedded_sequences=BatchNormalization()(embedded_sequences)
        l_lstm = Bidirectional(GRU(TEXT_DIM,
                                   return_sequences=True))(embedded_sequences)
        l_att = AttLayer(TEXT_DIM)(l_lstm)  # (?,200)
        # time_embedding=Dense(TIME_DIM,activation='sigmoid')(time_input)
        merged_output = Concatenate()([l_att,
                                       time_input])  # text+time information
        sentEncoder = Model(all_input, merged_output)

        review_input = placeholders['review_input']
        review_encoder = TimeDistributed(sentEncoder)(review_input)
        l_lstm_sent = Bidirectional(GRU(TEXT_DIM,
                                        return_sequences=True))(review_encoder)
        fully_sent = Dense(1, use_bias=False)(l_lstm_sent)
        pred_time = Activation(activation='linear')(fully_sent)
        zero_input = placeholders['zero_input']
        shift_predtime = Concatenate(axis=1)([zero_input, pred_time])
        shift_predtime = crop(1, 0, MAX_SENTS)(shift_predtime)
        l_att_sent = AttLayer(TEXT_DIM)(l_lstm_sent)

        # embed the #likes, shares
        post_input = placeholders['post_input']
        fully_post = Dense(POST_DIM, use_bias=False)(post_input)
        # norm_fullypost=BatchNormalization()(fully_post)
        post_embedding = Activation(activation='relu')(fully_post)
        fully_review = concatenate(
            [l_att_sent, post_embedding]
        )  # merge the document level vectro with the additional embedded features such as #likes

        pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
        with tf.name_scope('graph_cost'):
            preds_sub = Graph_model.reconstructions
            labels_sub = tf.reshape(
                tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                          validate_indices=False), [-1])
            if model_str == 'gcn_ae':
                opt = CostAE(preds=preds_sub,
                             labels=labels_sub,
                             pos_weight=pos_weight,
                             norm=norm)
            elif model_str == 'gcn_vae':
                opt = CostVAE(preds=preds_sub,
                              labels=labels_sub,
                              model=Graph_model,
                              num_nodes=num_nodes,
                              pos_weight=pos_weight,
                              norm=norm)
        User_latent = Graph_model.z_mean  # (n_user, G_embeddim)
        Post_latent = fully_review  # (batch size, text_embed_dim+post_dim)
        max_indices = tf.argmax(placeholders['user_post'], axis=0)
        add_latent = tf.gather(User_latent, max_indices)
        session_latent = tf.concat(
            [Post_latent, add_latent],
            axis=1)  # the representation of text + graph
        '''DAGMM'''
        h1_size = 2 * TEXT_DIM + Graph_DIM + POST_DIM
        gmm = GMM(MIXTURES)
        est_net = EstimationNet([h1_size, MIXTURES], tf.nn.tanh)
        gamma = est_net.inference(session_latent, FLAGS.dropout)
        gmm.fit(session_latent, gamma)
        individual_energy = gmm.energy(session_latent)

        Time_label = placeholders['time_label']
        Time_label = tf.reshape(Time_label,
                                [tf.shape(Time_label)[0], MAX_SENTS, 1])

        with tf.name_scope('loss'):
            GAE_error = opt.cost
            energy = tf.reduce_mean(individual_energy)
            lossSigma = gmm.cov_diag_loss()
            prediction_error = tf.losses.mean_squared_error(
                shift_predtime, Time_label)
            loss = prediction_error + FLAGS.lambda1 * energy + FLAGS.lambda2 * lossSigma + FLAGS.lambda3 * GAE_error

        x_train = data[:-nb_validation_samples]
        time_train = timeInfo[:-nb_validation_samples]
        zeros_train = zeros[:-nb_validation_samples]
        y_train = labels[:-nb_validation_samples]
        post_train = postInfo[:-nb_validation_samples]
        x_val = data[-nb_validation_samples:]
        zeros_test = zeros[-nb_validation_samples:]
        time_test = timeInfo[-nb_validation_samples:]
        y_val = labels[-nb_validation_samples:]
        post_test = postInfo[-nb_validation_samples:]
        y_single = single_label[-nb_validation_samples:]

        print(
            'Number of positive and negative posts in training and validation set'
        )
        print(y_train.sum(axis=0))
        print(y_val.sum(axis=0))
        print("model fitting - Unsupervised cyberbullying detection")

        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
        train_step = optimizer.minimize(loss)
        GAEcorrect_prediction = tf.equal(
            tf.cast(tf.greater_equal(tf.sigmoid(preds_sub), 0.5), tf.int32),
            tf.cast(labels_sub, tf.int32))
        feed_dict_train = construct_feed_dict(zeros_train, x_train, post_train,
                                              time_train, FLAGS.dropout,
                                              adj_norm, adj_label, features,
                                              User_post_train, placeholders)
        feed_dict_train.update({placeholders['dropout']: FLAGS.dropout})

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        total_batch = int(num_session / FLAGS.batch_size)
        zero_batches = np.array_split(zeros_train, total_batch)
        x_batches = np.array_split(x_train, total_batch)
        p_batches = np.array_split(post_train, total_batch)
        t_batches = np.array_split(time_train, total_batch)
        UP_batches = np.array_split(User_post_train, total_batch, axis=1)

        for epoch in range(TRAINING_EPOCHS):
            ave_cost = 0
            ave_energy = 0
            ave_recon = 0
            ave_sigma = 0
            ave_GAE = 0
            for i in range(total_batch):
                batch_x = x_batches[i]
                batch_p = p_batches[i]
                batch_t = t_batches[i]
                batch_z = zero_batches[i]
                user_post = UP_batches[i]
                feed_dict = construct_feed_dict(batch_z, batch_x, batch_p,
                                                batch_t, FLAGS.dropout,
                                                adj_norm, adj_label, features,
                                                user_post, placeholders)
                feed_dict.update({placeholders['dropout']: FLAGS.dropout})
                _, total_loss, loss_sigma, GAE_loss, Energy_error, recon_error = sess.run(
                    [
                        train_step, loss, lossSigma, GAE_error, energy,
                        prediction_error
                    ], feed_dict)
                ave_cost += total_loss / total_batch
                ave_energy += Energy_error / total_batch
                ave_GAE += GAE_loss / total_batch
                ave_sigma += loss_sigma / total_batch
                ave_recon += recon_error / total_batch
            # if epoch % 10 == 0 or epoch == TRAINING_EPOCHS - 1:
            # print("This is epoch %d, the total loss is %f, energy error is %f, GAE error is %f, sigma error is %f,prediction error is %f") \
            #      % (epoch + 1, ave_cost, ave_energy, ave_GAE, ave_sigma, ave_recon)

        fix = gmm.fix_op()
        sess.run(fix, feed_dict=feed_dict_train)

        feed_dict_test = construct_feed_dict(zeros_test, x_val, post_test,
                                             time_test, FLAGS.dropout,
                                             adj_norm, adj_label, features,
                                             User_post_test, placeholders)
        pred_energy, representations = sess.run(
            [individual_energy, session_latent], feed_dict=feed_dict_test)
        bully_energy_threshold = np.percentile(pred_energy, 65)
        print('the bully energy threshold is : %f' % bully_energy_threshold)
        label_pred = np.where(pred_energy >= bully_energy_threshold, 1, 0)
        print(precision_recall_fscore_support(y_single, label_pred))
        print(accuracy_score(y_single, label_pred))
        print(roc_auc_score(y_single, label_pred))
        tf.reset_default_graph()
        K.clear_session()

        precision_list.append(
            precision_recall_fscore_support(y_single, label_pred)[0][1])
        recall_list.append(
            precision_recall_fscore_support(y_single, label_pred)[1][1])
        f1_list.append(
            precision_recall_fscore_support(y_single, label_pred)[2][1])
        auc_list.append(roc_auc_score(y_single, label_pred))

    print('>>> Evaluation metrics')
    print('>>> precision mean: {0:.4f}; precision std: {1:.4f}'.format(
        np.mean(precision_list), np.std(precision_list)))
    print('>>> recall mean: {0:.4f}; recall std: {1:.4f}'.format(
        np.mean(recall_list), np.std(recall_list)))
    print('>>> f1 mean: {0:.4f}; f1 std: {1:.4f}'.format(
        np.mean(f1_list), np.std(f1_list)))
    print('>>> auc mean: {0:.4f}; auc std: {1:.4f}'.format(
        np.mean(auc_list), np.std(auc_list)))

    timer.stop()
Example #36
def lat2SW(nrows=3, ncols=5, criterion="rook", row_st=False):
    """
    Create a sparse W matrix for a regular lattice.

    Parameters
    ----------

    nrows   : int
              number of rows
    ncols   : int
              number of columns
    criterion : {"rook", "queen", "bishop"}
              type of contiguity. Default is rook.
    row_st  : boolean
              If True, the created sparse W object is row-standardized so
              every row sums up to one. Defaults to False.

    Returns
    -------

    w : scipy.sparse.dia_matrix
        instance of a scipy sparse matrix

    Notes
    -----

    Observations are row ordered: the first ncols observations are in row 0, the next ncols in row 1, and so on.
    This method directly creates the W matrix using the structure of the contiguity type.

    Examples
    --------

    >>> from pysal import weights
    >>> w9 = weights.lat2SW(3,3)
    >>> w9[0,1]
    1
    >>> w9[3,6]
    1
    >>> w9r = weights.lat2SW(3,3, row_st=True)
    >>> w9r[3,6]
    0.33333333333333331
    """

    n = nrows * ncols
    diagonals = []
    offsets = []
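    # Descriptive comments (added): only the lower sub-diagonals are filled here;
    # the matrix is symmetrized at the end via m + m.T.
    #   offset -1                       : left/right neighbors (rook/queen), row wrap-around zeroed
    #   offset -ncols                   : up/down neighbors (rook/queen)
    #   offsets -(ncols-1), -(ncols+1)  : diagonal neighbors (queen/bishop)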
    if criterion == "rook" or criterion == "queen":
        d = np.ones((1, n))
        for i in range(ncols - 1, n, ncols):
            d[0, i] = 0
        diagonals.append(d)
        offsets.append(-1)

        d = np.ones((1, n))
        diagonals.append(d)
        offsets.append(-ncols)

    if criterion == "queen" or criterion == "bishop":
        d = np.ones((1, n))
        for i in range(0, n, ncols):
            d[0, i] = 0
        diagonals.append(d)
        offsets.append(-(ncols - 1))

        d = np.ones((1, n))
        for i in range(ncols - 1, n, ncols):
            d[0, i] = 0
        diagonals.append(d)
        offsets.append(-(ncols + 1))
    data = np.concatenate(diagonals)
    offsets = np.array(offsets)
    m = sparse.dia_matrix((data, offsets), shape=(n, n), dtype=np.int8)
    m = m + m.T
    if row_st:
        m = sparse.spdiags(1. / m.sum(1).T, 0, *m.shape) * m
    return m
Example #37
0
def maskTestEdges(adj, testPercent=10., valPercent=5.):
    """ Randomly removes some edges from original graph to create test and validation sets for link prediction task
    :param adj: complete sparse adjacency matrix of the graph
    :param testPercent: percentage of edges in test set
    :param valPercent: percentage of edges in validation set
    :return: train incomplete adjacency matrix, validation and test sets
    """
    # Remove diagonal elements
    adj = adj - sp.dia_matrix((adj.diagonal()[None, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert adj.diagonal().sum() == 0

    edgesPositive, _, _ = sparseToTuple(adj)
    # Filtering out edges from lower triangle of adjacency matrix
    edgesPositive = edgesPositive[edgesPositive[:,1] > edgesPositive[:,0],:]
    # val_edges, val_edges_false, test_edges, test_edges_false = None, None, None, None

    # number of positive (and negative) edges in test and val sets:
    numTest = int(np.floor(edgesPositive.shape[0] / (100. / testPercent)))
    numVal = int(np.floor(edgesPositive.shape[0] / (100. / valPercent)))

    # sample positive edges for test and val sets:
    edgesPositiveIdx = np.arange(edgesPositive.shape[0])
    np.random.shuffle(edgesPositiveIdx)
    valEdgeIdx = edgesPositiveIdx[:numVal]
    testEdgeIdx = edgesPositiveIdx[numVal:(numVal + numTest)]
    testEdges = edgesPositive[testEdgeIdx] # positive test edges
    valEdges = edgesPositive[valEdgeIdx] # positive val edges
    trainEdges = np.delete(edgesPositive, np.hstack([testEdgeIdx, valEdgeIdx]), axis = 0) # positive train edges

    # the above strategy for sampling without replacement will not work for
    # sampling negative edges on large graphs, because the pool of negative
    # edges is much much larger due to sparsity, therefore we'll use
    # the following strategy:
    # 1. sample random linear indices from adjacency matrix WITH REPLACEMENT
    # (without replacement is super slow). sample more than we need so we'll
    # probably have enough after all the filtering steps.
    # 2. remove any edges that have already been added to the other edge lists
    # 3. convert to (i,j) coordinates
    # 4. swap i and j where i > j, to ensure they're upper triangle elements
    # 5. remove any duplicate elements if there are any
    # 6. remove any diagonal elements
    # 7. if we don't have enough edges, repeat this process until we get enough
    positiveIdx, _, _ = sparseToTuple(adj) # [i,j] coord pairs for all true edges
    positiveIdx = positiveIdx[:,0]*adj.shape[0] + positiveIdx[:,1] # linear indices
    testEdgesFalse = np.empty((0,2),dtype='int64')
    idxTestEdgesFalse = np.empty((0,),dtype='int64')

    while len(testEdgesFalse) < len(testEdges):
        # step 1:
        idx = np.random.choice(adj.shape[0]**2, 2*(numTest - len(testEdgesFalse)), replace = True)
        # step 2:
        idx = idx[~np.in1d(idx, positiveIdx, assume_unique = True)]
        idx = idx[~np.in1d(idx, idxTestEdgesFalse, assume_unique = True)]
        # step 3:
        rowidx = idx // adj.shape[0]
        colidx = idx % adj.shape[0]
        coords = np.vstack((rowidx,colidx)).transpose()
        # step 4:
        lowertrimask = coords[:,0] > coords[:,1]
        coords[lowertrimask] = coords[lowertrimask][:,::-1]
        # step 5:
        coords = np.unique(coords, axis = 0) # note: coords are now sorted lexicographically
        np.random.shuffle(coords) # not anymore
        # step 6:
        coords = coords[coords[:,0] != coords[:,1]]
        # step 7:
        coords = coords[:min(numTest, len(idx))]
        testEdgesFalse = np.append(testEdgesFalse, coords, axis = 0)
        idx = idx[:min(numTest, len(idx))]
        idxTestEdgesFalse = np.append(idxTestEdgesFalse, idx)

    valEdgesFalse = np.empty((0,2), dtype = 'int64')
    idxValEdgesFalse = np.empty((0,), dtype = 'int64')
    while len(valEdgesFalse) < len(valEdges):
        # step 1:
        idx = np.random.choice(adj.shape[0]**2, 2*(numVal - len(valEdgesFalse)), replace = True)
        # step 2:
        idx = idx[~np.in1d(idx, positiveIdx, assume_unique = True)]
        idx = idx[~np.in1d(idx, idxTestEdgesFalse, assume_unique = True)]
        idx = idx[~np.in1d(idx, idxValEdgesFalse, assume_unique = True)]
        # step 3:
        rowidx = idx // adj.shape[0]
        colidx = idx % adj.shape[0]
        coords = np.vstack((rowidx,colidx)).transpose()
        # step 4:
        lowertrimask = coords[:,0] > coords[:,1]
        coords[lowertrimask] = coords[lowertrimask][:,::-1]
        # step 5:
        coords = np.unique(coords, axis = 0) # note: coords are now sorted lexicographically
        np.random.shuffle(coords) # not any more
        # step 6:
        coords = coords[coords[:,0] != coords[:,1]]
        # step 7:
        coords = coords[:min(numVal, len(idx))]
        valEdgesFalse = np.append(valEdgesFalse, coords, axis = 0)
        idx = idx[:min(numVal, len(idx))]
        idxValEdgesFalse = np.append(idxValEdgesFalse, idx)

    # sanity checks:
    trainEdgesLinear = trainEdges[:,0]*adj.shape[0] + trainEdges[:,1]
    testEdgesLinear = testEdges[:,0]*adj.shape[0] + testEdges[:,1]
    assert not np.any(np.in1d(idxTestEdgesFalse, positiveIdx))
    assert not np.any(np.in1d(idxValEdgesFalse, positiveIdx))
    assert not np.any(np.in1d(valEdges[:,0]*adj.shape[0]+valEdges[:,1], trainEdgesLinear))
    assert not np.any(np.in1d(testEdgesLinear, trainEdgesLinear))
    assert not np.any(np.in1d(valEdges[:,0]*adj.shape[0]+valEdges[:,1], testEdgesLinear))

    # Re-build adj matrix
    data = np.ones(trainEdges.shape[0])
    adjTrain = sp.csr_matrix((data, (trainEdges[:, 0], trainEdges[:, 1])), shape=adj.shape)
    adjTrain = adjTrain + adjTrain.T
    return adjTrain, valEdges, valEdgesFalse, testEdges, testEdgesFalse
Example #38
0
# Load data
#adj, features = load_data(dataset_str)
matfile = sio.loadmat('../../../data/' + dataset_str + '.mat')
adj0 = matfile['net']
try:
    features0 = matfile['group']
except:
    features0 = sp.identity(adj0.shape[0])

Roc = []
Ap = []
for expnum in range(10):

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj0
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj0)
    adj = adj_train

    if FLAGS.features == 0:
        features = sp.identity(features0.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32),
Example #39
0
def fourth_difference_matrix(N=3, bc='BothClamped', cfs=None):
    """
    generates the fourth order difference operator in matrix form
    """
    checkInputArgs(N, bc)

    diag = 3 * [None]

    # determine matrix dimensions based on N and type of boundary conditions
    if bc == 'BothFree':
        M = N + 1
    elif 'Free' in bc:
        M = N
    else:
        M = N - 1

    diag[0] = 6 + np.zeros(M)
    diag[1] = -4 + np.zeros(M)
    diag[2] = np.ones(M)

    if 'Free' not in bc:
        if bc == 'BothSimplySupported':
            diag[0][0] = 5
            diag[0][-1] = 5
        elif bc == 'LeftSimplySupportedRightClamped':
            diag[0][0] = 5
        elif bc == 'LeftClampedRightSimplySupported':
            diag[0][-1] = 5

        return dia_matrix(
            ([diag[0], diag[1], diag[1], diag[2], diag[2]], [0, 1, -1, 2, -2]),
            shape=(M, M))
    else:
        if cfs is None:
            raise TypeError('argument cfs must be supplied')
        elif not isinstance(cfs, dict):
            raise TypeError('argument cfs must be of type dict')

        if bc == 'BothFree':
            diag[0][0] = cfs['a0']
            diag[0][-1] = cfs['a0']
            diag[0][1] = 5
            diag[0][-2] = 5
            diag[1][0] = -2
            diag[1][-2] = cfs['a1']
            diag2 = np.concatenate((cfs['a2'] + np.zeros(1), np.zeros(M - 2),
                                    cfs['a2'] + np.zeros(1)))
            Dxxxx = dia_matrix(
                ([diag[0],
                  np.fliplr([diag[1]])[0], diag[1], diag[2], diag[2]
                  ], [0, 1, -1, 2, -2]),
                shape=(M, M))
        elif bc in ('LeftClampedRightFree', 'LeftSimplySupportedRightFree'):
            diag[0][-1] = cfs['a0']
            diag[0][-2] = 5
            diag[1][-2] = cfs['a1']
            diag2 = np.concatenate((np.zeros(M - 1), cfs['a2'] + np.zeros(1)))
            if bc == 'LeftSimplySupportedRightFree':
                diag[0][0] = 5
            tmp = np.roll(np.fliplr([diag[1]])[0], -1)
            tmp[-1] = -2
            Dxxxx = dia_matrix(
                ([diag[0], tmp, diag[1], diag[2], diag[2]], [0, 1, -1, 2, -2]),
                shape=(M, M))
        elif bc in ('LeftFreeRightClamped', 'LeftFreeRightSimplySupported'):
            diag[0][0] = cfs['a0']
            diag[0][1] = 5
            diag[1][0] = -2
            diag2 = np.concatenate((cfs['a2'] + np.zeros(1), np.zeros(M - 1)))
            if bc == 'LeftFreeRightSimplySupported':
                diag[0][-1] = 5
            tmp = np.roll(np.fliplr([diag[1]])[0], 1)
            tmp[1] = cfs['a1']
            Dxxxx = dia_matrix(
                ([diag[0], tmp, diag[1], diag[2], diag[2]], [0, 1, -1, 2, -2]),
                shape=(M, M))

        return (Dxxxx,
                dia_matrix(([
                    diag2, -1.0 * np.roll(diag2, 1), -1.0 * np.roll(diag2, -1)
                ], [0, 1, -1]),
                           shape=(M, M)))
Example #40
0
    def train(self, adj):
        # Store original adjacency matrix (without diagonal entries) for later
        adj_orig = adj
        adj_orig = adj_orig - sp.dia_matrix(
            (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
        adj_orig.eliminate_zeros()

        adj_train = adj
        features = sp.identity(adj.shape[0])  # featureless
        # Some preprocessing
        adj_norm = preprocess_graph(adj)
        # Define placeholders
        self.placeholders = {
            'features': tf.sparse_placeholder(tf.float32),
            'adj': tf.sparse_placeholder(tf.float32),
            'adj_orig': tf.sparse_placeholder(tf.float32),
            'dropout': tf.placeholder_with_default(0., shape=())
        }

        num_nodes = adj.shape[0]
        features = sparse_to_tuple(features.tocoo())
        num_features = features[2][1]
        features_nonzero = features[1].shape[0]

        # Create model
        if self.model_selection == 'gcn_ae':
            self.model = GCNModelAE(self.placeholders, num_features,
                                    features_nonzero, self.hidden1,
                                    self.hidden2)
        elif self.model_selection == 'gcn_vae':
            self.model = GCNModelVAE(self.placeholders, num_features,
                                     num_nodes, features_nonzero, self.hidden1,
                                     self.hidden2)

        pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
        norm = adj.shape[0] * adj.shape[0] / float(
            (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

        # Optimizer
        with tf.name_scope('optimizer'):
            if self.model_selection == 'gcn_ae':
                opt = OptimizerAE(preds=self.model.reconstructions,
                                  labels=tf.reshape(
                                      tf.sparse_tensor_to_dense(
                                          self.placeholders['adj_orig'],
                                          validate_indices=False), [-1]),
                                  pos_weight=pos_weight,
                                  norm=norm,
                                  learning_rate=self.learning_rate)
            elif self.model_selection == 'gcn_vae':
                opt = OptimizerVAE(preds=self.model.reconstructions,
                                   labels=tf.reshape(
                                       tf.sparse_tensor_to_dense(
                                           self.placeholders['adj_orig'],
                                           validate_indices=False), [-1]),
                                   model=self.model,
                                   num_nodes=num_nodes,
                                   pos_weight=pos_weight,
                                   norm=norm,
                                   learning_rate=self.learning_rate)

        # Initialize session
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        adj_label = adj_train + sp.eye(adj_train.shape[0])
        adj_label = sparse_to_tuple(adj_label)

        # Train model
        for epoch in range(self.epochs):
            t = time.time()
            # Construct feed dictionary
            self.feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                                 self.placeholders)
            self.feed_dict.update({self.placeholders['dropout']: self.dropout})
            # Run single weight update
            outs = self.sess.run([opt.opt_op, opt.cost, opt.accuracy],
                                 feed_dict=self.feed_dict)

            # Compute average loss
            avg_cost = outs[1]
            avg_accuracy = outs[2]

            print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                  "{:.5f}".format(avg_cost), "train_acc=",
                  "{:.5f}".format(avg_accuracy), "time=",
                  "{:.5f}".format(time.time() - t))

        print("Optimization Finished!")
Example #41
0
    def test_dia(self):
        x = sparse.dia_matrix(
            (cupy.array([], 'f'),
             cupy.array([0], 'i')),
            shape=(0, 0), dtype='f')
        self.assertTrue(sparse.isspmatrix_dia(x))
Example #42
0
def d2matrix(nelem):
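    # Descriptive comment (added): tridiagonal second-difference operator with
    # stencil [1, -2, 1] on the sub-, main, and super-diagonals, returned in CSC form.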
    elements = ones((3, nelem))
    elements[1, :] *= -2
    return dia_matrix((elements, [-1, 0, 1]), shape=(nelem, nelem)).tocsc()
Example #43
0
def gae_scores(adj_sparse,
               train_test_split,
               features_matrix=None,
               LEARNING_RATE=0.01,
               EPOCHS=200,
               HIDDEN1_DIM=32,
               HIDDEN2_DIM=16,
               DROPOUT=0,
               edge_score_mode="dot-product",
               verbose=1):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
        test_edges, test_edges_false = train_test_split # Unpack train-test split

    start_time = time.time()

    # Train on CPU (hide GPU) due to memory constraints
    os.environ['CUDA_VISIBLE_DEVICES'] = ""

    # Convert features from normal matrix --> sparse matrix --> tuple
    # features_tuple contains: (list of matrix coordinates, list of values, matrix dimensions)
    if features_matrix is None:
        x = sp.lil_matrix(np.identity(adj_sparse.shape[0]))
    else:
        x = sp.lil_matrix(features_matrix)
    features_tuple = sparse_to_tuple(x)
    features_shape = features_tuple[2]

    # Get graph attributes (to feed into model)
    num_nodes = adj_sparse.shape[0]  # number of nodes in adjacency matrix
    num_features = features_shape[
        1]  # number of features (columns of features matrix)
    features_nonzero = features_tuple[1].shape[
        0]  # number of non-zero entries in features matrix (or length of values list)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = deepcopy(adj_sparse)
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    # Normalize adjacency matrix
    adj_norm = preprocess_graph(adj_train)

    # Add in diagonals
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # Define placeholders
    placeholders = {
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    # How much to weigh positive examples (true edges) in the cost function
    # Want to weigh less-frequent classes higher, so as to prevent model output bias
    # pos_weight = (num. negative samples) / (num. positive samples)
    pos_weight = float(adj_sparse.shape[0] * adj_sparse.shape[0] -
                       adj_sparse.sum()) / adj_sparse.sum()

    # normalize (scale) average weighted cost
    norm = adj_sparse.shape[0] * adj_sparse.shape[0] / float(
        (adj_sparse.shape[0] * adj_sparse.shape[0] - adj_sparse.sum()) * 2)

    # Create VAE model
    model = GCNModelVAE(placeholders, num_features, num_nodes,
                        features_nonzero, HIDDEN1_DIM, HIDDEN2_DIM)

    opt = OptimizerVAE(preds=model.reconstructions,
                       labels=tf.reshape(
                           tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                     validate_indices=False),
                           [-1]),
                       model=model,
                       num_nodes=num_nodes,
                       pos_weight=pos_weight,
                       norm=norm,
                       learning_rate=LEARNING_RATE)

    cost_val = []
    acc_val = []
    val_roc_score = []

    prev_embs = []

    # Initialize session
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Train model
    for epoch in range(EPOCHS):

        t = time.time()
        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features_tuple,
                                        placeholders)
        feed_dict.update({placeholders['dropout']: DROPOUT})
        # Run single weight update
        outs = sess.run([opt.opt_op, opt.cost, opt.accuracy],
                        feed_dict=feed_dict)

        # Compute average loss
        avg_cost = outs[1]
        avg_accuracy = outs[2]

        # Evaluate predictions
        feed_dict.update({placeholders['dropout']: 0})
        gae_emb = sess.run(model.z_mean, feed_dict=feed_dict)

        prev_embs.append(gae_emb)

        gae_score_matrix = np.dot(gae_emb, gae_emb.T)

        # # TODO: remove this (debugging)
        # if not np.isfinite(gae_score_matrix).all():
        #     print 'Found non-finite value in GAE score matrix! Epoch: {}'.format(epoch)
        #     with open('numpy-nan-debugging.pkl', 'wb') as f:
        #         dump_info = {}
        #         dump_info['gae_emb'] = gae_emb
        #         dump_info['epoch'] = epoch
        #         dump_info['gae_score_matrix'] = gae_score_matrix
        #         dump_info['adj_norm'] = adj_norm
        #         dump_info['adj_label'] = adj_label
        #         dump_info['features_tuple'] = features_tuple
        #         # dump_info['feed_dict'] = feed_dict
        #         dump_info['prev_embs'] = prev_embs
        #         pickle.dump(dump_info, f, protocol=2)
        # # END TODO

        roc_curr, roc_curve_curr, ap_curr = get_roc_score(val_edges,
                                                          val_edges_false,
                                                          gae_score_matrix,
                                                          apply_sigmoid=True)
        val_roc_score.append(roc_curr)

        # Print results for this epoch
        if verbose == 2:
            print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                  "{:.5f}".format(avg_cost), "train_acc=",
                  "{:.5f}".format(avg_accuracy), "val_roc=", "{:.5f}".format(
                      val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr),
                  "time=", "{:.5f}".format(time.time() - t))

    if verbose == 2:
        print("Optimization Finished!")

    # Print final results
    feed_dict.update({placeholders['dropout']: 0})
    gae_emb = sess.run(model.z_mean, feed_dict=feed_dict)

    # Dot product edge scores (default)
    if edge_score_mode == "dot-product":
        gae_score_matrix = np.dot(gae_emb, gae_emb.T)

        runtime = time.time() - start_time

        # Calculate final scores
        gae_val_roc, gae_val_roc_curve, gae_val_ap = get_roc_score(
            val_edges, val_edges_false, gae_score_matrix)
        gae_test_roc, gae_test_roc_curve, gae_test_ap = get_roc_score(
            test_edges, test_edges_false, gae_score_matrix)

    # Take bootstrapped edge embeddings (via hadamard product)
    elif edge_score_mode == "edge-emb":

        def get_edge_embeddings(edge_list):
            embs = []
            for edge in edge_list:
                node1 = edge[0]
                node2 = edge[1]
                emb1 = gae_emb[node1]
                emb2 = gae_emb[node2]
                edge_emb = np.multiply(emb1, emb2)
                embs.append(edge_emb)
            embs = np.array(embs)
            return embs

        # Train-set edge embeddings
        pos_train_edge_embs = get_edge_embeddings(train_edges)
        neg_train_edge_embs = get_edge_embeddings(train_edges_false)
        train_edge_embs = np.concatenate(
            [pos_train_edge_embs, neg_train_edge_embs])

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.concatenate(
            [np.ones(len(train_edges)),
             np.zeros(len(train_edges_false))])

        # Val-set edge embeddings, labels
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            pos_val_edge_embs = get_edge_embeddings(val_edges)
            neg_val_edge_embs = get_edge_embeddings(val_edges_false)
            val_edge_embs = np.concatenate(
                [pos_val_edge_embs, neg_val_edge_embs])
            val_edge_labels = np.concatenate(
                [np.ones(len(val_edges)),
                 np.zeros(len(val_edges_false))])

        # Test-set edge embeddings, labels
        pos_test_edge_embs = get_edge_embeddings(test_edges)
        neg_test_edge_embs = get_edge_embeddings(test_edges_false)
        test_edge_embs = np.concatenate(
            [pos_test_edge_embs, neg_test_edge_embs])

        # Create test-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.concatenate(
            [np.ones(len(test_edges)),
             np.zeros(len(test_edges_false))])

        # Train logistic regression classifier on train-set edge embeddings
        edge_classifier = LogisticRegression(random_state=0)
        edge_classifier.fit(train_edge_embs, train_edge_labels)

        # Predicted edge scores: probability of being of class "1" (real edge)
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
        test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]

        runtime = time.time() - start_time

        # Calculate scores
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            gae_val_roc = roc_auc_score(val_edge_labels, val_preds)
            gae_val_roc_curve = roc_curve(val_edge_labels, val_preds)
            gae_val_ap = average_precision_score(val_edge_labels, val_preds)
        else:
            gae_val_roc = None
            gae_val_roc_curve = None
            gae_val_ap = None

        gae_test_roc = roc_auc_score(test_edge_labels, test_preds)
        gae_test_roc_curve = roc_curve(test_edge_labels, test_preds)
        gae_test_ap = average_precision_score(test_edge_labels, test_preds)

    # Record scores
    gae_scores = {}

    gae_scores['test_roc'] = gae_test_roc
    gae_scores['test_roc_curve'] = gae_test_roc_curve
    gae_scores['test_ap'] = gae_test_ap

    gae_scores['val_roc'] = gae_val_roc
    gae_scores['val_roc_curve'] = gae_val_roc_curve
    gae_scores['val_ap'] = gae_val_ap

    gae_scores['val_roc_per_epoch'] = val_roc_score
    gae_scores['runtime'] = runtime
    return gae_scores
Example #44
0
W[3, 6] = W[6, 3] = 1
W[5, 9] = W[9, 5] = 1
W[6, 7] = W[7, 6] = 1
W[6, 8] = W[8, 6] = 1
W[7, 9] = W[9, 7] = 1

# Class labels
# 0 where no label is given
y = dok_matrix([[1, 0, 1, 0, 0, 1, 0, 1, 0, 0]]).T

A = W.T * W

# Identity matrix
I = identity(10)

D = dia_matrix((A.sum(0), [0]), (10, 10)).tocsr()
D = scipy.sparse.diags(numpy.reciprocal(numpy.sqrt(D).data))
L = I - D * A * D
print(L)
lamb = 0.0001

# In[22]:

# [ 0.45966011  0.23023256  0.46046512  0.1519678   0.5372093  -0.57951699
#  -0.38980322 -0.51627907 -0.19490161 -0.15903399]

f = spsolve((I + (1 - lamb) * L), y)

# In[23]:

# top 1, 5, 10,
Example #45
0
def TVRegDiff(data,
              itern,
              alph,
              u0=None,
              scale='small',
              ep=1e-6,
              dx=None,
              plotflag=_has_matplotlib,
              diagflag=True):
    """
    Estimate derivatives from noisy data using the Total
    Variation Regularized Numerical Differentiation (TVDiff)
    algorithm.

    Parameters
    ----------
    data : ndarray
        One-dimensional array containing series data to be
        differentiated.
    itern : int
        Number of iterations to run the main loop.  A stopping
        condition based on the norm of the gradient vector g
        below would be an easy modification.  No default value.
    alph : float
        Regularization parameter.  This is the main parameter
        to fiddle with.  Start by varying by orders of
        magnitude until reasonable results are obtained.  A
        value to the nearest power of 10 is usually adequate.
        No default value.  Higher values increase
        regularization strength and improve conditioning.
    u0 : ndarray, optional
        Initialization of the iteration.  Default value is the
        naive derivative (without scaling), of appropriate
        length (this being different for the two methods).
        Although the solution is theoretically independent of
        the initialization, a poor choice can exacerbate
        conditioning issues when the linear system is solved.
    scale : {'large', 'small'}, str, optional (case insensitive)
        Default is 'small'.  'small' has somewhat better boundary
        behavior, but becomes unwieldy for data larger than
        1000 entries or so.  'large' has simpler numerics but
        is more efficient for large-scale problems.  'large' is
        more readily modified for higher-order derivatives,
        since the implicit differentiation matrix is square.
    ep : float, optional 
        Parameter for avoiding division by zero.  Default value
        is 1e-6.  Results should not be very sensitive to the
        value.  Larger values improve conditioning and
        therefore speed, while smaller values give more
        accurate results with sharper jumps.
    dx : float, optional    
        Grid spacing, used in the definition of the derivative
        operators.  Default is the reciprocal of the data size.
    plotflag : bool, optional
        Flag whether to display plot at each iteration.
        Default is True when matplotlib is available.  Useful,
        but adds significant running time.
    diagflag : bool, optional
        Flag whether to display diagnostics at each
        iteration.  Default is True.  Useful for diagnosing
        preconditioning problems.  When tolerance is not met,
        an early iterate being best is more worrying than a
        large relative residual.

    Returns
    -------
    u : ndarray
        Estimate of the regularized derivative of data.  Due to
        different grid assumptions, length(u) = length(data) + 1
        if scale = 'small', otherwise length(u) = length(data).
    """

    # Make sure we have a column vector
    data = np.array(data)
    assert len(data.shape) == 1, "data is not one-dimensional"
    # Get the data size.
    n = len(data)

    # Default checking. (u0 is done separately within each method.)
    if dx is None:
        dx = 1.0 / n

    # Different methods for small- and large-scale problems.
    if (scale.lower() == 'small'):

        # Construct differentiation matrix.
        c = np.ones(n + 1) / dx
        D = sparse.spdiags([-c, c], [0, 1], n, n + 1)

        DT = D.transpose()

        # Construct antidifferentiation operator and its adjoint.
        def A(x):
            return (np.cumsum(x) - 0.5 * (x + x[0]))[1:] * dx

        def AT(w):
            return (sum(w) * np.ones(n + 1) - np.transpose(
                np.concatenate(([sum(w) / 2.0], np.cumsum(w) - w / 2.0)))) * dx

        # Default initialization is naive derivative

        if u0 is None:
            u0 = np.concatenate(([0], np.diff(data), [0]))

        u = u0
        # Since Au( 0 ) = 0, we need to adjust.
        ofst = data[0]
        # Precompute.
        ATb = AT(ofst - data)  # input: size n

        # Main loop.
        for ii in range(1, itern + 1):
            # Diagonal matrix of weights, for linearizing E-L equation.
            Q = sparse.spdiags(1. / (np.sqrt((D * u)**2 + ep)), 0, n, n)
            # Linearized diffusion matrix, also approximation of Hessian.
            L = dx * DT * Q * D

            # Gradient of functional.
            g = AT(A(u)) + ATb + alph * L * u

            # Prepare to solve linear equation.
            tol = 1e-6
            maxit = 400
            # Simple preconditioner.
            P = alph * sparse.spdiags(L.diagonal() + 1, 0, n + 1, n + 1)

            def linop(v):
                return (alph * L * v + AT(A(v)))

            linop = splin.LinearOperator((n + 1, n + 1), linop)
            P = None
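            # Descriptive comment (added): P is reset to None here, so the simple
            # preconditioner constructed above is not actually passed to cg.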
            if diagflag:
                [s, info_i] = sparse.linalg.cg(linop,
                                               g,
                                               x0=None,
                                               tol=tol,
                                               maxiter=maxit,
                                               callback=None,
                                               M=P,
                                               atol='legacy')
                log_iteration(ii, s[0], u, g)
                if (info_i > 0):
                    print(ii)
                    logging.warning(
                        "WARNING - convergence to tolerance not achieved!")
                elif (info_i < 0):
                    logging.warning("WARNING - illegal input or breakdown")
            else:
                [s, info_i] = sparse.linalg.cg(linop,
                                               g,
                                               x0=None,
                                               tol=tol,
                                               maxiter=maxit,
                                               callback=None,
                                               M=P,
                                               atol='legacy')
            # Update solution.
            u = u - s
            # Display plot.
            if plotflag:
                plt.plot(u)
                plt.show()

    elif (scale.lower() == 'large'):

        # Construct anti-differentiation operator and its adjoint.
        def A(v):
            return np.cumsum(v)

        def AT(w):
            return (sum(w) * np.ones(len(w)) -
                    np.transpose(np.concatenate(([0.0], np.cumsum(w[:-1])))))

        # Construct differentiation matrix.
        c = np.ones(n)
        D = sparse.spdiags([-c, c], [0, 1], n, n) / dx
        mask = np.ones((n, n))
        mask[-1, -1] = 0.0
        D = sparse.dia_matrix(D.multiply(mask))
        DT = D.transpose()
        # Since Au( 0 ) = 0, we need to adjust.
        data = data - data[0]
        # Default initialization is naive derivative.
        if u0 is None:
            u0 = np.concatenate(([0], np.diff(data)))
        u = u0
        # Precompute.
        ATd = AT(data)

        # Main loop.
        for ii in range(1, itern + 1):
            # Diagonal matrix of weights, for linearizing E-L equation.
            Q = sparse.spdiags(1. / np.sqrt((D * u)**2.0 + ep), 0, n, n)
            # Linearized diffusion matrix, also approximation of Hessian.
            L = DT * Q * D
            # Gradient of functional.
            g = AT(A(u)) - ATd
            g = g + alph * L * u
            # Build preconditioner.
            c = np.cumsum(range(n, 0, -1))
            B = alph * L + sparse.spdiags(c[::-1], 0, n, n)
            # droptol = 1.0e-2
            R = sparse.dia_matrix(np.linalg.cholesky(B.todense()))
            # Prepare to solve linear equation.
            tol = 1.0e-6
            maxit = 200

            def linop(v):
                return (alph * L * v + AT(A(v)))

            linop = splin.LinearOperator((n, n), linop)

            if diagflag:
                [s, info_i] = sparse.linalg.cg(linop,
                                               -g,
                                               x0=None,
                                               tol=tol,
                                               maxiter=maxit,
                                               callback=None,
                                               M=np.dot(R.transpose(), R),
                                               atol='legacy')
                log_iteration(ii, s[0], u, g)
                if (info_i > 0):
                    print(ii)
                    logging.warning(
                        "WARNING - convergence to tolerance not achieved!")
                elif (info_i < 0):
                    logging.warning("WARNING - illegal input or breakdown")

            else:
                [s, info_i] = sparse.linalg.cg(linop,
                                               -g,
                                               x0=None,
                                               tol=tol,
                                               maxiter=maxit,
                                               callback=None,
                                               M=np.dot(R.transpose(), R),
                                               atol='legacy')
            # Update current solution
            u = u + s
            # Display plot
            if plotflag:
                plt.close('all')
                plt.plot(u / dx)
                plt.show()

        u = u / dx

    return u
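# Hedged usage sketch (added; not part of the original example): differentiate a
# noisy ramp with the default 'small'-scale method. It assumes the module-level
# imports implied by the function body (numpy as np, scipy.sparse as sparse,
# scipy.sparse.linalg as splin); parameter values are illustrative, not tuned.
t_demo = np.linspace(0.0, 1.0, 200)
data_demo = np.abs(t_demo - 0.5) + 0.01 * np.random.randn(t_demo.size)
u_demo = TVRegDiff(data_demo, itern=10, alph=0.1, dx=t_demo[1] - t_demo[0],
                   plotflag=False, diagflag=False)
# For scale='small' the returned estimate has len(data) + 1 entries.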
Example #46
0
def biharmonic_matrix_2d(Nx=3, Ny=3, bc='CCCC'):
    """
    generates the discrete biharmonic operator in matrix form
    """
    checkInputArgs(Nx, Ny, bc)

    #np.set_printoptions(threshold=np.nan,linewidth=230,precision=2,suppress=True)

    diag = 6 * [None]

    if bc == 'CCCC':
        diag[0] = 20 + np.zeros((Nx - 1) * (Ny - 1))
    elif bc == 'SSSS':
        diag[0] = np.array([18] + (Ny - 3) * [19] + [18] +
                           sum([[19] + (Ny - 3) * [20] + [19]] *
                               (Nx - 3), []) + [18] + (Ny - 3) * [19] + [18])
    elif bc == 'CCCS':
        diag[0] = np.array(sum([[19] + [20] * (Ny - 2)] * (Nx - 1), []))
    elif bc == 'CCSC':
        diag[0] = np.array(sum([[20] * (Ny - 2) + [19]] * (Nx - 1), []))
    elif bc == 'CCSS':
        diag[0] = np.array(sum([[19] + [20] * (Ny - 3) + [19]] * (Nx - 1), []))
    elif bc == 'CSCC':
        diag[0] = [20] * (Ny - 1) * Nx + [19] * (Ny - 1)
    elif bc == 'CSCS':
        diag[0] = sum([[19] + [20] * (Ny - 2)] *
                      (Nx - 2), []) + [18] + [19] * (Ny - 2)
    elif bc == 'CSSC':
        diag[0] = sum([[20] * (Ny - 2) + [19]] *
                      (Nx - 2), []) + [19] * (Ny - 2) + [18]
    elif bc == 'CSSS':
        diag[0] = sum([[19] + [20] * (Ny - 3) + [19]] *
                      (Nx - 2), []) + [18] + [19] * (Ny - 3) + [18]
    elif bc == 'SCCC':
        diag[0] = [19] * (Ny - 1) + [20] * (Ny - 1) * (Nx - 2)
    elif bc == 'SCCS':
        diag[0] = [18] + [19] * (Ny - 2) + sum([[19] + [20] * (Ny - 2)] *
                                               (Nx - 2), [])
    elif bc == 'SCSC':
        diag[0] = [19] * (Ny - 2) + [18] + sum([[20] * (Ny - 2) + [19]] *
                                               (Nx - 2), [])
    elif bc == 'SCSS':
        diag[0] = [18] + [19] * (Ny - 3) + [18] + sum([[19] + [20] *
                                                       (Ny - 3) + [19]] *
                                                      (Nx - 2), [])
    elif bc == 'SSCC':
        diag[0] = [19] * (Ny - 1) + [20] * (Ny - 1) * (Nx - 3) + [19
                                                                  ] * (Ny - 1)
    elif bc == 'SSCS':
        diag[0] = [18] + [19] * (Ny - 2) + sum(
            [[19] + [20] * (Ny - 2)] * (Nx - 3), []) + [18] + [19] * (Ny - 2)
    elif bc == 'SSSC':
        diag[0] = [19] * (Ny - 2) + [18] + sum(
            [[20] * (Ny - 2) + [19]] * (Nx - 3), []) + [19] * (Ny - 2) + [18]
    else:
        raise NotImplementedError(
            'free boundary conditions are not implemented yet')

    diag[1] = -8 + np.zeros((Nx - 1) * (Ny - 1))
    for i in range(1, Nx):
        diag[1][i * (Ny - 1) - 1] = 0
    diag[2] = np.ones((Nx - 1) * (Ny - 1))
    for i in range(1, Nx):
        diag[2][i * (Ny - 1) - 1] = 0
        diag[2][i * (Ny - 1) - 2] = 0
    diag[3] = -8 + np.zeros((Nx - 1) * (Ny - 1))
    diag[4] = 2 + np.zeros((Nx - 1) * (Ny - 1))
    for i in range(1, Nx):
        diag[4][i * (Ny - 1) - 1] = 0
    diag[5] = np.ones((Nx - 1) * (Ny - 1))

    mat = dia_matrix(
        ([diag[0], np.roll(diag[1], 1), diag[1], np.roll(diag[2], 2), diag[2],
          diag[3], diag[3], np.roll(diag[4], 1), np.roll(diag[4], Ny - 1),
          np.roll(diag[4], -Ny + 2), diag[4], diag[5], diag[5]],
         [0, 1, -1, 2, -2, Ny - 1, -Ny + 1, Ny, Ny - 2, -Ny + 2, -Ny,
          2 * (Ny - 1), 2 * (-Ny + 1)]),
        shape=((Nx - 1) * (Ny - 1), (Nx - 1) * (Ny - 1)))

    return mat
Example #47
0
def _load_nonlink_level(handler, level, pathtable, pathname):
    """
    Loads level and builds appropriate type, without handling softlinks
    """
    if isinstance(level, tables.Group):
        if _sns and (level._v_title.startswith('SimpleNamespace:')
                     or DEEPDISH_IO_ROOT_IS_SNS in level._v_attrs):
            val = SimpleNamespace()
            dct = val.__dict__
        elif level._v_title.startswith('list:'):
            dct = {}
            val = []
        else:
            dct = {}
            val = dct
        # in case of recursion, object needs to be put in pathtable
        # before trying to fully load it
        pathtable[pathname] = val

        # Load sub-groups
        for grp in level:
            lev = _load_level(handler, grp, pathtable)
            n = grp._v_name
            # Check if it's a complicated pair or a string-value pair
            if n.startswith('__pair'):
                dct[lev['key']] = lev['value']
            else:
                dct[n] = lev

        # Load attributes
        for name in level._v_attrs._f_list():
            if name.startswith(DEEPDISH_IO_PREFIX):
                continue
            v = level._v_attrs[name]
            dct[name] = v

        if level._v_title.startswith('list:'):
            N = int(level._v_title[len('list:'):])
            for i in range(N):
                val.append(dct['i{}'.format(i)])
            return val
        elif level._v_title.startswith('tuple:'):
            N = int(level._v_title[len('tuple:'):])
            lst = []
            for i in range(N):
                lst.append(dct['i{}'.format(i)])
            return tuple(lst)
        elif level._v_title.startswith('nonetype:'):
            return None
        elif is_pandas_dataframe(level):
            assert _pandas, "pandas is required to read this file"
            store = _HDFStoreWithHandle(handler)
            return store.get(level._v_pathname)
        elif level._v_title.startswith('sparse:'):
            frm = level._v_attrs.format
            if frm in ('csr', 'csc', 'bsr'):
                shape = tuple(level.shape[:])
                cls = {
                    'csr': sparse.csr_matrix,
                    'csc': sparse.csc_matrix,
                    'bsr': sparse.bsr_matrix
                }
                matrix = cls[frm](shape)
                matrix.data = level.data[:]
                matrix.indices = level.indices[:]
                matrix.indptr = level.indptr[:]
                matrix.maxprint = level._v_attrs.maxprint
                return matrix
            elif frm == 'dia':
                shape = tuple(level.shape[:])
                matrix = sparse.dia_matrix(shape)
                matrix.data = level.data[:]
                matrix.offsets = level.offsets[:]
                matrix.maxprint = level._v_attrs.maxprint
                return matrix
            elif frm == 'coo':
                shape = tuple(level.shape[:])
                matrix = sparse.coo_matrix(shape)
                matrix.data = level.data[:]
                matrix.col = level.col[:]
                matrix.row = level.row[:]
                matrix.maxprint = level._v_attrs.maxprint
                return matrix
            else:
                raise ValueError('Unknown sparse matrix type: {}'.format(frm))
        else:
            return val

    elif isinstance(level, tables.VLArray):
        if level.shape == (1, ):
            return _load_pickled(level)
        else:
            return level[:]

    elif isinstance(level, tables.Array):
        if 'zeroarray_dtype' in level._v_attrs:
            # Unpack zero-size arrays (shape is stored in an HDF5 array and
            # type is stored in the attribute 'zeroarray_dtype')
            dtype = level._v_attrs.zeroarray_dtype
            sh = level[:]
            return np.zeros(tuple(sh), dtype=dtype)

        if 'strtype' in level._v_attrs:
            strtype = level._v_attrs.strtype
            itemsize = level._v_attrs.itemsize
            if strtype == b'unicode':
                return level[:].view(dtype=(np.unicode_, itemsize))
            elif strtype == b'ascii':
                return level[:].view(dtype=(np.string_, itemsize))
        # This serves two purposes:
        # (1) unpack big integers: the only time we save arrays like this
        # (2) unpack non-flammkuchen "scalars"
        if level.shape == ():
            return level[()]

        return level[:]
Example #48
0
    def create_biharmonic_solver(self, boundary_verts, clip_D=0.1):
        r"""Set up biharmonic equation with Dirichlet boundary conditions on the cortical
        mesh and precompute Cholesky factorization for solving it. The vertices listed in
        `boundary_verts` are considered part of the boundary, and will not be included in
        the factorization.

        To facilitate Cholesky decomposition (which requires a symmetric matrix), the
        squared Laplace-Beltrami operator is separated into left-hand-side (L2) and
        right-hand-side (Dinv) parts. If we write the L-B operator as the product of
        the stiffness matrix (V-W) and the inverse mass matrix (Dinv), the biharmonic
        problem is as follows (with `u` denoting non-boundary vertices)

        .. math::
            :nowrap:
            
            \begin{eqnarray}
            L^2_{u} \phi &=& -\rho_{u} \\
            \left[ D^{-1} (V-W) D^{-1} (V-W) \right]_{u} \phi &=& -\rho_{u} \\
            \left[ (V-W) D^{-1} (V-W) \right]_{u} \phi &=& -\left[D \rho\right]_{u}
            \end{eqnarray}

        Parameters
        ----------
        boundary_verts : list or ndarray of length V
            Indices of vertices that will be part of the Dirichlet boundary.

        Returns
        -------
        lhs : sparse matrix
            Left side of biharmonic problem, (V-W) D^{-1} (V-W)
        rhs : sparse matrix, dia
            Right side of biharmonic problem, D
        Dinv : sparse matrix, dia
            Inverse mass matrix, D^{-1}
        lhsfac : cholesky Factor object
            Factorized left side, solves biharmonic problem
        notboundary : ndarray, int
            Indices of non-boundary vertices
        """
        try:
            from scikits.sparse.cholmod import cholesky
            factorize = lambda x: cholesky(x).solve_A
        except ImportError:
            factorize = sparse.linalg.dsolve.factorized
            
        B, D, W, V = self.laplace_operator
        npt = len(D)

        g = np.nonzero(D > 0)[0] # Find vertices with non-zero mass
        #g = np.nonzero((L.sum(0) != 0).A.ravel())[0] # Find vertices with non-zero mass
        notboundary = np.setdiff1d(np.arange(npt)[g], boundary_verts) # find non-boundary verts
        D = np.clip(D, clip_D, D.max())

        Dinv = sparse.dia_matrix((D**-1,[0]), (npt,npt)).tocsr() # construct Dinv
        L = Dinv.dot((V-W)) # construct Laplace-Beltrami operator
        
        lhs = (V-W).dot(L) # construct left side, almost squared L-B operator
        #lhsfac = cholesky(lhs[notboundary][:,notboundary]) # factorize
        lhsfac = factorize(lhs[notboundary][:,notboundary]) # factorize
        
        return lhs, D, Dinv, lhsfac, notboundary
Example #49
0
    def _assemble(self, mu=None):
        grid = self.grid

        # compute the local coordinates of the codim-1 subentity centers in the reference element
        reference_element = grid.reference_element(0)
        subentity_embedding = reference_element.subentity_embedding(1)
        subentity_centers = (
            np.einsum('eij,j->ei', subentity_embedding[0],
                      reference_element.sub_reference_element(1).center()) +
            subentity_embedding[1])

        # compute shift for periodic boundaries
        embeddings = grid.embeddings(0)
        superentities = grid.superentities(1, 0)
        superentity_indices = grid.superentity_indices(1, 0)
        boundary_mask = grid.boundary_mask(1)
        inner_mask = ~boundary_mask
        SE_I0 = superentities[:, 0]
        SE_I1 = superentities[:, 1]
        SE_I0_I = SE_I0[inner_mask]
        SE_I1_I = SE_I1[inner_mask]

        SHIFTS = (np.einsum(
            'eij,ej->ei', embeddings[0][SE_I0_I, :, :],
            subentity_centers[superentity_indices[:, 0][inner_mask]]) +
                  embeddings[1][SE_I0_I, :])
        SHIFTS -= (np.einsum(
            'eij,ej->ei', embeddings[0][SE_I1_I, :, :],
            subentity_centers[superentity_indices[:, 1][inner_mask]]) +
                   embeddings[1][SE_I1_I, :])

        # compute distances for gradient approximations
        centers = grid.centers(1)
        orthogonal_centers = grid.orthogonal_centers()
        VOLS = grid.volumes(1)

        INNER_DISTS = np.linalg.norm(orthogonal_centers[SE_I0_I, :] -
                                     orthogonal_centers[SE_I1_I, :] - SHIFTS,
                                     axis=1)
        del SHIFTS

        # assemble matrix
        FLUXES = VOLS[inner_mask] / INNER_DISTS
        if self.diffusion_function is not None:
            FLUXES *= self.diffusion_function(centers[inner_mask], mu=mu)
        if self.diffusion_constant is not None:
            FLUXES *= self.diffusion_constant
        del INNER_DISTS

        FLUXES = np.concatenate((-FLUXES, -FLUXES, FLUXES, FLUXES))
        FLUXES_I0 = np.concatenate((SE_I0_I, SE_I1_I, SE_I0_I, SE_I1_I))
        FLUXES_I1 = np.concatenate((SE_I1_I, SE_I0_I, SE_I0_I, SE_I1_I))

        if self.boundary_info.has_dirichlet:
            dirichlet_mask = self.boundary_info.dirichlet_mask(1)
            SE_I0_D = SE_I0[dirichlet_mask]
            boundary_normals = grid.unit_outer_normals()[
                SE_I0_D, superentity_indices[:, 0][dirichlet_mask]]
            BOUNDARY_DISTS = np.sum(
                (centers[dirichlet_mask, :] - orthogonal_centers[SE_I0_D, :]) *
                boundary_normals,
                axis=-1)

            DIRICHLET_FLUXES = VOLS[dirichlet_mask] / BOUNDARY_DISTS
            if self.diffusion_function is not None:
                DIRICHLET_FLUXES *= self.diffusion_function(
                    centers[dirichlet_mask], mu=mu)
            if self.diffusion_constant is not None:
                DIRICHLET_FLUXES *= self.diffusion_constant

            FLUXES = np.concatenate((FLUXES, DIRICHLET_FLUXES))
            FLUXES_I0 = np.concatenate((FLUXES_I0, SE_I0_D))
            FLUXES_I1 = np.concatenate((FLUXES_I1, SE_I0_D))

        A = coo_matrix((FLUXES, (FLUXES_I0, FLUXES_I1)),
                       shape=(self.source.dim, self.source.dim))
        A = (dia_matrix(
            ([1. / grid.volumes(0)], [0]), shape=(grid.size(0), ) * 2) *
             A).tocsc()

        return A
Example #50
0
    def jacobian(self, U, mu=None):
        assert U in self.source and len(U) == 1
        mu = self.parse_parameter(mu)

        if not hasattr(self, '_grid_data'):
            self._fetch_grid_data()

        U = U.data.ravel()

        g = self.grid
        bi = self.boundary_info
        gd = self._grid_data
        SUPE = gd['SUPE']
        VOLS0 = gd['VOLS0']
        VOLS1 = gd['VOLS1']
        BOUNDARIES = gd['BOUNDARIES']
        CENTERS = gd['CENTERS']
        DIRICHLET_BOUNDARIES = gd['DIRICHLET_BOUNDARIES']
        NEUMANN_BOUNDARIES = gd['NEUMANN_BOUNDARIES']
        UNIT_OUTER_NORMALS = gd['UNIT_OUTER_NORMALS']
        INNER = np.setdiff1d(np.arange(g.size(1)), BOUNDARIES)

        solver_options = self.solver_options
        delta = solver_options.get(
            'jacobian_delta') if solver_options else None
        if delta is None:
            delta = jacobian_options()['delta']

        if bi.has_dirichlet:
            if hasattr(self, '_dirichlet_values'):
                dirichlet_values = self._dirichlet_values
            elif self.dirichlet_data is not None:
                dirichlet_values = self.dirichlet_data(
                    CENTERS[DIRICHLET_BOUNDARIES], mu=mu)
            else:
                dirichlet_values = np.zeros_like(DIRICHLET_BOUNDARIES)
            F_dirichlet = self.numerical_flux.evaluate_stage1(
                dirichlet_values, mu)

        UP = U + delta
        UM = U - delta
        F = self.numerical_flux.evaluate_stage1(U, mu)
        FP = self.numerical_flux.evaluate_stage1(UP, mu)
        FM = self.numerical_flux.evaluate_stage1(UM, mu)
        del UP, UM

        F_edge = [f[SUPE] for f in F]
        FP_edge = [f[SUPE] for f in FP]
        FM_edge = [f[SUPE] for f in FM]
        del F, FP, FM

        F0P_edge = [f.copy() for f in F_edge]
        for f, ff in zip(F0P_edge, FP_edge):
            f[:, 0] = ff[:, 0]
            f[BOUNDARIES, 1] = f[BOUNDARIES, 0]
        if bi.has_dirichlet:
            for f, f_d in zip(F0P_edge, F_dirichlet):
                f[DIRICHLET_BOUNDARIES, 1] = f_d
        NUM_FLUX_0P = self.numerical_flux.evaluate_stage2(
            F0P_edge, UNIT_OUTER_NORMALS, VOLS1, mu)
        del F0P_edge

        F0M_edge = [f.copy() for f in F_edge]
        for f, ff in zip(F0M_edge, FM_edge):
            f[:, 0] = ff[:, 0]
            f[BOUNDARIES, 1] = f[BOUNDARIES, 0]
        if bi.has_dirichlet:
            for f, f_d in zip(F0M_edge, F_dirichlet):
                f[DIRICHLET_BOUNDARIES, 1] = f_d
        NUM_FLUX_0M = self.numerical_flux.evaluate_stage2(
            F0M_edge, UNIT_OUTER_NORMALS, VOLS1, mu)
        del F0M_edge

        D_NUM_FLUX_0 = (NUM_FLUX_0P - NUM_FLUX_0M)
        D_NUM_FLUX_0 /= (2 * delta)
        if bi.has_neumann:
            D_NUM_FLUX_0[NEUMANN_BOUNDARIES] = 0
        del NUM_FLUX_0P, NUM_FLUX_0M

        F1P_edge = [f.copy() for f in F_edge]
        for f, ff in zip(F1P_edge, FP_edge):
            f[:, 1] = ff[:, 1]
            f[BOUNDARIES, 1] = f[BOUNDARIES, 0]
        if bi.has_dirichlet:
            for f, f_d in zip(F1P_edge, F_dirichlet):
                f[DIRICHLET_BOUNDARIES, 1] = f_d
        NUM_FLUX_1P = self.numerical_flux.evaluate_stage2(
            F1P_edge, UNIT_OUTER_NORMALS, VOLS1, mu)
        del F1P_edge, FP_edge

        F1M_edge = F_edge
        for f, ff in zip(F1M_edge, FM_edge):
            f[:, 1] = ff[:, 1]
            f[BOUNDARIES, 1] = f[BOUNDARIES, 0]
        if bi.has_dirichlet:
            for f, f_d in zip(F1M_edge, F_dirichlet):
                f[DIRICHLET_BOUNDARIES, 1] = f_d
        NUM_FLUX_1M = self.numerical_flux.evaluate_stage2(
            F1M_edge, UNIT_OUTER_NORMALS, VOLS1, mu)
        del F1M_edge, FM_edge
        D_NUM_FLUX_1 = (NUM_FLUX_1P - NUM_FLUX_1M)
        D_NUM_FLUX_1 /= (2 * delta)
        if bi.has_neumann:
            D_NUM_FLUX_1[NEUMANN_BOUNDARIES] = 0
        del NUM_FLUX_1P, NUM_FLUX_1M

        I1 = np.hstack([
            SUPE[INNER, 0], SUPE[INNER, 0], SUPE[INNER, 1], SUPE[INNER, 1],
            SUPE[BOUNDARIES, 0]
        ])
        I0 = np.hstack([
            SUPE[INNER, 0], SUPE[INNER, 1], SUPE[INNER, 0], SUPE[INNER, 1],
            SUPE[BOUNDARIES, 0]
        ])
        V = np.hstack([
            D_NUM_FLUX_0[INNER], -D_NUM_FLUX_0[INNER], D_NUM_FLUX_1[INNER],
            -D_NUM_FLUX_1[INNER], D_NUM_FLUX_0[BOUNDARIES]
        ])

        A = coo_matrix((V, (I0, I1)), shape=(g.size(0), g.size(0)))
        A = csc_matrix(A).copy(
        )  # See pymor.operators.cg.DiffusionOperatorP1 for why copy() is necessary
        A = dia_matrix(([1. / VOLS0], [0]), shape=(g.size(0), ) * 2) * A

        return NumpyMatrixOperator(A,
                                   source_id=self.source.id,
                                   range_id=self.range.id)
Example #51
0
    def _assemble(self, mu=None):
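        # Descriptive comment (added): diagonal operator with the codim-0 cell
        # volumes on the main diagonal, i.e. what serves as the mass matrix in a
        # finite-volume discretization.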

        A = dia_matrix((self.grid.volumes(0), [0]),
                       shape=(self.grid.size(0), ) * 2)

        return A
Example #52
0
def smoothing_matrix(vertices, adj_mat, smoothing_steps=20, verbose=None):
    """Create a smoothing matrix.

    This smoothing matrix can be used to interpolate data defined
    for a subset of vertices onto mesh with an adjancency matrix given by
    adj_mat.

    If smoothing_steps is None, as many smoothing steps are applied until
    the whole mesh is filled with with non-zeros. Only use this option if
    the vertices correspond to a subsampled version of the mesh.

    Parameters
    ----------
    vertices : 1d array
        vertex indices
    adj_mat : sparse matrix
        N x N adjacency matrix of the full mesh
    smoothing_steps : int or None
        number of smoothing steps (Default: 20)
    %(verbose)s

    Returns
    -------
    smooth_mat : sparse matrix
        smoothing matrix with size N x len(vertices)
    """
    from scipy import sparse

    logger.info("Updating smoothing matrix, be patient..")

    if smoothing_steps == 0:
        return _nearest(vertices, adj_mat)

    e = adj_mat.copy()
    e.data[e.data == 2] = 1
    n_vertices = e.shape[0]
    e = e + sparse.eye(n_vertices, n_vertices)
    idx_use = vertices
    smooth_mat = 1.0
    n_iter = smoothing_steps if smoothing_steps is not None else 1000
    for k in range(n_iter):
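        # Descriptive comment (added): restrict the self-included adjacency to the
        # currently filled columns, then row-normalize so each step averages a
        # vertex's value over its already-filled neighbors.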
        e_use = e[:, idx_use]

        data1 = e_use * np.ones(len(idx_use))
        idx_use = np.where(data1)[0]
        scale_mat = sparse.dia_matrix((1 / data1[idx_use], 0),
                                      shape=(len(idx_use), len(idx_use)))

        smooth_mat = scale_mat * e_use[idx_use, :] * smooth_mat

        logger.info("Smoothing matrix creation, step %d" % (k + 1))
        if smoothing_steps is None and len(idx_use) >= n_vertices:
            break

    # Make sure the smoothing matrix has the right number of rows
    # and is in COO format
    smooth_mat = smooth_mat.tocoo()
    smooth_mat = sparse.coo_matrix((smooth_mat.data,
                                    (idx_use[smooth_mat.row],
                                     smooth_mat.col)),
                                   shape=(n_vertices,
                                          len(vertices)))

    return smooth_mat
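A minimal sketch of the per-step normalisation above, on a toy 4-vertex path graph (all names and values are illustrative): the dia_matrix built from the reciprocal row sums turns each smoothing step into an average over the neighbours that already carry data.

import numpy as np
from scipy import sparse

adj = sparse.csr_matrix(np.array([[0, 1, 0, 0],
                                  [1, 0, 1, 0],
                                  [0, 1, 0, 1],
                                  [0, 0, 1, 0]], dtype=float))
e = (adj + sparse.eye(4)).tocsr()              # adjacency plus self-loops, as above
idx_use = np.array([0, 2])                     # vertices that carry data
e_use = e[:, idx_use]
data1 = e_use * np.ones(len(idx_use))          # number of "known" neighbours per vertex
idx_next = np.where(data1)[0]
scale = sparse.dia_matrix((1.0 / data1[idx_next], 0),
                          shape=(len(idx_next),) * 2)
step = scale * e_use[idx_next, :]              # each row now sums to 1 (averaging step)
print(step.toarray())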
Example #53
0
def getLaplacianEigs(A, NEigs):
    DEG = sparse.dia_matrix((A.sum(1).flatten(), 0), A.shape)
    L = DEG - A
    w, v = slinalg.eigsh(L, k=NEigs, sigma=0, which='LM')
    return (w, v, L)
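A hedged usage sketch for getLaplacianEigs on a toy path graph (the graph and k below are made up). Because this tiny Laplacian is exactly singular, the sketch uses a small negative sigma instead of the sigma=0 shift used above, so the shift-invert factorization stays non-singular.

import numpy as np
from scipy import sparse
from scipy.sparse import linalg as slinalg

A = sparse.csr_matrix(np.array([[0, 1, 0, 0],
                                [1, 0, 1, 0],
                                [0, 1, 0, 1],
                                [0, 0, 1, 0]], dtype=float))
DEG = sparse.dia_matrix((A.sum(1).flatten(), 0), A.shape)   # degrees on the diagonal
L = DEG - A                                                 # graph Laplacian
# shift-invert around a small negative sigma to find the smallest eigenpairs
w, v = slinalg.eigsh(L, k=2, sigma=-1e-3, which='LM')
print(w)   # smallest eigenvalue is ~0 for a connected graph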
Example #54
0
def coefUfll(h):
    return 1


def coefb(h, y):
    return (8 * h ** 2) - y


x = np.linspace(0, 1, 11)
h = 0.1
y = [3, 0, 0, 0, 0, 0, 0, 0, 4]

diag = [coefU(h) for i in range(9)]
diagFll = [coefUfll(h) for i in range(9)]
diagPrv = [coefUprv(h) for i in range(9)]
arr = dia_matrix(([diag, diagFll, diagPrv], [0, -1, 1]), shape=(9, 9)).toarray()

b = [coefb(h, i) for i in y]

yy = np.linalg.solve(arr, b).tolist()
yy.insert(0, 3)
yy.append(4)

table = pt.PrettyTable()
table.add_column("x", np.round(x, 7))
table.add_column("y", np.round(yy, 7))
print(table)

graph1 = plt.plot(x, yy, color="black", label="Runge4")
graph1 = plt.grid(True)
plt.show()
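Independent of the script above (whose coefU and coefUprv helpers are defined elsewhere in its source file), here is a small sketch of how dia_matrix aligns stacked diagonals with their offsets, which is the detail the `([diag, diagFll, diagPrv], [0, -1, 1])` call relies on: for offset -1 the last entry of that data row is ignored, and for offset +1 the first entry is. The values below are made up.

import numpy as np
from scipy.sparse import dia_matrix

main = np.array([4., 4., 4., 4.])
lower = np.array([1., 2., 3., 99.])   # 99. never appears (last entry dropped for offset -1)
upper = np.array([99., 7., 8., 9.])   # 99. never appears (first entry dropped for offset +1)
T = dia_matrix(([main, lower, upper], [0, -1, 1]), shape=(4, 4))
print(T.toarray())
# [[4. 7. 0. 0.]
#  [1. 4. 8. 0.]
#  [0. 2. 4. 9.]
#  [0. 0. 3. 4.]]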
Example #55
0
def gae_for_na(name):
    """
    train and evaluate disambiguation results for a specific name
    :param name:  author name
    :return: evaluation results
    """
    adj, features, labels = load_local_data(name=name)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()
    adj_train = gen_train_edges(adj)

    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    input_feature_dim = features.shape[1]
    if FLAGS.is_sparse:  # TODO to test
        # features = sparse_to_tuple(features.tocoo())
        # features_nonzero = features[1].shape[0]
        features = features.todense()  # TODO
    else:
        features = normalize_vectors(features)

    # Define placeholders
    placeholders = {
        # 'features': tf.sparse_placeholder(tf.float32),
        'features': tf.placeholder(tf.float32,
                                   shape=(None, input_feature_dim)),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=())
    }

    # Create model
    model = None
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, input_feature_dim)
    elif model_str == 'gcn_vae':
        model = GCNModelVAE(placeholders, input_feature_dim, num_nodes)
    pos_weight = float(adj.shape[0] * adj.shape[0] -
                       adj.sum()) / adj.sum()  # negative edges/pos edges
    print('positive edge weight', pos_weight)
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.nnz) * 2)

    # Optimizer
    with tf.name_scope('optimizer'):
        if model_str == 'gcn_ae':
            opt = OptimizerAE(preds=model.reconstructions,
                              labels=tf.reshape(
                                  tf.sparse_tensor_to_dense(
                                      placeholders['adj_orig'],
                                      validate_indices=False), [-1]),
                              pos_weight=pos_weight,
                              norm=norm)
        elif model_str == 'gcn_vae':
            opt = OptimizerVAE(preds=model.reconstructions,
                               labels=tf.reshape(
                                   tf.sparse_tensor_to_dense(
                                       placeholders['adj_orig'],
                                       validate_indices=False), [-1]),
                               model=model,
                               num_nodes=num_nodes,
                               pos_weight=pos_weight,
                               norm=norm)

    # Initialize session
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    def get_embs():
        feed_dict.update({placeholders['dropout']: 0})
        emb = sess.run(model.z_mean, feed_dict=feed_dict)  # z_mean is better
        return emb

    # Train model
    for epoch in range(FLAGS.epochs):

        t = time.time()
        # Construct feed dictionary
        feed_dict = construct_feed_dict(adj_norm, adj_label, features,
                                        placeholders)
        feed_dict.update({placeholders['dropout']: FLAGS.dropout})
        # Run single weight update
        outs = sess.run([opt.opt_op, opt.cost, opt.accuracy],
                        feed_dict=feed_dict)

        # Compute average loss
        avg_cost = outs[1]
        avg_accuracy = outs[2]

        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(avg_cost), "train_acc=",
              "{:.5f}".format(avg_accuracy), "time=",
              "{:.5f}".format(time.time() - t))

    emb = get_embs()
    n_clusters = len(set(labels))
    emb_norm = normalize_vectors(emb)
    clusters_pred = clustering(emb_norm, num_clusters=n_clusters)
    prec, rec, f1 = pairwise_precision_recall_f1(clusters_pred, labels)
    print('pairwise precision', '{:.5f}'.format(prec), 'recall',
          '{:.5f}'.format(rec), 'f1', '{:.5f}'.format(f1))
    return [prec, rec, f1], num_nodes, n_clusters
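A small self-contained illustration of the loss re-weighting computed above (toy adjacency, made-up values): pos_weight up-weights the few positive (edge) entries relative to the many non-edges, and norm rescales the overall reconstruction loss.

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix((np.ones(4), ([0, 1, 2, 3], [1, 0, 3, 2])), shape=(4, 4))
n = adj.shape[0]
pos_weight = float(n * n - adj.sum()) / adj.sum()   # negatives per positive entry
norm = n * n / float((n * n - adj.nnz) * 2)         # overall loss normalisation
print(pos_weight, norm)                             # -> 3.0 0.666...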
Example #56
0
    def geodesic_distance(self, verts, m=1.0, fem=False):
        """Minimum mesh geodesic distance (in mm) from each vertex in surface to any
        vertex in the collection `verts`.

        Geodesic distance is estimated using heat-based method (see 'Geodesics in Heat',
        Crane et al, 2012). Diffusion of heat along the mesh is simulated and then
        used to infer geodesic distance. The duration of the simulation is controlled
        by the parameter `m`. Larger values of `m` will smooth & regularize the distance
        computation. Smaller values of `m` will roughen and will usually increase error
        in the distance computation. The default value of 1.0 is probably pretty good.

        This function caches some data (sparse LU factorizations of the laplace-beltrami
        operator and the weighted adjacency matrix), so it will be much faster on
        subsequent runs.

        The time taken by this function is independent of the number of vertices in verts.

        Parameters
        ----------
        verts : 1D array-like of ints
            Set of vertices to compute distance from. This function returns the shortest
            distance to any of these vertices from every vertex in the surface.
        m : float, optional
            Reverse Euler step length. The optimal value is likely between 0.5 and 1.5.
            Default is 1.0, which should be fine for most cases.
        fem : bool, optional
            Whether to use the Finite Element Method lumped mass matrix. Not used in
            the Crane et al. 2012 paper and does not appear to improve results.

        Returns
        -------
        1D ndarray, shape (total_verts,)
            Geodesic distance (in mm) from each vertex in the surface to the closest
            vertex in `verts`.
        """
        npt = len(self.pts)
        if m not in self._rlfac_solvers or m not in self._nLC_solvers:
            B, D, W, V = self.laplace_operator
            nLC = W - V # negative laplace matrix
            if not fem:
                spD = sparse.dia_matrix((D,[0]), (npt,npt)).tocsr() # lumped mass matrix
            else:
                spD = B
            
            t = m * self.avg_edge_length ** 2 # time of heat evolution
            lfac = spD - t * nLC # backward Euler matrix

            # Exclude rows with zero weight (these break the sparse LU factorization)
            goodrows = np.nonzero(~np.array(lfac.sum(0) == 0).ravel())[0]
            self._goodrows = goodrows
            self._rlfac_solvers[m] = sparse.linalg.dsolve.factorized(lfac[goodrows][:,goodrows])
            self._nLC_solvers[m] = sparse.linalg.dsolve.factorized(nLC[goodrows][:,goodrows])

        # Solve system to get u, the heat values
        u0 = np.zeros((npt,)) # initial heat values
        u0[verts] = 1.0
        goodu = self._rlfac_solvers[m](u0[self._goodrows])
        u = np.zeros((npt,))
        u[self._goodrows] = goodu

        # Compute grad u at each face
        gradu = self.surface_gradient(u, at_verts=False)
        
        # Compute X (normalized grad u)
        #X = np.nan_to_num((-gradu.T / np.sqrt((gradu**2).sum(1))).T)
        graduT = gradu.T
        gusum = ne.evaluate("sum(gradu ** 2, 1)")
        X = np.nan_to_num(ne.evaluate("-graduT / sqrt(gusum)").T)

        # Compute integrated divergence of X at each vertex
        #x1 = x2 = x3 = np.zeros((X.shape[0],))
        c32, c13, c21 = self._cot_edge
        x1 = 0.5 * (c32 * X).sum(1)
        x2 = 0.5 * (c13 * X).sum(1)
        x3 = 0.5 * (c21 * X).sum(1)
        
        conn1, conn2, conn3 = self._polyconn
        divx = conn1.dot(x1) + conn2.dot(x2) + conn3.dot(x3)

        # Compute phi (distance)
        goodphi = self._nLC_solvers[m](divx[self._goodrows])
        phi = np.zeros((npt,))
        phi[self._goodrows] = goodphi - goodphi.min()

        # Ensure that distance is zero for selected verts
        phi[verts] = 0.0

        return phi
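A hedged, self-contained sketch of the backward-Euler heat step above, using a 1-D chain in place of a triangle mesh (all names and values below are illustrative). Note the sign: here nLC is the positive Laplacian, so the backward-Euler matrix is spD + t * nLC, whereas the method above stores the negative Laplacian and subtracts it.

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import factorized

n = 6
main = 2.0 * np.ones(n)
main[0] = main[-1] = 1.0
off = -1.0 * np.ones(n)
nLC = sparse.dia_matrix(([off, main, off], [-1, 0, 1]), shape=(n, n)).tocsr()  # chain Laplacian
spD = sparse.dia_matrix((np.ones(n), [0]), (n, n)).tocsc()   # lumped "mass" matrix (all ones)
t = 1.0 * 1.0 ** 2                                           # m * avg_edge_length ** 2
lfac = (spD + t * nLC).tocsc()                               # backward Euler matrix
solve = factorized(lfac)                                     # cached sparse LU solver
u0 = np.zeros(n)
u0[0] = 1.0                                                  # heat source at vertex 0
u = solve(u0)                                                # diffused heat values
print(u)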
Example #57
0
def mask_test_edges(adj):
    '''Randomly sample a (possibly unconnected) training graph, plus validation and test edge sets.'''

    # Remove diagonal elements
    adj = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that diag is zero:
    assert np.diag(adj.todense()).sum() == 0

    adj_triu = sp.triu(adj)
    adj_tuple = sparse_to_tuple(adj_triu)
    edges = adj_tuple[0]
    edges_all = sparse_to_tuple(adj)[0]
    num_test = int(np.floor(edges.shape[0] / 10.))
    num_val = int(np.floor(edges.shape[0] / 20.))

    all_edge_idx = np.arange(edges.shape[0])
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    train_edges = np.delete(edges,
                            np.hstack([test_edge_idx, val_edge_idx]),
                            axis=0)

    def ismember(a, b, tol=5):
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return (np.all(np.any(rows_close, axis=-1), axis=-1)
                and np.all(np.any(rows_close, axis=0), axis=0))

    test_edges_false = []
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], edges_all):
            continue
        if test_edges_false:
            if ismember([idx_j, idx_i], np.array(test_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(test_edges_false)):
                continue
        test_edges_false.append([idx_i, idx_j])

    val_edges_false = []
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, adj.shape[0])
        idx_j = np.random.randint(0, adj.shape[0])
        if idx_i == idx_j:
            continue
        if ismember([idx_i, idx_j], train_edges):
            continue
        if ismember([idx_j, idx_i], train_edges):
            continue
        if ismember([idx_i, idx_j], val_edges):
            continue
        if ismember([idx_j, idx_i], val_edges):
            continue
        if val_edges_false:
            if ismember([idx_j, idx_i], np.array(val_edges_false)):
                continue
            if ismember([idx_i, idx_j], np.array(val_edges_false)):
                continue
        val_edges_false.append([idx_i, idx_j])

    assert not ismember(test_edges_false, edges_all)
    assert not ismember(val_edges_false, edges_all)
    assert not ismember(val_edges, train_edges)
    assert not ismember(test_edges, train_edges)
    assert not ismember(val_edges, test_edges)

    data = np.ones(train_edges.shape[0])

    # Re-build adj matrix
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])),
                              shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
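For reference, a minimal standalone version of the diagonal-removal idiom used at the top of mask_test_edges (toy adjacency below): subtracting a dia_matrix built from adj.diagonal() zeroes the self-loops, and eliminate_zeros() drops the explicit zeros left behind.

import numpy as np
import scipy.sparse as sp

adj = sp.csr_matrix(np.array([[1., 1., 0.],
                              [1., 0., 1.],
                              [0., 1., 1.]]))
adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
adj.eliminate_zeros()
assert np.diag(adj.todense()).sum() == 0
print(adj.toarray())   # self-loops at (0, 0) and (2, 2) are gone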
Example #58
0
    def matrix(self, g, data, bc_weight=False):
        """
        Return the matrix for a discretization of a second order elliptic equation
        using the dual virtual element method. See self.matrix_rhs for a detailed
        description.

        Additional parameter:
        --------------------
        bc_weight: to compute the infinity norm of the matrix and use it as a
            weight to impose the boundary conditions. Default False.

        Additional return:
        weight: if bc_weight is True return the weight computed.

        """
        # Allow short variable names in backend function
        # pylint: disable=invalid-name

        # If a 0-d grid is given then we return an identity matrix
        if g.dim == 0:
            M = sps.dia_matrix(([1, 0], 0), (self.ndof(g), self.ndof(g)))
            if bc_weight:
                return M, 1
            return M

        # Retrieve the permeability, boundary conditions, and aperture
        # The aperture is needed in the hybrid-dimensional case, otherwise is
        # assumed unitary
        param = data['param']
        k = param.get_tensor(self)
        bc = param.get_bc(self)
        a = param.get_aperture()

        faces, cells, sign = sps.find(g.cell_faces)
        index = np.argsort(cells)
        faces, sign = faces[index], sign[index]

        # Map the domain to a reference geometry (i.e. equivalent to compute
        # surface coordinates in 1d and 2d)
        c_centers, f_normals, f_centers, R, dim, _ = pp.cg.map_grid(g)

        if not data.get('is_tangential', False):
            # Rotate the permeability tensor and delete last dimension
            if g.dim < 3:
                k = k.copy()
                k.rotate(R)
                remove_dim = np.where(np.logical_not(dim))[0]
                k.perm = np.delete(k.perm, (remove_dim), axis=0)
                k.perm = np.delete(k.perm, (remove_dim), axis=1)

        # In the virtual cell approach the cell diameters should involve the
        # apertures, however to keep consistency with the hybrid-dimensional
        # approach and with the related hypotheses we avoid.
        diams = g.cell_diameters()
        # Weight for the stabilization term
        weight = np.power(diams, 2 - g.dim)

        # Allocate the data to store matrix entries, that's the most efficient
        # way to create a sparse matrix.
        size = np.sum(np.square(g.cell_faces.indptr[1:]-\
                                g.cell_faces.indptr[:-1]))
        I = np.empty(size, dtype=np.int)
        J = np.empty(size, dtype=np.int)
        dataIJ = np.empty(size)
        idx = 0

        for c in np.arange(g.num_cells):
            # For the current cell retrieve its faces
            loc = slice(g.cell_faces.indptr[c], g.cell_faces.indptr[c + 1])
            faces_loc = faces[loc]

            # Compute the H_div-mass local matrix
            A = self.massHdiv(a[c] * k.perm[0:g.dim, 0:g.dim, c],
                              c_centers[:, c], g.cell_volumes[c],
                              f_centers[:, faces_loc], f_normals[:, faces_loc],
                              sign[loc], diams[c], weight[c])[0]

            # Save values for Hdiv-mass local matrix in the global structure
            cols = np.tile(faces_loc, (faces_loc.size, 1))
            loc_idx = slice(idx, idx + cols.size)
            I[loc_idx] = cols.T.ravel()
            J[loc_idx] = cols.ravel()
            dataIJ[loc_idx] = A.ravel()
            idx += cols.size

        # Construct the global matrices
        mass = sps.coo_matrix((dataIJ, (I, J)))
        div = -g.cell_faces.T
        M = sps.bmat([[mass, div.T], [div, None]], format='csr')

        norm = sps.linalg.norm(mass, np.inf) if bc_weight else 1

        # assign the Neumann boundary conditions
        # For dual discretizations, internal boundaries
        # are handled by assigning Dirichlet conditions. Thus, we remove them
        # from the is_neu (where they belong by default) and add them in
        # is_dir.
        is_neu = np.logical_and(bc.is_neu, np.logical_not(bc.is_internal))
        if bc and np.any(is_neu):
            is_neu = np.hstack((is_neu, np.zeros(g.num_cells, dtype=np.bool)))
            M[is_neu, :] *= 0
            M[is_neu, is_neu] = norm

        if bc_weight:
            return M, norm
        return M
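A hedged sketch of the assembly pattern used in the cell loop above (toy connectivity, all-ones local matrices): each local matrix is scattered into global (I, J, dataIJ) triplets by tiling the cell's face indices, and coo_matrix sums duplicate entries where cells share a face.

import numpy as np
import scipy.sparse as sps

faces_per_cell = [np.array([0, 1, 2]), np.array([2, 3, 4])]   # toy connectivity
size = sum(f.size ** 2 for f in faces_per_cell)
I = np.empty(size, dtype=int)
J = np.empty(size, dtype=int)
dataIJ = np.empty(size)
idx = 0
for faces_loc in faces_per_cell:
    A_loc = np.ones((faces_loc.size, faces_loc.size))          # stand-in local matrix
    cols = np.tile(faces_loc, (faces_loc.size, 1))
    loc_idx = slice(idx, idx + cols.size)
    I[loc_idx] = cols.T.ravel()
    J[loc_idx] = cols.ravel()
    dataIJ[loc_idx] = A_loc.ravel()
    idx += cols.size
mass = sps.coo_matrix((dataIJ, (I, J))).tocsr()
print(mass.toarray())   # shared face 2 accumulates contributions from both cells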
Example #59
0
    def interpolate(self, fnc=None, m0=None, mn=None, set_coeffs=False):
        """
        Determines the spline's coefficients such that it interpolates
        a given function.

        Parameters
        ----------

        fnc : callable or tuple of arrays (tt, xx)

        m0 : float

        mn : float

        set_coeffs: bool
            determine whether the calculated coefficients should be set to self or not
        """

        if not callable(fnc):
            fnc = self._interpolate_array(fnc)

        assert callable(fnc)
        points = self.nodes

        # IPS()
        if 0 and not self._use_std_approach:
            # TODO: This code seems to be obsolete since 2015-12
            assert self._steady_flag

            # how many independent coefficients does the spline have
            coeffs_size = self._indep_coeffs.size

            # generate points to evaluate the function at
            # (function and spline interpolant should be equal in these)
            nodes = np.linspace(self.a, self.b, coeffs_size, endpoint=True)

            # evaluate the function
            fnc_t = np.array([fnc(t) for t in nodes])

            dep_vecs = [self.get_dependence_vectors(t) for t in nodes]
            S_dep_mat = np.array([vec[0] for vec in dep_vecs])
            S_dep_mat_abs = np.array([vec[1] for vec in dep_vecs])

            # solve the equation system
            #free_coeffs = np.linalg.solve(S_dep_mat, fnc_t - S_dep_mat_abs)
            free_coeffs = np.linalg.lstsq(S_dep_mat, fnc_t - S_dep_mat_abs)[0]

        else:
            # compute values at the nodes
            vv = np.array([fnc(t) for t in self.nodes])

            # create vector of step sizes
            #h = np.array([self.nodes[k+1] - self.nodes[k] for k in xrange(self.nodes.size-1)])
            h = np.diff(self.nodes)

            # create diagonals for the coefficient matrix of the equation system
            l = np.array([
                h[k + 1] / (h[k] + h[k + 1])
                for k in xrange(self.nodes.size - 2)
            ])
            d = 2.0 * np.ones(self.nodes.size - 2)
            u = np.array([
                h[k] / (h[k] + h[k + 1]) for k in xrange(self.nodes.size - 2)
            ])

            # right hand side of the equation system
            r = np.array([(3.0/h[k])*l[k]*(vv[k+1] - vv[k]) + (3.0/h[k+1])*u[k]*(vv[k+2]-vv[k+1])\
                          for k in xrange(self.nodes.size-2)])
            # add conditions for unique solution

            # boundary derivatives
            l = np.hstack([l, 0.0, 0.0])
            d = np.hstack([1.0, d, 1.0])
            u = np.hstack([0.0, 0.0, u])

            if m0 is None:
                m0 = (vv[1] - vv[0]) / (self.nodes[1] - self.nodes[0])

            if mn is None:
                mn = (vv[-1] - vv[-2]) / (self.nodes[-1] - self.nodes[-2])

            r = np.hstack([m0, r, mn])

            data = [l, d, u]
            offsets = [-1, 0, 1]

            # create tridiagonal coefficient matrix
            D = sparse.dia_matrix((data, offsets),
                                  shape=(self.n + 1, self.n + 1))

            # solve the equation system
            sol = sparse.linalg.spsolve(D.tocsr(), r)

            # calculate the coefficients
            coeffs = np.zeros((self.n, 4))

            # compute the coefficients of the interpolant
            if self._use_std_approach:
                for i in xrange(self.n):
                    coeffs[i, :] = [
                        vv[i],
                        sol[i],
                        3.0 / h[i]**2 * (vv[i + 1] - vv[i]) - 1.0 / h[i] *
                        (2 * sol[i] + sol[i + 1]),
                        -2.0 / h[i]**3 * (vv[i + 1] - vv[i]) + 1.0 / h[i]**2 *
                        (sol[i] + sol[i + 1]),
                    ]
            else:
                for i in xrange(self.n):
                    coeffs[i, :] = [
                        vv[i + 1], sol[i + 1],
                        3.0 / h[i]**2 * (vv[i] - vv[i + 1]) + 1.0 / h[i] *
                        (sol[i] + 2 * sol[i + 1]),
                        2.0 / h[i]**3 * (vv[i] - vv[i + 1]) + 1.0 / h[i]**2 *
                        (sol[i] + sol[i + 1])
                    ]

            # get the indices of the free coefficients
            coeff_name_split_str = [
                c.name.split('_')[-2:] for c in self._indep_coeffs_sym
            ]
            free_coeff_indices = [(int(s[0]), int(s[1]))
                                  for s in coeff_name_split_str]

            free_coeffs = np.array([coeffs[i] for i in free_coeff_indices])

        # set solution for the free coefficients
        if set_coeffs:
            self.set_coefficients(free_coeffs=free_coeffs)

            #!!! dbg test
            # self.set_coefficients(coeffs=coeffs)

        return free_coeffs
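A compact sketch of the tridiagonal set-up above, with toy sizes and made-up coefficient values: padding l with trailing zeros and u with leading zeros turns the first and last rows into identity rows, which is how the boundary-slope conditions m0 and mn are imposed.

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import spsolve

n = 4                                        # number of spline segments (hypothetical)
l = np.hstack([np.full(n - 1, 0.5), 0.0, 0.0])
d = np.hstack([1.0, 2.0 * np.ones(n - 1), 1.0])
u = np.hstack([0.0, 0.0, np.full(n - 1, 0.5)])
r = np.hstack([0.0, np.ones(n - 1), 0.0])    # m0, interior right-hand side, mn
D = sparse.dia_matrix(([l, d, u], [-1, 0, 1]), shape=(n + 1, n + 1))
sol = spsolve(D.tocsr(), r)
print(D.toarray())                           # first and last rows are identity rows
print(sol)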
Example #60
0
def tv_derivative(data,
                  xs,
                  u0=None,
                  alpha=10.,
                  maxit=1000,
                  linalg_solver_maxit=100,
                  tol=1e-4,
                  verbose=False,
                  solver='lgmres'):
    data = _np.asarray(data, dtype=_np.float64).squeeze()
    xs = _np.asarray(xs, dtype=_np.float64).squeeze()
    n = data.shape[0]
    assert xs.shape[0] == n, "the grid must have the same dimension as data"

    epsilon = 1e-6

    # grid of mid points between xs, extrapolating first and last node:
    #
    #    x--|--x--|---x---|-x-|-x
    #
    midpoints = _np.concatenate(
        ([xs[0] - .5 * (xs[1] - xs[0])], .5 * (xs[1:] + xs[:-1]),
         [xs[-1] + .5 * (xs[-1] - xs[-2])])).squeeze()
    assert midpoints.shape[0] == n + 1

    diff = _fd.get_fd_matrix_midpoints(midpoints, k=1, window_width=5)
    assert diff.shape[0] == n
    assert diff.shape[1] == n + 1

    diff_t = diff.transpose(copy=True).tocsc()
    assert diff_t.shape[0] == n + 1
    assert diff_t.shape[1] == n

    A = _cumtrapz_operator(midpoints)
    AT = A.transpose(copy=True)

    ATA = AT.dot(A)

    if u0 is None:
        u = _np.concatenate(([0], _np.diff(data), [0]))
    else:
        u = u0
    # Aadj_A = lambda v: A_adjoint(A(v))
    Aadj_offset = AT * (data[0] - data)

    E_n = _sparse.dia_matrix((n, n), dtype=xs.dtype)
    midpoints_diff = _np.diff(midpoints)

    for ii in range(1, maxit + 1):
        E_n.setdiag(midpoints_diff *
                    (1. / _np.sqrt(_np.diff(u)**2.0 + epsilon)))
        L = diff_t * E_n * diff
        g = ATA.dot(u) + Aadj_offset + alpha * L * u

        # solve linear equation.
        info_i = 0
        if solver == 'lgmres' or solver == 'lgmres_scipy':
            if solver == 'lgmres_scipy':
                s, info_i = _splin.lgmres(A=alpha * L + ATA,
                                          b=-g,
                                          x0=u,
                                          tol=tol,
                                          maxiter=linalg_solver_maxit,
                                          outer_k=7)
            else:
                from pynumtools.lgmres import lgmres as _lgmres
                s = _lgmres(A=alpha * L + ATA,
                            b=-g,
                            x0=u,
                            tol=tol,
                            maxiter=linalg_solver_maxit)
        elif solver == 'bicgstab':
            [s, info_i] = _splin.bicgstab(A=alpha * L + ATA,
                                          b=-g,
                                          x0=u,
                                          tol=tol,
                                          maxiter=linalg_solver_maxit)
        elif solver == 'spsolve':
            s = _splin.spsolve((alpha * L + ATA), -g, use_umfpack=True)
        elif solver == 'np':
            s = _np.linalg.solve(
                (alpha * L + ATA).todense().astype(_np.float64),
                (-g).astype(_np.float64))

        relative_change = _np.linalg.norm(s) / _np.linalg.norm(u)
        if verbose:
            print(
                'iteration {0:4d}: relative change = {1:.3e}, gradient norm = {2:.3e}'
                .format(ii, relative_change, _np.linalg.norm(g)))
            if info_i > 0:
                print("WARNING - convergence to tolerance not achieved!")
            elif info_i < 0:
                print("WARNING - illegal input or breakdown")

        # Update current solution
        u = u + s

    return u
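To make the re-weighting step inside the loop above more concrete, here is a small standalone sketch (toy values): E_n holds the total-variation weights on its diagonal via setdiag, and diff^T E_n diff is the smoothness operator that penalises jumps in the current derivative estimate.

import numpy as np
import scipy.sparse as sp

n = 5
u = np.array([0.0, 0.1, 0.2, 1.2, 1.3, 1.4])          # current estimate, length n + 1
midpoints_diff = 0.1 * np.ones(n)
epsilon = 1e-6
# first-difference operator mapping length-(n+1) vectors to length-n vectors
diff = sp.diags([-np.ones(n), np.ones(n)], [0, 1], shape=(n, n + 1)).tocsc()
E_n = sp.dia_matrix((n, n), dtype=float)              # empty n x n diagonal matrix
E_n.setdiag(midpoints_diff / np.sqrt(np.diff(u) ** 2 + epsilon))
L = diff.T * E_n * diff                               # (n+1) x (n+1) weighted smoothness operator
print(E_n.diagonal())                                 # small weight where the jump in u is large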