Example #1
def split_af(_af, _inds):
    """
    splits the input matrix into diagonal and off-diagonal portions, with the split being determined by _inds
    :param _af: sparse input matrix (converted to COO internally)
    :param _inds: row/column indices that define the diagonal block
    :return: (_af_non, _af_scc) as CSC matrices, with _af_non + _af_scc == _af
    """
    _af = _af.tocoo()
    _r = _af.row
    _c = _af.col
    _d = _af.data
    _d_non = []
    _d_scc = []
    _shape = _af.shape
    for i in range(len(_d)):
        if _r[i] in _inds and _c[i] in _inds:
            _d_non.append(0)
            _d_scc.append(_d[i])
        else:
            _d_non.append(_d[i])
            _d_scc.append(0)
    _af_non = csc_matrix((_d_non, (_r, _c)), shape=_shape)
    _af_scc = csc_matrix((_d_scc, (_r, _c)), shape=_shape)
    assert (_af_non + _af_scc - _af).nnz == 0
    return _af_non, _af_scc
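A minimal usage sketch (not from the original project; the 3x3 matrix and the index set {0, 1} are made up) showing how the two returned pieces recombine:

import numpy as np
from scipy.sparse import csc_matrix

af = csc_matrix(np.array([[1., 2., 0.],
                          [0., 3., 4.],
                          [5., 0., 6.]]))
af_non, af_scc = split_af(af, {0, 1})
print(af_scc.toarray())  # only entries whose row AND column are in {0, 1}
print(af_non.toarray())  # all remaining entries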
Example #2
    def test_bfs(self):
        a = igl.adjacency_matrix(self.f1)
        p, d = igl.bfs(a, 0)
        self.assertEqual(p.shape, (self.v1.shape[0],))
        self.assertEqual(d.shape, (self.v1.shape[0],))

        try:
            p, d = igl.bfs(a, -1)
            self.assertTrue(False)
        except IndexError:
            pass

        a = csc.csc_matrix(np.zeros([0, 0], dtype=np.int32))
        try:
            p, d = igl.bfs(a, 0)
            self.assertTrue(False)
        except ValueError:
            pass

        a = csc.csc_matrix(np.zeros([10, 11], dtype=np.int32))
        try:
            p, d = igl.bfs(a, 0)
            self.assertTrue(False)
        except ValueError:
            pass

        a = csc.csc_matrix(np.zeros([10, 10], dtype=np.int32))
        p, d = igl.bfs(a, 0)
        self.assertEqual(p.shape, ())
        self.assertTrue(np.array_equal(d, -np.ones(10)))
        self.assertTrue(p.flags.c_contiguous)
Example #3
 def __init__(self,
              positions,
              masses,
              springs,
              fixed,
              method,
              profiling_rate=0):
     """
     method: 'Newton' | 'FMS' | 'Jacobi'
     profiling_rate: the step rate at which the performance will be graphed
     """
     # General state setup
     self.profiling_rate = profiling_rate
     self.method = method
     self.g = -9.8
     self.m = len(positions)
     self.s = len(springs)
     self.q0 = np.array(positions).reshape(self.ndim * self.m, 1)
     self.q = copy(self.q0)
     # Store initial state
     self.state0 = copy(self.q)
     self.fixed = fixed
     self.qFixed = copy(self.q0)
     self.M = kron(diags(masses), np.eye(self.ndim), format='csc')
     self.Minv = kron(diags(
         list(map(lambda x: 0 if x == 0 else 1.0 / x, masses))),
                      np.eye(self.ndim),
                      format='csc')
     self.springs = np.array(springs, dtype=spring_type)
     self.d = np.empty((self.ndim * self.s, 1))
     # Matrices L and J setup
     self.L = csc_matrix((self.m, self.m))
     self.J = csc_matrix((self.m, self.s))
     for idx, s in enumerate(self.springs):
         Ai = None
         if idx in self.fixed:
             Ai = csc_matrix(([1], ([s['p1']], [0])), shape=(self.m, 1))
         else:
             Ai = csc_matrix(([1, -1], ([s['p1'], s['p2']], [0, 0])),
                             shape=(self.m, 1))
         self.L += s['k'] * Ai * Ai.transpose()
         self.J += s['k'] * Ai * csc_matrix(
             ([1.0], ([idx], [0])), shape=(self.s, 1)).transpose()
     self.L = kron(self.L, np.eye(self.ndim), format='csc')
     self.J = kron(self.J, np.eye(self.ndim), format='csc')
     # Matrix A precomputation (Global step)
     self.A = self.M + self.dt2 * self.L
     self.Ch = cho_factor(self.A.toarray())
     # Implemented methods
     self.implemented = {
         'FMS': self.step_LocalGlobal,
         'Jacobi': self.step_Jacobi,
         'Newton': self.step_Newton,
         'Anderson': self.step_Anderson
     }
Example #4
def load_dataset(fname):
    z = np.load(open(fname,'rb'))
    X_train = z['arr_0']
    X_test = z['arr_1']
    y_train = z['arr_2']
    y_test = z['arr_3']

    X_train = csc.csc_matrix(X_train.tolist())
    X_test = csc.csc_matrix(X_test.tolist())

    return X_train, X_test, y_train, y_test
Example #5
def load_lda_dataset_small(uid, neg_to_pos_rate):
    fname = 'ldads_small%d_%d' % (neg_to_pos_rate, uid)
    fname = join(DATASETS_FOLDER, '%s.npz' % fname)
    z = np.load(open(fname,'rb'))
    X_train_lda = z['arr_0']
    X_test_lda = z['arr_1']
    y_train = z['arr_2']
    y_test = z['arr_3']

    X_train_lda = csc.csc_matrix(X_train_lda.tolist())
    X_test_lda = csc.csc_matrix(X_test_lda.tolist())

    return X_train_lda, X_test_lda, y_train, y_test
Example #6
def load_lda_dataset_big(uid, train_size):
    fname = 'ldads_%d' % uid
    if train_size:
        fname += '_tr%d' % train_size
    fname = join(DATASETS_FOLDER, '%s.npz' % fname)
    z = np.load(open(fname,'rb'))
    X_train_lda = z['arr_0']
    X_test_lda = z['arr_1']
    y_train = z['arr_2']
    y_test = z['arr_3']

    X_train_lda = csc.csc_matrix(X_train_lda.tolist())
    X_test_lda = csc.csc_matrix(X_test_lda.tolist())

    return X_train_lda, X_test_lda, y_train, y_test
Example #7
def positive_mass_stiffness_smooth(triangles,
                                   vertices,
                                   nb_iter=1,
                                   diffusion_step=1.0,
                                   flow_file=None,
                                   gaussian_threshold=0.2,
                                   angle_threshold=1.0):
    vertices_csc = csc_matrix(vertices)
    curvature_normal_mtx = mean_curvature_normal_matrix(triangles,
                                                        vertices,
                                                        area_weighted=False)
    # mass_mtx = mass_matrix(triangles, vertices_csc)

    if isinstance(diffusion_step, (int, float)):
        diffusion_step = diffusion_step * np.ones(len(vertices))

    if flow_file is not None:
        mem_map = np.memmap(flow_file,
                            dtype=G_DTYPE,
                            mode='w+',
                            shape=(nb_iter, vertices.shape[0],
                                   vertices.shape[1]))

    for i in range(nb_iter):
        stdout.write("\r step %d on %d done" % (i, nb_iter))
        stdout.flush()
        if flow_file is not None:
            mem_map[i] = vertices_csc.todense()

        # third try
        mass_mtx = mass_matrix(triangles, vertices_csc)

        pos_curv = vertices_cotan_curvature(triangles, vertices_csc,
                                            False) > -G_ATOL

        if gaussian_threshold is not None:
            # Gaussian threshold: maximum value PI, cube corner = PI/2 # = 0.8
            deg_vts = np.abs(
                vertices_gaussian_curvature(triangles, vertices_csc,
                                            False)) > gaussian_threshold
            pos_curv = np.logical_or(pos_curv, deg_vts)

        if angle_threshold is not None:
            # angle_threshold: PI, cube corner = PI/2 # = 1.7
            deg_seg = edge_triangle_normal_angle(
                triangles,
                vertices_csc).max(1).toarray().squeeze() > angle_threshold
            pos_curv = np.logical_or(pos_curv, deg_seg)

        positive_diffusion_step = pos_curv * diffusion_step

        # (D - d*L)*y = D*x = b
        A_matrix = mass_mtx - \
            (diags(positive_diffusion_step, 0).dot(curvature_normal_mtx))

        b_matrix = mass_mtx.dot(vertices_csc)
        vertices_csc = spsolve(A_matrix, b_matrix)

    stdout.write("\r step %d on %d done \n" % (nb_iter, nb_iter))
    return vertices_csc.toarray()
Example #8
    def log_det_estimate_shogun(Q):
        logging.debug("Entering")
        op = RealSparseMatrixOperator(csc_matrix(Q))
        engine = SerialComputationEngine()
        linear_solver = CGMShiftedFamilySolver()
        accuracy = 1e-3
        eigen_solver = LanczosEigenSolver(op)
        eigen_solver.set_min_eigenvalue(OzonePosterior.ridge)
        op_func = LogRationalApproximationCGM(op, engine, eigen_solver, linear_solver, accuracy)

        # limit computation time
        linear_solver.set_iteration_limit(1000)
        eigen_solver.set_max_iteration_limit(1000)
        
        logging.info("Computing Eigenvalues (only largest)")
        eigen_solver.compute()
        
        trace_sampler = ProbingSampler(op)
        log_det_estimator = LogDetEstimator(trace_sampler, op_func, engine)
        n_estimates = 1
        logging.info("Sampling log-determinant with probing vectors and rational approximation")
        estimates = log_det_estimator.sample(n_estimates)
        
        logging.debug("Leaving")
        return mean(estimates)
Example #9
def curvature_normal_smooth(triangles, vertices, nb_iter=1,
                            diffusion_step=1.0, area_weighted=False,
                            backward_step=False, flow_file=None):

    if flow_file is not None:
        mem_map = np.memmap(flow_file, dtype=G_DTYPE, mode='w+',
                            shape=(nb_iter, vertices.shape[0], vertices.shape[1]))

    vertices_csc = csc_matrix(vertices)

    if isinstance(diffusion_step, (int, float)):
        diffusion_step = diffusion_step * np.ones(len(vertices))

    for i in range(nb_iter):
        stdout.write("\r step %d on %d done" % (i, nb_iter))
        stdout.flush()
        if flow_file is not None:
            mem_map[i] = vertices_csc.toarray()

        # get curvature_normal_matrix
        curvature_normal_mtx = mean_curvature_normal_matrix(
            triangles, vertices_csc, area_weighted=area_weighted)

        next_vertices_csc = euler_step(
            curvature_normal_mtx, vertices_csc, diffusion_step, backward_step)
        vertices_csc = next_vertices_csc

    stdout.write("\r step %d on %d done \n" % (nb_iter, nb_iter))
    # return next_vertices_csc
    return vertices_csc.toarray()
Example #10
def volume_curvature_normal_smooth(triangles, vertices, nb_iter=1,
                                   diffusion_step=1.0, area_weighted=False,
                                   backward_step=False, flow_file=None):
    if isinstance(diffusion_step, (int, float)):
        diffusion_step = diffusion_step * np.ones(len(vertices))

    if flow_file is not None:
        mem_map = np.memmap(flow_file, dtype=G_DTYPE, mode='w+',
                            shape=(nb_iter, vertices.shape[0], vertices.shape[1]))

    for i in range(nb_iter):
        stdout.write("\r step %d on %d done" % (i, nb_iter))
        stdout.flush()
        if flow_file is not None:
            mem_map[i] = vertices
        # get curvature_normal_matrix
        # todo not optimal, because operation done twice etc
        curvature_normal_mtx = mean_curvature_normal_matrix(
            triangles, vertices, area_weighted=area_weighted)
        # do the first step
        next_vertices = euler_step(curvature_normal_mtx, csc_matrix(
            vertices), diffusion_step, backward_step).toarray()
        # test if direction is positive
        direction = next_vertices - vertices
        normal_dir = vertices_cotan_normal(triangles, vertices, normalize=True)
        dotv = dot(normalize_vectors(direction), normal_dir, keepdims=True)
        vertices += direction * np.maximum(0.0, -dotv)

    stdout.write("\r step %d on %d done \n" % (nb_iter, nb_iter))
    return vertices
Example #11
def dtm_to_gensim_corpus(dtm):
    """
    Convert a (sparse) DTM to a Gensim Corpus object.

    .. seealso:: :func:`~tmtoolkit.bow.dtm.gensim_corpus_to_dtm` for the reverse function or
                 :func:`~tmtoolkit.bow.dtm.dtm_and_vocab_to_gensim_corpus_and_dict` which additionally creates a Gensim
                 :class:`~gensim.corpora.dictionary.Dictionary`.

    :param dtm: (sparse) document-term-matrix of size NxM (N docs, M is vocab size) with raw term counts
    :return: a Gensim :class:`gensim.matutils.Sparse2Corpus` object
    """
    import gensim

    # DTM with documents to words sparse matrix in COO format has to be converted to transposed sparse matrix in CSC
    # format
    dtm_t = dtm.transpose()

    if issparse(dtm_t):
        if dtm_t.format != 'csc':
            dtm_sparse = dtm_t.tocsc()
        else:
            dtm_sparse = dtm_t
    else:
        from scipy.sparse.csc import csc_matrix
        dtm_sparse = csc_matrix(dtm_t)

    return gensim.matutils.Sparse2Corpus(dtm_sparse)
Example #12
    def log_det_estimate_shogun(Q):
        logging.debug("Entering")
        op = RealSparseMatrixOperator(csc_matrix(Q))
        engine = SerialComputationEngine()
        linear_solver = CGMShiftedFamilySolver()
        accuracy = 1e-3
        eigen_solver = LanczosEigenSolver(op)
        eigen_solver.set_min_eigenvalue(OzonePosterior.ridge)
        op_func = LogRationalApproximationCGM(op, engine, eigen_solver,
                                              linear_solver, accuracy)

        # limit computation time
        linear_solver.set_iteration_limit(1000)
        eigen_solver.set_max_iteration_limit(1000)

        logging.info("Computing Eigenvalues (only largest)")
        eigen_solver.compute()

        trace_sampler = ProbingSampler(op)
        log_det_estimator = LogDetEstimator(trace_sampler, op_func, engine)
        n_estimates = 1
        logging.info(
            "Sampling log-determinant with probing vectors and rational approximation"
        )
        estimates = log_det_estimator.sample(n_estimates)

        logging.debug("Leaving")
        return mean(estimates)
Example #13
 def solve_sparse_linear_system_shogun(A, b):
     logging.debug("Entering")
     solver = DirectSparseLinearSolver()
     operator = RealSparseMatrixOperator(csc_matrix(A))
     result = solver.solve(operator, b)
     logging.debug("Leaving")
     return result
Example #14
def test_from_csc1():

    from siconos.numerics import SBM_from_csparse, SBM_get_value
    from scipy.sparse.csc import csc_matrix

    M = csc_matrix([[1,2,3],
                    [4,5,6],
                    [7,8,9]])

    # print(M.indices)
    # print(M.indptr)
    # print(M.data)
    
    blocksize = 3

    r,SBM = SBM_from_csparse(blocksize,M)

    assert SBM_get_value(SBM,0,0) == 1
    assert SBM_get_value(SBM,0,1) == 2
    assert SBM_get_value(SBM,0,2) == 3
    assert SBM_get_value(SBM,1,0) == 4
    assert SBM_get_value(SBM,1,1) == 5
    assert SBM_get_value(SBM,1,2) == 6
    assert SBM_get_value(SBM,2,0) == 7
    assert SBM_get_value(SBM,2,1) == 8
    assert SBM_get_value(SBM,2,2) == 9
Example #15
def load_results(f_results):
    """
    Load results from CNMF on various FOVs and merge them after some preprocessing
    
    """
    # load data
    i = 0
    A_s = []
    C_s = []
    YrA_s = []
    Cn_s = []
    shape = None
    b_s = []
    f_s = []
    for f_res in f_results:
        print(f_res)
        i += 1
        with np.load(f_res) as ld:
            A_s.append(csc.csc_matrix(ld['A2']))
            C_s.append(ld['C2'])
            YrA_s.append(ld['YrA'])
            Cn_s.append(ld['Cn'])
            b_s.append(ld['b2'])
            f_s.append(ld['f2'])
            if shape is not None:
                shape_new = (ld['d1'], ld['d2'])
                if shape_new != shape:
                    raise Exception('Shapes of FOVs not matching')
                else:
                    shape = shape_new
            else:
                shape = (ld['d1'], ld['d2'])

    return A_s, C_s, YrA_s, Cn_s, b_s, f_s, shape
Example #16
 def solve_sparse_linear_system_shogun(A, b):
     logging.debug("Entering")
     solver = DirectSparseLinearSolver()
     operator = RealSparseMatrixOperator(csc_matrix(A))
     result = solver.solve(operator, b)
     logging.debug("Leaving")
     return result
Example #17
def load_results(f_results):
    """
    Load results from CNMF on various FOVs and merge them after some preprocessing

    """
    # load data
    i = 0
    A_s = []
    C_s = []
    YrA_s = []
    Cn_s = []
    shape = None
    b_s = []
    f_s = []
    for f_res in f_results:
        print(f_res)
        i += 1
        with np.load(f_res) as ld:
            A_s.append(csc.csc_matrix(ld['A2']))
            C_s.append(ld['C2'])
            YrA_s.append(ld['YrA'])
            Cn_s.append(ld['Cn'])
            b_s.append(ld['b2'])
            f_s.append(ld['f2'])
            if shape is not None:
                shape_new = (ld['d1'], ld['d2'])
                if shape_new != shape:
                    raise Exception('Shapes of FOVs not matching')
                else:
                    shape = shape_new
            else:
                shape = (ld['d1'], ld['d2'])

    return A_s, C_s, YrA_s, Cn_s, b_s, f_s, shape
Example #18
def mass_stiffness_smooth(triangles, vertices, nb_iter=1,
                          diffusion_step=1.0, flow_file=None):
    vertices_csc = csc_matrix(vertices)
    curvature_normal_mtx = mean_curvature_normal_matrix(
        triangles, vertices_csc, area_weighted=False)
    # mass_mtx = mass_matrix(triangles, vertices_csc).astype(np.float)

    if isinstance(diffusion_step, (int, float)):
        diffusion_step = diffusion_step * np.ones(len(vertices))

    if flow_file is not None:
        mem_map = np.memmap(flow_file, dtype=G_DTYPE, mode='w+',
                            shape=(nb_iter, vertices.shape[0], vertices.shape[1]))

    for i in range(nb_iter):
        stdout.write("\r step %d on %d done" % (i, nb_iter))
        stdout.flush()
        if flow_file is not None:
            mem_map[i] = vertices_csc.toarray()
        # recompute the mass matrix (vertex areas change every iteration)
        mass_mtx = mass_matrix(triangles, vertices_csc).astype(float)

        # (D - d*L)*y = D*x = b
        A_matrix = mass_mtx - \
            (diags(diffusion_step, 0).dot(curvature_normal_mtx))
        b_matrix = mass_mtx.dot(vertices_csc)
        vertices_csc = spsolve(A_matrix, b_matrix)

    stdout.write("\r step %d on %d done \n" % (nb_iter, nb_iter))
    # return next_vertices_csc
    return vertices_csc.toarray()
Example #19
def CSCfromCompactRepresentation(diag_vals, upper_rows, upper_cols, upper_vals):
  n = len(diag_vals)
  rows = np.concatenate((np.arange(n), upper_rows, upper_cols))
  cols = np.concatenate((np.arange(n), upper_cols, upper_rows))
  ij = np.vstack((rows, cols))
  vals = np.concatenate((diag_vals, upper_vals, upper_vals))
  return csc_matrix((vals, ij), shape=(n, n))
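A short sketch of how this might be called (the values are illustrative; upper_rows/upper_cols index the strictly upper triangle, which the function mirrors into the lower triangle):

import numpy as np

diag_vals = np.array([1.0, 2.0, 3.0])
upper_rows = np.array([0, 1])
upper_cols = np.array([1, 2])
upper_vals = np.array([4.0, 5.0])
M = CSCfromCompactRepresentation(diag_vals, upper_rows, upper_cols, upper_vals)
print(M.toarray())
# [[1. 4. 0.]
#  [4. 2. 5.]
#  [0. 5. 3.]]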
Example #20
def positive_curvature_normal_smooth(triangles, vertices, nb_iter=1,
                                     diffusion_step=1.0, area_weighted=False,
                                     backward_step=False, flow_file=None):
    if flow_file is not None:
        mem_map = np.memmap(flow_file, dtype=G_DTYPE, mode='w+',
                            shape=(nb_iter, vertices.shape[0], vertices.shape[1]))

    if isinstance(diffusion_step, (int, float)):
        diffusion_step = diffusion_step * np.ones(len(vertices))

    curvature_normal_mtx = mean_curvature_normal_matrix(
        triangles, vertices, area_weighted=area_weighted)
    for i in range(nb_iter):
        stdout.write("\r step %d on %d done" % (i, nb_iter))
        stdout.flush()
        if flow_file is not None:
            mem_map[i] = vertices

        # do the first step
        next_vertices = euler_step(curvature_normal_mtx, csc_matrix(
            vertices), diffusion_step, backward_step).toarray()
        # test if direction is positive
        direction = next_vertices - vertices
        normal_dir = vertices_normal(triangles, next_vertices, normalize=False)
        pos_curv = dot(direction, normal_dir, keepdims=True) < 0
        vertices += direction * pos_curv

    stdout.write("\r step %d on %d done \n" % (nb_iter, nb_iter))
    return vertices
Example #21
def volume_mass_stiffness_smooth(triangles,
                                 vertices,
                                 nb_iter=1,
                                 diffusion_step=1.0,
                                 flow_file=None):
    vertices_csc = csc_matrix(vertices)
    curvature_normal_mtx = mean_curvature_normal_matrix(triangles,
                                                        vertices,
                                                        area_weighted=False)

    if isinstance(diffusion_step, (int, float)):
        diffusion_step = diffusion_step * np.ones(len(vertices))

    if flow_file is not None:
        mem_map = np.memmap(flow_file,
                            dtype=G_DTYPE,
                            mode='w+',
                            shape=(nb_iter, vertices.shape[0],
                                   vertices.shape[1]))

    for i in range(nb_iter):
        stdout.write("\r step %d on %d done" % (i, nb_iter))
        stdout.flush()
        if flow_file is not None:
            mem_map[i] = vertices_csc.toarray()
        # recompute the mass matrix (vertex areas change every iteration)
        mass_mtx = mass_matrix(triangles, vertices)

        raise NotImplementedError()  # the update below is unfinished; code after this line never runs
        # (D - d*L)*y = D*x = b
        A_matrix = mass_mtx - \
            diags(diffusion_step, 0).dot(curvature_normal_mtx)
        b_matrix = mass_mtx.dot(csc_matrix(vertices_csc))
        next_vertices = spsolve(A_matrix, b_matrix)
        # test if direction is positive
        direction = next_vertices.toarray() - vertices_csc
        normal_dir = vertices_cotan_normal(triangles,
                                           next_vertices,
                                           normalize=True)
        dotv = normalize_vectors(direction).multiply(normal_dir)
        vertices_csc += direction * np.maximum(0.0, -dotv)
        # vertices_csc += direction * sigmoid(-np.arctan(dotv)*np.pi - np.pi)
        # vertices_csc += direction * softplus(-dotv)

    stdout.write("\r step %d on %d done \n" % (nb_iter, nb_iter))
    return vertices_csc.toarray()
Example #22
 def test_tocsc(self):
     cscm = self.basic_m.tocsc()
     m = self.basic_m
     scipym = csc_matrix((m.data, (m.row, m.col)), shape=(4, 4))
     self.assertListEqual(cscm.indices.tolist(), scipym.indices.tolist())
     self.assertListEqual(cscm.indptr.tolist(), scipym.indptr.tolist())
     self.assertListEqual(cscm.data.tolist(), scipym.data.tolist())
     self.assertEqual(cscm.shape, scipym.shape)
     self.assertIsInstance(cscm, SparseBase)
Example #23
def build_sparse(R, U, rows, cols):
  """ Returns an equivalent matrix in CSC format.
  """
  n = len(R)
  all_rows = np.concatenate((np.arange(n), rows, cols))
  all_cols = np.concatenate((np.arange(n), cols, rows))
  ij = np.vstack((all_rows, all_cols))
  vals = np.concatenate((R, U, U))
  return csc_matrix((vals, ij), shape=(n, n))
Example #24
def subtract_dtm_frequencies(dtm_1, terms_1, dtm_2, terms_2):
    """
    :param dtm_1: DTM to subtract frequencies from
    :param terms_1: Terms (column names) for dtm_1
    :param dtm_2: DTM whose frequencies are subtracted from dtm_1
    :param terms_2: Terms (column names) for dtm_2
    :return: single-row DTM with the frequency differences of terms_1 minus terms_2
    """
    arr_freq_1 = dtm_1.sum(axis=0).getA1()
    arr_freq_2 = dtm_2.sum(axis=0).getA1()
    return csc_matrix(subtract_term_frequencies(terms_1, arr_freq_1, terms_2, arr_freq_2))
Example #25
    def transpose(self, axes=None, copy=False):
        if axes is not None:
            raise ValueError(("Sparse matrices do not support "
                              "an 'axes' parameter because swapping "
                              "dimensions is the only logical permutation."))

        M, N = self.shape

        from scipy.sparse.csc import csc_matrix
        return csc_matrix((self.data, self.indices, self.indptr),
                          shape=(N, M),
                          copy=copy)
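This appears to be scipy's own csr_matrix.transpose; a quick sketch of the observable behaviour (assuming a recent scipy):

import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[1, 0, 2],
                         [0, 3, 0]]))
At = A.transpose()
print(type(At).__name__, At.shape)                  # csc_matrix (3, 2)
print(np.array_equal(At.toarray(), A.toarray().T))  # True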
Example #26
def laplacian_smooth(triangles,
                     vertices,
                     nb_iter=1,
                     diffusion_step=1.0,
                     l2_dist_weighted=False,
                     area_weighted=False,
                     backward_step=False,
                     flow_file=None):

    if flow_file is not None:
        mem_map = np.memmap(flow_file,
                            dtype=G_DTYPE,
                            mode='w+',
                            shape=(nb_iter, vertices.shape[0],
                                   vertices.shape[1]))

    vertices_csc = csc_matrix(vertices)

    if isinstance(diffusion_step, (int, float)):
        diffusion_step = diffusion_step * np.ones(len(vertices))

    for i in range(nb_iter):
        stdout.write("\r step %d on %d done" % (i, nb_iter))
        stdout.flush()
        if flow_file is not None:
            mem_map[i] = vertices_csc.toarray()

        if l2_dist_weighted:
            # if l2_dist_weighted, we need to compute laplacian_matrix
            #   each iteration (because ||e_ij|| change)
            # A_ij_l2_dist_weighted = A_ij / ||e_ij||
            adjacency_matrix = edge_length(triangles, vertices_csc)
            ###################################################################
            # adjacency_matrix.data **= -1
            # laplacian_matrix = laplacian(adjacency_matrix, diag_of_1=False)
            ###################################################################
            adjacency_matrix.data **= 1  # 1
            laplacian_matrix = laplacian(adjacency_matrix, diag_of_1=True)
        else:
            adjacency_matrix = edge_adjacency(triangles, vertices_csc)
            laplacian_matrix = laplacian(adjacency_matrix, diag_of_1=True)

        if area_weighted:
            vts_mix_area = vertices_mix_area(triangles, vertices_csc)
            laplacian_matrix = diags(vts_mix_area**-1, 0).dot(laplacian_matrix)

        next_vertices_csc = euler_step(laplacian_matrix, vertices_csc,
                                       diffusion_step, backward_step)
        vertices_csc = next_vertices_csc

    stdout.write("\r step %d on %d done \n" % (nb_iter, nb_iter))
    # return next_vertices_csc
    return next_vertices_csc.toarray()
Example #27
def exhaustive_set(G, query_nodes, target_nodes, n_edges, start_dist):
    """Exaustively searches all the combinations of k links between 
    a set of query nodes Q and a set of absorbing
    target nodes C such that Q \cap C = \emptyset. 
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    ac_scores: list
        The set of scores of adding the links
    """
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(G, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0,0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if G.has_edge(candidates[i][0], candidates[i][1]) == False]
    ac_scores = [row_sums]
    exhaustive_links = []
    for L in range(1, n_edges+1):
        print('\t Number of edges {}'.format(L))
        round_min = -1
        best_combination = [] 
        for subset in combinations(eligible, L):
            H = G.copy()
            F_modified = F.copy()
            for links_to_add in subset:
                F_updated = update_fundamental_mat(F_modified, H, map_query_to_org, links_to_add[0])
                H.add_edge(links_to_add[0], links_to_add[1])
                F_modified = F_updated            
            abs_cen = start_dist.dot( F_updated.sum(axis = 1))[0,0]
            if abs_cen < round_min or round_min == -1:
                best_combination = subset
                round_min = abs_cen
        exhaustive_links.append(best_combination)
        ac_scores.append(round_min)              
    return exhaustive_links, ac_scores
Example #28
def dtm_to_gensim_corpus(dtm):
    import gensim

    # DTM with documents to words sparse matrix in COO format has to be converted to transposed sparse matrix in CSC
    # format
    dtm_t = dtm.transpose()
    if hasattr(dtm_t, 'tocsc'):
        dtm_sparse = dtm_t.tocsc()
    else:
        from scipy.sparse.csc import csc_matrix
        dtm_sparse = csc_matrix(dtm_t)

    return gensim.matutils.Sparse2Corpus(dtm_sparse)
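A hypothetical call (gensim must be installed; the 2x3 DTM below is made up: two documents, three vocabulary terms):

from scipy.sparse import csr_matrix

dtm = csr_matrix([[1, 0, 2],
                  [0, 3, 0]])
corpus = dtm_to_gensim_corpus(dtm)
for doc in corpus:
    print(doc)  # e.g. [(0, 1), (2, 2)] for the first document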
Example #29
    def test_get_dtm_frequency_diff(self):
        texts_2 = [
            "Más texto con la letra eñe", "Este texto también contiene la eñe"
        ]

        dtm_1 = vec.fit_transform(TEXTS)
        terms_1 = np.array(vec.get_feature_names())
        dtm_2 = vec.fit_transform(texts_2)
        terms_2 = np.array(vec.get_feature_names())

        result = dtm.subtract_dtm_frequencies(dtm_1, terms_1, dtm_2, terms_2)
        expected = csc_matrix([0, 2, 1, 1, -1, -1, 0, 1, 2, 1])
        self.assertTrue(dtm.equal(expected, result))
Example #30
def test():
    """
    Test function run with the -t flag
    """
    print("Running 5 node test ....")
    g = csc_matrix([[0, 1, 0, 0, 1], [1, 0, 1, 1, 0], [0, 1, 0, 1, 0],
                    [0, 1, 1, 0, 0], [1, 0, 0, 0, 1]])
    print("Input csc: \n", g.todense())

    print("Python igraph ...\n", csc_to_igraph(g).get_adjacency())

    from r_utils import r_igraph_get_adjacency
    print("R igraph ...\n", r_igraph_get_adjacency(csc_to_r_igraph(g)))
Example #31
    def tocsc(self, copy=False):
        idx_dtype = get_index_dtype((self.indptr, self.indices),
                                    maxval=max(self.nnz, self.shape[0]))
        indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype)
        indices = np.empty(self.nnz, dtype=idx_dtype)
        data = np.empty(self.nnz, dtype=upcast(self.dtype))

        csr_tocsc(self.shape[0], self.shape[1], self.indptr.astype(idx_dtype),
                  self.indices.astype(idx_dtype), self.data, indptr, indices,
                  data)

        from scipy.sparse.csc import csc_matrix
        A = csc_matrix((data, indices, indptr), shape=self.shape)
        A.has_sorted_indices = True
        return A
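This is the CSR-to-CSC conversion behind csr_matrix.tocsc(); a quick check of the public entry point (assuming scipy is installed):

import numpy as np
from scipy.sparse import csr_matrix

A = csr_matrix(np.array([[0, 1],
                         [2, 0]]))
B = A.tocsc()
print(B.format, B.has_sorted_indices)            # csc True
print(np.array_equal(A.toarray(), B.toarray()))  # True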
Example #32
def test_jaccard():
    implicit_matrix = np.array([[1, 1, 1, 1], [1, 1, 0, 0], [0, 0, 1, 0]])
    assert implicit_matrix.shape == (3, 4)
    implicit_matrix = csc_matrix(implicit_matrix)
    n2i = {"huey": 0, "dewey": 1, "louie": 2, "chewy": 3}
    t2i = {"Batman": 0, "Mystery Men": 1, "Taxi Driver": 2}
    i2n = {v: k for k, v in n2i.items()}
    i2t = {v: k for k, v in t2i.items()}

    jrec = JaccardRecommender(implicit_matrix,
                              p2i=None,
                              t2i=t2i,
                              i2t=i2t,
                              i2p=None)

    print(jrec.item_to_item(N=10, title="Batman"))
Example #33
def tf_idf(df, voc, idf=None, mode='train'):
    vectorizer_tit = CountVectorizer(token_pattern=r'\w+', vocabulary=voc)
    vectorizer_des = CountVectorizer(token_pattern=r'\w+', vocabulary=voc)
    vectorizer_com = CountVectorizer(token_pattern=r'\w+', vocabulary=voc)

    # fit vectorizer on data (text: string) -> vector of features
    vec_fit_tit = vectorizer_tit.fit_transform(df['title'])
    vec_fit_des = vectorizer_des.fit_transform(df['description'])
    vec_fit_com = vectorizer_com.fit_transform(df['combined'])

    # count each word
    counts_tit = np.array(vec_fit_tit.sum(axis=0)).flatten().tolist()
    counts_des = np.array(vec_fit_des.sum(axis=0)).flatten().tolist()
    counts_com = np.array(vec_fit_com.sum(axis=0)).flatten().tolist()

    # get each unique word in the data
    words_tit = vectorizer_tit.get_feature_names()
    words_des = vectorizer_des.get_feature_names()
    words_com = vectorizer_com.get_feature_names()

    # dictionary of word and collection frequency
    df_tit = pd.Series(counts_tit, index=words_tit).to_dict()
    df_des = pd.Series(counts_des, index=words_des).to_dict()
    df_com = pd.Series(counts_com, index=words_com).to_dict()

    if (mode == 'train'):
        # calculate idf vector
        N = df.shape[0]
        idf = {}
        for term in df_com.keys():
            idf[term] = np.log(N) - np.log(
                df_com[term] + 0.000001
            )  # calculate idf based on combined 'title'+'description'.

        idf = csc.csc_matrix(list(
            idf.values()))  # convert idf values to sparse matrix

    # calculate tfidf vectors
    tfidf_tit_csc = idf.multiply(vec_fit_tit)
    tfidf_des_csc = idf.multiply(vec_fit_des)
    tfidf_com_csc = idf.multiply(vec_fit_com)
    for index, row in df.iterrows():
        df.at[index, 'title'] = tfidf_tit_csc[index]
        df.at[index, 'description'] = tfidf_des_csc[index]
        df.at[index, 'combined'] = tfidf_com_csc[index]

    return (df, idf)
Example #34
    def test_mul_sparse_matrix(self):
        #from pyomo.contrib.pynumero.sparse.block_matrix import BlockMatrix

        # test unsymmetric times unsymmetric
        m = self.basic_m
        dense_m = m.toarray()
        res = m * m
        dense_res = np.matmul(dense_m, dense_m)
        self.assertFalse(res.is_symmetric)
        self.assertTrue(np.allclose(res.toarray(), dense_res))

        # test symmetric result
        m = self.basic_m
        dense_m = m.toarray()
        res = m.transpose() * m
        dense_res = np.matmul(dense_m.transpose(), dense_m)
        self.assertTrue(res.is_symmetric)
        self.assertTrue(np.allclose(res.toarray(), dense_res))

        # test unsymmetric with rectangular
        m = self.basic_m
        dense_m2 = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])

        m2 = CSCMatrix(dense_m2)
        res = m * m2
        dense_res = np.matmul(m.toarray(), dense_m2)
        self.assertFalse(res.is_symmetric)
        self.assertTrue(np.allclose(res.toarray(), dense_res))

        # test unsymmetric with rectangular scipycsr
        m = self.basic_m
        dense_m2 = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])

        m2 = csc_matrix(dense_m2)
        with self.assertRaises(Exception) as context:
            res = m * m2

        # test product with symmetric matrix
        m = self.basic_m
        dense_m = m.todense()
        m2 = self.basic_sym_m
        dense_m2 = m2.todense()
        res = m * m2
        res_dense = np.matmul(dense_m, dense_m2)
        self.assertTrue(np.allclose(res.todense(), res_dense))
        """
Example #35
def lpDot(mat, arr):
    """
    CSC matrix-vector or CSC matrix-matrix dot product (A x b)
    :param mat: CSC sparse matrix (A)
    :param arr: dense vector or matrix of object type (b)
    :return: vector or matrix result of the product
    """
    n_rows, n_cols = mat.shape

    # check dimensional compatibility
    assert (n_cols == arr.shape[0])

    # check that the sparse matrix is indeed of CSC format
    if mat.format == 'csc':
        mat_2 = mat
    else:
        # convert the matrix to CSC sparse
        mat_2 = csc_matrix(mat)

    if len(arr.shape) == 1:
        """
        Uni-dimensional sparse matrix - vector product
        """
        res = np.zeros(n_rows, dtype=arr.dtype)
        for i in range(n_cols):
            for ii in range(mat_2.indptr[i], mat_2.indptr[i + 1]):
                j = mat_2.indices[ii]  # row index
                res[j] += mat_2.data[ii] * arr[
                    i]  # C.data[ii] is equivalent to C[i, j]
    else:
        """
        Multi-dimensional sparse matrix - matrix product
        """
        cols_vec = arr.shape[1]
        res = np.zeros((n_rows, cols_vec), dtype=arr.dtype)

        for k in range(
                cols_vec
        ):  # for each column of the matrix "vec", do the matrix vector product
            for i in range(n_cols):
                for ii in range(mat_2.indptr[i], mat_2.indptr[i + 1]):
                    j = mat_2.indices[ii]  # row index
                    res[j, k] += mat_2.data[ii] * arr[
                        i, k]  # C.data[ii] is equivalent to C[i, j]
    return res
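A minimal check (inputs are illustrative) that lpDot matches scipy's built-in product:

import numpy as np
from scipy.sparse import csc_matrix

A = csc_matrix(np.array([[4.0, 0.0, 1.0],
                         [0.0, 2.0, 0.0],
                         [3.0, 0.0, 5.0]]))
b = np.array([1.0, 2.0, 3.0])
print(lpDot(A, b))  # [ 7.  4. 18.]
print(A.dot(b))     # same result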
Example #36
def test():
  """
  Test function run with the -t flag
  """
  print("Running 5 node test ....")
  g = csc_matrix([
        [0, 1, 0, 0, 1],
        [1, 0, 1, 1, 0],
        [0, 1, 0, 1, 0],
        [0, 1, 1, 0, 0],
        [1, 0, 0, 0, 1]
        ])
  print("Input csc: \n", g.todense())

  print("Python igraph ...\n", csc_to_igraph(g).get_adjacency())

  from r_utils import r_igraph_get_adjacency
  print("R igraph ...\n", r_igraph_get_adjacency(csc_to_r_igraph(g)))
Example #37
def random_links(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a random set of links between a set of query nodes Q and a set of absorbing
    target nodes C such that Q \cap C = \emptyset. 
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    ac_scores: list
        The set of scores of adding the links
    """
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(G, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0,0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if G.has_edge(candidates[i][0], candidates[i][1]) == False]
    links_to_add = sample(eligible, n_edges)
    
    ac_scores = []
    ac_scores.append(row_sums)
    i = 0
    while i < n_edges:
        F_updated = update_fundamental_mat(F, G, map_query_to_org, links_to_add[i][0])
        G.add_edge(links_to_add[i][0], links_to_add[i][1])
        abs_cen = start_dist.dot(F_updated.sum(axis = 1))[0,0]
        F = F_updated            
        ac_scores.append(abs_cen)
        i += 1
    return links_to_add, ac_scores
Example #38
 def doFeatureEncoding(self, features):
     """ do feature encoding to original features"""
     encodedFeatures = None
     whitenedFeatures = whiten(features)
     
     if self._featureEncodingMethod == 'vector-quantization':
         # Vector quantization
         # each row is a feature vector
         index, _ = vq(whitenedFeatures, self._codebook)
         row, _ = features.shape
         col = config.codebookSize
         encodedFeatures = np.zeros((row, col))
         
         for i in range(len(index)):
             encodedFeatures[i, index[i]] = 1
             
     elif self._featureEncodingMethod == 'sparse-coding':
         # Sparse coding
         # each column is a feature vector
         X = np.asfortranarray(whitenedFeatures.transpose())
         X = np.asfortranarray(X / np.tile(np.sqrt((X*X).sum(axis=0)),
                                           (X.shape[0],1)),
                               dtype= X.dtype)
         D = np.asfortranarray(self._codebook.transpose())
         D = np.asfortranarray(D / np.tile(np.sqrt((D*D).sum(axis=0)),
                                           (D.shape[0],1)), 
                               dtype = D.dtype)
         
         # Parameters of the optimization are chosen
         param = {
             'lambda1': 0.15, 
             'numThreads': -1,
             'mode': 0    
             }
         
         alpha = spams.lasso(X, D, **param)   # alpha is sparse matrix
         
         alphaShape = (D.shape[1], X.shape[1])
         denseMatrix = csc_matrix(alpha, shape = alphaShape).todense()
         encodedFeatures = np.asarray(denseMatrix).transpose()
             
     return encodedFeatures
Example #39
File: mog.py Project: waytai/utils
def kmeans(X, K, maxiters, M=None, eps=1e-3):
    """Standard k-means.

    _X_ is data rowwise. _K_ is the number of
    clusters. _M_ is the set of centers.

    Implementation tries to save some computation cycles.
    """
    N, d = X.shape
    if M is None:
        tmp = np.random.permutation(N)
        M = X[tmp[:K]].copy()

    costs = []
    last = np.inf
    X_sq_sum = np.sum(X**2)
    for i in range(maxiters):
        # see metric.py, but here: don't need squares from
        # X, because _minimal_ cost over K is independent from it.
        cost = -2*np.dot(X, M.T) + np.sum(M**2, axis=1)
        idx = np.argmin(cost, axis=1)
        cost = cost[np.arange(N), idx]
        costs.append(X_sq_sum + np.sum(cost))

        if (last - costs[-1]) < eps:
            break
        last = costs[-1]
        # Determine new centers
        # Sparseification from Jakob Verbeek's kmeans code,
        # http://lear.inrialpes.fr/~verbeek/software.php
        ind = csc.csc_matrix((np.ones(N), (idx, np.arange(N))), shape=(K, N))
        M = ind.dot(X)
        weights = np.array(ind.sum(axis=1))
        # Handle problem: no points assigned to a cluster
        zeros_idx = (weights.ravel()==0)
        zeros = np.sum(zeros_idx)
        tmp = np.random.permutation(N)
        M[zeros_idx, :] = X[tmp[:zeros]].copy()
        weights[zeros_idx] = 1
        M /= weights
    return M, costs
Example #40
def kmeans(X, K, maxiters, M=None, eps=1e-3):
    """Standard k-means.

    _X_ is data rowwise. _K_ is the number of
    clusters. _M_ is the set of centers.

    Implementation tries to save some computation cycles.
    """
    N, d = X.shape
    if M is None:
        tmp = np.random.permutation(N)
        M = X[tmp[:K]].copy()

    costs = []
    last = np.inf
    X_sq_sum = np.sum(X**2)
    for i in range(maxiters):
        # see metric.py, but here: don't need squares from
        # X, because _minimal_ cost over K is independent from it.
        cost = -2 * np.dot(X, M.T) + np.sum(M**2, axis=1)
        idx = np.argmin(cost, axis=1)
        cost = cost[np.arange(N), idx]
        costs.append(X_sq_sum + np.sum(cost))

        if (last - costs[-1]) < eps:
            break
        last = costs[-1]
        # Determine new centers
        # Sparseification from Jakob Verbeek's kmeans code,
        # http://lear.inrialpes.fr/~verbeek/software.php
        ind = csc.csc_matrix((np.ones(N), (idx, np.arange(N))), shape=(K, N))
        M = ind.dot(X)
        weights = np.array(ind.sum(axis=1))
        # Handle problem: no points assigned to a cluster
        zeros_idx = (weights.ravel() == 0)
        zeros = np.sum(zeros_idx)
        tmp = np.random.permutation(N)
        M[zeros_idx, :] = X[tmp[:zeros]].copy()
        weights[zeros_idx] = 1
        M /= weights
    return M, costs
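An illustrative run on synthetic 2-D data (assumes the Python 3 form above together with its numpy/scipy imports; K, maxiters and the blob locations are arbitrary):

import numpy as np

np.random.seed(0)
X = np.vstack([np.random.randn(50, 2),         # blob near (0, 0)
               np.random.randn(50, 2) + 5.0])  # blob near (5, 5)
M, costs = kmeans(X, K=2, maxiters=100)
print(M)          # typically recovers centers near (0, 0) and (5, 5)
print(costs[-1])  # final clustering cost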
Example #41
def kmeans_np(X, lmbda, M=None):
    """Non-parametric kmeans.

    _X_ is input data, rowwise. _lmbda_ controls
    tradeoff between standard kmeans and cluster
    penalty term.

    See http://www.cs.berkeley.edu/~jordan/papers/kulis-jordan-icml12.pdf
    """
    N, d = X.shape
    if M is None:
        M = np.mean(X, axis=0).reshape(1, d)
    k = M.shape[0] - 1
    X_sq_sum = np.sum(X**2, axis=1)
    ind = np.zeros(N, dtype=int)  # cluster assignments; used as sparse row indices below
    old_ind = ind.copy()
    tmp = 0
    iters = 1
    while True:
        print "Iteration ", iters
        iters = iters + 1
        for i in range(N):
            tmp = -2 * np.dot(X[i], M.T) + np.sum(M**2, axis=1)
            idx = np.argmin(tmp)
            if (X_sq_sum[i] + tmp[idx]) > lmbda:
                k = k + 1
                M = np.append(M, X[i].copy().reshape(1, d), axis=0)
                ind[i] = k
                print "Adding cluster for ", i, k
            else:
                ind[i] = idx
        if np.all(old_ind == ind):
            break
        # see kmeans above
        ind_all = csc.csc_matrix((np.ones(N), (ind, np.arange(N))),
                                 shape=(k + 1, N))
        M = ind_all.dot(X)
        M /= np.array(ind_all.sum(axis=1))
        old_ind = ind
        ind = np.zeros(N, dtype=int)
    return M, np.array(ind_all.sum(axis=1)).ravel()
Example #42
def writetest(desikan):
  """
  Write Test function ran with -t flag

  Positional Args:
  ===============
  desikan - use the desikan mapping?
  """
  from scipy.sparse.csc import csc_matrix
  print "Running 5 node test ...."
  g = csc_matrix([
        [0, 1, 0, 0, 5],
        [1, 0, 3, 1, 0],
        [0, 3, 0, 1, 0],
        [0, 1, 1, 0, 0],
        [5, 0, 0, 0, 0]
        ])

  src = csc_to_graphml(g, test=True, desikan=desikan)
  print "Test complete ..."
  print src
Example #43
File: mog.py Project: waytai/utils
def kmeans_np(X, lmbda, M=None):
    """Non-parametric kmeans.

    _X_ is input data, rowwise. _lmbda_ controls
    tradeoff between standard kmeans and cluster
    penalty term.

    See http://www.cs.berkeley.edu/~jordan/papers/kulis-jordan-icml12.pdf
    """
    N, d = X.shape
    if M is None:
        M = np.mean(X, axis=0).reshape(1, d)
    k = M.shape[0] - 1 
    X_sq_sum = np.sum(X**2, axis=1)
    ind = np.zeros(N, dtype=int)  # cluster assignments; used as sparse row indices below
    old_ind = ind.copy()
    tmp = 0
    iters = 1
    while True:
        print "Iteration ", iters
        iters = iters + 1
        for i in range(N):
            tmp = -2*np.dot(X[i], M.T) + np.sum(M**2, axis=1)
            idx = np.argmin(tmp)
            if (X_sq_sum[i] + tmp[idx]) > lmbda:
                k = k + 1
                M = np.append(M, X[i].copy().reshape(1, d), axis=0)
                ind[i] = k
                print "Adding cluster for ", i, k
            else:
                ind[i] = idx
        if np.all(old_ind == ind):
            break
        # see kmeans above
        ind_all = csc.csc_matrix((np.ones(N), (ind, np.arange(N))), shape=(k+1, N))
        M = ind_all.dot(X)
        M /= np.array(ind_all.sum(axis=1))
        old_ind = ind
        ind = np.zeros(N, dtype=int)
    return M, np.array(ind_all.sum(axis=1)).ravel()
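An illustrative run on a tiny hand-made dataset (lmbda and the seed center are arbitrary; seeding M with a data point avoids leaving the initial cluster empty):

import numpy as np

X = np.array([[0.0, 0.0], [0.5, 0.0], [0.0, 0.5],
              [8.0, 8.0], [8.5, 8.0], [8.0, 8.5]])
M, sizes = kmeans_np(X, lmbda=4.0, M=X[:1].copy())
print(M)      # two centers: one near (0, 0), one near (8, 8)
print(sizes)  # [3. 3.] -- points per cluster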
Example #44
def igraph_to_csc(g, save=False, fn="csc_matlab"):
  """
  Convert an igraph to scipy.sparse.csc.csc_matrix

  Positional arguments:
  =====================
  g - the igraph graph

  Optional arguments:
  ===================
  save - save file to disk
  fn - the file name to be used when writing (appendmat = True by default)
  """
  assert isinstance(g, igraph.Graph), "Arg1 'g' must be an igraph graph"
  print "Creating CSC from igraph object ..."
  gs = csc_matrix(g.get_adjacency().data) # Equiv of calling to_dense so may case MemError
  print "CSC creation complete ..."

  if save:
    print "Saving to MAT file ..."
    sio.savemat(fn, {"data":gs}, True) # save as MAT format only. No other options!
  return gs
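Hypothetical usage (assumes python-igraph is installed; a 5-node ring is used only as a small example):

import igraph

g = igraph.Graph.Ring(5)
gs = igraph_to_csc(g)
print(gs.shape, gs.nnz)  # (5, 5) 10 -- two entries per undirected edge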
Example #45
    def doFeatureEncoding(self, features):
        """ do feature encoding to original features"""
        encodedFeatures = None
        whitenedFeatures = whiten(features)

        if self._featureEncodingMethod == 'vector-quantization':
            # Vector quantization
            # each row is a feature vector
            index, _ = vq(whitenedFeatures, self._codebook)
            row, _ = features.shape
            col = config.codebookSize
            encodedFeatures = np.zeros((row, col))

            for i in range(len(index)):
                encodedFeatures[i, index[i]] = 1

        elif self._featureEncodingMethod == 'sparse-coding':
            # Sparse coding
            # each column is a feature vector
            X = np.asfortranarray(whitenedFeatures.transpose())
            X = np.asfortranarray(X / np.tile(np.sqrt((X * X).sum(axis=0)),
                                              (X.shape[0], 1)),
                                  dtype=X.dtype)
            D = np.asfortranarray(self._codebook.transpose())
            D = np.asfortranarray(D / np.tile(np.sqrt((D * D).sum(axis=0)),
                                              (D.shape[0], 1)),
                                  dtype=D.dtype)

            # Parameters of the optimization are chosen
            param = {'lambda1': 0.15, 'numThreads': -1, 'mode': 0}

            alpha = spams.lasso(X, D, **param)  # alpha is sparse matrix

            alphaShape = (D.shape[1], X.shape[1])
            denseMatrix = csc_matrix(alpha, shape=alphaShape).todense()
            encodedFeatures = np.asarray(denseMatrix).transpose()

        return encodedFeatures
Example #46
def test_from_csc1():

    from siconos.numerics import SBM_from_csparse, SBM_get_value
    from scipy.sparse.csc import csc_matrix

    M = csc_matrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

    # print(M.indices)
    # print(M.indptr)
    # print(M.data)

    blocksize = 3

    r, SBM = SBM_from_csparse(blocksize, M)

    assert SBM_get_value(SBM, 0, 0) == 1
    assert SBM_get_value(SBM, 0, 1) == 2
    assert SBM_get_value(SBM, 0, 2) == 3
    assert SBM_get_value(SBM, 1, 0) == 4
    assert SBM_get_value(SBM, 1, 1) == 5
    assert SBM_get_value(SBM, 1, 2) == 6
    assert SBM_get_value(SBM, 2, 0) == 7
    assert SBM_get_value(SBM, 2, 1) == 8
    assert SBM_get_value(SBM, 2, 2) == 9
Example #47
# ================================================================================

import numpy as np
from scipy.sparse.csc import csc_matrix
import pyGPs
from pyGPs.Validation import valid
from pyGPs.GraphExtensions import graphUtil, graphKernels

location = "graphData/"
data = np.load(location + "MUTAG.npz")

# n = num of nodes
# N = num of graphs
# p = num of labels
A = csc_matrix(
    (data["adj_data"], data["adj_indice"], data["adj_indptr"]), shape=data["adj_shape"]
)  # n x n adjacency array (sparse matrix)
gr_id = data["graph_ind"]  # n x 1 graph id array
node_label = data["responses"]  # n x 1 node label array
graph_label = data["labels"]  # N x 1 graph label array
N = graph_label.shape[0]  # number of graphs

graph_label = np.int8(graph_label)
for i in range(N):
    if graph_label[i, 0] == 0:
        graph_label[i, 0] -= 1

# ===========================================================================
# COMPUTE PROPAGATION KERNELS
# ===========================================================================
num_Iteration = 10
Example #48
#    Marion Neumann, Daniel Marthaler, Shan Huang & Kristian Kersting, 18/02/2014
#================================================================================

import numpy as np
from scipy.sparse.csc import csc_matrix
import pyGPs
from pyGPs.Validation import valid
from pyGPs.GraphExtensions import graphUtil,graphKernels

location = 'graphData/'
data = np.load(location+'MUTAG.npz')

# n = num of nodes
# N = num of graphs
# p = num of labels
A = csc_matrix( (data['adj_data'], data['adj_indice'], \
    data['adj_indptr']), shape=data['adj_shape'])     # n x n adjacency array (sparse matrix)
gr_id = data['graph_ind']                             # n x 1 graph id array
node_label = data['responses']                        # n x 1 node label array
graph_label = data['labels']                          # N x 1 graph label array
N = graph_label.shape[0]                              # number of graphs

graph_label = np.int8(graph_label)
for i in range(N):
    if graph_label[i,0] == 0:
        graph_label[i,0] -= 1

#===========================================================================
# COMPUTE PROPAGATION KERNELS
#===========================================================================
num_Iteration = 10
w = 1e-4
Example #49
def get_approx_boundary(G, query_nodes, target_nodes, n_edges, start_dist):
    """
    Used to calculate an approximation guarantee for greedy algorithm
    """
    
    H = G.copy() # GET A COPY OF THE GRAPH
    query_set_size = len(query_nodes) 
    target_set_size = len(target_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    
    candidates = list(product(query_nodes, target_nodes))
    # ALL minus existing in G
    eligible = [candidates[i] for i in range(len(candidates)) 
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    
    # CALCULATE MARGINAL GAIN TO EMPTY SET FOR ALL NODES IN STEEPNESS FUNCTION
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums_empty = start_dist.dot(F.sum(axis=1))[0,0] # F(\emptyset)
    # candidates = list(product(query_nodes, target_nodes))
    ac_marginal_empty   = []
    ac_marginal_full    = []
    source_idx_empty = []
    node_processed = -1
    for out_edge in eligible:
        abs_cen = -1
        source_node = out_edge[0]
        if node_processed == source_node:
            # skip updating matrix because this updates the F matrix in the same way
            continue
        node_processed = source_node           
        F_updated = update_fundamental_mat(F, H, map_query_to_org, source_node)
        abs_cen = start_dist.dot(F_updated.sum(axis = 1))[0,0]
        ac_marginal_empty.append(abs_cen)
        source_idx_empty.append(source_node)
        
    sorted_indexes_empty = [i[0] for i in sorted(enumerate(source_idx_empty), key=lambda x:x[1])]
    ac_marginal_empty = [ac_marginal_empty[i] for i in sorted_indexes_empty]   
    # CALCULATE MARGINAL GAIN FOR FULL SET

    H.add_edges_from(eligible)
    P_all = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs_all = P_all[list(query_nodes),:][:,list(query_nodes)]
    F_all = compute_fundamental(P_abs_all)
    
    row_sums_all = start_dist.dot(F_all.sum(axis=1))[0,0]
    node_processed = -1
    source_idx = []
    for out_edge in eligible:
        abs_cen = -1
        source_node = out_edge[0]
        if node_processed == source_node:
            # skip updating matrix because this updates the F matrix in the same way
            continue
        node_processed = source_node
        F_all_updated = update_rev_fundamental_mat(F_all, H, map_query_to_org, source_node)
        abs_cen   = start_dist.dot(F_all_updated.sum(axis = 1))[0,0]
        ac_marginal_full.append(abs_cen)
        source_idx.append(source_node)   
    
    sorted_indexes = [i[0] for i in sorted(enumerate(source_idx), key=lambda x:x[1])]
    ac_marginal_full = [ac_marginal_full[i] for i in sorted_indexes]
    
    assert sorted_indexes == sorted_indexes_empty , "Something is wrong with the way scores are appended"
    
    all_steepness = (asarray(ac_marginal_full) - row_sums_all) / (row_sums_empty-asarray(ac_marginal_empty))
    s = min(all_steepness)
    node_max = argmin(all_steepness)
    return 1-s, sorted_indexes[node_max]
Example #50
def greedy_navigation(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a set of links with a greedy descent algorithm that reduce the 
    absorbing RW centrality between a set of query nodes Q and a set of absorbing
    target nodes C such that Q \cap C = \emptyset. The query and target set 
    must be a 'viable' partition of the graph.
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    P : Scipy matrix
        The transition matrix of the graph G
    F : Scipy matrix
        The fundamental matrix for the graph G with the given set of absorbing
        random walk nodes
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    """
    H = G.copy()
    prng = RandomState()
    query_set_size = len(query_nodes)
    target_set_size = len(target_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))

    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0,0]
    best_F = zeros(F.shape)
    optimal_set = []
    ac_scores = []
    ac_scores.append(row_sums)
    
    while n_edges > 0:
        round_min = -1
        best_node = -1
        
        for i in query_nodes:
            abs_neighbours = [l for l in H.neighbors(i) if l in target_nodes]
            if len(abs_neighbours) == target_set_size:
                continue
            
            F_updated = update_fundamental_mat(F, H, map_query_to_org, i)
            abs_cen = start_dist.dot( F_updated.sum(axis = 1))[0,0]
            if abs_cen < round_min or round_min == -1:
                best_node = i
                round_min = abs_cen
                best_F = F_updated
        F = best_F            
        ac_scores.append(round_min)
        optimal_candidate_edges = [(best_node, k, round_min) 
                                   for k in target_nodes 
                                   if H.has_edge(best_node, k) == False ]
        
        try:
            edge_idx = prng.randint(0, len(optimal_candidate_edges))
        except ValueError:
            # no candidate edges left for best_node: dump diagnostics, then re-raise
            # instead of falling through to an undefined edge_idx
            print(list(H.neighbors(best_node)))
            print([l for l in H.neighbors(best_node) if l in target_nodes])
            print(best_node)
            print(optimal_candidate_edges)
            print(target_nodes)
            raise
        H.add_edge(optimal_candidate_edges[edge_idx][0],
                   optimal_candidate_edges[edge_idx][1])
        optimal_set.append(optimal_candidate_edges[edge_idx])
        n_edges -= 1

    return optimal_set, ac_scores
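A minimal usage sketch (the graph, node sets, and uniform start_dist below are made up for illustration; the helpers compute_fundamental and update_fundamental_mat are assumed to come from earlier in this module):

    import networkx as nx
    import numpy as np

    G = nx.barbell_graph(5, 2)            # nodes 0-4 and 7-11 form two cliques
    query_nodes = [0, 1, 2, 3, 4]         # the walker starts here
    target_nodes = [7, 8, 9, 10, 11]      # absorbing set, disjoint from the query set
    start_dist = np.ones((1, len(query_nodes))) / len(query_nodes)

    links, ac_scores = greedy_navigation(G, query_nodes, target_nodes,
                                         n_edges=3, start_dist=start_dist)
    print(links)      # three (source, target, score) triples
    print(ac_scores)  # centrality before and after each added link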
Example No. 51
def reverse_greedy(G, query_nodes, target_nodes, n_edges, start_dist):
    """Selects a set of links with a reverse greedy descent algorithm that reduce the 
    absorbing RW centrality between a set of query nodes Q and a set of absorbing
    target nodes C such that Q \cap C = \emptyset. The query and target set 
    must be a 'viable' partition of the graph.
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    P : Scipy matrix
        The transition matrix of the graph G
    F : Scipy matrix
        The fundamental matrix for the graph G with the given set of absorbing
        random walk nodes
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    """
    H = G.copy()
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    H.add_edges_from(eligible)
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0,0]
    worst_F = zeros(F.shape)
    worst_set = []
    optimal_set = []
    ac_scores = []
    
    while len(eligible) > 0:
        round_min = -1
        worst_link = (-1, -1)
        node_processed = -1
        for out_edge in eligible:
            source_node = out_edge[0]
            if node_processed == source_node:
                # skip: every out-edge of the same source updates F identically
                continue
            node_processed = source_node
            F_updated = update_rev_fundamental_mat(F, H, map_query_to_org, source_node)
            abs_cen   = start_dist.dot(F_updated.sum(axis = 1))[0,0]
            if abs_cen < round_min or round_min == -1:
                worst_link  = out_edge
                round_min   = abs_cen
                worst_F     = F_updated
        F = worst_F
        H.remove_edge(*worst_link)
        worst_set.append(worst_link) 
        eligible.remove(worst_link)
        if (len(eligible) <= n_edges):           
            ac_scores.append(round_min)
            optimal_set.append(worst_link)
        
    return list(reversed(optimal_set)), list(reversed(ac_scores))
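reverse_greedy has the same call signature as greedy_navigation, so the made-up setup from the sketch above carries over, e.g.:

    links, ac_scores = reverse_greedy(G, query_nodes, target_nodes,
                                      n_edges=3, start_dist=start_dist)

Design note: rather than adding links one at a time, all eligible (query, target) links are inserted up front and then pruned greedily; only the last n_edges removals are recorded, and both lists are reversed so they read in "addition" order.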
Example No. 52
def link_prediction(G, query_nodes, target_nodes, n_edges, start_dist, alg = "ra"):
    """Selects a random set of links between based on the scores calculated by 
    a standard link-prediction algorithm from networkx library
    Parameters
    ----------
    G : Networkx graph
        The graph from which the team will be selected.
    query : list 
        The set of nodes from which random walker starts.
    target : list
        The set of nodes from where the random walker ends.
    n_edges : integer
        the number of links to be added
    start_dist: list
        The starting distribution over the query set
    alg: string
        A string describing the link-prediction algorithm to be used
    Returns
    -------
    links : list
        The set of links that reduce the absorbing RW centrality
    ac_scores: list
        The set of scores of adding the links
    """
    assert alg in ["ra", "pa", "jaccard", "aa"], "alg must be one of [\"ra\", \"pa\", \"jaccard\", \"aa\"]."
          
    H = G.copy()
    query_set_size = len(query_nodes)
    map_query_to_org = dict(zip(query_nodes, range(query_set_size)))
    P = csc_matrix(nx.google_matrix(H, alpha=1))
    P_abs = P[list(query_nodes),:][:,list(query_nodes)]
    F = compute_fundamental(P_abs)
    row_sums = start_dist.dot(F.sum(axis=1))[0,0]
    candidates = list(product(query_nodes, target_nodes))
    eligible = [candidates[i] for i in range(len(candidates)) 
                if H.has_edge(candidates[i][0], candidates[i][1]) == False]
    links_to_add = []
    if alg == 'ra':
        preds = nx.resource_allocation_index(H, eligible)
    elif alg == 'jaccard':
        preds = nx.jaccard_coefficient(H, eligible)
    elif alg == 'aa':
        preds = nx.adamic_adar_index(H, eligible)
    elif alg == 'pa':
        preds = nx.preferential_attachment(H, eligible)
        
    for u,v,p in preds:
        links_to_add.append((u,v,p))
    links_to_add.sort(key=lambda x: x[2], reverse = True)
    
    ac_scores = []
    ac_scores.append(row_sums)
    for i in range(n_edges):
        F_updated = update_fundamental_mat(F, H, map_query_to_org, links_to_add[i][0])
        H.add_edge(links_to_add[i][0], links_to_add[i][1])
        abs_cen = start_dist.dot(F_updated.sum(axis = 1))[0,0]
        F = F_updated
        ac_scores.append(abs_cen)
    return links_to_add, ac_scores
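The same made-up setup also drives link_prediction; e.g., ranking candidates by the Jaccard coefficient:

    links, ac_scores = link_prediction(G, query_nodes, target_nodes,
                                       n_edges=3, start_dist=start_dist,
                                       alg="jaccard")

Note that the full sorted candidate list is returned, but only its first n_edges entries are actually applied to the graph.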
Example No. 53
def threshold_components(A_s,shape,min_size=5,max_size=np.inf,max_perc=.5,remove_unconnected_components=True):        
    """
    Threshold components output of a CNMF algorithm (A matrices)

    Parameters:
    ----------

    A_s: list 
        list of A matrice output from CNMF

    min_size: int
        min size of the component in pixels

    max_size: int
        max size of the component in pixels

    max_perc: float        
        fraction of the maximum of each component used to threshold 

    remove_unconnected_components: boolean
        whether to remove components that are fragmented in space
    Returns:
    -------        

    B_s: list of the thresholded components

    lab_imgs: image representing the components in ndimage format

    cm_s: center of masses of each components
    """

    B_s = []
    lab_imgs = []
    cm_s = []
    for A_ in A_s:
        print('*')
        max_comps = A_.max(0).todense().T
        tmp = []
        cm = []
        lim = np.zeros(shape)
        for idx, a in enumerate(A_.T):
            # create a mask by thresholding at max_perc of the component's max
            mask = np.reshape(a.todense() > (max_comps[idx] * max_perc), shape)
            label_im, nb_labels = ndimage.label(mask)
            sizes = ndimage.sum(mask, label_im, list(range(nb_labels + 1)))

            if remove_unconnected_components:
                l_largest = (label_im == np.argmax(sizes))
                cm.append(scipy.ndimage.measurements.center_of_mass(l_largest, l_largest))
                lim[l_largest] = (idx + 1)
                # remove connected components that are too small or too large
                mask_size = np.logical_or(sizes < min_size, sizes > max_size)
                if np.sum(mask_size[1:]) > 1:
                    print(('removing ' + str(np.sum(mask_size[1:]) - 1) + ' components'))
                remove_pixel = mask_size[label_im]
                label_im[remove_pixel] = 0

            label_im = (label_im > 0) * 1
            tmp.append(label_im.flatten())

        cm_s.append(cm)
        lab_imgs.append(lim)
        B_s.append(csc.csc_matrix(np.array(tmp)).T)

    return B_s, lab_imgs, cm_s
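A minimal sketch of calling it (the random "components" are made up; it assumes the module-level imports the function relies on — numpy as np, scipy, scipy.ndimage as ndimage, and scipy.sparse.csc as csc — are in place, and shape[0]*shape[1] must equal the number of rows of each A matrix):

    import scipy.sparse

    shape = (20, 20)
    A1 = scipy.sparse.random(400, 3, density=0.05, format='csc')  # 3 fake components
    A2 = scipy.sparse.random(400, 2, density=0.05, format='csc')
    B_s, lab_imgs, cm_s = threshold_components([A1, A2], shape, min_size=2)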
Example No. 54
def log_det_shogun_exact(Q):
    logging.debug("Entering")
    logdet = Statistics.log_det(csc_matrix(Q))
    logging.debug("Leaving")
    return logdet
Example No. 55
def log_det_shogun_exact_plus_noise(Q):
    logging.debug("Entering")
    logdet = Statistics.log_det(csc_matrix(Q)) + randn()
    logging.debug("Leaving")
    return logdet
Example No. 56
def log_det_scikits(Q):
    # the stray raise after the return was unreachable; it was most likely
    # meant to guard the cholmod import, so restructure it that way
    try:
        from scikits.sparse.cholmod import cholesky
    except ImportError:
        raise Exception("cholmod not installed")
    d = cholesky(csc_matrix(Q)).L().diagonal()
    return 2 * sum(log(d))
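If cholmod is unavailable, a dense fallback along these lines is possible (a sketch, not part of the original snippets; only viable when Q is symmetric positive definite and small enough to densify):

    import numpy as np
    from scipy.sparse import csc_matrix

    def log_det_dense(Q):
        # O(n^3) time and O(n^2) memory: small matrices only
        sign, logdet = np.linalg.slogdet(csc_matrix(Q).toarray())
        assert sign > 0, "Q must be symmetric positive definite"
        return logdet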
Example No. 57
  fh.seek(pos)

  print("Getting edges ...")
  line = ""
  while True:
    line += fh.readline().replace(" ", "").strip() # remove if inefficient

    if line.endswith("</edge>"):
      edge = get_edge(line)
      g[edge[0], edge[1]] = edge[2] # naive, i.e. slow. TODO: optimize
      line = ""

    elif line.endswith("</graphml>"):
      break

  return csc_matrix(g) # convert to CSC first

def get_edge(st):
  """
  Given a string, extract src, dest, and weight (if available).
  No other edge attributes are representable.

  Positional Args:
  ===============
  st - the string
  """
  global __weight__
  src = int(re.search(r"(?<=source=[\"']n)\d+", st).group())
  dest = int(re.search(r"(?<=target=[\"']n)\d+", st).group())

  if __weight__:
Example No. 58
def csc_to_graphml(g, is_weighted=True, desikan=False, is_directed=False, save_fn="default_name.graphml", is_tri=False, test=False):
  """
  Convert a csc graph to graphml format for writing to disk

  Positional arguments:
  ====================
  g - the csc graph

  Optional arguments:
  ===================
  is_weighted - is the graph weighted. Type: boolean.
  desikan - use the desikan mapping to label nodes. Type: boolean
  is_directed - is g symmetric ? Type: boolean
  save_fn - file name to use when saving. Type: string
  is_tri - is the adjacency mat upper or lower triangular. Type: boolean
  test - are we running a test. Type: boolean
  """

  print "Beginning graphml construction .."
  if test: test_str = ""

  tabs = 2 # How many tabs to affix to the front

  src = """<?xml version="1.0" encoding="UTF-8"?>
  <graphml xmlns="http://graphml.graphdrawing.org/xmlns"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
    http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
    <!-- Created by script: %s -->\n""" % __file__

  # Do we have desikan labels ?
  if desikan:
    from mrcap import desikan

    src += "  "*2+"<key id=\"v_region\" for=\"node\" attr.name=\"region\" attr.type=\"string\"/>\n" # Desikan vertex attr called v_region
    tabs = 3

  # Is our graph weighted ?
  if is_weighted:
    src += "  "*2+"<key id=\"e_weight\" for=\"edge\" attr.name=\"weight\" attr.type=\"double\"/>\n" # Edge weight attr called e_weight
    tabs = 3

  # Directed graph ?
  if is_directed:
    src += "\n    <graph id=\"G\" edgedefault=\"directed\">\n"

  # Undirected graph?
  else:  # not directed so just use upper tri
    if not is_tri:
      print("Converting to upper triangular ...")
      from scipy.sparse.csc import csc_matrix
      from scipy.sparse import triu

      g = csc_matrix(triu(g, k=0))
    # the <graph> element must be opened whether or not we had to convert
    src += "\n    <graph id=\"G\" edgedefault=\"undirected\">\n"

  NUM_NODES = g.shape[0]

  if not test: f = open(save_fn if os.path.splitext(save_fn)[1] == ".graphml" else save_fn+".graphml", "w")

  # Can be #pragma for
  for node in range(NUM_NODES): # Cycle through all nodes
    s = "<node id=\"n%d\">\n" % node

    if desikan:
      s += "  "*(tabs+1)+"<data key=\"v_region\">\"%s\"</data>\n" % (desikan.des_map.get(node, "Undefined"))

    s += "  "*tabs+"</node>\n"
    src += "  "*tabs+s

    if node % 50000 == 0:
      print "Processing node %d / %d ..." % (node, NUM_NODES)
      if test: test_str += src
      else: f.write(src)
      src = ""

  del s # free mem

  print "Adding edges to graph ..."
  # Get all edge data
  nodes_from, nodes_to = g.nonzero()
  data = g.data
  del g # free some mem

  # Can be #pragma for
  NUM_EDGES = nodes_from.shape[0]
  for idx in range(NUM_EDGES): # Only the edges that exist
    src += "  "*tabs+"<edge source=\"n%d\" target=\"n%d\">\n" % (nodes_from[idx], nodes_to[idx])
    if is_weighted:
      src += "  "*(tabs+1)+"<data key=\"e_weight\">%d</data>\n" % data[idx]
    src += "  "*tabs+"</edge>\n"

    if idx % 100000 == 0:
      print "Processing edge %d / %d ..." % (idx, NUM_EDGES)
      if test: test_str += src
      else: f.write(src)
      src = ""

  src += "  </graph>\n</graphml>"

  if test:
    test_str += src
    return test_str

  f.write(src)
  f.close()
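
A hedged usage sketch (the tiny 3-node matrix is made up; is_tri=True because it is already upper triangular):

    import numpy as np
    from scipy.sparse.csc import csc_matrix

    g = csc_matrix(np.array([[0.0, 1.5, 0.0],
                             [0.0, 0.0, 2.0],
                             [0.0, 0.0, 0.0]]))
    csc_to_graphml(g, is_weighted=True, is_tri=True, save_fn="tiny.graphml")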