Beispiel #1
0
def to_coo_adjacency_matrix(data, simalarity=False, distance_fun=None):
    '''
    Build a sparse COO adjacency matrix from the edge list of a dataset.

    The matrix is square with side ``max(node id) + 1``.  Unweighted datasets
    get a weight of one on every edge.  Unless ``simalarity`` is true the
    weights are turned into distances: negated (``distance_fun`` of ``None``
    or ``'minus'``) or mapped through ``exp(-w)`` (``'exp_minus'``).  For
    undirected datasets every edge is also emitted in the reverse direction.

    :param data: :py:class:`gct.Dataset`
    :param simalarity: keep the raw weights instead of converting to distances
    :param distance_fun: ``None``, ``'minus'`` or ``'exp_minus'``
    :rtype: scipy coo_matrix
    :raises ValueError: if ``distance_fun`` is not one of the known names
    '''
    edge_table = data.get_edges()
    src = edge_table['src'].values
    dst = edge_table['dest'].values
    size = int(max(np.max(src), np.max(dst))) + 1

    values = edge_table['weight'].values if data.is_weighted() \
        else np.ones_like(src)

    if not simalarity:  # distance
        if distance_fun is None or distance_fun == 'minus':
            values = -values
        elif distance_fun == 'exp_minus':
            values = np.exp(-values)
        else:
            raise ValueError("unknown " + distance_fun)

    if not data.is_directed():
        # Mirror every edge so the matrix is symmetric.
        values = np.concatenate([values, values])
        src, dst = np.concatenate([src, dst]), np.concatenate([dst, src])

    return coo_matrix((values, (src, dst)), shape=[size, size])
Beispiel #2
0
    def test_sparse_multiply(self):
        """Check ``*`` (elementwise) and ``@`` (matrix product) on pybamm matrices.

        The same four stored entries are placed in a 4x5 and a 5x4 sparse
        matrix, and each is combined with itself, with the other, and with
        dense matrices of ones.
        """
        row = np.array([0, 3, 1, 0])
        col = np.array([0, 3, 1, 2])
        data = np.array([4, 5, 7, 9])
        S1 = coo_matrix((data, (row, col)), shape=(4, 5))
        S2 = coo_matrix((data, (row, col)), shape=(5, 4))
        pybammS1, pybammS2 = pybamm.Matrix(S1), pybamm.Matrix(S2)
        D1 = np.ones((4, 5))
        D2 = np.ones((5, 4))
        pybammD1, pybammD2 = pybamm.Matrix(D1), pybamm.Matrix(D2)

        # Multiplication is elementwise
        elementwise_cases = [
            (pybammS1, pybammS1, S1.multiply(S1).toarray()),
            (pybammS2, pybammS2, S2.multiply(S2).toarray()),
            (pybammD1, pybammS1, S1.toarray() * D1),
            (pybammS1, pybammD1, S1.toarray() * D1),
            (pybammD2, pybammS2, S2.toarray() * D2),
            (pybammS2, pybammD2, S2.toarray() * D2),
        ]
        for left, right, expected in elementwise_cases:
            np.testing.assert_array_equal(
                (left * right).evaluate().toarray(), expected)

        # Elementwise products of mismatched shapes must be rejected
        mismatched_cases = [
            (pybammS1, pybammS2, "test_shape"),
            (pybammS2, pybammS1, "test_shape"),
            (pybammS2, pybammS1, "evaluate_ignoring_errors"),
        ]
        for left, right, method_name in mismatched_cases:
            with self.assertRaisesRegex(pybamm.ShapeError,
                                        "inconsistent shapes"):
                getattr(left * right, method_name)()

        # Matrix multiplication is normal matrix multiplication
        sparse_sparse_cases = [
            (pybammS1, pybammS2, (S1 * S2).toarray()),
            (pybammS2, pybammS1, (S2 * S1).toarray()),
        ]
        for left, right, expected in sparse_sparse_cases:
            np.testing.assert_array_equal(
                (left @ right).evaluate().toarray(), expected)

        sparse_dense_cases = [
            (pybammS1, pybammD2, S1 * D2),
            (pybammD2, pybammS1, D2 * S1),
            (pybammS2, pybammD1, S2 * D1),
            (pybammD1, pybammS2, D1 * S2),
        ]
        for left, right, expected in sparse_dense_cases:
            np.testing.assert_array_equal((left @ right).evaluate(), expected)

        # A non-square matrix cannot be matrix-multiplied with itself
        for operand in (pybammS1, pybammS2):
            with self.assertRaisesRegex(pybamm.ShapeError,
                                        "dimension mismatch"):
                (operand @ operand).test_shape()
def apply_threshold(original, threshold):
    """
    Keep only the cells of a matrix whose value is strictly greater than a
    threshold.

    The result always has the same shape as the input, even when the
    surviving cells do not reach the last row/column or when no cell
    survives at all.

    :param original: The original matrix (anything ``coo_matrix`` accepts).
    :param threshold: Numeric threshold applied to each stored cell.
    :return: COO matrix with cells filtered according to the threshold.
    """
    original = coo_matrix(original)  # Ensure COO format
    keep = original.data > threshold
    # Pass the shape explicitly: without it scipy infers the shape from the
    # largest remaining index (and fails outright on an empty selection).
    return coo_matrix(
        (original.data[keep], (original.row[keep], original.col[keep])),
        shape=original.shape)
Beispiel #4
0
 def tocoo(self):
     """Return a copy of this matrix in COOrdinate format.

     The stored values become the COO ``data`` array and the stored keys,
     transposed into a pair of index rows, become its coordinates.  A matrix
     with no stored entries yields an empty COO matrix of the same shape and
     dtype.
     """
     # Import from the public namespace; ``scipy.sparse.coo`` is a private,
     # deprecated module path.
     from scipy.sparse import coo_matrix
     if self.nnz == 0:
         return coo_matrix(self.shape, dtype=self.dtype)

     idx_dtype = get_index_dtype(maxval=max(self.shape[0], self.shape[1]))
     data = np.asarray(_list(self.values()), dtype=self.dtype)
     indices = np.asarray(_list(self.keys()), dtype=idx_dtype).T
     return coo_matrix((data, indices), shape=self.shape, dtype=self.dtype)
    def test_is_matrix_zero(self):
        """``pybamm.is_matrix_zero`` is true only for all-zero matrices.

        Checked first with sparse (COO) matrices, then with dense ones.
        """
        sparse_cases = [
            (pybamm.Matrix(coo_matrix(np.zeros((10, 10)))), True),
            (pybamm.Matrix(coo_matrix(np.ones((10, 10)))), False),
            (pybamm.Matrix(coo_matrix(([1], ([0], [0])), shape=(5, 5))),
             False),
        ]
        for matrix, expect_zero in sparse_cases:
            check = self.assertTrue if expect_zero else self.assertFalse
            check(pybamm.is_matrix_zero(matrix))

        dense_cases = [
            (pybamm.Matrix(np.zeros((10, 10))), True),
            (pybamm.Matrix(np.ones((10, 10))), False),
            (pybamm.Matrix([1, 0, 0]), False),
        ]
        for matrix, expect_zero in dense_cases:
            check = self.assertTrue if expect_zero else self.assertFalse
            check(pybamm.is_matrix_zero(matrix))
Beispiel #6
0
def _pad_to_max_nodes(block, max_nodes):
    """Zero-pad a square ``(k, k)`` array at the bottom/right to ``(max_nodes, max_nodes)``."""
    extra = max_nodes - block.shape[0]
    return np.pad(block, ((0, extra), (0, extra)), mode='constant')


def _dense_edge_adjacency(edges, max_nodes):
    """Return the dense float32 ``(max_nodes, max_nodes)`` adjacency of an edge list."""
    if len(edges) == 0:
        return np.zeros((max_nodes, max_nodes), dtype=np.float32)
    return coo_matrix((np.ones(len(edges)), np.array(edges).T),
                      shape=(max_nodes, max_nodes),
                      dtype=np.float32).toarray()


def build_edge_data(args, base_data):
    """
    Build the dense adjacency tensor for one sample.

    With ``args.use_edge`` the result stacks three ``max_nodes x max_nodes``
    relations -- ``edges_in``, ``edges_out`` and the "complement" relation
    (pairs of distinct candidate nodes that are in neither edge list) -- and
    divides every row by its sum (rows summing to zero are left untouched).

    Without ``args.use_edge`` the result is a single matrix connecting every
    pair of distinct candidate nodes, zero-padded to ``max_nodes``.

    :param args: object exposing ``use_edge`` and ``max_nodes``
    :param base_data: mapping with ``nodes_candidates_id`` and, when
        ``use_edge`` is set, ``edges_in`` and ``edges_out``
    :return: ``torch`` tensor, ``(3, max_nodes, max_nodes)`` float32 when
        ``use_edge`` is set, otherwise ``(max_nodes, max_nodes)`` float64
    """
    max_nodes = args.max_nodes
    n_candidates = len(base_data['nodes_candidates_id'])

    if not args.use_edge:
        # The original referenced an undefined name ``d`` here; it is
        # ``base_data`` like everywhere else in this function.
        adj = _pad_to_max_nodes(np.ones((n_candidates, n_candidates)), max_nodes) \
            - _pad_to_max_nodes(np.eye(n_candidates), max_nodes)
        return torch.from_numpy(adj)

    adj_in = _dense_edge_adjacency(base_data['edges_in'], max_nodes)
    adj_out = _dense_edge_adjacency(base_data['edges_out'], max_nodes)

    # Everything among the candidates that is neither an in-edge, an out-edge
    # nor a self-pair; clipping removes the negatives produced by overlaps.
    complement = _pad_to_max_nodes(
        np.ones((n_candidates, n_candidates), dtype=np.float32), max_nodes) \
        - adj_in - adj_out \
        - _pad_to_max_nodes(np.eye(n_candidates), max_nodes)

    adj = np.stack(
        [adj_in, adj_out, np.clip(complement, 0, 1, dtype=np.float32)], 0)

    # Row-normalise each relation; only non-zero row sums are inverted.
    row_sums = adj.sum(-1)
    row_sums[np.nonzero(row_sums)] **= -1
    adj = adj * np.expand_dims(row_sums, -1)
    return torch.from_numpy(adj)
def test_get_term_proportions(dtm, matrix_type):
    """Check ``get_term_proportions`` for matrix, sparse and ndarray inputs.

    ``matrix_type`` selects the container: ``1`` wraps ``dtm`` in
    ``np.matrix``, ``2`` in a sparse ``coo_matrix``, anything else in a plain
    ``np.ndarray``.  Non-2D input must raise ``ValueError``; otherwise the
    result is one proportion per column, each in ``[0, 1]`` and summing to 1
    whenever the input has at least one cell.
    """
    if matrix_type == 1:
        dtm = np.matrix(dtm)
        dtm_arr = dtm.A
        dtm_flat = dtm.A1
    elif matrix_type == 2:
        dtm = coo_matrix(dtm)
        # Use ``toarray()`` rather than the ``.A`` shorthand, which newer
        # SciPy releases no longer provide on sparse matrices.
        dtm_arr = dtm.toarray()
        dtm_flat = dtm_arr.flatten()
    else:
        dtm = np.array(dtm)
        dtm_arr = dtm
        dtm_flat = dtm.flatten()

    if dtm.ndim != 2:
        with pytest.raises(ValueError):
            lda_utils.common.get_term_proportions(dtm)
    else:
        tp = lda_utils.common.get_term_proportions(dtm)
        assert tp.ndim == 1
        assert tp.shape == (dtm_arr.shape[1],)

        if len(dtm_flat) > 0:
            assert np.isclose(tp.sum(), 1.0)
            assert all(0 <= v <= 1 for v in tp)
Beispiel #8
0
def _assembly_K(mesh, omega):
    """Assemble the global sparse matrix from per-element local matrices.

    For every element a local matrix is built from its points, ``omega`` and
    the element's ``mu`` / ``eta`` values, then scattered into COO triplets
    using the element connectivity.  Duplicate ``(i, j)`` entries are left in
    the COO structure (scipy sums them on conversion).

    :param mesh: object exposing ``connectivity_list``, ``points_in_elements``,
        ``mu``, ``eta`` and ``n_points``
    :param omega: passed through to the local-matrix builder
    :return: complex ``coo_matrix`` of shape ``(n_points, n_points)``
    """
    build_local_Ke = g.choose_impl(_fwi_ls.build_local_Ke, _build_local_Ke)

    # Prepare Ke for each element, and keep their data to be used in the assembly
    Ke_local_list = [
        (connectivity, build_local_Ke(points, omega, mu, eta))
        for connectivity, points, mu, eta in zip(mesh.connectivity_list,
                                                 mesh.points_in_elements,
                                                 mesh.mu, mesh.eta)
    ]

    # Assembly the global matrix
    Ke_coo_i = []
    Ke_coo_j = []
    Ke_coo_data = []
    for connectivity, Ke_local in Ke_local_list:
        for k, p1 in enumerate(connectivity):
            for l, p2 in enumerate(connectivity):
                Ke_coo_i.append(p1)
                Ke_coo_j.append(p2)
                Ke_coo_data.append(Ke_local[k, l])

    # Build the sparse data structure.  ``np.complex`` was only an alias of
    # the builtin ``complex`` and has been removed from NumPy.
    Ke_global = coo_matrix(
        (Ke_coo_data, (Ke_coo_i, Ke_coo_j)),
        shape=(mesh.n_points, mesh.n_points),
        dtype=complex,
    )

    return Ke_global
Beispiel #9
0
    def _compute_table_rank(self, contained):
        """Build one boolean relation matrix per rank (six in total).

        Two entries sharing a root are related at rank ``r`` when they have a
        common contained element of rank ``r``; every entry is also related
        to itself at each rank found among its own contained elements
        (rank 6 excluded).

        :param contained: compressed sparse matrix; only its ``indices`` and
            ``indptr`` arrays are read (presumably CSR, one row per
            dictionary entry -- confirm against the caller)
        :return: list of six boolean ``coo_matrix`` objects of shape
            ``self.shape``
        """
        logger.log(logging.DEBUG, "Computing tables relations")

        tables_rank = [([], []) for _ in range(6)]

        # Splitting ``indices`` at every ``indptr`` position yields one chunk
        # per row plus a leading and a trailing chunk, which are dropped.
        indices = [
            set(l) for l in np.split(contained.indices, contained.indptr)[1:-1]
        ]

        for root in self.dictionary.roots:
            for t0, t1 in combinations(self.dictionary.roots[root], 2):
                commons = [self.dictionary.index[i] for i in indices[t0.index] & indices[t1.index]]

                ranks = set(map(lambda t: t.rank, commons))
                for rank in ranks:
                    # Store the relation in both directions (symmetric).
                    tables_rank[rank][0].extend((t0.index, t1.index))
                    tables_rank[rank][1].extend((t1.index, t0.index))

        for t in self.dictionary:
            ranks = {self.dictionary.index[i].rank for i in indices[t.index]} - {6}
            for rank in ranks:
                tables_rank[rank][0].append(t.index)
                tables_rank[rank][1].append(t.index)

        # ``np.bool`` was only an alias of the builtin ``bool`` and has been
        # removed from NumPy.
        return [coo_matrix(([True]*len(i), (i, j)), shape=self.shape, dtype=bool) for i, j in tables_rank]
Beispiel #10
0
    def test_add_sparse(self):
        """Adding sparse operands to ``basic_m`` gives the expected sums.

        Covers matrix + itself, rejection of a plain scipy COO operand,
        addition with an identity matrix from both sides, and addition with
        a diagonal matrix.
        """
        base = self.basic_m

        # matrix + itself doubles every entry
        doubled = base + base
        base_dense = np.array([[4., 0., 9., 0.], [0., 7., 0., 0.],
                               [0., 0., 0., 0.], [0., 0., 0., 5.]])
        expected_doubled = base_dense * 2
        self.assertIsInstance(doubled, CSRMatrix)
        self.assertListEqual(doubled.toarray().flatten().tolist(),
                             expected_doubled.flatten().tolist())

        # a raw scipy COO matrix is not an accepted operand
        scipy_coo = coo_matrix(
            (self.basic_m.data, (self.basic_m.row, self.basic_m.col)),
            shape=self.basic_m.shape)
        with self.assertRaises(Exception):
            base + scipy_coo

        # identity on either side adds one to the diagonal
        identity = IdentityMatrix(4)
        expected_plus_identity = np.array([[5., 0., 9., 0.],
                                           [0., 8., 0., 0.],
                                           [0., 0., 1., 0.],
                                           [0., 0., 0., 6.]])
        for left, right in ((base, identity), (identity, base)):
            total = left + right
            self.assertIsInstance(total, CSRMatrix)
            self.assertListEqual(total.toarray().flatten().tolist(),
                                 expected_plus_identity.flatten().tolist())

        # diagonal matrix of ones, compared through dense arithmetic
        diagonal = DiagonalMatrix(np.ones(base.shape[1]))
        total = base + diagonal
        dense_total = base.todense() + diagonal.todense()
        self.assertTrue(np.allclose(total.todense(), dense_total))
Beispiel #11
0
def create_w_from_binary(chosen: list,
                         not_chosen: list,
                         nonzero_binary: list,
                         sparse: bool = True):
    """
        Return W s.t. W dot W.T is reduced density matrix according to selected bipartition

    Every non-zero amplitude is assumed equal, so each stored entry of W is
    ``len(nonzero_binary) ** -0.5``.

    :param chosen:      list of chosen qubits
    :param not_chosen:  list of qubits to trace away
    :param nonzero_binary:  list s.t. nonzero_binary[j] = j-th index in which the state is nonzero, represented as
                            binary string:   psi = [0,1,0,1,0] -> nonzero_binary = ['001','011']
    :param sparse:      return a scipy CSC matrix when true, a dense numpy array otherwise
    :return: W, of shape (2**len(chosen), 2**len(not_chosen))
    """

    # row idxs of nonzero elements in W
    rows = [
        aux.to_decimal(aux.select_components(i, chosen))
        for i in nonzero_binary
    ]
    # column idxs of nonzero elements in W
    cols = [
        aux.to_decimal((aux.select_components(i, not_chosen)))
        for i in nonzero_binary
    ]

    number_of_nonzeros = len(nonzero_binary)
    norm = number_of_nonzeros**(-1 / 2)

    n_rows = 2**len(chosen)
    n_cols = 2**len(not_chosen)

    if sparse:
        data = np.ones(number_of_nonzeros) * norm
        return coo_matrix((data, (rows, cols)),
                          shape=(n_rows, n_cols)).tocsc()

    # Dense path: fill a flat buffer at the row-major positions, then reshape.
    # (The original used the undefined name ``notchosen`` here.)
    flatrow_idx = [i * n_cols + j for i, j in zip(rows, cols)]
    W = np.zeros(n_rows * n_cols)
    W[flatrow_idx] = norm
    return W.reshape((n_rows, n_cols))
    def _init_features(self, features):
        '''
        This built-in feature model treats features as discrete, not distributed representations (embeddings). To handle
        distances between feature vectors, it is better to set no_words=True and use a GP label model.
        :param features:
        :return:
        '''

        self.feat_map = {}  # map features tokens to indices
        self.features = []  # list of mapped index values for each token

        if self.no_features:
            return

        N = len(features)

        for feat in features.flatten():
            if feat not in self.feat_map:
                self.feat_map[feat] = len(self.feat_map)

            self.features.append(self.feat_map[feat])

        self.features = np.array(self.features).astype(int)

        # sparse matrix of one-hot encoding, nfeatures x N, where N is number of tokens in the dataset
        self.features_mat = coo_matrix(
            (np.ones(len(features)), (self.features, np.arange(N)))).tocsr()
Beispiel #13
0
    def _compute_table_rank(self, contained):
        """Build one boolean relation matrix per rank (six in total).

        Two entries sharing a root are related at rank ``r`` when they have a
        common contained element of rank ``r``; every entry is also related
        to itself at each rank found among its own contained elements
        (rank 6 excluded).

        :param contained: compressed sparse matrix; only its ``indices`` and
            ``indptr`` arrays are read (presumably CSR, one row per
            dictionary entry -- confirm against the caller)
        :return: list of six boolean ``coo_matrix`` objects of shape
            ``self.shape``
        """
        logger.log(logging.DEBUG, "Computing tables relations")

        tables_rank = [([], []) for _ in range(6)]

        # Splitting ``indices`` at every ``indptr`` position yields one chunk
        # per row plus a leading and a trailing chunk, which are dropped.
        indices = [
            set(l) for l in np.split(contained.indices, contained.indptr)[1:-1]
        ]

        for root in self.dictionary.roots:
            for t0, t1 in combinations(self.dictionary.roots[root], 2):
                commons = [
                    self.dictionary.index[i]
                    for i in indices[t0.index] & indices[t1.index]
                ]

                ranks = set(map(lambda t: t.rank, commons))
                for rank in ranks:
                    # Store the relation in both directions (symmetric).
                    tables_rank[rank][0].extend((t0.index, t1.index))
                    tables_rank[rank][1].extend((t1.index, t0.index))

        for t in self.dictionary:
            ranks = {self.dictionary.index[i].rank
                     for i in indices[t.index]} - {6}
            for rank in ranks:
                tables_rank[rank][0].append(t.index)
                tables_rank[rank][1].append(t.index)

        # ``np.bool`` was only an alias of the builtin ``bool`` and has been
        # removed from NumPy.
        return [
            coo_matrix(([True] * len(i), (i, j)),
                       shape=self.shape,
                       dtype=bool) for i, j in tables_rank
        ]
Beispiel #14
0
    def _prepare_sparse_data(data):
        """Copy a 2D integer matrix into three shared ``multiprocessing`` arrays.

        The input is converted to COO format (if it is not already) and its
        ``data``, ``row`` and ``col`` arrays are copied into ``mp.Array``
        buffers.

        :param data: 2D NumPy array/matrix or SciPy sparse matrix with dtype
            ``int32`` or ``int64``
        :return: tuple ``(data buffer, row-index buffer, column-index buffer)``
        :raises ValueError: if ``data`` is not two-dimensional or its dtype
            is not supported
        """
        if not hasattr(data, 'dtype') or not hasattr(
                data, 'shape') or len(data.shape) != 2:
            raise ValueError(
                '`data` must be a NumPy array/matrix or SciPy sparse matrix of two dimensions'
            )

        # The removed ``np.int`` alias (the builtin ``int``) used to be
        # checked first and mapped to ``c_int``, which is narrower than the
        # platform default integer on most systems.  Matching on the explicit
        # widths keeps the element size of the shared buffer equal to the
        # dtype's.
        if data.dtype == np.int32:
            arr_ctype = ctypes.c_int32
        elif data.dtype == np.int64:
            arr_ctype = ctypes.c_int64
        else:
            raise ValueError('dtype of `data` is not supported: `%s`' %
                             data.dtype)

        if not hasattr(
                data, 'format'
        ):  # dense matrix -> convert to sparse matrix in coo format
            data = coo_matrix(data)
        elif data.format != 'coo':
            data = data.tocoo()

        sparse_data_base = mp.Array(arr_ctype, data.data)
        sparse_rows_base = mp.Array(ctypes.c_int,
                                    data.row)  # TODO: datatype correct?
        sparse_cols_base = mp.Array(ctypes.c_int,
                                    data.col)  # TODO: datatype correct?

        logger.info(
            'initializing evaluation with sparse matrix of format `%s` and shape %dx%d'
            % (data.format, data.shape[0], data.shape[1]))

        return sparse_data_base, sparse_rows_base, sparse_cols_base
Beispiel #15
0
def generate_adjacency(X, graph):
    """Build a symmetric sparse adjacency matrix from an edge list.

        Inputs:
        -------
        X: tensor, node features of all graphs (only its first dimension,
           the number of nodes, is used)
        graph: tensor of shape (2, num_edges), edge list of all graphs

        Output:
        -------
        adjacency: scipy COO matrix of shape (num_nodes, num_nodes) holding
                   1.0 for every edge and its reverse

    """

    # Number of nodes
    num_nodes = X.size(0)
    # Convert the tensor to a numpy array
    graph_np = graph.detach().cpu().numpy()

    # Add the reverse of every edge, then drop duplicate (src, dst) pairs.
    # ``np.unique`` along axis 0 returns the unique rows in sorted order.
    both_directions = np.concatenate((graph_np, np.flipud(graph_np)), axis=1)
    edge_index = np.unique(both_directions.T, axis=0).reshape(-1, 2)

    # Build the sparse adjacency matrix.  The number of values must match the
    # number of de-duplicated edges, not the length of the input edge list.
    adjacency = coo_matrix(
        (np.ones(len(edge_index)), (edge_index[:, 0], edge_index[:, 1])),
        shape=(num_nodes, num_nodes),
        dtype=float)

    return adjacency
Beispiel #16
0
    def __init__(self,
                 worker_id,
                 tasks_queue,
                 results_queue,
                 data,
                 group=None,
                 target=None,
                 name=None,
                 args=(),
                 kwargs=None):
        """Set up the worker and rebuild one sparse matrix per document.

        ``group``, ``target``, ``name``, ``args`` and ``kwargs`` are forwarded
        to the parent constructor.  ``data`` maps a document label to a triple
        of shared buffers (values, row indices, column indices); each triple
        is viewed as NumPy arrays and wrapped in a ``coo_matrix`` stored in
        ``self.data_per_doc``.
        """
        super(MultiprocModelsWorkerABC, self).__init__(group, target, name,
                                                       args, kwargs or {})

        logger.debug('worker `%s`: creating worker with ID %d' %
                     (self.name, worker_id))
        self.worker_id = worker_id
        self.tasks_queue = tasks_queue
        self.results_queue = results_queue

        self.data_per_doc = {}
        for doc_label, (values_base, rows_base, cols_base) in data.items():
            # View the shared buffers as arrays
            values = np.ctypeslib.as_array(values_base.get_obj())
            row_ind = np.ctypeslib.as_array(rows_base.get_obj())
            col_ind = np.ctypeslib.as_array(cols_base.get_obj())
            logger.debug(
                'worker `%s`: creating sparse data matrix for document `%s`' %
                (self.name, doc_label))
            # NOTE(review): no explicit shape is passed, so scipy infers it
            # from the largest stored indices -- confirm this is intended.
            self.data_per_doc[doc_label] = coo_matrix(
                (values, (row_ind, col_ind)))
Beispiel #17
0
    def test_mul_sparse_matrix(self):
        """Products of ``basic_m`` with sparse operands match dense ``matmul``.

        Also checks the ``is_symmetric`` flag of each result and that plain
        scipy COO operands are rejected.
        """
        rect_rows = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]]

        # test unsymmetric times unsymmetric
        mat = self.basic_m
        mat_dense = mat.toarray()
        product = mat * mat
        expected = np.matmul(mat_dense, mat_dense)
        self.assertFalse(product.is_symmetric)
        self.assertTrue(np.allclose(product.toarray(), expected))

        # test symmetric result
        mat = self.basic_m
        mat_dense = mat.toarray()
        product = mat.transpose() * mat
        expected = np.matmul(mat_dense.transpose(), mat_dense)
        self.assertTrue(product.is_symmetric)
        self.assertTrue(np.allclose(product.toarray(), expected))

        # test unsymmetric with rectangular
        mat = self.basic_m
        rect_dense = np.array(rect_rows)
        product = mat * COOMatrix(rect_dense)
        expected = np.matmul(mat.toarray(), rect_dense)
        self.assertFalse(product.is_symmetric)
        self.assertTrue(np.allclose(product.toarray(), expected))

        # test unsymmetric with rectangular scipycoo
        mat = self.basic_m
        scipy_rect = coo_matrix(np.array(rect_rows))
        with self.assertRaises(Exception):
            mat * scipy_rect
Beispiel #18
0
    def probes(self, x: ndarray):
        """Return matrix which acts on a solution vector to find its values
        on points `x`.

        Multiplying this matrix with a finite element function vector is
        analogous to assembling a `Functional`: it can be seen as the matrix
        of inner products of the basis test functions with Dirac deltas
        located at `x`.  Since its action is concentrated at points, it is
        not assembled with the usual quadratures.

        """
        find_cells = self.mesh.element_finder(mapping=self.mapping)
        cells = find_cells(*x)
        local_pts = self.mapping.invF(x[:, :, np.newaxis], tind=cells)

        # One row of basis values per local basis function
        basis_values = []
        for k in range(self.Nbfun):
            basis_values.append(
                self.elem.gbasis(self.mapping, local_pts, k, tind=cells)[0])
        phis = np.array(basis_values).flatten()

        n_points = x.shape[1]
        rows = np.tile(np.arange(n_points), self.Nbfun)
        cols = self.element_dofs[:, cells].flatten()
        return coo_matrix((phis, (rows, cols)), shape=(n_points, self.N))
Beispiel #19
0
def generate_sparse_matrix(n, pl, bw):
    """Generate an ``n x n`` banded test matrix in COO format.

    Each row ``i`` receives ``2 * pl + 1`` entries of value 1: one on the
    diagonal and ``pl`` on each side at column offsets spread evenly over
    ``[0, bw)``.  Offsets that would fall outside the matrix are redirected
    to the diagonal, so several entries may share a position (scipy sums
    them on conversion).

    :param n: number of rows and columns
    :param pl: number of entries on each side of the diagonal, per row
    :param bw: half bandwidth over which the offsets are spread
    :return: ``coo_matrix`` of shape ``(n, n)`` and dtype ``double``
    """
    per_line = pl * 2 + 1

    # ``np.int`` / ``np.float`` were aliases of the builtins and have been
    # removed from NumPy; the builtin types are used directly.
    pts = np.linspace(1, 1 + bw, pl, False, dtype=int) - 1

    diag = np.arange(n, dtype=int)[:, np.newaxis]  # column vector of row ids

    rs = diag + pts
    rs = np.where(rs >= n, diag, rs)  # past the right edge -> diagonal

    ls = diag - pts
    ls = np.where(ls < 0, diag, ls)  # past the left edge -> diagonal

    # Per row: left neighbours, the diagonal, right neighbours.
    cols_data = np.hstack((ls, diag, rs)).ravel()
    rows_data = np.repeat(np.arange(n, dtype=int), per_line)
    data_data = np.ones(per_line * n, dtype=float)

    matrix = coo_matrix((data_data, (rows_data, cols_data)), shape=(n, n), dtype=np.double)
    return matrix
Beispiel #20
0
def calculate_transition_matrix(potential, resolution, beta, bounds=None):
    """Calculate a transition matrix from a potential.

    The potential is evaluated on a grid; every grid point is a state and
    may move to each neighbouring grid point with unnormalised weight
    ``exp(-beta * (V_to - V_from))``.  Moves leaving the grid are not stored.
    Each row is then normalised to sum to one.

    :param potential: object exposing ``bounds`` and ``potential(x, y)``
    :param resolution: passed to ``plotting.get_grid``
    :param beta: factor multiplying the potential difference in the exponent
    :param bounds: grid bounds; defaults to ``potential.bounds``
    :return: tuple ``(t_matrix, grid)`` with ``t_matrix`` a CSR matrix of
        shape ``(n_states, n_states)``
    """
    if bounds is None:
        bounds = potential.bounds
    grid = plotting.get_grid(bounds, resolution)

    n_grid_rows, n_grid_cols = grid.shape[1], grid.shape[2]
    n_states = n_grid_rows * n_grid_cols

    # The transition matrix is collected as COO triplets in plain lists
    # (appending to NumPy arrays would copy them on every step).
    t_matrix_rows = []
    t_matrix_cols = []
    t_matrix_data = []

    neighbors = _neighbors()
    potential = potential.potential(grid[0], grid[1])

    for row_i in range(n_grid_rows):
        for col_i in range(n_grid_cols):
            # Loop through each starting point
            pot = potential[row_i, col_i]
            from_state = _state_id(row_i, col_i, grid.shape)
            # Only do nearest-neighbor
            for neighs in neighbors:
                nrow_i = row_i + neighs[0]
                ncol_i = col_i + neighs[1]
                if not (0 <= nrow_i < n_grid_rows
                        and 0 <= ncol_i < n_grid_cols):
                    # Transition probability to states outside our state
                    # space is zero. This is not in the transition matrix
                    continue

                to_state = _state_id(nrow_i, ncol_i, grid.shape)
                delta_pot = potential[nrow_i, ncol_i] - pot

                # Store info for our sparse matrix
                t_matrix_rows.append(from_state)
                t_matrix_cols.append(to_state)
                t_matrix_data.append(np.exp(-beta * delta_pot))

    t_matrix = coo.coo_matrix(
        (np.asarray(t_matrix_data, dtype=float),
         (np.asarray(t_matrix_rows, dtype=int),
          np.asarray(t_matrix_cols, dtype=int))),
        shape=(n_states, n_states)).tocsr()

    # Normalize each row using the CSR row pointers
    for trow_i in range(n_states):
        rfrom = t_matrix.indptr[trow_i]
        rto = t_matrix.indptr[trow_i + 1]
        normalization = np.sum(t_matrix.data[rfrom:rto])
        if normalization < EPSILON:
            print("No transitions from %d" % (trow_i))
        else:
            t_matrix.data[rfrom:rto] = t_matrix.data[rfrom:rto] / normalization

    return t_matrix, grid
Beispiel #21
0
 def _assemble_scipy_csr(
         indices: ndarray,
         data: ndarray,
         shape: Tuple[int, ...],
         local_shape: Optional[Tuple[int, ...]]
 ):
     K = coo_matrix((data, (indices[0], indices[1])), shape=shape)
     K.eliminate_zeros()
     return K.tocsr()
Beispiel #22
0
    def test_mul_sparse_matrix(self):
        """Products involving the fixture matrices match dense ``matmul``.

        Also checks the ``is_symmetric`` flag of each result and that plain
        scipy COO operands are rejected.
        """
        rect_rows = [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]]

        # test symmetric times symmetric
        left = self.g_matrix
        left_dense = left.toarray()
        product = left * left
        expected = np.matmul(left_dense, left_dense)
        self.assertTrue(product.is_symmetric)
        self.assertTrue(np.allclose(product.toarray(), expected))

        # test symmetric times unsymmetric
        left = self.basic_m
        rect_dense = np.array(rect_rows)
        product = left * COOMatrix(rect_dense)
        expected = np.matmul(left.toarray(), rect_dense)
        self.assertFalse(product.is_symmetric)
        self.assertTrue(np.allclose(product.toarray(), expected))

        # test symmetric times full symmetric
        right = self.full_m
        left_dense = left.toarray()
        right_dense = right.toarray()
        product = left * right
        expected = np.matmul(left_dense, right_dense)
        self.assertTrue(product.is_symmetric)
        self.assertTrue(np.allclose(product.toarray(), expected))

        # test symmetric times full scipycoo
        right = coo_matrix(
            (self.full_m.data, (self.full_m.row, self.full_m.col)),
            shape=self.full_m.shape)
        # Dense conversions are still performed although their results are
        # not used afterwards.
        left.toarray()
        right.toarray()
        with self.assertRaises(Exception):
            left * right

        # rectangular scipy COO operand is rejected as well
        left = self.basic_m
        right = coo_matrix(np.array(rect_rows))
        with self.assertRaises(Exception):
            left * right
Beispiel #23
0
def buildLaplacianMat(rt, userNum, itemNum, adj_type):
    """Build a user-item matrix or a (normalised) bipartite adjacency matrix.

    The adjacency matrix has ``userNum + itemNum`` rows and columns, with
    users first and items after them: ratings fill the user->item block and
    its transpose fills the item->user block.

    :param rt: table with ``userId``, ``itemId`` and ``rating`` columns
    :param userNum: number of users (ids must lie in ``[0, userNum)``)
    :param itemNum: number of items (ids must lie in ``[0, itemNum)``)
    :param adj_type: one of

        * ``'ui_mat'``    -- the ``userNum x itemNum`` rating matrix
        * ``'plain_adj'`` -- the adjacency matrix ``A``
        * ``'norm_adj'``  -- ``D^-1/2 (A + I) D^-1/2`` with ``D`` the degree
          of ``A + I``
        * ``'mean_adj'``  -- ``D^-1/2 A D^-1/2`` with ``D`` the degree of ``A``
    :return: scipy sparse matrix, or ``None`` for an unrecognised ``adj_type``
    """
    total = userNum + itemNum
    ratings, users, items = rt['rating'], rt['userId'], rt['itemId']

    # Shapes are given explicitly: letting scipy infer them from the largest
    # id present yields blocks that are too small (and cannot be stacked)
    # whenever the last user or item has no rating.
    uiMat = coo_matrix((ratings, (users, items)), shape=(userNum, itemNum))
    uiMat_upperPart = coo_matrix((ratings, (users, items + userNum)),
                                 shape=(userNum, total))
    uiMat_lowerPart = coo_matrix((ratings, (items, users)),
                                 shape=(itemNum, total))

    adj = sparse.vstack([uiMat_upperPart, uiMat_lowerPart])

    def normalize_adj(adj):
        """Return ``D^-1/2 adj D^-1/2`` as a COO matrix, ``D`` = row sums."""
        adj = adj.tocsr()
        degree = sparse.csr_matrix(adj.sum(axis=1))
        # Sparse power only touches stored entries, so zero degrees stay 0.
        d_inv_sqrt = degree.power(-0.5)  # csr_matrix (size, 1)
        d_inv_sqrt = np.array(d_inv_sqrt.todense()).reshape(-1)
        D = sparse.diags(d_inv_sqrt)
        L = D.dot(adj).dot(D)  # csr_matrix (size, size)
        return sparse.coo_matrix(L)

    if adj_type == 'ui_mat':
        return uiMat
    elif adj_type == 'plain_adj':
        return adj
    # A' = (D + I)^-1/2  ( A + I )  (D + I)^-1/2
    elif adj_type == 'norm_adj':
        selfLoop = sparse.eye(total)
        return normalize_adj(adj + selfLoop)
    # A'' = D^-1/2 A D^-1/2
    elif adj_type == 'mean_adj':
        return normalize_adj(adj)
    return None
def merge_sparse(matrix1, matrix2):
    """Combine the stored entries of two sparse matrices into one CSR matrix.

    Both inputs are converted to COO format and their triplets concatenated;
    the result takes the shape of ``matrix1``.
    """
    parts = (matrix1.tocoo(), matrix2.tocoo())

    data = np.concatenate(tuple(part.data for part in parts))
    rows = np.concatenate(tuple(part.row for part in parts))
    cols = np.concatenate(tuple(part.col for part in parts))

    merged = coo.coo_matrix((data, (rows, cols)), shape=matrix1.shape)
    return merged.tocsr()
def merge_sparse(matrix1, matrix2):
    """Combine the stored entries of two sparse matrices into one CSR matrix.

    Both inputs are converted to COO format and their triplets concatenated;
    the result takes the shape of ``matrix1``.
    """
    parts = (matrix1.tocoo(), matrix2.tocoo())

    data = np.concatenate(tuple(part.data for part in parts))
    rows = np.concatenate(tuple(part.row for part in parts))
    cols = np.concatenate(tuple(part.col for part in parts))

    merged = coo.coo_matrix((data, (rows, cols)), shape=matrix1.shape)
    return merged.tocsr()
Beispiel #26
0
    def tocoo(self, copy=True):
        """Return this compressed matrix in COOrdinate format.

        The index pointer array is expanded into one major-axis index per
        stored entry; ``self._swap`` then maps the (major, minor) pair to
        (row, col).

        :param copy: forwarded to the ``coo_matrix`` constructor
        :return: ``coo_matrix`` with this matrix's shape and dtype
        """
        major_dim, minor_dim = self._swap(self.shape)
        minor_indices = self.indices
        major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
        # Fills ``major_indices`` in place from the index pointer array.
        _sparsetools.expandptr(major_dim, self.indptr, major_indices)
        row, col = self._swap((major_indices, minor_indices))

        # Import from the public namespace; ``scipy.sparse.coo`` is a
        # private, deprecated module path.
        from scipy.sparse import coo_matrix
        return coo_matrix((self.data, (row, col)),
                          self.shape,
                          copy=copy,
                          dtype=self.dtype)
    def test_sparse_divide(self):
        """Dividing a sparse pybamm matrix by a column vector of ones matches
        the equivalent dense NumPy division."""
        values = np.array([4, 5, 7, 9])
        rows = np.array([0, 3, 1, 0])
        cols = np.array([0, 3, 1, 2])
        S1 = coo_matrix((values, (rows, cols)), shape=(4, 5))
        v1 = np.ones((4, 1))

        quotient = pybamm.Matrix(S1) / pybamm.Vector(v1)
        expected = S1.toarray() / v1

        np.testing.assert_array_equal(quotient.evaluate().toarray(), expected)
Beispiel #28
0
def buildLaplacianMat(rt, userNum, itemNum, adj_type):
    """Build the (optionally normalised) bipartite user-item adjacency matrix.

    The adjacency matrix has ``userNum + itemNum`` rows and columns, with
    users first and items after them: ratings fill the user->item block and
    its transpose fills the item->user block.

    :param rt: table with ``userId``, ``itemId`` and ``rating`` columns
    :param userNum: number of users (ids must lie in ``[0, userNum)``)
    :param itemNum: number of items (ids must lie in ``[0, itemNum)``)
    :param adj_type: one of

        * ``'plain_adj'`` -- the adjacency matrix ``A``
        * ``'norm_adj'``  -- ``D^-1/2 (A + I) D^-1/2`` with ``D`` the degree
          of ``A + I``
        * ``'mean_adj'``  -- ``D^-1/2 A D^-1/2`` with ``D`` the degree of ``A``
    :return: scipy sparse matrix, or ``None`` for an unrecognised ``adj_type``
    """
    total = userNum + itemNum
    ratings, users, items = rt['rating'], rt['userId'], rt['itemId']

    # Shapes are given explicitly: letting scipy infer them from the largest
    # id present yields blocks that are too small (and cannot be stacked)
    # whenever the last user or item has no rating.
    uiMat_upperPart = coo_matrix((ratings, (users, items + userNum)),
                                 shape=(userNum, total))
    uiMat_lowerPart = coo_matrix((ratings, (items, users)),
                                 shape=(itemNum, total))

    adj = sparse.vstack([uiMat_upperPart, uiMat_lowerPart])

    def normalize_adj(adj):
        """Return ``D^-1/2 adj D^-1/2`` as a COO matrix, ``D`` = row sums."""
        adj = adj.tocsr()
        degree = sparse.csr_matrix(adj.sum(axis=1))
        # Sparse power only touches stored entries, so zero degrees stay 0.
        d_inv_sqrt = degree.power(-0.5)  # csr_matrix (size ,1)
        d_inv_sqrt = np.array(d_inv_sqrt.todense()).reshape(-1)
        D = sparse.diags(d_inv_sqrt)
        L = D.dot(adj).dot(D)  # csr_matrix (size, size)
        return sparse.coo_matrix(L)

    if adj_type == 'plain_adj':
        return adj
    # A' = (D + I)^-1/2  ( A + I )  (D + I)^-1/2
    elif adj_type == 'norm_adj':
        selfLoop = sparse.eye(total)
        return normalize_adj(adj + selfLoop)
    # A'' = D^-1/2 A D^-1/2
    elif adj_type == 'mean_adj':
        return normalize_adj(adj)
    return None
Beispiel #29
0
    def __init__(self,
                 arg1,
                 shape=None,
                 filename="sparse.spy",
                 tablename="dok_matrix",
                 dtype=None,
                 copy=False,
                 commit_freq=1.0):
        """Create the matrix from a shape tuple, a sparse matrix or dense data.

        :param arg1: ``(M, N)`` shape tuple, a scipy sparse matrix, or
            anything ``np.asarray`` turns into a 2D array
        :param shape: accepted but not read by this constructor
        :param filename: forwarded to the ``ddict`` backing store
        :param tablename: forwarded to the ``ddict`` backing store
        :param dtype: requested dtype (defaults to ``float``)
        :param copy: copy ``arg1`` when it is already a DOK matrix
        :param commit_freq: forwarded to the ``ddict`` backing store
        :raises TypeError: if ``arg1`` cannot be converted to an array or is
            not two-dimensional
        """
        spmatrix.__init__(self)

        self.dtype = getdtype(dtype, default=float)
        if isinstance(arg1, tuple) and isshape(arg1):  # (M,N)
            M, N = arg1
            self.shape = (M, N)
        elif isspmatrix(arg1):  # Sparse ctor
            if isspmatrix_dok(arg1) and copy:
                arg1 = arg1.copy()
            else:
                arg1 = arg1.todok()

            if dtype is not None:
                arg1 = arg1.astype(dtype)

            self.shape = arg1.shape
            self.dtype = arg1.dtype
        else:  # Dense ctor
            try:
                arg1 = np.asarray(arg1)
            except Exception:
                # Narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit are no longer converted into TypeError.
                raise TypeError('invalid input format')

            if len(arg1.shape) != 2:
                raise TypeError('expected rank <=2 dense array or matrix')

            # Import from the public namespace; ``scipy.sparse.coo`` is a
            # private, deprecated module path.
            from scipy.sparse import coo_matrix
            d = coo_matrix(arg1, dtype=dtype).todok()
            self.shape = arg1.shape
            self.dtype = d.dtype

        # The backing store needs the final shape for its key (de)serialisers,
        # so it is initialised only after the shape has been determined.
        ddict.__init__(self,
                       filename,
                       tablename=tablename,
                       commit_freq=commit_freq,
                       key_types=("UNSIGNED INTEGER",
                                  int_tuple_ser(*self.shape),
                                  int_tuple_unser(*self.shape)),
                       val_types=("REAL", float, float))

        # Copy the initial entries into the store (nothing to copy when only
        # a shape was given).
        if isspmatrix(arg1):  # Sparse ctor
            ddict.update(self, arg1)
        elif not (isinstance(arg1, tuple) and isshape(arg1)):
            ddict.update(self, d)
Beispiel #30
0
def alsModel(ratingsPath):
    """Fit an implicit ALS model on a purchases CSV and dump recommendations.

    Reads ``ratingsPath`` with pandas, builds an item x user sparse matrix of
    purchase counts, trains ``implicit.als.AlternatingLeastSquares`` with 50
    factors, and writes the recommendations for a fixed list of users to
    ``../../../data/processed/als-recommendations.csv``. Returns ``None``.

    NOTE(review): the values in ``userlist`` are passed straight to
    ``model.recommend`` while the matrix rows/columns are pandas category
    codes -- confirm these values are codes and not raw user ids.
    """
    print("\n***Generating Recommendations using ALS model***")
    userlist = [
        56886, 71026, 73136, 88837, 125582, 25403, 36860, 70402, 132121, 4363,
        56592, 83665, 87073, 99945
    ]
    model_name = "Alternating Least Squares"

    ratingsMatrix = pd.read_csv(ratingsPath)
    column_aliases = {
        'product_id_left': 'product_id',
        'ones_left': 'purchases'
    }
    ratingsMatrix.rename(columns=column_aliases, inplace=True)

    # Categorical columns give every user / product a dense integer code.
    for id_column in ('user_id', 'product_id'):
        ratingsMatrix[id_column] = ratingsMatrix[id_column].astype("category")

    # Sparse item x user matrix holding the purchase counts.
    confidence = ratingsMatrix['purchases'].astype(float)
    item_codes = ratingsMatrix['product_id'].cat.codes
    user_codes = ratingsMatrix['user_id'].cat.codes
    purchases = coo_matrix((confidence, (item_codes, user_codes)))

    print("Initializing model")
    model = implicit.als.AlternatingLeastSquares(factors=50)

    # Train on the item/user/confidence matrix and log the elapsed time.
    log.debug("training model %s", model_name)
    start = time.time()
    model.fit(purchases)
    log.debug("trained model '%s' in %s", model_name, time.time() - start)
    log.debug("calculating top purchases")

    # model.recommend is queried with the user x item view of the matrix.
    user_items = purchases.T.tocsr()
    recommendations = [
        [userid, itemid, score]
        for userid in userlist
        for itemid, score in model.recommend(userid, user_items)
    ]

    recommendedProductsDf = pd.DataFrame(
        recommendations, columns=["userid", "productid", "score"])
    print(recommendedProductsDf.head(10))
    recommendedProductsDf.to_csv(
        "../../../data/processed/als-recommendations.csv")
Beispiel #31
0
def graph_to_sparse_matrix(graph):
    """
    Creates a connectivity sparse matrix from the adjacency graph
    (list of neighbors list)

    ``graph[i]`` is the collection of neighbor indices of node ``i``. The
    result is an integer ``coo_matrix`` with a 1 stored at ``(i, j)`` for
    every neighbor ``j`` listed under ``i``.

    No explicit shape is passed, so the matrix shape is inferred by scipy
    from the largest row / column index that actually appears.
    """
    # Public import path; ``scipy.sparse.coo`` is a deprecated private module.
    from scipy.sparse import coo_matrix

    rows = []
    cols = []
    # ``range`` instead of the Python-2-only ``xrange``.
    for i in range(len(graph)):
        neighbors = list(graph[i])
        rows.extend([i] * len(neighbors))
        cols.extend(neighbors)

    return coo_matrix((np.ones(len(rows), dtype=int), (rows, cols)))
Beispiel #32
0
def graph_to_sparse_matrix(graph):
    """
    Creates a connectivity sparse matrix from the adjacency graph
    (list of neighbors list)

    For every node index ``i`` and every ``j`` in ``graph[i]`` an integer 1
    is stored at position ``(i, j)`` of the returned ``coo_matrix``.

    The shape is not given explicitly; scipy infers it from the largest
    row / column index present.
    """
    # Use the public namespace: ``scipy.sparse.coo`` is a deprecated private
    # module.
    from scipy.sparse import coo_matrix
    n_vox = len(graph)

    ij = [[], []]
    # ``xrange`` only exists on Python 2; ``range`` works on both.
    for i in range(n_vox):
        ij[0] += [i] * len(graph[i])
        ij[1] += list(graph[i])

    return coo_matrix((np.ones(len(ij[0]), dtype=int), (ij[0], ij[1])))
Beispiel #33
0
 def build_U(self, f):
     """Build the sparse 0/1 matrix U for the lookup table ``f`` as a Qobj.

     Each column index ``b`` in ``range(2**(2 * bits))`` is split into a high
     and a low half of ``self.bits`` bits. The single 1 in that column is
     placed in row ``(high << bits) | (f[high] ^ low)``.

     Prints a message and calls ``exit(1)`` when ``self.bits`` exceeds 32.

     NOTE(review): ``f`` is indexed with a NumPy integer array, so it is
     presumably a NumPy array with at least ``2**bits`` entries -- confirm.
     """
     width = self.bits
     if width > 32:
         print("bits = {}, exiting".format(width))
         exit(1)

     dim = 2**(2 * width)
     col = np.arange(dim)
     high = col >> width
     low = col % 2**width
     # XOR the low half with f(high); the high half is carried over as is.
     row = (high << width) | (f[high] ^ low)

     ones = [1] * dim
     sparse_u = coo.coo_matrix((ones, (row, col)), shape=[dim, dim])
     return qutip.Qobj(sparse_u)
Beispiel #34
0
def face_angles_sparse(mesh):
    """
    Arrange the face angles of a mesh in a sparse matrix.

    The flattened ``mesh.face_angles`` values are stored at the row / column
    positions taken from ``mesh.faces_sparse``.

    Returns
    ----------
    sparse: scipy.sparse.coo_matrix with:
            data:  mesh.face_angles, flattened
            shape: mesh.faces_sparse.shape
    """
    angles = mesh.face_angles.flatten()
    layout = mesh.faces_sparse
    positions = (layout.row, layout.col)
    return coo_matrix((angles, positions), layout.shape)
Beispiel #35
0
    def toarray(self) -> ndarray:
        """Densify this sparse tensor into a numpy array of ``self.shape``.

        Rank 2 delegates to ``self.tocsr()``. Rank 1 goes through a one-column
        ``coo_matrix``. Any other rank is accumulated entry by entry, which is
        slow and meant for testing. Repeated indices are summed.
        """
        rank = len(self.shape)

        if rank == 2:
            return self.tocsr().toarray()

        if rank == 1:
            # Build an (n, 1) column matrix, then read it back as a vector.
            column = coo_matrix(
                (self.data, (self.indices[0], np.zeros_like(self.indices[0]))),
                shape=self.shape + (1,),
            )
            return column.toarray().T[0]

        # Generic N-d fallback: one stored entry per column of self.indices.
        dense = np.zeros(self.shape)
        for position, multi_index in enumerate(self.indices.T):
            dense[tuple(multi_index)] += self.data[position]
        return dense
Beispiel #36
0
    def _compute_contains(self):
        """Build the boolean "contains" relation as a sparse COO matrix.

        The matrix starts with the full diagonal (one entry per index in
        ``range(len(self.dictionary))``). Then, for every paradigm term ``p``
        of every root, it gets an entry at ``(p.index, x)`` for:

        * the term of each of ``p``'s singular sequences, and
        * each paradigm ``k`` of the same root with ``k.script in p.script``.

        :return: ``coo_matrix`` of dtype ``bool`` and shape ``self.shape``.
        """
        logger.log(logging.DEBUG, "Computing contains/contained relations")

        # Seed with the diagonal entries.
        i = list(range(len(self.dictionary)))
        j = list(range(len(self.dictionary)))
        for r_p, v in self.dictionary.roots.items():
            paradigms = {t for t in v if t.script.paradigm}

            for p in paradigms:
                _contains = [self.dictionary.terms[ss].index for ss in p.script.singular_sequences] + \
                            [k.index for k in paradigms if k.script in p.script]
                i.extend(repeat(p.index, len(_contains)))
                j.extend(_contains)

        # Builtin ``bool``: the ``np.bool`` alias was removed in NumPy 1.24.
        return coo_matrix(([True] * len(i), (i, j)), shape=self.shape, dtype=bool)
 def test_03_03_color_ijv(self):
     """Color mode: pixels covered by the same set of labels share a color."""
     workspace, module, ijv = self.make_workspace_ijv()
     self.assertTrue(isinstance(module, C.ConvertObjectsToImage))
     module.image_mode.value = C.IM_COLOR
     module.run(workspace)
     image = workspace.image_set.get_image(IMAGE_NAME)
     pixel_data = image.pixel_data

     rows, cols, labels = ijv[:, 0], ijv[:, 1], ijv[:, 2]
     #
     # Give every label its own bit (1, 2, 4, 8, ...). Summing the bits per
     # pixel yields a matrix that encodes which labels are on at each pixel.
     #
     label_bits = 2 ** (labels - 1)
     bit_mask = coo_matrix((label_bits, (rows, cols))).toarray()
     #
     # Record the rendered color of every bit combination that occurs.
     #
     palette = np.zeros((np.max(label_bits) * 2, 3))
     palette[bit_mask[rows, cols], :] = pixel_data[rows, cols, :]

     self.assertTrue(np.all(pixel_data == palette[bit_mask, :]))
 def test_03_02_gray_ijv(self):
     """Grayscale mode: background is 0 and each label maps to one gray level."""
     workspace, module, ijv = self.make_workspace_ijv()
     self.assertTrue(isinstance(module, C.ConvertObjectsToImage))
     module.image_mode.value = C.IM_GRAYSCALE
     module.run(workspace)
     pixel_data = workspace.image_set.get_image(IMAGE_NAME).pixel_data

     rows, cols, labels = ijv[:, 0], ijv[:, 1], ijv[:, 2]

     # Number of ijv entries per pixel; uncovered pixels must be black.
     coverage = coo_matrix((np.ones(ijv.shape[0]), (rows, cols))).toarray()
     self.assertTrue(np.all(pixel_data[coverage == 0] == 0))

     # Replace every distinct intensity by its rank among the unique values.
     intensities = np.unique(pixel_data)
     intensity_rank = np.zeros(pixel_data.shape, int)
     for rank, value in enumerate(intensities[1:], start=1):
         intensity_rank[pixel_data == value] = rank

     # Remember one rank per label, then check every ijv entry against it.
     rank_at_entry = intensity_rank[rows, cols]
     rank_by_label = np.zeros(np.max(labels + 1), int)
     rank_by_label[labels] = rank_at_entry
     consistent = rank_at_entry == rank_by_label[labels]

     pixel_ok = np.zeros(pixel_data.shape, bool)
     pixel_ok[ijv[consistent, 0], ijv[consistent, 1]] = True
     self.assertTrue(np.all(pixel_ok[coverage > 0]))
 def __get_predicted_labels(self, data_matrix, features_to_include):
     """
     Predict a cluster for every feature vector and store it in that vector.

     Each feature vector in data_matrix is laid out as a single sparse row in
     the column order of the vectorizer's feature names (missing features
     count as 0) and passed to the k-means estimator. The predicted label is
     written back under self.LABEL_FEATURE_KEY, so data_matrix is modified in
     place.

     Returns the expanded data matrix, filtered down to features_to_include
     when that argument is non-empty.
     """
     feature_names = self.vectorizer.get_feature_names()
     n_features = len(feature_names)
     # The row / column layout is the same for every vector: build it once.
     # Real lists are required; a lazy Python 3 range / map object cannot be
     # used as coo_matrix indices or data.
     row = [0] * n_features
     column = list(range(n_features))
     for feature_vector in data_matrix:
         data = [feature_vector[feature_name] if feature_name in feature_vector else 0
                 for feature_name in feature_names]
         feature_csr_matrix = csr_matrix(
             coo_matrix((data, (row, column)), shape=(1, n_features)))
         predicted_label = self.k_means_estimator.predict(feature_csr_matrix)
         feature_vector[self.LABEL_FEATURE_KEY] = predicted_label[0]

     expanded_data_matrix = self.__get_expanded_data_matrix(data_matrix)
     if features_to_include:
         return self.__get_filtered_data_matrix(expanded_data_matrix, features_to_include)
     else:
         return expanded_data_matrix
Beispiel #40
0
    def _compute_father(self):
        """Build the three father relations as sparse boolean matrices.

        For every term, and for every sub-script of its script that has
        children and is not a ``NullScript``, child ``i`` of that sub-script
        (0: substance, 1: attribute, 2: mode) is resolved to dictionary term
        indexes by ``_recurse_script``. An entry ``(term.index, father_index)``
        is added to matrix ``i``, unless the matching ``father_*`` name is
        listed in the term's inhibitions.

        :return: list of three ``coo_matrix`` objects of dtype ``bool`` and
            shape ``self.shape``, ordered substance, attribute, mode.
        """
        logger.log(logging.DEBUG, "Computing father/child relations")

        def _recurse_script(script):
            # Collect the indexes of the dictionary terms reachable from
            # ``script``: a script that is itself a term contributes its
            # index, otherwise its children are searched while layer > 0.
            result = []
            for sub_s in script.children if isinstance(script, AdditiveScript) else [script]:
                if isinstance(sub_s, NullScript):
                    continue

                if sub_s in self.dictionary.terms:
                    result.append(self.dictionary.terms[sub_s].index)
                elif sub_s.layer > 0:
                    result.extend(chain.from_iterable(_recurse_script(c) for c in sub_s.children))

            return result

        # One (rows, cols) pair of index lists per relation.
        father = [([], []) for _ in range(3)]

        for t in self.dictionary.terms.values():
            s = t.script

            for sub_s in s if isinstance(s, AdditiveScript) else [s]:
                if len(sub_s.children) == 0 or isinstance(sub_s, NullScript):
                    continue

                # ``relation`` rather than ``s`` so the script variable above
                # is not shadowed.
                for i, relation in enumerate(('father_substance', 'father_attribute', 'father_mode')):
                    if relation in t.inhibitions:
                        continue

                    fathers_indexes = _recurse_script(sub_s.children[i])
                    father[i][0].extend(repeat(t.index, len(fathers_indexes)))
                    father[i][1].extend(fathers_indexes)

        # Builtin ``bool``: the ``np.bool`` alias was removed in NumPy 1.24.
        return [coo_matrix(([True] * len(rows), (rows, cols)), shape=self.shape, dtype=bool)
                for rows, cols in father]
Beispiel #41
0
    def _compute_siblings(self):
        """Build the four sibling relations as sparse boolean matrices.

        The returned list is indexed by sibling type:

        * 0 -- opposed
        * 1 -- associated
        * 2 -- crossed
        * 3 -- twin

        Only terms whose script is a ``MultiplicativeScript`` are compared,
        and only with terms of the same root. Roots whose script has layer 0
        are skipped. A relation named in ``root.inhibitions`` is not computed
        for that root. Every relation is stored symmetrically.

        :return: list of four ``coo_matrix`` objects of dtype ``bool`` and
            shape ``self.shape``.
        """
        def _opposed_sibling(s0, s1):
            # Same cardinal, first two children swapped.
            return not s0.empty and not s1.empty and\
                   s0.cardinal == s1.cardinal and\
                   s0.children[0] == s1.children[1] and s0.children[1] == s1.children[0]

        def _associated_sibling(s0, s1):
            # Same cardinal, same first two children, different third child.
            return s0.cardinal == s1.cardinal and\
                   s0.children[0] == s1.children[0] and \
                   s0.children[1] == s1.children[1] and \
                   s0.children[2] != s1.children[2]

        def _crossed_sibling(s0, s1):
            # Layer >= 2, and the first two children are pairwise opposed.
            return s0.layer >= 2 and \
                   s0.cardinal == s1.cardinal and \
                   _opposed_sibling(s0.children[0], s1.children[0]) and \
                   _opposed_sibling(s0.children[1], s1.children[1])

        # One (rows, cols) pair of index lists per sibling type.
        siblings = [([], []) for _ in range(4)]

        logger.log(logging.DEBUG, "Computing siblings relations")

        for root in self.dictionary.roots:
            # True when the relation is NOT inhibited for this root.
            _allow_opposed = 'opposed' not in root.inhibitions
            _allow_associated = 'associated' not in root.inhibitions
            _allow_crossed = 'crossed' not in root.inhibitions
            _allow_twin = 'twin' not in root.inhibitions

            if root.script.layer == 0:
                continue
            _twins = []

            # Filter once per root instead of once per outer iteration.
            candidates = [t for t in self.dictionary.roots[root]
                          if isinstance(t.script, MultiplicativeScript)]

            for pos, t0 in enumerate(candidates):
                if t0.script.children[0] == t0.script.children[1]:
                    _twins.append(t0)

                # Each unordered pair is visited once and stored both ways.
                for t1 in candidates[pos + 1:]:
                    if _allow_opposed and _opposed_sibling(t0.script, t1.script):
                        siblings[0][0].extend((t0.index, t1.index))
                        siblings[0][1].extend((t1.index, t0.index))

                    if _allow_associated and _associated_sibling(t0.script, t1.script):
                        siblings[1][0].extend((t0.index, t1.index))
                        siblings[1][1].extend((t1.index, t0.index))

                    if _allow_crossed and _crossed_sibling(t0.script, t1.script):
                        siblings[2][0].extend((t0.index, t1.index))
                        siblings[2][1].extend((t1.index, t0.index))

            if _allow_twin:
                # groupby only groups consecutive items, hence the sort.
                _twins = sorted(_twins, key=lambda t: t.script.cardinal)
                for card, g in groupby(_twins, key=lambda t: t.script.cardinal):
                    twin_indexes = [t.index for t in g]

                    if len(twin_indexes) > 1:
                        index0, index1 = list(zip(*permutations(twin_indexes, r=2)))
                        siblings[3][0].extend(index0)
                        siblings[3][1].extend(index1)

        # Builtin ``bool``: the ``np.bool`` alias was removed in NumPy 1.24.
        return [coo_matrix(([True] * len(rows), (rows, cols)), shape=self.shape, dtype=bool)
                for rows, cols in siblings]
Beispiel #42
0
threshold=boston.target.mean())
new_target[:5]
#array([ 1., 0., 1., 1., 1.])

# The same thresholding written as a plain NumPy comparison.
(boston.target[:5] > boston.target.mean()).astype(int)
#array([1, 0, 1, 1, 1])

# Object-oriented variant: a Binarizer built with the mean target as threshold.
bin = preprocessing.Binarizer(boston.target.mean())
new_target = bin.fit_transform(boston.target)
new_target[:5]
#array([ 1., 0., 1., 1., 1.])


# Sparse matrices
from scipy.sparse import coo
spar = coo.coo_matrix(np.random.binomial(1, .25, 100))
preprocessing.binarize(spar, threshold=-1)
#ValueError: Cannot binarize a sparse matrix with threshold < 0

# Working with categorical variables

iris = datasets.load_iris()
X = iris.data
y = iris.target
# Now, with X and y being as they normally will be, we'll operate on the data as one:
# stack X and y column-wise into a single matrix.
d = np.column_stack((X, y))

# (some steps are skipped here for now)

    docs, input_type, tokenizer = dataset_small()
    paths = docs
else:
    unchecked_paths, input_type, tokenizer = dataset_mails(doc_path)
    docs, paths = get_document_names(doc_path, unchecked_paths)

print('Loaded ', len(docs), ' files from path: ', doc_path, '.')

print('Extracting features.')
# Unigram-only vectorizer (ngram_range=(1, 1)) for the loaded input type.
vectorizer = create_vectorizer(input_type, tokenizer=tokenizer,
                               ngram_range=(1, 1))

data = vectorizer.fit_transform(paths)
features = vectorizer.get_feature_names()

# COO format exposes the .row / .col index arrays that are used below.
data = coo_matrix(data)

# Presumably drops entries weaker than 0.1 -- confirm against apply_threshold.
data = apply_threshold(data, 0.1)  # Filter out everything that is too weak.

print('Reading headers.')
# One header per line of <rescal_path>/headers.txt, read as UTF-8.
with codecs.open(rescal_path + '/headers.txt', 'r', encoding='utf8') as f:
    original_headers = f.read().splitlines()

# Prefixes identifying the two groups passed to new_tensor_slice below.
DOCUMENT = 'Need: '
FEATURE = 'Attr: '

# Each tuple is (prefix, names, index array): documents are paired with the
# matrix row indices and features with the column indices.
new_headers, offsets = new_tensor_slice(original_headers, [
    (DOCUMENT, docs, data.row), (FEATURE, features, data.col)]
)

document_offsets = offsets[DOCUMENT]
Beispiel #44
0
def sprandn(m, n, density=0.01, format="coo", dtype=None, random_state=None):
    """Generate a sparse matrix of the given shape and density with standard
    normally distributed values.

    Parameters
    ----------
    m, n : int
        shape of the matrix
    density : real
        density of the generated matrix: density equal to one means a full
        matrix, density of 0 means a matrix with no non-zero items.
    format : str
        sparse matrix format.
    dtype : dtype
        type of the returned matrix values.
    random_state : {numpy.random.RandomState, int}, optional
        Random number generator or random seed. If not given, the singleton
        numpy.random will be used.

    Returns
    -------
    Sparse matrix of shape ``(m, n)`` in the requested ``format`` with
    ``int(density * m * n)`` stored entries at distinct positions.

    Raises
    ------
    ValueError
        If ``density`` is outside ``[0, 1]`` or ``m * n`` does not fit the
        index type.
    NotImplementedError
        If ``dtype`` is given and is not one of ``np.float32``,
        ``np.float64`` or ``np.longdouble``.

    Notes
    -----
    Only float types are supported for now.
    """
    if density < 0 or density > 1:
        raise ValueError("density expected to be 0 <= density <= 1")
    if dtype and (dtype not in [np.float32, np.float64, np.longdouble]):
        raise NotImplementedError("type %s not supported" % dtype)

    mn = m * n

    # Use the C int index type when it is wide enough, else fall back to int64.
    tp = np.intc
    if mn > np.iinfo(tp).max:
        tp = np.int64

    if mn > np.iinfo(tp).max:
        msg = """\
Trying to generate a random sparse matrix such as the product of dimensions is
greater than %d - this is not supported on this machine
"""
        raise ValueError(msg % np.iinfo(tp).max)

    # Number of non zero values
    k = int(density * m * n)

    if random_state is None:
        random_state = np.random
    elif isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    # Use the algorithm from python's random.sample for k < mn/3.
    if mn < 3*k:
        # We should use this line, but choice is only available in numpy >= 1.7
        # ind = random_state.choice(mn, size=k, replace=False)
        ind = random_state.permutation(mn)[:k]
    else:
        # Rejection sampling: draw flat indices until k distinct ones exist.
        ind = np.empty(k, dtype=tp)
        selected = set()
        # ``range`` instead of the Python-2-only ``xrange``.
        for pos in range(k):
            candidate = random_state.randint(mn)
            while candidate in selected:
                candidate = random_state.randint(mn)
            selected.add(candidate)
            ind[pos] = candidate

    # Split the flat (column-major) index into column j and row i. Integer
    # floor division stays exact for indices too large for a float.
    j = (ind // m).astype(tp)
    i = (ind - j * m).astype(tp)
    vals = random_state.randn(k).astype(dtype)
    return coo_matrix((vals, (i, j)), shape=(m, n)).asformat(format)