Example #1
def run_snmnmf(V, V1):
    """
    Run sparse network-regularized multiple NMF. 
    
    :param V: First target matrix to estimate.
    :type V: :class:`numpy.matrix`
    :param V1: Second target matrix to estimate.
    :type V1: :class:`numpy.matrix`
    """
    rank = 10
    model = nimfa.mf(target = (V, V1), 
                  seed = "random_c", 
                  rank = rank, 
                  method = "snmnmf", 
                  max_iter = 12, 
                  initialize_only = True,
                  A = abs(sp.rand(V1.shape[1], V1.shape[1], density = 0.7, format = 'csr')),
                  B = abs(sp.rand(V.shape[1], V1.shape[1], density = 0.7, format = 'csr')), 
                  gamma = 0.01,
                  gamma_1 = 0.01,
                  lamb = 0.01,
                  lamb_1 = 0.01)
    fit = nimfa.mf_run(model)
    # print all quality measures concerning first target and mixture matrix in multiple NMF
    print_info(fit, idx = 0)
    # print all quality measures concerning second target and mixture matrix in multiple NMF
    print_info(fit, idx = 1)
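A minimal, illustrative driver for the snippet above; it assumes numpy is imported as np next to the nimfa/scipy.sparse imports the example already relies on, and the made-up shapes only need to share their row dimension:

import numpy as np

# Hypothetical inputs: two nonnegative targets with a common row dimension.
V = np.asmatrix(np.random.rand(40, 30))
V1 = np.asmatrix(np.random.rand(40, 25))
run_snmnmf(V, V1)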
Example #2
def generate_dummy_data(num_users=15000, num_items=30000, interaction_density=.00045, num_user_features=200,
                        num_item_features=200, n_features_per_user=20, n_features_per_item=20,  pos_int_ratio=.5,
                        return_datasets=False):

    if pos_int_ratio <= 0.0:
        raise Exception("pos_int_ratio must be > 0")

    print("Generating positive interactions")
    interactions = sp.rand(num_users, num_items, density=interaction_density * pos_int_ratio)
    if pos_int_ratio < 1.0:
        print("Generating negative interactions")
        interactions += -1 * sp.rand(num_users, num_items, density=interaction_density * (1 - pos_int_ratio))

    print("Generating user features")
    user_features = sp.rand(num_users, num_user_features, density=float(n_features_per_user) / num_user_features)

    print("Generating item features")
    item_features = sp.rand(num_items, num_item_features, density=float(n_features_per_item) / num_item_features)

    if return_datasets:
        interactions = create_tensorrec_dataset_from_sparse_matrix(interactions)
        user_features = create_tensorrec_dataset_from_sparse_matrix(user_features)
        item_features = create_tensorrec_dataset_from_sparse_matrix(item_features)

    return interactions, user_features, item_features
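An illustrative call with small, made-up sizes (return_datasets=False keeps plain scipy sparse matrices):

interactions, user_features, item_features = generate_dummy_data(
    num_users=100, num_items=200, interaction_density=0.05,
    num_user_features=20, num_item_features=20,
    n_features_per_user=5, n_features_per_item=5, pos_int_ratio=0.8)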
Example #3
def test_sparse_input():
    # note: Fixed random state in sp.rand is not supported in older scipy.
    #       The test should succeed regardless.
    X1 = sp.rand(50, 100)
    X2 = sp.rand(10, 100)
    forest_sparse = ignore_warnings(LSHForest, category=DeprecationWarning)(
        radius=1, random_state=0).fit(X1)
    forest_dense = ignore_warnings(LSHForest, category=DeprecationWarning)(
        radius=1, random_state=0).fit(X1.A)

    d_sparse, i_sparse = forest_sparse.kneighbors(X2, return_distance=True)
    d_dense, i_dense = forest_dense.kneighbors(X2.A, return_distance=True)

    assert_almost_equal(d_sparse, d_dense)
    assert_almost_equal(i_sparse, i_dense)

    d_sparse, i_sparse = forest_sparse.radius_neighbors(X2,
                                                        return_distance=True)
    d_dense, i_dense = forest_dense.radius_neighbors(X2.A,
                                                     return_distance=True)
    assert_equal(d_sparse.shape, d_dense.shape)
    for a, b in zip(d_sparse, d_dense):
        assert_almost_equal(a, b)
    for a, b in zip(i_sparse, i_dense):
        assert_almost_equal(a, b)
Example #4
 def test_kron_sparse(self):
     m, n = 13, 15
     p, q = 17, 19
     X = Variable(m, n)
     C = sp.rand(p, q, density=0.1)
     A = sp.rand(p * m, q * n, density=0.1)
     constr = [kron(C, X) == A]
     self.assertConstraintsMatch(constr)
Example #5
 def check_join_op(op):
     A = rand(3, 4, density=0.5)
     B = rand(3, 4, density=0.5)
     Asdb = sdb.from_sparse(A).redimension('<f0:double NOT NULL>[i0=0:2,10,0,i1=0:3,10,0]')
     Bsdb = sdb.from_sparse(B).redimension('<f0:double NOT NULL>[i0=0:2,2,1,i1=0:3,2,1]')
     C = op(Asdb, Bsdb)
     expected = op(A.toarray(), B.toarray())
     assert_allclose(C.toarray(), expected, rtol=RTOL)
Example #6
def matrix_factor_SGD(matrix, 
                      learning_rate, 
                      loss_type,
                      tol, 
                      max_iters, 
                      regularization_param,
                      dim):
  matrix_density = matrix.nnz/(matrix.shape[0] * matrix.shape[1])
  #initialization of two factors: small random numbers between -0.01 and 0.01
  #may not want to initialize a completely dense matrix or it will take a while
  dens = 1 #initialize factors with this density
  factor1 = sp.rand(matrix.shape[0], dim, density=dens, format="csr")
  factor1.data = 0.02*factor1.data - 0.01 #rescale nonzeros into (-0.01, 0.01)
  factor2 = sp.rand(matrix.shape[1], dim, density=dens, format="csr") #shape[1]: factor2 rows index matrix columns
  factor2.data = 0.02*factor2.data - 0.01

  num_iters = 1 #start counting from 1
  #iterate over all nonzero entries (training entries)
  #do unless stopping criterion is met 
  #(found good enough approximation or iterated long enough)

  error = np.inf
  errors = list()
  while num_iters <= max_iters and error > tol:
    rows, cols = matrix.nonzero()
    entry = np.random.randint(len(rows)) #choose entry at random
    row = rows[entry]
    col = cols[entry]

    #compute loss
    actual_entry = matrix[row,col]
    approx_factor1 = np.asarray(factor1[row,:].A[0])
    approx_factor2 = np.asarray(factor2[col,:].A[0])
    approx_entry = np.dot(approx_factor1,approx_factor2)
    #loss on this training example
    tr_loss = loss(actual_entry,approx_entry,loss_type) 

    #update estimates
    fac1_update = gradient(actual_entry, approx_factor2, approx_factor1, loss_type)
    fac1_update += regularization_param * approx_factor1
    factor1[row,:] = approx_factor1 - learning_rate * fac1_update

    fac2_update = gradient(actual_entry, approx_factor1, approx_factor2, loss_type)
    fac2_update += regularization_param * approx_factor2
    factor2[col,:] = approx_factor2 - learning_rate * fac2_update
    
    #NOTE diff took a while
    error = diff(matrix,factor1,factor2)
    if num_iters % 50 == 0:
      errors.append(error)
    num_iters += 1

  print("Found estimate with %f error in %d iterations" %
        (diff(matrix, factor1, factor2), num_iters))
  print("Errors:", errors)
  return factor1, factor2
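The example above references loss, gradient, and diff helpers that are not shown. A plausible squared-loss sketch of those helpers, matching the argument order used in the calls above (an assumption, not the original project's code):

import numpy as np

def loss(actual, approx, loss_type="squared"):
    # Squared error on a single observed entry.
    return (actual - approx) ** 2

def gradient(actual, other_factor, this_factor, loss_type="squared"):
    # Gradient of (actual - this_factor . other_factor)^2 w.r.t. this_factor.
    return -2.0 * (actual - np.dot(this_factor, other_factor)) * other_factor

def diff(matrix, factor1, factor2):
    # RMSE of the approximation over the observed (nonzero) entries only.
    rows, cols = matrix.nonzero()
    approx = np.asarray(factor1[rows, :].multiply(factor2[cols, :]).sum(axis=1)).ravel()
    actual = np.asarray(matrix[rows, cols]).ravel()
    return np.sqrt(np.mean((actual - approx) ** 2))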
Example #7
def test_dot_sparse():
    from scipy.sparse import rand
    A = rand(4, 5, density=0.5)
    B = rand(5, 4, density=0.5)

    C = sdb.dot(sdb.from_sparse(A), sdb.from_sparse(B)).toarray()

    exp = np.dot(A.toarray(), B.toarray())

    assert_allclose(C, exp)
Example #8
	def test_add_matrix(self):
		s = Structure('LABEL', 'STRUCTURE NAME', True)
		self.assertFalse( s.plannable )
		s.A_full = np.random.rand(100, 300)
		self.assertTrue( s.size == 100 )
		self.assertTrue( isinstance(s.dvh, DVH) )
		self.assertTrue( s.plannable )
		self.assertTrue( s.A_mean is not None )
		self.assertTrue( len(s.A_mean) == 300 )
		s.size = 120 # inconsistent size, no longer plannable
		self.assertFalse( s.plannable )
		s.size = 100
		self.assertTrue( s.plannable )

		# set CSR matrix
		try:
			s.reset_matrices()
			s.A_full = sp.rand(100, 50, 0.3, 'csr')
			self.assertTrue( s.A_mean is not None )
			self.assertTrue( len(s.A_mean) == 50 )
		except:
			self.assertTrue( False )

		# set CSC matrix
		try:
			s.reset_matrices()
			s.A_full = sp.rand(100, 70, 0.3, 'csc')
			self.assertTrue( s.A_mean is not None )
			self.assertTrue( len(s.A_mean) == 70 )
		except:
			self.assertTrue( False )

		# test exception handling
		try:
			s.reset_matrices()
			s.A_full = np.random.rand()
			self.assertTrue( False )
		except:
			self.assertTrue( True )

		try:
			s.reset_matrices()
			s.A_full = 'random_string'
			self.assertTrue( False )
		except:
			self.assertTrue( True )

		try:
			s.reset_matrices()
			s.A_full = np.random.rand(50, 300)
			self.assertTrue( False )
		except:
			self.assertTrue( True )
Example #9
    def test_sparse_system(self):
        m = 1000
        n = 800
        r = 700
        np.random.seed(1)
        density = 0.2
        A = sp.rand(m, n, density)
        b = np.random.randn(m, 1)
        G = sp.rand(r, n, density)
        h = np.random.randn(r, 1)

        x = Variable(n)
        optval = Problem(Minimize(sum_squares(A*x - b)), [G*x == h]).solve(solver=LS)
        self.assertAlmostEqual(optval, 6071.830658)
Example #10
def lasso_sparse(n):
    m = 2*n
    A = sp.rand(m, n, 0.1)
    A.data = np.random.randn(A.nnz)
    N = A.copy()
    N.data = N.data**2
    A = A*sp.diags([1 / np.sqrt(np.ravel(N.sum(axis=0)))], [0])

    b = A*sp.rand(n, 1, 0.1) + 1e-2*np.random.randn(m,1)
    lam = 0.2*np.max(np.abs(A.T*b))

    x = cvx.Variable(n)
    f = cvx.sum_squares(A*x - b) + lam*cvx.norm1(x)
    return cvx.Problem(cvx.Minimize(f))
Example #11
    def setUp(self):

        self.n = 1000
        self.k = 30
        self.A = 20 * sps.eye(self.n) + \
            sps.rand(self.n, self.n, format='csr')
        self.U = np.random.randn(self.n, self.k)
        self.V = np.random.randn(self.k, self.n)
        self.Z = np.random.randn(self.n, self.k + 2)
        self.Vsp = sps.rand(self.k, self.n)
        self.J = sps.rand(self.k, self.n)
        self.Jt = sps.rand(self.n, self.k)
        self.M = sps.eye(self.n)
        self.Aspd = self.A + self.A.T
        self.krpslvprms = {'tol': 1e-9}
Example #12
def test_sp_profile():
    "Sparse: Profile"
    for kk in range(10):
        A = sp.rand(1000, 1000, 0.1, format='csr')
        pro = sp_profile(A)
        B = A.toarray()
        ans = _dense_profile(B)
        assert_equal(pro, ans)

    for kk in range(10):
        A = sp.rand(1000, 1000, 0.1, format='csc')
        pro = sp_profile(A)
        B = A.toarray()
        ans = _dense_profile(B)
        assert_equal(pro, ans)
Example #13
def gendd(n, density, fmt='csr'):
    a = sp.rand(n, n, format='lil', density=density)
    for i in range(n):
        rowsum = np.abs(a[i, :]).sum() - np.abs(a[i, i])
        a[i, i] = np.abs(rowsum) + 1

    return a.asformat(fmt)
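A quick, illustrative sanity check of the property gendd is meant to guarantee (every row strictly diagonally dominant):

import numpy as np

a = gendd(50, density=0.1)
d = np.abs(a.diagonal())
off = abs(a).sum(axis=1).A1 - d  # off-diagonal absolute row sums
assert np.all(d > off)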
Example #14
File: Solver.py, Project: wathen/PhD
def SchurPCD(Mass,L,F, backend):
    Mass = Mass.sparray()
    F = F.sparray()
    F = F + 1e-10*sp.identity(Mass.shape[0])
    F = PETSc.Mat().createAIJ(size=F.shape,csr=(F.indptr, F.indices, F.data))
    Mass = Mass.tocsc()  # tocsc() returns a new matrix; keep the result
    Schur = sp.rand(Mass.shape[0], Mass.shape[0], density=0.00, format='csr')
    ksp = PETSc.KSP().create()
    pc = ksp.getPC()
    ksp.setOperators(F,F)
    ksp.setType('preonly')
    pc.setType('lu')
    OptDB = PETSc.Options()
    OptDB['pc_factor_shift_amount'] = "0.1"
    # OptDB['pc_factor_shift_type'] = 'POSITIVE_DEFINITE'
    OptDB['pc_factor_mat_ordering_type'] = 'amd'
    # OptDB['rtol']  = 1e-8
    # ksp.max_it = 5
    ksp.setFromOptions()
    for i in range(0,Mass.shape[0]):
        Col = Mass.getcol(i)
        Col = Col.toarray()
        Col = IO.arrayToVec(Col)
        u = Col.duplicate()
        ksp.solve(Col,u)
        C = u.duplicate()
        L.mult(u,C)
        # print C.array
        Schur[i,:] = C.array

    if backend == "PETSc":
        return PETSc.Mat().createAIJ(size=Schur.transpose().shape,csr=(Schur.transpose().indptr, Schur.transpose().indices, Schur.transpose().data))
    else:
        return Schur.transpose()
Example #15
    def check_create_csr_from_scipy(shape, density, f):
        def assert_csr_almost_equal(nd, sp):
            assert_almost_equal(nd.data.asnumpy(), sp.data)
            assert_almost_equal(nd.indptr.asnumpy(), sp.indptr)
            assert_almost_equal(nd.indices.asnumpy(), sp.indices)
            sp_csr = nd.asscipy()
            assert_almost_equal(sp_csr.data, sp.data)
            assert_almost_equal(sp_csr.indptr, sp.indptr)
            assert_almost_equal(sp_csr.indices, sp.indices)
            assert(sp.dtype == sp_csr.dtype), (sp.dtype, sp_csr.dtype)

        try:
            import scipy.sparse as spsp
            # random canonical csr
            csr_sp = spsp.rand(shape[0], shape[1], density, format="csr")
            csr_nd = f(csr_sp)
            assert_csr_almost_equal(csr_nd, csr_sp)
            # non-canonical csr which contains duplicates and unsorted indices
            indptr = np.array([0, 2, 3, 7])
            indices = np.array([0, 2, 2, 0, 1, 2, 1])
            data = np.array([1, 2, 3, 4, 5, 6, 1])
            non_canonical_csr = spsp.csr_matrix((data, indices, indptr), shape=(3, 3), dtype=csr_nd.dtype)
            canonical_csr_nd = f(non_canonical_csr, dtype=csr_nd.dtype)
            canonical_csr_sp = non_canonical_csr.copy()
            canonical_csr_sp.sum_duplicates()
            canonical_csr_sp.sort_indices()
            assert_csr_almost_equal(canonical_csr_nd, canonical_csr_sp)
        except ImportError:
            print("Could not import scipy.sparse. Skipping unit tests for scipy csr creation")
Example #16
def build_future_transition_matrix():
    # The real transition_matrix has ~70,000 non-zero entries in 50,000^2 fields --> density 0.000028
    expected_dimension = 150000
    expected_density = 0.001
    # todo: change randint to skewed distribution
    # todo: compare creation speed: coo vs lil
    transition_matrix = rand(m=expected_dimension, n=expected_dimension,
                             density=expected_density, format='coo',
                             random_state=np.random.randint(low=1000))

    # save to h5 file
    filters = tb.Filters(complevel=5, complib='blosc')

    with tb.open_file('future_transition_matrix.h5', 'w') as f:
        # Earrays
        data = f.create_earray(f.root, 'data', tb.Float32Atom(), shape=(0,), filters=filters)
        row_indices = f.create_earray(f.root, 'row_indices', tb.Float32Atom(), shape=(0,), filters=filters)
        column_indices = f.create_earray(f.root, 'column_indices', tb.Float32Atom(), shape=(0,), filters=filters)
        # Carray
        shape_dimensions = f.create_carray(f.root, 'shape_dimensions', tb.Float32Atom(), shape=(1, 2), filters=filters)

        # append values to file
        data.append(transition_matrix.data)
        row_indices.append(transition_matrix.row)
        column_indices.append(transition_matrix.col)

        shape_dimensions[0, 0] = transition_matrix.shape[0]
        shape_dimensions[0, 1] = transition_matrix.shape[1]
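An illustrative round trip for the file written above, rebuilding the COO matrix from the stored arrays; note the indices were written as float32 and must be cast back to integers (exact here, since the dimension stays below 2**24):

import numpy as np
import tables as tb
from scipy.sparse import coo_matrix

with tb.open_file('future_transition_matrix.h5', 'r') as f:
    shape = tuple(int(v) for v in f.root.shape_dimensions[0])
    row = f.root.row_indices[:].astype(np.int64)
    col = f.root.column_indices[:].astype(np.int64)
    restored = coo_matrix((f.root.data[:], (row, col)), shape=shape)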
Example #17
    def setup(self, density, format):
        n = 1000
        if format == 'dok' and n * density >= 500:
            raise NotImplementedError()

        warnings.simplefilter('ignore', SparseEfficiencyWarning)
        self.X = sparse.rand(n, n, format=format, density=density)
Example #18
def test_gradients():
    """Test gradient accuracy."""
    # data
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    X = np.random.normal(0.0, 1.0, [n_samples, n_features])
    X = scaler.fit_transform(X)

    density = 0.1
    beta_ = np.zeros(n_features + 1)
    beta_[0] = np.random.rand()
    beta_[1:] = sps.rand(n_features, 1, density=density).toarray()[:, 0]

    reg_lambda = 0.1
    distrs = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit', 'gamma']
    for distr in distrs:
        glm = GLM(distr=distr, reg_lambda=reg_lambda)
        y = simulate_glm(glm.distr, beta_[0], beta_[1:], X)

        func = partial(_L2loss, distr, glm.alpha,
                       glm.Tau, reg_lambda, X, y, glm.eta, glm.group)
        grad = partial(_grad_L2loss, distr, glm.alpha, glm.Tau,
                       reg_lambda, X, y,
                       glm.eta)
        approx_grad = approx_fprime(beta_, func, 1.5e-8)
        analytical_grad = grad(beta_)
        assert_allclose(approx_grad, analytical_grad, rtol=1e-5, atol=1e-3)
Example #19
def rand_ket(N, density=1, dims=None, seed=None):
    """Creates a random Nx1 sparse ket vector.

    Parameters
    ----------
    N : int
        Number of rows for output quantum operator.
    density : float
        Density between [0,1] of output ket state.
    dims : list
        Left-dimensions of quantum object.  Used for specifying
        tensor structure. Default is dims=[[N]].

    Returns
    -------
    oper : qobj
        Nx1 ket state quantum operator.

    """
    if seed is not None:
        np.random.seed(seed=seed)
    if dims:
        _check_ket_dims(dims, N)
    X = sp.rand(N, 1, density, format='csr')
    X.data = X.data - 0.5
    Y = X.copy()
    Y.data = 1.0j * (np.random.random(len(X.data)) - 0.5)
    X = X + Y
    X.sort_indices()
    X = Qobj(X)
    if dims:
        return Qobj(X / X.norm(), dims=dims)
    else:
        return Qobj(X / X.norm())
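Illustrative use of the version above: the returned ket is normalized, so its norm should be 1 up to floating point error.

k = rand_ket(8, density=0.5, seed=1234)
assert abs(k.norm() - 1.0) < 1e-12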
Example #20
 def test_spmatrix(self):
     A = np.matrix([[1,2,3],[4,5,6]])
     As = spmatrix(A)
     self.assertEqual(As.shape,A.shape)
     self.assertTrue(type(As) is c.arrays.cvxpy_spmatrix)
     self.assertEqual(As.nnz,6)
     self.assertEqual(As.dtype,np.float64)
     self.assertEqual(As[0,0],A[0,0])
     B = sp.rand(10,10,format='csr')
     Bs = spmatrix(B)
     self.assertEqual(Bs.shape,B.shape)
     self.assertTrue(type(Bs) is c.arrays.cvxpy_spmatrix)
     self.assertEqual(Bs.nnz,B.nnz)
     self.assertEqual(Bs.dtype,np.float64)
     Blil = B.tolil()
     for i in range(0,10):
         for j in range(0,10):
             self.assertEqual(Bs[i,j],Blil[i,j])
     Cs = spmatrix((50,50))
     self.assertEqual(Cs.shape,(50,50))
     self.assertTrue(type(Cs) is c.arrays.cvxpy_spmatrix)
     self.assertEqual(Cs.nnz,0)
     self.assertEqual(Cs.dtype,np.float64)
     for i in range(0,50):
         for j in range(0,50):
             self.assertEqual(Cs[i,j],0.)
Example #21
 def __init__(self, s,d):
     X = sp.rand(s,s,d,format='csr')
     self.nnz = X.nnz
     self.nRows = X.shape[0]
     self.A = X.data
     self.IA = X.indptr
     self.JA = X.indices
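The constructor stores the standard CSR triplet: A holds the nonzero values, JA the column indices, and IA the row pointers (scipy's data/indices/indptr). A sketch of rebuilding the matrix from an instance obj of the class above (whose name the snippet does not show):

import scipy.sparse as sp

def to_csr(obj):
    # (data, indices, indptr) is scipy's CSR constructor convention.
    return sp.csr_matrix((obj.A, obj.JA, obj.IA), shape=(obj.nRows, obj.nRows))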
Example #22
def random_problem(m, n, density, nproblems):
    """
    Generate a random LP with m rows, n columns, and nproblems problem instances.
    The constraint matrix is sparse with the given density, generated using
    scipy.sparse.rand.
    """
    from scipy.sparse import rand, csc_matrix
    from pycllp.lp import SparseMatrix, StandardLP

    np.random.seed(0)

    A = np.empty((m, n))
    for i in range(m):
        A[i, :] = rand(1, n, density=max(density, 3./n)).todense()

    A = SparseMatrix(matrix=csc_matrix(A))
    m = A.nrows
    n = A.ncols
    b = np.random.rand(nproblems, m)
    c = np.random.rand(nproblems, n)

    # Create sparse matrix with scipy.sparse
    #A.set_num_problems(nproblems)
    # TODO make this random.
    #A.data = np.ones(A.data.shape)*old_A_data

    return StandardLP(A, b, c, 0.0)
Example #23
def rand_ket(N, density=1, dims=None):
    """Creates a random Nx1 sparse ket vector.

    Parameters
    ----------
    N : int
        Number of rows for output quantum operator.
    density : float
        Density between [0,1] of output ket state.
    dims : list
        Dimensions of quantum object.  Used for specifying
        tensor structure. Default is dims=[[N],[1]].

    Returns
    -------
    oper : qobj
        Nx1 ket state quantum operator.

    """
    if dims:
        _check_dims(dims, N, 1)
    X = sp.rand(N, 1, density, format='csr')
    X.data = X.data - 0.5
    Y = X.copy()
    Y.data = 1.0j * np.random.random(len(X.data)) - (0.5 + 0.5j)
    X = X + Y
    X = Qobj(X)
    if dims:
        return Qobj(X / X.norm(), dims=dims, shape=[N, 1])
    else:
        return Qobj(X / X.norm())
Example #24
def test_column_permutation():
    "Graph: Column Permutation"
    A = sp.rand(5, 5, 0.25, format='csc')
    perm = column_permutation(A)
    B = sp_permute(A, [], perm)
    counts = np.diff(B.indptr)
    assert_equal(np.all(np.argsort(counts) == np.arange(5)), True)
Example #25
 def setUp(self):
     n = 50
     nrhs = 20
     self.A = sp.rand(n, n, 0.4) + sp.identity(n)
     self.sol = np.ones((n, nrhs))
     self.rhsU = sp.triu(self.A) * self.sol
     self.rhsL = sp.tril(self.A) * self.sol
Example #26
    def setup(self, N, sparsity_pattern, format):
        if format == 'dok' and N > 500:
            raise NotImplementedError()

        self.A = rand(1000, 1000, density=1e-5)

        A = self.A
        N = int(N)

        # indices to assign to
        i, j = [], []
        while len(i) < N:
            n = N - len(i)
            ip = numpy.random.randint(0, A.shape[0], size=n)
            jp = numpy.random.randint(0, A.shape[1], size=n)
            i = numpy.r_[i, ip]
            j = numpy.r_[j, jp]
        v = numpy.random.rand(n)

        if N == 1:
            i = int(i)
            j = int(j)
            v = float(v)

        base = A.asformat(format)

        self.m = base.copy()
        self.i = i
        self.j = j
        self.v = v
Example #27
    def test_portfolio_problem(self):
        """Test portfolio problem that caused dcp_attr errors.
        """
        import numpy as np
        import scipy.sparse as sp
        np.random.seed(5)
        n = 100  # 10000
        m = 10  # 100
        pbar = (np.ones((n, 1)) * .03 +
                np.matrix(np.append(np.random.rand(n - 1, 1), 0)).T * .12)

        F = sp.rand(m, n, density=0.01)
        F.data = np.ones(len(F.data))
        D = sp.eye(n).tocoo()
        D.data = np.random.randn(len(D.data))**2
        Z = np.random.randn(m, 1)
        Z = Z.dot(Z.T)

        x = Variable(n)
        y = x.__rmul__(F)
        mu = 1
        ret = pbar.T * x
        # DCP attr causes error because not all the curvature
        # matrices are reduced to constants when an atom
        # is scalar.
        risk = square(norm(x.__rmul__(D))) + square(Z*y)
Example #28
def main():
    n_samples = 50    # specify the number of samples in the simulated data
    n_features = 100    # specify the number of features in the simulated data

    # simulate the dataset
    X = np.random.rand(n_samples, n_features)

    # simulate the feature weight
    w_orin = rand(n_features, 1, 1).toarray()
    w_orin[0:50] = 0

    # obtain the ground truth of the simulated dataset
    noise = np.random.rand(n_samples, 1)
    y = np.dot(X, w_orin) + 0.01 * noise
    y = y[:, 0]


    z = 0.01  # specify the regularization parameter of the L2 norm for the non-overlapping group

    # specify the tree structure among features
    idx = np.array([[-1, -1, 1], [1, 20, np.sqrt(20)], [21, 40, np.sqrt(20)], [41, 50, np.sqrt(10)],
                    [51, 70, np.sqrt(20)], [71, 100, np.sqrt(30)], [1, 50, np.sqrt(50)], [51, 100, np.sqrt(50)]]).T
    idx = idx.astype(int)

    # perform feature selection and obtain the feature weight of all the features
    w, obj, value_gamma = tree_fs.tree_fs(X, y, z, idx, verbose=True)
Example #29
def random_graph(n, rho):
    R = rand(n, n, density=rho, format='dense')
    R_I = np.ceil(R).astype(int)
    A = np.array(R_I)
    for i in range(n):
        A[i, i] = 0
    return A
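An illustrative check of what the helper returns: a dense 0/1 adjacency matrix with an empty diagonal (format='dense' makes sp.rand hand back a dense matrix via todense()):

import numpy as np

A = random_graph(20, 0.2)
assert A.shape == (20, 20)
assert set(A.ravel()) <= {0, 1}
assert (np.diagonal(A) == 0).all()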
Example #30
def test_inplace_row_scale():
    rng = np.random.RandomState(0)
    X = sp.rand(100, 200, 0.05)
    Xr = X.tocsr()
    Xc = X.tocsc()
    XA = X.toarray()
    scale = rng.rand(100)
    XA *= scale.reshape(-1, 1)

    inplace_row_scale(Xc, scale)
    inplace_row_scale(Xr, scale)
    assert_array_almost_equal(Xr.toarray(), Xc.toarray())
    assert_array_almost_equal(XA, Xc.toarray())
    assert_array_almost_equal(XA, Xr.toarray())
    assert_raises(TypeError, inplace_column_scale, X.tolil(), scale)

    X = X.astype(np.float32)
    scale = scale.astype(np.float32)
    Xr = X.tocsr()
    Xc = X.tocsc()
    XA = X.toarray()
    XA *= scale.reshape(-1, 1)
    inplace_row_scale(Xc, scale)
    inplace_row_scale(Xr, scale)
    assert_array_almost_equal(Xr.toarray(), Xc.toarray())
    assert_array_almost_equal(XA, Xc.toarray())
    assert_array_almost_equal(XA, Xr.toarray())
    assert_raises(TypeError, inplace_column_scale, X.tolil(), scale)
Example #31
    def test_asaga_solver(self):
        """...Check ASAGA solver for a Logistic Regression with Elastic net
        penalization
        """
        seed = 1398
        np.random.seed(seed)
        n_samples = 4000
        n_features = 30
        weights = weights_sparse_gauss(n_features, nnz=3).astype(self.dtype)
        intercept = 0.2
        penalty_strength = 1e-3
        sparsity = 1e-4
        features = sparse.rand(n_samples, n_features, density=sparsity,
                               format='csr', random_state=8).astype(self.dtype)

        simulator = SimuLogReg(weights, n_samples=n_samples, features=features,
                               verbose=False, intercept=intercept,
                               dtype=self.dtype)
        features, labels = simulator.simulate()

        model = ModelLogReg(fit_intercept=True)
        model.fit(features, labels)
        prox = ProxElasticNet(penalty_strength, ratio=0.1, range=(0,
                                                                  n_features))
        solver_step = 1. / model.get_lip_max()
        saga = SAGA(step=solver_step, max_iter=100, tol=1e-10, verbose=False,
                    n_threads=1, record_every=10, seed=seed)
        saga.set_model(model).set_prox(prox)
        saga.solve()

        asaga = SAGA(step=solver_step, max_iter=100, tol=1e-10, verbose=False,
                     n_threads=2, record_every=10, seed=seed)
        asaga.set_model(model).set_prox(prox)
        asaga.solve()

        np.testing.assert_array_almost_equal(saga.solution, asaga.solution,
                                             decimal=4)
        self.assertGreater(np.linalg.norm(saga.solution[:-1]), 0)
Example #32
def rand_herm(N, density=0.75, dims=None):
    """Creates a random NxN sparse Hermitian quantum object.
    
    Uses :math:`H=X+X^{+}` where :math:`X` is
    a randomly generated quantum operator with a given `density`.
    
    Parameters
    ----------
    N : int
        Shape of output quantum operator.
    density : float
        Density between [0,1] of output Hermitian operator.
    dims : list 
        Dimensions of quantum object.  Used for specifying
        tensor structure. Default is dims=[[N],[N]].
    
    Returns
    -------
    oper : qobj
        NxN Hermitian quantum operator.
    
    """
    if dims:
        _check_dims(dims, N, N)
    # to get appropriate density of output
    # Hermitian operator must convert via:
    herm_density = 2.0 * arcsin(density) / pi

    X = sp.rand(N, N, herm_density, format='csr')
    X.data = X.data - 0.5
    Y = X.copy()
    Y.data = 1.0j * np.random.random(len(X.data)) - (0.5 + 0.5j)
    X = X + Y
    X = Qobj(X)
    if dims:
        return Qobj((X + X.dag()) / 2.0, dims=dims, shape=[N, N])
    else:
        return Qobj((X + X.dag()) / 2.0)
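The arcsin correction above compensates for pattern overlap when X and its adjoint are added, so the realized density of the sum comes out near the requested value. An illustrative empirical check, assuming a qutip version where Qobj exposes its sparse data as .data:

H = rand_herm(200, density=0.2)
realized = H.data.nnz / 200.0 ** 2
print(realized)  # should land roughly near the requested 0.2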
Example #33
def l1(m=100, seed=0):
    """ Solve random least-l1 norm problem.

    Data is for problem:

    min. ||x||_1
    s.t. Cx = d

    C is m by n, with n = 2*m
    C is sparse, with each element 10 percent chance of nonzero

    Note: if m is too small, C may have a row of all zeros, making the problem infeasible

    todo: why does normalization seem to hurt?
    """
    n = 2 * m
    np.random.seed(seed)

    C = sp.rand(m, n, 0.1, format='csc')
    Ae = sp.hstack([C, sp.csc_matrix((m, n))], format="csc")
    h = np.zeros(2 * n)
    d = np.random.randn(m)
    bt = np.hstack([d, h])  # in cone formulation
    c = np.hstack([np.zeros(n), np.ones(n)])
    I = sp.eye(n)
    G = sp.vstack([sp.hstack([I, -I]), sp.hstack([-I, -I])], format="csc")
    At = sp.vstack([Ae, G], format="csc")  # in cone formulation
    At.indices = At.indices.astype(np.int64)
    At.indptr = At.indptr.astype(np.int64)

    data = {'A': At, 'b': bt, 'c': c}
    cone = {'l': 2 * n, 'f': m}
    #opts = {'normalize': True}

    # the unstuffed problem data
    extra = dict(C=C, d=d)

    return data, cone
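The data/cone pair above is laid out for the pre-3.0 scs Python interface ('f' is that API's key for the zero cone; newer releases renamed it). A hedged sketch of feeding it to that older solver:

import scs  # pre-3.0 interface assumed

data, cone = l1(m=100)
sol = scs.solve(data, cone, eps=1e-5)
x = sol['x'][:200]  # n = 2*m = 200; the first n entries are x, the rest the l1 epigraph variables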
Example #34
    def _initialize_internal_weights(self, n_internal_units, connectivity, spectral_radius):
        # The eigs function might not converge. Attempt until it does.
        convergence = False
        while (not convergence):
            # Generate sparse, uniformly distributed weights.
            internal_weights = sparse.rand(n_internal_units, n_internal_units, density=connectivity).todense()

            # Ensure that the nonzero values are uniformly distributed in [-0.5, 0.5]
            internal_weights[np.where(internal_weights > 0)] -= 0.5

            try:
                # Get the largest eigenvalue
                w,_ = slinalg.eigs(internal_weights, k=1, which='LM')

                convergence = True

            except:
                continue

        # Adjust the spectral radius.
        internal_weights /= np.abs(w)/spectral_radius

        return internal_weights
Example #35
def portfolioProblem(problemOptions, solverOptions):
    m = problemOptions['m']
    n = problemOptions['n']
    density = problemOptions['density']

    mu = np.exp(problemOptions['noiselevel']*np.random.randn(n))-1  # returns
    D = np.random.rand(n)/10                # idiosyncratic risk
    F = sps.rand(n,m,density)                # factor model
    F.data = np.random.randn(len(F.data))/10
    gamma = 1
    B = 1
    x = cp.Variable(n)
    
    # Problem construction
    f = mu.T*x - gamma*(cp.sum_squares(F.T.dot(x)) +
                        cp.sum_squares(cp.mul_elemwise(D, x)))
    C = [cp.sum_entries(x) == B,
         x >= 0]

    prob = cp.Problem(cp.Maximize(f), C)
    
    prob.solve(**solverOptions)
    return {'Problem':prob, 'name':'portfolioProblem'}
Example #36
def test_sag_adaptive():
    """Check that the adaptive step size strategy yields the same
    solution as the non-adaptive"""
    np.random.seed(0)
    X = sparse.rand(100, 10, density=.5, random_state=0).tocsr()
    y = np.random.randint(0, high=2, size=100)
    for alpha in np.logspace(-3, 1, 5):
        clf_adaptive = SAGClassifier(
            eta='line-search', random_state=0, alpha=alpha)
        clf_adaptive.fit(X, y)
        clf = SAGClassifier(
            eta='auto', random_state=0, alpha=alpha)
        clf.fit(X, y)
        assert_almost_equal(clf_adaptive.score(X, y), clf.score(X, y), 1)

        clf_adaptive = SAGAClassifier(
            eta='line-search', loss='log', random_state=0, alpha=alpha, max_iter=20)
        clf_adaptive.fit(X, y)
        assert np.isnan(clf_adaptive.coef_.sum()) == False
        clf = SAGAClassifier(
            eta='auto', loss='log', random_state=0, alpha=alpha, max_iter=20)
        clf.fit(X, y)
        assert_almost_equal(clf_adaptive.score(X, y), clf.score(X, y), 1)
Example #37
 def check_create_csr_from_scipy(shape, density, f):
     def assert_csr_almost_equal(nd, sp):
         assert_almost_equal(nd.data.asnumpy(), sp.data)
         assert_almost_equal(nd.indptr.asnumpy(), sp.indptr)
         assert_almost_equal(nd.indices.asnumpy(), sp.indices)
     try:
         import scipy.sparse as spsp
         # random canonical csr
         csr_sp = spsp.rand(shape[0], shape[1], density, format="csr")
         csr_nd = f(csr_sp)
         assert_csr_almost_equal(csr_nd, csr_sp)
         # non-canonical csr which contains duplicates and unsorted indices
         indptr = np.array([0, 2, 3, 7])
         indices = np.array([0, 2, 2, 0, 1, 2, 1])
         data = np.array([1, 2, 3, 4, 5, 6, 1])
         non_canonical_csr = spsp.csr_matrix((data, indices, indptr), shape=(3, 3))
         canonical_csr_nd = f(non_canonical_csr)
         canonical_csr_sp = non_canonical_csr.copy()
         canonical_csr_sp.sum_duplicates()
         canonical_csr_sp.sort_indices()
         assert_csr_almost_equal(canonical_csr_nd, canonical_csr_sp)
     except ImportError:
         print("Could not import scipy.sparse. Skipping unit tests for scipy csr creation")
Example #38
def test_pool_adjacency_mat_forplot(benchmark, dims, density, kernel_size,
                                    stride, padding):
    adj = (sparse.rand(dims, dims, density) + sparse.eye(dims)).tocoo()
    adj.data = np.ones(adj.nnz, dtype=np.int16)

    shape = conv2d_shape(adj.shape, kernel_size, stride, padding)

    adj_pooled = pool_adjacency_mat_reference(
        to_sparse_tensor(adj).to_dense(), kernel_size, stride,
        padding).to("cpu")
    assert adj_pooled.shape[0] == shape[0]

    with set_device("cpu"):
        adj_pooled_ = benchmark.pedantic(
            pool_adjacency_mat,
            args=(adj, kernel_size, stride, padding),
            rounds=1,
            iterations=1,
            warmup_rounds=1,
        )
    assert adj_pooled.shape[0] == shape[0]

    assert np.allclose(adj_pooled.numpy(), adj_pooled_.todense())
Example #39
def test_real_eigs_real_k_subset():
    np.random.seed(1)

    n = 10
    A = rand(n, n, density=0.5)
    A.data *= 2
    A.data -= 1

    v0 = np.ones(n)

    whichs = ['LM', 'SM', 'LR', 'SR', 'LI', 'SI']
    dtypes = [np.float32, np.float64]

    for which, sigma, dtype in itertools.product(whichs, [None, 0, 5], dtypes):
        prev_w = np.array([], dtype=dtype)
        eps = np.finfo(dtype).eps
        for k in range(1, 9):
            w, z = eigs(A.astype(dtype), k=k, which=which, sigma=sigma,
                        v0=v0.astype(dtype), tol=0)
            assert_allclose(np.linalg.norm(A.dot(z) - z * w), 0, atol=np.sqrt(eps))

            # Check that the set of eigenvalues for `k` is a subset of that for `k+1`
            dist = abs(prev_w[:,None] - w).min(axis=1)
            assert_allclose(dist, 0, atol=np.sqrt(eps))

            prev_w = w

            # Check sort order
            if sigma is None:
                d = w
            else:
                d = 1 / (w - sigma)

            if which == 'LM':
                # ARPACK is systematic for 'LM', but sort order
                # appears not well defined for other modes
                assert np.all(np.diff(abs(d)) <= 1e-6)
Example #40
    def testSparse(self):
        data = sps.rand(9, 9, density=0.1)
        t = mt.tensor(data, chunk_size=3)

        t1 = t * 2 / 3
        g = t1.build_graph(tiled=True, fuse_enabled=True)
        graph_nodes = list(g)
        self.assertTrue(
            all(isinstance(n.op, TensorFuseChunk) for n in graph_nodes))
        self.assertTrue(all(n.op.sparse for n in graph_nodes))
        self.assertTrue(all(n.shape == (3, 3) for n in graph_nodes))

        fuse_node = graph_nodes[0]
        self.assertEqual(fuse_node.shape, (3, 3))
        self.assertEqual(len(fuse_node.composed), 3)
        self.assertIsInstance(fuse_node.composed[0].op, CSRMatrixDataSource)
        self.assertIsInstance(fuse_node.composed[1].op, TensorMultiply)
        self.assertIsInstance(fuse_node.composed[2].op,
                              (TensorTrueDiv, TensorDivide))
        self.assertTrue(all(c.op.sparse for c in fuse_node.composed))

        t2 = (t * 2).todense()
        g = t2.build_graph(tiled=True, fuse_enabled=True)
        graph_nodes = list(g)
        self.assertTrue(
            all([isinstance(n.op, TensorFuseChunk) for n in graph_nodes]))
        self.assertTrue(all([not n.op.sparse for n in graph_nodes]))
        self.assertTrue(all(n.shape == (3, 3) for n in graph_nodes))

        fuse_node = graph_nodes[0]
        self.assertEqual(fuse_node.shape, (3, 3))
        self.assertEqual(len(fuse_node.composed), 3)
        self.assertIsInstance(fuse_node.composed[0].op, CSRMatrixDataSource)
        self.assertIsInstance(fuse_node.composed[1].op, TensorMultiply)
        self.assertTrue(fuse_node.composed[1].op.sparse)
        self.assertIsInstance(fuse_node.composed[2].op, SparseToDense)
        self.assertFalse(fuse_node.composed[2].op.sparse)
Example #42
def test_gcn_fw():
    N = 2
    n = 5
    L = [sp.eye(n, format='csr') for i in range(N)]
    X = np.ones((N, n))
    Theta = np.array([.3, .4])

    Y, _ = gcn_fw(L, X, Theta)
    Y_correct = np.ones((N, n)) * .7

    diff = rel_error(Y, Y_correct)
    assert diff < 1e-16, print('Error:', diff)

    #test with random L's
    N = 2
    n = 5
    L = [sp.rand(n, n, density=1, format='csr') for i in range(N)]
    X = np.random.rand(N, n)
    Theta = np.array([.3, .4])

    Y, _ = gcn_fw(L, X, Theta)

    Y_correct = []
    for i in range(N):
        expL = sp.eye(n)
        y = np.zeros(n)
        for theta in Theta:
            expL = expL.dot(L[i])
            y += theta * expL.dot(X[i])

        Y_correct.append(y)

    Y_correct = np.array(Y_correct)
    diff = rel_error(Y, Y_correct)
    assert diff < 1e-15, print('Error:', diff, Y, Y_correct)

    print('Correct!')
Example #43
    def test_parallel_dot(self):

        n = 1000
        #x = 3*np.arange(n)
        x = np.array([1.0] * n)
        mpi_size = 2
        if False:
            top = [4, -1]
            top.extend([0.0] * (n - 2))
            A = linalg.toeplitz(top, top)
            A[n - 1, 0] = -1
            A[0, n - 1] = -1
        else:
            A = sparse.rand(n, n, density=0.5, dtype=np.float64, random_state=1)

        A = sparse.csc_matrix(A)

        print('Starting Serial dot Multiplication .......')
        start_time = time.time()
        u_target = A.dot(x)
        elapsed_time = time.time() - start_time
        print('Serial matvec : Elapsed time : %f ' % elapsed_time)

        print('Starting Parallel Matrix dot Multiplication 1.......')
        start_time = time.time()
        parallelA = ParallelMatrix(A, mpi_size)
        u = parallelA.dot(x)
        elapsed_time = time.time() - start_time
        print('1 Parallel dot : Elapsed time : %f ' % elapsed_time)

        print('Starting Parallel Matrix dot Multiplication 2.......')
        start_time = time.time()
        u = parallelA.dot(x)
        elapsed_time = time.time() - start_time
        print('2 Parallel dot : Elapsed time : %f ' % elapsed_time)

        np.testing.assert_almost_equal(u_target, u, decimal=10)
Example #44
def rand_ket(N=0, density=1, dims=None, seed=None):
    """Creates a random Nx1 sparse ket vector.

    Parameters
    ----------
    N : int
        Number of rows for output quantum operator.
        If None or 0, N is deduced from dims.
    density : float
        Density between [0,1] of output ket state.
    dims : list
        Dimensions of quantum object.  Used for specifying
        tensor structure. Default is dims=[[N],[1]].

    Returns
    -------
    oper : qobj
        Nx1 ket state quantum operator.

    """
    if seed is not None:
        np.random.seed(seed=seed)
    if N and dims:
        _check_dims(dims, N, 1)
    elif dims:
        N = prod(dims[0])
        _check_dims(dims, N, 1)
    else:
        dims = [[N], [1]]
    X = sp.rand(N, 1, density, format='csr')
    X.data = X.data - 0.5
    Y = X.copy()
    Y.data = 1.0j * (np.random.random(len(X.data)) - 0.5)
    X = X + Y
    X.sort_indices()
    X = Qobj(X)
    return Qobj(X / X.norm(), dims=dims)
Example #45
def test_sparse_preprocess_data_with_return_mean():
    n_samples = 200
    n_features = 2
    # random_state not supported yet in sparse.rand
    X = sparse.rand(n_samples, n_features, density=.5)  # , random_state=rng
    X = X.tolil()
    y = rng.rand(n_samples)
    XA = X.toarray()
    expected_X_norm = np.std(XA, axis=0) * np.sqrt(X.shape[0])

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=False, normalize=False,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.zeros(n_features))
    assert_array_almost_equal(y_mean, 0)
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y)

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=False,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_norm, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))

    Xt, yt, X_mean, y_mean, X_norm = \
        _preprocess_data(X, y, fit_intercept=True, normalize=True,
                         return_mean=True)
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_norm, expected_X_norm)
    assert_array_almost_equal(Xt.A, XA / expected_X_norm)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))
Example #46
def main():
    t0 = time()
    n = 30
    m = 10
    A = np.random.rand(m, n)

    x = sparse.rand(n, 1, density=0.1)
    b = A * x

    xtrue = x

    bp = BasisPursuit(1, 1)
    x = bp.fit(A, b)
    t0 = time()
    for _ in range(100):
        x = bp.fit(A, b)
    print(time() - t0)
    K = len(bp.history['objval'])

    fig, axs = plt.subplots(3, 1, sharex=True)

    axs[0].plot(bp.history['objval'])
    axs[0].set_ylabel('f(x^k) + g(z^k)')

    axs[1].plot(bp.history['r_norm'])
    axs[1].plot(bp.history['eps_pri'])
    axs[1].set_yscale('log')
    axs[1].set_ylabel('||r||_2')

    axs[2].plot(bp.history['s_norm'])
    axs[2].plot(bp.history['eps_dual'])
    axs[2].set_yscale('log')
    axs[2].set_ylabel('||s||_2')
    axs[2].set_xlabel('iter (k)')
    plt.tight_layout()
    plt.show()
Example #47
def test_closureap():
    """ Correctedness of all-pairs parallel closure. """
    np.random.seed(100)
    dt = DirTree('test', (2, 5, 10), root='test_parallel')
    N = 100
    thresh = 0.1
    A = sp.rand(N, N, thresh, 'csr')
    nnz = A.getnnz()
    sparsity = float(nnz) / N**2
    print('Number of nnz = {}, sparsity = {:g}'.format(nnz, sparsity))
    A = np.asarray(A.todense())
    clo.closureap(A, dt)
    coords = np.asarray(fromdirtree(dt, N), dtype=coo_dtype)
    coo = (coords['weight'], (coords['row'], coords['col']))
    B = np.asarray(sp.coo_matrix(coo, shape=(N, N)).todense())
    rows = []
    for row in range(N):
        r, _ = clo.cclosuress(A, row)
        rows.append(r)
    C = np.asarray(rows)
    assert np.allclose(B, C)
    # cleanup
    for logpath in glob('closure-*.log'):
        os.remove(logpath)
Example #48
def test_onelayer_gcn():

    #Test loss func

    N, n, l, K = 10, 5, 3, 2

    X = np.random.rand(N, n)
    y = np.random.randint(l, size=N)

    L = [sp.rand(n, n, density=1, format='csr') for i in range(N)]

    model = OneLayer(N, K, l, weight_scale=1e-3)
    loss, grads = model.loss(X, L, y)

    Theta1 = model.params['Theta1']
    W2 = model.params['W2']
    out1 = np.array([expmulit(L[i], X[i], Theta1) for i in range(N)])
    out1 = np.maximum(out1, 0)
    out2 = np.dot(out1, np.ones(n)).reshape(-1, 1).dot(W2.reshape(1, -1))
    correct_loss, _ = softmax_loss(out2, y)

    print('check loss diff')
    assert_diff(loss, correct_loss)

    #Test gradient
    _, grads = model.loss(X, L, y)

    for name in ['Theta1', 'W2']:
        grad = grads[name]
        f = lambda _: model.loss(X, L, y)[0]
        grad_num = eval_numerical_gradient(f,
                                           model.params[name],
                                           verbose=False)

        print('Check grad', name)
        assert_diff(grad, grad_num, 1e-8)
Example #49
def test_sparse_preprocess_data_offsets(global_random_seed):
    rng = np.random.RandomState(global_random_seed)
    n_samples = 200
    n_features = 2
    X = sparse.rand(n_samples, n_features, density=0.5, random_state=rng)
    X = X.tolil()
    y = rng.rand(n_samples)
    XA = X.toarray()
    expected_X_scale = np.std(XA, axis=0) * np.sqrt(X.shape[0])

    Xt, yt, X_mean, y_mean, X_scale = _preprocess_data(
        X, y, fit_intercept=False, normalize=False
    )
    assert_array_almost_equal(X_mean, np.zeros(n_features))
    assert_array_almost_equal(y_mean, 0)
    assert_array_almost_equal(X_scale, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y)

    Xt, yt, X_mean, y_mean, X_scale = _preprocess_data(
        X, y, fit_intercept=True, normalize=False
    )
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_scale, np.ones(n_features))
    assert_array_almost_equal(Xt.A, XA)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))

    Xt, yt, X_mean, y_mean, X_scale = _preprocess_data(
        X, y, fit_intercept=True, normalize=True
    )
    assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
    assert_array_almost_equal(y_mean, np.mean(y, axis=0))
    assert_array_almost_equal(X_scale, expected_X_scale)
    assert_array_almost_equal(Xt.A, XA / expected_X_scale)
    assert_array_almost_equal(yt, y - np.mean(y, axis=0))
Example #50
def X_64bit(request):
    X = sp.rand(20, 10, format=request.param)
    for attr in ['indices', 'indptr', 'row', 'col']:
        if hasattr(X, attr):
            setattr(X, attr, getattr(X, attr).astype('int64'))
    yield X
Example #51
    assert np.isfortran(as_float_array(X, copy=True))

    # Test the copy parameter with some matrices
    matrices = [
        np.matrix(np.arange(5)),
        sp.csc_matrix(np.arange(5)).toarray(),
        _sparse_random_matrix(10, 10, density=0.10).toarray()
    ]
    for M in matrices:
        N = as_float_array(M, copy=True)
        N[0, 0] = np.nan
        assert not np.isnan(M).any()


@pytest.mark.parametrize("X", [(np.random.random((10, 2))),
                               (sp.rand(10, 2).tocsr())])
def test_as_float_array_nan(X):
    X[5, 0] = np.nan
    X[6, 1] = np.nan
    X_converted = as_float_array(X, force_all_finite='allow-nan')
    assert_allclose_dense_sparse(X_converted, X)


def test_np_matrix():
    # Confirm that input validation code does not return np.matrix
    X = np.arange(12).reshape(3, 4)

    assert not isinstance(as_float_array(X), np.matrix)
    assert not isinstance(as_float_array(np.matrix(X)), np.matrix)
    assert not isinstance(as_float_array(sp.csc_matrix(X)), np.matrix)
Example #52
 def setup(self, density, format):
     n = 500
     k = 1000
     self.X = sparse.rand(n, k, format=format, density=density)
Example #53
    def test_sparse_sample_down_label_space(self):
        y = sparse.rand(200, 20, format='csc')
        sample10 = sample_down_label_space(y, 10)

        assert sample10.shape[1] == 10
Example #54
def test_predict_ranks():

    no_users, no_items = (10, 100)

    train = sp.coo_matrix((no_users, no_items), dtype=np.float32)
    train = sp.rand(no_users, no_items, format="csr", random_state=42)

    model = LightFM()
    model.fit_partial(train)

    # Compute ranks for all items
    rank_input = sp.csr_matrix(np.ones((no_users, no_items)))
    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 0)
    assert np.all(ranks.max(axis=1) == no_items - 1)

    for row in range(no_users):
        assert np.all(np.sort(ranks[row]) == np.arange(no_items))

    # Train set exclusions. All ranks should be zero
    # if train interactions is dense.
    ranks = model.predict_rank(rank_input,
                               train_interactions=rank_input,
                               check_intersections=False).todense()
    assert np.all(ranks == 0)

    # Max rank should be num_items - 1 - number of positives
    # in train in that row
    ranks = model.predict_rank(rank_input,
                               train_interactions=train,
                               check_intersections=False).todense()
    assert np.all(
        np.squeeze(np.array(ranks.max(axis=1))) == no_items - 1 -
        np.squeeze(np.array(train.getnnz(axis=1))))

    # check error is raised when train and test have interactions in common
    with pytest.raises(ValueError):
        model.predict_rank(train,
                           train_interactions=train,
                           check_intersections=True)

    # check error not raised when flag is False
    model.predict_rank(train,
                       train_interactions=train,
                       check_intersections=False)

    # check no errors raised when train and test have no interactions in common
    not_train = sp.rand(no_users, no_items, format="csr",
                        random_state=43) - train
    not_train.data[not_train.data < 0] = 0
    not_train.eliminate_zeros()
    model.predict_rank(not_train,
                       train_interactions=train,
                       check_intersections=True)

    # Make sure ranks are computed pessimistically when
    # there are ties (that is, equal predictions for every
    # item will assign maximum rank to each).
    model.user_embeddings = np.zeros_like(model.user_embeddings)
    model.item_embeddings = np.zeros_like(model.item_embeddings)
    model.user_biases = np.zeros_like(model.user_biases)
    model.item_biases = np.zeros_like(model.item_biases)

    ranks = model.predict_rank(rank_input, num_threads=2).todense()

    assert np.all(ranks.min(axis=1) == 99)
    assert np.all(ranks.max(axis=1) == 99)

    # Wrong input dimensions
    with pytest.raises(ValueError):
        model.predict_rank(sp.csr_matrix((5, 5)), num_threads=2)
Example #55
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dim', type=int, default=4096)
    parser.add_argument('--k', type=int, default=128)
    parser.add_argument('--density', type=float, default=0.01)
    parser.add_argument('--seed', type=int, default=123)
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    np.random.seed(args.seed)

    t = time()
    A = sparse.rand(args.dim, args.dim, density=args.density, format='csr')
    B = sparse.rand(args.dim, args.dim, density=args.density, format='csr')
    gen_time = time() - t
    print('gen_time  ', gen_time, file=sys.stderr)

    t = time()
    td_D, td_I = run_topdot(A, B, args.k)
    td_time = time() - t
    print('td_time   ', td_time, file=sys.stderr)

    t = time()
    na_D, na_I = run_naive(A, B, args.k)
    naive_time = time() - t
    print('naive_time', naive_time, file=sys.stderr)

    rand_idx = np.random.choice(args.dim, args.k, replace=False)
Example #56
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import rand
from sparse_dot_topn import awesome_cossim_topn

N = 10
a = rand(100, 1000000, density=0.005, format='csr')
b = rand(1000000, 200, density=0.005, format='csr')

c = awesome_cossim_topn(a, b, 5, 0.01)
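For reference, c keeps at most 5 entries per row: the largest values of a.dot(b) per row that exceed the 0.01 lower bound. An equivalent but much slower dense cross-check on a small slice (illustrative only):

import numpy as np

d = a[:10].dot(b).toarray()           # full products for the first 10 rows
top5 = np.sort(d, axis=1)[:, :-6:-1]  # top 5 values per row, descending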
Example #57
def test_tfidf_transformer_type(X_dtype):
    X = sparse.rand(10, 20000, dtype=X_dtype, random_state=42)
    X_trans = TfidfTransformer().fit_transform(X)
    assert X_trans.dtype == X.dtype
Example #58
    def setup(self, density, format):
        n = 1000
        if format == 'dok' and n * density >= 500:
            raise NotImplementedError()

        self.X = sparse.rand(n, n, format=format, density=density)
Example #59
 def setUp(self):
     self.matrix = rand(self.states, self.states, density=0.1, format='csr')
Example #60
 def setup(self, n, m):
     rng = np.random.default_rng(1234)
     self.A = sparse.eye(n, n) + sparse.rand(n, n, density=0.01, random_state=rng)
     self.b = np.ones(n)