Exemple #1
0
    def fit(self, train):
        """
        Build the item-item similarity matrix from a ratings DataFrame.

        The frame is converted with ``self._df_to_sparse_matrix`` and kept
        in ``self.sparse_matrix``; the learned similarities are stored in
        ``self.similarity_matrix`` as a ``csr_matrix`` of shape
        (num_items, num_items).

        Parameters
        ==========
        train : pandas DataFrame
            Must have the columns 'user_id', 'movie_id' and 'rating'.
        """
        self.sparse_matrix = self._df_to_sparse_matrix(train)

        # copied from mrec ItemSimilarityRecommender
        # Bind ``dataset`` unconditionally: it used to be assigned only
        # inside the wrapping branch, so an input that already was a
        # fast_sparse_matrix hit a NameError on ``dataset.shape``.
        dataset = self.sparse_matrix
        if not isinstance(dataset, fast_sparse_matrix):
            dataset = fast_sparse_matrix(dataset)
        num_users, num_items = dataset.shape
        # build up a sparse similarity matrix in coordinate form
        data = []
        row = []
        col = []
        # ``range`` works on both Python 2 and 3 (``xrange`` is py2-only)
        for j in range(num_items):
            w = self.compute_similarities(dataset, j)
            for k, v in enumerate(w):
                # only non-zero similarities are stored
                if v != 0:
                    data.append(v)
                    row.append(j)
                    col.append(k)
        idx = np.array([row, col], dtype='int32')
        self.similarity_matrix = csr_matrix((data, idx), (num_items, num_items))
Exemple #2
0
    def fit(self, dataset, item_features=None):
        """
        Compute the full item-item similarity matrix for a user-item matrix
        and store it in ``self.similarity_matrix``.

        Parameters
        ==========
        dataset : scipy sparse matrix or mrec.sparse.fast_sparse_matrix, shape = [num_users, num_items]
            User-item counts; row i holds the counts for the i-th user.
            Anything that is not already a fast_sparse_matrix is wrapped
            in one first.
        item_features : array_like, shape = [num_items, num_features]
            Features for items in training set, ignored here.
        """
        if not isinstance(dataset, fast_sparse_matrix):
            dataset = fast_sparse_matrix(dataset)
        _, num_items = dataset.shape

        # Coordinate-format accumulators for the non-zero similarities.
        values, rows, cols = [], [], []
        for item in range(num_items):
            weights = self.compute_similarities(dataset, item)
            # keep only the (column, weight) pairs with a non-zero weight
            hits = [(other, weight)
                    for other, weight in enumerate(weights)
                    if weight != 0]
            values.extend(weight for _, weight in hits)
            rows.extend([item] * len(hits))
            cols.extend(other for other, _ in hits)

        coords = np.array([rows, cols], dtype='int32')
        self.similarity_matrix = csr_matrix((values, coords),
                                            (num_items, num_items))
Exemple #3
0
    def fit(self,train):
        """
        Learn factors from training set. User and item factors are
        fitted alternately for ``self.num_iters`` rounds; the results are
        stored in ``self.U`` (one row per user) and ``self.V`` (one row
        per item).

        Parameters
        ==========
        train : scipy.sparse.csr_matrix or mrec.sparse.fast_sparse_matrix
            User-item matrix.
        """
        # isinstance (rather than an exact type comparison) also wraps
        # csr_matrix subclasses
        if isinstance(train, csr_matrix):
            train = fast_sparse_matrix(train)

        # NOTE(review): _init is expected to set self.data, self.num_users,
        # self.num_items and self.d, all of which are read below - confirm.
        self._init(train)
        self.U = self.init_factors(self.num_users,False)  # don't need values, will compute them
        self.V = self.init_factors(self.num_items)
        # ``range`` works on both Python 2 and 3 (``xrange`` is py2-only)
        for it in range(self.num_iters):
            # single-argument call: prints the same text on Python 2 and 3
            print('iteration {0}'.format(it))
            # fit user factors
            VV = self.V.T.dot(self.V)
            for u in range(self.num_users):
                # get (positive i.e. non-zero scored) items for user
                indices = self.data.X[u].nonzero()[1]
                if indices.size:
                    self.U[u,:] = self.update(indices,self.V,VV)
                else:
                    # user with no items gets an all-zero factor
                    self.U[u,:] = np.zeros(self.d)
            # fit item factors
            UU = self.U.T.dot(self.U)
            for i in range(self.num_items):
                # users with a non-zero score for item i
                indices = self.data.fast_get_col(i).nonzero()[0]
                if indices.size:
                    self.V[i,:] = self.update(indices,self.U,UU)
                else:
                    # item with no users gets an all-zero factor
                    self.V[i,:] = np.zeros(self.d)
Exemple #4
0
def test_init_fast_sparse_matrix():
    """fast_sparse_matrix built from COO, CSR or CSC input keeps contents and shape."""
    coo = get_random_coo_matrix()
    # the same random matrix in each of the three input formats
    for source in (coo, coo.tocsr(), coo.tocsc()):
        wrapped = fast_sparse_matrix(source)
        assert_array_equal(wrapped.X.toarray(), source.toarray())
        assert_equal(wrapped.shape, source.shape)
Exemple #5
0
def test_init_fast_sparse_matrix():
    """Wrapping a COO, CSR or CSC matrix must preserve its values and shape."""
    base = get_random_coo_matrix()
    as_csr = base.tocsr()
    as_csc = base.tocsc()
    candidates = (base, as_csr, as_csc)
    for original in candidates:
        fsm = fast_sparse_matrix(original)
        # dense views must match element for element
        assert_array_equal(fsm.X.toarray(), original.toarray())
        assert_equal(fsm.shape, original.shape)
Exemple #6
0
def test_save_load():
    """Save to file as arrays in numpy binary format."""
    X = get_random_coo_matrix()
    m = fast_sparse_matrix(X)
    fd, path = tempfile.mkstemp(suffix='.npz')
    # mkstemp returns an already-open OS-level descriptor; close it so it
    # is not leaked (and so the file can be reopened/removed on platforms
    # that lock open files).
    os.close(fd)
    try:
        m.save(path)
        n = fast_sparse_matrix.load(path)
    finally:
        # remove the temp file even when save/load raises
        os.remove(path)
    assert_equal(m.shape, n.shape)
    assert_array_equal(m.X.toarray(), n.X.toarray())
    assert_array_equal(m.col_view.toarray(), n.col_view.toarray())
Exemple #7
0
def test_save_load():
    """Save to file as arrays in numpy binary format."""
    X = get_random_coo_matrix()
    m = fast_sparse_matrix(X)
    fd, path = tempfile.mkstemp(suffix=".npz")
    # The descriptor handed back by mkstemp is open; release it right away
    # instead of leaking it for the rest of the test run.
    os.close(fd)
    try:
        m.save(path)
        n = fast_sparse_matrix.load(path)
    finally:
        # clean up the temp file whether or not the round trip succeeded
        os.remove(path)
    assert_equal(m.shape, n.shape)
    assert_array_equal(m.X.toarray(), n.X.toarray())
    assert_array_equal(m.col_view.toarray(), n.col_view.toarray())
Exemple #8
0
def save_sparse_matrix(data, fmt, filepath):
    """
    Write a scipy sparse matrix to disk in the requested format.

    For the tsv and csv formats the matrix is converted to COO form and
    every stored entry is written on its own line as ``row, col, value``,
    with row and column indices shifted to be 1-indexed.

    Parameters
    ----------
    data : scipy sparse matrix to save
    fmt : str
        Specifies the file format to write:
        - tsv
        - csv
        - mm  (MatrixMarket)
        - npz (save as npz archive of numpy arrays)
        - fsm (mrec.sparse.fast_sparse_matrix)
    filepath : str
        The file to write.

    Raises
    ------
    ValueError
        If ``fmt`` is not one of the formats listed above.
    """
    if fmt in ('tsv', 'csv'):
        # the two plain-text formats differ only in their field separator
        line_format = '{0}\t{1}\t{2}' if fmt == 'tsv' else '{0},{1},{2}'
        coo = data.tocoo()
        with open(filepath, 'w') as out:
            for row, col, value in zip(coo.row, coo.col, coo.data):
                out.write(line_format.format(row + 1, col + 1, value) + '\n')
        return

    if fmt == 'mm':
        mmwrite(filepath, data)
        return
    if fmt == 'npz':
        savez(data.tocoo(), filepath)
        return
    if fmt == 'fsm':
        fast_sparse_matrix(data).save(filepath)
        return
    raise ValueError('unknown output format: {0}'.format(fmt))
Exemple #9
0
def test_fast_update_col():
    """fast_update_col must replace a column's stored values in place."""
    X = get_random_coo_matrix().tocsc()
    m = fast_sparse_matrix(X)
    cols = X.shape[1]
    # ``range`` works on both Python 2 and 3 (``xrange`` is py2-only)
    for j in range(cols):
        vals = m.fast_get_col(j).data
        # nothing to bump when the column holds no non-zero values
        if (vals == 0).all():
            continue
        vals[vals != 0] += 1
        m.fast_update_col(j, vals)
        # expected column: every non-zero entry of the source incremented
        expected = X[:, j].toarray()
        expected[expected != 0] += 1
        assert_array_equal(m.fast_get_col(j).toarray(), expected)
Exemple #10
0
def test_fast_update_col():
    """Updating a column via fast_update_col is visible through fast_get_col."""
    X = get_random_coo_matrix().tocsc()
    m = fast_sparse_matrix(X)
    cols = X.shape[1]
    # ``xrange`` does not exist on Python 3; ``range`` runs on both
    for j in range(cols):
        vals = m.fast_get_col(j).data
        if (vals == 0).all():
            # column has no non-zero values, so there is nothing to change
            continue
        vals[vals != 0] += 1
        m.fast_update_col(j, vals)
        expected = X[:, j].toarray()
        # same +1 on the non-zero entries, applied with a boolean mask
        expected[expected != 0] += 1
        assert_array_equal(m.fast_get_col(j).toarray(), expected)
Exemple #11
0
    def fit(self,train,S,item_features=None):
        """
        Learn factors from training set. User and item factors are
        fitted alternately for ``self.num_iters`` rounds; the results are
        stored in ``self.U`` (one row per user) and ``self.V`` (one row
        per item).

        Parameters
        ==========
        train : scipy.sparse.csr_matrix or mrec.sparse.fast_sparse_matrix
            User-item matrix.
        S : item similarities
            Stored as ``self.S`` and used to fit a NearestNeighbors model;
            the resulting neighbour distances and indices are kept in
            ``self.k_distances`` and ``self.k_indices``.
        item_features : array_like, shape = [num_items, num_features]
            Features for each item in the dataset, ignored here.
        """
        self.S = S
        nbrs = NearestNeighbors(n_neighbors=self.k + 1).fit(self.S)
        # Does contain itself, hence k + 1
        self.k_distances, self.k_indices = nbrs.kneighbors(self.S)

        # isinstance (rather than an exact type comparison) also wraps
        # csr_matrix subclasses
        if isinstance(train, csr_matrix):
            train = fast_sparse_matrix(train)

        num_users,num_items = train.shape

        self.U = self.init_factors(num_users,False)  # don't need values, will compute them
        self.V = self.init_factors(num_items) # Items
        # ``range`` works on both Python 2 and 3 (``xrange`` is py2-only)
        for it in range(self.num_iters):
            # single-argument call: prints the same text on Python 2 and 3
            print('iteration {0}'.format(it))
            # fit user factors
            VV = self.V.T.dot(self.V)
            for u in range(num_users):
                # get (positive i.e. non-zero scored) items for user
                indices = train.X[u].nonzero()[1]
                if indices.size:
                    self.U[u,:] = self.update(indices,self.V,VV)
                else:
                    # user with no items gets an all-zero factor
                    self.U[u,:] = np.zeros(self.d)
            # fit item factors
            UU = self.U.T.dot(self.U)
            for i in range(num_items):
                # users with a non-zero score for item i
                indices = train.fast_get_col(i).nonzero()[0]
                if indices.size:
                    self.V[i,:] = self.update_item(indices,self.U,UU)
                else:
                    # item with no users gets an all-zero factor
                    self.V[i,:] = np.zeros(self.d)
            elif type(learner) == WeightedMf:  
                trainX = X.toScipyCsr()

                if modelSelect:                     
                    modelSelectX, userInds = Sampling.sampleUsers2(X, modelSelectSamples)
                    modelSelectX = modelSelectX.toScipyCsc()  
                    meanMetrics, stdMetrics = learner.modelSelect(modelSelectX)                          
                
                learner.learnModel(trainX)
                U = learner.U 
                V = learner.V
            elif type(learner) == CosineKNNRecommender or type(learner) == SLIM: 
                #We take a subsample of users for these recommenders as they are slow 
                X, userInds = Sampling.sampleUsers2(X, modelSelectSamples)                
                
                fastTrainX = fast_sparse_matrix(X.toScipyCsr())
                trainX = X.toScipyCsr()
                m, n = trainX.shape
                learner.fit(fastTrainX)
                
                recommendations = learner.range_recommend_items(trainX, 0, m, max_items=maxItems)
                
                orderedItems = numpy.zeros((m, maxItems), numpy.int)
                scores = numpy.zeros((m, maxItems))
                
                for i in range(m):
                    itemScores = numpy.array(recommendations[i])
                    if itemScores.shape[0] != 0: 
                        orderedItems[i, 0:itemScores.shape[0]] =  itemScores[:, 0]
                        scores[i, 0:itemScores.shape[0]] = itemScores[:, 1]
            else: 
Exemple #13
0
def test_fast_get_col():
    """fast_get_col(j) must match column j of the source CSC matrix."""
    X = get_random_coo_matrix().tocsc()
    m = fast_sparse_matrix(X)
    # ``range`` works on both Python 2 and 3 (``xrange`` is py2-only);
    # only the column count is needed, so the row count is not unpacked
    for j in range(X.shape[1]):
        assert_array_equal(m.fast_get_col(j).toarray(), X[:, j].toarray())
Exemple #14
0
def test_fast_get_col():
    """Every column returned by fast_get_col equals the matching CSC column."""
    X = get_random_coo_matrix().tocsc()
    m = fast_sparse_matrix(X)
    num_cols = X.shape[1]
    # ``xrange`` does not exist on Python 3; ``range`` runs on both
    for j in range(num_cols):
        assert_array_equal(m.fast_get_col(j).toarray(), X[:, j].toarray())