Example 1
    def estimate_precision(self, decomposition, train, validation, k=30):
        """
        Compute prec@k for a sample of training rows.

        Parameters
        ==========
        decomposition : WARPDecomposition
            The current decomposition.
        train : scipy.sparse.csr_matrix
            The training data.
        validation : dict or int
            Validation set over which we compute precision. Either supply
            a dict of row -> list of hidden cols, or an integer n, in which
            case we simply evaluate against the training data for the first
            n rows.
        k : int
            Measure precision@k.

        Returns
        =======
        prec : float
            Precision@k computed over a sample of the training rows.

        Notes
        =====
        At the moment this will underestimate the precision of real
        recommendations because we do not exclude training cols with zero
        ratings from the top-k predictions evaluated.
        """
        if isinstance(validation, dict):
            # validation maps row -> list of hidden (held-out) cols
            have_validation_set = True
            rows = list(validation.keys())
        elif isinstance(validation, int):
            # evaluate the first n rows against the training data itself
            have_validation_set = False
            rows = list(range(validation))
        else:
            raise ValueError('validation must be dict or int')

        r = decomposition.reconstruct(rows)
        prec = 0
        for u, ru in zip(rows, r):
            # take the k highest-scoring cols as the predictions for row u
            predicted = ru.argsort()[::-1][:k]
            if have_validation_set:
                actual = validation[u]
            else:
                # cols with nonzero ratings in the training row
                actual = train[u].indices[train[u].data > 0]
            prec += metrics.prec(predicted, actual, k, ignore_missing=True)
        return float(prec) / len(rows)
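The heart of the method is the argsort-based top-k selection, plus the fall-back that treats cols with nonzero training ratings as the relevant set when no validation dict is supplied. The self-contained sketch below replays that computation on invented toy data using plain numpy/scipy, so it does not depend on WARPDecomposition, reconstruct or metrics.prec; the arrays and the value of k are made up purely for illustration.

import numpy as np
from scipy.sparse import csr_matrix

# Invented toy data: 2 rows (users) x 6 cols (items). `scores` stands in for
# the reconstructed rows returned by decomposition.reconstruct(rows), and
# `train` stands in for the sparse training matrix.
scores = np.array([[0.9, 0.1, 0.8, 0.0, 0.4, 0.7],
                   [0.2, 0.6, 0.1, 0.9, 0.0, 0.3]])
train = csr_matrix(np.array([[1, 0, 1, 0, 0, 1],
                             [0, 1, 0, 1, 0, 0]]))

k = 3
prec = 0.0
for u in range(scores.shape[0]):
    ru = scores[u]
    predicted = ru.argsort()[::-1][:k]            # k highest-scoring cols
    actual = train[u].indices[train[u].data > 0]  # cols with nonzero training ratings
    prec += len(set(predicted) & set(actual)) / k
print(prec / scores.shape[0])  # mean precision@3, here (1 + 2/3) / 2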
Example 2
def test_prec():
    true = [2, 8, 6, 4]
    predicted = [6, 5, 8, 7]
    expected = [1, 0.5, 2. / 3., 0.5]  # prec@k of predicted against true, for k = 1..4
    for k in range(1, 5):
        assert_equal(metrics.prec([], true, k), 0)
        assert_equal(metrics.prec(true, true, k), 1)
        assert_equal(metrics.prec(predicted, true, k), expected[k - 1])
    assert_equal(metrics.prec(true, true, 5), 0.8)
    assert_equal(metrics.prec(true, true, 5, ignore_missing=True), 1)
    assert_equal(metrics.prec(predicted, true, 5), 0.4)
    assert_equal(metrics.prec(predicted, true, 5, ignore_missing=True), expected[3])
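Taken together, the assertions pin down the contract of metrics.prec: count how many of the first k predictions appear in the relevant list and divide by k, except that ignore_missing=True divides by the number of predictions actually available, so a correct but short list is not penalised. A minimal re-implementation consistent with these tests (an illustrative assumption, not the library's actual code) could look like this:

def prec(predicted, actual, k, ignore_missing=False):
    # Precision@k: fraction of the top-k predictions that are relevant.
    # Sketched from the assertions above, not copied from the real metrics module.
    if len(predicted) == 0:
        return 0.0
    top_k = list(predicted)[:k]
    hits = len(set(top_k) & set(actual))
    # With ignore_missing=True, score against the predictions we actually have
    # rather than against k when fewer than k were supplied.
    denominator = min(k, len(top_k)) if ignore_missing else k
    return hits / denominator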