def estimate_precision(self, decomposition, train, validation, k=30):
    """
    Compute prec@k for a sample of training rows.

    Parameters
    ==========
    decomposition : WARPDecomposition
        The current decomposition.
    train : scipy.sparse.csr_matrix
        The training data.
    validation : dict or int
        Validation set over which we compute precision.  Either supply
        a dict of row -> list of hidden cols, or an integer n, in which
        case we simply evaluate against the training data for the first
        n rows.
    k : int
        Measure precision@k.

    Returns
    =======
    (prec, rmse) : (float, float)
        Precision@k averaged over the sampled rows, and the RMSE of
        predicted ratings over any accumulated validation samples
        (0 when fewer than two samples were accumulated).

    Notes
    =====
    At the moment this will underestimate the precision of real
    recommendations because we do not exclude training cols with zero
    ratings from the top-k predictions evaluated.
    """
    if isinstance(validation, dict):
        have_validation_set = True
        rows = validation.keys()
    elif isinstance(validation, (int, long)):
        have_validation_set = False
        rows = range(validation)
    else:
        raise ValueError('validation must be dict or int')
    r = decomposition.reconstruct(rows)
    prec = 0
    diff = 0
    sample_count = 0
    for u, ru in izip(rows, r):
        # top-k columns ranked by predicted score, best first
        predicted = ru.argsort()[::-1][:k]
        if have_validation_set:
            actual = validation[u]
        else:
            # fall back to the positively-rated training cols for this row
            actual = train[u].indices[train[u].data > 0]
        prec += metrics.prec(predicted, actual, k, True)
    # Guard the sample-variance style divisor: the original computed
    # diff / (sample_count - 1) unconditionally even though no samples
    # are ever accumulated here, which is meaningless and would divide
    # by zero for exactly one sample.
    rmse = diff / (sample_count - 1) if sample_count > 1 else 0
    return float(prec) / len(rows), rmse
def estimate_precision(self, decomposition, train, validation, k=30):
    """
    Compute prec@k for a sample of training rows.

    Parameters
    ==========
    decomposition : WARPDecomposition
        The current decomposition.
    train : scipy.sparse.csr_matrix
        The training data.
    validation : dict or int
        Validation set over which we compute precision.  Either supply
        a dict of row -> list of hidden cols, or an integer n, in which
        case we simply evaluate against the training data for the first
        n rows.
    k : int
        Measure precision@k.

    Returns
    =======
    (prec, rmse) : (float, float)
        Precision@k averaged over the sampled rows, and the RMSE of
        predicted ratings over any accumulated validation samples
        (0 when fewer than two samples were accumulated).

    Notes
    =====
    At the moment this will underestimate the precision of real
    recommendations because we do not exclude training cols with zero
    ratings from the top-k predictions evaluated.
    """
    if isinstance(validation, dict):
        have_validation_set = True
        rows = validation.keys()
    elif isinstance(validation, (int, long)):
        have_validation_set = False
        rows = range(validation)
    else:
        raise ValueError('validation must be dict or int')
    r = decomposition.reconstruct(rows)
    prec = 0
    diff = 0
    sample_count = 0
    for u, ru in izip(rows, r):
        # highest-scoring k columns for this row
        predicted = ru.argsort()[::-1][:k]
        if have_validation_set:
            actual = validation[u]
        else:
            # no held-out set: score against positive training cols
            actual = train[u].indices[train[u].data > 0]
        prec += metrics.prec(predicted, actual, k, True)
    # Fix: the original divided by (sample_count - 1) unconditionally,
    # but sample_count is never incremented (the accumulation code was
    # commented out), making the result meaningless and a ZeroDivisionError
    # trap for a single sample.  Guard it and report 0 instead.
    rmse = diff / (sample_count - 1) if sample_count > 1 else 0
    return float(prec) / len(rows), rmse
def test_prec():
    """Exercise metrics.prec over truncated and full prediction lists."""
    relevant = [2, 8, 6, 4]
    ranked = [6, 5, 8, 7]
    expected = [1, 0.5, 2. / 3., 0.5]
    # prec@k for k = 1..4: empty predictions score 0, perfect predictions
    # score 1, and the partial ranking matches the precomputed values.
    for k, want in zip(range(1, 5), expected):
        assert_equal(metrics.prec([], relevant, k), 0)
        assert_equal(metrics.prec(relevant, relevant, k), 1)
        assert_equal(metrics.prec(ranked, relevant, k), want)
    # k = 5 exceeds the number of predictions: missing slots count
    # against precision unless ignore_missing is set.
    assert_equal(metrics.prec(relevant, relevant, 5), 0.8)
    assert_equal(metrics.prec(relevant, relevant, 5, ignore_missing=True), 1)
    assert_equal(metrics.prec(ranked, relevant, 5), 0.4)
    assert_equal(metrics.prec(ranked, relevant, 5, ignore_missing=True),
                 expected[3])
def test_prec():
    """Check metrics.prec at several cutoffs, with and without padding."""
    truth = [2, 8, 6, 4]
    guesses = [6, 5, 8, 7]
    per_k = [1, 0.5, 2. / 3., 0.5]
    k = 1
    while k < 5:
        # an empty prediction list never scores; an exact match always does
        assert_equal(metrics.prec([], truth, k), 0)
        assert_equal(metrics.prec(truth, truth, k), 1)
        assert_equal(metrics.prec(guesses, truth, k), per_k[k - 1])
        k += 1
    # cutoff past the end of the predictions: short lists are penalised
    # unless ignore_missing is requested
    assert_equal(metrics.prec(truth, truth, 5), 0.8)
    assert_equal(metrics.prec(truth, truth, 5, ignore_missing=True), 1)
    assert_equal(metrics.prec(guesses, truth, 5), 0.4)
    assert_equal(metrics.prec(guesses, truth, 5, ignore_missing=True),
                 per_k[3])