def test_classifier():
    username = "******"  # the only user with annotated data

    # get the ranked resources and split them in two equal parts
    temp = get_ranked_dataset(username)
    training_set, real_set = temp[:len(temp) // 2], temp[len(temp) // 2:]

    rankings = get_rankings([username, ],
                            lambda x: training_set,
                            lambda x: real_set)

    # now compare the returned rankings with the real ones
    with_labels = {}
    for resource in real_set:
        with_labels[resource['url']] = \
            int(db.views.find({
                'url': resource['url'],
                'user.username': username
            }).distinct('feedback')[0])

    def _confusion_matrix(values):
        # count how many resources fall into each (rounded) feedback value
        matrix = defaultdict(int)
        for url, value in values.items():
            matrix[round(value)] += 1
        return matrix

    # actual = _confusion_matrix(with_labels)
    # predicted = _confusion_matrix(dict([(k, v) for k, v in rankings[username].items()
    #                                     if k in with_labels.keys()]))

    rmse = root_mean_squared_error(rankings[username], with_labels)
    mae = mean_absolute_error(rankings[username], with_labels)
    return mae, rmse
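
# The error metrics called above are not defined in this section; what follows is a
# minimal sketch, assuming both arguments are dicts mapping a resource URL to a
# numeric score and that only URLs present in both dicts are compared. The helper
# name _common_pairs is hypothetical.
import math


def _common_pairs(predicted, actual):
    # pair the predicted score with the actual feedback for each shared URL
    return [(predicted[url], actual[url]) for url in predicted if url in actual]


def root_mean_squared_error(predicted, actual):
    pairs = _common_pairs(predicted, actual)
    return math.sqrt(sum((p - a) ** 2 for p, a in pairs) / len(pairs))


def mean_absolute_error(predicted, actual):
    pairs = _common_pairs(predicted, actual)
    return sum(abs(p - a) for p, a in pairs) / len(pairs)
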
def test_estimator():
    username = '******'  # the only user with annotated data

    dataset = get_ranked_dataset(username)
    rankings = get_rankings([username, ],
                            lambda x: [],
                            lambda x: dataset)

    # gather the actual feedback values for each resource
    with_labels = {}
    for resource in dataset:
        with_labels[resource['url']] = \
            int(db.views.find({
                'url': resource['url'],
                'user.username': username
            }).distinct('feedback')[0])

    # once we get the estimations, compare them to the actual values
    rmse = root_mean_squared_error(rankings[username], with_labels)
    mae = mean_absolute_error(rankings[username], with_labels)
    return mae, rmse
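
# Illustrative only: one way these evaluations might be run, assuming the database
# connection (db) and the ranking helpers used above are already configured for the
# annotated user. The printed labels are placeholders, not part of the original module.
if __name__ == '__main__':
    mae, rmse = test_classifier()
    print('classifier: MAE=%.3f RMSE=%.3f' % (mae, rmse))
    mae, rmse = test_estimator()
    print('estimator: MAE=%.3f RMSE=%.3f' % (mae, rmse))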