from nose.tools import assert_equal, assert_raises  # assumed test helpers
# KendallTau, SpearmanRho, ReciprocalRank, MeanReciprocalRank,
# AveragePrecision, MeanAveragePrecision and the Test base class are
# imported from the project's ranking-metrics module (path not shown here).

class TestRanking(Test):
    def __init__(self):
        super(TestRanking, self).__init__()
        # Rank-based metrics:  KendallTau, SpearmanRho, MeanReciprocalRank, ReciprocalRank
        self.kendall = KendallTau()
        self.kendall.load(self.GT_RANKING, self.TEST_RANKING)
        self.spearman = SpearmanRho()
        self.spearman.load(self.GT_RANKING, self.TEST_RANKING)
        self.mrr = MeanReciprocalRank()

        for elem in self.TEST_DECISION:
            self.mrr.load(self.GT_DECISION, elem)

    # TEST_CORR Spearman
    def test_RANK_Spearman_compute_all(self):
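        # Spearman's rho = 1 - 6*sum(d_i^2) / (n*(n^2 - 1)), where d_i is the
        # rank difference of item i between ground truth and test ranking.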
        assert_equal(self.spearman.compute(), 0.5) #0.55 ?

    #def test_RANK_Spearman_compute_tied_ranks():
    #    assert_equal(spearman.compute(tied_ranks=True), 0.5) #In fact, it uses Pearsonr corr. of the ranks

    def test_RANK_Spearman_compute_floats(self):
        spearman = SpearmanRho(self.DATA_PRED)
        assert_equal(spearman.compute(), 0.947368) #0.95 ?

    #def test_RANK_Spearman_compute_floats_tied_ranks():
    #    spearman = SpearmanRho(self.DATA_PRED)
    #    assert_equal(spearman.compute(tied_ranks=True), 0.930024) #In fact, it uses Pearsonr corr. of the ranks

    def test_RANK_Spearman_load_test(self):
        spearman = SpearmanRho()
        spearman.load_test(self.TEST_DATA)
        assert_equal(len(spearman.get_test()), len(self.TEST_DATA))

    def test_RANK_Spearman_load_ground_truth(self):
        spearman = SpearmanRho()
        spearman.load_ground_truth(self.GT_DATA)
        assert_equal(len(spearman.get_ground_truth()), len(self.GT_DATA))

    def test_RANK_Spearman_add_entry(self):
        self.spearman.add(('guitar', 4), ('guitar', 4)) #add tag 'guitar' at rank-4
        assert_equal(len(self.spearman.get_test()), len(self.TEST_RANKING)+1)
        assert_equal(len(self.spearman.get_ground_truth()), len(self.GT_RANKING)+1)
        assert_equal(self.spearman.compute(), 0.763158) #0.775 ?

    def test_RANK_Spearman_different_list_sizes(self):
        TEST_DATA = ['classical', 'invented', 'baroque']
        GT_DATA = ['classical', 'instrumental', 'piano', 'baroque']
        spearman = SpearmanRho()
        spearman.load_ground_truth(GT_DATA)
        spearman.load_test(TEST_DATA)
        assert_raises(ValueError, spearman.compute) #Raise: GT & TEST list have different sizes

    # TEST_CORR Kendall
    def test_RANK_Kendall_compute_all(self):
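        # Kendall's tau = (#concordant - #discordant pairs) / (n*(n-1)/2)
        # over all item pairs shared by GT_RANKING and TEST_RANKING.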
        assert_equal(self.kendall.compute(), 0.4)

    def test_RANK_Kendall_compute_floats(self):
        kendall = KendallTau(self.DATA_PRED)
        assert_equal(kendall.compute(), 0.888889)

    def test_RANK_Kendall_load_test(self):
        kendall = KendallTau()
        kendall.load_test(self.TEST_DATA)
        assert_equal(len(kendall.get_test()), len(self.TEST_DATA))

    def test_RANK_Kendall_load_ground_truth(self):
        kendall = KendallTau()
        kendall.load_ground_truth(self.GT_DATA)
        assert_equal(len(kendall.get_ground_truth()), len(self.GT_DATA))

    def test_RANK_Kendall_add_entry(self):
        self.kendall.add(('guitar', 4.0), ('guitar', 4.0)) #add tag 'guitar'
        assert_equal(len(self.kendall.get_test()), len(self.TEST_RANKING)+1)
        assert_equal(len(self.kendall.get_ground_truth()), len(self.GT_RANKING)+1)
        assert_equal(self.kendall.compute(), 0.666667)

    def test_RANK_Kendall_diff_elems(self):
        TEST_DECISION = ['class', 'invented', 'baro', 'instru']
        GT_DECISION = ['classical', 'instrumental', 'piano', 'baroque']
        kendall = KendallTau()
        kendall.load_ground_truth(GT_DECISION)
        kendall.load_test(TEST_DECISION)
        assert_raises(ValueError, kendall.compute) #Different elements

    # TEST_RANK ReciprocalRank
    def test_RANK_ReciprocalRank_compute(self):
        rr = ReciprocalRank()
        QUERY = 'instrumental'
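        # RR = 1 / rank of the first relevant hit; 0.5 implies that
        # 'instrumental' sits at rank 2 in self.GT_DECISION.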
        assert_equal(rr.compute(self.GT_DECISION, QUERY), 0.5)

    def test_RANK_ReciprocalRank_add_entry(self):
        rr = ReciprocalRank()
        QUERY = 'invented'
        rr.load(self.GT_DECISION, QUERY)
        assert_equal(rr.compute(), 0.0)

    # TEST_RANK MeanReciprocalRank
    # Internally, MeanReciprocalRank uses a list of ReciprocalRank results
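    # Conceptually (a sketch, not the library code):
    #   mrr = sum(ReciprocalRank().compute(gt, q) for q in queries) / len(queries)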
    def test_RANK_MeanReciprocalRank_compute_all(self):
        assert_equal(self.mrr.compute(), 0.4375)

    def test_RANK_MeanReciprocalRank_compute_one(self):
        mrr = MeanReciprocalRank()
        QUERY = 'instrumental'
        assert_equal(mrr.compute(self.GT_DECISION, QUERY), 0.5)

    def test_RANK_MeanReciprocalRank_load(self):
        mrr = MeanReciprocalRank()
        assert_raises(ValueError, mrr.load, self.GT_DECISION, self.TEST_RANKING)

    def test_RANK_MeanReciprocalRank_load_test(self):
        mrr = MeanReciprocalRank()
        assert_raises(NotImplementedError, mrr.load_test, self.TEST_RANKING)

    def test_RANK_MeanReciprocalRank_load_ground_truth(self):
        mrr = MeanReciprocalRank()
        assert_raises(NotImplementedError, mrr.load_ground_truth, self.GT_RANKING)

    def test_RANK_MeanReciprocalRank_add_entry(self):
        mrr = MeanReciprocalRank()
        QUERY = 'invented'
        mrr.load(self.GT_DECISION, QUERY)
        assert_equal(mrr.compute(), 0.0)

    # AveragePrecision (AP) tests
    def test_RANK_AveragePrecision(self):
        GT_DECISION = [1, 2, 4]
        TEST_DECISION = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
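        # Relevant items 1, 2 and 4 are retrieved at ranks 1, 2 and 4, so
        # AP = (1/1 + 2/2 + 3/4) / 3 = 0.9167.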
        avgp = AveragePrecision()
        avgp.load(GT_DECISION, TEST_DECISION)
        assert_equal(round(avgp.compute(), 4), 0.9167)

        GT_DECISION = [1, 4, 8]
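        # Hits now fall at ranks 1, 4 and 8: AP = (1/1 + 2/4 + 3/8) / 3 = 0.625.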
        avgp = AveragePrecision()
        avgp.load(GT_DECISION, TEST_DECISION)
        assert_equal(round(avgp.compute(), 4), 0.625)

        GT_DECISION = [3, 5, 9, 25, 39, 44, 56, 71, 89, 123]
        TEST_DECISION = [123, 84, 56, 6, 8, 9, 511, 129, 187, 25, 38, 48, 250, 113, 3]
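        # Hits at ranks 1, 3, 6, 10 and 15 (items 123, 56, 9, 25, 3), so
        # AP = (1/1 + 2/3 + 3/6 + 4/10 + 5/15) / 5 = 0.58; the mean is taken
        # over the 5 relevant items retrieved, not over all 10 in GT_DECISION.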
        avgp = AveragePrecision()
        avgp.load(GT_DECISION, TEST_DECISION)
        assert_equal(avgp.compute(), 0.58)

    # MeanAveragePrecision (mAP) tests
    def test_RANK_MeanAveragePrecision(self):
        mavgp = MeanAveragePrecision()
        GT_DECISION = [1, 2, 4]
        TEST_DECISION = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        mavgp.load(GT_DECISION, TEST_DECISION)

        GT_DECISION = [1, 4, 8]
        mavgp.load(GT_DECISION, TEST_DECISION)

        GT_DECISION = [3, 5, 9, 25, 39, 44, 56, 71, 89, 123]
        TEST_DECISION = [123, 84, 56, 6, 8, 9, 511, 129, 187, 25, 38, 48, 250, 113, 3]
        mavgp.load(GT_DECISION, TEST_DECISION)

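        # mAP is the mean of the three per-query APs computed above:
        # (0.9167 + 0.625 + 0.58) / 3 = 0.707222.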
        assert_equal(mavgp.compute(), 0.707222)