Ejemplo n.º 1
0
    def test_different_penalty(self):
        '''Checks that the distance changes with the penalty argument `p`.

        The same pair of lists is compared with p=1, p=0 and p=0.5 and the
        three results are required to be pairwise different.
        '''

        l1 = [1, 2, 3, 4]
        l2 = [2, 5, 4, 3]

        dktau_p1 = topk.kendall_tau_distance(l1, l2, p=1)
        dktau_p0 = topk.kendall_tau_distance(l1, l2, p=0)
        dktau_p05 = topk.kendall_tau_distance(l1, l2, p=0.5)

        # assertNotEqual reports both operands on failure, whereas
        # assertTrue(a != b) only says "False is not true".
        self.assertNotEqual(dktau_p1, dktau_p0)
        self.assertNotEqual(dktau_p1, dktau_p05)
        self.assertNotEqual(dktau_p0, dktau_p05)
Ejemplo n.º 2
0
    def test_different_k(self):
        '''Checks that the distance changes with the `k` argument.

        The same pair of lists is compared with k=2, k=3 and k=-1 and the
        three results are required to be pairwise different.
        '''

        l1 = [1, 2, 3, 4]
        l2 = [2, 5, 4, 3]

        dktau_k2 = topk.kendall_tau_distance(l1, l2, k=2)
        dktau_k3 = topk.kendall_tau_distance(l1, l2, k=3)
        # NOTE(review): k=-1 presumably means "no cutoff" -- confirm in topk.
        dktau_kneg1 = topk.kendall_tau_distance(l1, l2, k=-1)

        # assertNotEqual reports both operands on failure, whereas
        # assertTrue(a != b) only says "False is not true".
        self.assertNotEqual(dktau_k2, dktau_k3)
        self.assertNotEqual(dktau_k2, dktau_kneg1)
        self.assertNotEqual(dktau_k3, dktau_kneg1)
Ejemplo n.º 3
0
 def test_different_penalty(self):
     '''Checks that the distance changes with the penalty argument `p`.

     The same pair of lists is compared with p=1, p=0 and p=0.5 and the
     three results are required to be pairwise different.
     '''

     l1 = [1, 2, 3, 4]
     l2 = [2, 5, 4, 3]

     dktau_p1 = topk.kendall_tau_distance(l1, l2, p=1)
     dktau_p0 = topk.kendall_tau_distance(l1, l2, p=0)
     dktau_p05 = topk.kendall_tau_distance(l1, l2, p=0.5)

     # assertNotEqual reports both operands on failure, whereas
     # assertTrue(a != b) only says "False is not true".
     self.assertNotEqual(dktau_p1, dktau_p0)
     self.assertNotEqual(dktau_p1, dktau_p05)
     self.assertNotEqual(dktau_p0, dktau_p05)
Ejemplo n.º 4
0
    def test_different_k(self):
        '''Checks that the distance changes with the `k` argument.

        The same pair of lists is compared with k=2, k=3 and k=-1 and the
        three results are required to be pairwise different.
        '''

        l1 = [1, 2, 3, 4]
        l2 = [2, 5, 4, 3]

        dktau_k2 = topk.kendall_tau_distance(l1, l2, k=2)
        dktau_k3 = topk.kendall_tau_distance(l1, l2, k=3)
        # NOTE(review): k=-1 presumably means "no cutoff" -- confirm in topk.
        dktau_kneg1 = topk.kendall_tau_distance(l1, l2, k=-1)

        # assertNotEqual reports both operands on failure, whereas
        # assertTrue(a != b) only says "False is not true".
        self.assertNotEqual(dktau_k2, dktau_k3)
        self.assertNotEqual(dktau_k2, dktau_kneg1)
        self.assertNotEqual(dktau_k3, dktau_kneg1)
Ejemplo n.º 5
0
def main(args=None):
    '''
    Prints summary statistics of the per-user Kendall tau distance between
    two rankings built from a whitespace-separated numeric data file.

    For every distinct value in column 0 (the user), the rows of that user
    are ordered once by column 2 and once by the reciprocal of column 3. The
    column-1 entries in those two orders are compared with
    `topk.kendall_tau_distance`, and the mean, standard deviation, skew and
    10/50/90 percentiles of the collected distances are printed.

    Arguments
    ---------
    args: sequence of str, optional
        Command line in `sys.argv` layout; `args[1]` is the input file path.
        Exactly two entries are required.

    Returns
    -------
    1 (after printing a usage line) when the argument count is wrong,
    None otherwise.
    '''
    if not args:
        args = []

    if len(args) != 2:
        print('Usage %s <in file>' % sys.argv[0])
        return 1

    in_fpath = args[1]

    # Each row will have 10 cols:
    # user, tag, value, baseline, mean_tag_prob, tag_freq, \
    # item_tag_prob, item_tag_prob, tag_item_freq, user_tags
    # NOTE(review): item_tag_prob appears twice in the list above and only
    # nine names are given -- confirm the real column layout.
    # skip_footer=1 drops the last line of the file.
    data = np.genfromtxt(in_fpath, skip_footer=1)
    users = np.unique(data[:, 0])

    to_plot_corr = []
    for i, user in enumerate(users):
        # select based on first column
        udata = data[data[:, 0] == user]

        our_method = udata[:, 2]
        baseline = 1.0 / udata[:, 3]

        # If the first is nan, user has too many tags leading to probs = 0.
        # NaN never compares equal to anything (itself included), so the
        # check must use np.isnan; `== np.nan` is always False.
        if np.isnan(our_method[0]):
            continue

        # Column 1 (the tag) ordered by each of the two scores.
        rank_our = udata[our_method.argsort(), 1]
        rank_base = udata[baseline.argsort(), 1]

        print(i, rank_our[:10])
        print(i, rank_base[:10])

        corr = topk.kendall_tau_distance(rank_our, rank_base)
        to_plot_corr.append(corr)

    corr_data = np.array(to_plot_corr)
    mean = np.mean(corr_data)
    std = np.std(corr_data)
    skew = stats.skew(corr_data)
    _10perc = stats.scoreatpercentile(corr_data, 10)
    _50perc = stats.scoreatpercentile(corr_data, 50)
    _90perc = stats.scoreatpercentile(corr_data, 90)

    print('Mean            %.3f' % mean)
    print('Std             %.3f' % std)
    print('Skew            %.3f' % skew)
    print('10%% Percentile  %.3f' % _10perc)
    print('50%% Percentile  %.3f' % _50perc)
    print('90%% Percentile  %.3f' % _90perc)
    print()
Ejemplo n.º 6
0
    def test_all_paper_values(self):
        '''
        Tests based on the datasets by the paper:
        [1] Algorithms for Estimating Relative Importance in Networks.
           Scott White, Padhraic Smyth
           KDD 2003

        Values will not match the paper exactly because we use a different
        normalization.
        '''
        # Every name gets a distinct integer id, so each ranking below is a
        # plain list of ints.
        Khemais = 1
        Beghal = 2
        Moussaoui = 3
        Maaroufi = 4
        Qatada = 5
        Daoudi = 6
        Courtaillier = 7
        Bensakhria = 8
        Walid = 9
        Khammoun = 10
        Atta = 11
        Al_Shehhi = 12
        al_Shibh = 13
        Jarrah = 14
        Hanjour = 15
        Al_Omari = 16
        Bahaji = 17

        list_page_rank = [
            Khemais, Beghal, Moussaoui, Maaroufi, Qatada, Daoudi, Courtaillier,
            Bensakhria, Walid, Khammoun
        ]

        list_wkpaths = [
            Beghal, Khemais, Moussaoui, Maaroufi, Bensakhria, Daoudi, Qatada,
            Walid, Courtaillier, Khammoun
        ]

        list_markov_c = [
            Atta, Al_Shehhi, al_Shibh, Moussaoui, Jarrah, Hanjour, Al_Omari,
            Khemais, Qatada, Bahaji
        ]

        # assertEqual / assertAlmostEqual are used instead of the
        # assertEquals / assertAlmostEquals aliases, which were deprecated
        # and no longer exist in Python 3.12.

        # A list compared with itself is at distance 0.
        self.assertEqual(
            0, topk.kendall_tau_distance(list_page_rank, list_page_rank))

        self.assertEqual(
            0, topk.kendall_tau_distance(list_wkpaths, list_wkpaths))

        self.assertEqual(
            0, topk.kendall_tau_distance(list_markov_c, list_markov_c))

        # A list compared with its own reversal.
        self.assertEqual(
            0.45,
            topk.kendall_tau_distance(list_page_rank, list_page_rank[::-1]))

        self.assertEqual(
            0.45, topk.kendall_tau_distance(list_wkpaths, list_wkpaths[::-1]))

        self.assertEqual(
            0.45, topk.kendall_tau_distance(list_markov_c,
                                            list_markov_c[::-1]))

        # Pairwise comparisons in both argument orders, checked to two
        # decimal places.
        self.assertAlmostEqual(
            0.06, topk.kendall_tau_distance(list_page_rank, list_wkpaths), 2)

        self.assertAlmostEqual(
            0.68, topk.kendall_tau_distance(list_page_rank, list_markov_c), 2)

        self.assertAlmostEqual(
            0.06, topk.kendall_tau_distance(list_wkpaths, list_page_rank), 2)

        self.assertAlmostEqual(
            0.71, topk.kendall_tau_distance(list_wkpaths, list_markov_c), 2)

        self.assertAlmostEqual(
            0.68, topk.kendall_tau_distance(list_markov_c, list_page_rank), 2)

        self.assertAlmostEqual(
            0.71, topk.kendall_tau_distance(list_markov_c, list_wkpaths), 2)

        # range(20, 30) shares no id with any list above (ids are 1..17).
        self.assertEqual(
            1, topk.kendall_tau_distance(list_page_rank, range(20, 30)))

        self.assertEqual(
            1, topk.kendall_tau_distance(list_wkpaths, range(20, 30)))

        self.assertEqual(
            1, topk.kendall_tau_distance(list_markov_c, range(20, 30)))
Ejemplo n.º 7
0
 def test_all_paper_values(self):
     '''
     Tests based on the datasets by the paper:
     [1] Algorithms for Estimating Relative Importance in Networks.
        Scott White, Padhraic Smyth
        KDD 2003

     Values will not match the paper exactly because we use a different
     normalization.
     '''
     # Every name gets a distinct integer id, so each ranking below is a
     # plain list of ints.
     Khemais = 1
     Beghal = 2
     Moussaoui = 3
     Maaroufi = 4
     Qatada = 5
     Daoudi = 6
     Courtaillier = 7
     Bensakhria = 8
     Walid = 9
     Khammoun = 10
     Atta = 11
     Al_Shehhi = 12
     al_Shibh = 13
     Jarrah = 14
     Hanjour = 15
     Al_Omari = 16
     Bahaji = 17

     list_page_rank = [
         Khemais,
         Beghal,
         Moussaoui,
         Maaroufi,
         Qatada,
         Daoudi,
         Courtaillier,
         Bensakhria,
         Walid,
         Khammoun,
     ]

     list_wkpaths = [
         Beghal,
         Khemais,
         Moussaoui,
         Maaroufi,
         Bensakhria,
         Daoudi,
         Qatada,
         Walid,
         Courtaillier,
         Khammoun,
     ]

     list_markov_c = [
         Atta,
         Al_Shehhi,
         al_Shibh,
         Moussaoui,
         Jarrah,
         Hanjour,
         Al_Omari,
         Khemais,
         Qatada,
         Bahaji,
     ]

     # assertEqual / assertAlmostEqual are used instead of the
     # assertEquals / assertAlmostEquals aliases, which were deprecated
     # and no longer exist in Python 3.12.

     # A list compared with itself is at distance 0.
     self.assertEqual(0,
         topk.kendall_tau_distance(list_page_rank, list_page_rank))

     self.assertEqual(0,
         topk.kendall_tau_distance(list_wkpaths, list_wkpaths))

     self.assertEqual(0,
         topk.kendall_tau_distance(list_markov_c, list_markov_c))

     # A list compared with its own reversal.
     self.assertEqual(0.45,
         topk.kendall_tau_distance(list_page_rank, list_page_rank[::-1]))

     self.assertEqual(0.45,
         topk.kendall_tau_distance(list_wkpaths, list_wkpaths[::-1]))

     self.assertEqual(0.45,
         topk.kendall_tau_distance(list_markov_c, list_markov_c[::-1]))

     # Pairwise comparisons in both argument orders, checked to two
     # decimal places.
     self.assertAlmostEqual(0.06,
         topk.kendall_tau_distance(list_page_rank, list_wkpaths), 2)

     self.assertAlmostEqual(0.68,
         topk.kendall_tau_distance(list_page_rank, list_markov_c), 2)

     self.assertAlmostEqual(0.06,
         topk.kendall_tau_distance(list_wkpaths, list_page_rank), 2)

     self.assertAlmostEqual(0.71,
         topk.kendall_tau_distance(list_wkpaths, list_markov_c), 2)

     self.assertAlmostEqual(0.68,
         topk.kendall_tau_distance(list_markov_c, list_page_rank), 2)

     self.assertAlmostEqual(0.71,
         topk.kendall_tau_distance(list_markov_c, list_wkpaths), 2)

     # range(20, 30) shares no id with any list above (ids are 1..17).
     self.assertEqual(1,
         topk.kendall_tau_distance(list_page_rank, range(20, 30)))

     self.assertEqual(1,
         topk.kendall_tau_distance(list_wkpaths, range(20, 30)))

     self.assertEqual(1,
         topk.kendall_tau_distance(list_markov_c, range(20, 30)))