Esempio n. 1
0
    def testDist(self):
        """Check calc_weights() and distance() on three short records.

        The expected values are derived from the pairwise text distances
        7/13, 6/10 and 9/13: the weight is the reciprocal of their mean
        absolute deviation, and the distance between the first two records
        is that weight times the deviation of 7/13 from the average.
        """
        x = "mama ma czerwonego, zielonego kotka"  # 5 words
        y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. "  # 8 words
        z = "mama oraz tata maja kotka"  # 5 words
        # x vs y: 7 words not in common
        lrecords = [{'ab': text, 'ut': "", "ti": ""} for text in (x, y, z)]

        def frecords():
            # A fresh generator over the records on every call.
            return (record for record in lrecords)

        record_count = len(lrecords)
        training_turns = record_count
        zd = JaccardDistance(frecords,
                             record_count,
                             training_turns,
                             stopwords=[])

        d_xy = 7 / 13
        d_xz = 6 / 10
        d_yz = 9 / 13
        avg = (d_xy + d_xz + d_yz) / 3
        mean_abs_dev = (abs(d_xy - avg) + abs(d_xz - avg) + abs(d_yz - avg)) / 3

        expected_weights = ([1 / mean_abs_dev], [avg])
        self.assertEqual(
            zd.calc_weights(frecords, record_count, training_turns),
            expected_weights)

        expected_distance = 1 / mean_abs_dev * (d_xy - avg)
        self.assertEqual(zd.distance(lrecords[0], lrecords[1]),
                         expected_distance)
 def testDistTxt(self):
     """Check dist_txt() on three sentence pairs.

     The JaccardDistance instance is built over (2 * index, value) tuples
     taken from self.letters, with zero training turns.
     """
     lrecords = [(2 * idx, value) for idx, value in enumerate(self.letters)]

     def frecords():
         # A fresh generator over the records on every call.
         return (record for record in lrecords)

     training_turns = 0
     zd = JaccardDistance(frecords, len(lrecords), training_turns, stopwords=[])

     x = "mama ma czerwonego, zielonego kotka"  # 5 words
     y = "tata nie ma czerwonego, czarnego krokodyla ani kotka"  # 8 words
     z = "mama oraz tata maja kotka"  # 5 words
     # x vs y: 7 words not in common
     expected_by_pair = (
         (x, y, 7 / 13),
         (x, z, 6 / 10),
         (y, z, 9 / 13),
     )
     for first, second, expected in expected_by_pair:
         self.assertEqual(zd.dist_txt(first, second), expected)
 def testCalcWeights(self):
     """Check the (weights, averages) pair returned by calc_weights().

     The expected weight is the reciprocal of the mean absolute deviation
     of the pairwise distances 7/13, 6/10 and 9/13; the expected average
     is their arithmetic mean.
     """
     x = "mama ma czerwonego, zielonego kotka"  # 5 words
     y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. "  # 8 words
     z = "mama oraz tata maja kotka"  # 5 words
     # x vs y: 7 words not in common
     lrecords = [{'ab': text, 'ut': "", "ti": ""} for text in (x, y, z)]

     def frecords():
         # A fresh generator over the records on every call.
         return (record for record in lrecords)

     record_count = len(lrecords)
     training_turns = record_count
     zd = JaccardDistance(frecords, record_count, training_turns, stopwords=[])

     d_xy = 7 / 13
     d_xz = 6 / 10
     d_yz = 9 / 13
     avg = (d_xy + d_xz + d_yz) / 3
     mean_abs_dev = (abs(d_xy - avg) + abs(d_xz - avg) + abs(d_yz - avg)) / 3

     actual = zd.calc_weights(frecords, record_count, training_turns)
     self.assertEqual(actual, ([1 / mean_abs_dev], [avg]))
    def testDist(self):
        """Check dist_txt() on a JaccardDistance trained on three records.

        NOTE(review): despite the name, only dist_txt() is asserted here;
        distance() is not called.
        """
        x = "mama ma czerwonego, zielonego kotka"  # 5 words
        y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. "  # 8 words
        z = "mama oraz tata maja kotka"  # 5 words
        # x vs y: 7 words not in common
        lrecords = [{'ab': text, 'ut': "", "ti": ""} for text in (x, y, z)]

        def frecords():
            # A fresh generator over the records on every call.
            return (record for record in lrecords)

        record_count = len(lrecords)
        training_turns = record_count
        zd = JaccardDistance(frecords, record_count, training_turns, stopwords=[])

        expected_by_pair = (
            (x, y, 7 / 13),
            (x, z, 6 / 10),
            (y, z, 9 / 13),
        )
        for first, second, expected in expected_by_pair:
            self.assertEqual(zd.dist_txt(first, second), expected)
Esempio n. 5
0
    def testDistTxt(self):
        """Check dist_txt() on three sentence pairs.

        The JaccardDistance instance is built over (2 * index, value) tuples
        taken from self.letters, with zero training turns.
        """
        lrecords = [(2 * idx, value) for idx, value in enumerate(self.letters)]

        def frecords():
            # A fresh generator over the records on every call.
            return (record for record in lrecords)

        training_turns = 0
        zd = JaccardDistance(frecords,
                             len(lrecords),
                             training_turns,
                             stopwords=[])

        x = "mama ma czerwonego, zielonego kotka"  # 5 words
        y = "tata nie ma czerwonego, czarnego krokodyla ani kotka"  # 8 words
        z = "mama oraz tata maja kotka"  # 5 words
        # x vs y: 7 words not in common
        expected_by_pair = (
            (x, y, 7 / 13),
            (x, z, 6 / 10),
            (y, z, 9 / 13),
        )
        for first, second, expected in expected_by_pair:
            self.assertEqual(zd.dist_txt(first, second), expected)
Esempio n. 6
0
    def testDist(self):
        """Check dist_txt() on a JaccardDistance trained on three records.

        NOTE(review): despite the name, only dist_txt() is asserted here;
        distance() is not called.
        """
        x = "mama ma czerwonego, zielonego kotka"  # 5 words
        y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. "  # 8 words
        z = "mama oraz tata maja kotka"  # 5 words
        # x vs y: 7 words not in common
        lrecords = [{'ab': text, 'ut': "", "ti": ""} for text in (x, y, z)]

        def frecords():
            # A fresh generator over the records on every call.
            return (record for record in lrecords)

        record_count = len(lrecords)
        training_turns = record_count
        zd = JaccardDistance(frecords,
                             record_count,
                             training_turns,
                             stopwords=[])

        expected_by_pair = (
            (x, y, 7 / 13),
            (x, z, 6 / 10),
            (y, z, 9 / 13),
        )
        for first, second, expected in expected_by_pair:
            self.assertEqual(zd.dist_txt(first, second), expected)