Esempio n. 1
0
    def testDist(self):
        """Check calc_weights() and distance() on three small records.

        The 'ab' fields of the records hold sentences of 5, 8 and 5 words.
        The expected raw pairwise values used below are the number of words
        the two sentences do not share, divided by the sum of both word
        counts: 7/13 (x, y), 6/10 (x, z) and 9/13 (y, z).

        Floats are compared with assertAlmostEqual so the test does not
        depend on the exact order of floating-point operations inside the
        implementation.
        """
        x = "mama ma czerwonego, zielonego kotka"  # 5 words
        y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. "  # 8 words
        z = "mama oraz tata maja kotka"  # 5 words
        # Words not shared between x and y: 7
        lrecords = [{'ab': text, 'ut': "", "ti": ""} for text in (x, y, z)]

        def frecords():
            # Returns a new generator over lrecords each time it is called.
            return (record for record in lrecords)

        training_turns = len(lrecords)
        zd = JaccardDistance(frecords,
                             len(lrecords),
                             training_turns,
                             stopwords=[])

        # Expected statistics over the three pairwise values: their mean and
        # the inverse of their mean absolute deviation from that mean.
        pair_values = [7 / 13, 6 / 10, 9 / 13]
        avg = sum(pair_values) / 3
        mean_abs_dev = sum(abs(value - avg) for value in pair_values) / 3
        expected_weight = 1 / mean_abs_dev

        weights, averages = zd.calc_weights(frecords, len(lrecords),
                                            training_turns)
        # Exactly one weight and one average are expected.
        self.assertEqual(len(weights), 1)
        self.assertEqual(len(averages), 1)
        self.assertAlmostEqual(weights[0], expected_weight)
        self.assertAlmostEqual(averages[0], avg)

        # The distance between the first two records is expected to be the
        # weighted deviation of their raw value from the mean.
        self.assertAlmostEqual(zd.distance(lrecords[0], lrecords[1]),
                               expected_weight * ((7 / 13) - avg))
Esempio n. 2
0
    def testDistTxt(self):
        """Check dist_txt() on three sentence pairs with known word overlap.

        The JaccardDistance instance is built from (2 * index, item) tuples
        derived from self.letters, with training_turns set to 0. The expected
        values are the number of words the two sentences do not share,
        divided by the sum of both word counts.

        Floats are compared with assertAlmostEqual so a last-bit rounding
        difference in the implementation does not fail the test.
        """
        lrecords = [(2 * index, item)
                    for index, item in enumerate(self.letters)]

        def frecords():
            # Returns a new generator over lrecords each time it is called.
            return (record for record in lrecords)

        training_turns = 0
        zd = JaccardDistance(frecords,
                             len(lrecords),
                             training_turns,
                             stopwords=[])

        x = "mama ma czerwonego, zielonego kotka"  # 5 words
        y = "tata nie ma czerwonego, czarnego krokodyla ani kotka"  # 8 words
        z = "mama oraz tata maja kotka"  # 5 words
        # Words not shared between x and y: 7
        self.assertAlmostEqual(zd.dist_txt(x, y), 7 / 13)
        self.assertAlmostEqual(zd.dist_txt(x, z), 6 / 10)
        self.assertAlmostEqual(zd.dist_txt(y, z), 9 / 13)
Esempio n. 3
0
    def testDist(self):
        """Check dist_txt() after building JaccardDistance from three records.

        Despite the method name, only dist_txt() is asserted here. The
        records carry the sentences in their 'ab' field, and training_turns
        equals the number of records. The expected values are the number of
        words the two sentences do not share, divided by the sum of both
        word counts.

        Floats are compared with assertAlmostEqual so a last-bit rounding
        difference in the implementation does not fail the test.
        """
        x = "mama ma czerwonego, zielonego kotka"  # 5 words
        y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. "  # 8 words
        z = "mama oraz tata maja kotka"  # 5 words
        # Words not shared between x and y: 7
        lrecords = [{'ab': text, 'ut': "", "ti": ""} for text in (x, y, z)]

        def frecords():
            # Returns a new generator over lrecords each time it is called.
            return (record for record in lrecords)

        training_turns = len(lrecords)
        zd = JaccardDistance(frecords,
                             len(lrecords),
                             training_turns,
                             stopwords=[])

        self.assertAlmostEqual(zd.dist_txt(x, y), 7 / 13)
        self.assertAlmostEqual(zd.dist_txt(x, z), 6 / 10)
        self.assertAlmostEqual(zd.dist_txt(y, z), 9 / 13)