def testDist(self): x = "mama ma czerwonego, zielonego kotka" #5 y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. " #8 z = "mama oraz tata maja kotka" #5 #nie wspolne: 7 lrecords = [] lrecords.append({'ab': x, 'ut': "", "ti": ""}) lrecords.append({'ab': y, 'ut': "", "ti": ""}) lrecords.append({'ab': z, 'ut': "", "ti": ""}) def frecords(): for i in lrecords: yield i training_turns = len(lrecords) zd = JaccardDistance(frecords, len(lrecords), training_turns, stopwords=[]) avg = (7 / 13 + 6 / 10 + 9 / 13) / 3 self.assertEqual( zd.calc_weights(frecords, len(lrecords), training_turns), ([ 1 / ((abs(7 / 13 - avg) + abs(6 / 10 - avg) + abs(9 / 13 - avg)) / 3) ], [avg])) self.assertEqual(zd.distance(lrecords[0], lrecords[1]), ( 1 / ((abs(7 / 13 - avg) + abs(6 / 10 - avg) + abs(9 / 13 - avg)) / 3) * ((7 / 13) - avg)))
def testDistTxt(self): lrecords = [] for k, v in enumerate(self.letters): lrecords.append((2*k, v)) def frecords(): for i in lrecords: yield i training_turns = 0 zd = JaccardDistance(frecords, len(lrecords), training_turns, stopwords = []) x = "mama ma czerwonego, zielonego kotka" #5 y = "tata nie ma czerwonego, czarnego krokodyla ani kotka" #8 z = "mama oraz tata maja kotka" #5 #nie wspolne: 7 self.assertEqual(zd.dist_txt(x, y), 7/13) self.assertEqual(zd.dist_txt(x, z), 6/10) self.assertEqual(zd.dist_txt(y, z), 9/13)
def testCalcWeights(self): x = "mama ma czerwonego, zielonego kotka" #5 y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. " #8 z = "mama oraz tata maja kotka" #5 #nie wspolne: 7 lrecords = [] lrecords.append({'ab': x, 'ut': "", "ti": ""}) lrecords.append({'ab': y, 'ut': "", "ti": ""}) lrecords.append({'ab': z, 'ut': "", "ti": ""}) def frecords(): for i in lrecords: yield i training_turns = len(lrecords) zd = JaccardDistance(frecords, len(lrecords), training_turns, stopwords = []) avg = (7/13 + 6/10 + 9/13)/3 self.assertEqual(zd.calc_weights(frecords, len(lrecords), training_turns), ([1/((abs(7/13 - avg) + abs(6/10 - avg) + abs(9/13 - avg))/3)], [avg]))
def testDist(self): x = "mama ma czerwonego, zielonego kotka" #5 y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. " #8 z = "mama oraz tata maja kotka" #5 #nie wspolne: 7 lrecords = [] lrecords.append({'ab': x, 'ut': "", "ti": ""}) lrecords.append({'ab': y, 'ut': "", "ti": ""}) lrecords.append({'ab': z, 'ut': "", "ti": ""}) def frecords(): for i in lrecords: yield i training_turns = len(lrecords) zd = JaccardDistance(frecords, len(lrecords), training_turns, stopwords = []) self.assertEqual(zd.dist_txt(x, y), 7/13) self.assertEqual(zd.dist_txt(x, z), 6/10) self.assertEqual(zd.dist_txt(y, z), 9/13)
def testDistTxt(self): lrecords = [] for k, v in enumerate(self.letters): lrecords.append((2 * k, v)) def frecords(): for i in lrecords: yield i training_turns = 0 zd = JaccardDistance(frecords, len(lrecords), training_turns, stopwords=[]) x = "mama ma czerwonego, zielonego kotka" #5 y = "tata nie ma czerwonego, czarnego krokodyla ani kotka" #8 z = "mama oraz tata maja kotka" #5 #nie wspolne: 7 self.assertEqual(zd.dist_txt(x, y), 7 / 13) self.assertEqual(zd.dist_txt(x, z), 6 / 10) self.assertEqual(zd.dist_txt(y, z), 9 / 13)
def testDist(self): x = "mama ma czerwonego, zielonego kotka" #5 y = "tata nie ma czerwonego, czarnego krokodyla ani kotka. " #8 z = "mama oraz tata maja kotka" #5 #nie wspolne: 7 lrecords = [] lrecords.append({'ab': x, 'ut': "", "ti": ""}) lrecords.append({'ab': y, 'ut': "", "ti": ""}) lrecords.append({'ab': z, 'ut': "", "ti": ""}) def frecords(): for i in lrecords: yield i training_turns = len(lrecords) zd = JaccardDistance(frecords, len(lrecords), training_turns, stopwords=[]) self.assertEqual(zd.dist_txt(x, y), 7 / 13) self.assertEqual(zd.dist_txt(x, z), 6 / 10) self.assertEqual(zd.dist_txt(y, z), 9 / 13)