def test_euclideanDistanceLengthNormalisation(self):
    # The same article repeated twice should be at (approximately) zero
    # distance from the single copy: only the length differs, which the
    # normalisation under test is expected to cancel out.
    words = ('cricket', 'is', 'a', 'game', 'played', 'by', 'fools', 'watched', 'others')
    single_vector = feeds.create_word_vector(words)

    # Test data: same article, doubled length, to test normalisation.
    doubled_vector = feeds.create_word_vector(words + words)

    result = processor.get_euclidean_dif(single_vector, doubled_vector)

    # Compare with a tolerance rather than exact equality: the two vectors
    # are normalised independently, so the result is subject to float
    # rounding. Four places matches the different-article test.
    self.assertAlmostEqual(result, 0.0, 4)
def test_euclideanDistanceDifferentArticle(self):
    # Two articles that share no words are expected to be at distance 1.0
    # (checked to four decimal places).
    first_words = ('cricket', 'is', 'a', 'game', 'played', 'by', 'fools', 'watched', 'others')
    first_vector = feeds.create_word_vector(first_words)

    # Test data: a completely different article with no words in common.
    second_words = ('bing', 'bong', 'boop', 'billa', 'zac', 'time', 'test', 'shane', 'red')
    second_vector = feeds.create_word_vector(second_words)

    result = processor.get_euclidean_dif(first_vector, second_vector)
    self.assertAlmostEqual(result, 1.0, 4)
def addToStory(self, item, conn):
    """Attach *item* to the most similar existing story, or start a new one.

    Each story in ``self.storyList`` is compared with *item* by calling
    ``get_euclidean_dif`` on the vector of the story's first item and the
    vector of ``item.article``. The story with the smallest distance
    strictly below the threshold receives the article via
    ``addArticle(item.article, conn)``. If no story qualifies (including
    when the list is empty), a new ``Story(item, conn)`` is created and
    appended to ``self.storyList``.

    Args:
        item: Object exposing ``article`` (which in turn exposes ``vector``).
        conn: Passed through unchanged to ``Story(...)`` / ``addArticle(...)``.

    Returns:
        The story the item ended up in (existing or newly created).
    """
    # Distances at or above this value are treated as "not the same story".
    similarity_threshold = 0.6

    # Scan every story before committing, so the article joins the closest
    # match rather than merely the first one under the threshold.
    best_dif = similarity_threshold
    best_story = None
    for story in self.storyList:
        # Only the first item of each story is used as its representative.
        dif = get_euclidean_dif(story.items[0].article.vector,
                                item.article.vector)
        if dif < best_dif:  # strict: on ties the earliest story wins
            best_dif = dif
            best_story = story

    if best_story is not None:
        best_story.addArticle(item.article, conn)
        return best_story

    # Nothing similar enough (or no stories yet): start a new story.
    new_story = Story(item, conn)
    self.storyList.append(new_story)
    return new_story
def test_euclideanDistanceSameArticleisZero(self):
    # Comparing a word vector against itself is expected to give a
    # distance of exactly zero.
    words = ('cricket', 'is', 'a', 'game', 'played', 'by', 'fools', 'watched', 'others')
    article_vector = feeds.create_word_vector(words)

    distance = processor.get_euclidean_dif(article_vector, article_vector)

    self.assertEqual(distance, 0.0)