Beispiel #1
0
    def test_cosine_pickle(self):
        """Check that a used CosineSetSimilarity comparator can be pickled.

        The comparator is exercised once before serialising, first when
        built from ``self.ilist`` and then when built from an empty corpus.
        ``pickle.dumps`` raising is what makes this test fail; the score
        itself is not inspected.
        """
        for corpus in (self.ilist, []):
            cosine = CosineSetSimilarity(corpus)
            # Call the comparator before pickling so that whatever state a
            # comparison leaves on the object is part of what gets dumped.
            cosine(self.ilist[0], self.ilist[1])
            pickle.dumps(cosine)
Beispiel #2
0
Datei: set.py Projekt: glg/dedupe
    def __init__(self, definition):
        """Initialise the field type and attach a cosine set comparator.

        ``definition`` is passed to the parent initialiser unchanged. If it
        has no ``'corpus'`` entry, an empty list is stored under that key
        (the mapping is modified in place) and used to build the comparator.
        """
        super(SetType, self).__init__(definition)

        # setdefault both fills in the missing key and hands back the value.
        corpus = definition.setdefault('corpus', [])
        self.comparator = CosineSetSimilarity(corpus)
Beispiel #3
0
 def test_cosine_no_corpus(self):
     """Check scores from a comparator built with an empty corpus."""
     similarity = CosineSetSimilarity([])
     left, right = self.ilist[0], self.ilist[1]

     self.assertAlmostEqual(similarity(left, right), 0.667, places=3)

     other = ('g', 'h', 'd k')
     self.assertAlmostEqual(similarity(other, right), 0.333, places=3)
Beispiel #4
0
 def test_cosine_cache(self):
     """Check that repeating a comparison yields the same score.

     NOTE(review): going by the test name, the second call is presumably
     answered from an internal cache; that is not visible from this test.
     """
     similarity = CosineSetSimilarity(self.ilist)
     left, right = self.ilist[0], self.ilist[1]

     # Same pair, asked twice: both answers must agree with the expected value.
     for _ in range(2):
         self.assertAlmostEqual(similarity(left, right), 0.378, places=3)
Beispiel #5
0
    def __init__(self, definition):
        """Initialise the field type, its TF-IDF predicates and comparator.

        After the parent initialiser runs, one ``TfidfSetPredicate`` per
        entry of ``self._canopy_thresholds`` is appended to
        ``self.predicates``. If ``definition`` has no ``'corpus'`` entry, an
        empty list is stored under that key (the mapping is modified in
        place) and used to build the cosine comparator.
        """
        super(SetType, self).__init__(definition)

        tfidf_predicates = [
            predicates.TfidfSetPredicate(threshold, self.field)
            for threshold in self._canopy_thresholds
        ]
        self.predicates += tfidf_predicates

        # setdefault both fills in the missing key and hands back the value.
        corpus = definition.setdefault('corpus', [])
        self.comparator = CosineSetSimilarity(corpus)
Beispiel #6
0
 def test_cosine_identical(self):
     """Check that comparing a record with itself scores (almost) 1."""
     similarity = CosineSetSimilarity(self.ilist)
     record = self.ilist[0]
     self.assertAlmostEqual(similarity(record, record), 1, places=5)
Beispiel #7
0
 def test_cosine_na(self):
     """Check that comparing against an empty tuple gives NaN."""
     similarity = CosineSetSimilarity(self.ilist)
     empty = ()
     score = similarity(self.ilist[0], empty)
     assert numpy.isnan(score)