コード例 #1
0
 def _getTopWords(self, k, stopword_removal=False):
     """Return the top ``k`` words found in the captions of this event's photos.

     Every entry of ``self._event['photos']`` is wrapped in ``Photo``; each
     caption that is not ``None`` is inserted into a single ``TextParser``.
     The ranking itself is done by ``TextParser.getTopWords`` (presumably by
     word frequency -- confirm against ``TextParser``).

     Args:
         k: Passed through unchanged to ``TextParser.getTopWords``.
         stopword_removal: Forwarded to the ``TextParser`` constructor.

     Returns:
         Whatever ``TextParser.getTopWords(k)`` returns.
     """
     parser = TextParser(stopword_removal=stopword_removal)
     # Lazy generator: each photo is wrapped and read only when the loop
     # reaches it, so the per-photo call order is preserved.
     captions = (
         Photo(raw_photo).getCaption() for raw_photo in self._event['photos']
     )
     for text in captions:
         if text is None:
             # Photos without a caption contribute nothing.
             continue
         parser.insertCaption(text)
     return parser.getTopWords(k)
コード例 #2
0
        def PhotoDistanceByCaption(photo1, photo2):
            """Compute a caption-based distance between two photos.

            Each photo is wrapped in ``Photo`` and its caption is parsed by its
            own ``TextParser`` built with ``True`` as the first argument
            (presumably stopword removal -- confirm against ``TextParser``).
            The ``(word, freq)`` pairs returned by ``getTopWords(-1)``
            (presumably "all words" -- confirm) are converted to dicts and
            handed to ``kldiv``.

            Args:
                photo1: Raw photo data accepted by the ``Photo`` constructor.
                photo2: Raw photo data accepted by the ``Photo`` constructor.

            Returns:
                The result of ``kldiv(word_dict1, word_dict2)``, or ``None``
                when the photos cannot be compared: a caption is missing, or
                either caption yields no words.
            """
            p1 = Photo(photo1)
            p2 = Photo(photo2)
            cap1 = p1.getCaption()
            cap2 = p2.getCaption()
            # A missing caption cannot be parsed; report it the same way as an
            # empty word list instead of passing None to insertCaption.
            if cap1 is None or cap2 is None:
                return None

            cp1 = TextParser(True)
            cp1.insertCaption(cap1)
            cp2 = TextParser(True)
            cp2.insertCaption(cap2)

            word_list1 = cp1.getTopWords(-1)
            word_list2 = cp2.getTopWords(-1)
            if not word_list1 or not word_list2:
                # unable to compare
                return None

            # Map word -> frequency for each caption.
            word_dict1 = {word: freq for word, freq in word_list1}
            word_dict2 = {word: freq for word, freq in word_list2}
            return kldiv(word_dict1, word_dict2)