コード例 #1
0
 def _getTopWords(self, k, stopword_removal=False):
     """Collect the captions of this event's photos and return its top words.

     Every entry of ``self._event['photos']`` is wrapped in ``Photo`` and its
     caption is fed to a single ``TextParser``; photos whose caption is
     ``None`` are skipped.

     Args:
         k: Forwarded unchanged to ``TextParser.getTopWords``
             (presumably the number of words wanted -- confirm there).
         stopword_removal: Forwarded to the ``TextParser`` constructor.

     Returns:
         Whatever ``TextParser.getTopWords(k)`` returns after all captions
         have been inserted.
     """
     parser = TextParser(stopword_removal=stopword_removal)
     for raw_photo in self._event['photos']:
         caption = Photo(raw_photo).getCaption()
         if caption is None:
             # Nothing to count for photos that carry no caption.
             continue
         parser.insertCaption(caption)
     return parser.getTopWords(k)
コード例 #2
0
 def _getTopWords(self, k, stopword_removal=True):
     """Collect the text of this event's elements and return its top words.

     Every entry of ``self._event[self._element_type]`` is wrapped via
     ``createElement`` and its text is fed to a single ``TextParser``;
     elements whose text is ``None`` are skipped.

     Args:
         k: Forwarded unchanged to ``TextParser.getTopWords``
             (presumably the number of words wanted -- confirm there).
         stopword_removal: Forwarded to the ``TextParser`` constructor.

     Returns:
         Whatever ``TextParser.getTopWords(k)`` returns after all texts
         have been inserted.
     """
     parser = TextParser(stopword_removal=stopword_removal)
     for raw_element in self._event[self._element_type]:
         wrapped = createElement(self._element_type, raw_element)
         text = wrapped.getText()
         if text is None:
             # Nothing to count for elements that carry no text.
             continue
         parser.insertText(text)
     return parser.getTopWords(k)
コード例 #3
0
        def PhotoDistanceByCaption(photo1, photo2):
            """Compute a caption-based distance between two photos.

            Each photo is wrapped in ``Photo``, its caption is parsed by its
            own ``TextParser(True)``, and the resulting word/frequency pairs
            are turned into dicts that are handed to ``kldiv`` (presumably a
            Kullback-Leibler divergence -- confirm against its definition).

            Args:
                photo1: Raw photo record accepted by ``Photo``.
                photo2: Raw photo record accepted by ``Photo``.

            Returns:
                The value of ``kldiv(word_dict1, word_dict2)``, or ``None``
                when the photos cannot be compared: a caption is missing or
                a caption yields no words.
            """
            cap1 = Photo(photo1).getCaption()
            cap2 = Photo(photo2).getCaption()
            # getCaption() can return None (other callers check for it), so
            # treat a missing caption like an empty one: unable to compare.
            if cap1 is None or cap2 is None:
                return None

            cp1 = TextParser(True)
            cp1.insertCaption(cap1)
            cp2 = TextParser(True)
            cp2.insertCaption(cap2)

            # -1 is passed through to getTopWords; presumably "no limit".
            word_list1 = cp1.getTopWords(-1)
            word_list2 = cp2.getTopWords(-1)
            if len(word_list1) == 0 or len(word_list2) == 0:
                # unable to compare
                return None

            word_dict1 = {word: freq for word, freq in word_list1}
            word_dict2 = {word: freq for word, freq in word_list2}
            return kldiv(word_dict1, word_dict2)
コード例 #4
0
        def ElementDistanceByText(element1, element2):
            """Compute a text-based distance between two elements.

            Each element is wrapped via ``createElement`` using the enclosing
            scope's ``self._element_type``, its text is parsed by its own
            ``TextParser(True)``, and the resulting word/frequency pairs are
            turned into dicts that are handed to ``kldiv`` (presumably a
            Kullback-Leibler divergence -- confirm against its definition).

            Args:
                element1: Raw element record accepted by ``createElement``.
                element2: Raw element record accepted by ``createElement``.

            Returns:
                The value of ``kldiv(word_dict1, word_dict2)``, or ``None``
                when the elements cannot be compared: a text is missing or
                a text yields no words.
            """
            text1 = createElement(self._element_type, element1).getText()
            text2 = createElement(self._element_type, element2).getText()
            # getText() can return None (other callers check for it), so
            # treat a missing text like an empty one: unable to compare.
            if text1 is None or text2 is None:
                return None

            cp1 = TextParser(True)
            cp1.insertText(text1)
            cp2 = TextParser(True)
            cp2.insertText(text2)

            # -1 is passed through to getTopWords; presumably "no limit".
            word_list1 = cp1.getTopWords(-1)
            word_list2 = cp2.getTopWords(-1)
            if len(word_list1) == 0 or len(word_list2) == 0:
                # unable to compare
                return None

            word_dict1 = {word: freq for word, freq in word_list1}
            word_dict2 = {word: freq for word, freq in word_list2}
            return kldiv(word_dict1, word_dict2)