def _getTopWords(self, k, stopword_removal=False):
    """Get the top words of this event's photo captions by frequency.

    Every entry of ``self._event['photos']`` is wrapped in ``Photo``; each
    caption that is not ``None`` is inserted into a single ``TextParser``.

    Args:
        k: Passed straight through to ``TextParser.getTopWords``
            (presumably the number of words to keep -- confirm there).
        stopword_removal: Forwarded to the ``TextParser`` constructor.

    Returns:
        Whatever ``TextParser.getTopWords(k)`` returns.
    """
    parser = TextParser(stopword_removal=stopword_removal)
    # Lazy generator: photos are still wrapped and parsed one at a time.
    captions = (Photo(raw_photo).getCaption() for raw_photo in self._event['photos'])
    for caption in captions:
        if caption is None:
            # Photos without a caption contribute nothing to the counts.
            continue
        parser.insertCaption(caption)
    return parser.getTopWords(k)
def _getTopWords(self, k, stopword_removal=True):
    """Get the top words of this event's element texts by frequency.

    Every entry of ``self._event[self._element_type]`` is wrapped with
    ``createElement``; each text that is not ``None`` is inserted into a
    single ``TextParser``.

    Args:
        k: Passed straight through to ``TextParser.getTopWords``
            (presumably the number of words to keep -- confirm there).
        stopword_removal: Forwarded to the ``TextParser`` constructor.

    Returns:
        Whatever ``TextParser.getTopWords(k)`` returns.
    """
    parser = TextParser(stopword_removal=stopword_removal)
    for raw_element in self._event[self._element_type]:
        wrapped = createElement(self._element_type, raw_element)
        text = wrapped.getText()
        if text is None:
            # Elements without text contribute nothing to the counts.
            continue
        parser.insertText(text)
    return parser.getTopWords(k)
def PhotoDistanceByCaption(photo1, photo2):
    """Compare two photos by the word frequencies of their captions.

    Each raw photo is wrapped in ``Photo``. Its caption is fed to its own
    ``TextParser(True)``, and the full word list (``getTopWords(-1)``) is
    turned into a ``{word: freq}`` dict. The two dicts are handed to
    ``kldiv``.

    Args:
        photo1: Raw photo record accepted by ``Photo``.
        photo2: Raw photo record accepted by ``Photo``.

    Returns:
        The result of ``kldiv(word_dict1, word_dict2)``, or ``None`` when
        the photos cannot be compared (a caption is missing, or a parser
        yields no words).
    """
    caption1 = Photo(photo1).getCaption()
    caption2 = Photo(photo2).getCaption()
    # A missing caption leaves nothing to compare; bail out before handing
    # None to the parser.
    if caption1 is None or caption2 is None:
        return None

    parser1 = TextParser(True)
    parser1.insertCaption(caption1)
    parser2 = TextParser(True)
    parser2.insertCaption(caption2)

    # -1 is used here to request the complete word list, iterated below as
    # (word, freq) pairs.
    word_list1 = parser1.getTopWords(-1)
    word_list2 = parser2.getTopWords(-1)
    if len(word_list1) == 0 or len(word_list2) == 0:
        # unable to compare
        return None

    word_dict1 = {word: freq for word, freq in word_list1}
    word_dict2 = {word: freq for word, freq in word_list2}
    return kldiv(word_dict1, word_dict2)
def ElementDistanceByText(element1, element2):
    """Compare two elements by the word frequencies of their texts.

    Each raw element is wrapped with ``createElement``. Its text is fed to
    its own ``TextParser(True)``, and the full word list
    (``getTopWords(-1)``) is turned into a ``{word: freq}`` dict. The two
    dicts are handed to ``kldiv``.

    Args:
        element1: Raw element record accepted by ``createElement``.
        element2: Raw element record accepted by ``createElement``.

    Returns:
        The result of ``kldiv(word_dict1, word_dict2)``, or ``None`` when
        the elements cannot be compared (a text is missing, or a parser
        yields no words).
    """
    # NOTE(review): ``self`` is not a parameter of this function; it must be
    # supplied by an enclosing scope (e.g. this def nested inside a method).
    # Confirm that is the case -- otherwise this raises NameError.
    element_type = self._element_type
    text1 = createElement(element_type, element1).getText()
    text2 = createElement(element_type, element2).getText()
    # A missing text leaves nothing to compare; bail out before handing
    # None to the parser.
    if text1 is None or text2 is None:
        return None

    parser1 = TextParser(True)
    parser1.insertText(text1)
    parser2 = TextParser(True)
    parser2.insertText(text2)

    # -1 is used here to request the complete word list, iterated below as
    # (word, freq) pairs.
    word_list1 = parser1.getTopWords(-1)
    word_list2 = parser2.getTopWords(-1)
    if len(word_list1) == 0 or len(word_list2) == 0:
        # unable to compare
        return None

    word_dict1 = {word: freq for word, freq in word_list1}
    word_dict2 = {word: freq for word, freq in word_list2}
    return kldiv(word_dict1, word_dict2)