def computeWordKLDivergenceWithByEddie(self, event):
    """Compare this event's text with another event's text using ``kldiv``.

    The text of every element of each event is concatenated (each piece is
    preceded by a single space), both texts are passed through ``tokenize``
    and the results are handed to Eddie's ``kldiv`` routine.

    Args:
        event: The event to compare against. Either a dict (or dict
            subclass), or an object exposing ``toDict()``. After
            conversion it must contain the ``self._element_type`` key.

    Returns:
        Whatever ``kldiv`` returns for the two tokenized texts.
    """
    element_type = self._element_type

    text1 = "".join(
        " " + createElement(element_type, element).getText()
        for element in self._event[element_type]
    )

    # isinstance() also accepts dict subclasses and, unlike
    # types.DictType, does not rely on a Python 2-only alias.
    if not isinstance(event, dict):
        event = event.toDict()

    text2 = "".join(
        " " + createElement(element_type, element).getText()
        for element in event[element_type]
    )

    return kldiv(tokenize(text1), tokenize(text2))
def computeWordKLDivergenceWithByEddie(self, event):
    """Compare this event's photo captions with another event's via ``kldiv``.

    The captions of all photos of each event are concatenated (each caption
    is preceded by a single space), both texts are passed through
    ``tokenize`` and the results are handed to Eddie's ``kldiv`` routine.

    Args:
        event: The event to compare against. Either a dict (or dict
            subclass), or an object exposing ``toDict()``. After
            conversion it must contain a ``'photos'`` key.

    Returns:
        Whatever ``kldiv`` returns for the two tokenized texts.
    """
    text1 = ''.join(
        ' ' + Photo(photo).getCaption() for photo in self._event['photos']
    )

    # isinstance() also accepts dict subclasses and, unlike
    # types.DictType, does not rely on a Python 2-only alias.
    if not isinstance(event, dict):
        event = event.toDict()

    text2 = ''.join(
        ' ' + Photo(photo).getCaption() for photo in event['photos']
    )

    return kldiv(tokenize(text1), tokenize(text2))
def computeWordKLDivergenceWithByEddie(self, event):
    """Compare this event's photo captions with another event's via ``kldiv``.

    The captions of all photos of each event are concatenated (each caption
    is preceded by a single space), both texts are passed through
    ``tokenize`` and the results are handed to Eddie's ``kldiv`` routine.

    Args:
        event: The event to compare against. Either a dict (or dict
            subclass), or an object exposing ``toJSON()``; the value
            returned by ``toJSON()`` is indexed with ``'photos'``, so it is
            presumably a dict as well -- confirm against the event class.

    Returns:
        Whatever ``kldiv`` returns for the two tokenized texts.
    """
    text1 = ''.join(
        ' ' + Photo(photo).getCaption() for photo in self._event['photos']
    )

    # isinstance() also accepts dict subclasses and, unlike
    # types.DictType, does not rely on a Python 2-only alias.
    if not isinstance(event, dict):
        event = event.toJSON()

    text2 = ''.join(
        ' ' + Photo(photo).getCaption() for photo in event['photos']
    )

    return kldiv(tokenize(text1), tokenize(text2))
def PhotoDistanceByCaption(photo1, photo2):
    """Return the ``kldiv`` distance between the captions of two photos.

    Each caption is fed to its own ``TextParser(True)``; the word/frequency
    pairs returned by ``getTopWords(-1)`` are turned into dicts and passed
    to ``kldiv``.

    Returns:
        The result of ``kldiv``, or ``None`` when either caption yields no
        words and the photos therefore cannot be compared.
    """
    first_photo, second_photo = Photo(photo1), Photo(photo2)
    first_caption, second_caption = (
        first_photo.getCaption(),
        second_photo.getCaption(),
    )

    first_parser = TextParser(True)
    first_parser.insertCaption(first_caption)
    second_parser = TextParser(True)
    second_parser.insertCaption(second_caption)

    # -1 presumably asks for every word rather than a top-k cut -- confirm
    # against TextParser.getTopWords.
    first_words = first_parser.getTopWords(-1)
    second_words = second_parser.getTopWords(-1)

    # Unable to compare unless both captions produced at least one word.
    if not (len(first_words) and len(second_words)):
        return None

    first_freqs = {word: freq for word, freq in first_words}
    second_freqs = {word: freq for word, freq in second_words}
    return kldiv(first_freqs, second_freqs)
def ElementDistanceByText(element1, element2):
    """Return the ``kldiv`` distance between the texts of two elements.

    Each text is fed to its own ``TextParser(True)``; the word/frequency
    pairs returned by ``getTopWords(-1)`` are turned into dicts and passed
    to ``kldiv``.

    NOTE(review): ``self`` is not a parameter here; it is presumably picked
    up from an enclosing method's scope -- confirm against the caller.

    Returns:
        The result of ``kldiv``, or ``None`` when either text yields no
        words and the elements therefore cannot be compared.
    """
    first_element = createElement(self._element_type, element1)
    second_element = createElement(self._element_type, element2)
    first_text, second_text = first_element.getText(), second_element.getText()

    first_parser = TextParser(True)
    first_parser.insertText(first_text)
    second_parser = TextParser(True)
    second_parser.insertText(second_text)

    # -1 presumably asks for every word rather than a top-k cut -- confirm
    # against TextParser.getTopWords.
    first_words = first_parser.getTopWords(-1)
    second_words = second_parser.getTopWords(-1)

    # Unable to compare unless both texts produced at least one word.
    if not (len(first_words) and len(second_words)):
        return None

    first_freqs = {word: freq for word, freq in first_words}
    second_freqs = {word: freq for word, freq in second_words}
    return kldiv(first_freqs, second_freqs)
def PhotoDistanceByCaption(photo1, photo2):
    """Return the ``kldiv`` distance between the captions of two photos.

    Each caption is fed to its own ``CaptionParser(True)``; the
    word/frequency pairs returned by ``getTopWords(-1)`` are turned into
    dicts and passed to ``kldiv``.

    Returns:
        The result of ``kldiv``, or ``None`` when either caption yields no
        words and the photos therefore cannot be compared.
    """
    first_photo, second_photo = Photo(photo1), Photo(photo2)
    first_caption, second_caption = (
        first_photo.getCaption(),
        second_photo.getCaption(),
    )

    first_parser = CaptionParser(True)
    first_parser.insertCaption(first_caption)
    second_parser = CaptionParser(True)
    second_parser.insertCaption(second_caption)

    # -1 presumably asks for every word rather than a top-k cut -- confirm
    # against CaptionParser.getTopWords.
    first_words = first_parser.getTopWords(-1)
    second_words = second_parser.getTopWords(-1)

    # Unable to compare unless both captions produced at least one word.
    if not (len(first_words) and len(second_words)):
        return None

    first_freqs = {word: freq for word, freq in first_words}
    second_freqs = {word: freq for word, freq in second_words}
    return kldiv(first_freqs, second_freqs)