def _getEventText(self, event):
    """Return the lower-cased text of every element in ``event`` as a list.

    The elements are read from ``event[self._element_type]`` and wrapped in
    ``Photo`` when the element type is ``"photos"``, otherwise in ``Tweet``.

    The returned list has exactly one entry per element, in the same order.
    An element whose text is not ASCII (per ``self._is_ascii``), or whose
    text cannot be read or lower-cased at all (e.g. a photo without text),
    contributes an empty string ``""`` so that positions stay aligned with
    the element list.

    Raises:
        AssertionError: if ``self._element_type`` is not one of the keys of
            ``Event(event).toDict()``.
    """
    assert self._element_type in Event(event).toDict().keys()

    # The element type is fixed for the whole call, so pick the wrapper once.
    wrapper = Photo if self._element_type == "photos" else Tweet

    event_text = []
    for raw_element in event[self._element_type]:
        element = wrapper(raw_element)
        try:
            text = element.getText()
            if self._is_ascii(text):
                event_text.append(text.lower())
            else:
                event_text.append("")
        except Exception:
            # Best-effort: missing or malformed text still gets a placeholder.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
            # SystemExit propagate instead of being silently swallowed.
            event_text.append("")
    return event_text
def removeDuplicateElements(self):
    """Drop duplicate elements from ``self._event[self._element_type]``.

    Each element is wrapped in ``Photo`` (when the element type is
    ``'photos'``) or ``Tweet`` (otherwise). Two elements count as duplicates
    when the concatenation ``getText() + '|' + getUserId()`` is equal; for a
    group of duplicates the one encountered last is kept.

    The stored list is replaced in place on ``self._event`` with the
    surviving *wrapper* objects (not the raw elements), after which
    ``self.sortElements()`` is called. Nothing is returned.
    """
    kind = self._element_type
    wrap = Photo if kind == 'photos' else Tweet

    # Later elements overwrite earlier ones that share the same key.
    unique = {}
    for raw in self._event[kind]:
        wrapped = wrap(raw)
        unique[wrapped.getText() + '|' + wrapped.getUserId()] = wrapped

    self._event[kind] = list(unique.values())

    # The rebuilt list follows dict iteration order, so sort it again.
    self.sortElements()