def _getEventText(self, event):
        """Return the lowercased text of each element of ``event`` as a list.

        One entry is produced for every item in ``event[self._element_type]``,
        in the same order, so the result stays index-aligned with the
        elements. Items are wrapped in ``Photo`` when ``self._element_type``
        is ``"photos"`` and in ``Tweet`` otherwise.

        An empty string (not ``None``) holds the place of any element whose
        text is rejected by ``self._is_ascii`` or whose text cannot be read
        or lowercased (for example when ``getText()`` raises or returns a
        value without ``lower()``).

        Raises:
            AssertionError: if ``self._element_type`` is not a key of
                ``Event(event).toDict()``.
        """

        assert self._element_type in Event(event).toDict().keys()

        # The wrapper type depends only on the instance, not on the element,
        # so it is chosen once instead of on every iteration.
        wrapper = Photo if self._element_type == "photos" else Tweet

        event_text = []
        for raw_element in event[self._element_type]:
            try:
                text = wrapper(raw_element).getText()
                entry = text.lower() if self._is_ascii(text) else ""
            except Exception:
                # Best effort: a malformed element must not abort the whole
                # event. Catching Exception (rather than a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                entry = ""
            event_text.append(entry)
        return event_text
Exemple #2
0
 def removeDuplicateElements(self):
     """Remove duplicate elements from the event, then re-sort them.

     Two elements are treated as duplicates when both their ``getText()``
     and ``getUserId()`` values are equal. When duplicates occur, the last
     one encountered is the one kept.

     Side effects: ``self._event[self._element_type]`` is replaced by a new
     list holding the surviving ``Photo`` objects (when the element type is
     ``'photos'``) or ``Tweet`` objects (otherwise) -- the wrappers, not the
     raw items they were built from. ``self.sortElements()`` is then called
     to restore the ordering of the photos or tweets.
     """
     wrapper = Photo if self._element_type == 'photos' else Tweet

     unique = {}
     for element in self._event[self._element_type]:
         wrapped = wrapper(element)
         # A tuple key keeps text and user id separate, so a '|' inside the
         # text cannot make two different (text, user) pairs look identical,
         # and non-string values do not break key construction.
         unique[(wrapped.getText(), wrapped.getUserId())] = wrapped

     self._event[self._element_type] = list(unique.values())
     self.sortElements()