def process_readmessage(self, fd):
    """Extract a displayed ("Read") message from the parsed page and store it.

    The page is taken from ``self.parser.root``.  The message details are
    looked up inside the ``<div class="ReadMsgContainer">`` element: the
    subject comes from the ``<td class="ReadMsgSubject">`` cell, and the
    remaining header fields are read from label/value pairs of ``<td>``
    cells (a cell starting with ``from:``, ``to:`` or ``sent:`` labels the
    cell that follows it).  The message body is taken from the first
    ``<script>`` element that assigns the ``MsgContainer`` innerHTML.

    fd is accepted but not used by this method.

    Returns None when the page has no ReadMsgContainer element, otherwise
    whatever ``self.insert_message(result)`` returns.
    """
    result = {'type': 'Read', 'message': ''}
    root = self.parser.root

    container = root.find('div', {'class': 'ReadMsgContainer'})
    if not container:
        return

    ## Find the subject:
    subject = container.find('td', {'class': 'ReadMsgSubject'})
    if subject:
        result['subject'] = HTML.decode_entity(subject.innerHTML())

    ## Fill in all the other fields.  A label cell sets `context`; the
    ## cell that follows it supplies the value stored under that key.
    labels = (('from:', 'From'), ('to:', 'To'), ('sent:', 'sent'))
    context = None
    for td in container.search('td'):
        data = td.innerHTML()
        if context:
            result[context] = HTML.decode_entity(data)
            context = None

        lowered = data.lower()
        for prefix, key in labels:
            if lowered.startswith(prefix):
                context = key
                break

    ## Now the message:
    ## On newer sites its injected using script.  Raw strings keep the
    ## regex escapes intact; the pattern matches the same text as before.
    injected = re.compile(
        r"document\.getElementById\(\"MsgContainer\"\)"
        r"\.innerHTML='([^']*)'")
    for script in root.search('script'):
        m = injected.match(script.innerHTML())
        if m:
            ## NOTE(review): "string_escape" is a Python 2 only codec.
            result['message'] += HTML.decode_unicode(
                m.group(1).decode("string_escape"))
            break

    ## Best effort: when the timestamp cannot be parsed the raw text is
    ## kept.  Only attempted when a 'sent' field was actually found.
    if 'sent' in result:
        try:
            result['sent'] = Time.parse(result['sent'])
        except Exception:
            pass

    ## NOTE(review): another definition of process_readmessage in this
    ## file calls insert_message(fd, result) - confirm which signature
    ## insert_message really has.
    return self.insert_message(result)
def process_readmessage(self, fd):
    """Extract a displayed ("Read") message from the parsed page and store it.

    The page is taken from ``self.parser.root``.  The message details are
    looked up inside the ``<div class="ReadMsgContainer">`` element: the
    subject comes from the ``<td class="ReadMsgSubject">`` cell, and the
    remaining header fields are read from label/value pairs of ``<td>``
    cells (a cell starting with ``from:``, ``to:`` or ``sent:`` labels the
    cell that follows it).  The message body is taken from the first
    ``<script>`` element that assigns the ``MsgContainer`` innerHTML.

    fd is not inspected here; it is handed on to ``self.insert_message``.

    Returns None when the page has no ReadMsgContainer element, otherwise
    whatever ``self.insert_message(fd, result)`` returns.
    """
    result = {'type': 'Read', 'message': ''}
    root = self.parser.root

    container = root.find('div', {'class': 'ReadMsgContainer'})
    if not container:
        return

    ## Find the subject:
    subject = container.find('td', {'class': 'ReadMsgSubject'})
    if subject:
        result['subject'] = HTML.decode_entity(subject.innerHTML())

    ## Fill in all the other fields.  A label cell sets `context`; the
    ## cell that follows it supplies the value stored under that key.
    labels = (('from:', 'From'), ('to:', 'To'), ('sent:', 'sent'))
    context = None
    for td in container.search('td'):
        data = td.innerHTML()
        if context:
            result[context] = HTML.decode_entity(data)
            context = None

        lowered = data.lower()
        for prefix, key in labels:
            if lowered.startswith(prefix):
                context = key
                break

    ## Now the message:
    ## On newer sites its injected using script.  Raw strings keep the
    ## regex escapes intact; the pattern matches the same text as before.
    injected = re.compile(
        r"document\.getElementById\(\"MsgContainer\"\)"
        r"\.innerHTML='([^']*)'")
    for script in root.search('script'):
        m = injected.match(script.innerHTML())
        if m:
            ## NOTE(review): "string_escape" is a Python 2 only codec.
            result['message'] += HTML.decode_unicode(
                m.group(1).decode("string_escape"))
            break

    ## Best effort: when the timestamp cannot be parsed the raw text is
    ## kept.  Only attempted when a 'sent' field was actually found.
    if 'sent' in result:
        try:
            result['sent'] = Time.parse(result['sent'])
        except Exception:
            pass

    ## NOTE(review): another definition of process_readmessage in this
    ## file calls insert_message(result) without fd - confirm which
    ## signature insert_message really has.
    return self.insert_message(fd, result)