def _email_decode_header(header): """Decode a message header value without converting charset. Returns a list of (decoded_string, charset) pairs containing each of the decoded parts of the header. Charset is None for non-encoded parts of the header, otherwise a lower-case string containing the name of the character set specified in the encoded string. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). """ # If no encoding, just return the header header = str(header) if not email.header.ecre.search(header): return [(header, None)] decoded = [] dec = '' for line in header.splitlines(): # This line might not have an encoding in it if not email.header.ecre.search(line): decoded.append((line, None)) continue parts = email.header.ecre.split(line) while parts: unenc = parts.pop(0).strip() if unenc: # Should we continue a long line? if decoded and decoded[-1][1] is None: decoded[-1] = (decoded[-1][0] + SPACE + unenc, None) else: decoded.append((unenc, None)) if parts: charset, encoding = [s.lower() for s in parts[0:2]] encoded = parts[2] dec = None if encoding == 'q': dec = email.quoprimime.header_decode(encoded) elif encoding == 'b': paderr = len(encoded) % 4 # Postel's law: add missing padding if paderr: encoded += '==='[:4 - paderr] try: dec = email.base64mime.decode(encoded) except binascii.Error, e: dec = None if dec is None: dec = encoded if decoded and decoded[-1][1] == charset: decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1]) else: decoded.append((dec, charset)) del parts[0:3]
def _parse_header_fields(header):
    """Parse a raw header block into a dict of field name -> field value.

    Rows are split on the first ': '.  A row starting with a space or tab is
    treated as a continuation and appended (leading whitespace included) to
    the most recently parsed field; a continuation that appears before any
    field is ignored.  Rows without ': ' are skipped, and a repeated field
    name keeps the last value seen.
    """
    fields = {}
    key = None
    for row in header.splitlines():
        if not row:
            continue
        if row.startswith((' ', '\t')):
            if key is not None:
                fields[key] += row
            continue
        name, sep, val = row.partition(': ')
        if sep:
            key = name
            fields[key] = val
    return fields


def _decode_subject(raw_subject):
    """Decode the first chunk of a Subject header value for display.

    The charset labels 'ibm-euckr' and '5601' are decoded as 'euc-kr'; a
    chunk without a charset is decoded as 'utf-8'.  A charset name Python
    does not know falls back to 'utf-8' instead of raising LookupError.
    Undecodable bytes are replaced rather than raising.
    """
    text, charset = email.header.decode_header(raw_subject)[0]
    if charset in ('ibm-euckr', '5601'):
        charset = 'euc-kr'
    elif not charset:
        charset = 'utf-8'
    try:
        return text.decode(charset, 'replace')
    except LookupError:
        return text.decode('utf-8', 'replace')


def _scan_mailbox(im, name):
    """Report messages in mailbox `name` whose headers duplicate an earlier one.

    Selects the mailbox read-only, fetches every message's header block and
    compares SHA-1 digests of the raw header text.  Prints a progress
    indicator and one line per duplicate to stdout, and returns the number of
    duplicates found.  Exits with status 1 if the mailbox cannot be selected.
    """
    res, data = im.select(name, readonly=True)
    if res != 'OK':
        sys.stderr.write('* Something bad happened.\n')
        raise SystemExit(1)

    res, [nums_s] = im.search(None, 'ALL')
    nums = (nums_s or '').split()
    total = len(nums)

    seen = set()
    dup_cnt = 0
    prev_per_mil = -1
    line_open = False  # True while a '\r' progress line is left unterminated
    for i, num_s in enumerate(nums):
        num = int(num_s)
        res, data = im.fetch(num, '(BODY.PEEK[HEADER])')
        # Skip responses that do not carry a (envelope, header) pair.
        if res != 'OK' or not data or not isinstance(data[0], tuple):
            continue
        header = data[0][1]
        digest = hashlib.sha1(header).hexdigest()

        if digest in seen:
            fields = _parse_header_fields(header)
            if 'Subject' in fields:
                subj = _decode_subject(fields['Subject'])
            else:
                subj = '?'
            date = fields.get('Date', '?')
            if line_open:
                sys.stdout.write('\n')
                line_open = False
            sys.stdout.write('* Duplicate: %d. %s (%s)\n' % (num, subj, date))
            dup_cnt += 1
            continue
        seen.add(digest)

        # Redraw the progress line only when the per-mille value changes.
        per_mil = 1000 * (i + 1) // total
        if per_mil != prev_per_mil:
            sys.stdout.write('\r%d.%d%% (%d/%d)'
                             % (per_mil // 10, per_mil % 10, i + 1, total))
            sys.stdout.flush()
            line_open = True
            prev_per_mil = per_mil

    if line_open:
        sys.stdout.write('\n')
    return dup_cnt


def main():
    """Prompt for Gmail credentials and report duplicate mails per mailbox.

    Connects to imap.gmail.com:993, walks the mailbox list, and for every
    selectable mailbox whose decoded name is in `mboxes_to_check` prints the
    duplicates found, followed by the overall duplicate count.  The
    connection is logged out even when an error interrupts the scan.
    """
    # Imported locally so this block does not depend on module-level imports
    # that are not visible from here.
    import getpass
    import imaplib

    username = raw_input('Username: ')
    # NOTE(review): the password prompt and the connection constructor were
    # scrubbed in the source; getpass.getpass and imaplib.IMAP4_SSL are
    # assumed from the surrounding calls -- confirm.
    password = getpass.getpass('Password: ')
    im = imaplib.IMAP4_SSL('imap.gmail.com', 993)
    try:
        im.login(username, password)

        dup_cnt = 0
        res, boxes = im.list()
        for box_info in boxes:
            if not isinstance(box_info, str):
                continue
            match = re.search(r'\((.*?)\) "(.*?)" "(.*)"', box_info)
            if match is None:
                continue
            flags, _delim, name = match.groups()
            if r'\Noselect' in flags:
                continue
            # NOTE(review): 'imap4-utf-7' is not a built-in codec; presumably
            # it is registered elsewhere in this file -- confirm.
            name_d = name.decode('imap4-utf-7')
            if name_d not in mboxes_to_check:
                continue

            sys.stdout.write('* Mailbox: %s\n' % name_d)
            dup_cnt += _scan_mailbox(im, name)

        sys.stdout.write('* %d duplicate(s) found.\n' % dup_cnt)
    finally:
        im.logout()