Beispiel #1
0
def _email_decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (decoded_string, charset) pairs containing each of the
    decoded parts of the header.  Charset is None for non-encoded parts of the
    header, otherwise a lower-case string containing the name of the character
    set specified in the encoded string.

    Decoding is lenient: missing base64 padding is added before decoding, and
    a base64 part that still cannot be decoded is kept as the encoded text
    exactly as it appeared in the header instead of raising an error.
    """
    header = str(header)
    # If no encoding, just return the header
    if not email.header.ecre.search(header):
        return [(header, None)]
    decoded = []
    for line in header.splitlines():
        # This line might not have an encoding in it
        if not email.header.ecre.search(line):
            decoded.append((line, None))
            continue
        # ecre has three groups, so split() yields
        # [plain, charset, encoding, encoded, plain, charset, ...].
        parts = email.header.ecre.split(line)
        while parts:
            unenc = parts.pop(0).strip()
            if unenc:
                # Should we continue a long line?
                if decoded and decoded[-1][1] is None:
                    decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
                else:
                    decoded.append((unenc, None))
            if parts:
                charset, encoding = [s.lower() for s in parts[0:2]]
                encoded = parts[2]
                dec = None
                if encoding == 'q':
                    dec = email.quoprimime.header_decode(encoded)
                elif encoding == 'b':
                    # Postel's law: add missing padding
                    padded = encoded
                    paderr = len(padded) % 4
                    if paderr:
                        padded += '==='[:4 - paderr]
                    try:
                        dec = email.base64mime.decode(padded)
                    except binascii.Error:
                        # Undecodable payload: fall back to the raw text below.
                        dec = None
                if dec is None:
                    dec = encoded

                # Merge adjacent parts that share a charset into one entry.
                if decoded and decoded[-1][1] == charset:
                    decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
                else:
                    decoded.append((dec, charset))
            # Drop the (charset, encoding, encoded) triple just consumed.
            del parts[0:3]
    return decoded
Beispiel #2
0
def main():
    """Report messages with identical headers in selected Gmail mailboxes.

    Prompts for a username and password, connects to imap.gmail.com over
    SSL and walks every selectable mailbox whose decoded name is listed in
    the module-level ``mboxes_to_check``.  Within one mailbox, two messages
    count as duplicates when the SHA-1 digests of their raw header blocks
    are equal.  Progress and each duplicate are written to stdout, followed
    by the total number of duplicates.

    Written for Python 2 (``raw_input``, byte-string IMAP responses).

    Raises:
        SystemExit: with status 1 when a mailbox cannot be selected.
    """
    # Imported locally so the login code does not rely on the module header.
    import getpass
    import imaplib

    username = raw_input('Username: ')
    # Read the password without echoing it to the terminal.
    password = getpass.getpass('Password: ')
    im = imaplib.IMAP4_SSL('imap.gmail.com', 993)

    dup_cnt = 0
    try:
        im.login(username, password)

        # True while a '\r' progress line is still open on stdout.
        line_yet = False
        res, boxes = im.list()
        for box_info in boxes:
            match = re.search(r'\((.*?)\) "(.*?)" "(.*)"', box_info)
            if match is None:
                # Not in the (flags) "delim" "name" form; nothing to select.
                continue
            flags, name = match.group(1), match.group(3)

            if r'\Noselect' in flags:
                continue

            # NOTE(review): 'imap4-utf-7' is not a standard-library codec;
            # presumably it is registered elsewhere in this project.
            name_d = name.decode('imap4-utf-7')
            if name_d not in mboxes_to_check:
                continue

            print('* Mailbox: %s' % name_d)

            res, data = im.select(name, readonly=True)
            if res != 'OK':
                sys.stderr.write('* Something bad happened.\n')
                # Exit with a failure status; the finally clause logs out.
                raise SystemExit(1)

            seen = set()  # header digests already met in this mailbox

            prev_per_mil = -1
            res, [nums_s] = im.search(None, 'ALL')
            nums = nums_s.split()
            total = len(nums)
            for i, num_s in enumerate(nums):
                num = int(num_s)

                res, data = im.fetch(num_s, '(BODY.PEEK[HEADER])')
                if res != 'OK' or not data or not isinstance(data[0], tuple):
                    # No header literal came back for this message; skip it.
                    continue
                header = data[0][1]
                digest = hashlib.sha1(header).hexdigest()

                if digest in seen:
                    # Parse the header only when there is something to show.
                    fields = _parse_header_fields(header)
                    if 'subject' in fields:
                        subj = _decode_subject(fields['subject'])
                    else:
                        subj = '?'
                    date = fields.get('date', '?')

                    if line_yet:
                        # Terminate the open progress line first.
                        print('')
                        line_yet = False
                    print('* Duplicate: %d. %s (%s)' % (num, subj, date))
                    dup_cnt += 1
                    continue

                seen.add(digest)

                # Progress in tenths of a percent; redraw only on change.
                per_mil = 1000 * (i + 1) // total
                if prev_per_mil != per_mil:
                    sys.stdout.write('\r%d.%d%% (%d/%d)' % (
                        per_mil // 10, per_mil % 10, i + 1, total))
                    sys.stdout.flush()
                    line_yet = True
                prev_per_mil = per_mil

            if line_yet:
                print('')
                line_yet = False
    finally:
        # Always close the connection, including on errors and SystemExit.
        im.logout()

    print('* %d duplicate(s) found.' % dup_cnt)


def _parse_header_fields(header):
    """Split a raw header block into a dict of lower-cased name -> value.

    Lines starting with a space or tab are folded continuations and are
    appended, unchanged, to the value of the preceding field.  Lines with
    no colon are ignored.  When a name repeats, the last occurrence wins.
    """
    fields = {}
    key = None
    for row in header.splitlines():
        if not row:
            continue
        if row.startswith((' ', '\t')):
            # A continuation with no field before it has nothing to extend.
            if key is not None:
                fields[key] += row
            continue
        name, sep, value = row.partition(':')
        if not sep:
            # Not a header field; its continuations must not attach elsewhere.
            key = None
            continue
        key = name.lower()
        fields[key] = value.lstrip()
    return fields


def _decode_subject(raw):
    """Return the first decoded fragment of a raw Subject value as unicode."""
    text, charset = email.header.decode_header(raw)[0]
    if charset in ('ibm-euckr', '5601'):
        # These labels are decoded as EUC-KR.
        charset = 'euc-kr'
    elif not charset:
        charset = 'utf-8'
    try:
        return text.decode(charset, 'replace')
    except LookupError:
        # Unknown charset label: fall back rather than abort the scan.
        return text.decode('utf-8', 'replace')
Beispiel #3
0
def main():
	"""Report messages with identical headers in selected Gmail mailboxes.

	Prompts for a username and password, connects to imap.gmail.com over
	SSL and walks every selectable mailbox whose decoded name is listed in
	the module-level ``mboxes_to_check``.  Within one mailbox, two messages
	count as duplicates when the SHA-1 digests of their raw header blocks
	are equal.  Progress and each duplicate are written to stdout, followed
	by the total number of duplicates.

	Written for Python 2 (``raw_input``, byte-string IMAP responses).

	Raises:
		SystemExit: with status 1 when a mailbox cannot be selected.
	"""
	# Imported locally so the login code does not rely on the module header.
	import getpass
	import imaplib

	username = raw_input('Username: ')
	# Read the password without echoing it to the terminal.
	password = getpass.getpass('Password: ')
	im = imaplib.IMAP4_SSL('imap.gmail.com', 993)

	dup_cnt = 0
	try:
		im.login(username, password)

		# True while a '\r' progress line is still open on stdout.
		line_yet = False
		res, boxes = im.list()
		for box_info in boxes:
			match = re.search(r'\((.*?)\) "(.*?)" "(.*)"', box_info)
			if match is None:
				# Not in the (flags) "delim" "name" form; nothing to select.
				continue
			flags, name = match.group(1), match.group(3)

			if r'\Noselect' in flags:
				continue

			# NOTE(review): 'imap4-utf-7' is not a standard-library codec;
			# presumably it is registered elsewhere in this project.
			name_d = name.decode('imap4-utf-7')
			if name_d not in mboxes_to_check:
				continue

			print('* Mailbox: %s' % name_d)

			res, data = im.select(name, readonly=True)
			if res != 'OK':
				sys.stderr.write('* Something bad happened.\n')
				# Exit with a failure status; the finally clause logs out.
				raise SystemExit(1)

			seen = set()  # header digests already met in this mailbox

			prev_per_mil = -1
			res, [nums_s] = im.search(None, 'ALL')
			nums = nums_s.split()
			total = len(nums)
			for i, num_s in enumerate(nums):
				num = int(num_s)

				res, data = im.fetch(num_s, '(BODY.PEEK[HEADER])')
				if res != 'OK' or not data or not isinstance(data[0], tuple):
					# No header literal came back for this message; skip it.
					continue
				header = data[0][1]
				digest = hashlib.sha1(header).hexdigest()

				if digest in seen:
					# Parse the header only when there is something to show.
					fields = _parse_header_fields(header)
					if 'subject' in fields:
						subj = _decode_subject(fields['subject'])
					else:
						subj = '?'
					date = fields.get('date', '?')

					if line_yet:
						# Terminate the open progress line first.
						print('')
						line_yet = False
					print('* Duplicate: %d. %s (%s)' % (num, subj, date))
					dup_cnt += 1
					continue

				seen.add(digest)

				# Progress in tenths of a percent; redraw only on change.
				per_mil = 1000 * (i + 1) // total
				if prev_per_mil != per_mil:
					sys.stdout.write('\r%d.%d%% (%d/%d)' % (
						per_mil // 10, per_mil % 10, i + 1, total))
					sys.stdout.flush()
					line_yet = True
				prev_per_mil = per_mil

			if line_yet:
				print('')
				line_yet = False
	finally:
		# Always close the connection, including on errors and SystemExit.
		im.logout()

	print('* %d duplicate(s) found.' % dup_cnt)


def _parse_header_fields(header):
	"""Split a raw header block into a dict of lower-cased name -> value.

	Lines starting with a space or tab are folded continuations and are
	appended, unchanged, to the value of the preceding field.  Lines with
	no colon are ignored.  When a name repeats, the last occurrence wins.
	"""
	fields = {}
	key = None
	for row in header.splitlines():
		if not row:
			continue
		if row.startswith((' ', '\t')):
			# A continuation with no field before it has nothing to extend.
			if key is not None:
				fields[key] += row
			continue
		name, sep, value = row.partition(':')
		if not sep:
			# Not a header field; its continuations must not attach elsewhere.
			key = None
			continue
		key = name.lower()
		fields[key] = value.lstrip()
	return fields


def _decode_subject(raw):
	"""Return the first decoded fragment of a raw Subject value as unicode."""
	text, charset = email.header.decode_header(raw)[0]
	if charset in ('ibm-euckr', '5601'):
		# These labels are decoded as EUC-KR.
		charset = 'euc-kr'
	elif not charset:
		charset = 'utf-8'
	try:
		return text.decode(charset, 'replace')
	except LookupError:
		# Unknown charset label: fall back rather than abort the scan.
		return text.decode('utf-8', 'replace')