Ejemplo n.º 1
0
def fetch_email(imap, email_id):
    """Fetch one message over IMAP and turn it into an Avro-ready record.

    Args:
        imap: Connection object exposing an imaplib-style
            ``fetch(message_set, message_parts)`` returning ``(status, data)``.
        email_id: Identifier of the message; converted with ``str()`` before
            being sent to the server.

    Returns:
        A ``(status, avro_record, charset)`` tuple. ``status`` is the server
        status (``'OK'``) on success, otherwise one of:

        * ``'TIMEOUT'`` - the fetch raised ``TimeoutException``.
        * ``'ABORT'``   - the fetch raised any other exception.
        * ``'ERROR'``   - the server status was not ``'OK'``, the response
          payload was empty/malformed, or processing failed.
        * ``'UNICODE'`` - a ``UnicodeDecodeError`` occurred while processing.
        * ``'CHARSET'`` - processing finished without a usable charset.

        On every failure ``avro_record`` is an empty dict.
    """
    print("*******************fetch_email**************************")
    # NOTE: the SIGALRM-based 30 second alarm that used to live here is
    # disabled, so TimeoutException can only come from the caller's own
    # timeout machinery (if any).
    utils = EmailUtils()

    try:
        # Gmail's X-GM-THRID will get the thread of the message
        status, data = imap.fetch(str(email_id), '(X-GM-THRID RFC822)')
    except TimeoutException:
        return 'TIMEOUT', {}, None
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt/SystemExit
        # still propagate instead of being reported as 'ABORT'.
        return 'ABORT', {}, None

    if status != 'OK':
        return 'ERROR', {}, None

    # A server can answer 'OK' with an empty or [None] payload (for example
    # when the message id does not exist); report that as an error instead of
    # letting the indexing below raise out of this function.
    try:
        raw_thread_id = data[0][0]
        encoded_email = data[0][1]
    except (TypeError, IndexError):
        return 'ERROR', {}, None

    charset = None
    try:
        # RFC2822 says default charset is us-ascii, which often saves us when
        # no charset is specified.
        charset = utils.get_charset(encoded_email) or 'us-ascii'
        thread_id = utils.get_thread_id(raw_thread_id)
        avro_record, charset = utils.process_email(encoded_email, thread_id)
    except UnicodeDecodeError:
        return 'UNICODE', {}, charset
    except Exception:
        return 'ERROR', {}, None

    # Without a charset we pass bad chars to avro, and it dies. See AVRO-565.
    if not charset:
        return 'CHARSET', {}, charset
    return status, avro_record, charset
Ejemplo n.º 2
0
 def __init__(self):
     """Initialise every attribute of the downloader to its starting value.

     As described by the original author, this class downloads all emails in
     folders from your 163mail inbox and writes them as raw UTF-8 text in
     simple Avro records for further processing.
     """
     self.utils = EmailUtils()

     # Account credentials and the IMAP connection are unset at construction.
     self.username = self.password = self.imap = None

     # Avro schema and the two writer handles are unset at construction.
     self.schema = self.avro_writer = self.avro_writertmp = None

     # Folder currently selected, its message ids and its message count.
     self.imap_folder = self.id_list = self.folder_count = None

     # Size cut-off in bytes: messages whose RFC822.SIZE is below it are meant
     # to have their BODY fetched, larger ones only their HEADER.
     # NOTE(review): the earlier comment said "3M" but the value is 2 MiB -
     # confirm which one is intended.
     mebibyte = 1024 * 1024
     self.threshold_size = 2 * mebibyte
Ejemplo n.º 3
0
def fetch_email(imap, email_id):
    # Fetch a single message (plus its Gmail thread id) through `imap`.
    #
    # imap     -- object whose fetch(message_set, message_parts) returns a
    #             (status, data) pair.
    # email_id -- message identifier; passed through str() before fetching.
    #
    # NOTE(review): this snippet stops right after the except handler, so what
    # the function returns is not visible here.
    print "*******************fetch_email**************************"
    # Disabled SIGALRM-based timeout, kept for reference:
    # def timeout_handler(signum, frame):
    #   raise self.TimeoutException()
    #
    # signal.signal(signal.SIGALRM, timeout_handler)
    # signal.alarm(30) # trigger alarm in 30 seconds
    #
    # avro_record = dict()
    # status = 'FAIL'
    utils = EmailUtils()

    try:
        status, data = imap.fetch(
            str(email_id), '(X-GM-THRID RFC822)'
        )  # Gmail's X-GM-THRID will get the thread of the message
    except Exception, e:
        # Prints the Exception class itself followed by the caught instance;
        # the error is not re-raised, so `status` and `data` remain unbound
        # after a failed fetch.
        print Exception, " : ", e
Ejemplo n.º 4
0
 def __init__(self):
     """Create the EmailUtils helper used by this downloader.

     This class downloads all emails in folders from your Gmail inbox and
     writes them as raw UTF-8 text in simple Avro records for further
     processing.
     """
     # The docstring must be the first statement of the body; placed after
     # this assignment it was only a discarded string expression.
     self.utils = EmailUtils()