コード例 #1
0
ファイル: to_sqldb.py プロジェクト: syst3mw0rm/kittystore
def _extract_headers(message):
    """Return the headers of `message` as a dict keyed by dash-less names.

    "Message-ID" is stored under "MessageID", "In-Reply-To" under
    "InReplyTo", and so on.
    """
    infos = {}
    for name in message.keys():
        infos[name.replace("-", "")] = message[name]
    # The keys above keep the casing found in the mailbox, so a header
    # written "Message-Id" would not be found under "MessageID".  Item
    # lookup on the message object is case-insensitive, so use it to fill
    # in the headers the importer relies on when they are still missing.
    for name in ("Message-ID", "From", "Date", "Subject", "References",
                 "In-Reply-To"):
        key = name.replace("-", "")
        if key not in infos:
            value = message[name]
            if value is not None:
                infos[key] = value
    return infos


def _split_sender(infos):
    """Split infos["From"] into a display name and infos["Email"], in place.

    Nothing is changed when there is no "From" entry or when no address
    can be extracted from it; "Email" is then left unset.
    """
    from email.utils import parseaddr

    sender = infos.get("From")
    if sender is None:
        return
    # Obfuscated form: "user at example.com (Full Name)".
    match = re.match(r"(.*)\((.*)\)", sender)
    if match:
        address, name = match.groups()
        infos["From"] = name
        infos["Email"] = address.replace(" at ", "@").strip()
        return
    # Otherwise try the usual "Full Name <user@example.com>" form.
    name, address = parseaddr(sender)
    if "@" in address:
        infos["Email"] = address
        if name:
            infos["From"] = name


def _new_thread_id(message_id):
    """Return the thread id used for a message that starts a thread."""
    return b32encode(sha1(message_id).digest())


def _resolve_thread_id(list_name, infos):
    """Return the thread id for the message described by `infos`.

    The parent is the first entry of "References" or, failing that, the
    "InReplyTo" value, which is then moved to "References".  When the
    parent is stored and has a thread id, that id is reused; otherwise a
    new id is derived from the message's own id.
    """
    # An empty "References" header is treated like a missing one instead
    # of failing on split()[0].
    references = (infos.get("References") or "").split()
    if references:
        parent = references[0]
    elif "InReplyTo" in infos:
        parent = infos.pop("InReplyTo")
        infos["References"] = parent
    else:
        return _new_thread_id(infos["MessageID"])
    parent = parent.replace("<", "").replace(">", "")
    found = store.get_email(list_name, parent)
    if found and found.thread_id:
        return found.thread_id
    return _new_thread_id(infos["MessageID"])


def to_db(mbfile, list_name):
    """Upload all the emails of a mbox file into the database using the
    kittystore API.

    Messages without any header or without a Message-ID are reported on
    stdout and skipped, and messages already returned by
    store.get_email() are left alone.  Any error raised while a message
    is being stored is printed and the import continues with the next
    message.  The module-level TOTALCNT counter is incremented once per
    message read.

    :arg mbfile: a mailbox file from which the emails are extracted and
        uploaded to the database.
    :arg list_name: the fully qualified list name.
    """
    global TOTALCNT
    email_cls = get_class_object(
        list_to_table_name(list_name), "email", MetaData(engine), create=True)
    for message in mailbox.mbox(mbfile):
        TOTALCNT = TOTALCNT + 1
        ## TODO: We need to catch-up Subjects/From which are of a specific
        ## encoding.
        infos = _extract_headers(message)
        ## There seem to be a problem to parse some messages
        if not infos:
            print('  Failed: %s keys: "%s"' % (mbfile, list(infos)))
            continue
        if "MessageID" not in infos:
            print("  Failed: No Message-ID for email:")
            print("   Content: %s %s %s" % (
                message["Subject"], message["Date"], message["From"]))
            continue
        infos["MessageID"] = infos["MessageID"].replace("<", "").replace(">", "")
        _split_sender(infos)
        try:
            if store.get_email(list_name, infos["MessageID"]):
                # Already stored, nothing to do.
                continue
            # A message without a Date header raises KeyError here and is
            # reported by the handler below.
            infos["Date"] = convert_date(infos["Date"])
            infos["Content"] = message.get_payload()
            infos["ThreadID"] = _resolve_thread_id(list_name, infos)

            # "Category" and "LegacyID" are computed but not passed to the
            # model constructor below.
            subject = (infos.get("Subject") or "").lower()
            if "agenda" in subject or "reminder" in subject:
                infos["Category"] = "Agenda"
            else:
                infos["Category"] = "Question"
            infos["Full"] = message.as_string()

            ## TODO: I'm not sure the TOTALCNT approach is the right one
            ## we should discuss this with the pipermail guys
            infos["LegacyID"] = TOTALCNT
            infos.setdefault("References", None)
            mail = email_cls(
                sender=infos["From"],
                # Unset when no address could be extracted from "From".
                email=infos.get("Email"),
                subject=infos.get("Subject"),
                content=infos["Content"],
                date=infos["Date"],
                message_id=infos["MessageID"],
                stable_url_id=infos["MessageID"],
                thread_id=infos["ThreadID"],
                references=infos["References"],
                full=infos["Full"],
            )
            mail.save(session)
            session.commit()
        except Exception as err:
            print(' Error: "%s"' % (err,))
            print("File: %s Content: %s %s %s" % (
                mbfile, message["Subject"], message["Date"], message["From"]))
            # Leave the session usable for the next message after a
            # failed save/commit.
            session.rollback()
コード例 #2
0
ファイル: to_sqldb.py プロジェクト: syst3mw0rm/kittystore
def get_table_size(list_name):
    """ Print the number of emails stored in the database for a list.

    :arg list_name, the list name, converted to a table name with
    list_to_table_name().
    """
    table_name = list_to_table_name(list_name)
    email_cls = get_class_object(table_name, "email", MetaData(engine))
    stored = session.query(email_cls).count()
    print("  %s emails are stored into the database" % stored)