Ejemplo n.º 1
0
def main(pool):
    """Print a set of e-mail statistics to standard output.

    Builds the e-mail database, hands its values together with ``pool`` to
    each ``stat_*`` helper and prints, in order: the e-mail count per sender,
    the 20 most replied-to senders, the 20 highest thread reply frequency
    scores, the 10 highest and 10 lowest average reading levels, and the
    elapsed time.

    ``pool`` is only forwarded to the ``stat_*`` helpers (presumably a worker
    pool -- confirm against the caller).
    """
    long_rule = "--------------------"
    short_rule = "-------------------"

    def show(rows, middle, tail=""):
        # Rows are indexed positionally: row[0] is the label, row[1] the figure.
        for row in rows:
            print(str(row[0]) + middle + str(row[1]) + tail)

    database = email_database.buildEmailDatabase()
    started = time.time()

    top_senders = stat_mostEmails(pool, database.values())
    # Per-sender e-mail counts, reused below to average the reading level.
    sent_counts = {}
    print(long_rule)
    print("Most emails sent:")
    for row in top_senders:
        print(str(row[0]) + " sent " + str(row[1]) + " emails")
        sent_counts[row[0]] = row[1]

    print(short_rule)
    print("Most replied to emails")
    most_replied = stat_mostRepliedTo(pool, database.values())
    show(most_replied[:20], " had ", " email replies to his/her email threads")

    thread_scores = stat_mostThreads(pool, database.values())
    print(long_rule)
    print("Most likely to reply to an email thread")
    show(thread_scores[:20], " has a thread reply frequency score of ")

    print(long_rule)
    difficulty = stat_readingDifficulty(pool, database.values())
    # NOTE(review): under Python 2 this `/` floors when both operands are
    # ints -- confirm the reading scores are floats.
    averaged = [(row[0], row[1] / sent_counts[row[0]]) for row in difficulty]
    # Highest average first; ties are broken by the label in ascending order.
    averaged.sort(key=lambda pair: (-pair[1], pair[0]))

    print("Highest grade reading level of emails")
    show(averaged[:10], " averaged a score of ")
    print(short_rule)
    print("Lowest grade reading level of emails")
    show(list(reversed(averaged))[:10], " averaged a score of ")
    print(short_rule)
    print("finished in " + str(time.time() - started) + "s")
Ejemplo n.º 2
0
def main():
    """Export thread links, per-day e-mail groups and authors to ``emails.js``.

    Builds the e-mail database, strips the ``"content"`` field from every
    record, replaces each ``"from"`` field with the address matched by
    ``EMAIL_PATTERN`` and writes three JavaScript variable assignments:

    * ``threadlinks``  -- one ``{"source", "target", "subject"}`` entry for
      every e-mail in a thread whose sender differs from the sender of the
      thread's earliest e-mail.
    * ``emailsbydate`` -- records grouped by day, keyed by a millisecond
      timestamp.
    * ``authors``      -- the distinct sender addresses, sorted.

    The output file is opened only after all data has been built and
    serialised, so a failure part-way through does not leave a truncated
    ``emails.js`` behind.

    Raises:
        AttributeError: if a ``"from"`` field does not match ``EMAIL_PATTERN``.
        TypeError: if a ``"date"`` field cannot be parsed by
            ``email.utils.parsedate_tz``.
        KeyError: if a record lacks one of the fields read here.
    """
    emailDatabase = email_database.buildEmailDatabase()

    # Flat list of all records plus (sender, timestamp, subject) per thread id.
    emailList = []
    threads = {}
    for key in emailDatabase:
        record = emailDatabase[key]
        del record["content"]
        sender = re.search(EMAIL_PATTERN, record["from"]).group(0)
        # mktime_tz yields a real UTC timestamp: it subtracts the UTC offset
        # and tolerates a missing one, so e-mails sent from different time
        # zones are ordered correctly within a thread.
        timestamp = email.utils.mktime_tz(email.utils.parsedate_tz(record["date"]))
        record["from"] = sender
        emailList.append(record)
        threads.setdefault(record["threadid"], []).append(
            (sender, timestamp, record["subject"]))

    # Link every replier to the author of the earliest e-mail in the thread.
    threadLinks = []
    for thread in threads:
        ordered = sorted(threads[thread], key=lambda entry: entry[1])
        starter = ordered[0][0]
        subject = ordered[0][2]
        for reply in ordered[1:]:
            if reply[0] == starter:
                # The thread starter replying to their own thread is no link.
                continue
            threadLinks.append(
                {"source": reply[0], "target": starter, "subject": subject})

    # Group records by calendar day; the key is built from the 2nd-4th
    # whitespace-separated tokens of the date header (day, month, year).
    dateList = {}
    for em in emailList:
        day = time.mktime(
            time.strptime(" ".join(em["date"].split()[1:4]), "%d %b %Y"))
        dateList.setdefault(int(day) * 1000, []).append(em)

    # Distinct authors, sorted so that the generated file is reproducible.
    authors = sorted(
        set(re.search(EMAIL_PATTERN, em["from"]).group(0) for em in emailList))

    output = [
        "var threadlinks = " + json.dumps(threadLinks) + ";",
        "var emailsbydate = " + json.dumps(dateList) + ";",
        "var authors = " + json.dumps(authors) + ";",
    ]
    with open("emails.js", "w") as f:
        for line in output:
            print(line, file=f)