def get_mail(gid, pw, sender):
    """Collect the unread inbox mail sent by *sender* and mark it as read.

    Args:
        gid: Gmail account name passed to ``Gmail.login``.
        pw: Password passed to ``Gmail.login``.
        sender: Address used as the ``sender`` filter of the inbox search.

    Returns:
        A two-element list ``[bodies, dates]`` where ``bodies`` holds each
        message's ``html`` attribute and ``dates`` the matching ``sent_at``
        values, in the order the search returned the messages.
    """
    # Open the Gmail session.
    g = Gmail()
    g.login(gid, pw)

    bodies = []
    sent_dates = []
    try:
        # Fetch the list of unread mail from this sender.
        mails = g.inbox().mail(sender=sender, unread=True)

        # Store each message's data and flag the message as read.
        for mail in mails:
            mail.fetch()
            mail.read()
            logging.debug(mail.sent_at)
            bodies.append(mail.html)
            sent_dates.append(mail.sent_at)
    finally:
        # Logging out is mandatory, so do it even when a fetch/read fails.
        g.logout()

    # Return the mail contents together with their dates.
    return [bodies, sent_dates]
def post(self):
    """Count the mail exchanged with each target contact and queue tasks.

    Reads three URL-quoted JSON request parameters:

    * ``account``  - object with ``email`` and ``access_token``.
    * ``contacts`` - object with a ``targets`` list of addresses.
    * ``duration`` - object with ``starttime`` and ``endtime`` given as
      timestamps, where ``-1`` means "no bound on this side".

    For every target the inbox is searched for mail from the contact and the
    sent box for mail to the contact; each result count is handed to
    ``self.assignTasks``. Any exception is printed with its traceback and the
    request is ended through ``self.abort(403)``.
    """
    try:
        self.response.headers['Content-Type'] = 'application/json;charset=utf-8'
        request = self.request
        account = urllib2.unquote(request.get('account')).encode('utf-8')
        contacts = urllib2.unquote(request.get('contacts')).encode('utf-8')
        duration = urllib2.unquote(request.get('duration')).encode('utf-8')

        accountInfo = json.loads(account)
        contactsInfo = json.loads(contacts)
        durationInfo = json.loads(duration)

        username = accountInfo['email']
        access_token = accountInfo['access_token']
        targets = contactsInfo['targets']
        starttime = durationInfo['starttime']
        endtime = durationInfo['endtime']

        # Build the date filter once. Only bounds that are actually set are
        # converted, so the -1 sentinel is never turned into a datetime.
        window = {}
        if starttime != -1:
            window['after'] = datetime.datetime.utcfromtimestamp(starttime)
            # An open-ended window is capped at the current time; an explicit
            # end time below overrides this.
            window['before'] = datetime.datetime.utcnow()
        if endtime != -1:
            window['before'] = datetime.datetime.utcfromtimestamp(endtime)

        g = Gmail()
        g.authenticate(username, access_token)
        try:
            for contact in targets:
                # Mail received from the contact.
                emails = g.inbox().mail(fr=contact, **window)
                self.assignTasks(account, contact, 'from', len(emails),
                                 starttime, endtime)

                # Mail sent to the contact.
                emails = g.sent().mail(to=contact, **window)
                self.assignTasks(account, contact, 'to', len(emails),
                                 starttime, endtime)
        finally:
            # Release the Gmail session even when a search or task fails.
            g.logout()
    except Exception as e:
        print("Exception:  %s" % (e,))
        print(traceback.format_exc())
        self.abort(403)
def post(self):
    """Fetch one slice of a contact's mail, analyse it and store the results.

    Request parameters:

    * ``searchType`` - ``'from'`` (inbox mail from the contact) or ``'to'``
      (sent mail to the contact).
    * ``contact`` - the contact's address.
    * ``account`` - JSON object with ``email`` and ``access_token``.
    * ``start`` / ``stop`` - message indices; ``range(start, stop, -1)`` is
      processed, i.e. from ``start`` down to ``stop + 1``.
    * ``starttime`` / ``endtime`` - integer timestamps, ``-1`` meaning "no
      bound on this side".

    Raises:
        ValueError: if ``searchType`` is neither ``'from'`` nor ``'to'``.

    NOTE(review): no exception handler is defined here; errors propagate to
    the caller after the Gmail session and DB connection are released.
    Confirm that is the desired behaviour for this worker.
    """
    request = self.request
    searchType = request.get('searchType')
    contact = request.get('contact')
    accountInfo = json.loads(request.get('account'))
    username = accountInfo['email']
    access_token = accountInfo['access_token']
    start = int(request.get('start'))
    stop = int(request.get('stop'))
    starttime = int(request.get('starttime'))
    endtime = int(request.get('endtime'))

    # Decide the direction up front so an unknown search type fails with a
    # clear message instead of an unbound FROM/TO later on.
    if searchType == 'from':
        FROM, TO = contact, username
    elif searchType == 'to':
        FROM, TO = username, contact
    else:
        raise ValueError("Unsupported searchType: %r" % (searchType,))

    # Only bounds that are actually set become search filters.
    window = {}
    if starttime != -1:
        window['after'] = datetime.datetime.utcfromtimestamp(starttime)
        # Open-ended window: cap at now unless an end time is given below.
        window['before'] = datetime.datetime.utcnow()
    if endtime != -1:
        window['before'] = datetime.datetime.utcfromtimestamp(endtime)

    db = cloudsqldb.connect_db()
    try:
        # The worker is spawned by RunVerbalucceHandler3, so the user is
        # looked up directly without another authentication step.
        userID = cloudsqldb.getUserID(username, db)
        vblc = verbalucce.Verbalucce()

        g = Gmail()
        g.authenticate(username, access_token)
        fetched = []
        try:
            if searchType == 'from':
                emails = g.inbox().mail(fr=contact, **window)
            else:
                emails = g.sent().mail(to=contact, **window)

            for i in range(start, stop, -1):
                try:
                    email = emails[i]
                    email.fetch()
                except Exception as e:
                    # Best effort: report the failure and skip this message.
                    print("Couldn't fetch email # %s" % i)
                    print(str(e))
                else:
                    fetched.append(email)
        finally:
            g.logout()

        for email in fetched:
            # Keep only one-to-one mail between the user and the contact:
            # a single recipient, no CC, no chat log, no calendar payload.
            if email.to is None or email.fr is None or email.body is None:
                continue
            if email.cc is not None or email.calendar is not None:
                continue
            if TO.lower() not in email.to.lower():
                continue
            if FROM.lower() not in email.fr.lower():
                continue
            if email.to.count("@") != 1:
                continue
            # presumably subject can be missing like the other headers;
            # treat that as an empty subject rather than crashing re.search
            if re.search("^Chat with", email.subject or ""):
                continue
            if "BEGIN:VCALENDAR" in email.body:
                continue

            # Skip messages that were already analysed for this user.
            if cloudsqldb.isEmailInDB(userID, email.uid, db):
                continue

            r = vblc.RunVerbalucce(email)
            # NOTE(review): time.mktime interprets the tuple as local time;
            # confirm which timezone sent_at is expressed in.
            timestamp = time.mktime(email.sent_at.timetuple())
            cloudsqldb.insertOrUpdateEmailData(userID, email, FROM, TO,
                                               timestamp, r, db)

        db.commit()
    finally:
        db.close()
    print("Successful verbalucce run!")
def _select_emails(g, options, user):
    """Run the mailbox search requested by the command-line options.

    Returns an ``(emails, FROM, TO)`` tuple. FROM/TO are the addresses used
    later to filter and label the messages; both are empty strings when
    neither ``-f`` nor ``-t`` was given.
    """
    window = {}
    if options.c_filter == 'duration':
        starttime = datetime.datetime.utcfromtimestamp(int(options.c_after))
        endtime = datetime.datetime.utcfromtimestamp(int(options.c_before))
        print("search duration: %s ~ %s" % (starttime, endtime))
        window = {'before': endtime, 'after': starttime}

    if options.c_from:
        FROM, TO = options.c_from, user
    elif options.c_to:
        FROM, TO = user, options.c_to
    else:
        FROM, TO = "", ""

    if options.c_uid:
        # An explicit uid wins over the address filters.
        emails = g.inbox().mail(uid=options.c_uid)
    elif options.c_from:
        emails = g.inbox().mail(fr=options.c_from, **window)
    elif options.c_to:
        emails = g.sent().mail(to=options.c_to, **window)
    else:
        # No address filter: search the whole inbox (within the window, if any).
        emails = g.inbox().mail(**window)
    return emails, FROM, TO


def _fetch_emails(emails, indices):
    """Download every selected message; return False if a handled error stopped the loop."""
    try:
        for i in indices:
            print("# %s  email to be fetched %s" % (i, emails[i]))
            emails[i].fetch()
    except (RuntimeError, TypeError, NameError) as e:
        print("Error: %s" % (e,))
        return False
    return True


def _is_direct_mail(email, FROM, TO):
    """Tell whether *email* is one-to-one mail between FROM and TO (no CC, chat log or calendar)."""
    if email.to is None or email.fr is None or email.body is None:
        return False
    if email.cc is not None or email.calendar is not None:
        return False
    if TO.lower() not in email.to.lower():
        return False
    if FROM.lower() not in email.fr.lower():
        return False
    # Exactly one "@" in the To header means a single recipient.
    if email.to.count("@") != 1:
        return False
    # presumably subject can be missing like the other headers; treat that
    # as an empty subject rather than crashing re.search
    if re.search("^Chat with", email.subject or ""):
        return False
    return "BEGIN:VCALENDAR" not in email.body


def _fetch_ngram_rows(cursor, column, FROM, TO):
    """Return the rows of one n-gram column stored for the FROM/TO pair."""
    # column is one of the fixed names passed by main(), never user input;
    # the addresses stay bound parameters.
    cursor.execute(
        'SELECT %s FROM emails WHERE from_email=? AND to_email=?' % column,
        (FROM, TO))
    return cursor.fetchall()


def _top_ngrams(rows, threshold):
    """Merge the stored n-gram lists in *rows* and return those occurring more than *threshold* times."""
    ngrams = []
    for row in rows:
        # Each cell holds the textual form of a Python list.
        ngrams.extend(ast.literal_eval(row[0]))
    fdist = nltk.FreqDist(ngrams)
    top = []
    for sample in fdist:
        if fdist[sample] > threshold:
            print("Sample: %s" % (sample,))
            print("Frequency: %s" % fdist[sample])
            top.append(sample)
    return top


def _write_emails_table(cursor, path):
    """Dump the per-email metrics of table ``emails`` to *path* as tab-separated text."""
    columns = ("uid", "timestamp", "num_total_words", "num_big_words",
               "num_allcaps", "num_words_per_sentence", "mobile",
               "num_high_pos", "num_low_pos", "num_high_neg", "num_low_neg")
    with open(path, "wb") as outfile:
        print("\n\n====Writing Table emails to file====")
        outfile.write("\t".join(columns))
        cursor.execute('SELECT ' + ', '.join(columns) + ' FROM emails')
        for r in cursor.fetchall():
            print(r)
            outfile.write('\n')
            outfile.write('\t'.join(str(s) for s in r))


def main():
    """Command-line entry point: fetch mail, analyse it and export the results.

    Steps:
      1. Parse the options and ask for the Gmail password.
      2. Search the mailbox (by uid, sender, recipient and/or time window)
         and fetch the selected messages.
      3. Start one ``vbThread`` per one-to-one message and wait for them.
      4. Aggregate the stored n-grams for the FROM/TO pair, update the RDB
         table and write table ``emails`` to ``TableEmails.dat``.

    Sets the module globals ``OUTPUT_DIR`` and, with ``-c``, ``CAMOU``.
    """
    global CAMOU, totalTime, OUTPUT_DIR
    OUTPUT_DIR = os.path.abspath(os.path.join(os.getcwd(), "../output/")) + "/"
    print("Output directory:  %s" % OUTPUT_DIR)

    parser = OptionParser()
    parser.add_option("-u", "--user", dest="c_user", help="Gmail login address")
    parser.add_option("-f", "--from", dest="c_from", help="from email address to be searched")
    parser.add_option("-t", "--to", dest="c_to", help="to email address to be searched")
    parser.add_option("-i", action="store", type="int", dest="email_index", help="email index (default: 0)")
    parser.add_option("-c", "--camouflage", dest="c_camouflage", help="camouflage id for the contact")
    parser.add_option("--full", action="store_true", dest="c_full", help="get full history")
    parser.add_option("--uid", action="store", type="int", dest="c_uid", help="fetch email with specific uid")
    parser.add_option("-b", dest="c_before", help="Before UTC time")
    parser.add_option("-a", dest="c_after", help="After UTC time")
    parser.add_option("--filter", dest="c_filter", help="Filter type")
    parser.add_option("--start", action="store", type="int", dest="c_start", help="start # of emails to fetch")
    parser.add_option("--stop", action="store", type="int", dest="c_stop", help="stop # of emails to fetch")
    (options, args) = parser.parse_args()
    print(options)

    USER = options.c_user or ""
    if USER == "":
        print("Please provide user email address using -u <email> \nExiting ...")
        raise SystemExit
    PWD = getpass.getpass()

    # Number of most recent messages to process when --full is not given.
    email_index = options.email_index if options.email_index else 1
    FULL = bool(options.c_full)

    if options.c_filter == 'duration' and (options.c_after is None or
                                           options.c_before is None):
        parser.error("--filter duration requires both -a and -b")

    g = Gmail()
    g.login(USER, PWD)
    try:
        emails, FROM, TO = _select_emails(g, options, USER)

        if options.c_camouflage:
            CAMOU = options.c_camouflage

        vblc = verbalucce.Verbalucce()

        print("\n From: %s" % (FROM,))
        print("To: %s" % (TO,))
        print("\n Number of emails =  %s" % len(emails))

        count = len(emails)
        # Messages are walked with range(stop, start, -1), so `start` is an
        # exclusive lower bound and `stop` the first index visited.
        if FULL:
            # Ignore email_index for the full history; -1 keeps index 0 in.
            email_index = 0
            start = -1
        else:
            start = count - email_index - 1
        stop = count - 1
        if options.c_start is not None:
            start = options.c_start
        if options.c_stop is not None:
            stop = options.c_stop
        print("search emails between start, stop")

        # Clamp to valid indices so a short or empty result list never wraps
        # around to negative indexing.
        start = max(start, -1)
        stop = min(stop, count - 1)
        indices = range(stop, start, -1)

        fetched = _fetch_emails(emails, indices)
    finally:
        # All mailbox access is done; release the session even on failure.
        g.logout()

    threads = []
    if fetched:
        try:
            for i in indices:
                # The trailing comma keeps the Python 2 print statement from
                # ending the line, so the next message continues on it.
                print("Run verbalucce on  # %s" % i),
                email = emails[i]
                if _is_direct_mail(email, FROM, TO):
                    print("Starts thread [ %s ]..." % (i - email_index))
                    thread = vbThread(vblc, i - email_index, email,
                                      i - email_index, FROM, TO)
                    thread.start()
                    threads.append(thread)
        except (RuntimeError, TypeError, NameError) as e:
            print("Error: %s" % (e,))

    for t in threads:
        t.join()
    print("Exiting Main Thread")

    # Aggregate the n-grams written by the worker threads.
    db = rdb.connect_database(OUTPUT_DIR + "mail.db")
    db.text_factory = str
    cursor = db.cursor()
    try:
        print("\n\n=====Ngrams====")
        rows = _fetch_ngram_rows(cursor, 'unigrams', FROM, TO)
        numOfEmails = len(rows)
        print("Number of emails : %s" % numOfEmails)
        topunigrams = _top_ngrams(rows, UNIGRAM_THREASHOLD)

        rows = _fetch_ngram_rows(cursor, 'bigrams', FROM, TO)
        topbigrams = _top_ngrams(rows, BIGRAM_THREASHOLD)

        rows = _fetch_ngram_rows(cursor, 'trigrams', FROM, TO)
        # NOTE(review): trigrams are ranked with the bigram threshold;
        # confirm whether a dedicated trigram threshold was intended.
        toptrigrams = _top_ngrams(rows, BIGRAM_THREASHOLD)

        rdb.insertOrUpdateRDB(FROM, TO, numOfEmails, topunigrams, topbigrams,
                              toptrigrams, db)

        # Output table emails to file.
        _write_emails_table(cursor, OUTPUT_DIR + "TableEmails.dat")
        db.commit()
    except (RuntimeError, TypeError, NameError) as e:
        print("Error: %s" % (e,))
    finally:
        cursor.close()
def _select_emails(g, options, user):
    """Run the mailbox search requested by the command-line options.

    Returns an ``(emails, FROM, TO)`` tuple. FROM/TO are the addresses used
    later to filter and label the messages; both are empty strings when
    neither ``-f`` nor ``-t`` was given.
    """
    window = {}
    if options.c_filter == 'duration':
        starttime = datetime.datetime.utcfromtimestamp(int(options.c_after))
        endtime = datetime.datetime.utcfromtimestamp(int(options.c_before))
        print("search duration: %s ~ %s" % (starttime, endtime))
        window = {'before': endtime, 'after': starttime}

    if options.c_from:
        FROM, TO = options.c_from, user
    elif options.c_to:
        FROM, TO = user, options.c_to
    else:
        FROM, TO = "", ""

    if options.c_uid:
        # An explicit uid wins over the address filters.
        emails = g.inbox().mail(uid=options.c_uid)
    elif options.c_from:
        emails = g.inbox().mail(fr=options.c_from, **window)
    elif options.c_to:
        emails = g.sent().mail(to=options.c_to, **window)
    else:
        # No address filter: search the whole inbox (within the window, if any).
        emails = g.inbox().mail(**window)
    return emails, FROM, TO


def _fetch_emails(emails, indices):
    """Download every selected message; return False if a handled error stopped the loop."""
    try:
        for i in indices:
            print("# %s  email to be fetched %s" % (i, emails[i]))
            emails[i].fetch()
    except (RuntimeError, TypeError, NameError) as e:
        print("Error: %s" % (e,))
        return False
    return True


def _is_direct_mail(email, FROM, TO):
    """Tell whether *email* is one-to-one mail between FROM and TO (no CC, chat log or calendar)."""
    if email.to is None or email.fr is None or email.body is None:
        return False
    if email.cc is not None or email.calendar is not None:
        return False
    if TO.lower() not in email.to.lower():
        return False
    if FROM.lower() not in email.fr.lower():
        return False
    # Exactly one "@" in the To header means a single recipient.
    if email.to.count("@") != 1:
        return False
    # presumably subject can be missing like the other headers; treat that
    # as an empty subject rather than crashing re.search
    if re.search("^Chat with", email.subject or ""):
        return False
    return "BEGIN:VCALENDAR" not in email.body


def _fetch_ngram_rows(cursor, column, FROM, TO):
    """Return the rows of one n-gram column stored for the FROM/TO pair."""
    # column is one of the fixed names passed by main(), never user input;
    # the addresses stay bound parameters.
    cursor.execute(
        'SELECT %s FROM emails WHERE from_email=? AND to_email=?' % column,
        (FROM, TO))
    return cursor.fetchall()


def _top_ngrams(rows, threshold):
    """Merge the stored n-gram lists in *rows* and return those occurring more than *threshold* times."""
    ngrams = []
    for row in rows:
        # Each cell holds the textual form of a Python list.
        ngrams.extend(ast.literal_eval(row[0]))
    fdist = nltk.FreqDist(ngrams)
    top = []
    for sample in fdist:
        if fdist[sample] > threshold:
            print("Sample: %s" % (sample,))
            print("Frequency: %s" % fdist[sample])
            top.append(sample)
    return top


def _write_emails_table(cursor, path):
    """Dump the per-email metrics of table ``emails`` to *path* as tab-separated text."""
    columns = ("uid", "timestamp", "num_total_words", "num_big_words",
               "num_allcaps", "num_words_per_sentence", "mobile",
               "num_high_pos", "num_low_pos", "num_high_neg", "num_low_neg")
    with open(path, "wb") as outfile:
        print("\n\n====Writing Table emails to file====")
        outfile.write("\t".join(columns))
        cursor.execute('SELECT ' + ', '.join(columns) + ' FROM emails')
        for r in cursor.fetchall():
            print(r)
            outfile.write('\n')
            outfile.write('\t'.join(str(s) for s in r))


def main():
    """Command-line entry point: fetch mail, analyse it and export the results.

    Steps:
      1. Parse the options and ask for the Gmail password.
      2. Search the mailbox (by uid, sender, recipient and/or time window)
         and fetch the selected messages.
      3. Start one ``vbThread`` per one-to-one message and wait for them.
      4. Aggregate the stored n-grams for the FROM/TO pair, update the RDB
         table and write table ``emails`` to ``TableEmails.dat``.

    Sets the module globals ``OUTPUT_DIR`` and, with ``-c``, ``CAMOU``.
    """
    global CAMOU, totalTime, OUTPUT_DIR
    OUTPUT_DIR = os.path.abspath(os.path.join(os.getcwd(), "../output/")) + "/"
    print("Output directory:  %s" % OUTPUT_DIR)

    parser = OptionParser()
    parser.add_option("-u", "--user", dest="c_user", help="Gmail login address")
    parser.add_option("-f", "--from", dest="c_from", help="from email address to be searched")
    parser.add_option("-t", "--to", dest="c_to", help="to email address to be searched")
    parser.add_option("-i", action="store", type="int", dest="email_index", help="email index (default: 0)")
    parser.add_option("-c", "--camouflage", dest="c_camouflage", help="camouflage id for the contact")
    parser.add_option("--full", action="store_true", dest="c_full", help="get full history")
    parser.add_option("--uid", action="store", type="int", dest="c_uid", help="fetch email with specific uid")
    parser.add_option("-b", dest="c_before", help="Before UTC time")
    parser.add_option("-a", dest="c_after", help="After UTC time")
    parser.add_option("--filter", dest="c_filter", help="Filter type")
    parser.add_option("--start", action="store", type="int", dest="c_start", help="start # of emails to fetch")
    parser.add_option("--stop", action="store", type="int", dest="c_stop", help="stop # of emails to fetch")
    (options, args) = parser.parse_args()
    print(options)

    USER = options.c_user or ""
    if USER == "":
        print("Please provide user email address using -u <email> \nExiting ...")
        raise SystemExit
    PWD = getpass.getpass()

    # Number of most recent messages to process when --full is not given.
    email_index = options.email_index if options.email_index else 1
    FULL = bool(options.c_full)

    if options.c_filter == 'duration' and (options.c_after is None or
                                           options.c_before is None):
        parser.error("--filter duration requires both -a and -b")

    g = Gmail()
    g.login(USER, PWD)
    try:
        emails, FROM, TO = _select_emails(g, options, USER)

        if options.c_camouflage:
            CAMOU = options.c_camouflage

        vblc = verbalucce.Verbalucce()

        print("\n From: %s" % (FROM,))
        print("To: %s" % (TO,))
        print("\n Number of emails =  %s" % len(emails))

        count = len(emails)
        # Messages are walked with range(stop, start, -1), so `start` is an
        # exclusive lower bound and `stop` the first index visited.
        if FULL:
            # Ignore email_index for the full history; -1 keeps index 0 in.
            email_index = 0
            start = -1
        else:
            start = count - email_index - 1
        stop = count - 1
        if options.c_start is not None:
            start = options.c_start
        if options.c_stop is not None:
            stop = options.c_stop
        print("search emails between start, stop")

        # Clamp to valid indices so a short or empty result list never wraps
        # around to negative indexing.
        start = max(start, -1)
        stop = min(stop, count - 1)
        indices = range(stop, start, -1)

        fetched = _fetch_emails(emails, indices)
    finally:
        # All mailbox access is done; release the session even on failure.
        g.logout()

    threads = []
    if fetched:
        try:
            for i in indices:
                # The trailing comma keeps the Python 2 print statement from
                # ending the line, so the next message continues on it.
                print("Run verbalucce on  # %s" % i),
                email = emails[i]
                if _is_direct_mail(email, FROM, TO):
                    print("Starts thread [ %s ]..." % (i - email_index))
                    thread = vbThread(vblc, i - email_index, email,
                                      i - email_index, FROM, TO)
                    thread.start()
                    threads.append(thread)
        except (RuntimeError, TypeError, NameError) as e:
            print("Error: %s" % (e,))

    for t in threads:
        t.join()
    print("Exiting Main Thread")

    # Aggregate the n-grams written by the worker threads.
    db = rdb.connect_database(OUTPUT_DIR + "mail.db")
    db.text_factory = str
    cursor = db.cursor()
    try:
        print("\n\n=====Ngrams====")
        rows = _fetch_ngram_rows(cursor, 'unigrams', FROM, TO)
        numOfEmails = len(rows)
        print("Number of emails : %s" % numOfEmails)
        topunigrams = _top_ngrams(rows, UNIGRAM_THREASHOLD)

        rows = _fetch_ngram_rows(cursor, 'bigrams', FROM, TO)
        topbigrams = _top_ngrams(rows, BIGRAM_THREASHOLD)

        rows = _fetch_ngram_rows(cursor, 'trigrams', FROM, TO)
        # NOTE(review): trigrams are ranked with the bigram threshold;
        # confirm whether a dedicated trigram threshold was intended.
        toptrigrams = _top_ngrams(rows, BIGRAM_THREASHOLD)

        rdb.insertOrUpdateRDB(FROM, TO, numOfEmails, topunigrams, topbigrams,
                              toptrigrams, db)

        # Output table emails to file.
        _write_emails_table(cursor, OUTPUT_DIR + "TableEmails.dat")
        db.commit()
    except (RuntimeError, TypeError, NameError) as e:
        print("Error: %s" % (e,))
    finally:
        cursor.close()