def check_email():
    """Classify INBOX emails sent after the last recorded check.

    Reads the previous check time from ``last_check_time.pkl`` under
    ``pkl_dir``, fetches emails via ``cpm.get_emails('INBOX', ...)``, and for
    every email whose ``sent_at`` is later than that time: cleans body and
    subject, vectorizes their concatenation with the stored feature model,
    predicts with the stored classifier and hands the prediction to
    ``eval_email``. Finally stores the time captured at the start of this
    run as the new last-check time.

    If no emails are returned the function exits early, without loading the
    models and without updating the stored check time.

    Returns:
        None.
    """
    # TODO: set script to run at intervals (time.sleep?) - morn, mid-day,
    # afternoon
    last_check_path = os.path.join(pkl_dir, 'last_check_time.pkl')

    # Captured before fetching so the stored timestamp marks the start of
    # this run rather than its end.
    current_time = datetime.now()
    previous_check = cpm.unpickle(last_check_path)
    print(previous_check)

    emails = cpm.get_emails('INBOX', previous_check)
    if not emails:
        # Avoid unpickling the models if the inbox is empty.
        return

    # NOTE(review): presumably pickle-based loading -- only point pkl_dir at
    # trusted files.
    feature_model = cpm.unpickle(os.path.join(pkl_dir, 'final_vec.pkl'))
    classifier_model = cpm.unpickle(os.path.join(pkl_dir, 'final_model.pkl'))
    # Single-argument print(...) with %-formatting is valid on both
    # Python 2 and Python 3 and yields the same space-separated output.
    print('classifier model %s' % (classifier_model,))

    for message in emails:
        if message.sent_at > previous_check:
            clean_b = cpm.clean_raw_txt(message.body)
            clean_s = cpm.clean_raw_txt(message.subject)
            print('clean_email %s %s' % (clean_s, clean_b))

            # Body and subject are joined with no separator, as before.
            email_features = feature_model.transform([clean_b + clean_s])
            print('email_bag %s' % (email_features.shape,))

            classifier_result = classifier_model.predict(email_features)
            eval_email(classifier_result, message)

    cpm.pickle(current_time, last_check_path)
def get_data(box, email_owner, date):
    """Fetch emails from ``box`` and store each one, printing timings.

    Args:
        box: Mailbox identifier; passed to ``cpm.get_emails`` and to
            ``store_email``.
        email_owner: Passed through unchanged to ``store_email`` for every
            email.
        date: Passed to ``cpm.get_emails`` (presumably a lower bound on the
            email date -- confirm against ``cpm``).

    Returns:
        None. Elapsed times for the fetch and store phases are printed to
        standard output.
    """
    fetch_started = time()
    emails = cpm.get_emails(box, date)
    fetch_finished = time()
    # Parenthesised single-expression print works on Python 2 and 3 alike.
    print("Pulled gmail in %0.2fs." % (fetch_finished - fetch_started))

    for message in emails:
        store_email(message, box, email_owner)
    print("Stored emails in %0.2fs." % (time() - fetch_finished))