def check_email():
# set script to run at intervals (time.sleep?) - morn, mid-day, afternoon

    current_time = datetime.now()
    previous_check = cpm.unpickle(os.path.join(pkl_dir, 'last_check_time.pkl'))
    print previous_check
    emails = cpm.get_emails('INBOX', previous_check)

    if not emails: # avoid unpickling if empty inbox
        return

    feature_model = cpm.unpickle(os.path.join(pkl_dir, 'final_vec.pkl'))

    classifier_model = cpm.unpickle(os.path.join(pkl_dir, 'final_model.pkl')) 
    
    print 'classifier model', classifier_model

    for email in emails:
        if email.sent_at > previous_check:
            clean_b = cpm.clean_raw_txt(email.body)
            clean_s = cpm.clean_raw_txt(email.subject)
            print 'clean_email', clean_s, clean_b
            email_features = feature_model.transform([clean_b+clean_s])
            print 'email_bag', email_features.shape
            classifier_result = classifier_model.predict(email_features)
            
            eval_email(classifier_result, email)

    cpm.pickle(current_time, os.path.join(pkl_dir,'last_check_time.pkl'))
# Example #2
# 0
def get_data(box, email_owner, date):
    start = time()
    emails = cpm.get_emails(box, date)
    end = time()
    print "Pulled gmail in %0.2fs." % (end - start)
    for email in emails:
        store_email(email, box, email_owner)
    print "Stored emails in %0.2fs." % (time() - end)