def check_email():
    """Classify inbox emails that arrived since the last recorded check.

    Loads the timestamp of the previous check from ``last_check_time.pkl``,
    fetches emails from ``INBOX`` via ``cpm.get_emails`` and, if there are
    any, loads the pickled feature model and classifier. Every email whose
    ``sent_at`` is later than the previous check is cleaned, transformed
    into features, classified, and handed to ``eval_email`` together with
    the prediction. Finally the time at which this run started is pickled
    as the new last-check time.

    Returns:
        None. When no emails are returned the function exits early and the
        stored last-check time is left untouched.
    """
    # TODO: set script to run at intervals (time.sleep?) - morning, mid-day,
    # afternoon.

    # Captured before fetching, so mail that arrives while this run is in
    # progress is still newer than the timestamp stored at the end.
    current_time = datetime.now()
    last_check_path = os.path.join(pkl_dir, 'last_check_time.pkl')
    previous_check = cpm.unpickle(last_check_path)
    # Debug output uses the single-argument print(...) form, which prints
    # the same text whether print is a statement or a function.
    print(previous_check)

    emails = cpm.get_emails('INBOX', previous_check)
    if not emails:
        # Avoid unpickling the models if the inbox is empty.
        return

    feature_model = cpm.unpickle(os.path.join(pkl_dir, 'final_vec.pkl'))
    classifier_model = cpm.unpickle(os.path.join(pkl_dir, 'final_model.pkl'))
    print('classifier model %s' % (classifier_model,))

    for email in emails:
        if email.sent_at > previous_check:
            clean_b = cpm.clean_raw_txt(email.body)
            clean_s = cpm.clean_raw_txt(email.subject)
            print('clean_email %s %s' % (clean_s, clean_b))

            # Join subject and body with a space so the words at the seam
            # are not fused into a single token. This mirrors the
            # ``sub_body`` layout built in store_email (presumably what the
            # models were trained on -- TODO confirm).
            email_text = clean_s + ' ' + clean_b
            email_features = feature_model.transform([email_text])
            print('email_bag %s' % (email_features.shape,))

            classifier_result = classifier_model.predict(email_features)
            eval_email(classifier_result, email)

    cpm.pickle(current_time, last_check_path)
def store_email(email, box, email_owner):
    """Insert one email into the ``raw_data_2`` table.

    The subject and body are cleaned with ``cpm.clean_raw_txt`` and stored
    individually as well as combined (``subject + ' ' + body``) in the
    ``sub_body`` column.

    Args:
        email: Message object; its ``message_id``, ``thread_id``, ``to``,
            ``fr``, ``cc``, ``sent_at``, ``labels``, ``subject`` and
            ``body`` attributes are read.
        box: Value stored in the ``box`` column.
        email_owner: Value stored in the ``email_owner`` column.

    Returns:
        None. If the insert raises ``psycopg2.IntegrityError`` the email is
        not stored; a message is printed and the message id, sender and
        cleaned body are appended to ``../not_needed/load_errors.txt``.
    """
    # Use nvarchar in case storing non-English data.
    # TODO: make the table name a variable.
    # In psycopg the placeholder must be %s even for ints, dates and other
    # non-string types.
    store_e = '''
        insert into raw_data_2
            (message_id, thread_id, to_email, from_email, cc, date, starred,
             subject, body, sub_body, email_owner, box, target)
        values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    '''

    # Prep text for storage.
    body = cpm.clean_raw_txt(email.body)
    subject = cpm.clean_raw_txt(email.subject)
    sub_body = subject + ' ' + body

    # Mark the target column: an email is a target only when it carries
    # both the 'Jeeves' and the '\\Starred' labels.
    starred = '\\Starred' in email.labels
    target = starred and 'Jeeves' in email.labels

    # Could apply executemany with the query and a list of values, but
    # would still need to fetch first and apply changes.
    with connect_db() as db:
        try:
            db.execute(store_e, (
                email.message_id, email.thread_id, email.to, email.fr,
                email.cc, email.sent_at, starred, subject, body, sub_body,
                email_owner, box, target,
            ))
        except psycopg2.IntegrityError:
            # Presumably the row already exists, so skip loading it and
            # record the email in the error log instead.
            print("Problem loading email")
            with open('../not_needed/load_errors.txt', 'a') as f:
                # write() accepts exactly one string. repr() keeps each
                # record on a single line even when the body contains
                # newlines.
                f.write('%r\t%r\t%r\n' % (email.message_id, email.fr, body))