def extract_students_mail_and_name_from_gmail(user=None, pwd=None, server="imap.gmail.com",
                                              mailfolder=["ensae/actuariat"],
                                              date="1-Jan-2016", fLOG=noLOG):
    """
    Extracts mails and names from a mail box.

    @param      user        user of the gmail inbox
    @param      pwd         password of the gmail inbox
    @param      server      gmail server, it should be ``"imap.gmail.com"``,
                            it works with others mail servers using the *IMAP* protocol
    @param      mailfolder  folder in your inbox to look into, there can be several
    @param      date        when to start looking (do not change the format,
                            look at the default value)
    @param      fLOG        logging function
    @return                 list of dictionary ``[{"name": ..., "mail": ...}]``,
                            key ``"name"`` is only present when names were
                            suggested for the address
    """
    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        emails, suggestions = grab_addresses(
            box, mailfolder, date, names=True, fLOG=fLOG)
    finally:
        # the session is closed even if the search fails
        box.logout()

    rows = []
    for mail in emails:
        el = {"mail": mail}
        if mail in suggestions:
            # several candidate names are merged into one sorted, ';'-separated string
            el["name"] = ";".join(sorted(suggestions[mail]))
        rows.append(el)
    return rows
def should_bemocked_test_fetch_mail(self):
    """
    Logs into a mailbox, searches folder ``inbox`` for mails whose subject
    matches ``subject`` and dumps each of them into ``destination``.

    NOTE(review): presumably named this way to stay out of test discovery
    until the mailbox is mocked, confirm.
    """
    mailbox = MailBoxImap("somebody", "pwd", "imap.gmail.com", True)
    mailbox.login()
    found = mailbox.enumerate_search_subject("subject", "inbox")
    for message in found:
        # NOTE(review): the search iterator itself is given to dump as its
        # first argument, confirm this is what dump expects
        message.dump(found, "destination")
    mailbox.logout()
def test_mailbox_extended(self):
    """
    Fetches the mails received in ``inbox`` since yesterday and checks that
    names and senders do not contain the marker ``=?`` and that the second
    element returned by ``get_from`` contains ``@``.
    The test is skipped on travis/appveyor and on machine ``DOUZE2016``.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    if is_travis_or_appveyor():
        warnings.warn("requires a password")
        return

    # IMAP date format (same as the other tests: 1-Jan-2016), one day back
    now = datetime.datetime.now()
    now -= datetime.timedelta(1)
    date = now.strftime("%d-%b-%Y")

    if "DOUZE2016" in os.environ.get("COMPUTERNAME", ""):
        # does not work on the remote build server
        return

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        import keyring
    user = keyring.get_password("gmail", "pymmails,user")
    code = keyring.get_password("gmail", "pymmails,pwd")

    box = MailBoxImap(user, code, "imap.gmail.com", ssl=True, fLOG=fLOG)
    box.login()
    try:
        mails = box.enumerate_mails_in_folder("inbox", date=date)
        li = list(mails)
        self.assertTrue(len(li) > 0)
    finally:
        # the session is closed even when the fetch or the assertion fails
        box.logout()

    issues = []
    for mail in li:
        name = mail.get_name()
        # NOTE(review): this tests membership on the mail object, not on
        # ``name``; presumably ``"=?" in name`` was intended, confirm
        # against the mail class before changing it
        if "=?" in mail:
            issues.append(name)
        fr = mail.get_from()
        frm = [_ for _ in fr if _]
        if "=?" in frm[0]:
            issues.append(name)
        if "@" not in fr[1]:
            issues.append(name)

    if issues:
        raise Exception("Issues with\n{0}".format(
            "\n".join(str(_) for _ in issues)))
def test_mailbox(self):
    """
    Checks that folder ``test4`` of the unit test account contains
    three mails since ``1-Jan-2016``.
    The test is skipped on travis/appveyor because it requires a password.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    if is_travis_or_appveyor():
        warnings.warn("requires a password")
        return

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        import keyring
    code = keyring.get_password("sdut", "pymmails")

    box = MailBoxImap("unittest.sdpython", code,
                      "imap.gmail.com", ssl=True, fLOG=fLOG)
    box.login()
    try:
        mails = box.enumerate_mails_in_folder("test4", date="1-Jan-2016")
        li = list(mails)
        self.assertEqual(len(li), 3)
    finally:
        # the session is closed even when the assertion fails
        box.logout()
def test_mailbox_dump(self):
    """
    Dumps every mail of folder ``test4`` (since ``1-Jan-2016``) of the unit
    test account into a temporary folder with an @see cl EmailMessageRenderer.
    The test is skipped on travis/appveyor because it requires a password.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    if is_travis_or_appveyor():
        warnings.warn("requires a password")
        return

    with warnings.catch_warnings():
        warnings.simplefilter('ignore', DeprecationWarning)
        import keyring
    code = keyring.get_password("sdut", "pymmails")

    temp = get_temp_folder(__file__, "temp_dump")
    box = MailBoxImap("unittest.sdpython", code,
                      "imap.gmail.com", ssl=True, fLOG=fLOG)
    render = EmailMessageRenderer()
    box.login()
    try:
        mails = box.enumerate_mails_in_folder("test4", date="1-Jan-2016")
        for mail in mails:
            mail.dump(render, location=temp, fLOG=fLOG)
        render.flush()
    finally:
        # the session is closed even when a dump fails
        box.logout()
def extract_students_mail_and_name_from_gmail(user=None, pwd=None, server="imap.gmail.com",
                                              mailfolder=["ensae/actuariat"],
                                              date="1-Jan-2016", fLOG=noLOG):
    """
    Extracts mails and names from a mail box.

    @param      user        user of the gmail inbox
    @param      pwd         password of the gmail inbox
    @param      server      gmail server, it should be ``"imap.gmail.com"``,
                            it works with others mail servers using the *IMAP* protocol
    @param      mailfolder  folder in your inbox to look into, there can be several
    @param      date        when to start looking (do not change the format,
                            look at the default value)
    @param      fLOG        logging function
    @return                 list of dictionary ``[{"name": ..., "mail": ...}]``,
                            key ``"name"`` is only present when names were
                            suggested for the address
    """
    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        emails, suggestions = grab_addresses(box, mailfolder, date,
                                             names=True, fLOG=fLOG)
    finally:
        # the session is closed even if the search fails
        box.logout()

    rows = []
    for mail in emails:
        el = {"mail": mail}
        if mail in suggestions:
            # several candidate names are merged into one sorted, ';'-separated string
            el["name"] = ";".join(sorted(suggestions[mail]))
        rows.append(el)
    return rows
"*****@*****.**", "*****@*****.**", ] ############### # gather mails fLOG("fetch mails") if os.path.exists(filename_mails): with open(filename_mails, "r", encoding="utf8") as f: lines = f.readlines() emails = [l.strip("\r\t\n ") for l in lines] else: box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG) box.login() emails = grab_addresses(box, mailfolder, date, fLOG=fLOG) box.logout() emails = list( sorted( set([ _.strip("<>").lower() for _ in emails if _ not in skip_address ]))) with open(filename_mails, "w", encoding="utf8") as f: f.write("\n".join(emails)) ##################### # create a dataframe import pandas
def extract_students_mails_from_gmail_and_stores_in_folders(folder=".", filemails="emails.txt",
                                                            user=None, pwd=None, server="imap.gmail.com",
                                                            mailfolder=["ensae/ENSAE_2016_3A"],
                                                            date="1-Jan-2016",
                                                            zipfilename="projet_3A_2016.zip",
                                                            zipencpwd=b"sixteenbyteskeys", dataframe=None,
                                                            columns={"name": "nom_prenom", "group": "groupe",
                                                                     "subject": "sujet"},
                                                            skip_names=None, process_name=None,
                                                            title="List of emails", nolink_if=None, fLOG=fLOG):
    """
    The scenario is the following:

    * You are the teacher.
    * Students started their projects at date *t*.
    * They can work alone or by group.
    * They send mails, you reply.
    * Their address mail follows the convention: ``<first name>.<last name>@anything``
      so it is easy to associate a mail address to a student name.
    * You move every mail you received in a separate folder in your inbox.
    * Sometime, you send a mail to everybody.
    * Finally they send their project with attachments.
    * You want to store everything (mails and attachments) in folders, one per group.
    * You want a summary of what was received.
    * You want to build a zip file to share their work with others teachers.
    * You want to update the folder if a new mail was sent.

    This function looks into a folder of your inbox and grabs every mail and
    attachment from a group of students.

    @param      folder          where to store the results
    @param      filemails       file used to store students addresses (relative to *folder*),
                                the operation is done once, remove the file to force the function
                                to rebuild the information.
    @param      user            user of the gmail inbox
    @param      pwd             password of the gmail inbox
    @param      server          gmail server, it should be ``"imap.gmail.com"``,
                                it works with others mail servers using the *IMAP* protocol
    @param      mailfolder      folder in your inbox to look into, there can be several
    @param      date            when to start looking (do not change the format,
                                look at the default value)
    @param      zipfilename     name of the zip file to create (relative to *folder*)
    @param      zipencpwd       the zip file is also encrypted for a safer share with this key
                                and function `encrypt_stream <http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/pyquickhelper/filehelper/encryption.html#pyquickhelper.filehelper.encryption.encrypt_stream>`_.
    @param      dataframe       definition of students groups: a dataframe, the name of an
                                ``xlsx`` file or the name of a tab-separated text file (utf8),
                                it must be specified
    @param      columns         columns the function will look into, students names, group definition
                                (a unique number for all students in the same group), subject,
                                an optional key ``"mail"`` gives the column holding the addresses,
                                in that case the mailbox is not mined for addresses
    @param      skip_names      list of names to skip
    @param      process_name    to operate a transformation before matching students names with
                                their emails
    @param      title           each group folder contains a html file connecting them,
                                this is its title
    @param      nolink_if       The summary extracts links from url, it skips the urls which
                                contain one of the substrings included in that list
                                (None to use a default set)
    @param      fLOG            logging function
    @return                     @see cl ProjectsRepository

    By default, Gmail does not let you programmatically access your own inbox,
    you need to modify your gmail parameters to let this function do so.
    """
    # every output path is resolved against the requested folder
    folder = os.path.abspath(folder)
    filemails = os.path.join(folder, filemails)
    zipfilename = os.path.join(folder, zipfilename)
    zipfilenameenc = zipfilename + ".enc"

    # load the groups
    if isinstance(dataframe, pandas.DataFrame):
        df = dataframe
    elif dataframe.endswith("xlsx"):
        fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] read dataframe", dataframe)
        df = pandas.read_excel(dataframe)
    else:
        df = pandas.read_csv(dataframe, sep="\t", encoding="utf8")

    # check mails
    if "mail" not in columns:
        if os.path.exists(filemails):
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] read addresses from ", filemails)
            with open(filemails, "r", encoding="utf8") as f:
                lines = f.readlines()
            emails = [li.strip("\r\t\n ") for li in lines]
        else:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] mine address ")
            box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
            box.login()
            try:
                emails = grab_addresses(box, mailfolder, date, fLOG=fLOG)
            finally:
                # the session is closed even if the search fails
                box.logout()

            with open(filemails, "w", encoding="utf8") as f:
                f.write("\n".join(emails))
    else:
        # nothing to do mail already present
        emails = set(df[columns["mail"]])

    # we remove empty names
    df = df[~df[columns["name"]].isnull()].copy()

    if process_name:
        df[columns["name"]] = df[columns["name"]].apply(process_name)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] create groups folders in", folder)
    # NOTE(review): this instance is replaced right below, it is kept in case
    # the constructor has side effects on the folder, confirm
    proj = ProjectsRepository(folder, fLOG=fLOG)

    # "mail" is an optional key of columns (it is absent from the default
    # value), indexing it directly raised KeyError.
    # NOTE(review): assumes create_folders_from_dataframe accepts col_mail=None
    # when addresses come from email_function, confirm
    proj = ProjectsRepository.create_folders_from_dataframe(df, folder,
                                                            col_subject=columns["subject"],
                                                            fLOG=fLOG,
                                                            col_group=columns["group"],
                                                            col_student=columns["name"],
                                                            email_function=emails,
                                                            skip_if_nomail=False,
                                                            col_mail=columns.get("mail"),
                                                            must_have_email=True,
                                                            skip_names=skip_names)
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] nb groups", len(
        proj.Groups))

    # gathers mails
    email_renderer = EmailMessageRenderer(tmpl=template_email_html_short,
                                          fLOG=fLOG)
    renderer = EmailMessageListRenderer(title=title, email_renderer=email_renderer,
                                        fLOG=fLOG)

    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        proj.dump_group_mails(renderer, group=None, mailbox=box, subfolder=mailfolder,
                              date=date, overwrite=False, skip_if_empty=True)
    finally:
        # the session is closed even if a dump fails
        box.logout()

    # cleaning files, the loop walks through a copy because groups are removed
    for group in list(proj.Groups):
        files = list(proj.enumerate_group_files(group))
        att = [_ for _ in files if ".html" in _]
        if len(att) <= 1:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] remove '{}'".format(group))
            proj.remove_group(group)

    # unzip files and convert notebooks
    for group in proj.Groups:
        proj.unzip_convert(group)

    summary = os.path.join(folder, "index.html")
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] write summary '{}'".format(summary))
    if os.path.exists(summary):
        os.remove(summary)
    proj.write_run_command()
    proj.write_summary(nolink_if=nolink_if)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] zip everything in", zipfilename)
    if os.path.exists(zipfilename):
        os.remove(zipfilename)
    proj.zip_group(None, zipfilename,
                   addition=["index.html", "mail_style.css", "emails.txt"])

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] encrypt the zip file in '{}'.".format(
        zipfilenameenc))
    if os.path.exists(zipfilenameenc):
        os.remove(zipfilenameenc)
    encrypt_stream(zipencpwd, zipfilename, zipfilenameenc, chunksize=2 ** 30)

    return proj
"*****@*****.**", "*****@*****.**", ] ############### # gather mails fLOG("fetch mails") if os.path.exists(filename_mails): with open(filename_mails, "r", encoding="utf8") as f: lines = f.readlines() emails = [l.strip("\r\t\n ") for l in lines] else: box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG) box.login() emails = grab_addresses(box, mailfolder, date, fLOG=fLOG) box.logout() emails = list(sorted(set([_.strip("<>").lower() for _ in emails if _ not in skip_address]))) with open(filename_mails, "w", encoding="utf8") as f: f.write("\n".join(emails)) ##################### # create a dataframe import pandas rows = [{"nom_prenom": mail, "sujet": "octobre", "groupe": i + 1} for i, mail in enumerate(emails)] df = pandas.DataFrame(rows)
def extract_students_mails_from_gmail_and_stores_in_folders(folder=".", filemails="emails.txt",
                                                            user=None, pwd=None, server="imap.gmail.com",
                                                            mailfolder=["ensae/ENSAE_2016_3A"],
                                                            date="1-Jan-2016",
                                                            zipfilename="projet_3A_2016.zip",
                                                            zipencpwd=b"sixteenbyteskeys", dataframe=None,
                                                            columns={"name": "nom_prenom", "group": "groupe",
                                                                     "subject": "sujet"},
                                                            skip_names=None, process_name=None,
                                                            title="List of emails", nolink_if=None, fLOG=fLOG):
    """
    The scenario is the following:

    * You are the teacher.
    * Students started their projects at date *t*.
    * They can work alone or by group.
    * They send mails, you reply.
    * Their address mail follows the convention: ``<first name>.<last name>@anything``
      so it is easy to associate a mail address to a student name.
    * You move every mail you received in a separate folder in your inbox.
    * Sometime, you send a mail to everybody.
    * Finally they send their project with attachments.
    * You want to store everything (mails and attachments) in folders, one per group.
    * You want a summary of what was received.
    * You want to build a zip file to share their work with others teachers.
    * You want to update the folder if a new mail was sent.

    This function looks into a folder of your inbox and grabs every mail and
    attachment from a group of students.

    @param      folder          where to store the results
    @param      filemails       file used to store students addresses (relative to *folder*),
                                the operation is done once, remove the file to force the function
                                to rebuild the information.
    @param      user            user of the gmail inbox
    @param      pwd             password of the gmail inbox
    @param      server          gmail server, it should be ``"imap.gmail.com"``,
                                it works with others mail servers using the *IMAP* protocol
    @param      mailfolder      folder in your inbox to look into, there can be several
    @param      date            when to start looking (do not change the format,
                                look at the default value)
    @param      zipfilename     name of the zip file to create (relative to *folder*)
    @param      zipencpwd       the zip file is also encrypted for a safer share with this key
                                and function `encrypt_stream <http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/pyquickhelper/filehelper/encryption.html#pyquickhelper.filehelper.encryption.encrypt_stream>`_.
    @param      dataframe       definition of students groups: a dataframe, the name of an
                                ``xlsx`` file or the name of a tab-separated text file (utf8),
                                it must be specified
    @param      columns         columns the function will look into, students names, group definition
                                (a unique number for all students in the same group), subject,
                                an optional key ``"mail"`` gives the column holding the addresses,
                                in that case the mailbox is not mined for addresses
    @param      skip_names      list of names to skip
    @param      process_name    to operate a transformation before matching students names with
                                their emails
    @param      title           each group folder contains a html file connecting them,
                                this is its title
    @param      nolink_if       The summary extracts links from url, it skips the urls which
                                contain one of the substrings included in that list
                                (None to use a default set)
    @param      fLOG            logging function
    @return                     @see cl ProjectsRepository

    By default, Gmail does not let you programmatically access your own inbox,
    you need to modify your gmail parameters to let this function do so.
    """
    # every output path is resolved against the requested folder
    folder = os.path.abspath(folder)
    filemails = os.path.join(folder, filemails)
    zipfilename = os.path.join(folder, zipfilename)
    zipfilenameenc = zipfilename + ".enc"

    # load the groups
    if isinstance(dataframe, pandas.DataFrame):
        df = dataframe
    elif dataframe.endswith("xlsx"):
        fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] read dataframe", dataframe)
        df = pandas.read_excel(dataframe)
    else:
        df = pandas.read_csv(dataframe, sep="\t", encoding="utf8")

    # check mails
    if "mail" not in columns:
        if os.path.exists(filemails):
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] read addresses from ", filemails)
            with open(filemails, "r", encoding="utf8") as f:
                lines = f.readlines()
            emails = [line.strip("\r\t\n ") for line in lines]
        else:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] mine address ")
            box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
            box.login()
            try:
                emails = grab_addresses(box, mailfolder, date, fLOG=fLOG)
            finally:
                # the session is closed even if the search fails
                box.logout()

            with open(filemails, "w", encoding="utf8") as f:
                f.write("\n".join(emails))
    else:
        # nothing to do mail already present
        emails = set(df[columns["mail"]])

    # we remove empty names
    df = df[~df[columns["name"]].isnull()].copy()

    if process_name:
        df[columns["name"]] = df[columns["name"]].apply(process_name)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] create groups folders in", folder)
    # NOTE(review): this instance is replaced right below, it is kept in case
    # the constructor has side effects on the folder, confirm
    proj = ProjectsRepository(folder, fLOG=fLOG)

    # "mail" is an optional key of columns (it is absent from the default
    # value), indexing it directly raised KeyError.
    # NOTE(review): assumes create_folders_from_dataframe accepts col_mail=None
    # when addresses come from email_function, confirm
    proj = ProjectsRepository.create_folders_from_dataframe(df, folder,
                                                            col_subject=columns["subject"],
                                                            fLOG=fLOG,
                                                            col_group=columns["group"],
                                                            col_student=columns["name"],
                                                            email_function=emails,
                                                            skip_if_nomail=False,
                                                            col_mail=columns.get("mail"),
                                                            must_have_email=True,
                                                            skip_names=skip_names)
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] nb groups", len(
        proj.Groups))

    # gathers mails
    email_renderer = EmailMessageRenderer(tmpl=template_email_html_short,
                                          fLOG=fLOG)
    renderer = EmailMessageListRenderer(title=title, email_renderer=email_renderer,
                                        fLOG=fLOG)

    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        proj.dump_group_mails(renderer, group=None, mailbox=box, subfolder=mailfolder,
                              date=date, overwrite=False, skip_if_empty=True)
    finally:
        # the session is closed even if a dump fails
        box.logout()

    # cleaning files, the loop walks through a copy because groups are removed
    for group in list(proj.Groups):
        files = list(proj.enumerate_group_files(group))
        att = [_ for _ in files if ".html" in _]
        if len(att) <= 1:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] remove ", group)
            proj.remove_group(group)

    # unzip files and convert notebooks
    for group in proj.Groups:
        proj.unzip_convert(group)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] summary ")
    summary = os.path.join(folder, "index.html")
    if os.path.exists(summary):
        os.remove(summary)
    proj.write_summary(nolink_if=nolink_if)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] zip everything in", zipfilename)
    if os.path.exists(zipfilename):
        os.remove(zipfilename)
    proj.zip_group(None, zipfilename,
                   addition=["index.html", "mail_style.css", "emails.txt"])

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] encrypt the zip file in", zipfilenameenc)
    if os.path.exists(zipfilenameenc):
        os.remove(zipfilenameenc)
    encrypt_stream(zipencpwd, zipfilename, zipfilenameenc, chunksize=2 ** 30)

    return proj