def test_tohtml(self):
    """
    Loads a pickled mail from ``data/message.pickle``, dumps it as HTML
    in a temporary folder and checks the name and the content of the
    produced file.

    The test is skipped on Anaconda distributions (see the comment below).
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")
    data = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
    mesf = os.path.join(data, "message.pickle")

    if "anaconda" in sys.executable.lower() or "anaconda" in sys.base_prefix.lower():
        # issue with Anaconda about module pickle
        # pickle has issues when getting a file saved by pickle on another
        # distribution
        return

    with open(mesf, "rb") as f:
        try:
            import pymmails
            assert pymmails is not None
            obj = pickle.load(f)
        except ImportError:
            # pymmails is not installed: fall back on the sources of the
            # repository.
            path = os.path.normpath(
                os.path.abspath(
                    os.path.join(
                        os.path.split(__file__)[0],
                        "..",
                        "..",
                        "src")))
            added = path not in sys.path
            if added:
                sys.path.append(path)
            try:
                import pymmails
                assert pymmails is not None
                # The first attempt may have consumed part of the stream.
                f.seek(0)
                obj = pickle.load(f)
            finally:
                # Only undo what this test did: removing the last entry
                # unconditionally would drop an unrelated path whenever
                # *path* was already registered.
                if added:
                    sys.path.remove(path)

    temp = get_temp_folder(__file__, "temp_dump_html")
    render = EmailMessageRenderer()
    ff = obj.dump(render, location=temp, fLOG=fLOG)
    render.flush()
    fLOG("ff=", type(ff), ff)
    # ff[0][0] is used as the name of the html file which was written.
    with open(ff[0][0], "r", encoding="utf8") as f:
        content = f.read()
    if '<link rel="stylesheet" type="text/css" href="mail_style.css">' not in content:
        raise Exception(content)
    if "d_2014-12-15_p_yyyyy-matthieu-at-xxxxx-xxx_ii_48bdbc9f9fd180ab917cec5bed8ca529.html" not in ff[0][0]:
        raise Exception(ff[0][0])
    if "<h1>2014/12/15 - projet 3A - élément logiciel</h1>" not in content:
        raise Exception(content)
def test_box_mock(self):
    """
    Reads mails from a mocked mailbox stored in folder ``data``,
    checks a mail survives a conversion to bytes and back,
    then renders the first mail as HTML.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    here = os.path.dirname(__file__)
    data = os.path.abspath(os.path.join(here, "data"))

    box = MailBoxMock(data, b"unittestunittest", fLOG)
    box.login()
    folders = box.folders()
    self.assertEqual(len(folders), 1)
    fLOG(folders)
    mails = list(box.enumerate_mails_in_folder("trav"))
    box.logout()

    fLOG(len(mails))
    self.assertTrue(len(mails) > 0)

    # round trip: mail -> bytes -> mail, the subject must be the same
    mail0 = mails[0]
    raw = mail0.as_bytes()
    ema = EmailMessage.create_from_bytes(raw)
    d0 = mail0.to_dict()
    d1 = ema.to_dict()
    self.assertEqual(d0["Subject"], d1["Subject"])

    render = EmailMessageRenderer()
    rendered = render.render(
        "__LOC__", mail0, file_css="example_css.css", attachments=None)
    html, _, __ = rendered
    if "example_css.css" not in html:
        raise Exception(html)

    # the first mail is expected to have one of these dates
    expected_dates = (
        "<tr><th>Date</th><td>Sat, 1 Aug 2015",
        "<tr><th>Date</th><td>Fri, 14 Aug 2015",
        "<tr><th>Date</th><td>Fri, 20 Aug 2015",
        "<tr><th>Date</th><td>Sun, 20 Dec 2015",
    )
    if not any(row in html for row in expected_dates):
        raise Exception(html)
    fLOG(html)
def test_mailbox_dump(self):
    """
    Dumps the mails of folder ``test4`` of a real IMAP mailbox into a
    temporary folder. The password is read with :epkg:`keyring`,
    the test is skipped on travis or appveyor.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    if is_travis_or_appveyor():
        warnings.warn("requires a password")
        return

    with warnings.catch_warnings():
        # importing keyring may raise DeprecationWarning
        warnings.simplefilter('ignore', DeprecationWarning)
        import keyring
    code = keyring.get_password("sdut", "pymmails")
    temp = get_temp_folder(__file__, "temp_dump")
    box = MailBoxImap("unittest.sdpython", code,
                      "imap.gmail.com", ssl=True, fLOG=fLOG)
    render = EmailMessageRenderer()
    box.login()
    try:
        mails = box.enumerate_mails_in_folder("test4", date="1-Jan-2016")
        for mail in mails:
            mail.dump(render, location=temp, fLOG=fLOG)
        render.flush()
    finally:
        # always close the session, even if a dump fails
        box.logout()
def test_regular_expression(self):
    """
    Checks that ``process_body_html`` replaces a ``cid:`` reference
    by the relative path of the corresponding attachment.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    cid = "1146aa0a-244a-440e-8ea5-7b272c94f89a"
    # the html is split into adjacent literals only to keep lines short
    body = (
        ' </div> <div><img name="14a318e16161c62a_14a31789f7a34aae_null" '
        'title="pastedImage.png" src="cid:f8b05bd4-1c83-47bc-af9d-0032ba9c018e"><br> </div> '
        "<div>4. Vous m'avez demande d'afficher l'arbre de decision pour mon random forest, "
        'mais apparemment ".tree_" '
        "n'existe que pour les decision trees . J'ai donc essaye de le faire avec un DT "
        "apres avoir telecharge le logiciel pour tracer l'arbre mais ca ne marche pas<br> </div> "
        '<div><br> </div> <div><img name="14a318e16161c62a_14a31789f7a34aae_null" '
        'title="pastedImage.png" src="cid:1146aa0a-244a-440e-8ea5-7b272c94f89a" '
        'height="153.02644466209597" width="560"><br> </div> <div><br> ?<sp '
    ).replace(" ", "")

    image = os.path.join(here, "attachements", "image.png")
    attachments = [(image, None, "1146aa0a-244a-440e-8ea5-7b272c94f89a")]

    rendered = EmailMessageRenderer().process_body_html(here, body, attachments)
    assert cid not in rendered

    exp = 'src="attachements/image.png"'
    # path separators are normalized before looking for the expected link
    if exp in rendered.replace("\\", "/"):
        return
    raise Exception(
        'string "attachements/image.png" not found in\n{0}'.format(rendered))
df, folder, col_subject="sujet", fLOG=fLOG, col_group="groupe", col_student="nom_prenom", email_function=emails, skip_if_nomail=False, must_have_email=True) fLOG("nb groups", len(proj.Groups)) ############# # dump mails if do_mail: email_render = EmailMessageRenderer(tmpl=template_email_html_short, fLOG=fLOG) render = EmailMessageListRenderer(title="list of mails", email_renderer=email_render, fLOG=fLOG) box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG) box.login() mails = proj.dump_group_mails(render, group=None, mailbox=box, subfolder=mailfolder, date=date, overwrite=False, convert_files=True) box.logout()
def write_summary(self, renderer=None, link="index_mails.html",
                  outfile="index.html", title="summary", nolink_if=None):
    """
    Produces a summary and uses a :epkg:`Jinja2` template.

    @param      renderer    instance of `EmailMessageRenderer <http://www.xavierdupre.fr/app/pymmails/
                            helpsphinx//pymmails/render/email_message_renderer.html>`_),
                            can be None, in which case one is created with the default
                            template and flushed once the summary is written
    @param      link        look for this file in each folder
    @param      outfile     output file
    @param      nolink_if   link containing those strings will be removed
                            (if None, a default set will be assigned)
    @param      title       title
    @return                 summary (what ``renderer.write`` returns)

    For every group, the function counts the files (``.metadata`` files
    are ignored), sums their sizes, lists the attachments, the files
    found below an ``attachments`` folder, the mails and the links found
    in the mails.

    The current default template is::

    .. runpython::

        from ensae_teaching_cs.automation_students.projects_repository import _default_template_summary_template
        print(_default_template_summary)
    """
    if nolink_if is None:
        nolink_if = ProjectsRepository._known_strings

    # names containing one of these strings are never listed
    junk = ("__MACOSX", ".ipynb_checkpoints")

    def filter_in(url):
        "tells if a link extracted from a mail should be kept"
        if "\n" in url or "\r" in url or "\t" in url:
            return False
        # the regular expression sometimes captures the html entity
        # which closes an attribute
        if url.endswith("&quot;"):
            return False
        if any(sub in url for sub in nolink_if):
            return False
        return ".ipynb_checkpoints" not in url

    def clean_url(u):
        "removes html entities and punctuation surrounding a link"
        # NOTE(review): the three entity literals (&#43;, &nbsp;, &quot;)
        # were restored from a copy in which they had been html-decoded,
        # confirm against the repository.
        u = u.replace("&#43;", "+").strip(".#'/ \r\n\t ")
        if u.endswith("&nbsp;"):
            u = u[:-6]
        return u

    def url_domain_name(url):
        "returns the domain and a short name (at most 30 characters) for a link"
        domain = urlparse(url).netloc
        pieces = [_ for _ in url.split("/") if _]
        last = pieces[-1] if pieces else domain
        if len(last) > 30:
            last = last[-30:]
        return domain, clean_url(last)

    def format_size(s):
        "converts a number of bytes into a readable string"
        if s <= 2 ** 11:
            return "{0} bytes".format(s)
        if s <= 2 ** 21:
            return "{0} Kb".format(s // (2 ** 10))
        if s <= 2 ** 31:
            return "{0} Mb".format(s // (2 ** 20))
        return "{0} Gb".format(s // (2 ** 30))

    groups = []
    for group in self.Groups:
        loc = self.get_group_location(group)
        lp = os.path.join(loc, link)
        if os.path.exists(lp):
            group_link = os.path.relpath(lp, self._location)
        else:
            group_link = "file:///{0}".format(group)

        nb_files = 0
        size = 0
        atts = []
        emails = []
        links = []
        created_files = []

        for name in self.enumerate_group_files(group):
            if name.endswith(".metadata"):
                continue
            nb_files += 1
            file_size = os.stat(name).st_size
            size += file_size
            folder = os.path.split(name)[0]
            splf = folder.replace("\\", "/").split("/")

            if folder.endswith("attachments"):
                # a file directly attached to a mail
                meta = name + ".metadata"
                if os.path.exists(meta):
                    data = EmailMessage.read_metadata(meta)
                    day = data["date"].strftime("%Y-%m-%d")
                else:
                    data = None
                    day = ""
                atts.append(
                    (day, os.path.relpath(name, self._location), data))
            elif "attachments" in splf:
                # a file in a subfolder of the attachments
                rel = os.path.relpath(name, loc)
                dest = os.path.relpath(name, self._location)
                if rel == dest:
                    raise Exception("weird\n{0}\n{1}".format(rel, dest))
                ssize = format_size(file_size)
                if not any(j in rel or j in dest for j in junk):
                    created_files.append((rel, dest, ssize))
            else:
                # anything else is considered as a mail
                mail = os.path.split(name)[-1]
                res = EmailMessage.interpret_default_filename(mail)
                if "date" in res and "uid" in res and "from" in res:
                    emails.append(
                        (res["date"], res["from"], res["uid"], res))
                    with open(os.path.join(loc, mail), "r", encoding="utf8") as f:
                        content = f.read()
                    urls = ProjectsRepository._link_regex.findall(content)
                    for u in set(urls):
                        u = clean_url(u)
                        if not filter_in(u):
                            continue
                        domain, last = url_domain_name(u)
                        links.append(
                            (res["date"], res["from"], clean_url(u), domain, last))

        # we sort
        atts.sort()
        links.sort()

        # we clean duplicated links
        unique_links = []
        seen = set()
        for date, from_, url, domain, last in links:
            if url in seen:
                continue
            if any(j in url or j in last for j in junk):
                continue
            unique_links.append((date, from_, url, domain, last))
            seen.add(url)

        # we create the variable for the template,
        # the dictionary stored in last position is not part of the
        # sorting key (dictionaries cannot be ordered)
        emails = [_[-1] for _ in sorted(emails, key=lambda e: e[:3])]
        groups.append(dict(link=group_link.replace("\\", "/"), group=group,
                           nb=nb_files, size=size, attachments=atts,
                           emails=emails, links=unique_links,
                           created_files=created_files))

    # final summary
    dof = renderer is None
    if dof:
        tmpl = ProjectsRepository._default_template_summary
        renderer = EmailMessageRenderer(tmpl=tmpl, fLOG=self.fLOG)

    res = renderer.write(filename=outfile, location=self.Location,
                         mail=None, attachments=None, groups=groups,
                         title=title, len=len, os=os, format_size=format_size)
    if dof:
        # the renderer was created here, nobody else will flush it
        renderer.flush()
    return res
def extract_students_mails_from_gmail_and_stores_in_folders(folder=".", filemails="emails.txt",
                                                            user=None, pwd=None, server="imap.gmail.com",
                                                            mailfolder=[
                                                                "ensae/ENSAE_2016_3A"],
                                                            date="1-Jan-2016", zipfilename="projet_3A_2016.zip",
                                                            zipencpwd=b"sixteenbyteskeys", dataframe=None,
                                                            columns={
                                                                "name": "nom_prenom", "group": "groupe", "subject": "sujet"},
                                                            skip_names=None, process_name=None,
                                                            title="List of emails", nolink_if=None, fLOG=fLOG):
    """
    The scenario is the following:

    * You are the teacher.
    * Students started their projects at date *t*.
    * They can work alone or by group.
    * They send mails, you reply.
    * Their mail address follows the convention: ``<first name>.<last name>@anything``
      so it is easy to associate a mail address to a student name.
    * You move every mail you received in a separate folder in your inbox.
    * Sometimes, you send a mail to everybody.
    * Finally they send their project with attachments.
    * You want to store everything (mails and attachments) in folders, one per group.
    * You want a summary of what was received.
    * You want to build a zip file to share their work with other teachers.
    * You want to update the folder if a new mail was sent.

    This function looks into a folder of your inbox and grabs every mail and
    attachment from a group of students.

    @param      folder          where to store the results
    @param      filemails       file used to store students addresses, the operation is done once,
                                remove the file to force the function to rebuild the information.
    @param      user            user of the gmail inbox
    @param      pwd             password of the gmail inbox
    @param      server          gmail server, it should be ``"imap.gmail.com"``,
                                it works with other mail servers using the *IMAP* protocol
    @param      mailfolder      folder in your inbox to look into,
                                there can be several
    @param      date            when to start looking (do not change the format,
                                look at the default value)
    @param      zipfilename     name of the zip file to create
    @param      zipencpwd       the zip file is also encrypted for a safer share with this key
                                and function `encrypt_stream <http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/
                                pyquickhelper/filehelper/encryption.html#pyquickhelper.filehelper.encryption.encrypt_stream>`_.
    @param      dataframe       dataframe which contains the definition of students groups,
                                it can be a dataframe, the name of an excel file (``xlsx``)
                                or the name of a text file (tabulations, utf-8)
    @param      columns         columns the function will look into, students names, group definition
                                (a unique number for all students in the same group), subject,
                                an optional key ``"mail"`` gives the column which contains
                                the addresses, in that case the inbox is not mined to find them
    @param      skip_names      list of names to skip
    @param      process_name    to operate a transformation before matching students names with
                                their emails
    @param      title           each group folder contains a html file connecting them,
                                this is its title
    @param      nolink_if       The summary extracts links from url, it skips the urls which
                                contain one of the substrings included in that list (None to use a default set)
    @param      fLOG            logging function
    @return                     @see cl ProjectsRepository

    By default, Gmail does not let you programmatically access your own inbox,
    you need to modify your gmail parameters to let this function do so.

    The default values of *mailfolder* and *columns* are containers,
    the function only reads them.
    """
    # the results go to *folder* (it used to be the current folder
    # whatever the value of the parameter was)
    folder = os.path.abspath(folder)
    filemails = os.path.join(folder, filemails)
    zipfilename = os.path.join(folder, zipfilename)
    zipfilenameenc = zipfilename + ".enc"

    # load the groups
    if isinstance(dataframe, pandas.DataFrame):
        df = dataframe
    elif dataframe is None:
        raise ValueError(
            "dataframe must be a pandas.DataFrame or the name of a file")
    elif dataframe.endswith("xlsx"):
        fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] read dataframe", dataframe)
        df = pandas.read_excel(dataframe)
    else:
        df = pandas.read_csv(dataframe, sep="\t", encoding="utf8")

    # check mails
    if "mail" not in columns:
        if os.path.exists(filemails):
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] read addresses from ", filemails)
            with open(filemails, "r", encoding="utf8") as f:
                lines = f.readlines()
            emails = [li.strip("\r\t\n ") for li in lines]
        else:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] mine address ")
            box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
            box.login()
            try:
                emails = grab_addresses(box, mailfolder, date, fLOG=fLOG)
            finally:
                box.logout()

            with open(filemails, "w", encoding="utf8") as f:
                f.write("\n".join(emails))
    else:
        # nothing to do mail already present
        emails = set(df[columns["mail"]])

    # we remove empty names
    df = df[~df[columns["name"]].isnull()].copy()

    if process_name:
        df[columns["name"]] = df[columns["name"]].apply(process_name)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] create groups folders in", folder)
    # NOTE(review): this instance is replaced right after, it is kept in
    # case the constructor prepares the folder, confirm and remove.
    proj = ProjectsRepository(folder, fLOG=fLOG)

    # columns.get: the key "mail" is optional (absent from the default value)
    proj = ProjectsRepository.create_folders_from_dataframe(
        df, folder, col_subject=columns["subject"], fLOG=fLOG,
        col_group=columns["group"], col_student=columns["name"],
        email_function=emails, skip_if_nomail=False,
        col_mail=columns.get("mail"), must_have_email=True,
        skip_names=skip_names)
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] nb groups", len(
        proj.Groups))

    # gathers mails
    email_renderer = EmailMessageRenderer(tmpl=template_email_html_short,
                                          fLOG=fLOG)
    renderer = EmailMessageListRenderer(title=title, email_renderer=email_renderer,
                                        fLOG=fLOG)

    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        proj.dump_group_mails(renderer, group=None, mailbox=box, subfolder=mailfolder,
                              date=date, overwrite=False, skip_if_empty=True)
    finally:
        box.logout()

    # cleaning files, a group with at most one html file is removed,
    # the list of groups is copied because it is modified in the loop
    for group in list(proj.Groups):
        files = list(proj.enumerate_group_files(group))
        att = [_ for _ in files if ".html" in _]
        if len(att) <= 1:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] remove '{}'".format(group))
            proj.remove_group(group)

    # unzip files and convert notebooks
    for group in proj.Groups:
        proj.unzip_convert(group)

    summary = os.path.join(folder, "index.html")
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] write summary '{}'".format(summary))
    if os.path.exists(summary):
        os.remove(summary)
    proj.write_run_command()
    proj.write_summary(nolink_if=nolink_if)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] zip everything in", zipfilename)
    if os.path.exists(zipfilename):
        os.remove(zipfilename)
    proj.zip_group(None, zipfilename,
                   addition=["index.html", "mail_style.css", "emails.txt"])

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] encrypt the zip file in '{}'.".format(
        zipfilenameenc))
    if os.path.exists(zipfilenameenc):
        os.remove(zipfilenameenc)
    encrypt_stream(zipencpwd, zipfilename, zipfilenameenc, chunksize=2 ** 30)

    return proj
def write_summary(self, render=None, link="index_mails.html",
                  outfile="index.html", title="summary", nolink_if=None):
    """
    Produces a summary and uses a Jinja2 template.

    @param      render      instance of `EmailMessageRenderer <http://www.xavierdupre.fr/app/pymmails/helpsphinx//pymmails/render/email_message_renderer.html>`_),
                            can be None, in which case one is created with the default
                            template and flushed once the summary is written
    @param      link        look for this file in each folder
    @param      outfile     output file
    @param      title       title
    @param      nolink_if   link containing those strings will be removed
                            (if None, a default set will be assigned)
    @return                 summary (what ``render.write`` returns)

    For every group, the function counts the files (``.metadata`` files
    are ignored), sums their sizes, lists the attachments, the files
    found below an ``attachments`` folder, the mails and the links found
    in the mails.

    The default template is the variable ``tmpl_lines`` defined at the
    end of the function. It receives the variables ``groups``, ``title``,
    ``len``, ``os``, ``format_size``.
    """
    if nolink_if is None:
        nolink_if = ProjectsRepository._known_strings

    def filter_in(url):
        "tells if a link extracted from a mail should be kept"
        # every test applies to the parameter (two of them used to read
        # a variable of the enclosing function)
        if "\n" in url or "\r" in url or "\t" in url:
            return False
        # the regular expression sometimes captures the html entity
        # which closes an attribute
        if url.endswith("&quot;"):
            return False
        return not any(sub in url for sub in nolink_if)

    def clean_url(u):
        "removes html entities and punctuation surrounding a link"
        # NOTE(review): the three entity literals (&#43;, &nbsp;, &quot;)
        # were restored from a copy in which they had been html-decoded,
        # confirm against the repository.
        u = u.replace("&#43;", "+").strip(".#'/ \r\n\t ")
        if u.endswith("&nbsp;"):
            u = u[:-6]
        return u

    def url_domain_name(url):
        "returns the domain and a short name (at most 30 characters) for a link"
        domain = urlparse(url).netloc
        pieces = [_ for _ in url.split("/") if _]
        last = pieces[-1] if pieces else domain
        if len(last) > 30:
            last = last[-30:]
        return domain, clean_url(last)

    def format_size(s):
        "converts a number of bytes into a readable string"
        if s <= 2 ** 11:
            return "{0} bytes".format(s)
        if s <= 2 ** 21:
            return "{0} Kb".format(s // (2 ** 10))
        if s <= 2 ** 31:
            return "{0} Mb".format(s // (2 ** 20))
        return "{0} Gb".format(s // (2 ** 30))

    groups = []
    for group in self.Groups:
        loc = self.get_group_location(group)
        lp = os.path.join(loc, link)
        if os.path.exists(lp):
            group_link = os.path.relpath(lp, self._location)
        else:
            group_link = "file:///{0}".format(group)

        nb_files = 0
        size = 0
        atts = []
        emails = []
        links = []
        created_files = []

        for name in self.enumerate_group_files(group):
            if name.endswith(".metadata"):
                continue
            nb_files += 1
            file_size = os.stat(name).st_size
            size += file_size
            folder = os.path.split(name)[0]
            splf = folder.replace("\\", "/").split("/")

            if folder.endswith("attachments"):
                # a file directly attached to a mail
                meta = name + ".metadata"
                if os.path.exists(meta):
                    data = EmailMessage.read_metadata(meta)
                    day = data["date"].strftime("%Y-%m-%d")
                else:
                    data = None
                    day = ""
                atts.append(
                    (day, os.path.relpath(name, self._location), data))
            elif "attachments" in splf:
                # a file in a subfolder of the attachments
                rel = os.path.relpath(name, loc)
                dest = os.path.relpath(name, self._location)
                if rel == dest:
                    raise Exception("weird\n{0}\n{1}".format(rel, dest))
                created_files.append((rel, dest, format_size(file_size)))
            else:
                # anything else is considered as a mail
                mail = os.path.split(name)[-1]
                res = EmailMessage.interpret_default_filename(mail)
                if "date" in res and "uid" in res and "from" in res:
                    emails.append(
                        (res["date"], res["from"], res["uid"], res))
                    with open(os.path.join(loc, mail), "r", encoding="utf8") as f:
                        content = f.read()
                    urls = ProjectsRepository._link_regex.findall(content)
                    for u in set(urls):
                        u = clean_url(u)
                        if not filter_in(u):
                            continue
                        domain, last = url_domain_name(u)
                        links.append(
                            (res["date"], res["from"], clean_url(u), domain, last))

        # we sort
        atts.sort()
        links.sort()

        # we clean duplicated links
        unique_links = []
        seen = set()
        for date, from_, url, domain, last in links:
            if url in seen:
                continue
            unique_links.append((date, from_, url, domain, last))
            seen.add(url)

        # we create the variable for the template,
        # the dictionary stored in last position is not part of the
        # sorting key (dictionaries cannot be ordered)
        emails = [_[-1] for _ in sorted(emails, key=lambda e: e[:3])]
        groups.append(dict(link=group_link.replace("\\", "/"), group=group,
                           nb=nb_files, size=size, attachments=atts,
                           emails=emails, links=unique_links,
                           created_files=created_files))

    dof = render is None
    if dof:
        # The template is built from a list of lines: removing spaces
        # from a single literal would also remove the spaces inside
        # the Jinja2 tags.
        tmpl_lines = [
            '<?xml version="1.0" encoding="utf-8"?>',
            '<head>',
            '<meta http-equiv="content-type" content="text/html; charset=utf-8" />',
            '</head>',
            '<body>',
            '<html>',
            '<head>',
            '<title>{{ title }}</title>',
            '<link rel="stylesheet" type="text/css" href="{{ css }}">',
            '</head>',
            '<body>',
            '<h1>{{ title }}</h1>',
            '<ol type="1">',
            '{% for ps in groups %}',
            '<li><a href="{{ ps["link"] }}">{{ ps["group"] }}</a><small><i>',
            '{{ ps["nb"] }} files - {{ format_size(ps["size"]) }} - last mail {{ ps["emails"][-1]["date"] }}',
            '--- {{ len(ps["attachments"]) }} attachments</i></small>',
            '{% if len(ps["attachments"]) > 0 %}',
            '<ul>',
            '{% for day, att, data in ps["attachments"] %}',
            '<li>att: {{ day }} - <a href="{{ att }}">{{ os.path.split(att)[-1] }}</a></li>',
            '{% endfor %}',
            '{% for date, from_, url, domain, last in ps["links"] %}',
            '<li>link: {{ date }} <a href="{{ url }}">{{ domain }} // {{ last }}</a> from {{ from_ }}</li>',
            '{% endfor %}',
            '</ul>',
            '{% endif %}',
            '{% if len(ps["created_files"]) > 0 %}',
            '<ul>',
            '{% for name, relpath, size in ps["created_files"] %}',
            '<li>added: <a href="{{ relpath }}">{{ name }}</a> {{ size }}</li>',
            '{% endfor %}',
            '</ul>',
            '{% endif %}',
            '</li>',
            '{% endfor %}',
            '</ol>',
            '</body>',
            '</html>',
            '',
        ]
        tmpl = "\n".join(tmpl_lines)
        render = EmailMessageRenderer(tmpl=tmpl, fLOG=self.fLOG)

    res = render.write(filename=outfile, location=self.Location,
                       mail=None, attachments=None, groups=groups,
                       title=title, len=len, os=os, format_size=format_size)
    if dof:
        # the renderer was created here, nobody else will flush it
        render.flush()
    return res
def test_sections(self):
    """
    Builds a repository of students projects from an excel file,
    dumps the mails of a mocked mailbox into the groups folders,
    writes the summary and checks the files which were created.
    """
    fLOG(
        __file__,
        self._testMethodName,
        OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.dirname(__file__))
    data = os.path.join(here, "data")
    dfile = os.path.join(data, "notes_eleves_2104_2015.xlsx")

    df = pandas.read_excel(dfile, skiprows=5, engine='openpyxl')
    df = df[df["Groupe"] != "moyenne"].copy()
    df = df[~df["Eleves"].isna()].copy()
    for info in (df.columns, df.tail(), df.shape):
        fLOG(info)

    temp = get_temp_folder(__file__, "temp_repository")

    # first attempt: an address is missing, an exception MailNotFound is accepted
    emails = ["*****@*****.**".lower(), "*****@*****.**"]
    try:
        proj = ProjectsRepository.create_folders_from_dataframe(
            df, temp, col_subject="sujet", fLOG=fLOG, col_group=None,
            col_student="Eleves", col_mail=None,
            email_function=emails, skip_if_nomail=True)
    except ProjectsRepository.MailNotFound:
        pass

    # second attempt with three addresses
    emails = ["*****@*****.**".lower(),
              "*****@*****.**",
              "*****@*****.**"]
    proj = ProjectsRepository.create_folders_from_dataframe(
        df, temp, col_subject="sujet", fLOG=fLOG, col_group=None,
        col_student="Eleves", col_mail=None,
        email_function=emails, must_have_email=False)

    # dumps the mails of the mocked mailbox
    data = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
    box = MailBoxMock(data, b"unittestunittest", fLOG)
    box.login()
    email_render = EmailMessageRenderer(
        tmpl=template_email_html_short, fLOG=fLOG)
    render = EmailMessageListRenderer(
        title="list of mails", email_renderer=email_render, fLOG=fLOG)
    mails = proj.dump_group_mails(
        render, group=None, mailbox=box, subfolder="trav",
        date=datetime.datetime(2015, 1, 9))
    box.logout()

    group_folder = os.path.join(temp, "ABOUT.firstname")
    suivi = os.path.join(group_folder, "suivi.rst")
    with open(suivi, "r", encoding="utf8") as f:
        content = f.read()
    self.assertIn("* mails: [email protected]", content)

    self.assertEqual(len(proj.Groups), 3)
    mails = proj.get_emails(proj.Groups[0])
    self.assertEqual(len(mails), 1)
    self.assertIn(mails[0], emails)

    css = os.path.join(temp, "mail_style.css")
    fLOG("------", os.path.exists(css))
    proj.write_run_command()
    proj.write_summary()
    fLOG("------")

    # every expected file must exist and must not mention the test folder
    files = [
        os.path.join(temp, "index.html"),
        os.path.join(
            temp, "ABOUT.firstname",
            "d_2015-08-01_p_noreply-at-voyages-sncf-com_ii_8de6a63addb7c03407bc6f0caabd967e.html"),
        os.path.join(temp, "mail_style.css"),
    ]
    checked = 0
    for name in files:
        if not os.path.exists(name):
            raise FileNotFoundError(name)
        checked += 1
        with open(name, "r", encoding="utf8") as f:
            content = f.read()
        self.assertNotIn("ut_automation_students", content)
    self.assertEqual(checked, len(files))
def write_summary(self, renderer=None, link="index_mails.html",
                  outfile="index.html", title="summary", nolink_if=None):
    """
    Produces a summary and uses a :epkg:`Jinja2` template.

    @param      renderer    instance of `EmailMessageRenderer <http://www.xavierdupre.fr/app/pymmails/
                            helpsphinx//pymmails/render/email_message_renderer.html>`_),
                            can be None, in which case one is created with the default
                            template and flushed once the summary is written
    @param      link        look for this file in each folder
    @param      outfile     output file
    @param      nolink_if   link containing those strings will be removed
                            (if None, a default set will be assigned)
    @param      title       title
    @return                 summary (what ``renderer.write`` returns)

    For every group, the function counts the files (``.metadata`` files
    are ignored), sums their sizes, lists the attachments, the files
    found below an ``attachments`` folder, the mails and the links found
    in the mails.

    The current default template is::

    .. runpython::

        from ensae_teaching_cs.automation_students.projects_repository import _default_template_summary_template
        print(_default_template_summary)
    """
    if nolink_if is None:
        nolink_if = ProjectsRepository._known_strings

    # names containing one of these strings are never listed
    junk = ("__MACOSX", ".ipynb_checkpoints")

    def filter_in(url):
        "tells if a link extracted from a mail should be kept"
        if "\n" in url or "\r" in url or "\t" in url:
            return False
        # the regular expression sometimes captures the html entity
        # which closes an attribute
        if url.endswith("&quot;"):
            return False
        if any(sub in url for sub in nolink_if):
            return False
        return ".ipynb_checkpoints" not in url

    def clean_url(u):
        "removes html entities and punctuation surrounding a link"
        # NOTE(review): the three entity literals (&#43;, &nbsp;, &quot;)
        # were restored from a copy in which they had been html-decoded,
        # confirm against the repository.
        u = u.replace("&#43;", "+").strip(".#'/ \r\n\t ")
        if u.endswith("&nbsp;"):
            u = u[:-6]
        return u

    def url_domain_name(url):
        "returns the domain and a short name (at most 30 characters) for a link"
        domain = urlparse(url).netloc
        pieces = [_ for _ in url.split("/") if _]
        last = pieces[-1] if pieces else domain
        if len(last) > 30:
            last = last[-30:]
        return domain, clean_url(last)

    def format_size(s):
        "converts a number of bytes into a readable string"
        if s <= 2 ** 11:
            return "{0} bytes".format(s)
        if s <= 2 ** 21:
            return "{0} Kb".format(s // (2 ** 10))
        if s <= 2 ** 31:
            return "{0} Mb".format(s // (2 ** 20))
        return "{0} Gb".format(s // (2 ** 30))

    groups = []
    for group in self.Groups:
        loc = self.get_group_location(group)
        lp = os.path.join(loc, link)
        if os.path.exists(lp):
            group_link = os.path.relpath(lp, self._location)
        else:
            group_link = "file:///{0}".format(group)

        nb_files = 0
        size = 0
        atts = []
        emails = []
        links = []
        created_files = []

        for name in self.enumerate_group_files(group):
            if name.endswith(".metadata"):
                continue
            nb_files += 1
            file_size = os.stat(name).st_size
            size += file_size
            folder = os.path.split(name)[0]
            splf = folder.replace("\\", "/").split("/")

            if folder.endswith("attachments"):
                # a file directly attached to a mail
                meta = name + ".metadata"
                if os.path.exists(meta):
                    data = EmailMessage.read_metadata(meta)
                    day = data["date"].strftime("%Y-%m-%d")
                else:
                    data = None
                    day = ""
                atts.append(
                    (day, os.path.relpath(name, self._location), data))
            elif "attachments" in splf:
                # a file in a subfolder of the attachments
                rel = os.path.relpath(name, loc)
                dest = os.path.relpath(name, self._location)
                if rel == dest:
                    raise Exception("weird\n{0}\n{1}".format(rel, dest))
                ssize = format_size(file_size)
                if not any(j in rel or j in dest for j in junk):
                    created_files.append((rel, dest, ssize))
            else:
                # anything else is considered as a mail
                mail = os.path.split(name)[-1]
                res = EmailMessage.interpret_default_filename(mail)
                if "date" in res and "uid" in res and "from" in res:
                    emails.append(
                        (res["date"], res["from"], res["uid"], res))
                    with open(os.path.join(loc, mail), "r", encoding="utf8") as f:
                        content = f.read()
                    urls = ProjectsRepository._link_regex.findall(content)
                    for u in set(urls):
                        u = clean_url(u)
                        if not filter_in(u):
                            continue
                        domain, last = url_domain_name(u)
                        links.append(
                            (res["date"], res["from"], clean_url(u), domain, last))

        # we sort
        atts.sort()
        links.sort()

        # we clean duplicated links
        unique_links = []
        seen = set()
        for date, from_, url, domain, last in links:
            if url in seen:
                continue
            if any(j in url or j in last for j in junk):
                continue
            unique_links.append((date, from_, url, domain, last))
            seen.add(url)

        # we create the variable for the template,
        # the dictionary stored in last position is not part of the
        # sorting key (dictionaries cannot be ordered)
        emails = [_[-1] for _ in sorted(emails, key=lambda e: e[:3])]
        groups.append(dict(link=group_link.replace("\\", "/"), group=group,
                           nb=nb_files, size=size, attachments=atts,
                           emails=emails, links=unique_links,
                           created_files=created_files))

    # final summary
    dof = renderer is None
    if dof:
        tmpl = ProjectsRepository._default_template_summary
        renderer = EmailMessageRenderer(tmpl=tmpl, fLOG=self.fLOG)

    res = renderer.write(filename=outfile, location=self.Location,
                         mail=None, attachments=None, groups=groups,
                         title=title, len=len, os=os, format_size=format_size)
    if dof:
        # the renderer was created here, nobody else will flush it
        renderer.flush()
    return res
def write_summary(self, render=None, link="index_mails.html",
                  outfile="index.html", title="summary", nolink_if=None):
    """
    Produces a summary and uses a Jinja2 template.

    @param      render      instance of `EmailMessageRenderer <http://www.xavierdupre.fr/app/pymmails/helpsphinx//pymmails/render/email_message_renderer.html>`_),
                            can be None, in which case one is created with the default
                            template and flushed once the summary is written
    @param      link        look for this file in each folder
    @param      outfile     output file
    @param      title       title
    @param      nolink_if   link containing those strings will be removed
                            (if None, a default set will be assigned)
    @return                 summary (what ``render.write`` returns)

    For every group, the function counts the files (``.metadata`` files
    are ignored), sums their sizes, lists the attachments, the files
    found below an ``attachments`` folder, the mails and the links found
    in the mails.

    The default template is the variable ``tmpl_lines`` defined at the
    end of the function. It receives the variables ``groups``, ``title``,
    ``len``, ``os``, ``format_size``.
    """
    if nolink_if is None:
        nolink_if = ProjectsRepository._known_strings

    def filter_in(url):
        "tells if a link extracted from a mail should be kept"
        # every test applies to the parameter (two of them used to read
        # a variable of the enclosing function)
        if "\n" in url or "\r" in url or "\t" in url:
            return False
        # the regular expression sometimes captures the html entity
        # which closes an attribute
        if url.endswith("&quot;"):
            return False
        return not any(sub in url for sub in nolink_if)

    def clean_url(u):
        "removes html entities and punctuation surrounding a link"
        # NOTE(review): the three entity literals (&#43;, &nbsp;, &quot;)
        # were restored from a copy in which they had been html-decoded,
        # confirm against the repository.
        u = u.replace("&#43;", "+").strip(".#'/ \r\n\t ")
        if u.endswith("&nbsp;"):
            u = u[:-6]
        return u

    def url_domain_name(url):
        "returns the domain and a short name (at most 30 characters) for a link"
        domain = urlparse(url).netloc
        pieces = [_ for _ in url.split("/") if _]
        last = pieces[-1] if pieces else domain
        if len(last) > 30:
            last = last[-30:]
        return domain, clean_url(last)

    def format_size(s):
        "converts a number of bytes into a readable string"
        if s <= 2 ** 11:
            return "{0} bytes".format(s)
        if s <= 2 ** 21:
            return "{0} Kb".format(s // (2 ** 10))
        if s <= 2 ** 31:
            return "{0} Mb".format(s // (2 ** 20))
        return "{0} Gb".format(s // (2 ** 30))

    groups = []
    for group in self.Groups:
        loc = self.get_group_location(group)
        lp = os.path.join(loc, link)
        if os.path.exists(lp):
            group_link = os.path.relpath(lp, self._location)
        else:
            group_link = "file:///{0}".format(group)

        nb_files = 0
        size = 0
        atts = []
        emails = []
        links = []
        created_files = []

        for name in self.enumerate_group_files(group):
            if name.endswith(".metadata"):
                continue
            nb_files += 1
            file_size = os.stat(name).st_size
            size += file_size
            folder = os.path.split(name)[0]
            splf = folder.replace("\\", "/").split("/")

            if folder.endswith("attachments"):
                # a file directly attached to a mail
                meta = name + ".metadata"
                if os.path.exists(meta):
                    data = EmailMessage.read_metadata(meta)
                    day = data["date"].strftime("%Y-%m-%d")
                else:
                    data = None
                    day = ""
                atts.append(
                    (day, os.path.relpath(name, self._location), data))
            elif "attachments" in splf:
                # a file in a subfolder of the attachments
                rel = os.path.relpath(name, loc)
                dest = os.path.relpath(name, self._location)
                if rel == dest:
                    raise Exception("weird\n{0}\n{1}".format(rel, dest))
                created_files.append((rel, dest, format_size(file_size)))
            else:
                # anything else is considered as a mail
                mail = os.path.split(name)[-1]
                res = EmailMessage.interpret_default_filename(mail)
                if "date" in res and "uid" in res and "from" in res:
                    emails.append(
                        (res["date"], res["from"], res["uid"], res))
                    with open(os.path.join(loc, mail), "r", encoding="utf8") as f:
                        content = f.read()
                    urls = ProjectsRepository._link_regex.findall(content)
                    for u in set(urls):
                        u = clean_url(u)
                        if not filter_in(u):
                            continue
                        domain, last = url_domain_name(u)
                        links.append(
                            (res["date"], res["from"], clean_url(u), domain, last))

        # we sort
        atts.sort()
        links.sort()

        # we clean duplicated links
        unique_links = []
        seen = set()
        for date, from_, url, domain, last in links:
            if url in seen:
                continue
            unique_links.append((date, from_, url, domain, last))
            seen.add(url)

        # we create the variable for the template,
        # the dictionary stored in last position is not part of the
        # sorting key (dictionaries cannot be ordered)
        emails = [_[-1] for _ in sorted(emails, key=lambda e: e[:3])]
        groups.append(dict(link=group_link.replace("\\", "/"), group=group,
                           nb=nb_files, size=size, attachments=atts,
                           emails=emails, links=unique_links,
                           created_files=created_files))

    dof = render is None
    if dof:
        # The template is built from a list of lines: removing spaces
        # from a single literal would also remove the spaces inside
        # the Jinja2 tags.
        tmpl_lines = [
            '<?xml version="1.0" encoding="utf-8"?>',
            '<head>',
            '<meta http-equiv="content-type" content="text/html; charset=utf-8" />',
            '</head>',
            '<body>',
            '<html>',
            '<head>',
            '<title>{{ title }}</title>',
            '<link rel="stylesheet" type="text/css" href="{{ css }}">',
            '</head>',
            '<body>',
            '<h1>{{ title }}</h1>',
            '<ol type="1">',
            '{% for ps in groups %}',
            '<li><a href="{{ ps["link"] }}">{{ ps["group"] }}</a><small><i>',
            '{{ ps["nb"] }} files - {{ format_size(ps["size"]) }} - last mail {{ ps["emails"][-1]["date"] }}',
            '--- {{ len(ps["attachments"]) }} attachments</i></small>',
            '{% if len(ps["attachments"]) > 0 %}',
            '<ul>',
            '{% for day, att, data in ps["attachments"] %}',
            '<li>att: {{ day }} - <a href="{{ att }}">{{ os.path.split(att)[-1] }}</a></li>',
            '{% endfor %}',
            '{% for date, from_, url, domain, last in ps["links"] %}',
            '<li>link: {{ date }} <a href="{{ url }}">{{ domain }} // {{ last }}</a> from {{ from_ }}</li>',
            '{% endfor %}',
            '</ul>',
            '{% endif %}',
            '{% if len(ps["created_files"]) > 0 %}',
            '<ul>',
            '{% for name, relpath, size in ps["created_files"] %}',
            '<li>added: <a href="{{ relpath }}">{{ name }}</a> {{ size }}</li>',
            '{% endfor %}',
            '</ul>',
            '{% endif %}',
            '</li>',
            '{% endfor %}',
            '</ol>',
            '</body>',
            '</html>',
            '',
        ]
        tmpl = "\n".join(tmpl_lines)
        render = EmailMessageRenderer(tmpl=tmpl, fLOG=self.fLOG)

    res = render.write(filename=outfile, location=self.Location,
                       mail=None, attachments=None, groups=groups,
                       title=title, len=len, os=os, format_size=format_size)
    if dof:
        # the renderer was created here, nobody else will flush it
        render.flush()
    return res