Exemple #1
0
    def test_tohtml(self):
        """
        Unpickles the object stored in ``data/message.pickle``, dumps it
        with an ``EmailMessageRenderer`` into a temporary folder and checks
        the name and the content of the first file which was produced.

        The test is skipped when the interpreter comes from Anaconda.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        data = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))
        mesf = os.path.join(data, "message.pickle")

        if "anaconda" in sys.executable.lower() or "anaconda" in sys.base_prefix.lower():
            # issue with Anaconda about module pickle
            # pickle has issues when getting a file saved by pickle on another
            # distribution
            return

        with open(mesf, "rb") as f:
            try:
                import pymmails
                assert pymmails is not None
                obj = pickle.load(f)
            except ImportError:
                # second attempt: make the sources of the package importable
                path = os.path.normpath(
                    os.path.abspath(
                        os.path.join(
                            os.path.split(__file__)[0],
                            "..",
                            "..",
                            "src")))
                added = path not in sys.path
                if added:
                    sys.path.append(path)
                try:
                    import pymmails
                    assert pymmails is not None
                    # the first attempt may have consumed part of the stream
                    f.seek(0)
                    obj = pickle.load(f)
                finally:
                    # only removes what this test added, an entry which
                    # was already there is left untouched
                    if added:
                        sys.path.remove(path)

        temp = get_temp_folder(__file__, "temp_dump_html")
        render = EmailMessageRenderer()
        ff = obj.dump(render, location=temp, fLOG=fLOG)
        render.flush()
        fLOG("ff=", type(ff), ff)
        # ff[0][0] is opened below as the path of the rendered html file
        with open(ff[0][0], "r", encoding="utf8") as f:
            content = f.read()
        if '<link rel="stylesheet" type="text/css" href="mail_style.css">' not in content:
            raise Exception(content)
        if "d_2014-12-15_p_yyyyy-matthieu-at-xxxxx-xxx_ii_48bdbc9f9fd180ab917cec5bed8ca529.html" not in ff[0][0]:
            raise Exception(ff[0][0])
        if "<h1>2014/12/15 - projet 3A - élément logiciel</h1>" not in content:
            raise Exception(content)
Exemple #2
0
    def test_box_mock(self):
        """
        Reads the mails of folder ``trav`` from a ``MailBoxMock`` built on
        the ``data`` folder, checks that the first mail survives a round trip
        through bytes and that its HTML rendering mentions the css file and
        one of the expected dates.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")
        data = os.path.abspath(os.path.join(os.path.dirname(__file__), "data"))

        box = MailBoxMock(data, b"unittestunittest", fLOG)
        box.login()
        folders = box.folders()
        self.assertEqual(len(folders), 1)
        fLOG(folders)
        mails = list(box.enumerate_mails_in_folder("trav"))
        box.logout()

        fLOG(len(mails))
        self.assertTrue(len(mails) > 0)
        first = mails[0]

        # round trip: mail -> bytes -> mail, the subject must be preserved
        raw = first.as_bytes()
        rebuilt = EmailMessage.create_from_bytes(raw)
        before = first.to_dict()
        after = rebuilt.to_dict()
        self.assertEqual(before["Subject"], after["Subject"])

        render = EmailMessageRenderer()
        html, _, __ = render.render(
            "__LOC__", first, file_css="example_css.css", attachments=None)
        if "example_css.css" not in html:
            raise Exception(html)

        # the rendering must contain at least one of these dates
        expected_dates = (
            "<tr><th>Date</th><td>Sat, 1 Aug 2015",
            "<tr><th>Date</th><td>Fri, 14 Aug 2015",
            "<tr><th>Date</th><td>Fri, 20 Aug 2015",
            "<tr><th>Date</th><td>Sun, 20 Dec 2015",
        )
        if not any(marker in html for marker in expected_dates):
            raise Exception(html)
        fLOG(html)
Exemple #3
0
    def test_mailbox_dump(self):
        """
        Connects to an IMAP mailbox and dumps the mails of folder ``test4``
        received since ``1-Jan-2016`` into a temporary folder.

        The password is retrieved with :epkg:`keyring`, the test is
        skipped on travis or appveyor.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        if is_travis_or_appveyor():
            warnings.warn("requires a password")
            return
        with warnings.catch_warnings():
            # the import of keyring may raise DeprecationWarning
            warnings.simplefilter('ignore', DeprecationWarning)
            import keyring
        code = keyring.get_password("sdut", "pymmails")
        temp = get_temp_folder(__file__, "temp_dump")
        box = MailBoxImap("unittest.sdpython", code,
                          "imap.gmail.com", ssl=True, fLOG=fLOG)
        render = EmailMessageRenderer()
        box.login()
        try:
            mails = box.enumerate_mails_in_folder("test4", date="1-Jan-2016")
            for mail in mails:
                mail.dump(render, location=temp, fLOG=fLOG)
            render.flush()
        finally:
            # the session is closed even if a mail cannot be dumped
            box.logout()
Exemple #4
0
    def test_regular_expression(self):
        """
        Calls ``process_body_html`` on a HTML body which references an image
        through ``cid:<id>`` and checks that the identifier of the attachment
        disappears from the output and that ``src="attachements/image.png"``
        shows up instead.
        """
        here = os.path.abspath(os.path.dirname(__file__))
        cid = "1146aa0a-244a-440e-8ea5-7b272c94f89a"

        html_body = """
                    </div>
                    <div><img name="14a318e16161c62a_14a31789f7a34aae_null"
                              title="pastedImage.png"
                              src="cid:f8b05bd4-1c83-47bc-af9d-0032ba9c018e"><br>
                    </div>
                    <div>4. Vous m&#39;avez demande d&#39;afficher l&#39;arbre de decision pour mon random forest,
                    mais apparemment &quot;.tree_&quot; n&#39;existe que pour les decision trees . J&#39;ai donc
                    essaye de le faire avec un DT apres avoir telecharge le logiciel pour tracer l&#39;arbre mais ca ne marche
                     pas<br>
                    </div>
                    <div><br>
                    </div>
                    <div><img name="14a318e16161c62a_14a31789f7a34aae_null" title="pastedImage.png"
                              src="cid:1146aa0a-244a-440e-8ea5-7b272c94f89a"
                              height="153.02644466209597" width="560"><br>
                    </div>
                    <div><br>
                    ?<sp
                    """.replace("                    ", "")

        # one attachment: (local file, None, content id)
        image = os.path.join(here, "attachements", "image.png")
        attachments = [(image, None, cid)]

        renderer = EmailMessageRenderer()
        rendered = renderer.process_body_html(here, html_body, attachments)
        assert cid not in rendered

        expected = 'src="attachements/image.png"'
        # separators are normalized to make the check work on Windows too
        if expected not in rendered.replace("\\", "/"):
            raise Exception(
                'string "attachements/image.png" not found in\n{0}'.format(rendered))
        df,
        folder,
        col_subject="sujet",
        fLOG=fLOG,
        col_group="groupe",
        col_student="nom_prenom",
        email_function=emails,
        skip_if_nomail=False,
        must_have_email=True)
fLOG("nb groups", len(proj.Groups))

#############
# dump mails

if do_mail:
    # renderer for one mail, then the renderer for the list of mails
    email_render = EmailMessageRenderer(
        tmpl=template_email_html_short, fLOG=fLOG)
    render = EmailMessageListRenderer(
        title="list of mails", email_renderer=email_render, fLOG=fLOG)

    # the mails are dumped while the IMAP session is opened
    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    mails = proj.dump_group_mails(
        render, group=None, mailbox=box, subfolder=mailfolder,
        date=date, overwrite=False, convert_files=True)
    box.logout()
Exemple #6
0
    def write_summary(self,
                      renderer=None,
                      link="index_mails.html",
                      outfile="index.html",
                      title="summary",
                      nolink_if=None):
        """
        Produces a summary and uses a :epkg:`Jinja2` template.

        @param      renderer    instance of `EmailMessageRenderer
                                <http://www.xavierdupre.fr/app/pymmails/
                                helpsphinx//pymmails/render/email_message_renderer.html>`_),
                                can be None, in that case a renderer is created
                                with the default template and flushed at the end
        @param      link        look for this file in each folder
        @param      outfile     output file
        @param      title       title
        @param      nolink_if   link containing those strings will be removed (if None, a default set will be assigned)
        @return                 summary (what ``renderer.write`` returns)

        The current default template is:

        .. runpython::

            from ensae_teaching_cs.automation_students.projects_repository import ProjectsRepository
            print(ProjectsRepository._default_template_summary)
        """
        if nolink_if is None:
            nolink_if = ProjectsRepository._known_strings

        def filter_in(url):
            "tells if a url should be kept in the summary"
            if "\n" in url or "\r" in url or "\t" in url:
                return False
            if url.endswith("&quot;"):
                return False
            if any(pattern in url for pattern in nolink_if):
                return False
            return ".ipynb_checkpoints" not in url

        def clean_url(u):
            "removes html entities and trailing punctuation from a url"
            u = u.replace("&#43;", "+").strip(".#'/ \r\n\t ")
            if u.endswith("&nbsp;"):
                u = u[:-6]
            return u

        def url_domain_name(url):
            "returns the domain and the last part of a url (30 characters at most)"
            r = urlparse(url)
            domain = r.netloc
            name = [_ for _ in url.split("/") if _]
            last = name[-1] if len(name) > 0 else domain
            if len(last) > 30:
                last = last[-30:]
            return domain, clean_url(last)

        def format_size(s):
            "converts a size in bytes into a readable string"
            if s <= 2**11:
                return "{0} bytes".format(s)
            elif s <= 2**21:
                return "{0} Kb".format(s // (2**10))
            elif s <= 2**31:
                return "{0} Mb".format(s // (2**20))
            else:
                return "{0} Gb".format(s // (2**30))

        groups = []
        for group in self.Groups:
            loc = self.get_group_location(group)
            lp = os.path.join(loc, link)
            if os.path.exists(lp):
                group_link = os.path.relpath(lp, self._location)
            else:
                group_link = "file:///{0}".format(group)
            nb_files = 0
            size = 0
            atts = []
            emails = []
            links = []
            created_files = []
            for name in self.enumerate_group_files(group):
                if name.endswith(".metadata"):
                    continue
                nb_files += 1
                fsize = os.stat(name).st_size
                size += fsize
                folder = os.path.split(name)[0]
                splf = folder.replace("\\", "/").split("/")
                if folder.endswith("attachments"):
                    # file directly stored in an attachments folder,
                    # its date comes from the metadata file if there is one
                    meta = name + ".metadata"
                    if os.path.exists(meta):
                        data = EmailMessage.read_metadata(meta)
                        day = data["date"].strftime("%Y-%m-%d")
                    else:
                        data = None
                        day = ""
                    atts.append((day, os.path.relpath(name,
                                                      self._location), data))
                elif "attachments" in splf:
                    # file stored in a subfolder of an attachments folder
                    rel = os.path.relpath(name, loc)
                    dest = os.path.relpath(name, self._location)
                    if rel == dest:
                        raise Exception("weird\n{0}\n{1}".format(rel, dest))
                    ssize = format_size(fsize)
                    if "__MACOSX" not in rel and "__MACOSX" not in dest and \
                            ".ipynb_checkpoints" not in dest and ".ipynb_checkpoints" not in rel:
                        created_files.append((rel, dest, ssize))
                else:
                    # any other file is considered as a mail if its name
                    # can be interpreted
                    mail = os.path.split(name)[-1]
                    res = EmailMessage.interpret_default_filename(mail)
                    if "date" in res and "uid" in res and "from" in res:
                        emails.append(
                            (res["date"], res["from"], res["uid"], res))
                        with open(os.path.join(loc, mail),
                                  "r",
                                  encoding="utf8") as f:
                            content = f.read()
                        urls = ProjectsRepository._link_regex.findall(content)
                        for u in set(urls):
                            u = clean_url(u)
                            if not filter_in(u):
                                continue
                            domain, last = url_domain_name(u)
                            links.append((res["date"], res["from"],
                                          clean_url(u), domain, last))

            # we sort
            atts.sort()

            # we clean duplicated links, the first occurrence
            # (in sorted order) is kept
            unique_links = []
            seen = set()
            for date, from_, url, domain, last in sorted(links):
                if url in seen:
                    continue
                if "__MACOSX" in url or "__MACOSX" in last or \
                        ".ipynb_checkpoints" in last or ".ipynb_checkpoints" in url:
                    continue
                unique_links.append((date, from_, url, domain, last))
                seen.add(url)

            # we create the variable for the template,
            # mails are sorted by (date, from, uid) only, the dictionary
            # is left out of the key because dictionaries cannot be compared
            emails = [_[-1] for _ in sorted(emails, key=lambda e: e[:3])]
            groups.append(dict(link=group_link.replace("\\", "/"),
                               group=group,
                               nb=nb_files,
                               size=size,
                               attachments=atts,
                               emails=emails,
                               links=unique_links,
                               created_files=created_files))

        # final summary
        if renderer is None:
            tmpl = ProjectsRepository._default_template_summary
            renderer = EmailMessageRenderer(tmpl=tmpl, fLOG=self.fLOG)
            dof = True
        else:
            dof = False
        res = renderer.write(filename=outfile,
                             location=self.Location,
                             mail=None,
                             attachments=None,
                             groups=groups,
                             title=title,
                             len=len,
                             os=os,
                             format_size=format_size)
        if dof:
            # the renderer was created here, it is flushed here
            renderer.flush()
        return res
Exemple #7
0
def extract_students_mails_from_gmail_and_stores_in_folders(folder=".", filemails="emails.txt",
                                                            user=None, pwd=None, server="imap.gmail.com",
                                                            mailfolder=[
                                                                "ensae/ENSAE_2016_3A"],
                                                            date="1-Jan-2016", zipfilename="projet_3A_2016.zip",
                                                            zipencpwd=b"sixteenbyteskeys", dataframe=None,
                                                            columns={
                                                                "name": "nom_prenom", "group": "groupe", "subject": "sujet"},
                                                            skip_names=None, process_name=None,
                                                            title="List of emails", nolink_if=None, fLOG=fLOG):
    """
    The scenario is the following:

    * You are the teacher.
    * Students started their projects at date *t*.
    * They can work alone or by group.
    * They send mails, you reply.
    * Their mail address follows the convention: ``<first name>.<last name>@anything``
      so it is easy to associate a mail address to a student name.
    * You move every mail you received in a separate folder in your inbox.
    * Sometimes, you send a mail to everybody.
    * Finally they send their project with attachments.
    * You want to store everything (mails and attachments) in folders, one per group.
    * You want a summary of what was received.
    * You want to build a zip file to share their work with other teachers.
    * You want to update the folder if a new mail was sent.

    This function looks into a folder of your inbox and grabs every mail and
    attachment from groups of students.

    @param      folder              where to store the results
    @param      filemails           file used to store students addresses (relative to *folder*),
                                    the operation is done once, remove the file
                                    to force the function to rebuild the information.
    @param      user                user of the gmail inbox
    @param      pwd                 password of the gmail inbox
    @param      server              gmail server, it should be ``"imap.gmail.com"``,
                                    it works with other mail servers using the *IMAP* protocol
    @param      mailfolder          folder in your inbox to look into,
                                    there can be several
    @param      date                when to start looking (do not change the format,
                                    look at the default value)
    @param      zipfilename         name of the zip file to create (relative to *folder*)
    @param      zipencpwd           the zip file is also encrypted for a safer share with this key
                                    and function `encrypt_stream <http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/
                                    pyquickhelper/filehelper/encryption.html#pyquickhelper.filehelper.encryption.encrypt_stream>`_.
    @param      dataframe           dataframe which contains the definition of students groups,
                                    a :epkg:`pandas` DataFrame or a filename (``.xlsx`` or
                                    tab separated text file), it cannot be None
    @param      columns             columns the function will look into, students names, group definition
                                    (a unique number for all students in the same group), subject,
                                    an optional key ``"mail"`` gives the column which contains the addresses
    @param      skip_names          list of names to skip
    @param      process_name        to operate a transformation before matching students names with
                                    their emails
    @param      title               each group folder contains a html file connecting them,
                                    this is its title
    @param      nolink_if           The summary extracts links from url, it skips the urls which
                                    contain one of the substrings included in that list (None to use a default set)
    @param      fLOG                logging function
    @return                         @see cl ProjectsRepository

    By default, Gmail does not let you programmatically access your own inbox,
    you need to modify your gmail parameters to let this function do so.
    """
    # the results go into the requested folder
    # (the default value "." still means the current directory)
    folder = os.path.abspath(folder)
    filemails = os.path.join(folder, filemails)
    zipfilename = os.path.join(folder, zipfilename)
    zipfilenameenc = zipfilename + ".enc"

    # load the groups
    if isinstance(dataframe, pandas.DataFrame):
        df = dataframe
    elif dataframe.endswith("xlsx"):
        fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] read dataframe", dataframe)
        df = pandas.read_excel(dataframe)
    else:
        df = pandas.read_csv(dataframe, sep="\t", encoding="utf8")

    # check mails
    if "mail" not in columns:
        if os.path.exists(filemails):
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] read addresses from ", filemails)
            with open(filemails, "r", encoding="utf8") as f:
                lines = f.readlines()
            emails = [li.strip("\r\t\n ") for li in lines]
        else:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] mine address ")
            box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
            box.login()
            try:
                emails = grab_addresses(box, mailfolder, date, fLOG=fLOG)
            finally:
                # the session is closed even if the addresses cannot be read
                box.logout()

            with open(filemails, "w", encoding="utf8") as f:
                f.write("\n".join(emails))
    else:
        # nothing to do mail already present
        emails = set(df[columns["mail"]])

    # we remove empty names
    df = df[~df[columns["name"]].isnull()].copy()

    if process_name:
        df[columns["name"]] = df[columns["name"]].apply(process_name)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] create groups folders in", folder)
    # NOTE(review): this instance is replaced right after, it is kept in case
    # the constructor has side effects (folder creation?) -- to be confirmed
    proj = ProjectsRepository(folder, fLOG=fLOG)

    # columns.get: the key "mail" is optional (it is absent from the default
    # value), None is then given to col_mail
    proj = ProjectsRepository.create_folders_from_dataframe(df, folder,
                                                            col_subject=columns["subject"],
                                                            fLOG=fLOG,
                                                            col_group=columns["group"],
                                                            col_student=columns["name"],
                                                            email_function=emails,
                                                            skip_if_nomail=False,
                                                            col_mail=columns.get("mail"),
                                                            must_have_email=True,
                                                            skip_names=skip_names)
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] nb groups", len(
        proj.Groups))

    # gathers mails
    email_renderer = EmailMessageRenderer(tmpl=template_email_html_short,
                                          fLOG=fLOG)
    renderer = EmailMessageListRenderer(title=title, email_renderer=email_renderer,
                                        fLOG=fLOG)

    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        proj.dump_group_mails(renderer, group=None, mailbox=box, subfolder=mailfolder,
                              date=date, overwrite=False, skip_if_empty=True)
    finally:
        box.logout()

    # cleaning files: a group with at most one html file is removed,
    # the loop goes through a copy because groups are removed on the way
    for group in list(proj.Groups):
        files = list(proj.enumerate_group_files(group))
        att = [_ for _ in files if ".html" in _]
        if len(att) <= 1:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] remove '{}'".format(group))
            proj.remove_group(group)

    # unzip files and convert notebooks
    for group in proj.Groups:
        proj.unzip_convert(group)

    summary = os.path.join(folder, "index.html")
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] write summary '{}'".format(summary))
    if os.path.exists(summary):
        os.remove(summary)
    proj.write_run_command()
    proj.write_summary(nolink_if=nolink_if)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] zip everything in", zipfilename)
    if os.path.exists(zipfilename):
        os.remove(zipfilename)
    proj.zip_group(None, zipfilename,
                   addition=["index.html", "mail_style.css", "emails.txt"])

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] encrypt the zip file in '{}'.".format(
        zipfilenameenc))
    if os.path.exists(zipfilenameenc):
        os.remove(zipfilenameenc)
    encrypt_stream(zipencpwd, zipfilename, zipfilenameenc, chunksize=2 ** 30)

    return proj
    def write_summary(self,
                      render=None,
                      link="index_mails.html",
                      outfile="index.html",
                      title="summary",
                      nolink_if=None):
        """
        Produces a summary and uses a Jinja2 template.

        @param      render      instance of `EmailMessageRenderer <http://www.xavierdupre.fr/app/pymmails/helpsphinx//pymmails/render/email_message_renderer.html>`_),
                                can be None, in that case a renderer is created
                                with the default template and flushed at the end
        @param      link        look for this file in each folder
        @param      outfile     output file
        @param      title       title
        @param      nolink_if   link containing those strings will be removed (if None, a default set will be assigned)
        @return                 summary (what ``render.write`` returns)

        The current default template is::

            <?xml version="1.0" encoding="utf-8"?>
            <head>
            <meta http-equiv="content-type" content="text/html; charset=utf-8" />
            </head>
            <body>
            <html>
            <head>
            <title>{{ title }}</title>
            <link rel="stylesheet" type="text/css" href="{{ css }}">
            </head>
            <body>
            <h1>{{ title }}</h1>
            <ol type="1">
            {% for ps in groups %}
                <li><a href="{{ ps["link"] }}">{{ ps["group"] }}</a><small><i>
                    {{ ps["nb"] }} files - {{ format_size(ps["size"]) }} -
                    last mail {{ ps["emails"][-1]["date"] }} ---
                    {{ len(ps["attachments"]) }} attachments</i></small>
                {% if len(ps["attachments"]) > 0 %}
                    <ul>
                    {% for day, att, data in ps["attachments"] %}
                        <li>att: {{ day }} - <a href="{{ att }}">{{ os.path.split(att)[-1] }}</a></li>
                    {% endfor %}
                    {% for date, from_, url, domain, last in ps["links"] %}
                        <li>link: {{ date }} <a href="{{ url }}">{{ domain }} // {{ last }}</a> from {{ from_ }}</li>
                    {% endfor %}
                    </ul>
                {% endif %}
                {% if len(ps["created_files"]) > 0 %}
                    <ul>
                    {% for name, relpath, size in ps["created_files"] %}
                        <li>added: <a href="{{ relpath }}">{{ name }}</a> {{ size }}</li>
                    {% endfor %}
                    </ul>
                {% endif %}
                </li>
            {% endfor %}
            </ol>
            </body>
            </html>

        """
        if nolink_if is None:
            nolink_if = ProjectsRepository._known_strings

        def filter_in(url):
            "tells if a url should be kept in the summary"
            # every test applies to the argument, not to a variable
            # of the enclosing function
            if "\n" in url or "\r" in url or "\t" in url:
                return False
            if url.endswith("&quot;"):
                return False
            return not any(pattern in url for pattern in nolink_if)

        def clean_url(u):
            "removes html entities and trailing punctuation from a url"
            u = u.replace("&#43;", "+").strip(".#'/ \r\n\t ")
            if u.endswith("&nbsp;"):
                u = u[:-6]
            return u

        def url_domain_name(url):
            "returns the domain and the last part of a url (30 characters at most)"
            r = urlparse(url)
            domain = r.netloc
            name = [_ for _ in url.split("/") if _]
            last = name[-1] if len(name) > 0 else domain
            if len(last) > 30:
                last = last[-30:]
            return domain, clean_url(last)

        def format_size(s):
            "converts a size in bytes into a readable string"
            if s <= 2**11:
                return "{0} bytes".format(s)
            elif s <= 2**21:
                return "{0} Kb".format(s // (2**10))
            elif s <= 2**31:
                return "{0} Mb".format(s // (2**20))
            else:
                return "{0} Gb".format(s // (2**30))

        groups = []
        for group in self.Groups:
            loc = self.get_group_location(group)
            lp = os.path.join(loc, link)
            if os.path.exists(lp):
                group_link = os.path.relpath(lp, self._location)
            else:
                group_link = "file:///{0}".format(group)
            nb_files = 0
            size = 0
            atts = []
            emails = []
            links = []
            created_files = []
            for name in self.enumerate_group_files(group):
                if name.endswith(".metadata"):
                    continue
                nb_files += 1
                fsize = os.stat(name).st_size
                size += fsize
                folder = os.path.split(name)[0]
                splf = folder.replace("\\", "/").split("/")
                if folder.endswith("attachments"):
                    # file directly stored in an attachments folder,
                    # its date comes from the metadata file if there is one
                    meta = name + ".metadata"
                    if os.path.exists(meta):
                        data = EmailMessage.read_metadata(meta)
                        day = data["date"].strftime("%Y-%m-%d")
                    else:
                        data = None
                        day = ""
                    atts.append((day, os.path.relpath(name,
                                                      self._location), data))
                elif "attachments" in splf:
                    # file stored in a subfolder of an attachments folder
                    rel = os.path.relpath(name, loc)
                    dest = os.path.relpath(name, self._location)
                    if rel == dest:
                        raise Exception("weird\n{0}\n{1}".format(rel, dest))
                    ssize = format_size(fsize)
                    created_files.append((rel, dest, ssize))
                else:
                    # any other file is considered as a mail if its name
                    # can be interpreted
                    mail = os.path.split(name)[-1]
                    res = EmailMessage.interpret_default_filename(mail)
                    if "date" in res and "uid" in res and "from" in res:
                        emails.append(
                            (res["date"], res["from"], res["uid"], res))
                        with open(os.path.join(loc, mail),
                                  "r",
                                  encoding="utf8") as f:
                            content = f.read()
                        urls = ProjectsRepository._link_regex.findall(content)
                        for u in set(urls):
                            u = clean_url(u)
                            if not filter_in(u):
                                continue
                            domain, last = url_domain_name(u)
                            links.append((res["date"], res["from"],
                                          clean_url(u), domain, last))

            # we sort
            atts.sort()

            # we clean duplicated links, the first occurrence
            # (in sorted order) is kept
            unique_links = []
            seen = set()
            for date, from_, url, domain, last in sorted(links):
                if url in seen:
                    continue
                unique_links.append((date, from_, url, domain, last))
                seen.add(url)

            # we create the variable for the template,
            # mails are sorted by (date, from, uid) only, the dictionary
            # is left out of the key because dictionaries cannot be compared
            emails = [_[-1] for _ in sorted(emails, key=lambda e: e[:3])]
            groups.append(dict(link=group_link.replace("\\", "/"),
                               group=group,
                               nb=nb_files,
                               size=size,
                               attachments=atts,
                               emails=emails,
                               links=unique_links,
                               created_files=created_files))

        if render is None:
            # default template, the indentation of the source is removed
            tmpl = """<?xml version="1.0" encoding="utf-8"?>
                    <head>
                    <meta http-equiv="content-type" content="text/html; charset=utf-8" />
                    </head>
                    <body>
                    <html>
                    <head>
                    <title>{{ title }}</title>
                    <link rel="stylesheet" type="text/css" href="{{ css }}">
                    </head>
                    <body>
                    <h1>{{ title }}</h1>
                    <ol type="1">
                    {% for ps in groups %}
                        <li><a href="{{ ps["link"] }}">{{ ps["group"] }}</a><small><i>
                            {{ ps["nb"] }} files - {{ format_size(ps["size"]) }} -
                            last mail {{ ps["emails"][-1]["date"] }} ---
                            {{ len(ps["attachments"]) }} attachments</i></small>
                        {% if len(ps["attachments"]) > 0 %}
                            <ul>
                            {% for day, att, data in ps["attachments"] %}
                                <li>att: {{ day }} - <a href="{{ att }}">{{ os.path.split(att)[-1] }}</a></li>
                            {% endfor %}
                            {% for date, from_, url, domain, last in ps["links"] %}
                                <li>link: {{ date }} <a href="{{ url }}">{{ domain }} // {{ last }}</a> from {{ from_ }}</li>
                            {% endfor %}
                            </ul>
                        {% endif %}
                        {% if len(ps["created_files"]) > 0 %}
                            <ul>
                            {% for name, relpath, size in ps["created_files"] %}
                                <li>added: <a href="{{ relpath }}">{{ name }}</a> {{ size }}</li>
                            {% endfor %}
                            </ul>
                        {% endif %}
                        </li>
                    {% endfor %}
                    </ol>
                    </body>
                    </html>
                    """.replace("                    ", "")
            render = EmailMessageRenderer(tmpl=tmpl, fLOG=self.fLOG)
            dof = True
        else:
            dof = False
        res = render.write(filename=outfile,
                           location=self.Location,
                           mail=None,
                           attachments=None,
                           groups=groups,
                           title=title,
                           len=len,
                           os=os,
                           format_size=format_size)
        if dof:
            # the renderer was created here, it is flushed here
            render.flush()
        return res
Exemple #9
0
    def test_sections(self):
        """
        Creates a repository of projects from an Excel sheet, calls
        ``dump_group_mails`` with a ``MailBoxMock``, writes the summary
        and checks the files the repository produced.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        here = os.path.abspath(os.path.dirname(__file__))
        data = os.path.join(here, "data")
        sheet = os.path.join(data, "notes_eleves_2104_2015.xlsx")
        df = pandas.read_excel(sheet, skiprows=5, engine='openpyxl')
        df = df[df["Groupe"] != "moyenne"].copy()
        df = df[~df["Eleves"].isna()].copy()
        fLOG(df.columns)
        fLOG(df.tail())
        fLOG(df.shape)

        # keyword arguments shared by both calls which create the folders
        shared = dict(col_subject="sujet", fLOG=fLOG, col_group=None,
                      col_student="Eleves", col_mail=None)

        # first attempt with two addresses: MailNotFound is tolerated
        emails = ["*****@*****.**".lower(),
                  "*****@*****.**"]
        temp = get_temp_folder(__file__, "temp_repository")
        try:
            proj = ProjectsRepository.create_folders_from_dataframe(
                df, temp, email_function=emails, skip_if_nomail=True,
                **shared)
        except ProjectsRepository.MailNotFound:
            pass

        # second attempt with three addresses, mails are not mandatory
        emails = ["*****@*****.**".lower(),
                  "*****@*****.**",
                  "*****@*****.**"]
        proj = ProjectsRepository.create_folders_from_dataframe(
            df, temp, email_function=emails, must_have_email=False,
            **shared)

        # dumps the mails of the mocked mailbox into the repository
        data = os.path.abspath(os.path.join(
            os.path.dirname(__file__), "data"))
        box = MailBoxMock(data, b"unittestunittest", fLOG)
        box.login()
        email_render = EmailMessageRenderer(
            tmpl=template_email_html_short, fLOG=fLOG)
        render = EmailMessageListRenderer(
            title="list of mails", email_renderer=email_render, fLOG=fLOG)
        mails = proj.dump_group_mails(
            render, group=None, mailbox=box, subfolder="trav",
            date=datetime.datetime(2015, 1, 9))
        box.logout()

        # content of suivi.rst
        suivi = os.path.join(temp, "ABOUT.firstname", "suivi.rst")
        with open(suivi, "r", encoding="utf8") as f:
            content = f.read()
        self.assertIn("* mails: [email protected]", content)

        # groups and their mails
        self.assertEqual(len(proj.Groups), 3)
        mails = proj.get_emails(proj.Groups[0])
        self.assertEqual(len(mails), 1)
        self.assertIn(mails[0], emails)
        fLOG("------", os.path.exists(os.path.join(temp, "mail_style.css")))
        proj.write_run_command()
        proj.write_summary()
        fLOG("------")

        # every expected file must exist and must not mention the test folder
        mail_page = os.path.join(
            temp, "ABOUT.firstname",
            "d_2015-08-01_p_noreply-at-voyages-sncf-com_ii_8de6a63addb7c03407bc6f0caabd967e.html")
        expected = [os.path.join(temp, "index.html"),
                    mail_page,
                    os.path.join(temp, "mail_style.css")]
        checked = 0
        for path in expected:
            if not os.path.exists(path):
                raise FileNotFoundError(path)
            checked += 1
            with open(path, "r", encoding="utf8") as f:
                content = f.read()
            self.assertNotIn("ut_automation_students", content)
        self.assertEqual(checked, len(expected))
    def write_summary(self, renderer=None, link="index_mails.html",
                      outfile="index.html", title="summary",
                      nolink_if=None):
        """
        Produces a summary and uses a :epkg:`Jinja2` template.

        @param      renderer    instance of `EmailMessageRenderer
                                <http://www.xavierdupre.fr/app/pymmails/
                                helpsphinx//pymmails/render/email_message_renderer.html>`_),
                                can be None, in that case a renderer based on
                                ``ProjectsRepository._default_template_summary``
                                is created and flushed by this method
        @param      link        look for this file in each folder
        @param      outfile     output file
        @param      nolink_if   link containing those strings will be removed
                                (if None, ``ProjectsRepository._known_strings`` is used)
        @param      title       title
        @return                 summary (what ``renderer.write`` returns)

        For every group, the method walks through the files returned by
        ``enumerate_group_files`` (files ending with ``.metadata`` are skipped):

        * a file stored directly in a folder whose name ends with
          ``attachments`` is listed as an attachment,
        * a file stored deeper below a folder ``attachments`` is listed
          as a created file unless its path contains ``__MACOSX`` or
          ``.ipynb_checkpoints``,
        * any other file is interpreted as a mail with
          ``EmailMessage.interpret_default_filename``, its content is searched
          for links with ``ProjectsRepository._link_regex``.

        An exception is raised if a created file has the same relative path
        from the group location and from the repository location.

        The current default template is:

        .. runpython::

            from ensae_teaching_cs.automation_students.projects_repository import ProjectsRepository
            print(ProjectsRepository._default_template_summary)
        """
        if nolink_if is None:
            nolink_if = ProjectsRepository._known_strings

        # substrings which identify files or links not worth listing
        hidden = ("__MACOSX", ".ipynb_checkpoints")

        def filter_in(url):
            # tells if a link should be kept
            if "\n" in url or "\r" in url or "\t" in url:
                return False
            if url.endswith("&quot;"):
                return False
            if ".ipynb_checkpoints" in url:
                return False
            return not any(pattern in url for pattern in nolink_if)

        def clean_url(u):
            # removes HTML leftovers around a link
            u = u.replace("&#43;", "+").strip(".#'/ \r\n\t ")
            if u.endswith("&nbsp;"):
                u = u[:-6]
            return u

        def url_domain_name(url):
            # returns the domain and the last part of a link
            # (its last 30 characters at most)
            domain = urlparse(url).netloc
            parts = [_ for _ in url.split("/") if _]
            last = parts[-1] if parts else domain
            if len(last) > 30:
                last = last[-30:]
            return domain, clean_url(last)

        def format_size(s):
            # human readable size
            if s <= 2 ** 11:
                return "{0} bytes".format(s)
            elif s <= 2 ** 21:
                return "{0} Kb".format(s // (2 ** 10))
            elif s <= 2 ** 31:
                return "{0} Mb".format(s // (2 ** 20))
            else:
                return "{0} Gb".format(s // (2 ** 30))

        groups = []
        for group in self.Groups:
            # the location only depends on the group, not on the file
            loc = self.get_group_location(group)
            lp = os.path.join(loc, link)
            if os.path.exists(lp):
                group_link = os.path.relpath(lp, self._location)
            else:
                group_link = "file:///{0}".format(group)

            nb_files = 0
            size = 0
            atts = []
            emails = []
            links = []
            created_files = []
            for name in self.enumerate_group_files(group):
                if name.endswith(".metadata"):
                    continue
                nb_files += 1
                file_size = os.stat(name).st_size
                size += file_size
                folder = os.path.split(name)[0]
                splf = folder.replace("\\", "/").split("/")
                if folder.endswith("attachments"):
                    # attachment, the day comes from the metadata if any
                    meta = name + ".metadata"
                    if os.path.exists(meta):
                        data = EmailMessage.read_metadata(meta)
                        day = data["date"].strftime("%Y-%m-%d")
                    else:
                        data = None
                        day = ""
                    atts.append((day, os.path.relpath(
                        name, self._location), data))
                elif "attachments" in splf:
                    # file stored below a folder attachments
                    rel = os.path.relpath(name, loc)
                    dest = os.path.relpath(name, self._location)
                    if rel == dest:
                        raise Exception("weird\n{0}\n{1}".format(rel, dest))
                    if not any(marker in path for marker in hidden
                               for path in (rel, dest)):
                        created_files.append(
                            (rel, dest, format_size(file_size)))
                else:
                    mail = os.path.split(name)[-1]
                    res = EmailMessage.interpret_default_filename(mail)
                    if "date" in res and "uid" in res and "from" in res:
                        emails.append(
                            (res["date"], res["from"], res["uid"], res))
                        # opens the enumerated file itself, rebuilding the
                        # path from the group location and the file name
                        # fails for a mail stored in a subfolder
                        with open(name, "r", encoding="utf8") as f:
                            content = f.read()
                        found = ProjectsRepository._link_regex.findall(content)
                        for u in set(found):
                            u = clean_url(u)
                            if not filter_in(u):
                                continue
                            domain, last = url_domain_name(u)
                            # cleaned a second time: removing a trailing
                            # &nbsp; may uncover characters to strip
                            links.append(
                                (res["date"], res["from"], clean_url(u), domain, last))

            # we sort
            atts.sort()
            links.sort()

            # we clean duplicated links and hidden files
            unique_links = []
            seen = set()
            for date, from_, url, domain, last in links:
                if url in seen:
                    continue
                if any(marker in url or marker in last for marker in hidden):
                    continue
                unique_links.append((date, from_, url, domain, last))
                seen.add(url)

            # we create the variable for the template,
            # the key excludes the dictionary which cannot be compared
            # if two mails share the same date, sender and uid
            emails = [_[-1] for _ in sorted(emails, key=lambda e: e[:3])]
            groups.append(dict(link=group_link.replace("\\", "/"),
                               group=group, nb=nb_files, size=size,
                               attachments=atts, emails=emails,
                               links=unique_links,
                               created_files=created_files))

        # final summary, a renderer created here is flushed here
        own_renderer = renderer is None
        if own_renderer:
            renderer = EmailMessageRenderer(
                tmpl=ProjectsRepository._default_template_summary,
                fLOG=self.fLOG)
        res = renderer.write(filename=outfile, location=self.Location,
                             mail=None, attachments=None, groups=groups,
                             title=title, len=len, os=os,
                             format_size=format_size)
        if own_renderer:
            renderer.flush()
        return res
Exemple #11
0
    def write_summary(self, render=None, link="index_mails.html",
                      outfile="index.html", title="summary",
                      nolink_if=None):
        """
        Produces a summary and uses a Jinja2 template.

        @param      render      instance of `EmailMessageRenderer <http://www.xavierdupre.fr/app/pymmails/helpsphinx//pymmails/render/email_message_renderer.html>`_),
                                can be None, in that case a renderer based on
                                the default template is created and flushed
                                by this method
        @param      link        look for this file in each folder
        @param      outfile     output file
        @param      title       title
        @param      nolink_if   link containing those strings will be removed
                                (if None, ``ProjectsRepository._known_strings`` is used)
        @return                 summary (what ``render.write`` returns)

        For every group, the method walks through the files returned by
        ``enumerate_group_files`` (files ending with ``.metadata`` are skipped):
        a file stored directly in a folder whose name ends with ``attachments``
        is listed as an attachment, a file stored deeper below a folder
        ``attachments`` is listed as a created file, any other file is
        interpreted as a mail with ``EmailMessage.interpret_default_filename``
        and its content is searched for links with
        ``ProjectsRepository._link_regex``.
        An exception is raised if a created file has the same relative path
        from the group location and from the repository location.

        The current default template is::

            <?xml version="1.0" encoding="utf-8"?>
            <head>
            <meta http-equiv="content-type" content="text/html; charset=utf-8" />
            </head>
            <body>
            <html>
            <head>
            <title>{{ title }}</title>
            <link rel="stylesheet" type="text/css" href="{{ css }}">
            </head>
            <body>
            <h1>{{ title }}</h1>
            <ol type="1">
            {% for ps in groups %}
                <li><a href="{{ ps["link"] }}">{{ ps["group"] }}</a><small><i>
                    {{ ps["nb"] }} files - {{ format_size(ps["size"]) }} -
                    last mail {{ ps["emails"][-1]["date"] }} ---
                    {{ len(ps["attachments"]) }} attachments</i></small>
                {% if len(ps["attachments"]) > 0 %}
                    <ul>
                    {% for day, att, data in ps["attachments"] %}
                        <li>att: {{ day }} - <a href="{{ att }}">{{ os.path.split(att)[-1] }}</a></li>
                    {% endfor %}
                    {% for date, from_, url, domain, last in ps["links"] %}
                        <li>link: {{ date }} <a href="{{ url }}">{{ domain }} // {{ last }}</a> from {{ from_ }}</li>
                    {% endfor %}
                    </ul>
                {% endif %}
                {% if len(ps["created_files"]) > 0 %}
                    <ul>
                    {% for name, relpath, size in ps["created_files"] %}
                        <li>added: <a href="{{ relpath }}">{{ name }}</a> {{ size }}</li>
                    {% endfor %}
                    </ul>
                {% endif %}
                </li>
            {% endfor %}
            </ol>
            </body>
            </html>

        """
        if nolink_if is None:
            nolink_if = ProjectsRepository._known_strings

        def filter_in(url):
            # tells if a link should be kept, every test must look at
            # the parameter and not at a variable of the enclosing scope
            if "\n" in url or "\r" in url or "\t" in url:
                return False
            if url.endswith("&quot;"):
                return False
            return not any(pattern in url for pattern in nolink_if)

        def clean_url(u):
            # removes HTML leftovers around a link
            u = u.replace("&#43;", "+").strip(".#'/ \r\n\t ")
            if u.endswith("&nbsp;"):
                u = u[:-6]
            return u

        def url_domain_name(url):
            # returns the domain and the last part of a link
            # (its last 30 characters at most)
            domain = urlparse(url).netloc
            parts = [_ for _ in url.split("/") if _]
            last = parts[-1] if parts else domain
            if len(last) > 30:
                last = last[-30:]
            return domain, clean_url(last)

        def format_size(s):
            # human readable size
            if s <= 2 ** 11:
                return "{0} bytes".format(s)
            elif s <= 2 ** 21:
                return "{0} Kb".format(s // (2 ** 10))
            elif s <= 2 ** 31:
                return "{0} Mb".format(s // (2 ** 20))
            else:
                return "{0} Gb".format(s // (2 ** 30))

        groups = []
        for group in self.Groups:
            # the location only depends on the group, not on the file
            loc = self.get_group_location(group)
            lp = os.path.join(loc, link)
            if os.path.exists(lp):
                group_link = os.path.relpath(lp, self._location)
            else:
                group_link = "file:///{0}".format(group)

            nb_files = 0
            size = 0
            atts = []
            emails = []
            links = []
            created_files = []
            for name in self.enumerate_group_files(group):
                if name.endswith(".metadata"):
                    continue
                nb_files += 1
                file_size = os.stat(name).st_size
                size += file_size
                folder = os.path.split(name)[0]
                splf = folder.replace("\\", "/").split("/")
                if folder.endswith("attachments"):
                    # attachment, the day comes from the metadata if any
                    meta = name + ".metadata"
                    if os.path.exists(meta):
                        data = EmailMessage.read_metadata(meta)
                        day = data["date"].strftime("%Y-%m-%d")
                    else:
                        data = None
                        day = ""
                    atts.append((day, os.path.relpath(
                        name, self._location), data))
                elif "attachments" in splf:
                    # file stored below a folder attachments
                    rel = os.path.relpath(name, loc)
                    dest = os.path.relpath(name, self._location)
                    if rel == dest:
                        raise Exception("weird\n{0}\n{1}".format(rel, dest))
                    created_files.append((rel, dest, format_size(file_size)))
                else:
                    mail = os.path.split(name)[-1]
                    res = EmailMessage.interpret_default_filename(mail)
                    if "date" in res and "uid" in res and "from" in res:
                        emails.append(
                            (res["date"], res["from"], res["uid"], res))
                        # opens the enumerated file itself, rebuilding the
                        # path from the group location and the file name
                        # fails for a mail stored in a subfolder
                        with open(name, "r", encoding="utf8") as f:
                            content = f.read()
                        found = ProjectsRepository._link_regex.findall(content)
                        for u in set(found):
                            u = clean_url(u)
                            if not filter_in(u):
                                continue
                            domain, last = url_domain_name(u)
                            # cleaned a second time: removing a trailing
                            # &nbsp; may uncover characters to strip
                            links.append(
                                (res["date"], res["from"], clean_url(u), domain, last))

            # we sort
            atts.sort()
            links.sort()

            # we clean duplicated links
            unique_links = []
            seen = set()
            for date, from_, url, domain, last in links:
                if url in seen:
                    continue
                unique_links.append((date, from_, url, domain, last))
                seen.add(url)

            # we create the variable for the template,
            # the key excludes the dictionary which cannot be compared
            # if two mails share the same date, sender and uid
            emails = [_[-1] for _ in sorted(emails, key=lambda e: e[:3])]
            groups.append(dict(link=group_link.replace("\\", "/"),
                               group=group, nb=nb_files, size=size,
                               attachments=atts, emails=emails,
                               links=unique_links,
                               created_files=created_files))

        # a renderer created here is flushed here
        own_render = render is None
        if own_render:
            # every run of 20 spaces is removed to dedent the template
            tmpl = """<?xml version="1.0" encoding="utf-8"?>
                    <head>
                    <meta http-equiv="content-type" content="text/html; charset=utf-8" />
                    </head>
                    <body>
                    <html>
                    <head>
                    <title>{{ title }}</title>
                    <link rel="stylesheet" type="text/css" href="{{ css }}">
                    </head>
                    <body>
                    <h1>{{ title }}</h1>
                    <ol type="1">
                    {% for ps in groups %}
                        <li><a href="{{ ps["link"] }}">{{ ps["group"] }}</a><small><i>
                            {{ ps["nb"] }} files - {{ format_size(ps["size"]) }} -
                            last mail {{ ps["emails"][-1]["date"] }} ---
                            {{ len(ps["attachments"]) }} attachments</i></small>
                        {% if len(ps["attachments"]) > 0 %}
                            <ul>
                            {% for day, att, data in ps["attachments"] %}
                                <li>att: {{ day }} - <a href="{{ att }}">{{ os.path.split(att)[-1] }}</a></li>
                            {% endfor %}
                            {% for date, from_, url, domain, last in ps["links"] %}
                                <li>link: {{ date }} <a href="{{ url }}">{{ domain }} // {{ last }}</a> from {{ from_ }}</li>
                            {% endfor %}
                            </ul>
                        {% endif %}
                        {% if len(ps["created_files"]) > 0 %}
                            <ul>
                            {% for name, relpath, size in ps["created_files"] %}
                                <li>added: <a href="{{ relpath }}">{{ name }}</a> {{ size }}</li>
                            {% endfor %}
                            </ul>
                        {% endif %}
                        </li>
                    {% endfor %}
                    </ol>
                    </body>
                    </html>
                    """.replace("                    ", "")
            render = EmailMessageRenderer(tmpl=tmpl, fLOG=self.fLOG)
        res = render.write(filename=outfile, location=self.Location,
                           mail=None, attachments=None, groups=groups,
                           title=title, len=len, os=os,
                           format_size=format_size)
        if own_render:
            render.flush()
        return res