def extract_students_mail_and_name_from_gmail(user=None, pwd=None, server="imap.gmail.com",
                                              mailfolder=["ensae/actuariat"],
                                              date="1-Jan-2016", fLOG=noLOG):
    """
    Extracts mails and names from a mail box.

    @param      user                user of the gmail inbox
    @param      pwd                 password of the gmail inbox
    @param      server              gmail server, it should be ``"imap.gmail.com"``,
                                    it works with other mail servers using the *IMAP* protocol
    @param      mailfolder          folder in your inbox to look into,
                                    there can be several
    @param      date                when to start looking (do not change the format,
                                    look at the default value)
    @param      fLOG                logging function
    @return                         list of dictionaries ``[{"name": ..., "mail": ...}]``,
                                    key ``"name"`` is only present when a name was
                                    suggested for the address, several names are
                                    sorted and joined with ``";"``
    """
    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        emails, suggestions = grab_addresses(
            box, mailfolder, date, names=True, fLOG=fLOG)
    finally:
        # the session is closed even if the extraction fails
        box.logout()

    rows = []
    for mail in emails:
        el = {"mail": mail}
        if mail in suggestions:
            el["name"] = ";".join(sorted(suggestions[mail]))
        rows.append(el)
    return rows
Example #2
0
 def should_bemocked_test_fetch_mail(self):
     """
     Logs into ``imap.gmail.com`` with placeholder credentials, calls
     ``enumerate_search_subject("subject", "inbox")`` and dumps every
     returned mail into ``"destination"``.
     """
     imap = MailBoxImap("somebody", "pwd", "imap.gmail.com", True)
     imap.login()
     try:
         # *found* replaces the former local ``iter`` which shadowed the builtin
         found = imap.enumerate_search_subject("subject", "inbox")
         for m in found:
             # NOTE(review): the search result itself is given to ``dump``
             # as first argument, as in the original code - confirm it is intended
             m.dump(found, "destination")
     finally:
         # the session is closed even if a dump fails
         imap.logout()
    def test_mailbox_extended(self):
        """
        Reads the mails of folder ``inbox`` starting from yesterday and
        checks the sender information: the name and the first non empty
        item returned by ``get_from`` must not contain ``"=?"`` and the
        second item returned by ``get_from`` must contain ``"@"``.
        Raises an exception listing the names of the faulty mails.
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        if is_travis_or_appveyor():
            warnings.warn("requires a password")
            return

        # NOTE: ``%b`` depends on the current locale
        now = datetime.datetime.now()
        now -= datetime.timedelta(1)
        date = now.strftime("%d-%b-%Y")

        if "DOUZE2016" in os.environ.get("COMPUTERNAME", ""):
            # does not work on the remote build server
            return

        with warnings.catch_warnings():
            warnings.simplefilter('ignore', DeprecationWarning)
            import keyring
        user = keyring.get_password("gmail", "pymmails,user")
        code = keyring.get_password("gmail", "pymmails,pwd")

        box = MailBoxImap(user, code, "imap.gmail.com", ssl=True, fLOG=fLOG)
        box.login()
        try:
            li = list(box.enumerate_mails_in_folder("inbox", date=date))
        finally:
            # the session is closed even if the download fails
            box.logout()
        self.assertTrue(len(li) > 0)

        issues = []
        for mail in li:
            name = mail.get_name()
            # the test used to be ``"=?" in mail`` which looks at the
            # message object and not at the name which is reported
            if isinstance(name, str) and "=?" in name:
                issues.append(name)
            fr = mail.get_from()
            frm = [_ for _ in fr if _]
            if "=?" in frm[0]:
                issues.append(name)
            if "@" not in fr[1]:
                issues.append(name)
        if issues:
            raise Exception("Issues with\n{0}".format(
                "\n".join(str(_) for _ in issues)))
Example #4
0
    def test_mailbox(self):
        """
        Reads folder ``test4`` of the unit test mailbox with
        ``date="1-Jan-2016"`` and checks it returns exactly three mails.
        The test is skipped on travis and appveyor (no password there).
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        if is_travis_or_appveyor():
            warnings.warn("requires a password")
            return
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', DeprecationWarning)
            import keyring
        code = keyring.get_password("sdut", "pymmails")

        box = MailBoxImap("unittest.sdpython", code,
                          "imap.gmail.com", ssl=True, fLOG=fLOG)
        box.login()
        try:
            li = list(box.enumerate_mails_in_folder(
                "test4", date="1-Jan-2016"))
        finally:
            # the session is closed even if the download fails
            box.logout()
        self.assertEqual(len(li), 3)
Example #5
0
    def test_mailbox_dump(self):
        """
        Reads folder ``test4`` of the unit test mailbox with
        ``date="1-Jan-2016"`` and dumps every mail into a temporary folder
        with an ``EmailMessageRenderer``.
        The test is skipped on travis and appveyor (no password there).
        """
        fLOG(
            __file__,
            self._testMethodName,
            OutputPrint=__name__ == "__main__")

        if is_travis_or_appveyor():
            warnings.warn("requires a password")
            return
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', DeprecationWarning)
            import keyring
        code = keyring.get_password("sdut", "pymmails")
        temp = get_temp_folder(__file__, "temp_dump")
        box = MailBoxImap("unittest.sdpython", code,
                          "imap.gmail.com", ssl=True, fLOG=fLOG)
        render = EmailMessageRenderer()
        box.login()
        try:
            mails = box.enumerate_mails_in_folder("test4", date="1-Jan-2016")
            for mail in mails:
                mail.dump(render, location=temp, fLOG=fLOG)
            render.flush()
        finally:
            # the session is closed even if a dump fails
            box.logout()
Example #6
0
def extract_students_mail_and_name_from_gmail(user=None,
                                              pwd=None,
                                              server="imap.gmail.com",
                                              mailfolder=["ensae/actuariat"],
                                              date="1-Jan-2016",
                                              fLOG=noLOG):
    """
    Extracts mails and names from a mail box.

    @param      user                user of the gmail inbox
    @param      pwd                 password of the gmail inbox
    @param      server              gmail server, it should be ``"imap.gmail.com"``,
                                    it works with other mail servers using the *IMAP* protocol
    @param      mailfolder          folder in your inbox to look into,
                                    there can be several
    @param      date                when to start looking (do not change the format,
                                    look at the default value)
    @param      fLOG                logging function
    @return                         list of dictionaries ``[{"name": ..., "mail": ...}]``,
                                    key ``"name"`` is only present when a name was
                                    suggested for the address, several names are
                                    sorted and joined with ``";"``
    """
    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        emails, suggestions = grab_addresses(box,
                                             mailfolder,
                                             date,
                                             names=True,
                                             fLOG=fLOG)
    finally:
        # the session is closed even if the extraction fails
        box.logout()

    rows = []
    for mail in emails:
        el = {"mail": mail}
        if mail in suggestions:
            el["name"] = ";".join(sorted(suggestions[mail]))
        rows.append(el)
    return rows
skip_address = [
    "*****@*****.**",
    "*****@*****.**",
]

###############
# gather mails

fLOG("fetch mails")

if not os.path.exists(filename_mails):
    # no cached list: the addresses are read from the mailbox,
    # filtered, normalised (no brackets, lower case), sorted
    # and finally stored in *filename_mails*
    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    emails = grab_addresses(box, mailfolder, date, fLOG=fLOG)
    box.logout()
    emails = sorted({address.strip("<>").lower()
                     for address in emails
                     if address not in skip_address})

    with open(filename_mails, "w", encoding="utf8") as f:
        f.write("\n".join(emails))
else:
    # the cached list is reused, one address per line
    with open(filename_mails, "r", encoding="utf8") as f:
        lines = f.readlines()
    emails = [line.strip("\r\t\n ") for line in lines]

#####################
# create a dataframe
Example #8
0
def extract_students_mails_from_gmail_and_stores_in_folders(folder=".", filemails="emails.txt",
                                                            user=None, pwd=None, server="imap.gmail.com",
                                                            mailfolder=[
                                                                "ensae/ENSAE_2016_3A"],
                                                            date="1-Jan-2016", zipfilename="projet_3A_2016.zip",
                                                            zipencpwd=b"sixteenbyteskeys", dataframe=None,
                                                            columns={
                                                                "name": "nom_prenom", "group": "groupe", "subject": "sujet"},
                                                            skip_names=None, process_name=None,
                                                            title="List of emails", nolink_if=None, fLOG=fLOG):
    """
    The scenario is the following:

    * You are the teacher.
    * Students started their projects at date *t*.
    * They can work alone or by group.
    * They send mails, you reply.
    * Their mail address follows the convention: ``<first name>.<last name>@anything``
      so it is easy to associate a mail address to a student name.
    * You move every mail you received in a separate folder in your inbox.
    * Sometimes, you send a mail to everybody.
    * Finally they send their project with attachments.
    * You want to store everything (mails and attachments) in folders, one per group.
    * You want a summary of what was received.
    * You want to build a zip file to share their work with other teachers.
    * You want to update the folder if a new mail was sent.

    This function looks into a folder of your inbox and grabs every mail and
    attachment from groups of students.

    @param      folder              where to store the results
    @param      filemails           file used to store students addresses (relative to *folder*),
                                    the operation is done once, remove the file
                                    to force the function to rebuild the information.
    @param      user                user of the gmail inbox
    @param      pwd                 password of the gmail inbox
    @param      server              gmail server, it should be ``"imap.gmail.com"``,
                                    it works with other mail servers using the *IMAP* protocol
    @param      mailfolder          folder in your inbox to look into,
                                    there can be several
    @param      date                when to start looking (do not change the format,
                                    look at the default value)
    @param      zipfilename         name of the zip file to create (relative to *folder*)
    @param      zipencpwd           the zip file is also encrypted for a safer share with this key
                                    and function `encrypt_stream <http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/
                                    pyquickhelper/filehelper/encryption.html#pyquickhelper.filehelper.encryption.encrypt_stream>`_.
    @param      dataframe           dataframe which contains the definition of students groups,
                                    or the name of a file, it is read with ``pandas.read_excel``
                                    if it ends with ``xlsx``, otherwise as a tab separated text file
    @param      columns             columns the function will look into, students names, group definition
                                    (a unique number for all students in the same group), subject,
                                    if key ``"mail"`` is present, addresses are read from
                                    that column instead of being mined from the mailbox
    @param      skip_names          list of names to skip
    @param      process_name        to operate a transformation before matching students names with
                                    their emails
    @param      title               each group folder contains a html file connecting them,
                                    this is its title
    @param      nolink_if           The summary extracts links from url, it skips the urls which
                                    contain one of the substrings included in that list (None to use a default set)
    @param      fLOG                logging function
    @return                         @see cl ProjectsRepository

    Raises ``ValueError`` if *dataframe* is None.

    By default, Gmail does not let you programmatically access your own inbox,
    you need to modify your gmail parameters to let this function do so.
    """
    # the argument used to be ignored and replaced by the current directory
    folder = os.path.abspath(folder)
    filemails = os.path.join(folder, filemails)
    zipfilename = os.path.join(folder, zipfilename)
    zipfilenameenc = zipfilename + ".enc"

    # load the groups
    if dataframe is None:
        raise ValueError(
            "dataframe must be a pandas.DataFrame or the name of a file")
    if isinstance(dataframe, pandas.DataFrame):
        df = dataframe
    elif dataframe.endswith("xlsx"):
        fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] read dataframe", dataframe)
        df = pandas.read_excel(dataframe)
    else:
        df = pandas.read_csv(dataframe, sep="\t", encoding="utf8")

    # check mails
    if "mail" not in columns:
        if os.path.exists(filemails):
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] read addresses from ", filemails)
            with open(filemails, "r", encoding="utf8") as f:
                lines = f.readlines()
            emails = [li.strip("\r\t\n ") for li in lines]
        else:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] mine address ")
            box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
            box.login()
            try:
                emails = grab_addresses(box, mailfolder, date, fLOG=fLOG)
            finally:
                # the session is closed even if the extraction fails
                box.logout()

            with open(filemails, "w", encoding="utf8") as f:
                f.write("\n".join(emails))
    else:
        # nothing to do mail already present
        emails = set(df[columns["mail"]])

    # we remove empty names
    df = df[~df[columns["name"]].isnull()].copy()

    if process_name:
        df[columns["name"]] = df[columns["name"]].apply(process_name)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] create groups folders in", folder)
    # NOTE(review): this instance is replaced right below, it is kept
    # in case the constructor has side effects - confirm
    proj = ProjectsRepository(folder, fLOG=fLOG)

    # ``columns["mail"]`` raised KeyError with the default value of *columns*,
    # NOTE(review): assumes ``col_mail=None`` means "no mail column" - confirm
    proj = ProjectsRepository.create_folders_from_dataframe(df, folder,
                                                            col_subject=columns[
                                                                "subject"], fLOG=fLOG, col_group=columns["group"],
                                                            col_student=columns[
                                                                "name"], email_function=emails, skip_if_nomail=False,
                                                            col_mail=columns.get("mail"), must_have_email=True,
                                                            skip_names=skip_names)
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] nb groups", len(
        proj.Groups))

    # gathers mails
    email_renderer = EmailMessageRenderer(tmpl=template_email_html_short,
                                          fLOG=fLOG)
    renderer = EmailMessageListRenderer(title=title, email_renderer=email_renderer,
                                        fLOG=fLOG)

    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        proj.dump_group_mails(renderer, group=None, mailbox=box, subfolder=mailfolder,
                              date=date, overwrite=False, skip_if_empty=True)
    finally:
        # the session is closed even if the download fails
        box.logout()

    # cleaning files, groups with at most one file whose name
    # contains ".html" are removed, the loop goes through a copy
    # because groups are removed while iterating
    for group in list(proj.Groups):
        files = list(proj.enumerate_group_files(group))
        att = [_ for _ in files if ".html" in _]
        if len(att) <= 1:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] remove '{}'".format(group))
            proj.remove_group(group)

    # unzip files and convert notebooks
    for group in proj.Groups:
        proj.unzip_convert(group)

    summary = os.path.join(folder, "index.html")
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] write summary '{}'".format(summary))
    if os.path.exists(summary):
        os.remove(summary)
    proj.write_run_command()
    proj.write_summary(nolink_if=nolink_if)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] zip everything in", zipfilename)
    if os.path.exists(zipfilename):
        os.remove(zipfilename)
    proj.zip_group(None, zipfilename,
                   addition=["index.html", "mail_style.css", "emails.txt"])

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] encrypt the zip file in '{}'.".format(
        zipfilenameenc))
    if os.path.exists(zipfilenameenc):
        os.remove(zipfilenameenc)
    encrypt_stream(zipencpwd, zipfilename, zipfilenameenc, chunksize=2 ** 30)

    return proj
Example #9
0
skip_address = [
    "*****@*****.**",
    "*****@*****.**",
]

###############
# gather mails

fLOG("fetch mails")

if not os.path.exists(filename_mails):
    # no cached list: the addresses are read from the mailbox,
    # filtered, normalised (no brackets, lower case), sorted
    # and finally stored in *filename_mails*
    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    emails = grab_addresses(box, mailfolder, date, fLOG=fLOG)
    box.logout()
    emails = sorted({address.strip("<>").lower()
                     for address in emails
                     if address not in skip_address})

    with open(filename_mails, "w", encoding="utf8") as f:
        f.write("\n".join(emails))
else:
    # the cached list is reused, one address per line
    with open(filename_mails, "r", encoding="utf8") as f:
        lines = f.readlines()
    emails = [line.strip("\r\t\n ") for line in lines]

#####################
# create a dataframe

import pandas
# one row per address, every address gets its own group numbered from 1
rows = [{"nom_prenom": address, "sujet": "octobre", "groupe": rank}
        for rank, address in enumerate(emails, start=1)]
Example #10
0
    rec = compute_metrics(v)
    rec['a_sender'] = k
    perf_both.append(rec)

# Every file of the local folder "sub" is turned into a dictionary
# shaped like a mail: the file name is used as sender and as name,
# the last modification time as date, the raw content as attachment.
# The pair (date, mail) is appended to *rev*.
for name in os.listdir("sub"):
    with open(os.path.join("sub", name), "rb") as f:
        c = f.read()
    mail = dict(From=name,
                Date=os.stat(os.path.join('sub', name)).st_mtime,
                name=name,
                att=c)
    rev.append((mail['Date'], mail))

# This branch is disabled (``if False``), it never runs.
# When enabled, it reads folder "ensae/hackathon" of the mailbox with
# date="24-Nov-2017", skips the mails whose sender contains 'dupre'
# and appends the others to *rev*.
if False:
    server = "imap.gmail.com"
    box = MailBoxImap(user, pwd, server, ssl=True)
    box.login()
    for i, mail in enumerate(
            box.enumerate_mails_in_folder("ensae/hackathon",
                                          date="24-Nov-2017")):
        if 'dupre' in mail['From']:
            continue
        rev.append((mail["Date"], mail))
        fLOG(i, mail["Date"], mail['From'])
else:
    # no connection to the mailbox
    box = None

fLOG('----------------------')
for date, mail in rev:
    fLOG(mail["Date"], mail['From'])
    if 'att' in mail:
def extract_students_mails_from_gmail_and_stores_in_folders(folder=".", filemails="emails.txt",
                                                            user=None, pwd=None, server="imap.gmail.com",
                                                            mailfolder=[
                                                                "ensae/ENSAE_2016_3A"],
                                                            date="1-Jan-2016", zipfilename="projet_3A_2016.zip",
                                                            zipencpwd=b"sixteenbyteskeys", dataframe=None,
                                                            columns={
                                                                "name": "nom_prenom", "group": "groupe", "subject": "sujet"},
                                                            skip_names=None, process_name=None,
                                                            title="List of emails", nolink_if=None, fLOG=fLOG):
    """
    The scenario is the following:

    * You are the teacher.
    * Students started their projects at date *t*.
    * They can work alone or by group.
    * They send mails, you reply.
    * Their mail address follows the convention: ``<first name>.<last name>@anything``
      so it is easy to associate a mail address to a student name.
    * You move every mail you received in a separate folder in your inbox.
    * Sometimes, you send a mail to everybody.
    * Finally they send their project with attachments.
    * You want to store everything (mails and attachments) in folders, one per group.
    * You want a summary of what was received.
    * You want to build a zip file to share their work with other teachers.
    * You want to update the folder if a new mail was sent.

    This function looks into a folder of your inbox and grabs every mail and
    attachment from groups of students.

    @param      folder              where to store the results
    @param      filemails           file used to store students addresses (relative to *folder*),
                                    the operation is done once, remove the file
                                    to force the function to rebuild the information.
    @param      user                user of the gmail inbox
    @param      pwd                 password of the gmail inbox
    @param      server              gmail server, it should be ``"imap.gmail.com"``,
                                    it works with other mail servers using the *IMAP* protocol
    @param      mailfolder          folder in your inbox to look into,
                                    there can be several
    @param      date                when to start looking (do not change the format,
                                    look at the default value)
    @param      zipfilename         name of the zip file to create (relative to *folder*)
    @param      zipencpwd           the zip file is also encrypted for a safer share with this key
                                    and function `encrypt_stream <http://www.xavierdupre.fr/app/pyquickhelper/helpsphinx/
                                    pyquickhelper/filehelper/encryption.html#pyquickhelper.filehelper.encryption.encrypt_stream>`_.
    @param      dataframe           dataframe which contains the definition of students groups,
                                    or the name of a file, it is read with ``pandas.read_excel``
                                    if it ends with ``xlsx``, otherwise as a tab separated text file
    @param      columns             columns the function will look into, students names, group definition
                                    (a unique number for all students in the same group), subject,
                                    if key ``"mail"`` is present, addresses are read from
                                    that column instead of being mined from the mailbox
    @param      skip_names          list of names to skip
    @param      process_name        to operate a transformation before matching students names with
                                    their emails
    @param      title               each group folder contains a html file connecting them,
                                    this is its title
    @param      nolink_if           The summary extracts links from url, it skips the urls which
                                    contain one of the substrings included in that list (None to use a default set)
    @param      fLOG                logging function
    @return                         @see cl ProjectsRepository

    Raises ``ValueError`` if *dataframe* is None.

    By default, Gmail does not let you programmatically access your own inbox,
    you need to modify your gmail parameters to let this function do so.
    """
    # the argument used to be ignored and replaced by the current directory
    folder = os.path.abspath(folder)
    filemails = os.path.join(folder, filemails)
    zipfilename = os.path.join(folder, zipfilename)
    zipfilenameenc = zipfilename + ".enc"

    # load the groups
    if dataframe is None:
        raise ValueError(
            "dataframe must be a pandas.DataFrame or the name of a file")
    if isinstance(dataframe, pandas.DataFrame):
        df = dataframe
    elif dataframe.endswith("xlsx"):
        fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] read dataframe", dataframe)
        df = pandas.read_excel(dataframe)
    else:
        df = pandas.read_csv(dataframe, sep="\t", encoding="utf8")

    # check mails
    if "mail" not in columns:
        if os.path.exists(filemails):
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] read addresses from ", filemails)
            with open(filemails, "r", encoding="utf8") as f:
                lines = f.readlines()
            emails = [line.strip("\r\t\n ") for line in lines]
        else:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] mine address ")
            box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
            box.login()
            try:
                emails = grab_addresses(box, mailfolder, date, fLOG=fLOG)
            finally:
                # the session is closed even if the extraction fails
                box.logout()

            with open(filemails, "w", encoding="utf8") as f:
                f.write("\n".join(emails))
    else:
        # nothing to do mail already present
        emails = set(df[columns["mail"]])

    # we remove empty names
    df = df[~df[columns["name"]].isnull()].copy()

    if process_name:
        df[columns["name"]] = df[columns["name"]].apply(process_name)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] create groups folders in", folder)
    # NOTE(review): this instance is replaced right below, it is kept
    # in case the constructor has side effects - confirm
    proj = ProjectsRepository(folder, fLOG=fLOG)

    # ``columns["mail"]`` raised KeyError with the default value of *columns*,
    # NOTE(review): assumes ``col_mail=None`` means "no mail column" - confirm
    proj = ProjectsRepository.create_folders_from_dataframe(df, folder,
                                                            col_subject=columns[
                                                                "subject"], fLOG=fLOG, col_group=columns["group"],
                                                            col_student=columns[
                                                                "name"], email_function=emails, skip_if_nomail=False,
                                                            col_mail=columns.get("mail"), must_have_email=True,
                                                            skip_names=skip_names)
    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] nb groups", len(
        proj.Groups))

    # gathers mails
    email_renderer = EmailMessageRenderer(tmpl=template_email_html_short,
                                          fLOG=fLOG)
    renderer = EmailMessageListRenderer(title=title, email_renderer=email_renderer,
                                        fLOG=fLOG)

    box = MailBoxImap(user, pwd, server, ssl=True, fLOG=fLOG)
    box.login()
    try:
        proj.dump_group_mails(renderer, group=None, mailbox=box, subfolder=mailfolder,
                              date=date, overwrite=False, skip_if_empty=True)
    finally:
        # the session is closed even if the download fails
        box.logout()

    # cleaning files, groups with at most one file whose name
    # contains ".html" are removed, the loop goes through a copy
    # because groups are removed while iterating
    for group in list(proj.Groups):
        files = list(proj.enumerate_group_files(group))
        att = [_ for _ in files if ".html" in _]
        if len(att) <= 1:
            fLOG(
                "[extract_students_mails_from_gmail_and_stores_in_folders] remove ", group)
            proj.remove_group(group)

    # unzip files and convert notebooks
    for group in proj.Groups:
        proj.unzip_convert(group)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] summary ")
    summary = os.path.join(folder, "index.html")
    if os.path.exists(summary):
        os.remove(summary)
    proj.write_summary(nolink_if=nolink_if)

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] zip everything in", zipfilename)
    if os.path.exists(zipfilename):
        os.remove(zipfilename)
    proj.zip_group(None, zipfilename,
                   addition=["index.html", "mail_style.css", "emails.txt"])

    fLOG("[extract_students_mails_from_gmail_and_stores_in_folders] encrypt the zip file in", zipfilenameenc)
    if os.path.exists(zipfilenameenc):
        os.remove(zipfilenameenc)
    encrypt_stream(zipencpwd, zipfilename, zipfilenameenc, chunksize=2 ** 30)

    return proj