def test_process_data(self):
    # End-to-end check: decrypt the dummy archive, unzip it, run the whole
    # processing pipeline and load every produced training file.
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    # The test needs a password stored in the local keyring, so it is
    # skipped (with a warning) on continuous integration services.
    if is_travis_or_appveyor():
        warnings.warn("disabled on appveyor and travis")
        return

    workdir = get_temp_folder(__file__, "temp_process_data_cresus_2016")

    # Imported here so that the module is only required when the test runs.
    import keyring
    secret = keyring.get_password(
        "cresus", os.environ["COMPUTERNAME"] + "ensae")
    assert secret

    encrypted = cresus_dummy_file()
    if not os.path.exists(encrypted):
        raise FileNotFoundError(encrypted)

    # Decrypt into the temporary folder, then extract the archive there.
    archive = os.path.join(workdir, "bdd.zip")
    decrypt_stream(secret.encode("ascii"), encrypted, archive)
    extracted = unzip_files(archive, workdir)
    fLOG(extracted)

    # Only the first extracted file feeds the pipeline.
    train, _ = process_cresus_whole_process(
        extracted[0], outfold=workdir, fLOG=fLOG)

    # Every training file must be readable as a tab-separated UTF-8 table.
    for path in train.values():
        frame = pandas.read_csv(path, sep="\t", encoding="utf-8")
        fLOG(frame.columns)
def decrypt_data(password, input, output):
    """
    Decrypts a file by forwarding the call to *decrypt_stream*.

    @param      password    The encryption key - a string that must be either 16, 24 or 32
                            bytes long. Longer keys are more secure. A key which is not
                            already of type *bytes* is converted with the ASCII encoding.
    @param      input       input filename
    @param      output      output filename
    """
    # decrypt_stream receives the key as bytes, whatever type was given.
    key = password if isinstance(password, bytes) else bytes(
        password, encoding="ascii")
    decrypt_stream(key, input, output)
Example #3
0
def data_cpt_ENSAE_2016_11_blind_set(password):
    """
    Returns the evaluation set for the competition
    :epkg:`Python 2A ENSAE 2016`.

    @param      password    key used to decrypt the answers (*str* or *bytes*),
                            the special value ``"dummy"`` skips the decryption
    @return                 list of integers read from the decrypted file
                            (one per non empty line), or a list of 7500 random
                            floats if *password* is ``"dummy"``

    The answers are read from ``data_competition/answers.bin`` located next
    to this module, a *FileNotFoundError* is raised if that file is missing.

    The competition is over. The password is ``xdameratxdamerat``.
    """
    if password == "dummy":
        # No decryption: random placeholder values are returned instead.
        return [random.random() for _ in range(7500)]

    answers = os.path.join(os.path.dirname(__file__),
                           "data_competition", "answers.bin")
    if not os.path.exists(answers):
        raise FileNotFoundError(answers)

    with open(answers, "rb") as stream:
        encrypted = stream.read()

    # The key is handed to decrypt_stream as bytes.
    key = password if isinstance(password, bytes) else bytes(
        password, "ascii")
    text = decrypt_stream(key, encrypted).decode("ascii")

    # One integer per line, carriage returns and empty lines are dropped.
    rows = text.replace("\r", "").split("\n")
    return [int(row) for row in rows if row]
Example #4
0
    def read_mail_from_file(self, filename):
        """
        Loads a mail stored in a file.

        @param      filename        filename
        @return                     message built by *email.message_from_bytes*
                                    with class *EmailMessage*

        The file is read as bytes. If ``self._password`` is set, the content
        goes through *decrypt_stream* before being parsed.
        """
        with open(filename, "rb") as stream:
            raw = stream.read()
        # Without a password, the raw bytes are parsed as they are.
        payload = decrypt_stream(self._password, raw) if self._password else raw
        return email.message_from_bytes(payload, _class=EmailMessage)