def test_process_data(self):
    """
    Runs the whole cresus processing on the encrypted dummy file:
    decrypts it into a zip archive, unzips it, calls
    *process_cresus_whole_process* and reads every produced train file
    with pandas. The test is skipped on travis and appveyor.
    """
    fLOG(__file__, self._testMethodName,
         OutputPrint=__name__ == "__main__")

    if is_travis_or_appveyor():
        warnings.warn("disabled on appveyor and travis")
        return

    folder = get_temp_folder(__file__, "temp_process_data_cresus_2016")

    # keyring is imported locally, only once the test is known to run
    import keyring
    # the keyring entry is keyed on the machine name,
    # a missing COMPUTERNAME variable raises KeyError
    account = os.environ["COMPUTERNAME"] + "ensae"
    secret = keyring.get_password("cresus", account)
    assert secret

    encrypted = cresus_dummy_file()
    if not os.path.exists(encrypted):
        raise FileNotFoundError(encrypted)

    archive = os.path.join(folder, "bdd.zip")
    decrypt_stream(secret.encode("ascii"), encrypted, archive)

    unzipped = unzip_files(archive, folder)
    fLOG(unzipped)

    # only the first unzipped file is processed,
    # the second returned value is not used by this test
    train, _ = process_cresus_whole_process(
        unzipped[0], outfold=folder, fLOG=fLOG)

    for filename in train.values():
        frame = pandas.read_csv(filename, sep="\t", encoding="utf-8")
        fLOG(frame.columns)
def decrypt_data(password, input, output):
    """
    Decrypts a file.

    @param      password    The encryption key - a string that must be
                            either 16, 24 or 32 bytes long.
                            Longer keys are more secure. If the data to
                            encrypt is in bytes, the key must be given
                            in bytes too.
    @param      input       input filename
    @param      output      output filename

    A password which is not already *bytes* is converted with
    the ``ascii`` codec before being given to *decrypt_stream*.
    """
    key = password if isinstance(password, bytes) else bytes(password, "ascii")
    decrypt_stream(key, input, output)
def data_cpt_ENSAE_2016_11_blind_set(password):
    """
    Returns the evaluation set for the competition
    :epkg:`Python 2A ENSAE 2016`.

    @param      password    password used to decrypt the answers
                            (*str* or *bytes*), ``"dummy"`` skips the
                            decryption and returns random values
    @return                 list of 7500 random floats if *password*
                            is ``"dummy"``, otherwise the list of integers
                            read from the decrypted file, one per
                            non empty line

    The competition is over. The password is ``xdameratxdamerat``.
    The function raises *FileNotFoundError* if the file
    ``data_competition/answers.bin`` is not found next to this module.
    """
    if password == "dummy":
        return [random.random() for _ in range(7500)]

    folder = os.path.dirname(__file__)
    name = os.path.join(folder, "data_competition", "answers.bin")
    if not os.path.exists(name):
        raise FileNotFoundError(name)

    with open(name, "rb") as f:
        encrypted = f.read()

    # a password which is not bytes is converted with the ascii codec
    key = password if isinstance(password, bytes) else bytes(password, "ascii")
    decrypted = decrypt_stream(key, encrypted)

    # carriage returns are dropped so that only "\n" separates the lines
    text = decrypted.decode("ascii").replace("\r", "")
    return [int(line) for line in text.split("\n") if line]
def read_mail_from_file(self, filename):
    """
    Extracts a mail from a file.

    @param      filename    filename
    @return                 message built by ``email.message_from_bytes``
                            with class *EmailMessage*

    The file is read as bytes. Its content goes through
    *decrypt_stream* first when ``self._password`` is set,
    it is parsed as is otherwise.
    """
    with open(filename, "rb") as f:
        raw = f.read()

    if self._password:
        raw = decrypt_stream(self._password, raw)

    return email.message_from_bytes(raw, _class=EmailMessage)