def test_cp_local_no_clobber(self) -> None:
    """Check that cp with no_clobber=True leaves an existing destination untouched."""
    with temppathlib.NamedTemporaryFile() as src_file, \
            temppathlib.NamedTemporaryFile() as dst_file:
        src_file.path.write_text("hello")
        dst_file.path.write_text("hello there")

        self.client.cp(
            src=src_file.path.as_posix(),
            dst=dst_file.path.as_posix(),
            no_clobber=True)

        # The destination must still hold its original content.
        self.assertEqual("hello there", dst_file.path.read_text())
def test_upload_preserved_posix(self) -> None:
    # Upload a local file with preserve_posix=True and verify that the POSIX
    # metadata (size, mtime, uid, gid, permission bits) is reported by stat().
    with temppathlib.NamedTemporaryFile() as file:
        file.path.write_text(tests.common.TEST_GCS_BUCKET)
        url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                       self.bucket_prefix)
        self.client.cp(
            src=file.path.as_posix(), dst=url, preserve_posix=True)
        try:
            gcs_stat = self.client.stat(url=url)
            self.assertIsNotNone(gcs_stat)

            file_stat = file.path.stat()
            self.assertIsNotNone(file_stat)

            # Bare asserts narrow the Optional types for the checks below.
            assert isinstance(gcs_stat, gswrap.Stat)
            self.assertEqual(file_stat.st_size, gcs_stat.content_length)

            assert isinstance(gcs_stat.file_mtime, datetime.datetime)
            # The remote mtime is compared at whole-second precision, so the
            # local mtime is truncated before comparing.
            self.assertEqual(
                datetime.datetime.utcfromtimestamp(
                    file_stat.st_mtime).replace(microsecond=0).timestamp(),
                gcs_stat.file_mtime.timestamp())

            assert isinstance(gcs_stat.posix_uid, str)
            assert isinstance(gcs_stat.posix_gid, str)
            self.assertEqual(file_stat.st_uid, int(gcs_stat.posix_uid))
            self.assertEqual(file_stat.st_gid, int(gcs_stat.posix_gid))
            # Only the three permission digits (e.g. "644") are compared.
            self.assertEqual(
                oct(file_stat.st_mode)[-3:], gcs_stat.posix_mode)
        finally:
            tests.common.call_gsutil_rm(path=url, recursive=False)
def test_write_text(self) -> None:
    """Round-trip text through GCS in two different encodings."""
    utf_url = "gs://{}/{}/utf-file".format(
        tests.common.TEST_GCS_BUCKET, self.bucket_prefix)
    iso_url = "gs://{}/{}/iso-file".format(
        tests.common.TEST_GCS_BUCKET, self.bucket_prefix)
    try:
        self.client.write_text(
            url=utf_url,
            text=tests.common.GCS_FILE_CONTENT,
            encoding='utf-8')
        self.client.write_text(
            url=iso_url,
            text=tests.common.GCS_FILE_CONTENT,
            encoding='iso-8859-1')

        with temppathlib.NamedTemporaryFile() as local_file:
            # Download each blob and decode it with the matching encoding.
            tests.common.call_gsutil_cp(
                src=utf_url, dst=local_file.path.as_posix(), recursive=False)
            self.assertEqual(
                tests.common.GCS_FILE_CONTENT,
                local_file.path.read_text(encoding='utf-8'))

            tests.common.call_gsutil_cp(
                src=iso_url, dst=local_file.path.as_posix(), recursive=False)
            self.assertEqual(
                tests.common.GCS_FILE_CONTENT,
                local_file.path.read_text(encoding='iso-8859-1'))
    finally:
        tests.common.call_gsutil_rm(path=utf_url, recursive=False)
        tests.common.call_gsutil_rm(path=iso_url, recursive=False)
def read_bytes(self, remote_path: Union[str, pathlib.Path]) -> bytes:
    """
    Read the binary data from a remote file.

    First the remote file is copied to a temporary local file making sure that
    the connection is reestablished if needed. Next the data is read.

    :param remote_path: to the file
    :return: binary content of the file
    :raises PermissionError: if the remote path could not be accessed
    :raises FileNotFoundError: if the remote path does not exist
    """
    rmt_pth_str = remote_path if isinstance(remote_path, str) else remote_path.as_posix()

    permerr = None  # type: Optional[PermissionError]
    notfounderr = None  # type: Optional[FileNotFoundError]
    try:
        with temppathlib.NamedTemporaryFile() as tmp:
            self.get(remote_path=rmt_pth_str, local_path=tmp.path.as_posix(), consistent=True)
            return tmp.path.read_bytes()
    except PermissionError as err:
        permerr = err
    except FileNotFoundError as err:
        notfounderr = err

    # The exceptions are deliberately re-raised *outside* the ``except``
    # blocks so that the informative message is not chained onto the
    # original low-level traceback.
    if permerr is not None:
        raise PermissionError("The remote path could not be accessed: {}".format(rmt_pth_str))

    if notfounderr is not None:
        raise FileNotFoundError("The remote path was not found: {}".format(rmt_pth_str))

    # Unreachable: one of the two handlers above must have recorded an error.
    raise AssertionError("Expected an exception before.")
def write_bytes(self,
                remote_path: Union[str, pathlib.Path],
                data: bytes,
                create_directories: bool = True,
                consistent: bool = True) -> None:
    """
    Write the binary data to a remote file.

    First, the data is written to a temporary local file. Next the local file
    is transferred to the remote path making sure that the connection is
    reestablished if needed.

    :param remote_path: to the file
    :param data: to be written
    :param create_directories:
        if set, creates the parent directory of the remote path with mode 0o777
    :param consistent:
        if set, writes to a temporary remote file first, and then renames it.
    :return:
    """
    if isinstance(remote_path, pathlib.Path):
        rmt_pth = remote_path
    else:
        rmt_pth = pathlib.Path(remote_path)

    if create_directories:
        spurplus.sftp._mkdir(
            sftp=self._sftp,
            remote_path=rmt_pth.parent,
            mode=0o777,
            parents=True,
            exist_ok=True)

    # Stage the payload locally, then transfer it in one put().
    with temppathlib.NamedTemporaryFile() as local_file:
        local_file.path.write_bytes(data)
        self.put(
            local_path=local_file.path.as_posix(),
            remote_path=rmt_pth.as_posix(),
            consistent=consistent,
            create_directories=create_directories)
def test_write_csv():
    """write_csv normalizes various indices and emits the same canonical CSV."""
    frame = pd.DataFrame({
        CommonFields.DATE: ["2020-04-01", "2020-04-02"],
        CommonFields.FIPS: ["06045", "45123"],
        CommonFields.CASES: [234, 456],
    })
    frame_before = frame.copy()

    expected_csv = """fips,date,cases
06045,2020-04-01,234
45123,2020-04-02,456
"""

    # Call common_df.write_csv with index set to ["fips", "date"], the expected normal index.
    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(
            frame.set_index(["fips", "date"]), tmp_file.path, structlog.get_logger())
        assert expected_csv == tmp_file.file.read()
        assert [record["event"] for record in captured] == ["Writing DataFrame"]

    # Pass a frame with another index that will be changed. Check that the same
    # output is written to the file.
    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(frame, tmp_file.path, structlog.get_logger())
        assert expected_csv == tmp_file.file.read()
        assert [record["event"] for record in captured] == [
            "Fixing DataFrame index", "Writing DataFrame"]

    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(
            frame.set_index(["date", "cases"]), tmp_file.path, structlog.get_logger())
        assert expected_csv == tmp_file.file.read()
        assert [record["event"] for record in captured] == [
            "Fixing DataFrame index", "Writing DataFrame"]

    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(
            frame.set_index(["date", "fips"]), tmp_file.path, structlog.get_logger())
        assert expected_csv == tmp_file.file.read()
        assert [record["event"] for record in captured] == [
            "Fixing DataFrame index", "Writing DataFrame"]

    # The input DataFrame must not be mutated by write_csv.
    assert repr(frame) == repr(frame_before)
def test_that_it_works(self):
    """The temporary file exists inside the context and is removed afterwards."""
    seen_path = None  # type: Optional[pathlib.Path]
    with temppathlib.NamedTemporaryFile() as tmp:
        self.assertIsNotNone(tmp.file)
        self.assertTrue(tmp.path.exists())
        seen_path = tmp.path

    # Leaving the context must delete the file.
    self.assertFalse(seen_path.exists())
def test_write_csv_empty():
    """An empty DataFrame is written as just the header row."""
    empty_frame = pd.DataFrame(
        [], columns=[CommonFields.DATE, CommonFields.FIPS, CommonFields.CASES])
    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(empty_frame, tmp_file.path, structlog.get_logger())
        assert "fips,date,cases\n" == tmp_file.file.read()
        assert [record["event"] for record in captured] == [
            "Fixing DataFrame index", "Writing DataFrame"]
def test_read_csv():
    """A CSV file on disk is read into a DataFrame indexed by (fips, date)."""
    csv_text = """fips,date,cases
06045,2020-04-01,234
45123,2020-04-02,456
"""
    with temppathlib.NamedTemporaryFile("w+") as tmp_file:
        tmp_file.path.write_text(csv_text)
        frame = read_csv_to_indexed_df(tmp_file.path)
        assert one(frame.loc[("06045", "2020-04-01"), "cases"]) == 234
def test_remove_index_column():
    """A stray column literally named 'index' is dropped before writing."""
    frame = pd.DataFrame(
        [("99", "2020-04-01", "a", 123)],
        columns=["fips", "date", "index", "cases"],
    ).set_index(COMMON_FIELDS_TIMESERIES_KEYS)

    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(frame, tmp_file.path, structlog.get_logger())
        assert "fips,date,cases\n99,2020-04-01,123\n" == tmp_file.file.read()
        assert [record["event"] for record in captured] == [
            "Dropping column named 'index'", "Writing DataFrame"]
def test_write_csv_columns_are_sorted_in_output_with_extras():
    """Output columns come out ordered (common fields, then extras) regardless of input order."""
    frame = pd.DataFrame(
        [],
        columns=[CommonFields.DATE, CommonFields.FIPS, "extra2", CommonFields.CASES, "extra1"],
    ).set_index(COMMON_FIELDS_TIMESERIES_KEYS)

    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(frame, tmp_file.path, structlog.get_logger())
        assert "fips,date,cases,extra1,extra2\n" == tmp_file.file.read()
        assert [record["event"] for record in captured] == ["Writing DataFrame"]
def test_read_csv_no_index():
    """With set_index=False the CSV is read without building the (fips, date) index."""
    csv_text = """fips,date,cases
06045,2020-04-01,234
45123,2020-04-02,456
"""
    with temppathlib.NamedTemporaryFile("w+") as tmp_file:
        tmp_file.path.write_text(csv_text)
        frame = common_df.read_csv(tmp_file.path, set_index=False)
        assert list(frame.iloc[0]) == [
            "06045", pd.Timestamp("2020-04-01 00:00:00"), 234]
def test_write_csv_extra_columns_dropped():
    """only_common_columns removes non-common columns before writing."""
    frame = pd.DataFrame(
        [],
        columns=[CommonFields.DATE, CommonFields.FIPS, "extra1", CommonFields.CASES, "extra2"],
    ).set_index(COMMON_FIELDS_TIMESERIES_KEYS)

    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        logger = structlog.get_logger()
        common_df.write_csv(only_common_columns(frame, logger), tmp_file.path, logger)
        assert "fips,date,cases\n" == tmp_file.file.read()
        assert [record["event"] for record in captured] == [
            "Dropping columns not in CommonFields", "Writing DataFrame"]
def test_same_md5(self) -> None:
    """After an unchanged upload, same_md5 must report a match."""
    with temppathlib.NamedTemporaryFile() as local_file:
        local_file.path.write_text(tests.common.GCS_FILE_CONTENT)
        url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                       self.bucket_prefix)
        try:
            tests.common.call_gsutil_cp(
                src=local_file.path.as_posix(), dst=url, recursive=False)
            self.assertTrue(
                self.client.same_md5(path=local_file.path, url=url))
        finally:
            tests.common.call_gsutil_rm(path=url, recursive=False)
def test_same_modtime(self) -> None:
    """After an upload preserving POSIX attributes, same_modtime must match."""
    with temppathlib.NamedTemporaryFile() as local_file:
        local_file.path.touch()
        local_file.path.write_text(tests.common.GCS_FILE_CONTENT)
        url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                       self.bucket_prefix)
        try:
            # ``-P`` preserves POSIX attributes, including the mtime.
            subprocess.check_call(
                ["gsutil", "cp", "-P", local_file.path.as_posix(), url])
            self.assertTrue(
                self.client.same_modtime(path=local_file.path, url=url))
        finally:
            tests.common.call_gsutil_rm(path=url, recursive=False)
def test_float_na_formatting():
    """Whole floats are written without a decimal part and NA/None as empty cells."""
    frame = pd.DataFrame(
        [("99", "2020-04-01", 1.0, 2, 3),
         ("99", "2020-04-02", pd.NA, pd.NA, None)],
        columns="fips date metric_a metric_b metric_c".split(),
    ).set_index(COMMON_FIELDS_TIMESERIES_KEYS)

    expected_csv = """fips,date,metric_a,metric_b,metric_c
99,2020-04-01,1,2,3
99,2020-04-02,,,
"""

    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(frame, tmp_file.path, structlog.get_logger())
        assert expected_csv == tmp_file.file.read()
        assert [record["event"] for record in captured] == ["Writing DataFrame"]
def test_stat(self) -> None:
    # Upload with ``gsutil cp -P`` (preserve POSIX attributes) and verify every
    # field reported by stat(): size, mtime, uid/gid, permission bits, and the
    # MD5/CRC32C checksums of the fixed GCS_FILE_CONTENT.
    with temppathlib.NamedTemporaryFile() as file:
        file.path.write_text(tests.common.GCS_FILE_CONTENT)
        url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                       self.bucket_prefix)
        try:
            subprocess.check_call(
                ["gsutil", "cp", "-P", file.path.as_posix(), url])

            gcs_stat = self.client.stat(url=url)
            self.assertIsNotNone(gcs_stat)
            self.assertIsInstance(gcs_stat, gswrap.Stat)

            file_stat = file.path.stat()
            self.assertIsNotNone(file_stat)

            # Bare asserts narrow the Optional types for the checks below.
            assert isinstance(gcs_stat, gswrap.Stat)
            self.assertEqual(file_stat.st_size, gcs_stat.content_length)

            assert isinstance(gcs_stat.file_mtime, datetime.datetime)
            # The remote mtime is compared at whole-second precision, so the
            # local mtime is truncated before comparing.
            self.assertEqual(
                datetime.datetime.utcfromtimestamp(
                    file_stat.st_mtime).replace(microsecond=0).timestamp(),
                gcs_stat.file_mtime.timestamp())

            assert isinstance(gcs_stat.posix_uid, str)
            assert isinstance(gcs_stat.posix_gid, str)
            self.assertEqual(file_stat.st_uid, int(gcs_stat.posix_uid))
            self.assertEqual(file_stat.st_gid, int(gcs_stat.posix_gid))
            # Only the three permission digits (e.g. "644") are compared.
            self.assertEqual(
                oct(file_stat.st_mode)[-3:], gcs_stat.posix_mode)

            # Checksums of the fixed content are known in advance, both as raw
            # bytes and as hex digests.
            assert gcs_stat.md5 is not None
            self.assertEqual(
                b'\xf2\r\x9f r\xbb\xebf\x91\xc0\xf9\xc5\t\x9b\x01\xf3',
                gcs_stat.md5)
            self.assertEqual('f20d9f2072bbeb6691c0f9c5099b01f3',
                             gcs_stat.md5.hex())

            assert gcs_stat.crc32c is not None
            self.assertEqual(b'\xd1\x04\x0c\xa8', gcs_stat.crc32c)
            self.assertEqual('d1040ca8', gcs_stat.crc32c.hex())
        finally:
            tests.common.call_gsutil_rm(path=url, recursive=False)
def test_write_bytes(self) -> None:
    """write_bytes stores the data so that a download returns it verbatim."""
    url = "gs://{}/{}/file".format(
        tests.common.TEST_GCS_BUCKET, self.bucket_prefix)
    try:
        self.client.write_bytes(url=url, data=b'hello')

        with temppathlib.NamedTemporaryFile() as local_file:
            tests.common.call_gsutil_cp(
                src=url, dst=local_file.path.as_posix(), recursive=False)
            self.assertEqual(b'hello', local_file.path.read_bytes())
    finally:
        tests.common.call_gsutil_rm(path=url, recursive=False)
def main() -> int:
    """
    Execute the main routine.

    Writes a small script that calls logthis.say/logthis.err, runs it, and
    compares its stdout/stderr byte-for-byte against the expected colorized
    output (file name, line number and a UTC timestamp).
    """
    with temppathlib.NamedTemporaryFile(mode="wt", prefix="logthis_temporary", suffix=".py") as tmp:
        tmp.file.write('#!/usr/bin/env python3\n'
                       'import logthis\n'
                       'logthis.say("Hello!")\n'
                       'logthis.err("Wrong.")\n')
        tmp.file.flush()
        # Close the handle so the script can be executed; the path is still
        # removed by the context manager on exit.
        tmp.file.close()
        tmp.path.chmod(0o700)

        proc = subprocess.Popen(
            [tmp.path.as_posix()], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError(
                "Temporary script failed. Stdout:\n{}\nStderr:\n{}\n".format(out, err))

        name = tmp.path.name
        # NOTE(review): the timestamp is taken *after* the script ran; if the
        # second ticks over in between, the comparison below fails — confirm
        # this flakiness is acceptable.
        now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")

        # ``3`` and ``4`` are the line numbers of say()/err() in the script
        # written above; \x1b[34m / \x1b[31m are the blue/red ANSI codes.
        expected_out = b'\x1b[34m' + name.encode() + b': 3: ' + now.encode() + b':\x1b[0m Hello!\n'
        if out != expected_out:
            # Point an ASCII arrow at the first differing byte.
            for i, (expected, got) in enumerate(zip(expected_out, out)):
                if expected != got:
                    arrow = " " * (10 + len("{}".format(out[:i - 1]))) + "^"
                    raise AssertionError(
                        "Unexpected STDOUT:\nExpected: {}\nBut got: {}\n{}".format(
                            expected_out, out, arrow))

        expected_err = b'\x1b[31m' + name.encode() + b': 4: ' + now.encode() + b':\x1b[0m Wrong.\n'
        if err != expected_err:
            for i, (expected, got) in enumerate(zip(expected_err, err)):
                if expected != got:
                    arrow = " " * (10 + len("{}".format(err[:i - 1]))) + "^"
                    raise AssertionError(
                        "Unexpected STDERR:\nExpected: {}\nBut got: {}\n{}".format(
                            expected_err, err, arrow))

        sys.stdout.write(expected_out.decode())
        sys.stderr.write(expected_err.decode())

    return 0
def test_read_text(self) -> None:
    """
    Test that read_text decodes a remote blob with the given encoding.

    A local file written in ISO-8859-1 is uploaded and read back through the
    client with the matching encoding.
    """
    with temppathlib.NamedTemporaryFile() as file:
        file.path.write_text(tests.common.GCS_FILE_CONTENT,
                             encoding='iso-8859-1')
        # Build the URL once instead of re-formatting it at every call site.
        url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                       self.bucket_prefix)
        try:
            tests.common.call_gsutil_cp(
                src=file.path.as_posix(), dst=url, recursive=False)

            content = self.client.read_text(url=url, encoding='iso-8859-1')
            self.assertEqual(tests.common.GCS_FILE_CONTENT, content)
        finally:
            # Pass recursive=False explicitly, consistent with every other
            # call_gsutil_rm call in this suite; the original omitted it here.
            tests.common.call_gsutil_rm(path=url, recursive=False)
def test_write_csv_without_date():
    """write_csv accepts a custom index via index_names when there is no date column."""
    frame = pd.DataFrame({
        CommonFields.FIPS: ["06045", "45123"],
        "extra_index": ["idx_1", "idx_2"],
        CommonFields.CASES: [234, 456],
        "extra_column": ["extra_data", None],
    })

    expected_csv = """fips,extra_index,cases,extra_column
06045,idx_1,234,extra_data
45123,idx_2,456,
"""

    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(
            frame,
            tmp_file.path,
            structlog.get_logger(),
            index_names=[CommonFields.FIPS, "extra_index"],
        )
        assert expected_csv == tmp_file.file.read()
        assert [record["event"] for record in captured] == [
            "Fixing DataFrame index", "Writing DataFrame"]
def test_float_formatting():
    """Numeric columns round-trip without spurious precision or trailing zeros."""
    input_csv = """fips,date,col_1,col_2,col_3,col_4,col_5,col_6
99123,2020-04-01,1,2.0000000,3,0.0004,0.00005,6000000000
99123,2020-04-02,,,,,,
99123,2020-04-03,1,2,3.1234567,4,5,6.0
"""
    frame = read_csv_to_indexed_df(StringIO(input_csv))

    expected_csv = """fips,date,col_1,col_2,col_3,col_4,col_5,col_6
99123,2020-04-01,1,2,3,0.0004,5e-05,6000000000
99123,2020-04-02,,,,,,
99123,2020-04-03,1,2,3.1234567,4,5,6
"""

    with temppathlib.NamedTemporaryFile(
            "w+") as tmp_file, structlog.testing.capture_logs() as captured:
        common_df.write_csv(frame, tmp_file.path, structlog.get_logger())
        assert expected_csv == tmp_file.file.read()
        assert [record["event"] for record in captured] == ["Writing DataFrame"]
def test_write_csv():
    """wide_dates_df.write_csv emits summary columns plus one value column per date."""
    frame = pd.DataFrame({
        CommonFields.DATE: pd.to_datetime(["2020-04-01", "2020-04-02"]),
        CommonFields.FIPS: ["06045", "45123"],
        CommonFields.CASES: [234, 456],
    })
    dataset = TimeseriesDataset(frame)

    expected_csv = """,,summary,summary,summary,summary,summary,summary,summary,summary,summary,value,value
date,,has_value,min_date,max_date,max_value,min_value,latest_value,num_observations,largest_delta,largest_delta_date,2020-04-01 00:00:00,2020-04-02 00:00:00
fips,variable,,,,,,,,,,,
06045,cases,True,2020-04-01,2020-04-01,234,234,234,1,,,234,
45123,cases,True,2020-04-02,2020-04-02,456,456,456,1,,,,456
"""

    # Write the wide-dates form and compare against the exact expected CSV.
    with temppathlib.NamedTemporaryFile("w+") as tmp_file:
        wide_dates_df.write_csv(dataset.get_date_columns(), tmp_file.path)
        assert expected_csv == tmp_file.file.read()
def test_wo_contracts(self):
    # A module whose functions and methods carry no icontract decorators must
    # produce no lint errors.
    text = textwrap.dedent("""\
        def some_func(x: int) -> int:
            pass

        class SomeClass:
            def some_method(self, x: int) -> int:
                pass

            @classmethod
            def some_class_method(self, x: int) -> int:
                pass

            @staticmethod
            def some_static_method(self, x: int) -> int:
                pass
        """)
    with temppathlib.NamedTemporaryFile() as tmp, sys_path_with(tmp.path.parent):
        tmp.path.write_text(text)

        errors = icontract_lint.check_file(path=tmp.path)
        self.assertListEqual([], errors)
def test_with_dir(self):
    """A temporary file can be created inside a supplied temporary directory."""
    with temppathlib.TemporaryDirectory() as tmp_dir:
        with temppathlib.NamedTemporaryFile(dir=tmp_dir.path) as named_file:
            self.assertIsNotNone(named_file.file)
            self.assertTrue(named_file.path.exists())