Ejemplo n.º 1
0
    def test_cp_local_no_clobber(self) -> None:
        """Copy one local file onto another with ``no_clobber`` and expect the destination unchanged."""
        with temppathlib.NamedTemporaryFile() as src_tmp, \
                temppathlib.NamedTemporaryFile() as dst_tmp:
            src_tmp.path.write_text("hello")
            dst_tmp.path.write_text("hello there")

            src = src_tmp.path.as_posix()
            dst = dst_tmp.path.as_posix()
            self.client.cp(src=src, dst=dst, no_clobber=True)

            # The destination existed before the copy, so its content must survive.
            remaining = dst_tmp.path.read_text()
            self.assertEqual("hello there", remaining)
Ejemplo n.º 2
0
    def test_upload_preserved_posix(self) -> None:
        """Upload a local file with ``preserve_posix=True`` and compare the remote stat with the local one."""
        with temppathlib.NamedTemporaryFile() as local:
            local.path.write_text(tests.common.TEST_GCS_BUCKET)

            url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                           self.bucket_prefix)

            self.client.cp(src=local.path.as_posix(),
                           dst=url,
                           preserve_posix=True)

            try:
                remote_stat = self.client.stat(url=url)
                self.assertIsNotNone(remote_stat)

                local_stat = local.path.stat()
                self.assertIsNotNone(local_stat)

                # Size of the uploaded object must match the local file.
                assert isinstance(remote_stat, gswrap.Stat)
                self.assertEqual(local_stat.st_size,
                                 remote_stat.content_length)

                # Modification time is compared at whole-second resolution.
                assert isinstance(remote_stat.file_mtime, datetime.datetime)
                local_mtime = datetime.datetime.utcfromtimestamp(
                    local_stat.st_mtime)
                expected_timestamp = local_mtime.replace(
                    microsecond=0).timestamp()
                self.assertEqual(expected_timestamp,
                                 remote_stat.file_mtime.timestamp())

                # Owner, group and the last three octal digits of the mode.
                assert isinstance(remote_stat.posix_uid, str)
                assert isinstance(remote_stat.posix_gid, str)
                self.assertEqual(local_stat.st_uid, int(remote_stat.posix_uid))
                self.assertEqual(local_stat.st_gid, int(remote_stat.posix_gid))
                expected_mode = oct(local_stat.st_mode)[-3:]
                self.assertEqual(expected_mode, remote_stat.posix_mode)
            finally:
                tests.common.call_gsutil_rm(path=url, recursive=False)
Ejemplo n.º 3
0
    def test_write_text(self) -> None:
        """Write the same text with two encodings and verify each by downloading it with gsutil."""
        utf_url = "gs://{}/{}/utf-file".format(tests.common.TEST_GCS_BUCKET,
                                               self.bucket_prefix)
        iso_url = "gs://{}/{}/iso-file".format(tests.common.TEST_GCS_BUCKET,
                                               self.bucket_prefix)

        try:
            self.client.write_text(url=utf_url,
                                   text=tests.common.GCS_FILE_CONTENT,
                                   encoding='utf-8')
            self.client.write_text(url=iso_url,
                                   text=tests.common.GCS_FILE_CONTENT,
                                   encoding='iso-8859-1')

            with temppathlib.NamedTemporaryFile() as downloaded:
                # Fetch the UTF-8 object and decode it with the encoding it was written in.
                tests.common.call_gsutil_cp(src=utf_url,
                                            dst=downloaded.path.as_posix(),
                                            recursive=False)
                utf_content = downloaded.path.read_text(encoding='utf-8')

                self.assertEqual(tests.common.GCS_FILE_CONTENT, utf_content)

                # The same local file is reused for the ISO-8859-1 object.
                tests.common.call_gsutil_cp(src=iso_url,
                                            dst=downloaded.path.as_posix(),
                                            recursive=False)
                iso_content = downloaded.path.read_text(encoding='iso-8859-1')

                self.assertEqual(tests.common.GCS_FILE_CONTENT, iso_content)
        finally:
            tests.common.call_gsutil_rm(path=utf_url, recursive=False)
            tests.common.call_gsutil_rm(path=iso_url, recursive=False)
Ejemplo n.º 4
0
    def read_bytes(self, remote_path: Union[str, pathlib.Path]) -> bytes:
        """
        Read the binary data from a remote file.

        First the remote file is copied to a temporary local file making sure that the connection is reestablished if
        needed. Next the data is read.

        :param remote_path: to the file
        :return: binary content of the file
        :raises PermissionError: if the remote path could not be accessed
        :raises FileNotFoundError: if the remote path was not found
        """
        rmt_pth_str = remote_path if isinstance(remote_path, str) else remote_path.as_posix()

        try:
            with temppathlib.NamedTemporaryFile() as tmp:
                self.get(remote_path=rmt_pth_str, local_path=tmp.path.as_posix(), consistent=True)
                return tmp.path.read_bytes()
        except PermissionError as err:
            # Re-raise with a message naming the remote path, keeping the low-level error as the cause.
            raise PermissionError("The remote path could not be accessed: {}".format(rmt_pth_str)) from err
        except FileNotFoundError as err:
            raise FileNotFoundError("The remote path was not found: {}".format(rmt_pth_str)) from err
Ejemplo n.º 5
0
    def write_bytes(self,
                    remote_path: Union[str, pathlib.Path],
                    data: bytes,
                    create_directories: bool = True,
                    consistent: bool = True) -> None:
        """
        Write the binary data to a remote file.

        The data is first stored in a temporary local file, which is then uploaded to the remote path with ``put``.

        :param remote_path: to the file
        :param data: to be written
        :param create_directories: if set, creates the parent directory of the remote path with mode 0o777
        :param consistent: if set, writes to a temporary remote file first, and then renames it.
        :return:
        """
        if isinstance(remote_path, pathlib.Path):
            target = remote_path
        else:
            target = pathlib.Path(remote_path)

        if create_directories:
            spurplus.sftp._mkdir(
                sftp=self._sftp,
                remote_path=target.parent,
                mode=0o777,
                parents=True,
                exist_ok=True)

        with temppathlib.NamedTemporaryFile() as staging:
            staging.path.write_bytes(data)
            self.put(
                local_path=staging.path.as_posix(),
                remote_path=target.as_posix(),
                consistent=consistent,
                create_directories=create_directories)
Ejemplo n.º 6
0
def test_write_csv():
    """Write the same frame under several indexes and expect identical CSV output and an unmodified input."""
    df = pd.DataFrame({
        CommonFields.DATE: ["2020-04-01", "2020-04-02"],
        CommonFields.FIPS: ["06045", "45123"],
        CommonFields.CASES: [234, 456],
    })
    df_original = df.copy()
    expected_csv = """fips,date,cases
06045,2020-04-01,234
45123,2020-04-02,456
"""
    # Each scenario is (columns to index by, or None for the frame as-is; expected log events).
    scenarios = [
        # The expected normal index: nothing needs fixing.
        (["fips", "date"], ["Writing DataFrame"]),
        # Any other index is changed before writing, yet the same output is written.
        (None, ["Fixing DataFrame index", "Writing DataFrame"]),
        (["date", "cases"], ["Fixing DataFrame index", "Writing DataFrame"]),
        (["date", "fips"], ["Fixing DataFrame index", "Writing DataFrame"]),
    ]

    for index_columns, expected_events in scenarios:
        with temppathlib.NamedTemporaryFile(
                "w+") as tmp, structlog.testing.capture_logs() as logs:
            if index_columns is None:
                frame = df
            else:
                frame = df.set_index(index_columns)
            common_df.write_csv(frame, tmp.path, structlog.get_logger())
            assert expected_csv == tmp.file.read()
        assert [entry["event"] for entry in logs] == expected_events

    # The input frame must not have been modified by any of the writes.
    assert repr(df) == repr(df_original)
Ejemplo n.º 7
0
    def test_that_it_works(self):
        """Check that the temporary file exists inside the context and no longer exists after it."""
        with temppathlib.NamedTemporaryFile() as tmp:
            self.assertIsNotNone(tmp.file)
            self.assertTrue(tmp.path.exists())

            # Keep the path so that it can be checked once the context is left.
            remembered = tmp.path

        self.assertFalse(remembered.exists())
Ejemplo n.º 8
0
def test_write_csv_empty():
    """Write a frame without rows and expect only the header line plus both log events."""
    header_columns = [CommonFields.DATE, CommonFields.FIPS, CommonFields.CASES]
    empty_frame = pd.DataFrame([], columns=header_columns)

    with temppathlib.NamedTemporaryFile(
            "w+") as tmp, structlog.testing.capture_logs() as logs:
        common_df.write_csv(empty_frame, tmp.path, structlog.get_logger())
        written = tmp.file.read()
        assert "fips,date,cases\n" == written

    events = [entry["event"] for entry in logs]
    assert events == ["Fixing DataFrame index", "Writing DataFrame"]
Ejemplo n.º 9
0
def test_read_csv():
    """Read a small CSV into an indexed frame and look up one value by (fips, date)."""
    input_csv = """fips,date,cases
06045,2020-04-01,234
45123,2020-04-02,456
    """

    with temppathlib.NamedTemporaryFile("w+") as tmp:
        csv_path = tmp.path
        csv_path.write_text(input_csv)
        df = read_csv_to_indexed_df(csv_path)

    selected = df.loc[("06045", "2020-04-01"), "cases"]
    assert one(selected) == 234
Ejemplo n.º 10
0
def test_remove_index_column():
    """Write a frame holding a column named 'index' and expect it to be absent from the CSV and logged."""
    rows = [("99", "2020-04-01", "a", 123)]
    frame = pd.DataFrame(rows, columns=["fips", "date", "index", "cases"])
    frame = frame.set_index(COMMON_FIELDS_TIMESERIES_KEYS)

    with temppathlib.NamedTemporaryFile("w+") as tmp, structlog.testing.capture_logs() as logs:
        common_df.write_csv(frame, tmp.path, structlog.get_logger())
        written = tmp.file.read()
        assert "fips,date,cases\n99,2020-04-01,123\n" == written

    events = [entry["event"] for entry in logs]
    assert events == ["Dropping column named 'index'", "Writing DataFrame"]
Ejemplo n.º 11
0
def test_write_csv_columns_are_sorted_in_output_with_extras():
    """Write an empty frame with extra columns and check the order of the columns in the header."""
    input_columns = [CommonFields.DATE, CommonFields.FIPS, "extra2", CommonFields.CASES, "extra1"]
    frame = pd.DataFrame([], columns=input_columns).set_index(COMMON_FIELDS_TIMESERIES_KEYS)

    with temppathlib.NamedTemporaryFile("w+") as tmp, structlog.testing.capture_logs() as logs:
        logger = structlog.get_logger()
        common_df.write_csv(frame, tmp.path, logger)
        header = tmp.file.read()
        assert "fips,date,cases,extra1,extra2\n" == header

    events = [entry["event"] for entry in logs]
    assert events == ["Writing DataFrame"]
Ejemplo n.º 12
0
def test_read_csv_no_index():
    """Read a small CSV with ``set_index=False`` and compare the values of the first row."""
    input_csv = """fips,date,cases
06045,2020-04-01,234
45123,2020-04-02,456
    """

    with temppathlib.NamedTemporaryFile("w+") as tmp:
        csv_path = tmp.path
        csv_path.write_text(input_csv)
        df = common_df.read_csv(csv_path, set_index=False)

    first_row = list(df.iloc[0])
    assert first_row == ["06045", pd.Timestamp("2020-04-01 00:00:00"), 234]
Ejemplo n.º 13
0
def test_write_csv_extra_columns_dropped():
    """Filter a frame through ``only_common_columns`` before writing and expect the extra columns to be gone."""
    input_columns = [CommonFields.DATE, CommonFields.FIPS, "extra1", CommonFields.CASES, "extra2"]
    frame = pd.DataFrame([], columns=input_columns)
    frame = frame.set_index(COMMON_FIELDS_TIMESERIES_KEYS)

    with temppathlib.NamedTemporaryFile("w+") as tmp, structlog.testing.capture_logs() as logs:
        logger = structlog.get_logger()
        filtered = only_common_columns(frame, logger)
        common_df.write_csv(filtered, tmp.path, logger)
        assert "fips,date,cases\n" == tmp.file.read()

    events = [entry["event"] for entry in logs]
    assert events == ["Dropping columns not in CommonFields", "Writing DataFrame"]
Ejemplo n.º 14
0
    def test_same_md5(self) -> None:
        """Upload a local file with gsutil and expect ``same_md5`` to report the local and remote copy as equal."""
        with temppathlib.NamedTemporaryFile() as local:
            local.path.write_text(tests.common.GCS_FILE_CONTENT)

            url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                           self.bucket_prefix)

            try:
                tests.common.call_gsutil_cp(src=local.path.as_posix(),
                                            dst=url,
                                            recursive=False)

                matches = self.client.same_md5(path=local.path, url=url)
                self.assertTrue(matches)
            finally:
                tests.common.call_gsutil_rm(path=url, recursive=False)
Ejemplo n.º 15
0
    def test_same_modtime(self) -> None:
        """Upload a local file with ``gsutil cp -P`` and expect ``same_modtime`` to hold for both copies."""
        with temppathlib.NamedTemporaryFile() as local:
            local.path.touch()
            local.path.write_text(tests.common.GCS_FILE_CONTENT)
            url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                           self.bucket_prefix)

            try:
                upload_cmd = ["gsutil", "cp", "-P", local.path.as_posix(), url]
                subprocess.check_call(upload_cmd)

                matches = self.client.same_modtime(path=local.path, url=url)
                self.assertTrue(matches)
            finally:
                tests.common.call_gsutil_rm(path=url, recursive=False)
Ejemplo n.º 16
0
def test_float_na_formatting():
    """Write a frame with missing values and expect them to be written as empty CSV fields."""
    rows = [("99", "2020-04-01", 1.0, 2, 3), ("99", "2020-04-02", pd.NA, pd.NA, None)]
    column_names = "fips date metric_a metric_b metric_c".split()
    frame = pd.DataFrame(rows, columns=column_names).set_index(COMMON_FIELDS_TIMESERIES_KEYS)

    expected_csv = """fips,date,metric_a,metric_b,metric_c
99,2020-04-01,1,2,3
99,2020-04-02,,,
"""

    with temppathlib.NamedTemporaryFile("w+") as tmp, structlog.testing.capture_logs() as logs:
        common_df.write_csv(frame, tmp.path, structlog.get_logger())
        written = tmp.file.read()
        assert expected_csv == written

    events = [entry["event"] for entry in logs]
    assert events == ["Writing DataFrame"]
Ejemplo n.º 17
0
    def test_stat(self) -> None:
        """Upload a local file with ``gsutil cp -P`` and check every field of the remote stat."""
        with temppathlib.NamedTemporaryFile() as local:
            local.path.write_text(tests.common.GCS_FILE_CONTENT)

            url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                           self.bucket_prefix)
            try:
                upload_cmd = ["gsutil", "cp", "-P", local.path.as_posix(), url]
                subprocess.check_call(upload_cmd)

                remote_stat = self.client.stat(url=url)
                self.assertIsNotNone(remote_stat)
                self.assertIsInstance(remote_stat, gswrap.Stat)

                local_stat = local.path.stat()
                self.assertIsNotNone(local_stat)

                # Size of the uploaded object must match the local file.
                assert isinstance(remote_stat, gswrap.Stat)
                self.assertEqual(local_stat.st_size,
                                 remote_stat.content_length)

                # Modification time is compared at whole-second resolution.
                assert isinstance(remote_stat.file_mtime, datetime.datetime)
                local_mtime = datetime.datetime.utcfromtimestamp(
                    local_stat.st_mtime)
                expected_timestamp = local_mtime.replace(
                    microsecond=0).timestamp()
                self.assertEqual(expected_timestamp,
                                 remote_stat.file_mtime.timestamp())

                # Owner, group and the last three octal digits of the mode.
                assert isinstance(remote_stat.posix_uid, str)
                assert isinstance(remote_stat.posix_gid, str)
                self.assertEqual(local_stat.st_uid, int(remote_stat.posix_uid))
                self.assertEqual(local_stat.st_gid, int(remote_stat.posix_gid))
                expected_mode = oct(local_stat.st_mode)[-3:]
                self.assertEqual(expected_mode, remote_stat.posix_mode)

                # Checksums are compared both as raw bytes and as hex digests.
                assert remote_stat.md5 is not None
                self.assertEqual(
                    b'\xf2\r\x9f r\xbb\xebf\x91\xc0\xf9\xc5\t\x9b\x01\xf3',
                    remote_stat.md5)
                self.assertEqual('f20d9f2072bbeb6691c0f9c5099b01f3',
                                 remote_stat.md5.hex())

                assert remote_stat.crc32c is not None
                self.assertEqual(b'\xd1\x04\x0c\xa8', remote_stat.crc32c)
                self.assertEqual('d1040ca8', remote_stat.crc32c.hex())
            finally:
                tests.common.call_gsutil_rm(path=url, recursive=False)
Ejemplo n.º 18
0
    def test_write_bytes(self) -> None:
        """Write bytes to a remote object and verify them by downloading the object with gsutil."""
        url = "gs://{}/{}/file".format(tests.common.TEST_GCS_BUCKET,
                                       self.bucket_prefix)

        try:
            self.client.write_bytes(url=url, data=b'hello')

            with temppathlib.NamedTemporaryFile() as downloaded:
                tests.common.call_gsutil_cp(src=url,
                                            dst=downloaded.path.as_posix(),
                                            recursive=False)
                content = downloaded.path.read_bytes()

                self.assertEqual(b'hello', content)
        finally:
            tests.common.call_gsutil_rm(path=url, recursive=False)
Ejemplo n.º 19
0
def _raise_on_stream_mismatch(label: str, expected: bytes, got: bytes) -> None:
    """
    Raise an AssertionError pointing at the first difference between the expected and the captured stream.

    :param label: name of the stream used in the error message (e.g., "STDOUT")
    :param expected: bytes that the script should have produced
    :param got: bytes that the script actually produced
    :raises AssertionError: if the two byte strings differ in content or in length
    """
    if got == expected:
        return

    # If one stream is a strict prefix of the other, no pair of bytes differs; the difference
    # then starts right after the shorter one.
    first_diff = next(
        (i for i, (expected_byte, got_byte) in enumerate(zip(expected, got)) if expected_byte != got_byte),
        min(len(expected), len(got)))

    # "But got:  " is 10 characters wide. The repr of the matching prefix ends with a closing
    # quote, which is subtracted so that the arrow lands on the first differing character.
    arrow = " " * (10 + len(repr(got[:first_diff])) - 1) + "^"
    raise AssertionError("Unexpected {}:\nExpected: {}\nBut got:  {}\n{}".format(label, expected, got, arrow))


def main() -> int:
    """
    executes the main routine.

    A temporary script that calls ``logthis.say`` and ``logthis.err`` is written, made executable and run.
    Its STDOUT and STDERR are compared against the expected colored lines, which are then echoed.

    :return: 0 on success
    :raises RuntimeError: if the temporary script exits with a non-zero code
    :raises AssertionError: if the captured STDOUT or STDERR differs from the expected output
    """
    with temppathlib.NamedTemporaryFile(mode="wt", prefix="logthis_temporary", suffix=".py") as tmp:
        tmp.file.write('#!/usr/bin/env python3\n'
                       'import logthis\n'
                       'logthis.say("Hello!")\n'
                       'logthis.err("Wrong.")\n')
        tmp.file.flush()
        tmp.file.close()

        tmp.path.chmod(0o700)

        proc = subprocess.Popen([tmp.path.as_posix()], stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        out, err = proc.communicate()
        if proc.returncode != 0:
            raise RuntimeError("Temporary script failed. Stdout:\n{}\nStderr:\n{}\n".format(out, err))

        name = tmp.path.name
        # NOTE(review): the timestamp is taken after the script has already run, so the comparison
        # can fail if the clock crosses a second boundary in between.
        now = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%SZ")

        # The numbers 3 and 4 are the lines of the say/err calls in the script written above.
        expected_out = b'\x1b[34m' + name.encode() + b':     3: ' + now.encode() + b':\x1b[0m Hello!\n'
        _raise_on_stream_mismatch(label="STDOUT", expected=expected_out, got=out)

        expected_err = b'\x1b[31m' + name.encode() + b':     4: ' + now.encode() + b':\x1b[0m Wrong.\n'
        _raise_on_stream_mismatch(label="STDERR", expected=expected_err, got=err)

        sys.stdout.write(expected_out.decode())
        sys.stderr.write(expected_err.decode())

    return 0
Ejemplo n.º 20
0
    def test_read_text(self) -> None:
        """Upload an ISO-8859-1 encoded file with gsutil and read it back as text through the client."""
        with temppathlib.NamedTemporaryFile() as local:
            local.path.write_text(tests.common.GCS_FILE_CONTENT,
                                  encoding='iso-8859-1')

            try:
                upload_url = "gs://{}/{}/file".format(
                    tests.common.TEST_GCS_BUCKET, self.bucket_prefix)
                tests.common.call_gsutil_cp(src=local.path.as_posix(),
                                            dst=upload_url,
                                            recursive=False)

                read_url = "gs://{}/{}/file".format(
                    tests.common.TEST_GCS_BUCKET, self.bucket_prefix)
                content = self.client.read_text(url=read_url,
                                                encoding='iso-8859-1')
                self.assertEqual(tests.common.GCS_FILE_CONTENT, content)
            finally:
                cleanup_url = "gs://{}/{}/file".format(
                    tests.common.TEST_GCS_BUCKET, self.bucket_prefix)
                tests.common.call_gsutil_rm(path=cleanup_url)
Ejemplo n.º 21
0
def test_write_csv_without_date():
    """Write a frame that has no date column, using explicitly passed index names."""
    frame = pd.DataFrame(
        {
            CommonFields.FIPS: ["06045", "45123"],
            "extra_index": ["idx_1", "idx_2"],
            CommonFields.CASES: [234, 456],
            "extra_column": ["extra_data", None],
        }
    )
    expected_csv = """fips,extra_index,cases,extra_column
06045,idx_1,234,extra_data
45123,idx_2,456,
"""
    with temppathlib.NamedTemporaryFile("w+") as tmp, structlog.testing.capture_logs() as logs:
        index_names = [CommonFields.FIPS, "extra_index"]
        common_df.write_csv(frame, tmp.path, structlog.get_logger(), index_names=index_names)
        written = tmp.file.read()
        assert expected_csv == written

    events = [entry["event"] for entry in logs]
    assert events == ["Fixing DataFrame index", "Writing DataFrame"]
Ejemplo n.º 22
0
def test_float_formatting():
    """Round-trip a CSV with differently formatted numbers and compare the written text with the expected one."""
    input_csv = """fips,date,col_1,col_2,col_3,col_4,col_5,col_6
99123,2020-04-01,1,2.0000000,3,0.0004,0.00005,6000000000
99123,2020-04-02,,,,,,
99123,2020-04-03,1,2,3.1234567,4,5,6.0
"""
    expected_csv = """fips,date,col_1,col_2,col_3,col_4,col_5,col_6
99123,2020-04-01,1,2,3,0.0004,5e-05,6000000000
99123,2020-04-02,,,,,,
99123,2020-04-03,1,2,3.1234567,4,5,6
"""

    frame = read_csv_to_indexed_df(StringIO(input_csv))

    with temppathlib.NamedTemporaryFile("w+") as tmp, structlog.testing.capture_logs() as logs:
        common_df.write_csv(frame, tmp.path, structlog.get_logger())
        written = tmp.file.read()
        assert expected_csv == written

    events = [entry["event"] for entry in logs]
    assert events == ["Writing DataFrame"]
def test_write_csv():
    """Write the date columns of a small timeseries dataset and compare the text with the expected wide CSV."""
    frame = pd.DataFrame({
        CommonFields.DATE:
        pd.to_datetime(["2020-04-01", "2020-04-02"]),
        CommonFields.FIPS: ["06045", "45123"],
        CommonFields.CASES: [234, 456],
    })
    dataset = TimeseriesDataset(frame)

    expected_csv = """,,summary,summary,summary,summary,summary,summary,summary,summary,summary,value,value
date,,has_value,min_date,max_date,max_value,min_value,latest_value,num_observations,largest_delta,largest_delta_date,2020-04-01 00:00:00,2020-04-02 00:00:00
fips,variable,,,,,,,,,,,
06045,cases,True,2020-04-01,2020-04-01,234,234,234,1,,,234,
45123,cases,True,2020-04-02,2020-04-02,456,456,456,1,,,,456
"""
    # Call wide_dates_df.write_csv with the date columns of the dataset.
    with temppathlib.NamedTemporaryFile("w+") as tmp:
        date_columns = dataset.get_date_columns()
        wide_dates_df.write_csv(date_columns, tmp.path)
        written = tmp.file.read()
        assert expected_csv == written
Ejemplo n.º 24
0
    def test_wo_contracts(self):
        """Lint a module that declares no contracts and expect an empty list of errors."""
        source = textwrap.dedent("""\
                def some_func(x: int) -> int:
                    pass
                    
                class SomeClass:
                    def some_method(self, x: int) -> int:
                        pass
                        
                    @classmethod
                    def some_class_method(self, x: int) -> int:
                        pass
                        
                    @staticmethod
                    def some_static_method(self, x: int) -> int:
                        pass
                """)

        with temppathlib.NamedTemporaryFile() as tmp, sys_path_with(tmp.path.parent):
            module_path = tmp.path
            module_path.write_text(source)
            found = icontract_lint.check_file(path=module_path)

            self.assertListEqual([], found)
Ejemplo n.º 25
0
 def test_with_dir(self):
     """Create a named temporary file inside a temporary directory and check that it exists."""
     with temppathlib.TemporaryDirectory() as tmp_dir:
         # The ``dir`` argument receives the path of the temporary directory created above.
         with temppathlib.NamedTemporaryFile(dir=tmp_dir.path) as tmp:
             self.assertIsNotNone(tmp.file)
             self.assertTrue(tmp.path.exists())