Beispiel #1
0
    def test_read_csv(self):
        """Write one frame to two CSV files and read them back via the CSV target.

        The frame comes from ``self.create_dataframe()``; both files are
        written through ``atomic_write`` in path mode (``as_file=False``) into
        the same temporary directory, which is then handed to
        ``create_csv_target``. The ``test_1`` column of the result of
        ``read_dask`` is expected to have 8 entries.
        """
        frame = self.create_dataframe()
        with TemporaryDirectory() as workdir:
            # Same content goes into both files; only the file name differs.
            for csv_name in ("test-1.csv", "test-2.csv"):
                csv_path = os.path.join(workdir, csv_name)
                with atomic_write(csv_path, as_file=False) as staged_path:
                    frame.to_csv(staged_path)

            target = create_csv_target(workdir)
            # check_complete=False is forwarded as-is; its meaning is defined
            # by the target implementation, which is not visible here.
            loaded = target.read_dask(check_complete=False)
            column = loaded["test_1"]
            self.assertEqual(len(column), 8)
Beispiel #2
0
    def test_file_exists(self):
        """Ensure an error is raised when a file exists"""
        # Function-scope imports: the rest of the module's import block is not
        # guaranteed to provide these names.
        import os
        from tempfile import TemporaryDirectory

        # A real, named file is required here. ``TemporaryFile().name`` is not
        # a filesystem path on POSIX, so it cannot stand in for an existing
        # target path.
        with TemporaryDirectory() as tmp:
            fp = os.path.join(tmp, "existing.txt")
            with open(fp, "wb") as existing:
                existing.write(b"already here!")

            with self.assertRaises(FileExistsError):
                with atomic_write(fp):
                    # Only reached if atomic_write failed to reject the
                    # pre-existing target; AssertionError is not caught by
                    # assertRaises(FileExistsError), so the test fails.
                    raise AssertionError(
                        "File exists was not properly detected!")
Beispiel #3
0
    def test_atomic_write_by_name(self):
        """Ensure file name is correctly returned vs file object

        ``atomic_write`` is called with ``False`` as its third positional
        argument (presumably ``as_file`` -- confirm against its signature).
        The yielded value must split into a ``str`` file name that keeps the
        target's ``.txt`` extension.
        """

        with TemporaryDirectory() as tmp:
            fp = path.join(tmp, "asdf.txt")
            with atomic_write(fp, "w", False) as f:
                # path.split raises TypeError for a file object, so reaching
                # the assertions already implies a path-like value was yielded.
                _, temp_filename = path.split(f)
                _, ext = path.splitext(temp_filename)
                self.assertIsInstance(temp_filename, str)
                self.assertEqual(".txt", ext)
Beispiel #4
0
    def test_atomic_failure(self):
        """Ensure that file does not exist after failure during write

        Raises ``FakeFileFailure`` inside the ``atomic_write`` block and then
        checks that neither the intermediate file (``f.name``) nor the target
        path is left behind.
        """

        with TemporaryDirectory() as tmp:
            fp = path.join(tmp, "asdf.txt")

            with self.assertRaises(FakeFileFailure):
                with atomic_write(fp, "w") as f:
                    tmpfile = f.name
                    # TestCase assertions instead of bare ``assert`` so the
                    # checks still run under ``python -O``.
                    self.assertTrue(path.exists(tmpfile))
                    raise FakeFileFailure()

            self.assertFalse(path.exists(tmpfile))
            self.assertFalse(path.exists(fp))
Beispiel #5
0
    def test_get_parquet_column(self):
        """Check that a single column can be read back out of a parquet file."""
        # Two-column frame; only ``test_1`` is compared afterwards.
        column_name = "test_1"
        expected_values = [1, 2]
        frame = DataFrame(data={column_name: expected_values, "test_2": [3, 4]})

        with TemporaryDirectory() as workdir:
            parquet_path = os.path.join(workdir, "test.parquet")
            # Path mode: pandas writes to the path yielded by atomic_write.
            with atomic_write(parquet_path, as_file=False) as staged_path:
                frame.to_parquet(staged_path)

            # compare_columns is a helper on this test class (not shown here).
            self.compare_columns(parquet_path, column_name, expected_values)
Beispiel #6
0
    def test_atomic_write(self):
        """Ensure file exists after being written successfully

        While the ``atomic_write`` block is open the target path must not
        exist yet; after it closes, the intermediate file (``f.name``) must be
        gone and the target must contain exactly what was written.
        """

        with TemporaryDirectory() as tmp:
            fp = path.join(tmp, "asdf.txt")

            with atomic_write(fp, "w") as f:
                # TestCase assertions instead of bare ``assert`` so the checks
                # still run under ``python -O``.
                self.assertFalse(path.exists(fp))
                tmpfile = f.name
                f.write("asdf")

            self.assertFalse(path.exists(tmpfile))
            self.assertTrue(path.exists(fp))

            with open(fp) as written:
                self.assertEqual(written.read(), "asdf")
Beispiel #7
0
    def test_convert_xls_to_parquet(self):
        """Ensure a Parquet file can be created from an Excel file"""

        # create a data frame for testing
        test_list = [1, 2]
        test_column = "test_1"
        test_df = DataFrame(data={test_column: test_list, "test_2": [3, 4]})

        # create a temporary directory and write the df as an Excel file for test
        with TemporaryDirectory() as tmp:
            fp = os.path.join(tmp, "test.xls")

            with atomic_write(fp, as_file=False) as pf:
                # sheet_name is passed by keyword: pandas has deprecated
                # positional arguments to to_excel other than the writer/path.
                # NOTE(review): writing the legacy .xls format depends on the
                # Excel engine available to the pinned pandas version -- confirm.
                test_df.to_excel(pf, sheet_name="Sheet1")

            parquet_file = convert_xls_to_parquet(fp, "Sheet1")
            self.assertTrue(os.path.exists(parquet_file))
            self.compare_columns(parquet_file, test_column, test_list)