Beispiel #1
0
    def test_set_package_entry_as_object(self):
        """Store a DataFrame as package entries and check cleanup on push().

        The same DataFrame is set under six logical keys (parquet, csv and
        tsv): three with an explicit ``serialization_location`` and three
        without one.  The test then checks that

        * every entry is backed by a file that exists on disk,
        * every entry deserializes to a DataFrame equal to the original,
        * after ``push()`` the files written to an explicit location still
          exist, while the files of the other three entries are gone.
        """
        pkg = Package()
        # The string holds a comma, a double quote and a tab -- presumably to
        # exercise quoting/escaping in the csv and tsv round trips.
        nasty_string = 'a,"\tb'
        num_col = [11, 22, 33]
        str_col = ['a', 'b', nasty_string]
        df = pd.DataFrame({'col_num': num_col, 'col_str': str_col})

        # Entries with an explicit serialization_location.
        pkg.set("mydataframe1.parquet",
                df,
                meta={'user_meta': 'blah'},
                serialization_location=SERIALIZATION_DIR / "df1.parquet")
        pkg.set("mydataframe2.csv",
                df,
                meta={'user_meta': 'blah2'},
                serialization_location=SERIALIZATION_DIR / "df2.csv")
        pkg.set("mydataframe3.tsv",
                df,
                meta={'user_meta': 'blah3'},
                serialization_location=SERIALIZATION_DIR / "df3.tsv")

        # Entries without a serialization_location.
        pkg.set("mydataframe4.parquet", df, meta={'user_meta': 'blah4'})
        pkg.set("mydataframe5.csv", df, meta={'user_meta': 'blah5'})
        pkg.set("mydataframe6.tsv", df, meta={'user_meta': 'blah6'})

        file_paths = [
            parse_file_url(urlparse(entry.get())) for _, entry in pkg.walk()
        ]

        # Register every file for the sweeper BEFORE asserting anything, so a
        # failing assertion on one entry cannot leave the others unregistered.
        for file_path in file_paths:
            self.file_sweeper_path_list.append(file_path)

        for file_path in file_paths:
            assert pathlib.Path(
                file_path).exists(), "The serialization files should exist"

        pkg._fix_sha256()
        for _, entry in pkg.walk():
            assert df.equals(entry.deserialize()), "The deserialized PackageEntry should be equal to the object that " \
                                                   "was serialized"

        # Test that push cleans up the temporary files, if and only if the serialization_location was not set.
        # The botocore API call, Package._materialize and Package.build are
        # all replaced by mocks for the duration of the push.
        with patch('botocore.client.BaseClient._make_api_call', new=mock_make_api_call), \
            patch('quilt3.Package._materialize') as materialize_mock, \
            patch('quilt3.Package.build'):
            materialize_mock.return_value = pkg

            pkg.push('Quilt/test_pkg_name', 's3://test-bucket')

        for lk in [
                "mydataframe1.parquet", "mydataframe2.csv", "mydataframe3.tsv"
        ]:
            file_path = parse_file_url(urlparse(pkg.get(lk)))
            assert pathlib.Path(file_path).exists(
            ), "These files should not have been deleted during push()"

        for lk in [
                "mydataframe4.parquet", "mydataframe5.csv", "mydataframe6.tsv"
        ]:
            file_path = parse_file_url(urlparse(pkg.get(lk)))
            assert not pathlib.Path(file_path).exists(
            ), "These temp files should have been deleted during push()"
Beispiel #2
0
    def test_set_package_entry_as_object(self):
        """Store a DataFrame as package entries and delete the backing files.

        The same DataFrame is set under six logical keys (parquet, csv and
        tsv): three with an explicit ``serialization_location`` and three
        without one.  The test then checks that

        * every entry is backed by a file that exists on disk,
        * every entry deserializes to a DataFrame equal to the original,
        * after ``Package.delete_local_file`` has been called for each
          entry, none of the backing files exists any more.
        """
        pkg = Package()
        # The string holds a comma, a double quote and a tab -- presumably to
        # exercise quoting/escaping in the csv and tsv round trips.
        nasty_string = 'a,"\tb'
        num_col = [11, 22, 33]
        str_col = ['a', 'b', nasty_string]
        df = pd.DataFrame({'col_num': num_col, 'col_str': str_col})

        # Entries with an explicit serialization_location.
        pkg.set("mydataframe1.parquet",
                df,
                meta={'user_meta': 'blah'},
                serialization_location=SERIALIZATION_DIR / "df1.parquet")
        pkg.set("mydataframe2.csv",
                df,
                meta={'user_meta': 'blah2'},
                serialization_location=SERIALIZATION_DIR / "df2.csv")
        pkg.set("mydataframe3.tsv",
                df,
                meta={'user_meta': 'blah3'},
                serialization_location=SERIALIZATION_DIR / "df3.tsv")

        # Entries without a serialization_location.
        pkg.set("mydataframe4.parquet", df, meta={'user_meta': 'blah4'})
        pkg.set("mydataframe5.csv", df, meta={'user_meta': 'blah5'})
        pkg.set("mydataframe6.tsv", df, meta={'user_meta': 'blah6'})

        file_paths = [
            parse_file_url(urlparse(entry.physical_keys[0]))
            for _, entry in pkg.walk()
        ]

        # Register every file for the sweeper BEFORE asserting anything, so a
        # failing assertion on one entry cannot leave the others unregistered.
        for file_path in file_paths:
            self.file_sweeper_path_list.append(file_path)

        for file_path in file_paths:
            assert (pathlib.Path(file_path)
                    ).exists(), "The serialization files should exist"

        pkg._fix_sha256()
        for _, entry in pkg.walk():
            assert df.equals(entry.deserialize()), "The deserialized PackageEntry should be equal to the object that " \
                                                   "was serialized"

        # Delete the backing file of every entry through the Package API.
        for lk in [
                "mydataframe1.parquet", "mydataframe2.csv", "mydataframe3.tsv",
                "mydataframe4.parquet", "mydataframe5.csv", "mydataframe6.tsv"
        ]:
            Package.delete_local_file(pkg.get(lk))

        # The paths were already registered with the sweeper above, so they
        # are only checked here and not appended a second time.
        for _, entry in pkg.walk():
            file_path = parse_file_url(urlparse(entry.physical_keys[0]))
            assert not (pathlib.Path(file_path)).exists(
            ), "The serialization files should have been deleted"