# Copyright 2021 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

import pandas as pd

from legate import pandas as lp

# The fixture lives in the "files" directory next to this script.
data_dir = os.path.join(os.path.dirname(__file__), "files")
path = os.path.join(data_dir, "read_parquet.parquet")

# Reference result: read the same file twice with pandas and stack the two
# frames under a fresh 0..n-1 index.
df = pd.concat(
    [pd.read_parquet(path) for _ in range(2)],
    ignore_index=True,
)

# Result under test: hand the same file to Legate twice as a list of paths.
ldf = lp.read_parquet([path] * 2)

# The multi-file read must match the concatenated pandas frame.
assert ldf.equals(df)
# Copyright 2021 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

import pandas as pd

from legate import pandas as lp

# The fixture lives in the "files" directory next to this script.
data_dir = os.path.join(os.path.dirname(__file__), "files")
path = os.path.join(data_dir, "web_site.parquet")

# Subset of columns requested from the Parquet file by both readers.
names = [
    "web_site_id",
    "web_company_name",
    "web_country",
    "web_rec_end_date",
]

# Both readers receive exactly the same keyword arguments.
read_kwargs = {"columns": names}

df = pd.read_parquet(path, **read_kwargs)
ldf = lp.read_parquet(path, **read_kwargs)

# The column-restricted Legate read must match the pandas read.
assert ldf.equals(df)
}, index=index, ) df["a"] = df["a"].astype("int32") df["b"] = df["b"].astype("float64") df["c"] = df["c"].astype(pd.StringDtype()) ldf = lp.DataFrame(df) for store_index in [None, False, True]: print(f"Index type: {type(index)}, store index?: {store_index}") path = os.path.join(os.path.dirname(__file__), "files") tmp_dir = tempfile.mkdtemp(dir=path) out_path = os.path.sep.join([tmp_dir, "out.parquet"]) print(f"Dump to {out_path}") try: ldf.to_parquet(out_path, index=store_index) df_copy = pd.read_parquet(out_path) ldf_copy = lp.read_parquet(out_path) if store_index is not False: assert ldf_copy.equals(df) assert ldf_copy.equals(df_copy) else: assert ldf_copy.equals(df.reset_index(drop=True)) assert ldf_copy.equals(df_copy.reset_index(drop=True)) finally: shutil.rmtree(tmp_dir)