# Copyright 2021 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

import pandas as pd

from legate import pandas as lp

# Parquet fixture stored in the "files" directory next to this script.
path = os.path.join(
    os.path.dirname(__file__),
    "files",
    "read_parquet.parquet",
)

# Reference result: pandas loads the fixture twice and stacks the two frames,
# discarding the per-file indices in favor of a fresh one.
df = pd.concat([pd.read_parquet(path) for _ in range(2)], ignore_index=True)

# Legate Pandas receives the same path twice in a single multi-file read.
ldf = lp.read_parquet([path] * 2)

# The multi-file read must match the pandas reference.
assert ldf.equals(df)
# Example no. 2
# 0
# Copyright 2021 NVIDIA Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os

import pandas as pd

from legate import pandas as lp

# Parquet fixture stored in the "files" directory next to this script.
path = os.path.join(
    os.path.dirname(__file__),
    "files",
    "web_site.parquet",
)

# Subset of columns requested from both readers.
names = [
    "web_site_id",
    "web_company_name",
    "web_country",
    "web_rec_end_date",
]

# Load the same column subset with pandas (reference) and Legate Pandas.
df = pd.read_parquet(path, columns=names)
ldf = lp.read_parquet(path, columns=names)

# The column-projected read must match the pandas reference.
assert ldf.equals(df)
# Example no. 3
# 0
        },
        index=index,
    )
    df["a"] = df["a"].astype("int32")
    df["b"] = df["b"].astype("float64")
    df["c"] = df["c"].astype(pd.StringDtype())

    ldf = lp.DataFrame(df)

    for store_index in [None, False, True]:
        print(f"Index type: {type(index)}, store index?: {store_index}")

        path = os.path.join(os.path.dirname(__file__), "files")
        tmp_dir = tempfile.mkdtemp(dir=path)
        out_path = os.path.sep.join([tmp_dir, "out.parquet"])

        print(f"Dump to {out_path}")

        try:
            ldf.to_parquet(out_path, index=store_index)
            df_copy = pd.read_parquet(out_path)
            ldf_copy = lp.read_parquet(out_path)
            if store_index is not False:
                assert ldf_copy.equals(df)
                assert ldf_copy.equals(df_copy)
            else:
                assert ldf_copy.equals(df.reset_index(drop=True))
                assert ldf_copy.equals(df_copy.reset_index(drop=True))
        finally:
            shutil.rmtree(tmp_dir)