def test_build_table_schema(self):
     result = build_table_schema(self.df, version=False)
     expected = {
         "fields": [
             {
                 "name": "idx",
                 "type": "integer"
             },
             {
                 "name": "A",
                 "type": "integer"
             },
             {
                 "name": "B",
                 "type": "string"
             },
             {
                 "name": "C",
                 "type": "datetime"
             },
             {
                 "name": "D",
                 "type": "duration"
             },
         ],
         "primaryKey": ["idx"],
     }
     assert result == expected
     result = build_table_schema(self.df)
     assert "pandas_version" in result
 def test_build_table_schema(self):
     result = build_table_schema(self.df, version=False)
     expected = {
         "fields": [
             {
                 "name": "index",
                 "type": "integer"
             },
             {
                 "name": "A",
                 "type": "any",
                 "extDtype": "DateDtype"
             },
             {
                 "name": "B",
                 "type": "any",
                 "extDtype": "decimal"
             },
             {
                 "name": "C",
                 "type": "any",
                 "extDtype": "string"
             },
             {
                 "name": "D",
                 "type": "integer",
                 "extDtype": "Int64"
             },
         ],
         "primaryKey": ["index"],
     }
     assert result == expected
     result = build_table_schema(self.df)
     assert "pandas_version" in result
Beispiel #3
0
 def test_series(self):
     s = pd.Series([1, 2, 3], name="foo")
     result = build_table_schema(s, version=False)
     expected = {
         "fields": [
             {"name": "index", "type": "integer"},
             {"name": "foo", "type": "integer"},
         ],
         "primaryKey": ["index"],
     }
     assert result == expected
     result = build_table_schema(s)
     assert "pandas_version" in result
Beispiel #4
0
 def test_mi_falsey_name(self):
     # GH 16203
     df = pd.DataFrame(
         np.random.randn(4, 4),
         index=pd.MultiIndex.from_product([("A", "B"), ("a", "b")]),
     )
     result = [x["name"] for x in build_table_schema(df)["fields"]]
     assert result == ["level_0", "level_1", 0, 1, 2, 3]
Beispiel #5
0
 def test_series_unnamed(self):
     result = build_table_schema(pd.Series([1, 2, 3]), version=False)
     expected = {
         "fields": [
             {"name": "index", "type": "integer"},
             {"name": "values", "type": "integer"},
         ],
         "primaryKey": ["index"],
     }
     assert result == expected
    def test_multiindex(self):
        df = self.df.copy()
        idx = pd.MultiIndex.from_product([("a", "b"), (1, 2)])
        df.index = idx

        result = build_table_schema(df, version=False)
        expected = {
            "fields": [
                {
                    "name": "level_0",
                    "type": "string"
                },
                {
                    "name": "level_1",
                    "type": "integer"
                },
                {
                    "name": "A",
                    "type": "integer"
                },
                {
                    "name": "B",
                    "type": "string"
                },
                {
                    "name": "C",
                    "type": "datetime"
                },
                {
                    "name": "D",
                    "type": "duration"
                },
            ],
            "primaryKey": ["level_0", "level_1"],
        }
        assert result == expected

        df.index.names = ["idx0", None]
        expected["fields"][0]["name"] = "idx0"
        expected["primaryKey"] = ["idx0", "level_1"]
        result = build_table_schema(df, version=False)
        assert result == expected
Beispiel #7
0
 def test_build_table_schema(self):
     df = DataFrame({
         "A": DateArray([dt.date(2021, 10, 10)]),
         "B": DecimalArray([decimal.Decimal(10)]),
         "C": array(["pandas"], dtype="string"),
         "D": array([10], dtype="Int64"),
     })
     result = build_table_schema(df, version=False)
     expected = {
         "fields": [
             {
                 "name": "index",
                 "type": "integer"
             },
             {
                 "name": "A",
                 "type": "any",
                 "extDtype": "DateDtype"
             },
             {
                 "name": "B",
                 "type": "any",
                 "extDtype": "decimal"
             },
             {
                 "name": "C",
                 "type": "any",
                 "extDtype": "string"
             },
             {
                 "name": "D",
                 "type": "integer",
                 "extDtype": "Int64"
             },
         ],
         "primaryKey": ["index"],
     }
     assert result == expected
     result = build_table_schema(df)
     assert "pandas_version" in result
Beispiel #8
0
    def __init__(
        self,
        obj,
        orient: str | None,
        date_format: str,
        double_precision: int,
        ensure_ascii: bool,
        date_unit: str,
        index: bool,
        default_handler: Callable[[Any], JSONSerializable] | None = None,
        indent: int = 0,
    ):
        """
        Set up a writer for ``orient='table'`` output.

        After the base-class initialisation this stores the Table Schema of
        ``obj`` on ``self.schema`` and keeps a copy of ``obj`` with its index
        reset on ``self.obj`` (the reset can't be done in the caller, because
        the schema inference needs to know what the index is). It then forces
        ``self.orient`` to ``"records"`` and ``self.date_format`` to ``"iso"``.

        Raises
        ------
        ValueError
            If ``date_format`` is not ``"iso"``, or if the index names and
            the column names (or the name of a 1-D ``obj``) overlap.
        NotImplementedError
            If ``obj`` is two-dimensional and its columns are a MultiIndex.
        """
        super().__init__(
            obj,
            orient,
            date_format,
            double_precision,
            ensure_ascii,
            date_unit,
            index,
            default_handler=default_handler,
            indent=indent,
        )

        if date_format != "iso":
            raise ValueError(
                "Trying to write with `orient='table'` and "
                f"`date_format='{date_format}'`. Table Schema requires dates "
                "to be formatted with `date_format='iso'`"
            )

        # Built from the untouched input, before the index is reset below.
        self.schema = build_table_schema(obj, index=self.index)

        # NotImplemented on a column MultiIndex
        has_multiindex_columns = obj.ndim == 2 and isinstance(
            obj.columns, MultiIndex
        )
        if has_multiindex_columns:
            raise NotImplementedError(
                "orient='table' is not supported for MultiIndex columns"
            )

        # NOTE(review): for a 1-D ``obj`` whose name is not among the index
        # names, the ``or`` branch still evaluates ``obj.columns``; presumably
        # callers always pass a 2-D object here -- confirm.
        names_overlap = (
            (obj.ndim == 1) and (obj.name in set(obj.index.names))
        ) or len(obj.columns.intersection(obj.index.names))
        if names_overlap:
            raise ValueError("Overlapping names between the index and columns")

        # Work on a copy so the conversions below leave the input untouched.
        frame = obj.copy()

        # TODO: Do this timedelta properly in objToJSON.c See GH #15137
        td_columns = frame.select_dtypes(include=["timedelta"]).columns
        if len(td_columns):
            frame[td_columns] = frame[td_columns].applymap(
                lambda td: td.isoformat()
            )

        # Convert PeriodIndex to datetimes before serializing
        if is_period_dtype(frame.index.dtype):
            frame.index = frame.index.to_timestamp()

        # exclude index from obj if index=False
        self.obj = frame.reset_index(drop=not self.index)

        self.date_format = "iso"
        self.orient = "records"
        self.index = index