Esempio n. 1
0
    def _get_unsupported_cols(cls, obj):
        """
        Return a list of columns with unsupported by OmniSci data types.

        Parameters
        ----------
        obj : pandas.DataFrame or pyarrow.Table
            Object to inspect on unsupported column types.

        Returns
        -------
        tuple
            Arrow representation of `obj` (for future using) and a list of
            unsupported columns.
        """
        if isinstance(obj, (pandas.Series, pandas.DataFrame)):
            # picking first rows from cols with `dtype="object"` to check its actual type,
            # in case of homogen columns that saves us unnecessary convertion to arrow table
            cols = [
                name for name, col in obj.dtypes.items() if col == "object"
            ]
            type_samples = obj.iloc[0][cols]

            unsupported_cols = [
                name for name, col in type_samples.items()
                if not isinstance(col, str)
                and not (is_scalar(col) and pandas.isna(col))
            ]

            if len(unsupported_cols) > 0:
                return None, unsupported_cols

            try:
                at = pyarrow.Table.from_pandas(obj)
            except (pyarrow.lib.ArrowTypeError, pyarrow.lib.ArrowInvalid) as e:
                regex = r"Conversion failed for column ([^\W]*)"
                unsupported_cols = []
                for msg in e.args:
                    match = re.findall(regex, msg)
                    unsupported_cols.extend(match)

                if len(unsupported_cols) == 0:
                    unsupported_cols = obj.columns
                return None, unsupported_cols
            else:
                obj = at

        return (
            obj,
            [
                field.name for field in obj.schema
                if not isinstance(field.type, pyarrow.DictionaryType)
                and field.type.to_pandas_dtype() == np.dtype("O")
                and field.type != "string"
            ],
        )
Esempio n. 2
0
 def wrapper(obj1, obj2, *args, **kwargs):
     """
     Validate the origin of both operands, then delegate to ``comparator``.

     Parameters
     ----------
     obj1 : object
         Operand whose defining module must live under the ``modin`` package.
     obj2 : object
         Operand whose defining module must live under the ``pandas`` package.
     *args : tuple
         Positional arguments forwarded to ``comparator``.
     **kwargs : dict
         Keyword arguments forwarded to ``comparator``.

     Raises
     ------
     AssertionError
         If exactly one operand is a scalar, or if an operand comes from an
         unexpected top-level package.
     """
     error_str = f"obj1 and obj2 has incorrect types: {type(obj1)} and {type(obj2)}"

     # Either both operands are scalars or neither is.
     first_is_scalar = is_scalar(obj1)
     second_is_scalar = is_scalar(obj2)
     assert first_is_scalar == second_is_scalar, error_str

     # NOTE(review): objects without a ``__module__`` attribute (e.g. builtin
     # scalars) raise AttributeError here rather than AssertionError - confirm
     # this is intended.
     for operand, expected_root in ((obj1, "modin"), (obj2, "pandas")):
         root_package = operand.__module__.split(".")[0]
         assert root_package == expected_root, error_str

     comparator(obj1, obj2, *args, **kwargs)