def _get_unsupported_cols(cls, obj):
    """
    Return a list of columns with data types unsupported by OmniSci.

    Parameters
    ----------
    obj : pandas.DataFrame or pyarrow.Table
        Object to inspect for unsupported column types.

    Returns
    -------
    tuple
        Arrow representation of `obj` (for future use) and a list of
        unsupported columns. The first element is ``None`` when a pandas
        object was rejected before or during its conversion to Arrow.
    """
    if isinstance(obj, (pandas.Series, pandas.DataFrame)):
        if obj.empty:
            # There is no first row to sample, so the cheap pre-check below
            # cannot run (``iloc[0]`` would raise IndexError); let the Arrow
            # conversion decide instead.
            unsupported_cols = []
        else:
            # Pick the first row of the columns with `dtype="object"` to check
            # their actual type; for homogeneous columns this saves an
            # unnecessary conversion to an Arrow table.
            cols = [name for name, col in obj.dtypes.items() if col == "object"]
            type_samples = obj.iloc[0][cols]
            unsupported_cols = [
                name
                for name, col in type_samples.items()
                if not isinstance(col, str)
                and not (is_scalar(col) and pandas.isna(col))
            ]

        if unsupported_cols:
            return None, unsupported_cols

        try:
            at = pyarrow.Table.from_pandas(obj)
        except (pyarrow.lib.ArrowTypeError, pyarrow.lib.ArrowInvalid) as e:
            # Try to recover the offending column names from the error text.
            regex = r"Conversion failed for column ([^\W]*)"
            unsupported_cols = []
            for msg in e.args:
                unsupported_cols.extend(re.findall(regex, msg))

            if not unsupported_cols:
                # No column name could be extracted from the error, so report
                # every column; materialize as a list to match the other
                # return paths.
                unsupported_cols = list(obj.columns)
            return None, unsupported_cols
        else:
            obj = at

    # From here on `obj` is treated as an Arrow table: report fields that map
    # to the pandas object dtype and are neither dictionary nor string typed.
    return (
        obj,
        [
            field.name
            for field in obj.schema
            if not isinstance(field.type, pyarrow.DictionaryType)
            and field.type.to_pandas_dtype() == np.dtype("O")
            and field.type != "string"
        ],
    )
def wrapper(obj1, obj2, *args, **kwargs):
    """
    Validate the operand types and then invoke `comparator` on them.

    `comparator` is not defined in this block; it is resolved from the
    enclosing scope.

    Parameters
    ----------
    obj1 : object
        Object whose defining module must live under the ``modin`` package.
    obj2 : object
        Object whose defining module must live under the ``pandas`` package.
    *args : tuple
        Extra positional arguments forwarded to `comparator`.
    **kwargs : dict
        Extra keyword arguments forwarded to `comparator`.

    Raises
    ------
    AssertionError
        If exactly one of the operands is a scalar, or if an operand does not
        come from the expected top-level package.
    """
    error_str = f"obj1 and obj2 has incorrect types: {type(obj1)} and {type(obj2)}"

    # Either both operands are scalars or neither of them is.
    assert is_scalar(obj1) == is_scalar(obj2), error_str

    # Check the top-level package of each operand's `__module__`.
    for operand, expected_root in ((obj1, "modin"), (obj2, "pandas")):
        root_package = operand.__module__.partition(".")[0]
        assert root_package == expected_root, error_str

    comparator(obj1, obj2, *args, **kwargs)