def cast(self, obj: Any, strict: bool = False) -> Any:
    """Cast *obj* to a time value.

    Args:
        obj: The value to cast.
        strict: When true, no conversion is attempted: *obj* must already
            be a ``time`` instance and is returned unchanged.

    Returns:
        In strict mode, *obj* itself. Otherwise ``None`` when *obj* is
        nullish, else whatever ``ensure_time`` produces for *obj*.

    Raises:
        TypeError: In strict mode, when *obj* is not a ``time`` instance
            (this includes ``None``, since the nullish check is skipped).
    """
    if not strict:
        # Lenient mode: map nullish values to None, coerce everything else.
        return None if is_nullish(obj) else ensure_time(obj)
    if isinstance(obj, time):
        return obj
    raise TypeError(obj)
def cast(self, obj: Any, strict: bool = False) -> Any:
    """Cast *obj* to an ``int``.

    Args:
        obj: The value to cast.
        strict: Accepted for interface compatibility; this implementation
            does not read it.

    Returns:
        ``None`` when *obj* is nullish or is a string containing only
        whitespace; otherwise ``int(obj)``. For strings, every ``","`` is
        removed before conversion.

    Raises:
        ValueError / TypeError: Propagated from ``int`` when the value
            cannot be converted.
    """
    if is_nullish(obj):
        return None
    if not isinstance(obj, str):
        return int(obj)
    if obj.strip():
        # Drop comma separators (e.g. "1,234") before parsing.
        return int(obj.replace(",", ""))
    # Blank / whitespace-only string.
    return None
def cast(self, obj: Any, strict: bool = False) -> Any:
    """Cast *obj* to a ``datetime``.

    Args:
        obj: The value to cast.
        strict: When true, only ``datetime`` and ``date`` instances are
            accepted and no parsing is attempted.

    Returns:
        In strict mode: *obj* unchanged when it is already a ``datetime``;
        a midnight ``datetime`` built from year/month/day when it is a
        plain ``date``. In lenient mode: ``None`` when *obj* is nullish,
        else whatever ``ensure_datetime`` produces for *obj*.

    Raises:
        TypeError: In strict mode, when *obj* is neither a ``datetime``
            nor a ``date`` (this includes ``None``).
    """
    if not strict:
        if is_nullish(obj):
            return None
        return ensure_datetime(obj)
    # ``datetime`` is a subclass of ``date``, so it must be tested first;
    # otherwise a datetime would be rebuilt from year/month/day only and
    # lose its time-of-day and tzinfo.
    if isinstance(obj, datetime):
        return obj
    if isinstance(obj, date):
        # Promote a plain date to a datetime at midnight.
        return datetime(obj.year, obj.month, obj.day)
    raise TypeError(obj)
def assert_dataframes_are_almost_equal(
    df1: DataFrame,
    df2: DataFrame,
    schema: Optional[Schema] = None,
    ignored_columns: Optional[List[str]] = None,
    check_dtypes: bool = False,
):
    """Assert that two DataFrames hold (almost) equal values, row by row.

    The frames must have the same shape and the same set of column labels.
    Rows are paired positionally; within each pair, cells are compared by
    column label with ``assert_almost_equal``. A cell that is nullish in
    both frames counts as equal.

    The caller's DataFrames are not modified: sorting is done on copies.

    Args:
        df1: First frame.
        df2: Second frame.
        schema: Optional schema. When it has a truthy ``unique_on``, both
            frames are sorted by it before rows are paired.
        ignored_columns: Column labels to drop from both frames before
            comparing.
        check_dtypes: Forwarded to ``assert_almost_equal`` as
            ``check_dtype``.

    Raises:
        AssertionError: On a shape mismatch, a column-set mismatch, or
            (presumably, via ``assert_almost_equal``) a differing cell.
    """
    logger.debug(df1)
    logger.debug(df2)
    if ignored_columns:
        df1 = df1[[c for c in df1.columns if c not in ignored_columns]]
        df2 = df2[[c for c in df2.columns if c not in ignored_columns]]
    assert df1.shape == df2.shape, f"Different shapes: {df1.shape} {df2.shape}"
    assert set(df1.columns) == set(df2.columns)
    if schema is not None and schema.unique_on:
        # Sort into new frames rather than in place: an in-place sort would
        # reorder the caller's DataFrames as a side effect (or operate on a
        # derived selection when columns were dropped above).
        df1 = df1.sort_values(schema.unique_on)
        df2 = df2.sort_values(schema.unique_on)
    for (_, row1), (_, row2) in zip(df1.iterrows(), df2.iterrows()):
        for col in row1.keys():
            # Two nullish cells (e.g. None vs NaN) are considered equal.
            if is_nullish(row1[col]) and is_nullish(row2[col]):
                continue
            assert_almost_equal(row1[col], row2[col], check_dtype=check_dtypes)
def _detect_field_type_fast(obj: Any) -> Optional[FieldType]:
    """
    Detect the field type of a single value using the registered helpers.

    Two passes are made over ``ALL_FIELD_TYPE_HELPERS`` in its iteration
    order: the first returns the type of the first helper whose
    ``is_definitely(obj)`` is truthy, the second does the same with
    ``is_maybe(obj)``. Nullish values yield ``None``. If no helper matches,
    the object is logged as an error and ``DEFAULT_FIELD_TYPE`` is returned.

    Fast, but doesn't support adding new types via the registry.
    TODO: Fixable tho, just need to make sure added types are ranked by
    cardinality (separate registry?)
    """
    # TODO: this is pretty aggressive?
    if is_nullish(obj):
        return None
    # "Definitely" matches across all helpers take priority over any
    # "maybe" match, hence the predicate is the outer loop.
    for predicate_name in ("is_definitely", "is_maybe"):
        for helper_cls in ALL_FIELD_TYPE_HELPERS.values():
            helper = helper_cls()
            if getattr(helper, predicate_name)(obj):
                return ensure_field_type(helper.field_type)
    # I don't think we should get here ever? Some random object type
    logger.error(obj)
    return DEFAULT_FIELD_TYPE
def process_raw_value(v: Any) -> Any:
    """Normalize a raw value: nullish values become ``None``, all others pass through unchanged."""
    return None if is_nullish(v) else v
def cast(self, obj: Any, strict: bool = False) -> Any:
    """Cast *obj* to a ``decimal.Decimal``.

    Args:
        obj: The value to cast.
        strict: Accepted for interface compatibility; this implementation
            does not read it.

    Returns:
        ``None`` when *obj* is nullish, otherwise ``decimal.Decimal(obj)``.

    Raises:
        decimal.InvalidOperation / TypeError: Propagated from the
            ``Decimal`` constructor when *obj* cannot be converted.
    """
    return None if is_nullish(obj) else decimal.Decimal(obj)