Beispiel #1
0
def cast_column_to_type(col: dd.Series, expected_type: str):
    """Cast the given column to the expected type.

    Args:
        col: The column to convert.
        expected_type: Name of the target dtype; it is passed to ``astype``.

    Returns:
        The converted column, or ``None`` when ``similar_type`` reports that
        the column's current dtype already matches ``expected_type`` and no
        cast is performed.
    """
    current_type = col.dtype

    if similar_type(current_type, expected_type):
        logger.debug("...not converting.")
        return None

    current_float = pd.api.types.is_float_dtype(current_type)
    expected_integer = pd.api.types.is_integer_dtype(expected_type)
    if current_float and expected_integer:
        logger.debug("...truncating...")
        # Currently "trunc" can not be applied to NA (the pandas missing value type),
        # because NA is a different type. It works with np.nan though.
        # For our use case, that does not matter, as the conversion to integer later
        # will convert both NA and np.nan to NA.
        # NOTE: use the lowercase np.nan; the np.NaN alias was removed in NumPy 2.0.
        col = da.trunc(col.fillna(value=np.nan))

    logger.debug(f"Need to cast from {current_type} to {expected_type}")
    return col.astype(expected_type)
Beispiel #2
0
 def coerce_code(v: dd.Series, codes: List[int]) -> dd.Series:
     """Restrict a column to the given codes, marking everything else as -1.

     The column is first converted with ``dd.to_numeric(errors="coerce")``.
     Any entry that is not one of ``codes`` is then replaced by the missing
     marker -1, and the result is cast to ``int8``.
     """
     numeric = dd.to_numeric(v, errors="coerce")
     is_expected = numeric.isin(codes)
     # Blank out unexpected entries first so that a single fillna covers both
     # them and any values that were already missing.
     only_expected = numeric.where(is_expected, np.nan)
     with_sentinel = only_expected.fillna(-1)
     return with_sentinel.astype("int8")