Exemple #1
0
def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
    """Adapt OHLCV columns into uint32 columns.

    Parameters
    ----------
    cols : dict
        A dict mapping each column name (open, high, low, close, volume)
        to a float column to convert to uint32.
    scale_factor : int
        Factor to use to scale float values before converting to uint32.
    sid : int
        Sid of the relevant asset, for logging.
    invalid_data_behavior : str
        Specifies behavior when data cannot be converted to uint32.
        If 'raise', raises an exception.
        If 'warn', logs a warning and filters out incompatible values.
        If 'ignore', silently filters out incompatible values.

    Returns
    -------
    opens, highs, lows, closes, volumes : tuple
        The five columns cast to ``np.uint32``. Any row in which at least
        one scaled price column is too large for uint32 is set to zero in
        all five outputs.

    Raises
    ------
    ValueError
        Re-raised from ``check_uint32_safe`` when a scaled price column's
        maximum is rejected and ``invalid_data_behavior`` is 'raise'.

    Notes
    -----
    Only the four price columns are scaled and range-checked; the volume
    column is cast to uint32 as-is.
    """
    # nan_to_num replaces NaNs with 0 so they survive the integer cast.
    scaled_opens = np.nan_to_num(cols['open']) * scale_factor
    scaled_highs = np.nan_to_num(cols['high']) * scale_factor
    scaled_lows = np.nan_to_num(cols['low']) * scale_factor
    scaled_closes = np.nan_to_num(cols['close']) * scale_factor

    uint32_max = np.iinfo(np.uint32).max
    exclude_mask = np.zeros_like(scaled_opens, dtype=bool)

    for col_name, scaled_col in [
        ('open', scaled_opens),
        ('high', scaled_highs),
        ('low', scaled_lows),
        ('close', scaled_closes),
    ]:
        max_val = scaled_col.max()

        try:
            check_uint32_safe(max_val, col_name)
        except ValueError:
            if invalid_data_behavior == 'raise':
                raise

            if invalid_data_behavior == 'warn':
                logger.warn(
                    'Values for sid={}, col={} contain some too large for '
                    'uint32 (max={}), filtering them out',
                    sid,
                    col_name,
                    max_val,
                )

            # We want to exclude all rows that have an unsafe value in
            # this column. The mask starts out all-False, so the per-column
            # results must be OR-ed in; AND-ing would leave it all-False
            # forever and nothing would ever be filtered.
            exclude_mask |= (scaled_col >= uint32_max)

    # Convert all cols to uint32.
    opens = scaled_opens.astype(np.uint32)
    highs = scaled_highs.astype(np.uint32)
    lows = scaled_lows.astype(np.uint32)
    closes = scaled_closes.astype(np.uint32)
    volumes = cols['volume'].astype(np.uint32)

    # Exclude rows with unsafe values by setting to zero.
    opens[exclude_mask] = 0
    highs[exclude_mask] = 0
    lows[exclude_mask] = 0
    closes[exclude_mask] = 0
    volumes[exclude_mask] = 0

    return opens, highs, lows, closes, volumes
Exemple #2
0
def convert_cols(cols, scale_factor, sid, invalid_data_behavior):
    """Adapt OHLCV columns into uint32 columns.

    Parameters
    ----------
    cols : dict
        A dict mapping each column name (open, high, low, close, volume)
        to a float column to convert to uint32.
    scale_factor : int
        Factor to use to scale float values before converting to uint32.
    sid : int
        Sid of the relevant asset, for logging.
    invalid_data_behavior : str
        Specifies behavior when data cannot be converted to uint32.
        If 'raise', raises an exception.
        If 'warn', logs a warning and filters out incompatible values.
        If 'ignore', silently filters out incompatible values.

    Returns
    -------
    opens, highs, lows, closes, volumes : tuple
        The five columns cast to ``np.uint32``. Rows where any scaled
        price column does not fit in uint32 are zeroed in every output.

    Raises
    ------
    ValueError
        Propagated from ``check_uint32_safe`` when it rejects a scaled
        price column's maximum and ``invalid_data_behavior`` is 'raise'.

    Notes
    -----
    The volume column is neither scaled nor range-checked here; it is
    only cast to uint32.
    """
    # NaNs are mapped to 0 by nan_to_num before the values are scaled.
    scaled_opens = np.nan_to_num(cols['open']) * scale_factor
    scaled_highs = np.nan_to_num(cols['high']) * scale_factor
    scaled_lows = np.nan_to_num(cols['low']) * scale_factor
    scaled_closes = np.nan_to_num(cols['close']) * scale_factor

    uint32_max = np.iinfo(np.uint32).max
    exclude_mask = np.zeros_like(scaled_opens, dtype=bool)

    for col_name, scaled_col in [
        ('open', scaled_opens),
        ('high', scaled_highs),
        ('low', scaled_lows),
        ('close', scaled_closes),
    ]:
        max_val = scaled_col.max()

        try:
            check_uint32_safe(max_val, col_name)
        except ValueError:
            if invalid_data_behavior == 'raise':
                raise

            if invalid_data_behavior == 'warn':
                logger.warn(
                    'Values for sid={}, col={} contain some too large for '
                    'uint32 (max={}), filtering them out',
                    sid, col_name, max_val,
                )

            # We want to exclude all rows that have an unsafe value in
            # this column. Union (OR) the offending rows into the mask:
            # it is initialised to all-False, so intersecting (AND) would
            # never mark a single row for exclusion.
            exclude_mask |= (scaled_col >= uint32_max)

    # Convert all cols to uint32.
    opens = scaled_opens.astype(np.uint32)
    highs = scaled_highs.astype(np.uint32)
    lows = scaled_lows.astype(np.uint32)
    closes = scaled_closes.astype(np.uint32)
    volumes = cols['volume'].astype(np.uint32)

    # Exclude rows with unsafe values by setting to zero.
    opens[exclude_mask] = 0
    highs[exclude_mask] = 0
    lows[exclude_mask] = 0
    closes[exclude_mask] = 0
    volumes[exclude_mask] = 0

    return opens, highs, lows, closes, volumes