import numpy as np

# The helpers below (best_float, int_to_float, as_int, type_info,
# floor_exact, able_int_type) live in nibabel.casting; WriterError and
# ScalingError are defined elsewhere in this module.
from nibabel.casting import (best_float, int_to_float, as_int, type_info,
                             floor_exact, able_int_type)


def _range_scale(self):
    """ Calculate scaling, intercept based on data range and output type """
    mn, mx = self.finite_range()  # Values of self.array.dtype type
    out_dtype = self._out_dtype
    if mx == mn:  # Only one number in array
        self.inter = mn
        return
    # Straight mx-mn can overflow.
    big_float = best_float()  # usually longdouble except in win 32
    if mn.dtype.kind == 'f':  # Already floats
        # float64 and below cast correctly to longdouble.  Longdouble
        # needs no casting
        mn2mx = np.diff(np.array([mn, mx], dtype=big_float))
    else:
        # max possible (u)int range is 2**64-1 (int64, uint64).
        # int_to_float covers this range.  On windows longdouble is the
        # same as double so mn2mx will be 2**64 - thus overestimating
        # slope slightly.  Casting to int needed to allow mx-mn to be
        # larger than the largest (u)int value
        mn2mx = int_to_float(as_int(mx) - as_int(mn), big_float)
    if out_dtype.kind == 'f':
        # Type range, these are also floats
        info = type_info(out_dtype)
        t_mn_mx = info['min'], info['max']
    else:
        t_mn_mx = np.iinfo(out_dtype).min, np.iinfo(out_dtype).max
        t_mn_mx = [int_to_float(v, big_float) for v in t_mn_mx]
    # We want maximum precision for the calculations.  Casting will not
    # lose precision because min/max are of fp type.
    assert [v.dtype.kind for v in t_mn_mx] == ['f', 'f']
    scaled_mn2mx = np.diff(np.array(t_mn_mx, dtype=big_float))
    slope = mn2mx / scaled_mn2mx
    self.inter = mn - t_mn_mx[0] * slope
    self.slope = slope
    if not np.all(np.isfinite([self.slope, self.inter])):
        raise ScalingError('Slope / inter not both finite')
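
# A minimal standalone sketch of the range-scaling arithmetic above,
# using numpy only.  `demo_range_scale` is a hypothetical helper for
# illustration, not the class method itself, and it skips the longdouble
# and overflow handling that _range_scale needs.
import numpy as np

def demo_range_scale(data, out_dtype=np.int16):
    # Map the finite data range [mn, mx] onto the full output type range
    # [t_min, t_max], so that data ~= stored * slope + inter
    mn, mx = data.min(), data.max()
    t_min, t_max = np.iinfo(out_dtype).min, np.iinfo(out_dtype).max
    slope = (mx - mn) / (float(t_max) - float(t_min))
    inter = mn - t_min * slope
    stored = np.round((data - inter) / slope).astype(out_dtype)
    return stored, slope, inter

data = np.array([-3.5, 0.0, 10.25])
stored, slope, inter = demo_range_scale(data)
# Round-trip error is at most half a scale step
assert np.allclose(stored * slope + inter, data, atol=slope / 2)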
def _iu2iu(self):
    # (u)int to (u)int
    mn, mx = [as_int(v) for v in self.finite_range()]
    # range may be greater than the largest integer for this type.
    # as_int needed to work round numpy 1.4.1 int casting bug
    out_dtype = self._out_dtype
    t_min, t_max = np.iinfo(out_dtype).min, np.iinfo(out_dtype).max
    type_range = as_int(t_max) - as_int(t_min)
    mn2mx = mx - mn
    if mn2mx <= type_range:  # might offset be enough?
        if t_min == 0:  # uint output - take min to 0
            # decrease offset with floor_exact, meaning mn >= t_min
            # after subtraction.  But we may have pushed the data over
            # t_max, which we check below
            inter = floor_exact(mn - t_min, self.scaler_dtype)
        else:  # int output - take midpoint to 0
            # ceil below increases inter, pushing scale up to 0.5
            # towards -inf, because ints have abs min == abs max + 1
            midpoint = mn + as_int(np.ceil(mn2mx / 2.0))
            # Floor exact decreases inter, so pulling scaled values more
            # positive.  This may make mx - inter > t_max
            inter = floor_exact(midpoint, self.scaler_dtype)
        # Need to check still in range after floor_exact-ing
        int_inter = as_int(inter)
        assert mn - int_inter >= t_min
        if mx - int_inter <= t_max:
            self.inter = inter
            return
    # Try slope options (sign flip) and then range scaling
    super(SlopeInterArrayWriter, self)._iu2iu()
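
# A sketch of the offset-only fast path above, for the uint output
# branch only (the int output branch would use the midpoint instead).
# `demo_iu2iu_offset` is a hypothetical stand-in, assuming numpy only.
import numpy as np

def demo_iu2iu_offset(data, out_dtype=np.uint8):
    # If the data *range* fits in the output type, an intercept alone
    # is enough: stored = data - inter, recovered as stored + inter
    mn, mx = int(data.min()), int(data.max())
    info = np.iinfo(out_dtype)
    if mx - mn > int(info.max) - int(info.min):
        raise ValueError('Range too wide for offset-only scaling')
    inter = mn - info.min  # uint output - take min to 0
    stored = (data.astype(np.int64) - inter).astype(out_dtype)
    return stored, inter

data = np.array([300, 350, 400], dtype=np.uint16)
stored, inter = demo_iu2iu_offset(data)  # stored is [0, 50, 100]
assert np.all(stored.astype(np.int64) + inter == data)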
def scaling_needed(self):
    """ Checks if scaling is needed for input array

    Raises WriterError if no scaling possible.

    The rules are in the code, but:

    * If numpy will cast, return False (no scaling needed)
    * If input or output is an object or structured type, raise
    * If input is complex, raise
    * If the output is float, return False
    * If there is no finite value in the input array, or the input
      array is all 0, return False (the writer will strip the
      non-finite values)
    * By now we are casting to (u)int.  If the input type is a float,
      return True (we do need scaling)
    * Now input and output types are (u)ints.  If the min and max in
      the data are within range of the output type, return False
    * Otherwise return True
    """
    data = self._array
    arr_dtype = data.dtype
    out_dtype = self._out_dtype
    # There's a bug in np.can_cast (at least up to and including 1.6.1)
    # such that any structured output type passes.  Check for this first.
    if 'V' in (arr_dtype.kind, out_dtype.kind):
        if arr_dtype == out_dtype:
            return False
        raise WriterError('Cannot cast to or from non-numeric types')
    if np.can_cast(arr_dtype, out_dtype):
        return False
    # Direct casting for complex output from any numeric type
    if out_dtype.kind == 'c':
        return False
    if arr_dtype.kind == 'c':
        raise WriterError('Cannot cast complex types to non-complex')
    # Direct casting for float output from any non-complex numeric type
    if out_dtype.kind == 'f':
        return False
    # Now we need to look at the data for special cases
    mn, mx = self.finite_range()  # this is cached
    if (mn, mx) in ((0, 0), (np.inf, -np.inf)):
        # Data all zero, or no data is finite
        return False
    # Floats -> (u)ints always need scaling
    if arr_dtype.kind == 'f':
        return True
    # (u)int input, (u)int output
    assert arr_dtype.kind in 'iu' and out_dtype.kind in 'iu'
    info = np.iinfo(out_dtype)
    # No scaling needed if data already fits in output type.  But note -
    # we need to convert to ints, to avoid conversion to float during
    # comparisons, and therefore int -> float conversions which are not
    # exact.  Only a problem for uint64 though.  We need as_int here to
    # work around a numpy 1.4.1 bug in uint conversion
    if as_int(mn) >= as_int(info.min) and as_int(mx) <= as_int(info.max):
        return False
    return True
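
# A few concrete instances of the rules above, checked with plain numpy;
# this illustrates the decision table only, not the writer class itself.
import numpy as np

# int16 -> float32: numpy casts safely, so no scaling is needed
assert np.can_cast(np.int16, np.float32)
# float64 -> int16: no safe cast; floats going to (u)int need scaling
assert not np.can_cast(np.float64, np.int16)
# uint16 -> int16: no safe cast either, but uint16 data spanning, say,
# 0..200 fits inside the int16 range, so such data needs no scaling
assert not np.can_cast(np.uint16, np.int16)
assert np.iinfo(np.int16).min <= 0 and 200 <= np.iinfo(np.int16).max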
def _do_scaling(self):
    arr = self._array
    out_dtype = self._out_dtype
    assert out_dtype.kind in 'iu'
    mn, mx = self.finite_range()
    if arr.dtype.kind == 'f':
        # Float to (u)int scaling
        self._range_scale()
        return
    # (u)int to (u)int
    info = np.iinfo(out_dtype)
    out_max, out_min = info.max, info.min
    # If left as int64, uint64, comparisons will default to floats, and
    # these are inexact for > 2**53 - so convert to int
    if (as_int(mx) <= as_int(out_max) and
            as_int(mn) >= as_int(out_min)):
        # already in range
        return
    # (u)int to (u)int scaling
    self._iu2iu()
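
# Why the as_int comparisons above matter: above 2**53, conversion to
# float64 is inexact, so float comparisons can silently pass values that
# are actually out of range.  A minimal numpy illustration:
import numpy as np

big = np.uint64(2**63 + 1)
assert float(big) == float(2**63)  # inexact - rounded to nearest float64
assert int(big) == 2**63 + 1       # exact as a Python int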
def _inter_type(in_type, inter, out_type=None):
    """ Return intercept type for array type `in_type`, starting value `inter`

    When scaling from a (u)int to a (u)int, we can often just use the
    intercept `inter`.  This routine is for that case.  It works out if
    the min and max of `in_type`, plus the `inter`, can fit into any
    other integer type, returning that type if so.  Otherwise it returns
    the most capable float.

    Parameters
    ----------
    in_type : numpy type
        Any specifier for a numpy dtype
    inter : scalar
        intercept
    out_type : None or numpy type, optional
        If not None, check any proposed `inter_type` to see whether the
        resulting values will fit within `out_type`; if so return
        proposed `inter_type`, otherwise return highest precision float

    Returns
    -------
    inter_type : numpy type
        Type to which inter should be cast for best integer scaling
    """
    info = np.iinfo(in_type)
    inter = as_int(inter)
    out_mn, out_mx = info.min + inter, info.max + inter
    values = [out_mn, out_mx, info.min, info.max]
    i_type = able_int_type(values + [inter])
    if i_type is None:
        return best_float()
    if out_type is None:
        return i_type
    # The proposal so far is to use an integer type i_type as the
    # working type.  However, we might already know the output type to
    # which we will cast.  If the maximum range in the working type will
    # not fit into the known output type, this would require extra
    # casting, so we back off to the best floating point type.
    o_info = np.iinfo(out_type)
    if out_mn >= o_info.min and out_mx <= o_info.max:
        return i_type
    return best_float()
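
# A standalone sketch of the type-picking logic above.  `pick_int_type`
# is a hypothetical stand-in for able_int_type, assuming numpy only.
import numpy as np

def pick_int_type(values):
    # Smallest numpy integer type that can hold every value, else None
    for np_type in (np.int8, np.uint8, np.int16, np.uint16,
                    np.int32, np.uint32, np.int64, np.uint64):
        info = np.iinfo(np_type)
        if all(info.min <= v <= info.max for v in values):
            return np_type
    return None

# int8 data shifted by inter = 128 spans [0, 255]: uint8 already fits
assert pick_int_type([0, 255]) is np.uint8
# A value below int64 min overflows every integer type -> None, where
# _inter_type would fall back to best_float()
assert pick_int_type([np.iinfo(np.int64).min - 1]) is None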