def json_array_numpy(self, s_and_end, scan_once, **kwargs):
    """Parse a JSON array, returning it as a numpy array if it looks numeric.

    The array is first parsed by `json.decoder.JSONArray`. If the parsed
    list is non-empty and every inspected element satisfies `isbarenumeric`,
    the list is converted with `np.array`; otherwise the plain list is
    returned unchanged.

    Parameters
    ----------
    s_and_end, scan_once, **kwargs
        Passed through, unmodified, to `json.decoder.JSONArray`.

    Returns
    -------
    values : list or numpy.ndarray
    end
        Second element of the pair returned by `json.decoder.JSONArray`.

    Notes
    -----
    Lists longer than 1000 elements are only spot-checked: a strided sample
    (every `len(values) // 1000`-th element) is tested instead of every
    element.

    """
    parsed, end = json.decoder.JSONArray(s_and_end, scan_once, **kwargs)
    if not parsed:
        return parsed, end

    # TODO: is it faster to convert to numpy array and check if the
    # resulting dtype is pure numeric?
    n_parsed = len(parsed)
    if n_parsed > 1000:
        stride = max(n_parsed // 1000, 1)
        sample = parsed[::stride]
    else:
        sample = parsed

    # Any non-numeric element in the sample means: leave the list as it is
    for item in sample:
        if not isbarenumeric(item):
            return parsed, end

    return np.array(parsed), end
def llh(actual_values, expected_values):
    """Compute the log-likelihoods (llh) that each count in `actual_values`
    came from the corresponding expected value in `expected_values`.

    Parameters
    ----------
    actual_values, expected_values : numpy.ndarrays of same shape

    Returns
    -------
    llh : numpy.ndarray of same shape as the inputs
        llh corresponding to each pair of elements in `actual_values` and
        `expected_values`. Both inputs are passed through
        `np.ma.masked_invalid`, so the result is a masked array.

    Raises
    ------
    ValueError
        If the input shapes differ, if the validity check on
        `actual_values` fails (negative or non-finite entries), or if any
        entry of `expected_values` is negative.

    Notes
    -----
    * Uncertainties are not propagated through this calculation.
    * Values in `expected_values` are clipped to the range [SMALL_POS, inf]
      prior to the calculation to avoid infinities due to the log function.
    * ln(k!) is estimated with Stirling's approximation, k*ln(k) - k.

    """
    # Raise explicitly (instead of `assert`) so that the check is not
    # stripped when Python runs with -O
    if actual_values.shape != expected_values.shape:
        raise ValueError('Shape mismatch: actual_values.shape = %s,'
                         ' expected_values.shape = %s'
                         % (actual_values.shape, expected_values.shape))

    # Convert to simple numpy arrays containing floats
    if not isbarenumeric(actual_values):
        actual_values = unp.nominal_values(actual_values)
    if not isbarenumeric(expected_values):
        expected_values = unp.nominal_values(expected_values)

    with np.errstate(invalid='ignore'):
        # Mask off any nan expected values (these are assumed to be ok)
        actual_values = np.ma.masked_invalid(actual_values)
        expected_values = np.ma.masked_invalid(expected_values)

        # TODO: How should we handle nan / masked values in the "data"
        # (actual_values) distribution? How about negative numbers?

        # Make sure actual values (aka "data") are valid -- no infs, no nans,
        # etc.
        # NOTE(review): invalid entries were already masked just above, so
        # the non-finite part of this test presumably never fires -- confirm
        if np.any((actual_values < 0) | ~np.isfinite(actual_values)):
            msg = (
                '`actual_values` must be >= 0 and neither inf nor nan...\n'
                + maperror_logmsg(actual_values))
            raise ValueError(msg)

        # Check that new array contains all valid entries
        if np.any(expected_values < 0.0):
            msg = ('`expected_values` must all be >= 0...\n'
                   + maperror_logmsg(expected_values))
            raise ValueError(msg)

        # Replace 0's with small positive numbers to avoid inf in log
        np.clip(expected_values, a_min=SMALL_POS, a_max=np.inf,
                out=expected_values)

    #
    # natural logarithm of the Poisson probability
    # (uses Stirling's approximation to estimate ln(k!) ~ kln(k)-k)
    #
    llh_val = actual_values * np.log(expected_values) - expected_values

    # NOTE(review): `np.log(actual_values)` is evaluated at 0 for bins with
    # zero counts; confirm how such bins are meant to contribute
    llh_val -= actual_values * np.log(actual_values) - actual_values

    return llh_val
def chi2(actual_values, expected_values):
    """Element-wise chi-square of `actual_values` with respect to
    `expected_values`.

    Parameters
    ----------
    actual_values, expected_values : numpy.ndarrays of same shape

    Returns
    -------
    chi2 : numpy.ndarray of same shape as inputs
        (actual - expected)**2 / expected for each pair of elements in the
        inputs

    Raises
    ------
    ValueError
        If the input shapes differ, or if either input contains a negative
        value.

    Notes
    -----
    * Uncertainties are not propagated through this calculation.
    * The expectation is clipped to the range [SMALL_POS, inf] before the
      division so that a zero expectation does not produce an infinity.
    * Zeros in `actual_values` are allowed, since those values never appear
      in the denominator.
    * If every |actual - expected| is smaller than 5 * FTYPE_PREC, an array
      of zeros (dtype FTYPE) is returned instead.

    """
    observed_shape = actual_values.shape
    predicted_shape = expected_values.shape
    if observed_shape != predicted_shape:
        raise ValueError('Shape mismatch: actual_values.shape = %s,'
                         ' expected_values.shape = %s'
                         % (observed_shape, predicted_shape))

    # Keep only nominal values for anything that is not bare numeric
    observed = actual_values
    if not isbarenumeric(observed):
        observed = unp.nominal_values(observed)
    predicted = expected_values
    if not isbarenumeric(predicted):
        predicted = unp.nominal_values(predicted)

    with np.errstate(invalid='ignore'):
        # Invalid (e.g. nan) entries are masked off; these are assumed ok
        observed = np.ma.masked_invalid(observed)
        predicted = np.ma.masked_invalid(predicted)

        # TODO: this check (and the same for `actual_values`) should probably
        # be done elsewhere... maybe?
        if np.any(observed < 0):
            raise ValueError('`actual_values` must all be >= 0...\n'
                             + maperror_logmsg(observed))

        if np.any(predicted < 0):
            raise ValueError('`expected_values` must all be >= 0...\n'
                             + maperror_logmsg(predicted))

        # TODO: Is this okay to do? Mathematically suspect at best, and can
        # still destroy a minimizer's hopes and dreams...

        # Swap zeros for a small positive number: no inf in the division
        np.clip(predicted, a_min=SMALL_POS, a_max=np.inf, out=predicted)

        residual = observed - predicted

    # Differences below the float-precision threshold count as exactly zero
    if np.all(np.abs(residual) < 5 * FTYPE_PREC):
        return np.zeros_like(residual, dtype=FTYPE)

    result = np.square(residual) / predicted
    assert np.all(result >= 0), str(result[result < 0])
    return result
def get_prior_bounds(obj, param=None, stddev=1.0):
    """Find where a parameter prior's chi2 crosses the levels corresponding
    to the given numbers of standard deviations.

    The prior's chi2 is evaluated on a 10000-point linear grid spanning
    `prior.valid_range`; a bound is recorded wherever the chi2 crosses
    `stddev**2` between two neighbouring grid points.

    Parameters
    ----------
    obj : string or Mapping
        if str, interpret as path from which to load a dict
        if dict, can be:
            template settings dict; must supply `param` to choose which to
                plot
            params dict; must supply `param` to choose which to plot
            prior dict
    param : key in the params dict, optional
        Name of param for which to get bounds; necessary if obj is either
        template settings or params
    stddev : float or Iterable of floats
        number of stddevs

    Returns
    -------
    bounds : OrderedDict
        Maps each passed `stddev` value to the list of grid points found for
        it. For a downward crossing the grid point before the crossing is
        stored; for an upward crossing, the grid point after it.

    """
    if isbarenumeric(stddev):
        stddev = [stddev]
    elif isinstance(stddev, Iterable):
        stddev = list(stddev)

    bounds = OrderedDict((n_sigma, []) for n_sigma in stddev)

    # Drill down from whatever was passed to the prior specification itself
    spec = obj
    if isinstance(spec, basestring):
        spec = from_file(spec)
    if 'params' in spec:
        spec = spec['params']
    if param is not None and param in spec:
        spec = spec[param]
    if 'prior' in spec:
        spec = spec['prior']

    prior = Prior(**spec)
    logging.debug('Getting confidence region from prior: %s', prior)

    range_start = prior.valid_range[0]
    range_stop = prior.valid_range[1]
    grid = ureg.Quantity(np.linspace(range_start, range_stop, 10000),
                         prior.units)
    chi2_on_grid = prior.chi2(grid)

    # A deviation of `n_sigma` standard deviations is chi2 = n_sigma**2
    thresholds = [(n_sigma, n_sigma**2) for n_sigma in stddev]

    for idx, grid_point in enumerate(grid[:-1]):
        chi2_here = chi2_on_grid[idx]
        chi2_next = chi2_on_grid[idx + 1]
        for n_sigma, level in thresholds:
            if chi2_here > level and chi2_next < level:
                # chi2 falls through the level
                bounds[n_sigma].append(grid_point)
            elif chi2_here < level and chi2_next > level:
                # chi2 rises through the level
                bounds[n_sigma].append(grid[idx + 1])

    return bounds