def _designMatrixSize(self):
    """
    Count the rows of the polynomial design matrix and store the count.

    The count is derived from ``self.dim`` (number of variables) and
    ``self.deg`` (polynomial degree).  Nothing is returned: the result is
    written to ``self.size``.

    For every degree the method tracks how many terms were added at the
    previous degree and, per variable, how many of those terms may still be
    multiplied by that variable without producing a duplicate.
    """
    n_vars = self.dim
    degree = self.deg

    # offsets[j] is the running boundary used to decide how many terms of
    # the previous degree variable j is allowed to extend.
    offsets = [0] * (n_vars + 1)
    total = 1        # the constant term
    before = 0       # value of ``total`` one degree earlier

    for _ in irange(degree):
        added_last = total - before
        before = total
        snapshot = list(offsets)
        for axis in irange(n_vars):
            gain = added_last - snapshot[axis]
            total += gain
            offsets[axis + 1] = offsets[axis] + gain

    self.size = total
def _process(self, block):
    """
    Fold one 64-byte block into ``self._state``.

    The block is decoded as sixteen little-endian unsigned 32-bit words.
    Three rounds are applied to a working copy of the state; each step adds
    a mixing function of three state words, one message word and a round
    constant, then rotates the 32-bit sum left.  The working copy is finally
    added (modulo 2**32) into the first four entries of ``self._state``,
    which is modified in place.
    """
    words = struct.unpack("<16I", block)

    digest = self._state
    work = list(digest)

    def parity(x, y, z):
        # third-round mixing function: x ^ y ^ z
        return x ^ y ^ z

    # (step table, mixing function, additive constant) for each round; the
    # first round adds no constant, which is the same as adding zero.
    rounds = (
        (self._round1, F, 0),
        (self._round2, G, 0x5a827999),
        (self._round3, parity, 0x6ed9eba1),
    )
    for table, mix, addend in rounds:
        for a, b, c, d, k, s in table:
            t = (work[a] + mix(work[b], work[c], work[d])
                 + words[k] + addend) & MASK_32
            # rotate t left by s bits within 32 bits
            work[a] = ((t << s) & MASK_32) + (t >> (32 - s))

    # add the processed copy back into the live state
    for i in irange(4):
        digest[i] = (digest[i] + work[i]) & MASK_32
def _shmem_as_ndarray(raw_array, shape=None, order='C'):
    """
    Expose the memory of a ctypes array as a NumPy array.

    The array is built through the ``__array_interface__`` protocol from the
    address of ``raw_array``; no element is copied.

    :param raw_array: ctypes array whose element type ``_type_`` is a key of
        ``CTYPES_TO_NUMPY``.
    :param shape: Shape of the resulting array.  If None, the array is 1-D
        with ``len(raw_array)`` elements.  The product of the dimensions
        must equal ``len(raw_array)``.
    :param str order: ``'C'`` for row-major strides; any other value gives
        column-major (Fortran) strides.

    :returns: ndarray viewing the memory of ``raw_array``.
    :raises ValueError: if ``shape`` does not describe exactly
        ``len(raw_array)`` elements.
    :raises TypeError: if the element type has no known NumPy equivalent.

    NOTE(review): nothing assigned here keeps a reference to ``raw_array``;
    it looks like the caller must keep it alive for as long as the returned
    array is used -- confirm against ``_dummy`` and the callers.
    """
    address = ctypes.addressof(raw_array)
    length = len(raw_array)
    # Size of one element, taken from the element type so that an empty
    # array does not lead to a division by zero.
    item_size = ctypes.sizeof(raw_array._type_)

    if shape is None:
        shape = (length, )
    else:
        # the array interface requires a tuple
        shape = tuple(shape)
        if np.prod(shape, dtype=int) != length:
            raise ValueError(
                "shape {} does not match an array of {} elements".format(
                    shape, length))

    dtype = CTYPES_TO_NUMPY.get(raw_array._type_, None)
    if dtype is None:
        raise TypeError("Unknown conversion from {} to numpy type".format(
            raw_array._type_))

    ndim = len(shape)
    if order == 'C':
        # row-major: the last axis is contiguous
        strides = tuple(item_size * int(np.prod(shape[i + 1:], dtype=int))
                        for i in irange(ndim))
    else:
        # column-major: the first axis is contiguous.  Reversing the C
        # strides is only right when the shape is symmetric; for e.g.
        # (2, 3) it would address memory past the end of the buffer.
        strides = tuple(item_size * int(np.prod(shape[:i], dtype=int))
                        for i in irange(ndim))

    d = _dummy()
    d.__array_interface__ = {
        'data': (address, False),  # False: the memory is writable
        'typestr': dtype.str,
        'desc': dtype.descr,
        'shape': shape,
        'strides': strides,
    }
    return np.asarray(d)
def evaluate(self, reg, points, out):
    """
    Evaluate the spatial averaging on a set of points

    For each evaluation point, a weighted least-squares polynomial is fitted
    to the data of ``reg`` (weights given by ``reg.kernel`` applied to the
    offsets scaled by the inverse of ``reg.bandwidth``), and the value of
    that fit at the point is stored in ``out``.

    :param reg: Object providing ``xdata`` (2-D array, one column per
        sample), ``fitted_ydata`` (1-D array, one value per sample),
        ``bandwidth`` (square matrix) and ``kernel`` (callable).
    :param ndarray points: Points to evaluate the averaging on, one column
        per point
    :param ndarray out: Pre-allocated array for the result; ``out[i]``
        receives the estimate for ``points[:, i]``

    :returns: ``out``
    """
    xdata = reg.xdata
    ydata = reg.fitted_ydata
    _, n = xdata.shape
    designMatrix = self.designMatrix
    dm_size = designMatrix.size

    # Work buffers reused for every evaluation point
    Xx = np.empty((dm_size, n), dtype=xdata.dtype)
    WxXx = np.empty(Xx.shape, dtype=xdata.dtype)
    XWX = np.empty((dm_size, dm_size), dtype=xdata.dtype)

    inv_bw = scipy.linalg.inv(reg.bandwidth)
    kernel = reg.kernel
    for i in irange(points.shape[1]):
        # offsets of every sample from the current point
        dX = (xdata - points[:, i:i + 1])
        Wx = kernel(np.dot(inv_bw, dX))
        designMatrix(dX, out=Xx)
        np.multiply(Wx, Xx, WxXx)
        np.dot(Xx, WxXx.T, XWX)
        # Only the first row of the solution is used: it holds the weights
        # of the constant term, i.e. of the value of the fit at the point.
        Lx = linalg.solve(XWX, WxXx)[0]
        # Dot against the 1-D response so the result is a scalar.  Storing
        # a one-element array into a single slot of ``out`` relies on the
        # size-1-array-to-scalar conversion that recent NumPy deprecates.
        out[i] = np.dot(Lx, ydata)
    return out
def _process(self, block):
    """
    Update ``self._state`` in place with one 64-byte block.

    ``block`` is unpacked into sixteen little-endian unsigned 32-bit words.
    Each of the three rounds walks its own step table
    (``self._round1`` .. ``self._round3``), whose entries name four state
    slots, a message-word index and a left-rotation amount.
    """
    message = struct.unpack("<16I", block)

    live = self._state
    scratch = list(live)    # rounds operate on a copy

    def rotl(value, count):
        # 32-bit left rotation; ``value`` must already be masked to 32 bits
        return ((value << count) & MASK_32) + (value >> (32 - count))

    # round 1 - F function - (x&y)|(~x & z)
    for dst, p, q, r, word, shift in self._round1:
        mixed = F(scratch[p], scratch[q], scratch[r])
        total = (scratch[dst] + mixed + message[word]) & MASK_32
        scratch[dst] = rotl(total, shift)

    # round 2 - G function
    for dst, p, q, r, word, shift in self._round2:
        mixed = G(scratch[p], scratch[q], scratch[r])
        total = (scratch[dst] + mixed + message[word] + 0x5a827999) & MASK_32
        scratch[dst] = rotl(total, shift)

    # round 3 - H function - x ^ y ^ z
    for dst, p, q, r, word, shift in self._round3:
        mixed = scratch[p] ^ scratch[q] ^ scratch[r]
        total = (scratch[dst] + mixed + message[word] + 0x6ed9eba1) & MASK_32
        scratch[dst] = rotl(total, shift)

    # fold the scratch copy back into the live state, modulo 2**32
    for slot in irange(4):
        live[slot] = (live[slot] + scratch[slot]) & MASK_32
def __call__(self, x, out=None):
    """
    Creates the design matrix for polynomial fitting using the points x.

    The rows are the monomials of the coordinates of ``x`` up to total
    degree ``self.deg``: the constant row of ones first, then the degree-1
    terms, then the degree-2 terms, and so on.

    :param ndarray x: Points to create the design matrix. Shape must be
        (D,N) or (N,), where D is the dimension of the problem, 1 if not
        there.  The number of variables is taken from ``x`` itself.
    :param ndarray out: Optional pre-allocated (M,N) array receiving the
        result, where M is the number of monomials.  If None, a new array
        with the dtype of ``x`` is allocated.

    :returns: The design matrix as a (M,N) matrix (``out`` when given).
    """
    deg = self.deg
    x = np.atleast_2d(x)
    dim = x.shape[0]
    if out is None:
        # Number of monomials of total degree <= deg in dim variables,
        # C(dim + deg, deg), built incrementally so every intermediate value
        # is an exact integer.  (_designMatrixSize() takes no argument and
        # returns nothing, so it cannot be used for this.)
        n_rows = 1
        for k in irange(1, deg + 1):
            n_rows = n_rows * (dim + k) // k
        out = np.empty((n_rows, x.shape[1]), dtype=x.dtype)

    # dims[j]: first row, among those of the previous degree, that variable
    # j may multiply without generating a monomial twice.
    dims = [0] * (dim + 1)
    out[0, :] = 1
    cur = 1
    for i in irange(deg):
        old_dims = list(dims)
        prev = cur
        for j in irange(dim):
            dims[j] = cur
            for k in irange(old_dims[j], prev):
                np.multiply(out[k], x[j], out[cur])
                cur += 1
    return out
def evaluate(self, reg, points, out):
    """
    Kernel-weighted average of ``reg.fitted_ydata`` at each column of
    ``points``, with a per-sample scaling taken from ``self.correction``.

    ``out`` is zeroed, filled in place and returned.  Entries whose total
    kernel weight is not strictly positive are left at zero.

    :param reg: Object providing ``xdata``, ``fitted_ydata``, ``N``,
        ``bandwidth`` and ``kernel``.
    :param ndarray points: 2-D array, one column per evaluation point.
    :param ndarray out: Pre-allocated array receiving one value per point.

    :returns: ``out``
    """
    n_dims, n_points = points.shape
    weight_total = np.zeros((n_points, ), points.dtype)
    samples = reg.xdata[..., np.newaxis]
    values = reg.fitted_ydata
    scale = self.correction
    n_samples = reg.N
    inv_bw = scipy.linalg.inv(reg.bandwidth)
    kernel = reg.kernel

    out.fill(0)

    # Pair every sample index with its correction index; np.broadcast lets
    # a length-1 correction be shared by all the samples.
    index_pairs = np.broadcast(irange(n_samples), irange(scale.shape[0]))
    for sample_idx, scale_idx in index_pairs:
        offsets = scale[scale_idx] * (samples[:, sample_idx, :] - points)
        weights = kernel(np.dot(inv_bw, offsets)).squeeze()
        out += values[sample_idx] * weights
        weight_total += weights

    # normalise only where some weight was accumulated
    covered = weight_total > 0
    out[covered] /= weight_total[covered]
    return out
def _botev_fixed_point(t, M, I, a2):
    """
    Fixed-point function used for bandwidth selection: returns
    ``t - (2 * M * sqrt(pi) * f) ** (-2 / 5)``, where ``f`` is obtained by
    a chain of plug-in estimates of decreasing order, from ``l = 7`` down
    to 2.  A root in ``t`` of this function is the value looked for.

    :param t: Candidate value.
    :param M: Scalar divisor/multiplier of the functional estimates
        (presumably the number of data points -- confirm with the caller).
    :param I: Array multiplying ``t`` in the exponential damping
        (presumably squared frequency indices -- confirm with the caller).
    :param a2: Array of the same shape as ``I`` weighting each term
        (presumably squared transform coefficients -- confirm).

    :returns: ``t`` minus the value implied by the plug-in chain.

    The inputs are converted with ``large_float`` before use.
    """
    l = 7
    I = large_float(I)
    M = large_float(M)
    a2 = large_float(a2)

    # Estimate of order l, evaluated at the candidate t
    f = 2 * np.pi**(2 * l) * np.sum(I**l * a2 * np.exp(-I * np.pi**2 * t))

    # Each stage turns the order s+1 estimate into the order s one, so the
    # chain starts at l - 1 (as in the reference implementation of the
    # method).  Starting at l would apply the stage formula to an estimate
    # of the same order.
    for s in irange(l - 1, 1, -1):
        # product of the odd numbers 1, 3, ..., 2s - 1 over sqrt(2 pi)
        K0 = np.prod(np.arange(1, 2 * s, 2)) / np.sqrt(2 * np.pi)
        # float literals: the result must not depend on true division
        # being enabled in this module
        const = (1 + 0.5**(s + 0.5)) / 3
        t_s = (2 * const * K0 / M / f)**(2.0 / (3 + 2 * s))
        f = 2 * np.pi**(2 * s) * \
            np.sum(I**s * a2 * np.exp(-I * np.pi**2 * t_s))
    return t - (2 * M * np.sqrt(np.pi) * f)**(-2.0 / 5)
def bootstrap_result(worker, start_repeats, end_repeats):
    """
    Run the bootstrap repeats ``start_repeats`` (included) to
    ``end_repeats`` (excluded) and store their results.

    Everything but the range comes from module-level names: ``fit``,
    ``fit_args``, ``fit_kwrds``, ``shuffled_x``, ``shuffled_y``, ``nx``,
    ``ny``, ``eval_points``, ``result_array``, ``extra_arrays`` and
    ``extra_attrs``.  Repeat ``i`` writes row ``i + 1`` of ``result_array``
    and of each extra array; row 0 is not touched here.

    :param worker: Identifier of the worker; not used by the computation.
    :param int start_repeats: First repeat to run.
    :param int end_repeats: One past the last repeat to run.

    Any exception has its traceback printed on ``sys.stderr`` and is then
    re-raised.
    """
    try:
        for repeat in irange(start_repeats, end_repeats):
            # the shuffled data sets are reused cyclically
            sample_x = shuffled_x[..., repeat % nx, :]
            sample_y = shuffled_y[repeat % ny, :]

            refit = fit(sample_x, sample_y, *fit_args, **fit_kwrds)
            refit.fit()

            row = repeat + 1
            result_array[row] = refit(eval_points)
            for target, attr_name in izip(extra_arrays, extra_attrs):
                target[row] = getattr(refit, attr_name)
    except Exception:
        traceback.print_exc(None, sys.stderr)
        raise
def bootstrap(fit, xdata, ydata, CI, shuffle_method=bootstrap_residuals,
              shuffle_args=(), shuffle_kwrds={}, repeats=3000,
              eval_points=None, full_results=False, nb_workers=None,
              extra_attrs=(), fit_args=(), fit_kwrds={}):
    """
    This function implements the bootstrap algorithm for a regression
    algorithm.  It is capable of spreading the load across many worker
    processes using shared memory and the :py:mod:`multiprocessing` module.

    :type  fit: callable
    :param fit: Method used to compute regression. The call is::

            f = fit(xdata, ydata, *fit_args, **fit_kwrds)
            f.fit()

        Fit should return an object that would evaluate the regression on a
        set of points. The next call will be::

            f(eval_points)

    :type  xdata: ndarray of shape (N,) or (k,N) for function with k
        predictors
    :param xdata: The independent variable where the data is measured

    :type  ydata: ndarray
    :param ydata: The dependent data

    :type  CI: tuple of float
    :param CI: List of percentiles to extract

    :type  shuffle_method: callable
    :param shuffle_method: Create shuffled dataset. The call is::

            shuffle_method(y_fit, xdata, ydata, repeats=repeats,
                           *shuffle_args, **shuffle_kwrds)

        where ``y_fit`` is the fitted object returned by ``fit``.  It must
        return the shuffled x and y data sets.

    :type  shuffle_args: tuple
    :param shuffle_args: List of arguments for the shuffle method

    :type  shuffle_kwrds: dict
    :param shuffle_kwrds: Dictionary of arguments for the shuffle method

    :type  repeats: int
    :param repeats: Number of repeats for the bootstrapping

    :type  eval_points: ndarray or None
    :param eval_points: List of points to evaluate. If None, eval_point
        is xdata.

    :type  full_results: bool
    :param full_results: if True, output also the whole set of evaluations

    :type  nb_workers: int or None
    :param nb_workers: Number of worker processes. If None, the number of
        detected CPUs will be used. And if 1 or less, everything runs in
        the current process.

    :type  extra_attrs: tuple of str
    :param extra_attrs: List of attributes of the fitting method to extract
        on top of the y values for confidence intervals

    :type  fit_args: tuple
    :param fit_args: List of extra arguments for the fit callable

    :type  fit_kwrds: dict
    :param fit_kwrds: Dictionary of extra named arguments for the fit
        callable

    :rtype: :py:class:`BootstrapResult`
    :return: Estimated y on the data, on the evaluation points, the
        requested confidence intervals and, if requested, the shuffled X, Y
        and the full estimated distributions.

    An exception raised by a worker process is re-raised here once all the
    workers are done.
    """
    xdata = np.asarray(xdata)
    ydata = np.asarray(ydata)

    y_fit = fit(xdata, ydata, *fit_args, **fit_kwrds)
    y_fit.fit()

    shuffled_x, shuffled_y = shuffle_method(y_fit, xdata, ydata,
                                            repeats=repeats,
                                            *shuffle_args, **shuffle_kwrds)
    nx = shuffled_x.shape[-2]
    ny = shuffled_y.shape[0]
    extra_values = [getattr(y_fit, attr) for attr in extra_attrs]

    if eval_points is None:
        eval_points = xdata
    if nb_workers is None:
        nb_workers = mp.cpu_count()
    # "1 or less" means a single process; without the clamp, 0 divides by
    # zero below and a negative value silently runs no repeat at all.
    nb_workers = max(1, nb_workers)

    multiprocess = nb_workers > 1

    pool = None
    try:
        # Copy everything in shared mem
        if multiprocess:
            ra = sharedmem.zeros((repeats + 1, len(eval_points)),
                                 dtype=float)
            result_array = ra.np
            sx = sharedmem.array(shuffled_x)
            sy = sharedmem.array(shuffled_y)
            ep = sharedmem.array(eval_points)

            def make_ea(ev):
                return sharedmem.zeros((repeats + 1, len(ev)), dtype=float)

            eas = [make_ea(ev) for ev in extra_values]
            extra_arrays = [ea.np for ea in eas]
            # one process per requested worker, not per detected CPU
            pool = mp.Pool(nb_workers, bootstrap_workers.initialize_shared,
                           (nx, ny, ra, eas, sx, sy, ep, extra_attrs,
                            fit, fit_args, fit_kwrds))
        else:
            result_array = np.empty((repeats + 1, len(eval_points)),
                                    dtype=float)

            def make_ea(ev):
                return np.empty((repeats + 1, len(ev)), dtype=float)

            extra_arrays = [make_ea(ev) for ev in extra_values]
            bootstrap_workers.initialize(nx, ny, result_array, extra_arrays,
                                         shuffled_x, shuffled_y, eval_points,
                                         extra_attrs, fit, fit_args,
                                         fit_kwrds)

        # Row 0 holds the fit on the original data; repeat i fills row i + 1
        result_array[0] = y_fit(eval_points)
        for ea, ev in izip(extra_arrays, extra_values):
            ea[0] = ev

        # Size of the chunk of repeats given to each worker, rounded up
        base_repeat = repeats // nb_workers
        if base_repeat * nb_workers < repeats:
            base_repeat += 1

        jobs = []
        for i in irange(nb_workers):
            end_repeats = (i + 1) * base_repeat
            if end_repeats > repeats:
                end_repeats = repeats
            if multiprocess:
                jobs.append(
                    pool.apply_async(bootstrap_workers.bootstrap_result,
                                     (i, i * base_repeat, end_repeats)))
            else:
                bootstrap_workers.bootstrap_result(i, i * base_repeat,
                                                   end_repeats)

        if multiprocess:
            pool.close()
            pool.join()
            # Surface worker failures instead of returning rows that were
            # never filled in.
            for job in jobs:
                job.get()
    finally:
        if pool is not None:
            # no-op after a clean close()/join(); stops the worker
            # processes if something failed before that
            pool.terminate()

    CIs = getCIs(CI, result_array, *extra_arrays)

    # copy the array to not return a view on a larger array
    y_eval = np.array(result_array[0])

    if not full_results:
        shuffled_y = shuffled_x = result_array = None
        extra_arrays = ()
    elif multiprocess:
        result_array = result_array.copy()  # copy in local memory
        extra_arrays = [ea.copy() for ea in extra_arrays]

    return BootstrapResult(y_fit, y_fit(xdata), eval_points, y_eval,
                           tuple(CI), CIs, shuffled_x, shuffled_y,
                           result_array)