def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, 'i')
    info = cupy.empty((), 'i')

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, info.data.ptr)

    if info[()] == 0:
        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == -1 ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    else:
        sign = cupy.array(0.0, dtype=dtype)
        logdet = cupy.array(float('-inf'), dtype)
    return sign, logdet
def liquidize(self, intens, sigma_A, gamma_A):
    '''Apply liquidization transform on given intensity'''
    s_sq = (2. * cp.pi * sigma_A * self.dgen.qrad)**2
    patt = cp.fft.fftshift(cp.fft.fftn(cp.fft.ifftshift(intens)))

    if self.slimits.max() > 2. * np.pi * sigma_A / self.res_max:
        n_max = np.where(
            self.slimits > 2. * np.pi * sigma_A / self.res_max)[0][0] + 1
    else:
        print('No effect of liquid-like motions with these parameters')
        return intens

    liq = cp.zeros_like(intens)
    for n in range(n_max):
        kernel = cp.exp(-n * self.urad / gamma_A)
        weight = cp.exp(-s_sq + n * cp.log(s_sq)
                        - float(special.loggamma(n + 1)))
        liq += weight * cp.abs(cp.fft.fftshift(cp.fft.ifftn(
            patt * kernel)))
        sys.stderr.write('\rLiquidizing: %d/%d' % (n + 1, n_max))
    sys.stderr.write('\n')

    return liq
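Not part of the snippet above: a minimal NumPy/SciPy sketch, with a made-up value of s_sq and an assumed term count, showing that the per-term weights exp(-s_sq + n*log(s_sq) - loggamma(n+1)) are a Poisson distribution in n and therefore sum to about 1 when enough terms are kept.

import numpy as np
from scipy import special

s_sq = 3.7                       # example value of (2*pi*sigma_A*q)^2
n_terms = 50                     # assumed large enough for convergence
n = np.arange(n_terms)
weights = np.exp(-s_sq + n * np.log(s_sq) - special.loggamma(n + 1))
print(weights.sum())             # ~1.0: intensity is redistributed, not removed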
def forward(self, x, t):
    if x.ndim == 2:  # when using mini-batches
        x = x - x.max(axis=1, keepdims=True)
        x = cp.exp(x)
        y = x / x.sum(axis=1, keepdims=True)
    elif x.ndim == 1:
        x = x - cp.max(x)
        y = cp.exp(x) / cp.sum(cp.exp(x))

    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # If the labels are one-hot vectors, convert them to class indices
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # t holds class indices here, so the picked log-probabilities are summed
    # directly (multiplying by t again would zero out class 0 and scale others)
    loss = -1.0 * cp.sum(
        cp.log(y[cp.arange(batch_size), t] + 1e-7)) / batch_size
    self.y = y
    self.t = t
    return loss
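A NumPy-only sanity check with made-up softmax outputs and labels (not from the original model): the batched cross-entropy above should match a per-sample computation.

import numpy as np

y = np.array([[0.7, 0.2, 0.1],
              [0.1, 0.8, 0.1]])      # softmax outputs
t = np.array([0, 1])                 # integer class labels
batch_size = y.shape[0]
loss = -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
print(loss, (-np.log(0.7 + 1e-7) - np.log(0.8 + 1e-7)) / 2)  # identical values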
def rayleigh(self, scale=1.0, size=None, dtype=float):
    """Returns an array of samples drawn from a rayleigh distribution.

    .. warning:: This function may synchronize the device.

    .. seealso::
        :func:`cupy.random.rayleigh` for full documentation,
        :meth:`numpy.random.RandomState.rayleigh
        <numpy.random.mtrand.RandomState.rayleigh>`
    """
    scale = cupy.asarray(scale)
    if size is None:
        size = scale.shape
    if cupy.any(scale < 0):  # synchronize!
        raise ValueError('scale < 0')
    x = self._random_sample_raw(size, dtype)
    x = cupy.log(x, out=x)
    x = cupy.multiply(x, -2., out=x)
    x = cupy.sqrt(x, out=x)
    x = cupy.multiply(x, scale, out=x)
    return x
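An illustrative inverse-CDF sketch in plain NumPy (values are assumptions): the transform above is x = scale * sqrt(-2 * log(U)) with U ~ Uniform(0, 1), and the sample mean should approach scale * sqrt(pi / 2).

import numpy as np

rng = np.random.default_rng(0)
scale = 2.0
u = 1.0 - rng.random(100_000)          # uniform on (0, 1], avoids log(0)
x = scale * np.sqrt(-2.0 * np.log(u))
print(x.mean(), scale * np.sqrt(np.pi / 2))   # ~2.507 vs ~2.5066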
def o_f2(imgs, hres_size, row=None, do_fil=False, show=False):
    """
    Fourier Spectrum Initialization #2
    ----------------------------------
    Mean Image > pad with reflect padding > sqrt > Ft >
    (optional) filter Ft with 2 * cutoff_freq > return Ft
    """
    im = cp.array(imgs).mean(0)
    pad = (hres_size[0] - imgs[0].shape[0]) // 2
    im = cp.pad(cp.array(im), [(pad, pad), (pad, pad)], mode='reflect')

    f = Ft(cp.sqrt(im))

    if do_fil:
        _orig = hres_size[0] // 2 - 1
        CUTOFF_FREQ_px = get_cutoff(row)
        fil = np.zeros(hres_size)
        fil = cp.array(
            cv2.circle(fil, (_orig, _orig), 2 * CUTOFF_FREQ_px, 1, -1))
        f = f * fil

    if show:
        plt.imshow(cp.asnumpy(cp.log(abs(f) + 1e-7)))
        plt.title(f'o_f2 {"without" if not do_fil else "with"} filtering')
        plt.show()

    return f
def __extended_likelihood(self, params):
    data = self.__data_amplitude.calculate(params)
    mcdata = self.__monte_carlo_amplitude.calculate(params)

    if self.__data_amplitude.USE_GPU:
        likelihood_data = cp.sum(self.sweight * cp.log(data))
        likelihood_mc = cp.sum(self.mcweight * mcdata)
        return cp.asnumpy(
            likelihood_data - self.__generated * likelihood_mc
        )
    else:
        likelihood_data = ne.evaluate(
            "sum(sw * log(data))",
            local_dict={
                "sw": self.sweight,
                "data": data
            }
        )
        likelihood_mc = ne.evaluate(
            "sum(mcw * mcdata)",
            local_dict={
                "mcw": self.mcweight,
                "mcdata": mcdata
            }
        )
        return likelihood_data - self.__generated * likelihood_mc
def _acausal_classifier_gpu(filter_posterior, movement_state_transition, discrete_state_transition, observed_position_bin, uniform): ''' Parameters ---------- filter_posterior : ndarray, shape (n_time, 2, n_position_bins) movement_state_transition : ndarray, shape (n_position_bins, n_position_bins) discrete_state_transition : ndarray, shape (n_time, 2) discrete_state_transition[k, 0] = Pr(I_{k} = 1 | I_{k-1} = 0, v_{k}) discrete_state_transition[k, 1] = Pr(I_{k} = 1 | I_{k-1} = 1, v_{k}) observed_position_bin : ndarray, shape (n_time,) Which position bin is the animal in. position_bin_size : float Returns ------- smoother_posterior : ndarray, shape (n_time, 2, n_position_bins) p(x_{k + 1}, I_{k + 1} \vert H_{1:T}) smoother_probability : ndarray, shape (n_time, 2) smoother_probability[:, 0] = Pr(I_{1:T} = 0) smoother_probability[:, 1] = Pr(I_{1:T} = 1) smoother_prior : ndarray, shape (n_time, 2, n_position_bins) p(x_{k + 1}, I_{k + 1} \vert H_{1:k}) weights : ndarray, shape (n_time, 2, n_position_bins) \sum_{I_{k+1}} \int \Big[ \frac{p(x_{k+1} \mid x_{k}, I_{k}, I_{k+1}) * Pr(I_{k + 1} \mid I_{k}, v_{k}) * p(x_{k+1}, I_{k+1} \mid H_{1:T})} {p(x_{k + 1}, I_{k + 1} \mid H_{1:k})} \Big] dx_{k+1} ''' # noqa filter_posterior = cp.asarray(filter_posterior, dtype=cp.float32) movement_state_transition = cp.asarray( movement_state_transition, dtype=cp.float32) discrete_state_transition = cp.asarray( discrete_state_transition, dtype=cp.float32) observed_position_bin = cp.asarray(observed_position_bin) uniform = cp.asarray(uniform, dtype=cp.float32) EPS = cp.asarray(np.spacing(1), dtype=cp.float32) filter_probability = cp.sum(filter_posterior, axis=2) smoother_posterior = cp.zeros_like(filter_posterior) n_time, _, n_position_bins = filter_posterior.shape smoother_posterior[-1] = filter_posterior[-1].copy() for k in cp.arange(n_time - 2, -1, -1): smoother_prior = cp.zeros((2, n_position_bins), dtype=cp.float32) weights = cp.zeros((2, n_position_bins), dtype=cp.float32) position_ind = observed_position_bin[k + 1] # Predict p(x_{k + 1}, I_{k + 1} \vert H_{1:k}) # I_{k} = 0, I_{k + 1} = 0 smoother_prior[0, position_ind] = ( (1 - discrete_state_transition[k + 1, 0]) * filter_probability[k, 0]) # I_{k} = 1, I_{k + 1} = 0 smoother_prior[0, position_ind] += ( (1 - discrete_state_transition[k + 1, 1]) * filter_probability[k, 1]) # I_{k} = 0, I_{k + 1} = 1 smoother_prior[1] = ( discrete_state_transition[k + 1, 0] * uniform * filter_probability[k, 0]) # I_{k} = 1, I_{k + 1} = 1 smoother_prior[1] += ( discrete_state_transition[k + 1, 1] * (movement_state_transition.T @ filter_posterior[k, 1])) # Update p(x_{k}, I_{k} \vert H_{1:k}) ratio = cp.exp( cp.log(smoother_posterior[k + 1]) - cp.log(smoother_prior + EPS)) integrated_ratio = cp.sum(ratio, axis=1) # I_{k} = 0, I_{k + 1} = 0 weights[0] = ( (1 - discrete_state_transition[k + 1, 0]) * ratio[0, position_ind]) # I_{k} = 0, I_{k + 1} = 1 weights[0] += ( uniform * discrete_state_transition[k + 1, 0] * integrated_ratio[1]) # I_{k} = 1, I_{k + 1} = 0 weights[1] = ( (1 - discrete_state_transition[k + 1, 1]) * ratio[0, position_ind]) # I_{k} = 1, I_{k + 1} = 1 weights[1] += ( discrete_state_transition[k + 1, 1] * ratio[1] @ movement_state_transition) smoother_posterior[k] = weights * filter_posterior[k] smoother_posterior[k] /= cp.nansum(smoother_posterior[k]) smoother_probability = cp.sum(smoother_posterior, axis=2) return (cp.asnumpy(smoother_posterior), cp.asnumpy(smoother_probability))
def _causal_classifier_gpu(likelihood, movement_state_transition, discrete_state_transition, observed_position_bin, uniform): ''' Parameters ---------- likelihood : ndarray, shape (n_time, ...) movement_state_transition : ndarray, shape (n_position_bins, n_position_bins) discrete_state_transition : ndarray, shape (n_time, 2) discrete_state_transition[k, 0] = Pr(I_{k} = 1 | I_{k-1} = 0, v_{k}) discrete_state_transition[k, 1] = Pr(I_{k} = 1 | I_{k-1} = 1, v_{k}) observed_position_bin : ndarray, shape (n_time,) Which position bin is the animal in. position_bin_size : float Returns ------- posterior : ndarray, shape (n_time, 2, n_position_bins) state_probability : ndarray, shape (n_time, 2) state_probability[:, 0] = Pr(I_{1:T} = 0) state_probability[:, 1] = Pr(I_{1:T} = 1) prior : ndarray, shape (n_time, 2, n_position_bins) ''' likelihood = cp.asarray(likelihood, dtype=cp.float32) movement_state_transition = cp.asarray( movement_state_transition, dtype=cp.float32) discrete_state_transition = cp.asarray( discrete_state_transition, dtype=cp.float32) observed_position_bin = cp.asarray(observed_position_bin) uniform = cp.asarray(uniform, dtype=cp.float32) n_position_bins = movement_state_transition.shape[0] n_time = likelihood.shape[0] n_states = 2 posterior = cp.zeros( (n_time, n_states, n_position_bins), dtype=cp.float32) state_probability = cp.zeros((n_time, n_states), dtype=cp.float32) # Initial Conditions posterior[0, 0, observed_position_bin[0]] = likelihood[0, 0, 0] norm = cp.nansum(posterior[0]) data_log_likelihood = cp.log(norm) posterior[0] /= norm state_probability[0] = cp.sum(posterior[0], axis=1) for k in np.arange(1, n_time): prior = cp.zeros((n_states, n_position_bins), dtype=cp.float32) position_ind = observed_position_bin[k] # I_{k - 1} = 0, I_{k} = 0 prior[0, position_ind] = ( (1 - discrete_state_transition[k, 0]) * state_probability[k - 1, 0]) # I_{k - 1} = 1, I_{k} = 0 prior[0, position_ind] += ( (1 - discrete_state_transition[k, 1]) * state_probability[k - 1, 1]) # I_{k - 1} = 0, I_{k} = 1 prior[1] = (discrete_state_transition[k, 0] * uniform * state_probability[k - 1, 0]) # I_{k - 1} = 1, I_{k} = 1 prior[1] += ( discrete_state_transition[k, 1] * (movement_state_transition.T @ posterior[k - 1, 1])) posterior[k] = prior * likelihood[k] norm = cp.nansum(posterior[k]) data_log_likelihood += cp.log(norm) posterior[k] /= norm state_probability[k] = cp.sum(posterior[k], axis=1) return (cp.asnumpy(posterior), cp.asnumpy(state_probability), data_log_likelihood)
def convolutional_barycenter_gpu(Hv, reg, alpha, stabThresh=1e-30, niter=1500, tol=1e-9, sharpening=False, verbose=False): """Main function solving wasserstein barycenter problem using gpu Arguments: Hv {Set of distributions (cparray)} -- reg {regularization term "gamma"} -- float superior to 0, generally equals size of space/40 alpha {list} -- set of weights Keyword Arguments: stabThresh {float} -- Stabilization threshold to prevent division by 0 (default: {1e-30}) niter {int} -- Maximum number of loop iteration (default: {1500}) tol {float} -- convergence tolerance at which point iterations stop (default: {1e-9}) sharpening {bool} -- Whether or not entropic sharpening is used (default: {False}) verbose {bool} -- verbose option Returns: cparray -- solution of weighted wassertein barycenter problem """ def K(x): return cp.array(gaussian_filter(cp.asnumpy(x), sigma=reg)) def to_find_root(barycenter, H0, beta): return entropy(barycenter**beta) - H0 alpha = cp.array(alpha) alpha = alpha / alpha.sum() Hv = cp.array(Hv) mean_weights = (Hv[0].sum() + Hv[1].sum()) / 2. #print('mean weights', mean_weights) for i in range(len(Hv)): Hv[i] = Hv[i] / Hv[i].sum() v = cp.ones(Hv.shape) Kw = cp.ones(Hv.shape) entropy_max = max_entropy(Hv) barycenter = cp.zeros(Hv[0].shape) change = 1 for j in range(niter): t0 = time.time() barycenterOld = barycenter barycenter = cp.zeros_like(Hv[0, :, :]) for i in range(Hv.shape[0]): Kw[i, :, :] = K(Hv[i, :, :] / cp.maximum(stabThresh, K(v[i, :, :]))) barycenter += alpha[i] * cp.log( cp.maximum(stabThresh, v[i, :, :] * Kw[i, :, :])) barycenter = cp.exp(barycenter) change = cp.sum(cp.abs(barycenter - barycenterOld)) if sharpening: if (entropy(barycenter)) > (entropy_max): beta = newton( lambda beta: to_find_root(barycenter, entropy_max, beta), 1, tol=1e-6) if beta < 0: beta = 1 else: beta = 1 barycenter = barycenter**beta for i in range(Hv.shape[0]): v[i, :, :] = barycenter / cp.maximum(stabThresh, Kw[i, :, :]) if verbose: #sys.stdout('output.log','a') print("iter : ", j, "change : ", change, 'time :', time.time() - t0) if change < tol: break return cp.asnumpy(barycenter)
def choice(self, a, size=None, replace=True, p=None):
    """Returns an array of random values from a given 1-D array.

    .. seealso::
        :func:`cupy.random.choice` for full document,
        :meth:`numpy.random.choice`

    """
    if a is None:
        raise ValueError('a must be 1-dimensional or an integer')
    if isinstance(a, cupy.ndarray) and a.ndim == 0:
        raise NotImplementedError
    if isinstance(a, six.integer_types):
        a_size = a
        if a_size <= 0:
            raise ValueError('a must be greater than 0')
    else:
        a = cupy.array(a, copy=False)
        if a.ndim != 1:
            raise ValueError('a must be 1-dimensional or an integer')
        else:
            a_size = len(a)
            if a_size == 0:
                raise ValueError('a must be non-empty')

    if p is not None:
        p = cupy.array(p)
        if p.ndim != 1:
            raise ValueError('p must be 1-dimensional')
        if len(p) != a_size:
            raise ValueError('a and p must have same size')
        if not (p >= 0).all():
            raise ValueError('probabilities are not non-negative')
        p_sum = cupy.sum(p).get()
        if not numpy.allclose(p_sum, 1):
            raise ValueError('probabilities do not sum to 1')

    if not replace:
        raise NotImplementedError

    if size is None:
        raise NotImplementedError
    shape = size
    size = numpy.prod(shape)

    if p is not None:
        p = cupy.broadcast_to(p, (size, a_size))
        # Gumbel-max trick: add (not subtract) standard Gumbel noise to
        # log(p) and take the argmax to sample index i with probability p[i]
        index = cupy.argmax(cupy.log(p) +
                            cupy.random.gumbel(size=(size, a_size)),
                            axis=1)
        if not isinstance(shape, six.integer_types):
            index = cupy.reshape(index, shape)
    else:
        index = cupy.random.randint(0, a_size, size=shape)
        # Align the dtype with NumPy
        index = index.astype(cupy.int64, copy=False)

    if isinstance(a, six.integer_types):
        return index

    if index.ndim == 0:
        return cupy.array(a[index], dtype=a.dtype)

    return a[index]
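For reference, a NumPy-only sketch of the Gumbel-max trick used above, with made-up probabilities: argmax(log p + Gumbel noise) draws index i with probability p[i], which the empirical frequencies confirm.

import numpy as np

rng = np.random.default_rng(0)
p = np.array([0.2, 0.5, 0.3])
n = 100_000
g = rng.gumbel(size=(n, p.size))
idx = np.argmax(np.log(p) + g, axis=1)
print(np.bincount(idx) / n)    # ~[0.2, 0.5, 0.3]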
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = (1. / m) * (-np.dot(Y, np.log(AL).T) -
                       np.dot(1 - Y, np.log(1 - AL).T))
    cost = np.squeeze(cost)
    return cost
def mi_model_1d_gpu_gd(x, y, biascorrect=False, demeaned=False): """Mutual information between a Gaussian and a discrete variable in bits. This method is based on ANOVA style model comparison. I = mi_model_gd(x,y) returns the MI between the (possibly multidimensional) Gaussian variable x and the discrete variable y. Parameters ---------- x, y : array_like Gaussian arrays of shape (n_epochs,) or (n_dimensions, n_epochs). y must be an array of integers biascorrect : bool | True Specifies whether bias correction should be applied to the estimated MI demeaned : bool | False Specifies whether the input data already has zero mean (true if it has been copula-normalized) Returns ------- i : float Information shared by x and y (in bits) """ # Converting to cupy array #x, y = cp.array(x), cp.array(y) x, y = cp.atleast_2d(x), cp.squeeze(y) if x.ndim > 2: raise ValueError("x must be at most 2d") if y.ndim > 1: raise ValueError("only univariate discrete variables supported") if not cp.issubdtype(y.dtype, cp.integer): raise ValueError("y should be an integer array") nvarx, ntrl = x.shape ym = cp.unique(y) if y.size != ntrl: raise ValueError("number of trials do not match") if not demeaned: x = x - x.mean(axis=1)[:, cp.newaxis] # class-conditional entropies ntrl_y = cp.zeros(len(ym)) hcond = cp.zeros(len(ym)) for n_yi, yi in enumerate(ym): idx = y == yi xm = x[:, idx] ntrl_y[n_yi] = xm.shape[1] xm = xm - xm.mean(axis=1)[:, cp.newaxis] cm = cp.dot(xm, xm.T) / float(ntrl_y[n_yi] - 1) chcm = cp.linalg.cholesky(cm) hcond[n_yi] = cp.sum(cp.log(cp.diagonal(chcm))) # class weights w = ntrl_y / float(ntrl) # unconditional entropy from unconditional Gaussian fit cx = cp.dot(x, x.T) / float(ntrl - 1) chc = cp.linalg.cholesky(cx) hunc = cp.sum(cp.log(cp.diagonal(chc))) # + c*nvarx ln2 = cp.log(2) if biascorrect: vars = cp.arange(1, nvarx + 1) psiterms = psi((ntrl - vars).astype(cp.float) / 2.) / 2. dterm = (ln2 - cp.log(float(ntrl - 1))) / 2. hunc = hunc - nvarx * dterm - psiterms.sum() dterm = (ln2 - cp.log((ntrl_y - 1).astype(cp.float))) / 2.0 psiterms = cp.zeros(len(ym)) for vi in vars: idx = ntrl_y - vi psiterms = psiterms + psi(idx.astype(cp.float) / 2.) hcond = hcond - nvarx * dterm - (psiterms / 2.) # MI in bits i = (hunc - cp.sum(w * hcond)) / ln2 return i
path = '/export/scratch2/kostenko/archive/OwnProjects/al_tests/new/90KV_no_filt/'

dark = flex.data.read_raw(path, 'di')
flat = flex.data.read_raw(path, 'io')
proj = flex.data.read_raw(path, 'scan_')

meta = flex.data.read_log(path, 'flexray')

#%% Prepro:

# Convert to CUPY:
proj = cupy.array(proj)
flat = cupy.array(flat)
dark = cupy.array(dark)

# Use CUDA to compute stuff:
proj = (proj - dark) / (flat.mean(0) - dark)
proj = -cupy.log(proj)
proj = flex.data.raw2astra(proj)

flex.util.display_slice(proj, title='Sinogram')

#%% Recon

vol = numpy.zeros([1, 2000, 2000], dtype='float32')

flex.project.FDK(proj, vol, meta['geometry'])

flex.util.display_slice(vol, bounds=[], title='FDK')
def mlog(self, psi):
    res = psi.copy()
    res[cp.abs(psi) < 1e-32] = 1e-32
    res = cp.log(res)
    return res
def _support_choice(dist, rand):
    return cp.log(dist) + rand
def __call__(self, input_x, t): output = self.predictor(input_x) batch_size, _, grid_h, grid_w = output.shape self.seen += batch_size x, y, w, h, conf, prob = F.split_axis(F.reshape( output, (batch_size, self.predictor.n_boxes, self.predictor.n_classes + 5, grid_h, grid_w)), (1, 2, 3, 4, 5), axis=2) x = F.sigmoid(x) # xのactivation y = F.sigmoid(y) # yのactivation conf = F.sigmoid(conf) # confのactivation prob = F.transpose(prob, (0, 2, 1, 3, 4)) prob = F.softmax(prob) # probablitiyのacitivation # 教師データの用意 tw = xp.zeros( w.shape, dtype=xp.float32) # wとhが0になるように学習(e^wとe^hは1に近づく -> 担当するbboxの倍率1) th = xp.zeros(h.shape, dtype=xp.float32) tx = xp.tile(0.5, x.shape).astype(xp.float32) # 活性化後のxとyが0.5になるように学習() ty = xp.tile(0.5, y.shape).astype(xp.float32) if self.seen < self.unstable_seen: # centerの存在しないbbox誤差学習スケールは基本0.1 box_learning_scale = xp.tile(0.1, x.shape).astype(xp.float32) else: box_learning_scale = xp.tile(0, x.shape).astype(xp.float32) tconf = xp.zeros( conf.shape, dtype=xp.float32 ) # confidenceのtruthは基本0、iouがthresh以上のものは学習しない、ただしobjectの存在するgridのbest_boxのみ真のIOUに近づかせる conf_learning_scale = xp.tile(0.1, conf.shape).astype(xp.float32) tprob = prob.data.copy() # best_anchor以外は学習させない(自身との二乗和誤差 = 0) # 全bboxとtruthのiouを計算(batch単位で計算する) x_shift = Variable( xp.broadcast_to(xp.arange(grid_w, dtype=xp.float32), x.shape[1:])) y_shift = Variable( xp.broadcast_to( xp.arange(grid_h, dtype=xp.float32).reshape(grid_h, 1), y.shape[1:])) w_anchor = Variable( xp.broadcast_to( xp.reshape( xp.array(self.anchors, dtype=xp.float32)[:, 0], (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:])) h_anchor = Variable( xp.broadcast_to( xp.reshape( xp.array(self.anchors, dtype=xp.float32)[:, 1], (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:])) x_shift.to_gpu(), y_shift.to_gpu(), w_anchor.to_gpu(), h_anchor.to_gpu( ) best_ious = [] for batch in range(batch_size): n_truth_boxes = len(t[batch]) box_x = (x[batch] + x_shift) / grid_w box_y = (y[batch] + y_shift) / grid_h box_w = F.exp(w[batch]) * w_anchor / grid_w box_h = F.exp(h[batch]) * h_anchor / grid_h ious = [] for truth_index in range(n_truth_boxes): truth_box_x = Variable( xp.broadcast_to( xp.array(t[batch][truth_index]["x"], dtype=xp.float32), box_x.shape)) truth_box_y = Variable( xp.broadcast_to( xp.array(t[batch][truth_index]["y"], dtype=xp.float32), box_y.shape)) truth_box_w = Variable( xp.broadcast_to( xp.array(t[batch][truth_index]["w"], dtype=xp.float32), box_w.shape)) truth_box_h = Variable( xp.broadcast_to( xp.array(t[batch][truth_index]["h"], dtype=xp.float32), box_h.shape)) truth_box_x.to_gpu(), truth_box_y.to_gpu(), truth_box_w.to_gpu( ), truth_box_h.to_gpu() ious.append( multi_box_iou( Box(box_x, box_y, box_w, box_h), Box(truth_box_x, truth_box_y, truth_box_w, truth_box_h)).data.get()) ious = xp.array(ious) best_ious.append(xp.max(ious, axis=0)) best_ious = xp.array(best_ious) # 一定以上のiouを持つanchorに対しては、confを0に下げないようにする(truthの周りのgridはconfをそのまま維持)。 tconf[best_ious > self.thresh] = conf.data.get()[ best_ious > self.thresh] conf_learning_scale[best_ious > self.thresh] = 0 # objectの存在するanchor boxのみ、x、y、w、h、conf、probを個別修正 abs_anchors = self.anchors / xp.array([grid_w, grid_h]) for batch in range(batch_size): for truth_box in t[batch]: truth_w = int(float(truth_box["x"]) * grid_w) truth_h = int(float(truth_box["y"]) * grid_h) truth_n = 0 best_iou = 0.0 for anchor_index, abs_anchor in enumerate(abs_anchors): iou = box_iou( Box(0, 0, float(truth_box["w"]), float(truth_box["h"])), Box(0, 0, abs_anchor[0], abs_anchor[1])) if best_iou < iou: best_iou = iou truth_n = 
anchor_index # objectの存在するanchorについて、centerを0.5ではなく、真の座標に近づかせる。anchorのスケールを1ではなく真のスケールに近づかせる。学習スケールを1にする。 box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0 tx[batch, truth_n, :, truth_h, truth_w] = float(truth_box["x"]) * grid_w - truth_w ty[batch, truth_n, :, truth_h, truth_w] = float(truth_box["y"]) * grid_h - truth_h tw[batch, truth_n, :, truth_h, truth_w] = xp.log( float(truth_box["w"]) / abs_anchors[truth_n][0]) th[batch, truth_n, :, truth_h, truth_w] = xp.log( float(truth_box["h"]) / abs_anchors[truth_n][1]) tprob[batch, :, truth_n, truth_h, truth_w] = 0 tprob[batch, int(truth_box["label"]), truth_n, truth_h, truth_w] = 1 # IOUの観測 full_truth_box = Box(float(truth_box["x"]), float(truth_box["y"]), float(truth_box["w"]), float(truth_box["h"])) predicted_box = Box( (x[batch][truth_n][0][truth_h][truth_w].data.get() + truth_w) / grid_w, (y[batch][truth_n][0][truth_h][truth_w].data.get() + truth_h) / grid_h, xp.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) * abs_anchors[truth_n][0], xp.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) * abs_anchors[truth_n][1]) predicted_iou = box_iou(full_truth_box, predicted_box) tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0 # debug prints maps = F.transpose(prob[batch], (2, 3, 1, 0)).data print( "best confidences and best conditional probability and predicted class of each grid:" ) for i in range(grid_h): for j in range(grid_w): print("%2d" % (int(conf[batch, :, :, i, j].data.max() * 100)), end=" ") print(" ", end="") for j in range(grid_w): print("%2d" % (maps[i][j][int( maps[i][j].max(axis=1).argmax())].argmax()), end=" ") print(" ", end="") for j in range(grid_w): print("%2d" % (maps[i][j][int( maps[i][j].max(axis=1).argmax())].max() * 100), end=" ") print() print( "best default iou: %.2f predicted iou: %.2f confidence: %.2f class: %s" % (best_iou, predicted_iou, conf[batch][truth_n][0][truth_h][truth_w].data, t[batch][0]["label"])) print("-------------------------------") print("seen = %d" % self.seen) # loss計算 tx, ty, tw, th, tconf, tprob = Variable(tx), Variable(ty), Variable( tw), Variable(th), Variable(tconf), Variable(tprob) box_learning_scale, conf_learning_scale = Variable( box_learning_scale), Variable(conf_learning_scale) tx.to_gpu(), ty.to_gpu(), tw.to_gpu(), th.to_gpu(), tconf.to_gpu( ), tprob.to_gpu() box_learning_scale.to_gpu() conf_learning_scale.to_gpu() x_loss = F.sum((tx - x)**2 * box_learning_scale) / 2 y_loss = F.sum((ty - y)**2 * box_learning_scale) / 2 w_loss = F.sum((tw - w)**2 * box_learning_scale) / 2 h_loss = F.sum((th - h)**2 * box_learning_scale) / 2 c_loss = F.sum((tconf - conf)**2 * conf_learning_scale) / 2 p_loss = F.sum((tprob - prob)**2) / 2 print( "x_loss: %f y_loss: %f w_loss: %f h_loss: %f c_loss: %f p_loss: %f" % (F.sum(x_loss).data, F.sum(y_loss).data, F.sum(w_loss).data, F.sum(h_loss).data, F.sum(c_loss).data, F.sum(p_loss).data)) loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss return loss
def categorical_cross_entropy(x, y, epsilon=10**(-13)):
    x = cp.clip(x, epsilon, 1. - epsilon)
    N = x.shape[0]
    # x is already clipped away from 0, so the extra 1e-5 inside the log is a
    # redundant second guard that only shifts the values slightly
    return -cp.sum(y * cp.log(x + 0.00001)) / N
def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None) -> float: """ Log loss, aka logistic loss or cross-entropy loss. This is the loss function used in (multinomial) logistic regression and extensions of it such as neural networks, defined as the negative log-likelihood of a logistic model that returns ``y_pred`` probabilities for its training data ``y_true``. The log loss is only defined for two or more labels. Parameters ---------- y_true : array-like, shape = (n_samples,) y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,) eps : float (default=1e-15) Log loss is undefined for p=0 or p=1, so probabilities are clipped to max(eps, min(1 - eps, p)). normalize : bool, optional (default=True) If true, return the mean loss per sample. Otherwise, return the sum of the per-sample losses. sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns ------- loss : float Examples -------- .. code-block:: python >>> from cuml.metrics import log_loss >>> import cupy as cp >>> log_loss(cp.array([1, 0, 0, 1]), ... cp.array([[.1, .9], [.9, .1], [.8, .2], [.35, .65]])) 0.21616... References ---------- C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer, p. 209. Notes ----- The logarithm used is the natural logarithm (base-e). """ y_true, n_rows, n_cols, ytype = \ input_to_cupy_array(y_true, check_dtype=[np.int32, np.int64, np.float32, np.float64]) if y_true.dtype.kind == 'f' and np.any(y_true != y_true.astype(int)): raise ValueError("'y_true' can only have integer values") if y_true.min() < 0: raise ValueError("'y_true' cannot have negative values") y_pred, _, _, _ = \ input_to_cupy_array(y_pred, check_dtype=[np.int32, np.int64, np.float32, np.float64], check_rows=n_rows) y_true_max = y_true.max() if (y_pred.ndim == 1 and y_true_max > 1) \ or (y_pred.ndim > 1 and y_pred.shape[1] <= y_true_max): raise ValueError("The shape of y_pred doesn't " "match the number of classes") y_true = y_true.astype('int32') y_pred = cp.clip(y_pred, eps, 1 - eps) if y_pred.ndim == 1: y_pred = cp.expand_dims(y_pred, axis=1) if y_pred.shape[1] == 1: y_pred = cp.hstack([1 - y_pred, y_pred]) y_pred /= cp.sum(y_pred, axis=1, keepdims=True) loss = -cp.log(y_pred)[cp.arange(y_pred.shape[0]), y_true] return _weighted_sum(loss, sample_weight, normalize).item()
def GetStepSize(photon_state, tau_atm):
    step = -cp.log(cp.random.rand(len(photon_state)))
    return step
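A NumPy sketch (illustrative, made-up sample count): -log(U) with U ~ Uniform(0, 1) is an Exponential(1) draw, i.e. a free path length in units of the mean free path, so both the mean and the standard deviation of the steps are about 1.

import numpy as np

rng = np.random.default_rng(0)
steps = -np.log(1.0 - rng.random(100_000))   # 1 - U keeps the argument in (0, 1]
print(steps.mean(), steps.std())             # both ~1.0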
def mi_1d_gpu_gg(x, y, biascorrect=True, demeaned=False):
    """Mutual information (MI) between two Gaussian variables in bits.

    This is the GPU variant of the mi_1d_gg function, using CuPy

    I = mi_gg(x,y) returns the MI between two (possibly multidimensional)
    Gaussian variables, x and y, with bias correction.

    Parameters
    ----------
    x, y : array_like
        Gaussian arrays of shape (n_epochs,) or (n_dimensions, n_epochs)
    biascorrect : bool | True
        Specifies whether bias correction should be applied to the estimated MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it has
        been copula-normalized)

    Returns
    -------
    i : float
        Information shared by x and y (in bits)
    """
    x, y = cp.atleast_2d(x), cp.atleast_2d(y)
    if (x.ndim > 2) or (y.ndim > 2):
        raise ValueError("x and y must be at most 2d")
    nvarx, ntrl = x.shape
    nvary = y.shape[0]
    nvarxy = nvarx + nvary

    if y.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # joint variable
    xy = cp.vstack((x, y))
    if not demeaned:
        xy = xy - xy.mean(axis=1)[:, cp.newaxis]
    cxy = cp.dot(xy, xy.T) / float(ntrl - 1)
    # submatrices of joint covariance
    cx = cxy[:nvarx, :nvarx]
    cy = cxy[nvarx:, nvarx:]

    chcxy = cp.linalg.cholesky(cxy)
    chcx = cp.linalg.cholesky(cx)
    chcy = cp.linalg.cholesky(cy)

    # entropies in nats
    # normalizations cancel for mutual information
    hx = cp.sum(cp.log(cp.diagonal(chcx)))
    hy = cp.sum(cp.log(cp.diagonal(chcy)))
    hxy = cp.sum(cp.log(cp.diagonal(chcxy)))

    ln2 = cp.log(2)
    if biascorrect:
        psiterms = psi(
            (ntrl - cp.arange(1, nvarxy + 1)).astype(cp.float) / 2.) / 2.
        dterm = (ln2 - cp.log(ntrl - 1.)) / 2.
        hx = hx - nvarx * dterm - psiterms[:nvarx].sum()
        hy = hy - nvary * dterm - psiterms[:nvary].sum()
        hxy = hxy - nvarxy * dterm - psiterms[:nvarxy].sum()

    # MI in bits
    i = (hx + hy - hxy) / ln2
    return i
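A CPU-only sanity sketch with assumed parameters (rho, n) and no bias correction: for jointly Gaussian x, y with correlation rho, the exact MI is -0.5 * log2(1 - rho**2) bits, and a plug-in estimate built from log-determinants, like the one above, should approach it for large n.

import numpy as np

rng = np.random.default_rng(0)
rho, n = 0.6, 200_000
cov = [[1.0, rho], [rho, 1.0]]
x, y = rng.multivariate_normal([0.0, 0.0], cov, size=n).T
# plug-in estimate from the (co)variance log-determinants; constants cancel
hx = 0.5 * np.log(np.var(x, ddof=1))
hy = 0.5 * np.log(np.var(y, ddof=1))
hxy = 0.5 * np.linalg.slogdet(np.cov(x, y))[1]
print((hx + hy - hxy) / np.log(2), -0.5 * np.log2(1 - rho**2))  # ~0.32 bits each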
def cmi_1d_gpu_ggg(x, y, z, biascorrect=True, demeaned=False): """Conditional MI between two Gaussian variables conditioned on a third. I = cmi_ggg(x,y,z) returns the CMI between two (possibly multidimensional) Gaussian variables, x and y, conditioned on a third, z, with bias correction. Parameters ---------- x, y, z : array_like Gaussians arrays of shape (n_epochs,) or (n_dimensions, n_epochs). biascorrect : bool | True Specifies whether bias correction should be applied to the estimated MI demeaned : bool | False Specifies whether the input data already has zero mean (true if it has been copula-normalized) Returns ------- i : float Information shared by x and y conditioned by z (in bits) """ x, y, z = cp.atleast_2d(x), cp.atleast_2d(y), cp.atleast_2d(z) if x.ndim > 2 or y.ndim > 2 or z.ndim > 2: raise ValueError("x, y and z must be at most 2d") ntrl = x.shape[1] nvarx = x.shape[0] nvary = y.shape[0] nvarz = z.shape[0] nvaryz = nvary + nvarz nvarxy = nvarx + nvary nvarxz = nvarx + nvarz nvarxyz = nvarx + nvaryz if y.shape[1] != ntrl or z.shape[1] != ntrl: raise ValueError("number of trials do not match") # joint variable xyz = cp.vstack((x, y, z)) if not demeaned: xyz = xyz - xyz.mean(axis=1)[:, cp.newaxis] cxyz = cp.dot(xyz, xyz.T) / float(ntrl - 1) # submatrices of joint covariance cz = cxyz[nvarxy:, nvarxy:] cyz = cxyz[nvarx:, nvarx:] cxz = cp.zeros((nvarxz, nvarxz)) cxz[:nvarx, :nvarx] = cxyz[:nvarx, :nvarx] cxz[:nvarx, nvarx:] = cxyz[:nvarx, nvarxy:] cxz[nvarx:, :nvarx] = cxyz[nvarxy:, :nvarx] cxz[nvarx:, nvarx:] = cxyz[nvarxy:, nvarxy:] chcz = cp.linalg.cholesky(cz) chcxz = cp.linalg.cholesky(cxz) chcyz = cp.linalg.cholesky(cyz) chcxyz = cp.linalg.cholesky(cxyz) # entropies in nats # normalizations cancel for cmi hz = cp.sum(cp.log(cp.diagonal(chcz))) hxz = cp.sum(cp.log(cp.diagonal(chcxz))) hyz = cp.sum(cp.log(cp.diagonal(chcyz))) hxyz = cp.sum(cp.log(cp.diagonal(chcxyz))) ln2 = cp.log(2) if biascorrect: psiterms = psi( (ntrl - cp.arange(1, nvarxyz + 1)).astype(cp.float) / 2.) / 2. dterm = (ln2 - cp.log(ntrl - 1.)) / 2. hz = hz - nvarz * dterm - psiterms[:nvarz].sum() hxz = hxz - nvarxz * dterm - psiterms[:nvarxz].sum() hyz = hyz - nvaryz * dterm - psiterms[:nvaryz].sum() hxyz = hxyz - nvarxyz * dterm - psiterms[:nvarxyz].sum() # MI in bits i = (hxz + hyz - hxyz - hz) / ln2 return i
def get_error(y, t):
    # Return the binary cross-entropy error.
    eps = 1e-7
    return -np.sum(t * np.log(y + eps) +
                   (1 - t) * np.log(1 - y + eps)) / len(y)
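A quick NumPy check with made-up values: for a single example with t = 1 and y = 0.9, the binary cross-entropy should be close to -log(0.9).

import numpy as np

y = np.array([0.9])
t = np.array([1.0])
eps = 1e-7
err = -np.sum(t * np.log(y + eps) + (1 - t) * np.log(1 - y + eps)) / len(y)
print(err, -np.log(0.9))    # ~0.105 for both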
def slogdet(a):
    """Returns sign and logarithm of the determinant of an array.

    It calculates the natural logarithm of the determinant of a given value.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            It returns a tuple ``(sign, logdet)``. ``sign`` represents each
            sign of the determinant as a real number ``0``, ``1`` or ``-1``.
            ``logdet`` represents the natural logarithm of the absolute of the
            determinant.
            If the determinant is zero, ``sign`` will be ``0`` and ``logdet``
            will be ``-inf``.
            The shapes of both ``sign`` and ``logdet`` are equal to
            ``a.shape[:-2]``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. warning::
        To produce the same results as :func:`numpy.linalg.slogdet` for
        singular inputs, set the `linalg` configuration to `raise`.

    .. seealso:: :func:`numpy.linalg.slogdet`
    """
    if a.ndim < 2:
        msg = ('%d-dimensional array given. '
               'Array must be at least two-dimensional' % a.ndim)
        raise linalg.LinAlgError(msg)
    _util._assert_nd_squareness(a)

    dtype = numpy.promote_types(a.dtype.char, 'f')
    real_dtype = numpy.dtype(dtype.char.lower())

    if dtype not in (numpy.float32, numpy.float64,
                     numpy.complex64, numpy.complex128):
        msg = ('dtype must be float32, float64, complex64, or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    a_shape = a.shape
    shape = a_shape[:-2]
    n = a_shape[-2]

    if a.size == 0:
        # empty batch (result is empty, too) or empty matrices det([[]]) == 1
        sign = cupy.ones(shape, dtype)
        logdet = cupy.zeros(shape, real_dtype)
        return sign, logdet

    lu, ipiv, dev_info = _decomposition._lu_factor(a, dtype)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diagonal(lu, axis1=-2, axis2=-1)

    logdet = cupy.log(cupy.abs(diag)).sum(axis=-1)

    # ipiv is 1-origin
    non_zero = cupy.count_nonzero(ipiv != cupy.arange(1, n + 1), axis=-1)
    if dtype.kind == "f":
        non_zero += cupy.count_nonzero(diag < 0, axis=-1)

    # Note: sign == -1 ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    if dtype.kind == "c":
        sign = sign * cupy.prod(diag / cupy.abs(diag), axis=-1)

    singular = dev_info > 0
    return (
        cupy.where(singular, dtype.type(0),
                   sign.astype(dtype)).reshape(shape),
        cupy.where(singular, real_dtype.type('-inf'),
                   logdet).reshape(shape),
    )
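A usage sketch, assuming a CUDA device with CuPy installed and a made-up input matrix: sign * exp(logdet) should reproduce the plain determinant, and the result should agree with NumPy.

import cupy
import numpy

a = cupy.asarray(numpy.random.default_rng(0).standard_normal((4, 4)))
sign, logdet = cupy.linalg.slogdet(a)
print(sign * cupy.exp(logdet), cupy.linalg.det(a))   # same value
print(numpy.linalg.slogdet(cupy.asnumpy(a)))         # matching NumPy result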
def log_softmax(x: xp.ndarray, axis=-1):
    c = xp.max(x, axis=axis, keepdims=True)  # [*, 1, *]
    x2 = x - c  # [*, ?, *]
    logsumexp = xp.log(xp.exp(x2).sum(axis=axis, keepdims=True))  # [*, 1, *]
    return x2 - logsumexp
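A NumPy sketch with assumed inputs: exponentiating the result recovers a proper softmax even for inputs large enough that a naive exp(x) would overflow, which is the point of subtracting the per-row maximum first.

import numpy as np

def log_softmax_np(x, axis=-1):
    c = np.max(x, axis=axis, keepdims=True)
    x2 = x - c
    return x2 - np.log(np.exp(x2).sum(axis=axis, keepdims=True))

x = np.array([[1000.0, 1001.0, 1002.0]])
print(np.exp(log_softmax_np(x)).sum())   # 1.0, no overflow warnings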
def main(): nwaves = 256 pw = 256 dw = 512+16+3 A = lt.Propagation(nwaves, dw, pw) B = to.Propagation(dw, pw, array_module=cp, asnumpy=cp.asnumpy) shape = (nwaves, pw, pw) nearplane = cp.random.rand(*shape) + 1j * cp.random.rand(*shape) amplitude = plt.imread("/home/beams/DCHING/Pictures/images/Cryptomeria_japonica-0256.tif") / 255 phase = plt.imread("/home/beams/DCHING/Pictures/images/Erdhummel_Bombus_terrestris-0256.tif") / 255 nearplane[0] = cp.asarray(amplitude + 1j * phase) nearplane = cp.ascontiguousarray(nearplane, dtype='complex64') start = time.time() farplaneB = B.fwd(nearplane) stop = time.time() print(farplaneB.shape, stop-start) start = time.time() farplaneA = A.fwd(nearplane) stop = time.time() print(farplaneA.shape, stop-start) return plt.figure() plt.subplot(1, 3, 1) plt.imshow(cp.log(cp.abs(farplaneA)).get()[0]) plt.colorbar() plt.title('CUDA') plt.subplot(1, 3, 2) plt.imshow(cp.log(cp.abs(farplaneB)).get()[0]) plt.colorbar() plt.title('CUPY') plt.subplot(1, 3, 3) plt.imshow( cp.log( cp.abs(farplaneA) - cp.abs(farplaneB) ).get()[0] ) plt.colorbar() plt.title('DIFF') plt.show() # cp.testing.assert_array_equal(farplaneA, farplaneB) nearplaneA = A.adj(farplaneB) nearplaneB = B.adj(farplaneA) plt.figure() plt.subplot(1, 3, 1) plt.imshow(nearplaneA.real.get()[0]) plt.colorbar() plt.title('CUDA') plt.subplot(1, 3, 2) plt.imshow(nearplaneB.real.get()[0]) plt.colorbar() plt.title('CUPY') plt.subplot(1, 3, 3) plt.imshow( cp.log( nearplaneB.real - nearplaneA.real ).get()[0] ) plt.colorbar() plt.title('DIFF') plt.show() cp.testing.assert_array_equal(nearplaneA, nearplaneB)
def choice(self, a, size=None, replace=True, p=None): """Returns an array of random values from a given 1-D array. .. seealso:: :func:`cupy.random.choice` for full document, :meth:`numpy.random.choice` """ if a is None: raise ValueError('a must be 1-dimensional or an integer') if isinstance(a, cupy.ndarray) and a.ndim == 0: raise NotImplementedError if isinstance(a, six.integer_types): a_size = a if a_size <= 0: raise ValueError('a must be greater than 0') else: a = cupy.array(a, copy=False) if a.ndim != 1: raise ValueError('a must be 1-dimensional or an integer') else: a_size = len(a) if a_size == 0: raise ValueError('a must be non-empty') if p is not None: p = cupy.array(p) if p.ndim != 1: raise ValueError('p must be 1-dimensional') if len(p) != a_size: raise ValueError('a and p must have same size') if not (p >= 0).all(): raise ValueError('probabilities are not non-negative') p_sum = cupy.sum(p).get() if not numpy.allclose(p_sum, 1): raise ValueError('probabilities do not sum to 1') if size is None: raise NotImplementedError shape = size size = numpy.prod(shape) if not replace and p is None: if a_size < size: raise ValueError( 'Cannot take a larger sample than population when ' '\'replace=False\'') if isinstance(a, six.integer_types): indices = cupy.arange(a, dtype='l') else: indices = a.copy() self.shuffle(indices) return indices[:size].reshape(shape) if not replace: raise NotImplementedError if p is not None: p = cupy.broadcast_to(p, (size, a_size)) index = cupy.argmax(cupy.log(p) + cupy.random.gumbel(size=(size, a_size)), axis=1) if not isinstance(shape, six.integer_types): index = cupy.reshape(index, shape) else: index = cupy.random.randint(0, a_size, size=shape) # Align the dtype with NumPy index = index.astype(cupy.int64, copy=False) if isinstance(a, six.integer_types): return index if index.ndim == 0: return cupy.array(a[index], dtype=a.dtype) return a[index]
def select_log_next_cupy(X, gains, current_values, idxs):
    gains[:] = cupy.sum(cupy.log(current_values + X + 1), axis=1)[idxs]
def beam_search(model, X, params, return_alphas=False, eos_sym=0, null_sym=2, model_ensemble=False, n_models=0): """ Beam search method for Cond models. (https://en.wikibooks.org/wiki/Artificial_Intelligence/Search/Heuristic_search/Beam_search) The algorithm in a nutshell does the following: 1. k = beam_size 2. open_nodes = [[]] * k 3. while k > 0: 3.1. Given the inputs, get (log) probabilities for the outputs. 3.2. Expand each open node with all possible output. 3.3. Prune and keep the k best nodes. 3.4. If a sample has reached the <eos> symbol: 3.4.1. Mark it as final sample. 3.4.2. k -= 1 3.5. Build new inputs (state_below) and go to 1. 4. return final_samples, final_scores :param model: Model to use :param X: Model inputs :param params: Search parameters :param return_alphas: Whether we should return attention weights or not. :param eos_sym: <eos> symbol :param null_sym: <null> symbol :param model_ensemble: Whether we are using several models in an ensemble :param n_models; Number of models in the ensemble. :return: UNSORTED list of [k_best_samples, k_best_scores] (k: beam size) """ k = params['beam_size'] samples = [] sample_scores = [] pad_on_batch = params['pad_on_batch'] dead_k = 0 # samples that reached eos live_k = 1 # samples that did not yet reach eos hyp_samples = [[]] * live_k hyp_scores = cp.zeros(live_k, dtype='float32') ret_alphas = return_alphas or params['pos_unk'] if ret_alphas: sample_alphas = [] hyp_alphas = [[]] * live_k if pad_on_batch: maxlen = int(len(X[params['dataset_inputs'][0]][0]) * params['output_max_length_depending_on_x_factor']) if \ params['output_max_length_depending_on_x'] else params['maxlen'] minlen = int( len(X[params['dataset_inputs'][0]][0]) / params['output_min_length_depending_on_x_factor'] + 1e-7) if \ params['output_min_length_depending_on_x'] else 0 else: minlen = int(np.argmax(X[params['dataset_inputs'][0]][0] == eos_sym) / params['output_min_length_depending_on_x_factor'] + 1e-7) if \ params['output_min_length_depending_on_x'] else 0 maxlen = int(np.argmax(X[params['dataset_inputs'][0]][0] == eos_sym) * params[ 'output_max_length_depending_on_x_factor']) if \ params['output_max_length_depending_on_x'] else params['maxlen'] maxlen = min(params['state_below_maxlen'] - 1, maxlen) # we must include an additional dimension if the input for each timestep are all the generated "words_so_far" if params['words_so_far']: if k > maxlen: raise NotImplementedError("BEAM_SIZE can't be higher than MAX_OUTPUT_TEXT_LEN on the current implementation.") state_below = np.asarray([[null_sym]] * live_k) if pad_on_batch else np.asarray([np.zeros((maxlen, maxlen))] * live_k) else: state_below = np.asarray([null_sym] * live_k) if pad_on_batch else np.asarray([np.zeros(params['state_below_maxlen']) + null_sym] * live_k) prev_out = [None] * n_models if model_ensemble else None for ii in range(maxlen): # for every possible live sample calc prob for every possible label if params['optimized_search']: # use optimized search model if available if model_ensemble: [probs, prev_out, alphas] = model.predict_cond_optimized(X, state_below, params, ii, prev_out) else: [probs, prev_out] = model.predict_cond_optimized(X, state_below, params, ii, prev_out) if ret_alphas: alphas = prev_out[-1][0] # Shape: (k, n_steps) prev_out = prev_out[:-1] else: probs = model.predict_cond(X, state_below, params, ii) log_probs = cp.log(probs) if minlen > 0 and ii < minlen: log_probs[:, eos_sym] = -cp.inf # total score for every sample is sum of -log of word prb cand_scores = hyp_scores[:, None] - 
log_probs cand_flat = cand_scores.flatten() # Find the best options by calling argsort of flatten array ranks_flat = cp.argsort(cand_flat)[:(k - dead_k)] # Decypher flatten indices voc_size = log_probs.shape[1] trans_indices = ranks_flat // voc_size # index of row word_indices = ranks_flat % voc_size # index of col costs = cand_flat[ranks_flat] best_cost = costs[0] if cupy: trans_indices = cp.asnumpy(trans_indices) word_indices = cp.asnumpy(word_indices) if ret_alphas: alphas = cp.asnumpy(alphas) # Form a beam for the next iteration new_hyp_samples = [] new_trans_indices = [] new_hyp_scores = cp.zeros(k - dead_k, dtype='float32') if ret_alphas: new_hyp_alphas = [] for idx, [ti, wi] in list(enumerate(zip(trans_indices, word_indices))): if params['search_pruning']: if costs[idx] < k * best_cost: new_hyp_samples.append(hyp_samples[ti] + [wi]) new_trans_indices.append(ti) new_hyp_scores[idx] = copy.copy(costs[idx]) if ret_alphas: new_hyp_alphas.append(hyp_alphas[ti] + [alphas[ti]]) else: dead_k += 1 else: new_hyp_samples.append(hyp_samples[ti] + [wi]) new_trans_indices.append(ti) new_hyp_scores[idx] = copy.copy(costs[idx]) if ret_alphas: new_hyp_alphas.append(hyp_alphas[ti] + [alphas[ti]]) # check the finished samples new_live_k = 0 hyp_samples = [] hyp_scores = [] hyp_alphas = [] indices_alive = [] for idx in range(len(new_hyp_samples)): if new_hyp_samples[idx][-1] == eos_sym: # finished sample samples.append(new_hyp_samples[idx]) sample_scores.append(new_hyp_scores[idx]) if ret_alphas: sample_alphas.append(new_hyp_alphas[idx]) dead_k += 1 else: indices_alive.append(new_trans_indices[idx]) new_live_k += 1 hyp_samples.append(new_hyp_samples[idx]) hyp_scores.append(new_hyp_scores[idx]) if ret_alphas: hyp_alphas.append(new_hyp_alphas[idx]) hyp_scores = cp.array(np.asarray(hyp_scores, dtype='float32'), dtype='float32') live_k = new_live_k if new_live_k < 1: break if dead_k >= k: break state_below = np.asarray(hyp_samples, dtype='int64') state_below = np.hstack((np.zeros((state_below.shape[0], 1), dtype='int64') + null_sym, state_below)) \ if pad_on_batch else \ np.hstack((np.zeros((state_below.shape[0], 1), dtype='int64') + null_sym, state_below, np.zeros((state_below.shape[0], max(params['state_below_maxlen'] - state_below.shape[1] - 1, 0)), dtype='int64'))) # we must include an additional dimension if the input for each timestep are all the generated words so far if params['words_so_far']: state_below = np.expand_dims(state_below, axis=0) if params['optimized_search'] and ii > 0: # filter next search inputs w.r.t. remaining samples if model_ensemble: for n_model in range(n_models): # filter next search inputs w.r.t. remaining samples for idx_vars in range(len(prev_out[n_model])): prev_out[n_model][idx_vars] = prev_out[n_model][idx_vars][indices_alive] else: for idx_vars in range(len(prev_out)): prev_out[idx_vars] = prev_out[idx_vars][indices_alive] # dump every remaining one if live_k > 0: for idx in range(live_k): samples.append(hyp_samples[idx]) sample_scores.append(hyp_scores[idx]) if ret_alphas: sample_alphas.append(hyp_alphas[idx]) if ret_alphas: return samples, sample_scores, np.asarray(sample_alphas) else: return samples, sample_scores, None
def logit(x):
    return cp.log(x / (1 - x))
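A NumPy check with made-up probabilities: the logit is the inverse of the logistic sigmoid on (0, 1), so applying the sigmoid to log(p / (1 - p)) recovers p.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

p = np.array([0.1, 0.5, 0.9])
print(sigmoid(np.log(p / (1 - p))))   # recovers [0.1, 0.5, 0.9]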
def convolutional_barycenter_gpu( Hv , reg : float, alpha : np.ndarray, stabThresh = 1e-30, niter = 1500, tol = 1e-9, sharpening = False, verbose = False): """Main function solving wasserstein barycenter problem using gpu Parameters: Hv {Set of distributions (cparray)} -- reg {regularization term "gamma" } -- float superior to 0, generally equals size of space/40 alpha {list } -- set of weights Keyword Parameters: stabThresh {float} -- Stabilization threshold to prevent division by 0 (default: {1e-30}) niter {int } -- Maximum number of loop iteration (default: {1500}) tolerance {float} -- convergence tolerance at which point iterations stop (default: {1e-9}) sharpening {bool } -- Whether or not entropic sharpening is used (default: {False}) verbose {bool } -- verbose option Returns: cparray -- solution of weighted wassertein barycenter problem """ import cupy as cp from cupyx.scipy.ndimage import gaussian_filter as cupyx_gaussian_filter def K_cupyx(x): return cupyx_gaussian_filter(x,sigma=reg) def to_find_root(barycenter, H0, beta): return entropy(barycenter**beta) - H0 alpha = cp.array(alpha) alpha = alpha/alpha.sum() Hv = cp.array(Hv) for i in range(len(Hv)): Hv[i] = Hv[i]/Hv[i].sum() v = cp.ones(Hv.shape) Kw = cp.ones(Hv.shape) entropy_max = max_entropy (Hv ) barycenter = cp .zeros(Hv[0].shape) cumtime_agg = 0 rolling_delta = [] cumtime = [] iterations = [] change = 1 for j in range(niter): print("For every iteration.. ") t0 = time.time() barycenterOld = barycenter barycenter = cp.zeros_like(Hv[0, :, :]) print("Hv shape is", Hv.shape) for i in range(Hv.shape[0]): #* for each of two distributions(which are identical) #* distribution *i* becomes Kernel of (dist1 over the Kernel of v(i)) Kw[i, :, :] = K_cupyx(Hv[i, :, :] / cp.maximum(stabThresh,K_cupyx(v[i, :, :])) ) #* barycenter is barycenter plus weighted log of v(i)*Kw(i) barycenter += alpha[i] * cp.log(cp.maximum(stabThresh, v[i, :, :]*Kw[i, :, :])) barycenter = cp.exp(barycenter) change = cp.sum(cp.abs(barycenter-barycenterOld)) if sharpening : if (entropy(barycenter)) > (entropy_max): beta = newton(lambda beta : to_find_root(barycenter,entropy_max,beta), 1, tol=1e-6) if beta < 0 : beta = 1 else : beta = 1 barycenter = barycenter**beta for i in range(Hv.shape[0]): # assign to v(i) barycenter normalized by Kw(i)'s largest v[i, :, :] = barycenter / cp.maximum(stabThresh, Kw[i, : ,: ]) elapsed = np.around(time.time() - t0, 4) delta = np.around(change,10) cumtime_agg += elapsed iterations.append(j) cumtime.append(cumtime_agg) rolling_delta.append(float( delta )) print(f"Refinement iter {j} | delta: {delta} | elapsed : {elapsed}") if change < tol : print(f"Exited. Change {change} under tolerance.") log = { "iterations" : iterations, "cumtime" : cumtime, "rolling_delta" : rolling_delta, "exited_on" : j, "exited_under_tolerance": True } # print(f"Exited with 0 on iter {j}") return [ cp.asnumpy(barycenter),log ] break log = { "iterations" : iterations, "cumtime" : cumtime, "rolling_delta" : rolling_delta, "exited_on" : j, "exited_under_tolerance": False } # print(f"Exited with 0 on iter {j}") return [ cp.asnumpy(barycenter),log ]
def learnAndSolve8b(ctx): """This is the main optimization. Takes the longest time and uses the GPU heavily.""" Nbatch = ctx.intermediate.Nbatch params = ctx.params probe = ctx.probe ir = ctx.intermediate proc = ir.proc iorig = ir.iorig # TODO: move_to_config NrankPC = 6 # this one is the rank of the PCs, used to detect spikes with threshold crossings Nrank = 3 # this one is the rank of the templates wTEMP, wPCA = extractTemplatesfromSnippets(proc=proc, probe=probe, params=params, Nbatch=Nbatch, nPCs=NrankPC) # move these to the GPU wPCA = cp.asarray(wPCA[:, :Nrank], dtype=np.float32, order='F') wTEMP = cp.asarray(wTEMP, dtype=np.float32, order='F') wPCAd = cp.asarray(wPCA, dtype=np.float64, order='F') # convert to double for extra precision nt0 = params.nt0 nt0min = params.nt0min nBatches = Nbatch NT = params.NT Nfilt = params.Nfilt Nchan = probe.Nchan # two variables for the same thing? number of nearest channels to each primary channel # TODO: unclear - let's fix this NchanNear = min(probe.Nchan, 32) Nnearest = min(probe.Nchan, 32) # decay of gaussian spatial mask centered on a channel sigmaMask = params.sigmaMask batchstart = list(range(0, NT * nBatches + 1, NT)) # find the closest NchanNear channels, and the masks for those channels iC, mask, C2C = getClosestChannels(probe, sigmaMask, NchanNear) # sorting order for the batches isortbatches = iorig nhalf = int(ceil(nBatches / 2)) - 1 # halfway point # this batch order schedule goes through half of the data forward and backward during the model # fitting and then goes through the data symmetrically-out from the center during the final # pass ischedule = np.concatenate( (np.arange(nhalf, nBatches), np.arange(nBatches - 1, nhalf - 1, -1))) i1 = np.arange(nhalf - 1, -1, -1) i2 = np.arange(nhalf, nBatches) irounds = np.concatenate((ischedule, i1, i2)) niter = irounds.size if irounds[niter - nBatches - 1] != nhalf: # this check is in here in case I do somehting weird when I try different schedules raise ValueError('Mismatch between number of batches') # these two flags are used to keep track of what stage of model fitting we're at # flag_final = 0 flag_resort = 1 # this is the absolute temporal offset in seconds corresponding to the start of the # spike sorted time segment t0 = 0 # ceil(params.trange(1) * ops.fs) nInnerIter = 60 # this is for SVD for the power iteration # schedule of learning rates for the model fitting part # starts small and goes high, it corresponds approximately to the number of spikes # from the past that were averaged to give rise to the current template pmi = cp.exp( -1. 
/ cp.linspace(params.momentum[0], params.momentum[1], niter - nBatches)) Nsum = min( Nchan, 7) # how many channels to extend out the waveform in mexgetspikes # lots of parameters passed into the CUDA scripts Params = np.array([ NT, Nfilt, params.Th[0], nInnerIter, nt0, Nnearest, Nrank, params.lam, pmi[0], Nchan, NchanNear, params.nt0min, 1, Nsum, NrankPC, params.Th[0] ], dtype=np.float64) # W0 has to be ordered like this W0 = cp.transpose( cp.atleast_3d(cp.asarray(wPCA, dtype=np.float64, order='F')), [0, 2, 1]) # initialize the list of channels each template lives on iList = cp.zeros((Nnearest, Nfilt), dtype=np.int32, order='F') # initialize average number of spikes per batch for each template nsp = cp.zeros((0, 1), dtype=np.float64, order='F') # this flag starts 0, is set to 1 later Params[12] = 0 # kernels for subsample alignment Ka, Kb = getKernels(params) p1 = .95 # decay of nsp estimate in each batch ntot = 0 # this keeps track of dropped templates for debugging purposes ndrop = np.zeros(2, dtype=np.float32, order='F') # this is the minimum firing rate that all templates must maintain, or be dropped m0 = params.minFR * params.NT / params.fs # allocate variables when switching to extraction phase # this holds spike times, clusters and other info per spike st3 = [] # cp.zeros((int(1e7), 5), dtype=np.float32, order='F') # these ones store features per spike # Nnearest is the number of nearest templates to store features for fW = LargeArrayWriter(ctx.path('fW', ext='.dat'), dtype=np.float32, shape=(Nnearest, -1)) # NchanNear is the number of nearest channels to take PC features from fWpc = LargeArrayWriter(ctx.path('fWpc', ext='.dat'), dtype=np.float32, shape=(NchanNear, Nrank, -1)) for ibatch in tqdm(range(niter), desc="Optimizing templates"): # korder is the index of the batch at this point in the schedule korder = int(irounds[ibatch]) # k is the index of the batch in absolute terms k = int(isortbatches[korder]) logger.debug("Batch %d/%d, %d templates.", ibatch, niter, Nfilt) if ibatch > niter - nBatches - 1 and korder == nhalf: # this is required to revert back to the template states in the middle of the # batches W, dWU = ir.W, ir.dWU logger.debug('Reverted back to middle timepoint.') if ibatch < niter - nBatches: # obtained pm for this batch Params[8] = float(pmi[ibatch]) pm = pmi[ibatch] * ones((Nfilt, ), dtype=np.float64, order='F') # loading a single batch (same as everywhere) offset = Nchan * batchstart[k] dat = proc.flat[offset:offset + NT * Nchan].reshape((-1, Nchan), order='F') dataRAW = cp.asarray(dat, dtype=np.float32) / params.scaleproc if ibatch == 0: # only on the first batch, we first get a new set of spikes from the residuals, # which in this case is the unmodified data because we start with no templates # CUDA function to get spatiotemporal clips from spike detections dWU, cmap = mexGetSpikes2(Params, dataRAW, wTEMP, iC) dWU = cp.asarray(dWU, dtype=np.float64, order='F') # project these into the wPCA waveforms dWU = cp.reshape(cp.dot( wPCAd, cp.dot(wPCAd.T, dWU.reshape((dWU.shape[0], -1), order='F'))), dWU.shape, order='F') # initialize the low-rank decomposition with standard waves W = W0[:, cp.ones(dWU.shape[2], dtype=np.int32), :] Nfilt = W.shape[1] # update the number of filters/templates # initialize the number of spikes for new templates with the minimum allowed value, # so it doesn't get thrown back out right away nsp = _extend(nsp, 0, Nfilt, m0) Params[1] = Nfilt # update in the CUDA parameters if flag_resort: # this is a flag to resort the order of the 
templates according to best peak # channel # this is important in order to have cohesive memory requests from the GPU RAM # max channel (either positive or negative peak) iW = cp.argmax(cp.abs(dWU[nt0min - 1, :, :]), axis=0) # iW = int32(squeeze(iW)) isort = cp.argsort(iW) # sort by max abs channel iW = iW[isort] W = W[:, isort, :] # user ordering to resort all the other template variables dWU = dWU[:, :, isort] nsp = nsp[isort] # decompose dWU by svd of time and space (via covariance matrix of 61 by 61 samples) # this uses a "warm start" by remembering the W from the previous iteration W, U, mu = mexSVDsmall2(Params, dWU, W, iC, iW, Ka, Kb) # UtU is the gram matrix of the spatial components of the low-rank SVDs # it tells us which pairs of templates are likely to "interfere" with each other # such as when we subtract off a template # this needs to change (but I don't know why!) UtU, maskU = getMeUtU(iW, iC, mask, Nnearest, Nchan) # main CUDA function in the whole codebase. does the iterative template matching # based on the current templates, gets features for these templates if requested # (featW, featPC), # gets scores for the template fits to each spike (vexp), outputs the average of # waveforms assigned to each cluster (dWU0), # and probably a few more things I forget about st0, id0, x0, featW, dWU0, drez, nsp0, featPC, vexp = mexMPnu8( Params, dataRAW, U, W, mu, iC, iW, UtU, iList, wPCA) logger.debug("%d spikes.", x0.size) # Sometimes nsp can get transposed (think this has to do with it being # a single element in one iteration, to which elements are added # nsp, nsp0, and pm must all be row vectors (Nfilt x 1), so force nsp # to be a row vector. # nsp = cp.atleast_2d(nsp) # nsprow, nspcol = nsp.shape # if nsprow < nspcol: # nsp = nsp.T nsp = nsp.squeeze() # updates the templates as a running average weighted by recency # since some clusters have different number of spikes, we need to apply the # exp(pm) factor several times, and fexp is the resulting update factor # for each template fexp = np.exp(nsp0 * cp.log(pm[:Nfilt])) fexp = cp.reshape(fexp, (1, 1, -1), order='F') dWU = dWU * fexp + (1 - fexp) * ( dWU0 / cp.reshape(cp.maximum(1, nsp0), (1, 1, -1), order='F')) # nsp just gets updated according to the fixed factor p1 nsp = nsp * p1 + (1 - p1) * nsp0 if ibatch == niter - nBatches - 1: # if we reached this point, we need to disable secondary template updates # like dropping, and adding new templates. We need to memorize the state of the # templates at this timepoint, and set the processing mode to "extraction and # tracking" flag_resort = 0 # no need to resort templates by channel any more # flag_final = 1 # this is the "final" pass # final clean up, triage templates one last time W, U, dWU, mu, nsp, ndrop = triageTemplates2( params, iW, C2C, W, U, dWU, mu, nsp, ndrop) # final number of templates Nfilt = W.shape[1] Params[1] = Nfilt # final covariance matrix between all templates WtW, iList = getMeWtW(W, U, Nnearest) # iW is the final channel assigned to each template iW = cp.argmax(cp.abs(dWU[nt0min - 1, :, :]), axis=0) # extract ALL features on the last pass Params[ 12] = 2 # this is a flag to output features (PC and template features) # different threshold on last pass? 
Params[2] = params.Th[ -1] # usually the threshold is much lower on the last pass # memorize the state of the templates logger.debug("Memorized middle timepoint.") ir.W, ir.dWU, ir.U, ir.mu = W, dWU, U, mu ir.Wraw = cp.zeros((U.shape[0], W.shape[0], U.shape[1]), dtype=np.float64, order='F') for n in range(U.shape[1]): # temporarily use U rather Urot until I have a chance to test it ir.Wraw[:, :, n] = mu[n] * cp.dot(U[:, n, :], W[:, n, :].T) if ibatch < niter - nBatches - 1: # during the main "learning" phase of fitting a model if ibatch % 5 == 0: # this drops templates based on spike rates and/or similarities to # other templates W, U, dWU, mu, nsp, ndrop = triageTemplates2( params, iW, C2C, W, U, dWU, mu, nsp, ndrop) Nfilt = W.shape[1] # update the number of filters Params[1] = Nfilt # this adds new templates if they are detected in the residual dWU0, cmap = mexGetSpikes2(Params, drez, wTEMP, iC) if dWU0.shape[2] > 0: # new templates need to be integrated into the same format as all templates # apply PCA for smoothing purposes dWU0 = cp.reshape(cp.dot( wPCAd, cp.dot( wPCAd.T, dWU0.reshape( (dWU0.shape[0], dWU0.shape[1] * dWU0.shape[2]), order='F'))), dWU0.shape, order='F') dWU = cp.concatenate((dWU, dWU0), axis=2) m = dWU0.shape[2] # initialize temporal components of waveforms W = _extend(W, Nfilt, Nfilt + m, W0[:, cp.ones(m, dtype=np.int32), :], axis=1) # initialize the number of spikes with the minimum allowed nsp = _extend(nsp, Nfilt, Nfilt + m, params.minFR * NT / params.fs) # initialize the amplitude of this spike with a lowish number mu = _extend(mu, Nfilt, Nfilt + m, 10) # if the number of filters exceed the maximum allowed, clip it Nfilt = min(params.Nfilt, W.shape[1]) Params[1] = Nfilt W = W[:, : Nfilt, :] # remove any new filters over the maximum allowed dWU = dWU[:, :, : Nfilt] # remove any new filters over the maximum allowed nsp = nsp[: Nfilt] # remove any new filters over the maximum allowed mu = mu[: Nfilt] # remove any new filters over the maximum allowed if ibatch > niter - nBatches - 1: # during the final extraction pass, this keeps track of all spikes and features # we memorize the spatio-temporal decomposition of the waveforms at this batch # this is currently only used in the GUI to provide an accurate reconstruction # of the raw data at this time ir.WA[..., k] = cp.asnumpy(W) ir.UA[..., k] = cp.asnumpy(U) ir.muA[..., k] = cp.asnumpy(mu) # we carefully assign the correct absolute times to spikes found in this batch ioffset = params.ntbuff - 1 if k == 0: ioffset = 0 # the first batch is special (no pre-buffer) toff = nt0min + t0 - ioffset + (NT - params.ntbuff) * k st = toff + st0 st30 = np.c_[ cp.asnumpy(st), # spike times cp.asnumpy(id0), # spike clusters (0-indexing) cp.asnumpy(x0), # template amplitudes cp.asnumpy(vexp), # residual variance of this spike korder * np.ones(st.size), # batch from which this spike was found ] # Check the number of spikes. 
assert st30.shape[0] == featW.shape[1] == featPC.shape[2] st3.append(st30) fW.append(featW) fWpc.append(featPC) ntot = ntot + x0.size # keeps track of total number of spikes so far if ibatch == niter - nBatches - 1: # these next three store the low-d template decompositions ir.WA = np.zeros((nt0, Nfilt, Nrank, nBatches), dtype=np.float32, order='F') ir.UA = np.zeros((Nchan, Nfilt, Nrank, nBatches), dtype=np.float32, order='F') ir.muA = np.zeros((Nfilt, nBatches), dtype=np.float32, order='F') if ibatch % 100 == 0: # this is some of the relevant diagnostic information to be printed during training logger.info(('%d / %d batches, %d units, nspks: %2.4f, mu: %2.4f, ' 'nst0: %d, merges: %2.4f, %2.4f'), ibatch, niter, Nfilt, nsp.sum(), median(mu), st0.size, *ndrop) free_gpu_memory() # Close the large array writers and save the JSON metadata files to disk. fW.close() fWpc.close() # just display the total number of spikes logger.info("Found %d spikes.", ntot) # Save results to the ctx.intermediate object. ir.st3 = np.concatenate(st3, axis=0) # the similarity score between templates is simply the correlation, # taken as the max over several consecutive time delays ir.simScore = cp.asnumpy(cp.max(WtW, axis=2)) # NOTE: these are now already saved by LargeArrayWriter # fWa = np.concatenate(fW, axis=-1) # fWpca = np.concatenate(fWpc, axis=-1) # the template features are stored in cProj, like in Kilosort1 # ir.cProj = fWa.T # the neihboring templates idnices are stored in iNeigh ir.iNeigh = cp.asnumpy(iList) # permute the PC projections in the right order # ir.cProjPC = np.transpose(fWpca, (2, 1, 0)) # iNeighPC keeps the indices of the channels corresponding to the PC features ir.iNeighPC = cp.asnumpy(iC[:, iW]) # Number of spikes. assert ir.st3.shape[0] == fW.shape[-1] == fWpc.shape[-1] # this whole next block is just done to compress the compressed templates # we separately svd the time components of each template, and the spatial components # this also requires a careful decompression function, available somewhere in the GUI code nKeep = min(Nchan * 3, 20) # how many PCs to keep W_a = np.zeros((nt0 * Nrank, nKeep, Nfilt), dtype=np.float32) W_b = np.zeros((nBatches, nKeep, Nfilt), dtype=np.float32) U_a = np.zeros((Nchan * Nrank, nKeep, Nfilt), dtype=np.float32) U_b = np.zeros((nBatches, nKeep, Nfilt), dtype=np.float32) for j in tqdm(range(Nfilt), desc='Compressing templates'): # do this for every template separately WA = np.reshape(ir.WA[:, j, ...], (-1, nBatches), order='F') # svd on the GPU was faster for this, but the Python randomized CPU version # might be faster still # WA = gpuArray(WA) A, B, C = svdecon_cpu(WA) # W_a times W_b results in a reconstruction of the time components W_a[:, :, j] = np.dot(A[:, :nKeep], B[:nKeep, :nKeep]) W_b[:, :, j] = C[:, :nKeep] UA = np.reshape(ir.UA[:, j, ...], (-1, nBatches), order='F') # UA = gpuArray(UA) A, B, C = svdecon_cpu(UA) # U_a times U_b results in a reconstruction of the time components U_a[:, :, j] = np.dot(A[:, :nKeep], B[:nKeep, :nKeep]) U_b[:, :, j] = C[:, :nKeep] logger.info('Finished compressing time-varying templates.') return Bunch( wPCA=wPCA[:, :Nrank], wTEMP=wTEMP, st3=ir.st3, simScore=ir.simScore, # cProj=ir.cProj, # cProjPC=ir.cProjPC, iNeigh=ir.iNeigh, iNeighPC=ir.iNeighPC, WA=ir.WA, UA=ir.UA, W=ir.W, U=ir.U, dWU=ir.dWU, mu=ir.mu, W_a=W_a, W_b=W_b, U_a=U_a, U_b=U_b, )
def cross_entropy(label, prob):
    loss = -np.sum(label * np.log(prob))
    return loss