def power(self, a, size=None, dtype=float):
    """Returns an array of samples drawn from the power distribution.

    Standard-exponential samples ``E`` are drawn first and then mapped in
    place through ``(1 - exp(-E)) ** (1 / a)``.

    Args:
        a: Shape parameter(s); converted with :func:`cupy.asarray`.
        size: Output shape. When ``None`` the shape of ``a`` is used.
        dtype: Data type forwarded to ``standard_exponential``.

    Returns:
        The array produced by ``standard_exponential``, transformed in place.

    Raises:
        ValueError: If any element of ``a`` is negative.

    .. warning::
        This function may synchronize the device.

    .. seealso::
        :func:`cupy.random.power` for full documentation,
        :meth:`numpy.random.RandomState.power
        <numpy.random.mtrand.RandomState.power>`
    """
    a = cupy.asarray(a)
    has_negative = cupy.any(a < 0)
    if has_negative:  # synchronize!
        raise ValueError('a < 0')

    shape = a.shape if size is None else size
    samples = self.standard_exponential(size=shape, dtype=dtype)

    # All steps reuse the same buffer: E -> exp(-E) -> 1 - exp(-E) -> (.)**(1/a)
    cupy.negative(samples, out=samples)
    cupy.exp(samples, out=samples)
    cupy.subtract(1, samples, out=samples)
    cupy.power(samples, 1. / a, out=samples)
    return samples
def forward(self, data, is_training=True):
    """Apply a row-wise softmax to a 2-D score matrix.

    Args:
        data: Array of shape ``[batch_size, nums_score]``.
        is_training: When true, a copy of the result is stored in
            ``self.cache['logits']``.

    Returns:
        Array of the same shape as ``data`` whose rows sum to one.

    Raises:
        ValueError: If ``data`` is not two-dimensional.
    """
    if len(data.shape) != 2:
        raise ValueError(
            'data have shape is not compatible. Expect [batch_size, nums_score]'
        )

    # Subtract the per-row maximum before exponentiating to avoid overflow.
    row_max = np.amax(data, axis=1, keepdims=True)
    exp_scores = np.exp(data - row_max)
    probabilities = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    if is_training:
        self.cache['logits'] = np.copy(probabilities)
    return probabilities
def predict_row(self, x):
    """Evaluate the fitted model at a batch of query points.

    Args:
        x: Query points; the original comment states shape ``[N, 2]`` and the
            reshape below hard-codes two columns.

    Returns:
        NumPy array obtained by passing the prediction through
        ``self.inversenormy`` and copying it to the host.
    """
    x = self.normX(cp.asarray(x))
    n_query = x.shape[0]
    n_train = self.X.shape[0]

    # Pair every query point with every training point, one pair per row.
    train_rows = cp.tile(self.X, [n_query, 1])
    query_rows = cp.reshape(cp.tile(x, [1, n_train]), [n_train * n_query, 2])
    dist = train_rows - query_rows

    # Sum over the coordinate axis.
    exponent = cp.sum(self.theta * cp.power(cp.abs(dist), self.pl), axis=1)
    Psi = cp.reshape(cp.exp(-exponent), [n_query, n_train])

    prediction = Psi.dot(self.bbb) + self.mu
    return cp.asnumpy(self.inversenormy(prediction))
def forward(self, x):
    """Encode ``x``, draw a latent sample, and decode it.

    The latent sample uses the reparameterization trick:
    ``z = mu + exp(logvar / 2) * eps`` with ``eps`` standard normal. The
    noise ``eps`` is kept in ``self.rand_sample`` and the sample in
    ``self.sample_z``.

    Previously ``self.rand_sample`` was drawn and stored, but ``sample_z``
    was built from a second, independent draw, so the stored noise never
    matched the sample actually decoded.

    Args:
        x: Input passed to ``self.encoder``.

    Returns:
        Tuple ``(decode, mu, logvar)``.
    """
    # Encode
    mu, logvar = self.encoder(x)

    # Reparameterization trick: sample from the Gaussian via stored noise.
    self.rand_sample = np.random.standard_normal(
        size=(self.batch_size, self.nz))
    self.sample_z = mu + np.exp(logvar * .5) * self.rand_sample

    decode = self.decoder(self.sample_z)
    return decode, mu, logvar
def det(a):
    """Returns the determinant of an array.

    The value is rebuilt from the sign and log-magnitude reported by
    ``slogdet``.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        cupy.ndarray: Determinant of ``a``. Its shape is ``a.shape[:-2]``.

    .. seealso:: :func:`numpy.linalg.det`
    """
    sign, logdet = slogdet(a)
    magnitude = cupy.exp(logdet)
    return sign * magnitude
def predict(w, b, X):
    """Predict binary labels with a logistic-regression model.

    Args:
        w: Weights; reshaped to ``(X.shape[0], 1)``.
        b: Bias added to the linear scores.
        X: Data with one example per column.

    Returns:
        Array of shape ``(1, X.shape[1])`` holding ``0`` where the sigmoid
        activation is ``<= 0.5`` and ``1`` otherwise.
    """
    m = X.shape[1]
    Y_prediction = cp.zeros((1, m))
    w = w.reshape(X.shape[0], 1)

    Z = cp.dot(w.T, X) + b
    A = 1 / (1 + cp.exp(-Z))

    # Threshold the whole row in one array operation instead of reading
    # elements one by one. `not (A <= 0.5)` rather than `A > 0.5` keeps the
    # original outcome for NaN activations (they map to 1).
    Y_prediction[0, :] = ~(A[0] <= 0.5)
    return Y_prediction
def forward(self, bottom, top):
    """Compute a per-element weighted softmax log-loss.

    Reads scores from ``bottom[0].data`` and labels from ``bottom[1].data``,
    stores ``self.label``, ``self.softmax`` and ``self.weight_mask`` on the
    instance, and writes the scalar loss into ``top[0].data``.
    """
    self.label = cp.asarray(copy.deepcopy(bottom[1].data), cp.uint8)

    # Softmax over axis 1, shifted by the per-position maximum for stability.
    scores = cp.asarray(copy.deepcopy(bottom[0].data), cp.float64)
    exp_scores = cp.exp(scores - cp.max(scores, axis=1, keepdims=True))
    self.softmax = exp_scores / cp.sum(exp_scores, axis=1, keepdims=True)

    # Per-element weights: start at one, then apply the configured class
    # weights and zero out the ignore label.
    self.weight_mask = cp.ones_like(self.label, cp.float64)
    for weight_id in self.weight_dic:
        class_weight = self.weight_dic[weight_id]
        self.weight_mask[self.label == weight_id] = class_weight
    if self.has_ignore_label:
        self.weight_mask[self.label == self.ignore_label] = 0

    # Label 3 is merged into label 2 before the hard-coded overrides below.
    self.label[self.label == 3] = 2
    # Hard-coded weights for labels 0 and 1 (original notes: empty 0.3,
    # road 0.25); they override anything set above for those labels.
    self.weight_mask[self.label == 0] = 0.3
    self.weight_mask[self.label == 1] = 0.25

    # Number of elements that take part in the loss (non-zero weight).
    compute_count = self.weight_mask[self.weight_mask != 0].size

    # Normalize the mask by its mean over the contributing elements.
    mean_weight = cp.sum(self.weight_mask) / compute_count
    self.weight_mask = self.weight_mask / mean_weight

    # Probability assigned to the labelled class, floored at 1e-10 so the
    # logarithm stays finite.
    picked = copy.deepcopy(
        self.softmax[self.index_0, self.label, self.index_2, self.index_3])
    picked[picked < (1e-10)] = 1e-10

    weighted_log = cp.log(picked) * self.weight_mask
    loss = -(cp.sum(weighted_log) / compute_count)
    top[0].data[...] = cp.asnumpy(loss)
def nonlin_evo(psiP2, psiP1, psi0, psiM1, psiM2, c0, c2, c4, V, p, dt, spin_f):
    """Apply the nonlinear evolution sub-steps to a five-component wavefunction.

    Three sub-steps are applied in sequence over a time step ``dt``: the
    spin-singlet term (coefficient ``c4``), the spin term (coefficient
    ``c2``), and a final phase factor built from ``(c0 + c4) * n + V + p * mF``.

    Args:
        psiP2, psiP1, psi0, psiM1, psiM2: Wavefunction components.
        c0, c2, c4: Coefficients used in the three sub-steps.
        V: Term added to the phase of every component
            (presumably the trapping potential -- confirm with caller).
        p: Coefficient multiplying ``mF`` in the final phase.
        dt: Time step.
        spin_f: Value from which the component index is subtracted to get
            ``mF`` (presumably 2, giving mF = 2..-2 -- confirm with caller).

    Returns:
        List of the five evolved components in the order
        ``[psiP2, psiP1, psi0, psiM1, psiM2]``.
    """
    # Calculate densities:
    n = abs(psiP2) ** 2 + abs(psiP1) ** 2 + abs(psi0) ** 2 + abs(psiM1) ** 2 + abs(psiM2) ** 2
    A00 = 1 / cp.sqrt(5) * (psi0 ** 2 - 2 * psiP1 * psiM1 + 2 * psiP2 * psiM2)
    fz = 2 * (abs(psiP2) ** 2 - abs(psiM2) ** 2) + abs(psiP1) ** 2 - abs(psiM1) ** 2

    # Evolve spin-singlet term -c4*(n^2-|alpha|^2)
    S = cp.sqrt(n ** 2 - abs(A00) ** 2)
    # Replace NaN entries of S with numbers before they are used below.
    S = cp.nan_to_num(S)

    cosT = cp.cos(c4 * S * dt)
    sinT = cp.sin(c4 * S * dt) / S
    sinT[S == 0] = 0  # Corrects division by 0

    Wfn = [psiP2 * cosT + 1j * (n * psiP2 - A00 * cp.conj(psiM2)) * sinT,
           psiP1 * cosT + 1j * (n * psiP1 + A00 * cp.conj(psiM1)) * sinT,
           psi0 * cosT + 1j * (n * psi0 - A00 * cp.conj(psi0)) * sinT,
           psiM1 * cosT + 1j * (n * psiM1 + A00 * cp.conj(psiP1)) * sinT,
           psiM2 * cosT + 1j * (n * psiM2 - A00 * cp.conj(psiP2)) * sinT]

    # Calculate spin vectors
    # fp is computed from the updated components; fz from the inputs above.
    fp = cp.sqrt(6) * (Wfn[1] * cp.conj(Wfn[2]) + Wfn[2] * cp.conj(Wfn[3])) + 2 * (Wfn[3] * cp.conj(Wfn[4]) + Wfn[0] * cp.conj(Wfn[1]))
    F = cp.sqrt(fz ** 2 + abs(fp) ** 2)

    # Calculate cos, sin and Qfactor terms:
    C1, S1 = cp.cos(c2 * F * dt), cp.sin(c2 * F * dt)
    C2, S2 = cp.cos(2 * c2 * F * dt), cp.sin(2 * c2 * F * dt)

    Qfactor = 1j * (-4 / 3 * S1 + 1 / 6 * S2)
    Q2factor = (-5 / 4 + 4 / 3 * C1 - 1 / 12 * C2)
    Q3factor = 1j * (1 / 3 * S1 - 1 / 6 * S2)
    Q4factor = (1 / 4 - 1 / 3 * C1 + 1 / 12 * C2)

    # Normalized spin components; nan_to_num covers points where F == 0.
    fzQ = cp.nan_to_num(fz / F)
    fpQ = cp.nan_to_num(fp / F)

    # Repeated application of calc_Qpsi gives the 1st..4th order terms.
    Qpsi = calc_Qpsi(fzQ, fpQ, Wfn)
    Q2psi = calc_Qpsi(fzQ, fpQ, Qpsi)
    Q3psi = calc_Qpsi(fzQ, fpQ, Q2psi)
    Q4psi = calc_Qpsi(fzQ, fpQ, Q3psi)

    # Evolve spin term c2 * F^2
    for ii in range(len(Wfn)):
        Wfn[ii] += Qfactor * Qpsi[ii] + Q2factor * Q2psi[ii] + Q3factor * Q3psi[ii] + Q4factor * Q4psi[ii]

    # Evolve (c0+c4)*n^2 + (V + pm)*n:
    for ii in range(len(Wfn)):
        mF = spin_f - ii
        Wfn[ii] *= cp.exp(-1j * dt * ((c0 + c4) * n + V + p * mF))

    return Wfn
def fhtcoeff(n, dln, mu, offset=0.0, bias=0.0):
    '''Compute the coefficient array for a fast Hankel transform.

    Parameters
    ----------
    n : int
        Transform length; ``n // 2 + 1`` coefficients are produced.
    dln : float
        Logarithmic spacing used in the frequency grid ``y``.
    mu : float
        Order entering ``xp`` and ``xm``.
    offset : float, optional
        Used as ``lnkr`` in the phase term.
    bias : float, optional
        Used as the exponent ``q``.

    Returns
    -------
    u : complex array of length ``n // 2 + 1``
    '''
    lnkr, q = offset, bias

    # Hankel transform coefficients
    # u_m = (kr)^{-i 2m pi/(n dlnr)} U_mu(q + i 2m pi/(n dlnr))
    # with U_mu(x) = 2^x Gamma((mu+1+x)/2)/Gamma((mu+1-x)/2)
    xp = (mu + 1 + q)/2
    xm = (mu + 1 - q)/2
    y = cupy.linspace(0, math.pi * (n // 2) / (n * dln), n // 2 + 1)
    u = cupy.empty(n // 2 + 1, dtype=complex)
    v = cupy.empty(n // 2 + 1, dtype=complex)
    # v = loggamma(xm + i*y), then u = loggamma(xp + i*y); u is reused as
    # both the argument and the output buffer.
    u.imag[:] = y
    u.real[:] = xm
    loggamma(u, out=v)
    u.real[:] = xp
    loggamma(u, out=u)
    y *= 2 * (LN_2 - lnkr)
    # Assemble the log of the coefficient in place, then exponentiate.
    u.real -= v.real
    u.real += LN_2 * q
    u.imag += v.imag
    u.imag += y
    cupy.exp(u, out=u)

    # fix last coefficient to be real
    u.imag[-1] = 0

    # deal with special cases
    # NOTE: evaluating this condition reads a device value on the host.
    if not cupy.isfinite(u[0]):
        # write u_0 = 2^q Gamma(xp)/Gamma(xm) = 2^q poch(xm, xp-xm)
        # poch() handles special cases for negative integers correctly
        u[0] = 2**q * poch(xm, xp - xm)
        # the coefficient may be inf or 0, meaning the transform or the
        # inverse transform, respectively, is singular

    return u
def apply_shift(self, psi, p):
    """Apply shift for all projections.

    Each projection is zero-padded to twice its size, multiplied in Fourier
    space by a linear phase ramp, transformed back, and cropped to the
    original window.

    Args:
        psi: Array of shape ``(k, self.nz, self.n)``.
        p: Per-projection shifts; ``p[:, 0]`` multiplies the vertical
            frequencies and ``p[:, 1]`` the horizontal ones.

    Returns:
        Shifted array with the same shape as ``psi``.
    """
    nz, n = self.nz, self.n
    rows = slice(nz // 2, 3 * nz // 2)
    cols = slice(n // 2, 3 * n // 2)

    padded = cp.zeros([psi.shape[0], 2 * nz, 2 * n], dtype='float32')
    padded[:, rows, cols] = psi

    x, y = cp.meshgrid(cp.fft.rfftfreq(2 * n), cp.fft.fftfreq(2 * nz))
    shift = cp.exp(-2 * cp.pi * 1j *
                   (x * p[:, 1, None, None] + y * p[:, 0, None, None]))

    shifted = cp.fft.irfft2(shift * cp.fft.rfft2(padded))
    return shifted[:, rows, cols]
def _kdpdf1(x_j, t_ij, h_ij, w_i):
    '''
    Evaluate the normalized PDF at a single point using generic
    NumPy/CuPy code instead of a dedicated CUDA kernel.

    x_j is the j-dimensional point to evaluate the PDF at
    t_ij are the i events in the PDF at j-dimensional points
    h_ij are the bandwidths of each PDF event i in dimension j
    w_i are the weights of each PDF event

    Returns the weighted sum over events; when ``np`` and ``cp`` are
    different modules the result is fetched with ``.get()``.
    '''
    # Per-event normalization: product over dimensions of 1/(sqrt(2*pi)*h).
    norm_i = cp.prod(KernelDensityPDF._inv_sqrt_2pi / h_ij, axis=1)
    # Per-event Gaussian factor from the bandwidth-scaled offsets.
    scaled_ij = (x_j - t_ij) / h_ij
    gauss_i = cp.exp(-0.5 * cp.sum(cp.square(scaled_ij), axis=1))

    res = cp.sum(w_i * norm_i * gauss_i)
    if np == cp:
        return res
    return res.get()
def subpixel_pad4D(data4D_flat, final_size, cut_radius, chunks=10):
    """Pad every pattern to ``final_size`` and move it with a Fourier shift.

    Each ``data4D_flat[i]`` is placed in the top-left corner of a zeroed
    ``final_size`` canvas, shifted by half the size difference through a
    Fourier-space phase ramp, and multiplied by a circular mask of radius
    ``1.1 * cut_radius`` around the canvas centre. Patterns are sent to the
    GPU in ``chunks`` groups.

    Args:
        data4D_flat: Host array of shape ``(n_patterns, height, width)``.
        final_size: Two-element output size ``(height, width)``.
        cut_radius: Radius used for the circular mask.
        chunks: Number of groups the patterns are split into for upload.

    Returns:
        NumPy array of shape ``(n_patterns, final_size[0], final_size[1])``
        with the dtype of ``data4D_flat``.
    """
    n_patterns = data4D_flat.shape[0]
    # Chunk boundaries via integer arithmetic: `np.int` no longer exists in
    # current NumPy, and a float-step `np.arange` can yield one element too
    # many because of rounding.
    stops = (np.arange(chunks + 1) * n_patterns) // chunks
    max_size = int(np.amax(np.diff(stops)))

    final_size = (np.asarray(final_size)).astype(int)
    canvas_shape = (int(final_size[0]), int(final_size[1]))
    # Half the size difference, flipped into (x, y) order.
    move_pixels = cp.asarray(
        np.flip(0.5 * (final_size - np.asarray(data4D_flat.shape[1:3]))))

    yy, xx = np.mgrid[0:final_size[0], 0:final_size[1]]
    rad = ((yy - final_size[0] / 2)**2) + ((xx - final_size[1] / 2)**2)
    cutoff = cp.asarray(
        (rad < ((1.1 * cut_radius)**2)).astype(data4D_flat.dtype))

    cbed = cp.zeros(canvas_shape, dtype=data4D_flat.dtype)
    fourier_cal_y = (cp.linspace(
        (-final_size[0] / 2), ((final_size[0] / 2) - 1),
        final_size[0])) / final_size[0]
    fourier_cal_x = (cp.linspace(
        (-final_size[1] / 2), ((final_size[1] / 2) - 1),
        final_size[1])) / final_size[1]
    [fourier_mesh_x, fourier_mesh_y] = cp.meshgrid(fourier_cal_x,
                                                   fourier_cal_y)
    move_phase = cp.exp(
        (-2) * np.pi * (1j) * ((fourier_mesh_x * move_pixels[0]) +
                               (fourier_mesh_y * move_pixels[1])))

    padded_4D = np.zeros((n_patterns, canvas_shape[0], canvas_shape[1]),
                         dtype=data4D_flat.dtype)
    padded_on_gpu = cp.zeros((max_size, canvas_shape[0], canvas_shape[1]),
                             dtype=data4D_flat.dtype)

    for cc in range(chunks):
        startval = int(stops[cc])
        stop_val = int(stops[cc + 1])
        gpu_4Dchunk = cp.asarray(data4D_flat[startval:stop_val, :, :])
        for ii in range(gpu_4Dchunk.shape[0]):
            # Only the top-left corner is overwritten; the rest stays zero.
            cbed[0:data4D_flat.shape[1],
                 0:data4D_flat.shape[2]] = gpu_4Dchunk[ii, :, :]
            FFT_cbd = cp.fft.fftshift(cp.fft.fft2(cbed))
            moved_cbed = (cp.absolute(
                cp.fft.ifft2(cp.multiply(FFT_cbd, move_phase)))).astype(
                    data4D_flat.dtype)
            padded_on_gpu[ii, :, :] = moved_cbed * cutoff
        padded_4D[startval:stop_val, :, :] = cp.asnumpy(
            padded_on_gpu[0:gpu_4Dchunk.shape[0], :, :])

    # No explicit `del`: the GPU temporaries are released when the locals go
    # out of scope, and deleting names that were never bound (no patterns
    # processed) would raise NameError.
    return padded_4D
def constructLocalMat(self, src_pts, grids, scale_factor):
    '''
    Estimate one 3x3 homography per grid cell from distance-weighted
    matching pairs.

    For every grid centre, each matching pair gets a Gaussian weight of its
    squared distance to the centre (scaled by ``scale_factor``), floored at
    ``gamma``. The homography is taken from the last right-singular vector
    of the weighted ``self.A`` and mapped through ``inv(C2) @ H @ C1``.
    Cells whose largest weight is below ``gamma`` reuse
    ``self.globalHomoMat``.

    src_pts : A N by 2 matrix. N is number of matching pairs.
    grids : A instance of Grids class.
    scale_factor : Width of the Gaussian weighting.

    Results are stored on the instance: ``self.localHomoMat_lst`` (NumPy
    array of shape ``(grid_num, 3, 3)``) and ``self.non_global_homo_mat_lst``
    (indices of the cells that did not fall back to the global matrix).
    '''
    gamma = 0.0025
    src_pts = cp.asarray(src_pts)
    # A M by 2 matrix, M is number of grids.
    grids_center_coordi = cp.asarray(grids.center_lst)
    grid_num = len(grids.center_lst)
    A = cp.asarray(self.A)
    C1 = cp.asarray(self.C1)
    C2 = cp.asarray(self.C2)
    matchingPairNum = src_pts.shape[0]
    skip = 0
    global_H = cp.asarray(np.copy(self.globalHomoMat))
    local_homo_mat_lst = cp.zeros((grid_num, 3, 3))
    change_mask = []
    # inv(C2) does not depend on the grid cell: compute it once, on the
    # first cell that needs it, instead of on every iteration.
    C2_inv = None

    for idx in range(grid_num):
        grid_coordi = grids_center_coordi[idx]
        weight = cp.exp((-1) * cp.sum(
            (src_pts - grid_coordi)**2, axis=1) / scale_factor**2)
        print(
            f'SVD {idx+1:8d}/{grid_num}({(idx+1)/(grid_num)*100:8.1f}%) Current skip {skip} times. Current Skip rate is {skip/grid_num:5.3%}',
            end='\r')

        if cp.amax(weight) < gamma:
            # No matching pair is close enough: use the global homography.
            skip += 1
            local_homo_mat_lst[idx, :, :] = global_H
            continue

        # Each pair contributes two rows of A, hence the repeat.
        weight = cp.repeat(weight, 2)
        weight[weight < gamma] = gamma
        weight = weight.reshape((2 * matchingPairNum, 1))
        weighted_A = cp.multiply(weight, A)

        _, _, v = cp.linalg.svd(weighted_A)
        H = v[-1, :].reshape((3, 3))
        if C2_inv is None:
            C2_inv = cp.linalg.inv(C2)
        H = C2_inv @ H @ C1
        H = H / H[-1, -1]
        local_homo_mat_lst[idx, :, :] = H
        change_mask.append(idx)

    print()
    self.non_global_homo_mat_lst = change_mask
    self.localHomoMat_lst = cp.asnumpy(local_homo_mat_lst)
def my_conv2(S1, sig, varargin=None):
    """Gaussian-smooth ``S1`` along one or more axes.

    S1 is the matrix to be filtered along a choice of axes.
    sig is either a scalar or a sequence of scalars, one for each axis to be
    filtered.
    varargin can be the dimensions to do filtering; when it is not given the
    axis used is 1.

    Returns ``S1`` unchanged when ``sig <= .25``; otherwise the filtered
    array, normalized by the response of the same filter to an all-ones
    input.
    """
    if sig <= .25:
        return S1

    idims = _make_vect(1 if varargin is None else varargin)

    if _is_vect(idims) and _is_vect(sig):
        sigall = sig
    else:
        sigall = np.tile(sig, len(idims))

    for sigma, axis in zip(sigall, idims):
        Nd = S1.ndim
        leading = list(range(0, axis))
        trailing = list(range(axis + 1, Nd))

        # Bring the filtered axis to the front and flatten the rest.
        S1 = cp.transpose(S1, [axis] + leading + trailing)
        moved_shape = S1.shape
        S1 = cp.reshape(S1, (S1.shape[0], -1), order='F')
        n_samples = S1.shape[0]

        # Gaussian taps out to 4 sigma, normalized to unit sum.
        tmax = ceil(4 * sigma)
        dt = cp.arange(-tmax, tmax + 1)
        gaus = cp.exp(-dt**2 / (2 * sigma**2))
        gaus = gaus[:, cp.newaxis] / cp.sum(gaus)

        # Filtering an all-ones column gives the edge-correction factor.
        cNorm = convolve(cp.ones((n_samples, 1)), gaus).ravel()[:, cp.newaxis]
        S1 = convolve(S1, gaus)

        S1 = S1.reshape(moved_shape, order='F')
        S1 = S1 / cNorm

        # Undo the axis permutation.
        S1 = cp.transpose(S1, list(range(1, axis + 1)) + [0] + trailing)

    return S1
def liquidize(self, intens, sigma_A, gamma_A):
    '''Apply liquidization transform on given intensity

    The result is a sum over ``n`` of the intensity convolved (via FFT) with
    ``exp(-n * self.urad / gamma_A)`` and weighted by
    ``exp(-s_sq) * s_sq**n / n!`` where
    ``s_sq = (2 * pi * sigma_A * self.dgen.qrad)**2``.

    Args:
        intens: Intensity array.
        sigma_A: Displacement amplitude entering ``s_sq``.
        gamma_A: Length scale of the exponential kernel.

    Returns:
        The liquidized intensity, or ``intens`` itself (after printing a
        message) when no entry of ``self.slimits`` exceeds
        ``2 * pi * sigma_A / self.res_max``.
    '''
    s_sq = (2. * cp.pi * sigma_A * self.dgen.qrad)**2
    patt = cp.fft.fftshift(cp.fft.fftn(cp.fft.ifftshift(intens)))

    threshold = 2. * np.pi * sigma_A / self.res_max
    if self.slimits.max() > threshold:
        n_max = np.where(self.slimits > threshold)[0][0] + 1
    else:
        print('No effect of liquid-like motions with these parameters')
        return intens

    liq = cp.zeros_like(intens)
    for n in range(n_max):
        kernel = cp.exp(-n * self.urad / gamma_A)
        # s_sq**0 is 1 everywhere. Evaluating it as exp(0 * log(s_sq)) gives
        # 0 * -inf = NaN wherever s_sq == 0, so skip the log for n == 0.
        log_power = n * cp.log(s_sq) if n > 0 else 0.
        weight = cp.exp(-s_sq + log_power - float(special.loggamma(n + 1)))
        liq += weight * cp.abs(cp.fft.fftshift(cp.fft.ifftn(
            patt * kernel)))
        sys.stderr.write('\rLiquidizing: %d/%d' % (n + 1, n_max))
    sys.stderr.write('\n')

    return liq
def _rbf_kernel(x, y, gamma=None):
    """Compute the RBF kernel matrix between the rows of ``x`` and ``y``.

    Args:
        x: 2-D array, one sample per row.
        y: 2-D array with the same number of columns as ``x``.
        gamma: Kernel coefficient. When ``None`` it is ``1 / x.shape[0]``.

    Returns:
        Array of shape ``(x.shape[0], y.shape[0])`` holding
        ``exp(-gamma * ||x_i - y_j||^2)``.
    """
    xn, nx = x.shape
    _, ny = y.shape
    assert nx == ny, ('The number ({}) of columns of x must be the same as '
                      'the number ({}) of rows of y'.format(nx, ny))

    if gamma is None:
        gamma = 1.0 / xn

    # ||x - y||^2 expanded as ||x||^2 - 2 x.y + ||y||^2.
    x_sq = (x * x).sum(axis=1)
    y_sq = (y * y).sum(axis=1)
    cross = cupy.dot(x, y.transpose())
    sq_dist = x_sq[:, cupy.newaxis] - 2 * cross + y_sq
    return cupy.exp(sq_dist * -gamma)
def backward(self):
    """Back-propagate the loss gradient to the logits.

    The gradient of the loss with respect to the logits is ``prob - 1`` for
    the gold tag and ``prob`` for every other tag, scaled by the incoming
    gradient of the loss. Nothing is done when ``loss_t.grad`` is ``None``.

    The batched branch previously called ``len()`` on
    ``grad_logits.shape[0]`` (an int), which raised ``TypeError``.
    """
    logits, loss_t, arr_tags, arr_logprobs = self.get_ctx(
        'logits', 'loss_t', 'arr_tags', 'arr_logprobs')
    if loss_t.grad is None:
        return

    # exp(log-prob) is a fresh array, so it can be edited in place.  [*, N]
    grad_logits = xp.exp(arr_logprobs)
    # prob-1 for gold, prob for non-gold
    if len(grad_logits.shape) == 1:
        grad_logits[arr_tags] -= 1.
        grad_logits *= loss_t.grad
    else:
        batch_index = xp.arange(grad_logits.shape[0])
        grad_logits[batch_index, arr_tags] -= 1.
        grad_logits *= loss_t.grad[:, None]
    logits.accumulate_grad(grad_logits)
def forward(self, x, t):
    """Compute softmax probabilities and the mean cross-entropy loss.

    The loss previously multiplied each log-probability by the class index
    ``t``, so samples of class 0 contributed nothing and other classes were
    scaled by their index. The cross-entropy is now the plain mean of
    ``-log(y[i, t[i]])``.

    Args:
        x: Scores, either ``(batch, classes)`` or a single ``(classes,)``
            vector.
        t: Targets, either class indices or one-hot vectors with the same
            number of elements as ``x``.

    Returns:
        Scalar loss. The probabilities and the index-form targets are kept
        in ``self.y`` and ``self.t``.

    Raises:
        ValueError: If ``x`` is neither 1-D nor 2-D.
    """
    if x.ndim == 2:  # mini-batch input
        x = x - x.max(axis=1, keepdims=True)
        x = cp.exp(x)
        y = x / x.sum(axis=1, keepdims=True)
    elif x.ndim == 1:
        x = cp.exp(x - cp.max(x))
        y = x / cp.sum(x)
    else:
        raise ValueError('x must be 1-D or 2-D, got ndim={}'.format(x.ndim))

    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # When the targets are one-hot vectors, convert them to class indices.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # 1e-7 keeps the logarithm finite when a probability underflows to zero.
    picked = y[cp.arange(batch_size), t]
    loss = -1.0 * cp.sum(cp.log(picked + 1e-7)) / batch_size

    self.y = y
    self.t = t
    return loss
def forward(self):
    """Turn the phase mask ``self.phi`` into a stack of convolution kernels.

    Steps: compute the PSF as ``|ifft2(ifftshift(exp(i*phi)))|^2``, normalize
    it over the first two axes, resize it on the host, mix the last axis
    with the transposed matrix loaded from ``filename_crosstalk``, crop, take
    the difference between the two halves along axis 0, and cut the result
    into ``rows * cols`` tiles.

    The tiles were previously collected as CuPy arrays and passed to
    ``np.asarray``, which CuPy rejects (implicit conversion to NumPy is not
    allowed); each tile is now copied to the host explicitly. A redundant
    host -> device -> host copy of the resized PSF is also gone.

    Returns:
        NumPy array with the tile index on the last axis, multiplied by
        ``norm_factor``.
    """
    # input is the tensor phi (original note: shape (1120, 1120, 3))
    phi = cp.asarray(self.phi)
    h = cp.fft.ifft2(cp.fft.ifftshift(cp.exp(1.0j * phi), axes=(0, 1)),
                     axes=(0, 1))
    psf_new = cp.square(cp.abs(h))

    # Normalize over the first two axes, separately for any trailing axes.
    if len(psf_new.shape) == 2:
        norm = cp.sum(psf_new)
    else:
        norm = cp.reshape(cp.sum(psf_new, axis=(0, 1)),
                          (1, 1) + psf_new.shape[2:])
    psf_new = psf_new / norm

    # OpenCV works on host arrays; the result stays on the host for matmul.
    P = cv.resize(cp.asnumpy(psf_new), (final_dim, final_dim),
                  interpolation=cv.INTER_NEAREST)

    A = np.load(filename_crosstalk)
    A_t = np.transpose(A)
    # Apply A_t to the last-axis vector at every pixel.
    B = np.matmul(A_t.reshape((1, 1) + A_t.shape),
                  P.reshape(P.shape + (1, ))).reshape(P.shape)

    # psf_new_unpadded (original note: (152, 228, 3))
    psf_new_unpadded = cp.asarray(B[unpad_1:-unpad_1, unpad_2:-unpad_2])

    # tiled kernels (original note: (76, 228, 3))
    halves = cp.split(psf_new_unpadded, 2)
    tiled_kernels = halves[0] - halves[1]

    # One kernel per (row, col) tile, row-major, with the padding removed.
    weights_pm = []
    for row_block in cp.split(tiled_kernels, rows, axis=0):
        for padded_kernel in cp.split(row_block, cols, axis=1):
            kernel = padded_kernel[pad:-pad, pad:-pad]
            weights_pm.append(cp.asnumpy(kernel))

    # Stack to (tiles, h, w, c), then move the tile axis to the end.
    weights_pm = np.asarray(weights_pm)
    weights_pm = np.transpose(weights_pm, (1, 2, 3, 0))
    return weights_pm * norm_factor
def _pfb_xcorr(self):
    '''
    Consume buffer data to compute spectra in pairs and then
    cross-correlate them.

    The two IQ buffers are channelized concurrently with
    ``self._spectrometer_poly``; the second spectrum is rotated by a phase
    gradient built from ``self.calibrated_delay`` before the product is
    averaged over the first axis.

    Returns
    -------
    vis : If ``self.mode`` is 'CONTINUUM' or 'TEST', the mean of the
          cross-power spectrum divided by the bandwidth.
          Otherwise the cross-power spectrum itself.
    '''
    channelizer_args = (self.ntaps, self.nbins, self.window)

    # Run both polyphase-filterbank FFTs on worker threads.
    with concurrent.futures.ThreadPoolExecutor(
            max_workers=2) as iq_processor:
        pending = [
            iq_processor.submit(self._spectrometer_poly, cp.array(iq),
                                *channelizer_args)
            for iq in (self.gpu_iq_0, self.gpu_iq_1)
        ]
    f0, f1 = [job.result() for job in pending]

    # Fractional sample delay correction applied as a phase gradient
    # (Thompson, Moran, Swenson, Interferometry and Synthesis in Radio
    # Astronomy, 3rd ed., p.364; see also
    # http://www.gmrt.ncra.tifr.res.in/doc/WEBLF/LFRA/node70.html).
    freqs = cp.fft.fftfreq(f0.shape[-1],
                           d=1 / self.bandwidth) + self.frequency
    rot = cp.exp(-2j * cp.pi * freqs * (-self.calibrated_delay))

    # Cross-power spectrum, averaged over the first axis and centred.
    xpower_spec = f0 * cp.conj(f1 * rot)
    xpower_spec = cp.fft.fftshift(xpower_spec.mean(axis=0))

    # Replace the centre bin with the mean of its two neighbours.
    mid = xpower_spec.shape[-1] // 2
    xpower_spec[mid] = (xpower_spec[-1 + mid] + xpower_spec[1 + mid]) / 2.

    if self.mode in ('CONTINUUM', 'TEST'):
        # don't save spectral information: a visibility amplitude estimate
        return xpower_spec.mean(axis=0) / self.bandwidth
    return xpower_spec
def phaseshift_cupy(img_cupy, shift):
    """Shift a 2-D image by multiplying its spectrum with a phase ramp.

    Args:
        img_cupy: 2-D image array.
        shift: Pair ``(shift_axis0, shift_axis1)`` in pixels.

    Returns:
        Real part of the inverse transform of the phase-ramped spectrum.
    """
    spectrum = cp.fft.fftshift(cp.fft.fft2(img_cupy))
    xsize, ysize = img_cupy.shape[0], img_cupy.shape[1]

    # Centred integer frequency indices: X varies along axis 0, Y along 1.
    Y, X = cp.meshgrid(cp.arange(ysize) - ysize // 2,
                       cp.arange(xsize) - xsize // 2)

    # Purely imaginary exponent, stored in single-precision complex.
    ramp = cp.zeros([xsize, ysize], dtype=cp.complex64)
    ramp.imag = -2.0 * cp.pi * (X * shift[0] / xsize + Y * shift[1] / ysize)

    shifted = spectrum * cp.exp(ramp)
    return cp.fft.ifft2(cp.fft.ifftshift(shifted)).real
def local_cov_in_class(self, key, label, nb_class, batchsize):
    """Compute an affinity-weighted within-class scatter matrix.

    Every pair of rows of ``key`` with equal labels contributes the outer
    product of their difference, weighted by ``exp(-||diff||^2 / 100)``.

    Args:
        key: 2-D feature array, one sample per row.
        label: 1-D label array aligned with ``key``.
        nb_class: Unused here.
        batchsize: Divisor applied to the summed scatter.

    Returns:
        ``(d, d)`` float32 matrix, where ``d`` is ``key.shape[1]``.
    """
    # sub[i, j] = key[j] - key[i] for every ordered pair of samples.
    sub = key[cp.newaxis, :, :] - key[:, cp.newaxis, :]
    norm_sub = cp.linalg.norm(sub, axis=2)
    affinity = cp.exp(-norm_sub * norm_sub / 100)

    # Keep only pairs that share a label.
    same_class = (label[cp.newaxis, :] == label[:, cp.newaxis])
    affinity = affinity * same_class

    Sw = cp.einsum('ij,ijk,ijl->kl', affinity, sub, sub,
                   dtype='float32') * 0.5 * (1.0 / batchsize)
    return Sw
def local_cov_in_class(self, key, label, nb_class, batchsize, affinity):
    """Compute an affinity-weighted within-class scatter matrix.

    Every pair of rows of ``key`` with equal labels contributes the outer
    product of their difference, weighted by
    ``exp(-||diff||^2 * affinity)``.

    Args:
        key: 2-D feature array, one sample per row.
        label: 1-D label array aligned with ``key``.
        nb_class: Number of classes; ``batchsize // nb_class`` is the
            divisor applied to the summed scatter.
        batchsize: Total batch size.
        affinity: Scale of the squared distance inside the exponential.

    Returns:
        ``(d, d)`` float32 matrix, where ``d`` is ``key.shape[1]``.
    """
    batchsize_per_class = batchsize // nb_class

    # sub[i, j] = key[j] - key[i] for every ordered pair of samples.
    sub = key[cp.newaxis, :, :] - key[:, cp.newaxis, :]
    norm_sub = cp.linalg.norm(sub, axis=2)
    pair_weight = cp.exp(-norm_sub * norm_sub * affinity)

    # Keep only pairs that share a label.
    same_class = (label[cp.newaxis, :] == label[:, cp.newaxis])
    pair_weight = pair_weight * same_class

    # Fold the weight into one copy of the differences before contracting.
    pair_weight = pair_weight.reshape(
        [pair_weight.shape[0], pair_weight.shape[1], 1])
    weighted_sub = pair_weight * sub
    Sw = cp.einsum('ijk,ijl->kl', weighted_sub, sub,
                   dtype='float32') * 0.5 * (1.0 / batchsize_per_class)
    return Sw
def DFT_matrix(Nd, om=None):
    """Build the dense matrix ``exp(-1j * om . r)`` over a centred grid.

    Args:
        Nd: Grid size along each dimension.
        om: ``(M, dim)`` array of frequencies. When ``None`` it is obtained
            from ``fake_Cartesian(Nd)``.

    Returns:
        Complex array of shape ``(M, prod(Nd))``.
    """
    dim = len(Nd)  # dimension
    if om is None:
        om = fake_Cartesian(Nd)

    # Grid coordinates, flattened and centred by subtracting Nd/2 per axis.
    N = numpy.prod(Nd)
    omN = cupy.zeros((N, dim), dtype=numpy.float64)
    grid = cupy.indices(Nd)
    for axis, length in enumerate(Nd):
        omN[:, axis] = (grid[axis].ravel() - length / 2)

    # Phase = sum over dimensions of the outer product om[:, d] x omN[:, d].
    A = om[:, 0, cupy.newaxis] * omN[cupy.newaxis, :, 0]
    for d in range(1, dim):
        A += om[:, d, cupy.newaxis] * omN[cupy.newaxis, :, d]

    return cupy.exp(-1.0j * A)
def estimate_intensity(density, occupancy, mean_rate):
    '''Exponentiate the result of ``estimate_log_intensity``.

    Parameters
    ----------
    density : ndarray, shape (n_bins,)
    occupancy : ndarray, shape (n_bins,)
    mean_rate : float

    Returns
    -------
    intensity : ndarray, shape (n_bins,)

    '''
    log_intensity = estimate_log_intensity(density, occupancy, mean_rate)
    return cp.exp(log_intensity)
def normal_density_cupy(x, mean, stddev, from_axis=None, eps=1e-8, gpu=0):
    """Evaluate a Gaussian density on the selected GPU.

    Args:
        x: Points at which to evaluate the density.
        mean: Mean of the distribution.
        stddev: Standard deviation; floored at ``eps`` (the variance is
            floored at ``eps`` separately).
        from_axis: When not ``None`` and non-negative, all axes from this
            index onward are flattened and the densities multiplied
            together along the flattened axis.
        eps: Lower bound applied to the variance and standard deviation.
        gpu: Device id used for the computation.

    Returns:
        Element-wise densities, or their product over the trailing axes
        when ``from_axis`` is given.
    """
    import cupy as cp

    with cp.cuda.Device(gpu):
        variance = cp.maximum(stddev ** 2, eps)
        stddev = cp.maximum(stddev, eps)
        density = cp.exp(-cp.square(x - mean) / (2 * variance)) / (
            stddev * math.sqrt(2 * math.pi))

        if (from_axis is not None) and (from_axis >= 0):
            # The flattened length is a host-side integer computed from the
            # shape tuple; cp.prod only accepts device arrays.
            leading = tuple(density.shape[:from_axis])
            flattened = math.prod(density.shape[from_axis:])
            density = cp.reshape(density, leading + (flattened,))
            density = cp.prod(density, axis=from_axis)

    return density
def sigmoid(z):
    """
    Perform sigmoid activation function.

    Parameters
    ----------
    z : cp.array of floats
        Input values.

    Returns
    -------
    cp.array of floats, same shape as ``z``
        Element-wise ``1 / (1 + exp(-z))``.
    """
    denominator = 1 + cp.exp(-z)
    return 1 / denominator
def admm(self, data, h, e, psi, phi, lamd, mu, u, alpha, piter, titer, NITER, model): data = data.copy() * self.coefdata # normalization # init penalties rho, tau = 1, 1 # Lagrangian for each iter lagr = cp.zeros([NITER, 7], dtype="float32") lagr0 = self.take_lagr(psi, phi, data, h, e, lamd, mu, tau, rho, alpha, model) for m in range(NITER): # keep previous iteration for penalty updates h0, e0 = h, e psi = self.cg_ptycho_batch(data, psi, h, lamd, rho, piter, model) # tomography problem xi0, xi1, K, pshift = self.takexi(psi, phi, lamd, mu, rho, tau) u = self.cg_tomo(xi0, xi1, K, u, rho, tau, titer) # regularizer problem phi = self.solve_reg(u, mu, tau, alpha) # h,e updates h = self.exptomo(self.fwd_tomo(u)) * cp.exp(1j * pshift) e = self.fwd_reg(u) # lambda, mu updates lamd = lamd + rho * (h - psi) mu = mu + tau * (e - phi) # update rho, tau for a faster convergence rho, tau = self.update_penalty(psi, h, h0, phi, e, e0, rho, tau) # Lagrangians difference between two iterations if (np.mod(m, 10) == 0): lagr[m] = self.take_lagr(psi, phi, data, h, e, lamd, mu, alpha, rho, tau, model) print( "%d/%d) rho=%.2e, tau=%.2e, Lagr terms diff: %.2e %.2e %.2e %.2e %.2e %.2e, Sum: %.2e" % (m, NITER, rho, tau, *(lagr0 - lagr[m]))) lagr0 = lagr[m] name = 'reg'+str(model)+str(piter)+str(titer) + \ str(NITER)+str(np.amax(data)) dxchange.write_tiff(u[u.shape[0] // 2].imag.get(), 'betap/beta' + name) dxchange.write_tiff(u[u.shape[0] // 2].real.get(), 'deltap/delta' + name) dxchange.write_tiff(cp.abs(psi).get(), 'psip/psiamp' + name) dxchange.write_tiff( cp.angle(psi).get(), 'psip/psiangle' + name) lagrr = self.take_lagr(psi, phi, data, h, e, lamd, mu, tau, rho, alpha, model) print(lagrr) return u, psi, lagrr
def test_elementwise_binary(self):
    """Check ``elementwise_binary`` with sigmoid on A, abs on C, and MUL.

    The reference value is ``alpha * sigmoid(a_transposed) * gamma * |c|``.
    """
    desc_a = cutensor.create_tensor_descriptor(self.a, ct.OP_SIGMOID)
    desc_c = cutensor.create_tensor_descriptor(self.c, ct.OP_ABS)

    actual = cutensor.elementwise_binary(
        self.alpha, self.a, desc_a, self.mode_a,
        self.gamma, self.c, desc_c, self.mode_c,
        op_AC=ct.OP_MUL
    )

    sigmoid_a = 1 / (1 + cupy.exp(-self.a_transposed))
    expected = self.alpha * sigmoid_a * self.gamma * cupy.abs(self.c)

    testing.assert_allclose(expected, actual, rtol=1e-6, atol=1e-6)
def __init__(self, n, eps):
    """Precompute the parameters of the USFFT transform.

    Args:
        n: Grid size; the smearing kernel has shape ``(n, n)``.
        eps: Accuracy parameter from which ``mu`` and ``m`` are derived.

    Sets ``self.n``, ``self.mu``, ``self.m`` (a Python int), ``self.kernel``
    (float32 Gaussian on the centred grid) and ``self.cons``.
    """
    # parameters for the USFFT transform
    mu = -np.log(eps) / (2 * n**2)
    Te = 1 / np.pi * np.sqrt(-mu * np.log(eps) + (mu * n)**2 / 4)
    # The `np.int` alias no longer exists in current NumPy; the builtin
    # `int` is what it always referred to.
    m = int(np.ceil(2 * n * Te))

    # smearing kernel
    xeq = cp.mgrid[-n // 2:n // 2, -n // 2:n // 2]
    kernel = cp.exp(-mu * cp.sum(xeq**2, axis=0)).astype('float32')

    # smearing constants
    cons = [np.sqrt(np.pi / mu)**2, -np.pi**2 / mu]

    self.n = n
    self.mu = mu
    self.m = m
    self.kernel = kernel
    self.cons = cons