def grad(self, _cur_output, _next_output, _next_action, _batch_tuples,
         _err_list, _err_count, _k):
    """Write the TD-error gradient for head ``_k`` into ``_cur_output.grad``.

    For every sample whose ``mask[_k]`` is truthy the target is
    ``reward`` plus, when ``next_state.in_game`` is truthy,
    ``self.config.gamma`` times the next-state value of the chosen next
    action.  The gradient entry of the taken action is set to
    ``2 * (current_value - target)``; all other entries stay zero.

    Args:
        _cur_output: Object with a ``data`` array indexed ``[sample][action]``;
            its ``grad`` attribute is (re)allocated here.
        _next_output: Object with a ``data`` array for the next states.
        _next_action: Per-sample index of the action evaluated in the next
            state.
        _batch_tuples: Sequence of samples exposing ``mask``, ``action``,
            ``reward`` and ``next_state.in_game``.
        _err_list: Per-sample accumulator; ``abs(error)`` is added in place.
        _err_count: Per-sample counter; incremented in place.
        _k: Index into each sample's ``mask``.
    """
    # Allocate the gradient buffer with the array module matching the config.
    xp = cupy if self.config.gpu else np
    _cur_output.grad = xp.zeros_like(_cur_output.data)

    for idx in range(len(_batch_tuples)):
        sample = _batch_tuples[idx]
        # Masked out for this head: leave the zero gradient untouched.
        if not sample.mask[_k]:
            continue

        action = sample.action
        q_current = _cur_output.data[idx][action].tolist()

        td_target = sample.reward
        # Only bootstrap from the next state when the game is still running.
        if sample.next_state.in_game:
            q_next = _next_output.data[idx][_next_action[idx]].tolist()
            td_target += self.config.gamma * q_next

        td_error = q_current - td_target
        _cur_output.grad[idx][action] = 2 * td_error
        _err_list[idx] += abs(td_error)
        _err_count[idx] += 1
def run(args):
    """Run an ONNX test case through TensorRT and compare with the references.

    Loads ``model.onnx`` and ``test_data_set_0`` from ``args.test_dir``,
    builds a TensorRT engine, executes it once and checks every output
    against the stored expected value.  When ``args.iterations > 1`` the
    engine is executed ``args.iterations - 1`` more times and the mean
    latency is printed.

    Args:
        args: Namespace providing ``test_dir``, ``debug``, ``batch_size`` and
            ``iterations``.

    Raises:
        RuntimeError: If the ONNX parser rejects the model.
        AssertionError: If binding shapes or output values do not match.
    """
    onnx_filename = os.path.join(args.test_dir, 'model.onnx')
    input_names, output_names = onnx_input_output_names(onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = load_test_data(test_data_dir, input_names, output_names)

    with open(onnx_filename, 'rb') as f:
        onnx_proto = f.read()

    if args.debug:
        logger = tensorrt.Logger(tensorrt.Logger.Severity.INFO)
    else:
        logger = tensorrt.Logger()
    builder = tensorrt.Builder(logger)
    # TODO(hamaji): Infer batch_size from inputs.
    builder.max_batch_size = args.batch_size
    network = builder.create_network()
    parser = tensorrt.OnnxParser(network, logger)
    # parse() reports failure through its return value; without this check a
    # bad model only surfaces later as an opaque error on a missing engine.
    if not parser.parse(onnx_proto):
        details = '; '.join(
            parser.get_error(i).desc() for i in range(parser.num_errors))
        raise RuntimeError(
            'Failed to parse %s: %s' % (onnx_filename, details))
    engine = builder.build_cuda_engine(network)
    context = engine.create_execution_context()

    # Bindings are laid out as all inputs followed by all outputs.
    assert len(inputs) + len(outputs) == engine.num_bindings
    for i, (_, expected_input) in enumerate(inputs):
        assert args.batch_size == expected_input.shape[0]
        assert expected_input.shape[1:] == engine.get_binding_shape(i)
    for i, (_, expected_output) in enumerate(outputs, start=len(inputs)):
        assert args.batch_size == expected_output.shape[0]
        assert expected_output.shape[1:] == engine.get_binding_shape(i)

    inputs = [v for _, v in inputs]
    outputs = [v for _, v in outputs]
    gpu_inputs = to_gpu(inputs)
    gpu_outputs = [cupy.zeros_like(cupy.array(output)) for output in outputs]
    bindings = [a.data.ptr for a in gpu_inputs]
    bindings += [a.data.ptr for a in gpu_outputs]

    context.execute(args.batch_size, bindings)

    actual_outputs = to_cpu(gpu_outputs)
    for name, expected, actual in zip(output_names, outputs, actual_outputs):
        # assert_allclose takes (actual, desired); the tolerance is relative
        # to ``desired``.  The output name goes into the failure message.
        np.testing.assert_allclose(actual, expected, rtol=1e-3, atol=1e-4,
                                   err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    if args.iterations > 1:
        # The first execution above is not part of the timed runs.
        num_iterations = args.iterations - 1
        start = time.time()
        for _ in range(num_iterations):
            context.execute(args.batch_size, bindings)
        cupy.cuda.device.Device().synchronize()
        elapsed = time.time() - start
        print('Elapsed: %.3f msec' % (elapsed * 1000 / num_iterations))
def test_scan_out(self, dtype):
    """Check ``scan`` with an explicit output array, separate and in place.

    Scanning an all-ones vector is expected to yield ``1, 2, ..., n`` both
    when the result goes to a second array and when it overwrites the input.
    """
    # Fewer elements for the narrow dtypes -- presumably so the running
    # total stays representable in them.
    narrow_dtypes = {cupy.int8, cupy.uint8, cupy.float16}
    element_num = 100 if dtype in narrow_dtypes else 10000

    src = cupy.ones((element_num,), dtype=dtype)
    dst = cupy.zeros_like(src)
    expect = cupy.arange(start=1, stop=element_num + 1).astype(dtype)

    # Result written to a separate array.
    cupy.core.core.scan(src, dst)
    testing.assert_array_equal(dst, expect)

    # Result written over the input itself.
    cupy.core.core.scan(src, src)
    testing.assert_array_equal(src, expect)
def zeros_like(array, stream=None):
    """Creates a zero-filled cupy.ndarray object like the given array.

    Deprecated: use ``cupy.zeros_like`` instead.  A ``DeprecationWarning``
    is emitted on every call.

    Args:
        array (cupy.ndarray or numpy.ndarray): Base array.
        stream (cupy.cuda.Stream): CUDA stream.  Must be ``None``; any other
            value trips the assertion below.

    Returns:
        cupy.ndarray: Zero-filled array with the shape and dtype of
        ``array``.

    """
    # stacklevel=2 attributes the warning to the caller, so the default
    # warning filters can show it at the site that needs to migrate.
    warnings.warn(
        "chainer.cuda.zeros_like is deprecated. Use cupy.zeros_like instead.",
        DeprecationWarning, stacklevel=2)
    check_cuda_available()
    assert stream is None
    if isinstance(array, cupy.ndarray):
        return cupy.zeros_like(array)
    # Non-CuPy inputs only contribute their shape and dtype.
    return cupy.zeros(array.shape, dtype=array.dtype)
def forward_gpu(self, inputs):
    """Compute the cross product of consecutive 3-vectors on the GPU.

    Args:
        inputs: Pair ``(a, b)`` of device arrays.  The kernel reads them as
            flat ``float`` buffers of packed 3-vectors -- assumes float32,
            C-contiguous data whose size is a multiple of 3 (TODO confirm
            against callers).

    Returns:
        tuple: One-element tuple holding the float32 result array, shaped
        like ``a``.
    """
    a, b = inputs
    c = cp.zeros_like(a, 'float32')
    # One work item per complete 3-vector.  Floor division keeps the launch
    # inside the buffers; true division rounded a partial trailing vector up
    # to an extra work item that indexed past the end.
    num_vectors = a.size // 3
    chainer.cuda.elementwise(
        'int32 j, raw T a, raw T b',
        'raw T c',
        '''
            float* ap = &a[j * 3];
            float* bp = &b[j * 3];
            float* cp = &c[j * 3];
            cp[0] = ap[1] * bp[2] - ap[2] * bp[1];
            cp[1] = ap[2] * bp[0] - ap[0] * bp[2];
            cp[2] = ap[0] * bp[1] - ap[1] * bp[0];
        ''',
        'function',
    )(
        cp.arange(num_vectors, dtype='int32'), a, b, c,
    )
    return c,
def _save_abs_angle_figure(spectrum, cmap, abs_title, angle_title, path):
    """Save a two-panel figure showing the magnitude and angle of ``spectrum``."""
    fig = plt.figure()
    plt.subplot(1, 2, 1)
    im = plt.imshow(np.absolute(spectrum), cmap=cmap, vmin=-1, vmax=1)
    fig.colorbar(im)
    plt.title(abs_title)
    plt.subplot(1, 2, 2)
    im = plt.imshow(np.angle(spectrum), cmap=cmap, vmin=-np.pi, vmax=np.pi)
    fig.colorbar(im)
    plt.title(angle_title)
    plt.tight_layout()
    savefig(path)
    plt.close()


def _process_data(samples_history, u_samples_history, n, n_ext, t_start, t_end,
                  target_image, corrupted_image, burn_percentage, isSinogram,
                  sinogram, theta, fbp, SimulationResult_dir, result_file,
                  cmap=plt.cm.seismic_r, desired_n_ext=256):
    """Post-process sampler histories into images, plots and error metrics.

    Drops the burn-in rows, rebuilds images from the Fourier-coefficient
    samples, accumulates their running mean/variance with the Welford
    helpers in ``util``, saves a set of figures into
    ``SimulationResult_dir`` and prints error metrics for the posterior
    mean and for the comparison image.

    Args:
        samples_history: 2-D array of ``v`` samples, one row per iteration.
        u_samples_history: 2-D array of ``u`` samples, one row per iteration.
        n: Basis number; coefficient rows are reshaped to
            ``(2 * n - 1, 2 * n - 1)``.
        n_ext: Extended basis number the samples were generated with; used
            only for the rescaling factor.
        t_start, t_end: Forwarded to ``imc.FourierAnalysis_2D``.
        target_image: Ground-truth image (NumPy array).
        corrupted_image: Measurement shown when ``isSinogram`` is false.
        burn_percentage: Percentage of leading rows discarded as burn-in.
        isSinogram: If true, ``sinogram`` is plotted as the measurement and
            ``fbp`` is the comparison image; otherwise the Fourier
            reconstruction of ``target_image`` is the comparison.
        sinogram: Measurement shown when ``isSinogram`` is true.
        theta: Unused.
        fbp: Comparison image used when ``isSinogram`` is true.
        SimulationResult_dir: Output directory; must support the ``/``
            operator (e.g. ``pathlib.Path``).
        result_file: Unused (persisting the metrics is disabled).
        cmap: Matplotlib colour map for all image plots.
        desired_n_ext: Extended basis number used for the reconstruction.
    """
    # remove pdf files
    remove_pdf_files(SimulationResult_dir)

    # ``np.int`` / ``cp.int`` were aliases of the builtin ``int`` and no
    # longer exist in current NumPy / CuPy releases.
    burn_start_index = int(0.01 * burn_percentage * u_samples_history.shape[0])

    fourier = imc.FourierAnalysis_2D(n, desired_n_ext, t_start, t_end)
    # Only needed by the disabled Lanczos-filtered reconstruction; kept so
    # the call sequence matches the original.
    sL2 = util.sigmasLancosTwo(int(n))
    # n_ext = 2*n
    scalling_factor = (2 * fourier.extended_basis_number - 1) / (2 * n_ext - 1)

    # The very first sample is the initial condition; take it before the
    # burn-in rows are dropped.
    samples_init = samples_history[0, :]

    u_samples_history = u_samples_history[burn_start_index:, :]
    samples_history = samples_history[burn_start_index:, :]
    N = u_samples_history.shape[0]

    # initial condition
    vF_init = util.symmetrize(cp.asarray(samples_init)).reshape(
        2 * n - 1, 2 * n - 1, order=imc.ORDER) * scalling_factor
    vF_mean = util.symmetrize(
        cp.asarray(np.mean(samples_history, axis=0))) * scalling_factor
    vF_stdev = util.symmetrize(cp.asarray(np.std(samples_history, axis=0)))
    vF_abs_stdev = util.symmetrize(
        cp.asarray(np.std(np.abs(samples_history), axis=0)))
    print('fourier n_ext = {}'.format(n_ext))

    # target image does not need to be scaled
    vForiginal = util.symmetrize_2D(
        fourier.rfft2(cp.array(target_image, dtype=cp.float32)))
    reconstructed_image_original = fourier.irfft2(vForiginal[:, n - 1:])
    reconstructed_image_init = fourier.irfft2(vF_init[:, n - 1:])

    # Running mean / variance of the reconstructed v images.
    samples_history_cp = cp.asarray(samples_history) * scalling_factor
    v_image_aggregate = (0,
                         cp.zeros_like(reconstructed_image_original),
                         cp.zeros_like(reconstructed_image_original))
    for i in range(N):
        vF = util.symmetrize(samples_history_cp[i, :]).reshape(
            2 * n - 1, 2 * n - 1, order=imc.ORDER)
        v_temp = fourier.irfft2(vF[:, n - 1:])
        v_image_aggregate = util.updateWelford(v_image_aggregate, v_temp)
    v_image_mean, _, v_image_s_var = util.finalizeWelford(v_image_aggregate)

    # TODO: This is sign of wrong processing, Remove this
    # if isSinogram:
    #     reconstructed_image_init = cp.fliplr(reconstructed_image_init)
    #     v_image_mean = cp.fliplr(v_image_mean)
    #     v_image_s_var = cp.fliplr(v_image_s_var)

    # Disc mask (1 inside, 0 outside), built in one vectorised expression
    # instead of one device write per pixel.  The row-based centre ``r`` is
    # used for both axes, as in the original loop.
    n_rows, n_cols = reconstructed_image_original.shape
    r = (n_rows + 1) // 2
    x = 2 * (cp.arange(n_rows) - r) / n_rows
    y = 2 * (cp.arange(n_cols) - r) / n_cols
    mask = ((x[:, None]**2 + y[None, :]**2) < 1).astype(
        reconstructed_image_original.dtype)

    # Running mean / variance of u and of ell = exp(u).
    u_samples_history_cp = cp.asarray(u_samples_history) * scalling_factor
    u_image = cp.zeros_like(v_image_mean)
    u_image_aggregate = (0, cp.zeros_like(u_image), cp.zeros_like(u_image))
    ell_image_aggregate = (0, cp.zeros_like(u_image), cp.zeros_like(u_image))
    for i in range(N):
        uF = util.symmetrize(u_samples_history_cp[i, :]).reshape(
            2 * n - 1, 2 * n - 1, order=imc.ORDER)
        u_temp = fourier.irfft2(uF[:, n - 1:])
        u_image_aggregate = util.updateWelford(u_image_aggregate, u_temp)
        ell_temp = cp.exp(u_temp)
        ell_image_aggregate = util.updateWelford(ell_image_aggregate, ell_temp)
    u_image_mean, _, u_image_s_var = util.finalizeWelford(u_image_aggregate)
    ell_image_mean, _, ell_image_s_var = util.finalizeWelford(
        ell_image_aggregate)

    ri_fourier = cp.asnumpy(reconstructed_image_original)
    ri_compare = fbp if isSinogram else ri_fourier

    # Developer switch: restrict the v-based images to the disc mask.
    is_masked = True
    if is_masked:
        reconstructed_image_var = mask * v_image_s_var
        reconstructed_image_mean = mask * v_image_mean
        reconstructed_image_init = mask * reconstructed_image_init
    else:
        reconstructed_image_var = v_image_s_var
        reconstructed_image_mean = v_image_mean

    # Move everything needed for plotting to host memory.
    ri_init = cp.asnumpy(reconstructed_image_init)
    vForiginal_n = cp.asnumpy(vForiginal)
    vF_init_n = cp.asnumpy(vF_init)
    ri_fourier_n = cp.asnumpy(ri_fourier)
    vF_mean_n = cp.asnumpy(
        vF_mean.reshape(2 * n - 1, 2 * n - 1, order=imc.ORDER))
    vF_stdev_n = cp.asnumpy(
        vF_stdev.reshape(2 * n - 1, 2 * n - 1, order=imc.ORDER))
    vF_abs_stdev_n = cp.asnumpy(
        vF_abs_stdev.reshape(2 * n - 1, 2 * n - 1, order=imc.ORDER))
    ri_mean_n = cp.asnumpy(reconstructed_image_mean)
    ri_var_n = cp.asnumpy(reconstructed_image_var)
    ri_std_n = np.sqrt(ri_var_n)
    u_mean_n = cp.asnumpy(u_image_mean)
    u_var_n = cp.asnumpy(u_image_s_var)
    ell_mean_n = cp.asnumpy(ell_image_mean)
    ell_var_n = cp.asnumpy(ell_image_s_var)

    # Fourier coefficients: initial condition, target and posterior mean.
    # The first figure used to be titled "real part"/"imaginary part"
    # although it shows magnitude and angle like the other two.
    _save_abs_angle_figure(vF_init_n, cmap,
                           'Fourier - absolute', 'Fourier - angle',
                           SimulationResult_dir / ('vF_init' + image_extension))
    _save_abs_angle_figure(vForiginal_n, cmap,
                           'Fourier - absolute', 'Fourier - angle',
                           SimulationResult_dir / ('vForiginal' + image_extension))
    _save_abs_angle_figure(vF_mean_n, cmap,
                           'Fourier - absolute', 'Fourier - phase',
                           SimulationResult_dir / ('vF_mean' + image_extension))

    # Absolute error of vF_mean - vForiginal
    imshow(np.abs(vF_mean_n - vForiginal_n), cmap, -1, 1,
           'Fourier abs Error', 'abs_err_vF_mean', SimulationResult_dir)
    # Absolute error of vF_init - vForiginal
    imshow(np.abs(vF_init_n - vForiginal_n), cmap, -1, 1,
           'Fourier abs Error', 'abs_err_vF_init', SimulationResult_dir)
    # Absolute error of vF_init - vF_mean
    imshow(np.abs(vF_init_n - vF_mean_n), cmap, -1, 1,
           'Fourier abs Error', 'abs_err_vF_init_vF_mean',
           SimulationResult_dir)
    # Posterior mean image
    imshow(ri_mean_n, cmap, -1, 1, 'Posterior mean', 'ri_mean_n',
           SimulationResult_dir)
    # Fourier reconstruction of the target
    imshow(ri_fourier, cmap, -1, 1, 'Reconstructed image through Fourier',
           'ri_or_n', SimulationResult_dir)
    # Reconstruction of the initial sample
    imshow(ri_init, cmap, -1, 1, 'Reconstructed image through Fourier',
           'ri_init', SimulationResult_dir)
    # Reconstructed image variance
    imshow(ri_var_n, cmap, None, None, 'Posterior variance', 'ri_var_n',
           SimulationResult_dir)
    # Target image
    imshow(target_image, cmap, -1, 1, 'Target Image', 'target_image',
           SimulationResult_dir)
    # Comparison image
    imshow(ri_compare, cmap, -1, 1, 'Filtered Back Projection', 'ri_compare',
           SimulationResult_dir)
    # Errors
    imshow((target_image - ri_mean_n), cmap, -1, 1, 'Error FPB', 'err_RI_TI',
           SimulationResult_dir)
    imshow((target_image - ri_compare), cmap, -1, 1, 'Error FPB-SPDE',
           'err_RIO_TI', SimulationResult_dir)
    imshow((ri_mean_n - ri_compare), cmap, -1, 1, 'Error SPDE', 'err_RI_CMP',
           SimulationResult_dir)
    # Mean / variance of u and ell
    imshow(u_mean_n, cmap, None, None, 'Mean $u$', 'u_mean_n',
           SimulationResult_dir)
    imshow(u_var_n, cmap, None, None, 'Var $u$', 'u_var_n',
           SimulationResult_dir)
    imshow(ell_mean_n, cmap, None, None, r'Mean $\ell$', 'ell_mean_n',
           SimulationResult_dir)
    imshow(ell_var_n, cmap, None, None, r'Var $\ell$', 'ell_var_n',
           SimulationResult_dir)

    # Measurement
    fig = plt.figure()
    if isSinogram:
        im = plt.imshow(sinogram, cmap=cmap)
        plt.title('Sinogram')
    else:
        im = plt.imshow(corrupted_image, cmap=cmap)
        plt.title('corrupted_image --- CI')
    fig.colorbar(im)
    plt.tight_layout()
    savefig(SimulationResult_dir / ('measurement' + image_extension))
    plt.close()

    # Plot several 1-D slices through the images.
    N_slices = 16
    t_index = np.arange(target_image.shape[1])
    for i in range(N_slices):
        plt.figure()
        slice_index = target_image.shape[0] * i // N_slices
        plt.plot(t_index, target_image[slice_index, :], '-k',
                 linewidth=0.25, markersize=1)
        plt.plot(t_index, ri_fourier_n[slice_index, :], '-r',
                 linewidth=0.25, markersize=1)
        plt.plot(t_index, ri_mean_n[slice_index, :], '-b',
                 linewidth=0.25, markersize=1)
        # Shaded band: posterior mean +/- two standard deviations.
        plt.fill_between(
            t_index,
            ri_mean_n[slice_index, :] - 2 * ri_std_n[slice_index, :],
            ri_mean_n[slice_index, :] + 2 * ri_std_n[slice_index, :],
            color='b', alpha=0.1)
        plt.plot(t_index, ri_compare[slice_index, :], ':k',
                 linewidth=0.25, markersize=1)
        savefig(SimulationResult_dir /
                ('1D_Slice_{}'.format(
                    slice_index - (target_image.shape[0] // 2)) +
                 image_extension))
        plt.close()

    # Same for the Fourier coefficients (non-negative frequency half only).
    f_index = np.arange(n)
    for i in range(N_slices):
        plt.figure()
        slice_index = vForiginal_n.shape[0] * i // N_slices
        plt.plot(f_index, np.abs(vForiginal_n[slice_index, n - 1:]), '-r',
                 linewidth=0.25, markersize=1)
        plt.plot(f_index, np.abs(vF_init_n[slice_index, n - 1:]), ':k',
                 linewidth=0.25, markersize=1)
        plt.plot(f_index, np.abs(vF_mean_n[slice_index, n - 1:]), '-b',
                 linewidth=0.25, markersize=1)
        plt.fill_between(
            f_index,
            np.abs(vF_mean_n[slice_index, n - 1:]) -
            2 * vF_abs_stdev_n[slice_index, n - 1:],
            np.abs(vF_mean_n[slice_index, n - 1:]) +
            2 * vF_abs_stdev_n[slice_index, n - 1:],
            color='b', alpha=0.1)
        savefig(SimulationResult_dir /
                ('1D_F_Slice_{}'.format(slice_index - n) + image_extension))
        plt.close()

    # Error metrics of the posterior mean and of the comparison image.
    error = (target_image - ri_mean_n)
    error_CMP = (target_image - ri_compare)

    L2_error = np.linalg.norm(error)
    MSE = np.sum(error * error) / error.size
    PSNR = 10 * np.log10(np.max(ri_mean_n)**2 / MSE)
    SNR = np.mean(ri_mean_n) / np.sqrt(MSE * (error.size / (error.size - 1)))

    L2_error_CMP = np.linalg.norm(error_CMP)
    MSE_CMP = np.sum(error_CMP * error_CMP) / error_CMP.size
    PSNR_CMP = 10 * np.log10(np.max(ri_compare)**2 / MSE_CMP)
    SNR_CMP = np.mean(ri_compare) / np.sqrt(
        MSE_CMP * (error_CMP.size / (error_CMP.size - 1)))

    # Collected for persistence; writing them to ``result_file`` is
    # currently disabled, so the values are only printed below.
    metric = {
        'L2_error': L2_error,
        'MSE': MSE,
        'PSNR': PSNR,
        'SNR': SNR,
        'L2_error_CMP': L2_error_CMP,
        'MSE_CMP': MSE_CMP,
        'PSNR_CMP': PSNR_CMP,
        'SNR_CMP': SNR_CMP
    }
    # with h5py.File(result_file,mode='a') as file:
    #     for key,value in metric.items():
    #         if key in file.keys():
    #             del file[key]
    #         # else:
    #         file.create_dataset(key,data=value)

    print('Shallow-SPDE : L2-error {}, MSE {}, SNR {}, PSNR {},'.format(
        L2_error, MSE, SNR, PSNR))
    print('FBP : L2-error {}, MSE {}, SNR {}, PSNR {}'.format(
        L2_error_CMP, MSE_CMP, SNR_CMP, PSNR_CMP))
import numpy as np
import cupy
from chainer import cuda


def _mul_i():
    """Return an elementwise kernel whose body copies ``x[i]`` into ``y[i]``.

    Both arguments are declared ``raw``, so the caller has to pass the
    number of work items explicitly through ``size=``.
    """
    return cuda.elementwise(
        "raw T x", "raw T y",
        """
        y[i] = x[i]
        """,
        "muli")


o = cupy.ones((3, 2, 2))
y = cupy.zeros_like(o)
# Parenthesised so the statement is valid on Python 3 as well as Python 2
# (the bare ``print expr`` statement is a syntax error on Python 3).
print(_mul_i()(o, y, size=6))
def _voxelize_sub3(voxels):
    """Fill enclosed cavities of a voxel grid by flood-filling from outside.

    Empty cells on the faces of each grid are marked visible; then empty
    interior cells with a visible 6-neighbour are marked visible, sweep
    after sweep, until the number of visible cells stops changing.

    Args:
        voxels: int32 device array; the kernels index it as a flat
            ``(bs, vs, vs, vs)`` volume (assumes cubic grids -- TODO confirm).

    Returns:
        ``1 - visible``: 1 where a cell was never reached from the outside,
        0 where it was.
    """
    # fill in
    bs, vs = voxels.shape[:2]
    voxels = cp.ascontiguousarray(voxels)
    visible = cp.zeros_like(voxels, 'int32')
    cell_ids = cp.arange(bs * vs * vs * vs).astype('int32')

    in_params = 'int32 j, raw int32 bs, raw int32 vs'
    out_params = 'raw int32 voxels, raw int32 visible'

    # Seed pass: empty cells on any face of the cube are visible.
    mark_boundary = chainer.cuda.elementwise(
        in_params,
        out_params,
        '''
            int z = j % vs;
            int x = (j / vs) % vs;
            int y = (j / (vs * vs)) % vs;
            int bn = j / (vs * vs * vs);
            int pn = j;
            if ((y == 0) || (y == vs - 1) || (x == 0) || (x == vs - 1) || (z == 0) || (z == vs - 1)) {
                if (voxels[pn] == 0) visible[pn] = 1;
            }
        ''',
        'function',
    )
    # Growth pass: an empty, not-yet-visible interior cell becomes visible
    # when one of its six axis neighbours already is.
    grow_visible = chainer.cuda.elementwise(
        in_params,
        out_params,
        '''
            int z = j % vs;
            int x = (j / vs) % vs;
            int y = (j / (vs * vs)) % vs;
            int bn = j / (vs * vs * vs);
            int pn = j;
            if ((y == 0) || (y == vs - 1) || (x == 0) || (x == vs - 1) || (z == 0) || (z == vs - 1)) return;
            if (voxels[pn] == 0 && visible[pn] == 0) {
                int yi, xi, zi;
                yi = y - 1;
                xi = x;
                zi = z;
                if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                yi = y + 1;
                xi = x;
                zi = z;
                if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                yi = y;
                xi = x - 1;
                zi = z;
                if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                yi = y;
                xi = x + 1;
                zi = z;
                if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                yi = y;
                xi = x;
                zi = z - 1;
                if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                yi = y;
                xi = x;
                zi = z + 1;
                if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
            }
        ''',
        'function',
    )

    mark_boundary(cell_ids, bs, vs, voxels, visible)
    previous_total = visible.sum()
    while True:
        grow_visible(cell_ids, bs, vs, voxels, visible)
        current_total = visible.sum()
        # No new cell was reached in this sweep: the fill has converged.
        if current_total == previous_total:
            break
        previous_total = current_total

    return 1 - visible
def _get_output(output, input, shape=None):
    """Resolve the ``output`` argument into an array to write results to.

    Args:
        output: Either an existing ``cupy.ndarray`` to use as-is, or a value
            passed on as the ``dtype`` of a newly allocated array.
        input: Array providing the default shape.
        shape: Optional shape overriding ``input.shape``.

    Returns:
        cupy.ndarray: ``output`` itself when it is already an array,
        otherwise a new zero-filled C-ordered array.

    Raises:
        ValueError: If a supplied array does not have the expected shape.
    """
    if isinstance(output, cupy.ndarray):
        expected_shape = input.shape if shape is None else tuple(shape)
        if output.shape != expected_shape:
            raise ValueError('output shape is not correct')
        return output
    # Anything that is not an array is treated as the dtype to allocate.
    return cupy.zeros_like(input, shape=shape, dtype=output, order='C')
learning_rate = 1e-4
gamma = 0.99  # discount factor for reward
decay_rate = 0.99  # decay factor for RMSProp leaky sum of grad^2
resume = False  # resume from previous checkpoint?
render = False

# model initialization
D = 80 * 80  # input dimensionality: 80x80 grid
if resume:
    # Context manager so the checkpoint file is closed after loading.
    # NOTE: unpickling can execute arbitrary code; only resume from a
    # checkpoint file you trust.
    with open('save.p', 'rb') as checkpoint_file:
        model = pickle.load(checkpoint_file)
else:
    # ``H`` is not defined in this part of the file; presumably the hidden
    # layer size set earlier -- confirm.
    model = {}
    model['W1'] = np.random.randn(H, D) / np.sqrt(D)  # "Xavier" initialization
    model['W2'] = np.random.randn(H) / np.sqrt(H)

# update buffers that add up gradients over a batch
grad_buffer = {k: np.zeros_like(v) for k, v in model.items()}
# rmsprop memory
rmsprop_cache = {k: np.zeros_like(v) for k, v in model.items()}


def sigmoid(x):
    """Sigmoid "squashing" function to interval [0,1]."""
    return 1.0 / (1.0 + np.exp(-x))


def prepro(I):
    """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector """
    # NOTE(review): only the crop and downsample steps are visible here and
    # nothing is returned; the rest of the body appears to be missing.
    I = I[35:195]  # crop
    I = I[::2, ::2, 0]  # downsample by factor of 2
def svds(a, k=6, *, ncv=None, tol=0, which='LM', maxiter=None,
         return_singular_vectors=True):
    """Finds the largest ``k`` singular values/vectors for a sparse matrix.

    Args:
        a (cupy.ndarray or cupyx.scipy.sparse.csr_matrix): A real or complex
            array with dimension ``(m, n)``
        k (int): The number of singular values/vectors to compute. Must be
            ``1 <= k < min(m, n)``.
        ncv (int): The number of Lanczos vectors generated. Must be
            ``k + 1 < ncv < min(m, n)``. If ``None``, default value is used.
        tol (float): Tolerance for singular values. If ``0``, machine
            precision is used.
        which (str): Only 'LM' is supported. 'LM': finds ``k`` largest
            singular values.
        maxiter (int): Maximum number of Lanczos update iterations.
            If ``None``, default value is used.
        return_singular_vectors (bool): If ``True``, returns singular vectors
            in addition to singular values.

    Returns:
        tuple:
            If ``return_singular_vectors`` is ``True``, it returns ``u``, ``s``
            and ``vt`` where ``u`` is left singular vectors, ``s`` is singular
            values and ``vt`` is right singular vectors. Otherwise, it returns
            only ``s``.

    .. seealso:: :func:`scipy.sparse.linalg.svds`

    .. note::
        This is a naive implementation using cupyx.scipy.sparse.linalg.eigsh
        as an eigensolver on ``a.H @ a`` or ``a @ a.H``.

    """
    if a.ndim != 2:
        raise ValueError('expected 2D (shape: {})'.format(a.shape))
    if a.dtype.char not in 'fdFD':
        raise TypeError('unsupprted dtype (actual: {})'.format(a.dtype))
    m, n = a.shape
    if k <= 0:
        raise ValueError('k must be greater than 0 (actual: {})'.format(k))
    if k >= min(m, n):
        raise ValueError(
            'k must be smaller than min(m, n) (actual: {})'.format(k))

    # Solve the eigenproblem on the smaller of the two Gram matrices.
    a_herm = a.conj().T
    tall = m >= n
    gram = a_herm @ a if tall else a @ a_herm

    eigsh_options = dict(k=k, which=which, ncv=ncv, maxiter=maxiter, tol=tol)
    if return_singular_vectors:
        eigvals, eigvecs = eigsh(gram, return_eigenvectors=True,
                                 **eigsh_options)
    else:
        eigvals = eigsh(gram, return_eigenvectors=False, **eigsh_options)

    # Clamp negative eigenvalues to zero before taking square roots.
    eigvals = cupy.maximum(eigvals, 0)

    # Eigenvalues at or below a precision-dependent fraction of the largest
    # one are reported as zero singular values.
    real_char = eigvals.dtype.char.lower()
    cond = {'f': 1e3, 'd': 1e6}[real_char] * numpy.finfo(real_char).eps
    keep = eigvals > cond * cupy.max(eigvals)
    n_large = keep.sum()

    s = cupy.zeros_like(eigvals)
    s[:n_large] = cupy.sqrt(eigvals[keep])
    if not return_singular_vectors:
        return s

    # One set of singular vectors comes from the eigenvectors; the other is
    # obtained by applying ``a`` (or its conjugate transpose) and rescaling.
    eigvecs = eigvecs[:, keep]
    if tall:
        v = eigvecs
        u = a @ v / s[:n_large]
    else:
        u = eigvecs
        v = a_herm @ u / s[:n_large]

    # Pad with extra columns for the singular values that were cut off.
    n_missing = k - n_large
    u = _augmented_orthnormal_cols(u, n_missing)
    v = _augmented_orthnormal_cols(v, n_missing)
    return u, s, v.conj().T
def perfft2(im, compute_P=True, compute_spatial=False):
    """
    Moisan's Periodic plus Smooth Image Decomposition.

    The image is split as ``im = s + p``.  ``s`` is the 'smooth' component,
    forced to zero mean, built from the intensity jumps across opposite
    image borders; ``p`` is the 'periodic' remainder.

    Usage: ``S = perfft2(im, compute_P=False)``, ``S, P = perfft2(im)`` or
    ``S, P, s, p = perfft2(im, compute_spatial=True)``.

    Parameters
    ----------
    im : 2D array
        Input image.  Converted to float64 unless it is already float32 or
        float64.
    compute_P : bool
        Also return ``P``, the FFT of the periodic component.
    compute_spatial : bool
        Also return the spatial-domain components ``s`` and ``p`` (implies
        that ``P`` is returned as well).

    Returns
    -------
    S : FFT of the smooth component (always returned, and always first).
    P : FFT of the periodic component.
    s, p : smooth and periodic components in the spatial domain.

    This code is adapted from Lionel Moisan's Scilab function 'perdecomp.sci'
    "Periodic plus Smooth Image Decomposition" 07/2012 available at:
    <http://www.mi.parisdescartes.fr/~moisan/p+s>
    """
    if im.dtype not in ['float32', 'float64']:
        im = np.float64(im)

    n_rows, n_cols = im.shape

    # Boundary image: the jump between opposite edges on the border rows and
    # columns, zero everywhere else.  Corners collect both contributions.
    boundary = np.zeros_like(im)
    boundary[0, :] = im[0, :] - im[-1, :]
    boundary[-1, :] = -boundary[0, :]
    boundary[:, 0] = boundary[:, 0] + im[:, 0] - im[:, -1]
    boundary[:, -1] = boundary[:, -1] - im[:, 0] + im[:, -1]

    # Frequency grid for the filter applied to the boundary image.  Because
    # cos is cyclic the grid can simply run over 0 .. 2*pi.
    freq_x = 2 * np.pi * np.arange(0, n_cols) / float(n_cols)
    freq_y = 2 * np.pi * np.arange(0, n_rows) / float(n_rows)
    grid_x, grid_y = np.meshgrid(freq_x, freq_y)

    denom = 2. * (2. - np.cos(grid_x) - np.cos(grid_y))
    denom[0, 0] = 1.  # avoid dividing by zero at the DC term

    S = fft2(boundary) / denom
    S[0, 0] = 0  # enforce zero mean

    if not (compute_P or compute_spatial):
        return S

    P = fft2(im) - S
    if not compute_spatial:
        return S, P

    s = ifft2(S).real
    p = im - s
    return S, P, s, p
def powerspectrum(*U, average=False,
                  kmin=None, kmax=None, npts=None,
                  compute_fft=True, compute_sqr=True,
                  double=True, bench=False, **kwargs):
    r"""
    Returns the 1D radially averaged power spectrum :math:`P(k)`
    of a 1D, 2D, or 3D real or complex-valued scalar or
    vector field :math:`U`. This is computed as

    .. math::
        P(k) = \sum\limits_{|\mathbf{k}| = k} |\hat{U}(\mathbf{k})|^2,

    where :math:`\hat{U}` is the FFT of :math:`U`, :math:`\mathbf{k}`
    is a wavevector, and :math:`k` is a scalar wavenumber.

    Parameters
    ----------
    U : `np.ndarray`
        Real or complex vector or scalar data.
        If vector data, pass arguments as ``U1, U2, ..., Un``
        where ``Ui`` is the ith vector component.
        Each ``Ui`` can be 1D, 2D, or 3D, and all must have the
        same ``Ui.shape`` and ``Ui.dtype``.
    average : `bool`, optional
        If ``True``, average over values in a given
        bin and multiply by the bin volume.
        If ``False``, compute the sum.
    kmin : `int` or `float`, optional
        Minimum wavenumber in power spectrum bins.
        If ``None``, ``kmin = 1``.
    kmax : `int` or `float`, optional
        Maximum wavenumber in power spectrum bins.
        If ``None``, ``kmax = max(U.shape)//2``.
    npts : `int`, optional
        Number of modes between ``kmin`` and ``kmax``,
        inclusive.
        If ``None``, ``npts = kmax-kmin+1``.
    compute_fft : `bool`, optional
        If ``False``, do not take the FFT of the input data.
        FFTs should not be passed with the zero-frequency
        component in the center.
    compute_sqr : `bool`, optional
        If ``False``, sum the real part of the FFT. This can be
        useful for purely real FFTs, where the sign of the
        FFT is useful information. If ``True``, take the square
        as usual.
    double : `bool`, optional
        If ``False``, calculate FFTs in single precision.
        Useful for saving memory.
    bench : `bool`, optional
        Print message for time of calculation.
    kwargs
        Additional keyword arguments passed to
        ``cupyx.scipy.fft.fftn`` or ``cupyx.scipy.fft.rfftn``.

    Returns
    -------
    spectrum : `np.ndarray`, shape `(npts,)`
        Radially averaged power spectrum :math:`P(k)`.
    kn : `np.ndarray`, shape `(npts,)`
        Corresponding bins for spectrum :math:`k`.
    """
    if bench:
        t0 = time()

    shape = U[0].shape
    ndim = U[0].ndim
    ncomp = len(U)
    N = max(U[0].shape)

    if np.issubdtype(U[0].dtype, np.floating):
        real = True
        dtype = cp.float64 if double else cp.float32
    else:
        real = False
        dtype = cp.complex128 if double else cp.complex64

    if ndim not in [1, 2, 3]:
        raise ValueError("Dimension of image must be 1, 2, or 3.")

    # Get memory pools
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    # Compute power spectral density with memory efficiency: one component
    # at a time goes through a single reusable device buffer.
    density = None
    comp = cp.empty(shape, dtype=dtype)
    for i in range(ncomp):
        temp = cp.asarray(U[i], dtype=dtype)
        comp[...] = temp
        del temp
        if compute_fft:
            fft = _cufftn(comp, **kwargs)
        else:
            fft = comp
        if density is None:
            # Allocated lazily because the transform decides the shape.
            fftshape = fft.shape
            density = cp.zeros(fft.shape)
        if compute_sqr:
            density[...] += _mod_squared(fft)
        else:
            density[...] += cp.real(fft)
        del fft
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()

    # Need to double count if using rfftn
    if real:
        density[...] *= 2

    # Get radial coordinates
    kr = cp.asarray(_kmag_sampling(fftshape, real=real).astype(np.float32))

    # Flatten arrays
    kr = kr.ravel()
    density = density.ravel()

    # Get minimum and maximum k for binning if not given
    if kmin is None:
        kmin = 1
    if kmax is None:
        kmax = int(N / 2)
    if npts is None:
        npts = kmax - kmin + 1

    # Generate bins
    kn = cp.linspace(kmin, kmax, npts, endpoint=True)  # Left edges of bins
    dk = kn[1] - kn[0]
    kn += dk / 2  # Convert kn to bin centers.
    half_dk = dk / 2

    # Radially average power spectral density
    # NOTE(review): ``fac`` already contains a factor of pi and is multiplied
    # by ``cp.pi`` again below -- confirm the bin-volume formula.
    if ndim == 1:
        fac = 2 * np.pi
    elif ndim == 2:
        fac = 4 * np.pi
    else:
        fac = 4. / 3. * np.pi
    spectrum = cp.zeros_like(kn)
    for i, ki in enumerate(kn):
        # ``kr`` lives on the device, so use CuPy's logical_and and index
        # with the boolean mask directly.
        in_bin = cp.logical_and(kr >= ki - half_dk, kr < ki + half_dk)
        if average:
            dv = fac * cp.pi * ((ki + half_dk)**ndim - (ki - half_dk)**ndim)
            spectrum[i] = dv * cp.mean(density[in_bin])
        else:
            spectrum[i] = cp.sum(density[in_bin])

    spectrum = cp.asnumpy(spectrum)
    kn = cp.asnumpy(kn)

    del density, kr
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    return spectrum, kn
def random_noise(image, mode="gaussian", seed=None, clip=True, **kwargs):
    """
    Function to add random noise of various types to a floating-point image.

    Parameters
    ----------
    image : ndarray
        Input image data. Will be converted to float.
    mode : str, optional
        One of the following strings, selecting the type of noise to add:

        - 'gaussian'  Gaussian-distributed additive noise.
        - 'localvar'  Gaussian-distributed additive noise, with specified
                      local variance at each point of `image`.
        - 'poisson'   Poisson-distributed noise generated from the data.
        - 'salt'      Replaces random pixels with 1.
        - 'pepper'    Replaces random pixels with 0 (for unsigned images) or
                      -1 (for signed images).
        - 's&p'       Replaces random pixels with either 1 or `low_val`, where
                      `low_val` is 0 for unsigned images or -1 for signed
                      images.
        - 'speckle'   Multiplicative noise using out = image + n*image, where
                      n is Gaussian noise with specified mean & variance.
    seed : int, optional
        If provided, this will set the random seed before generating noise,
        for valid pseudo-random comparisons.
    clip : bool, optional
        If True (default), the output will be clipped after noise applied
        for modes `'speckle'`, `'poisson'`, and `'gaussian'`. This is
        needed to maintain the proper image data range. If False, clipping
        is not applied, and the output may extend beyond the range [-1, 1].
    mean : float, optional
        Mean of random distribution. Used in 'gaussian' and 'speckle'.
        Default : 0.
    var : float, optional
        Variance of random distribution. Used in 'gaussian' and 'speckle'.
        Note: variance = (standard deviation) ** 2. Default : 0.01
    local_vars : ndarray, optional
        Array of positive floats, same shape as `image`, defining the local
        variance at every image point. Used in 'localvar'.
    amount : float, optional
        Proportion of image pixels to replace with noise on range [0, 1].
        Used in 'salt', 'pepper', and 'salt & pepper'. Default : 0.05
    salt_vs_pepper : float, optional
        Proportion of salt vs. pepper noise for 's&p' on range [0, 1].
        Higher values represent more salt. Default : 0.5 (equal amounts)

    Returns
    -------
    out : ndarray
        Output floating-point image data on range [0, 1] or [-1, 1] if the
        input `image` was unsigned or signed, respectively.

    Notes
    -----
    Speckle, Poisson, Localvar, and Gaussian noise may generate noise outside
    the valid image range. The default is to clip (not alias) these values,
    but they may be preserved by setting `clip=False`. Note that in this case
    the output may contain values outside the ranges [0, 1] or [-1, 1].
    Use this option with care.

    Because of the prevalence of exclusively positive floating-point images in
    intermediate calculations, it is not possible to intuit if an input is
    signed based on dtype alone. Instead, negative values are explicitly
    searched for. Only if found does this function assume signed input.
    Unexpected results only occur in rare, poorly exposed cases (e.g. if all
    values are above 50 percent gray in a signed `image`). In this event,
    manually scaling the input to the positive domain will solve the problem.

    The Poisson distribution is only defined for positive integers. To apply
    this noise type, the number of unique values in the image is found and
    the next round power of two is used to scale up the floating-point result,
    after which it is scaled back down to the floating-point image range.

    To generate Poisson noise against a signed image, the signed image is
    temporarily converted to an unsigned image in the floating point domain,
    Poisson noise is generated, then it is returned to the original range.

    """
    mode = mode.lower()

    # Detect if a signed image was input
    if image.min() < 0:
        low_clip = -1.0
    else:
        low_clip = 0.0

    image = img_as_float(image)
    if seed is not None:
        cp.random.seed(seed=seed)

    allowedtypes = {
        "gaussian": "gaussian_values",
        "localvar": "localvar_values",
        "poisson": "poisson_values",
        "salt": "sp_values",
        "pepper": "sp_values",
        "s&p": "s&p_values",
        "speckle": "gaussian_values",
    }

    # Scalar defaults only.  The ``local_vars`` default is a full-size device
    # array, so it is created below only when the mode actually needs it.
    kwdefaults = {
        "mean": 0.0,
        "var": 0.01,
        "amount": 0.05,
        "salt_vs_pepper": 0.5,
    }

    allowedkwargs = {
        "gaussian_values": ["mean", "var"],
        "localvar_values": ["local_vars"],
        "sp_values": ["amount"],
        "s&p_values": ["amount", "salt_vs_pepper"],
        "poisson_values": [],
    }

    # An unknown ``mode`` raises KeyError from this lookup.
    mode_kwargs = allowedkwargs[allowedtypes[mode]]

    for key in kwargs:
        if key not in mode_kwargs:
            raise ValueError("%s keyword not in allowed keywords %s" %
                             (key, mode_kwargs))

    # Set kwarg defaults
    for kw in mode_kwargs:
        if kw in kwargs:
            continue
        if kw == "local_vars":
            kwargs[kw] = cp.zeros_like(image) + 0.01
        else:
            kwargs[kw] = kwdefaults[kw]

    if mode == "gaussian":
        noise = cp.random.normal(kwargs["mean"], kwargs["var"]**0.5,
                                 image.shape)
        out = image + noise

    elif mode == "localvar":
        # Ensure local variance input is correct
        if (kwargs["local_vars"] <= 0).any():
            raise ValueError("All values of `local_vars` must be > 0.")

        # Safe shortcut usage broadcasts kwargs['local_vars'] as a ufunc
        # out = image + cp.random.normal(0, kwargs['local_vars'] ** 0.5)
        # TODO: CuPy bug -> have to specify size argument for this to work.
        out = image + cp.random.normal(0, kwargs["local_vars"]**0.5,
                                       kwargs["local_vars"].shape)

    elif mode == "poisson":
        # Determine unique values in image & calculate the next power of two
        vals = len(cp.unique(image))
        vals = 2**cp.ceil(cp.log2(vals))

        # Ensure image is exclusively positive
        if low_clip == -1.0:
            old_max = image.max()
            image = (image + 1.0) / (old_max + 1.0)

        # Generating noise for each unique value in image.
        out = cp.random.poisson(image * vals) / float(vals)

        # Return image to original range if input was signed
        if low_clip == -1.0:
            out = out * (old_max + 1.0) - 1.0

    elif mode == "salt":
        # Re-call function with mode='s&p' and salt_vs_pepper=1 (all salt)
        out = random_noise(
            image,
            mode="s&p",
            seed=seed,
            amount=kwargs["amount"],
            salt_vs_pepper=1.0,
        )

    elif mode == "pepper":
        # Re-call function with mode='s&p' and salt_vs_pepper=0 (all pepper)
        out = random_noise(
            image,
            mode="s&p",
            seed=seed,
            amount=kwargs["amount"],
            salt_vs_pepper=0.0,
        )

    elif mode == "s&p":
        out = image.copy()
        p = kwargs["amount"]
        q = kwargs["salt_vs_pepper"]
        # ``flipped`` selects the pixels to replace; ``salted`` decides
        # whether a replaced pixel becomes 1 or ``low_clip``.
        flipped = cp.random.choice([True, False], size=image.shape,
                                   p=[p, 1 - p])
        salted = cp.random.choice([True, False], size=image.shape,
                                  p=[q, 1 - q])
        peppered = ~salted
        out[flipped & salted] = 1
        out[flipped & peppered] = low_clip

    elif mode == "speckle":
        noise = cp.random.normal(kwargs["mean"], kwargs["var"]**0.5,
                                 image.shape)
        out = image + image * noise

    # Clip back to original range, if necessary
    if clip:
        out = cp.clip(out, low_clip, 1.0)

    return out
def morphological_geodesic_active_contour(gimage, iterations,
                                          init_level_set='circle', smoothing=1,
                                          threshold='auto', balloon=0,
                                          iter_callback=lambda x: None):
    """Segment with Morphological Geodesic Active Contours (MorphGAC).

    The contour is evolved with morphological operators rather than by
    integrating the GAC partial differential equations. This suits objects
    whose borders are visible but noisy, cluttered or broken.

    Parameters
    ----------
    gimage : (M, N) or (L, M, N) array
        Preprocessed image or volume to segment. This is rarely the raw
        image; usually it is a version that highlights the borders of the
        object (see `morphsnakes.inverse_gaussian_gradient`). The evolution
        tends to stop where `gimage` is small, so the result depends
        strongly on this preprocessing.
    iterations : uint
        Number of iterations to run.
    init_level_set : str, (M, N) array, or (L, M, N) array
        Initial level set. An array is binarized and used directly. A string
        selects a generator for a level set with the shape of the image;
        accepted values are 'checkerboard' and 'circle' (see
        `checkerboard_level_set` and `circle_level_set`).
    smoothing : uint, optional
        Number of times the smoothing operator is applied per iteration.
        Reasonable values are around 1-4; larger values give smoother
        segmentations.
    threshold : float or 'auto', optional
        Areas of the image with a value smaller than this threshold are
        considered borders, where the evolution stops. With 'auto' the
        40th percentile of the image is used.
    balloon : float, optional
        Balloon force guiding the contour in non-informative areas, i.e.
        where the image gradient is too small to push the contour towards a
        border. Negative values shrink the contour, positive values expand
        it, and zero disables the force.
    iter_callback : function, optional
        Called with the current level set as its only argument, once before
        the first iteration and once after every iteration. Useful for
        debugging or plotting intermediate results.

    Returns
    -------
    out : (M, N) or (L, M, N) array
        Final segmentation (i.e., the final level set).

    See Also
    --------
    inverse_gaussian_gradient, circle_level_set, checkerboard_level_set

    Notes
    -----
    The morphological operators used here are proved to be infinitesimally
    equivalent to the GAC PDEs (see [1]_). Unlike PDE solvers, they do not
    suffer from numerical stability issues (no time step has to be tuned)
    and they are computationally faster.

    References
    ----------
    .. [1] A Morphological Approach to Curvature-based Evolution of Curves
           and Surfaces, Pablo Márquez-Neila, Luis Baumela, Luis Álvarez. In
           IEEE Transactions on Pattern Analysis and Machine Intelligence
           (PAMI), 2014, :DOI:`10.1109/TPAMI.2013.106`
    """
    init_level_set = _init_level_set(init_level_set, gimage.shape)
    _check_input(gimage, init_level_set)

    if threshold == 'auto':
        threshold = cp.percentile(gimage, 40)

    footprint = cp.ones((3, ) * gimage.ndim, dtype=cp.int8)
    image_grad = cp.gradient(gimage)

    # The balloon mask is only needed (and only defined) when the balloon
    # force is enabled.
    if balloon != 0:
        balloon_region = gimage > threshold / cp.abs(balloon)

    level_set = (init_level_set > 0).astype(cp.int8)
    iter_callback(level_set)

    for _step in range(iterations):
        # Balloon: dilate to expand, erode to shrink, restricted to the
        # balloon mask.
        if balloon > 0:
            morphed = ndi.binary_dilation(level_set, footprint)
        elif balloon < 0:
            morphed = ndi.binary_erosion(level_set, footprint)
        if balloon != 0:
            level_set[balloon_region] = morphed[balloon_region]

        # Image attachment: sign of the dot product between the image
        # gradient and the level-set gradient.
        attachment = cp.zeros_like(gimage)
        level_set_grad = cp.gradient(level_set)
        for g_img, g_ls in zip(image_grad, level_set_grad):
            attachment += g_img * g_ls
        level_set[attachment > 0] = 1
        level_set[attachment < 0] = 0

        # Smoothing
        for _rep in range(smoothing):
            level_set = _curvop(level_set)

        iter_callback(level_set)

    return level_set
def forward(self, is_training):
    """Hand this node's output tensor to every outbound layer.

    Each layer in ``self.outbound_layers`` gets ``self.output_tensor``
    assigned to its ``input_tensor`` attribute. When ``is_training`` is
    truthy and ``self.require_grads`` is set, ``self.grads`` is replaced by
    a zero array shaped like ``self.output_tensor``.
    """
    for downstream in self.outbound_layers:
        downstream.input_tensor = self.output_tensor

    # Gradient storage is only (re)allocated while training.
    if is_training and self.require_grads:
        self.grads = cp.zeros_like(self.output_tensor)
def convolve_gpu_chunked(x, b, pad='flip', nwin=DEFAULT_CONV_CHUNK, ntap=500, overlap=2000):
    """Chunked GPU FFT-based convolution for large arrays.

    This memory-controlled version splits the signal into chunks of `nwin`
    samples along axis 0. Each chunk is tapered in and out over `ntap`
    samples and convolved in the frequency domain. Consecutive chunks
    overlap by `overlap` samples so that the part that gets spliced is clear
    of the tapers; the splicing of overlapping chunks uses a cosine weight.

    :param x: signal, converted with `cp.asarray`. It is sliced as
        `x[first:last, :]`, so it must have at least two dimensions; chunking
        runs along the first one.
    :param b: 1-D convolution wavelet (asserted), converted with
        `cp.asarray`. Must be shorter than `nwin`.
    :param pad: None, 'zeros', 'constant', 'flip'.
        NOTE(review): this argument is not read anywhere in the body.
    :param nwin: chunk length in samples; must satisfy
        `b.shape[0] < nwin < x.shape[0]` (asserted).
    :param ntap: length in samples of the taper applied at both chunk ends.
    :param overlap: overlap in samples between consecutive chunks; must be
        at least `2 * ntap` (asserted).
    :return: array shaped like `x` holding the convolved signal.
    """
    x = cp.asarray(x)
    b = cp.asarray(b)
    assert b.ndim == 1
    n = x.shape[0]
    assert overlap >= 2 * ntap
    # create variables, the gain is to control the splicing
    # NOTE(review): `gain` is filled in below but never returned or read back.
    y = cp.zeros_like(x)
    gain = cp.zeros(n)
    # compute tapers/constants outside of the loop
    taper_in = (-cp.cos(cp.linspace(0, 1, ntap) * cp.pi) / 2 + 0.5)[:, cp.newaxis]
    taper_out = cp.flipud(taper_in)
    assert b.shape[0] < nwin < n
    # this is the convolution wavelet that we shift to be 0 lag
    bp = cp.pad(b, (0, nwin - b.shape[0]), mode='constant')
    bp = cp.roll(bp, -b.size // 2 + 1)
    bp = cp.fft.rfft(bp, n=nwin)[:, cp.newaxis]
    # this is used to splice windows together: cosine taper. The reversed taper is complementary
    scale = cp.minimum(
        cp.maximum(0, cp.linspace(-0.5, 1.5, overlap - 2 * ntap)), 1)
    splice = (-cp.cos(scale * cp.pi) / 2 + 0.5)[:, cp.newaxis]
    # loop over the signal by chunks and apply convolution in frequency domain
    first = 0
    while True:
        # clamp the window start so the last chunk still spans nwin samples
        first = min(n - nwin, first)
        last = min(first + nwin, n)
        # the convolution
        x_ = cp.copy(x[first:last, :])
        x_[:ntap] *= taper_in
        x_[-ntap:] *= taper_out
        x_ = cp.fft.irfft(cp.fft.rfft(x_, axis=0, n=nwin) * bp, axis=0, n=nwin)
        # this is to check the gain of summing the windows
        tt = cp.ones(nwin)
        tt[:ntap] *= taper_in[:, 0]
        tt[-ntap:] *= taper_out[:, 0]
        # the full overlap is outside of the tapers: we apply a cosine splicing to this part only
        if first > 0:
            full_overlap_first = first + ntap
            full_overlap_last = first + overlap - ntap
            # blend: existing samples weighted by (1 - splice), new chunk by splice
            gain[full_overlap_first:full_overlap_last] *= (1. - splice[:, 0])
            gain[full_overlap_first:full_overlap_last] += tt[ntap:overlap - ntap] * splice[:, 0]
            gain[full_overlap_last:last] = tt[overlap - ntap:]
            y[full_overlap_first:full_overlap_last] *= (1. - splice)
            y[full_overlap_first:full_overlap_last] += x_[ntap:overlap - ntap] * splice
            # past the spliced region the new chunk is copied verbatim
            y[full_overlap_last:last] = x_[overlap - ntap:]
        else:
            # first chunk: nothing to splice with yet
            y[first:last, :] = x_
            gain[first:last] = tt
        if last == n:
            break
        first += nwin - overlap
    return y
def move(self, move_preference_matrix, move_probability_matrix, ratio_random_move=0.1):
    """
    Advance every living agent by one step and record the new positions.

    1. Select all living agents and their neighbours.
    2. Create a movement matrix. All occupied by agent cells should be unavailable for move.
       Add {move_preference_matrix} to the values of the neighbours.
    3. If an agent does not have any available cells for moving - it should die.
       Drop all died agents from current moving agents.
    4. A fraction {ratio_random_move} of the time (10% by default) the agent moves randomly.
       Agent can't go to unavailable cells, so we recalculate probability for available neighbours.
       (sum of prob should be 1).
    5. Vectorized way to get random indices from array of probs. Like random.choice, but for 2d array.
    6. Find new flat indexes for random moving agents.
    7. Find new flat indexes for normal moving agents. Before argmax selection we shuffle neighbours,
       otherwise we will use always first max index.
    8. Create an array with new agents positions.
    9. If two agents want to occupy the same cell - then we accept only the first one.
       All agents which were declined to move because of a collision will die
       (they are put back on their previous cell and marked as not alive).
    10. If an agent reaches the top - it dies too.

    :param move_preference_matrix: The agent decides which space to move to by adding this
        move preference array to the value array of the surrounding environment.
    :param move_probability_matrix: Probabilities used for the random moves; they are masked
        by cell availability and renormalised per agent.
    :param ratio_random_move: Success probability passed to ``cp.random.binomial`` to decide,
        per agent, whether this step is a random move.
    :return: None. ``self.agents_state``, ``self._agents_positions`` and
        ``self.is_available_env`` are updated, and a host copy of the positions is appended
        to ``self._agents_positions_all_time``.
    """
    # (1)
    # NOTE(review): the reshape(-1, 27) calls below imply 27 neighbour cells per agent
    # (presumably a 3x3x3 neighbourhood) - confirm against where
    # agents_neighbour_flat_positions is built.
    live_agents_neighbour_flat_positions = self.agents_neighbour_flat_positions[
        self.agents_state]
    # (2)
    move_candidates = self.env.ravel(
    )[live_agents_neighbour_flat_positions].copy()
    is_available = self.is_available_env.ravel(
    )[live_agents_neighbour_flat_positions]
    # unavailable cells become NaN so that they can be excluded later on
    move_candidates[~is_available] = cp.nan
    move_candidates = move_candidates + cp.asarray(move_preference_matrix)
    # (3)
    should_die = cp.all(cp.isnan(move_candidates.reshape(-1, 27)), axis=1)
    should_die_agents = cp.flatnonzero(self.agents_state)[should_die]
    self.agents_state[should_die_agents] = False
    move_candidates = move_candidates[~should_die]
    live_agents_neighbour_flat_positions = live_agents_neighbour_flat_positions[
        ~should_die]
    # (4)
    is_random_move = cp.random.binomial(
        1, ratio_random_move, live_agents_neighbour_flat_positions.shape[0])
    is_random_move = is_random_move.astype(cp.bool)
    random_move_candidates = move_candidates[is_random_move]
    # zero out the probability of NaN (unavailable) cells, then renormalise each row
    random_move_probs = (~cp.isnan(random_move_candidates) *
                         cp.asarray(move_probability_matrix)).reshape(
                             -1, 27)
    random_move_probs /= random_move_probs.sum(axis=1)[:, None]
    # (5)
    # inverse-CDF sampling: first column whose cumulative probability exceeds the draw
    random_vals = cp.expand_dims(cp.random.rand(
        random_move_probs.shape[0]), axis=1)
    random_indexes = (random_move_probs.cumsum(axis=1) >
                      random_vals).argmax(axis=1)
    # (6)
    random_live_agents_neighbour_flat_positions = live_agents_neighbour_flat_positions[
        is_random_move]
    random_new_positions = cp.take_along_axis(
        random_live_agents_neighbour_flat_positions.reshape(-1, 27),
        random_indexes[:, None],
        axis=1).T[0]
    # (7)
    normal_move_candidates = move_candidates[~is_random_move]
    # normal_move_indexes = cp.nanargmax(normal_move_candidates.reshape(-1, 27), axis=1)[:, None]
    # smart analog of cp.nanargmax(normal_move_candidates.reshape(-1, 27), axis=1)[:, None]
    normal_flattened_move_candidates = normal_move_candidates.reshape(
        -1, 27)
    # random permutation of the columns of every row, used to break ties randomly
    normal_shuffled_candidates_idx = cp.random.rand(
        *normal_flattened_move_candidates.shape).argsort(axis=1)
    normal_shuffled_flattened_move_candidates = cp.take_along_axis(
        normal_flattened_move_candidates,
        normal_shuffled_candidates_idx,
        axis=1)
    normal_shuffled_candidates_max_idx = cp.nanargmax(
        normal_shuffled_flattened_move_candidates, axis=1)[:, None]
    # map the argmax found in shuffled order back to the original column index
    normal_move_indexes = cp.take_along_axis(
        normal_shuffled_candidates_idx,
        normal_shuffled_candidates_max_idx,
        axis=1)
    ####
    normal_live_agents_neighbour_flat_positions = live_agents_neighbour_flat_positions[
        ~is_random_move]
    normal_move_new_positions = cp.take_along_axis(
        normal_live_agents_neighbour_flat_positions.reshape(-1, 27),
        normal_move_indexes,
        axis=1).T[0]
    # (8)
    moving_agents_flat_positions = self.agents_flat_positions[
        self.agents_state]
    new_agents_flat_positions = moving_agents_flat_positions.copy()
    new_agents_flat_positions[is_random_move] = random_new_positions
    new_agents_flat_positions[~is_random_move] = normal_move_new_positions
    live_agents_indexes = cp.flatnonzero(self.agents_state)
    # (9)
    # only the first agent claiming a cell keeps it; the others stay where they were
    _, flat_positions_first_entry = cp.unique(new_agents_flat_positions,
                                              return_index=True)
    is_live = cp.zeros_like(new_agents_flat_positions).astype(cp.bool)
    is_live[flat_positions_first_entry] = True
    new_agents_flat_positions[~is_live] = moving_agents_flat_positions[
        ~is_live]
    new_agents_positions = cp.array(
        cp.unravel_index(new_agents_flat_positions, self.env.shape)).T
    # (10)
    # agents whose third coordinate equals 1 are marked as dead
    is_live[new_agents_positions[:, 2] == 1] = False
    self._agents_positions[live_agents_indexes] = new_agents_positions
    self.agents_state[live_agents_indexes] = is_live
    # NOTE(review): these writes only reach is_available_env if ravel() returns a view
    # (i.e. the array is contiguous) - confirm.
    self.is_available_env.ravel()[moving_agents_flat_positions] = True
    self.is_available_env.ravel()[new_agents_flat_positions] = False
    self._agents_positions_all_time.append(
        cp.asnumpy(self._agents_positions))
def remove_small_objects(ar, min_size=64, connectivity=1, in_place=False):
    """Zero out labeled objects that contain fewer than `min_size` pixels.

    `ar` is expected to hold labeled objects. A bool array is labeled first
    (connected components), which means bool and 0-and-1 integer arrays can
    give different results.

    Parameters
    ----------
    ar : ndarray (arbitrary shape, int or bool type)
        The array containing the objects of interest. If the array type is
        int, the ints must be non-negative.
    min_size : int, optional (default: 64)
        The smallest allowable object size.
    connectivity : int, {1, 2, ..., ar.ndim}, optional (default: 1)
        The connectivity defining the neighborhood of a pixel. Used during
        labelling if `ar` is bool.
    in_place : bool, optional (default: False)
        If ``True``, remove the objects in the input array itself.
        Otherwise, make a copy.

    Raises
    ------
    TypeError
        If the input array is of an invalid type, such as float or string.
    ValueError
        If the input array contains negative values.

    Returns
    -------
    out : ndarray, same shape and type as input `ar`
        The input array with small connected components removed.

    Examples
    --------
    >>> import cupy as cp
    >>> from cupyimg.skimage import morphology
    >>> a = cp.array([[0, 0, 0, 1, 0],
    ...               [1, 1, 1, 0, 0],
    ...               [1, 1, 1, 0, 1]], bool)
    >>> b = morphology.remove_small_objects(a, 6)
    >>> b
    array([[False, False, False, False, False],
           [ True,  True,  True, False, False],
           [ True,  True,  True, False, False]])
    >>> c = morphology.remove_small_objects(a, 7, connectivity=2)
    >>> c
    array([[False, False, False,  True, False],
           [ True,  True,  True, False, False],
           [ True,  True,  True, False, False]])
    >>> d = morphology.remove_small_objects(a, 6, in_place=True)
    >>> d is a
    True

    """
    # Only int and bool inputs are accepted; anything else raises here.
    _check_dtype_supported(ar)

    out = ar if in_place else ar.copy()

    # Nothing can be smaller than zero pixels, so skip all the work.
    if min_size == 0:
        return out

    is_binary = out.dtype == bool
    if is_binary:
        # Boolean input: derive the labels from connected components.
        footprint = ndi.generate_binary_structure(ar.ndim, connectivity)
        labels = cp.zeros_like(ar, dtype=cp.int32)
        ndi.label(ar, footprint, output=labels)
    else:
        # Integer input is taken as the label image itself.
        labels = out

    try:
        sizes = cp.bincount(labels.ravel())
    except ValueError:
        raise ValueError("Negative value labels are not supported. Try "
                         "relabeling the input with `scipy.ndimage.label` or "
                         "`skimage.morphology.label`.")

    if not is_binary and len(sizes) == 2:
        warn("Only one label was provided to `remove_small_objects`. "
             "Did you mean to use a boolean array?")

    # Per-label "too small" flag, broadcast back onto the pixels.
    undersized = (sizes < min_size)[labels]
    out[undersized] = 0
    return out
def denoise_tv_chambolle(image, weight=0.1, eps=2.0e-4, n_iter_max=200,
                         multichannel=False):
    """Perform total-variation denoising on n-dimensional images.

    Parameters
    ----------
    image : ndarray of ints, uints or floats
        Input data to be denoised. `image` can be of any numeric type; a
        non-float input is converted with `img_as_float` before the
        computation.
    weight : float, optional
        Denoising weight. The greater `weight`, the more denoising (at
        the expense of fidelity to `input`).
    eps : float, optional
        Relative difference of the value of the cost function that
        determines the stop criterion. The algorithm stops when:

            (E_(n-1) - E_n) < eps * E_0

    n_iter_max : int, optional
        Maximal number of iterations used for the optimization.
    multichannel : bool, optional
        Apply total-variation denoising separately for each channel (the
        last axis). This option should be true for color images, otherwise
        the denoising is also applied in the channels dimension.

    Returns
    -------
    out : ndarray
        Denoised image.

    Notes
    -----
    Make sure to set the multichannel parameter appropriately for color
    images.

    Total variation denoising minimizes the total variation of the image,
    which can be roughly described as the integral of the norm of the image
    gradient. It tends to produce "cartoon-like", piecewise-constant images.
    See https://en.wikipedia.org/wiki/Total_variation_denoising

    This code is an implementation of the algorithm of Rudin, Fatemi and
    Osher that was proposed by Chambolle in [1]_.

    References
    ----------
    .. [1] A. Chambolle, An algorithm for total variation minimization and
           applications, Journal of Mathematical Imaging and Vision,
           Springer, 2004, 20, 89-97.

    Examples
    --------
    2D example on astronaut image:

    >>> from skimage import color, data
    >>> img = color.rgb2gray(data.astronaut())[:50, :50]
    >>> img += 0.5 * img.std() * np.random.randn(*img.shape)
    >>> denoised_img = denoise_tv_chambolle(img, weight=60)

    3D example on synthetic data:

    >>> x, y, z = np.ogrid[0:20, 0:20, 0:20]
    >>> mask = (x - 22)**2 + (y - 20)**2 + (z - 17)**2 < 8**2
    >>> mask = mask.astype(float)
    >>> mask += 0.2*np.random.randn(*mask.shape)
    >>> res = denoise_tv_chambolle(mask, weight=100)

    """
    if image.dtype.kind != 'f':
        image = img_as_float(image)

    # Single-channel data is denoised over all of its axes at once.
    if not multichannel:
        return _denoise_tv_chambolle_nd(image, weight, eps, n_iter_max)

    # Otherwise every slice along the last axis is denoised on its own.
    denoised = cp.zeros_like(image)
    for channel in range(image.shape[-1]):
        denoised[..., channel] = _denoise_tv_chambolle_nd(
            image[..., channel], weight, eps, n_iter_max)
    return denoised
void mymul(int n, const double* x1, const double* x2, double* y) { int tid = blockDim.x * blockIdx.x + threadIdx.x; if (tid < n) { y[tid] = x1[tid] * x2[tid]; } } }""" module = cupy.RawModule(code=source_str) mymul_kernel = module.get_function("mymul") x1 = np.array([1, 2, 3, 4, 5], dtype=np.float64) x2 = np.array([7, 8, 9, 10, 12], dtype=np.float64) x1_dev = cupy.array(x1) x2_dev = cupy.array(x2) y_dev = cupy.zeros_like(x1_dev) blocksize = 2 n_blocks = int(np.ceil(len(x1) / blocksize)) mymul_kernel(grid=(n_blocks, ), block=(blocksize, ), args=(len(x1), x1_dev, x2_dev, y_dev)) y = y_dev.get() assert np.allclose(y, x1 * x2)
def reverse2(d):
    """Apply a fixed 32-position XOR mixing along the last axis of ``d``.

    ``d`` is transposed so that its last axis comes first. Output position
    ``i`` is the XOR of the 15 input positions listed in row ``i`` of the
    tap table below. The result is transposed back, so it has the shape and
    dtype of ``d``.
    """
    # taps[i] holds the input positions XOR-ed together to give output i.
    taps = np.array([
        [0, 7, 10, 12, 13, 15, 18, 19, 21, 22, 25, 28, 29, 30, 31],
        [1, 4, 7, 10, 11, 12, 14, 15, 16, 18, 21, 23, 25, 26, 28],
        [2, 5, 8, 11, 13, 15, 16, 17, 19, 20, 22, 26, 27, 28, 29],
        [3, 6, 9, 12, 14, 17, 18, 20, 21, 23, 24, 27, 28, 29, 30],
        [3, 4, 8, 9, 11, 14, 17, 18, 22, 23, 24, 25, 26, 27, 29],
        [0, 3, 5, 8, 10, 11, 14, 15, 17, 19, 20, 22, 24, 29, 30],
        [1, 6, 9, 11, 12, 15, 16, 18, 20, 21, 23, 24, 25, 30, 31],
        [2, 7, 8, 10, 13, 16, 17, 19, 21, 22, 24, 25, 26, 28, 31],
        [2, 4, 5, 7, 8, 15, 17, 20, 21, 22, 23, 26, 27, 29, 30],
        [2, 3, 4, 6, 7, 9, 12, 15, 17, 18, 20, 24, 26, 29, 31],
        [0, 3, 5, 7, 10, 13, 18, 19, 20, 21, 24, 25, 27, 28, 30],
        [1, 4, 6, 11, 14, 16, 19, 20, 21, 22, 25, 26, 28, 29, 31],
        [0, 1, 3, 6, 11, 12, 16, 17, 18, 19, 21, 25, 26, 30, 31],
        [0, 2, 3, 6, 7, 8, 11, 13, 16, 21, 22, 25, 27, 28, 30],
        [1, 3, 4, 7, 9, 14, 16, 17, 22, 23, 24, 26, 28, 29, 31],
        [0, 2, 5, 10, 15, 16, 17, 18, 20, 23, 24, 25, 27, 29, 30],
        [2, 3, 5, 6, 9, 12, 13, 14, 15, 16, 23, 26, 28, 29, 31],
        [0, 2, 5, 7, 9, 10, 12, 17, 20, 23, 26, 27, 28, 30, 31],
        [0, 1, 3, 4, 6, 10, 11, 12, 13, 18, 21, 24, 27, 29, 31],
        [1, 2, 4, 5, 7, 8, 11, 12, 13, 14, 19, 22, 25, 28, 30],
        [1, 2, 6, 7, 8, 9, 10, 11, 13, 19, 20, 24, 25, 27, 30],
        [1, 3, 4, 6, 8, 13, 14, 16, 19, 21, 24, 26, 27, 30, 31],
        [0, 2, 4, 5, 7, 8, 9, 14, 15, 17, 22, 25, 27, 28, 31],
        [0, 1, 3, 5, 6, 8, 9, 10, 12, 15, 18, 23, 24, 26, 29],
        [1, 4, 5, 6, 7, 10, 11, 13, 14, 18, 20, 21, 23, 24, 31],
        [1, 2, 4, 8, 10, 13, 15, 18, 19, 20, 22, 23, 25, 28, 31],
        [2, 3, 4, 5, 8, 9, 11, 12, 14, 16, 19, 21, 23, 26, 29],
        [0, 3, 4, 5, 6, 9, 10, 12, 13, 15, 17, 20, 22, 27, 30],
        [0, 1, 2, 3, 5, 9, 10, 14, 15, 16, 17, 19, 22, 27, 28],
        [0, 5, 6, 9, 11, 12, 14, 16, 18, 19, 22, 23, 24, 27, 29],
        [0, 1, 6, 7, 8, 10, 12, 13, 15, 17, 19, 20, 23, 25, 30],
        [0, 1, 2, 4, 7, 8, 9, 11, 13, 14, 16, 18, 21, 26, 31],
    ])

    src = d.T
    mixed = np.zeros_like(src)
    # src[taps] gathers, for each output position, its 15 contributing
    # rows; folding that axis with XOR yields all 32 output rows in one go.
    mixed[:32] = np.bitwise_xor.reduce(src[taps], axis=1)
    return mixed.T
+ cp.trace(cp.matmul(J_2_m, calc_K_v_v)) \ - 2 * (cp.trace(cp.matmul(cp.matmul(calc_K_u_v, J_2_m), cp.matmul(calc_K_u_v.T, J_1_m)))) ** .5 if USE_CUPY: cp.cuda.Stream.null.synchronize() return W_2 def kl_divergence(p, q): return np.sum(np.where(p != 0, p * np.log(p / q), 0)) if __name__ == "__main__": a = cp.random.normal(1, 2, 2048) b = cp.random.normal(0, 1, 2048) b = cp.zeros_like(a) print(np.var(a), np.mean(a)) _foo = np.mean(a) a = (a - _foo) / np.std(a, axis=-1, keepdims=True) + _foo print(np.var(a), np.mean(a)) import time import scipy.stats start_time = time.time() x = ([kernel_wasserstein_distance(a, b, True) for _ in range(1)]) print("time spent:", time.time() - start_time) print(x) start_time = time.time() print(kernel_wasserstein_distance(a, b, False)) print("time spent:", time.time() - start_time) print(scipy.stats.wasserstein_distance(a, b))
def reconstruct_alt(imgs,
                    discs,
                    hres_size,
                    row,
                    n_iters=1,
                    o_f_init=None,
                    del_1=1000,
                    del_2=1,
                    round_values=True,
                    plot_per_frame=False,
                    show_interval=None,
                    subtract_bg=False,
                    out_path=None):
    """The main reconstruction algorithm. Adapted from Tian et. al.

    Iteratively updates a high-resolution spectrum estimate ``o_f`` and a
    pupil estimate ``p`` from a stack of low-resolution frames, then draws a
    summary figure (loss curve, convergence curve, reference amplitude and
    phase, reconstruction, recovered pupil, mean raw frame).

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation was lost. In particular, confirm the placement of the
    ``show_interval`` plotting block, the per-iteration bookkeeping
    (``loss`` / ``conv``) and ``plt.close('all')`` against the original file.

    Parameters
    ----------
    imgs : sequence of arrays
        Low-resolution frames; each one is copied to the GPU with
        ``cp.array``. Frames are presumably square: only ``shape[0]`` of the
        first frame is read to define the low-res size.
    discs : sequence
        Per-frame entry that is either ``0`` (frame skipped) or a pair
        ``(k0x, k0y)`` used to compute the spectrum shift/crop position.
    hres_size : tuple
        Size of the high-resolution estimate. When it equals the low-res
        size the "old algorithm" branch (roll / ``FourierShift2D``) is used,
        otherwise a window of ``o_f`` is cropped for every frame.
    row : object
        Passed to ``get_cutoff``; ``row.AMPLITUDE.values[0]`` and
        ``row.PHASE.values[0]`` are read with ``read_tiff`` for the
        reference plots.
    n_iters : int, optional
        Number of passes over all frames.
    o_f_init : array, optional
        Initial spectrum, used only when the sizes differ and it is not
        None.
    del_1, del_2 : float, optional
        Constants added in the denominators of the object and pupil
        updates respectively.
    round_values : bool, optional
        In the equal-size branch, round the shifts and use ``cp.roll``
        instead of ``FourierShift2D``.
    plot_per_frame : bool, optional
        Plot intermediate results after every frame.
    show_interval : int, optional
        When not None (and ``plot_per_frame`` is false), plot intermediate
        results on iterations where ``j % show_interval == 0``.
    subtract_bg : bool, optional
        Estimate per-frame background with ``get_bg`` and subtract it;
        otherwise zeros are used.
    out_path : str, optional
        When given, the summary figure is saved there instead of shown, and
        the best iteration is printed.

    Returns
    -------
    o : array
        Object estimate (passed through ``correct`` when the sizes match).
    p : array
        Pupil estimate.
    it : cupy scalar
        1-based index of the iteration with the lowest convergence index
        (the first 10 iterations are ignored when ``n_iters > 10``).
    """
    # Put input images on GPU, estimate background noise
    imgs = [cp.array(img) for img in imgs]
    bgs = get_bg(imgs) if subtract_bg else cp.zeros(len(imgs))

    IMAGESIZE = imgs[0].shape[0]
    CUTOFF_FREQ_px = get_cutoff(row)
    FRAMES = len(imgs)

    orig = IMAGESIZE // 2 - 1  # Low-res origin
    lres_size = (IMAGESIZE, IMAGESIZE)
    m1, n1 = lres_size
    m, n = hres_size

    losses = []  # Reconstruction Loss
    convs = []  # Inverse Convergence index

    # Initial high-res guess
    if lres_size == hres_size:  # Initialize with ones
        # Use old algorithm
        F = lambda x: cp.fft.fftshift(cp.fft.fft2(x))
        Ft = lambda x: cp.fft.ifft2(cp.fft.ifftshift(x))
        o = cp.ones(hres_size)
        o_f = F(o)
    elif o_f_init is not None:  # Initialize with given initialization
        F = lambda x: cp.fft.fftshift(cp.fft.fft2(cp.fft.ifftshift(x)))
        Ft = lambda x: cp.fft.fftshift(cp.fft.ifft2(cp.fft.ifftshift(x)))
        o = cp.zeros_like(o_f_init)
        o_f = o_f_init
    else:  # Initialize with resized first frame from imgs
        F = lambda x: cp.fft.fftshift(cp.fft.fft2(cp.fft.ifftshift(x)))
        Ft = lambda x: cp.fft.fftshift(cp.fft.ifft2(cp.fft.ifftshift(x)))
        o = cp.sqrt(
            cp.array(cv2.resize(cp.asnumpy(imgs[0] - bgs[0]), hres_size)))
        o_f = Ft(o)

    # Pupil Function: filled disc of radius CUTOFF_FREQ_px centred on the
    # low-res origin
    p = cp.zeros(lres_size)
    p = cp.array(cv2.circle(cp.asnumpy(p), (orig, orig), CUTOFF_FREQ_px, 1, -1))
    ctf = p.copy()  # Ideal Pupil, for filtering later on

    # Main Loop
    log = tqdm(
        total=n_iters,
        desc=f'Starting...',
        bar_format=
        '{percentage:3.0f}% [{elapsed}<{remaining} ({rate_inv_fmt})]{bar}{desc}',
        leave=False,
        ascii=True)

    for j in range(n_iters):
        conv = []  # Convergence Index
        for i in range(FRAMES):
            if discs[i] == 0:  # Empty frame
                continue

            # Get k0x, k0y and hence, shifting values
            k0x, k0y = discs[i]

            # Construct auxiliary functions for the set of LEDs (= 1, here)
            if hres_size == lres_size:
                shift_x, shift_y = [
                    -round(k0x - orig), -round(k0y - orig)
                ] if round_values else [-(k0x - orig), -(k0y - orig)]

                if not round_values:
                    o_f_i = FourierShift2D(o_f, [shift_x, shift_y])  # O_i(k - k_m)
                else:
                    o_f_i = cp.roll(o_f, int(shift_y), axis=0)
                    o_f_i = cp.roll(o_f_i, int(shift_x), axis=1)

                yl, xl = 0, 0  # To reduce code later on

            else:  # Output size larger than individual frames
                _orig = hres_size[0] // 2 - 1

                del_x, del_y = k0x - orig, k0y - orig
                x, y = round(_orig - del_x), round(_orig - del_y)

                # top-left corner of the low-res window inside o_f
                yl = int(y - m1 // 2)
                xl = int(x - n1 // 2)

                assert xl > 0 and yl > 0, 'Both should be > 0'
                o_f_i = o_f[yl:yl + n1, xl:xl + m1].copy()

            psi_k = o_f_i * p * ctf  #DEBUG: REPLACE * ctf with * p

            # Plot outputs after each frame, for debugging
            if plot_per_frame:
                o_i = Ft(o_f_i * p)
                plt.figure(figsize=(10, 2))
                plt.subplot(161)
                plt.imshow(cp.asnumpy(correct(abs(o_i))))
                plt.title(f'$I_{{l}}({i})$')
                opts()  #DEBUG
                plt.subplot(162)
                plt.imshow(
                    cp.asnumpy(
                        cv2.convertScaleAbs(
                            cp.asnumpy(20 * cp.log(1 + abs(o_f_i * p))))))
                plt.title(f'$S_{{l}}({i})$')
                opts()  #DEBUG

            # Impose intensity constraint and update auxiliary function
            psi_r = F(psi_k)  #DEBUG: CHANGE BACK TO F
            # Low-res estimate obtained from our reconstruction
            # (both branches of this conditional are the same expression)
            I_l = abs(psi_r) if lres_size != hres_size else abs(psi_r)

            # Subtract background noise and clip values to avoid NaN
            I_hat = cp.clip(imgs[i] - bgs[i], a_min=0)
            phi_r = cp.sqrt(I_hat / (cp.abs(psi_r)**2)) * psi_r
            phi_k = Ft(phi_r)  #DEBUG: CHANGE BACK TO Ft

            # Update object and pupil estimates
            if hres_size == lres_size:
                if not round_values:
                    p_i = FourierShift2D(p, [-shift_x, -shift_y])  # P_i(k+k_m)
                else:
                    p_i = cp.roll(p, int(-shift_y), axis=0)
                    p_i = cp.roll(p_i, int(-shift_x), axis=1)

                if not round_values:
                    phi_k_i = FourierShift2D(
                        phi_k, [-shift_x, -shift_y])  # Phi_m_i(k+k_m)
                else:
                    phi_k_i = cp.roll(phi_k, int(-shift_y), axis=0)
                    phi_k_i = cp.roll(phi_k_i, int(-shift_x), axis=1)
            else:  # Output size larger than individual frames
                p_i = p.copy()
                phi_k_i = phi_k.copy()

            ## O_{i+1}(k)
            # computed into `temp` first so the pupil update below still
            # reads the not-yet-updated window of o_f
            temp = o_f[yl:yl + n1, xl:xl + m1].copy() + ( cp.abs(p_i) * cp.conj(p_i) * (phi_k_i - o_f[yl:yl + n1, xl:xl + m1].copy() * p_i) ) / \
                        ( cp.abs(p).max() * (cp.abs(p_i) ** 2 + del_1) )

            ## P_{i+1}(k)
            p = p + ( cp.abs(o_f_i) * cp.conj(o_f_i) * (phi_k - o_f_i * p) ) / \
                        ( cp.abs(o_f[yl:yl + n1, xl:xl + m1].copy()).max() * (cp.abs(o_f_i) ** 2 + del_2) )

            o_f[yl:yl + n1, xl:xl + m1] = temp.copy()

            ###### Using F here instead of Ft to get upright image
            o = F(o_f) if lres_size != hres_size else Ft(o_f)
            ######

            if plot_per_frame:
                plt.subplot(163)
                plt.imshow(cp.asnumpy(cp.mod(ctf * cp.angle(p), 2 * cp.pi)))
                plt.title(f'P({i})')
                opts()  #DEBUG
                plt.subplot(164)
                plt.imshow(cp.asnumpy(correct(abs(o))))
                plt.title(f'$I_{{h}}({i})$')
                opts()  #DEBUG
                plt.subplot(165)
                plt.imshow(cp.asnumpy(correct(cp.angle(o))))
                plt.title(f'$\\theta(I_{{h}}({i}))$')
                opts()  #DEBUG
                plt.subplot(166)
                plt.imshow(cp.asnumpy(show(cp.asnumpy(o_f))))
                plt.title(f'$S_{{h}}({i})$')
                opts()
                plt.show()  #DEBUG

            c = inv_conv_idx(I_l, imgs[i])
            conv.append(c)

        # Periodic snapshot; uses the values left over from the last
        # processed frame of this iteration
        if not plot_per_frame and (show_interval is not None
                                   and j % show_interval == 0):
            o_i = Ft(o_f_i * p)  #DEBUG
            plt.figure(figsize=(10, 2))
            plt.subplot(161)
            plt.imshow(cp.asnumpy(correct(abs(o_i))))
            plt.title(f'$I_{{l}}({i})$')
            opts()  #DEBUG
            plt.subplot(162)
            plt.imshow(
                cp.asnumpy(
                    cv2.convertScaleAbs(
                        cp.asnumpy(20 * cp.log(1 + abs(o_f_i * p))))))
            plt.title(f'$S_{{l}}({i})$')
            opts()  #DEBUG
            plt.subplot(163)
            plt.imshow(cp.asnumpy(cp.mod(ctf * cp.angle(p), 2 * cp.pi)))
            plt.title(f'P({i})')
            opts()  #DEBUG
            plt.subplot(164)
            plt.imshow(cp.asnumpy(correct(abs(o))))
            plt.title(f'$I_{{h}}({i})$')
            opts()  #DEBUG
            plt.subplot(165)
            plt.imshow(cp.asnumpy(correct(cp.angle(o))))
            plt.title(f'$\\theta(I_{{h}}({i}))$')
            opts()  #DEBUG
            plt.subplot(166)
            plt.imshow(
                cp.asnumpy(
                    cv2.convertScaleAbs(cp.asnumpy(20 * cp.log(1 + abs(o_f))))))
            plt.title(f'$S_{{h}}({i})$')
            opts()
            plt.show()  #DEBUG

        # Per-iteration bookkeeping: reconstruction loss and mean
        # convergence index over the processed frames
        loss = metric_norm(imgs, o_f_i, p)
        losses.append(loss)
        conv = float(sum(conv) / len(conv))
        convs.append(conv)
        log.set_description_str(
            f'[Iteration {j + 1}] Convergence Loss: {cp.asnumpy(conv):e}')
        log.update(1)

    # Summary figure
    scale = 7
    plt.figure(figsize=(3 * scale, 4 * scale))

    plt.subplot(421)
    plt.plot(cp.asnumpy(cp.arange(len(losses))),
             cp.asnumpy(cp.clip(cp.array(losses), a_min=None, a_max=1e4)),
             'b-')
    plt.title('Loss Curve')
    plt.ylabel('Loss Value')
    plt.xlabel('Iteration')
    plt.subplot(422)
    plt.plot(cp.asnumpy(cp.arange(len(convs))),
             cp.asnumpy(cp.clip(cp.array(convs), a_min=None, a_max=1e14)),
             'b-')
    plt.title('Convergence Index Curve')
    plt.ylabel('Convergence Index')
    plt.xlabel('Iteration')

    # Reference amplitude/phase read from disk and resized to hres_size
    amp = cp.array(cv2.resize(
        read_tiff(row.AMPLITUDE.values[0])[0], hres_size))
    phase = cp.array(cv2.resize(read_tiff(row.PHASE.values[0])[0], hres_size))

    plt.subplot(434)
    plt.title(f'amplitude (Scaled up from {lres_size})')
    plt.imshow(cp.asnumpy(to_uint8(amp)))
    opts()

    plt.subplot(435)
    plt.title(f'phase (Scaled up from {lres_size})')
    plt.imshow(cp.asnumpy(to_uint8(phase)))

    plt.subplot(436)
    rec = abs(cp.sqrt(amp) * cp.exp(1j * phase))
    plt.title(f'Ground Truth (Scaled up from {lres_size})')
    plt.imshow(cp.asnumpy(to_uint8(rec)))

    plt.subplot(437)
    plt.title('Reconstruction Amplitude')
    amp = abs(o)
    if lres_size == hres_size:
        amp = correct(amp)
    plt.imshow(cp.asnumpy(to_uint8((amp))))

    plt.subplot(438)
    plt.title('Reconstruction Phase')
    phase = cp.angle(o)
    if lres_size == hres_size:
        phase = correct(phase)
    plt.imshow(cp.asnumpy(to_uint8(phase)))

    plt.subplot(439)
    plt.title('Reconstructed Image')
    rec = abs(cp.sqrt(amp) * cp.exp(1j * phase))
    plt.imshow(cp.asnumpy(to_uint8(rec)))

    plt.subplot(427)
    plt.title(f'Recovered Pupil')
    p_show = cp.mod(ctf * cp.angle(p), 2 * cp.pi)
    p_show = (p_show / p_show.max() * 255).astype(np.uint8)
    plt.imshow(cp.asnumpy(p_show), cmap='nipy_spectral')

    plt.subplot(428)
    plt.title(f'Raw frames\' mean (Scaled up from {lres_size})')
    plt.imshow(cv2.resize(cp.asnumpy(cp.array(imgs).mean(axis=0)), hres_size))

    if out_path is None:
        plt.show()
    else:
        plt.savefig(out_path, bbox_inches='tight')
        plt.close('all')

    # Ignore early noise and print where the error is lowest
    if n_iters > 10:
        # +11: skip the first 10 entries and report a 1-based iteration
        it = cp.argmin(cp.array(convs[10:])) + 11
        if out_path is not None:
            print(f'Convergence index lowest at {it}th iteration.')
    else:
        it = cp.argmin(cp.array(convs)) + 1
        if out_path is not None:
            print(f'Convergence index lowest at {it}th iteration.')

    if lres_size == hres_size:
        o = correct(o)

    return o, p, it
def _prep_output(self, a):
    """Return the output destination for a computation on ``a``.

    When ``self.output`` equals the string ``'empty'`` a new zero-filled
    array shaped like ``a`` is returned; otherwise ``self.output`` itself is
    returned.
    """
    wants_new_buffer = self.output == 'empty'
    return cupy.zeros_like(a) if wants_new_buffer else self.output
def rezToPhy(ctx, dat_path=None, output_dir=None):
    """Write the sorting results held in ``ctx`` to ``output_dir`` as files.

    Arrays are written as ``.npy`` files, per-cluster tables as ``.tsv``
    files, and a ``params.py`` file is created when one does not exist yet.
    Every ``.npy`` file and any ``.phy`` directory already present in
    ``output_dir`` is deleted before the new files are written.

    Args:
        ctx: Context object. It is first passed through ``checkClusters``;
            afterwards its ``probe``, ``intermediate`` and ``params``
            attributes are read.
        dat_path: Path written into ``params.py`` as ``dat_path``. A path
            that is not absolute is written with a ``../`` prefix.
        output_dir: Destination folder; created (including parents) when
            missing.

    Returns:
        None. All results are side effects on ``output_dir``.

    NOTE(review): both path arguments default to ``None``, but
    ``Path(None)`` and ``os.path.isabs(None)`` raise ``TypeError``, so the
    defaults do not look usable -- confirm with callers.
    """
    # pull out results from kilosort's rez to either return to workspace or to
    # save in the appropriate format for the phy GUI to run on. If you provide
    # a savePath it should be a folder

    savePath = output_dir
    Path(savePath).mkdir(exist_ok=True, parents=True)

    ctx = checkClusters(ctx)  # check clusters integrity

    probe = ctx.probe
    ir = ctx.intermediate
    params = ctx.params
    # NOTE(review): this value is overwritten from W.shape further down.
    nt0 = params.nt0

    # spikeTimes will be in samples, not seconds
    W = cp.asarray(ir.Wphy).astype(np.float32)
    Wrot = ir.Wrot
    est_contam_rate = ir.est_contam_rate
    good = ir.good
    Ths = ir.Ths

    st3 = cp.asarray(ir.st3_c)

    U = cp.asarray(ir.U_s).astype(np.float32)
    iNeigh = ir.iNeigh_s
    iNeighPC = ir.iNeighPC_s
    simScore = ir.simScore_s

    # Keep at most the first four columns of the spike table.
    if st3.shape[1] > 4:
        st3 = st3[:, :4]

    # Sort the spikes by their first column (used as spike time below).
    isort = cp.argsort(st3[:, 0])
    st3 = st3[isort, :]
    # cProj = ir.cProj_c[cp.asnumpy(isort), :]
    # cProjPC = ir.cProjPC_c[cp.asnumpy(isort), :, :]

    # Remove stale outputs of a previous export from the target folder.
    fs = os.listdir(savePath)
    for file in fs:
        if file.endswith('.npy'):
            os.remove(join(savePath, file))
    if os.path.isdir(join(savePath, '.phy')):
        shutil.rmtree(join(savePath, '.phy'))

    spikeTimes = st3[:, 0].astype(cp.uint64)
    spikeTemplates = st3[:, 1].astype(cp.uint32)

    # (DEV_NOTES) if statement below seems useless due to above if statement
    # (st3 has been truncated to at most 4 columns, so this never runs and
    # spikeClusters stays undefined).
    if st3.shape[1] > 4:
        spikeClusters = (1 + st3[:, 4]).astype(cp.uint32)

    # templateFeatures = cProj
    templateFeatureInds = iNeigh.astype(cp.uint32)
    # pcFeatures = cProjPC
    pcFeatureInds = iNeighPC.astype(cp.uint32)

    whiteningMatrix = cp.asarray(Wrot) / params.scaleproc
    whiteningMatrixInv = cp.linalg.pinv(whiteningMatrix)

    amplitudes = st3[:, 2]

    Nchan = probe.Nchan

    xcoords = probe.xc
    ycoords = probe.yc
    chanMap = probe.chanMap
    chanMap0ind = chanMap  # - 1

    nt0, Nfilt = W.shape[:2]

    # (DEV_NOTES) 2 lines below can be combined
    # templates = cp.einsum('ikl,jkl->ijk', U, W).astype(cp.float32)
    # templates = cp.zeros((Nchan, nt0, Nfilt), dtype=np.float32, order='F')
    tempAmpsUnscaled = cp.zeros(Nfilt, dtype=np.float32)
    # Templates are appended to templates.npy one at a time through the
    # writer instead of being collected in a single array first.
    templates_writer = NpyWriter(join(savePath, 'templates.npy'), (Nfilt, nt0, Nchan), np.float32)
    for iNN in tqdm(range(Nfilt), desc="Computing templates"):
        t = cp.dot(U[:, iNN, :], W[:, iNN, :].T).T
        templates_writer.append(t)
        # Unwhiten the template, then take the largest per-column
        # peak-to-peak range as its unscaled amplitude.
        t_unw = cp.dot(t, whiteningMatrixInv)
        assert t_unw.ndim == 2
        tempChanAmps = t_unw.max(axis=0) - t_unw.min(axis=0)
        tempAmpsUnscaled[iNN] = tempChanAmps.max()

    templates_writer.close()
    # templates = cp.transpose(templates, (2, 1, 0))  # now it's nTemplates x nSamples x nChannels
    # we include all channels so this is trivial
    templatesInds = cp.tile(np.arange(Nfilt), (Nchan, 1))

    # here we compute the amplitude of every template...

    # unwhiten all the templates
    # tempsUnW = cp.einsum('ijk,kl->ijl', templates, whiteningMatrixinv)
    # tempsUnW = cp.zeros(templates.shape, dtype=np.float32, order='F')
    # for t in tqdm(range(templates.shape[0]), desc="Unwhitening the templates"):
    #     tempsUnW[t, :, :] = cp.dot(templates[t, :, :], whiteningMatrixInv)

    # The amplitude on each channel is the positive peak minus the negative
    # tempChanAmps = tempsUnW.max(axis=1) - tempsUnW.min(axis=1)

    # The template amplitude is the amplitude of its largest channel
    # tempAmpsUnscaled = tempChanAmps.max(axis=1)

    # assign all spikes the amplitude of their template multiplied by their
    # scaling amplitudes
    # tempAmpsUnscaled = cp.(tempAmpsUnscaled, axis=0).astype(np.float32)
    spikeAmps = tempAmpsUnscaled[spikeTemplates] * amplitudes

    # take the average of all spike amps to get actual template amps (since
    # tempScalingAmps are equal mean for all templates)
    ta = clusterAverage(spikeTemplates, spikeAmps)
    tids = cp.unique(spikeTemplates).astype(np.int64)
    tempAmps = cp.zeros_like(tempAmpsUnscaled, order='F')
    tempAmps[tids] = ta  # because ta only has entries for templates that had at least one spike
    tempAmps = params.gain * tempAmps

    # for consistency, make first dimension template number
    save_pcs(ir.spikes_to_remove, ir.cProj, ir.cProjPC, savePath, st3, isort)
    # with open(, 'wb') as fp:
    #     save_large_array(fp, templateFeatures)
    # cProj = ir.cProj_c[cp.asnumpy(isort), :]
    # cProjPC = ir.cProjPC_c[cp.asnumpy(isort), :, :]

    def _save(name, arr, dtype=None):
        # Write `arr` to <savePath>/<name>.npy, cast to `dtype` when given.
        cp.save(join(savePath, name + '.npy'), arr.astype(dtype or arr.dtype))

    # NOTE(review): savePath was already used by Path(savePath).mkdir above,
    # so it cannot be None when execution gets here.
    if savePath is not None:
        _save('spike_times', spikeTimes)
        _save('spike_templates', spikeTemplates, cp.uint32)
        if st3.shape[1] > 4:
            _save('spike_clusters', spikeClusters, cp.uint32)
        else:
            _save('spike_clusters', spikeTemplates, cp.uint32)
        _save('amplitudes', amplitudes)
        # _save('templates', templates)
        _save('templates_ind', templatesInds)

        chanMap0ind = chanMap0ind.astype(cp.int32)

        _save('channel_map', chanMap0ind)
        _save('channel_positions', np.c_[xcoords, ycoords])

        # _save('template_features', templateFeatures)
        # with open(join(savePath, 'template_features.npy'), 'wb') as fp:
        #     save_large_array(fp, templateFeatures)
        _save('template_feature_ind', templateFeatureInds.T)

        # _save('pc_features', pcFeatures)
        # with open(join(savePath, 'pc_features.npy'), 'wb') as fp:
        #     save_large_array(fp, pcFeatures)
        _save('pc_feature_ind', pcFeatureInds.T)

        _save('whitening_mat', whiteningMatrix)
        _save('whitening_mat_inv', whiteningMatrixInv)

        _save('thresholds', Ths)

        if 'simScore' in ir:
            similarTemplates = simScore
            _save('similar_templates', similarTemplates)

        # NaN contamination estimates are replaced by 1 before being
        # written out as percentages.
        est_contam_rate[np.isnan(est_contam_rate)] = 1
        with open(join(savePath, 'cluster_group.tsv'), 'w') as f:
            f.write('cluster_id\tgroup\n')
            for j in range(len(good)):
                if good[j]:
                    f.write('%d\tgood\n' % j)
                # else:
                #     f.write('%d\tmua\n' % j)

        with open(join(savePath, 'cluster_ContamPct.tsv'), 'w') as f:
            f.write('cluster_id\tContamPct\n')
            for j in range(len(good)):
                f.write('%d\t%.1f\n' % (j, 100 * est_contam_rate[j]))

        with open(join(savePath, 'cluster_Amplitude.tsv'), 'w') as f:
            f.write('cluster_id\tAmplitude\n')
            for j in range(len(good)):
                f.write('%d\t%.1f\n' % (j, tempAmps[j]))

        # make params file (an existing params.py is left untouched)
        if not os.path.exists(join(savePath, 'params.py')):
            with open(join(savePath, 'params.py'), 'w') as f:
                if os.path.isabs(dat_path):
                    f.write('dat_path = "%s"\n' % dat_path)
                else:
                    f.write('dat_path = "../%s"\n' % dat_path)
                f.write('n_channels_dat = %d\n' % probe.NchanTOT)
                f.write('dtype = "int16"\n')
                f.write('offset = 0\n')
                f.write('hp_filtered = False\n')
                f.write('sample_rate = %i\n' % params.fs)
                f.write('template_scaling = %.1f\n' % params.templateScaling)
def convolutional_barycenter_gpu(Hv, reg, alpha, stabThresh=1e-30, niter=1500,
                                 tol=1e-9, sharpening=False, verbose=False):
    """Compute a weighted Wasserstein barycenter with CuPy arrays.

    Arguments:
        Hv {array-like} -- stack of distributions, indexed along the first
            axis; converted with ``cp.array``
        reg {float} -- regularization term "gamma"; used as the ``sigma``
            of the Gaussian filter that plays the role of the kernel
        alpha {list} -- one weight per distribution (normalised to sum to 1)

    Keyword Arguments:
        stabThresh {float} -- lower bound applied before divisions and
            logarithms (default: {1e-30})
        niter {int} -- maximum number of iterations (default: {1500})
        tol {float} -- iterations stop once the summed absolute change of
            the barycenter drops below this value (default: {1e-9})
        sharpening {bool} -- apply entropic sharpening (default: {False})
        verbose {bool} -- print per-iteration progress (default: {False})

    Returns:
        numpy.ndarray -- the barycenter, rescaled by the mean mass of the
            first two input distributions
    """

    def smooth(x):
        # The filter runs on the host: copy to NumPy, blur, copy back.
        on_host = cp.asnumpy(x)
        return cp.array(gaussian_filter(on_host, sigma=reg))

    def entropy_gap(candidate, target_entropy, exponent):
        return entropy(candidate**exponent) - target_entropy

    alpha = cp.array(alpha)
    alpha = alpha / alpha.sum()

    Hv = cp.array(Hv)
    # NOTE(review): only Hv[0] and Hv[1] enter this average, even when more
    # than two distributions are supplied -- confirm this is intended.
    mean_weights = (Hv[0].sum() + Hv[1].sum()) / 2.

    # Normalise every distribution to unit mass, in place.
    for k in range(len(Hv)):
        Hv[k] = Hv[k] / Hv[k].sum()

    v = cp.ones(Hv.shape)
    Kw = cp.ones(Hv.shape)
    entropy_max = max_entropy(Hv)
    barycenter = cp.zeros(Hv[0].shape)
    change = 1

    for step in range(niter):
        tic = time.time()
        previous = barycenter

        # Accumulate the weighted log-barycenter, then exponentiate.
        barycenter = cp.zeros_like(Hv[0, :, :])
        for k in range(Hv.shape[0]):
            denom = cp.maximum(stabThresh, smooth(v[k, :, :]))
            Kw[k, :, :] = smooth(Hv[k, :, :] / denom)
            barycenter += alpha[k] * cp.log(
                cp.maximum(stabThresh, v[k, :, :] * Kw[k, :, :]))
        barycenter = cp.exp(barycenter)

        change = cp.sum(cp.abs(barycenter - previous))

        if sharpening:
            beta = 1
            if entropy(barycenter) > entropy_max:
                beta = newton(
                    lambda b: entropy_gap(barycenter, entropy_max, b),
                    1, tol=1e-6)
                # A negative exponent is rejected in favour of no sharpening.
                if beta < 0:
                    beta = 1
            barycenter = barycenter**beta

        for k in range(Hv.shape[0]):
            v[k, :, :] = barycenter / cp.maximum(stabThresh, Kw[k, :, :])

        if verbose:
            print("iter : ", step, "change : ", change, 'time :',
                  time.time() - tic)
        if change < tol:
            break

    return cp.asnumpy(barycenter * mean_weights)
def variance(input, labels=None, index=None):
    """Calculates the variance of the values of an n-D image array,
    optionally at specified sub-regions.

    Args:
        input (cupy.ndarray): Nd-image data to process.
        labels (cupy.ndarray or None): Labels defining sub-regions in `input`.
            If not None, it is broadcast against `input`.
        index (cupy.ndarray, scalar or None): `labels` to include in output.
            If None (default), all values where `labels` is greater than
            zero are used.

    Returns:
        variance (cupy.ndarray): Values of variance, for each sub-region if
        `labels` and `index` are specified.

    Raises:
        TypeError: If `input` is not a ``cupy.ndarray`` or is complex, if
            `labels` is given but is not a ``cupy.ndarray``, or if `index`
            is neither a scalar nor a ``cupy.ndarray``.

    .. seealso:: :func:`scipy.ndimage.variance`
    """
    if not isinstance(input, cupy.ndarray):
        raise TypeError('input must be cupy.ndarray')

    if input.dtype in (cupy.complex64, cupy.complex128):
        raise TypeError("cupyx.scipy.ndimage.variance doesn't support {}"
                        "".format(input.dtype.type))

    use_kern = False
    # There are constraints on types because of atomicAdd() in CUDA.
    if input.dtype not in [
            cupy.int32, cupy.float16, cupy.float32, cupy.float64,
            cupy.uint32, cupy.uint64, cupy.ulonglong
    ]:
        warnings.warn(
            'Using the slower implementation as '
            'cupyx.scipy.ndimage.variance supports int32, float16, '
            'float32, float64, uint32, uint64 as data types '
            'for the fast implementation', _util.PerformanceWarning)
        use_kern = True

    def calc_var_with_intermediate_float(input):
        vals_c = input - input.mean()
        count = vals_c.size
        # Does not use `ndarray.mean()` here to return the same results as
        # SciPy does, especially in case `input`'s dtype is float16.
        return cupy.square(vals_c).sum() / cupy.asanyarray(count).astype(float)

    if labels is None:
        return calc_var_with_intermediate_float(input)

    if not isinstance(labels, cupy.ndarray):
        raise TypeError('label must be cupy.ndarray')

    input, labels = cupy.broadcast_arrays(input, labels)

    if index is None:
        return calc_var_with_intermediate_float(input[labels > 0])

    if cupy.isscalar(index):
        return calc_var_with_intermediate_float(input[labels == index])

    # Every scalar (Python ints included) has returned above, so anything
    # that is not an ndarray at this point is unsupported.
    if not isinstance(index, cupy.ndarray):
        raise TypeError('index must be cupy.ndarray or a scalar int')

    mean_val, count = _mean_driver(input, labels, index, True, use_kern)
    if use_kern:
        # Broadcast every requested label against the whole image and reduce
        # over the image axes, leaving one value per entry of `index`.
        new_axis = (..., *(cupy.newaxis for _ in range(input.ndim)))
        return cupy.where(labels[None, ...] == index[new_axis],
                          cupy.square(input - mean_val[new_axis]),
                          0).sum(tuple(range(1, input.ndim + 1))) / count
    out = cupy.zeros_like(index, dtype=cupy.float64)
    return _ndimage_variance_kernel(input, labels, index, index.size,
                                    mean_val, out) / count
def main():
    """Train the model on the dataset at ``args.dataset_path``.

    For every batch a random number of leading views is encoded into a
    scene representation ``r`` (all zeros when no view is observed), one
    randomly chosen view serves as the query, and the sum of the mean
    negative log-likelihood and the accumulated mean KL terms is minimised.
    The model parameters are written to ``model.hdf5`` inside
    ``args.snapshot_path`` after each pass over the iterator.
    """
    # Create the snapshot directory up front; an existing directory is fine,
    # any other failure surfaces now instead of when the first snapshot is
    # written.
    os.makedirs(args.snapshot_path, exist_ok=True)

    dataset = gqn.data.Dataset(args.dataset_path)
    sampler = gqn.data.Sampler(dataset)
    iterator = gqn.data.Iterator(sampler, batch_size=args.batch_size)

    hyperparams = HyperParameters()
    model = Model(hyperparams)
    model.to_gpu()

    optimizer = Optimizer(model.parameters)

    for iteration in range(args.training_steps):
        for batch_index, data_indices in enumerate(iterator):
            # shape: (batch, views, height, width, channels)
            # range: [-1, 1]
            images, viewpoints = dataset[data_indices]

            image_size = images.shape[2:4]
            total_views = images.shape[1]

            # sample number of views
            num_views = random.choice(range(total_views))
            query_index = random.choice(range(total_views))

            if num_views > 0:
                observed_images = images[:, :num_views]
                observed_viewpoints = viewpoints[:, :num_views]

                # (batch, views, height, width, channels) -> (batch * views, height, width, channels)
                observed_images = observed_images.reshape(
                    (args.batch_size * num_views, ) +
                    observed_images.shape[2:])
                observed_viewpoints = observed_viewpoints.reshape(
                    (args.batch_size * num_views, ) +
                    observed_viewpoints.shape[2:])

                # (batch * views, height, width, channels) -> (batch * views, channels, height, width)
                observed_images = observed_images.transpose((0, 3, 1, 2))

                # transfer to gpu
                observed_images = chainer.cuda.to_gpu(observed_images)
                observed_viewpoints = chainer.cuda.to_gpu(observed_viewpoints)

                r = model.representation_network.compute_r(
                    observed_images, observed_viewpoints)

                # (batch * views, channels, height, width) -> (batch, views, channels, height, width)
                r = r.reshape((args.batch_size, num_views) + r.shape[1:])

                # sum element-wise across views
                r = cf.sum(r, axis=1)
            else:
                # No observed view: use an all-zero representation.
                r = np.zeros(
                    (args.batch_size, hyperparams.channels_r) +
                    hyperparams.chrz_size,
                    dtype="float32")
                r = chainer.cuda.to_gpu(r)

            query_images = images[:, query_index]
            query_viewpoints = viewpoints[:, query_index]

            # (batch, height, width, channels) -> (batch, channels, height, width)
            query_images = query_images.transpose((0, 3, 1, 2))

            # transfer to gpu
            query_images = chainer.cuda.to_gpu(query_images)
            query_viewpoints = chainer.cuda.to_gpu(query_viewpoints)

            # Zero initial states for the generator (hg, cg, u) and the
            # inference network (he, ce).
            chz_shape = (
                args.batch_size,
                hyperparams.channels_chz,
            ) + hyperparams.chrz_size
            hg_0 = xp.zeros(chz_shape, dtype="float32")
            cg_0 = xp.zeros(chz_shape, dtype="float32")
            u_0 = xp.zeros(
                (
                    args.batch_size,
                    hyperparams.generator_u_channels,
                ) + image_size,
                dtype="float32")
            he_0 = xp.zeros(chz_shape, dtype="float32")
            ce_0 = xp.zeros(chz_shape, dtype="float32")

            sigma_t = 1.0

            loss_kld = 0
            he_l = he_0
            ce_l = ce_0
            hg_l = hg_0
            cg_l = cg_0
            u_l = u_0
            for _ in range(hyperparams.generator_total_timestep):
                # zg_l = model.generation_network.sample_z(hg_l)
                # hg_l, cg_l, u_l = model.generation_network.forward_onestep(
                #     hg_0, cg_0, u_0, zg_l, query_viewpoints, r)
                # x = model.generation_network.sample_x(u_l)

                he_next, ce_next = model.inference_network.forward_onestep(
                    hg_l, he_l, ce_l, query_images, query_viewpoints, r)

                # The posterior mean is computed from the state *before*
                # this step's update (he_l, not he_next).
                mu_z_q = model.inference_network.compute_mu_z(he_l)
                ze_l = cf.gaussian(mu_z_q, xp.zeros_like(mu_z_q))

                hg_next, cg_next, u_next = model.generation_network.forward_onestep(
                    hg_l, cg_l, u_l, ze_l, query_viewpoints, r)

                mu_z_p = model.generation_network.compute_mu_z(hg_l)

                kld = gqn.nn.chainer.functions.gaussian_kl_divergence(
                    mu_z_q, mu_z_p)

                loss_kld += cf.mean(kld)

                hg_l = hg_next
                cg_l = cg_next
                u_l = u_next
                he_l = he_next
                ce_l = ce_next

            mu_x = model.generation_network.compute_mu_x(u_l)
            negative_log_likelihood = gqn.nn.chainer.functions.gaussian_negative_log_likelihood(
                query_images, mu_x, xp.full_like(mu_x, math.log(sigma_t)))
            loss_nll = cf.mean(negative_log_likelihood)
            loss = loss_nll + loss_kld
            model.cleargrads()
            loss.backward()
            optimizer.step()

            print("Iteration {}: {} / {} - loss: {}".format(
                iteration + 1, batch_index + 1, len(iterator),
                float(loss.data)))

        chainer.serializers.save_hdf5(
            os.path.join(args.snapshot_path, "model.hdf5"), model.parameters)
def test_compose_vector_fields(shape):
    r"""Compare ``compose_vector_fields`` with the dipy reference.

    Two random displacement fields that map pixels of an input image
    exactly onto pixels of an output image are created. Their composition
    is computed both with dipy and with the implementation under test, and
    warping a random image through the composed field must reproduce the
    image obtained by applying the two exact assignments in sequence.
    Finally a field whose displacements all point outside the grid is
    checked to compose to zero.
    """
    np.random.seed(8315759)
    input_shape = shape
    tgt_sh = shape
    ndim = len(shape)
    s = 1.5
    t = 2.5

    # create a simple affine transformation (translation and scaling)
    if ndim == 3:
        ns, nr, nc = input_shape[0], input_shape[1], input_shape[2]
        trans = np.eye(4)
        trans[:3, 3] = [-t * ns, -t * nr, -t * nc]
        trans_inv = np.linalg.inv(trans)
        scale = np.diag([1 * s, 1 * s, 1 * s, 1])
        dipy_func = vfu.compose_vector_fields_3d
        dipy_create_func = vfu.create_random_displacement_3d
    elif ndim == 2:
        nr, nc = input_shape[0], input_shape[1]
        trans = np.eye(3)
        trans[:2, 2] = [-t * nr, -t * nc]
        trans_inv = np.linalg.inv(trans)
        scale = np.diag([1 * s, 1 * s, 1])
        dipy_func = vfu.compose_vector_fields_2d
        dipy_create_func = vfu.create_random_displacement_2d

    gt_affine = trans_inv.dot(scale.dot(trans))

    # both grids share the same grid-to-world transform
    input_grid2world = gt_affine
    target_grid2world = gt_affine

    def _random_field():
        # One random displacement field together with its exact assignment.
        disp, assign = dipy_create_func(
            np.array(input_shape, dtype=np.int32),
            input_grid2world,
            np.array(tgt_sh, dtype=np.int32),
            target_grid2world,
        )
        return np.array(disp, dtype=floating), np.array(assign)

    disp1, assign1 = _random_field()
    disp2, assign2 = _random_field()

    # create a random image (with decimal digits) to warp
    moving_image = np.empty(tgt_sh, dtype=floating)
    moving_image[...] = np.random.randint(
        0, 10, np.size(moving_image)).reshape(tuple(tgt_sh))

    # set boundary values to zero so we don't test wrong interpolation due to
    # floating point precision
    for axis in range(ndim):
        for edge in (0, -1):
            border = [slice(None)] * ndim
            border[axis] = edge
            moving_image[tuple(border)] = 0

    # evaluate the composed warping using the exact assignments
    # (first 1 then 2)
    warp1 = moving_image[tuple(assign2[..., ax] for ax in range(ndim))]
    expected = warp1[tuple(assign1[..., ax] for ax in range(ndim))]

    # compose the displacement fields
    target_world2grid = np.linalg.inv(target_grid2world)
    premult_index = target_world2grid.dot(input_grid2world)
    premult_disp = target_world2grid

    disp1d = cupy.asarray(disp1)
    disp2d = cupy.asarray(disp2)
    premult_indexd = cupy.asarray(premult_index)
    premult_dispd = cupy.asarray(premult_disp)
    moving_imaged = cupy.asarray(moving_image)

    def _assert_warp_matches(field):
        # Warping through `field` must reproduce the exact composition for
        # both interpolation orders.
        for order in [0, 1]:
            warped = warp(
                moving_imaged,
                field,
                None,
                premult_indexd,
                premult_dispd,
                order=order,
            )
            cupy.testing.assert_array_almost_equal(warped, expected)

    for time_scaling in [0.25, 1.0, 4.0]:
        composition, stats = dipy_func(
            disp1,
            disp2 / time_scaling,
            premult_index,
            premult_disp,
            time_scaling,
            None,
        )
        compositiond, statsd = compose_vector_fields(
            disp1d,
            disp2d / time_scaling,
            premult_indexd,
            premult_dispd,
            time_scaling,
            None,
        )
        cupy.testing.assert_array_almost_equal(composition, compositiond)
        cupy.testing.assert_array_almost_equal(stats, statsd)
        _assert_warp_matches(compositiond)

        # test updating the displacement field instead of creating a new one
        compositiond = disp1d.copy()
        compose_vector_fields(
            compositiond,
            disp2d / time_scaling,
            premult_indexd,
            premult_dispd,
            time_scaling,
            compositiond,
        )
        _assert_warp_matches(compositiond)

    # Test non-overlapping case
    # X[..., k] holds the k-th grid coordinate of every voxel.
    X = np.empty(input_shape + (ndim, ), dtype=np.float64)
    ones = np.ones(input_shape)
    for axis, extent in enumerate(input_shape):
        bshape = [1] * ndim
        bshape[axis] = extent
        X[..., axis] = np.asarray(range(extent)).reshape(bshape) * ones

    n_labels = ndim
    for extent in input_shape:
        n_labels *= extent
    random_labels = np.random.randint(0, 2, n_labels)
    random_labels = random_labels.reshape(input_shape + (ndim, ))

    # Each component is sent to either -1 or tgt_sh[0].
    values = np.array([-1, tgt_sh[0]])
    disp1 = (values[random_labels] - X).astype(floating)
    disp1d = cupy.asarray(disp1)
    disp2d = cupy.asarray(disp2)
    composition, stats = compose_vector_fields(disp1d, disp2d, None, None,
                                               1.0, None)
    cupy.testing.assert_array_almost_equal(composition,
                                           cupy.zeros_like(composition))

    # test updating the displacement field instead of creating a new one
    compositiond = disp1d.copy()
    compose_vector_fields(compositiond, disp2d, None, None, 1.0, compositiond)
    cupy.testing.assert_array_almost_equal(compositiond,
                                           cupy.zeros_like(composition))
def update(self, data, now_epoch):
    """Run one training step over all model replicas.

    For each of the ``self.gpu_num`` replicas the generator and
    discriminator losses are computed and back-propagated. The gradients of
    replicas ``1 .. gpu_num - 1`` are then added onto replica 0, the
    learning rates are halved when a new epoch divisible by
    ``self.epoch_decay`` starts, and both optimizers are stepped.

    Args:
        data: Batch container exposing ``depth_{i}``, ``image_{i}``,
            ``text_{i}``, ``wrong_image_{i}`` and ``wrong_depth_{i}``
            attributes for every replica index ``i``.
        now_epoch: Current epoch number, compared with ``self.now_epoch``
            to detect an epoch change.
    """
    # Linear warm-up of the KL ratio over the first KL_loss_iter calls.
    # (The ratio is currently not applied to the loss, see below.)
    if self.KL_counter < self.KL_loss_iter:
        self.KL_loss_ratio = self.KL_counter * (1 / self.KL_loss_iter)
        self.KL_counter += 1
    else:
        self.KL_loss_ratio = 1

    for i in range(self.gpu_num):
        netG = getattr(self.model, f"netG_{i}")
        netD = getattr(self.model, f"netD_{i}")

        # depth / wrong_depth are fetched but not used in this step.
        depth = getattr(data, f"depth_{i}")
        real_img = getattr(data, f"image_{i}")
        embeddings = getattr(data, f"text_{i}")
        wrong_img = getattr(data, f"wrong_image_{i}")
        wrong_depth = getattr(data, f"wrong_depth_{i}")
        #wrong_text = getattr(data, f"wrong_text_{i}")

        fake_img, KL_loss = netG(embeddings)

        g_loss = self.KL_loss_conf * KL_loss
        #g_loss = self.KL_loss_conf * self.KL_loss_ratio * KL_loss
        d_loss = 0

        for key in real_img.keys():
            # NOTE(review): `fg` and `bg` are not defined in this method;
            # presumably module-level names -- confirm.
            real_logit, real_img_logit_local = netD(real_img[key],
                                                    embeddings,
                                                    fg=fg,
                                                    bg=bg)
            fake_logit, fake_img_logit_local = netD(fake_img[key],
                                                    embeddings,
                                                    fg=fg,
                                                    bg=bg)
            wrong_logit, wrong_img_logit_local = netD(wrong_img[key],
                                                      embeddings,
                                                      fg=fg,
                                                      bg=bg)

            # compute disc pair loss
            real_labels = cuda.to_gpu(
                xp.ones_like(real_logit.data, dtype="float32"), i)
            fake_labels = cuda.to_gpu(
                xp.zeros_like(real_logit.data, dtype="float32"), i)
            pair_loss = compute_d_pair_loss(real_logit, wrong_logit,
                                            fake_logit, real_labels,
                                            fake_labels)

            # compute disc image loss
            real_labels = cuda.to_gpu(
                xp.ones_like(real_img_logit_local.data, dtype="float32"), i)
            fake_labels = cuda.to_gpu(
                xp.zeros_like(real_img_logit_local.data, dtype="float32"), i)
            img_loss = compute_d_img_loss(wrong_img_logit_local,
                                          real_img_logit_local,
                                          fake_img_logit_local, real_labels,
                                          fake_labels)

            d_loss += (pair_loss + img_loss)

            # compute gen loss
            real_labels = cuda.to_gpu(
                xp.ones_like(fake_logit.data, dtype="float32"), i)
            g_loss += compute_g_loss(fake_logit, real_labels)

            real_labels = cuda.to_gpu(
                xp.ones_like(fake_img_logit_local.data, dtype="float32"), i)
            g_loss += compute_g_loss(fake_img_logit_local, real_labels)

        # Generator gradients are only refreshed every n_dis-th call.
        if self.counter % self.n_dis == 0:
            netG.cleargrads()
            g_loss.backward()

        # Cut the graph at the generated images before the discriminator
        # backward pass.
        unchain_backward(fake_img)

        netD.cleargrads()
        d_loss.backward()

    #add calc grad
    netG_0 = getattr(self.model, "netG_0")
    netD_0 = getattr(self.model, "netD_0")
    # Accumulate the gradients of every other replica onto replica 0. The
    # range must reach gpu_num - 1 inclusive; the previous upper bound of
    # gpu_num - 1 silently dropped the last replica's gradients.
    for i in range(1, self.gpu_num):
        netG = getattr(self.model, f"netG_{i}")
        netD = getattr(self.model, f"netD_{i}")
        netG_0.addgrads(netG)
        netD_0.addgrads(netD)

    # Halve both learning rates when entering an epoch divisible by
    # epoch_decay.
    if self.now_epoch != now_epoch:
        self.now_epoch = now_epoch
        if self.now_epoch % self.epoch_decay == 0:
            self.netG_opt.hyperparam.alpha /= 2
            self.netD_opt.hyperparam.alpha /= 2

    self.netG_opt.update()
    self.netD_opt.update()
    cuda.memory_pool.free_all_blocks()
    self.counter += 1
def run(args):
    """Run an ONNX test case through TensorRT and verify its outputs.

    The model and the first test data set are loaded from ``args.test_dir``,
    a TensorRT engine is built, one inference is executed and every output
    is compared with the expected value using ``args.rtol`` / ``args.atol``.
    Afterwards the inference is benchmarked.

    Args:
        args: Parsed command-line options. Reads ``test_dir``,
            ``model_file``, ``debug``, ``fp16_mode``, ``batch_size``,
            ``rtol``, ``atol`` and ``iterations``.

    Returns:
        Whatever ``run_onnx_util.run_benchmark`` returns for the inference
        callable.

    Raises:
        RuntimeError: If the ONNX model cannot be parsed by TensorRT.
        AssertionError: If binding counts/shapes do not match the test data
            or an output differs from its expected value.
    """
    onnx_filename = run_onnx_util.onnx_model_file(args.test_dir,
                                                  args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    with open(onnx_filename, 'rb') as f:
        onnx_proto = f.read()

    if args.debug:
        logger = tensorrt.Logger(tensorrt.Logger.Severity.INFO)
    else:
        logger = tensorrt.Logger()
    builder = tensorrt.Builder(logger)
    if args.fp16_mode:
        builder.fp16_mode = True
    # TODO(hamaji): Infer batch_size from inputs.
    builder.max_batch_size = args.batch_size
    network = builder.create_network()
    parser = tensorrt.OnnxParser(network, logger)
    if not parser.parse(onnx_proto):
        for i in range(parser.num_errors):
            sys.stderr.write('ONNX import failure: %s\n'
                             % parser.get_error(i))
        raise RuntimeError('ONNX import failed')
    engine = builder.build_cuda_engine(network)
    context = engine.create_execution_context()

    # Bindings are laid out as all inputs followed by all outputs.
    assert len(inputs) + len(outputs) == engine.num_bindings
    for i, (_, in_value) in enumerate(inputs):
        assert args.batch_size == in_value.shape[0]
        assert in_value.shape[1:] == engine.get_binding_shape(i)
    for i, (_, out_value) in enumerate(outputs, start=len(inputs)):
        assert args.batch_size == out_value.shape[0]
        assert out_value.shape[1:] == engine.get_binding_shape(i)

    inputs = [v for n, v in inputs]
    outputs = [v for n, v in outputs]
    gpu_inputs = to_gpu(inputs)
    gpu_outputs = [cupy.zeros_like(cupy.array(output)) for output in outputs]
    bindings = [a.data.ptr for a in gpu_inputs]
    bindings += [a.data.ptr for a in gpu_outputs]

    context.execute(args.batch_size, bindings)

    actual_outputs = to_cpu(gpu_outputs)

    for name, expected, actual in zip(output_names, outputs, actual_outputs):
        # Pass the output name as err_msg so that a mismatch reports which
        # output failed.
        np.testing.assert_allclose(expected, actual,
                                   rtol=args.rtol, atol=args.atol,
                                   err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        context.execute(args.batch_size, bindings)
        cupy.cuda.device.Device().synchronize()

    return run_onnx_util.run_benchmark(compute, args.iterations)
def sum(input, labels=None, index=None):
    """Calculates the sum of the values of an n-D image array, optionally
    at specified sub-regions.

    Args:
        input (cupy.ndarray): Nd-image data to process.
        labels (cupy.ndarray or None): Labels defining sub-regions in `input`.
            If not None, it is broadcast against `input`.
        index (cupy.ndarray, int or None): `labels` to include in output.
            If None (default), all values where `labels` is non-zero are
            used.

    Returns:
        sum (cupy.ndarray): sum of values, for each sub-region if
        `labels` and `index` are specified.

    Raises:
        TypeError: If `input` is not a ``cupy.ndarray`` or is complex, if
            `labels` is given but is not a ``cupy.ndarray``, or if `index`
            is neither an ``int`` nor a ``cupy.ndarray``.

    .. seealso:: :func:`scipy.ndimage.sum`
    """
    if not isinstance(input, cupy.ndarray):
        raise TypeError('input must be cupy.ndarray')

    if input.dtype in (cupy.complex64, cupy.complex128):
        raise TypeError("cupyx.scipy.ndimage.sum does not support {}".format(
            input.dtype.type))

    use_kern = False
    # There are constraints on types because of atomicAdd() in CUDA.
    if input.dtype not in [
            cupy.int32, cupy.float16, cupy.float32, cupy.float64,
            cupy.uint32, cupy.uint64, cupy.ulonglong
    ]:
        warnings.warn(
            'Using the slower implementation as '
            'cupyx.scipy.ndimage.sum supports int32, float16, '
            'float32, float64, uint32, uint64 as data types '
            'for the fast implementation', _util.PerformanceWarning)
        use_kern = True

    if labels is None:
        return input.sum()

    if not isinstance(labels, cupy.ndarray):
        raise TypeError('label must be cupy.ndarray')

    input, labels = cupy.broadcast_arrays(input, labels)

    if index is None:
        return input[labels != 0].sum()

    if not isinstance(index, cupy.ndarray):
        if not isinstance(index, int):
            raise TypeError('index must be cupy.ndarray or a scalar int')
        return (input[labels == index]).sum()

    if index.size == 0:
        return cupy.array([], dtype=cupy.int64)

    out = cupy.zeros_like(index, dtype=cupy.float64)

    # The following parameters for sum were determined using a Tesla P100.
    if (input.size >= 262144 and index.size <= 4) or use_kern:
        return _ndimage_sum_kernel_2(input, labels, index, out)
    return _ndimage_sum_kernel(input, labels, index, index.size, out)
def test_fetch_float_texture(self):
    """Copy random float32 data through a texture and compare the result.

    The texture is backed by a CUDA array, linear memory or pitched 2D
    memory depending on ``self.mem_type`` and is accessed through a texture
    object or a texture reference depending on ``self.target``.
    Combinations of dimensionality and memory type that do not apply are
    skipped.
    """
    width, height, depth = self.dimensions
    if depth != 0:
        dim = 3
    elif height != 0:
        dim = 2
    else:
        dim = 1
    suffix = '{}D'.format(dim)

    # linear memory only applies to 1D, pitch2D only to 2D
    required_dim = {'linear': 1, 'pitch2D': 2}.get(self.mem_type)
    if required_dim is not None and dim != required_dim:
        pytest.skip('The test case {0} is inapplicable for {1} and thus '
                    'skipped.'.format(self.dimensions, self.mem_type))

    # generate input data and allocate output buffer
    shape = (depth, height, width)[-dim:]

    # prepare input, output, and texture memory
    tex_data = cupy.random.random(shape, dtype=cupy.float32)
    real_output = cupy.zeros_like(tex_data)
    ch = ChannelFormatDescriptor(32, 0, 0, 0,
                                 runtime.cudaChannelFormatKindFloat)
    assert tex_data.flags['C_CONTIGUOUS']
    assert real_output.flags['C_CONTIGUOUS']

    # back the texture and create the matching resource descriptor
    if self.mem_type == 'CUDAarray':
        arr = CUDAarray(ch, width, height, depth)
        expected_output = cupy.zeros_like(tex_data)
        assert expected_output.flags['C_CONTIGUOUS']
        # test bidirectional copy
        arr.copy_from(tex_data)
        arr.copy_to(expected_output)
        res = ResourceDescriptor(runtime.cudaResourceTypeArray, cuArr=arr)
    else:
        # linear and pitch2D are backed by ndarray
        arr = tex_data
        expected_output = tex_data
        if self.mem_type == 'linear':
            res = ResourceDescriptor(
                runtime.cudaResourceTypeLinear, arr=arr, chDesc=ch,
                sizeInBytes=arr.size * arr.dtype.itemsize)
        else:  # pitch2D
            # In this case, we rely on the fact that the hand-picked array
            # shape meets the alignment requirement. This is CUDA's
            # limitation, see CUDA Runtime API reference guide.
            # "TexturePitchAlignment" is assumed to be 32, which should be
            # applicable for most devices.
            res = ResourceDescriptor(
                runtime.cudaResourceTypePitch2D, arr=arr, chDesc=ch,
                width=width, height=height,
                pitchInBytes=width * arr.dtype.itemsize)

    address_mode = (runtime.cudaAddressModeClamp,
                    runtime.cudaAddressModeClamp)
    tex = TextureDescriptor(address_mode, runtime.cudaFilterModePoint,
                            runtime.cudaReadModeElementType)

    use_object = self.target == 'object'
    if use_object:
        # create a texture object
        texobj = TextureObject(res, tex)
        mod = cupy.RawModule(source_obj)
    else:  # self.target == 'reference'
        mod = cupy.RawModule(source_ref)
        texrefPtr = mod.get_texref('texref' + suffix)
        # bind texture ref to resource
        texref = TextureReference(texrefPtr, res, tex)  # noqa

    # get and launch the kernel
    fetch_suffix = 'fetch' if self.mem_type == 'linear' else ''
    ker = mod.get_function('copyKernel' + suffix + fetch_suffix)

    block = (4, 4, 2)[:dim]
    extents = (width, height, depth)[:dim]
    # one block per started tile along every used axis
    grid = tuple((extent + threads - 1) // threads
                 for extent, threads in zip(extents, block))
    args = (real_output, )
    if use_object:
        args = args + (texobj, )
    args = args + extents
    ker(grid, block, args)

    # validate result
    assert (real_output == expected_output).all()
def test_backward(self):
    """Check coordinate gradients against shifted-image estimates.

    The loss is the noise-weighted sum of
    ``neural_renderer_chainer.differentiation(images, coordinates)``.  For
    100 random interior pixels, the magnitude of the back-propagated
    gradient along y and x is compared with the larger of two one-sided
    estimates built by shifting pixel values by one position.
    """
    images = np.random.normal(size=(10, 32, 32, 3)).astype('float32')
    ramp = np.arange(32).astype('float32')
    x = np.tile(ramp[None, None, :, None], (10, 32, 1, 1))
    y = np.tile(ramp[None, :, None, None], (10, 1, 32, 1))
    coordinates = np.concatenate((x, y), axis=-1)
    coordinates = ((coordinates / 31) * 2 - 1) * 31. / 32.
    noise = np.random.normal(size=(10, 32, 32, 3)).astype('float32')
    step = 2 / 32.

    images = chainer.cuda.to_gpu(images)
    coordinates = chainer.Variable(chainer.cuda.to_gpu(coordinates))
    noise = chainer.cuda.to_gpu(noise)

    rendered = neural_renderer_chainer.differentiation(images, coordinates)
    loss = cf.sum(rendered * noise)
    loss.backward()
    grad_coordinates = coordinates.grad

    def shifted_grad(moves):
        """Return the non-positive part of one shifted-image estimate.

        ``moves`` is a sequence of ``(dst, src)`` pixel index pairs; each
        ``dst`` pixel of a copy of ``images`` receives the original value
        at ``src``.
        """
        shifted = images.copy()
        for dst, src in moves:
            shifted[(slice(None), ) + dst] = \
                images[(slice(None), ) + src].copy()
        estimate = ((shifted - images) * noise).sum((1, 2, 3)) / step
        return cp.minimum(estimate, cp.zeros_like(estimate))

    for _ in range(100):
        yi = np.random.randint(1, 31)
        xi = np.random.randint(1, 31)
        here = (yi, xi)
        above, below = (yi - 1, xi), (yi + 1, xi)
        left, right = (yi, xi - 1), (yi, xi + 1)

        # gradient along y
        grad_yb = shifted_grad([(above, here), (here, below)])
        grad_yt = shifted_grad([(below, here), (here, above)])
        grad_y_abs = cp.maximum(cp.abs(grad_yb), cp.abs(grad_yt))
        chainer.testing.assert_allclose(
            grad_y_abs, cp.abs(grad_coordinates[:, yi, xi, 1]))

        # gradient along x
        grad_xl = shifted_grad([(left, here), (here, right)])
        grad_xr = shifted_grad([(right, here), (here, left)])
        grad_x_abs = cp.maximum(cp.abs(grad_xl), cp.abs(grad_xr))
        chainer.testing.assert_allclose(
            grad_x_abs, cp.abs(grad_coordinates[:, yi, xi, 0]))
def _denoise_tv_chambolle_nd(image, weight=0.1, eps=2.0e-4, n_iter_max=200): """Perform total-variation denoising on n-dimensional images. Parameters ---------- image : ndarray n-D input data to be denoised. weight : float, optional Denoising weight. The greater `weight`, the more denoising (at the expense of fidelity to `input`). eps : float, optional Relative difference of the value of the cost function that determines the stop criterion. The algorithm stops when: (E_(n-1) - E_n) < eps * E_0 n_iter_max : int, optional Maximal number of iterations used for the optimization. Returns ------- out : ndarray Denoised array of floats. Notes ----- Rudin, Osher and Fatemi algorithm. """ ndim = image.ndim p = cp.zeros((image.ndim, ) + image.shape, dtype=image.dtype) g = cp.zeros_like(p) d = cp.zeros_like(image) i = 0 slices_g = [slice(None)] * (ndim + 1) slices_d = [slice(None)] * ndim slices_p = [slice(None)] * (ndim + 1) while i < n_iter_max: if i > 0: # d will be the (negative) divergence of p d = -p.sum(0) for ax in range(ndim): slices_d[ax] = slice(1, None) slices_p[ax + 1] = slice(0, -1) slices_p[0] = ax d[tuple(slices_d)] += p[tuple(slices_p)] slices_d[ax] = slice(None) slices_p[ax + 1] = slice(None) out = image + d E = (d * d).sum() else: out = image E = 0.0 # g stores the gradients of out along each axis # e.g. g[0] is the first order finite difference along axis 0 for ax in range(ndim): slices_g[ax + 1] = slice(0, -1) slices_g[0] = ax g[tuple(slices_g)] = cp.diff(out, axis=ax) slices_g[ax + 1] = slice(None) norm = (g * g).sum(axis=0, keepdims=True) cp.sqrt(norm, out=norm) E += weight * norm.sum() tau = 1.0 / (2.0 * ndim) norm *= tau / weight norm += 1.0 p -= tau * g p /= norm E /= float(image.size) if i == 0: E_init = E E_previous = E else: if abs(E_previous - E) < eps * E_init: break else: E_previous = E i += 1 return out
# Benchmark of the binarize / preprocess steps for every (in_dim, out_dim)
# pair.  CuPy launches kernels asynchronously, so the device is synchronized
# before a timer is started and before it is read; otherwise only the launch
# overhead would be measured.
popcount_log = []
for in_dim in in_dims:
    for out_dim in out_dims:
        # accumulated wall-clock time per stage for this size pair
        binarize_time = 0
        preprocess_time = 0
        preprocess_vec_time = 0
        xnor_time = 0
        popcount_time = 0
        for _ in range(1):
            # random inputs centred on zero, plus output buffers
            W = cupy.random.rand(out_dim, in_dim)-0.5
            x = cupy.random.rand(in_dim, )-0.5
            yw = cupy.zeros_like(W)
            yx = cupy.zeros_like(x)

            # make sure input generation is not charged to the first stage
            cupy.cuda.Device().synchronize()
            s = time.time()
            Wb = _binarize()(W, yw)
            xb = _binarize()(x, yx)
            Wb = Wb.astype('int32')
            xb = xb.astype('int32')
            cupy.cuda.Device().synchronize()
            binarize_time += time.time()-s

            s = time.time()
            # the third argument is an int32 buffer with one column per 32
            # input columns
            Wb = _preprocess()(
                Wb,
                Wb.shape[0],
                cupy.zeros((Wb.shape[0], Wb.shape[1]//32), dtype="int32"),
                size=Wb.shape[1])
def train(self):
    """Run one training step on a batch pulled from the replay memory.

    For each of the ``Config.K`` heads, a gradient is written directly
    into the head's output, scaled by per-tuple weights, clipped to
    [-1, 1] and back-propagated.  The optimizer is then updated and the
    per-tuple errors are merged back into the replay memory.

    Returns the mean error over the batch, or None when the replay
    memory yields an empty batch.
    """
    # clear grads
    self.q_func.zerograds()

    # pull tuples from memory pool
    batch_tuples = self.replay.pull(Config.batch_size)
    if len(batch_tuples) == 0:
        return

    use_gpu = Config.gpu

    # build one input array for current states and one for next states
    cur_x = [self.env.getX(tup.state) for tup in batch_tuples]
    next_x = [self.env.getX(tup.next_state) for tup in batch_tuples]
    if use_gpu:
        cur_x = cupy.concatenate(
            [cupy.expand_dims(item, 0) for item in cur_x], 0)
        next_x = cupy.concatenate(
            [cupy.expand_dims(item, 0) for item in next_x], 0)
    else:
        cur_x = np.stack(cur_x)
        next_x = np.stack(next_x)

    # array module matching the selected device
    xp = cupy if use_gpu else np

    # get cur outputs
    cur_output = self.QFunc(self.q_func, cur_x)

    # next actions are chosen with the online network (NOT the target) ...
    next_action = []
    for head_output in self.QFunc(self.q_func, next_x):
        next_action.append(self.env.getBestAction(
            head_output.data, [tup.next_state for tup in batch_tuples]))
    # ... and evaluated with the target network
    next_output = self.QFunc(self.target_q_func, next_x)

    # reset the error accumulated on each tuple and its sample count
    for tup in batch_tuples:
        tup.err = 0.
    err_count_list = [0.] * len(batch_tuples)

    # compute grad's weights
    weights = np.array([tup.P for tup in batch_tuples], np.float32)
    if use_gpu:
        weights = cuda.to_gpu(weights)
    if self.replay.getPoolSize():
        weights *= self.replay.getPoolSize()
    weights = weights ** -Config.beta
    weights /= weights.max()
    weights = xp.expand_dims(weights, 1)

    # update beta
    Config.beta = min(1, Config.beta + Config.beta_add)

    # compute grad for each head
    for k in range(Config.K):
        head = cur_output[k]
        head.grad = xp.zeros_like(head.data)

        # compute grad from each tuple this head is allowed to see
        for i, tup in enumerate(batch_tuples):
            if not tup.mask[k]:
                continue
            action = tup.action
            cur_action_value = head.data[i][action].tolist()
            next_action_value = \
                next_output[k].data[i][next_action[k][i]].tolist()

            target_value = tup.reward
            # if not empty position, not terminal state
            if tup.next_state.in_game:
                target_value += Config.gamma * next_action_value

            loss = cur_action_value - target_value
            head.grad[i][action] = 2 * loss

            # count err, relative to the current value when it is non-zero
            if cur_action_value:
                tup.err += abs(loss / cur_action_value)
                err_count_list[i] += 1

        # multiply weights with grad and clip
        head.grad = xp.multiply(head.grad, weights)
        head.grad = xp.clip(head.grad, -1, 1)

        # backward
        head.backward()

    # adjust grads of shared
    for param in self.q_func.shared.params():
        param.grad /= Config.K

    # update params
    self.optimizer.update()

    # avg err
    for tup, count in zip(batch_tuples, err_count_list):
        if count > 0:
            tup.err /= count

    self.replay.merge(Config.alpha)
    return np.mean([tup.err for tup in batch_tuples])