def test_flat_int_range_dtype():
    im = cp.linspace(-128, 128, 256, dtype=np.int8)
    frequencies, bin_centers = exposure.histogram(im, source_range="dtype")
    assert_array_equal(bin_centers, cp.arange(-128, 128))
    assert frequencies.shape == (256,)
def osg(aR, theta):
    t = cp.linspace(-cp.pi / 2, cp.pi / 2, 1000)
    w = aR * cp.cos(t) + (1 - aR) + 1j * aR * cp.sin(t)
    g = max(cp.log(abs(w)) + cp.log(cp.cos(theta - cp.arctan2(w.imag, w.real))))
    return g
def test_mixed_funclist(self, dtype):
    x = cupy.linspace(-2, 2, 6, dtype=dtype)
    condlist = [x < 0, x == 0, x > 0]
    funclist = [-10, lambda x: -x, 10, lambda x: x]
    with pytest.raises(NotImplementedError):
        cupy.piecewise(x, condlist, funclist)
alpha = [8e-9, 1e-8, 2e-8, 4e-8][igpu]
prbsize = 16  # probe size
prbshift = 8  # probe shift (probe overlap = (1-prbshift)/prbsize)
det = [128, 128]  # detector size
ntheta = n * 3 // 4  # number of angles (rotations)
noise = True  # apply discrete Poisson noise
ptheta = 2
pnz = 8

beta = dxchange.read_tiff('../data/beta192.tiff')
delta = -dxchange.read_tiff('../data/delta192.tiff')
obj = cp.array(delta + 1j * beta)
prb = cp.array(pt.objects.probe(prbsize, maxint))
theta = cp.linspace(0, np.pi, ntheta).astype('float32')
scan = cp.array(
    pt.objects.scanner3(theta, obj.shape, prbshift, prbshift, prbsize,
                        spiral=0, randscan=True, save=False))
# tomoshape = [len(theta), obj.shape[0], obj.shape[2]]

# Class gpu solver
slv = pt.solver.Solver(prb, scan, theta, det, voxelsize, energy, ntheta,
                       nz, n, ptheta, pnz)

# Free gpu memory after SIGINT, SIGTSTP
def optimize(self, training_features, training_targets, weight_matrix):
    training_features = cupy.array(training_features)
    training_targets = cupy.array(training_targets)
    N = training_features.shape[0]
    M = weight_matrix.shape[1]
    tensor_of_x_features = cupy.tile(0.0, (N, 1, training_features.shape[1]))
    tensor_of_x_squared = cupy.tile(0.0, (N, training_features.shape[1], training_features.shape[1]))
    matrix_set_diag_to_zero = cupy.tile(1.0, (training_features.shape[1], training_features.shape[1]))
    cupy.fill_diagonal(matrix_set_diag_to_zero, 0.0)
    for i in range(N):
        tensor_of_x_features[i] = training_features[i]
        tensor_of_x_squared[i] = training_features[i].dot(training_features[i])

    historical_gradient = cupy.tile(0.0, (weight_matrix.shape))
    tensor_of_x_squared = tensor_of_x_squared * matrix_set_diag_to_zero
    tensor_of_x_features_squared = tensor_of_x_features * tensor_of_x_features
    tensor_of_proto_vx = cupy.tile(0.0, (N, 1, M))
    tensor_of_proto_square = cupy.tile(0.0, (N, 1, M))
    vector_of_prediction = cupy.tile(0.0, N)
    vector_of_sum = cupy.tile(1.0, (M, 1))
    vector_of_gradient = cupy.tile(0.0, N)
    weight_matrix_square = cupy.tile(0.0, (weight_matrix.shape))
    update_step = cupy.tile(0.0, (weight_matrix.shape))

    # batch_size = numpy.floor(N/batch_count).astype(numpy.int32)
    batch_count = numpy.floor(N / self.batch_size).astype(numpy.int32)
    seed = 0
    idxs = cupy.linspace(0, self.batch_size, self.batch_size, dtype=numpy.int32)
    patience_counter = 0
    last_iteration_error = 0
    # error_iter_array = numpy.tile(1, (iterations, 1))
    error_iter_array = numpy.empty(self.iterations, dtype=numpy.float32)

    for i in range(self.iterations):
        seed = seed + 1
        cupy.random.seed(seed)
        numpy_rand_idx_list = numpy.random.permutation(N)
        random_idx_list = cupy.array(numpy_rand_idx_list)
        idxs = 0
        init = 0
        ending = 0
        error_sum = 0
        for j in range(batch_count):
            init = j * self.batch_size
            ending = (j + 1) * self.batch_size
            idxs = random_idx_list[init:ending]
            weight_matrix[cupy.abs(weight_matrix) < 0.0000001] = 0
            weight_matrix_square = weight_matrix * weight_matrix
            tensor_of_proto_vx = cupy.tensordot(tensor_of_x_features[idxs], weight_matrix, axes=1)
            tensor_of_proto_square = cupy.tensordot(tensor_of_x_features_squared[idxs], weight_matrix_square, axes=1)
            vector_of_prediction = cupy.tensordot(
                ((tensor_of_proto_vx * tensor_of_proto_vx) - tensor_of_proto_square),
                vector_of_sum, axes=1).sum(axis=1) * 0.5
            b = training_targets[idxs] - vector_of_prediction
            # print(b.mean())
            error_sum = error_sum + cupy.mean(b)  # b.mean()
            vector_of_gradient = -2 * b
            vrau = cupy.tensordot(tensor_of_x_squared[idxs], weight_matrix, axes=1)
            update_step = ((vector_of_gradient.T * vrau.T).T).sum(axis=0) + weight_matrix_square * self.regularization
            # ADAGRAD UPDATE
            historical_gradient += update_step * update_step
            weight_matrix -= self.alpha / (cupy.sqrt(historical_gradient)) * update_step  # +0.000001

        error_iter_array[i] = error_sum / batch_count
        if cupy.abs(cupy.abs(error_iter_array[i]) - last_iteration_error) < self.iteration_patience_threshold:
            patience_counter = patience_counter + 1
        else:
            patience_counter = 0  # RESET
        if patience_counter == self.iteration_patience:
            break
        # last_iteration_error = cupy.abs(error_iter_array[i])

    return weight_matrix, error_iter_array.mean(), error_iter_array  # return array with the most errors
def fit(self, X, y=None) -> "KBinsDiscretizer":
    """
    Fit the estimator.

    Parameters
    ----------
    X : numeric array-like, shape (n_samples, n_features)
        Data to be discretized.

    y : None
        Ignored. This parameter exists only for compatibility with
        :class:`sklearn.pipeline.Pipeline`.

    Returns
    -------
    self
    """
    X = self._validate_data(X, dtype='numeric')

    valid_encode = ('onehot', 'onehot-dense', 'ordinal')
    if self.encode not in valid_encode:
        raise ValueError("Valid options for 'encode' are {}. "
                         "Got encode={!r} instead."
                         .format(valid_encode, self.encode))
    valid_strategy = ('uniform', 'quantile', 'kmeans')
    if self.strategy not in valid_strategy:
        raise ValueError("Valid options for 'strategy' are {}. "
                         "Got strategy={!r} instead."
                         .format(valid_strategy, self.strategy))

    n_features = X.shape[1]
    n_bins = self._validate_n_bins(n_features)
    n_bins = np.asnumpy(n_bins)

    bin_edges = cpu_np.zeros(n_features, dtype=object)
    for jj in range(n_features):
        column = X[:, jj]
        col_min, col_max = column.min(), column.max()

        if col_min == col_max:
            warnings.warn("Feature %d is constant and will be "
                          "replaced with 0." % jj)
            n_bins[jj] = 1
            bin_edges[jj] = np.array([-np.inf, np.inf])
            continue

        if self.strategy == 'uniform':
            bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)

        elif self.strategy == 'quantile':
            quantiles = np.linspace(0, 100, n_bins[jj] + 1)
            bin_edges[jj] = np.asarray(np.percentile(column, quantiles))
            # Workaround for https://github.com/cupy/cupy/issues/4451
            # This should be removed as soon as a fix is available in cupy
            # in order to limit alterations in the included sklearn code
            bin_edges[jj][-1] = col_max

        elif self.strategy == 'kmeans':
            # Deterministic initialization with uniform spacing
            uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)
            init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5

            # 1D k-means procedure
            km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1,
                        output_type='cupy')
            km = km.fit(column[:, None])
            with using_output_type('cupy'):
                centers = km.cluster_centers_[:, 0]
            # Must sort, centers may be unsorted even with sorted init
            centers.sort()
            bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5
            bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]

        # Remove bins whose width are too small (i.e., <= 1e-8)
        if self.strategy in ('quantile', 'kmeans'):
            mask = np.diff(bin_edges[jj], prepend=-np.inf) > 1e-8
            bin_edges[jj] = bin_edges[jj][mask]
            if len(bin_edges[jj]) - 1 != n_bins[jj]:
                warnings.warn('Bins whose width are too small (i.e., <= '
                              '1e-8) in feature %d are removed. Consider '
                              'decreasing the number of bins.' % jj)
                n_bins[jj] = len(bin_edges[jj]) - 1

    self.bin_edges_ = bin_edges
    self.n_bins_ = n_bins

    if 'onehot' in self.encode:
        self._encoder = OneHotEncoder(
            categories=np.array([np.arange(i) for i in self.n_bins_]),
            sparse=self.encode == 'onehot', output_type='cupy')
        # Fit the OneHotEncoder with toy datasets
        # so that it's ready for use after the KBinsDiscretizer is fitted
        self._encoder.fit(np.zeros((1, len(self.n_bins_)), dtype=int))

    return self
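# Usage sketch for the fit method above (parameter values are illustrative,
# not taken from the original code): fit the discretizer on a small cupy
# array and inspect the learned bin edges.
import cupy as cp

X = cp.random.rand(100, 3)
est = KBinsDiscretizer(n_bins=5, encode='ordinal', strategy='uniform').fit(X)
# est.n_bins_ -> array([5, 5, 5]); est.bin_edges_[j] holds the 6 edges of
# feature j, built with linspace between that column's min and max.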
def linspace(start, end, steps, requires_grad=False, device='cpu', dtype='float32') -> 'Tensor':
    engine = _get_engine(device)
    return from_array(engine.linspace(start, end, steps).astype(dtype), requires_grad, device)
def run_simulation(input_filename,
                   pixel_layout,
                   detector_properties,
                   output_filename,
                   response_file='../larndsim/bin/response_44.npy',
                   light_lut_filename='../larndsim/bin/lightLUT.npy',
                   bad_channels=None,
                   n_tracks=None,
                   pixel_thresholds_file=None):
    """
    Command-line interface to run the simulation of a pixelated LArTPC

    Args:
        input_filename (str): path of the edep-sim input file
        pixel_layout (str): path of the YAML file containing the pixel layout
            and connection details.
        detector_properties (str): path of the YAML file containing the detector properties
        output_filename (str): path of the HDF5 output file. If not specified
            the output is added to the input file.
        response_file (str, optional): path of the Numpy array containing the pre-calculated
            field responses. Defaults to ../larndsim/bin/response_44.npy.
        light_lut_filename (str, optional): path of the Numpy array containing the light
            look-up table. Defaults to ../larndsim/bin/lightLUT.npy.
        bad_channels (str, optional): path of the YAML file containing the channels to be
            disabled. Defaults to None
        n_tracks (int, optional): number of tracks to be simulated. Defaults to None
            (all tracks).
        pixel_thresholds_file (str): path to npz file containing pixel thresholds.
            Defaults to None.
    """
    start_simulation = time()

    RangePush("run_simulation")

    print(LOGO)
    print("**************************\nLOADING SETTINGS AND INPUT\n**************************")
    print("Random seed:", SEED)
    print("Batch size:", BATCH_SIZE)
    print("Pixel layout file:", pixel_layout)
    print("Detector properties file:", detector_properties)
    print("edep-sim input file:", input_filename)
    print("Response file:", response_file)
    if bad_channels:
        print("Disabled channel list: ", bad_channels)

    RangePush("load_detector_properties")
    consts.load_properties(detector_properties, pixel_layout)
    from larndsim.consts import light, detector, physics
    RangePop()

    RangePush("load_larndsim_modules")
    # Here we load the modules after loading the detector properties
    # maybe can be implemented in a better way?
    from larndsim import quenching, drifting, detsim, pixels_from_track, fee, lightLUT
    RangePop()

    RangePush("load_pixel_thresholds")
    if pixel_thresholds_file is not None:
        print("Pixel thresholds file:", pixel_thresholds_file)
        pixel_thresholds_lut = CudaDict.load(pixel_thresholds_file, 256)
    else:
        pixel_thresholds_lut = CudaDict(cp.array([fee.DISCRIMINATION_THRESHOLD]), 1, 1)
    RangePop()

    RangePush("load_hd5_file")
    # First of all we load the edep-sim output
    with h5py.File(input_filename, 'r') as f:
        tracks = np.array(f['segments'])
        try:
            trajectories = np.array(f['trajectories'])
            input_has_trajectories = True
        except KeyError:
            input_has_trajectories = False
        try:
            vertices = np.array(f['vertices'])
            input_has_vertices = True
        except KeyError:
            print("Input file does not have true vertices info")
            input_has_vertices = False
    RangePop()

    # Makes an empty array to store data from lightlut
    if light.LIGHT_SIMULATED:
        light_sim_dat = np.zeros([len(tracks), light.N_OP_CHANNEL * 2],
                                 dtype=[('n_photons_det', 'f4'), ('t0_det', 'f4')])

    if tracks.size == 0:
        print("Empty input dataset, exiting")
        return

    if n_tracks:
        tracks = tracks[:n_tracks]
        if light.LIGHT_SIMULATED:
            light_sim_dat = light_sim_dat[:n_tracks]

    if 'n_photons' not in tracks.dtype.names:
        n_photons = np.zeros(tracks.shape[0], dtype=[('n_photons', 'f4')])
        tracks = rfn.merge_arrays((tracks, n_photons), flatten=True)

    # Here we swap the x and z coordinates of the tracks
    # because of the different convention in larnd-sim wrt edep-sim
    tracks = swap_coordinates(tracks)

    response = cp.load(response_file)

    TPB = 256
    BPG = ceil(tracks.shape[0] / TPB)

    print("******************\nRUNNING SIMULATION\n******************")
    # We calculate the number of electrons after recombination (quenching module)
    # and the position and number of electrons after drifting (drifting module)
    print("Quenching electrons...", end="")
    start_quenching = time()
    quenching.quench[BPG, TPB](tracks, physics.BIRKS)
    end_quenching = time()
    print(f" {end_quenching-start_quenching:.2f} s")

    print("Drifting electrons...", end="")
    start_drifting = time()
    drifting.drift[BPG, TPB](tracks)
    end_drifting = time()
    print(f" {end_drifting-start_drifting:.2f} s")

    if light.LIGHT_SIMULATED:
        print("Calculating optical responses...", end="")
        start_light_time = time()
        lut = cp.load(light_lut_filename)
        TPB = 256
        BPG = ceil(tracks.shape[0] / TPB)
        lightLUT.calculate_light_incidence[BPG, TPB](tracks, lut, light_sim_dat)
        print(f" {time()-start_light_time:.2f} s")

    with h5py.File(output_filename, 'a') as output_file:
        output_file.create_dataset("tracks", data=tracks)
        if light.LIGHT_SIMULATED:
            output_file.create_dataset('light_dat', data=light_sim_dat)
        if input_has_trajectories:
            output_file.create_dataset("trajectories", data=trajectories)
        if input_has_vertices:
            output_file.create_dataset("vertices", data=vertices)

    # create a lookup table that maps between unique event ids and the segments in the file
    tot_evids = np.unique(tracks['eventID'])
    _, _, start_idx = np.intersect1d(tot_evids, tracks['eventID'], return_indices=True)
    _, _, rev_idx = np.intersect1d(tot_evids, tracks['eventID'][::-1], return_indices=True)
    end_idx = len(tracks['eventID']) - 1 - rev_idx

    # We divide the sample in portions that can be processed by the GPU
    step = 1

    # pre-allocate some random number states
    rng_states = maybe_create_rng_states(1024 * 256, seed=0)
    t0 = 0
    for ievd in tqdm(range(0, tot_evids.shape[0], step),
                     desc='Simulating events...', ncols=80, smoothing=0):

        event_id_list = []
        adc_tot_list = []
        adc_tot_ticks_list = []
        track_pixel_map_tot = []
        unique_pix_tot = []
        current_fractions_tot = []

        first_event = tot_evids[ievd]
        last_event = tot_evids[min(ievd + step, tot_evids.shape[0] - 1)]

        if first_event == last_event:
            last_event += 1

        # load a subset of segments from the file and process those that are from the current event
        track_subset = tracks[min(start_idx[ievd:ievd + step]):max(end_idx[ievd:ievd + step]) + 1]
        evt_tracks = track_subset[(track_subset['eventID'] >= first_event)
                                  & (track_subset['eventID'] < last_event)]
        first_trk_id = np.where(track_subset['eventID'] == evt_tracks['eventID'][0])[0][0] \
            + min(start_idx[ievd:ievd + step])

        for itrk in tqdm(range(0, evt_tracks.shape[0], BATCH_SIZE),
                         desc=' Simulating event %i batches...' % ievd,
                         leave=False, ncols=80):
            selected_tracks = evt_tracks[itrk:itrk + BATCH_SIZE]

            RangePush("event_id_map")
            event_ids = selected_tracks['eventID']
            unique_eventIDs = np.unique(event_ids)
            RangePop()

            # We find the pixels intersected by the projection of the tracks on
            # the anode plane using the Bresenham's algorithm. We also take into
            # account the neighboring pixels, due to the transverse diffusion of the charges.
            RangePush("pixels_from_track")
            max_radius = ceil(max(selected_tracks["tran_diff"]) * 5 / detector.PIXEL_PITCH)

            TPB = 128
            BPG = ceil(selected_tracks.shape[0] / TPB)
            max_pixels = np.array([0])
            pixels_from_track.max_pixels[BPG, TPB](selected_tracks, max_pixels)

            # This formula tries to estimate the maximum number of pixels which can have
            # a current induced on them.
            max_neighboring_pixels = (2 * max_radius + 1) * max_pixels[0] \
                + (1 + 2 * max_radius) * max_radius * 2

            active_pixels = cp.full((selected_tracks.shape[0], max_pixels[0]), -1,
                                    dtype=np.int32)
            neighboring_pixels = cp.full((selected_tracks.shape[0], max_neighboring_pixels), -1,
                                         dtype=np.int32)
            n_pixels_list = cp.zeros(shape=(selected_tracks.shape[0]))

            if not active_pixels.shape[1] or not neighboring_pixels.shape[1]:
                continue

            pixels_from_track.get_pixels[BPG, TPB](selected_tracks,
                                                   active_pixels,
                                                   neighboring_pixels,
                                                   n_pixels_list,
                                                   max_radius)
            RangePop()

            RangePush("unique_pix")
            shapes = neighboring_pixels.shape
            joined = neighboring_pixels.reshape(shapes[0] * shapes[1])
            unique_pix = cp.unique(joined)
            unique_pix = unique_pix[(unique_pix != -1)]
            RangePop()

            if not unique_pix.shape[0]:
                continue

            RangePush("time_intervals")
            # Here we find the longest signal in time and we store an array with the start in time of each track
            max_length = cp.array([0])
            track_starts = cp.empty(selected_tracks.shape[0])
            detsim.time_intervals[BPG, TPB](track_starts, max_length, selected_tracks)
            RangePop()

            RangePush("tracks_current")
            # Here we calculate the induced current on each pixel
            signals = cp.zeros((selected_tracks.shape[0],
                                neighboring_pixels.shape[1],
                                cp.asnumpy(max_length)[0]), dtype=np.float32)
            TPB = (1, 1, 64)
            BPG_X = ceil(signals.shape[0] / TPB[0])
            BPG_Y = ceil(signals.shape[1] / TPB[1])
            BPG_Z = ceil(signals.shape[2] / TPB[2])
            BPG = (BPG_X, BPG_Y, BPG_Z)
            rng_states = maybe_create_rng_states(int(np.prod(TPB[:2]) * np.prod(BPG[:2])),
                                                 seed=SEED + ievd + itrk,
                                                 rng_states=rng_states)
            detsim.tracks_current_mc[BPG, TPB](signals, neighboring_pixels,
                                               selected_tracks, response, rng_states)
            RangePop()

            RangePush("pixel_index_map")
            # Here we create a map between tracks and index in the unique pixel array
            pixel_index_map = cp.full((selected_tracks.shape[0], neighboring_pixels.shape[1]), -1)
            for i_ in range(selected_tracks.shape[0]):
                compare = neighboring_pixels[i_, ..., cp.newaxis] == unique_pix
                indices = cp.where(compare)
                pixel_index_map[i_, indices[0]] = indices[1]
            RangePop()

            RangePush("track_pixel_map")
            # Mapping between unique pixel array and track array index
            track_pixel_map = cp.full((unique_pix.shape[0], detsim.MAX_TRACKS_PER_PIXEL), -1)
            TPB = 32
            BPG = ceil(unique_pix.shape[0] / TPB)
            detsim.get_track_pixel_map[BPG, TPB](track_pixel_map, unique_pix, neighboring_pixels)
            RangePop()

            RangePush("sum_pixels_signals")
            # Here we combine the induced current on the same pixels by different tracks
            TPB = (8, 8, 8)
            BPG_X = ceil(signals.shape[0] / TPB[0])
            BPG_Y = ceil(signals.shape[1] / TPB[1])
            BPG_Z = ceil(signals.shape[2] / TPB[2])
            BPG = (BPG_X, BPG_Y, BPG_Z)
            pixels_signals = cp.zeros((len(unique_pix), len(detector.TIME_TICKS)))
            pixels_tracks_signals = cp.zeros((len(unique_pix),
                                              len(detector.TIME_TICKS),
                                              track_pixel_map.shape[1]))
            detsim.sum_pixel_signals[BPG, TPB](pixels_signals,
                                               signals,
                                               track_starts,
                                               pixel_index_map,
                                               track_pixel_map,
                                               pixels_tracks_signals)
            RangePop()

            RangePush("get_adc_values")
            # Here we simulate the electronics response (the self-triggering cycle) and the signal digitization
            time_ticks = cp.linspace(0, len(unique_eventIDs) * detector.TIME_INTERVAL[1],
                                     pixels_signals.shape[1] + 1)
            integral_list = cp.zeros((pixels_signals.shape[0], fee.MAX_ADC_VALUES))
            adc_ticks_list = cp.zeros((pixels_signals.shape[0], fee.MAX_ADC_VALUES))
            current_fractions = cp.zeros((pixels_signals.shape[0],
                                          fee.MAX_ADC_VALUES,
                                          track_pixel_map.shape[1]))

            TPB = 128
            BPG = ceil(pixels_signals.shape[0] / TPB)
            rng_states = maybe_create_rng_states(int(TPB * BPG),
                                                 seed=SEED + ievd + itrk,
                                                 rng_states=rng_states)
            pixel_thresholds_lut.tpb = TPB
            pixel_thresholds_lut.bpg = BPG
            pixel_thresholds = pixel_thresholds_lut[unique_pix.ravel()].reshape(unique_pix.shape)

            fee.get_adc_values[BPG, TPB](pixels_signals,
                                         pixels_tracks_signals,
                                         time_ticks,
                                         integral_list,
                                         adc_ticks_list,
                                         0,
                                         rng_states,
                                         current_fractions,
                                         pixel_thresholds)
            adc_list = fee.digitize(integral_list)
            adc_event_ids = np.full(adc_list.shape, unique_eventIDs[0])  # FIXME: only works if looping on a single event
            RangePop()

            event_id_list.append(adc_event_ids)
            adc_tot_list.append(adc_list)
            adc_tot_ticks_list.append(adc_ticks_list)
            unique_pix_tot.append(unique_pix)
            current_fractions_tot.append(current_fractions)
            track_pixel_map[track_pixel_map != -1] += first_trk_id + itrk
            track_pixel_map_tot.append(track_pixel_map)

        if event_id_list and adc_tot_list:
            event_id_list_batch = np.concatenate(event_id_list, axis=0)
            adc_tot_list_batch = np.concatenate(adc_tot_list, axis=0)
            adc_tot_ticks_list_batch = np.concatenate(adc_tot_ticks_list, axis=0)
            unique_pix_tot_batch = np.concatenate(unique_pix_tot, axis=0)
            current_fractions_tot_batch = np.concatenate(current_fractions_tot, axis=0)
            track_pixel_map_tot_batch = np.concatenate(track_pixel_map_tot, axis=0)

            _, _, last_time = fee.export_to_hdf5(event_id_list_batch,
                                                 adc_tot_list_batch,
                                                 adc_tot_ticks_list_batch,
                                                 cp.asnumpy(unique_pix_tot_batch),
                                                 cp.asnumpy(current_fractions_tot_batch),
                                                 cp.asnumpy(track_pixel_map_tot_batch),
                                                 output_filename,
                                                 t0=t0,
                                                 bad_channels=bad_channels)
            t0 = last_time

    with h5py.File(output_filename, 'a') as output_file:
        if 'configs' in output_file.keys():
            output_file['configs'].attrs['pixel_layout'] = pixel_layout

    print("Output saved in:", output_filename)

    RangePop()
    end_simulation = time()
    print(f"Elapsed time: {end_simulation-start_simulation:.2f} s")
def _get_bin_edges(a, bins, range):
    """
    Computes the bins used internally by `histogram`.

    Args:
        a (ndarray): Ravelled data array
        bins (int or ndarray): Forwarded argument from `histogram`.
        range (None or tuple): Forwarded argument from `histogram`.

    Returns:
        bin_edges (ndarray): Array of bin edges
        uniform_bins (Number, Number, int): The upper bound, lowerbound, and
            number of bins, used in the implementation of `histogram` that
            works on uniform bins.
    """
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    # if isinstance(bins, cupy.ndarray) and bins.ndim == 0:
    #     # allow uint8 array, etc
    #     if bins.dtype not in 'bui':
    #         raise TypeError(
    #             "`bins` must be an integer, a string, or an array")
    #     bins = int(bins)  # synchronize

    if isinstance(bins, int):  # will not allow 0-dimensional cupy array
        # if cupy.ndim(bins) == 0:
        try:
            n_equal_bins = operator.index(bins)
        except TypeError:
            raise TypeError("`bins` must be an integer, a string, or an array")
        if n_equal_bins < 1:
            raise ValueError("`bins` must be positive, when an integer")

        first_edge, last_edge = _get_outer_edges(a, range)

    elif isinstance(bins, cupy.ndarray):
        if bins.ndim == 1:  # cupy.ndim(bins) == 0:
            bin_edges = cupy.asarray(bins)
            if (bin_edges[:-1] > bin_edges[1:]).any():  # synchronize!
                raise ValueError(
                    "`bins` must increase monotonically, when an array")

    elif isinstance(bins, str):
        raise NotImplementedError("only integer and array bins are implemented")

    if n_equal_bins is not None:
        # numpy's gh-10322 means that type resolution rules are dependent on
        # array shapes. To avoid this causing problems, we pick a type now and
        # stick with it throughout.
        bin_type = cupy.result_type(first_edge, last_edge, a)
        if cupy.issubdtype(bin_type, cupy.integer):
            bin_type = cupy.result_type(bin_type, float)

        # bin edges must be computed
        bin_edges = cupy.linspace(
            first_edge, last_edge, n_equal_bins + 1,
            endpoint=True, dtype=bin_type)
        return bin_edges, (first_edge, last_edge, n_equal_bins)
    else:
        return bin_edges, None
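# Worked example for the helper above (an illustration, not part of the
# module): with an integer `bins` and no explicit range, the edges reduce to a
# single cupy.linspace over the data's min/max.
import cupy

a = cupy.array([0.0, 0.2, 0.4, 0.9, 1.0])
edges, uniform = _get_bin_edges(a, 4, None)
# edges is cupy.linspace(a.min(), a.max(), 5) -> [0., 0.25, 0.5, 0.75, 1.]
# uniform carries (first_edge, last_edge, n_equal_bins), here (0.0, 1.0, 4)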
n_time = 10
n_in = 1
n_mid = 20
n_out = 1

eta = 0.01
epochs = 101
batch_size = 8
interval = 10


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


sin_x = np.linspace(-2 * np.pi, 2 * np.pi)
sin_y = np.sin(sin_x) + 0.1 * np.random.randn(len(sin_x))

n_sample = len(sin_x) - n_time
input_data = np.zeros((n_sample, n_time, n_in))
correct_data = np.zeros((n_sample, n_out))
for i in range(0, n_sample):
    input_data[i] = sin_y[i:i + n_time].reshape(-1, 1)
    correct_data[i] = sin_y[i + n_time:i + n_time + 1]


class GRULayer:
    def __init__(self, n_upper, n):
        self.w = np.random.randn(3, n_upper, n) / np.sqrt(n_upper)
        self.v = np.random.randn(3, n, n) / np.sqrt(n)
        # Biases are omitted.

    def forward(self, x, y_prev):
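        # Sketch of the standard GRU update (an assumption; the original body
        # is not shown above). Gate order in self.w / self.v is assumed to be
        # (update, reset, candidate), and biases are omitted to match __init__.
        z = sigmoid(np.dot(x, self.w[0]) + np.dot(y_prev, self.v[0]))       # update gate
        r = sigmoid(np.dot(x, self.w[1]) + np.dot(y_prev, self.v[1]))       # reset gate
        h = np.tanh(np.dot(x, self.w[2]) + np.dot(r * y_prev, self.v[2]))   # candidate state
        self.y = (1 - z) * y_prev + z * h                                   # new hidden state
        return self.y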
def phasecong100(im, nscale=2, norient=2, minWavelength=7, mult=2, sigmaOnf=0.65):
    rows, cols = im.shape
    imagefft = fft2(im)
    zero = cp.zeros(shape=(rows, cols))

    EO = dict()
    EnergyV = cp.zeros((rows, cols, 3))

    x_range = cp.linspace(-0.5, 0.5, num=cols, endpoint=True)
    y_range = cp.linspace(-0.5, 0.5, num=rows, endpoint=True)

    x, y = cp.meshgrid(x_range, y_range)
    radius = cp.sqrt(x**2 + y**2)

    theta = cp.arctan2(-y, x)

    radius = ifftshift(radius)
    theta = ifftshift(theta)

    radius[0, 0] = 1.

    sintheta = cp.sin(theta)
    costheta = cp.cos(theta)

    lp = lowpass_filter((rows, cols), 0.45, 15)

    logGabor = []
    for s in range(1, nscale + 1):
        wavelength = minWavelength * mult**(s - 1.)
        fo = 1.0 / wavelength
        logGabor.append(
            cp.exp((-(cp.log(radius / fo))**2) / (2 * cp.log(sigmaOnf)**2)))
        logGabor[-1] *= lp
        logGabor[-1][0, 0] = 0

    # The main loop...
    for o in range(1, norient + 1):
        angl = (o - 1.) * cp.pi / norient
        ds = sintheta * cp.cos(angl) - costheta * cp.sin(angl)
        dc = costheta * cp.cos(angl) + sintheta * cp.sin(angl)
        dtheta = cp.abs(cp.arctan2(ds, dc))
        dtheta = cp.minimum(dtheta * norient / 2., cp.pi)
        spread = (cp.cos(dtheta) + 1.) / 2.
        sumE_ThisOrient = zero.copy()
        sumO_ThisOrient = zero.copy()

        for s in range(0, nscale):
            filter_ = logGabor[s] * spread
            EO[(s, o)] = ifft2(imagefft * filter_)
            sumE_ThisOrient = sumE_ThisOrient + cp.real(EO[(s, o)])
            sumO_ThisOrient = sumO_ThisOrient + cp.imag(EO[(s, o)])

        EnergyV[:, :, 0] = EnergyV[:, :, 0] + sumE_ThisOrient
        EnergyV[:, :, 1] = EnergyV[:, :, 1] + cp.cos(angl) * sumO_ThisOrient
        EnergyV[:, :, 2] = EnergyV[:, :, 2] + cp.sin(angl) * sumO_ThisOrient

    OddV = cp.sqrt(EnergyV[:, :, 0]**2 + EnergyV[:, :, 1]**2)
    featType = cp.arctan2(EnergyV[:, :, 0], OddV)
    return featType
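# Usage sketch for phasecong100 (random input as a stand-in for a real image):
# the function returns the phase-congruency feature-type map for a 2-D image,
# computed entirely on the GPU with the default scale/orientation settings.
im = cp.random.rand(128, 128)
feat = phasecong100(im)   # same shape as `im`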
def test_pythagorean_triangle_right_downward_interpolated():
    prof = profile_line(image, (1, 1), (7, 9), order=1, mode="constant")
    expected_prof = cp.linspace(11, 79, 11)
    assert_array_almost_equal(prof, expected_prof)
def test_45deg_right_downward_interpolated():
    prof = profile_line(image, (2, 2), (8, 8), order=1, mode="constant")
    expected_prof = cp.linspace(22, 88, 10)
    assert_array_almost_equal(prof, expected_prof)
def histogramdd(sample, bins=10, range=None, weights=None, density=False):
    """Compute the multidimensional histogram of some data.

    Args:
        sample (cupy.ndarray): The data to be histogrammed. (N, D) or (D, N)
            array

            Note the unusual interpretation of sample when an array_like:

            * When an array, each row is a coordinate in a D-dimensional
              space - such as ``histogramdd(cupy.array([p1, p2, p3]))``.
            * When an array_like, each element is the list of values for
              single coordinate - such as ``histogramdd((X, Y, Z))``.

            The first form should be preferred.
        bins (int or tuple of int or cupy.ndarray): The bin specification:

            * A sequence of arrays describing the monotonically increasing
              bin edges along each dimension.
            * The number of bins for each dimension (nx, ny, ... = bins)
            * The number of bins for all dimensions (nx=ny=...=bins).
        range (sequence, optional): A sequence of length D, each an optional
            (lower, upper) tuple giving the outer bin edges to be used if the
            edges are not given explicitly in `bins`. An entry of None in the
            sequence results in the minimum and maximum values being used for
            the corresponding dimension. The default, None, is equivalent to
            passing a tuple of D None values.
        weights (cupy.ndarray): An array of values `w_i` weighing each sample
            `(x_i, y_i, z_i, ...)`. The values of the returned histogram are
            equal to the sum of the weights belonging to the samples falling
            into each bin.
        density (bool, optional): If False, the default, returns the number of
            samples in each bin. If True, returns the probability *density*
            function at the bin, ``bin_count / sample_count / bin_volume``.

    Returns:
        tuple:
            H (cupy.ndarray): The multidimensional histogram of sample x. See
            normed and weights for the different possible semantics.
            edges (list of cupy.ndarray): A list of D arrays describing the
            bin edges for each dimension.

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.histogramdd`
    """
    if isinstance(sample, cupy.ndarray):
        # Sample is an ND-array.
        if sample.ndim == 1:
            sample = sample[:, cupy.newaxis]
        nsamples, ndim = sample.shape
    else:
        sample = cupy.stack(sample, axis=-1)
        nsamples, ndim = sample.shape

    nbin = numpy.empty(ndim, int)
    edges = ndim * [None]
    dedges = ndim * [None]
    if weights is not None:
        weights = cupy.asarray(weights)

    try:
        nbins = len(bins)
        if nbins != ndim:
            raise ValueError(
                'The dimension of bins must be equal to the dimension of the '
                'sample x.')
    except TypeError:
        # bins is an integer
        bins = ndim * [bins]

    # normalize the range argument
    if range is None:
        range = (None,) * ndim
    elif len(range) != ndim:
        raise ValueError('range argument must have one entry per dimension')

    # Create edge arrays
    for i in _range(ndim):
        if cupy.ndim(bins[i]) == 0:
            if bins[i] < 1:
                raise ValueError(
                    '`bins[{}]` must be positive, when an integer'.format(i))
            smin, smax = _get_outer_edges(sample[:, i], range[i])
            num = int(bins[i] + 1)  # synchronize!
            edges[i] = cupy.linspace(smin, smax, num)
        elif cupy.ndim(bins[i]) == 1:
            if not isinstance(bins[i], cupy.ndarray):
                raise ValueError('array-like bins not supported')
            edges[i] = bins[i]
            if (edges[i][:-1] > edges[i][1:]).any():  # synchronize!
                raise ValueError(
                    '`bins[{}]` must be monotonically increasing, when an '
                    'array'.format(i))
        else:
            raise ValueError(
                '`bins[{}]` must be a scalar or 1d array'.format(i))

        nbin[i] = len(edges[i]) + 1  # includes an outlier on each end
        dedges[i] = cupy.diff(edges[i])

    # Compute the bin number each sample falls into.
    ncount = tuple(
        # avoid cupy.digitize to work around NumPy issue gh-11022
        cupy.searchsorted(edges[i], sample[:, i], side='right')
        for i in _range(ndim))

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right edge to be
    # counted in the last bin, and not as an outlier.
    for i in _range(ndim):
        # Find which points are on the rightmost edge.
        on_edge = sample[:, i] == edges[i][-1]
        # Shift these points one bin to the left.
        ncount[i][on_edge] -= 1

    # Compute the sample indices in the flattened histogram matrix.
    # This raises an error if the array is too large.
    xy = cupy.ravel_multi_index(ncount, nbin)

    # Compute the number of repetitions in xy and assign it to the
    # flattened histmat.
    hist = cupy.bincount(xy, weights, minlength=numpy.prod(nbin))

    # Shape into a proper matrix
    hist = hist.reshape(nbin)

    # This preserves the (bad) behavior observed in NumPy gh-7845, for now.
    hist = hist.astype(float)  # Note: NumPy uses casting='safe' here too

    # Remove outliers (indices 0 and -1 for each dimension).
    core = ndim * (slice(1, -1),)
    hist = hist[core]

    if density:
        # calculate the probability density function
        s = hist.sum()
        for i in _range(ndim):
            shape = [1] * ndim
            shape[i] = nbin[i] - 2
            hist = hist / dedges[i].reshape(shape)
        hist /= s

    if any(hist.shape != numpy.asarray(nbin) - 2):
        raise RuntimeError('Internal Shape Error')
    return hist, edges
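# Usage sketch (an assumption mirroring the numpy.histogramdd interface
# described in the docstring above): bin 1000 random 3-D points into a
# 5x6x7 histogram.
import cupy

sample = cupy.random.randn(1000, 3)
H, edges = histogramdd(sample, bins=(5, 6, 7))
# H.shape == (5, 6, 7); edges is a list of three 1-D edge arrays
# (6, 7 and 8 entries), each built with cupy.linspace over the data range.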
        print(i)
        binvalues.append(sum([min(values[frame]), i * (width / binscount)]))
    '''
    return binvalues


def findIndex(value, list_values):
    for i in range(len(list_values)):
        if list_values[i] == value:
            return int(i)
        else:
            pass


types = 'all_frames'
lowerlimit = 1
upperlimit_list = createUpperLimitList(lowerlimit, 10, 0.5)
spacinghistograms = []
framesindices = cp.linspace(0, 2500, 100)
cp.cuda.Stream.null.synchronize()
hydrogenbonds_allframes = []
hydrogenbonds_array = []
data_folder = "/home/gemsec-user/Desktop/"
file_to_open = data_folder + "water.pdb"
file = open(file_to_open)
# for frameindex in framesindices:
hydrogenbonds = []
allspacings = []
numberofbins = 100
histogram = []
def uniform(left, right, size):
    x = cp.linspace(left, right, size)
    return x.reshape(size, 1)
def lanczos4_zoom_wrapper(images: cp.array, mag: float):
    """closure. It will return a function to zoom the images,
    but the parameters will not be calculated again"""
    # init the parameters for lanczos image resize afterwards
    h, w = images.shape[1:3]
    lanczos4_core_lut = generate_lanczos4_weights_lut()

    yCoordinate = cp.linspace(0, h - 1 / mag, int(h * mag), dtype=cp.float32)
    t, u = cp.modf(yCoordinate)
    u = u.astype(int)
    # select 8 sampling points
    uy = [
        cp.maximum(u - 3, 0),
        cp.maximum(u - 2, 0),
        cp.maximum(u - 1, 0),
        cp.minimum(u, h - 1),
        cp.minimum(u + 1, h - 1),
        cp.minimum(u + 2, h - 1),
        cp.minimum(u + 3, h - 1),
        cp.minimum(u + 4, h - 1),
    ]
    Q = cp.take(lanczos4_core_lut, (t * 1024).astype(int), axis=0)
    Qy = [cp.take(Q, i, axis=1) for i in range(8)]

    xCoordinate = cp.linspace(0, w - 1 / mag, int(w * mag), dtype=cp.float32)
    del t, u
    t, u = cp.modf(xCoordinate)
    u = u.astype(int)
    # select 8 sampling points
    ux = [
        cp.maximum(u - 3, 0),
        cp.maximum(u - 2, 0),
        cp.maximum(u - 1, 0),
        cp.minimum(u, w - 1),
        cp.minimum(u + 1, w - 1),
        cp.minimum(u + 2, w - 1),
        cp.minimum(u + 3, w - 1),
        cp.minimum(u + 4, w - 1),
    ]
    Q = cp.take(lanczos4_core_lut, (t * 1024).astype(int), axis=0)
    Qx = [cp.take(Q, i, axis=1) for i in range(8)]
    del t, u, yCoordinate, Q, lanczos4_core_lut

    def lanczos4_zoom(image_mat: cp.array) -> cp.array:
        """the function to zoom image matrix"""
        number_of_files = image_mat.shape[0]
        # First interpolate in Y direction
        mat_temp = cp.zeros((number_of_files, w, int(h * mag)), dtype=cp.float32)
        for Qi, ui in zip(Qy, uy):
            cp.add(mat_temp,
                   cp.transpose(cp.take(image_mat, ui, axis=1), (0, 2, 1)) * Qi,
                   out=mat_temp)
        del image_mat
        # Then interpolate in X direction
        mat_zoomed = cp.zeros((number_of_files, int(h * mag), int(w * mag)), dtype=cp.float32)
        for Qi, ui in zip(Qx, ux):
            cp.add(mat_zoomed,
                   cp.transpose(cp.take(mat_temp, ui, axis=1), (0, 2, 1)) * Qi,
                   out=mat_zoomed)
        del mat_temp
        return mat_zoomed

    return lanczos4_zoom
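# Usage sketch for the wrapper above (shapes are assumptions): build the zoom
# closure once for a stack of frames, then reuse it; the coordinate and weight
# tables are computed only once.
stack = cp.random.rand(10, 256, 256).astype(cp.float32)  # (frames, h, w), assumed layout
zoom2x = lanczos4_zoom_wrapper(stack, mag=2.0)
zoomed = zoom2x(stack)   # -> shape (10, 512, 512)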
# The number of photons to simulate for each optical depth
N_photons = 100

# # Set a counter for the number of photons absorbed. Not used for momentum calculation.
# N_absorbed = cp.zeros(N_atm)

# Henyey-Greenstein parameters
g = [-1, -0.5, 0.001, 0.5, 1]

# Keeping track of the angles photons escape at.
escape_mu = cp.array([])

g = [0.0001]

# Create an array of wavelengths. Units in nm.
Wavelengths = cp.linspace(100, 500, 50)

# Pick a uniform grain size, units in micrometers.
# a = 1

# We will use units of h*nu/c = 1. We can change this or iterate over a list of frequencies later.
photon_momentum = 1

# Total initial momentum in the photons
momentum_i = photon_momentum * N_photons

# Troubleshooting flag
flag = 0

# step counter, used in troubleshooting.
def catmullrom_zoom_wrapper(
    images: cp.array,
    mag: float,
    frame_n: int,
) -> cp.array:
    """
    Zoom image using catmull_rom algorithm
    """
    coeff = 0.5 * cp.array(
        [[0, 2, 0, 0], [-1, 0, 1, 0], [2, -5, 4, -1], [-1, 3, -3, 1]],
        dtype=cp.float32).T
    h, w = images.shape[1:3]
    # First interpolate in Y direction
    yCoordinate = cp.linspace(0, h - 1 / mag, int(h * mag), dtype=cp.float32)
    xCoordinate = cp.linspace(0, w - 1 / mag, int(w * mag), dtype=cp.float32)

    def generate_q_u_matrix(x_coordinate: cp.array,
                            y_coordinate: cp.array) -> tuple:
        flatten_flag = x_coordinate.ndim > 1
        if flatten_flag:
            x_coordinate = x_coordinate.flatten()
            y_coordinate = y_coordinate.flatten()
        t, u = cp.modf(y_coordinate)
        u = u.astype(int)
        uy = cp.vstack([
            cp.minimum(cp.maximum(u - 1, 0), h - 1),
            cp.minimum(cp.maximum(u, 0), h - 1),
            cp.minimum(cp.maximum(u + 1, 0), h - 1),
            cp.minimum(cp.maximum(u + 2, 0), h - 1),
        ]).astype(int)
        Qy = cp.dot(
            coeff,
            cp.vstack([
                cp.ones_like(t, dtype=cp.float32), t, cp.power(t, 2),
                cp.power(t, 3)
            ]))
        t, u = cp.modf(x_coordinate)
        u = u.astype(int)
        ux = cp.vstack([
            cp.minimum(cp.maximum(u - 1, 0), w - 1),
            cp.minimum(cp.maximum(u, 0), w - 1),
            cp.minimum(cp.maximum(u + 1, 0), w - 1),
            cp.minimum(cp.maximum(u + 2, 0), w - 1),
        ])
        Qx = cp.dot(
            coeff,
            cp.vstack([
                cp.ones_like(t, dtype=cp.float32), t, cp.power(t, 2),
                cp.power(t, 3)
            ]))
        if flatten_flag:
            Qx = Qx.reshape(4, frame_n, int(w * mag)).transpose(1, 0, 2).copy()
            Qy = Qy.reshape(4, frame_n, int(h * mag)).transpose(1, 0, 2).copy()
            ux = ux.reshape(4, frame_n, int(w * mag)).transpose(1, 0, 2).copy()
            uy = uy.reshape(4, frame_n, int(h * mag)).transpose(1, 0, 2).copy()
        return Qx, Qy, ux, uy

    global_Qx, global_Qy, global_ux, global_uy = generate_q_u_matrix(
        xCoordinate, yCoordinate)

    mat_temp = cp.empty((frame_n, int(w * mag), h), dtype=cp.float32)
    threads_per_block = (1, 16, 16)

    kernel_file = get_kernel_path("catmull_rom_zoom.cu")
    config1 = {"FRAME_N": frame_n, "W": w, "H": h, "MAG": mag}
    config2 = {"FRAME_N": frame_n, "W": h, "H": int(w * mag), "MAG": mag}
    code1 = read_cu_code(kernel_file, params=config1)
    code2 = read_cu_code(kernel_file, params=config2)
    compile_options = ("--use_fast_math", )
    _cmlr_zoom_x_T = cp.RawKernel(code1, "cmlr_zoom_x_T", options=compile_options)
    _cmlr_zoom_y_T = cp.RawKernel(code2, "cmlr_zoom_x_T", options=compile_options)

    def catmullrom_zoom(image_mat: cp.array, out: cp.array, drift=None) -> cp.array:
        """the function to zoom image matrix"""
        if drift is not None:
            drift_x, drift_y = drift
            y_coordinate = cp.expand_dims(
                cp.linspace(0, h - 1 / mag, int(h * mag), dtype=cp.float32),
                0).repeat(frame_n, axis=0)
            x_coordinate = cp.expand_dims(
                cp.linspace(0, w - 1 / mag, int(w * mag), dtype=cp.float32),
                0).repeat(frame_n, axis=0)
            x_coordinate += cp.expand_dims(cp.asarray(drift_x), 1)
            y_coordinate += cp.expand_dims(cp.asarray(drift_y), 1)
            Qx, Qy, ux, uy = generate_q_u_matrix(x_coordinate, y_coordinate)
            blocks_per_grid = (frame_n, ceil(h / 16), ceil(int(w * mag) / 16))
            _cmlr_zoom_x_T(blocks_per_grid, threads_per_block,
                           (image_mat, ux, Qx, Qx.shape[0], mat_temp))
            blocks_per_grid = (frame_n, ceil(int(w * mag) / 16),
                               ceil(int(h * mag) / 16))
            _cmlr_zoom_y_T(blocks_per_grid, threads_per_block,
                           (mat_temp, uy, Qy, Qx.shape[0], out))
        else:
            Qx, Qy, ux, uy = global_Qx, global_Qy, global_ux, global_uy
            # the name "cmlr_zoom_x_T" stands for catmullrom_zoom_in_x_direction_then_transpose_kernel
            # after transposing, the image needs to be put into this kernel function again
            # (with different config), zoomed both in height and width direction
            # do horizontal interpolation
            blocks_per_grid = (frame_n, ceil(h / 16), ceil(int(w * mag) / 16))
            _cmlr_zoom_x_T(blocks_per_grid, threads_per_block,
                           (image_mat, ux, Qx, 1, mat_temp))
            # do vertical interpolation
            blocks_per_grid = (frame_n, ceil(int(w * mag) / 16),
                               ceil(int(h * mag) / 16))
            _cmlr_zoom_y_T(blocks_per_grid, threads_per_block,
                           (mat_temp, uy, Qy, 1, out))

    return catmullrom_zoom
def histogram(input, min, max, bins, labels=None, index=None):
    """
    Calculate the histogram of the values of an array, optionally at labels.

    Histogram calculates the frequency of values in an array within bins
    determined by `min`, `max`, and `bins`. The `labels` and `index`
    keywords can limit the scope of the histogram to specified sub-regions
    within the array.

    Parameters
    ----------
    input : array_like
        Data for which to calculate histogram.
    min, max : int
        Minimum and maximum values of range of histogram bins.
    bins : int
        Number of bins.
    labels : array_like, optional
        Labels for objects in `input`.
        If not None, must be same shape as `input`.
    index : int or sequence of ints, optional
        Label or labels for which to calculate histogram. If None, all values
        where label is greater than zero are used

    Returns
    -------
    hist : ndarray
        Histogram counts.

    Examples
    --------
    >>> a = cp.asarray([[ 0.    ,  0.2146,  0.5962,  0.    ],
    ...                 [ 0.    ,  0.7778,  0.    ,  0.    ],
    ...                 [ 0.    ,  0.    ,  0.    ,  0.    ],
    ...                 [ 0.    ,  0.    ,  0.7181,  0.2787],
    ...                 [ 0.    ,  0.    ,  0.6573,  0.3094]])
    >>> from cupyimg.scipy import ndimage
    >>> ndimage.measurements.histogram(a, 0, 1, 10)
    array([13,  0,  2,  1,  0,  1,  1,  2,  0,  0])

    With labels and no indices, non-zero elements are counted:

    >>> lbl, nlbl = ndimage.label(a)
    >>> ndimage.measurements.histogram(a, 0, 1, 10, lbl)
    array([0, 0, 2, 1, 0, 1, 1, 2, 0, 0])

    Indices can be used to count only certain objects:

    >>> ndimage.measurements.histogram(a, 0, 1, 10, lbl, 2)
    array([0, 0, 1, 1, 0, 0, 1, 1, 0, 0])

    """
    _bins = cupy.linspace(min, max, bins + 1)

    def _hist(vals):
        return cupy.histogram(vals, _bins)[0]

    return labeled_comprehension(input, labels, index, _hist, object, None,
                                 pass_positions=False)
def learnAndSolve8b(ctx, sanity_plots=False, plot_widgets=None, plot_pos=None):
    """This is the main optimization. Takes the longest time and uses the GPU heavily."""
    Nbatch = ctx.intermediate.Nbatch
    params = ctx.params
    probe = ctx.probe
    ir = ctx.intermediate
    proc = ir.proc

    iorig = ir.iorig

    # TODO: move_to_config
    NrankPC = 6  # this one is the rank of the PCs, used to detect spikes with threshold crossings
    Nrank = 3  # this one is the rank of the templates

    wTEMP, wPCA = extractTemplatesfromSnippets(
        proc=proc, probe=probe, params=params, Nbatch=Nbatch, nPCs=NrankPC)

    # move these to the GPU
    wPCA = cp.asarray(wPCA[:, :Nrank], dtype=np.float32, order="F")
    wTEMP = cp.asarray(wTEMP, dtype=np.float32, order="F")
    wPCAd = cp.asarray(wPCA, dtype=np.float64, order="F")  # convert to double for extra precision

    nt0 = params.nt0
    nt0min = params.nt0min
    nBatches = Nbatch
    NT = params.NT
    Nfilt = params.Nfilt
    Nchan = probe.Nchan

    # two variables for the same thing? number of nearest channels to each primary channel
    # TODO: unclear - let's fix this
    NchanNear = min(probe.Nchan, 32)
    Nnearest = min(probe.Nchan, 32)

    # decay of gaussian spatial mask centered on a channel
    sigmaMask = params.sigmaMask

    batchstart = list(range(0, NT * nBatches + 1, NT))

    # find the closest NchanNear channels, and the masks for those channels
    iC, mask, C2C = getClosestChannels(probe, sigmaMask, NchanNear)

    # sorting order for the batches
    isortbatches = iorig
    nhalf = int(ceil(nBatches / 2)) - 1  # halfway point

    # this batch order schedule goes through half of the data forward and backward during the model
    # fitting and then goes through the data symmetrically-out from the center during the final
    # pass
    ischedule = np.concatenate(
        (np.arange(nhalf, nBatches), np.arange(nBatches - 1, nhalf - 1, -1)))
    i1 = np.arange(nhalf - 1, -1, -1)
    i2 = np.arange(nhalf, nBatches)

    irounds = np.concatenate((ischedule, i1, i2))

    niter = irounds.size
    if irounds[niter - nBatches - 1] != nhalf:
        # this check is in here in case I do something weird when I try different schedules
        raise ValueError("Mismatch between number of batches")

    # these two flags are used to keep track of what stage of model fitting we're at
    # flag_final = 0
    flag_resort = 1

    # this is the absolute temporal offset in seconds corresponding to the start of the
    # spike sorted time segment
    t0 = 0  # ceil(params.trange(1) * ops.fs)

    nInnerIter = 60  # this is for SVD for the power iteration

    # schedule of learning rates for the model fitting part
    # starts small and goes high, it corresponds approximately to the number of spikes
    # from the past that were averaged to give rise to the current template
    pmi = cp.exp(
        -1.0 / cp.linspace(params.momentum[0], params.momentum[1], niter - nBatches))

    Nsum = min(Nchan, 7)  # how many channels to extend out the waveform in mexgetspikes

    # lots of parameters passed into the CUDA scripts
    Params = np.array(
        [NT, Nfilt, params.Th[0], nInnerIter, nt0, Nnearest, Nrank, params.lam,
         pmi[0], Nchan, NchanNear, params.nt0min, 1, Nsum, NrankPC, params.Th[0]],
        dtype=np.float64,
    )

    # W0 has to be ordered like this
    W0 = cp.transpose(
        cp.atleast_3d(cp.asarray(wPCA, dtype=np.float64, order="F")), [0, 2, 1])

    # initialize the list of channels each template lives on
    iList = cp.zeros((Nnearest, Nfilt), dtype=np.int32, order="F")

    # initialize average number of spikes per batch for each template
    nsp = cp.zeros((0, 1), dtype=np.float64, order="F")

    # this flag starts 0, is set to 1 later
    Params[12] = 0

    # kernels for subsample alignment
    Ka, Kb = getKernels(params)

    p1 = 0.95  # decay of nsp estimate in each batch
    ntot = 0
    # this keeps track of dropped templates for debugging purposes
    ndrop = np.zeros(2, dtype=np.float32, order="F")

    # this is the minimum firing rate that all templates must maintain, or be dropped
    m0 = params.minFR * params.NT / params.fs

    # allocate variables when switching to extraction phase
    # this holds spike times, clusters and other info per spike
    st3 = []  # cp.zeros((int(1e7), 5), dtype=np.float32, order='F')

    # these ones store features per spike
    # Nnearest is the number of nearest templates to store features for
    fW = LargeArrayWriter(ctx.path("fW", ext=".dat"), dtype=np.float32, shape=(Nnearest, -1))
    # NchanNear is the number of nearest channels to take PC features from
    fWpc = LargeArrayWriter(
        ctx.path("fWpc", ext=".dat"), dtype=np.float32, shape=(NchanNear, Nrank, -1))

    for ibatch in tqdm(range(niter), desc="Optimizing templates"):
        # korder is the index of the batch at this point in the schedule
        korder = int(irounds[ibatch])
        # k is the index of the batch in absolute terms
        k = int(isortbatches[korder])
        logger.debug("Batch %d/%d, %d templates.", ibatch, niter, Nfilt)

        if ibatch > niter - nBatches - 1 and korder == nhalf:
            # this is required to revert back to the template states in the middle of the
            # batches
            W, dWU = ir.W, ir.dWU
            logger.debug("Reverted back to middle timepoint.")

        if ibatch < niter - nBatches:
            # obtained pm for this batch
            Params[8] = float(pmi[ibatch])
            pm = pmi[ibatch] * ones((Nfilt,), dtype=np.float64, order="F")

        # loading a single batch (same as everywhere)
        offset = Nchan * batchstart[k]
        dat = proc.flat[offset:offset + NT * Nchan].reshape((-1, Nchan), order="F")
        dataRAW = cp.asarray(dat, dtype=np.float32) / params.scaleproc

        if ibatch == 0:
            # only on the first batch, we first get a new set of spikes from the residuals,
            # which in this case is the unmodified data because we start with no templates
            # CUDA function to get spatiotemporal clips from spike detections
            dWU, cmap = mexGetSpikes2(Params, dataRAW, wTEMP, iC)

            dWU = cp.asarray(dWU, dtype=np.float64, order="F")

            # project these into the wPCA waveforms
            dWU = cp.reshape(
                cp.dot(wPCAd, cp.dot(wPCAd.T, dWU.reshape((dWU.shape[0], -1), order="F"))),
                dWU.shape,
                order="F",
            )

            # initialize the low-rank decomposition with standard waves
            W = W0[:, cp.ones(dWU.shape[2], dtype=np.int32), :]
            Nfilt = W.shape[1]  # update the number of filters/templates
            # initialize the number of spikes for new templates with the minimum allowed value,
            # so it doesn't get thrown back out right away
            nsp = _extend(nsp, 0, Nfilt, m0)
            Params[1] = Nfilt  # update in the CUDA parameters

        if flag_resort:
            # this is a flag to resort the order of the templates according to best peak
            # channel
            # this is important in order to have cohesive memory requests from the GPU RAM

            # max channel (either positive or negative peak)
            iW = cp.argmax(cp.abs(dWU[nt0min - 1, :, :]), axis=0)
            # iW = int32(squeeze(iW))

            isort = cp.argsort(iW)  # sort by max abs channel
            iW = iW[isort]
            W = W[:, isort, :]  # user ordering to resort all the other template variables
            dWU = dWU[:, :, isort]
            nsp = nsp[isort]

        # decompose dWU by svd of time and space (via covariance matrix of 61 by 61 samples)
        # this uses a "warm start" by remembering the W from the previous iteration
        W, U, mu = mexSVDsmall2(Params, dWU, W, iC, iW, Ka, Kb)

        # UtU is the gram matrix of the spatial components of the low-rank SVDs
        # it tells us which pairs of templates are likely to "interfere" with each other
        # such as when we subtract off a template

        # this needs to change (but I don't know why!)
        UtU, maskU = getMeUtU(iW, iC, mask, Nnearest, Nchan)

        # main CUDA function in the whole codebase. does the iterative template matching
        # based on the current templates, gets features for these templates if requested
        # (featW, featPC),
        # gets scores for the template fits to each spike (vexp), outputs the average of
        # waveforms assigned to each cluster (dWU0),
        # and probably a few more things I forget about
        st0, id0, x0, featW, dWU0, drez, nsp0, featPC, vexp = mexMPnu8(
            Params, dataRAW, U, W, mu, iC, iW, UtU, iList, wPCA, params)

        logger.debug("%d spikes.", x0.size)

        # Sometimes nsp can get transposed (think this has to do with it being
        # a single element in one iteration, to which elements are added
        # nsp, nsp0, and pm must all be row vectors (Nfilt x 1), so force nsp
        # to be a row vector.
        # nsp = cp.atleast_2d(nsp)
        # nsprow, nspcol = nsp.shape
        # if nsprow < nspcol:
        #     nsp = nsp.T
        nsp = nsp.squeeze()

        # updates the templates as a running average weighted by recency
        # since some clusters have different number of spikes, we need to apply the
        # exp(pm) factor several times, and fexp is the resulting update factor
        # for each template
        fexp = np.exp(nsp0 * cp.log(pm[:Nfilt]))
        fexp = cp.reshape(fexp, (1, 1, -1), order="F")
        dWU = dWU * fexp + (1 - fexp) * (
            dWU0 / cp.reshape(cp.maximum(1, nsp0), (1, 1, -1), order="F"))

        # nsp just gets updated according to the fixed factor p1
        nsp = nsp * p1 + (1 - p1) * nsp0

        if ibatch == niter - nBatches - 1:
            # if we reached this point, we need to disable secondary template updates
            # like dropping, and adding new templates. We need to memorize the state of the
            # templates at this timepoint, and set the processing mode to "extraction and
            # tracking"
            flag_resort = 0  # no need to resort templates by channel any more
            # flag_final = 1  # this is the "final" pass

            # final clean up, triage templates one last time
            W, U, dWU, mu, nsp, ndrop = triageTemplates2(
                params, iW, C2C, W, U, dWU, mu, nsp, ndrop)

            # final number of templates
            Nfilt = W.shape[1]
            Params[1] = Nfilt

            # final covariance matrix between all templates
            WtW, iList = getMeWtW(W, U, Nnearest)

            # iW is the final channel assigned to each template
            iW = cp.argmax(cp.abs(dWU[nt0min - 1, :, :]), axis=0)

            # extract ALL features on the last pass
            Params[12] = 2  # this is a flag to output features (PC and template features)

            # different threshold on last pass?
            Params[2] = params.Th[-1]  # usually the threshold is much lower on the last pass

            # memorize the state of the templates
            logger.debug("Memorized middle timepoint.")

            ir.W, ir.dWU, ir.U, ir.mu = W, dWU, U, mu
            ir.Wraw = cp.zeros(
                (U.shape[0], W.shape[0], U.shape[1]), dtype=np.float64, order="F")

            for n in range(U.shape[1]):
                # temporarily use U rather Urot until I have a chance to test it
                ir.Wraw[:, :, n] = mu[n] * cp.dot(U[:, n, :], W[:, n, :].T)

        if ibatch < niter - nBatches - 1:
            # during the main "learning" phase of fitting a model
            if ibatch % 5 == 0:
                # this drops templates based on spike rates and/or similarities to
                # other templates
                W, U, dWU, mu, nsp, ndrop = triageTemplates2(
                    params, iW, C2C, W, U, dWU, mu, nsp, ndrop)

            Nfilt = W.shape[1]  # update the number of filters
            Params[1] = Nfilt

            # this adds new templates if they are detected in the residual
            dWU0, cmap = mexGetSpikes2(Params, drez, wTEMP, iC)

            if dWU0.shape[2] > 0:
                # new templates need to be integrated into the same format as all templates
                # apply PCA for smoothing purposes
                dWU0 = cp.reshape(
                    cp.dot(
                        wPCAd,
                        cp.dot(
                            wPCAd.T,
                            dWU0.reshape(
                                (dWU0.shape[0], dWU0.shape[1] * dWU0.shape[2]),
                                order="F",
                            ),
                        ),
                    ),
                    dWU0.shape,
                    order="F",
                )
                dWU = cp.concatenate((dWU, dWU0), axis=2)

                m = dWU0.shape[2]
                # initialize temporal components of waveforms
                W = _extend(W, Nfilt, Nfilt + m,
                            W0[:, cp.ones(m, dtype=np.int32), :], axis=1)

                # initialize the number of spikes with the minimum allowed
                nsp = _extend(nsp, Nfilt, Nfilt + m, params.minFR * NT / params.fs)
                # initialize the amplitude of this spike with a lowish number
                mu = _extend(mu, Nfilt, Nfilt + m, 10)

                # if the number of filters exceed the maximum allowed, clip it
                Nfilt = min(params.Nfilt, W.shape[1])
                Params[1] = Nfilt

                W = W[:, :Nfilt, :]  # remove any new filters over the maximum allowed
                dWU = dWU[:, :, :Nfilt]  # remove any new filters over the maximum allowed
                nsp = nsp[:Nfilt]  # remove any new filters over the maximum allowed
                mu = mu[:Nfilt]  # remove any new filters over the maximum allowed

        if ibatch > niter - nBatches - 1:
            # during the final extraction pass, this keeps track of all spikes and features

            # we memorize the spatio-temporal decomposition of the waveforms at this batch
            # this is currently only used in the GUI to provide an accurate reconstruction
            # of the raw data at this time
            ir.WA[..., k] = cp.asnumpy(W)
            ir.UA[..., k] = cp.asnumpy(U)
            ir.muA[..., k] = cp.asnumpy(mu)

            # we carefully assign the correct absolute times to spikes found in this batch
            ioffset = params.ntbuff - 1
            if k == 0:
                ioffset = 0  # the first batch is special (no pre-buffer)

            toff = nt0min + t0 - ioffset + (NT - params.ntbuff) * k
            st = toff + st0

            st30 = np.c_[
                cp.asnumpy(st),  # spike times
                cp.asnumpy(id0),  # spike clusters (0-indexing)
                cp.asnumpy(x0),  # template amplitudes
                cp.asnumpy(vexp),  # residual variance of this spike
                korder * np.ones(st.size),  # batch from which this spike was found
            ]
            # Check the number of spikes.
            assert st30.shape[0] == featW.shape[1] == featPC.shape[2]
            st3.append(st30)
            fW.append(featW)
            fWpc.append(featPC)

            ntot = ntot + x0.size  # keeps track of total number of spikes so far

        if ibatch == niter - nBatches - 1:
            # these next three store the low-d template decompositions
            ir.WA = np.zeros((nt0, Nfilt, Nrank, nBatches), dtype=np.float32, order="F")
            ir.UA = np.zeros((Nchan, Nfilt, Nrank, nBatches), dtype=np.float32, order="F")
            ir.muA = np.zeros((Nfilt, nBatches), dtype=np.float32, order="F")

        if ibatch % 100 == 0:
            # this is some of the relevant diagnostic information to be printed during training
            logger.info(("%d / %d batches, %d units, nspks: %2.4f, mu: %2.4f, "
                         "nst0: %d, merges: %2.4f, %2.4f"),
                        ibatch, niter, Nfilt, nsp.sum(), median(mu), st0.size, *ndrop)

            if sanity_plots:
                assert plot_widgets is not None, "if sanity_plots is set, then plot_widgets cannot be None"
                plot_diagnostics(W, U, mu, nsp, plot_widgets[plot_pos])

        free_gpu_memory()

    # Close the large array writers and save the JSON metadata files to disk.
    fW.close()
    fWpc.close()

    # just display the total number of spikes
    logger.info("Found %d spikes.", ntot)

    # Save results to the ctx.intermediate object.
    ir.st3 = np.concatenate(st3, axis=0)

    # the similarity score between templates is simply the correlation,
    # taken as the max over several consecutive time delays
    ir.simScore = cp.asnumpy(cp.max(WtW, axis=2))

    # NOTE: these are now already saved by LargeArrayWriter
    # fWa = np.concatenate(fW, axis=-1)
    # fWpca = np.concatenate(fWpc, axis=-1)

    # the template features are stored in cProj, like in Kilosort1
    # ir.cProj = fWa.T
    # the neighboring template indices are stored in iNeigh
    ir.iNeigh = cp.asnumpy(iList)

    # permute the PC projections in the right order
    # ir.cProjPC = np.transpose(fWpca, (2, 1, 0))
    # iNeighPC keeps the indices of the channels corresponding to the PC features
    ir.iNeighPC = cp.asnumpy(iC[:, iW])

    # Number of spikes.
    assert ir.st3.shape[0] == fW.shape[-1] == fWpc.shape[-1]

    # this whole next block is just done to compress the compressed templates
    # we separately svd the time components of each template, and the spatial components
    # this also requires a careful decompression function, available somewhere in the GUI code

    nKeep = min(Nchan * 3, 20)  # how many PCs to keep
    W_a = np.zeros((nt0 * Nrank, nKeep, Nfilt), dtype=np.float32)
    W_b = np.zeros((nBatches, nKeep, Nfilt), dtype=np.float32)
    U_a = np.zeros((Nchan * Nrank, nKeep, Nfilt), dtype=np.float32)
    U_b = np.zeros((nBatches, nKeep, Nfilt), dtype=np.float32)

    for j in tqdm(range(Nfilt), desc="Compressing templates"):
        # do this for every template separately
        WA = np.reshape(ir.WA[:, j, ...], (-1, nBatches), order="F")
        # svd on the GPU was faster for this, but the Python randomized CPU version
        # might be faster still
        # WA = gpuArray(WA)
        A, B, C = svdecon_cpu(WA)
        # W_a times W_b results in a reconstruction of the time components
        W_a[:, :, j] = np.dot(A[:, :nKeep], B[:nKeep, :nKeep])
        W_b[:, :, j] = C[:, :nKeep]

        UA = np.reshape(ir.UA[:, j, ...], (-1, nBatches), order="F")
        # UA = gpuArray(UA)
        A, B, C = svdecon_cpu(UA)
        # U_a times U_b results in a reconstruction of the time components
        U_a[:, :, j] = np.dot(A[:, :nKeep], B[:nKeep, :nKeep])
        U_b[:, :, j] = C[:, :nKeep]

    logger.info("Finished compressing time-varying templates.")

    return Bunch(
        wPCA=wPCA[:, :Nrank],
        wTEMP=wTEMP,
        st3=ir.st3,
        simScore=ir.simScore,
        # cProj=ir.cProj,
        # cProjPC=ir.cProjPC,
        iNeigh=ir.iNeigh,
        iNeighPC=ir.iNeighPC,
        WA=ir.WA,
        UA=ir.UA,
        W=ir.W,
        U=ir.U,
        dWU=ir.dWU,
        mu=ir.mu,
        W_a=W_a,
        W_b=W_b,
        U_a=U_a,
        U_b=U_b,
    )
def test_callable_funclist(self, dtype):
    x = cupy.linspace(-2, 4, 6, dtype=dtype)
    condlist = [x < 0, x > 0]
    funclist = [lambda x: -x, lambda x: x]
    with pytest.raises(NotImplementedError):
        cupy.piecewise(x, condlist, funclist)
) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.linspace <numpy.linspace>`.

    See its docstring for more information.
    """
    from ._array_object import Array

    _check_valid_dtype(dtype)
    if device is None:
        device = _Device()  # current device
    if device is not None and not isinstance(device, _Device):
        raise ValueError(f"Unsupported device {device!r}")
    with device:
        return Array._new(
            np.linspace(start, stop, num, dtype=dtype, endpoint=endpoint))


def meshgrid(*arrays: Array, indexing: str = "xy") -> List[Array]:
    """
    Array API compatible wrapper for :py:func:`np.meshgrid <numpy.meshgrid>`.

    See its docstring for more information.
    """
    from ._array_object import Array

    return [
        Array._new(array)
        for array in np.meshgrid(*[a._array for a in arrays], indexing=indexing)
    ]
def convolve_gpu_chunked(x, b, pad='flip', nwin=DEFAULT_CONV_CHUNK, ntap=500, overlap=2000):
    """Chunked GPU FFT-based convolution for large arrays.

    This memory-controlled version splits the signal into chunks of n samples.
    Each chunk is tapered in and out; the overlap is designed to get clear of the
    taper, and splicing of overlapping chunks is done with a cosine weighting.

    param: pad None, 'zeros', 'constant', 'flip'
    """
    x = cp.asarray(x)
    b = cp.asarray(b)
    assert b.ndim == 1
    n = x.shape[0]
    assert overlap >= 2 * ntap
    # create variables, the gain is to control the splicing
    y = cp.zeros_like(x)
    gain = cp.zeros(n)
    # compute tapers/constants outside of the loop
    taper_in = (-cp.cos(cp.linspace(0, 1, ntap) * cp.pi) / 2 + 0.5)[:, cp.newaxis]
    taper_out = cp.flipud(taper_in)
    assert b.shape[0] < nwin < n
    # this is the convolution wavelet that we shift to be 0 lag
    bp = cp.pad(b, (0, nwin - b.shape[0]), mode='constant')
    bp = cp.roll(bp, -b.size // 2 + 1)
    bp = cp.fft.rfft(bp, n=nwin)[:, cp.newaxis]
    # this is used to splice windows together: cosine taper. The reversed taper is complementary
    scale = cp.minimum(cp.maximum(0, cp.linspace(-0.5, 1.5, overlap - 2 * ntap)), 1)
    splice = (-cp.cos(scale * cp.pi) / 2 + 0.5)[:, cp.newaxis]
    # loop over the signal by chunks and apply convolution in frequency domain
    first = 0
    while True:
        first = min(n - nwin, first)
        last = min(first + nwin, n)
        # the convolution
        x_ = cp.copy(x[first:last, :])
        x_[:ntap] *= taper_in
        x_[-ntap:] *= taper_out
        x_ = cp.fft.irfft(cp.fft.rfft(x_, axis=0, n=nwin) * bp, axis=0, n=nwin)
        # this is to check the gain of summing the windows
        tt = cp.ones(nwin)
        tt[:ntap] *= taper_in[:, 0]
        tt[-ntap:] *= taper_out[:, 0]
        # the full overlap is outside of the tapers: we apply a cosine splicing to this part only
        if first > 0:
            full_overlap_first = first + ntap
            full_overlap_last = first + overlap - ntap
            gain[full_overlap_first:full_overlap_last] *= (1. - splice[:, 0])
            gain[full_overlap_first:full_overlap_last] += tt[ntap:overlap - ntap] * splice[:, 0]
            gain[full_overlap_last:last] = tt[overlap - ntap:]
            y[full_overlap_first:full_overlap_last] *= (1. - splice)
            y[full_overlap_first:full_overlap_last] += x_[ntap:overlap - ntap] * splice
            y[full_overlap_last:last] = x_[overlap - ntap:]
        else:
            y[first:last, :] = x_
            gain[first:last] = tt
        if last == n:
            break
        first += nwin - overlap
    return y
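# Usage sketch for the chunked convolution above (shapes and parameter values
# are assumptions): convolve a long multi-channel signal with a short 1-D
# kernel in fixed-size chunks to bound GPU memory use.
import cupy as cp

sig = cp.random.randn(200000, 4).astype(cp.float32)   # (samples, channels)
kernel = cp.hanning(101)
kernel /= kernel.sum()                                 # simple smoothing kernel
smoothed = convolve_gpu_chunked(sig, kernel, nwin=30000, ntap=500, overlap=2000)
# `smoothed` has the same shape as `sig`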
    def setup(self):
        self.d = np.linspace(0, 100, 100000)
def powerspectrum(*u, average=True, diagnostics=False,
                  kmin=None, kmax=None, npts=None,
                  compute_fft=True, compute_sqr=True,
                  double=True, bench=False, **kwargs):
    """
    See the documentation for the :ref:`CPU version<powerspectrum>`.

    Parameters
    ----------
    u : `np.ndarray`
        Scalar or vector field. If vector data, pass arguments as
        ``u1, u2, ..., un`` where ``ui`` is the ith vector component.
        Each ``ui`` can be 1D, 2D, or 3D, and all must have the same
        ``ui.shape`` and ``ui.dtype``.
    average : `bool`, optional
        If ``True``, average over values in a given bin and multiply by the
        bin volume. If ``False``, compute the sum.
    diagnostics : `bool`, optional
        Return the standard deviation and number of points in a particular
        radial bin.
    kmin : `int` or `float`, optional
        Minimum wavenumber in power spectrum bins. If ``None``, ``kmin = 1``.
    kmax : `int` or `float`, optional
        Maximum wavenumber in power spectrum bins.
        If ``None``, ``kmax = max(u.shape)//2``.
    npts : `int`, optional
        Number of modes between ``kmin`` and ``kmax``, inclusive.
        If ``None``, ``npts = kmax-kmin+1``.
    compute_fft : `bool`, optional
        If ``False``, do not take the FFT of the input data. FFTs should not
        be passed with the zero-frequency component in the center.
    compute_sqr : `bool`, optional
        If ``False``, sum the real part of the FFT. This can be useful for
        purely real FFTs, where the sign of the FFT is useful information.
        If ``True``, take the square as usual.
    double : `bool`, optional
        If ``False``, calculate FFTs in single precision. Useful for saving
        memory.
    bench : `bool`, optional
        Print message for time of calculation.
    kwargs
        Additional keyword arguments passed to ``cupyx.scipy.fft.fftn`` or
        ``cupyx.scipy.fft.rfftn``.

    Returns
    -------
    spectrum : `np.ndarray`, shape `(npts,)`
        Radially averaged power spectrum :math:`P(k)`.
    kn : `np.ndarray`, shape `(npts,)`
        Left edges of radial bins :math:`k`.
    counts : `np.ndarray`, shape `(npts,)`, optional
        Number of points :math:`N_k` in each bin.
    vol : `np.ndarray`, shape `(npts,)`, optional
        Volume :math:`V_k` of each bin.
    stdev : `np.ndarray`, shape `(npts,)`, optional
        Standard deviation multiplied with :math:`V_k` in each bin.
    """
    if bench:
        t0 = time()

    shape = u[0].shape
    ndim = u[0].ndim
    ncomp = len(u)
    N = max(u[0].shape)

    if np.issubdtype(u[0].dtype, np.floating):
        real = True
        dtype = cp.float64 if double else cp.float32
    else:
        real = False
        dtype = cp.complex128 if double else cp.complex64

    if ndim not in [1, 2, 3]:
        raise ValueError("Dimension of image must be 1, 2, or 3.")

    # Get memory pools
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    # Compute power spectral density with memory efficiency
    density = None
    comp = cp.empty(shape, dtype=dtype)
    for i in range(ncomp):
        temp = cp.asarray(u[i], dtype=dtype)
        comp[...] = temp
        del temp
        if compute_fft:
            fft = _cufftn(comp, **kwargs)
        else:
            fft = comp
        if density is None:
            fftshape = fft.shape
            density = cp.zeros(fft.shape)
        if compute_sqr:
            density[...] += _mod_squared(fft)
        else:
            density[...] += cp.real(fft)
        del fft
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()

    # Need to double count if using rfftn
    if real and compute_fft:
        density[...] *= 2

    # Get radial coordinates
    kr = cp.asarray(_kmag_sampling(fftshape, real=real).astype(np.float32))

    # Flatten arrays
    kr = kr.ravel()
    density = density.ravel()

    # Get minimum and maximum k for binning if not given
    if kmin is None:
        kmin = 1
    if kmax is None:
        kmax = int(N / 2)
    if npts is None:
        npts = kmax - kmin + 1

    # Generate bins
    kn = cp.linspace(kmin, kmax, npts, endpoint=True)  # Left edges of bins
    dk = kn[1] - kn[0]

    # Radially average power spectral density
    if ndim == 1:
        fac = 2 * np.pi
    elif ndim == 2:
        fac = 4 * np.pi
    elif ndim == 3:
        fac = 4. / 3. * np.pi

    spectrum = cp.zeros_like(kn)
    stdev = cp.zeros_like(kn)
    vol = cp.zeros_like(kn)
    counts = cp.zeros(kn.shape, dtype=np.int64)
    for i, ki in enumerate(kn):
        ii = cp.where(cp.logical_and(kr >= ki, kr < ki + dk))
        samples = density[ii]
        vk = fac * cp.pi * ((ki + dk)**ndim - (ki)**ndim)
        if average:
            spectrum[i] = vk * cp.mean(samples)
        else:
            spectrum[i] = cp.sum(samples)
        if diagnostics:
            Nk = samples.size
            stdev[i] = vk * cp.std(samples, ddof=1)
            vol[i] = vk
            counts[i] = Nk

    del density, kr
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    result = [spectrum.get(), kn.get()]
    if diagnostics:
        result.extend([counts.get(), vol.get(), stdev.get()])

    return tuple(result)
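# An illustrative call of powerspectrum() as documented above (hedged: the field is random
# dummy data, and the helper functions _cufftn/_mod_squared/_kmag_sampling are assumed to be
# available from the same module).
import numpy as np

field = np.random.rand(128, 128).astype(np.float32)   # 2D scalar field
spectrum, kn = powerspectrum(field, double=False)      # single precision FFTs
print(spectrum.shape, kn.shape)                        # both (kmax - kmin + 1,) = (64,) here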
    def setup(self):
        self.d = np.linspace(0, 100, 200000).reshape((-1, 2))
    def WISHrun(self, y0: np.ndarray, SLM: np.ndarray, delta3: float, delta4: float,
                N_os: int, N_iter: int, N_batch: int, plot: bool = True):
        """
        Runs the WISH algorithm using a Gerchberg-Saxton loop for phase retrieval.

        :param y0: Target modulated amplitudes in the sensor plane
        :param SLM: SLM modulation patterns
        :param delta3: Apparent sampling size of the SLM as seen from the sensor plane
        :param delta4: Sampling size of the sensor plane
        :param N_os: Number of observations per image
        :param N_iter: Maximal number of Gerchberg-Saxton iterations
        :param N_batch: Number of batches (modulations)
        :param plot: If True, plots the advance of the retrieval every 10 iterations
        :return u4_est, idx_converge: Estimated field of size (N, N) and the convergence
            indices to check convergence speed
        """
        wvl = self.wavelength
        z3 = self.z
        ## parameters
        N = y0.shape[0]
        k = 2 * np.pi / wvl
        # u3_batch = np.zeros((N, N, N_os), dtype=complex)  # store all U3 gpu
        # u4 = np.zeros((N, N, N_os), dtype=complex)  # gpu
        # y = np.zeros((N, N, N_os), dtype=complex)  # store all U3 gpu
        u3_batch = cp.zeros((N, N, N_os), dtype=cp.complex64)  # store all U3 gpu
        u4 = cp.zeros((N, N, N_os), dtype=cp.complex64)  # gpu
        y = cp.zeros((N, N, N_os), dtype=cp.complex64)  # store all U3 gpu

        ## initialize a3
        k = 2 * np.pi / wvl
        xx = cp.linspace(0, N - 1, N, dtype=cp.float64) - (N / 2) * cp.ones(N, dtype=cp.float64)
        yy = cp.linspace(0, N - 1, N, dtype=cp.float64) - (N / 2) * cp.ones(N, dtype=cp.float64)
        X, Y = float(delta4) * cp.meshgrid(xx, yy)[0], float(delta4) * cp.meshgrid(xx, yy)[1]
        R = cp.sqrt(X**2 + Y**2)
        Q = cp.exp(1j * (k / (2 * z3)) * R**2)
        for ii in range(N_os):
            # SLM_batch = SLM[:, :, ii]
            SLM_batch = cp.asarray(SLM[:, :, ii])
            y0_batch = y0[:, :, ii]
            # u3_batch[:, :, ii] = self.frt(y0_batch, delta4, -z3) * np.conj(SLM_batch)  # y0_batch gpu
            # u3_batch[:, :, ii] = self.frt_gpu(cp.asarray(y0_batch), delta4, -z3) * cp.conj(SLM_batch)  # y0_batch gpu
            u3_batch[:, :, ii] = self.frt_gpu_s(cp.asarray(y0_batch) / Q, delta4, -z3) * cp.conj(SLM_batch)  # y0_batch gpu
        # u3 = np.mean(u3_batch, 2)  # average it
        u3 = cp.mean(u3_batch, 2)

        ## Recon run : GS loop
        idx_converge = np.empty(N_iter)
        for jj in range(N_iter):
            sys.stdout.write(f"\rGS iteration {jj + 1}")
            sys.stdout.flush()
            # u3_collect = np.zeros(u3.shape, dtype=complex)
            u3_collect = cp.zeros(u3.shape, dtype=cp.complex64)
            idx_converge0 = np.empty(N_batch)
            for idx_batch in range(N_batch):
                # put the correct batch into the GPU
                # SLM_batch = SLM[:, :, int(N_os * idx_batch): int(N_os * (idx_batch + 1))]
                # y0_batch = y0[:, :, int(N_os * idx_batch): int(N_os * (idx_batch + 1))]
                SLM_batch = cp.asarray(SLM[:, :, int(N_os * idx_batch):int(N_os * (idx_batch + 1))])
                y0_batch = cp.asarray(y0[:, :, int(N_os * idx_batch):int(N_os * (idx_batch + 1))])
                for _ in range(N_os):
                    # u4[:, :, _] = self.frt(u3 * SLM_batch[:, :, _], delta3, z3)  # U4 is the field on the sensor
                    u4[:, :, _] = self.frt_gpu_s(u3 * SLM_batch[:, :, _], delta3, z3)  # U4 is the field on the sensor
                    y[:, :, _] = y0_batch[:, :, _] * cp.exp(1j * cp.angle(u4[:, :, _]))  # force the amplitude of y to be y0
                    # u3_batch[:, :, _] = self.frt(y[:, :, _], delta4, -z3) * np.conj(SLM_batch[:, :, _])
                    u3_batch[:, :, _] = self.frt_gpu_s(y[:, :, _], delta4, -z3) * cp.conj(SLM_batch[:, :, _])
                # u3_collect = u3_collect + np.mean(u3_batch, 2)  # collect (add) U3 from each batch
                u3_collect = u3_collect + cp.mean(u3_batch, 2)  # collect (add) U3 from each batch
                # idx_converge0[idx_batch] = np.mean(np.mean(np.mean(y0_batch, 1), 0) / np.sum(np.sum(np.abs(np.abs(u4) - y0_batch), 1), 0))
                # idx_converge0[idx_batch] = cp.asnumpy(cp.mean(cp.mean(cp.mean(y0_batch, 1), 0) / cp.sum(cp.sum(cp.abs(cp.abs(u4) - y0_batch), 1), 0)))
                # convergence index matrix for each batch
                idx_converge0[idx_batch] = cp.linalg.norm(cp.abs(u4) - y0_batch) / cp.linalg.norm(y0_batch)

            u3 = (u3_collect / N_batch)  # average over batches
            idx_converge[jj] = np.mean(idx_converge0)  # average over batches
            sys.stdout.write(f" (convergence index : {idx_converge[jj]})")

            # u4_est = self.frt(u3, delta3, z3)
            u4_est = cp.asnumpy(self.frt_gpu_s(u3, delta3, z3) * Q)

            if jj % 10 == 0 and plot:
                plt.close('all')
                fig = plt.figure(0)
                fig.suptitle(f'Iteration {jj}')
                ax1 = fig.add_subplot(121)
                ax2 = fig.add_subplot(122)
                im = ax1.imshow(np.abs(u4_est), cmap='viridis')
                ax1.set_title('Amplitude')
                ax2.imshow(np.angle(u4_est), cmap='viridis')
                ax2.set_title('Phase')

                fig1 = plt.figure(1)
                ax = fig1.gca()
                ax.plot(np.arange(0, jj, 1), idx_converge[0:jj], marker='o')
                ax.set_xlabel('Iterations')
                ax.set_ylabel('Convergence estimator')
                ax.set_title('Convergence curve')
                plt.show()
                time.sleep(2)

            # exit if the matrix doesn't change much
            if jj > 1:
                if cp.abs(idx_converge[jj] - idx_converge[jj - 1]) / idx_converge[jj] < 1e-4:
                    print('\nConverged. Exit the GS loop ...')
                    # idx_converge = idx_converge[0:jj]
                    idx_converge = cp.asnumpy(idx_converge[0:jj])
                    break

        return u4_est, idx_converge
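# A tiny, self-contained illustration (not part of the class above) of the convergence index
# used in the GS loop: the relative L2 error between the measured amplitudes y0 and the
# amplitude of the current sensor-plane estimate u4. The dummy arrays are arbitrary.
import cupy as cp

y0_demo = cp.abs(cp.random.randn(64, 64, 2)).astype(cp.float32)
u4_demo = (cp.random.randn(64, 64, 2) + 1j * cp.random.randn(64, 64, 2)).astype(cp.complex64)
conv_idx = cp.linalg.norm(cp.abs(u4_demo) - y0_demo) / cp.linalg.norm(y0_demo)
print(float(conv_idx))  # smaller values mean the estimate matches the measurements better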
def histogram(x, bins=10):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents the bin edges.

    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    .. seealso:: :func:`numpy.histogram`
    """
    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    if isinstance(bins, int):
        if x.size == 0:
            min_value = 0.0
            max_value = 1.0
        else:
            min_value = float(x.min())
            max_value = float(x.max())
        if min_value == max_value:
            min_value -= 0.5
            max_value += 0.5
        bins = cupy.linspace(min_value, max_value, bins + 1)
    elif isinstance(bins, cupy.ndarray):
        if cupy.any(bins[:-1] > bins[1:]):
            raise ValueError('bins must increase monotonically.')
    else:
        raise NotImplementedError('Only int or ndarray are supported for bins')

    # atomicAdd only supports int32
    y = cupy.zeros(bins.size - 1, dtype=cupy.int32)

    # TODO(unno): use searchsorted
    cupy.ElementwiseKernel(
        'S x, raw T bins, int32 n_bins',
        'raw int32 y',
        '''
        if (x < bins[0] or bins[n_bins - 1] < x) {
            return;
        }
        int high = n_bins - 1;
        int low = 0;

        while (high - low > 1) {
            int mid = (high + low) / 2;
            if (bins[mid] <= x) {
                low = mid;
            } else {
                high = mid;
            }
        }
        atomicAdd(&y[low], 1);
        ''')(x, bins, bins.size, y)
    return y.astype('l'), bins
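# A brief usage sketch of the histogram() defined above (illustrative data only): once with an
# integer bin count, once with explicit monotonically increasing bin edges.
import cupy

data = cupy.array([0.1, 0.4, 0.4, 2.5, 3.0, 3.1], dtype=cupy.float32)

hist, edges = histogram(data, bins=4)       # 4 equal-width bins between data.min() and data.max()
print(hist, edges)

edges = cupy.linspace(0.0, 4.0, 5)          # explicit bin edges
hist, edges = histogram(data, bins=edges)
print(hist)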