Example no. 1
def test_flat_int_range_dtype():
    im = cp.linspace(-128, 128, 256, dtype=np.int8)
    frequencies, bin_centers = exposure.histogram(im, source_range="dtype")
    assert_array_equal(bin_centers, cp.arange(-128, 128))
    assert frequencies.shape == (256, )
Example no. 2
def osg(aR, theta):
    t = cp.linspace(-cp.pi / 2, cp.pi / 2, 1000)
    w = aR * cp.cos(t) + (1 - aR) + 1j * aR * cp.sin(t)
    g = cp.max(
        cp.log(cp.abs(w)) + cp.log(cp.cos(theta - cp.arctan2(w.imag, w.real))))
    return g
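A minimal usage sketch for the function above (assuming the usual `import cupy as cp` alias; the argument values are illustrative, not taken from the original script):

import cupy as cp

aR = 0.5            # illustrative aspect ratio
theta = cp.pi / 4   # illustrative angle
g_val = osg(aR, theta)
print(float(g_val))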
Example no. 3
 def test_mixed_funclist(self, dtype):
     x = cupy.linspace(-2, 2, 6, dtype=dtype)
     condlist = [x < 0, x == 0, x > 0]
     funclist = [-10, lambda x: -x, 10, lambda x: x]
     with pytest.raises(NotImplementedError):
         cupy.piecewise(x, condlist, funclist)
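For contrast, a hedged sketch of a call that CuPy does accept: `cupy.piecewise` only supports scalar values in `funclist` (callables raise `NotImplementedError`, which is what the test above checks), so a valid call pairs one scalar with each condition:

import cupy

x = cupy.linspace(-2, 2, 6)
condlist = [x < 0, x == 0, x > 0]
# One scalar per condition; an optional extra scalar would act as the default value.
y = cupy.piecewise(x, condlist, [-1, 0, 1])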
Example no. 4
    alpha = [8e-9, 1e-8, 2e-8, 4e-8][igpu]

    prbsize = 16  # probe size
    prbshift = 8  # probe shift (probe overlap = (1-prbshift)/prbsize)
    det = [128, 128]  # detector size
    ntheta = n * 3 // 4  # number of angles (rotations)
    noise = True  # apply discrete Poisson noise

    ptheta = 2
    pnz = 8
    beta = dxchange.read_tiff('../data/beta192.tiff')
    delta = -dxchange.read_tiff('../data/delta192.tiff')
    obj = cp.array(delta + 1j * beta)

    prb = cp.array(pt.objects.probe(prbsize, maxint))
    theta = cp.linspace(0, np.pi, ntheta).astype('float32')
    scan = cp.array(
        pt.objects.scanner3(theta,
                            obj.shape,
                            prbshift,
                            prbshift,
                            prbsize,
                            spiral=0,
                            randscan=True,
                            save=False))
    #tomoshape = [len(theta), obj.shape[0], obj.shape[2]]
    # Class gpu solver
    slv = pt.solver.Solver(prb, scan, theta, det, voxelsize, energy, ntheta,
                           nz, n, ptheta, pnz)

    # Free GPU memory after SIGINT, SIGTSTP
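A minimal sketch of what such a cleanup handler could look like, assuming the standard `signal` module and CuPy's memory-pool API (the handler name and the decision to only free the pools are illustrative, not code from the original script; SIGTSTP is POSIX-only):

import signal
import cupy as cp

def _free_gpu_memory(signum, frame):
    # Return all blocks held by CuPy's default and pinned memory pools to the device/host.
    cp.get_default_memory_pool().free_all_blocks()
    cp.get_default_pinned_memory_pool().free_all_blocks()
    # A real handler would typically also re-raise or exit here.

signal.signal(signal.SIGINT, _free_gpu_memory)
signal.signal(signal.SIGTSTP, _free_gpu_memory)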
Example no. 5
 def optimize(self,training_features, training_targets,weight_matrix):    
     training_features = cupy.array(training_features)
     training_targets = cupy.array(training_targets)
     
     N = training_features.shape[0]
     M = weight_matrix.shape[1]
     
     tensor_of_x_features = cupy.tile(0.0,(N,1,training_features.shape[1]))
     tensor_of_x_squared = cupy.tile(0.0,(N,training_features.shape[1],training_features.shape[1]))
 
     matrix_set_diag_to_zero = cupy.tile(1.0,(training_features.shape[1],training_features.shape[1]))
     cupy.fill_diagonal(matrix_set_diag_to_zero,0.0)
 
     for i in range(N):
         tensor_of_x_features[i]=training_features[i]
         tensor_of_x_squared[i]=training_features[i].dot(training_features[i])
 
     historical_gradient=cupy.tile(0.0,(weight_matrix.shape))
     tensor_of_x_squared = tensor_of_x_squared*matrix_set_diag_to_zero
     tensor_of_x_features_squared = tensor_of_x_features*tensor_of_x_features
     
     tensor_of_proto_vx = cupy.tile(0.0,(N,1,M))
     tensor_of_proto_square = cupy.tile(0.0,(N,1,M))
     vector_of_prediction = cupy.tile(0.0,N)
     vector_of_sum = cupy.tile(1.0,(M,1))
     vector_of_gradient = cupy.tile(0.0,N)
     
     weight_matrix_square = cupy.tile(0.0,(weight_matrix.shape))
     update_step = cupy.tile(0.0,(weight_matrix.shape))
 
     #batch_size = #numpy.floor(N/batch_count).astype(numpy.int32)
     batch_count = numpy.floor(N/self.batch_size).astype(numpy.int32)
     seed = 0
     
     idxs = cupy.linspace(0,self.batch_size,self.batch_size,dtype=numpy.int32)  
 
     patience_counter = 0
     last_iteration_error = 0
 
     #error_iter_array = numpy.tile(1,(iterations,1))
     error_iter_array = numpy.empty(self.iterations, dtype=numpy.float32)
 
     for i in range(self.iterations):
         seed = seed + 1
         cupy.random.seed(seed)
         numpy_rand_idx_list = numpy.random.permutation(N)
         random_idx_list = cupy.array(numpy_rand_idx_list)
 
         idxs = 0
         init = 0
         ending = 0
         error_sum = 0
         
         for j in range(batch_count):
             init = j*self.batch_size
             ending = (j+1)*self.batch_size
 
             idxs = random_idx_list[init:ending]
         
             weight_matrix[cupy.abs(weight_matrix)<0.0000001]=0 
             weight_matrix_square = weight_matrix*weight_matrix
             tensor_of_proto_vx = cupy.tensordot(tensor_of_x_features[idxs],weight_matrix,axes=1)
             tensor_of_proto_square = cupy.tensordot(tensor_of_x_features_squared[idxs],weight_matrix_square,axes=1)
             vector_of_prediction = cupy.tensordot(((tensor_of_proto_vx*tensor_of_proto_vx) - tensor_of_proto_square),vector_of_sum,axes=1).sum(axis=1)*0.5
             b = training_targets[idxs]-vector_of_prediction           
             
             #print(b.mean())
 
             error_sum = error_sum+cupy.mean(b)#b.mean()
             
             vector_of_gradient = -2*b
             vrau = cupy.tensordot(tensor_of_x_squared[idxs],weight_matrix,axes=1)
             update_step = ((vector_of_gradient.T*vrau.T).T).sum(axis=0)+weight_matrix_square*self.regularization
     
             #ADAGRAD UPDATE
             historical_gradient += update_step * update_step
             weight_matrix -= self.alpha/(cupy.sqrt(historical_gradient)) * update_step#+0.000001            
 
         error_iter_array[i] = error_sum/batch_count
 
         if cupy.abs(cupy.abs(error_iter_array[i]) - last_iteration_error) < self.iteration_patience_threshold:
             patience_counter = patience_counter + 1
         else:
             patience_counter = 0  # RESET

         if patience_counter == self.iteration_patience:
             break
         
         last_iteration_error = cupy.abs(error_iter_array[i])
         
     return weight_matrix, error_iter_array.mean(), error_iter_array  # also return the full per-iteration error array
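The inner loop above is a plain Adagrad update: squared gradients are accumulated and each step is scaled by their running root. A self-contained toy sketch of the same rule on a least-squares problem (made-up data, and with the small epsilon that the snippet leaves commented out):

import cupy

X = cupy.random.rand(100, 3)
w_true = cupy.array([1.0, -2.0, 0.5])
y = X.dot(w_true)

w = cupy.zeros(3)
historical_gradient = cupy.zeros(3)
alpha = 0.5
for _ in range(500):
    grad = 2 * X.T.dot(X.dot(w) - y) / X.shape[0]    # gradient of the mean squared error
    historical_gradient += grad * grad               # accumulate squared gradients
    w -= alpha / (cupy.sqrt(historical_gradient) + 1e-8) * grad

print(w)  # should approach w_true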
Example no. 6
    def fit(self, X, y=None) -> "KBinsDiscretizer":
        """
        Fit the estimator.

        Parameters
        ----------
        X : numeric array-like, shape (n_samples, n_features)
            Data to be discretized.

        y : None
            Ignored. This parameter exists only for compatibility with
            :class:`sklearn.pipeline.Pipeline`.

        Returns
        -------
        self
        """
        X = self._validate_data(X, dtype='numeric')

        valid_encode = ('onehot', 'onehot-dense', 'ordinal')
        if self.encode not in valid_encode:
            raise ValueError("Valid options for 'encode' are {}. "
                             "Got encode={!r} instead.".format(
                                 valid_encode, self.encode))
        valid_strategy = ('uniform', 'quantile', 'kmeans')
        if self.strategy not in valid_strategy:
            raise ValueError("Valid options for 'strategy' are {}. "
                             "Got strategy={!r} instead.".format(
                                 valid_strategy, self.strategy))

        n_features = X.shape[1]
        n_bins = self._validate_n_bins(n_features)
        n_bins = np.asnumpy(n_bins)

        bin_edges = cpu_np.zeros(n_features, dtype=object)
        for jj in range(n_features):
            column = X[:, jj]
            col_min, col_max = column.min(), column.max()

            if col_min == col_max:
                warnings.warn("Feature %d is constant and will be "
                              "replaced with 0." % jj)
                n_bins[jj] = 1
                bin_edges[jj] = np.array([-np.inf, np.inf])
                continue

            if self.strategy == 'uniform':
                bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)

            elif self.strategy == 'quantile':
                quantiles = np.linspace(0, 100, n_bins[jj] + 1)
                bin_edges[jj] = np.asarray(np.percentile(column, quantiles))
                # Workaround for https://github.com/cupy/cupy/issues/4451
                # This should be removed as soon as a fix is available in cupy
                # in order to limit alterations in the included sklearn code
                bin_edges[jj][-1] = col_max

            elif self.strategy == 'kmeans':
                # Deterministic initialization with uniform spacing
                uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)
                init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5

                # 1D k-means procedure
                km = KMeans(n_clusters=n_bins[jj],
                            init=init,
                            n_init=1,
                            output_type='cupy')
                km = km.fit(column[:, None])
                with using_output_type('cupy'):
                    centers = km.cluster_centers_[:, 0]
                # Must sort, centers may be unsorted even with sorted init
                centers.sort()
                bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5
                bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]

            # Remove bins whose width are too small (i.e., <= 1e-8)
            if self.strategy in ('quantile', 'kmeans'):
                mask = np.diff(bin_edges[jj], prepend=-np.inf) > 1e-8
                bin_edges[jj] = bin_edges[jj][mask]
                if len(bin_edges[jj]) - 1 != n_bins[jj]:
                    warnings.warn('Bins whose width are too small (i.e., <= '
                                  '1e-8) in feature %d are removed. Consider '
                                  'decreasing the number of bins.' % jj)
                    n_bins[jj] = len(bin_edges[jj]) - 1

        self.bin_edges_ = bin_edges
        self.n_bins_ = n_bins

        if 'onehot' in self.encode:
            self._encoder = OneHotEncoder(categories=np.array(
                [np.arange(i) for i in self.n_bins_]),
                                          sparse=self.encode == 'onehot',
                                          output_type='cupy')
            # Fit the OneHotEncoder with toy datasets
            # so that it's ready for use after the KBinsDiscretizer is fitted
            self._encoder.fit(np.zeros((1, len(self.n_bins_)), dtype=int))

        return self
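A hedged usage sketch for this estimator; the constructor arguments mirror scikit-learn's `KBinsDiscretizer`, but the exact import path depends on the cuML version, so treat it as an assumption:

import cupy as cp
from cuml.preprocessing import KBinsDiscretizer  # assumed import path

X = cp.random.rand(100, 2)
est = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy='uniform')
Xt = est.fit(X).transform(X)   # ordinal bin indices per feature
print(est.bin_edges_)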
Example no. 7
def linspace(start, end, steps, requires_grad=False, device='cpu', dtype='float32') -> 'Tensor':
    engine = _get_engine(device)
    return from_array(engine.linspace(start, end, steps).astype(dtype), requires_grad, device)
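A small usage sketch, assuming the surrounding tensor library exposes this `linspace` at module level and that `'cpu'` selects a NumPy engine (both are assumptions about code not shown here):

t = linspace(0.0, 1.0, 5, requires_grad=False, device='cpu')
# t wraps a float32 array [0.0, 0.25, 0.5, 0.75, 1.0]
print(t)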
Example no. 8
def run_simulation(input_filename,
                   pixel_layout,
                   detector_properties,
                   output_filename,
                   response_file='../larndsim/bin/response_44.npy',
                   light_lut_filename='../larndsim/bin/lightLUT.npy',
                   bad_channels=None,
                   n_tracks=None,
                   pixel_thresholds_file=None):
    """
    Command-line interface to run the simulation of a pixelated LArTPC

    Args:
        input_filename (str): path of the edep-sim input file
        pixel_layout (str): path of the YAML file containing the pixel
            layout and connection details.
        detector_properties (str): path of the YAML file containing
            the detector properties
        output_filename (str): path of the HDF5 output file. If not specified
            the output is added to the input file.
        response_file (str, optional): path of the Numpy array containing the pre-calculated
            field responses. Defaults to ../larndsim/bin/response_44.npy.
        light_lut_filename (str, optional): path of the Numpy array containing the light
            look-up table. Defaults to ../larndsim/bin/lightLUT.npy.
        bad_channels (str, optional): path of the YAML file containing the channels to be
            disabled. Defaults to None
        n_tracks (int, optional): number of tracks to be simulated. Defaults to None
            (all tracks).
        pixel_thresholds_file (str, optional): path to npz file containing pixel thresholds. Defaults
            to None.
    """
    start_simulation = time()

    RangePush("run_simulation")

    print(LOGO)
    print(
        "**************************\nLOADING SETTINGS AND INPUT\n**************************"
    )
    print("Random seed:", SEED)
    print("Batch size:", BATCH_SIZE)
    print("Pixel layout file:", pixel_layout)
    print("Detector properties file:", detector_properties)
    print("edep-sim input file:", input_filename)
    print("Response file:", response_file)
    if bad_channels:
        print("Disabled channel list: ", bad_channels)
    RangePush("load_detector_properties")
    consts.load_properties(detector_properties, pixel_layout)
    from larndsim.consts import light, detector, physics
    RangePop()

    RangePush("load_larndsim_modules")
    # Here we load the modules after loading the detector properties
    # maybe can be implemented in a better way?
    from larndsim import quenching, drifting, detsim, pixels_from_track, fee, lightLUT
    RangePop()

    RangePush("load_pixel_thresholds")
    if pixel_thresholds_file is not None:
        print("Pixel thresholds file:", pixel_thresholds_file)
        pixel_thresholds_lut = CudaDict.load(pixel_thresholds_file, 256)
    else:
        pixel_thresholds_lut = CudaDict(
            cp.array([fee.DISCRIMINATION_THRESHOLD]), 1, 1)
    RangePop()

    RangePush("load_hd5_file")
    # First of all we load the edep-sim output
    with h5py.File(input_filename, 'r') as f:
        tracks = np.array(f['segments'])
        try:
            trajectories = np.array(f['trajectories'])
            input_has_trajectories = True
        except KeyError:
            input_has_trajectories = False

        try:
            vertices = np.array(f['vertices'])
            input_has_vertices = True
        except KeyError:
            print("Input file does not have true vertices info")
            input_has_vertices = False

    RangePop()

    # Makes an empty array to store data from lightlut
    if light.LIGHT_SIMULATED:
        light_sim_dat = np.zeros([len(tracks), light.N_OP_CHANNEL * 2],
                                 dtype=[('n_photons_det', 'f4'),
                                        ('t0_det', 'f4')])

    if tracks.size == 0:
        print("Empty input dataset, exiting")
        return

    if n_tracks:
        tracks = tracks[:n_tracks]
        if light.LIGHT_SIMULATED:
            light_sim_dat = light_sim_dat[:n_tracks]

    if 'n_photons' not in tracks.dtype.names:
        n_photons = np.zeros(tracks.shape[0], dtype=[('n_photons', 'f4')])
        tracks = rfn.merge_arrays((tracks, n_photons), flatten=True)

    # Here we swap the x and z coordinates of the tracks
    # because of the different convention in larnd-sim wrt edep-sim
    tracks = swap_coordinates(tracks)

    response = cp.load(response_file)

    TPB = 256
    BPG = ceil(tracks.shape[0] / TPB)

    print("******************\nRUNNING SIMULATION\n******************")
    # We calculate the number of electrons after recombination (quenching module)
    # and the position and number of electrons after drifting (drifting module)
    print("Quenching electrons...", end="")
    start_quenching = time()
    quenching.quench[BPG, TPB](tracks, physics.BIRKS)
    end_quenching = time()
    print(f" {end_quenching-start_quenching:.2f} s")

    print("Drifting electrons...", end="")
    start_drifting = time()
    drifting.drift[BPG, TPB](tracks)
    end_drifting = time()
    print(f" {end_drifting-start_drifting:.2f} s")

    if light.LIGHT_SIMULATED:
        print("Calculating optical responses...", end="")
        start_light_time = time()
        lut = cp.load(light_lut_filename)
        TPB = 256
        BPG = ceil(tracks.shape[0] / TPB)
        lightLUT.calculate_light_incidence[BPG, TPB](tracks, lut,
                                                     light_sim_dat)
        print(f" {time()-start_light_time:.2f} s")

    with h5py.File(output_filename, 'a') as output_file:
        output_file.create_dataset("tracks", data=tracks)
        if light.LIGHT_SIMULATED:
            output_file.create_dataset('light_dat', data=light_sim_dat)
        if input_has_trajectories:
            output_file.create_dataset("trajectories", data=trajectories)
        if input_has_vertices:
            output_file.create_dataset("vertices", data=vertices)

    # create a lookup table that maps between unique event ids and the segments in the file
    tot_evids = np.unique(tracks['eventID'])
    _, _, start_idx = np.intersect1d(tot_evids,
                                     tracks['eventID'],
                                     return_indices=True)
    _, _, rev_idx = np.intersect1d(tot_evids,
                                   tracks['eventID'][::-1],
                                   return_indices=True)
    end_idx = len(tracks['eventID']) - 1 - rev_idx

    # We divide the sample in portions that can be processed by the GPU
    step = 1

    # pre-allocate some random number states
    rng_states = maybe_create_rng_states(1024 * 256, seed=0)
    t0 = 0
    for ievd in tqdm(range(0, tot_evids.shape[0], step),
                     desc='Simulating events...',
                     ncols=80,
                     smoothing=0):

        event_id_list = []
        adc_tot_list = []
        adc_tot_ticks_list = []
        track_pixel_map_tot = []
        unique_pix_tot = []
        current_fractions_tot = []

        first_event = tot_evids[ievd]
        last_event = tot_evids[min(ievd + step, tot_evids.shape[0] - 1)]

        if first_event == last_event:
            last_event += 1

        # load a subset of segments from the file and process those that are from the current event
        track_subset = tracks[min(start_idx[ievd:ievd +
                                            step]):max(end_idx[ievd:ievd +
                                                               step]) + 1]
        evt_tracks = track_subset[(track_subset['eventID'] >= first_event)
                                  & (track_subset['eventID'] < last_event)]
        first_trk_id = np.where(
            track_subset['eventID'] == evt_tracks['eventID'][0])[0][0] + min(
                start_idx[ievd:ievd + step])

        for itrk in tqdm(range(0, evt_tracks.shape[0], BATCH_SIZE),
                         desc='  Simulating event %i batches...' % ievd,
                         leave=False,
                         ncols=80):
            selected_tracks = evt_tracks[itrk:itrk + BATCH_SIZE]
            RangePush("event_id_map")
            event_ids = selected_tracks['eventID']
            unique_eventIDs = np.unique(event_ids)
            RangePop()

            # We find the pixels intersected by the projection of the tracks on
            # the anode plane using the Bresenham's algorithm. We also take into
            # account the neighboring pixels, due to the transverse diffusion of the charges.
            RangePush("pixels_from_track")
            max_radius = ceil(
                max(selected_tracks["tran_diff"]) * 5 / detector.PIXEL_PITCH)

            TPB = 128
            BPG = ceil(selected_tracks.shape[0] / TPB)
            max_pixels = np.array([0])
            pixels_from_track.max_pixels[BPG, TPB](selected_tracks, max_pixels)

            # This formula tries to estimate the maximum number of pixels which can have
            # a current induced on them.
            max_neighboring_pixels = (2 * max_radius + 1) * max_pixels[0] + (
                1 + 2 * max_radius) * max_radius * 2

            active_pixels = cp.full((selected_tracks.shape[0], max_pixels[0]),
                                    -1,
                                    dtype=np.int32)
            neighboring_pixels = cp.full(
                (selected_tracks.shape[0], max_neighboring_pixels),
                -1,
                dtype=np.int32)
            n_pixels_list = cp.zeros(shape=(selected_tracks.shape[0]))

            if not active_pixels.shape[1] or not neighboring_pixels.shape[1]:
                continue

            pixels_from_track.get_pixels[BPG,
                                         TPB](selected_tracks, active_pixels,
                                              neighboring_pixels,
                                              n_pixels_list, max_radius)
            RangePop()

            RangePush("unique_pix")
            shapes = neighboring_pixels.shape
            joined = neighboring_pixels.reshape(shapes[0] * shapes[1])
            unique_pix = cp.unique(joined)
            unique_pix = unique_pix[(unique_pix != -1)]
            RangePop()

            if not unique_pix.shape[0]:
                continue

            RangePush("time_intervals")
            # Here we find the longest signal in time and we store an array with the start in time of each track
            max_length = cp.array([0])
            track_starts = cp.empty(selected_tracks.shape[0])
            detsim.time_intervals[BPG, TPB](track_starts, max_length,
                                            selected_tracks)
            RangePop()

            RangePush("tracks_current")
            # Here we calculate the induced current on each pixel
            signals = cp.zeros(
                (selected_tracks.shape[0], neighboring_pixels.shape[1],
                 cp.asnumpy(max_length)[0]),
                dtype=np.float32)
            TPB = (1, 1, 64)
            BPG_X = ceil(signals.shape[0] / TPB[0])
            BPG_Y = ceil(signals.shape[1] / TPB[1])
            BPG_Z = ceil(signals.shape[2] / TPB[2])
            BPG = (BPG_X, BPG_Y, BPG_Z)
            rng_states = maybe_create_rng_states(int(
                np.prod(TPB[:2]) * np.prod(BPG[:2])),
                                                 seed=SEED + ievd + itrk,
                                                 rng_states=rng_states)
            detsim.tracks_current_mc[BPG, TPB](signals, neighboring_pixels,
                                               selected_tracks, response,
                                               rng_states)
            RangePop()

            RangePush("pixel_index_map")
            # Here we create a map between tracks and index in the unique pixel array
            pixel_index_map = cp.full(
                (selected_tracks.shape[0], neighboring_pixels.shape[1]), -1)
            for i_ in range(selected_tracks.shape[0]):
                compare = neighboring_pixels[i_, ..., cp.newaxis] == unique_pix
                indices = cp.where(compare)
                pixel_index_map[i_, indices[0]] = indices[1]
            RangePop()

            RangePush("track_pixel_map")
            # Mapping between unique pixel array and track array index
            track_pixel_map = cp.full(
                (unique_pix.shape[0], detsim.MAX_TRACKS_PER_PIXEL), -1)
            TPB = 32
            BPG = ceil(unique_pix.shape[0] / TPB)
            detsim.get_track_pixel_map[BPG, TPB](track_pixel_map, unique_pix,
                                                 neighboring_pixels)
            RangePop()

            RangePush("sum_pixels_signals")
            # Here we combine the induced current on the same pixels by different tracks
            TPB = (8, 8, 8)
            BPG_X = ceil(signals.shape[0] / TPB[0])
            BPG_Y = ceil(signals.shape[1] / TPB[1])
            BPG_Z = ceil(signals.shape[2] / TPB[2])
            BPG = (BPG_X, BPG_Y, BPG_Z)
            pixels_signals = cp.zeros(
                (len(unique_pix), len(detector.TIME_TICKS)))
            pixels_tracks_signals = cp.zeros(
                (len(unique_pix), len(detector.TIME_TICKS),
                 track_pixel_map.shape[1]))
            detsim.sum_pixel_signals[BPG, TPB](pixels_signals, signals,
                                               track_starts, pixel_index_map,
                                               track_pixel_map,
                                               pixels_tracks_signals)
            RangePop()

            RangePush("get_adc_values")
            # Here we simulate the electronics response (the self-triggering cycle) and the signal digitization
            time_ticks = cp.linspace(
                0,
                len(unique_eventIDs) * detector.TIME_INTERVAL[1],
                pixels_signals.shape[1] + 1)
            integral_list = cp.zeros(
                (pixels_signals.shape[0], fee.MAX_ADC_VALUES))
            adc_ticks_list = cp.zeros(
                (pixels_signals.shape[0], fee.MAX_ADC_VALUES))
            current_fractions = cp.zeros(
                (pixels_signals.shape[0], fee.MAX_ADC_VALUES,
                 track_pixel_map.shape[1]))

            TPB = 128
            BPG = ceil(pixels_signals.shape[0] / TPB)
            rng_states = maybe_create_rng_states(int(TPB * BPG),
                                                 seed=SEED + ievd + itrk,
                                                 rng_states=rng_states)
            pixel_thresholds_lut.tpb = TPB
            pixel_thresholds_lut.bpg = BPG
            pixel_thresholds = pixel_thresholds_lut[
                unique_pix.ravel()].reshape(unique_pix.shape)

            fee.get_adc_values[BPG, TPB](pixels_signals, pixels_tracks_signals,
                                         time_ticks, integral_list,
                                         adc_ticks_list, 0, rng_states,
                                         current_fractions, pixel_thresholds)

            adc_list = fee.digitize(integral_list)
            adc_event_ids = np.full(
                adc_list.shape, unique_eventIDs[0]
            )  # FIXME: only works if looping on a single event
            RangePop()

            event_id_list.append(adc_event_ids)
            adc_tot_list.append(adc_list)
            adc_tot_ticks_list.append(adc_ticks_list)
            unique_pix_tot.append(unique_pix)
            current_fractions_tot.append(current_fractions)
            track_pixel_map[track_pixel_map != -1] += first_trk_id + itrk
            track_pixel_map_tot.append(track_pixel_map)

        if event_id_list and adc_tot_list:
            event_id_list_batch = np.concatenate(event_id_list, axis=0)
            adc_tot_list_batch = np.concatenate(adc_tot_list, axis=0)
            adc_tot_ticks_list_batch = np.concatenate(adc_tot_ticks_list,
                                                      axis=0)
            unique_pix_tot_batch = np.concatenate(unique_pix_tot, axis=0)
            current_fractions_tot_batch = np.concatenate(current_fractions_tot,
                                                         axis=0)
            track_pixel_map_tot_batch = np.concatenate(track_pixel_map_tot,
                                                       axis=0)
            _, _, last_time = fee.export_to_hdf5(
                event_id_list_batch,
                adc_tot_list_batch,
                adc_tot_ticks_list_batch,
                cp.asnumpy(unique_pix_tot_batch),
                cp.asnumpy(current_fractions_tot_batch),
                cp.asnumpy(track_pixel_map_tot_batch),
                output_filename,
                t0=t0,
                bad_channels=bad_channels)
            t0 = last_time

    with h5py.File(output_filename, 'a') as output_file:
        if 'configs' in output_file.keys():
            output_file['configs'].attrs['pixel_layout'] = pixel_layout

    print("Output saved in:", output_filename)

    RangePop()
    end_simulation = time()
    print(f"Elapsed time: {end_simulation-start_simulation:.2f} s")
Example no. 9
def _get_bin_edges(a, bins, range):
    """
    Computes the bins used internally by `histogram`.

    Args:
        a (ndarray): Ravelled data array
        bins (int or ndarray): Forwarded argument from `histogram`.
        range (None or tuple): Forwarded argument from `histogram`.

    Returns:
        bin_edges (ndarray): Array of bin edges
        uniform_bins (Number, Number, int): The lower bound, upper bound, and
        number of bins, used in the implementation of `histogram` that works on
        uniform bins.
    """
    # parse the overloaded bins argument
    n_equal_bins = None
    bin_edges = None

    # if isinstance(bins, cupy.ndarray) and bins.ndim == 0:
    #     # allow uint8 array, etc
    #     if bins.dtype not in 'bui':
    #         raise TypeError(
    #             "`bins` must be an integer, a string, or an array")
    #     bins = int(bins)  # synchronize

    if isinstance(bins, int):  # will not allow 0-dimensional cupy array
        # if cupy.ndim(bins) == 0:
        try:
            n_equal_bins = operator.index(bins)
        except TypeError:
            raise TypeError("`bins` must be an integer, a string, or an array")
        if n_equal_bins < 1:
            raise ValueError("`bins` must be positive, when an integer")

        first_edge, last_edge = _get_outer_edges(a, range)

    elif isinstance(bins, cupy.ndarray):
        if bins.ndim == 1:  # cupy.ndim(bins) == 0:
            bin_edges = cupy.asarray(bins)
            if (bin_edges[:-1] > bin_edges[1:]).any():  # synchronize!
                raise ValueError(
                    "`bins` must increase monotonically, when an array"
                )

    elif isinstance(bins, str):
        raise NotImplementedError("only integer and array bins are implemented")

    if n_equal_bins is not None:
        # numpy's gh-10322 means that type resolution rules are dependent on
        # array shapes. To avoid this causing problems, we pick a type now and
        # stick with it throughout.
        bin_type = cupy.result_type(first_edge, last_edge, a)
        if cupy.issubdtype(bin_type, cupy.integer):
            bin_type = cupy.result_type(bin_type, float)

        # bin edges must be computed
        bin_edges = cupy.linspace(
            first_edge,
            last_edge,
            n_equal_bins + 1,
            endpoint=True,
            dtype=bin_type,
        )
        return bin_edges, (first_edge, last_edge, n_equal_bins)
    else:
        return bin_edges, None
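A short sketch of how this helper behaves for the two supported `bins` types, assuming the module-level helpers it references (`operator`, `_get_outer_edges`) are available:

import cupy

a = cupy.random.rand(1000)
# Integer bins: edges are generated with cupy.linspace over the data range.
edges, uniform_params = _get_bin_edges(a, 10, None)
# Explicit 1-D edges: the array is passed through after a monotonicity check.
edges2, uniform_params2 = _get_bin_edges(a, cupy.array([0.0, 0.25, 0.5, 1.0]), None)  # uniform_params2 is None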
Example no. 10
n_time = 10
n_in = 1
n_mid = 20
n_out = 1

eta = 0.01
epochs = 101
batch_size = 8
interval = 10


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


sin_x = np.linspace(-2 * np.pi, 2 * np.pi)
sin_y = np.sin(sin_x) + 0.1 * np.random.randn(len(sin_x))
n_sample = len(sin_x) - n_time
input_data = np.zeros((n_sample, n_time, n_in))
correct_data = np.zeros((n_sample, n_out))
for i in range(0, n_sample):
    input_data[i] = sin_y[i:i + n_time].reshape(-1, 1)
    correct_data[i] = sin_y[i + n_time:i + n_time + 1]


class GRULayer:
    def __init__(self, n_upper, n):
        self.w = np.random.randn(3, n_upper, n) / np.sqrt(n_upper)
        self.v = np.random.randn(3, n, n) / np.sqrt(n)
        # Bias terms are omitted.
    def forward(self, x, y_prev):
Example no. 11
def phasecong100(im,
                 nscale=2,
                 norient=2,
                 minWavelength=7,
                 mult=2,
                 sigmaOnf=0.65):

    rows, cols = im.shape
    imagefft = fft2(im)
    zero = cp.zeros(shape=(rows, cols))

    EO = dict()
    EnergyV = cp.zeros((rows, cols, 3))

    x_range = cp.linspace(-0.5, 0.5, num=cols, endpoint=True)
    y_range = cp.linspace(-0.5, 0.5, num=rows, endpoint=True)

    x, y = cp.meshgrid(x_range, y_range)
    radius = cp.sqrt(x**2 + y**2)

    theta = cp.arctan2(-y, x)

    radius = ifftshift(radius)

    theta = ifftshift(theta)

    radius[0, 0] = 1.

    sintheta = cp.sin(theta)
    costheta = cp.cos(theta)

    lp = lowpass_filter((rows, cols), 0.45, 15)

    logGabor = []
    for s in range(1, nscale + 1):
        wavelength = minWavelength * mult**(s - 1.)
        fo = 1.0 / wavelength
        logGabor.append(
            cp.exp((-(cp.log(radius / fo))**2) / (2 * cp.log(sigmaOnf)**2)))
        logGabor[-1] *= lp
        logGabor[-1][0, 0] = 0

    # The main loop...
    for o in range(1, norient + 1):
        angl = (o - 1.) * cp.pi / norient
        ds = sintheta * cp.cos(angl) - costheta * cp.sin(angl)
        dc = costheta * cp.cos(angl) + sintheta * cp.sin(angl)
        dtheta = cp.abs(cp.arctan2(ds, dc))
        dtheta = cp.minimum(dtheta * norient / 2., cp.pi)
        spread = (cp.cos(dtheta) + 1.) / 2.
        sumE_ThisOrient = zero.copy()
        sumO_ThisOrient = zero.copy()
        for s in range(0, nscale):
            filter_ = logGabor[s] * spread
            EO[(s, o)] = ifft2(imagefft * filter_)
            sumE_ThisOrient = sumE_ThisOrient + cp.real(EO[(s, o)])
            sumO_ThisOrient = sumO_ThisOrient + cp.imag(EO[(s, o)])
        EnergyV[:, :, 0] = EnergyV[:, :, 0] + sumE_ThisOrient
        EnergyV[:, :, 1] = EnergyV[:, :, 1] + cp.cos(angl) * sumO_ThisOrient
        EnergyV[:, :, 2] = EnergyV[:, :, 2] + cp.sin(angl) * sumO_ThisOrient
    OddV = cp.sqrt(EnergyV[:, :, 0]**2 + EnergyV[:, :, 1]**2)
    featType = cp.arctan2(EnergyV[:, :, 0], OddV)
    return featType
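A minimal call sketch, assuming `fft2`, `ifft2`, `ifftshift` and `lowpass_filter` referenced above are importable in the surrounding module (e.g. from `cupyx.scipy.fft` plus a local helper):

import cupy as cp

im = cp.random.rand(128, 128).astype(cp.float32)
feat = phasecong100(im, nscale=2, norient=2)
print(feat.shape)  # (128, 128)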
Example no. 12
def test_pythagorean_triangle_right_downward_interpolated():
    prof = profile_line(image, (1, 1), (7, 9), order=1, mode="constant")
    expected_prof = cp.linspace(11, 79, 11)
    assert_array_almost_equal(prof, expected_prof)
Example no. 13
def test_45deg_right_downward_interpolated():
    prof = profile_line(image, (2, 2), (8, 8), order=1, mode="constant")
    expected_prof = cp.linspace(22, 88, 10)
    assert_array_almost_equal(prof, expected_prof)
Example no. 14
def histogramdd(sample, bins=10, range=None, weights=None, density=False):
    """Compute the multidimensional histogram of some data.

    Args:
        sample (cupy.ndarray): The data to be histogrammed. (N, D) or (D, N)
            array

            Note the unusual interpretation of sample when an array_like:

            * When an array, each row is a coordinate in a D-dimensional
              space - such as ``histogramdd(cupy.array([p1, p2, p3]))``.
            * When an array_like, each element is the list of values for a
              single coordinate - such as ``histogramdd((X, Y, Z))``.

            The first form should be preferred.
        bins (int or tuple of int or cupy.ndarray): The bin specification:

            * A sequence of arrays describing the monotonically increasing bin
              edges along each dimension.
            * The number of bins for each dimension (nx, ny, ... =bins)
            * The number of bins for all dimensions (nx=ny=...=bins).
        range (sequence, optional): A sequence of length D, each an optional
            (lower, upper) tuple giving the outer bin edges to be used if the
            edges are not given explicitly in `bins`. An entry of None in the
            sequence results in the minimum and maximum values being used for
            the corresponding dimension. The default, None, is equivalent to
            passing a tuple of D None values.
        weights (cupy.ndarray): An array of values `w_i` weighing each sample
            `(x_i, y_i, z_i, ...)`. The values of the returned histogram are
            equal to the sum of the weights belonging to the samples falling
            into each bin.
        density (bool, optional): If False, the default, returns the number of
            samples in each bin. If True, returns the probability *density*
            function at the bin, ``bin_count / sample_count / bin_volume``.

    Returns:
        tuple:
        H (cupy.ndarray):
            The multidimensional histogram of sample x. See
            normed and weights for the different possible semantics.
        edges (list of cupy.ndarray):
            A list of D arrays describing the bin
            edges for each dimension.

    .. warning::

        This function may synchronize the device.

    .. seealso:: :func:`numpy.histogramdd`
    """
    if isinstance(sample, cupy.ndarray):
        # Sample is an ND-array.
        if sample.ndim == 1:
            sample = sample[:, cupy.newaxis]
        nsamples, ndim = sample.shape
    else:
        sample = cupy.stack(sample, axis=-1)
        nsamples, ndim = sample.shape

    nbin = numpy.empty(ndim, int)
    edges = ndim * [None]
    dedges = ndim * [None]
    if weights is not None:
        weights = cupy.asarray(weights)

    try:
        nbins = len(bins)
        if nbins != ndim:
            raise ValueError(
                'The dimension of bins must be equal to the dimension of the '
                ' sample x.')
    except TypeError:
        # bins is an integer
        bins = ndim * [bins]

    # normalize the range argument
    if range is None:
        range = (None, ) * ndim
    elif len(range) != ndim:
        raise ValueError('range argument must have one entry per dimension')

    # Create edge arrays
    for i in _range(ndim):
        if cupy.ndim(bins[i]) == 0:
            if bins[i] < 1:
                raise ValueError(
                    '`bins[{}]` must be positive, when an integer'.format(i))
            smin, smax = _get_outer_edges(sample[:, i], range[i])
            num = int(bins[i] + 1)  # synchronize!
            edges[i] = cupy.linspace(smin, smax, num)
        elif cupy.ndim(bins[i]) == 1:
            if not isinstance(bins[i], cupy.ndarray):
                raise ValueError('array-like bins not supported')
            edges[i] = bins[i]
            if (edges[i][:-1] > edges[i][1:]).any():  # synchronize!
                raise ValueError(
                    '`bins[{}]` must be monotonically increasing, when an '
                    'array'.format(i))
        else:
            raise ValueError(
                '`bins[{}]` must be a scalar or 1d array'.format(i))

        nbin[i] = len(edges[i]) + 1  # includes an outlier on each end
        dedges[i] = cupy.diff(edges[i])

    # Compute the bin number each sample falls into.
    ncount = tuple(
        # avoid cupy.digitize to work around NumPy issue gh-11022
        cupy.searchsorted(edges[i], sample[:, i], side='right')
        for i in _range(ndim))

    # Using digitize, values that fall on an edge are put in the right bin.
    # For the rightmost bin, we want values equal to the right edge to be
    # counted in the last bin, and not as an outlier.
    for i in _range(ndim):
        # Find which points are on the rightmost edge.
        on_edge = sample[:, i] == edges[i][-1]
        # Shift these points one bin to the left.
        ncount[i][on_edge] -= 1

    # Compute the sample indices in the flattened histogram matrix.
    # This raises an error if the array is too large.
    xy = cupy.ravel_multi_index(ncount, nbin)

    # Compute the number of repetitions in xy and assign it to the
    # flattened histmat.
    hist = cupy.bincount(xy, weights, minlength=numpy.prod(nbin))

    # Shape into a proper matrix
    hist = hist.reshape(nbin)

    # This preserves the (bad) behavior observed in NumPy gh-7845, for now.
    hist = hist.astype(float)  # Note: NumPy uses casting='safe' here too

    # Remove outliers (indices 0 and -1 for each dimension).
    core = ndim * (slice(1, -1), )
    hist = hist[core]

    if density:
        # calculate the probability density function
        s = hist.sum()
        for i in _range(ndim):
            shape = [1] * ndim
            shape[i] = nbin[i] - 2
            hist = hist / dedges[i].reshape(shape)
        hist /= s

    if any(hist.shape != numpy.asarray(nbin) - 2):
        raise RuntimeError('Internal Shape Error')
    return hist, edges
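A quick usage sketch matching the docstring above (it assumes the module-level helpers such as `_get_outer_edges` and `_range` are available):

import cupy

sample = cupy.random.rand(1000, 3)
H, edges = histogramdd(sample, bins=(5, 6, 7))
print(H.shape)     # (5, 6, 7)
print(len(edges))  # 3 edge arrays, one per dimension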
Example no. 15
                print(i)
                binvalues.append(sum([min(values[frame]),i*(width/binscount)]))
    '''
    return binvalues

def findIndex(value,list_values):
    for i in range(len(list_values)):
        if list_values[i]==value:
            return int(i)
        else: pass

types='all_frames'
lowerlimit=1
upperlimit_list=createUpperLimitList(lowerlimit,10,0.5)
spacinghistograms=[]
framesindices=cp.linspace(0,2500,100)
cp.cuda.Stream.null.synchronize()
hydrogenbonds_allframes=[]
hydrogenbonds_array=[]

data_folder = "/home/gemsec-user/Desktop/"
file_to_open = data_folder + "water.pdb"
file = open(file_to_open)


#for frameindex in framesindices:
hydrogenbonds=[]
allspacings=[]
numberofbins=100
histogram=[]
Example no. 16
def uniform(left, right, size):
    x = cp.linspace(left, right, size)
    return x.reshape(size, 1)
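For example, a hedged call with illustrative arguments returns a column vector of evenly spaced values:

x = uniform(-1.0, 1.0, 5)
print(x.shape)  # (5, 1)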
Example no. 17
def lanczos4_zoom_wrapper(images: cp.array, mag: float):
    """closure. It will return a function to zoom the images, but the parameters will not be calculated again"""
    # init the parameters for lanczos image resize afterwards
    h, w = images.shape[1:3]
    lanczos4_core_lut = generate_lanczos4_weights_lut()
    yCoordinate = cp.linspace(0, h - 1 / mag, int(h * mag), dtype=cp.float32)
    t, u = cp.modf(yCoordinate)
    u = u.astype(int)  # select 8 sampling points
    uy = [
        cp.maximum(u - 3, 0),
        cp.maximum(u - 2, 0),
        cp.maximum(u - 1, 0),
        cp.minimum(u, h - 1),
        cp.minimum(u + 1, h - 1),
        cp.minimum(u + 2, h - 1),
        cp.minimum(u + 3, h - 1),
        cp.minimum(u + 4, h - 1),
    ]
    Q = cp.take(lanczos4_core_lut, (t * 1024).astype(int), axis=0)
    Qy = [cp.take(Q, i, axis=1) for i in range(8)]
    xCoordinate = cp.linspace(0, w - 1 / mag, int(w * mag), dtype=cp.float32)
    del t, u, yCoordinate  # y-direction temporaries are no longer needed
    t, u = cp.modf(xCoordinate)
    u = u.astype(int)  # select 8 sampling points
    ux = [
        cp.maximum(u - 3, 0),
        cp.maximum(u - 2, 0),
        cp.maximum(u - 1, 0),
        cp.minimum(u, w - 1),
        cp.minimum(u + 1, w - 1),
        cp.minimum(u + 2, w - 1),
        cp.minimum(u + 3, w - 1),
        cp.minimum(u + 4, w - 1),
    ]
    Q = cp.take(lanczos4_core_lut, (t * 1024).astype(int), axis=0)
    Qx = [cp.take(Q, i, axis=1) for i in range(8)]
    del t, u, xCoordinate, Q, lanczos4_core_lut

    def lanczos4_zoom(image_mat: cp.array) -> cp.array:
        """the function to zoom image matrix"""
        number_of_files = image_mat.shape[0]
        # First interpolate in Y direction
        mat_temp = cp.zeros((number_of_files, w, int(h * mag)),
                            dtype=cp.float32)
        for Qi, ui in zip(Qy, uy):
            cp.add(mat_temp,
                   cp.transpose(cp.take(image_mat, ui, axis=1),
                                (0, 2, 1)) * Qi,
                   out=mat_temp)
        del image_mat
        # Then interpolate in X direction
        mat_zoomed = cp.zeros((number_of_files, int(h * mag), int(w * mag)),
                              dtype=cp.float32)
        for Qi, ui in zip(Qx, ux):
            cp.add(mat_zoomed,
                   cp.transpose(cp.take(mat_temp, ui, axis=1), (0, 2, 1)) * Qi,
                   out=mat_zoomed)
        del mat_temp
        return mat_zoomed

    return lanczos4_zoom
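A hedged usage sketch for the closure above, assuming `generate_lanczos4_weights_lut` from the same module is available; the array shapes are illustrative:

import cupy as cp

frames = cp.random.rand(4, 64, 64).astype(cp.float32)  # (n_frames, h, w)
zoom2x = lanczos4_zoom_wrapper(frames, mag=2.0)
zoomed = zoom2x(frames)
print(zoomed.shape)  # (4, 128, 128)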
Example no. 18
# The number of photons to simulate for each optical depth
N_photons = 100

# # Set a counter for the number of photons absorbed.  Not used for momentum calculation.
# N_absorbed = cp.zeros(N_atm)

# Henyey-Greenstein parameters
g = [-1, -0.5, 0.001, 0.5, 1]

# Keeping track of the angles photons escape at.
escape_mu = cp.array([])

g = [0.0001]

# Create an array of wavelengths.  Units in nm.
Wavelengths = cp.linspace(100, 500, 50)

# Pick a uniform grain size, units in micrometers.

# a = 1

# We will use units of h*nu/c = 1, We can change this or iterate over a list of frequencies later.
photon_momentum = 1

# Total initial momentum in the photons
momentum_i = photon_momentum * N_photons

# Troubleshooting flag
flag = 0

# step counter, used in troubleshooting.
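The comments above set up a photon-scattering run driven by a Henyey-Greenstein phase function. A minimal sketch of drawing scattering cosines from that phase function with CuPy (the inverse-CDF formula is the standard one, not code from the original script):

import cupy as cp

def sample_hg_mu(g, n):
    # Inverse-CDF sampling of cos(theta) for the Henyey-Greenstein phase function.
    xi = cp.random.rand(n)
    if abs(g) < 1e-3:
        return 2 * xi - 1  # nearly isotropic limit
    return (1 + g**2 - ((1 - g**2) / (1 - g + 2 * g * xi))**2) / (2 * g)

mu = sample_hg_mu(0.5, N_photons)  # one scattering cosine per simulated photon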
Example no. 19
def catmullrom_zoom_wrapper(
    images: cp.array,
    mag: float,
    frame_n: int,
) -> cp.array:
    """ Zoom image using catmull_rom algorithm """

    coeff = 0.5 * cp.array(
        [[0, 2, 0, 0], [-1, 0, 1, 0], [2, -5, 4, -1], [-1, 3, -3, 1]],
        dtype=cp.float32).T
    h, w = images.shape[1:3]
    # First interpolate in Y direction
    yCoordinate = cp.linspace(0, h - 1 / mag, int(h * mag), dtype=cp.float32)
    xCoordinate = cp.linspace(0, w - 1 / mag, int(w * mag), dtype=cp.float32)

    def generate_q_u_matrix(x_coordinate: cp.array,
                            y_coordinate: cp.array) -> tuple:
        flatten_flag = x_coordinate.ndim > 1
        if flatten_flag:
            x_coordinate = x_coordinate.flatten()
            y_coordinate = y_coordinate.flatten()

        t, u = cp.modf(y_coordinate)
        u = u.astype(int)
        uy = cp.vstack([
            cp.minimum(cp.maximum(u - 1, 0), h - 1),
            cp.minimum(cp.maximum(u, 0), h - 1),
            cp.minimum(cp.maximum(u + 1, 0), h - 1),
            cp.minimum(cp.maximum(u + 2, 0), h - 1),
        ]).astype(int)
        Qy = cp.dot(
            coeff,
            cp.vstack([
                cp.ones_like(t, dtype=cp.float32), t,
                cp.power(t, 2),
                cp.power(t, 3)
            ]))
        t, u = cp.modf(x_coordinate)
        u = u.astype(int)
        ux = cp.vstack([
            cp.minimum(cp.maximum(u - 1, 0), w - 1),
            cp.minimum(cp.maximum(u, 0), w - 1),
            cp.minimum(cp.maximum(u + 1, 0), w - 1),
            cp.minimum(cp.maximum(u + 2, 0), w - 1),
        ])
        Qx = cp.dot(
            coeff,
            cp.vstack([
                cp.ones_like(t, dtype=cp.float32), t,
                cp.power(t, 2),
                cp.power(t, 3)
            ]))

        if flatten_flag:
            Qx = Qx.reshape(4, frame_n, int(w * mag)).transpose(1, 0, 2).copy()
            Qy = Qy.reshape(4, frame_n, int(h * mag)).transpose(1, 0, 2).copy()
            ux = ux.reshape(4, frame_n, int(w * mag)).transpose(1, 0, 2).copy()
            uy = uy.reshape(4, frame_n, int(h * mag)).transpose(1, 0, 2).copy()
        return Qx, Qy, ux, uy

    global_Qx, global_Qy, global_ux, global_uy = generate_q_u_matrix(
        xCoordinate, yCoordinate)
    mat_temp = cp.empty((frame_n, int(w * mag), h), dtype=cp.float32)
    threads_per_block = (1, 16, 16)
    kernel_file = get_kernel_path("catmull_rom_zoom.cu")
    config1 = {"FRAME_N": frame_n, "W": w, "H": h, "MAG": mag}
    config2 = {"FRAME_N": frame_n, "W": h, "H": int(w * mag), "MAG": mag}
    code1 = read_cu_code(kernel_file, params=config1)
    code2 = read_cu_code(kernel_file, params=config2)
    compile_options = ("--use_fast_math", )
    _cmlr_zoom_x_T = cp.RawKernel(code1,
                                  "cmlr_zoom_x_T",
                                  options=compile_options)
    _cmlr_zoom_y_T = cp.RawKernel(code2,
                                  "cmlr_zoom_x_T",
                                  options=compile_options)

    def catmullrom_zoom(image_mat: cp.array,
                        out: cp.array,
                        drift=None) -> cp.array:
        """the function to zoom image matrix"""

        if drift is not None:
            drift_x, drift_y = drift
            y_coordinate = cp.expand_dims(
                cp.linspace(0, h - 1 / mag, int(h * mag), dtype=cp.float32),
                0).repeat(frame_n, axis=0)
            x_coordinate = cp.expand_dims(
                cp.linspace(0, w - 1 / mag, int(w * mag), dtype=cp.float32),
                0).repeat(frame_n, axis=0)
            x_coordinate += cp.expand_dims(cp.asarray(drift_x), 1)
            y_coordinate += cp.expand_dims(cp.asarray(drift_y), 1)
            Qx, Qy, ux, uy = generate_q_u_matrix(x_coordinate, y_coordinate)
            blocks_per_grid = (frame_n, ceil(h / 16), ceil(int(w * mag) / 16))
            _cmlr_zoom_x_T(blocks_per_grid, threads_per_block,
                           (image_mat, ux, Qx, Qx.shape[0], mat_temp))
            blocks_per_grid = (frame_n, ceil(int(w * mag) / 16),
                               ceil(int(h * mag) / 16))
            _cmlr_zoom_y_T(blocks_per_grid, threads_per_block,
                           (mat_temp, uy, Qy, Qx.shape[0], out))
        else:
            Qx, Qy, ux, uy = global_Qx, global_Qy, global_ux, global_uy
            # the name "cmlr_zoom_x_T" stands for catmullrom_zoom_in_x_direction_then_transpose_kernel
            # after transposing, the image needs to be put into this kernel function again (with different config),
            # zoomed both in height and width direction
            # do horizontal interpolation
            blocks_per_grid = (frame_n, ceil(h / 16), ceil(int(w * mag) / 16))
            _cmlr_zoom_x_T(blocks_per_grid, threads_per_block,
                           (image_mat, ux, Qx, 1, mat_temp))
            # do vertical interpolation
            blocks_per_grid = (frame_n, ceil(int(w * mag) / 16),
                               ceil(int(h * mag) / 16))
            _cmlr_zoom_y_T(blocks_per_grid, threads_per_block,
                           (mat_temp, uy, Qy, 1, out))

    return catmullrom_zoom
Example no. 20
def histogram(input, min, max, bins, labels=None, index=None):
    """
    Calculate the histogram of the values of an array, optionally at labels.

    Histogram calculates the frequency of values in an array within bins
    determined by `min`, `max`, and `bins`. The `labels` and `index`
    keywords can limit the scope of the histogram to specified sub-regions
    within the array.

    Parameters
    ----------
    input : array_like
        Data for which to calculate histogram.
    min, max : int
        Minimum and maximum values of range of histogram bins.
    bins : int
        Number of bins.
    labels : array_like, optional
        Labels for objects in `input`.
        If not None, must be same shape as `input`.
    index : int or sequence of ints, optional
        Label or labels for which to calculate histogram. If None, all values
        where label is greater than zero are used

    Returns
    -------
    hist : ndarray
        Histogram counts.

    Examples
    --------
    >>> a = cp.asarray([[ 0.    ,  0.2146,  0.5962,  0.    ],
    ...                 [ 0.    ,  0.7778,  0.    ,  0.    ],
    ...                 [ 0.    ,  0.    ,  0.    ,  0.    ],
    ...                 [ 0.    ,  0.    ,  0.7181,  0.2787],
    ...                 [ 0.    ,  0.    ,  0.6573,  0.3094]])
    >>> from cupyimg.scipy import ndimage
    >>> ndimage.measurements.histogram(a, 0, 1, 10)
    array([13,  0,  2,  1,  0,  1,  1,  2,  0,  0])

    With labels and no indices, non-zero elements are counted:

    >>> lbl, nlbl = ndimage.label(a)
    >>> ndimage.measurements.histogram(a, 0, 1, 10, lbl)
    array([0, 0, 2, 1, 0, 1, 1, 2, 0, 0])

    Indices can be used to count only certain objects:

    >>> ndimage.measurements.histogram(a, 0, 1, 10, lbl, 2)
    array([0, 0, 1, 1, 0, 0, 1, 1, 0, 0])

    """
    _bins = cupy.linspace(min, max, bins + 1)

    def _hist(vals):
        return cupy.histogram(vals, _bins)[0]

    return labeled_comprehension(input,
                                 labels,
                                 index,
                                 _hist,
                                 object,
                                 None,
                                 pass_positions=False)
Example no. 21
def learnAndSolve8b(ctx, sanity_plots=False, plot_widgets=None, plot_pos=None):
    """This is the main optimization. Takes the longest time and uses the GPU heavily."""

    Nbatch = ctx.intermediate.Nbatch
    params = ctx.params
    probe = ctx.probe
    ir = ctx.intermediate
    proc = ir.proc

    iorig = ir.iorig

    # TODO: move_to_config
    NrankPC = 6  # this one is the rank of the PCs, used to detect spikes with threshold crossings
    Nrank = 3  # this one is the rank of the templates

    wTEMP, wPCA = extractTemplatesfromSnippets(proc=proc,
                                               probe=probe,
                                               params=params,
                                               Nbatch=Nbatch,
                                               nPCs=NrankPC)

    # move these to the GPU
    wPCA = cp.asarray(wPCA[:, :Nrank], dtype=np.float32, order="F")
    wTEMP = cp.asarray(wTEMP, dtype=np.float32, order="F")
    wPCAd = cp.asarray(wPCA, dtype=np.float64,
                       order="F")  # convert to double for extra precision

    nt0 = params.nt0
    nt0min = params.nt0min
    nBatches = Nbatch
    NT = params.NT
    Nfilt = params.Nfilt
    Nchan = probe.Nchan

    # two variables for the same thing? number of nearest channels to each primary channel
    # TODO: unclear - let's fix this
    NchanNear = min(probe.Nchan, 32)
    Nnearest = min(probe.Nchan, 32)

    # decay of gaussian spatial mask centered on a channel
    sigmaMask = params.sigmaMask

    batchstart = list(range(0, NT * nBatches + 1, NT))

    # find the closest NchanNear channels, and the masks for those channels
    iC, mask, C2C = getClosestChannels(probe, sigmaMask, NchanNear)

    # sorting order for the batches
    isortbatches = iorig
    nhalf = int(ceil(nBatches / 2)) - 1  # halfway point

    # this batch order schedule goes through half of the data forward and backward during the model
    # fitting and then goes through the data symmetrically-out from the center during the final
    # pass
    ischedule = np.concatenate(
        (np.arange(nhalf, nBatches), np.arange(nBatches - 1, nhalf - 1, -1)))
    i1 = np.arange(nhalf - 1, -1, -1)
    i2 = np.arange(nhalf, nBatches)

    irounds = np.concatenate((ischedule, i1, i2))

    niter = irounds.size
    if irounds[niter - nBatches - 1] != nhalf:
        # this check is in here in case I do something weird when I try different schedules
        raise ValueError("Mismatch between number of batches")

    # these two flags are used to keep track of what stage of model fitting we're at
    # flag_final = 0
    flag_resort = 1

    # this is the absolute temporal offset in seconds corresponding to the start of the
    # spike sorted time segment
    t0 = 0  # ceil(params.trange(1) * ops.fs)

    nInnerIter = 60  # this is for SVD for the power iteration

    # schedule of learning rates for the model fitting part
    # starts small and goes high, it corresponds approximately to the number of spikes
    # from the past that were averaged to give rise to the current template
    pmi = cp.exp(
        -1.0 /
        cp.linspace(params.momentum[0], params.momentum[1], niter - nBatches))

    Nsum = min(
        Nchan,
        7)  # how many channels to extend out the waveform in mexgetspikes
    # lots of parameters passed into the CUDA scripts
    Params = np.array(
        [
            NT,
            Nfilt,
            params.Th[0],
            nInnerIter,
            nt0,
            Nnearest,
            Nrank,
            params.lam,
            pmi[0],
            Nchan,
            NchanNear,
            params.nt0min,
            1,
            Nsum,
            NrankPC,
            params.Th[0],
        ],
        dtype=np.float64,
    )

    # W0 has to be ordered like this
    W0 = cp.transpose(
        cp.atleast_3d(cp.asarray(wPCA, dtype=np.float64, order="F")),
        [0, 2, 1])

    # initialize the list of channels each template lives on
    iList = cp.zeros((Nnearest, Nfilt), dtype=np.int32, order="F")

    # initialize average number of spikes per batch for each template
    nsp = cp.zeros((0, 1), dtype=np.float64, order="F")

    # this flag starts 0, is set to 1 later
    Params[12] = 0

    # kernels for subsample alignment
    Ka, Kb = getKernels(params)

    p1 = 0.95  # decay of nsp estimate in each batch

    ntot = 0
    # this keeps track of dropped templates for debugging purposes
    ndrop = np.zeros(2, dtype=np.float32, order="F")

    # this is the minimum firing rate that all templates must maintain, or be dropped
    m0 = params.minFR * params.NT / params.fs

    # allocate variables when switching to extraction phase
    # this holds spike times, clusters and other info per spike
    st3 = []  # cp.zeros((int(1e7), 5), dtype=np.float32, order='F')

    # these ones store features per spike
    # Nnearest is the number of nearest templates to store features for
    fW = LargeArrayWriter(ctx.path("fW", ext=".dat"),
                          dtype=np.float32,
                          shape=(Nnearest, -1))
    # NchanNear is the number of nearest channels to take PC features from
    fWpc = LargeArrayWriter(ctx.path("fWpc", ext=".dat"),
                            dtype=np.float32,
                            shape=(NchanNear, Nrank, -1))

    for ibatch in tqdm(range(niter), desc="Optimizing templates"):
        # korder is the index of the batch at this point in the schedule
        korder = int(irounds[ibatch])
        # k is the index of the batch in absolute terms
        k = int(isortbatches[korder])
        logger.debug("Batch %d/%d, %d templates.", ibatch, niter, Nfilt)

        if ibatch > niter - nBatches - 1 and korder == nhalf:
            # this is required to revert back to the template states in the middle of the
            # batches
            W, dWU = ir.W, ir.dWU
            logger.debug("Reverted back to middle timepoint.")

        if ibatch < niter - nBatches:
            # obtain pm (the momentum factor) for this batch
            Params[8] = float(pmi[ibatch])
            pm = pmi[ibatch] * ones((Nfilt, ), dtype=np.float64, order="F")

        # loading a single batch (same as everywhere)
        offset = Nchan * batchstart[k]
        dat = proc.flat[offset:offset + NT * Nchan].reshape((-1, Nchan),
                                                            order="F")
        dataRAW = cp.asarray(dat, dtype=np.float32) / params.scaleproc

        if ibatch == 0:
            # only on the first batch, we first get a new set of spikes from the residuals,
            # which in this case is the unmodified data because we start with no templates
            # CUDA function to get spatiotemporal clips from spike detections
            dWU, cmap = mexGetSpikes2(Params, dataRAW, wTEMP, iC)

            dWU = cp.asarray(dWU, dtype=np.float64, order="F")

            # project these into the wPCA waveforms
            dWU = cp.reshape(
                cp.dot(
                    wPCAd,
                    cp.dot(wPCAd.T, dWU.reshape((dWU.shape[0], -1),
                                                order="F"))),
                dWU.shape,
                order="F",
            )

            # initialize the low-rank decomposition with standard waves
            W = W0[:, cp.ones(dWU.shape[2], dtype=np.int32), :]
            Nfilt = W.shape[1]  # update the number of filters/templates
            # initialize the number of spikes for new templates with the minimum allowed value,
            # so it doesn't get thrown back out right away
            nsp = _extend(nsp, 0, Nfilt, m0)
            Params[1] = Nfilt  # update in the CUDA parameters

        if flag_resort:
            # this is a flag to resort the order of the templates according to best peak
            # channel
            # this is important in order to have cohesive memory requests from the GPU RAM
            # max channel (either positive or negative peak)
            iW = cp.argmax(cp.abs(dWU[nt0min - 1, :, :]), axis=0)
            # iW = int32(squeeze(iW))

            isort = cp.argsort(iW)  # sort by max abs channel
            iW = iW[isort]
            W = W[:, isort, :]  # use this ordering to resort all the other template variables
            dWU = dWU[:, :, isort]
            nsp = nsp[isort]

        # decompose dWU by svd of time and space (via covariance matrix of 61 by 61 samples)
        # this uses a "warm start" by remembering the W from the previous iteration
        W, U, mu = mexSVDsmall2(Params, dWU, W, iC, iW, Ka, Kb)

        # UtU is the gram matrix of the spatial components of the low-rank SVDs
        # it tells us which pairs of templates are likely to "interfere" with each other
        # such as when we subtract off a template
        # this needs to change (but I don't know why!)
        UtU, maskU = getMeUtU(iW, iC, mask, Nnearest, Nchan)

        # main CUDA function in the whole codebase. does the iterative template matching
        # based on the current templates, gets features for these templates if requested
        # (featW, featPC),
        # gets scores for the template fits to each spike (vexp), outputs the average of
        # waveforms assigned to each cluster (dWU0),
        # and probably a few more things I forget about
        st0, id0, x0, featW, dWU0, drez, nsp0, featPC, vexp = mexMPnu8(
            Params, dataRAW, U, W, mu, iC, iW, UtU, iList, wPCA, params)

        logger.debug("%d spikes.", x0.size)

        # Sometimes nsp can get transposed (we think this has to do with it being
        # a single element in one iteration, to which elements are then added).
        # nsp, nsp0, and pm must all have the same orientation (Nfilt x 1), so
        # squeeze nsp down to a 1-D vector.
        # nsp = cp.atleast_2d(nsp)
        # nsprow, nspcol = nsp.shape
        # if nsprow < nspcol:
        #     nsp = nsp.T
        nsp = nsp.squeeze()

        # updates the templates as a running average weighted by recency
        # since some clusters have different number of spikes, we need to apply the
        # exp(pm) factor several times, and fexp is the resulting update factor
        # for each template
        fexp = np.exp(nsp0 * cp.log(pm[:Nfilt]))
        fexp = cp.reshape(fexp, (1, 1, -1), order="F")
        dWU = dWU * fexp + (1 - fexp) * (
            dWU0 / cp.reshape(cp.maximum(1, nsp0), (1, 1, -1), order="F"))
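        # equivalently: dWU <- pm**nsp0 * dWU + (1 - pm**nsp0) * (batch-average waveform),
        # so templates that collected more spikes this batch move further toward the
        # new batch average, while templates with nsp0 == 0 are left unchanged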

        # nsp just gets updated according to the fixed factor p1
        nsp = nsp * p1 + (1 - p1) * nsp0

        if ibatch == niter - nBatches - 1:
            # if we reached this point, we need to disable secondary template updates
            # like dropping, and adding new templates. We need to memorize the state of the
            # templates at this timepoint, and set the processing mode to "extraction and
            # tracking"

            flag_resort = 0  # no need to resort templates by channel any more
            # flag_final = 1  # this is the "final" pass

            # final clean up, triage templates one last time
            W, U, dWU, mu, nsp, ndrop = triageTemplates2(
                params, iW, C2C, W, U, dWU, mu, nsp, ndrop)

            # final number of templates
            Nfilt = W.shape[1]
            Params[1] = Nfilt

            # final covariance matrix between all templates
            WtW, iList = getMeWtW(W, U, Nnearest)

            # iW is the final channel assigned to each template
            iW = cp.argmax(cp.abs(dWU[nt0min - 1, :, :]), axis=0)

            # extract ALL features on the last pass
            Params[
                12] = 2  # this is a flag to output features (PC and template features)

            # different threshold on last pass?
            Params[2] = params.Th[
                -1]  # usually the threshold is much lower on the last pass

            # memorize the state of the templates
            logger.debug("Memorized middle timepoint.")
            ir.W, ir.dWU, ir.U, ir.mu = W, dWU, U, mu
            ir.Wraw = cp.zeros((U.shape[0], W.shape[0], U.shape[1]),
                               dtype=np.float64,
                               order="F")
            for n in range(U.shape[1]):
                # temporarily use U rather than Urot until I have a chance to test it
                ir.Wraw[:, :, n] = mu[n] * cp.dot(U[:, n, :], W[:, n, :].T)

        if ibatch < niter - nBatches - 1:
            # during the main "learning" phase of fitting a model
            if ibatch % 5 == 0:
                # this drops templates based on spike rates and/or similarities to
                # other templates
                W, U, dWU, mu, nsp, ndrop = triageTemplates2(
                    params, iW, C2C, W, U, dWU, mu, nsp, ndrop)

            Nfilt = W.shape[1]  # update the number of filters
            Params[1] = Nfilt

            # this adds new templates if they are detected in the residual
            dWU0, cmap = mexGetSpikes2(Params, drez, wTEMP, iC)

            if dWU0.shape[2] > 0:
                # new templates need to be integrated into the same format as all templates
                # apply PCA for smoothing purposes
                dWU0 = cp.reshape(
                    cp.dot(
                        wPCAd,
                        cp.dot(
                            wPCAd.T,
                            dWU0.reshape(
                                (dWU0.shape[0], dWU0.shape[1] * dWU0.shape[2]),
                                order="F",
                            ),
                        ),
                    ),
                    dWU0.shape,
                    order="F",
                )
                dWU = cp.concatenate((dWU, dWU0), axis=2)

                m = dWU0.shape[2]
                # initialize temporal components of waveforms
                W = _extend(W,
                            Nfilt,
                            Nfilt + m,
                            W0[:, cp.ones(m, dtype=np.int32), :],
                            axis=1)

                # initialize the number of spikes with the minimum allowed
                nsp = _extend(nsp, Nfilt, Nfilt + m,
                              params.minFR * NT / params.fs)
                # initialize the amplitude of this spike with a lowish number
                mu = _extend(mu, Nfilt, Nfilt + m, 10)

                # if the number of filters exceeds the maximum allowed, clip it
                Nfilt = min(params.Nfilt, W.shape[1])
                Params[1] = Nfilt

                W = W[:, :
                      Nfilt, :]  # remove any new filters over the maximum allowed
                dWU = dWU[:, :, :
                          Nfilt]  # remove any new filters over the maximum allowed
                nsp = nsp[:
                          Nfilt]  # remove any new filters over the maximum allowed
                mu = mu[:
                        Nfilt]  # remove any new filters over the maximum allowed

        if ibatch > niter - nBatches - 1:
            # during the final extraction pass, this keeps track of all spikes and features

            # we memorize the spatio-temporal decomposition of the waveforms at this batch
            # this is currently only used in the GUI to provide an accurate reconstruction
            # of the raw data at this time
            ir.WA[..., k] = cp.asnumpy(W)
            ir.UA[..., k] = cp.asnumpy(U)
            ir.muA[..., k] = cp.asnumpy(mu)

            # we carefully assign the correct absolute times to spikes found in this batch
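            # (each batch contributes NT - params.ntbuff new samples, so batch k starts at
            # sample (NT - params.ntbuff) * k of the recording; nt0min adds back the spike
            # alignment offset within the waveform window and ioffset removes the pre-buffer)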
            ioffset = params.ntbuff - 1
            if k == 0:
                ioffset = 0  # the first batch is special (no pre-buffer)

            toff = nt0min + t0 - ioffset + (NT - params.ntbuff) * k
            st = toff + st0

            st30 = np.c_[
                cp.asnumpy(st),  # spike times
                cp.asnumpy(id0),  # spike clusters (0-indexing)
                cp.asnumpy(x0),  # template amplitudes
                cp.asnumpy(vexp),  # residual variance of this spike
                korder *
                np.ones(st.size),  # batch from which this spike was found
            ]
            # Check the number of spikes.
            assert st30.shape[0] == featW.shape[1] == featPC.shape[2]
            st3.append(st30)
            fW.append(featW)
            fWpc.append(featPC)

            ntot = ntot + x0.size  # keeps track of total number of spikes so far

        if ibatch == niter - nBatches - 1:
            # these next three store the low-d template decompositions
            ir.WA = np.zeros((nt0, Nfilt, Nrank, nBatches),
                             dtype=np.float32,
                             order="F")
            ir.UA = np.zeros((Nchan, Nfilt, Nrank, nBatches),
                             dtype=np.float32,
                             order="F")
            ir.muA = np.zeros((Nfilt, nBatches), dtype=np.float32, order="F")

        if ibatch % 100 == 0:
            # this is some of the relevant diagnostic information to be printed during training
            logger.info(("%d / %d batches, %d units, nspks: %2.4f, mu: %2.4f, "
                         "nst0: %d, merges: %2.4f, %2.4f"), ibatch, niter,
                        Nfilt, nsp.sum(), median(mu), st0.size, *ndrop)

            if sanity_plots:
                assert plot_widgets is not None, "if sanity_plots is set, then plot_widgets cannot be None"
                plot_diagnostics(W, U, mu, nsp, plot_widgets[plot_pos])

        free_gpu_memory()

    # Close the large array writers and save the JSON metadata files to disk.
    fW.close()
    fWpc.close()

    # just display the total number of spikes
    logger.info("Found %d spikes.", ntot)

    # Save results to the ctx.intermediate object.
    ir.st3 = np.concatenate(st3, axis=0)

    # the similarity score between templates is simply the correlation,
    # taken as the max over several consecutive time delays
    ir.simScore = cp.asnumpy(cp.max(WtW, axis=2))

    # NOTE: these are now already saved by LargeArrayWriter
    # fWa = np.concatenate(fW, axis=-1)
    # fWpca = np.concatenate(fWpc, axis=-1)

    # the template features are stored in cProj, like in Kilosort1
    # ir.cProj = fWa.T
    # the neighboring template indices are stored in iNeigh
    ir.iNeigh = cp.asnumpy(iList)

    #  permute the PC projections in the right order
    # ir.cProjPC = np.transpose(fWpca, (2, 1, 0))
    # iNeighPC keeps the indices of the channels corresponding to the PC features
    ir.iNeighPC = cp.asnumpy(iC[:, iW])

    # Number of spikes.
    assert ir.st3.shape[0] == fW.shape[-1] == fWpc.shape[-1]

    # this whole next block is just done to compress the compressed templates
    # we separately svd the time components of each template, and the spatial components
    # this also requires a careful decompression function, available somewhere in the GUI code
    nKeep = min(Nchan * 3, 20)  # how many PCs to keep
    W_a = np.zeros((nt0 * Nrank, nKeep, Nfilt), dtype=np.float32)
    W_b = np.zeros((nBatches, nKeep, Nfilt), dtype=np.float32)
    U_a = np.zeros((Nchan * Nrank, nKeep, Nfilt), dtype=np.float32)
    U_b = np.zeros((nBatches, nKeep, Nfilt), dtype=np.float32)

    for j in tqdm(range(Nfilt), desc="Compressing templates"):
        # do this for every template separately
        WA = np.reshape(ir.WA[:, j, ...], (-1, nBatches), order="F")
        # svd on the GPU was faster for this, but the Python randomized CPU version
        # might be faster still
        # WA = gpuArray(WA)
        A, B, C = svdecon_cpu(WA)
        # W_a times W_b results in a reconstruction of the time components
        W_a[:, :, j] = np.dot(A[:, :nKeep], B[:nKeep, :nKeep])
        W_b[:, :, j] = C[:, :nKeep]

        UA = np.reshape(ir.UA[:, j, ...], (-1, nBatches), order="F")
        # UA = gpuArray(UA)
        A, B, C = svdecon_cpu(UA)
        # U_a times U_b results in a reconstruction of the spatial components
        U_a[:, :, j] = np.dot(A[:, :nKeep], B[:nKeep, :nKeep])
        U_b[:, :, j] = C[:, :nKeep]
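        # assuming svdecon_cpu returns (U, S, V) with X = U @ S @ V.T, each template's
        # time course over batches is recovered as W_a[:, :, j] @ W_b[:, :, j].T
        # (reshaped back to (nt0, Nrank, nBatches)), and similarly for U_a / U_b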

    logger.info("Finished compressing time-varying templates.")

    return Bunch(
        wPCA=wPCA[:, :Nrank],
        wTEMP=wTEMP,
        st3=ir.st3,
        simScore=ir.simScore,
        # cProj=ir.cProj,
        # cProjPC=ir.cProjPC,
        iNeigh=ir.iNeigh,
        iNeighPC=ir.iNeighPC,
        WA=ir.WA,
        UA=ir.UA,
        W=ir.W,
        U=ir.U,
        dWU=ir.dWU,
        mu=ir.mu,
        W_a=W_a,
        W_b=W_b,
        U_a=U_a,
        U_b=U_b,
    )
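
As a side note, the momentum-weighted template update at the heart of the learning loop above can be reproduced in isolation. The following is a minimal NumPy-only sketch with toy shapes and a made-up momentum value; it mirrors only the dWU update step and is not part of the pipeline itself.

import numpy as np

# toy dimensions: nt0 time samples, Nchan channels, Nfilt templates
nt0, Nchan, Nfilt = 61, 4, 3
rng = np.random.default_rng(0)

dWU = rng.normal(size=(nt0, Nchan, Nfilt))    # current templates
dWU0 = rng.normal(size=(nt0, Nchan, Nfilt))   # summed waveforms assigned in this batch
nsp0 = np.array([5.0, 0.0, 12.0])             # spikes per template in this batch
pm = np.full(Nfilt, np.exp(-1.0 / 400.0))     # toy per-template momentum factor

# fexp = pm**nsp0: templates that received more spikes move further toward the
# batch average; a template with no spikes (fexp == 1) is left unchanged
fexp = np.exp(nsp0 * np.log(pm)).reshape(1, 1, -1)
batch_mean = dWU0 / np.maximum(1, nsp0).reshape(1, 1, -1)
dWU = dWU * fexp + (1 - fexp) * batch_mean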
Example n. 22
0
 def test_callable_funclist(self, dtype):
     x = cupy.linspace(-2, 4, 6, dtype=dtype)
     condlist = [x < 0, x > 0]
     funclist = [lambda x: -x, lambda x: x]
     with pytest.raises(NotImplementedError):
         cupy.piecewise(x, condlist, funclist)
Example n. 23
0
) -> Array:
    """
    Array API compatible wrapper for :py:func:`np.linspace <numpy.linspace>`.

    See its docstring for more information.
    """
    from ._array_object import Array

    _check_valid_dtype(dtype)
    if device is None:
        device = _Device()  # current device
    if device is not None and not isinstance(device, _Device):
        raise ValueError(f"Unsupported device {device!r}")
    with device:
        return Array._new(
            np.linspace(start, stop, num, dtype=dtype, endpoint=endpoint))


def meshgrid(*arrays: Array, indexing: str = "xy") -> List[Array]:
    """
    Array API compatible wrapper for :py:func:`np.meshgrid <numpy.meshgrid>`.

    See its docstring for more information.
    """
    from ._array_object import Array

    return [
        Array._new(array) for array in np.meshgrid(*[a._array for a in arrays],
                                                   indexing=indexing)
    ]
Example n. 24
0
def convolve_gpu_chunked(x,
                         b,
                         pad='flip',
                         nwin=DEFAULT_CONV_CHUNK,
                         ntap=500,
                         overlap=2000):
    """Chunked GPU FFT-based convolution for large arrays.

    This memory-controlled version splits the signal into chunks of nwin samples.
    Each chunk is tapered in and out; the overlap is chosen to get clear of the tapers,
    and the splicing of overlapping chunks is done with a cosine ramp.

    :param pad: None, 'zeros', 'constant', or 'flip'

    """
    x = cp.asarray(x)
    b = cp.asarray(b)
    assert b.ndim == 1
    n = x.shape[0]
    assert overlap >= 2 * ntap
    # create variables, the gain is to control the splicing
    y = cp.zeros_like(x)
    gain = cp.zeros(n)
    # compute tapers/constants outside of the loop
    taper_in = (-cp.cos(cp.linspace(0, 1, ntap) * cp.pi) / 2 + 0.5)[:,
                                                                    cp.newaxis]
    taper_out = cp.flipud(taper_in)
    assert b.shape[0] < nwin < n
    # this is the convolution wavelet that we shift to be 0 lag
    bp = cp.pad(b, (0, nwin - b.shape[0]), mode='constant')
    bp = cp.roll(bp, -b.size // 2 + 1)
    bp = cp.fft.rfft(bp, n=nwin)[:, cp.newaxis]
    # this is used to splice windows together: cosine taper. The reversed taper is complementary
    scale = cp.minimum(
        cp.maximum(0, cp.linspace(-0.5, 1.5, overlap - 2 * ntap)), 1)
    splice = (-cp.cos(scale * cp.pi) / 2 + 0.5)[:, cp.newaxis]
    # loop over the signal by chunks and apply convolution in frequency domain
    first = 0
    while True:
        first = min(n - nwin, first)
        last = min(first + nwin, n)
        # the convolution
        x_ = cp.copy(x[first:last, :])
        x_[:ntap] *= taper_in
        x_[-ntap:] *= taper_out
        x_ = cp.fft.irfft(cp.fft.rfft(x_, axis=0, n=nwin) * bp, axis=0, n=nwin)
        # this is to check the gain of summing the windows
        tt = cp.ones(nwin)
        tt[:ntap] *= taper_in[:, 0]
        tt[-ntap:] *= taper_out[:, 0]
        # the full overlap is outside of the tapers: we apply a cosine splicing to this part only
        if first > 0:
            full_overlap_first = first + ntap
            full_overlap_last = first + overlap - ntap
            gain[full_overlap_first:full_overlap_last] *= (1. - splice[:, 0])
            gain[full_overlap_first:full_overlap_last] += tt[ntap:overlap -
                                                             ntap] * splice[:,
                                                                            0]
            gain[full_overlap_last:last] = tt[overlap - ntap:]
            y[full_overlap_first:full_overlap_last] *= (1. - splice)
            y[full_overlap_first:full_overlap_last] += x_[ntap:overlap -
                                                          ntap] * splice
            y[full_overlap_last:last] = x_[overlap - ntap:]
        else:
            y[first:last, :] = x_
            gain[first:last] = tt
        if last == n:
            break
        first += nwin - overlap
    return y
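
A hypothetical usage sketch on toy data (the chunk size and taper lengths below are arbitrary choices, and a CUDA device is assumed):

import cupy as cp

n_samples, n_channels = 100_000, 4
x = cp.random.standard_normal((n_samples, n_channels), dtype=cp.float32)

# short box-car smoothing kernel; it must be shorter than the chunk size nwin
b = cp.ones(101, dtype=cp.float32) / 101

y = convolve_gpu_chunked(x, b, pad='flip', nwin=10_000, ntap=500, overlap=2000)
assert y.shape == x.shape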
Example n. 25
0
 def setup(self):
     self.d = np.linspace(0, 100, 100000)
Example n. 26
0
def powerspectrum(*u,
                  average=True,
                  diagnostics=False,
                  kmin=None,
                  kmax=None,
                  npts=None,
                  compute_fft=True,
                  compute_sqr=True,
                  double=True,
                  bench=False,
                  **kwargs):
    """
    See the documentation for the :ref:`CPU version<powerspectrum>`.

    Parameters
    ----------
    u : `np.ndarray`
        Scalar or vector field.
        If vector data, pass arguments as ``u1, u2, ..., un``
        where ``ui`` is the ith vector component.
        Each ``ui`` can be 1D, 2D, or 3D, and all must have the
        same ``ui.shape`` and ``ui.dtype``.
    average : `bool`, optional
        If ``True``, average over values in a given
        bin and multiply by the bin volume.
        If ``False``, compute the sum.
    diagnostics : `bool`, optional
        Return the standard deviation and number of points
        in a particular radial bin.
    kmin : `int` or `float`, optional
        Minimum wavenumber in power spectrum bins.
        If ``None``, ``kmin = 1``.
    kmax : `int` or `float`, optional
        Maximum wavenumber in power spectrum bins.
        If ``None``, ``kmax = max(u.shape)//2``.
    npts : `int`, optional
        Number of modes between ``kmin`` and ``kmax``,
        inclusive.
        If ``None``, ``npts = kmax-kmin+1``.
    compute_fft : `bool`, optional
        If ``False``, do not take the FFT of the input data.
        FFTs should not be passed with the zero-frequency
        component in the center.
    compute_sqr : `bool`, optional
        If ``False``, sum the real part of the FFT. This can be
        useful for purely real FFTs, where the sign of the
        FFT is useful information. If ``True``, take the square
        as usual.
    double : `bool`, optional
        If ``False``, calculate FFTs in single precision.
        Useful for saving memory.
    bench : `bool`, optional
        Print message for time of calculation.
    kwargs
        Additional keyword arguments passed to
        ``cupyx.scipy.fft.fftn`` or ``cupyx.scipy.fft.rfftn``.

    Returns
    -------
    spectrum : `np.ndarray`, shape `(npts,)`
        Radially averaged power spectrum :math:`P(k)`.
    kn : `np.ndarray`, shape `(npts,)`
        Left edges of radial bins :math:`k`.
    counts : `np.ndarray`, shape `(npts,)`, optional
        Number of points :math:`N_k` in each bin.
    vol : `np.ndarray`, shape `(npts,)`, optional
        Volume :math:`V_k` of each bin.
    stdev : `np.ndarray`, shape `(npts,)`, optional
        Standard deviation multiplied with :math:`V_k`
        in each bin.
    """
    if bench:
        t0 = time()

    shape = u[0].shape
    ndim = u[0].ndim
    ncomp = len(u)
    N = max(u[0].shape)

    if np.issubdtype(u[0].dtype, np.floating):
        real = True
        dtype = cp.float64 if double else cp.float32
    else:
        real = False
        dtype = cp.complex128 if double else cp.complex64

    if ndim not in [1, 2, 3]:
        raise ValueError("Dimension of image must be 1, 2, or 3.")

    # Get memory pools
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    # Compute power spectral density with memory efficiency
    density = None
    comp = cp.empty(shape, dtype=dtype)
    for i in range(ncomp):
        temp = cp.asarray(u[i], dtype=dtype)
        comp[...] = temp
        del temp
        if compute_fft:
            fft = _cufftn(comp, **kwargs)
        else:
            fft = comp
        if density is None:
            fftshape = fft.shape
            density = cp.zeros(fft.shape)
        if compute_sqr:
            density[...] += _mod_squared(fft)
        else:
            density[...] += cp.real(fft)
        del fft
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()

    # Need to double count if using rfftn
    if real and compute_fft:
        density[...] *= 2

    # Get radial coordinates
    kr = cp.asarray(_kmag_sampling(fftshape, real=real).astype(np.float32))

    # Flatten arrays
    kr = kr.ravel()
    density = density.ravel()

    # Get minimum and maximum k for binning if not given
    if kmin is None:
        kmin = 1
    if kmax is None:
        kmax = int(N / 2)
    if npts is None:
        npts = kmax - kmin + 1

    # Generate bins
    kn = cp.linspace(kmin, kmax, npts, endpoint=True)  # Left edges of bins
    dk = kn[1] - kn[0]

    # Radially average power spectral density
    if ndim == 1:
        fac = 2 * np.pi
    elif ndim == 2:
        fac = 4 * np.pi
    elif ndim == 3:
        fac = 4. / 3. * np.pi
    spectrum = cp.zeros_like(kn)
    stdev = cp.zeros_like(kn)
    vol = cp.zeros_like(kn)
    counts = cp.zeros(kn.shape, dtype=np.int64)
    for i, ki in enumerate(kn):
        ii = cp.where(cp.logical_and(kr >= ki, kr < ki + dk))
        samples = density[ii]
        vk = fac * cp.pi * ((ki + dk)**ndim - (ki)**ndim)
        if average:
            spectrum[i] = vk * cp.mean(samples)
        else:
            spectrum[i] = cp.sum(samples)
        if diagnostics:
            Nk = samples.size
            stdev[i] = vk * cp.std(samples, ddof=1)
            vol[i] = vk
            counts[i] = Nk

    del density, kr
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    result = [spectrum.get(), kn.get()]
    if diagnostics:
        result.extend([counts.get(), vol.get(), stdev.get()])

    return tuple(result)
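
A hypothetical usage sketch on a small random 2-D field (this assumes the module-level helpers such as _cufftn, _mod_squared and _kmag_sampling are available, and that a CUDA device is present):

import numpy as np

u = np.random.standard_normal((64, 64))  # toy 2-D scalar field

# radially averaged power spectrum with per-bin diagnostics
spectrum, kn, counts, vol, stdev = powerspectrum(u, average=True, diagnostics=True)
print(kn[:3], spectrum[:3], counts[:3])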
Example n. 27
0
 def setup(self):
     self.d = np.linspace(0, 100, 200000).reshape((-1, 2))
Example n. 28
0
    def WISHrun(self, y0: np.ndarray, SLM: np.ndarray, delta3: float, delta4: float, N_os: int, N_iter: int,\
                N_batch: int, plot: bool=True):
        """
        Runs the WISH algorithm using a Gerchberg-Saxton loop for phase retrieval.
        :param y0: Target modulated amplitudes in the sensor plane
        :param SLM: SLM modulation patterns
        :param delta3: Apparent sampling size of the SLM as seen from the sensor plane
        :param delta4: Sampling size of the sensor plane
        :param N_os: Number of observations per image
        :param N_iter: Maximum number of Gerchberg-Saxton iterations
        :param N_batch: Number of batches (modulations)
        :param plot: If True, plots the progress of the retrieval every 10 iterations
        :return u4_est, idx_converge: Estimated field of size (N,N) and the convergence indices to check convergence
                                      speed
        """
        wvl = self.wavelength
        z3 = self.z
        ## parameters
        N = y0.shape[0]
        k = 2 * np.pi / wvl
        #u3_batch = np.zeros((N, N, N_os), dtype=complex) # store all U3 gpu
        #u4 = np.zeros((N, N, N_os), dtype=complex) # gpu
        #y = np.zeros((N, N, N_os), dtype=complex) # store all U3 gpu
        u3_batch = cp.zeros((N, N, N_os),
                            dtype=cp.complex64)  # store all U3 gpu
        u4 = cp.zeros((N, N, N_os), dtype=cp.complex64)  # gpu
        y = cp.zeros((N, N, N_os), dtype=cp.complex64)  # store all U3 gpu

        ## initialize a3
        k = 2 * np.pi / wvl
        xx = cp.linspace(0, N - 1, N,
                         dtype=cp.float64) - (N / 2) * cp.ones(N, dtype=cp.float64)
        yy = cp.linspace(0, N - 1, N,
                         dtype=cp.float64) - (N / 2) * cp.ones(N, dtype=cp.float64)
        X, Y = float(delta4) * cp.meshgrid(
            xx, yy)[0], float(delta4) * cp.meshgrid(xx, yy)[1]
        R = cp.sqrt(X**2 + Y**2)
        Q = cp.exp(1j * (k / (2 * z3)) * R**2)
        for ii in range(N_os):
            #SLM_batch = SLM[:,:, ii]
            SLM_batch = cp.asarray(SLM[:, :, ii])
            y0_batch = y0[:, :, ii]
            #u3_batch[:,:, ii] = self.frt(y0_batch, delta4, -z3) * np.conj(SLM_batch) #y0_batch gpu
            #u3_batch[:,:, ii] = self.frt_gpu(cp.asarray(y0_batch), delta4, -z3) * cp.conj(SLM_batch) #y0_batch gpu
            u3_batch[:, :, ii] = self.frt_gpu_s(
                cp.asarray(y0_batch) / Q, delta4, -z3) * cp.conj(
                    SLM_batch)  #y0_batch gpu
        #u3 = np.mean(u3_batch, 2) # average it
        u3 = cp.mean(u3_batch, 2)

        ## Recon run : GS loop
        idx_converge = np.empty(N_iter)
        for jj in range(N_iter):
            sys.stdout.write(f"\rGS iteration {jj+1}")
            sys.stdout.flush()
            #u3_collect = np.zeros(u3.shape, dtype=complex)
            u3_collect = cp.zeros(u3.shape, dtype=cp.complex64)
            idx_converge0 = np.empty(N_batch)
            for idx_batch in range(N_batch):
                # put the correct batch into the GPU (no GPU for now)
                #SLM_batch = SLM[:,:, int(N_os * idx_batch): int(N_os * (idx_batch+1))]
                #y0_batch = y0[:,:, int(N_os * idx_batch): int(N_os * (idx_batch+1))]
                SLM_batch = cp.asarray(
                    SLM[:, :,
                        int(N_os * idx_batch):int(N_os * (idx_batch + 1))])
                y0_batch = cp.asarray(
                    y0[:, :,
                       int(N_os * idx_batch):int(N_os * (idx_batch + 1))])
                for _ in range(N_os):
                    #u4[:,:,_] = self.frt(u3 * SLM_batch[:,:,_], delta3, z3) # U4 is the field on the sensor
                    u4[:, :,
                       _] = self.frt_gpu_s(u3 * SLM_batch[:, :, _], delta3,
                                           z3)  # U4 is the field on the sensor
                    y[:, :,
                      _] = y0_batch[:, :, _] * cp.exp(1j * cp.angle(
                          u4[:, :, _]))  # force the amplitude of y to be y0
                    #u3_batch[:,:,_] = self.frt(y[:,:,_], delta4, -z3) * np.conj(SLM_batch[:,:,_])
                    u3_batch[:, :, _] = self.frt_gpu_s(
                        y[:, :, _], delta4, -z3) * cp.conj(SLM_batch[:, :, _])
                #u3_collect = u3_collect + np.mean(u3_batch, 2) # collect(add) U3 from each batch
                u3_collect = u3_collect + cp.mean(
                    u3_batch, 2)  # collect(add) U3 from each batch
                #idx_converge0[idx_batch] = np.mean(np.mean(np.mean(y0_batch,1),0)/np.sum(np.sum(np.abs(np.abs(u4)-y0_batch),1),0))
                #idx_converge0[idx_batch] = cp.asnumpy(cp.mean(cp.mean(cp.mean(y0_batch,1),0)/cp.sum(cp.sum(cp.abs(cp.abs(u4)-y0_batch),1),0)))
                # convergence index matrix for each batch
                idx_converge0[idx_batch] = cp.linalg.norm(
                    cp.abs(u4) - y0_batch) / cp.linalg.norm(y0_batch)
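                # i.e. the relative residual ||  |u4| - y0 ||_F / || y0 ||_F between the
                # estimated and the measured amplitudes for this batch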

            u3 = (u3_collect / N_batch)  # average over batches
            idx_converge[jj] = np.mean(idx_converge0)  # average over batches
            sys.stdout.write(f"  (convergence index : {idx_converge[jj]})")
            #u4_est = self.frt(u3, delta3, z3)
            u4_est = cp.asnumpy(self.frt_gpu_s(u3, delta3, z3) * Q)

            if jj % 10 == 0 and plot:
                plt.close('all')
                fig = plt.figure(0)
                fig.suptitle(f'Iteration {jj}')
                ax1 = fig.add_subplot(121)
                ax2 = fig.add_subplot(122)
                im = ax1.imshow(np.abs(u4_est), cmap='viridis')
                ax1.set_title('Amplitude')
                ax2.imshow(np.angle(u4_est), cmap='viridis')
                ax2.set_title('Phase')

                fig1 = plt.figure(1)
                ax = fig1.gca()
                ax.plot(np.arange(0, jj, 1), idx_converge[0:jj], marker='o')
                ax.set_xlabel('Iterations')
                ax.set_ylabel('Convergence estimator')
                ax.set_title('Convergence curve')
                plt.show()
                time.sleep(2)

            # exit if the matrix doesn't change much
            if jj > 1:
                if cp.abs(idx_converge[jj] -
                          idx_converge[jj - 1]) / idx_converge[jj] < 1e-4:
                    print('\nConverged. Exit the GS loop ...')
                    #idx_converge = idx_converge[0:jj]
                    idx_converge = cp.asnumpy(idx_converge[0:jj])
                    break
        return u4_est, idx_converge
Example n. 29
0
def histogram(x, bins=10):
    """Computes the histogram of a set of data.

    Args:
        x (cupy.ndarray): Input array.
        bins (int or cupy.ndarray): If ``bins`` is an int, it represents the
            number of bins. If ``bins`` is an :class:`~cupy.ndarray`, it
            represents the bin edges.

    Returns:
        tuple: ``(hist, bin_edges)`` where ``hist`` is a :class:`cupy.ndarray`
        storing the values of the histogram, and ``bin_edges`` is a
        :class:`cupy.ndarray` storing the bin edges.

    .. seealso:: :func:`numpy.histogram`
    """

    if x.dtype.kind == 'c':
        # TODO(unno): comparison between complex numbers is not implemented
        raise NotImplementedError('complex number is not supported')

    if isinstance(bins, int):
        if x.size == 0:
            min_value = 0.0
            max_value = 1.0
        else:
            min_value = float(x.min())
            max_value = float(x.max())
        if min_value == max_value:
            min_value -= 0.5
            max_value += 0.5
        bins = cupy.linspace(min_value, max_value, bins + 1)
    elif isinstance(bins, cupy.ndarray):
        if cupy.any(bins[:-1] > bins[1:]):
            raise ValueError('bins must increase monotonically.')
    else:
        raise NotImplementedError('Only int or ndarray are supported for bins')

    # atomicAdd only supports int32
    y = cupy.zeros(bins.size - 1, dtype=cupy.int32)

    # TODO(unno): use searchsorted
    cupy.ElementwiseKernel(
        'S x, raw T bins, int32 n_bins', 'raw int32 y', '''
        if (x < bins[0] or bins[n_bins - 1] < x) {
            return;
        }
        int high = n_bins - 1;
        int low = 0;

        while (high - low > 1) {
            int mid = (high + low) / 2;
            if (bins[mid] <= x) {
                low = mid;
            } else {
                high = mid;
            }
        }
        atomicAdd(&y[low], 1);
        ''')(x, bins, bins.size, y)
    return y.astype('l'), bins
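
A small usage sketch (uniform random data, 10 equally spaced bins; assumes a CUDA device):

import cupy

x = cupy.random.rand(10_000, dtype=cupy.float32)
hist, edges = histogram(x, bins=10)
assert int(hist.sum()) == x.size  # every sample falls inside [x.min(), x.max()]
print(edges)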