Example #1
def _slogdet_one(a):
    util._assert_rank2(a)
    util._assert_nd_squareness(a)
    dtype = a.dtype

    handle = device.get_cusolver_handle()
    m = len(a)
    ipiv = cupy.empty(m, 'i')
    info = cupy.empty((), 'i')

    # Need to make a copy because getrf works inplace
    a_copy = a.copy(order='F')

    if dtype == 'f':
        getrf_bufferSize = cusolver.sgetrf_bufferSize
        getrf = cusolver.sgetrf
    else:
        getrf_bufferSize = cusolver.dgetrf_bufferSize
        getrf = cusolver.dgetrf

    buffersize = getrf_bufferSize(handle, m, m, a_copy.data.ptr, m)
    workspace = cupy.empty(buffersize, dtype=dtype)
    getrf(handle, m, m, a_copy.data.ptr, m, workspace.data.ptr,
          ipiv.data.ptr, info.data.ptr)

    if info[()] == 0:
        diag = cupy.diag(a_copy)
        # ipiv is 1-origin
        non_zero = (cupy.count_nonzero(ipiv != cupy.arange(1, m + 1)) +
                    cupy.count_nonzero(diag < 0))
        # Note: sign == (-1) ** (non_zero % 2)
        sign = (non_zero % 2) * -2 + 1
        logdet = cupy.log(abs(diag)).sum()
    else:
        sign = cupy.array(0.0, dtype=dtype)
        logdet = cupy.array(float('-inf'), dtype)

    return sign, logdet
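
The sign bookkeeping above maps an even count of row swaps plus negative pivots to +1 and an odd count to -1, i.e. (-1) ** non_zero. A minimal sketch (CuPy only, independent of the function above) checking that the closed form matches the parity:

# Minimal sketch: (non_zero % 2) * -2 + 1 equals (-1) ** non_zero for the
# swap/negative-pivot counts produced by the LU factorization.
import cupy

non_zero = cupy.arange(6)                           # example counts 0..5
sign_formula = (non_zero % 2) * -2 + 1              # 1, -1, 1, -1, 1, -1
sign_parity = cupy.where(non_zero % 2 == 0, 1, -1)  # (-1) ** non_zero
assert bool((sign_formula == sign_parity).all())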
Example #2
    def liquidize(self, intens, sigma_A, gamma_A):
        '''Apply liquidization transform on given intensity'''
        s_sq = (2. * cp.pi * sigma_A * self.dgen.qrad)**2
        patt = cp.fft.fftshift(cp.fft.fftn(cp.fft.ifftshift(intens)))

        if self.slimits.max() > 2. * np.pi * sigma_A / self.res_max:
            n_max = np.where(
                self.slimits > 2. * np.pi * sigma_A / self.res_max)[0][0] + 1
        else:
            print('No effect of liquid-like motions with these parameters')
            return intens

        liq = cp.zeros_like(intens)
        for n in range(n_max):
            kernel = cp.exp(-n * self.urad / gamma_A)
            weight = cp.exp(-s_sq + n * cp.log(s_sq) -
                            float(special.loggamma(n + 1)))
            liq += weight * cp.abs(cp.fft.fftshift(cp.fft.ifftn(
                patt * kernel)))
            sys.stderr.write('\rLiquidizing: %d/%d' % (n + 1, n_max))
        sys.stderr.write('\n')

        return liq
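
The per-order weight exp(-s_sq + n*log(s_sq) - loggamma(n+1)) used above is the Poisson pmf with mean s_sq, so the weights over all orders n sum to 1 and truncating at n_max only discards negligible mass. A minimal sketch (NumPy/SciPy only, independent of the class above; s_sq is an arbitrary made-up value):

# Minimal sketch: the liquidization weights form a Poisson pmf and sum to 1.
import numpy as np
from scipy import special

s_sq = 4.2                                   # stands in for (2*pi*sigma*q)^2
n = np.arange(200)                           # enough orders for the tail to vanish
weights = np.exp(-s_sq + n * np.log(s_sq) - special.loggamma(n + 1))
print(weights.sum())                         # ~1.0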
Example #3
    def forward(self, x, t):
        if x.ndim == 2:  # when a mini-batch is used
            x = x - x.max(axis=1, keepdims=True)
            x = cp.exp(x)
            y = x / x.sum(axis=1, keepdims=True)
        elif x.ndim == 1:
            x = x - cp.max(x)
            y = cp.exp(x) / cp.sum(cp.exp(x))

        if y.ndim == 1:
            t = t.reshape(1, t.size)
            y = y.reshape(1, y.size)

        # If the labels are one-hot vectors, convert them to the correct-class indices
        if t.size == y.size:
            t = t.argmax(axis=1)

        batch_size = y.shape[0]
        # t now holds class indices, so index the probability of the correct
        # class directly (multiplying by t here would scale each term by its label)
        loss = -cp.sum(
            cp.log(y[cp.arange(batch_size), t] + 1e-7)) / batch_size
        self.y = y
        self.t = t
        return loss
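
A minimal sketch of the same softmax-plus-cross-entropy computation on a toy 2-sample batch, checked against a direct calculation (CuPy only; the score values are made up, not from the class above):

import cupy as cp

scores = cp.array([[2.0, 1.0, 0.1],
                   [0.5, 2.5, 0.3]])
labels = cp.array([0, 1])

shifted = scores - scores.max(axis=1, keepdims=True)       # numerically stable softmax
probs = cp.exp(shifted) / cp.exp(shifted).sum(axis=1, keepdims=True)
loss = -cp.log(probs[cp.arange(2), labels] + 1e-7).mean()  # mean negative log-likelihood
print(float(loss))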
Example #4
    def rayleigh(self, scale=1.0, size=None, dtype=float):
        """Returns an array of samples drawn from a Rayleigh distribution.

        .. warning::

            This function may synchronize the device.

        .. seealso::
            :func:`cupy.random.rayleigh` for full documentation,
            :meth:`numpy.random.RandomState.rayleigh
            <numpy.random.mtrand.RandomState.rayleigh>`
        """
        scale = cupy.asarray(scale)
        if size is None:
            size = scale.shape
        if cupy.any(scale < 0):  # synchronize!
            raise ValueError('scale < 0')
        x = self._random_sample_raw(size, dtype)
        x = cupy.log(x, out=x)
        x = cupy.multiply(x, -2., out=x)
        x = cupy.sqrt(x, out=x)
        x = cupy.multiply(x, scale, out=x)
        return x
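
The body is inverse-transform sampling: for U uniform on (0, 1], scale * sqrt(-2 ln U) follows a Rayleigh(scale) distribution. A minimal sketch (CuPy only) comparing the sample mean with the analytic mean scale * sqrt(pi / 2):

import math
import cupy

scale = 2.0
u = cupy.random.random_sample(100000)              # uniform on [0, 1)
x = scale * cupy.sqrt(-2.0 * cupy.log(1.0 - u))    # 1 - u avoids log(0)
print(float(x.mean()), scale * math.sqrt(math.pi / 2))   # should be close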
Example #5
def o_f2(imgs, hres_size, row=None, do_fil=False, show=False):
    """
    Fourier Spectrum Initialization #2
    ----------------------------------
    Mean Image > pad with reflect padding > sqrt > Ft > (optional) filter Ft with 2 * cutoff_freq > return Ft
    """
    im = cp.array(imgs).mean(0)
    pad = (hres_size[0] - imgs[0].shape[0]) // 2
    im = cp.pad(cp.array(im), [(pad, pad), (pad, pad)], mode='reflect')
    f = Ft(cp.sqrt(im))

    if do_fil:
        _orig = hres_size[0] // 2 - 1
        CUTOFF_FREQ_px = get_cutoff(row)
        fil = np.zeros(hres_size)
        fil = cp.array(
            cv2.circle(fil, (_orig, _orig), 2 * CUTOFF_FREQ_px, 1, -1))
        f = f * fil
    if show:
        plt.imshow(cp.asnumpy(cp.log(abs(f) + 1e-7)))
        plt.title(f'o_f2 {"without" if not do_fil else "with"} filtering')
        plt.show()

    return f
Example #6
    def __extended_likelihood(self, params):
        data = self.__data_amplitude.calculate(params)
        mcdata = self.__monte_carlo_amplitude.calculate(params)

        if self.__data_amplitude.USE_GPU:
            likelihood_data = cp.sum(self.sweight * cp.log(data))
            likelihood_mc = cp.sum(self.mcweight * mcdata)
            return cp.asnumpy(
                likelihood_data - self.__generated * likelihood_mc
            )
        else:
            likelihood_data = ne.evaluate(
                "sum(sw * log(data))", local_dict={
                    "sw": self.sweight,
                    "data": data
                }
            )
            likelihood_mc = ne.evaluate(
                "sum(mcw * mcdata)", local_dict={
                    "mcw": self.mcweight,
                    "mcdata": mcdata
                }
            )
            return likelihood_data - self.__generated * likelihood_mc
Example #7
    def _acausal_classifier_gpu(filter_posterior, movement_state_transition,
                                discrete_state_transition, observed_position_bin,
                                uniform):
        '''
        Parameters
        ----------
        filter_posterior : ndarray, shape (n_time, 2, n_position_bins)
        movement_state_transition : ndarray, shape (n_position_bins,
                                                    n_position_bins)
        discrete_state_transition : ndarray, shape (n_time, 2)
            discrete_state_transition[k, 0] = Pr(I_{k} = 1 | I_{k-1} = 0, v_{k})
            discrete_state_transition[k, 1] = Pr(I_{k} = 1 | I_{k-1} = 1, v_{k})
        observed_position_bin : ndarray, shape (n_time,)
            Which position bin is the animal in.
        position_bin_size : float

        Returns
        -------
        smoother_posterior : ndarray, shape (n_time, 2, n_position_bins)
            p(x_{k + 1}, I_{k + 1} \vert H_{1:T})
        smoother_probability : ndarray, shape (n_time, 2)
            smoother_probability[:, 0] = Pr(I_{1:T} = 0)
            smoother_probability[:, 1] = Pr(I_{1:T} = 1)
        smoother_prior : ndarray, shape (n_time, 2, n_position_bins)
            p(x_{k + 1}, I_{k + 1} \vert H_{1:k})
        weights : ndarray, shape (n_time, 2, n_position_bins)
            \sum_{I_{k+1}} \int \Big[ \frac{p(x_{k+1} \mid x_{k}, I_{k}, I_{k+1}) *
            Pr(I_{k + 1} \mid I_{k}, v_{k}) * p(x_{k+1}, I_{k+1} \mid H_{1:T})}
            {p(x_{k + 1}, I_{k + 1} \mid H_{1:k})} \Big] dx_{k+1}
        '''  # noqa

        filter_posterior = cp.asarray(filter_posterior, dtype=cp.float32)
        movement_state_transition = cp.asarray(
            movement_state_transition, dtype=cp.float32)
        discrete_state_transition = cp.asarray(
            discrete_state_transition, dtype=cp.float32)
        observed_position_bin = cp.asarray(observed_position_bin)
        uniform = cp.asarray(uniform, dtype=cp.float32)
        EPS = cp.asarray(np.spacing(1), dtype=cp.float32)

        filter_probability = cp.sum(filter_posterior, axis=2)

        smoother_posterior = cp.zeros_like(filter_posterior)
        n_time, _, n_position_bins = filter_posterior.shape

        smoother_posterior[-1] = filter_posterior[-1].copy()

        for k in cp.arange(n_time - 2, -1, -1):
            smoother_prior = cp.zeros((2, n_position_bins), dtype=cp.float32)
            weights = cp.zeros((2, n_position_bins), dtype=cp.float32)

            position_ind = observed_position_bin[k + 1]

            # Predict p(x_{k + 1}, I_{k + 1} \vert H_{1:k})
            # I_{k} = 0, I_{k + 1} = 0
            smoother_prior[0, position_ind] = (
                (1 - discrete_state_transition[k + 1, 0]) * filter_probability[k, 0])

            # I_{k} = 1, I_{k + 1} = 0
            smoother_prior[0, position_ind] += (
                (1 - discrete_state_transition[k + 1, 1]) * filter_probability[k, 1])

            # I_{k} = 0, I_{k + 1} = 1
            smoother_prior[1] = (
                discrete_state_transition[k + 1, 0] * uniform *
                filter_probability[k, 0])

            # I_{k} = 1, I_{k + 1} = 1
            smoother_prior[1] += (
                discrete_state_transition[k + 1, 1] *
                (movement_state_transition.T @ filter_posterior[k, 1]))

            # Update p(x_{k}, I_{k} \vert H_{1:k})
            ratio = cp.exp(
                cp.log(smoother_posterior[k + 1]) -
                cp.log(smoother_prior + EPS))
            integrated_ratio = cp.sum(ratio, axis=1)
            # I_{k} = 0, I_{k + 1} = 0
            weights[0] = (
                (1 - discrete_state_transition[k + 1, 0]) * ratio[0, position_ind])

            # I_{k} = 0, I_{k + 1} = 1
            weights[0] += (
                uniform * discrete_state_transition[k + 1, 0] * integrated_ratio[1])

            # I_{k} = 1, I_{k + 1} = 0
            weights[1] = (
                (1 - discrete_state_transition[k + 1, 1]) * ratio[0, position_ind])

            # I_{k} = 1, I_{k + 1} = 1
            weights[1] += (
                discrete_state_transition[k + 1, 1] *
                ratio[1] @ movement_state_transition)

            smoother_posterior[k] = weights * filter_posterior[k]
            smoother_posterior[k] /= cp.nansum(smoother_posterior[k])

        smoother_probability = cp.sum(smoother_posterior, axis=2)

        return (cp.asnumpy(smoother_posterior),
                cp.asnumpy(smoother_probability))
Example #8
    def _causal_classifier_gpu(likelihood, movement_state_transition, discrete_state_transition,
                               observed_position_bin, uniform):
        '''
        Parameters
        ----------
        likelihood : ndarray, shape (n_time, ...)
        movement_state_transition : ndarray, shape (n_position_bins,
                                                    n_position_bins)
        discrete_state_transition : ndarray, shape (n_time, 2)
            discrete_state_transition[k, 0] = Pr(I_{k} = 1 | I_{k-1} = 0, v_{k})
            discrete_state_transition[k, 1] = Pr(I_{k} = 1 | I_{k-1} = 1, v_{k})
        observed_position_bin : ndarray, shape (n_time,)
            Which position bin is the animal in.
        position_bin_size : float

        Returns
        -------
        posterior : ndarray, shape (n_time, 2, n_position_bins)
        state_probability : ndarray, shape (n_time, 2)
            state_probability[:, 0] = Pr(I_{1:T} = 0)
            state_probability[:, 1] = Pr(I_{1:T} = 1)
        prior : ndarray, shape (n_time, 2, n_position_bins)

        '''

        likelihood = cp.asarray(likelihood, dtype=cp.float32)
        movement_state_transition = cp.asarray(
            movement_state_transition, dtype=cp.float32)
        discrete_state_transition = cp.asarray(
            discrete_state_transition, dtype=cp.float32)
        observed_position_bin = cp.asarray(observed_position_bin)
        uniform = cp.asarray(uniform, dtype=cp.float32)

        n_position_bins = movement_state_transition.shape[0]
        n_time = likelihood.shape[0]
        n_states = 2

        posterior = cp.zeros(
            (n_time, n_states, n_position_bins), dtype=cp.float32)
        state_probability = cp.zeros((n_time, n_states), dtype=cp.float32)

        # Initial Conditions
        posterior[0, 0, observed_position_bin[0]] = likelihood[0, 0, 0]
        norm = cp.nansum(posterior[0])
        data_log_likelihood = cp.log(norm)
        posterior[0] /= norm
        state_probability[0] = cp.sum(posterior[0], axis=1)

        for k in np.arange(1, n_time):
            prior = cp.zeros((n_states, n_position_bins), dtype=cp.float32)
            position_ind = observed_position_bin[k]
            # I_{k - 1} = 0, I_{k} = 0
            prior[0, position_ind] = (
                (1 - discrete_state_transition[k, 0]) * state_probability[k - 1, 0])
            # I_{k - 1} = 1, I_{k} = 0
            prior[0, position_ind] += (
                (1 - discrete_state_transition[k, 1]) * state_probability[k - 1, 1])

            # I_{k - 1} = 0, I_{k} = 1
            prior[1] = (discrete_state_transition[k, 0] * uniform *
                        state_probability[k - 1, 0])
            # I_{k - 1} = 1, I_{k} = 1
            prior[1] += (
                discrete_state_transition[k, 1] *
                (movement_state_transition.T @ posterior[k - 1, 1]))

            posterior[k] = prior * likelihood[k]
            norm = cp.nansum(posterior[k])
            data_log_likelihood += cp.log(norm)
            posterior[k] /= norm

            state_probability[k] = cp.sum(posterior[k], axis=1)

        return (cp.asnumpy(posterior),
                cp.asnumpy(state_probability),
                data_log_likelihood)
Example #9
def convolutional_barycenter_gpu(Hv,
                                 reg,
                                 alpha,
                                 stabThresh=1e-30,
                                 niter=1500,
                                 tol=1e-9,
                                 sharpening=False,
                                 verbose=False):
    """Main function solving the Wasserstein barycenter problem on GPU

    Arguments:
        Hv {Set of distributions (cparray)} --
        reg {regularization term "gamma"} -- float greater than 0, generally equal to the size of the space / 40
        alpha {list} -- set of weights

    Keyword Arguments:
        stabThresh {float} -- Stabilization threshold to prevent division by 0 (default: {1e-30})
        niter {int} -- Maximum number of loop iterations (default: {1500})
        tol {float} -- convergence tolerance at which point iterations stop (default: {1e-9})
        sharpening {bool} -- Whether or not entropic sharpening is used (default: {False})
        verbose {bool} -- verbose option

    Returns:
        cparray -- solution of the weighted Wasserstein barycenter problem
    """
    def K(x):
        return cp.array(gaussian_filter(cp.asnumpy(x), sigma=reg))

    def to_find_root(barycenter, H0, beta):
        return entropy(barycenter**beta) - H0

    alpha = cp.array(alpha)
    alpha = alpha / alpha.sum()
    Hv = cp.array(Hv)
    mean_weights = (Hv[0].sum() + Hv[1].sum()) / 2.
    #print('mean weights', mean_weights)
    for i in range(len(Hv)):
        Hv[i] = Hv[i] / Hv[i].sum()
    v = cp.ones(Hv.shape)
    Kw = cp.ones(Hv.shape)

    entropy_max = max_entropy(Hv)
    barycenter = cp.zeros(Hv[0].shape)

    change = 1

    for j in range(niter):
        t0 = time.time()
        barycenterOld = barycenter

        barycenter = cp.zeros_like(Hv[0, :, :])
        for i in range(Hv.shape[0]):

            Kw[i, :, :] = K(Hv[i, :, :] /
                            cp.maximum(stabThresh, K(v[i, :, :])))
            barycenter += alpha[i] * cp.log(
                cp.maximum(stabThresh, v[i, :, :] * Kw[i, :, :]))

        barycenter = cp.exp(barycenter)
        change = cp.sum(cp.abs(barycenter - barycenterOld))
        if sharpening:
            if (entropy(barycenter)) > (entropy_max):

                beta = newton(
                    lambda beta: to_find_root(barycenter, entropy_max, beta),
                    1,
                    tol=1e-6)
                if beta < 0:
                    beta = 1

            else:
                beta = 1
            barycenter = barycenter**beta

        for i in range(Hv.shape[0]):
            v[i, :, :] = barycenter / cp.maximum(stabThresh, Kw[i, :, :])

        if verbose:
            #sys.stdout('output.log','a')
            print("iter : ", j, "change : ", change, 'time :',
                  time.time() - t0)
        if change < tol:
            break

    return cp.asnumpy(barycenter)
Example #10
    def choice(self, a, size=None, replace=True, p=None):
        """Returns an array of random values from a given 1-D array.

        .. seealso::
            :func:`cupy.random.choice` for full document,
            :meth:`numpy.random.choice`

        """
        if a is None:
            raise ValueError('a must be 1-dimensional or an integer')
        if isinstance(a, cupy.ndarray) and a.ndim == 0:
            raise NotImplementedError
        if isinstance(a, six.integer_types):
            a_size = a
            if a_size <= 0:
                raise ValueError('a must be greater than 0')
        else:
            a = cupy.array(a, copy=False)
            if a.ndim != 1:
                raise ValueError('a must be 1-dimensional or an integer')
            else:
                a_size = len(a)
                if a_size == 0:
                    raise ValueError('a must be non-empty')

        if p is not None:
            p = cupy.array(p)
            if p.ndim != 1:
                raise ValueError('p must be 1-dimensional')
            if len(p) != a_size:
                raise ValueError('a and p must have same size')
            if not (p >= 0).all():
                raise ValueError('probabilities are not non-negative')
            p_sum = cupy.sum(p).get()
            if not numpy.allclose(p_sum, 1):
                raise ValueError('probabilities do not sum to 1')

        if not replace:
            raise NotImplementedError

        if size is None:
            raise NotImplementedError
        shape = size
        size = numpy.prod(shape)

        if p is not None:
            p = cupy.broadcast_to(p, (size, a_size))
            # Gumbel-max trick: argmax(log p + Gumbel noise) samples index i with probability p[i]
            index = cupy.argmax(cupy.log(p) +
                                cupy.random.gumbel(size=(size, a_size)),
                                axis=1)
            if not isinstance(shape, six.integer_types):
                index = cupy.reshape(index, shape)
        else:
            index = cupy.random.randint(0, a_size, size=shape)
            # Align the dtype with NumPy
            index = index.astype(cupy.int64, copy=False)

        if isinstance(a, six.integer_types):
            return index

        if index.ndim == 0:
            return cupy.array(a[index], dtype=a.dtype)

        return a[index]
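
The weighted branch relies on the Gumbel-max trick: argmax(log p + Gumbel noise) draws an index with probability p[index]. A minimal standalone sketch (CuPy only; the probability vector is made up) checking the empirical frequencies:

import cupy

p = cupy.array([0.2, 0.5, 0.3])
n_draws = 100000
noise = cupy.random.gumbel(size=(n_draws, p.size))
index = cupy.argmax(cupy.log(p) + noise, axis=1)
print(cupy.bincount(index, minlength=p.size) / n_draws)   # ~[0.2, 0.5, 0.3]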
Example #11
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = (1./m) * (-np.dot(Y, np.log(AL).T) - np.dot(1-Y, np.log(1 - AL).T))
    cost = np.squeeze(cost)
    return cost
Example #12
def mi_model_1d_gpu_gd(x, y, biascorrect=False, demeaned=False):
    """Mutual information between a Gaussian and a discrete variable in bits.
    This method is based on ANOVA style model comparison.
    I = mi_model_gd(x,y) returns the MI between the (possibly multidimensional)
    Gaussian variable x and the discrete variable y.
    Parameters
    ----------
    x, y : array_like
        Gaussian arrays of shape (n_epochs,) or (n_dimensions, n_epochs). y
        must be an array of integers
    biascorrect : bool | False
        Specifies whether bias correction should be applied to the estimated MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it has
        been copula-normalized)
    Returns
    -------
    i : float
        Information shared by x and y (in bits)
    """
    # Converting to cupy array
    #x, y = cp.array(x), cp.array(y)
    x, y = cp.atleast_2d(x), cp.squeeze(y)
    if x.ndim > 2:
        raise ValueError("x must be at most 2d")
    if y.ndim > 1:
        raise ValueError("only univariate discrete variables supported")
    if not cp.issubdtype(y.dtype, cp.integer):
        raise ValueError("y should be an integer array")

    nvarx, ntrl = x.shape
    ym = cp.unique(y)

    if y.size != ntrl:
        raise ValueError("number of trials do not match")

    if not demeaned:
        x = x - x.mean(axis=1)[:, cp.newaxis]

    # class-conditional entropies
    ntrl_y = cp.zeros(len(ym))
    hcond = cp.zeros(len(ym))
    for n_yi, yi in enumerate(ym):
        idx = y == yi
        xm = x[:, idx]
        ntrl_y[n_yi] = xm.shape[1]
        xm = xm - xm.mean(axis=1)[:, cp.newaxis]
        cm = cp.dot(xm, xm.T) / float(ntrl_y[n_yi] - 1)
        chcm = cp.linalg.cholesky(cm)
        hcond[n_yi] = cp.sum(cp.log(cp.diagonal(chcm)))

    # class weights
    w = ntrl_y / float(ntrl)

    # unconditional entropy from unconditional Gaussian fit
    cx = cp.dot(x, x.T) / float(ntrl - 1)
    chc = cp.linalg.cholesky(cx)
    hunc = cp.sum(cp.log(cp.diagonal(chc)))  # + c*nvarx

    ln2 = cp.log(2)
    if biascorrect:
        vars = cp.arange(1, nvarx + 1)

        psiterms = psi((ntrl - vars).astype(float) / 2.) / 2.  # builtin float (cp.float is a removed alias)
        dterm = (ln2 - cp.log(float(ntrl - 1))) / 2.
        hunc = hunc - nvarx * dterm - psiterms.sum()

        dterm = (ln2 - cp.log((ntrl_y - 1).astype(float))) / 2.0
        psiterms = cp.zeros(len(ym))
        for vi in vars:
            idx = ntrl_y - vi
            psiterms = psiterms + psi(idx.astype(float) / 2.)
        hcond = hcond - nvarx * dterm - (psiterms / 2.)

    # MI in bits
    i = (hunc - cp.sum(w * hcond)) / ln2
    return i
Example #13
path = '/export/scratch2/kostenko/archive/OwnProjects/al_tests/new/90KV_no_filt/'

dark = flex.data.read_raw(path, 'di')
flat = flex.data.read_raw(path, 'io')
proj = flex.data.read_raw(path, 'scan_')

meta = flex.data.read_log(path, 'flexray')

#%% Prepro:

# Convert to CUPY:
proj = cupy.array(proj)
flat = cupy.array(flat)
dark = cupy.array(dark)

# Use CUDA to compute stuff:
proj = (proj - dark) / (flat.mean(0) - dark)
proj = -cupy.log(proj)

proj = flex.data.raw2astra(proj)

flex.util.display_slice(proj, title='Sinogram')

#%% Recon

vol = numpy.zeros([1, 2000, 2000], dtype='float32')

flex.project.FDK(proj, vol, meta['geometry'])

flex.util.display_slice(vol, bounds=[], title='FDK')
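
The preprocessing block above is flat-field correction followed by a negative log (Beer-Lambert linearization). A minimal synthetic sketch (CuPy only; the numbers are made up, no flexbox data needed):

import math
import cupy

dark = cupy.full((4, 4), 100.0)               # detector offset
flat = cupy.full((4, 4), 1100.0)              # open-beam (flat) image
raw = dark + (flat - dark) * math.exp(-1.5)   # object with line integral 1.5

transmission = (raw - dark) / (flat - dark)
print(-cupy.log(transmission))                # ~1.5 everywhere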
Example #14
    def mlog(self, psi):
        res = psi.copy()
        res[cp.abs(psi) < 1e-32] = 1e-32
        res = cp.log(res)
        return res
Example #15
def _support_choice(dist, rand):
    return cp.log(dist) + rand
Example #16
    def __call__(self, input_x, t):
        output = self.predictor(input_x)
        batch_size, _, grid_h, grid_w = output.shape
        self.seen += batch_size
        x, y, w, h, conf, prob = F.split_axis(F.reshape(
            output, (batch_size, self.predictor.n_boxes,
                     self.predictor.n_classes + 5, grid_h, grid_w)),
                                              (1, 2, 3, 4, 5),
                                              axis=2)
        x = F.sigmoid(x)  # activation for x
        y = F.sigmoid(y)  # activation for y
        conf = F.sigmoid(conf)  # activation for conf
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob)  # activation for probability

        # Prepare the target (teacher) data
        tw = xp.zeros(
            w.shape,
            dtype=xp.float32)  # train w and h toward 0 (e^w and e^h approach 1 -> bbox scale factor of 1)
        th = xp.zeros(h.shape, dtype=xp.float32)
        tx = xp.tile(0.5, x.shape).astype(xp.float32)  # train the activated x and y toward 0.5
        ty = xp.tile(0.5, y.shape).astype(xp.float32)

        if self.seen < self.unstable_seen:  # the learning scale for bbox errors without a center defaults to 0.1
            box_learning_scale = xp.tile(0.1, x.shape).astype(xp.float32)
        else:
            box_learning_scale = xp.tile(0, x.shape).astype(xp.float32)

        tconf = xp.zeros(
            conf.shape, dtype=xp.float32
        )  # the confidence target is 0 by default; anchors whose IoU exceeds the threshold are not trained, but the best box of each grid cell containing an object is pushed toward the true IoU
        conf_learning_scale = xp.tile(0.1, conf.shape).astype(xp.float32)

        tprob = prob.data.copy()  # do not train anything other than the best anchor (squared error against itself = 0)

        # compute the IoU between every bbox and the truth (done per batch)
        x_shift = Variable(
            xp.broadcast_to(xp.arange(grid_w, dtype=xp.float32), x.shape[1:]))
        y_shift = Variable(
            xp.broadcast_to(
                xp.arange(grid_h, dtype=xp.float32).reshape(grid_h, 1),
                y.shape[1:]))
        w_anchor = Variable(
            xp.broadcast_to(
                xp.reshape(
                    xp.array(self.anchors, dtype=xp.float32)[:, 0],
                    (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:]))
        h_anchor = Variable(
            xp.broadcast_to(
                xp.reshape(
                    xp.array(self.anchors, dtype=xp.float32)[:, 1],
                    (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:]))
        x_shift.to_gpu(), y_shift.to_gpu(), w_anchor.to_gpu(), h_anchor.to_gpu(
        )
        best_ious = []
        for batch in range(batch_size):
            n_truth_boxes = len(t[batch])
            box_x = (x[batch] + x_shift) / grid_w
            box_y = (y[batch] + y_shift) / grid_h
            box_w = F.exp(w[batch]) * w_anchor / grid_w
            box_h = F.exp(h[batch]) * h_anchor / grid_h

            ious = []
            for truth_index in range(n_truth_boxes):
                truth_box_x = Variable(
                    xp.broadcast_to(
                        xp.array(t[batch][truth_index]["x"], dtype=xp.float32),
                        box_x.shape))
                truth_box_y = Variable(
                    xp.broadcast_to(
                        xp.array(t[batch][truth_index]["y"], dtype=xp.float32),
                        box_y.shape))
                truth_box_w = Variable(
                    xp.broadcast_to(
                        xp.array(t[batch][truth_index]["w"], dtype=xp.float32),
                        box_w.shape))
                truth_box_h = Variable(
                    xp.broadcast_to(
                        xp.array(t[batch][truth_index]["h"], dtype=xp.float32),
                        box_h.shape))
                truth_box_x.to_gpu(), truth_box_y.to_gpu(), truth_box_w.to_gpu(
                ), truth_box_h.to_gpu()
                ious.append(
                    multi_box_iou(
                        Box(box_x, box_y, box_w, box_h),
                        Box(truth_box_x, truth_box_y, truth_box_w,
                            truth_box_h)).data.get())
            ious = xp.array(ious)
            best_ious.append(xp.max(ious, axis=0))
        best_ious = xp.array(best_ious)

        # For anchors whose IoU is above the threshold, do not push conf down to 0 (grid cells around the truth keep their conf as is).
        tconf[best_ious > self.thresh] = conf.data.get()[
            best_ious > self.thresh]
        conf_learning_scale[best_ious > self.thresh] = 0

        # Only for anchor boxes that contain an object, individually correct x, y, w, h, conf and prob
        abs_anchors = self.anchors / xp.array([grid_w, grid_h])
        for batch in range(batch_size):
            for truth_box in t[batch]:
                truth_w = int(float(truth_box["x"]) * grid_w)
                truth_h = int(float(truth_box["y"]) * grid_h)
                truth_n = 0
                best_iou = 0.0
                for anchor_index, abs_anchor in enumerate(abs_anchors):
                    iou = box_iou(
                        Box(0, 0, float(truth_box["w"]),
                            float(truth_box["h"])),
                        Box(0, 0, abs_anchor[0], abs_anchor[1]))
                    if best_iou < iou:
                        best_iou = iou
                        truth_n = anchor_index

                # For the anchor containing the object, push the center toward the true coordinates instead of 0.5, push the anchor scale toward the true scale instead of 1, and set the learning scale to 1.
                box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
                tx[batch, truth_n, :, truth_h,
                   truth_w] = float(truth_box["x"]) * grid_w - truth_w
                ty[batch, truth_n, :, truth_h,
                   truth_w] = float(truth_box["y"]) * grid_h - truth_h
                tw[batch, truth_n, :, truth_h, truth_w] = xp.log(
                    float(truth_box["w"]) / abs_anchors[truth_n][0])
                th[batch, truth_n, :, truth_h, truth_w] = xp.log(
                    float(truth_box["h"]) / abs_anchors[truth_n][1])
                tprob[batch, :, truth_n, truth_h, truth_w] = 0
                tprob[batch,
                      int(truth_box["label"]), truth_n, truth_h, truth_w] = 1

                # Observe the IoU
                full_truth_box = Box(float(truth_box["x"]),
                                     float(truth_box["y"]),
                                     float(truth_box["w"]),
                                     float(truth_box["h"]))
                predicted_box = Box(
                    (x[batch][truth_n][0][truth_h][truth_w].data.get() +
                     truth_w) / grid_w,
                    (y[batch][truth_n][0][truth_h][truth_w].data.get() +
                     truth_h) / grid_h,
                    xp.exp(w[batch][truth_n][0][truth_h][truth_w].data.get()) *
                    abs_anchors[truth_n][0],
                    xp.exp(h[batch][truth_n][0][truth_h][truth_w].data.get()) *
                    abs_anchors[truth_n][1])
                predicted_iou = box_iou(full_truth_box, predicted_box)
                tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
                conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

            # debug prints
            maps = F.transpose(prob[batch], (2, 3, 1, 0)).data
            print(
                "best confidences and best conditional probability and predicted class of each grid:"
            )
            for i in range(grid_h):
                for j in range(grid_w):
                    print("%2d" %
                          (int(conf[batch, :, :, i, j].data.max() * 100)),
                          end=" ")
                print("     ", end="")
                for j in range(grid_w):
                    print("%2d" % (maps[i][j][int(
                        maps[i][j].max(axis=1).argmax())].argmax()),
                          end=" ")
                print("     ", end="")
                for j in range(grid_w):
                    print("%2d" % (maps[i][j][int(
                        maps[i][j].max(axis=1).argmax())].max() * 100),
                          end=" ")
                print()

            print(
                "best default iou: %.2f   predicted iou: %.2f   confidence: %.2f   class: %s"
                % (best_iou, predicted_iou,
                   conf[batch][truth_n][0][truth_h][truth_w].data,
                   t[batch][0]["label"]))
            print("-------------------------------")
        print("seen = %d" % self.seen)

        # Compute the losses
        tx, ty, tw, th, tconf, tprob = Variable(tx), Variable(ty), Variable(
            tw), Variable(th), Variable(tconf), Variable(tprob)
        box_learning_scale, conf_learning_scale = Variable(
            box_learning_scale), Variable(conf_learning_scale)
        tx.to_gpu(), ty.to_gpu(), tw.to_gpu(), th.to_gpu(), tconf.to_gpu(
        ), tprob.to_gpu()
        box_learning_scale.to_gpu()
        conf_learning_scale.to_gpu()

        x_loss = F.sum((tx - x)**2 * box_learning_scale) / 2
        y_loss = F.sum((ty - y)**2 * box_learning_scale) / 2
        w_loss = F.sum((tw - w)**2 * box_learning_scale) / 2
        h_loss = F.sum((th - h)**2 * box_learning_scale) / 2
        c_loss = F.sum((tconf - conf)**2 * conf_learning_scale) / 2
        p_loss = F.sum((tprob - prob)**2) / 2
        print(
            "x_loss: %f  y_loss: %f  w_loss: %f  h_loss: %f  c_loss: %f   p_loss: %f"
            % (F.sum(x_loss).data, F.sum(y_loss).data, F.sum(w_loss).data,
               F.sum(h_loss).data, F.sum(c_loss).data, F.sum(p_loss).data))

        loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
        return loss
Example #17
def categorical_cross_entropy(x, y, epsilon=10**(-13)):
    x = cp.clip(x, epsilon, 1. - epsilon)
    N = x.shape[0]

    return -cp.sum(y * cp.log(x + 0.00001)) / N
Example #18
def log_loss(y_true,
             y_pred,
             eps=1e-15,
             normalize=True,
             sample_weight=None) -> float:
    """ Log loss, aka logistic loss or cross-entropy loss.
    This is the loss function used in (multinomial) logistic regression
    and extensions of it such as neural networks, defined as the negative
    log-likelihood of a logistic model that returns ``y_pred`` probabilities
    for its training data ``y_true``.
    The log loss is only defined for two or more labels.

    Parameters
    ----------
    y_true : array-like, shape = (n_samples,)
    y_pred : array-like of float,
        shape = (n_samples, n_classes) or (n_samples,)
    eps : float (default=1e-15)
        Log loss is undefined for p=0 or p=1, so probabilities are
        clipped to max(eps, min(1 - eps, p)).
    normalize : bool, optional (default=True)
        If true, return the mean loss per sample.
        Otherwise, return the sum of the per-sample losses.
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float

    Examples
    --------
    .. code-block:: python

        >>> from cuml.metrics import log_loss
        >>> import cupy as cp
        >>> log_loss(cp.array([1, 0, 0, 1]),
        ...          cp.array([[.1, .9], [.9, .1], [.8, .2], [.35, .65]]))
        0.21616...

    References
    ----------
    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
    p. 209.

    Notes
    -----
    The logarithm used is the natural logarithm (base-e).

    """
    y_true, n_rows, n_cols, ytype = \
        input_to_cupy_array(y_true, check_dtype=[np.int32, np.int64,
                                                 np.float32, np.float64])

    if y_true.dtype.kind == 'f' and np.any(y_true != y_true.astype(int)):
        raise ValueError("'y_true' can only have integer values")
    if y_true.min() < 0:
        raise ValueError("'y_true' cannot have negative values")

    y_pred, _, _, _ = \
        input_to_cupy_array(y_pred, check_dtype=[np.int32, np.int64,
                                                 np.float32, np.float64],
                            check_rows=n_rows)

    y_true_max = y_true.max()
    if (y_pred.ndim == 1 and y_true_max > 1) \
       or (y_pred.ndim > 1 and y_pred.shape[1] <= y_true_max):
        raise ValueError("The shape of y_pred doesn't "
                         "match the number of classes")

    y_true = y_true.astype('int32')
    y_pred = cp.clip(y_pred, eps, 1 - eps)
    if y_pred.ndim == 1:
        y_pred = cp.expand_dims(y_pred, axis=1)
    if y_pred.shape[1] == 1:
        y_pred = cp.hstack([1 - y_pred, y_pred])

    y_pred /= cp.sum(y_pred, axis=1, keepdims=True)
    loss = -cp.log(y_pred)[cp.arange(y_pred.shape[0]), y_true]
    return _weighted_sum(loss, sample_weight, normalize).item()
Example #19
def GetStepSize(photon_state, tau_atm):
    step = -cp.log(cp.random.rand(len(photon_state)))
    return step
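
-log(U) for uniform U is an exponentially distributed free path with mean 1 in optical-depth units (note that tau_atm is not used in the snippet above). A minimal sketch (CuPy only) checking the sample mean:

import cupy as cp

steps = -cp.log(cp.random.rand(100000))
print(float(steps.mean()))   # ~1.0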
Example #20
def mi_1d_gpu_gg(x, y, biascorrect=True, demeaned=False):
    """Mutual information (MI) between two Gaussian variables in bits.

    This is the GPU variant of the mi_1d_gg function, using CuPy

    I = mi_gg(x,y) returns the MI between two (possibly multidimensional)
    Gaussian variables, x and y, with bias correction.

    Parameters
    ----------
    x, y : array_like
        Gaussian arrays of shape (n_epochs,) or (n_dimensions, n_epochs)
    biascorrect : bool | True
        Specifies whether bias correction should be applied to the estimated MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it has
        been copula-normalized)

    Returns
    -------
    i : float
        Information shared by x and y (in bits)
    """
    x, y = cp.atleast_2d(x), cp.atleast_2d(y)
    if (x.ndim > 2) or (y.ndim > 2):
        raise ValueError("x and y must be at most 2d")
    nvarx, ntrl = x.shape
    nvary = y.shape[0]
    nvarxy = nvarx + nvary

    if y.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # joint variable
    xy = cp.vstack((x, y))
    if not demeaned:
        xy = xy - xy.mean(axis=1)[:, cp.newaxis]
    cxy = cp.dot(xy, xy.T) / float(ntrl - 1)
    # submatrices of joint covariance
    cx = cxy[:nvarx, :nvarx]
    cy = cxy[nvarx:, nvarx:]

    chcxy = cp.linalg.cholesky(cxy)
    chcx = cp.linalg.cholesky(cx)
    chcy = cp.linalg.cholesky(cy)

    # entropies in nats
    # normalizations cancel for mutual information
    hx = cp.sum(cp.log(cp.diagonal(chcx)))
    hy = cp.sum(cp.log(cp.diagonal(chcy)))
    hxy = cp.sum(cp.log(cp.diagonal(chcxy)))

    ln2 = cp.log(2)
    if biascorrect:
        psiterms = psi(
            (ntrl - cp.arange(1, nvarxy + 1)).astype(float) / 2.) / 2.
        dterm = (ln2 - cp.log(ntrl - 1.)) / 2.
        hx = hx - nvarx * dterm - psiterms[:nvarx].sum()
        hy = hy - nvary * dterm - psiterms[:nvary].sum()
        hxy = hxy - nvarxy * dterm - psiterms[:nvarxy].sum()

    # MI in bits
    i = (hx + hy - hxy) / ln2
    return i
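
The entropy terms above rely on the identity log det(C) = 2 * sum(log diag(cholesky(C))) for a covariance matrix C. A minimal sketch (CuPy only, random data) verifying it against cupy.linalg.slogdet:

import cupy as cp

a = cp.random.rand(5, 200)
c = cp.dot(a, a.T) / (a.shape[1] - 1)          # SPD sample covariance
chol = cp.linalg.cholesky(c)
lhs = 2.0 * cp.sum(cp.log(cp.diagonal(chol)))
sign, logdet = cp.linalg.slogdet(c)
print(float(lhs), float(logdet))               # should match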
Example #21
def cmi_1d_gpu_ggg(x, y, z, biascorrect=True, demeaned=False):
    """Conditional MI between two Gaussian variables conditioned on a third.

    I = cmi_ggg(x,y,z) returns the CMI between two (possibly multidimensional)
    Gaussian variables, x and y, conditioned on a third, z, with bias
    correction.

    Parameters
    ----------
    x, y, z : array_like
        Gaussians arrays of shape (n_epochs,) or (n_dimensions, n_epochs).
    biascorrect : bool | True
        Specifies whether bias correction should be applied to the estimated MI
    demeaned : bool | False
        Specifies whether the input data already has zero mean (true if it has
        been copula-normalized)

    Returns
    -------
    i : float
        Information shared by x and y conditioned by z (in bits)
    """
    x, y, z = cp.atleast_2d(x), cp.atleast_2d(y), cp.atleast_2d(z)
    if x.ndim > 2 or y.ndim > 2 or z.ndim > 2:
        raise ValueError("x, y and z must be at most 2d")
    ntrl = x.shape[1]
    nvarx = x.shape[0]
    nvary = y.shape[0]
    nvarz = z.shape[0]
    nvaryz = nvary + nvarz
    nvarxy = nvarx + nvary
    nvarxz = nvarx + nvarz
    nvarxyz = nvarx + nvaryz

    if y.shape[1] != ntrl or z.shape[1] != ntrl:
        raise ValueError("number of trials do not match")

    # joint variable
    xyz = cp.vstack((x, y, z))
    if not demeaned:
        xyz = xyz - xyz.mean(axis=1)[:, cp.newaxis]
    cxyz = cp.dot(xyz, xyz.T) / float(ntrl - 1)
    # submatrices of joint covariance
    cz = cxyz[nvarxy:, nvarxy:]
    cyz = cxyz[nvarx:, nvarx:]
    cxz = cp.zeros((nvarxz, nvarxz))
    cxz[:nvarx, :nvarx] = cxyz[:nvarx, :nvarx]
    cxz[:nvarx, nvarx:] = cxyz[:nvarx, nvarxy:]
    cxz[nvarx:, :nvarx] = cxyz[nvarxy:, :nvarx]
    cxz[nvarx:, nvarx:] = cxyz[nvarxy:, nvarxy:]

    chcz = cp.linalg.cholesky(cz)
    chcxz = cp.linalg.cholesky(cxz)
    chcyz = cp.linalg.cholesky(cyz)
    chcxyz = cp.linalg.cholesky(cxyz)

    # entropies in nats
    # normalizations cancel for cmi
    hz = cp.sum(cp.log(cp.diagonal(chcz)))
    hxz = cp.sum(cp.log(cp.diagonal(chcxz)))
    hyz = cp.sum(cp.log(cp.diagonal(chcyz)))
    hxyz = cp.sum(cp.log(cp.diagonal(chcxyz)))

    ln2 = cp.log(2)
    if biascorrect:
        psiterms = psi(
            (ntrl - cp.arange(1, nvarxyz + 1)).astype(float) / 2.) / 2.
        dterm = (ln2 - cp.log(ntrl - 1.)) / 2.
        hz = hz - nvarz * dterm - psiterms[:nvarz].sum()
        hxz = hxz - nvarxz * dterm - psiterms[:nvarxz].sum()
        hyz = hyz - nvaryz * dterm - psiterms[:nvaryz].sum()
        hxyz = hxyz - nvarxyz * dterm - psiterms[:nvarxyz].sum()

    # CMI in bits
    i = (hxz + hyz - hxyz - hz) / ln2
    return i
Example #22
def get_error(y, t):
    # Return the binary cross-entropy error.
    eps = 1e-7
    return -np.sum(t * np.log(y + eps) +
                   (1 - t) * np.log(1 - y + eps)) / len(y)
Example #23
def slogdet(a):
    """Returns sign and logarithm of the determinant of an array.

    It calculates the natural logarithm of the determinant of a given value.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(..., N, N)``.

    Returns:
        tuple of :class:`~cupy.ndarray`:
            It returns a tuple ``(sign, logdet)``. ``sign`` represents each
            sign of the determinant as a real number ``0``, ``1`` or ``-1``.
            ``logdet`` represents the natural logarithm of the absolute value
            of the determinant.
            If the determinant is zero, ``sign`` will be ``0`` and ``logdet``
            will be ``-inf``.
            The shapes of both ``sign`` and ``logdet`` are equal to
            ``a.shape[:-2]``.

    .. warning::
        This function calls one or more cuSOLVER routine(s) which may yield
        invalid results if input conditions are not met.
        To detect these invalid results, you can set the `linalg`
        configuration to a value that is not `ignore` in
        :func:`cupyx.errstate` or :func:`cupyx.seterr`.

    .. warning::
        To produce the same results as :func:`numpy.linalg.slogdet` for
        singular inputs, set the `linalg` configuration to `raise`.

    .. seealso:: :func:`numpy.linalg.slogdet`
    """
    if a.ndim < 2:
        msg = ('%d-dimensional array given. '
               'Array must be at least two-dimensional' % a.ndim)
        raise linalg.LinAlgError(msg)
    _util._assert_nd_squareness(a)

    dtype = numpy.promote_types(a.dtype.char, 'f')
    real_dtype = numpy.dtype(dtype.char.lower())

    if dtype not in (numpy.float32, numpy.float64,
                     numpy.complex64, numpy.complex128):
        msg = ('dtype must be float32, float64, complex64, or complex128'
               ' (actual: {})'.format(a.dtype))
        raise ValueError(msg)

    a_shape = a.shape
    shape = a_shape[:-2]
    n = a_shape[-2]

    if a.size == 0:
        # empty batch (result is empty, too) or empty matrices det([[]]) == 1
        sign = cupy.ones(shape, dtype)
        logdet = cupy.zeros(shape, real_dtype)
        return sign, logdet

    lu, ipiv, dev_info = _decomposition._lu_factor(a, dtype)

    # dev_info < 0 means illegal value (in dimensions, strides, and etc.) that
    # should never happen even if the matrix contains nan or inf.
    # TODO(kataoka): assert dev_info >= 0 if synchronization is allowed for
    # debugging purposes.

    diag = cupy.diagonal(lu, axis1=-2, axis2=-1)

    logdet = cupy.log(cupy.abs(diag)).sum(axis=-1)

    # ipiv is 1-origin
    non_zero = cupy.count_nonzero(ipiv != cupy.arange(1, n + 1), axis=-1)
    if dtype.kind == "f":
        non_zero += cupy.count_nonzero(diag < 0, axis=-1)

    # Note: sign == (-1) ** (non_zero % 2)
    sign = (non_zero % 2) * -2 + 1
    if dtype.kind == "c":
        sign = sign * cupy.prod(diag / cupy.abs(diag), axis=-1)

    singular = dev_info > 0
    return (
        cupy.where(singular, dtype.type(0), sign.astype(dtype)).reshape(shape),
        cupy.where(singular, real_dtype.type('-inf'), logdet).reshape(shape),
    )
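
A minimal usage sketch (CuPy only; random input, not from the CuPy test suite): sign * exp(logdet) should reproduce cupy.linalg.det for a batch of square matrices.

import cupy

a = cupy.random.rand(3, 4, 4)
sign, logdet = cupy.linalg.slogdet(a)
print(sign * cupy.exp(logdet))
print(cupy.linalg.det(a))      # should agree up to floating-point error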
Example #24
def log_softmax(x: xp.ndarray, axis=-1):
    c = xp.max(x, axis=axis, keepdims=True)  # [*, 1, *]
    x2 = x - c  # [*, ?, *]
    logsumexp = xp.log(xp.exp(x2).sum(axis=axis, keepdims=True))  # [*, 1, *]
    return x2 - logsumexp
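
Subtracting the row maximum keeps the log-sum-exp finite for large inputs. A minimal sketch (here xp is assumed to be NumPy, since the snippet leaves the import unspecified) checking that exponentiating the output gives rows summing to 1:

import numpy as xp

x = xp.array([[1000.0, 1001.0, 1002.0],
              [-5.0, 0.0, 5.0]])
c = xp.max(x, axis=-1, keepdims=True)
x2 = x - c
log_probs = x2 - xp.log(xp.exp(x2).sum(axis=-1, keepdims=True))
print(xp.exp(log_probs).sum(axis=-1))   # [1. 1.]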
Example #25
def main():

    nwaves = 256
    pw = 256
    dw = 512+16+3

    A = lt.Propagation(nwaves, dw, pw)
    B = to.Propagation(dw, pw, array_module=cp, asnumpy=cp.asnumpy)

    shape = (nwaves, pw, pw)

    nearplane = cp.random.rand(*shape) + 1j * cp.random.rand(*shape)

    amplitude = plt.imread("/home/beams/DCHING/Pictures/images/Cryptomeria_japonica-0256.tif") / 255
    phase = plt.imread("/home/beams/DCHING/Pictures/images/Erdhummel_Bombus_terrestris-0256.tif") / 255
    nearplane[0] = cp.asarray(amplitude + 1j * phase)
    nearplane = cp.ascontiguousarray(nearplane, dtype='complex64')


    start = time.time()
    farplaneB = B.fwd(nearplane)
    stop = time.time()
    print(farplaneB.shape, stop-start)

    start = time.time()
    farplaneA = A.fwd(nearplane)
    stop = time.time()
    print(farplaneA.shape, stop-start)

    return  # NOTE: this early return makes the plotting/adjoint checks below unreachable; drop it to run them

    plt.figure()

    plt.subplot(1, 3, 1)
    plt.imshow(cp.log(cp.abs(farplaneA)).get()[0])
    plt.colorbar()
    plt.title('CUDA')

    plt.subplot(1, 3, 2)
    plt.imshow(cp.log(cp.abs(farplaneB)).get()[0])
    plt.colorbar()
    plt.title('CUPY')

    plt.subplot(1, 3, 3)
    plt.imshow(
        cp.log(
          cp.abs(farplaneA)
          - cp.abs(farplaneB)
        ).get()[0]
    )
    plt.colorbar()
    plt.title('DIFF')
    plt.show()

    # cp.testing.assert_array_equal(farplaneA, farplaneB)


    nearplaneA = A.adj(farplaneB)
    nearplaneB = B.adj(farplaneA)

    plt.figure()

    plt.subplot(1, 3, 1)
    plt.imshow(nearplaneA.real.get()[0])
    plt.colorbar()
    plt.title('CUDA')

    plt.subplot(1, 3, 2)
    plt.imshow(nearplaneB.real.get()[0])
    plt.colorbar()
    plt.title('CUPY')

    plt.subplot(1, 3, 3)
    plt.imshow(
        cp.log(
          nearplaneB.real - nearplaneA.real
        ).get()[0]
    )
    plt.colorbar()
    plt.title('DIFF')
    plt.show()

    cp.testing.assert_array_equal(nearplaneA, nearplaneB)
Example #26
    def choice(self, a, size=None, replace=True, p=None):
        """Returns an array of random values from a given 1-D array.

        .. seealso::
            :func:`cupy.random.choice` for full document,
            :meth:`numpy.random.choice`

        """
        if a is None:
            raise ValueError('a must be 1-dimensional or an integer')
        if isinstance(a, cupy.ndarray) and a.ndim == 0:
            raise NotImplementedError
        if isinstance(a, six.integer_types):
            a_size = a
            if a_size <= 0:
                raise ValueError('a must be greater than 0')
        else:
            a = cupy.array(a, copy=False)
            if a.ndim != 1:
                raise ValueError('a must be 1-dimensional or an integer')
            else:
                a_size = len(a)
                if a_size == 0:
                    raise ValueError('a must be non-empty')

        if p is not None:
            p = cupy.array(p)
            if p.ndim != 1:
                raise ValueError('p must be 1-dimensional')
            if len(p) != a_size:
                raise ValueError('a and p must have same size')
            if not (p >= 0).all():
                raise ValueError('probabilities are not non-negative')
            p_sum = cupy.sum(p).get()
            if not numpy.allclose(p_sum, 1):
                raise ValueError('probabilities do not sum to 1')

        if size is None:
            raise NotImplementedError
        shape = size
        size = numpy.prod(shape)

        if not replace and p is None:
            if a_size < size:
                raise ValueError(
                    'Cannot take a larger sample than population when '
                    '\'replace=False\'')
            if isinstance(a, six.integer_types):
                indices = cupy.arange(a, dtype='l')
            else:
                indices = a.copy()
            self.shuffle(indices)
            return indices[:size].reshape(shape)

        if not replace:
            raise NotImplementedError

        if p is not None:
            p = cupy.broadcast_to(p, (size, a_size))
            index = cupy.argmax(cupy.log(p) +
                                cupy.random.gumbel(size=(size, a_size)),
                                axis=1)
            if not isinstance(shape, six.integer_types):
                index = cupy.reshape(index, shape)
        else:
            index = cupy.random.randint(0, a_size, size=shape)
            # Align the dtype with NumPy
            index = index.astype(cupy.int64, copy=False)

        if isinstance(a, six.integer_types):
            return index

        if index.ndim == 0:
            return cupy.array(a[index], dtype=a.dtype)

        return a[index]
Example #27
def select_log_next_cupy(X, gains, current_values, idxs):
    gains[:] = cupy.sum(cupy.log(current_values + X + 1), axis=1)[idxs]
Example #28
def beam_search(model, X, params, return_alphas=False, eos_sym=0, null_sym=2, model_ensemble=False, n_models=0):
    """
    Beam search method for Cond models.
    (https://en.wikibooks.org/wiki/Artificial_Intelligence/Search/Heuristic_search/Beam_search)
    The algorithm in a nutshell does the following:

    1. k = beam_size
    2. open_nodes = [[]] * k
    3. while k > 0:

        3.1. Given the inputs, get (log) probabilities for the outputs.

        3.2. Expand each open node with all possible output.

        3.3. Prune and keep the k best nodes.

        3.4. If a sample has reached the <eos> symbol:

            3.4.1. Mark it as final sample.

            3.4.2. k -= 1

        3.5. Build new inputs (state_below) and go to 1.

    4. return final_samples, final_scores
    :param model: Model to use
    :param X: Model inputs
    :param params: Search parameters
    :param return_alphas: Whether we should return attention weights or not.
    :param eos_sym: <eos> symbol
    :param null_sym: <null> symbol
    :param model_ensemble: Whether we are using several models in an ensemble
    :param n_models: Number of models in the ensemble.
    :return: UNSORTED list of [k_best_samples, k_best_scores] (k: beam size)
    """
    k = params['beam_size']
    samples = []
    sample_scores = []
    pad_on_batch = params['pad_on_batch']
    dead_k = 0  # samples that reached eos
    live_k = 1  # samples that did not yet reach eos
    hyp_samples = [[]] * live_k
    hyp_scores = cp.zeros(live_k, dtype='float32')
    ret_alphas = return_alphas or params['pos_unk']
    if ret_alphas:
        sample_alphas = []
        hyp_alphas = [[]] * live_k
    if pad_on_batch:
        maxlen = int(len(X[params['dataset_inputs'][0]][0]) * params['output_max_length_depending_on_x_factor']) if \
            params['output_max_length_depending_on_x'] else params['maxlen']
        minlen = int(
            len(X[params['dataset_inputs'][0]][0]) / params['output_min_length_depending_on_x_factor'] + 1e-7) if \
            params['output_min_length_depending_on_x'] else 0
    else:
        minlen = int(np.argmax(X[params['dataset_inputs'][0]][0] == eos_sym) /
                     params['output_min_length_depending_on_x_factor'] + 1e-7) if \
            params['output_min_length_depending_on_x'] else 0

        maxlen = int(np.argmax(X[params['dataset_inputs'][0]][0] == eos_sym) * params[
            'output_max_length_depending_on_x_factor']) if \
            params['output_max_length_depending_on_x'] else params['maxlen']
        maxlen = min(params['state_below_maxlen'] - 1, maxlen)

    # we must include an additional dimension if the input for each timestep are all the generated "words_so_far"
    if params['words_so_far']:
        if k > maxlen:
            raise NotImplementedError("BEAM_SIZE can't be higher than MAX_OUTPUT_TEXT_LEN on the current implementation.")
        state_below = np.asarray([[null_sym]] * live_k) if pad_on_batch else np.asarray([np.zeros((maxlen, maxlen))] * live_k)
    else:
        state_below = np.asarray([null_sym] * live_k) if pad_on_batch else np.asarray([np.zeros(params['state_below_maxlen']) + null_sym] * live_k)
    prev_out = [None] * n_models if model_ensemble else None

    for ii in range(maxlen):
        # for every possible live sample calc prob for every possible label
        if params['optimized_search']:  # use optimized search model if available
            if model_ensemble:
                [probs, prev_out, alphas] = model.predict_cond_optimized(X, state_below, params, ii, prev_out)
            else:
                [probs, prev_out] = model.predict_cond_optimized(X, state_below, params, ii, prev_out)
                if ret_alphas:
                    alphas = prev_out[-1][0]  # Shape: (k, n_steps)
                    prev_out = prev_out[:-1]
        else:
            probs = model.predict_cond(X, state_below, params, ii)
        log_probs = cp.log(probs)
        if minlen > 0 and ii < minlen:
            log_probs[:, eos_sym] = -cp.inf
        # the total score for every sample is the sum of the -log word probabilities
        cand_scores = hyp_scores[:, None] - log_probs
        cand_flat = cand_scores.flatten()
        # Find the best options by calling argsort on the flattened array
        ranks_flat = cp.argsort(cand_flat)[:(k - dead_k)]
        # Decipher the flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = ranks_flat // voc_size  # index of row
        word_indices = ranks_flat % voc_size  # index of col
        costs = cand_flat[ranks_flat]
        best_cost = costs[0]
        if cupy:
            trans_indices = cp.asnumpy(trans_indices)
            word_indices = cp.asnumpy(word_indices)
            if ret_alphas:
                alphas = cp.asnumpy(alphas)

        # Form a beam for the next iteration
        new_hyp_samples = []
        new_trans_indices = []
        new_hyp_scores = cp.zeros(k - dead_k, dtype='float32')
        if ret_alphas:
            new_hyp_alphas = []
        for idx, [ti, wi] in list(enumerate(zip(trans_indices, word_indices))):
            if params['search_pruning']:
                if costs[idx] < k * best_cost:
                    new_hyp_samples.append(hyp_samples[ti] + [wi])
                    new_trans_indices.append(ti)
                    new_hyp_scores[idx] = copy.copy(costs[idx])
                    if ret_alphas:
                        new_hyp_alphas.append(hyp_alphas[ti] + [alphas[ti]])
                else:
                    dead_k += 1
            else:
                new_hyp_samples.append(hyp_samples[ti] + [wi])
                new_trans_indices.append(ti)
                new_hyp_scores[idx] = copy.copy(costs[idx])
                if ret_alphas:
                    new_hyp_alphas.append(hyp_alphas[ti] + [alphas[ti]])
        # check the finished samples
        new_live_k = 0
        hyp_samples = []
        hyp_scores = []
        hyp_alphas = []
        indices_alive = []
        for idx in range(len(new_hyp_samples)):
            if new_hyp_samples[idx][-1] == eos_sym:  # finished sample
                samples.append(new_hyp_samples[idx])
                sample_scores.append(new_hyp_scores[idx])
                if ret_alphas:
                    sample_alphas.append(new_hyp_alphas[idx])
                dead_k += 1
            else:
                indices_alive.append(new_trans_indices[idx])
                new_live_k += 1
                hyp_samples.append(new_hyp_samples[idx])
                hyp_scores.append(new_hyp_scores[idx])
                if ret_alphas:
                    hyp_alphas.append(new_hyp_alphas[idx])
        hyp_scores = cp.array(np.asarray(hyp_scores, dtype='float32'), dtype='float32')
        live_k = new_live_k

        if new_live_k < 1:
            break
        if dead_k >= k:
            break
        state_below = np.asarray(hyp_samples, dtype='int64')

        state_below = np.hstack((np.zeros((state_below.shape[0], 1), dtype='int64') + null_sym, state_below)) \
            if pad_on_batch else \
            np.hstack((np.zeros((state_below.shape[0], 1), dtype='int64') + null_sym,
                       state_below,
                       np.zeros((state_below.shape[0],
                                 max(params['state_below_maxlen'] - state_below.shape[1] - 1, 0)), dtype='int64')))

        # we must include an additional dimension if the input for each timestep are all the generated words so far
        if params['words_so_far']:
            state_below = np.expand_dims(state_below, axis=0)

        if params['optimized_search'] and ii > 0:
            # filter next search inputs w.r.t. remaining samples
            if model_ensemble:
                for n_model in range(n_models):
                    # filter next search inputs w.r.t. remaining samples
                    for idx_vars in range(len(prev_out[n_model])):
                        prev_out[n_model][idx_vars] = prev_out[n_model][idx_vars][indices_alive]
            else:
                for idx_vars in range(len(prev_out)):
                    prev_out[idx_vars] = prev_out[idx_vars][indices_alive]

    # dump every remaining one
    if live_k > 0:
        for idx in range(live_k):
            samples.append(hyp_samples[idx])
            sample_scores.append(hyp_scores[idx])
            if ret_alphas:
                sample_alphas.append(hyp_alphas[idx])
    if ret_alphas:
        return samples, sample_scores, np.asarray(sample_alphas)
    else:
        return samples, sample_scores, None
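
The numbered steps in the docstring can be illustrated without the Cond-model API. Below is a toy, model-free sketch (NumPy only; the 3-token transition table and symbols are made up) that expands every live hypothesis, prunes to the k best, and retires hypotheses that emit <eos>, mirroring steps 3.1-3.5:

import numpy as np

log_probs_table = np.log(np.array([
    [0.1, 0.6, 0.3],   # previous token 0 (<eos> / start)
    [0.2, 0.3, 0.5],   # previous token 1
    [0.7, 0.2, 0.1],   # previous token 2
]))
k, eos_sym, maxlen = 2, 0, 5
live = [([], 0.0)]                        # (tokens so far, cumulative cost = sum of -log prob)
finished = []

for _ in range(maxlen):
    # 3.1 - 3.2: score every possible extension of every live hypothesis
    candidates = []
    for tokens, cost in live:
        last = tokens[-1] if tokens else eos_sym
        for word, lp in enumerate(log_probs_table[last]):
            candidates.append((tokens + [word], cost - lp))
    # 3.3: prune to the k best (lowest cost)
    candidates.sort(key=lambda c: c[1])
    live = []
    for tokens, cost in candidates[:k - len(finished)]:
        # 3.4: hypotheses ending in <eos> are retired and shrink the beam
        (finished if tokens[-1] == eos_sym else live).append((tokens, cost))
    if not live or len(finished) >= k:
        break

print(finished + live)                    # up to k (sample, cost) pairs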
Example #29
def logit(x):
    return cp.log(x / (1 - x))
Example #30
def convolutional_barycenter_gpu(Hv,
                                 reg: float,
                                 alpha: np.ndarray,
                                 stabThresh=1e-30,
                                 niter=1500,
                                 tol=1e-9,
                                 sharpening=False,
                                 verbose=False):

    """Main function solving the Wasserstein barycenter problem on GPU
    Parameters:
        Hv {Set of distributions (cparray)} --
        reg {regularization term "gamma"} -- float greater than 0, generally equals size of space/40
        alpha {list} -- set of weights
    Keyword Parameters:
        stabThresh {float} -- Stabilization threshold to prevent division by 0 (default: {1e-30})
        niter {int} -- Maximum number of loop iterations (default: {1500})
        tol {float} -- convergence tolerance at which point iterations stop (default: {1e-9})
        sharpening {bool} -- Whether or not entropic sharpening is used (default: {False})
        verbose {bool} -- verbose option
    Returns:
        cparray -- solution of weighted Wasserstein barycenter problem
    """
    import time

    import numpy as np
    import cupy as cp
    from cupyx.scipy.ndimage import gaussian_filter as cupyx_gaussian_filter
    from scipy.optimize import newton  # assumed source of the newton() root finder used below

    # entropy() and max_entropy() are helpers from the original module (not shown here);
    # they are only needed when sharpening=True.

    def K_cupyx(x):
        # entropic-regularization kernel: a Gaussian blur with bandwidth `reg`
        return cupyx_gaussian_filter(x, sigma=reg)

    def to_find_root(barycenter, H0, beta):
        # zero of this function in beta gives the entropic-sharpening exponent
        return entropy(barycenter**beta) - H0

    alpha = cp.array(alpha)
    alpha = alpha / alpha.sum()
    Hv = cp.array(Hv)

    # normalize each input distribution so it sums to 1
    for i in range(len(Hv)):
        Hv[i] = Hv[i] / Hv[i].sum()

    v = cp.ones(Hv.shape)
    Kw = cp.ones(Hv.shape)
    # max_entropy() is only required when entropic sharpening is enabled
    entropy_max = max_entropy(Hv) if sharpening else None
    barycenter = cp.zeros(Hv[0].shape)

    cumtime_agg = 0

    rolling_delta = []
    cumtime = []
    iterations = []

    change = 1
    for j in range(niter):
        t0 = time.time()
        barycenterOld = barycenter
        barycenter = cp.zeros_like(Hv[0, :, :])

        if verbose:
            print("Hv shape is", Hv.shape)

        for i in range(Hv.shape[0]):
            # Kw(i) becomes the kernel of (distribution i divided by the kernel of v(i))
            Kw[i, :, :] = K_cupyx(Hv[i, :, :] / cp.maximum(stabThresh, K_cupyx(v[i, :, :])))
            # accumulate the weighted log of v(i)*Kw(i) into the (log-domain) barycenter
            barycenter += alpha[i] * cp.log(cp.maximum(stabThresh, v[i, :, :] * Kw[i, :, :]))

        barycenter = cp.exp(barycenter)
        change = cp.sum(cp.abs(barycenter - barycenterOld))

        if sharpening:
            if entropy(barycenter) > entropy_max:
                beta = newton(lambda beta: to_find_root(barycenter, entropy_max, beta), 1, tol=1e-6)
                if beta < 0:
                    beta = 1
            else:
                beta = 1
            barycenter = barycenter**beta

        for i in range(Hv.shape[0]):
            # update the scaling v(i): barycenter divided by Kw(i), clamped for stability
            v[i, :, :] = barycenter / cp.maximum(stabThresh, Kw[i, :, :])

        elapsed = np.around(time.time() - t0, 4)
        delta = np.around(change, 10)

        cumtime_agg += elapsed

        iterations.append(j)
        cumtime.append(cumtime_agg)
        rolling_delta.append(float( delta ))

        print(f"Refinement iter {j} |  delta: {delta} | elapsed : {elapsed}") 
        if change < tol :
            print(f"Exited. Change {change} under tolerance.")
            log = {
                "iterations"     : iterations,
                "cumtime"        : cumtime,
                "rolling_delta"  : rolling_delta,
                "exited_on"      : j,
                "exited_under_tolerance": True
            }
                # print(f"Exited with 0 on iter {j}")
            return [ cp.asnumpy(barycenter),log ]
            break

    
    log = {
        "iterations"            : iterations,
        "cumtime"               : cumtime,
        "rolling_delta"         : rolling_delta,
        "exited_on"             : j,
        "exited_under_tolerance": False
    }
        # print(f"Exited with 0 on iter {j}")
    return [ cp.asnumpy(barycenter),log ]
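A possible usage sketch for the function above (illustrative only: two square blobs on a 64x64 grid, equal weights, sharpening left off so the external entropy helpers are not required):

import numpy as np

h1 = np.zeros((64, 64)); h1[16:24, 16:24] = 1.0    # first input distribution
h2 = np.zeros((64, 64)); h2[40:48, 40:48] = 1.0    # second input distribution
Hv = np.stack([h1, h2])                            # set of distributions
alpha = np.array([0.5, 0.5])                       # equal weights

bary, log = convolutional_barycenter_gpu(Hv, reg=2.0, alpha=alpha,
                                         niter=200, tol=1e-7, verbose=False)
print(log["exited_on"], bary.shape)                # last iteration reached and (64, 64)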
Ejemplo n.º 31
0
def learnAndSolve8b(ctx):
    """This is the main optimization. Takes the longest time and uses the GPU heavily."""

    Nbatch = ctx.intermediate.Nbatch
    params = ctx.params
    probe = ctx.probe
    ir = ctx.intermediate
    proc = ir.proc

    iorig = ir.iorig

    # TODO: move_to_config
    NrankPC = 6  # this one is the rank of the PCs, used to detect spikes with threshold crossings
    Nrank = 3  # this one is the rank of the templates

    wTEMP, wPCA = extractTemplatesfromSnippets(proc=proc,
                                               probe=probe,
                                               params=params,
                                               Nbatch=Nbatch,
                                               nPCs=NrankPC)

    # move these to the GPU
    wPCA = cp.asarray(wPCA[:, :Nrank], dtype=np.float32, order='F')
    wTEMP = cp.asarray(wTEMP, dtype=np.float32, order='F')
    wPCAd = cp.asarray(wPCA, dtype=np.float64,
                       order='F')  # convert to double for extra precision

    nt0 = params.nt0
    nt0min = params.nt0min
    nBatches = Nbatch
    NT = params.NT
    Nfilt = params.Nfilt
    Nchan = probe.Nchan

    # two variables for the same thing? number of nearest channels to each primary channel
    # TODO: unclear - let's fix this
    NchanNear = min(probe.Nchan, 32)
    Nnearest = min(probe.Nchan, 32)

    # decay of gaussian spatial mask centered on a channel
    sigmaMask = params.sigmaMask

    batchstart = list(range(0, NT * nBatches + 1, NT))

    # find the closest NchanNear channels, and the masks for those channels
    iC, mask, C2C = getClosestChannels(probe, sigmaMask, NchanNear)

    # sorting order for the batches
    isortbatches = iorig
    nhalf = int(ceil(nBatches / 2)) - 1  # halfway point

    # this batch order schedule goes through half of the data forward and backward during the model
    # fitting and then goes through the data symmetrically-out from the center during the final
    # pass
    ischedule = np.concatenate(
        (np.arange(nhalf, nBatches), np.arange(nBatches - 1, nhalf - 1, -1)))
    i1 = np.arange(nhalf - 1, -1, -1)
    i2 = np.arange(nhalf, nBatches)

    irounds = np.concatenate((ischedule, i1, i2))

    niter = irounds.size
    if irounds[niter - nBatches - 1] != nhalf:
        # this check is in here in case I do something weird when I try different schedules
        raise ValueError('Mismatch between number of batches')

    # these two flags are used to keep track of what stage of model fitting we're at
    # flag_final = 0
    flag_resort = 1

    # this is the absolute temporal offset in seconds corresponding to the start of the
    # spike sorted time segment
    t0 = 0  # ceil(params.trange(1) * ops.fs)

    nInnerIter = 60  # this is for SVD for the power iteration

    # schedule of learning rates for the model fitting part
    # starts small and goes high, it corresponds approximately to the number of spikes
    # from the past that were averaged to give rise to the current template
    pmi = cp.exp(
        -1. /
        cp.linspace(params.momentum[0], params.momentum[1], niter - nBatches))
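    # For example (illustrative values, not necessarily the defaults), momentum = (20, 400)
    # makes pmi ramp from exp(-1/20) ~= 0.95 up to exp(-1/400) ~= 0.9975, i.e. templates go
    # from averaging roughly 20 past spikes to roughly 400 as the fit progresses.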

    # how many channels to extend out the waveform in mexgetspikes
    Nsum = min(Nchan, 7)
    # lots of parameters passed into the CUDA scripts
    Params = np.array([
        NT, Nfilt, params.Th[0], nInnerIter, nt0, Nnearest, Nrank, params.lam,
        pmi[0], Nchan, NchanNear, params.nt0min, 1, Nsum, NrankPC, params.Th[0]
    ],
                      dtype=np.float64)

    # W0 has to be ordered like this
    W0 = cp.transpose(
        cp.atleast_3d(cp.asarray(wPCA, dtype=np.float64, order='F')),
        [0, 2, 1])

    # initialize the list of channels each template lives on
    iList = cp.zeros((Nnearest, Nfilt), dtype=np.int32, order='F')

    # initialize average number of spikes per batch for each template
    nsp = cp.zeros((0, 1), dtype=np.float64, order='F')

    # this flag starts 0, is set to 1 later
    Params[12] = 0

    # kernels for subsample alignment
    Ka, Kb = getKernels(params)

    p1 = .95  # decay of nsp estimate in each batch

    ntot = 0
    # this keeps track of dropped templates for debugging purposes
    ndrop = np.zeros(2, dtype=np.float32, order='F')

    # this is the minimum firing rate that all templates must maintain, or be dropped
    m0 = params.minFR * params.NT / params.fs

    # allocate variables when switching to extraction phase
    # this holds spike times, clusters and other info per spike
    st3 = []  # cp.zeros((int(1e7), 5), dtype=np.float32, order='F')

    # these ones store features per spike
    # Nnearest is the number of nearest templates to store features for
    fW = LargeArrayWriter(ctx.path('fW', ext='.dat'),
                          dtype=np.float32,
                          shape=(Nnearest, -1))
    # NchanNear is the number of nearest channels to take PC features from
    fWpc = LargeArrayWriter(ctx.path('fWpc', ext='.dat'),
                            dtype=np.float32,
                            shape=(NchanNear, Nrank, -1))

    for ibatch in tqdm(range(niter), desc="Optimizing templates"):
        # korder is the index of the batch at this point in the schedule
        korder = int(irounds[ibatch])
        # k is the index of the batch in absolute terms
        k = int(isortbatches[korder])
        logger.debug("Batch %d/%d, %d templates.", ibatch, niter, Nfilt)

        if ibatch > niter - nBatches - 1 and korder == nhalf:
            # this is required to revert back to the template states in the middle of the
            # batches
            W, dWU = ir.W, ir.dWU
            logger.debug('Reverted back to middle timepoint.')

        if ibatch < niter - nBatches:
            # obtained pm for this batch
            Params[8] = float(pmi[ibatch])
            pm = pmi[ibatch] * ones((Nfilt, ), dtype=np.float64, order='F')

        # loading a single batch (same as everywhere)
        offset = Nchan * batchstart[k]
        dat = proc.flat[offset:offset + NT * Nchan].reshape((-1, Nchan),
                                                            order='F')
        dataRAW = cp.asarray(dat, dtype=np.float32) / params.scaleproc

        if ibatch == 0:
            # only on the first batch, we first get a new set of spikes from the residuals,
            # which in this case is the unmodified data because we start with no templates
            # CUDA function to get spatiotemporal clips from spike detections
            dWU, cmap = mexGetSpikes2(Params, dataRAW, wTEMP, iC)

            dWU = cp.asarray(dWU, dtype=np.float64, order='F')

            # project these into the wPCA waveforms
            dWU = cp.reshape(cp.dot(
                wPCAd,
                cp.dot(wPCAd.T, dWU.reshape((dWU.shape[0], -1), order='F'))),
                             dWU.shape,
                             order='F')

            # initialize the low-rank decomposition with standard waves
            W = W0[:, cp.ones(dWU.shape[2], dtype=np.int32), :]
            Nfilt = W.shape[1]  # update the number of filters/templates
            # initialize the number of spikes for new templates with the minimum allowed value,
            # so it doesn't get thrown back out right away
            nsp = _extend(nsp, 0, Nfilt, m0)
            Params[1] = Nfilt  # update in the CUDA parameters

        if flag_resort:
            # this is a flag to resort the order of the templates according to best peak
            # channel
            # this is important in order to have cohesive memory requests from the GPU RAM
            # max channel (either positive or negative peak)
            iW = cp.argmax(cp.abs(dWU[nt0min - 1, :, :]), axis=0)
            # iW = int32(squeeze(iW))

            isort = cp.argsort(iW)  # sort by max abs channel
            iW = iW[isort]
            # use this ordering to resort all the other template variables
            W = W[:, isort, :]
            dWU = dWU[:, :, isort]
            nsp = nsp[isort]

        # decompose dWU by svd of time and space (via covariance matrix of 61 by 61 samples)
        # this uses a "warm start" by remembering the W from the previous iteration
        W, U, mu = mexSVDsmall2(Params, dWU, W, iC, iW, Ka, Kb)

        # UtU is the gram matrix of the spatial components of the low-rank SVDs
        # it tells us which pairs of templates are likely to "interfere" with each other
        # such as when we subtract off a template
        # this needs to change (but I don't know why!)
        UtU, maskU = getMeUtU(iW, iC, mask, Nnearest, Nchan)

        # main CUDA function in the whole codebase. does the iterative template matching
        # based on the current templates, gets features for these templates if requested
        # (featW, featPC),
        # gets scores for the template fits to each spike (vexp), outputs the average of
        # waveforms assigned to each cluster (dWU0),
        # and probably a few more things I forget about
        st0, id0, x0, featW, dWU0, drez, nsp0, featPC, vexp = mexMPnu8(
            Params, dataRAW, U, W, mu, iC, iW, UtU, iList, wPCA)

        logger.debug("%d spikes.", x0.size)

        # Sometimes nsp can get transposed (think this has to do with it being
        # a single element in one iteration, to which elements are added
        # nsp, nsp0, and pm must all be row vectors (Nfilt x 1), so force nsp
        # to be a row vector.
        # nsp = cp.atleast_2d(nsp)
        # nsprow, nspcol = nsp.shape
        # if nsprow < nspcol:
        #     nsp = nsp.T
        nsp = nsp.squeeze()

        # updates the templates as a running average weighted by recency
        # since some clusters have different number of spikes, we need to apply the
        # exp(pm) factor several times, and fexp is the resulting update factor
        # for each template
        fexp = np.exp(nsp0 * cp.log(pm[:Nfilt]))
        fexp = cp.reshape(fexp, (1, 1, -1), order='F')
        dWU = dWU * fexp + (1 - fexp) * (
            dWU0 / cp.reshape(cp.maximum(1, nsp0), (1, 1, -1), order='F'))

        # nsp just gets updated according to the fixed factor p1
        nsp = nsp * p1 + (1 - p1) * nsp0

        if ibatch == niter - nBatches - 1:
            # if we reached this point, we need to disable secondary template updates
            # like dropping, and adding new templates. We need to memorize the state of the
            # templates at this timepoint, and set the processing mode to "extraction and
            # tracking"

            flag_resort = 0  # no need to resort templates by channel any more
            # flag_final = 1  # this is the "final" pass

            # final clean up, triage templates one last time
            W, U, dWU, mu, nsp, ndrop = triageTemplates2(
                params, iW, C2C, W, U, dWU, mu, nsp, ndrop)

            # final number of templates
            Nfilt = W.shape[1]
            Params[1] = Nfilt

            # final covariance matrix between all templates
            WtW, iList = getMeWtW(W, U, Nnearest)

            # iW is the final channel assigned to each template
            iW = cp.argmax(cp.abs(dWU[nt0min - 1, :, :]), axis=0)

            # extract ALL features on the last pass
            Params[12] = 2  # this is a flag to output features (PC and template features)

            # different threshold on last pass? usually the threshold is much lower on the last pass
            Params[2] = params.Th[-1]

            # memorize the state of the templates
            logger.debug("Memorized middle timepoint.")
            ir.W, ir.dWU, ir.U, ir.mu = W, dWU, U, mu
            ir.Wraw = cp.zeros((U.shape[0], W.shape[0], U.shape[1]),
                               dtype=np.float64,
                               order='F')
            for n in range(U.shape[1]):
                # temporarily use U rather than Urot until I have a chance to test it
                ir.Wraw[:, :, n] = mu[n] * cp.dot(U[:, n, :], W[:, n, :].T)

        if ibatch < niter - nBatches - 1:
            # during the main "learning" phase of fitting a model
            if ibatch % 5 == 0:
                # this drops templates based on spike rates and/or similarities to
                # other templates
                W, U, dWU, mu, nsp, ndrop = triageTemplates2(
                    params, iW, C2C, W, U, dWU, mu, nsp, ndrop)

            Nfilt = W.shape[1]  # update the number of filters
            Params[1] = Nfilt

            # this adds new templates if they are detected in the residual
            dWU0, cmap = mexGetSpikes2(Params, drez, wTEMP, iC)

            if dWU0.shape[2] > 0:
                # new templates need to be integrated into the same format as all templates
                # apply PCA for smoothing purposes
                dWU0 = cp.reshape(cp.dot(
                    wPCAd,
                    cp.dot(
                        wPCAd.T,
                        dWU0.reshape(
                            (dWU0.shape[0], dWU0.shape[1] * dWU0.shape[2]),
                            order='F'))),
                                  dWU0.shape,
                                  order='F')
                dWU = cp.concatenate((dWU, dWU0), axis=2)

                m = dWU0.shape[2]
                # initialize temporal components of waveforms
                W = _extend(W,
                            Nfilt,
                            Nfilt + m,
                            W0[:, cp.ones(m, dtype=np.int32), :],
                            axis=1)

                # initialize the number of spikes with the minimum allowed
                nsp = _extend(nsp, Nfilt, Nfilt + m,
                              params.minFR * NT / params.fs)
                # initialize the amplitude of this spike with a lowish number
                mu = _extend(mu, Nfilt, Nfilt + m, 10)

                # if the number of filters exceed the maximum allowed, clip it
                Nfilt = min(params.Nfilt, W.shape[1])
                Params[1] = Nfilt

                # remove any new filters over the maximum allowed
                W = W[:, :Nfilt, :]
                dWU = dWU[:, :, :Nfilt]
                nsp = nsp[:Nfilt]
                mu = mu[:Nfilt]

        if ibatch > niter - nBatches - 1:
            # during the final extraction pass, this keeps track of all spikes and features

            # we memorize the spatio-temporal decomposition of the waveforms at this batch
            # this is currently only used in the GUI to provide an accurate reconstruction
            # of the raw data at this time
            ir.WA[..., k] = cp.asnumpy(W)
            ir.UA[..., k] = cp.asnumpy(U)
            ir.muA[..., k] = cp.asnumpy(mu)

            # we carefully assign the correct absolute times to spikes found in this batch
            ioffset = params.ntbuff - 1
            if k == 0:
                ioffset = 0  # the first batch is special (no pre-buffer)

            toff = nt0min + t0 - ioffset + (NT - params.ntbuff) * k
            st = toff + st0

            st30 = np.c_[
                cp.asnumpy(st),  # spike times
                cp.asnumpy(id0),  # spike clusters (0-indexing)
                cp.asnumpy(x0),  # template amplitudes
                cp.asnumpy(vexp),  # residual variance of this spike
                korder *
                np.ones(st.size),  # batch from which this spike was found
            ]
            # Check the number of spikes.
            assert st30.shape[0] == featW.shape[1] == featPC.shape[2]
            st3.append(st30)
            fW.append(featW)
            fWpc.append(featPC)

            ntot = ntot + x0.size  # keeps track of total number of spikes so far

        if ibatch == niter - nBatches - 1:
            # these next three store the low-d template decompositions
            ir.WA = np.zeros((nt0, Nfilt, Nrank, nBatches),
                             dtype=np.float32,
                             order='F')
            ir.UA = np.zeros((Nchan, Nfilt, Nrank, nBatches),
                             dtype=np.float32,
                             order='F')
            ir.muA = np.zeros((Nfilt, nBatches), dtype=np.float32, order='F')

        if ibatch % 100 == 0:
            # this is some of the relevant diagnostic information to be printed during training
            logger.info(('%d / %d batches, %d units, nspks: %2.4f, mu: %2.4f, '
                         'nst0: %d, merges: %2.4f, %2.4f'), ibatch, niter,
                        Nfilt, nsp.sum(), median(mu), st0.size, *ndrop)

        free_gpu_memory()

    # Close the large array writers and save the JSON metadata files to disk.
    fW.close()
    fWpc.close()

    # just display the total number of spikes
    logger.info("Found %d spikes.", ntot)

    # Save results to the ctx.intermediate object.
    ir.st3 = np.concatenate(st3, axis=0)

    # the similarity score between templates is simply the correlation,
    # taken as the max over several consecutive time delays
    ir.simScore = cp.asnumpy(cp.max(WtW, axis=2))

    # NOTE: these are now already saved by LargeArrayWriter
    # fWa = np.concatenate(fW, axis=-1)
    # fWpca = np.concatenate(fWpc, axis=-1)

    # the template features are stored in cProj, like in Kilosort1
    # ir.cProj = fWa.T
    # the neighboring template indices are stored in iNeigh
    ir.iNeigh = cp.asnumpy(iList)

    #  permute the PC projections in the right order
    # ir.cProjPC = np.transpose(fWpca, (2, 1, 0))
    # iNeighPC keeps the indices of the channels corresponding to the PC features
    ir.iNeighPC = cp.asnumpy(iC[:, iW])

    # Number of spikes.
    assert ir.st3.shape[0] == fW.shape[-1] == fWpc.shape[-1]

    # this whole next block is just done to compress the compressed templates
    # we separately svd the time components of each template, and the spatial components
    # this also requires a careful decompression function, available somewhere in the GUI code
    nKeep = min(Nchan * 3, 20)  # how many PCs to keep
    W_a = np.zeros((nt0 * Nrank, nKeep, Nfilt), dtype=np.float32)
    W_b = np.zeros((nBatches, nKeep, Nfilt), dtype=np.float32)
    U_a = np.zeros((Nchan * Nrank, nKeep, Nfilt), dtype=np.float32)
    U_b = np.zeros((nBatches, nKeep, Nfilt), dtype=np.float32)

    for j in tqdm(range(Nfilt), desc='Compressing templates'):
        # do this for every template separately
        WA = np.reshape(ir.WA[:, j, ...], (-1, nBatches), order='F')
        # svd on the GPU was faster for this, but the Python randomized CPU version
        # might be faster still
        # WA = gpuArray(WA)
        A, B, C = svdecon_cpu(WA)
        # W_a times W_b results in a reconstruction of the time components
        W_a[:, :, j] = np.dot(A[:, :nKeep], B[:nKeep, :nKeep])
        W_b[:, :, j] = C[:, :nKeep]

        UA = np.reshape(ir.UA[:, j, ...], (-1, nBatches), order='F')
        # UA = gpuArray(UA)
        A, B, C = svdecon_cpu(UA)
        # U_a times U_b results in a reconstruction of the spatial components
        U_a[:, :, j] = np.dot(A[:, :nKeep], B[:nKeep, :nKeep])
        U_b[:, :, j] = C[:, :nKeep]

    logger.info('Finished compressing time-varying templates.')

    return Bunch(
        wPCA=wPCA[:, :Nrank],
        wTEMP=wTEMP,
        st3=ir.st3,
        simScore=ir.simScore,
        # cProj=ir.cProj,
        # cProjPC=ir.cProjPC,
        iNeigh=ir.iNeigh,
        iNeighPC=ir.iNeighPC,
        WA=ir.WA,
        UA=ir.UA,
        W=ir.W,
        U=ir.U,
        dWU=ir.dWU,
        mu=ir.mu,
        W_a=W_a,
        W_b=W_b,
        U_a=U_a,
        U_b=U_b,
    )
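The recency-weighted template update inside the loop above (the fexp/pm lines) is an exponential running average whose decay depends on how many spikes each template collected in the batch. A standalone sketch, assuming plain NumPy arrays stand in for the CuPy ones (shapes and names are illustrative, not the snippet's API):

import numpy as np

def update_templates(dWU, dWU0, nsp0, pm):
    """Blend old templates dWU (nt0, Nchan, Nfilt) with this batch's means dWU0.

    pm is the per-template momentum in (0, 1); a template that collected nsp0
    spikes is decayed by pm**nsp0, so busy templates track the new data faster.
    """
    fexp = np.exp(nsp0 * np.log(pm)).reshape(1, 1, -1)        # == pm ** nsp0
    batch_mean = dWU0 / np.maximum(1, nsp0).reshape(1, 1, -1)  # per-template batch average
    return dWU * fexp + (1 - fexp) * batch_mean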
Ejemplo n.º 32
0
def cross_entropy(label, prob):
    loss = -np.sum(label * np.log(prob))
    return loss
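A quick usage check for the function above (a sketch; the clipping is an added safeguard against log(0) and is not part of the original snippet):

import numpy as np

label = np.array([0.0, 1.0, 0.0])                          # one-hot target
prob = np.array([0.2, 0.7, 0.1])                           # predicted distribution
print(cross_entropy(label, np.clip(prob, 1e-12, 1.0)))     # ~0.3567 (= -log(0.7))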