Example #1
    def grad(self, _cur_output, _next_output, _next_action,
             _batch_tuples, _err_list, _err_count, _k):
        # alloc
        if self.config.gpu:
            _cur_output.grad = cupy.zeros_like(_cur_output.data)
        else:
            _cur_output.grad = np.zeros_like(_cur_output.data)

        # compute the gradient contributed by each tuple
        for i in range(len(_batch_tuples)):
            # skip tuples masked out by bootstrap
            if not _batch_tuples[i].mask[_k]:
                continue

            cur_action_value = \
                _cur_output.data[i][_batch_tuples[i].action].tolist()
            reward = _batch_tuples[i].reward
            target_value = reward
            # if the next state is not terminal, add the discounted next value
            if _batch_tuples[i].next_state.in_game:
                next_action_value = \
                    _next_output.data[i][_next_action[i]].tolist()
                target_value += self.config.gamma * next_action_value
            loss = cur_action_value - target_value
            _cur_output.grad[i][_batch_tuples[i].action] = 2 * loss

            _err_list[i] += abs(loss)
            _err_count[i] += 1
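For reference, the stored gradient above is the derivative of a squared TD error; a minimal NumPy sketch with made-up values:

# d/dq (q - target)^2 = 2 * (q - target), the rule used above
import numpy as np

gamma = 0.99
q = 1.5          # current Q(s, a)
next_q = 1.0     # Q(s', a') for the chosen next action
target = 0.2 + gamma * next_q
grad = 2 * (q - target)
print(grad)      # -> 0.62 (up to float rounding)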
Example #2
def run(args):
    onnx_filename = os.path.join(args.test_dir, 'model.onnx')
    input_names, output_names = onnx_input_output_names(onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = load_test_data(test_data_dir, input_names, output_names)

    with open(onnx_filename, 'rb') as f:
        onnx_proto = f.read()

    if args.debug:
        logger = tensorrt.Logger(tensorrt.Logger.Severity.INFO)
    else:
        logger = tensorrt.Logger()
    builder = tensorrt.Builder(logger)
    # TODO(hamaji): Infer batch_size from inputs.
    builder.max_batch_size = args.batch_size
    network = builder.create_network()
    parser = tensorrt.OnnxParser(network, logger)
    parser.parse(onnx_proto)
    engine = builder.build_cuda_engine(network)
    context = engine.create_execution_context()

    assert len(inputs) + len(outputs) == engine.num_bindings
    for i, (_, input) in enumerate(inputs):
        assert args.batch_size == input.shape[0]
        assert input.shape[1:] == engine.get_binding_shape(i)
    for i, (_, output) in enumerate(outputs):
        assert args.batch_size == output.shape[0]
        i += len(inputs)
        assert output.shape[1:] == engine.get_binding_shape(i)

    inputs = [v for n, v in inputs]
    outputs = [v for n, v in outputs]
    gpu_inputs = to_gpu(inputs)
    gpu_outputs = []
    for output in outputs:
        gpu_outputs.append(cupy.zeros_like(cupy.array(output)))
    bindings = [a.data.ptr for a in gpu_inputs]
    bindings += [a.data.ptr for a in gpu_outputs]

    context.execute(args.batch_size, bindings)

    actual_outputs = to_cpu(gpu_outputs)

    for name, expected, actual in zip(output_names, outputs, actual_outputs):
        np.testing.assert_allclose(expected, actual,
                                   rtol=1e-3, atol=1e-4, err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    if args.iterations > 1:
        num_iterations = args.iterations - 1
        start = time.time()
        for t in range(num_iterations):
            context.execute(args.batch_size, bindings)
            cupy.cuda.device.Device().synchronize()
        elapsed = time.time() - start
        print('Elapsed: %.3f msec' % (elapsed * 1000 / num_iterations))
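A hypothetical invocation of run() (sketch): test_dir follows the standard ONNX test layout with model.onnx and test_data_set_0/, and the path below is made up; running it requires TensorRT and CUDA.

import argparse

args = argparse.Namespace(test_dir='out/mnist',  # hypothetical path
                          batch_size=1, debug=False, iterations=1)
# run(args)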
Example #3
    def test_scan_out(self, dtype):
        element_num = 10000

        if dtype in {cupy.int8, cupy.uint8, cupy.float16}:
            element_num = 100

        a = cupy.ones((element_num,), dtype=dtype)
        b = cupy.zeros_like(a)
        cupy.core.core.scan(a, b)
        expect = cupy.arange(start=1, stop=element_num + 1).astype(dtype)

        testing.assert_array_equal(b, expect)

        cupy.core.core.scan(a, a)
        testing.assert_array_equal(a, expect)
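cupy.core.core.scan here is CuPy's internal inclusive prefix sum; the public cupy.cumsum gives the same result (sketch):

import cupy

a = cupy.ones(10, dtype=cupy.int32)
print(cupy.cumsum(a))  # [ 1  2  3  4  5  6  7  8  9 10]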
Example #4
File: cuda.py Project: hillbig/chainer
def zeros_like(array, stream=None):
    """Creates a zero-filled cupy.ndarray object like the given array.

    Args:
        array (cupy.ndarray or numpy.ndarray): Base array.
        stream (cupy.cuda.Stream): CUDA stream.

    Returns:
        cupy.ndarray: Zero-filled array.

    """
    warnings.warn("chainer.cuda.zeros_like is deprecated. Use cupy.zeros_like instead.", DeprecationWarning)
    check_cuda_available()
    assert stream is None
    if isinstance(array, cupy.ndarray):
        return cupy.zeros_like(array)
    return cupy.zeros(array.shape, dtype=array.dtype)
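As the deprecation warning says, call cupy.zeros_like directly (sketch):

import cupy

x = cupy.arange(6, dtype=cupy.float32).reshape(2, 3)
z = cupy.zeros_like(x)  # zero-filled, same shape and dtype as x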
Example #5
 def forward_gpu(self, inputs):
     a, b = inputs
     c = cp.zeros_like(a, 'float32')
     chainer.cuda.elementwise(
         'int32 j, raw T a, raw T b',
         'raw T c',
         '''
             float* ap = &a[j * 3];
             float* bp = &b[j * 3];
             float* cp = &c[j * 3];
             cp[0] = ap[1] * bp[2] - ap[2] * bp[1];
             cp[1] = ap[2] * bp[0] - ap[0] * bp[2];
             cp[2] = ap[0] * bp[1] - ap[1] * bp[0];
         ''',
         'function',
     )(
         cp.arange(a.size // 3).astype('int32'), a, b, c,
     )
     return c,
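A CPU sketch of what the kernel computes: a row-wise cross product of 3-vectors, equivalent to numpy.cross on (N, 3) arrays.

import numpy as np

a = np.random.rand(4, 3).astype('float32')
b = np.random.rand(4, 3).astype('float32')
c = np.cross(a, b)  # same formulas as cp[0..2] in the kernel body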
Example #6
def _process_data(samples_history,
                  u_samples_history,
                  n,
                  n_ext,
                  t_start,
                  t_end,
                  target_image,
                  corrupted_image,
                  burn_percentage,
                  isSinogram,
                  sinogram,
                  theta,
                  fbp,
                  SimulationResult_dir,
                  result_file,
                  cmap=plt.cm.seismic_r,
                  desired_n_ext=256):

    #remove pdf files:
    remove_pdf_files(SimulationResult_dir)

    burn_start_index = int(0.01 * burn_percentage *
                           u_samples_history.shape[0])
    fourier = imc.FourierAnalysis_2D(n, desired_n_ext, t_start, t_end)
    sL2 = util.sigmasLancosTwo(int(n))
    # n_ext = 2*n
    scaling_factor = (2 * fourier.extended_basis_number - 1) / (2 * n_ext - 1)

    #initial conditions
    samples_init = samples_history[0, :]

    #change
    u_samples_history = u_samples_history[burn_start_index:, :]
    samples_history = samples_history[burn_start_index:, :]
    N = u_samples_history.shape[0]

    #initial condition
    vF_init = util.symmetrize(cp.asarray(samples_init)).reshape(
        2 * n - 1, 2 * n - 1, order=imc.ORDER) * scaling_factor
    # vF_init = vF_init.conj()

    vF_mean = util.symmetrize(cp.asarray(np.mean(samples_history,
                                                 axis=0))) * scaling_factor
    vF_stdev = util.symmetrize(cp.asarray(np.std(samples_history, axis=0)))
    vF_abs_stdev = util.symmetrize(
        cp.asarray(np.std(np.abs(samples_history), axis=0)))

    print('fourier n_ext = {}'.format(n_ext))
    # if isSinogram:
    #     vF_init = util.symmetrize_2D(fourier.rfft2(cp.asarray(fbp,dtype=cp.float32)))

    #    if not isSinogram:
    vForiginal = util.symmetrize_2D(
        fourier.rfft2(cp.array(
            target_image,
            dtype=cp.float32)))  # the target image does not need to be scaled
    reconstructed_image_original = fourier.irfft2(vForiginal[:, n - 1:])
    reconstructed_image_init = fourier.irfft2(vF_init[:, n - 1:])

    samples_history_cp = cp.asarray(samples_history) * scaling_factor
    v_image_count = 0
    v_image_M = cp.zeros_like(reconstructed_image_original)
    v_image_M2 = cp.zeros_like(reconstructed_image_original)
    v_image_aggregate = (v_image_count, v_image_M, v_image_M2)
    for i in range(N):
        vF = util.symmetrize(samples_history_cp[i, :]).reshape(2 * n - 1,
                                                               2 * n - 1,
                                                               order=imc.ORDER)
        v_temp = fourier.irfft2(vF[:, n - 1:])
        v_image_aggregate = util.updateWelford(v_image_aggregate, v_temp)

    v_image_mean, v_image_var, v_image_s_var = util.finalizeWelford(
        v_image_aggregate)

    #TODO: This is sign of wrong processing, Remove this
    # if isSinogram:
    #     reconstructed_image_init = cp.fliplr(reconstructed_image_init)
    #     v_image_mean = cp.fliplr(v_image_mean)
    #     v_image_s_var = cp.fliplr(v_image_s_var)

    mask = cp.zeros_like(reconstructed_image_original)
    r = (mask.shape[0] + 1) // 2
    for i in range(mask.shape[0]):
        for j in range(mask.shape[1]):
            x = 2 * (i - r) / mask.shape[0]
            y = 2 * (j - r) / mask.shape[1]
            if (x**2 + y**2 < 1):
                mask[i, j] = 1.
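    # (sketch: the same circular mask can be built without the Python loops,
    #  e.g. ii, jj = cp.ogrid[:mask.shape[0], :mask.shape[1]] and
    #  mask = ((2 * (ii - r) / mask.shape[0]) ** 2 +
    #          (2 * (jj - r) / mask.shape[1]) ** 2 < 1), cast to mask.dtype)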

    u_samples_history_cp = cp.asarray(u_samples_history) * scaling_factor
    u_image = cp.zeros_like(v_image_mean)
    # ell_image = cp.zeros_like(v_image_mean)

    u_image_count = 0
    u_image_M = cp.zeros_like(u_image)
    u_image_M2 = cp.zeros_like(u_image)
    u_image_aggregate = (u_image_count, u_image_M, u_image_M2)
    ell_image_count = 0
    ell_image_M = cp.zeros_like(u_image)
    ell_image_M2 = cp.zeros_like(u_image)
    ell_image_aggregate = (ell_image_count, ell_image_M, ell_image_M2)
    for i in range(N):
        uF = util.symmetrize(u_samples_history_cp[i, :]).reshape(
            2 * n - 1, 2 * n - 1, order=imc.ORDER)
        u_temp = fourier.irfft2(uF[:, n - 1:])
        u_image_aggregate = util.updateWelford(u_image_aggregate, u_temp)
        ell_temp = cp.exp(u_temp)
        ell_image_aggregate = util.updateWelford(ell_image_aggregate, ell_temp)
    u_image_mean, u_image_var, u_image_s_var = util.finalizeWelford(
        u_image_aggregate)
    ell_image_mean, ell_image_var, ell_image_s_var = util.finalizeWelford(
        ell_image_aggregate)

    # if isSinogram:
    # u_image_mean = cp.flipud(u_image_mean) #cp.rot90(cp.fft.fftshift(u_image),1)
    # u_image_var = cp.flipud(u_image_var) #cp.rot90(cp.fft.fftshift(u_image),1)
    # ell_image_mean = cp.flipud(ell_image_mean)# cp.rot90(cp.fft.fftshift(ell_image),1)
    # ell_image_var = cp.flipud(ell_image_var)# cp.rot90(cp.fft.fftshift(ell_image),1)

    ri_fourier = cp.asnumpy(reconstructed_image_original)

    if isSinogram:
        ri_compare = fbp
    else:
        ri_compare = ri_fourier

    is_masked = True
    if is_masked:
        reconstructed_image_var = mask * v_image_s_var
        reconstructed_image_mean = mask * v_image_mean
        reconstructed_image_init = mask * reconstructed_image_init
    else:
        reconstructed_image_mean = v_image_mean
        reconstructed_image_var = v_image_s_var
    # u_image_mean / u_image_s_var and ell_image_mean / ell_image_s_var are
    # used exactly as returned by finalizeWelford above.

    ri_init = cp.asnumpy(reconstructed_image_init)

    # ri_fourier = fourier.irfft2((sL2.astype(cp.float32)*vForiginal)[:,n-1:])
    vForiginal_n = cp.asnumpy(vForiginal)
    vF_init_n = cp.asnumpy(vF_init)
    ri_fourier_n = cp.asnumpy(ri_fourier)
    vF_mean_n = cp.asnumpy(
        vF_mean.reshape(2 * n - 1, 2 * n - 1, order=imc.ORDER))
    vF_stdev_n = cp.asnumpy(
        vF_stdev.reshape(2 * n - 1, 2 * n - 1, order=imc.ORDER))
    vF_abs_stdev_n = cp.asnumpy(
        vF_abs_stdev.reshape(2 * n - 1, 2 * n - 1, order=imc.ORDER))
    ri_mean_n = cp.asnumpy(reconstructed_image_mean)
    ri_var_n = cp.asnumpy(reconstructed_image_var)
    ri_std_n = np.sqrt(ri_var_n)

    #    ri_n_scaled = ri_n*cp.asnumpy(scaling_factor)
    u_mean_n = cp.asnumpy(u_image_mean)
    u_var_n = cp.asnumpy(u_image_s_var)
    ell_mean_n = cp.asnumpy(ell_image_mean)
    ell_var_n = cp.asnumpy(ell_image_s_var)

    #Plotting one by one
    #initial condition
    fig = plt.figure()
    plt.subplot(1, 2, 1)
    im = plt.imshow(np.absolute(vF_init_n), cmap=cmap, vmin=-1, vmax=1)
    fig.colorbar(im)
    plt.title('Fourier - absolute')
    plt.subplot(1, 2, 2)
    im = plt.imshow(np.angle(vF_init_n), cmap=cmap, vmin=-np.pi, vmax=np.pi)
    fig.colorbar(im)
    plt.title('Fourier - phase')
    plt.tight_layout()
    # plt.savefig(str(SimulationResult_dir/'vF_init'+image_extension), bbox_inches='tight')
    savefig(SimulationResult_dir / ('vF_init' + image_extension))
    plt.close()

    #vF Original
    fig = plt.figure()
    plt.subplot(1, 2, 1)
    im = plt.imshow(np.absolute(vForiginal_n), cmap=cmap, vmin=-1, vmax=1)
    fig.colorbar(im)
    plt.title('Fourier - absolute')
    plt.subplot(1, 2, 2)
    im = plt.imshow(np.angle(vForiginal_n), cmap=cmap, vmin=-np.pi, vmax=np.pi)
    fig.colorbar(im)
    plt.title('Fourier - angle')
    plt.tight_layout()
    # plt.savefig(SimulationResult_dir/'vForiginal'+image_extension), bbox_inches='tight')
    savefig(SimulationResult_dir / ('vForiginal' + image_extension))
    plt.close()

    #vF mean
    fig = plt.figure()
    plt.subplot(1, 2, 1)
    im = plt.imshow(np.absolute(vF_mean_n), cmap=cmap, vmin=-1, vmax=1)
    fig.colorbar(im)
    plt.title('Fourier - absolute')
    plt.subplot(1, 2, 2)
    im = plt.imshow(np.angle(vF_mean_n), cmap=cmap, vmin=-np.pi, vmax=np.pi)
    fig.colorbar(im)
    plt.title('Fourier - phase')
    plt.tight_layout()
    # plt.savefig(SimulationResult_dir/'vF_mean'+image_extension), bbox_inches='tight')
    savefig(SimulationResult_dir / ('vF_mean' + image_extension))
    plt.close()

    #Absolute error of vF - vForiginal
    imshow(np.abs(vF_mean_n - vForiginal_n), cmap, -1, 1, 'Fourier abs Error',
           'abs_err_vF_mean', SimulationResult_dir)

    #Absolute error of vF_init - vForiginal
    imshow(np.abs(vF_init_n - vForiginal_n), cmap, -1, 1, 'Fourier abs Error',
           'abs_err_vF_init', SimulationResult_dir)

    #Absolute error of vF_init - vForiginal
    imshow(np.abs(vF_init_n - vF_mean_n), cmap, -1, 1, 'Fourier abs Error',
           'abs_err_vF_init_vF_mean', SimulationResult_dir)

    #Ri_mean
    imshow(ri_mean_n, cmap, -1, 1, 'Posterior mean', 'ri_mean_n',
           SimulationResult_dir)

    #Ri_fourier
    imshow(ri_fourier, cmap, -1, 1, 'Reconstructed image through Fourier',
           'ri_or_n', SimulationResult_dir)

    #Ri_init
    imshow(ri_init, cmap, -1, 1, 'Reconstructed image from initial sample',
           'ri_init', SimulationResult_dir)

    #Reconstructed Image variance
    imshow(ri_var_n, cmap, None, None, 'Posterior variance', 'ri_var_n',
           SimulationResult_dir)

    #Target Image
    imshow(target_image, cmap, -1, 1, 'Target Image', 'target_image',
           SimulationResult_dir)

    #Filtered Back Projection
    imshow(ri_compare, cmap, -1, 1, 'Filtered Back Projection', 'ri_compare',
           SimulationResult_dir)

    #Errors
    imshow((target_image - ri_mean_n), cmap, -1, 1, 'Error FBP', 'err_RI_TI',
           SimulationResult_dir)

    #Errors
    imshow((target_image - ri_compare), cmap, -1, 1, 'Error FBP-SPDE',
           'err_RIO_TI', SimulationResult_dir)

    #Errors
    imshow((ri_mean_n - ri_compare), cmap, -1, 1, 'Error SPDE', 'err_RI_CMP',
           SimulationResult_dir)

    #Mean $u$
    imshow(u_mean_n, cmap, None, None, 'Mean $u$', 'u_mean_n',
           SimulationResult_dir)

    #'Var $u$'
    imshow(u_var_n, cmap, None, None, 'Var $u$', 'u_var_n',
           SimulationResult_dir)

    #'Mean $\ell$'
    imshow(ell_mean_n, cmap, None, None, r'Mean $\ell$', 'ell_mean_n',
           SimulationResult_dir)

    #'Var $\ell$'
    imshow(ell_var_n, cmap, None, None, r'Var $\ell$', 'ell_var_n',
           SimulationResult_dir)

    fig = plt.figure()
    if isSinogram:
        im = plt.imshow(sinogram, cmap=cmap)
        plt.title('Sinogram')
    else:
        im = plt.imshow(corrupted_image, cmap=cmap)
        plt.title('corrupted_image --- CI')
    fig.colorbar(im)
    plt.tight_layout()
    # plt.savefig(SimulationResult_dir/'measurement'+image_extension), bbox_inches='tight')
    savefig(SimulationResult_dir / ('measurement' + image_extension))
    plt.close()

    #plot several slices
    N_slices = 16
    t_index = np.arange(target_image.shape[1])
    for i in range(N_slices):
        fig = plt.figure()
        slice_index = target_image.shape[0] * i // N_slices
        plt.plot(t_index,
                 target_image[slice_index, :],
                 '-k',
                 linewidth=0.25,
                 markersize=1)
        plt.plot(t_index,
                 ri_fourier_n[slice_index, :],
                 '-r',
                 linewidth=0.25,
                 markersize=1)
        plt.plot(t_index,
                 ri_mean_n[slice_index, :],
                 '-b',
                 linewidth=0.25,
                 markersize=1)

        plt.fill_between(
            t_index,
            ri_mean_n[slice_index, :] - 2 * ri_std_n[slice_index, :],
            ri_mean_n[slice_index, :] + 2 * ri_std_n[slice_index, :],
            color='b',
            alpha=0.1)
        plt.plot(t_index,
                 ri_compare[slice_index, :],
                 ':k',
                 linewidth=0.25,
                 markersize=1)
        # plt.savefig(SimulationResult_dir/'1D_Slice_{}'+image_extension.format(slice_index-(target_image.shape[0]//2))), bbox_inches='tight')
        savefig(SimulationResult_dir /
                ('1D_Slice_{}'.format(slice_index -
                                      (target_image.shape[0] // 2)) +
                 image_extension))
        plt.close()

    f_index = np.arange(n)
    for i in range(N_slices):
        fig = plt.figure()
        slice_index = vForiginal_n.shape[0] * i // N_slices
        plt.plot(f_index,
                 np.abs(vForiginal_n[slice_index, n - 1:]),
                 '-r',
                 linewidth=0.25,
                 markersize=1)
        plt.plot(f_index,
                 np.abs(vF_init_n[slice_index, n - 1:]),
                 ':k',
                 linewidth=0.25,
                 markersize=1)
        plt.plot(f_index,
                 np.abs(vF_mean_n[slice_index, n - 1:]),
                 '-b',
                 linewidth=0.25,
                 markersize=1)

        plt.fill_between(f_index,
                         np.abs(vF_mean_n[slice_index, n - 1:]) -
                         2 * vF_abs_stdev_n[slice_index, n - 1:],
                         np.abs(vF_mean_n[slice_index, n - 1:]) +
                         2 * vF_abs_stdev_n[slice_index, n - 1:],
                         color='b',
                         alpha=0.1)
        # plt.savefig(SimulationResult_dir/'1D_F_Slice_{}'+image_extension.format(slice_index-n)), bbox_inches='tight')
        savefig(SimulationResult_dir /
                ('1D_F_Slice_{}'.format(slice_index - n) + image_extension))
        plt.close()

    error = (target_image - ri_mean_n)
    error_CMP = (target_image - ri_compare)

    L2_error = np.linalg.norm(error)
    MSE = np.sum(error * error) / error.size
    PSNR = 10 * np.log10(np.max(ri_mean_n)**2 / MSE)
    SNR = np.mean(ri_mean_n) / np.sqrt(MSE * (error.size / (error.size - 1)))

    L2_error_CMP = np.linalg.norm(error_CMP)
    MSE_CMP = np.sum(error_CMP * error_CMP) / error_CMP.size
    PSNR_CMP = 10 * np.log10(np.max(ri_compare)**2 / MSE_CMP)
    SNR_CMP = np.mean(ri_compare) / np.sqrt(MSE_CMP * (error_CMP.size /
                                                       (error_CMP.size - 1)))
    metric = {
        'L2_error': L2_error,
        'MSE': MSE,
        'PSNR': PSNR,
        'SNR': SNR,
        'L2_error_CMP': L2_error_CMP,
        'MSE_CMP': MSE_CMP,
        'PSNR_CMP': PSNR_CMP,
        'SNR_CMP': SNR_CMP
    }
    # with h5py.File(result_file,mode='a') as file:
    #     for key,value in metric.items():
    #         if key in file.keys():
    #             del file[key]
    #         # else:
    #         file.create_dataset(key,data=value)

    print('Shallow-SPDE : L2-error {}, MSE {}, SNR {}, PSNR {}'.format(
        L2_error, MSE, SNR, PSNR))
    print('FBP : L2-error {}, MSE {}, SNR {}, PSNR {}'.format(
        L2_error_CMP, MSE_CMP, SNR_CMP, PSNR_CMP))
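The running image statistics above follow Welford's online algorithm; a minimal NumPy sketch of the update/finalize pair (hypothetical stand-ins for util.updateWelford / util.finalizeWelford):

import numpy as np

def update_welford(aggregate, value):
    # one streaming update of count, mean and sum of squared deviations
    count, mean, M2 = aggregate
    count += 1
    delta = value - mean
    mean = mean + delta / count
    M2 = M2 + delta * (value - mean)
    return count, mean, M2

def finalize_welford(aggregate):
    # returns mean, population variance and sample variance
    count, mean, M2 = aggregate
    return mean, M2 / count, M2 / (count - 1)

agg = (0, np.zeros(3), np.zeros(3))
for v in np.random.rand(100, 3):
    agg = update_welford(agg, v)
mean, var, s_var = finalize_welford(agg)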
Example #7
import numpy as np
import cupy
from chainer import cuda

def _mul_i():
    # note: despite the name "muli", this kernel just copies x into y
    return cuda.elementwise(
            "raw T x", "raw T y",
            """
                y[i] = x[i]
            """,
            "muli")

o = cupy.ones((3, 2, 2))
y = cupy.zeros_like(o)
# with raw-only arguments the launch size must be given explicitly;
# o.size (= 12) covers every element
print(_mul_i()(o, y, size=o.size))
Example #8
import chainer
import cupy as cp

def _voxelize_sub3(voxels):
    # fill in: flood "visible" empty voxels inward from the volume boundary,
    # then mark everything that is not visible as occupied
    bs, vs = voxels.shape[:2]
    voxels = cp.ascontiguousarray(voxels)
    visible = cp.zeros_like(voxels, 'int32')
    chainer.cuda.elementwise(
        'int32 j, raw int32 bs, raw int32 vs',
        'raw int32 voxels, raw int32 visible',
        '''
            int z = j % vs;
            int x = (j / vs) % vs;
            int y = (j / (vs * vs)) % vs;
            int bn = j / (vs * vs * vs);
            int pn = j;
            if ((y == 0) || (y == vs - 1) || (x == 0) || (x == vs - 1) || (z == 0) || (z == vs - 1)) {
                if (voxels[pn] == 0) visible[pn] = 1;
            }
        ''',
        'function',
    )(cp.arange(bs * vs * vs * vs).astype('int32'), bs, vs, voxels, visible)

    sum_visible = visible.sum()
    while True:
        chainer.cuda.elementwise(
            'int32 j, raw int32 bs, raw int32 vs',
            'raw int32 voxels, raw int32 visible',
            '''
                int z = j % vs;
                int x = (j / vs) % vs;
                int y = (j / (vs * vs)) % vs;
                int bn = j / (vs * vs * vs);
                int pn = j;
                if ((y == 0) || (y == vs - 1) || (x == 0) || (x == vs - 1) || (z == 0) || (z == vs - 1)) return;
                if (voxels[pn] == 0 && visible[pn] == 0) {
                    int yi, xi, zi;
                    yi = y - 1;
                    xi = x;
                    zi = z;
                    if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                    yi = y + 1;
                    xi = x;
                    zi = z;
                    if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                    yi = y;
                    xi = x - 1;
                    zi = z;
                    if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                    yi = y;
                    xi = x + 1;
                    zi = z;
                    if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                    yi = y;
                    xi = x;
                    zi = z - 1;
                    if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                    yi = y;
                    xi = x;
                    zi = z + 1;
                    if (visible[bn * vs * vs * vs + yi * vs * vs + xi * vs + zi] != 0) visible[pn] = 1;
                }
            ''',
            'function',
        )(cp.arange(bs * vs * vs * vs).astype('int32'), bs, vs, voxels, visible)
        if visible.sum() == sum_visible:
            break
        else:
            sum_visible = visible.sum()
    return 1 - visible
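For intuition, the fixed-point loop above is a flood fill from the boundary; a CPU sketch with SciPy's binary_fill_holes, which marks enclosed cavities as occupied just as 1 - visible does per batch element:

import numpy as np
from scipy.ndimage import binary_fill_holes

voxels = np.zeros((8, 8, 8), dtype=np.int32)
voxels[2:6, 2:6, 2:6] = 1      # hollow cube shell
voxels[3:5, 3:5, 3:5] = 0      # interior cavity
filled = binary_fill_holes(voxels).astype(np.int32)  # shell plus cavity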
Example #9
File: _util.py Project: zelo2/cupy
def _get_output(output, input, shape=None):
    if not isinstance(output, cupy.ndarray):
        return cupy.zeros_like(input, shape=shape, dtype=output, order='C')
    if output.shape != (input.shape if shape is None else tuple(shape)):
        raise ValueError('output shape is not correct')
    return output
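A small usage sketch: passing a dtype allocates a fresh C-ordered output, while passing an existing array validates its shape and returns it unchanged.

import cupy

x = cupy.ones((4, 4), dtype=cupy.float32)
out = _get_output(cupy.float64, x)  # new zero-filled array, shape (4, 4)
same = _get_output(out, x)          # shape checked, returned as-is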
Example #10
import pickle

import numpy as np

H = 200  # hidden layer size; assumed here, since H is not defined in this snippet
learning_rate = 1e-4
gamma = 0.99  # discount factor for reward
decay_rate = 0.99  # decay factor for RMSProp leaky sum of grad^2
resume = False  # resume from previous checkpoint?
render = False

# model initialization
D = 80 * 80  # input dimensionality: 80x80 grid
if resume:
    model = pickle.load(open('save.p', 'rb'))
else:
    model = {}
    model['W1'] = np.random.randn(H, D) / np.sqrt(D)  # "Xavier" initialization
    model['W2'] = np.random.randn(H) / np.sqrt(H)

# update buffers that add up gradients over a batch
grad_buffer = {k: np.zeros_like(v) for k, v in model.items()}
# rmsprop memory
rmsprop_cache = {k: np.zeros_like(v) for k, v in model.items()}


def sigmoid(x):
    # sigmoid "squashing" function to interval [0, 1]
    return 1.0 / (1.0 + np.exp(-x))


def prepro(I):
    """ prepro 210x160x3 uint8 frame into 6400 (80x80) 1D float vector """
    I = I[35:195]  # crop
    I = I[::2, ::2, 0]  # downsample by factor of 2
Example #11
File: _eigen.py Project: venkywonka/cupy
def svds(a,
         k=6,
         *,
         ncv=None,
         tol=0,
         which='LM',
         maxiter=None,
         return_singular_vectors=True):
    """Finds the largest ``k`` singular values/vectors for a sparse matrix.

    Args:
        a (cupy.ndarray or cupyx.scipy.sparse.csr_matrix): A real or complex
            array with dimension ``(m, n)``
        k (int): The number of singular values/vectors to compute. Must be
            ``1 <= k < min(m, n)``.
        ncv (int): The number of Lanczos vectors generated. Must be
            ``k + 1 < ncv < min(m, n)``. If ``None``, default value is used.
        tol (float): Tolerance for singular values. If ``0``, machine precision
            is used.
        which (str): Only 'LM' is supported. 'LM': finds ``k`` largest singular
            values.
        maxiter (int): Maximum number of Lanczos update iterations.
            If ``None``, default value is used.
        return_singular_vectors (bool): If ``True``, returns singular vectors
            in addition to singular values.

    Returns:
        tuple:
            If ``return_singular_vectors`` is ``True``, it returns ``u``, ``s``
            and ``vt`` where ``u`` is left singular vectors, ``s`` is singular
            values and ``vt`` is right singular vectors. Otherwise, it returns
            only ``s``.

    .. seealso:: :func:`scipy.sparse.linalg.svds`

    .. note::
        This is a naive implementation using cupyx.scipy.sparse.linalg.eigsh as
        an eigensolver on ``a.H @ a`` or ``a @ a.H``.

    """
    if a.ndim != 2:
        raise ValueError('expected 2D (shape: {})'.format(a.shape))
    if a.dtype.char not in 'fdFD':
        raise TypeError('unsupported dtype (actual: {})'.format(a.dtype))
    m, n = a.shape
    if k <= 0:
        raise ValueError('k must be greater than 0 (actual: {})'.format(k))
    if k >= min(m, n):
        raise ValueError('k must be smaller than min(m, n) (actual: {})'
                         ''.format(k))

    aH = a.conj().T
    if m >= n:
        aa = aH @ a
    else:
        aa = a @ aH

    if return_singular_vectors:
        w, x = eigsh(aa,
                     k=k,
                     which=which,
                     ncv=ncv,
                     maxiter=maxiter,
                     tol=tol,
                     return_eigenvectors=True)
    else:
        w = eigsh(aa,
                  k=k,
                  which=which,
                  ncv=ncv,
                  maxiter=maxiter,
                  tol=tol,
                  return_eigenvectors=False)

    w = cupy.maximum(w, 0)
    t = w.dtype.char.lower()
    factor = {'f': 1e3, 'd': 1e6}
    cond = factor[t] * numpy.finfo(t).eps
    cutoff = cond * cupy.max(w)
    above_cutoff = (w > cutoff)
    n_large = above_cutoff.sum()
    s = cupy.zeros_like(w)
    s[:n_large] = cupy.sqrt(w[above_cutoff])
    if not return_singular_vectors:
        return s

    x = x[:, above_cutoff]
    if m >= n:
        v = x
        u = a @ v / s[:n_large]
    else:
        u = x
        v = aH @ u / s[:n_large]
    u = _augmented_orthnormal_cols(u, k - n_large)
    v = _augmented_orthnormal_cols(v, k - n_large)

    return u, s, v.conj().T
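A usage sketch with a hypothetical random sparse matrix (assumes this svds is available as in cupyx.scipy.sparse.linalg):

import cupy
import cupyx.scipy.sparse as sparse

a = sparse.random(100, 60, density=0.1, format='csr', dtype=cupy.float64)
u, s, vt = svds(a, k=5)
print(u.shape, s.shape, vt.shape)  # (100, 5) (5,) (5, 60)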
Example #12
import numpy as np
from numpy.fft import fft2, ifft2

def perfft2(im, compute_P=True, compute_spatial=False):
    """
    Moisan's Periodic plus Smooth Image Decomposition. The image is
    decomposed into two parts:

        im = s + p

    where 's' is the 'smooth' component with mean 0, and 'p' is the 'periodic'
    component which has no sharp discontinuities when one moves cyclically
    across the image boundaries.

    usage: S, [P, s, p] = perfft2(im)

    where:  im      is the image
            S       is the FFT of the smooth component
            P       is the FFT of the periodic component, returned if
                    compute_P (default)
            s & p   are the smooth and periodic components in the spatial
                    domain, returned if compute_spatial

    By default this function returns `S` and `P`, the FFTs of the smooth and
    periodic components respectively. If `compute_spatial=True`, the spatial
    domain components 's' and 'p' are also computed.

    This code is adapted from Lionel Moisan's Scilab function 'perdecomp.sci'
    "Periodic plus Smooth Image Decomposition" 07/2012 available at:

        <http://www.mi.parisdescartes.fr/~moisan/p+s>
    """

    if im.dtype not in ['float32', 'float64']:
        im = np.float64(im)

    rows, cols = im.shape

    # Compute the boundary image which is equal to the image discontinuity
    # values across the boundaries at the edges and is 0 elsewhere
    s = np.zeros_like(im)
    s[0, :] = im[0, :] - im[-1, :]
    s[-1, :] = -s[0, :]
    s[:, 0] = s[:, 0] + im[:, 0] - im[:, -1]
    s[:, -1] = s[:, -1] - im[:, 0] + im[:, -1]

    # Generate grid upon which to compute the filter for the boundary image
    # in the frequency domain.  Note that cos is cyclic hence the grid
    # values can range from 0 .. 2*pi rather than 0 .. pi and then pi .. 0
    x, y = (2 * np.pi * np.arange(0, v) / float(v) for v in (cols, rows))
    cx, cy = np.meshgrid(x, y)

    denom = (2. * (2. - np.cos(cx) - np.cos(cy)))
    denom[0, 0] = 1.     # avoid / 0

    S = fft2(s) / denom
    S[0, 0] = 0      # enforce zero mean

    if compute_P or compute_spatial:

        P = fft2(im) - S

        if compute_spatial:
            s = ifft2(S).real
            p = im - s

            return S, P, s, p
        else:
            return S, P
    else:
        return S
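A quick sketch that decomposes a random image and checks the defining identity im = s + p:

import numpy as np

im = np.random.rand(64, 64)
S, P, s, p = perfft2(im, compute_spatial=True)
assert np.allclose(im, s + p)  # holds by construction of the decomposition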
Example #13
def powerspectrum(*U,
                  average=False,
                  kmin=None,
                  kmax=None,
                  npts=None,
                  compute_fft=True,
                  compute_sqr=True,
                  double=True,
                  bench=False,
                  **kwargs):
    """
    Returns the 1D radially averaged power spectrum :math:`P(k)`
    of a 1D, 2D, or 3D real or complex-valued scalar or
    vector field :math:`U`. This is computed as

    .. math::

        P(k) = \sum\limits_{|\mathbf{k}| = k} |\hat{U}(\mathbf{k})|^2,

    where :math:`\hat{U}` is the FFT of :math:`U`, :math:`\mathbf{k}`
    is a wavevector, and :math:`k` is a scalar wavenumber.

    Parameters
    ----------
    U : `np.ndarray`
        Real or complex vector or scalar data.
        If vector data, pass arguments as ``U1, U2, ..., Un``
        where ``Ui`` is the ith vector component.
        Each ``Ui`` can be 1D, 2D, or 3D, and all must have the
        same ``Ui.shape`` and ``Ui.dtype``.
    average : `bool`, optional
        If ``True``, average over values in a given
        bin and multiply by the bin volume.
        If ``False``, compute the sum.
    kmin : `int` or `float`, optional
        Minimum wavenumber in power spectrum bins.
        If ``None``, ``kmin = 1``.
    kmax : `int` or `float`, optional
        Maximum wavenumber in power spectrum bins.
        If ``None``, ``kmax = max(U.shape)//2``.
    npts : `int`, optional
        Number of modes between ``kmin`` and ``kmax``,
        inclusive.
        If ``None``, ``npts = kmax-kmin+1``.
    compute_fft : `bool`, optional
        If ``False``, do not take the FFT of the input data.
        FFTs should not be passed with the zero-frequency
        component in the center.
    compute_sqr : `bool`, optional
        If ``False``, sum the real part of the FFT. This can be
        useful for purely real FFTs, where the sign of the
        FFT is useful information. If ``True``, take the square
        as usual.
    double : `bool`, optional
        If ``False``, calculate FFTs in single precision.
        Useful for saving memory.
    bench : `bool`, optional
        Print message for time of calculation.
    kwargs
        Additional keyword arguments passed to
        ``cupyx.scipy.fft.fftn`` or ``cupyx.scipy.fft.rfftn``.

    Returns
    -------
    spectrum : `np.ndarray`, shape `(npts,)`
        Radially averaged power spectrum :math:`P(k)`.
    kn : `np.ndarray`, shape `(npts,)`
        Corresponding bins for spectrum :math:`k`.
    """
    if bench:
        t0 = time()

    shape = U[0].shape
    ndim = U[0].ndim
    ncomp = len(U)
    N = max(U[0].shape)

    if np.issubdtype(U[0].dtype, np.floating):
        real = True
        dtype = cp.float64 if double else cp.float32
    else:
        real = False
        dtype = cp.complex128 if double else cp.complex64

    if ndim not in [1, 2, 3]:
        raise ValueError("Dimension of image must be 1, 2, or 3.")

    # Get memory pools
    mempool = cp.get_default_memory_pool()
    pinned_mempool = cp.get_default_pinned_memory_pool()

    # Compute power spectral density with memory efficiency
    density = None
    comp = cp.empty(shape, dtype=dtype)
    for i in range(ncomp):
        temp = cp.asarray(U[i], dtype=dtype)
        comp[...] = temp
        del temp
        if compute_fft:
            fft = _cufftn(comp, **kwargs)
        else:
            fft = comp
        if density is None:
            fftshape = fft.shape
            density = cp.zeros(fft.shape)
        if compute_sqr:
            density[...] += _mod_squared(fft)
        else:
            density[...] += cp.real(fft)
        del fft
        mempool.free_all_blocks()
        pinned_mempool.free_all_blocks()

    # Need to double count if using rfftn
    if real:
        density[...] *= 2

    # Get radial coordinates
    kr = cp.asarray(_kmag_sampling(fftshape, real=real).astype(np.float32))

    # Flatten arrays
    kr = kr.ravel()
    density = density.ravel()

    # Get minimum and maximum k for binning if not given
    if kmin is None:
        kmin = 1
    if kmax is None:
        kmax = int(N / 2)
    if npts is None:
        npts = kmax - kmin + 1

    # Generate bins
    kn = cp.linspace(kmin, kmax, npts, endpoint=True)  # Left edges of bins
    dk = kn[1] - kn[0]
    kn += dk / 2  # Convert kn to bin centers.

    # Radially average power spectral density
    if ndim == 1:
        fac = 2 * np.pi
    elif ndim == 2:
        fac = 4 * np.pi
    elif ndim == 3:
        fac = 4. / 3. * np.pi
    spectrum = cp.zeros_like(kn)
    for i, ki in enumerate(kn):
        ii = cp.where(np.logical_and(kr >= ki - dk / 2, kr < ki + dk / 2))
        if average:
            dv = fac * cp.pi * ((ki + dk / 2)**ndim - (ki - dk / 2)**ndim)
            spectrum[i] = dv * cp.mean(density[ii])
        else:
            spectrum[i] = cp.sum(density[ii])

    spectrum = cp.asnumpy(spectrum)
    kn = cp.asnumpy(kn)

    del density, kr
    mempool.free_all_blocks()
    pinned_mempool.free_all_blocks()

    if bench:
        print(f"Time: {time() - t0:.04f} s")

    return spectrum, kn
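A hedged usage sketch on a random 2-D field (assumes the module's private helpers _cufftn, _mod_squared and _kmag_sampling, plus cupy imported as cp):

import numpy as np

u = np.random.rand(64, 64)
spectrum, kn = powerspectrum(u)
print(spectrum.shape, kn.shape)  # (32,) (32,) with default kmin=1, kmax=32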
Example #14
def random_noise(image, mode="gaussian", seed=None, clip=True, **kwargs):
    """
    Function to add random noise of various types to a floating-point image.

    Parameters
    ----------
    image : ndarray
        Input image data. Will be converted to float.
    mode : str, optional
        One of the following strings, selecting the type of noise to add:

        - 'gaussian'  Gaussian-distributed additive noise.
        - 'localvar'  Gaussian-distributed additive noise, with specified
                      local variance at each point of `image`.
        - 'poisson'   Poisson-distributed noise generated from the data.
        - 'salt'      Replaces random pixels with 1.
        - 'pepper'    Replaces random pixels with 0 (for unsigned images) or
                      -1 (for signed images).
        - 's&p'       Replaces random pixels with either 1 or `low_val`, where
                      `low_val` is 0 for unsigned images or -1 for signed
                      images.
        - 'speckle'   Multiplicative noise using out = image + n*image, where
                      n is uniform noise with specified mean & variance.
    seed : int, optional
        If provided, this will set the random seed before generating noise,
        for valid pseudo-random comparisons.
    clip : bool, optional
        If True (default), the output will be clipped after noise applied
        for modes `'speckle'`, `'poisson'`, and `'gaussian'`. This is
        needed to maintain the proper image data range. If False, clipping
        is not applied, and the output may extend beyond the range [-1, 1].
    mean : float, optional
        Mean of random distribution. Used in 'gaussian' and 'speckle'.
        Default : 0.
    var : float, optional
        Variance of random distribution. Used in 'gaussian' and 'speckle'.
        Note: variance = (standard deviation) ** 2. Default : 0.01
    local_vars : ndarray, optional
        Array of positive floats, same shape as `image`, defining the local
        variance at every image point. Used in 'localvar'.
    amount : float, optional
        Proportion of image pixels to replace with noise on range [0, 1].
        Used in 'salt', 'pepper', and 's&p'. Default : 0.05
    salt_vs_pepper : float, optional
        Proportion of salt vs. pepper noise for 's&p' on range [0, 1].
        Higher values represent more salt. Default : 0.5 (equal amounts)

    Returns
    -------
    out : ndarray
        Output floating-point image data on range [0, 1] or [-1, 1] if the
        input `image` was unsigned or signed, respectively.

    Notes
    -----
    Speckle, Poisson, Localvar, and Gaussian noise may generate noise outside
    the valid image range. The default is to clip (not alias) these values,
    but they may be preserved by setting `clip=False`. Note that in this case
    the output may contain values outside the ranges [0, 1] or [-1, 1].
    Use this option with care.

    Because of the prevalence of exclusively positive floating-point images in
    intermediate calculations, it is not possible to intuit if an input is
    signed based on dtype alone. Instead, negative values are explicitly
    searched for. Only if found does this function assume signed input.
    Unexpected results only occur in rare, poorly exposed cases (e.g. if all
    values are above 50 percent gray in a signed `image`). In this event,
    manually scaling the input to the positive domain will solve the problem.

    The Poisson distribution is only defined for positive integers. To apply
    this noise type, the number of unique values in the image is found and
    the next round power of two is used to scale up the floating-point result,
    after which it is scaled back down to the floating-point image range.

    To generate Poisson noise against a signed image, the signed image is
    temporarily converted to an unsigned image in the floating point domain,
    Poisson noise is generated, then it is returned to the original range.

    """
    mode = mode.lower()

    # Detect if a signed image was input
    if image.min() < 0:
        low_clip = -1.0
    else:
        low_clip = 0.0

    image = img_as_float(image)
    if seed is not None:
        cp.random.seed(seed=seed)

    allowedtypes = {
        "gaussian": "gaussian_values",
        "localvar": "localvar_values",
        "poisson": "poisson_values",
        "salt": "sp_values",
        "pepper": "sp_values",
        "s&p": "s&p_values",
        "speckle": "gaussian_values",
    }

    kwdefaults = {
        "mean": 0.0,
        "var": 0.01,
        "amount": 0.05,
        "salt_vs_pepper": 0.5,
        "local_vars": cp.zeros_like(image) + 0.01,
    }

    allowedkwargs = {
        "gaussian_values": ["mean", "var"],
        "localvar_values": ["local_vars"],
        "sp_values": ["amount"],
        "s&p_values": ["amount", "salt_vs_pepper"],
        "poisson_values": [],
    }

    for key in kwargs:
        if key not in allowedkwargs[allowedtypes[mode]]:
            raise ValueError("%s keyword not in allowed keywords %s" %
                             (key, allowedkwargs[allowedtypes[mode]]))

    # Set kwarg defaults
    for kw in allowedkwargs[allowedtypes[mode]]:
        kwargs.setdefault(kw, kwdefaults[kw])

    if mode == "gaussian":
        noise = cp.random.normal(kwargs["mean"], kwargs["var"]**0.5,
                                 image.shape)
        out = image + noise

    elif mode == "localvar":
        # Ensure local variance input is correct
        if (kwargs["local_vars"] <= 0).any():
            raise ValueError("All values of `local_vars` must be > 0.")

        # Safe shortcut usage broadcasts kwargs['local_vars'] as a ufunc

        # out = image + cp.random.normal(0, kwargs['local_vars'] ** 0.5)
        # TODO: CuPy bug -> have to specify size argument for this to work.
        out = image + cp.random.normal(0, kwargs["local_vars"]**0.5,
                                       kwargs["local_vars"].shape)

    elif mode == "poisson":
        # Determine unique values in image & calculate the next power of two
        vals = len(cp.unique(image))
        vals = 2**cp.ceil(cp.log2(vals))

        # Ensure image is exclusively positive
        if low_clip == -1.0:
            old_max = image.max()
            image = (image + 1.0) / (old_max + 1.0)

        # Generating noise for each unique value in image.
        out = cp.random.poisson(image * vals) / float(vals)

        # Return image to original range if input was signed
        if low_clip == -1.0:
            out = out * (old_max + 1.0) - 1.0

    elif mode == "salt":
        # Re-call function with mode='s&p' and salt_vs_pepper=1 (all salt noise)
        out = random_noise(
            image,
            mode="s&p",
            seed=seed,
            amount=kwargs["amount"],
            salt_vs_pepper=1.0,
        )

    elif mode == "pepper":
        # Re-call function with mode='s&p' and salt_vs_pepper=0 (all pepper noise)
        out = random_noise(
            image,
            mode="s&p",
            seed=seed,
            amount=kwargs["amount"],
            salt_vs_pepper=0.0,
        )

    elif mode == "s&p":
        out = image.copy()
        p = kwargs["amount"]
        q = kwargs["salt_vs_pepper"]
        flipped = cp.random.choice([True, False],
                                   size=image.shape,
                                   p=[p, 1 - p])
        salted = cp.random.choice([True, False],
                                  size=image.shape,
                                  p=[q, 1 - q])
        peppered = ~salted
        out[flipped & salted] = 1
        out[flipped & peppered] = low_clip

    elif mode == "speckle":
        noise = cp.random.normal(kwargs["mean"], kwargs["var"]**0.5,
                                 image.shape)
        out = image + image * noise

    # Clip back to original range, if necessary
    if clip:
        out = cp.clip(out, low_clip, 1.0)

    return out
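A minimal usage sketch (assumes img_as_float from the surrounding package is importable by random_noise):

import cupy as cp

img = cp.random.rand(32, 32).astype(cp.float32)
noisy = random_noise(img, mode="gaussian", var=0.01, seed=42)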
Example #15
def morphological_geodesic_active_contour(gimage,
                                          iterations,
                                          init_level_set='circle',
                                          smoothing=1,
                                          threshold='auto',
                                          balloon=0,
                                          iter_callback=lambda x: None):
    """Morphological Geodesic Active Contours (MorphGAC).

    Geodesic active contours implemented with morphological operators. It can
    be used to segment objects with visible but noisy, cluttered, broken
    borders.

    Parameters
    ----------
    gimage : (M, N) or (L, M, N) array
        Preprocessed image or volume to be segmented. This is very rarely the
        original image. Instead, this is usually a preprocessed version of the
        original image that enhances and highlights the borders (or other
        structures) of the object to segment.
        `morphological_geodesic_active_contour` will try to stop the contour
        evolution in areas where `gimage` is small. See
        `morphsnakes.inverse_gaussian_gradient` as an example function to
        perform this preprocessing. Note that the quality of
        `morphological_geodesic_active_contour` might greatly depend on this
        preprocessing.
    iterations : uint
        Number of iterations to run.
    init_level_set : str, (M, N) array, or (L, M, N) array
        Initial level set. If an array is given, it will be binarized and used
        as the initial level set. If a string is given, it defines the method
        to generate a reasonable initial level set with the shape of the
        `image`. Accepted values are 'checkerboard' and 'circle'. See the
        documentation of `checkerboard_level_set` and `circle_level_set`
        respectively for details about how these level sets are created.
    smoothing : uint, optional
        Number of times the smoothing operator is applied per iteration.
        Reasonable values are around 1-4. Larger values lead to smoother
        segmentations.
    threshold : float, optional
        Areas of the image with a value smaller than this threshold will be
        considered borders. The evolution of the contour will stop in these
        areas.
    balloon : float, optional
        Balloon force to guide the contour in non-informative areas of the
        image, i.e., areas where the gradient of the image is too small to push
        the contour towards a border. A negative value will shrink the contour,
        while a positive value will expand the contour in these areas. Setting
        this to zero will disable the balloon force.
    iter_callback : function, optional
        If given, this function is called once per iteration with the current
        level set as the only argument. This is useful for debugging or for
        plotting intermediate results during the evolution.

    Returns
    -------
    out : (M, N) or (L, M, N) array
        Final segmentation (i.e., the final level set)

    See Also
    --------
    inverse_gaussian_gradient, circle_level_set, checkerboard_level_set

    Notes
    -----

    This is a version of the Geodesic Active Contours (GAC) algorithm that uses
    morphological operators instead of solving partial differential equations
    (PDEs) for the evolution of the contour. The set of morphological operators
    used in this algorithm are proved to be infinitesimally equivalent to the
    GAC PDEs (see [1]_). However, morphological operators do not suffer
    from the numerical stability issues typically found in PDEs (e.g., it is
    not necessary to find the right time step for the evolution), and are
    computationally faster.

    The algorithm and its theoretical derivation are described in [1]_.

    References
    ----------
    .. [1] A Morphological Approach to Curvature-based Evolution of Curves and
           Surfaces, Pablo Márquez-Neila, Luis Baumela, Luis Álvarez. In IEEE
           Transactions on Pattern Analysis and Machine Intelligence (PAMI),
           2014, :DOI:`10.1109/TPAMI.2013.106`
    """

    image = gimage
    init_level_set = _init_level_set(init_level_set, image.shape)

    _check_input(image, init_level_set)

    if threshold == 'auto':
        threshold = cp.percentile(image, 40)

    structure = cp.ones((3, ) * len(image.shape), dtype=cp.int8)
    dimage = cp.gradient(image)
    # threshold_mask = image > threshold
    if balloon != 0:
        threshold_mask_balloon = image > threshold / cp.abs(balloon)

    u = (init_level_set > 0).astype(cp.int8)

    iter_callback(u)

    for _ in range(iterations):

        # Balloon
        if balloon > 0:
            aux = ndi.binary_dilation(u, structure)
        elif balloon < 0:
            aux = ndi.binary_erosion(u, structure)
        if balloon != 0:
            u[threshold_mask_balloon] = aux[threshold_mask_balloon]

        # Image attachment
        aux = cp.zeros_like(image)
        du = cp.gradient(u)
        for el1, el2 in zip(dimage, du):
            aux += el1 * el2
        u[aux > 0] = 1
        u[aux < 0] = 0

        # Smoothing
        for _ in range(smoothing):
            u = _curvop(u)

        iter_callback(u)

    return u
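A hedged usage sketch: the edge-stopping image below is a hand-rolled stand-in for morphsnakes.inverse_gaussian_gradient, and the call assumes the module's private helpers (_init_level_set, _check_input, _curvop, ndi) are available.

import cupy as cp

image = cp.random.rand(64, 64).astype(cp.float32)
grad_mag_sq = sum(g ** 2 for g in cp.gradient(image))
gimage = 1.0 / cp.sqrt(1.0 + 100.0 * grad_mag_sq)  # small near strong edges
seg = morphological_geodesic_active_contour(gimage, iterations=20,
                                            init_level_set='circle')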
Example #16
 def forward(self, is_training):
     # push this layer's output to every downstream layer
     for layer in self.outbound_layers:
         layer.input_tensor = self.output_tensor
     if is_training:
         if self.require_grads:
             # allocate a zeroed gradient buffer shaped like the output
             self.grads = cp.zeros_like(self.output_tensor)
Example #17
import cupy as cp

DEFAULT_CONV_CHUNK = 2 ** 16  # assumed default; not defined in this snippet

def convolve_gpu_chunked(x,
                         b,
                         pad='flip',
                         nwin=DEFAULT_CONV_CHUNK,
                         ntap=500,
                         overlap=2000):
    """Chunked GPU FFT-based convolution for large arrays.

    This memory-controlled version splits the signal into chunks of n samples.
    Each chunk is tapered in and out, the overlap is designed to get clear of the taper
    splicing of overlaping chunks is done in a cosine way.

    param: pad None, 'zeros', 'constant', 'flip'

    """
    x = cp.asarray(x)
    b = cp.asarray(b)
    assert b.ndim == 1
    n = x.shape[0]
    assert overlap >= 2 * ntap
    # create variables, the gain is to control the splicing
    y = cp.zeros_like(x)
    gain = cp.zeros(n)
    # compute tapers/constants outside of the loop
    taper_in = (-cp.cos(cp.linspace(0, 1, ntap) * cp.pi) / 2 + 0.5)[:,
                                                                    cp.newaxis]
    taper_out = cp.flipud(taper_in)
    assert b.shape[0] < nwin < n
    # this is the convolution wavelet that we shift to be 0 lag
    bp = cp.pad(b, (0, nwin - b.shape[0]), mode='constant')
    bp = cp.roll(bp, -b.size // 2 + 1)
    bp = cp.fft.rfft(bp, n=nwin)[:, cp.newaxis]
    # this is used to splice windows together: cosine taper. The reversed taper is complementary
    scale = cp.minimum(
        cp.maximum(0, cp.linspace(-0.5, 1.5, overlap - 2 * ntap)), 1)
    splice = (-cp.cos(scale * cp.pi) / 2 + 0.5)[:, cp.newaxis]
    # loop over the signal by chunks and apply convolution in frequency domain
    first = 0
    while True:
        first = min(n - nwin, first)
        last = min(first + nwin, n)
        # the convolution
        x_ = cp.copy(x[first:last, :])
        x_[:ntap] *= taper_in
        x_[-ntap:] *= taper_out
        x_ = cp.fft.irfft(cp.fft.rfft(x_, axis=0, n=nwin) * bp, axis=0, n=nwin)
        # this is to check the gain of summing the windows
        tt = cp.ones(nwin)
        tt[:ntap] *= taper_in[:, 0]
        tt[-ntap:] *= taper_out[:, 0]
        # the full overlap is outside of the tapers: we apply a cosine splicing to this part only
        if first > 0:
            full_overlap_first = first + ntap
            full_overlap_last = first + overlap - ntap
            gain[full_overlap_first:full_overlap_last] *= (1. - splice[:, 0])
            gain[full_overlap_first:full_overlap_last] += tt[ntap:overlap -
                                                             ntap] * splice[:,
                                                                            0]
            gain[full_overlap_last:last] = tt[overlap - ntap:]
            y[full_overlap_first:full_overlap_last] *= (1. - splice)
            y[full_overlap_first:full_overlap_last] += x_[ntap:overlap -
                                                          ntap] * splice
            y[full_overlap_last:last] = x_[overlap - ntap:]
        else:
            y[first:last, :] = x_
            gain[first:last] = tt
        if last == n:
            break
        first += nwin - overlap
    return y
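A usage sketch with illustrative sizes (two-channel signal, short smoothing kernel):

import cupy as cp

x = cp.random.randn(100000, 2).astype(cp.float32)  # (samples, channels)
b = (cp.ones(101) / 101).astype(cp.float32)        # short 1-D kernel
y = convolve_gpu_chunked(x, b, nwin=30000)
print(y.shape)  # (100000, 2)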
Example #18
    def move(self,
             move_preference_matrix,
             move_probability_matrix,
             ratio_random_move=0.1):
        """
        1.  Select all living agents and their neighbours.
        2.  Create a movement matrix. All occupied by agent cells should be unavailable for move.
            add {move_preference_matrix} for values of neighbours.
        3.  If agent does not have any available cells for moving - it should die.
            Drop all died agents from current moving agents.
        4.  10% of the time the agent moves randomly.
            Agent can't go to unavailable cells, so we recalculate probability for available neighbours.
            (sum of prob should be 1).
        5.  Vectorized way to get random indices from array of probs. Like random.choice, but for 2d array.
        6.  Find new flat indexes for random moving agents.
        7.  Find new flat indexes for normal moving agents. Before argmax selection we shuffle neighbours,
            otherwise we will use always first max index.
        8.  Create an array with new agents positions.
        9.  If two agents want to occupy same cell - then we accept only first.
            All agents, which was declined to move because of collision will die.
        10. If agent reach top - it dies too.


        :param move_preference_matrix:  The agent decides which space to move to by adding this move
                                        preference array to the value array of the surrounding environment.

        :param move_probability_matrix:  10% of the time the agent moves randomly to an adjacent space.
                                         It is the move probability matrix.
        :return:
        """
        # (1)
        live_agents_neighbour_flat_positions = self.agents_neighbour_flat_positions[
            self.agents_state]
        # (2)
        move_candidates = self.env.ravel(
        )[live_agents_neighbour_flat_positions].copy()

        is_available = self.is_available_env.ravel(
        )[live_agents_neighbour_flat_positions]
        move_candidates[~is_available] = cp.nan
        move_candidates = move_candidates + cp.asarray(move_preference_matrix)

        # (3)
        should_die = cp.all(cp.isnan(move_candidates.reshape(-1, 27)), axis=1)
        should_die_agents = cp.flatnonzero(self.agents_state)[should_die]

        self.agents_state[should_die_agents] = False

        move_candidates = move_candidates[~should_die]
        live_agents_neighbour_flat_positions = live_agents_neighbour_flat_positions[
            ~should_die]

        # (4)
        is_random_move = cp.random.binomial(
            1, ratio_random_move,
            live_agents_neighbour_flat_positions.shape[0])
        is_random_move = is_random_move.astype(cp.bool_)
        random_move_candidates = move_candidates[is_random_move]

        random_move_probs = (~cp.isnan(random_move_candidates) *
                             cp.asarray(move_probability_matrix)).reshape(
                                 -1, 27)
        random_move_probs /= random_move_probs.sum(axis=1)[:, None]

        # (5)
        random_vals = cp.expand_dims(cp.random.rand(
            random_move_probs.shape[0]),
                                     axis=1)
        random_indexes = (random_move_probs.cumsum(axis=1) >
                          random_vals).argmax(axis=1)

        # (6)
        random_live_agents_neighbour_flat_positions = live_agents_neighbour_flat_positions[
            is_random_move]
        random_new_positions = cp.take_along_axis(
            random_live_agents_neighbour_flat_positions.reshape(-1, 27),
            random_indexes[:, None],
            axis=1).T[0]

        # (7)
        normal_move_candidates = move_candidates[~is_random_move]

        # normal_move_indexes = cp.nanargmax(normal_move_candidates.reshape(-1, 27), axis=1)[:, None]
        # smart analog of cp.nanargmax(normal_move_candidates.reshape(-1, 27), axis=1)[:, None]

        normal_flattened_move_candidates = normal_move_candidates.reshape(
            -1, 27)
        normal_shuffled_candidates_idx = cp.random.rand(
            *normal_flattened_move_candidates.shape).argsort(axis=1)
        normal_shuffled_flattened_move_candidates = cp.take_along_axis(
            normal_flattened_move_candidates,
            normal_shuffled_candidates_idx,
            axis=1)
        normal_shuffled_candidates_max_idx = cp.nanargmax(
            normal_shuffled_flattened_move_candidates, axis=1)[:, None]

        normal_move_indexes = cp.take_along_axis(
            normal_shuffled_candidates_idx,
            normal_shuffled_candidates_max_idx,
            axis=1)
        ####

        normal_live_agents_neighbour_flat_positions = live_agents_neighbour_flat_positions[
            ~is_random_move]
        normal_move_new_positions = cp.take_along_axis(
            normal_live_agents_neighbour_flat_positions.reshape(-1, 27),
            normal_move_indexes,
            axis=1).T[0]
        # (8)
        moving_agents_flat_positions = self.agents_flat_positions[
            self.agents_state]
        new_agents_flat_positions = moving_agents_flat_positions.copy()

        new_agents_flat_positions[is_random_move] = random_new_positions

        new_agents_flat_positions[~is_random_move] = normal_move_new_positions

        live_agents_indexes = cp.flatnonzero(self.agents_state)

        # (9)
        _, flat_positions_first_entry = cp.unique(new_agents_flat_positions,
                                                  return_index=True)

        is_live = cp.zeros_like(new_agents_flat_positions).astype(cp.bool_)
        is_live[flat_positions_first_entry] = True

        new_agents_flat_positions[~is_live] = moving_agents_flat_positions[
            ~is_live]
        new_agents_positions = cp.array(
            cp.unravel_index(new_agents_flat_positions, self.env.shape)).T

        # (10)
        is_live[new_agents_positions[:, 2] == 1] = False

        self._agents_positions[live_agents_indexes] = new_agents_positions
        self.agents_state[live_agents_indexes] = is_live

        self.is_available_env.ravel()[moving_agents_flat_positions] = True
        self.is_available_env.ravel()[new_agents_flat_positions] = False

        self._agents_positions_all_time.append(
            cp.asnumpy(self._agents_positions))
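The shuffled nanargmax in step (7) above is a useful trick on its own:
cp.nanargmax always returns the first maximal index, so shuffling the
candidate axis first and mapping the winner back gives a uniformly random
tie-break. A minimal self-contained sketch of the same idea (the helper
name random_nanargmax is hypothetical):

import cupy as cp

def random_nanargmax(candidates):
    """Like cp.nanargmax(candidates, axis=1), but ties are broken
    uniformly at random instead of always taking the first index."""
    # Independently permute the columns of each row.
    shuffled_idx = cp.random.rand(*candidates.shape).argsort(axis=1)
    shuffled = cp.take_along_axis(candidates, shuffled_idx, axis=1)
    # Winner in the shuffled order, mapped back to the original columns.
    winner = cp.nanargmax(shuffled, axis=1)[:, None]
    return cp.take_along_axis(shuffled_idx, winner, axis=1)[:, 0]

vals = cp.array([[1.0, 3.0, 3.0],
                 [cp.nan, 2.0, 2.0]])
print(random_nanargmax(vals))  # column 1 or 2 in each row, at random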
Example #19
File: misc.py Project: mritools/cupyimg
def remove_small_objects(ar, min_size=64, connectivity=1, in_place=False):
    """Remove objects smaller than the specified size.

    Expects ar to be an array with labeled objects, and removes objects
    smaller than min_size. If `ar` is bool, the image is first labeled.
    This leads to potentially different behavior for bool and 0-and-1
    arrays.

    Parameters
    ----------
    ar : ndarray (arbitrary shape, int or bool type)
        The array containing the objects of interest. If the array type is
        int, the ints must be non-negative.
    min_size : int, optional (default: 64)
        The smallest allowable object size.
    connectivity : int, {1, 2, ..., ar.ndim}, optional (default: 1)
        The connectivity defining the neighborhood of a pixel. Used during
        labelling if `ar` is bool.
    in_place : bool, optional (default: False)
        If ``True``, remove the objects in the input array itself.
        Otherwise, make a copy.

    Raises
    ------
    TypeError
        If the input array is of an invalid type, such as float or string.
    ValueError
        If the input array contains negative values.

    Returns
    -------
    out : ndarray, same shape and type as input `ar`
        The input array with small connected components removed.

    Examples
    --------
    >>> import cupy as cp
    >>> from cupyimg.skimage import morphology
    >>> a = cp.array([[0, 0, 0, 1, 0],
    ...               [1, 1, 1, 0, 0],
    ...               [1, 1, 1, 0, 1]], bool)
    >>> b = morphology.remove_small_objects(a, 6)
    >>> b
    array([[False, False, False, False, False],
           [ True,  True,  True, False, False],
           [ True,  True,  True, False, False]])
    >>> c = morphology.remove_small_objects(a, 7, connectivity=2)
    >>> c
    array([[False, False, False,  True, False],
           [ True,  True,  True, False, False],
           [ True,  True,  True, False, False]])
    >>> d = morphology.remove_small_objects(a, 6, in_place=True)
    >>> d is a
    True

    """
    # Raising type error if not int or bool
    _check_dtype_supported(ar)

    if in_place:
        out = ar
    else:
        out = ar.copy()

    if min_size == 0:  # shortcut for efficiency
        return out

    if out.dtype == bool:
        selem = ndi.generate_binary_structure(ar.ndim, connectivity)
        ccs = cp.zeros_like(ar, dtype=cp.int32)
        ndi.label(ar, selem, output=ccs)
    else:
        ccs = out

    try:
        component_sizes = cp.bincount(ccs.ravel())
    except ValueError:
        raise ValueError("Negative value labels are not supported. Try "
                         "relabeling the input with `scipy.ndimage.label` or "
                         "`skimage.morphology.label`.")

    if len(component_sizes) == 2 and out.dtype != bool:
        warn("Only one label was provided to `remove_small_objects`. "
             "Did you mean to use a boolean array?")

    too_small = component_sizes < min_size
    too_small_mask = too_small[ccs]
    out[too_small_mask] = 0

    return out
Example #20
File: _denoise.py Project: grlee77/cucim
def denoise_tv_chambolle(image,
                         weight=0.1,
                         eps=2.0e-4,
                         n_iter_max=200,
                         multichannel=False):
    """Perform total-variation denoising on n-dimensional images.

    Parameters
    ----------
    image : ndarray of ints, uints or floats
        Input data to be denoised. `image` can be of any numeric type,
        but it is cast into an ndarray of floats for the computation
        of the denoised image.
    weight : float, optional
        Denoising weight. The greater `weight`, the more denoising (at
        the expense of fidelity to `input`).
    eps : float, optional
        Relative difference of the value of the cost function that
        determines the stop criterion. The algorithm stops when:

            (E_(n-1) - E_n) < eps * E_0

    n_iter_max : int, optional
        Maximal number of iterations used for the optimization.
    multichannel : bool, optional
        Apply total-variation denoising separately for each channel. This
        option should be true for color images, otherwise the denoising is
        also applied in the channels dimension.

    Returns
    -------
    out : ndarray
        Denoised image.

    Notes
    -----
    Make sure to set the multichannel parameter appropriately for color images.

    The principle of total variation denoising is explained in
    https://en.wikipedia.org/wiki/Total_variation_denoising

    The principle of total variation denoising is to minimize the
    total variation of the image, which can be roughly described as
    the integral of the norm of the image gradient. Total variation
    denoising tends to produce "cartoon-like" images, that is,
    piecewise-constant images.

    This code is an implementation of the algorithm of Rudin, Fatemi and Osher
    that was proposed by Chambolle in [1]_.

    References
    ----------
    .. [1] A. Chambolle, An algorithm for total variation minimization and
           applications, Journal of Mathematical Imaging and Vision,
           Springer, 2004, 20, 89-97.

    Examples
    --------
    2D example on astronaut image:

    >>> from skimage import color, data
    >>> img = color.rgb2gray(data.astronaut())[:50, :50]
    >>> img += 0.5 * img.std() * np.random.randn(*img.shape)
    >>> denoised_img = denoise_tv_chambolle(img, weight=60)

    3D example on synthetic data:

    >>> x, y, z = np.ogrid[0:20, 0:20, 0:20]
    >>> mask = (x - 22)**2 + (y - 20)**2 + (z - 17)**2 < 8**2
    >>> mask = mask.astype(np.float64)
    >>> mask += 0.2*np.random.randn(*mask.shape)
    >>> res = denoise_tv_chambolle(mask, weight=100)

    """
    im_type = image.dtype
    if not im_type.kind == 'f':
        image = img_as_float(image)

    if multichannel:
        out = cp.zeros_like(image)
        for c in range(image.shape[-1]):
            out[..., c] = _denoise_tv_chambolle_nd(image[..., c], weight, eps,
                                                   n_iter_max)
    else:
        out = _denoise_tv_chambolle_nd(image, weight, eps, n_iter_max)
    return out
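As the Notes section says, color images need multichannel=True so the
channel axis is not smoothed along with the spatial axes. A hedged usage
sketch, assuming the function is exposed as
cucim.skimage.restoration.denoise_tv_chambolle:

import cupy as cp
from cucim.skimage.restoration import denoise_tv_chambolle  # assumed path

rng = cp.random.default_rng(0)
# Noisy RGB image in [0, 1] on the GPU.
img = cp.clip(rng.standard_normal((64, 64, 3)) * 0.1 + 0.5, 0.0, 1.0)

# multichannel=True denoises each channel separately; False would also
# smooth across the channel axis and desaturate colors.
out = denoise_tv_chambolle(img, weight=0.1, multichannel=True)
print(out.shape, float(out.min()), float(out.max()))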
Example #21
source_str = r"""
extern "C" {

__global__ void mymul(int n,
    const double* x1, const double* x2, double* y)
{
    int tid = blockDim.x * blockIdx.x + threadIdx.x;
    if (tid < n)
    {
        y[tid] = x1[tid] * x2[tid];
    }
}

}"""
module = cupy.RawModule(code=source_str)
mymul_kernel = module.get_function("mymul")

x1 = np.array([1, 2, 3, 4, 5], dtype=np.float64)
x2 = np.array([7, 8, 9, 10, 12], dtype=np.float64)

x1_dev = cupy.array(x1)
x2_dev = cupy.array(x2)
y_dev = cupy.zeros_like(x1_dev)

blocksize = 2
n_blocks = int(np.ceil(len(x1) / blocksize))
# Scalar args to a raw kernel must match the C parameter width exactly,
# so pass n as int32 to match the kernel's `int n`.
mymul_kernel(grid=(n_blocks, ),
             block=(blocksize, ),
             args=(np.int32(len(x1)), x1_dev, x2_dev, y_dev))

y = y_dev.get()

assert np.allclose(y, x1 * x2)
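Note the if (tid < n) guard in the kernel: the grid is rounded up to a
whole number of blocks (here 3 blocks of 2 threads for 5 elements), so the
trailing threads in the last block must be masked off or they would write
out of bounds.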
Example #22
def reverse2(d):
    # Fixed linear transform over GF(2): each output bit c[i] is the XOR
    # of a hard-coded subset of the 32 input bits of d.
    d = d.T
    c = np.zeros_like(d)
    c[0] = d[0] ^ d[7] ^ d[10] ^ d[12] ^ d[13] ^ d[15] ^ d[18] ^ d[19] ^ d[
        21] ^ d[22] ^ d[25] ^ d[28] ^ d[29] ^ d[30] ^ d[31]
    c[1] = d[1] ^ d[4] ^ d[7] ^ d[10] ^ d[11] ^ d[12] ^ d[14] ^ d[15] ^ d[
        16] ^ d[18] ^ d[21] ^ d[23] ^ d[25] ^ d[26] ^ d[28]
    c[2] = d[2] ^ d[5] ^ d[8] ^ d[11] ^ d[13] ^ d[15] ^ d[16] ^ d[17] ^ d[
        19] ^ d[20] ^ d[22] ^ d[26] ^ d[27] ^ d[28] ^ d[29]
    c[3] = d[3] ^ d[6] ^ d[9] ^ d[12] ^ d[14] ^ d[17] ^ d[18] ^ d[20] ^ d[
        21] ^ d[23] ^ d[24] ^ d[27] ^ d[28] ^ d[29] ^ d[30]
    c[4] = d[3] ^ d[4] ^ d[8] ^ d[9] ^ d[11] ^ d[14] ^ d[17] ^ d[18] ^ d[
        22] ^ d[23] ^ d[24] ^ d[25] ^ d[26] ^ d[27] ^ d[29]
    c[5] = d[0] ^ d[3] ^ d[5] ^ d[8] ^ d[10] ^ d[11] ^ d[14] ^ d[15] ^ d[
        17] ^ d[19] ^ d[20] ^ d[22] ^ d[24] ^ d[29] ^ d[30]
    c[6] = d[1] ^ d[6] ^ d[9] ^ d[11] ^ d[12] ^ d[15] ^ d[16] ^ d[18] ^ d[
        20] ^ d[21] ^ d[23] ^ d[24] ^ d[25] ^ d[30] ^ d[31]
    c[7] = d[2] ^ d[7] ^ d[8] ^ d[10] ^ d[13] ^ d[16] ^ d[17] ^ d[19] ^ d[
        21] ^ d[22] ^ d[24] ^ d[25] ^ d[26] ^ d[28] ^ d[31]
    c[8] = d[2] ^ d[4] ^ d[5] ^ d[7] ^ d[8] ^ d[15] ^ d[17] ^ d[20] ^ d[
        21] ^ d[22] ^ d[23] ^ d[26] ^ d[27] ^ d[29] ^ d[30]
    c[9] = d[2] ^ d[3] ^ d[4] ^ d[6] ^ d[7] ^ d[9] ^ d[12] ^ d[15] ^ d[17] ^ d[
        18] ^ d[20] ^ d[24] ^ d[26] ^ d[29] ^ d[31]
    c[10] = d[0] ^ d[3] ^ d[5] ^ d[7] ^ d[10] ^ d[13] ^ d[18] ^ d[19] ^ d[
        20] ^ d[21] ^ d[24] ^ d[25] ^ d[27] ^ d[28] ^ d[30]
    c[11] = d[1] ^ d[4] ^ d[6] ^ d[11] ^ d[14] ^ d[16] ^ d[19] ^ d[20] ^ d[
        21] ^ d[22] ^ d[25] ^ d[26] ^ d[28] ^ d[29] ^ d[31]
    c[12] = d[0] ^ d[1] ^ d[3] ^ d[6] ^ d[11] ^ d[12] ^ d[16] ^ d[17] ^ d[
        18] ^ d[19] ^ d[21] ^ d[25] ^ d[26] ^ d[30] ^ d[31]
    c[13] = d[0] ^ d[2] ^ d[3] ^ d[6] ^ d[7] ^ d[8] ^ d[11] ^ d[13] ^ d[
        16] ^ d[21] ^ d[22] ^ d[25] ^ d[27] ^ d[28] ^ d[30]
    c[14] = d[1] ^ d[3] ^ d[4] ^ d[7] ^ d[9] ^ d[14] ^ d[16] ^ d[17] ^ d[
        22] ^ d[23] ^ d[24] ^ d[26] ^ d[28] ^ d[29] ^ d[31]
    c[15] = d[0] ^ d[2] ^ d[5] ^ d[10] ^ d[15] ^ d[16] ^ d[17] ^ d[18] ^ d[
        20] ^ d[23] ^ d[24] ^ d[25] ^ d[27] ^ d[29] ^ d[30]
    c[16] = d[2] ^ d[3] ^ d[5] ^ d[6] ^ d[9] ^ d[12] ^ d[13] ^ d[14] ^ d[
        15] ^ d[16] ^ d[23] ^ d[26] ^ d[28] ^ d[29] ^ d[31]
    c[17] = d[0] ^ d[2] ^ d[5] ^ d[7] ^ d[9] ^ d[10] ^ d[12] ^ d[17] ^ d[
        20] ^ d[23] ^ d[26] ^ d[27] ^ d[28] ^ d[30] ^ d[31]
    c[18] = d[0] ^ d[1] ^ d[3] ^ d[4] ^ d[6] ^ d[10] ^ d[11] ^ d[12] ^ d[
        13] ^ d[18] ^ d[21] ^ d[24] ^ d[27] ^ d[29] ^ d[31]
    c[19] = d[1] ^ d[2] ^ d[4] ^ d[5] ^ d[7] ^ d[8] ^ d[11] ^ d[12] ^ d[
        13] ^ d[14] ^ d[19] ^ d[22] ^ d[25] ^ d[28] ^ d[30]
    c[20] = d[1] ^ d[2] ^ d[6] ^ d[7] ^ d[8] ^ d[9] ^ d[10] ^ d[11] ^ d[
        13] ^ d[19] ^ d[20] ^ d[24] ^ d[25] ^ d[27] ^ d[30]
    c[21] = d[1] ^ d[3] ^ d[4] ^ d[6] ^ d[8] ^ d[13] ^ d[14] ^ d[16] ^ d[
        19] ^ d[21] ^ d[24] ^ d[26] ^ d[27] ^ d[30] ^ d[31]
    c[22] = d[0] ^ d[2] ^ d[4] ^ d[5] ^ d[7] ^ d[8] ^ d[9] ^ d[14] ^ d[15] ^ d[
        17] ^ d[22] ^ d[25] ^ d[27] ^ d[28] ^ d[31]
    c[23] = d[0] ^ d[1] ^ d[3] ^ d[5] ^ d[6] ^ d[8] ^ d[9] ^ d[10] ^ d[12] ^ d[
        15] ^ d[18] ^ d[23] ^ d[24] ^ d[26] ^ d[29]
    c[24] = d[1] ^ d[4] ^ d[5] ^ d[6] ^ d[7] ^ d[10] ^ d[11] ^ d[13] ^ d[
        14] ^ d[18] ^ d[20] ^ d[21] ^ d[23] ^ d[24] ^ d[31]
    c[25] = d[1] ^ d[2] ^ d[4] ^ d[8] ^ d[10] ^ d[13] ^ d[15] ^ d[18] ^ d[
        19] ^ d[20] ^ d[22] ^ d[23] ^ d[25] ^ d[28] ^ d[31]
    c[26] = d[2] ^ d[3] ^ d[4] ^ d[5] ^ d[8] ^ d[9] ^ d[11] ^ d[12] ^ d[
        14] ^ d[16] ^ d[19] ^ d[21] ^ d[23] ^ d[26] ^ d[29]
    c[27] = d[0] ^ d[3] ^ d[4] ^ d[5] ^ d[6] ^ d[9] ^ d[10] ^ d[12] ^ d[
        13] ^ d[15] ^ d[17] ^ d[20] ^ d[22] ^ d[27] ^ d[30]
    c[28] = d[0] ^ d[1] ^ d[2] ^ d[3] ^ d[5] ^ d[9] ^ d[10] ^ d[14] ^ d[
        15] ^ d[16] ^ d[17] ^ d[19] ^ d[22] ^ d[27] ^ d[28]
    c[29] = d[0] ^ d[5] ^ d[6] ^ d[9] ^ d[11] ^ d[12] ^ d[14] ^ d[16] ^ d[
        18] ^ d[19] ^ d[22] ^ d[23] ^ d[24] ^ d[27] ^ d[29]
    c[30] = d[0] ^ d[1] ^ d[6] ^ d[7] ^ d[8] ^ d[10] ^ d[12] ^ d[13] ^ d[
        15] ^ d[17] ^ d[19] ^ d[20] ^ d[23] ^ d[25] ^ d[30]
    c[31] = d[0] ^ d[1] ^ d[2] ^ d[4] ^ d[7] ^ d[8] ^ d[9] ^ d[11] ^ d[13] ^ d[
        14] ^ d[16] ^ d[18] ^ d[21] ^ d[26] ^ d[31]
    return c.T
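Each output bit above is the XOR of a fixed subset of the 32 input bits,
so the whole function is a linear map over GF(2) and can equivalently be
written as a bit-matrix product. A sketch under that observation (M is a
hypothetical 32x32 matrix whose rows mirror the index lists above; only
row 0 is filled in here):

import numpy as np

M = np.zeros((32, 32), dtype=np.uint8)
M[0, [0, 7, 10, 12, 13, 15, 18, 19, 21, 22, 25, 28, 29, 30, 31]] = 1
# ... rows 1..31 would be filled from c[1]..c[31] the same way.

def reverse2_linear(d):
    # XOR of selected bits == matrix-vector product modulo 2.
    return (M @ d) % 2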
Example #23
File: kwd.py Project: samuelmat19/CloGAN
               + cp.trace(cp.matmul(J_2_m, calc_K_v_v)) \
               - 2 * (cp.trace(cp.matmul(cp.matmul(calc_K_u_v, J_2_m), cp.matmul(calc_K_u_v.T, J_1_m)))) ** .5

    if USE_CUPY: cp.cuda.Stream.null.synchronize()

    return W_2


def kl_divergence(p, q):
    return np.sum(np.where(p != 0, p * np.log(p / q), 0))


if __name__ == "__main__":
    a = cp.random.normal(1, 2, 2048)
    b = cp.random.normal(0, 1, 2048)
    b = cp.zeros_like(a)
    print(np.var(a), np.mean(a))
    _foo = np.mean(a)
    a = (a - _foo) / np.std(a, axis=-1, keepdims=True) + _foo
    print(np.var(a), np.mean(a))

    import time
    import scipy.stats
    start_time = time.time()
    x = ([kernel_wasserstein_distance(a, b, True) for _ in range(1)])
    print("time spent:", time.time() - start_time)
    print(x)
    start_time = time.time()
    print(kernel_wasserstein_distance(a, b, False))
    print("time spent:", time.time() - start_time)
    print(scipy.stats.wasserstein_distance(a, b))
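One caveat with the kl_divergence helper above: np.where evaluates both
branches, so p * np.log(p / q) is still computed where p == 0 and emits
divide/invalid warnings. A hedged sketch of an equivalent form that masks
first (kl_divergence_safe is a hypothetical name):

import numpy as np

def kl_divergence_safe(p, q):
    p = np.asarray(p, dtype=float)
    q = np.asarray(q, dtype=float)
    mask = p > 0  # terms with p == 0 contribute nothing to the sum
    return float(np.sum(p[mask] * np.log(p[mask] / q[mask])))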
Example #24
def reconstruct_alt(imgs,
                    discs,
                    hres_size,
                    row,
                    n_iters=1,
                    o_f_init=None,
                    del_1=1000,
                    del_2=1,
                    round_values=True,
                    plot_per_frame=False,
                    show_interval=None,
                    subtract_bg=False,
                    out_path=None):
    """The main reconstruction algorithm. Adapted from Tian et. al."""
    # Put input images on GPU, estimate background noise
    imgs = [cp.array(img) for img in imgs]
    bgs = get_bg(imgs) if subtract_bg else cp.zeros(len(imgs))

    IMAGESIZE = imgs[0].shape[0]
    CUTOFF_FREQ_px = get_cutoff(row)
    FRAMES = len(imgs)

    orig = IMAGESIZE // 2 - 1  # Low-res origin
    lres_size = (IMAGESIZE, IMAGESIZE)
    m1, n1 = lres_size
    m, n = hres_size

    losses = []  # Reconstruction Loss
    convs = []  # Inverse Convergence index

    # Initial high-res guess
    if lres_size == hres_size:  # Initialize with ones
        # Use old algorithm
        F = lambda x: cp.fft.fftshift(cp.fft.fft2(x))
        Ft = lambda x: cp.fft.ifft2(cp.fft.ifftshift(x))
        o = cp.ones(hres_size)
        o_f = F(o)
    elif o_f_init is not None:  # Initialize with given initialization
        F = lambda x: cp.fft.fftshift(cp.fft.fft2(cp.fft.ifftshift(x)))
        Ft = lambda x: cp.fft.fftshift(cp.fft.ifft2(cp.fft.ifftshift(x)))
        o = cp.zeros_like(o_f_init)
        o_f = o_f_init
    else:  # Initialize with resized first frame from imgs
        F = lambda x: cp.fft.fftshift(cp.fft.fft2(cp.fft.ifftshift(x)))
        Ft = lambda x: cp.fft.fftshift(cp.fft.ifft2(cp.fft.ifftshift(x)))
        o = cp.sqrt(
            cp.array(cv2.resize(cp.asnumpy(imgs[0] - bgs[0]), hres_size)))
        o_f = Ft(o)

    # Pupil Function
    p = cp.zeros(lres_size)
    p = cp.array(cv2.circle(cp.asnumpy(p), (orig, orig), CUTOFF_FREQ_px, 1,
                            -1))
    ctf = p.copy()  # Ideal Pupil, for filtering later on

    # Main Loop
    log = tqdm(
        total=n_iters,
        desc=f'Starting...',
        bar_format=
        '{percentage:3.0f}% [{elapsed}<{remaining} ({rate_inv_fmt})]{bar}{desc}',
        leave=False,
        ascii=True)

    for j in range(n_iters):
        conv = []  # Convergence Index
        for i in range(FRAMES):

            if discs[i] == 0:  # Empty frame
                continue

            # Get k0x, k0y and hence, shifting values
            k0x, k0y = discs[i]

            # Construct auxiliary functions for the set of LEDs (= 1, here)
            if hres_size == lres_size:
                shift_x, shift_y = [
                    -round(k0x - orig), -round(k0y - orig)
                ] if round_values else [-(k0x - orig), -(k0y - orig)]

                if not round_values:
                    o_f_i = FourierShift2D(o_f,
                                           [shift_x, shift_y])  # O_i(k - k_m)
                else:
                    o_f_i = cp.roll(o_f, int(shift_y), axis=0)
                    o_f_i = cp.roll(o_f_i, int(shift_x), axis=1)

                yl, xl = 0, 0  # To reduce code later on

            else:  # Output size larger than individual frames
                _orig = hres_size[0] // 2 - 1

                del_x, del_y = k0x - orig, k0y - orig
                x, y = round(_orig - del_x), round(_orig - del_y)

                yl = int(y - m1 // 2)
                xl = int(x - n1 // 2)

                assert xl > 0 and yl > 0, 'Both should be > 0'
                o_f_i = o_f[yl:yl + n1, xl:xl + m1].copy()

            psi_k = o_f_i * p * ctf  #DEBUG: REPLACE * ctf with * p

            # Plot outputs after each frame, for debugging
            if plot_per_frame:
                o_i = Ft(o_f_i * p)
                plt.figure(figsize=(10, 2))
                plt.subplot(161)
                plt.imshow(cp.asnumpy(correct(abs(o_i))))
                plt.title(f'$I_{{l}}({i})$')
                opts()  #DEBUG
                plt.subplot(162)
                plt.imshow(
                    cp.asnumpy(
                        cv2.convertScaleAbs(
                            cp.asnumpy(20 * cp.log(1 + abs(o_f_i * p))))))
                plt.title(f'$S_{{l}}({i})$')
                opts()  #DEBUG

            # Impose intensity constraint and update auxiliary function
            psi_r = F(psi_k)  #DEBUG: CHANGE BACK TO F

            # Low-res estimate obtained from our reconstruction
            I_l = abs(psi_r)

            # Subtract background noise and clip values to avoid NaN
            I_hat = cp.clip(imgs[i] - bgs[i], a_min=0)
            phi_r = cp.sqrt(I_hat / (cp.abs(psi_r)**2)) * psi_r

            phi_k = Ft(phi_r)  #DEBUG: CHANGE BACK TO Ft

            # Update object and pupil estimates
            if hres_size == lres_size:
                if not round_values:
                    p_i = FourierShift2D(p, [-shift_x, -shift_y])  # P_i(k+k_m)
                else:
                    p_i = cp.roll(p, int(-shift_y), axis=0)
                    p_i = cp.roll(p_i, int(-shift_x), axis=1)

                if not round_values:
                    phi_k_i = FourierShift2D(
                        phi_k, [-shift_x, -shift_y])  # Phi_m_i(k+k_m)
                else:
                    phi_k_i = cp.roll(phi_k, int(-shift_y), axis=0)
                    phi_k_i = cp.roll(phi_k_i, int(-shift_x), axis=1)
            else:  # Output size larger than individual frames
                p_i = p.copy()
                phi_k_i = phi_k.copy()

            ## O_{i+1}(k)
            temp = o_f[yl:yl + n1, xl:xl + m1].copy() + ( cp.abs(p_i) * cp.conj(p_i) * (phi_k_i - o_f[yl:yl + n1, xl:xl + m1].copy() * p_i) ) / \
                        ( cp.abs(p).max() * (cp.abs(p_i) ** 2 + del_1) )

            ## P_{i+1}(k)
            p   =  p  + ( cp.abs(o_f_i) * cp.conj(o_f_i) * (phi_k - o_f_i * p) ) / \
                        ( cp.abs(o_f[yl:yl + n1, xl:xl + m1].copy()).max() * (cp.abs(o_f_i) ** 2 + del_2) )

            o_f[yl:yl + n1, xl:xl + m1] = temp.copy()

            ###### Using F here instead of Ft to get upright image
            o = F(o_f) if lres_size != hres_size else Ft(o_f)
            ######

            if plot_per_frame:
                plt.subplot(163)
                plt.imshow(cp.asnumpy(cp.mod(ctf * cp.angle(p), 2 * cp.pi)))
                plt.title(f'P({i})')
                opts()  #DEBUG
                plt.subplot(164)
                plt.imshow(cp.asnumpy(correct(abs(o))))
                plt.title(f'$I_{{h}}({i})$')
                opts()  #DEBUG
                plt.subplot(165)
                plt.imshow(cp.asnumpy(correct(cp.angle(o))))
                plt.title(f'$\\theta(I_{{h}}({i}))$')
                opts()  #DEBUG
                plt.subplot(166)
                plt.imshow(cp.asnumpy(show(cp.asnumpy(o_f))))
                plt.title(f'$S_{{h}}({i})$')
                opts()
                plt.show()  #DEBUG

            c = inv_conv_idx(I_l, imgs[i])
            conv.append(c)

        if not plot_per_frame and (show_interval is not None
                                   and j % show_interval == 0):
            o_i = Ft(o_f_i * p)  #DEBUG
            plt.figure(figsize=(10, 2))
            plt.subplot(161)
            plt.imshow(cp.asnumpy(correct(abs(o_i))))
            plt.title(f'$I_{{l}}({i})$')
            opts()  #DEBUG
            plt.subplot(162)
            plt.imshow(
                cp.asnumpy(
                    cv2.convertScaleAbs(
                        cp.asnumpy(20 * cp.log(1 + abs(o_f_i * p))))))
            plt.title(f'$S_{{l}}({i})$')
            opts()  #DEBUG
            plt.subplot(163)
            plt.imshow(cp.asnumpy(cp.mod(ctf * cp.angle(p), 2 * cp.pi)))
            plt.title(f'P({i})')
            opts()  #DEBUG
            plt.subplot(164)
            plt.imshow(cp.asnumpy(correct(abs(o))))
            plt.title(f'$I_{{h}}({i})$')
            opts()  #DEBUG
            plt.subplot(165)
            plt.imshow(cp.asnumpy(correct(cp.angle(o))))
            plt.title(f'$\\theta(I_{{h}}({i}))$')
            opts()  #DEBUG
            plt.subplot(166)
            plt.imshow(
                cp.asnumpy(
                    cv2.convertScaleAbs(cp.asnumpy(20 *
                                                   cp.log(1 + abs(o_f))))))
            plt.title(f'$S_{{h}}({i})$')
            opts()
            plt.show()  #DEBUG

        loss = metric_norm(imgs, o_f_i, p)
        losses.append(loss)
        conv = float(sum(conv) / len(conv))
        convs.append(conv)
        log.set_description_str(
            f'[Iteration {j + 1}] Convergence Loss: {cp.asnumpy(conv):e}')
        log.update(1)

    scale = 7
    plt.figure(figsize=(3 * scale, 4 * scale))

    plt.subplot(421)
    plt.plot(cp.asnumpy(cp.arange(len(losses))),
             cp.asnumpy(cp.clip(cp.array(losses), a_min=None, a_max=1e4)),
             'b-')
    plt.title('Loss Curve')
    plt.ylabel('Loss Value')
    plt.xlabel('Iteration')
    plt.subplot(422)
    plt.plot(cp.asnumpy(cp.arange(len(convs))),
             cp.asnumpy(cp.clip(cp.array(convs), a_min=None, a_max=1e14)),
             'b-')
    plt.title('Convergence Index Curve')
    plt.ylabel('Convergence Index')
    plt.xlabel('Iteration')

    amp = cp.array(cv2.resize(
        read_tiff(row.AMPLITUDE.values[0])[0], hres_size))
    phase = cp.array(cv2.resize(read_tiff(row.PHASE.values[0])[0], hres_size))

    plt.subplot(434)
    plt.title(f'amplitude (Scaled up from {lres_size})')
    plt.imshow(cp.asnumpy(to_uint8(amp)))
    opts()

    plt.subplot(435)
    plt.title(f'phase (Scaled up from {lres_size})')
    plt.imshow(cp.asnumpy(to_uint8(phase)))

    plt.subplot(436)
    rec = abs(cp.sqrt(amp) * cp.exp(1j * phase))
    plt.title(f'Ground Truth (Scaled up from {lres_size})')
    plt.imshow(cp.asnumpy(to_uint8(rec)))

    plt.subplot(437)
    plt.title('Reconstruction Amplitude')
    amp = abs(o)
    if lres_size == hres_size:
        amp = correct(amp)
    plt.imshow(cp.asnumpy(to_uint8((amp))))

    plt.subplot(438)
    plt.title('Reconstruction Phase')
    phase = cp.angle(o)
    if lres_size == hres_size:
        phase = correct(phase)
    plt.imshow(cp.asnumpy(to_uint8(phase)))

    plt.subplot(439)
    plt.title('Reconstructed Image')
    rec = abs(cp.sqrt(amp) * cp.exp(1j * phase))
    plt.imshow(cp.asnumpy(to_uint8(rec)))

    plt.subplot(427)
    plt.title(f'Recovered Pupil')
    p_show = cp.mod(ctf * cp.angle(p), 2 * cp.pi)
    p_show = (p_show / p_show.max() * 255).astype(np.uint8)
    plt.imshow(cp.asnumpy(p_show), cmap='nipy_spectral')

    plt.subplot(428)
    plt.title(f'Raw frames\' mean (Scaled up from {lres_size})')
    plt.imshow(cv2.resize(cp.asnumpy(cp.array(imgs).mean(axis=0)), hres_size))

    if out_path is None:
        plt.show()
    else:
        plt.savefig(out_path, bbox_inches='tight')
        plt.close('all')

    # Ignore early noise and print where the error is lowest
    if n_iters > 10:
        it = cp.argmin(cp.array(convs[10:])) + 11
        if out_path is not None:
            print(f'Convergence index lowest at {it}th iteration.')
    else:
        it = cp.argmin(cp.array(convs)) + 1
        if out_path is not None:
            print(f'Convergence index lowest at {it}th iteration.')

    if lres_size == hres_size:
        o = correct(o)
    return o, p, it
Example #25
def _prep_output(self, a):
    if self.output == 'empty':
        return cupy.zeros_like(a)
    return self.output
Example #26
def rezToPhy(ctx, dat_path=None, output_dir=None):
    # pull out results from kilosort's rez to either return to workspace or to
    # save in the appropriate format for the phy GUI to run on. If you provide
    # a savePath it should be a folder

    savePath = output_dir
    Path(savePath).mkdir(exist_ok=True, parents=True)

    ctx = checkClusters(ctx)  # check clusters integrity

    probe = ctx.probe
    ir = ctx.intermediate
    params = ctx.params
    nt0 = params.nt0

    # spikeTimes will be in samples, not seconds
    W = cp.asarray(ir.Wphy).astype(np.float32)
    Wrot = ir.Wrot
    est_contam_rate = ir.est_contam_rate
    good = ir.good
    Ths = ir.Ths

    st3 = cp.asarray(ir.st3_c)

    U = cp.asarray(ir.U_s).astype(np.float32)
    iNeigh = ir.iNeigh_s
    iNeighPC = ir.iNeighPC_s
    simScore = ir.simScore_s

    if st3.shape[1] > 4:
        st3 = st3[:, :4]

    isort = cp.argsort(st3[:, 0])
    st3 = st3[isort, :]
    # cProj = ir.cProj_c[cp.asnumpy(isort), :]
    # cProjPC = ir.cProjPC_c[cp.asnumpy(isort), :, :]

    fs = os.listdir(savePath)
    for file in fs:
        if file.endswith('.npy'):
            os.remove(join(savePath, file))
    if os.path.isdir(join(savePath, '.phy')):
        shutil.rmtree(join(savePath, '.phy'))

    spikeTimes = st3[:, 0].astype(cp.uint64)
    spikeTemplates = st3[:, 1].astype(cp.uint32)

    # (DEV_NOTES) if statement below seems useless due to above if statement
    if st3.shape[1] > 4:
        spikeClusters = (1 + st3[:, 4]).astype(cp.uint32)

    # templateFeatures = cProj
    templateFeatureInds = iNeigh.astype(cp.uint32)
    # pcFeatures = cProjPC
    pcFeatureInds = iNeighPC.astype(cp.uint32)

    whiteningMatrix = cp.asarray(Wrot) / params.scaleproc
    whiteningMatrixInv = cp.linalg.pinv(whiteningMatrix)

    amplitudes = st3[:, 2]

    Nchan = probe.Nchan

    xcoords = probe.xc
    ycoords = probe.yc
    chanMap = probe.chanMap
    chanMap0ind = chanMap  # - 1

    nt0, Nfilt = W.shape[:2]

    # (DEV_NOTES) 2 lines below can be combined
    # templates = cp.einsum('ikl,jkl->ijk', U, W).astype(cp.float32)
    # templates = cp.zeros((Nchan, nt0, Nfilt), dtype=np.float32, order='F')
    tempAmpsUnscaled = cp.zeros(Nfilt, dtype=np.float32)
    templates_writer = NpyWriter(join(savePath, 'templates.npy'),
                                 (Nfilt, nt0, Nchan), np.float32)
    for iNN in tqdm(range(Nfilt), desc="Computing templates"):
        t = cp.dot(U[:, iNN, :], W[:, iNN, :].T).T
        templates_writer.append(t)
        t_unw = cp.dot(t, whiteningMatrixInv)
        assert t_unw.ndim == 2
        tempChanAmps = t_unw.max(axis=0) - t_unw.min(axis=0)
        tempAmpsUnscaled[iNN] = tempChanAmps.max()

    templates_writer.close()
    # templates = cp.transpose(templates, (2, 1, 0))  # now it's nTemplates x nSamples x nChannels
    # we include all channels so this is trivial
    templatesInds = cp.tile(np.arange(Nfilt), (Nchan, 1))

    # here we compute the amplitude of every template...

    # unwhiten all the templates
    # tempsUnW = cp.einsum('ijk,kl->ijl', templates, whiteningMatrixinv)
    # tempsUnW = cp.zeros(templates.shape, dtype=np.float32, order='F')
    # for t in tqdm(range(templates.shape[0]), desc="Unwhitening the templates"):
    #     tempsUnW[t, :, :] = cp.dot(templates[t, :, :], whiteningMatrixInv)

    # The amplitude on each channel is the positive peak minus the negative
    # tempChanAmps = tempsUnW.max(axis=1) - tempsUnW.min(axis=1)

    # The template amplitude is the amplitude of its largest channel
    # tempAmpsUnscaled = tempChanAmps.max(axis=1)

    # assign all spikes the amplitude of their template multiplied by their
    # scaling amplitudes
    # tempAmpsUnscaled = cp.(tempAmpsUnscaled, axis=0).astype(np.float32)
    spikeAmps = tempAmpsUnscaled[spikeTemplates] * amplitudes

    # take the average of all spike amps to get actual template amps (since
    # tempScalingAmps are equal mean for all templates)
    ta = clusterAverage(spikeTemplates, spikeAmps)
    tids = cp.unique(spikeTemplates).astype(np.int64)
    tempAmps = cp.zeros_like(tempAmpsUnscaled, order='F')
    tempAmps[
        tids] = ta  # because ta only has entries for templates that had at least one spike
    tempAmps = params.gain * tempAmps  # for consistency, make first dimension template number

    save_pcs(ir.spikes_to_remove, ir.cProj, ir.cProjPC, savePath, st3, isort)

    # with open(, 'wb') as fp:
    #     save_large_array(fp, templateFeatures)
    # cProj = ir.cProj_c[cp.asnumpy(isort), :]
    # cProjPC = ir.cProjPC_c[cp.asnumpy(isort), :, :]

    def _save(name, arr, dtype=None):
        cp.save(join(savePath, name + '.npy'), arr.astype(dtype or arr.dtype))

    if savePath is not None:
        _save('spike_times', spikeTimes)
        _save('spike_templates', spikeTemplates, cp.uint32)
        if st3.shape[1] > 4:
            _save('spike_clusters', spikeClusters, cp.uint32)
        else:
            _save('spike_clusters', spikeTemplates, cp.uint32)
        _save('amplitudes', amplitudes)
        # _save('templates', templates)
        _save('templates_ind', templatesInds)

        chanMap0ind = chanMap0ind.astype(cp.int32)

        _save('channel_map', chanMap0ind)
        _save('channel_positions', np.c_[xcoords, ycoords])

        # _save('template_features', templateFeatures)
        # with open(join(savePath, 'template_features.npy'), 'wb') as fp:
        #     save_large_array(fp, templateFeatures)
        _save('template_feature_ind', templateFeatureInds.T)

        # _save('pc_features', pcFeatures)
        # with open(join(savePath, 'pc_features.npy'), 'wb') as fp:
        #     save_large_array(fp, pcFeatures)
        _save('pc_feature_ind', pcFeatureInds.T)

        _save('whitening_mat', whiteningMatrix)
        _save('whitening_mat_inv', whiteningMatrixInv)

        _save('thresholds', Ths)

        if 'simScore' in ir:
            similarTemplates = simScore
            _save('similar_templates', similarTemplates)

        est_contam_rate[np.isnan(est_contam_rate)] = 1
        with open(join(savePath, 'cluster_group.tsv'), 'w') as f:
            f.write('cluster_id\tgroup\n')
            for j in range(len(good)):
                if good[j]:
                    f.write('%d\tgood\n' % j)
                # else:
                #     f.write('%d\tmua\n' % j)

        with open(join(savePath, 'cluster_ContamPct.tsv'), 'w') as f:
            f.write('cluster_id\tContamPct\n')
            for j in range(len(good)):
                f.write('%d\t%.1f\n' % (j, 100 * est_contam_rate[j]))

        with open(join(savePath, 'cluster_Amplitude.tsv'), 'w') as f:
            f.write('cluster_id\tAmplitude\n')
            for j in range(len(good)):
                f.write('%d\t%.1f\n' % (j, tempAmps[j]))

        # make params file
        if not os.path.exists(join(savePath, 'params.py')):
            with open(join(savePath, 'params.py'), 'w') as f:
                if os.path.isabs(dat_path):
                    f.write('dat_path = "%s"\n' % dat_path)
                else:
                    f.write('dat_path = "../%s"\n' % dat_path)
                f.write('n_channels_dat = %d\n' % probe.NchanTOT)
                f.write('dtype = "int16"\n')
                f.write('offset = 0\n')
                f.write('hp_filtered = False\n')
                f.write('sample_rate = %i\n' % params.fs)
                f.write('template_scaling = %.1f\n' % params.templateScaling)
Example #27
def convolutional_barycenter_gpu(Hv,
                                 reg,
                                 alpha,
                                 stabThresh=1e-30,
                                 niter=1500,
                                 tol=1e-9,
                                 sharpening=False,
                                 verbose=False):
    """Main function solving wasserstein barycenter problem using gpu

    Arguments:
        Hv {Set of distributions (cparray)} -- 
        reg {regularization term "gamma"} -- float superior to 0, generally equals size of space/40
        alpha {list} -- set of weights

    Keyword Arguments:
        stabThresh {float} -- Stabilization threshold to prevent division by 0 (default: {1e-30})
        niter {int} -- Maximum number of loop iteration (default: {1500})
        tol {float} -- convergence tolerance at which point iterations stop (default: {1e-9})
        sharpening {bool} -- Whether or not entropic sharpening is used (default: {False})
        verbose {bool} --  verbose option

    Returns:
        cparray -- solution of weighted wassertein barycenter problem
    """
    def K(x):
        return cp.array(gaussian_filter(cp.asnumpy(x), sigma=reg))

    def to_find_root(barycenter, H0, beta):
        return entropy(barycenter**beta) - H0

    alpha = cp.array(alpha)
    alpha = alpha / alpha.sum()
    Hv = cp.array(Hv)
    mean_weights = (Hv[0].sum() + Hv[1].sum()) / 2.
    #print('mean weights', mean_weights)
    for i in range(len(Hv)):
        Hv[i] = Hv[i] / Hv[i].sum()
    v = cp.ones(Hv.shape)
    Kw = cp.ones(Hv.shape)

    entropy_max = max_entropy(Hv)
    barycenter = cp.zeros(Hv[0].shape)

    change = 1
    for j in range(niter):
        t0 = time.time()
        barycenterOld = barycenter

        barycenter = cp.zeros_like(Hv[0, :, :])
        for i in range(Hv.shape[0]):

            Kw[i, :, :] = K(Hv[i, :, :] /
                            cp.maximum(stabThresh, K(v[i, :, :])))
            barycenter += alpha[i] * cp.log(
                cp.maximum(stabThresh, v[i, :, :] * Kw[i, :, :]))

        barycenter = cp.exp(barycenter)
        change = cp.sum(cp.abs(barycenter - barycenterOld))
        if sharpening:
            if (entropy(barycenter)) > (entropy_max):

                beta = newton(
                    lambda beta: to_find_root(barycenter, entropy_max, beta),
                    1,
                    tol=1e-6)
                if beta < 0:
                    beta = 1

            else:
                beta = 1
            barycenter = barycenter**beta

        for i in range(Hv.shape[0]):
            v[i, :, :] = barycenter / cp.maximum(stabThresh, Kw[i, :, :])

        if verbose:
            print("iter : ", j, "change : ", change, 'time :',
                  time.time() - t0)
        if change < tol:
            break

    return cp.asnumpy(barycenter * mean_weights)
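A hedged usage sketch for the solver above, with two toy distributions on
a 64x64 grid (it assumes the helpers used inside the function, such as
gaussian_filter and entropy, are in scope):

import cupy as cp

H = cp.zeros((2, 64, 64))
H[0, 16, 16] = 1.0  # delta mass in the upper-left
H[1, 48, 48] = 1.0  # delta mass in the lower-right

# reg ~ size of space / 40, per the docstring above.
bary = convolutional_barycenter_gpu(H, reg=64 / 40, alpha=[0.5, 0.5])
print(bary.shape, bary.sum())  # mass ~ mean of the input masses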
Example #28
def variance(input, labels=None, index=None):
    """Calculates the variance of the values of an n-D image array, optionally
    at specified sub-regions.

    Args:
        input (cupy.ndarray): Nd-image data to process.
        labels (cupy.ndarray or None): Labels defining sub-regions in `input`.
            If not None, must be same shape as `input`.
        index (cupy.ndarray or None): `labels` to include in output. If None
            (default), all values where `labels` is non-zero are used.

    Returns:
        variance (cupy.ndarray): Values of variance, for each sub-region if
            `labels` and `index` are specified.

    .. seealso:: :func:`scipy.ndimage.variance`
    """
    if not isinstance(input, cupy.ndarray):
        raise TypeError('input must be cupy.ndarray')

    if input.dtype in (cupy.complex64, cupy.complex128):
        raise TypeError("cupyx.scipy.ndimage.variance doesn't support %{}"
                        "".format(input.dtype.type))

    use_kern = False
    # There are constraints on types because of atomicAdd() in CUDA.
    if input.dtype not in [
            cupy.int32, cupy.float16, cupy.float32, cupy.float64, cupy.uint32,
            cupy.uint64, cupy.ulonglong
    ]:
        warnings.warn(
            'Using the slower implementation as '
            'cupyx.scipy.ndimage.variance supports int32, float16, '
            'float32, float64, uint32, uint64 as data types '
            'for the fast implementation', _util.PerformanceWarning)
        use_kern = True

    def calc_var_with_intermediate_float(input):
        vals_c = input - input.mean()
        count = vals_c.size
        # Does not use `ndarray.mean()` here to return the same results as
        # SciPy does, especially in case `input`'s dtype is float16.
        return cupy.square(vals_c).sum() / cupy.asanyarray(count).astype(float)

    if labels is None:
        return calc_var_with_intermediate_float(input)

    if not isinstance(labels, cupy.ndarray):
        raise TypeError('label must be cupy.ndarray')

    input, labels = cupy.broadcast_arrays(input, labels)

    if index is None:
        return calc_var_with_intermediate_float(input[labels > 0])

    if cupy.isscalar(index):
        return calc_var_with_intermediate_float(input[labels == index])

    if not isinstance(index, cupy.ndarray):
        if not isinstance(index, int):
            raise TypeError('index must be cupy.ndarray or a scalar int')
        else:
            return (input[labels == index]).var().astype(cupy.float64,
                                                         copy=False)

    mean_val, count = _mean_driver(input, labels, index, True, use_kern)
    if use_kern:
        new_axis = (..., *(cupy.newaxis for _ in range(input.ndim)))
        return cupy.where(labels[None, ...] == index[new_axis],
                          cupy.square(input - mean_val[new_axis]), 0).sum(
                              tuple(range(1, input.ndim + 1))) / count
    out = cupy.zeros_like(index, dtype=cupy.float64)
    return _ndimage_variance_kernel(input, labels, index, index.size, mean_val,
                                    out) / count
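A short usage sketch mirroring scipy.ndimage.variance, assuming this is
the implementation behind cupyx.scipy.ndimage.variance:

import cupy
from cupyx.scipy import ndimage as ndi

img = cupy.array([[1., 2., 0.],
                  [4., 8., 0.],
                  [0., 0., 9.]])
labels = cupy.array([[1, 1, 0],
                     [1, 1, 0],
                     [0, 0, 2]])

# Variance within each labelled region.
print(ndi.variance(img, labels, index=cupy.array([1, 2])))
# label 1: values [1, 2, 4, 8] -> 7.1875 ; label 2: [9] -> 0.0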
Example #29
def main():
    os.makedirs(args.snapshot_path, exist_ok=True)

    dataset = gqn.data.Dataset(args.dataset_path)
    sampler = gqn.data.Sampler(dataset)
    iterator = gqn.data.Iterator(sampler, batch_size=args.batch_size)

    hyperparams = HyperParameters()
    model = Model(hyperparams)
    model.to_gpu()

    optimizer = Optimizer(model.parameters)

    for iteration in range(args.training_steps):
        for batch_index, data_indices in enumerate(iterator):
            # shape: (batch, views, height, width, channels)
            # range: [-1, 1]
            images, viewpoints = dataset[data_indices]

            image_size = images.shape[2:4]
            total_views = images.shape[1]

            # sample number of views
            num_views = random.choice(range(total_views))
            query_index = random.choice(range(total_views))

            if num_views > 0:
                observed_images = images[:, :num_views]
                observed_viewpoints = viewpoints[:, :num_views]

                # (batch, views, height, width, channels) -> (batch * views, height, width, channels)
                observed_images = observed_images.reshape((
                    args.batch_size * num_views, ) + observed_images.shape[2:])
                observed_viewpoints = observed_viewpoints.reshape(
                    (args.batch_size * num_views, ) +
                    observed_viewpoints.shape[2:])

                # (batch * views, height, width, channels) -> (batch * views, channels, height, width)
                observed_images = observed_images.transpose((0, 3, 1, 2))

                # transfer to gpu
                observed_images = chainer.cuda.to_gpu(observed_images)
                observed_viewpoints = chainer.cuda.to_gpu(observed_viewpoints)

                r = model.representation_network.compute_r(
                    observed_images, observed_viewpoints)

                # (batch * views, channels, height, width) -> (batch, views, channels, height, width)
                r = r.reshape((args.batch_size, num_views) + r.shape[1:])

                # sum element-wise across views
                r = cf.sum(r, axis=1)
            else:
                r = np.zeros((args.batch_size, hyperparams.channels_r) +
                             hyperparams.chrz_size,
                             dtype="float32")
                r = chainer.cuda.to_gpu(r)

            query_images = images[:, query_index]
            query_viewpoints = viewpoints[:, query_index]

            # (batch * views, height, width, channels) -> (batch * views, channels, height, width)
            query_images = query_images.transpose((0, 3, 1, 2))

            # transfer to gpu
            query_images = chainer.cuda.to_gpu(query_images)
            query_viewpoints = chainer.cuda.to_gpu(query_viewpoints)

            hg_0 = xp.zeros((
                args.batch_size,
                hyperparams.channels_chz,
            ) + hyperparams.chrz_size,
                            dtype="float32")
            cg_0 = xp.zeros((
                args.batch_size,
                hyperparams.channels_chz,
            ) + hyperparams.chrz_size,
                            dtype="float32")
            u_0 = xp.zeros((
                args.batch_size,
                hyperparams.generator_u_channels,
            ) + image_size,
                           dtype="float32")
            he_0 = xp.zeros((
                args.batch_size,
                hyperparams.channels_chz,
            ) + hyperparams.chrz_size,
                            dtype="float32")
            ce_0 = xp.zeros((
                args.batch_size,
                hyperparams.channels_chz,
            ) + hyperparams.chrz_size,
                            dtype="float32")

            sigma_t = 1.0

            loss_kld = 0
            he_l = he_0
            ce_l = ce_0
            hg_l = hg_0
            cg_l = cg_0
            u_l = u_0
            for l in range(hyperparams.generator_total_timestep):
                # zg_l = model.generation_network.sample_z(hg_l)
                # hg_l, cg_l, u_l = model.generation_network.forward_onestep(
                #     hg_0, cg_0, u_0, zg_l, query_viewpoints, r)
                # x = model.generation_network.sample_x(u_l)

                he_next, ce_next = model.inference_network.forward_onestep(
                    hg_l, he_l, ce_l, query_images, query_viewpoints, r)
                mu_z_q = model.inference_network.compute_mu_z(he_l)
                ze_l = cf.gaussian(mu_z_q, xp.zeros_like(mu_z_q))
                hg_next, cg_next, u_next = model.generation_network.forward_onestep(
                    hg_l, cg_l, u_l, ze_l, query_viewpoints, r)
                mu_z_p = model.generation_network.compute_mu_z(hg_l)

                kld = gqn.nn.chainer.functions.gaussian_kl_divergence(
                    mu_z_q, mu_z_p)

                loss_kld += cf.mean(kld)

                hg_l = hg_next
                cg_l = cg_next
                u_l = u_next
                he_l = he_next
                ce_l = ce_next

            mu_x = model.generation_network.compute_mu_x(u_l)
            negative_log_likelihood = gqn.nn.chainer.functions.gaussian_negative_log_likelihood(
                query_images, mu_x, xp.full_like(mu_x, math.log(sigma_t)))
            loss_nll = cf.mean(negative_log_likelihood)
            loss = loss_nll + loss_kld
            model.cleargrads()
            loss.backward()
            optimizer.step()

            print("Iteration {}: {} / {} - loss: {}".format(
                iteration + 1, batch_index + 1, len(iterator),
                float(loss.data)))
        chainer.serializers.save_hdf5(
            os.path.join(args.snapshot_path, "model.hdf5"), model.parameters)
Example #30
def test_compose_vector_fields(shape):
    r"""
    Creates two random displacement fields that exactly map pixels from an input
    image to an output image. The resulting displacements and their
    composition, although operating in physical space, map the points exactly
    (up to numerical precision).
    """
    np.random.seed(8315759)
    input_shape = shape
    tgt_sh = shape
    ndim = len(shape)
    if ndim == 3:
        # create a simple affine transformation
        ns = input_shape[0]
        nr = input_shape[1]
        nc = input_shape[2]
        s = 1.5
        t = 2.5
        trans = np.array([
            [1, 0, 0, -t * ns],
            [0, 1, 0, -t * nr],
            [0, 0, 1, -t * nc],
            [0, 0, 0, 1],
        ])
        trans_inv = np.linalg.inv(trans)
        scale = np.array([[1 * s, 0, 0, 0], [0, 1 * s, 0, 0], [0, 0, 1 * s, 0],
                          [0, 0, 0, 1]])
        dipy_func = vfu.compose_vector_fields_3d
        dipy_create_func = vfu.create_random_displacement_3d
    elif ndim == 2:
        # create a simple affine transformation
        nr = input_shape[0]
        nc = input_shape[1]
        s = 1.5
        t = 2.5
        trans = np.array([[1, 0, -t * nr], [0, 1, -t * nc], [0, 0, 1]])
        trans_inv = np.linalg.inv(trans)
        scale = np.array([[1 * s, 0, 0], [0, 1 * s, 0], [0, 0, 1]])
        dipy_func = vfu.compose_vector_fields_2d
        dipy_create_func = vfu.create_random_displacement_2d

    gt_affine = trans_inv.dot(scale.dot(trans))

    # create two random displacement fields
    input_grid2world = gt_affine
    target_grid2world = gt_affine

    disp1, assign1 = dipy_create_func(
        np.array(input_shape, dtype=np.int32),
        input_grid2world,
        np.array(tgt_sh, dtype=np.int32),
        target_grid2world,
    )
    disp1 = np.array(disp1, dtype=floating)
    assign1 = np.array(assign1)

    disp2, assign2 = dipy_create_func(
        np.array(input_shape, dtype=np.int32),
        input_grid2world,
        np.array(tgt_sh, dtype=np.int32),
        target_grid2world,
    )
    disp2 = np.array(disp2, dtype=floating)
    assign2 = np.array(assign2)

    # create a random image (with decimal digits) to warp
    moving_image = np.empty(tgt_sh, dtype=floating)
    moving_image[...] = np.random.randint(0, 10,
                                          np.size(moving_image)).reshape(
                                              tuple(tgt_sh))
    # set boundary values to zero so we don't test wrong interpolation due to
    # floating point precision
    if ndim == 3:
        moving_image[0, :, :] = 0
        moving_image[-1, :, :] = 0
        moving_image[:, 0, :] = 0
        moving_image[:, -1, :] = 0
        moving_image[:, :, 0] = 0
        moving_image[:, :, -1] = 0
        # evaluate the composed warping using the exact assignments
        # (first 1 then 2)

        warp1 = moving_image[(assign2[..., 0], assign2[..., 1], assign2[...,
                                                                        2])]
        expected = warp1[(assign1[..., 0], assign1[..., 1], assign1[..., 2])]

    elif ndim == 2:
        moving_image[0, :] = 0
        moving_image[-1, :] = 0
        moving_image[:, 0] = 0
        moving_image[:, -1] = 0
        # evaluate the composed warping using the exact assignments
        # (first 1 then 2)

        warp1 = moving_image[(assign2[..., 0], assign2[..., 1])]
        expected = warp1[(assign1[..., 0], assign1[..., 1])]

    # compose the displacement fields
    target_world2grid = np.linalg.inv(target_grid2world)
    premult_index = target_world2grid.dot(input_grid2world)
    premult_disp = target_world2grid

    disp1d = cupy.asarray(disp1)
    disp2d = cupy.asarray(disp2)
    premult_indexd = cupy.asarray(premult_index)
    premult_dispd = cupy.asarray(premult_disp)
    moving_imaged = cupy.asarray(moving_image)

    for time_scaling in [0.25, 1.0, 4.0]:
        composition, stats = dipy_func(
            disp1,
            disp2 / time_scaling,
            premult_index,
            premult_disp,
            time_scaling,
            None,
        )
        compositiond, statsd = compose_vector_fields(
            disp1d,
            disp2d / time_scaling,
            premult_indexd,
            premult_dispd,
            time_scaling,
            None,
        )
        cupy.testing.assert_array_almost_equal(composition, compositiond)
        cupy.testing.assert_array_almost_equal(stats, statsd)

        for order in [0, 1]:
            warped = warp(
                moving_imaged,
                compositiond,
                None,
                premult_indexd,
                premult_dispd,
                order=order,
            )
            cupy.testing.assert_array_almost_equal(warped, expected)

        # test updating the displacement field instead of creating a new one
        compositiond = disp1d.copy()
        compose_vector_fields(
            compositiond,
            disp2d / time_scaling,
            premult_indexd,
            premult_dispd,
            time_scaling,
            compositiond,
        )

        for order in [0, 1]:
            warped = warp(
                moving_imaged,
                compositiond,
                None,
                premult_indexd,
                premult_dispd,
                order=order,
            )
            cupy.testing.assert_array_almost_equal(warped, expected)

    # Test non-overlapping case
    if ndim == 3:
        x_0 = np.asarray(range(input_shape[0]))
        x_1 = np.asarray(range(input_shape[1]))
        x_2 = np.asarray(range(input_shape[2]))
        X = np.empty(input_shape + (3, ), dtype=np.float64)
        O = np.ones(input_shape)
        X[..., 0] = x_0[:, None, None] * O
        X[..., 1] = x_1[None, :, None] * O
        X[..., 2] = x_2[None, None, :] * O
        sz = input_shape[0] * input_shape[1] * input_shape[2] * 3
        random_labels = np.random.randint(0, 2, sz)
        random_labels = random_labels.reshape(input_shape + (3, ))
    elif ndim == 2:
        # Test non-overlapping case
        x_0 = np.asarray(range(input_shape[0]))
        x_1 = np.asarray(range(input_shape[1]))
        X = np.empty(input_shape + (2, ), dtype=np.float64)
        O = np.ones(input_shape)
        X[..., 0] = x_0[:, None] * O
        X[..., 1] = x_1[None, :] * O
        random_labels = np.random.randint(0, 2,
                                          input_shape[0] * input_shape[1] * 2)
        random_labels = random_labels.reshape(input_shape + (2, ))
    values = np.array([-1, tgt_sh[0]])
    disp1 = (values[random_labels] - X).astype(floating)
    disp1d = cupy.asarray(disp1)
    disp2d = cupy.asarray(disp2)
    composition, stats = compose_vector_fields(disp1d, disp2d, None, None, 1.0,
                                               None)
    cupy.testing.assert_array_almost_equal(composition,
                                           cupy.zeros_like(composition))

    # test updating the displacement field instead of creating a new one
    compositiond = disp1d.copy()
    compose_vector_fields(compositiond, disp2d, None, None, 1.0, compositiond)
    cupy.testing.assert_array_almost_equal(compositiond,
                                           cupy.zeros_like(composition))
Example #31
    def update(self, data, now_epoch):
        if self.KL_counter < self.KL_loss_iter:
            self.KL_loss_ratio = self.KL_counter * (1 / self.KL_loss_iter)
            self.KL_counter += 1
        else:
            self.KL_loss_ratio = 1

        for i in range(self.gpu_num):
            netG = getattr(self.model, f"netG_{i}")
            netD = getattr(self.model, f"netD_{i}")

            depth = getattr(data, f"depth_{i}")
            real_img = getattr(data, f"image_{i}")
            embeddings = getattr(data, f"text_{i}")

            wrong_img = getattr(data, f"wrong_image_{i}")
            wrong_depth = getattr(data, f"wrong_depth_{i}")
            #wrong_text = getattr(data, f"wrong_text_{i}")

            fake_img, KL_loss = netG(embeddings)
            g_loss = self.KL_loss_conf * KL_loss
            #g_loss = self.KL_loss_conf * self.KL_loss_ratio * KL_loss

            d_loss = 0
            for key in real_img.keys():
                real_logit, real_img_logit_local = netD(real_img[key],
                                                        embeddings,
                                                        fg=fg,
                                                        bg=bg)
                fake_logit, fake_img_logit_local = netD(fake_img[key],
                                                        embeddings,
                                                        fg=fg,
                                                        bg=bg)
                wrong_logit, wrong_img_logit_local = netD(wrong_img[key],
                                                          embeddings,
                                                          fg=fg,
                                                          bg=bg)
                ''' compute disc pair loss '''
                real_labels = cuda.to_gpu(
                    xp.ones_like(real_logit.data, dtype="float32"), i)
                fake_labels = cuda.to_gpu(
                    xp.zeros_like(real_logit.data, dtype="float32"), i)
                pair_loss = compute_d_pair_loss(real_logit, wrong_logit,
                                                fake_logit, real_labels,
                                                fake_labels)
                ''' compute disc image loss '''
                real_labels = cuda.to_gpu(
                    xp.ones_like(real_img_logit_local.data, dtype="float32"),
                    i)
                fake_labels = cuda.to_gpu(
                    xp.zeros_like(real_img_logit_local.data, dtype="float32"),
                    i)
                img_loss = compute_d_img_loss(wrong_img_logit_local,
                                              real_img_logit_local,
                                              fake_img_logit_local,
                                              real_labels, fake_labels)

                d_loss += (pair_loss + img_loss)
                ''' compute gen loss '''
                real_labels = cuda.to_gpu(
                    xp.ones_like(fake_logit.data, dtype="float32"), i)
                g_loss += compute_g_loss(fake_logit, real_labels)
                real_labels = cuda.to_gpu(
                    xp.ones_like(fake_img_logit_local.data, dtype="float32"),
                    i)
                g_loss += compute_g_loss(fake_img_logit_local, real_labels)

            if self.counter % self.n_dis == 0:
                netG.cleargrads()
                g_loss.backward()
            unchain_backward(fake_img)

            netD.cleargrads()
            d_loss.backward()

        #add calc grad
        netG_0 = getattr(self.model, "netG_0")
        netD_0 = getattr(self.model, "netD_0")
        for i in range(1, self.gpu_num - 1):
            netG = getattr(self.model, f"netG_{i}")
            netD = getattr(self.model, f"netD_{i}")
            netG_0.addgrads(netG)
            netD_0.addgrads(netD)

        if self.now_epoch != now_epoch:
            self.now_epoch = now_epoch
            if self.now_epoch % self.epoch_decay == 0:
                self.netG_opt.hyperparam.alpha /= 2
                self.netD_opt.hyperparam.alpha /= 2

        self.netG_opt.update()
        self.netD_opt.update()
        cuda.memory_pool.free_all_blocks()
        self.counter += 1
Example #32
def run(args):
    onnx_filename = run_onnx_util.onnx_model_file(args.test_dir, args.model_file)
    input_names, output_names = run_onnx_util.onnx_input_output_names(
        onnx_filename)
    test_data_dir = os.path.join(args.test_dir, 'test_data_set_0')
    inputs, outputs = run_onnx_util.load_test_data(
        test_data_dir, input_names, output_names)

    with open(onnx_filename, 'rb') as f:
        onnx_proto = f.read()

    if args.debug:
        logger = tensorrt.Logger(tensorrt.Logger.Severity.INFO)
    else:
        logger = tensorrt.Logger()
    builder = tensorrt.Builder(logger)
    if args.fp16_mode:
        builder.fp16_mode = True
    # TODO(hamaji): Infer batch_size from inputs.
    builder.max_batch_size = args.batch_size
    network = builder.create_network()
    parser = tensorrt.OnnxParser(network, logger)
    if not parser.parse(onnx_proto):
        for i in range(parser.num_errors):
            sys.stderr.write('ONNX import failure: %s\n' % parser.get_error(i))
        raise RuntimeError('ONNX import failed')
    engine = builder.build_cuda_engine(network)
    context = engine.create_execution_context()

    assert len(inputs) + len(outputs) == engine.num_bindings
    for i, (_, input) in enumerate(inputs):
        assert args.batch_size == input.shape[0]
        assert input.shape[1:] == engine.get_binding_shape(i)
    for i, (_, output) in enumerate(outputs):
        assert args.batch_size == output.shape[0]
        i += len(inputs)
        assert output.shape[1:] == engine.get_binding_shape(i)

    inputs = [v for n, v in inputs]
    outputs = [v for n, v in outputs]
    gpu_inputs = to_gpu(inputs)
    gpu_outputs = []
    for output in outputs:
        gpu_outputs.append(cupy.zeros_like(cupy.array(output)))
    bindings = [a.data.ptr for a in gpu_inputs]
    bindings += [a.data.ptr for a in gpu_outputs]

    context.execute(args.batch_size, bindings)

    actual_outputs = to_cpu(gpu_outputs)

    for name, expected, actual in zip(
            output_names, outputs, actual_outputs):
        np.testing.assert_allclose(expected, actual,
                                   rtol=args.rtol, atol=args.atol,
                                   err_msg=name)
        print('%s: OK' % name)
    print('ALL OK')

    def compute():
        context.execute(args.batch_size, bindings)
        cupy.cuda.device.Device().synchronize()

    return run_onnx_util.run_benchmark(compute, args.iterations)
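
run() reads several attributes off `args`; the argparse setup lives elsewhere in the script, but its surface can be inferred from the attribute accesses above. A hypothetical reconstruction (option names taken from those accesses, defaults are guesses):

import argparse

parser = argparse.ArgumentParser(description='Run an ONNX model via TensorRT')
parser.add_argument('test_dir')
parser.add_argument('--model_file', default=None)
parser.add_argument('--debug', action='store_true')
parser.add_argument('--fp16_mode', action='store_true')
parser.add_argument('--batch_size', type=int, default=1)
parser.add_argument('--rtol', type=float, default=1e-3)
parser.add_argument('--atol', type=float, default=1e-4)
parser.add_argument('--iterations', type=int, default=1)
run(parser.parse_args())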
Example #33
def sum(input, labels=None, index=None):
    """Calculates the sum of the values of an n-D image array, optionally
       at specified sub-regions.

    Args:
        input (cupy.ndarray): Nd-image data to process.
        labels (cupy.ndarray or None): Labels defining sub-regions in `input`.
            If not None, must be same shape as `input`.
        index (cupy.ndarray or None): `labels` to include in output. If None
            (default), all values where `labels` is non-zero are used.

    Returns:
       sum (cupy.ndarray): sum of values, for each sub-region if
       `labels` and `index` are specified.

    .. seealso:: :func:`scipy.ndimage.sum`
    """
    if not isinstance(input, cupy.ndarray):
        raise TypeError('input must be cupy.ndarray')

    if input.dtype in (cupy.complex64, cupy.complex128):
        raise TypeError('cupyx.scipy.ndimage.sum does not support {}'.format(
            input.dtype.type))

    use_kern = False
    # There are constraints on the dtype because the fast path relies on
    # CUDA's atomicAdd().
    if input.dtype not in [
            cupy.int32, cupy.float16, cupy.float32, cupy.float64, cupy.uint32,
            cupy.uint64, cupy.ulonglong
    ]:
        warnings.warn(
            'Using the slower implementation because '
            'cupyx.scipy.ndimage.sum only supports int32, float16, '
            'float32, float64, uint32, uint64 as data types '
            'for the fast implementation', _util.PerformanceWarning)
        use_kern = True

    if labels is None:
        return input.sum()

    if not isinstance(labels, cupy.ndarray):
        raise TypeError('labels must be cupy.ndarray')

    input, labels = cupy.broadcast_arrays(input, labels)

    if index is None:
        return input[labels != 0].sum()

    if not isinstance(index, cupy.ndarray):
        if not isinstance(index, int):
            raise TypeError('index must be cupy.ndarray or a scalar int')
        else:
            return (input[labels == index]).sum()

    if index.size == 0:
        return cupy.array([], dtype=cupy.int64)

    out = cupy.zeros_like(index, dtype=cupy.float64)

    # The following parameters for sum were determined using a Tesla P100.
    if (input.size >= 262144 and index.size <= 4) or use_kern:
        return _ndimage_sum_kernel_2(input, labels, index, out)
    return _ndimage_sum_kernel(input, labels, index, index.size, out)
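
A quick usage sketch of the function above (it assumes the private reduction kernels it dispatches to are importable alongside it): with `labels` marking two regions and `index` selecting both, one call returns one sum per region.

import cupy

image = cupy.asarray([[1., 2.], [3., 4.]])
labels = cupy.asarray([[1, 1], [2, 2]])

print(sum(image))                                # 10.0 over the whole image
print(sum(image, labels, cupy.asarray([1, 2])))  # [3., 7.], one sum per label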
Example #34
    def test_fetch_float_texture(self):
        width, height, depth = self.dimensions
        dim = 3 if depth != 0 else 2 if height != 0 else 1

        if (self.mem_type == 'linear' and dim != 1) or \
           (self.mem_type == 'pitch2D' and dim != 2):
            pytest.skip('The test case {0} is inapplicable for {1} and thus '
                        'skipped.'.format(self.dimensions, self.mem_type))

        # determine the array shape for this dimensionality
        shape = (depth, height, width) if dim == 3 else \
                (height, width) if dim == 2 else \
                (width,)

        # prepare input, output, and texture memory
        tex_data = cupy.random.random(shape, dtype=cupy.float32)
        real_output = cupy.zeros_like(tex_data)
        ch = ChannelFormatDescriptor(32, 0, 0, 0,
                                     runtime.cudaChannelFormatKindFloat)
        assert tex_data.flags['C_CONTIGUOUS']
        assert real_output.flags['C_CONTIGUOUS']
        if self.mem_type == 'CUDAarray':
            arr = CUDAarray(ch, width, height, depth)
            expected_output = cupy.zeros_like(tex_data)
            assert expected_output.flags['C_CONTIGUOUS']
            # test bidirectional copy
            arr.copy_from(tex_data)
            arr.copy_to(expected_output)
        else:  # linear and pitch2D are backed by ndarray
            arr = tex_data
            expected_output = tex_data

        # create resource and texture descriptors
        if self.mem_type == 'CUDAarray':
            res = ResourceDescriptor(runtime.cudaResourceTypeArray, cuArr=arr)
        elif self.mem_type == 'linear':
            res = ResourceDescriptor(runtime.cudaResourceTypeLinear,
                                     arr=arr,
                                     chDesc=ch,
                                     sizeInBytes=arr.size * arr.dtype.itemsize)
        else:  # pitch2D
            # In this case, we rely on the fact that the hand-picked array
            # shape meets the alignment requirement. This is CUDA's limitation,
            # see CUDA Runtime API reference guide. "TexturePitchAlignment" is
            # assumed to be 32, which should be applicable for most devices.
            res = ResourceDescriptor(runtime.cudaResourceTypePitch2D,
                                     arr=arr,
                                     chDesc=ch,
                                     width=width,
                                     height=height,
                                     pitchInBytes=width * arr.dtype.itemsize)
        address_mode = (runtime.cudaAddressModeClamp,
                        runtime.cudaAddressModeClamp)
        tex = TextureDescriptor(address_mode, runtime.cudaFilterModePoint,
                                runtime.cudaReadModeElementType)

        if self.target == 'object':
            # create a texture object
            texobj = TextureObject(res, tex)
            mod = cupy.RawModule(source_obj)
        else:  # self.target == 'reference'
            mod = cupy.RawModule(source_ref)
            texref_name = 'texref'
            texref_name += '3D' if dim == 3 else '2D' if dim == 2 else '1D'
            texrefPtr = mod.get_texref(texref_name)
            # bind texture ref to resource
            texref = TextureReference(texrefPtr, res, tex)  # noqa

        # get and launch the kernel
        ker_name = 'copyKernel'
        ker_name += '3D' if dim == 3 else '2D' if dim == 2 else '1D'
        ker_name += 'fetch' if self.mem_type == 'linear' else ''
        ker = mod.get_function(ker_name)
        block = (4, 4, 2) if dim == 3 else (4, 4) if dim == 2 else (4, )
        grid = ()
        args = (real_output, )
        if self.target == 'object':
            args = args + (texobj, )
        if dim >= 1:
            grid_x = (width + block[0] - 1) // block[0]
            grid = grid + (grid_x, )
            args = args + (width, )
        if dim >= 2:
            grid_y = (height + block[1] - 1) // block[1]
            grid = grid + (grid_y, )
            args = args + (height, )
        if dim == 3:
            grid_z = (depth + block[2] - 1) // block[2]
            grid = grid + (grid_z, )
            args = args + (depth, )
        ker(grid, block, args)

        # validate result
        assert (real_output == expected_output).all()
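
`source_obj` and `source_ref` hold the kernel sources and are defined elsewhere in the test file; a plausible 1-D variant of the texture-object copy kernel is sketched below (a hypothetical reconstruction; the real module also defines 2-D/3-D and `fetch` variants):

# hypothetical sketch of the 1-D texture-object kernel behind `source_obj`
source_obj = r'''
extern "C" __global__
void copyKernel1D(float* output, cudaTextureObject_t texObj, int width)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    if (x < width) {
        // point filtering with unnormalized coordinates: sample texel x
        output[x] = tex1D<float>(texObj, x + 0.5f);
    }
}
'''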
Example #35
    def test_backward(self):
        images = np.random.normal(size=(10, 32, 32, 3)).astype('float32')
        x = np.tile(
            np.arange(32).astype('float32')[None, None, :, None],
            (10, 32, 1, 1))
        y = np.tile(
            np.arange(32).astype('float32')[None, :, None, None],
            (10, 1, 32, 1))
        coordinates = np.concatenate((x, y), axis=-1)
        coordinates = ((coordinates / 31) * 2 - 1) * 31. / 32.
        noise = np.random.normal(size=(10, 32, 32, 3)).astype('float32')
        step = 2 / 32.

        images = chainer.cuda.to_gpu(images)
        coordinates = chainer.Variable(chainer.cuda.to_gpu(coordinates))
        noise = chainer.cuda.to_gpu(noise)

        loss = cf.sum(
            neural_renderer_chainer.differentiation(images, coordinates) *
            noise)
        loss.backward()

        grad_coordinates = coordinates.grad

        for i in range(100):
            yi = np.random.randint(1, 31)
            xi = np.random.randint(1, 31)

            images_yb = images.copy()
            images_yb[:, yi - 1, xi] = images[:, yi, xi].copy()
            images_yb[:, yi, xi] = images[:, yi + 1, xi].copy()
            grad_yb = ((images_yb - images) * noise).sum((1, 2, 3)) / step
            grad_yb = cp.minimum(grad_yb, cp.zeros_like(grad_yb))

            images_yt = images.copy()
            images_yt[:, yi + 1, xi] = images[:, yi, xi].copy()
            images_yt[:, yi, xi] = images[:, yi - 1, xi].copy()
            grad_yt = ((images_yt - images) * noise).sum((1, 2, 3)) / step
            grad_yt = cp.minimum(grad_yt, cp.zeros_like(grad_yt))

            grad_y_abs = cp.maximum(cp.abs(grad_yb), cp.abs(grad_yt))

            chainer.testing.assert_allclose(
                grad_y_abs, cp.abs(grad_coordinates[:, yi, xi, 1]))

            images_xl = images.copy()
            images_xl[:, yi, xi - 1] = images[:, yi, xi].copy()
            images_xl[:, yi, xi] = images[:, yi, xi + 1].copy()
            grad_xl = ((images_xl - images) * noise).sum((1, 2, 3)) / step
            grad_xl = cp.minimum(grad_xl, cp.zeros_like(grad_xl))

            images_xr = images.copy()
            images_xr[:, yi, xi + 1] = images[:, yi, xi].copy()
            images_xr[:, yi, xi] = images[:, yi, xi - 1].copy()
            grad_xr = ((images_xr - images) * noise).sum((1, 2, 3)) / step
            grad_xr = cp.minimum(grad_xr, cp.zeros_like(grad_xr))

            grad_x_abs = cp.maximum(cp.abs(grad_xl), cp.abs(grad_xr))

            chainer.testing.assert_allclose(
                grad_x_abs, cp.abs(grad_coordinates[:, yi, xi, 0]))
Example #36
File: _denoise.py  Project: grlee77/cucim
def _denoise_tv_chambolle_nd(image, weight=0.1, eps=2.0e-4, n_iter_max=200):
    """Perform total-variation denoising on n-dimensional images.

    Parameters
    ----------
    image : ndarray
        n-D input data to be denoised.
    weight : float, optional
        Denoising weight. The greater `weight`, the more denoising (at
        the expense of fidelity to `input`).
    eps : float, optional
        Relative difference of the value of the cost function that determines
        the stop criterion. The algorithm stops when:

            (E_(n-1) - E_n) < eps * E_0

    n_iter_max : int, optional
        Maximal number of iterations used for the optimization.

    Returns
    -------
    out : ndarray
        Denoised array of floats.

    Notes
    -----
    Rudin, Osher and Fatemi algorithm.

    """

    ndim = image.ndim
    p = cp.zeros((image.ndim, ) + image.shape, dtype=image.dtype)
    g = cp.zeros_like(p)
    d = cp.zeros_like(image)
    i = 0
    slices_g = [slice(None)] * (ndim + 1)
    slices_d = [slice(None)] * ndim
    slices_p = [slice(None)] * (ndim + 1)
    while i < n_iter_max:
        if i > 0:
            # d will be the (negative) divergence of p
            d = -p.sum(0)
            for ax in range(ndim):
                slices_d[ax] = slice(1, None)
                slices_p[ax + 1] = slice(0, -1)
                slices_p[0] = ax
                d[tuple(slices_d)] += p[tuple(slices_p)]
                slices_d[ax] = slice(None)
                slices_p[ax + 1] = slice(None)
            out = image + d
            E = (d * d).sum()
        else:
            out = image
            E = 0.0

        # g stores the gradients of out along each axis
        # e.g. g[0] is the first order finite difference along axis 0
        for ax in range(ndim):
            slices_g[ax + 1] = slice(0, -1)
            slices_g[0] = ax
            g[tuple(slices_g)] = cp.diff(out, axis=ax)
            slices_g[ax + 1] = slice(None)

        norm = (g * g).sum(axis=0, keepdims=True)
        cp.sqrt(norm, out=norm)
        E += weight * norm.sum()
        tau = 1.0 / (2.0 * ndim)
        norm *= tau / weight
        norm += 1.0
        p -= tau * g
        p /= norm
        E /= float(image.size)
        if i == 0:
            E_init = E
            E_previous = E
        else:
            if abs(E_previous - E) < eps * E_init:
                break
            else:
                E_previous = E
        i += 1
    return out
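
A short usage sketch for the helper above, denoising a synthetic noisy square (a minimal sketch; in cucim the public entry point is `denoise_tv_chambolle`, which wraps this function):

import cupy as cp

# build a clean test image and add Gaussian noise
clean = cp.zeros((64, 64), dtype=cp.float32)
clean[16:48, 16:48] = 1.0
noisy = clean + 0.2 * cp.random.standard_normal(clean.shape, dtype=cp.float32)

denoised = _denoise_tv_chambolle_nd(noisy, weight=0.1)

def total_variation(im):
    return sum(float(cp.abs(cp.diff(im, axis=ax)).sum()) for ax in range(im.ndim))

# denoising should reduce total variation
assert total_variation(denoised) < total_variation(noisy)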
Example #37
popcount_log       = []


for in_dim in in_dims:
    for out_dim in out_dims:
        binarize_time       = 0
        preprocess_time     = 0
        preprocess_vec_time = 0
        xnor_time           = 0
        popcount_time       = 0
        
        for _ in range(1):
            W = cupy.random.rand(out_dim, in_dim)-0.5
            x = cupy.random.rand(in_dim, )-0.5
            
            yw = cupy.zeros_like(W)
            yx = cupy.zeros_like(x)
            
            s              = time.time()
            Wb             = _binarize()(W, yw)
            xb             = _binarize()(x, yx)
            Wb             = Wb.astype('int32')
            xb             = xb.astype('int32')
            binarize_time += time.time()-s
            
            s = time.time()
            Wb = _preprocess()(Wb,
                               Wb.shape[0],
                               cupy.zeros((Wb.shape[0], Wb.shape[1]//32)).astype("int32"),
                               size=Wb.shape[1]
                              )
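
`_binarize` and `_preprocess` are kernel factories defined earlier in the benchmark script. Matching the call pattern `_binarize()(W, yw)` above, `_binarize` is presumably a sign-binarization kernel; a hypothetical reconstruction as a cupy.ElementwiseKernel:

import cupy

def _binarize():
    # writes +1/-1 into the preallocated output array `y`
    return cupy.ElementwiseKernel(
        'T x', 'T y',
        'y = x >= 0 ? 1 : -1',
        'binarize')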
Example #38
File: Agent.py  Project: thinkother/DQN
    def train(self):
        # clear grads
        self.q_func.zerograds()

        # pull tuples from memory pool
        batch_tuples = self.replay.pull(Config.batch_size)
        if not len(batch_tuples):
            return

        # stack inputs
        cur_x = [self.env.getX(t.state) for t in batch_tuples]
        next_x = [self.env.getX(t.next_state) for t in batch_tuples]
        # merge inputs into one array
        if Config.gpu:
            cur_x = [cupy.expand_dims(t, 0) for t in cur_x]
            cur_x = cupy.concatenate(cur_x, 0)
            next_x = [cupy.expand_dims(t, 0) for t in next_x]
            next_x = cupy.concatenate(next_x, 0)
        else:
            cur_x = np.stack(cur_x)
            next_x = np.stack(next_x)

        # get cur outputs
        cur_output = self.QFunc(self.q_func, cur_x)
        # get next outputs, NOT target
        next_output = self.QFunc(self.q_func, next_x)
        # choose next action for each output
        next_action = [
            self.env.getBestAction(
                o.data,
                [t.next_state for t in batch_tuples]
            ) for o in next_output  # for each head in Model
        ]
        # get next outputs, target
        next_output = self.QFunc(self.target_q_func, next_x)

        # clear err of tuples
        for t in batch_tuples:
            t.err = 0.
        # store err count
        err_count_list = [0.] * len(batch_tuples)

        # compute importance-sampling weights for the grads
        weights = np.array([t.P for t in batch_tuples], np.float32)
        if Config.gpu:
            weights = cuda.to_gpu(weights)
        if self.replay.getPoolSize():
            weights *= self.replay.getPoolSize()
        weights = weights ** -Config.beta
        weights /= weights.max()
        if Config.gpu:
            weights = cupy.expand_dims(weights, 1)
        else:
            weights = np.expand_dims(weights, 1)

        # update beta
        Config.beta = min(1, Config.beta + Config.beta_add)

        # compute grad for each head
        for k in range(Config.K):
            if Config.gpu:
                cur_output[k].grad = cupy.zeros_like(cur_output[k].data)
            else:
                cur_output[k].grad = np.zeros_like(cur_output[k].data)
            # compute grad from each tuples
            for i in range(len(batch_tuples)):
                if batch_tuples[i].mask[k]:
                    cur_action_value = \
                        cur_output[k].data[i][batch_tuples[i].action].tolist()
                    reward = batch_tuples[i].reward
                    next_action_value = \
                        next_output[k].data[i][next_action[k][i]].tolist()
                    target_value = reward
                    # if not empty position, not terminal state
                    if batch_tuples[i].next_state.in_game:
                        target_value += Config.gamma * next_action_value
                    loss = cur_action_value - target_value
                    cur_output[k].grad[i][batch_tuples[i].action] = 2 * loss
                    # count err
                    if cur_action_value:
                        batch_tuples[i].err += abs(loss / cur_action_value)
                        err_count_list[i] += 1

            # multiply weights with grad and clip
            if Config.gpu:
                cur_output[k].grad = cupy.multiply(
                    cur_output[k].grad, weights)
                cur_output[k].grad = cupy.clip(cur_output[k].grad, -1, 1)
            else:
                cur_output[k].grad = np.multiply(
                    cur_output[k].grad, weights)
                cur_output[k].grad = np.clip(cur_output[k].grad, -1, 1)
            # backward
            cur_output[k].backward()

        # adjust grads of shared
        for param in self.q_func.shared.params():
            param.grad /= Config.K

        # update params
        self.optimizer.update()

        # avg err
        for i in range(len(batch_tuples)):
            if err_count_list[i] > 0:
                batch_tuples[i].err /= err_count_list[i]

        self.replay.merge(Config.alpha)

        return np.mean([t.err for t in batch_tuples])
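
The weight block in train() is the importance-sampling correction from prioritized experience replay: w_i = (N * P_i) ** -beta, normalized by the largest weight so updates are only ever scaled down. The same computation as a standalone sketch:

import numpy as np

def importance_weights(priorities, pool_size, beta):
    # w_i = (N * P_i) ** -beta, scaled so the largest weight equals 1
    w = (pool_size * np.asarray(priorities, dtype=np.float32)) ** -beta
    return w / w.max()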