Ejemplo n.º 1
0
def main():
    """Validate the GPU ``gauss_spline`` against SciPy, then time it.

    The number of timed GPU repetitions is read from ``sys.argv[1]``.
    A mismatch between the CPU and GPU results raises ``AssertionError``
    before any repeated timing happens.
    """
    loops = int(sys.argv[1])

    # Draw the spline parameter first so the random stream is consumed in
    # the same order as the data generation that follows.
    spline_n = np.random.randint(0, 1234)

    num_samps = 2**16
    host_signal, device_signal = rand_data_gen_gpu(num_samps)

    # Reference result from scipy.signal.gauss_spline on the host data.
    with prof.time_range("scipy_gauss_spline", 0):
        expected = signal.gauss_spline(host_signal, spline_n)

    # Same computation on the device data.
    with prof.time_range("cupy_gauss_spline", 1):
        device_result = gauss_spline(device_signal, spline_n)

    # The device result is copied to the host and must match to rtol=1e-3.
    np.testing.assert_allclose(
        expected, cp.asnumpy(device_result), rtol=1e-3)

    # Repeated passes so the profiler can report an average; each range
    # includes an explicit device synchronization.
    for _ in range(loops):
        with prof.time_range("cupy_gauss_spline_loop", 2):
            device_result = gauss_spline(device_signal, spline_n)
            cp.cuda.runtime.deviceSynchronize()
def main():
    """Check the EWK ``signal`` routine against ``cupy_signal`` and time both.

    The number of timed repetitions per implementation is read from
    ``sys.argv[1]``. All four outputs (amplitude, phase, real, imaginary)
    must agree to rtol=1e-3 before the timing loops run.
    """
    loops = int(sys.argv[1])

    num_samps = 2**16

    # Complex test signal: real part is drawn first, imaginary part second.
    real_part = np.random.rand(num_samps)
    imag_part = np.random.rand(num_samps)
    cpu_sig = real_part + 1.0j * imag_part
    gpu_sig = cp.array(cpu_sig)

    # Baseline implementation.
    with prof.time_range("CuPy signal", 0):
        amp, phase, real, imag = cupy_signal(gpu_sig)

    # EWK implementation.
    with prof.time_range("EWK signal", 1):
        amp_EWK, phase_EWK, real_EWK, imag_EWK = signal(gpu_sig)

    # Compare the outputs pairwise, in the order amp, phase, real, imag.
    baseline_outputs = (amp, phase, real, imag)
    ewk_outputs = (amp_EWK, phase_EWK, real_EWK, imag_EWK)
    for baseline, candidate in zip(baseline_outputs, ewk_outputs):
        cp.testing.assert_allclose(baseline, candidate, rtol=1e-3)

    # Repeated baseline passes for an average timing.
    for _ in range(loops):
        with prof.time_range("cupy_signal_avg", 2):
            amp, phase, real, imag = cupy_signal(gpu_sig)
            cp.cuda.runtime.deviceSynchronize()

    # Repeated EWK passes for an average timing.
    for _ in range(loops):
        with prof.time_range("ewk_signal_avg", 3):
            amp_EWK, phase_EWK, real_EWK, imag_EWK = signal(gpu_sig)
            cp.cuda.runtime.deviceSynchronize()
Ejemplo n.º 3
0
def main():
    """Time ``scipy.signal.gauss_spline`` on randomly generated host data.

    One initial run is followed by ``int(sys.argv[1])`` repeated runs, each
    wrapped in its own profiler range.
    """
    loops = int(sys.argv[1])

    # Draw the spline parameter before generating the signal so the random
    # stream is consumed in the same order as before.
    spline_n = np.random.randint(0, 1234)

    num_samps = 2**16
    host_signal = rand_data_gen(num_samps)

    # Single baseline run.
    with prof.time_range("scipy_gauss_spline", 0):
        result = signal.gauss_spline(host_signal, spline_n)

    # Repeated passes so the profiler can report an average.
    for _ in range(loops):
        with prof.time_range("scipy_gauss_spline_loop", 0):
            result = signal.gauss_spline(host_signal, spline_n)
Ejemplo n.º 4
0
 def test_time_range(self):
     """``prof.time_range`` calls RangePush once with (name, color_id) and
     RangePop once with no arguments."""
     with mock.patch('cupy.cuda.nvtx.RangePush') as range_push, \
             mock.patch('cupy.cuda.nvtx.RangePop') as range_pop:
         with prof.time_range('test:time_range', color_id=-1):
             pass
         range_push.assert_called_once_with('test:time_range', -1)
         range_pop.assert_called_once_with()
Ejemplo n.º 5
0
 def test_time_range(self):
     """A ``time_range`` block with ``color_id=-1`` issues exactly one
     patched RangePush call and one patched RangePop call."""
     range_name = 'test:time_range'
     patched_push = mock.patch('cupy.cuda.nvtx.RangePush')
     patched_pop = mock.patch('cupy.cuda.nvtx.RangePop')
     with patched_push as nvtx_push, patched_pop as nvtx_pop:
         with prof.time_range(range_name, color_id=-1):
             pass
         # The name and colour id are forwarded positionally to RangePush.
         nvtx_push.assert_called_once_with(range_name, -1)
         nvtx_pop.assert_called_once_with()
Ejemplo n.º 6
0
 def update(self, lossfun=None, *args, **kwds):
     """Run one update of the wrapped optimizer inside an 'iteration' range.

     Args:
         lossfun: Forwarded unchanged to ``self.actual_optimizer.update``.
         *args: Extra positional arguments forwarded to the wrapped update.
         **kwds: Extra keyword arguments forwarded to the wrapped update.

     Returns:
         Whatever ``self.actual_optimizer.update`` returns.
     """
     # Always bind ``iter_sync``. It used to be assigned only when
     # ``self._sync`` was truthy, so running with sync disabled raised
     # UnboundLocalError at the ``time_range`` call below.
     iter_sync = bool(self._sync)
     with prof.time_range('iteration',
                          sync=iter_sync,
                          argb_color=_itr_argb_color):
         ret = self.actual_optimizer.update(lossfun, *args, **kwds)
     return ret
Ejemplo n.º 7
0
 def test_time_range_with_ARGB(self):
     """With ``argb_color`` given, ``prof.time_range`` calls RangePushC once
     with (name, colour) and RangePop once with no arguments."""
     with mock.patch('cupy.cuda.nvtx.RangePushC') as range_push_c, \
             mock.patch('cupy.cuda.nvtx.RangePop') as range_pop:
         with prof.time_range('test:time_range_with_ARGB',
                              argb_color=0xFF00FF00):
             pass
         range_push_c.assert_called_once_with(
             'test:time_range_with_ARGB', 0xFF00FF00)
         range_pop.assert_called_once_with()
Ejemplo n.º 8
0
 def test_time_range_with_ARGB(self):
     """An ARGB-coloured ``time_range`` block issues exactly one patched
     RangePushC call and one patched RangePop call."""
     range_name = 'test:time_range_with_ARGB'
     argb = 0xFF00FF00
     patched_push = mock.patch('cupy.cuda.nvtx.RangePushC')
     patched_pop = mock.patch('cupy.cuda.nvtx.RangePop')
     with patched_push as nvtx_push_c, patched_pop as nvtx_pop:
         with prof.time_range(range_name, argb_color=argb):
             pass
         # The name and ARGB value are forwarded positionally to RangePushC.
         nvtx_push_c.assert_called_once_with(range_name, argb)
         nvtx_pop.assert_called_once_with()
Ejemplo n.º 9
0
 def test_time_range_err(self):
     """RangePop is still called once when the ``time_range`` body raises."""
     with mock.patch('cupy.cuda.nvtx.RangePush') as range_push, \
             mock.patch('cupy.cuda.nvtx.RangePop') as range_pop:
         try:
             with prof.time_range('test:time_range_error', -1):
                 raise Exception()
         except Exception:
             # The exception only exists to leave the range abnormally.
             pass
         range_push.assert_called_once_with('test:time_range_error', -1)
         range_pop.assert_called_once_with()
Ejemplo n.º 10
0
 def test_time_range_err(self):
     """An exception inside the ``time_range`` block must not prevent the
     single RangePush / RangePop pair from being issued."""
     range_name = 'test:time_range_error'
     patched_push = mock.patch('cupy.cuda.nvtx.RangePush')
     patched_pop = mock.patch('cupy.cuda.nvtx.RangePop')
     with patched_push as nvtx_push, patched_pop as nvtx_pop:
         try:
             with prof.time_range(range_name, -1):
                 raise Exception()
         except Exception:
             # Swallowed on purpose: only the push/pop bookkeeping is
             # under test here.
             pass
         nvtx_push.assert_called_once_with(range_name, -1)
         nvtx_pop.assert_called_once_with()
Ejemplo n.º 11
0
    def backward(self, *args, **kwargs):
        """Run the wrapped variable's ``backward`` inside profiling ranges.

        All arguments are forwarded to ``self._variable.backward`` and its
        return value is returned unchanged.
        """
        # Synchronization is off unless the wrapper was created with sync
        # enabled; it is then switched on per range by the sync level.
        bwd_sync = bwd_each_sync = False
        if self._sync:
            bwd_sync = self._sync_level >= SyncLevel.SECOND
            bwd_each_sync = self._sync_level >= SyncLevel.FINEST

        outer_range = prof.time_range('model.backward',
                                      sync=bwd_sync,
                                      argb_color=_bwd_argb_color)
        with outer_range:
            with FwdBwdProfileMarkHook(sync=bwd_each_sync,
                                       argb_color=_bwd_argb_color):
                result = self._variable.backward(*args, **kwargs)
        return result
Ejemplo n.º 12
0
    def forward_wrapper(*args, **kwargs):
        """Call the link's original forward inside profiling ranges.

        Returns the loss wrapped in ``_VariableWrapper`` together with the
        enclosing ``sync`` and ``sync_level`` settings.
        """
        # Optionally open an 'iteration' range here. It is not closed in
        # this function.
        if seprately_mark_for_iter and sync_level >= SyncLevel.COARSEST:
            range_push(sync, 'iteration', _itr_argb_color)

        # Synchronization is off unless ``sync`` is set; it is then
        # switched on per range by the sync level.
        fwd_sync = fwd_each_sync = False
        if sync:
            fwd_sync = sync_level >= SyncLevel.SECOND
            fwd_each_sync = sync_level >= SyncLevel.FINEST

        outer_range = prof.time_range('model.forward',
                                      sync=fwd_sync,
                                      argb_color=_fwd_argb_color)
        with outer_range:
            with FwdBwdProfileMarkHook(sync=fwd_each_sync,
                                       argb_color=_fwd_argb_color):
                loss = link._org_forward(*args, **kwargs)
        return _VariableWrapper(loss, sync, sync_level)
          (libcudnn.CUDNN_PARAM_WDATA_PLACEHOLDER, ptr_ph),
          (libcudnn.CUDNN_PARAM_YDESC, y_desc),
          (libcudnn.CUDNN_PARAM_YDATA_PLACEHOLDER, ptr_ph),
          (libcudnn.CUDNN_PARAM_YSTATS_DESC, ysum_desc),
          (libcudnn.CUDNN_PARAM_YSUM_PLACEHOLDER, ptr_ph),
          (libcudnn.CUDNN_PARAM_YSQSUM_PLACEHOLDER, ptr_ph)))

# Build the fused-ops plan from `plan` and `const_pack`. The returned value is
# used below both as the workspace length and as the value passed for
# CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES.
workspace_size = cudnn.make_fused_ops_plan(plan, const_pack)
# int8 elements are one byte each, so this buffer holds `workspace_size` bytes.
workspace = cupy.empty((workspace_size,), dtype=numpy.int8)
# Debug aid (disabled): print the workspace size.
# print('workspace_size: {}'.format(workspace_size))

# Variant parameter pack: pairs each CUDNN_PTR_* / CUDNN_SCALAR_* key with the
# object supplied for this execution (inputs, outputs and the workspace).
var_pack = cudnn.create_fused_ops_variant_param_pack(
    ops, ((libcudnn.CUDNN_PTR_XDATA, x),
          (libcudnn.CUDNN_PTR_BN_EQSCALE, scale),
          (libcudnn.CUDNN_PTR_BN_EQBIAS, bias),
          (libcudnn.CUDNN_PTR_WDATA, w),
          (libcudnn.CUDNN_PTR_YDATA, y),
          (libcudnn.CUDNN_PTR_YSUM, ysum),
          (libcudnn.CUDNN_PTR_YSQSUM, ysqsum),
          (libcudnn.CUDNN_PTR_WORKSPACE, workspace),
          (libcudnn.CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES,
           workspace_size)))

# Execute the plan inside a profiling range.
# NOTE(review): sync=True presumably makes the range wait for the device
# before closing - confirm against prof.time_range.
with prof.time_range('fusedOpsExecute', color_id=1, sync=True):
    cudnn.fused_ops_execute(plan, var_pack)

#

# Print both statistics reshaped with `y_c`. NOTE: `(y_c)` is a parenthesised
# expression, not a 1-tuple, so both reshape calls receive the same argument.
print('per-channel ysum:\n{}'.format(ysum.reshape((y_c))))
print('per-channel ysqsum:\n{}'.format(ysqsum.reshape(y_c)))
Ejemplo n.º 14
0
    x = x[r >= frac_points]
    y = A * np.cos(w * x + phi)
    f = np.linspace(0.01, 10, out_samps)

    # Cast the inputs to float32 when dtype == 'float32' was requested
    if dtype == 'float32':
        x = x.astype(np.float32)
        y = y.astype(np.float32)
        f = f.astype(np.float32)

    d_x = cp.array(x)
    d_y = cp.array(y)
    d_f = cp.array(f)

    # Run baseline with scipy.signal.lombscargle
    with prof.time_range("scipy_lombscargle", 0):
        cpu_lombscargle = signal.lombscargle(x, y, f)

    # Run GPU version (inputs are CuPy device arrays)
    with prof.time_range("cupy_lombscargle", 1):
        gpu_lombscargle = lombscargle(d_x, d_y, d_f)

    # Copy result to host
    gpu_lombscargle = cp.asnumpy(gpu_lombscargle)

    # Compare results
    np.testing.assert_allclose(cpu_lombscargle, gpu_lombscargle, 1e-3)

    # Run multiple passes to get average
    for _ in range(loops):
        with prof.time_range("cupy_lombscargle_loop", 2):
Ejemplo n.º 15
0
def divide_chunks(l, n):
    """Yield consecutive slices of ``l`` holding at most ``n`` items each.

    Slices are produced lazily, in order. The last one is shorter when
    ``len(l)`` is not a multiple of ``n``; an empty ``l`` yields nothing.
    """
    chunk_starts = range(0, len(l), n)
    yield from (l[begin:begin + n] for begin in chunk_starts)


if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the whole job
    # inside a single "run" profiling range.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--imageFolder", help="Image folder location", default=[])

    # The remaining options are all integers that default to 1.
    for _flag, _description in (
            ("--num_gpus", "Number of GPUs"),
            ("--num_read_processes", "Number of Read Processes (DALI)"),
            ("--batch_size", "Read batch size")):
        parser.add_argument(_flag, help=_description, default=1, type=int)

    args = parser.parse_args()

    with prof.time_range("run", 0):
        run(args)
Ejemplo n.º 16
0
 def test_time_range(self):
     """Using ``prof.time_range`` is expected to raise ``RuntimeError``.

     NOTE(review): presumably this variant runs where CUDA/NVTX is
     unavailable - confirm against the enclosing test class.
     """
     with self.assertRaises(RuntimeError), prof.time_range(''):
         pass
Ejemplo n.º 17
0
 def test_time_range(self):
     """Creating and entering ``prof.time_range('')`` must raise
     ``RuntimeError``.

     NOTE(review): presumably this variant runs where CUDA/NVTX is
     unavailable - confirm against the enclosing test class.
     """
     def enter_empty_range():
         with prof.time_range(''):
             pass

     self.assertRaises(RuntimeError, enter_empty_range)