Exemple #1
0
 def test_concat_arrays_gpu(self):
     """Run ``check_concat_arrays`` on the int and float array fixtures.

     Each call is given the id of the current CUDA device and the dtype
     the result is expected to have.
     """
     int_device_id = cuda.Device().id
     self.check_concat_arrays(
         self.int_arrays, device=int_device_id, expected_type=numpy.int64)

     float_device_id = cuda.Device().id
     self.check_concat_arrays(
         self.float_arrays, device=float_device_id,
         expected_type=numpy.float64)
Exemple #2
0
def _det_gpu(b):
    """Compute determinants on the GPU with a batched LU decomposition.

    The input is copied and LU-factorized in place with cuBLAS
    ``getrfBatched``; the determinant is the product of the diagonal of the
    factorized matrix, with its sign flipped according to the parity of the
    row exchanges recorded in the pivot array.

    Args:
        b: Input array. Only ``float32`` and ``float64`` dtypes are
            supported.

    Returns:
        tuple: ``(det, info)`` where ``det`` holds one determinant per
        matrix and ``info`` is the int32 status array written by
        ``getrfBatched``.

    Raises:
        AssertionError: If ``b.dtype`` is neither ``float32`` nor
            ``float64``.
    """
    # Change the shape of the array to be size=1 minibatch if necessary.
    # Also copy the matrix as the elements will be modified in-place.
    a = matmul._as_batch_mat(b).copy()
    n = a.shape[1]
    n_matrices = len(a)
    # Pivot array
    p = cuda.cupy.zeros((n_matrices, n), dtype='int32')
    # This array holds information on the execution success
    # or if the matrix was singular. cuBLAS writes 32-bit integers into it,
    # so it must be int32; a wider dtype would mis-pack the reported values.
    info = cuda.cupy.zeros(n_matrices, dtype=numpy.int32)
    ap = matmul._mat_ptrs(a)
    _, lda = matmul._get_ld(a)
    if b.dtype == numpy.float32:
        cuda.cublas.sgetrfBatched(cuda.Device().cublas_handle, n, ap.data.ptr,
                                  lda, p.data.ptr, info.data.ptr, n_matrices)
    elif b.dtype == numpy.float64:
        cuda.cublas.dgetrfBatched(cuda.Device().cublas_handle, n, ap.data.ptr,
                                  lda, p.data.ptr, info.data.ptr, n_matrices)
    else:
        # Raised explicitly so the check survives ``python -O``; otherwise
        # the unfactorized copy would silently be used below.
        raise AssertionError(
            'unsupported dtype for _det_gpu: {}'.format(b.dtype))
    det = cuda.cupy.prod(a.diagonal(axis1=1, axis2=2), axis=1)
    # The determinant is equal to the product of the diagonal entries
    # of `a` where the sign of `a` is flipped depending on whether
    # the pivot array is equal to its index.
    rng = cuda.cupy.arange(1, n + 1, dtype='int32')
    parity = cuda.cupy.sum(p != rng, axis=1) % 2
    sign = 1. - 2. * parity.astype(b.dtype, copy=False)
    return det * sign, info
Exemple #3
0
 def test_cupy_array2(self):
     """Copy an array created under device 0 to the host under device 1."""
     with cuda.Device(0):
         gpu_array = cuda.to_gpu(self.x)
         if not self.c_contiguous:
             gpu_array = cuda.cupy.asfortranarray(gpu_array)

     with cuda.Device(1):
         host_array = cuda.to_cpu(gpu_array)

     self.assertIsInstance(host_array, numpy.ndarray)
     numpy.testing.assert_array_equal(self.x, host_array)
Exemple #4
0
    def check_device_spec_cupy(self, device_spec, expected_device_id):
        """Assert that ``device_spec`` resolves to the expected CuPy device.

        Args:
            device_spec: Specifier handed to ``backend.get_device`` and
                ``backend.using_device``.
            expected_device_id: CUDA device id the resolved device must
                carry.
        """
        resolved = backend.get_device(device_spec)
        assert isinstance(resolved, backend.GpuDevice)

        cuda_device = resolved.device
        assert isinstance(cuda_device, cuda.Device)
        assert resolved.xp is cuda.cupy
        assert cuda_device.id == expected_device_id

        with backend.using_device(device_spec):
            # TODO(niboshi): Test the Chainer default device
            expected_current = cuda.Device(expected_device_id)
            assert cuda.Device() == expected_current
Exemple #5
0
 def test_cupy_array_async3(self):
     """``to_gpu`` with ``stream`` under another device warns and copies."""
     with cuda.Device(0):
         source = cuda.to_gpu(self.x)
         if not self.c_contiguous:
             source = cuda.cupy.asfortranarray(source)

     with cuda.Device(1), testing.assert_warns(DeprecationWarning):
         moved = cuda.to_gpu(source, stream=cuda.Stream.null)

     self.assertIsInstance(moved, cuda.ndarray)
     self.assertIsNot(source, moved)  # Do copy
     cuda.cupy.testing.assert_array_equal(source, moved)
Exemple #6
0
 def to_gpu(self, device=None):
     """Move to the GPU and remember the target for uninitialized arrays.

     Args:
         device: Target device, forwarded to the parent implementation.
             When ``None`` and the array is not yet set, the id of the
             current CUDA device is recorded instead.
     """
     super(Parameter, self).to_gpu(device)
     if self.array is not None:
         return

     # No array yet: record where it has to be created later on.
     target = cuda.Device().id if device is None else device
     self._initial_backend = 'cuda'
     self._initial_device = target
Exemple #7
0
    def run(self):
        """Worker loop: bind to ``self.device`` and serve jobs from the pipe.

        Repeatedly receives ``(job, data)`` tuples from ``self.pipe``. A
        ``'finalize'`` job synchronizes the device and ends the loop. An
        ``'update'`` job computes gradients on one batch, runs a sum
        reduction of the gathered gradients over ``self.comm``, then takes
        part in a parameter broadcast and writes the result back into the
        model. ``data`` is not used by this loop.
        """
        dev = cuda.Device(self.device)
        dev.use()
        self.setup()
        while True:
            job, data = self.pipe.recv()
            if job == 'finalize':
                # Wait for outstanding work on the device before exiting.
                dev.synchronize()
                break
            if job == 'update':
                # For reducing memory
                self.model.cleargrads()

                batch = self.converter(self.iterator.next(), self.device)
                with self.reporter.scope({}):  # pass dummy observation
                    loss = _calc_loss(self.model, batch)

                self.model.cleargrads()
                loss.backward()
                del loss

                # In-place sum reduction of the gathered gradients (send and
                # receive buffers are the same pointer); the literal 0 is
                # presumably the root rank -- confirm against the NCCL
                # binding.
                gg = gather_grads(self.model)
                nccl_data_type = _get_nccl_data_type(gg.dtype)
                null_stream = cuda.Stream.null
                self.comm.reduce(gg.data.ptr, gg.data.ptr, gg.size,
                                 nccl_data_type, nccl.NCCL_SUM, 0,
                                 null_stream.ptr)
                del gg
                self.model.cleargrads()
                # Take part in the parameter broadcast and copy the received
                # buffer back into the model.
                gp = gather_params(self.model)
                nccl_data_type = _get_nccl_data_type(gp.dtype)
                self.comm.bcast(gp.data.ptr, gp.size, nccl_data_type, 0,
                                null_stream.ptr)
                scatter_params(self.model, gp)
                del gp
Exemple #8
0
    def __call__(self, trainer):
        """Run the predictor on ``self.image`` and render the results.

        Args:
            trainer: Trainer whose updater provides the current iteration
                and the ``'opt_gen'`` / ``'opt_dis'`` optimizers; their
                targets are used as predictor and discriminator.
        """
        iteration = trainer.updater.iteration

        devices = self.devices
        if devices is None:
            devices = [
                cuda.get_device_from_id(
                    trainer.updater.get_optimizer('opt_gen').target._device_id)
                for _ in range(2)
            ]

        with chainer.using_config('train', False), cuda.Device(devices[0]):
            # A negative device id selects NumPy, anything else CuPy.
            if trainer.updater.get_optimizer('opt_gen').target._device_id < 0:
                self.xp = np
            else:
                self.xp = cuda.cupy

            image = self.xp.array(self.image)
            predictor = trainer.updater.get_optimizer('opt_gen').target
            batched_image = image[self.xp.newaxis, ...]
            rois, bboxes, objectness_scores = predictor(batched_image)[:3]

            if len(rois.shape) > 4:
                # Collapse all leading axes into a single one.
                rois = F.reshape(rois, (-1,) + rois.shape[-3:])
                bboxes = F.reshape(bboxes, (-1,) + bboxes.shape[-3:])
                objectness_scores = F.reshape(
                    objectness_scores, (-1, objectness_scores.shape[-1]))

            discriminator = trainer.updater.get_optimizer('opt_dis').target
            class_predictions = discriminator(rois)

            backprop_visualizations = self.get_backprop_visualization(
                predictor)
            feature_visualizations = self.get_feature_maps(predictor)

            self.render_rois(
                rois,
                bboxes,
                iteration,
                self.image.copy(),
                backprop_vis=backprop_visualizations,
                feature_vis=feature_visualizations,
                objectness_scores=objectness_scores,
                class_predictions=class_predictions,
            )
Exemple #9
0
def _inv_gpu(b):
    """Invert matrices on the GPU with a batched LU decomposition.

    The input is copied, LU-factorized in place with cuBLAS
    ``getrfBatched`` and then inverted into a separate output array with
    ``getriBatched``.

    Args:
        b: Input array. Only ``float32`` and ``float64`` dtypes are
            supported.

    Returns:
        tuple: ``(c, info)`` where ``c`` has the shape and dtype of the
        batched copy of ``b`` and receives the ``getriBatched`` output, and
        ``info`` is the int32 status array last written by
        ``getriBatched``.

    Raises:
        AssertionError: If ``b.dtype`` is neither ``float32`` nor
            ``float64``.
    """
    # Change the shape of the array to be size=1 minibatch if necessary
    # Also copy the matrix as the elements will be modified in-place
    a = matmul._as_batch_mat(b).copy()
    n = a.shape[1]
    n_matrices = len(a)
    # Pivot array
    p = cuda.cupy.empty((n, n_matrices), dtype=numpy.int32)
    # Output array
    c = cuda.cupy.empty_like(a)
    # These arrays hold information on the execution success
    # or if the matrix was singular
    info = cuda.cupy.empty(n_matrices, dtype=numpy.int32)
    ap = matmul._mat_ptrs(a)
    cp = matmul._mat_ptrs(c)
    _, lda = matmul._get_ld(a)
    _, ldc = matmul._get_ld(c)
    handle = cuda.Device().cublas_handle
    if b.dtype == numpy.float32:
        cuda.cublas.sgetrfBatched(handle, n, ap.data.ptr, lda, p.data.ptr,
                                  info.data.ptr, n_matrices)
        cuda.cublas.sgetriBatched(handle, n, ap.data.ptr, lda, p.data.ptr,
                                  cp.data.ptr, ldc, info.data.ptr, n_matrices)
    elif b.dtype == numpy.float64:
        cuda.cublas.dgetrfBatched(handle, n, ap.data.ptr, lda, p.data.ptr,
                                  info.data.ptr, n_matrices)
        cuda.cublas.dgetriBatched(handle, n, ap.data.ptr, lda, p.data.ptr,
                                  cp.data.ptr, ldc, info.data.ptr, n_matrices)
    else:
        # Raised explicitly so the check survives ``python -O``; otherwise
        # the uninitialized ``c`` and ``info`` buffers would be returned.
        raise AssertionError(
            'unsupported dtype for _inv_gpu: {}'.format(b.dtype))
    return c, info
    def evaluate(self, snapshot_name=''):
        """Run the model over ``self.data_iterator`` and save the mAP.

        For every batch only the first example is used: its image is passed
        to ``self.model.predict`` and its boxes and labels are kept as
        ground truth. The collected predictions are scored with
        ``eval_detection_voc`` and the resulting ``'map'`` entry is handed
        to ``self.save_eval_results``.

        Args:
            snapshot_name (str): Forwarded to ``save_eval_results`` together
                with the computed mAP.
        """
        current_device = cuda.get_device_from_id(self.args.gpu)
        with current_device:
            gt_data = []
            pred_data = []

            progress = tqdm(
                self.data_iterator,
                total=len(self.data_loader) // self.args.batchsize)
            for batch in progress:
                image, gt_bboxes, gt_labels = batch[0]
                gt_data.append((gt_bboxes, gt_labels))
                # NOTE: the image is handed to the model as-is; an explicit
                # transfer to the GPU used to be here but is disabled.

                with cuda.Device(self.args.gpu), \
                        configuration.using_config('train', False):
                    bboxes, labels, scores = self.model.predict(
                        image.copy()[None, ...])
                    if len(bboxes[0]) == 0:
                        # Substitute a single all-zero detection so that
                        # every image contributes one prediction entry.
                        bboxes = [np.zeros((1, 4), dtype=np.float32)]
                        labels = [np.zeros((1, ), dtype=np.int32)]
                        scores = [np.zeros((1, ), dtype=np.float32)]
                    pred_data.append((bboxes[0], labels[0], scores[0]))
                    # TODO handle empty predictions!!

            bboxes, labels, scores = zip(*pred_data)
            gt_bboxes, gt_labels = concat_examples(gt_data)
            result = eval_detection_voc(bboxes, labels, scores, gt_bboxes,
                                        gt_labels, None)
            # Named ``mean_ap`` to avoid shadowing the ``map`` builtin.
            mean_ap = result['map']

            self.save_eval_results(snapshot_name, mean_ap)
    def __call__(self, *inputs):
        """Report mean IoU, mAP and the first AP entry for predicted boxes.

        Args:
            *inputs: The first two entries are used as ``images`` and
                ``labels``; any further entries are ignored.
        """
        images, labels = inputs[:2]
        with cuda.Device(self.device):
            _, bboxes = self.link(images)

            bboxes = cuda.to_cpu(bboxes.data)
            labels = cuda.to_cpu(labels)

            xp = cuda.get_array_module(bboxes)

            bboxes = self.extract_corners(bboxes)
            bboxes = self.scale_bboxes(bboxes, Size._make(images.shape[-2:]))

            # Boolean identity mask selecting the diagonal of the bbox_iou
            # result. Built with ``dtype=bool`` because the ``bool`` alias
            # on the array module was removed from NumPy.
            diagonal_mask = xp.eye(len(bboxes), dtype=bool)
            ious = bbox_iou(
                bboxes.data.copy(), xp.squeeze(labels))[diagonal_mask]
            mean_iou = ious.mean()

            reporter.report({'mean_iou': mean_iou})

            pred_bboxes = [
                bbox.data[xp.newaxis, ...].astype(xp.int32)
                for bbox in F.separate(bboxes, axis=0)
            ]
            # Every prediction gets score 1 and label 0.
            pred_scores = xp.ones((len(bboxes), 1))
            pred_labels = xp.zeros_like(pred_scores)

            gt_bboxes = [bbox.data[...] for bbox in F.separate(labels, axis=0)]
            gt_labels = xp.zeros_like(pred_scores)

            result = chainercv.evaluations.eval_detection_voc(
                pred_bboxes,
                pred_labels,
                pred_scores,
                gt_bboxes,
                gt_labels
            )

            reporter.report({'map': result['map']})
            reporter.report({'ap/sheep': result['ap'][0]})
Exemple #12
0
    def update_core(self):
        """Run one update step on the master model.

        Sends an ``'update'`` message via ``_send_message``, computes the
        loss and gradients of ``self._master`` on the first device, and
        updates the ``'main'`` optimizer. When ``self.comm`` is set, the
        gradients go through a sum reduction before the optimizer update
        and the parameters are broadcast after it.
        """
        self.setup_workers()

        self._send_message(('update', None))
        with cuda.Device(self._devices[0]):
            # For reducing memory
            self._master.cleargrads()

            optimizer = self.get_optimizer('main')
            batch = self.get_iterator('main').next()
            batch = self.converter(batch, self._devices[0])

            loss = self._calc_loss(self._master,
                                   batch,
                                   cleargrads_func=self._master.cleargrads)

            self._master.cleargrads()
            loss.backward()

            # NCCL: reduce grads
            null_stream = cuda.Stream.null
            if self.comm is not None:
                # In-place sum reduction (same pointer for send and receive
                # buffer); the literal 0 is presumably the root rank --
                # confirm against the NCCL binding.
                gg = gather_grads(self._master)
                nccl_data_type = _get_nccl_data_type(gg.dtype)
                self.comm.reduce(gg.data.ptr, gg.data.ptr, gg.size,
                                 nccl_data_type, nccl.NCCL_SUM, 0,
                                 null_stream.ptr)
                # Write the reduced gradients back into the master model.
                scatter_grads(self._master, gg)
                del gg
            optimizer.update()
            if self.comm is not None:
                # Broadcast the gathered parameters after the update.
                gp = gather_params(self._master)
                nccl_data_type = _get_nccl_data_type(gp.dtype)
                self.comm.bcast(gp.data.ptr, gp.size, nccl_data_type, 0,
                                null_stream.ptr)
Exemple #13
0
    def __call__(self, images):
        """Predict sampling grids for ``images`` and extract the ROIs.

        Args:
            images: Input images; they are scaled by 255 before being
                passed to ``prepare_images`` and are sampled from directly
                when the ROIs are extracted.

        Returns:
            The extracted features when ``self.train_imagenet`` is set,
            otherwise the tuple ``(rois, points)``.
        """
        self.visual_backprop_anchors.clear()

        with cuda.Device(images.data.device):
            scaled_images = images.copy() * 255
            input_images = self.prepare_images(scaled_images)
        features = self.feature_extractor(input_images)

        if self.train_imagenet:
            return features

        # Larger inputs go through additional residual stages.
        image_height = images.shape[-2]
        if image_height > 224:
            features = self.res6(features)
        if image_height > 300:
            features = self.res7(features)

        self.visual_backprop_anchors.append(features)
        pooled = _global_average_pooling_2d(features)

        affine_params = F.reshape(self.param_predictor(pooled), (-1, 2, 3))
        affine_params = rotation_dropout(affine_params, ratio=0.0)
        points = F.spatial_transformer_grid(affine_params, self.out_size)
        rois = F.spatial_transformer_sampler(images, points)

        if self.transform_rois_to_grayscale:
            num_roi_channels = rois.shape[1]
            assert num_roi_channels == 3, \
                "rois are not in RGB, can not convert them to grayscale"
            b, g, r = F.split_axis(rois, 3, axis=1)
            rois = 0.299 * r + 0.587 * g + 0.114 * b

        return rois, points
Exemple #14
0
    def __call__(self, **kwargs):
        """Localize and recognize words, then record the word accuracy.

        Keyword Args:
            image: Array whose shape unpacks into five axes; the first two
                are merged before localization.
            words: Reference words handed to ``calc_word_accuracy``.
            return_predictions (bool): When true, the predictions are
                returned; otherwise the method returns ``None``.

        Returns:
            ``None`` or the tuple ``(rois, bboxes, predicted_words,
            best_indices, chosen_indices, scores)``.
        """
        image = kwargs.pop('image', None)
        words = kwargs.pop('words', None)
        return_predictions = kwargs.pop('return_predictions', False)

        batch_size, images_per_image, num_channels, height, width = image.shape
        group_shape = (batch_size, images_per_image)
        image = self.xp.reshape(image, (-1, num_channels, height, width))

        def select(grouped, indices):
            # One array per batch entry, picked with that entry's indices.
            return [grouped[i, indices[i]].array for i in range(batch_size)]

        with cuda.Device(self.device):
            rois, bboxes = self.localizer.predict(image)[:2]
            predicted_words, raw_classification_result = \
                self.recognizer.predict(
                    rois, return_raw_classification_result=True)
            predicted_words = F.reshape(
                predicted_words, group_shape + predicted_words.shape[1:])
            raw_classification_result = F.reshape(
                raw_classification_result,
                group_shape + raw_classification_result.shape[1:])

            best_indices, scores = self.determine_best_prediction_indices(
                raw_classification_result)
            chosen_indices = best_indices
            best_words = self.xp.concatenate(
                select(predicted_words, best_indices), axis=0)
            self.calc_word_accuracy(
                best_words,
                words,
                self.strip_non_alphanumeric_predictions,
            )
            if not self.only_return_best_result:
                # Keep every candidate instead of only the best one.
                every_index = self.xp.arange(images_per_image)[None, ...]
                best_indices = self.xp.tile(every_index, (batch_size, 1))
            predicted_words = self.xp.stack(
                select(predicted_words, best_indices), axis=0)

        if not return_predictions:
            return None

        rois = F.reshape(rois, group_shape + rois.shape[1:])
        bboxes = F.reshape(bboxes, group_shape + bboxes.shape[1:])
        rois = self.xp.stack(select(rois, best_indices), axis=0)
        bboxes = self.xp.stack(select(bboxes, best_indices), axis=0)
        return (rois, bboxes, predicted_words, best_indices, chosen_indices,
                scores)
Exemple #15
0
    def __call__(self, *inputs):
        """Report mean IoU and mAP of the predicted masks.

        Args:
            *inputs: The first two entries are used as ``images`` and
                ``labels``; any further entries are ignored.
        """
        images, labels = inputs[:2]
        with cuda.Device(self.device):
            rois, bboxes = self.link.predict(images)[:2]

            self.xp = cuda.get_array_module(bboxes)
            bboxes = bboxes.data
            labels = self.ndarray_to_list(labels)

            mask_info = self.bboxes_to_masks(bboxes, images)
            batch_size, num_predicted_masks, pred_masks = mask_info
            pred_masks = self.ndarray_to_list(pred_masks)

            if self.assessor is None:
                # Without an assessor every mask gets a score of one.
                uniform_scores = numpy.ones((batch_size, num_predicted_masks))
                pred_scores = self.ndarray_to_list(uniform_scores)
            else:
                assessed = self.assessor.extract_iou_prediction(
                    self.assessor(rois))
                pred_scores = self.ndarray_to_list(
                    assessed.data.reshape(batch_size, num_predicted_masks))
                pred_masks, pred_scores = self.perform_nms(
                    batch_size, bboxes, num_predicted_masks, pred_masks,
                    pred_scores)

            ious = self.xp.concatenate(self.calculate_iou(pred_masks, labels))
            mean_iou = float(self.xp.sum(ious) / len(ious))
            reporter.report({'mean_iou': mean_iou})

            result = self.calculate_map(pred_masks, pred_scores, labels)
            reporter.report({'map': result['map']})
Exemple #16
0
 def test_numpy_array_async3(self):
     """``to_gpu`` with ``stream`` warns and places the copy on device 1."""
     with cuda.Device(1), testing.assert_warns(DeprecationWarning):
         gpu_copy = cuda.to_gpu(self.x, stream=cuda.Stream.null)

     self.assertIsInstance(gpu_copy, cuda.ndarray)
     cuda.cupy.testing.assert_array_equal(self.x, gpu_copy)
     self.assertEqual(int(gpu_copy.device), 1)
Exemple #17
0
def get_random_state():
    """Return the ``DropoutRandomStates`` cached for the current device.

    States are stored in ``_random_states`` under the id of the current
    CUDA device. A missing entry is created from the ``CHAINER_SEED``
    environment variable and stored before it is returned.
    """
    device_id = cuda.Device().id
    state = _random_states.get(device_id)
    if state is not None:
        return state

    state = DropoutRandomStates(os.getenv('CHAINER_SEED'))
    _random_states[device_id] = state
    return state
Exemple #18
0
 def test_linear_model_multi_gpu(self):
     """Accuracy exceeds 0.9 with device 1 configured and device 0 current."""
     config_on_gpu1 = backend.BackendConfig({
         'use_cuda': True,
         'cuda_device': 1
     })
     with cuda.Device(0):
         accuracy = self.model.accuracy(config_on_gpu1)

     accuracy_value = cuda.to_cpu(accuracy.data)
     self.assertGreater(accuracy_value, 0.9)
Exemple #19
0
    def test_from_array(self, backend_config):
        """``GpuDevice.from_array`` matches ``chainer.get_device``."""
        device_id = backend_config.cuda_device
        with cuda.Device(device_id):
            arr = cuda.ndarray((), numpy.float32)
        # Test precondition check
        assert arr.device.id == backend_config.cuda_device

        device = backend.GpuDevice.from_array(arr)
        assert isinstance(device, backend.GpuDevice)

        expected = chainer.get_device((cuda.cupy, backend_config.cuda_device))
        assert device == expected
Exemple #20
0
    def test_from_array(self, backend_config):
        """``GpuDevice.from_array`` matches ``GpuDevice.from_device_id``."""
        device_id = backend_config.cuda_device
        with cuda.Device(device_id):
            arr = cuda.ndarray((), numpy.float32)
        # Test precondition check
        assert arr.device.id == backend_config.cuda_device

        device = backend.GpuDevice.from_array(arr)
        self.check_device(device, backend_config)

        expected = backend.GpuDevice.from_device_id(
            backend_config.cuda_device)
        assert device == expected
 def test_linear_model_multi_gpu(self):
     """Accuracy exceeds 0.9 with device 1 configured and device 0 current.

     The test is skipped when ``skip_loss_scaling`` says so for this
     backend configuration.
     """
     backend_config = backend.BackendConfig({
         'use_cuda': True,
         'cuda_device': 1
     })
     skip, msg = self.skip_loss_scaling(backend_config)
     if skip:
         # SkipTest has to be raised; returning the exception instance
         # would make the test count as passed instead of skipped.
         raise unittest.SkipTest(msg)
     with cuda.Device(0):
         accuracy = self.model.accuracy(backend_config)
     self.assertGreater(cuda.to_cpu(accuracy.data), 0.9)
 def test_model_setup_multi_gpu(self):
     """Optimizer state ends up on the same device as each parameter."""
     with cuda.Device(0):
         model = self.model.model
         optimizer = self.model.optimizer
         model.to_gpu(1)
         optimizer.setup(model)

     # Initialize the optimizer state by running an update
     for param in optimizer.target.params(False):
         param.cleargrad()
         param.update()
         for state_value in six.itervalues(param.update_rule.state):
             param_device_id = int(param.data.device)
             self.assertEqual(param_device_id, int(state_value.device))
Exemple #23
0
    def test_get_device_from_array(self, backend_config):
        """Both array-to-device lookups return ``backend_config.device``."""
        with cuda.Device(backend_config.cuda_device):
            arr = cuda.ndarray((), numpy.float32)
        # Test precondition check
        assert arr.device.id == backend_config.cuda_device

        expected_device = backend_config.device

        from_class = backend.GpuDevice.from_array(arr)
        assert from_class == expected_device

        from_module = backend.get_device_from_array(arr)
        assert from_module == expected_device
Exemple #24
0
    def test_chainerx_cuda_to_cupy_multigpu(self):
        """Sending a ``cuda:0`` ChainerX array to ``@cupy:1`` copies it."""
        orig = self.orig_chainerx('cuda:0')
        converted = self.send_check_equal(orig, '@cupy:1')
        assert isinstance(converted, cuda.ndarray)
        assert converted.device.id == 1

        # memory must not be shared
        snapshot = converted.copy()
        with cuda.Device(1):
            converted[:] *= 2

        orig_on_cpu = backend.CpuDevice().send(orig)
        snapshot_on_cpu = backend.CpuDevice().send(snapshot)
        numpy.testing.assert_array_equal(orig_on_cpu, snapshot_on_cpu)
Exemple #25
0
def _guess_device_from_array_module(xp):
    """Return a plausible device for the array module ``xp``.

    CuPy maps to the current CUDA device, ChainerX to its default device,
    and any other module to the CPU device.

    .. warning::

        A module can be backed by several devices, so the result is only a
        guess.

    """
    if xp is cuda.cupy:
        current_cuda_device = cuda.Device()
        return cuda.GpuDevice(current_cuda_device)

    if xp is chainerx:
        default_device = chainerx.get_default_device()
        return _chainerx.ChainerxDevice(default_device)

    # Cannot detect intel64, because xp of intel64 is numpy.
    return _cpu.CpuDevice()
Exemple #26
0
def _get_device(device_spec):
    # Converts device specificer to a chainer.Device instance.
    # Additionally to chainer.get_device,
    # this function supports the following conversions:
    # - None: returns None
    # - negative integer: returns CpuDevice
    # - non-negative integer: returns GpuDevice
    if device_spec is None:
        return None

    # For backward compatibilities
    if isinstance(device_spec, six.integer_types):
        if device_spec < 0:
            return backend.CpuDevice()
        return backend.get_device(cuda.Device(device_spec))
    return backend.get_device(device_spec)
 def test_model_setup_multi_gpu(self):
     """Optimizer state ends up on the same device as each parameter.

     The test is skipped when ``skip_loss_scaling`` says so.
     """
     skip, msg = self.skip_loss_scaling()
     if skip:
         # SkipTest has to be raised; returning the exception instance
         # would make the test count as passed instead of skipped.
         raise unittest.SkipTest(msg)
     with cuda.Device(0):
         model = self.model.model
         optimizer = self.model.optimizer
         model.to_gpu(1)
         optimizer.setup(model)
         _optimizer_loss_scaling(optimizer, self.loss_scaling)
     # Initialize the optimizer state by running an update
     for param in optimizer.target.params(False):
         param.cleargrad()
         param.update()
         for v in six.itervalues(param.update_rule.state):
             self.assertEqual(int(param.data.device), int(v.device))
Exemple #28
0
    def update_core(self):
        """Run one training step for the localizer and the discriminator.

        The localizer is updated first, with a mean squared error between
        the discriminator output on its ROIs and ``self.localizer_target``
        plus the losses of all ``self.regularizers``. The discriminator is
        then updated with a mean squared error between its output on the
        real images and their labels, unless ``self.freeze_discriminator``
        is set. Both losses are reported.
        """
        localizer_optimizer = self.get_optimizer('opt_gen')
        discriminator_optimizer = self.get_optimizer('opt_dis')
        xp = self.localizer.xp

        with cuda.Device(self.device):
            # Discriminator output on a batch from the 'real' iterator.
            batch = next(self.get_iterator('real'))
            real_images, labels = self.converter(batch, self.device)[:2]

            y_real = self.discriminator(real_images)

            # Localizer output on a batch from the 'main' iterator, scored
            # by the discriminator.
            batch = next(self.get_iterator('main'))
            fake_images = self.converter(batch, self.device)
            x_fake, bboxes = self.localizer(fake_images)
            y_fake = self.discriminator(x_fake)

            # Constant regression target for every localizer output.
            localization_labels = xp.full((len(y_fake), 1),
                                          self.localizer_target,
                                          dtype=xp.float32)
            loss_localizer = F.mean_squared_error(y_fake, localization_labels)

            for regularizer in self.regularizers:
                loss_localizer += regularizer.calc_loss(
                    bboxes, Size._make(fake_images.shape[-2:]))

            # The discriminator's updates are disabled while the localizer
            # loss is backpropagated and re-enabled afterwards.
            self.discriminator.disable_update()

            self.localizer.cleargrads()
            loss_localizer.backward()
            localizer_optimizer.update()
            chainer.reporter.report({'loss_localizer': loss_localizer})

            self.discriminator.enable_update()

            # Cut the graph behind the localizer outputs.
            x_fake.unchain_backward()
            bboxes.unchain_backward()

            loss_dis = F.mean_squared_error(y_real, labels)

            if not self.freeze_discriminator:
                self.discriminator.cleargrads()
                self.localizer.cleargrads()
                loss_dis.backward()
                discriminator_optimizer.update()

            chainer.reporter.report({'loss_dis': loss_dis})
    def __call__(self, **kwargs):
        """Predict on ``data`` and report part and full accuracy.

        Keyword Args:
            data: Input converted to an array of the network's array module.
            label: Reference labels, converted the same way.
        """
        data = kwargs.pop('data')
        labels = kwargs.pop('label')

        with cuda.Device(self.device):
            xp = self.net.xp
            data = xp.array(data)
            labels = xp.array(labels)

            prediction = self.net.predict(data)
            # part accuracy is the accuracy for each number and accuracy is
            # the accuracy for the complete vector of numbers
            accuracies = self.calc_accuracy(prediction, labels)
            part_accuracy, accuracy = accuracies

        observation = {
            "part_accuracy": part_accuracy,
            "accuracy": accuracy
        }
        reporter.report(observation)
Exemple #30
0
    def __call__(self, **kwargs):
        """Localize and recognize words, then record the word accuracy.

        Keyword Args:
            image: Input handed to ``self.localizer.predict``.
            words: Reference words handed to ``calc_word_accuracy``.
            return_predictions (bool): When true, the predictions are
                returned; otherwise the method returns ``None``.

        Returns:
            ``None`` or the tuple ``(rois, bboxes, predicted_words)`` with
            the leading two axes of ``rois`` merged.
        """
        image = kwargs.pop('image', None)
        words = kwargs.pop('words', None)
        return_predictions = kwargs.pop('return_predictions', False)

        with cuda.Device(self.device):
            rois, bboxes = self.localizer.predict(image)[:2]
            predicted_words = self.recognizer.predict(rois).array
            self.xp = cuda.get_array_module(bboxes)

            # Only the trailing three axes are needed for the reshapes.
            _, _, num_channels, height, width = rois.shape
            flat_roi_shape = (-1, num_channels, height, width)
            rois = self.xp.reshape(rois.array, flat_roi_shape)
            bboxes = self.xp.reshape(bboxes.array, (-1, 2, height, width))

            self.calc_word_accuracy(predicted_words, words)

        if not return_predictions:
            return None
        return rois, bboxes, predicted_words