Example #1
def test_multi_gpu():
    import numpy as np
    import renom as rm
    from renom.cuda import cuGetDeviceCount, use_device

    class NN2(rm.Model):
        def __init__(self):
            super(NN2, self).__init__()
            self.layer1 = rm.Dense(output_size=2)
            self.layer2 = rm.Dense(output_size=2)

        def forward(self, x):
            return self.layer2(rm.relu(self.layer1(x)))

        def weight_initiallize(self, input_size):
            # layer1's output_size (2) equals the test's input size, so both
            # layers can be initialized from the same shape.
            self.layer1.weight_initiallize(input_size)
            self.layer2.weight_initiallize(input_size)

    nn = NN2()
    nn.set_gpu(0)
    nn.weight_initiallize((2, ))

    nn2 = NN2()
    nn2.set_gpu(cuGetDeviceCount() - 1)

    for i in range(2):
        nn2.copy_params(nn)
        x = np.random.rand(100, 2)
        with nn.train():
            ret1 = nn(x[:50])

        with use_device(nn.device_id):
            loss1 = rm.softmax_cross_entropy(ret1, np.random.rand(50, 2))

        with nn2.train():
            ret2 = nn2(x[50:])

        with use_device(nn2.device_id):
            loss2 = rm.softmax_cross_entropy(ret2, np.random.rand(50, 2))

        nn.sync()
        nn2.sync()

        grad1 = loss1.grad()

        with use_device(nn2.device_id):
            grad2 = loss2.grad()

        grad2.get(nn2.layer1.params.w)
        org_l1_w = grad1.get(nn.layer1.params.w)

        nn.join_grads(grad1, [(nn2, grad2)])

        assert np.allclose(grad1.get(nn.layer1.params.w),
                           org_l1_w + grad2.get(nn2.layer1.params.w).copy())

        grad1.update(models=[nn])
Example #2
def __itruediv__(self, other):
    with use_device(self.device_id):
        # Shapes must agree before the element-wise divide.
        assert getattr(self, "shape", (1,)) == getattr(other, "shape", (1,))
        new_shape = calc_broadcast_shape(self, other)
        ret = GPUValue(shape=new_shape)
        cudiv(self, other, ret)
        return ret
Example #3
def __call__(self, x, *args, **kwargs):
    with use_device(self._device_id):
        if not self.params:
            assert len(x.shape) > 1, "Input must be at least 2-dimensional."
            self.weight_initiallize(x.shape[1:])
        return super(Parametrized, self).__call__(x, *args, **kwargs)
Example #4
def __add__(self, other):
    with use_device(self.device_id):
        new_shape = calc_broadcast_shape(self, other)
        ret = GPUValue(shape=new_shape)
        # Only the float32 data type is accepted.
        cuadd(self, other, ret)
        return ret
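The float32 note above applies across these operators: the CUDA kernels used here (cuadd, cusub, cudiv, cupow) operate on float32 buffers. A minimal host-side sketch of preparing data accordingly; the GPUValue-from-ndarray constructor form is taken from the copy() snippet in Example #6 below:

import numpy as np

# Hypothetical preparation step: cast host data to float32 before
# wrapping it in a GPUValue, since the GPU kernels only accept float32.
arr = np.random.rand(4, 4).astype(np.float32)
gpu_val = GPUValue(arr)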
Example #5
    def join_grads(self, grads, others):
        """Merge gradients of other models.
        Others is a list of tuple of (model, grads) to be merged.
        Models listed in the others should have same structure with self."""

        values = {
            name: params
            for name, params, attrs in self.flatten_values()
        }
        for model, _grads in others:
            o = model._get_grads(_grads)

            for (name, attrname), diff in o.items():
                obj = values[name][attrname]
                curdiff = grads.get(obj, None)
                if curdiff is not None:
                    if not isinstance(curdiff, Node):
                        curdiff = Node(curdiff)
                    if not isinstance(diff, Node):
                        diff = Node(diff)
                    with use_device(curdiff.device_id):
                        if GPUValue is not None and diff.device_id != curdiff.device_id:
                            g = GPUValue(shape=diff.shape)
                            g.copy_from(diff.get_gpu())
                            diff = Node(g)

                        newdiff = curdiff + diff

                    grads.set(obj, newdiff)
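A minimal usage sketch of this merge, following the pattern already shown in Example #1 (nn and nn2 are same-structured models on different devices; grad1 and grad2 are their respective loss gradients):

# All names below come from Example #1. After the call, grad1 holds the
# per-parameter sums, and only the master model is updated.
nn.join_grads(grad1, [(nn2, grad2)])
grad1.update(models=[nn])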
Example #6
def copy(self):
    if cuGetDevice() == self.device_id:
        # Same device: direct device-to-device copy.
        ret = GPUValue(shape=self.shape)
        self._ptr.memcpyD2D(ret._ptr, self.nbytes)
    else:
        # Different device: stage through a host array.
        with use_device(self.device_id):
            arr = self.new_array()
        ret = GPUValue(arr)
    return ret
Example #7
    def _oper_pow(self, other):
        if not isinstance(self, GPUValue):
            return other.__rpow__(self)

        with use_device(self.device_id):
            new_shape = calc_broadcast_shape(self, other)
            ret = GPUValue(shape=new_shape)
            cupow(self, other, ret)
            return ret
Example #8
def __call__(self):
    set_cuda_active(True)
    with self.gpu_resource:
        self._gpu = self.gpus.pop()  # take a free device id from the pool
        try:
            with use_device(self._gpu):
                return self._exec()
        finally:
            self.gpus.add(self._gpu)  # return the device id to the pool
Example #9
    def __truediv__(self, other):
        if not isinstance(self, GPUValue):
            return other.__rtruediv__(self)

        with use_device(self.device_id):
            new_shape = calc_broadcast_shape(self, other)
            ret = GPUValue(shape=new_shape)
            cudiv(self, other, ret)
            return ret
Example #10
def sync(self):
    if is_cuda_active():
        done = set()
        for m in self.iter_models():
            device_id = m._device_id
            if device_id not in done:
                done.add(device_id)
                with use_device(device_id):
                    renom.cuda.cuDeviceSynchronize()
Example #11
File: server.py, Project: clockfly/ReNomRG
def run(self, f, *args, **kwargs):
    with self.gpu_resource:
        self.active_gpu.id = self.gpus.pop()  # take a free device id
        try:
            set_cuda_active(True)
            with use_device(self.active_gpu.id):
                return f(*args, **kwargs)
        finally:
            self.gpus.add(self.active_gpu.id)  # return the id to the pool
            release_mem_pool()
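Example #8 and Example #11 both assume a surrounding object that owns a pool of free device ids guarded by a counting semaphore. A minimal sketch of such a holder, using the attribute names (gpu_resource, gpus) from the snippets; the class itself and its constructor are assumptions:

import threading

class GPUPool:
    """Hypothetical pool matching the attributes used in Examples #8 and #11."""

    def __init__(self, num_gpus):
        self.gpus = set(range(num_gpus))  # free device ids
        # At most num_gpus jobs may hold a device at the same time.
        self.gpu_resource = threading.Semaphore(num_gpus)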
Example #12
    def __getitem__(self, indexes):
        with use_device(self.device_id):
            slices, result_shapes, dest_shapes = build_shapes(self, indexes)

            dest_size = calc_int_prod(dest_shapes)

            ret = cu_get_item(self, self.size, dest_size, slices)

            ret.shape = tuple(result_shapes)
            return ret
Example #13
def T(self):
    with use_device(self.device_id):
        n = len(self.shape)
        assert n < 3  # only 1-D and 2-D values can be transposed
        clone = self.zeros_like_me()
        if n == 2:
            new_shape = list(clone.shape)
            with cublas.cublas_handler() as cublas_handle:
                cublas.cublas_transpose(cublas_handle, self, clone)
            new_shape[0] = clone.shape[1]
            new_shape[1] = clone.shape[0]
            clone.shape = tuple(new_shape)
        return clone
Example #14
    def to_gpu(self, value):
        if value.dtype != self.dtype:
            value = value.astype(self.dtype)

        assert value.shape == self.shape, "{} {}".format(value.shape, self.shape)

        if not self._ptr:
            self.nbytes = value.nbytes
            self.alloc()

        # todo: value.flatten() copies buffer
        with use_device(self.device_id):
            self._ptr.memcpyH2D(value.ravel(), value.nbytes)
Example #15
def test_copy_from_another_gpu():
    # rand and close are helpers from the test module (a random-array
    # factory and an allclose assertion, respectively).
    set_cuda_active(True)

    src = Variable(rand((100, )))
    src.to_gpu()

    with use_device(1):
        dest = Variable(rand((100, )))
        dest.to_gpu()

    dest.copy_from(src)
    close(src, dest)

    close(src._gpu.new_array(), dest._gpu.new_array())
Example #16
    def __call__(self, x, *args, **kwargs):
        with use_device(self._device_id):
            if self._model_hook:
                x, args, kwargs = self._model_hook.call_enter(self, x, args, kwargs)
                ret = self._model_hook.on_forward(self, self.forward, x, args, kwargs)
            else:
                ret = self.forward(x, *args, **kwargs)

            if self._model_hook:
                ret = self._model_hook.call_leave(self, ret, x, args, kwargs)

            return ret
Example #17
    def copy_params(self, model):
        value_list = model.flatten_values()
        with use_device(self._device_id):
            for names, values, attrs in value_list:
                layer = self
                for name in names[1:]:
                    layer = getattr(layer, name)

                for k, v in values.items():
                    if k in layer.params:
                        layer.params[k].copy_from(v)
                    else:
                        layer.params[k] = v.copy()

                    layer.params[k]._auto_update = v._auto_update
Example #18
    def __setitem__(self, indexes, value):
        with use_device(self.device_id):
            value = get_gpu(value)
            slices, result_shapes, dest_shapes = build_shapes(self, indexes)
            if calc_int_prod(result_shapes) == 0:
                return

            dest_strides = calc_strides(dest_shapes)
            mask, broadcasted = _build_broadcast_mask(dest_shapes, value.shape)

            broadcasted_strides = calc_strides(broadcasted)
            broadcasted_strides = [m * b for m, b in zip(mask, broadcasted_strides)]

            valuesize = calc_int_prod(dest_shapes)

            cu_set_item(value, valuesize, self, slices, dest_strides, broadcasted_strides)
Example #19
def __idiv__(self, other):
    # Python 2 in-place division; delegate to __itruediv__.
    with use_device(self.device_id):
        return self.__itruediv__(other)
Example #20
    def train(self, train_distributor, test_distributor=None):
        """Train method.
        This method runs the training loop.
        If test_distributor is given, a validation loss will be calculated.

        Args:
            train_distributor (Distributor): Distributor for yielding train data.
            test_distributor (Distributor): Distributor for yielding test data.
        """

        self.epoch = 0
        self.train_distributor = train_distributor
        self.test_distributor = test_distributor
        self.on_event('start')
        self.train_loss_list = []
        self.test_loss_list = []

        models = [self.model]
        if self.num_gpu > 1:
            models.extend(
                [self.model.__class__() for _ in range(self.num_gpu - 1)])
            for n in range(self.num_gpu):
                models[n].set_gpu(n)

        while self.epoch < self.num_epoch:
            self.on_event('start_epoch')
            self.nth = 0
            self.avg_train_loss = 0

            for iteration, (data, target) in enumerate(
                    self.train_distributor.batch(self.batch_size,
                                                 self.shuffle)):
                datalen = len(data) // len(models)
                self.data = [
                    data[i:i + datalen]
                    for i in range(0, datalen * len(models), datalen)
                ]
                if is_cuda_active():
                    self.data = [Node(d) for d in self.data]
                    for n, d in enumerate(self.data):
                        with use_device(n):
                            d.to_gpu()

                targetlen = len(target) // len(models)
                self.targets = [
                    target[i:i + targetlen]
                    for i in range(0, targetlen * len(models), targetlen)
                ]
                if is_cuda_active():
                    self.targets = [Node(d) for d in self.targets]
                    for n, d in enumerate(self.targets):
                        with use_device(n):
                            d.to_gpu()

                for gpu in range(1, self.num_gpu):
                    models[gpu].copy_params(models[0])

                for gpu in range(0, self.num_gpu):
                    models[gpu].set_models(inference=False)

                self.on_event('forward')
                self.outputs = []

                for gpu in range(self.num_gpu):
                    model = models[gpu]
                    with model.train():
                        self.outputs.append(model(self.data[gpu]))

                self.on_event('loss')
                self.losses = []

                for gpu in range(self.num_gpu):
                    model = models[gpu]
                    with use_device(gpu):
                        self.losses.append(
                            self.loss_func(self.outputs[gpu],
                                           self.targets[gpu]))

                self.avg_train_loss += (self.losses[0] -
                                        self.avg_train_loss) / (iteration + 1)

                self.on_event('backward')
                self.grads = []

                for gpu in range(self.num_gpu):
                    model = models[gpu]
                    with use_device(gpu):
                        self.grads.append(self.losses[gpu].grad())

                self.on_event('grad')

                if self.num_gpu > 1:
                    models[0].join_grads(self.grads[0],
                                         zip(models[1:], self.grads[1:]))

                self.grads[0].update(self.optimizer)

                self.on_event('updated')
                self.nth += 1

            self.on_event('end_epoch')
            self.epoch += 1

            # release objects
            self.data = self.targets = None
            self.outputs = self.losses = self.grads = None
            self.avg_train_loss = None
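A minimal wiring sketch for this training loop. The Trainer constructor shown is an assumption that simply mirrors the attributes the loop reads (model, num_epoch, batch_size, shuffle, loss_func, optimizer, num_gpu), and the NdarrayDistributor import path is from ReNom v2 and may differ by version:

import numpy as np
import renom as rm
from renom.utility.distributor import NdarrayDistributor

# Hypothetical setup: NN2 is the two-layer model class from Example #1,
# assumed lifted to module scope; x and y are random stand-in data.
x, y = np.random.rand(1000, 2), np.random.rand(1000, 2)
trainer = Trainer(model=NN2(), num_epoch=10, batch_size=64, shuffle=True,
                  loss_func=rm.softmax_cross_entropy,
                  optimizer=rm.Sgd(lr=0.01), num_gpu=2)
trainer.train(NdarrayDistributor(x, y))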
Example #21
def __sub__(self, other):
    with use_device(self.device_id):
        new_shape = calc_broadcast_shape(self, other)
        ret = GPUValue(shape=new_shape)
        cusub(self, other, ret)
        return ret
Example #22
def __call__(self, x, *args, **kwargs):
    with use_device(self._device_id):
        if not self.params:
            self.weight_initiallize(x.shape[1:])
        return super(Parametrized, self).__call__(x, *args, **kwargs)
Example #23
def __call__(self, *args, **kwargs):
    with use_device(self._device_id):
        return self.forward(*args, **kwargs)
Example #24
def __rpow__(self, other, modulo=None):
    with use_device(self.device_id):
        new_shape = calc_broadcast_shape(self, other)
        ret = GPUValue(shape=new_shape)
        curpow(self, other, ret)
        return ret
Example #25
    def __div__(self, other):
        if not isinstance(self, GPUValue):
            return other.__rdiv__(self)

        with use_device(self.device_id):
            return self.__truediv__(other)
Example #26
def __isub__(self, other):
    with use_device(self.device_id):
        # Shapes must agree before the in-place subtraction.
        assert getattr(self, "shape", (1,)) == getattr(other, "shape", (1,))
        cublas.cublas_axpy(-get_gpu(other), get_gpu(self))
        return self
Example #27
def __rmul__(self, other):
    with use_device(self.device_id):
        return self.__mul__(other)
Example #28
def __call__(self, x, *args, **kwargs):
    with use_device(self._device_id):
        x = self.mark_enter(x)
        ret = self.forward(x, *args, **kwargs)
        return self.mark_leave(ret)
Example #29
def __call__(self, x):
    with use_device(self._device_id):
        return self.forward(x)