Exemple #1
0
def _grad(self, initial=None, detach_graph=True, weight_decay=None, **kwargs):
    '''This method follows computational graph and returns the gradients of
    Variable object.

    Args:
        initial (ndarray): Initial value of following the graph.
        detach_graph (bool): If it's True, the computational graph will be destroyed.
        weight_decay (float): Sets the default weight decay of the model.
                            See the Variable class for more info.
    '''
    if not self._has_autoupdate():
        return Grads()

    if initial is None:
        if self.size > 1:
            raise ValueError("Initial diff is required for scalar value.")

        if is_cuda_active():
            initial = Node(get_gpu(self).ones_like_me())
        else:
            initial = np.ones_like(self).astype(precision)

    context = Grads(self, weight_decay=weight_decay)
    self._update_diff(context, initial, **kwargs)

    if detach_graph:
        self.detach_graph()
    return context
Exemple #2
0
 def _backward_gpu(self, context, dy, **kwargs):
     """Propagate ``dy`` to the input by running ``imnpool`` with CUDA off.

     ``dy`` is moved to the CPU, CUDA is deactivated while ``imnpool`` runs
     in ``mode="max"`` with ``dy`` as ``alternate_input``, and the result is
     wrapped in a ``Node`` and handed to ``self.attrs._x._update_diff``.

     Args:
         context: Gradient context forwarded to ``_update_diff``.
         dy: Incoming gradient; must provide ``to_cpu()``.
         **kwargs: Accepted for interface compatibility; unused here.
     """
     dy.to_cpu()
     cu.set_cuda_active(False)
     try:
         dx = imnpool(self.attrs._original_x, self.attrs._kernel, self.attrs._stride,
                      self.attrs._padding, mode="max", alternate_input=dy)
     finally:
         # Re-enable CUDA even if imnpool raises, so a failure here does not
         # leave the process-wide CUDA switch turned off.
         cu.set_cuda_active(True)
     dx = Node(dx)
     self.attrs._x._update_diff(context, dx)
def gpu_check(opt):
    """Check that ``opt`` yields matching results with CUDA off and on.

    ``opt`` is called four times on the same random ``(grad, node)`` pair
    with CUDA disabled, reset, and called four more times with CUDA enabled.
    The fourth result of each run is type-checked (``Node`` on CPU,
    ``GPUValue`` on GPU) and the two are compared with ``close``.

    Args:
        opt: Callable taking ``(grad, node)`` and exposing ``reset()``.
    """
    shape = (3, 3, 3, 3)
    node = Variable(np.array(np.random.rand(*shape), dtype=precision))
    grad = Variable(np.array(np.random.rand(*shape), dtype=precision))

    def fourth_result():
        # Three warm-up calls followed by the one whose output is kept.
        result = None
        for _ in range(4):
            result = opt(grad, node)
        return result

    # CPU pass.
    set_cuda_active(False)
    dy_cpu = fourth_result()
    assert isinstance(dy_cpu, Node)
    opt.reset()

    # GPU pass.
    set_cuda_active(True)
    dy_gpu = fourth_result()
    assert isinstance(dy_gpu, GPUValue)

    # Bring the GPU result back to the host before comparing.
    dy_gpu = Node(dy_gpu)
    dy_gpu.to_cpu()

    close(dy_gpu, dy_cpu)
Exemple #4
0
    def join_grads(self, grads, others):
        """Merge gradients of other models into ``grads``.

        Args:
            grads: Gradient container of this model. It is read with
                ``grads.get(obj, None)`` and updated in place with
                ``grads.set(obj, value)``.
            others: Iterable of ``(model, grads)`` tuples to be merged.
                Models listed here should have the same structure as self.

        For every ``(name, attrname)`` entry returned by the other model's
        ``_get_grads``, the matching parameter object of ``self`` is looked
        up and the other gradient is added to the one already in ``grads``.
        Parameters with no gradient in ``grads`` are left untouched.
        """
        values = {
            name: params
            for name, params, _attrs in self.flatten_values()
        }
        for model, other_grads in others:
            other_diffs = model._get_grads(other_grads)

            for (name, attrname), diff in other_diffs.items():
                obj = values[name][attrname]
                curdiff = grads.get(obj, None)
                if curdiff is None:
                    # Nothing to accumulate into. Previously this path fell
                    # through to grads.set() with an unbound (or stale)
                    # ``newdiff``.
                    continue

                if not isinstance(curdiff, Node):
                    curdiff = Node(curdiff)
                if not isinstance(diff, Node):
                    diff = Node(diff)

                with use_device(curdiff.device_id):
                    if GPUValue is not None and diff.device_id != curdiff.device_id:
                        # Copy the foreign gradient onto curdiff's device
                        # before adding.
                        g = GPUValue(shape=diff.shape)
                        g.copy_from(diff.get_gpu())
                        diff = Node(g)

                    newdiff = curdiff + diff

                grads.set(obj, newdiff)
Exemple #5
0
    def load(self, filename):
        """Load saved weights to model.

        The file is expected to contain two top-level groups, ``values`` and
        ``types``. Each key of ``values`` is a dotted path whose components
        after the first are resolved with ``getattr`` starting from ``self``
        to find the target object. Entries whose key starts with
        ``__dict__.`` are set as attributes on that target; all others are
        set on ``target.params``.

        Args:
            filename (str): File name of saved model.

        Example:
            >>> model = rm.Dense(2)
            >>> model.load("model.hd5")
        """
        import h5py

        def get_attr(root, path):
            # Drop the first path component and walk the rest.
            ret = root
            for part in path.split('.')[1:]:
                ret = getattr(ret, part)
            return ret

        # The context manager closes the file even if loading fails midway.
        with h5py.File(filename, 'r') as f:
            values = f['values']
            types = f['types']

            for group_name in sorted(values.keys()):
                target = get_attr(self, group_name)

                values_grp = values[group_name]
                types_grp = types[group_name]

                for k, v in values_grp.items():
                    # ``dataset[()]`` reads the whole dataset; it replaces
                    # the ``.value`` property removed in h5py 3.
                    v = v[()]
                    if isinstance(v, np.ndarray):
                        type_ds = types_grp.get(k, None)
                        if type_ds:
                            type_name = type_ds[()]
                            if isinstance(type_name, bytes):
                                # Newer h5py returns stored strings as bytes.
                                type_name = type_name.decode()
                            if type_name == 'renom.Variable':
                                auto_update = types_grp[k + '._auto_update'][()]
                                v = Variable(v, auto_update=auto_update)
                            else:
                                v = Node(v)

                    if k.startswith('__dict__.'):
                        obj = target
                        attr_name = k.split(".", 1)[1]
                    else:
                        obj = target.params
                        attr_name = k

                    setattr(obj, attr_name, v)
Exemple #6
0
    def train(self, train_distributor, test_distributor=None):
        """Train method.
        This method executes the train loop for ``self.num_epoch`` epochs.

        Each batch is split into one equal-sized shard per model (any
        remainder is dropped). Every model runs forward, loss and backward
        on its shard; with more than one GPU the gradients are joined into
        the first model's gradients before the optimizer update.
        ``self.on_event`` is called with 'start', 'start_epoch', 'forward',
        'loss', 'backward', 'grad', 'updated' and 'end_epoch' at the
        matching points. ``self.avg_train_loss`` is a running mean of the
        first model's loss only.

        Args:
            train_distributor (Distributor): Distributor for yielding train data.
            test_distributor (Distributor): Distributor for yielding test data.
                It is only stored on ``self.test_distributor`` here; this
                method does not compute a validation loss itself
                (presumably an event handler does - confirm).
        """

        def shard(batch):
            # Split ``batch`` into len(models) equal slices and, when CUDA is
            # active, wrap each slice in a Node and move it to its device.
            step = len(batch) // len(models)
            parts = [
                batch[i:i + step]
                for i in range(0, step * len(models), step)
            ]
            if is_cuda_active():
                parts = [Node(p) for p in parts]
                for n, p in enumerate(parts):
                    with use_device(n):
                        p.to_gpu()
            return parts

        self.epoch = 0
        self.train_distributor = train_distributor
        self.test_distributor = test_distributor
        self.on_event('start')
        self.train_loss_list = []
        self.test_loss_list = []

        # One replica per GPU; the first entry is the model being trained.
        models = [self.model]
        if self.num_gpu > 1:
            models.extend(
                [self.model.__class__() for _ in range(self.num_gpu - 1)])
            for n in range(self.num_gpu):
                models[n].set_gpu(n)

        while self.epoch < self.num_epoch:
            self.on_event('start_epoch')
            self.nth = 0
            self.avg_train_loss = 0

            for iteration, (data, target) in enumerate(
                    self.train_distributor.batch(self.batch_size,
                                                 self.shuffle)):
                self.data = shard(data)
                self.targets = shard(target)

                # Sync replicas with the primary model before the forward pass.
                for gpu in range(1, self.num_gpu):
                    models[gpu].copy_params(models[0])

                for gpu in range(0, self.num_gpu):
                    models[gpu].set_models(inference=False)

                self.on_event('forward')
                self.outputs = []

                for gpu in range(self.num_gpu):
                    model = models[gpu]
                    with model.train():
                        self.outputs.append(model(self.data[gpu]))

                self.on_event('loss')
                self.losses = []

                for gpu in range(self.num_gpu):
                    with use_device(gpu):
                        self.losses.append(
                            self.loss_func(self.outputs[gpu],
                                           self.targets[gpu]))

                # Incremental mean over the iterations of this epoch.
                self.avg_train_loss += (self.losses[0] -
                                        self.avg_train_loss) / (iteration + 1)

                self.on_event('backward')
                self.grads = []

                for gpu in range(self.num_gpu):
                    with use_device(gpu):
                        self.grads.append(self.losses[gpu].grad())

                self.on_event('grad')

                if self.num_gpu > 1:
                    models[0].join_grads(self.grads[0],
                                         zip(models[1:], self.grads[1:]))

                self.grads[0].update(self.optimizer)

                self.on_event('updated')
                self.nth += 1

            self.on_event('end_epoch')
            self.epoch += 1

            # release objects
            # ``self.targets`` is the attribute populated above; the original
            # code cleared the misspelled ``self.target`` instead, which is
            # still reset here for backward compatibility.
            self.data = self.targets = self.target = None
            self.outputs = self.losses = self.grads = None
            self.avg_train_loss = None
 def preload_single(batch):
     """Cast ``batch`` to the configured precision and wrap it for the GPU.

     Inside ``cu.asyncBehaviour()``, the batch is converted with ``astype``,
     passed to ``cu.pinNumpy``, and the result of ``get_gpu`` on it is
     wrapped in a ``Node``, which is returned.
     """
     with cu.asyncBehaviour():
         target_dtype = np.dtype(precision)
         batch = batch.astype(target_dtype)
         cu.pinNumpy(batch)
         gpu_value = get_gpu(batch)
         gpu_node = Node(gpu_value)
     return gpu_node
Exemple #8
0
 def store(self, node, dy):
     """Record ``dy``, wrapped in a ``Node``, under the key ``id(node)``.

     NOTE(review): the attribute is spelled ``stroage``; it is defined
     outside this method, so the spelling is kept as-is.
     """
     # if cuda active, dy must be GPUValue type.
     wrapped = Node(dy)
     self.stroage[id(node)] = wrapped