Example #1
class Solver(object):
    """
    Solver merges updates to optimize the ``Net``.

    Inspired by `SolverWrapper`_, this is a simplified port of the original C++ implementation.
    """
    def __init__(self, prototxt):
        """Construct a Solver.

        Parameters
        ----------
        prototxt : str
            The path of ``.prototxt`` file.

        Returns
        -------
        Solver
            The solver.

        Examples
        --------
        >>> solver = Solver('solver.prototxt')

        """
        self._param = pb.SolverParameter()
        Parse(open(prototxt, 'r').read(), self._param)
        self.ParseUpdateParam()
        self._net = None
        self._test_nets = []
        self._layer_blobs = []
        self._iter = self._current_step = 0
        self._optimizer = None
        self.scalar_writer = sw.ScalarSummary() if root_solver() else None

        self.InitTrainNet()
        self.InitTestNets()
        self.BuildNets()

    def InitTrainNet(self):
        """Initialize the train net.

        Returns
        -------
        None

        References
        ----------
        The implementation of `InitTrainNet(solver.cpp, L63)`_.

        """
        if self._param.HasField('net'):
            self._net = Net(self._param.net, "TRAIN")

        if self._param.HasField('train_net'):
            if self._net is not None:
                raise RuntimeError('net and train_net cannot both be specified.')
            self._net = Net(self._param.train_net, "TRAIN")

    def InitTestNets(self):
        """Initialize the test nets.

        Returns
        -------
        None

        References
        ----------
        The implementation of `InitTestNets(solver.cpp, L104)`_.

        """
        if mpi.Is_Init():
            idx, group = mpi.AllowParallel()
            # only the root in a parallel group can test
            if idx != -1 and mpi.Rank() != group[0]: return

        num_test_net = len(self._param.test_iter)
        if num_test_net > 0:
            if self._param.test_interval <= 0:
                raise RuntimeError('The test_interval value: {} is invalid.'
                                   .format(self._param.test_interval))

        if len(self._param.test_net) > 0:
            for test_net in self._param.test_net:
                self._test_nets.append(Net(test_net, "TEST"))
            num_test_net -= len(self._param.test_net)

        # consider generic_net
        if num_test_net > 0:
            self._test_nets.append(Net(self._param.net, "TEST"))

        # share with training net
        for test_net in self._test_nets: test_net.share_with(self._net)

    def BuildNets(self):
        """Build the nets.

        Returns
        -------
        None

        See Also
        --------
        `Net.function(*args, **kwargs)`_ - How to transform ``Net`` into ``Graph``.

        """
        self.train = self._net.function()
        self.tests = [test_net.function() for test_net in self._test_nets]

    def ParseUpdateParam(self):
        """Parse the parameters for optimizer.

        Returns
        -------
        None

        """
        self._update_param = {'scale_gradient': float(1.0 / self._param.iter_size),
                              'clip_gradient': float(self._param.clip_gradients),
                              'l2_decay': float(self._param.weight_decay) \
                                  if str(self._param.regularization_type) == 'L2' else -1.0}

    def BuildOptimizer(self):
        """Build the optimizer.

        Returns
        -------
        None

        """
        # collect
        for layer, blobs in self._net.params.items():
            self._layer_blobs.extend(blobs)
        # push
        for idx, blob in enumerate(self._layer_blobs):
            if self._net._lr_mults[idx] > 0:
                if blob.diff is None: continue
                self._optimizer.append((blob.data, blob.diff),
                                       self._net._lr_mults[idx],
                                       self._net._decay_mults[idx])
        self.update = theano.function(updater=self._optimizer)

    def GetLearningRate(self):
        """Get learning rate based on the preset policy.

        Returns
        -------
        None

        References
        ----------
        The implementation of `GetLearningRate(solver.cpp, L27)`_.
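
        Examples
        --------
        A rough sketch of the ``step`` policy, assuming ``base_lr=0.1``,
        ``gamma=0.1`` and ``stepsize=10000`` in the prototxt (hypothetical
        values):

        >>> # at iteration 25000, new_step = 25000 // 10000 = 2
        >>> round(0.1 * pow(0.1, 2), 6)  # lr = base_lr * gamma ** new_step
        0.001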

        """
        from dragon.config import logger
        policy = self._param.lr_policy

        if policy == "step":
            new_step = int(self._iter / self._param.stepsize)
            if self._current_step != new_step:
                new_lr = self._param.base_lr * pow(self._param.gamma, new_step)
                self._current_step = new_step
                self._optimizer.lr = new_lr

        if policy == 'multistep':
            if self._current_step < len(self._param.stepvalue) \
                    and self._iter >= self._param.stepvalue[self._current_step]:
                self._current_step = self._current_step + 1
                logger.info('MultiStep Status: Iteration {},  step = {}' \
                    .format(self._iter, self._current_step))
                new_lr = self._param.base_lr * \
                         pow(self._param.gamma, self._current_step)
                self._optimizer.lr = new_lr

        if policy == 'multifixed':
            stage_lrs = self._param.stage_lr
            stage_iters = self._param.stage_iter
            if self._iter < stage_iters[self._current_step]:
                self._optimizer.lr = stage_lrs[self._current_step]
            else:
                if self._current_step + 1 < len(stage_iters):
                    self._current_step = self._current_step + 1
                    logger.info('MultiFixed Status: Iteration {},  stage = {}' \
                        .format(self._iter, self._current_step))
                    self._optimizer.lr = stage_lrs[self._current_step]

        if policy == 'inv':
            power = self._param.power
            gamma = self._param.gamma
            self._optimizer.lr = self._param.base_lr * \
                               pow(1.0 + gamma * self._iter, -power)

        if policy == 'poly':
            power = self._param.power
            max_iter = self._param.max_iter
            self._optimizer.lr = self._param.base_lr * \
                        pow(1.0 - float(self.iter) / max_iter, power)

    def Test(self, test_idx):
        """Test the specific net.

        Parameters
        ----------
        test_idx : int
            The index of the test net.

        Returns
        -------
        None

        References
        ----------
        The implementation of `Test(solver.cpp, L328)`_.
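
        Examples
        --------
        A minimal sketch, assuming the solver prototxt declares at least one
        ``test_net`` together with a matching ``test_iter``:

        >>> solver = Solver('solver.prototxt')
        >>> solver.Test(0)  # run the first test net for test_iter[0] iterations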

        """
        from dragon.config import logger
        test_score = []
        output_id = []
        test_iter = self._param.test_iter[test_idx]
        net = self._test_nets[test_idx]

        for iter in xrange(test_iter):
            self.tests[test_idx](return_outputs=False)
            if not root_solver(): continue
            if iter == 0:
                for net_output in net._net_outputs:
                    vals = ws.FetchTensor(net.blobs[net_output].data)
                    for idx, val in enumerate(vals):
                        test_score.append(val)
                        output_id.append(net_output)
            else:
                i = 0
                for net_output in net._net_outputs:
                    vals = ws.FetchTensor(net.blobs[net_output].data)
                    for idx, val in enumerate(vals):
                        test_score[i] += val
                        i += 1

        if not root_solver(): return

        logger.info('Iteration {}, Test net #{}'.format(self._iter, test_idx))
        for idx, score in enumerate(test_score):
            logger.info('		 Test net output #%d(%s): %.4f' % (idx, output_id[idx], score / test_iter))
            self.scalar_writer.add_summary((output_id[idx], score / test_iter), self._iter)

    def step(self, iters):
        """Step the train net. [**PyCaffe Style**]

        Parameters
        ----------
        iters : int
            The number of iterations to step.

        Returns
        -------
        None

        References
        ----------
        The implementation of `Step(solver.cpp, L180)`_.
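
        Examples
        --------
        A minimal sketch of a training session, assuming the optimizer has
        already been attached and compiled via ``BuildOptimizer``:

        >>> solver = Solver('solver.prototxt')
        >>> solver.step(1000)  # train for 1000 iterations
        >>> solver.snapshot()  # then save the parameters manually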

        """
        from dragon.config import logger
        start_iter = self._iter; stop_iter = self._iter + iters
        loss_vec = []; smoothed_loss = 0
        tic = time.time()
        while self._iter < stop_iter:
            # test if necessary
            if self._param.test_interval and \
                 self._iter % self._param.test_interval == 0:
                if (self._iter == 0 and
                        self._param.test_initialization) or self._iter != 0:
                    for test_id in xrange(len(self.tests)): self.Test(test_id)

            # forward & backward & compute_loss
            loss = 0.0
            for i in xrange(self._param.iter_size):
                self.train(return_outputs=False)
                if root_solver():
                    for cost in self._net._costs:
                        cost_value = ws.FetchTensor(cost)
                        if cost_value.size == 1:
                            loss += cost_value[0]

            if root_solver():
                loss /= self._param.iter_size
                if len(loss_vec) < self._param.average_loss:
                    loss_vec.append(loss)
                    smoothed_loss = (smoothed_loss * (len(loss_vec) - 1) + loss) / len(loss_vec)
                else:
                    idx = (self._iter - start_iter) % self._param.average_loss
                    smoothed_loss += ((loss - loss_vec[idx]) / self._param.average_loss)
                    loss_vec[idx] = loss

            # apply update
            self.GetLearningRate()
            self.update()

            # display
            if root_solver() and self._param.display:
                if self._iter % self._param.display == 0:
                    base_lr = self._optimizer.lr
                    logger.info('Iteration %d, lr = %s, loss = %f, time = %.2fs' % \
                          (self._iter, str(base_lr), smoothed_loss, time.time() - tic))
                    tic = time.time()
                    for idx, net_output in enumerate(self._net.outputs):
                        vals = ws.FetchTensor(self._net.blobs[net_output].data)
                        for val in vals:
                            logger.info('		Train net output #{}({}): {}'.format(idx, net_output, val))
                            self.scalar_writer.add_summary((net_output, val), self._iter)
            self._iter = self._iter + 1

            # snapshot
            if self._param.snapshot:
                if self._iter % self._param.snapshot == 0: self.snapshot()

    def snapshot(self):
        """Snapshot the parameters of train net. [**PyCaffe Style**]

        Returns
        -------
        None

        See Also
        --------
        `workspace.Snapshot(*args, **kwargs)`_ - How to snapshot tensors into a file.

        References
        ----------
        The implementation of `Snapshot(solver.cpp, L403)`_.
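
        Examples
        --------
        A minimal sketch; with a ``snapshot_prefix`` of ``models/lenet``
        (hypothetical) and the iteration counter at 5000, the output file is
        expected to be ``models/lenet_iter_5000.caffemodel``:

        >>> solver.snapshot()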

        """
        tensors = [blob.data for blob in self._layer_blobs]
        filename = "_iter_" + str(self._iter)
        ws.Snapshot(tensors, filename,
                    prefix=self._param.snapshot_prefix,
                    suffix='.caffemodel', format='caffe')

    @property
    def net(self):
        """Return the train net. [**PyCaffe Style**]

        Returns
        -------
        Net
            The train net.

        """
        return self._net

    @property
    def test_nets(self):
        """Return the test nets. [**PyCaffe Style**]

        Returns
        -------
        list of Net
            The test nets.

        """
        return self._test_nets

    @property
    def iter(self):
        """Return or Set the current iteration. [**PyCaffe Style**]

        Parameters
        ----------
        iter : int
            The value of iteration to set.

        Returns
        -------
        int
            The current iteration.
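
        Examples
        --------
        A minimal sketch of resuming training from a known iteration
        (hypothetical value), e.g. after restoring a snapshot elsewhere:

        >>> solver.iter = 20000
        >>> solver.step(1000)  # continue from iteration 20000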

        """
        return self._iter

    @iter.setter
    def iter(self, value):
        self._iter = value
Example #2
class Solver(object):
    """Solver merges updates to optimize the ``Net``.

    Inspired by `SolverWrapper`_, this is a simplified port of the original C++ implementation.

    """
    def __init__(self, proto_txt):
        """Construct a Solver.

        Parameters
        ----------
        proto_txt : str
            The path of ``.prototxt`` file.

        Returns
        -------
        Solver
            The solver.

        Examples
        --------
        >>> solver = Solver('solver.prototxt')

        """
        self._param = pb.SolverParameter()
        parse_text_proto(open(proto_txt, 'r').read(), self._param)
        if self._param.iter_size > 1:
            raise NotImplementedError('Gradient accumulation is deprecated.')
        self._net = None
        self._test_nets = []
        self._layer_blobs = []
        self._iter = self._current_step = 0
        self.optimizer = None
        self.InitTrainNet()
        self.InitTestNets()
        self.BuildNets()
        self.ParseOptimizerArguments()

    def InitTrainNet(self):
        """Initialize the train net.

        Returns
        -------
        None

        References
        ----------
        The implementation of `InitTrainNet(solver.cpp, L63)`_.

        """
        if self._param.HasField('net'):
            self._net = Net(self._param.net, "TRAIN")

        if self._param.HasField('train_net'):
            if self._net is not None:
                raise RuntimeError(
                    'net and train_net cannot both be specified.')
            self._net = Net(self._param.train_net, "TRAIN")

    def InitTestNets(self):
        """Initialize the test nets.

        Returns
        -------
        None

        References
        ----------
        The implementation of `InitTestNets(solver.cpp, L104)`_.

        """
        if dragon.mpi.Is_Init():
            idx, group = dragon.mpi.AllowParallel()
            # Only the root in a parallel group can test
            if idx != -1 and dragon.mpi.Rank() != group[0]: return

        num_test_net = len(self._param.test_iter)
        if num_test_net > 0:
            if self._param.test_interval <= 0:
                raise RuntimeError('The test_interval value: {} is invalid.'
                                   .format(self._param.test_interval))

        if len(self._param.test_net) > 0:
            for test_net in self._param.test_net:
                self._test_nets.append(Net(test_net, "TEST"))
            num_test_net -= len(self._param.test_net)

        # Consider generic_net
        if num_test_net > 0:
            self._test_nets.append(Net(self._param.net, "TEST"))

    def BuildNets(self):
        """Build the nets.

        Returns
        -------
        None

        See Also
        --------
        `Net.function(*args, **kwargs)`_ - How to transform ``Net`` into ``Graph``.

        """
        self.train = self._net.function()
        self.tests = [test_net.function() for test_net in self._test_nets]

    def ParseOptimizerArguments(self):
        """Parse the arguments for optimizer.

        Returns
        -------
        None

        """
        self._optimizer_arguments = {
            'scale_gradient': float(1.0 / self._param.iter_size),
            'clip_gradient': float(self._param.clip_gradients),
            'l2_decay': float(self._param.weight_decay) \
                if str(self._param.regularization_type) == 'L2' else -1.0,
        }

    def BuildOptimizer(self):
        """Build the optimizer.

        Returns
        -------
        None
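
        Examples
        --------
        A minimal sketch; ``make_updater`` is a hypothetical helper that
        builds an updater object exposing ``append(...)`` and ``base_lr``
        from the solver parameters:

        >>> solver.optimizer = make_updater(solver._param)  # hypothetical
        >>> solver.BuildOptimizer()
        >>> solver.update()  # apply one update over the collected blobs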

        """
        # Collect
        for layer, blobs in self.net.params.items():
            self._layer_blobs.extend(blobs)

        # Push
        for idx, blob in enumerate(self._layer_blobs):
            if blob.lr_multiplier > 0 and blob.diff is not None:
                self.optimizer.append((blob.data, blob.diff),
                                      blob.lr_multiplier,
                                      blob.decay_multiplier)

        # Compile
        self.update = dragon.function(updater=self.optimizer)

    def GetLearningRate(self):
        """Get learning rate based on the preset policy.

        Returns
        -------
        None

        References
        ----------
        The implementation of `GetLearningRate(solver.cpp, L27)`_.
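
        Examples
        --------
        A rough sketch of the ``poly`` policy, assuming ``base_lr=0.01``,
        ``power=0.9`` and ``max_iter=100000`` (hypothetical values):

        >>> # at iteration 50000, half of the schedule is consumed
        >>> round(0.01 * pow(1.0 - 50000.0 / 100000, 0.9), 6)
        0.005359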

        """
        policy = self._param.lr_policy

        if policy == "step":
            new_step = int(self.iter / self._param.stepsize)
            if self._current_step != new_step:
                new_lr = self._param.base_lr * pow(self._param.gamma, new_step)
                self._current_step = new_step
                self.optimizer.base_lr = new_lr

        if policy == 'multistep':
            if self._current_step < len(self._param.stepvalue) \
                    and self.iter >= self._param.stepvalue[self._current_step]:
                self._current_step = self._current_step + 1
                print('MultiStep Status: Iteration {},  step = {}' \
                    .format(self.iter, self._current_step))
                new_lr = self._param.base_lr * \
                         pow(self._param.gamma, self._current_step)
                self.optimizer.base_lr = new_lr

        if policy == 'multifixed':
            stage_lrs = self._param.stage_lr
            stage_iters = self._param.stage_iter
            if self.iter < stage_iters[self._current_step]:
                self.optimizer.base_lr = stage_lrs[self._current_step]
            else:
                if self._current_step + 1 < len(stage_iters):
                    self._current_step = self._current_step + 1
                    print('MultiFixed Status: Iteration {},  stage = {}' \
                        .format(self.iter, self._current_step))
                    self.optimizer.base_lr = stage_lrs[self._current_step]

        if policy == 'inv':
            power = self._param.power
            gamma = self._param.gamma
            self.optimizer.base_lr = self._param.base_lr * \
                pow(1.0 + gamma * self.iter, -power)

        if policy == 'poly':
            power = self._param.power
            max_iter = self._param.max_iter
            self.optimizer.base_lr = self._param.base_lr * \
                pow(1.0 - float(self.iter) / max_iter, power)

    def Test(self, test_idx):
        """Test the specific net.

        Parameters
        ----------
        test_idx : int
            The index of the test net.

        Returns
        -------
        None

        References
        ----------
        The implementation of `Test(solver.cpp, L328)`_.

        """
        test_score, output_id = [], []
        net = self._test_nets[test_idx]
        test_iter = self._param.test_iter[test_idx]

        for iter in range(test_iter):
            self.tests[test_idx](return_outputs=False)
            if not root_solver(): continue
            if iter == 0:
                for key in net.outputs:
                    values = net.blobs[key].data.get_value().flatten()
                    for idx, value in enumerate(values):
                        test_score.append(value)
                        output_id.append(key)
            else:
                i = 0
                for key in net.outputs:
                    values = net.blobs[key].data.get_value().flatten()
                    for idx, value in enumerate(values):
                        test_score[i] += value
                        i += 1

        if not root_solver(): return

        print('Iteration {}, Test net #{}'.format(self.iter, test_idx))
        for idx, score in enumerate(test_score):
            print('		 Test net output #%d(%s): %.4f' %
                  (idx, output_id[idx], score / test_iter))

    def step(self, iters):
        """Step the train net. [**PyCaffe Style**]

        Parameters
        ----------
        iters : int
            The number of iterations to step.

        Returns
        -------
        None

        References
        ----------
        The implementation of `Step(solver.cpp, L180)`_.

        """
        start_iter, stop_iter = self.iter, self.iter + iters
        loss_vec, smoothed_loss = [], 0.

        tic = time.time()

        while self.iter < stop_iter:
            # Test if necessary
            if self._param.test_interval and \
                 self.iter % self._param.test_interval == 0:
                if (self.iter == 0
                        and self._param.test_initialization) or self.iter != 0:
                    for test_id in range(len(self.tests)):
                        self.Test(test_id)

            # Forward && Backward && Compute Loss
            loss = 0.0
            for i in range(self._param.iter_size):
                self.train(return_outputs=False)
                if root_solver():
                    for e in self.net.losses:
                        values = e.get_value().flatten()
                        for v in values:
                            loss += v

            if root_solver():
                loss /= self._param.iter_size
                if len(loss_vec) < self._param.average_loss:
                    loss_vec.append(loss)
                    smoothed_loss = (smoothed_loss * (len(loss_vec) - 1) +
                                     loss) / len(loss_vec)
                else:
                    idx = (self.iter - start_iter) % self._param.average_loss
                    smoothed_loss += ((loss - loss_vec[idx]) /
                                      self._param.average_loss)
                    loss_vec[idx] = loss

            # Apply Update
            self.GetLearningRate()
            self.update()

            # Display
            if root_solver() and self._param.display:
                if self.iter % self._param.display == 0:
                    base_lr = self.optimizer.base_lr
                    print('Iteration %d, lr = %s, loss = %f, time = %.2fs' % \
                          (self.iter, str(base_lr), smoothed_loss, time.time() - tic))
                    tic = time.time()
                    for idx, net_output in enumerate(self.net.outputs):
                        values = self.net.blobs[net_output].data.get_value().flatten()
                        for v in values:
                            print('		Train net output #{}({}): {}'.format(
                                idx, net_output, v))

            # Inc Iterations
            self.iter = self.iter + 1

            # Snapshot
            if self._param.snapshot:
                if self.iter % self._param.snapshot == 0: self.snapshot()

    def one_step(self):
        """One step run the train net.

        Returns
        -------
        dict
            The stats.
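
        Examples
        --------
        A minimal sketch of a custom training loop; the key names follow the
        stats dict assembled below:

        >>> stats = solver.one_step()
        >>> print(stats['iter'], stats['lr'], stats['loss']['total'])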

        """
        if self._param.test_interval and \
                self.iter % self._param.test_interval == 0:
            if (self.iter == 0
                    and self._param.test_initialization) or self.iter != 0:
                for test_id in range(len(self.tests)):
                    self.Test(test_id)

        # Forward && Backward && Compute_loss
        run_time, stats = 0., {'loss': {'total': 0.}, 'iter': self.iter}
        for i in range(self._param.iter_size):
            tic = time.time()
            self.train(return_outputs=False)
            run_time += (time.time() - tic)

            # Total loss
            for e in self.net.losses:
                values = e.get_value().flatten()
                if values.size == 1:
                    stats['loss']['total'] += values[0]

            # Partial loss
            for key in self.net.outputs:
                values = self.net.blobs[key].data.get_value().flatten()
                if values.size != 1: continue
                if key not in stats['loss']: stats['loss'][key] = 0.
                stats['loss'][key] += values[0]

        # Apply Update
        self.GetLearningRate()
        tic = time.time()
        self.update()
        run_time += (time.time() - tic)
        self.iter = self.iter + 1

        # Snapshot
        if self._param.snapshot:
            if self.iter % self._param.snapshot == 0: self.snapshot()

        # Average loss by the iter size
        for k in stats['loss'].keys():
            stats['loss'][k] /= self._param.iter_size

        # Misc stats
        stats['lr'] = self.optimizer.base_lr
        stats['time'] = run_time
        return stats

    def snapshot(self):
        """Snapshot the parameters of train net. [**PyCaffe Style**]

        Returns
        -------
        None

        See Also
        --------
        `workspace.Snapshot(*args, **kwargs)`_ - How to snapshot tensors into a file.

        References
        ----------
        The implementation of `Snapshot(solver.cpp, L403)`_.

        """
        tensors = [blob.data for blob in self._layer_blobs]
        filename = "_iter_" + str(self.iter)
        dragon.workspace.Snapshot(tensors,
                                  filename,
                                  prefix=self._param.snapshot_prefix,
                                  suffix='.caffemodel',
                                  format='caffe')

    @property
    def net(self):
        """Return the train net. [**PyCaffe Style**]

        Returns
        -------
        Net
            The train net.

        """
        return self._net

    @property
    def test_nets(self):
        """Return the test nets. [**PyCaffe Style**]

        Returns
        -------
        list of Net
            The test nets.

        """
        return self._test_nets

    @property
    def iter(self):
        """Return or Set the current iteration. [**PyCaffe Style**]

        Parameters
        ----------
        iter : int
            The value of iteration to set.

        Returns
        -------
        int
            The current iteration.

        """
        return self._iter

    @iter.setter
    def iter(self, value):
        self._iter = value

    @property
    def base_lr(self):
        """Return or Set the current learning rate. [**Extended**]

        Parameters
        ----------
        base_lr : float
            The value of learning rate to set.

        Returns
        -------
        float
            The current learning rate.
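
        Examples
        --------
        A minimal sketch of overriding the policy-driven rate by hand
        (hypothetical value):

        >>> solver.base_lr = 0.01  # takes effect from the next update
        >>> solver.step(100)       # continue training at the new rate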

        """
        return self.optimizer.base_lr

    @base_lr.setter
    def base_lr(self, value):
        self.optimizer.base_lr = value