def solve(self, sampler, param_init = None, K = None,
          resume = None, new_lr = None):
     """The solve function.
     Input:
         sampler: the data sampler. sampler.sample() should return a list
             of training data, either (X, Y, weight) or (X, Y, None)
             depending on whether weight is enforced.
         param_init: the initial parameter. See SolverMC for details.
     """
     mode = self._args.get('mode', 'lbfgs').lower()
     # even when we use Adagrad we create a solver_basic to deal with
     # function value and gradient computation, etc.
     solver_basic = SolverMC(self._gamma, self.loss, self.reg,
             self._args, self._lossargs, self._regargs,
             self._fminargs)
     param = param_init
     iter_start = 0
     if resume is not None:
         # load parameters and solver state (w, b, iter, and optionally
         # accum_grad and base_lr) from the checkpoint file
         logging.debug('Resuming from %s' % resume)
         npzdata = np.load(resume)
         param = (npzdata['w'], npzdata['b'])
         iter_start = npzdata['iter'] + 1
         if 'accum_grad' in npzdata:
             accum_grad = npzdata['accum_grad']
         if 'base_lr' in npzdata:
             self._args['base_lr'] = npzdata['base_lr']
         if new_lr is not None:
             self._args['base_lr'] = new_lr
     timer = util.Timer()
     for iter in range(iter_start, self._args['num_iter']):
         Xbatch, Ybatch, weightbatch = sampler.sample(self._args['minibatch'])
         # carry out the computation
         if mode == 'lbfgs':
             accum_grad = None
             param = solver_basic.solve(Xbatch, Ybatch, weightbatch, param, K = K)
             logging.debug('iter %d time = %s' % \
                     (iter, str(timer.total(False))))
         else:
             # adagrad: compute gradient and update
             if iter == iter_start:
                 logging.debug("Adagrad: Initializing")
                 param_flat = solver_basic.presolve(\
                         Xbatch, Ybatch, weightbatch, param, K = K)
                 # we need to build the cache in solver_basic as well as
                 # the accumulated gradients
                 if iter == 0:
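                     # seed the accumulator with eta**2 plus machine eps so the
                     # denominator of the update is at least eta and never zero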
                     accum_grad = np.ones_like(param_flat) * \
                             (self._args.get('eta', 0.) ** 2) + \
                             np.finfo(np.float64).eps
                 if 'base_lr' not in self._args or self._args['base_lr'] < 0:
                     logging.debug("Adagrad: Performing line search")
                     # do a line search to get the value
                     self._args['base_lr'] = \
                             mathutil.wolfe_line_search_adagrad(param_flat,
                             lambda x: SolverMC.obj(x, solver_basic),
                             alpha = np.abs(self._args.get('base_lr', 1.)),
                             eta = self._args.get('eta', 0.))
                      # reset the timer to exclude the base learning rate tuning time
                     timer.reset()
             else:
                 solver_basic._X = Xbatch
                 solver_basic._Y = Ybatch
                 solver_basic._weight = weightbatch
             logging.debug("Adagrad: Computing func and grad")
             f0, g = SolverMC.obj(param_flat, solver_basic)
             logging.debug('gradient max/min: %f/%f' % (g.max(), g.min()))
             accum_grad += g * g
             # we are MINIMIZING, so go against the gradient direction
             param_flat -= g / np.sqrt(accum_grad) * self._args['base_lr']
              # the block below re-evaluates the objective after the update for
              # debugging; it is disabled (if False) for speed considerations.
             if False:
                 f = SolverMC.obj(param_flat, solver_basic)[0] 
                 logging.debug('iter %d f0 = %f f = %f time = %s' % \
                         (iter, f0, f,\
                         str(timer.total(False))))
             else:
                 logging.debug('iter %d f0 = %f time = %s' % \
                         (iter, f0, str(timer.total(False))))
             param = solver_basic.unflatten_params(param_flat)
         callback = self._args.get('callback', None)
         if callback is None:
             pass
         elif type(callback) is not list:
             cb_val = callback(param)
             logging.debug('cb: ' + str(cb_val))
         else:
             cb_val = [cb_func(param) for cb_func in callback]
             logging.debug('cb: ' + ' '.join([str(v) for v in cb_val]))
         if 'dump_every' in self._args and \
                 (iter + 1) % self._args['dump_every'] == 0:
             logging.debug('dumping param...')
             mpi.root_savez(self._args['dump_name'],\
                     iter=iter, w = param[0], b = param[1], \
                     accum_grad = accum_grad, base_lr = self._args['base_lr'])
     return param
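The Adagrad branch above amounts to a per-coordinate learning rate: squared gradients are accumulated and each step is scaled by base_lr / sqrt(accum_grad). A minimal standalone sketch of that update rule in plain numpy (the names here are illustrative, not part of iceberk):

import numpy as np

def adagrad_step(param_flat, grad, accum_grad, base_lr):
    # accumulate squared gradients, then scale each coordinate's step by
    # the inverse square root of its accumulated magnitude
    accum_grad += grad * grad
    param_flat -= base_lr * grad / np.sqrt(accum_grad)
    return param_flat, accum_grad

# toy usage: minimize 0.5 * ||x||^2, whose gradient is simply x
x = np.array([1.0, -2.0, 3.0])
accum = np.full_like(x, np.finfo(np.float64).eps)
for _ in range(100):
    x, accum = adagrad_step(x, x.copy(), accum, base_lr=0.5)

Coordinates that keep receiving large gradients have their effective step size shrunk faster, which is why the solver only needs a single scalar base_lr.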
Example #2
    sys.exit(0)

if FLAGS.speedtest > 0:
    logging.info("Testing speed")
    logging.info("minibatch size: %d" % FLAGS.minibatch)
    from iceberk.util import Timer
    timer = Timer()
    for i in range(FLAGS.speedtest):
        batch = sampler.sample(FLAGS.minibatch)
        logging.info("Local size: %d" % batch[0].shape[0])
        total_size = mpi.COMM.allreduce(batch[0].shape[0])
        logging.info("Total size: %d" % total_size)
        logging.info("Sampling took %s secs" % timer.lap())
    sys.exit(0)

logging.info("Performing classification")
if os.path.exists(DUMPNAME):
    resume = DUMPNAME
else:
    resume = None
# adagrad
solver = classifier.SolverStochastic(FLAGS.reg,
        loss,
        classifier.Reg.reg_l2,
        args = {'mode': 'adagrad', 'base_lr': FLAGS.base_lr, 'minibatch': FLAGS.minibatch,
                'num_iter': 1000, 'callback': callback, 'eta': 1e-8,
                'dump_every': 25, 'dump_name': DUMPNAME})
w, b = solver.solve(sampler, resume = resume)

mpi.root_savez(DUMPNAME[:-4] + ".final.npz", w = w, b = b)
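
Both examples rely on a sampler object following the contract described in the solve() docstring: sample(minibatch) returns (X, Y, weight), where weight may be None. The sampler (and the callback) used in Example #2 are defined outside the excerpt shown here; a minimal in-memory sampler obeying that contract could look like this hypothetical helper, which is not part of iceberk:

import numpy as np

class SimpleSampler(object):
    """Draws random minibatches from in-memory features and labels."""
    def __init__(self, X, Y):
        self._X = X
        self._Y = Y

    def sample(self, minibatch):
        # pick a random subset of rows; the third element is None since
        # no per-sample weighting is used
        idx = np.random.randint(0, self._X.shape[0], minibatch)
        return self._X[idx], self._Y[idx], None

The final weights saved at the end of Example #2 can later be recovered with np.load(DUMPNAME[:-4] + ".final.npz"), whose 'w' and 'b' entries hold the learned parameters.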