def solve(self, sampler, param_init = None, K = None, resume = None,
          new_lr = None):
    """The solve function.

    Runs stochastic training: each iteration draws a minibatch from the
    sampler and either performs a full SolverMC solve on it (lbfgs mode)
    or one Adagrad update step (adagrad mode).

    Input:
        sampler: the data sampler. sampler.sample() should return a list
            of training data, either (X, Y, weight) or (X, Y, None)
            depending on whether weight is enforced.
        param_init: the initial parameter. See SolverMC for details.
        K: forwarded to SolverMC.solve() / presolve(); presumably the
            number of output classes — TODO confirm against SolverMC.
        resume: if not None, the filename of an .npz checkpoint (as
            written by the 'dump_every' logic below, containing keys
            'w', 'b', 'iter' and optionally 'accum_grad', 'base_lr')
            to resume training from.
        new_lr: if not None, overrides self._args['base_lr'] after any
            resume logic has run (useful to change the learning rate
            when resuming).
    Returns:
        param: the final (w, b) parameter as produced by SolverMC.
    """
    mode = self._args.get('mode', 'lbfgs').lower()
    # even when we use Adagrad we create a solver_basic to deal with
    # function value and gradient computation, etc.
    solver_basic = SolverMC(self._gamma, self.loss, self.reg, self._args,
                            self._lossargs, self._regargs, self._fminargs)
    param = param_init
    iter_start = 0
    if resume is not None:
        # load data from the checkpoint: parameters, the iteration to
        # restart at, and (when present) the Adagrad accumulator and the
        # learning rate that was in effect when the dump was written.
        logging.debug('Resuming from %s' % resume)
        npzdata = np.load(resume)
        param = (npzdata['w'], npzdata['b'])
        iter_start = npzdata['iter'] + 1
        if 'accum_grad' in npzdata:
            accum_grad = npzdata['accum_grad']
        if 'base_lr' in npzdata:
            self._args['base_lr'] = npzdata['base_lr']
    if new_lr is not None:
        # explicit override wins over both the stored args and the
        # checkpointed learning rate.
        self._args['base_lr'] = new_lr
    timer = util.Timer()
    for iter in range(iter_start, self._args['num_iter']):
        Xbatch, Ybatch, weightbatch = sampler.sample(self._args['minibatch'])
        # carry out the computation
        if mode == 'lbfgs':
            # lbfgs mode: run a full SolverMC solve on this minibatch,
            # warm-started from the current param. No Adagrad state.
            accum_grad = None
            param = solver_basic.solve(Xbatch, Ybatch, weightbatch, param,
                                       K = K)
            logging.debug('iter %d time = %s' % \
                          (iter, str(timer.total(False))))
        else:
            # adagrad: compute gradient and update
            if iter == iter_start:
                # first iteration after a (re)start: presolve flattens the
                # parameters and primes solver_basic's internal cache.
                logging.debug("Adagrad: Initializing")
                param_flat = solver_basic.presolve(\
                        Xbatch, Ybatch, weightbatch, param, K = K)
                # we need to build the cache in solver_basic as well as
                # the accumulated gradients
                if iter == 0:
                    # fresh run (not a resume): seed the accumulator with
                    # eta^2 plus machine epsilon so the sqrt below never
                    # divides by zero.
                    accum_grad = np.ones_like(param_flat) * \
                            (self._args.get('eta', 0.) ** 2) + \
                            np.finfo(np.float64).eps
                if 'base_lr' not in self._args or self._args['base_lr'] < 0:
                    # a missing or negative base_lr requests automatic
                    # tuning; its absolute value seeds the search.
                    logging.debug("Adagrad: Performing line search")
                    # do a line search to get the value
                    self._args['base_lr'] = \
                            mathutil.wolfe_line_search_adagrad(param_flat,
                                lambda x: SolverMC.obj(x, solver_basic),
                                alpha = np.abs(self._args.get('base_lr', 1.)),
                                eta = self._args.get('eta', 0.))
                    # reset the timer to exclude the base learning rate
                    # tuning time
                    timer.reset()
            else:
                # subsequent iterations: just swap the new minibatch into
                # solver_basic's cached fields (presolve already ran).
                solver_basic._X = Xbatch
                solver_basic._Y = Ybatch
                solver_basic._weight = weightbatch
            logging.debug("Adagrad: Computing func and grad")
            f0, g = SolverMC.obj(param_flat, solver_basic)
            logging.debug('gradient max/min: %f/%f' % (g.max(), g.min()))
            accum_grad += g * g
            # we are MINIMIZING, so go against the gradient direction
            param_flat -= g / np.sqrt(accum_grad) * self._args['base_lr']
            # the below code could be used to debug, but is commented out
            # currently for speed considerations.
            if False:
                # re-evaluates the objective after the step to log the
                # post-update function value as well.
                f = SolverMC.obj(param_flat, solver_basic)[0]
                logging.debug('iter %d f0 = %f f = %f time = %s' % \
                              (iter, f0, f,\
                               str(timer.total(False))))
            else:
                logging.debug('iter %d f0 = %f time = %s' % \
                              (iter, f0, str(timer.total(False))))
            param = solver_basic.unflatten_params(param_flat)
        # optional user callback(s) on the current parameter: either a
        # single callable or a list of callables.
        callback = self._args.get('callback', None)
        if callback is None:
            pass
        elif type(callback) is not list:
            cb_val = callback(param)
            logging.debug('cb: ' + str(cb_val))
        else:
            cb_val = [cb_func(param) for cb_func in callback]
            logging.debug('cb: ' + ' '.join([str(v) for v in cb_val]))
        # periodic checkpointing: dump everything needed for a later
        # resume (see the `resume` handling above).
        if 'dump_every' in self._args and \
                (iter + 1) % self._args['dump_every'] == 0:
            logging.debug('dumping param...')
            mpi.root_savez(self._args['dump_name'],\
                    iter=iter, w = param[0], b = param[1], \
                    accum_grad = accum_grad, base_lr = self._args['base_lr'])
    return param
# NOTE(review): this exit presumably terminates a branch that starts before
# this chunk — confirm the enclosing control flow in the full file.
sys.exit(0)
if FLAGS.speedtest > 0:
    # Speed-test mode: only measure how fast the sampler can produce
    # minibatches (per-node and aggregated over MPI), then exit.
    logging.info("Testing speed")
    logging.info("minibatch size: %d" % FLAGS.minibatch)
    from iceberk.util import Timer
    timer = Timer()
    for i in range(FLAGS.speedtest):
        batch = sampler.sample(FLAGS.minibatch)
        logging.info("Local size: %d" % batch[0].shape[0])
        # sum the local batch sizes across all MPI nodes
        total_size = mpi.COMM.allreduce(batch[0].shape[0])
        logging.info("Total size: %d" % total_size)
        logging.info("Sampling took %s secs" % timer.lap())
    sys.exit(0)
logging.info("Performing classification")
# Resume from an existing checkpoint file if one was left by a prior run.
if os.path.exists(DUMPNAME):
    resume = DUMPNAME
else:
    resume = None
# adagrad
solver = classifier.SolverStochastic(FLAGS.reg,
                                     loss,
                                     classifier.Reg.reg_l2,
                                     args = {'mode': 'adagrad',
                                             'base_lr': FLAGS.base_lr,
                                             'minibatch': FLAGS.minibatch,
                                             'num_iter': 1000,
                                             'callback': callback,
                                             'eta': 1e-8,
                                             'dump_every': 25,
                                             'dump_name': DUMPNAME})
w,b = solver.solve(sampler, resume = resume)
# Persist the final weights/bias alongside the checkpoint name
# (DUMPNAME[:-4] strips the '.npz' suffix).
mpi.root_savez(DUMPNAME[:-4] + ".final.npz", w = w, b = b)