def test_rmsprop():
    """Verify that learning_rule.RMSProp produces the same parameter
    values as a hand-written NumPy RMSProp implementation, including a
    per-parameter learning-rate scaler for each parameter.
    """
    # A cost term other than SumOfParams forces data to actually be
    # queried from the training set, so the expected number of updates
    # gets applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1,), (9,), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    decay = 0.90
    max_scaling = 1e5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=RMSProp(decay),
              batch_size=1)
    sgd.setup(model=model, dataset=dataset)

    # Running mean of squared gradients, one accumulator per parameter.
    state = dict((p, {'g2': np.zeros(p.get_value().shape)})
                 for p in model.get_params())

    def rmsprop_manual(model, state):
        eps = 1. / max_scaling
        updated = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            pval = param.get_value()
            # For SumOfOneHalfParamsSquared the gradient equals the
            # parameter value itself, so pval stands in for the gradient.
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * pval ** 2
            rms = np.maximum(np.sqrt(pstate['g2']), eps)
            step = -scale * learning_rate / rms * pval
            updated.append(pval + step)
        return updated

    manual = rmsprop_manual(model, state)
    sgd.train(dataset=dataset)

    for expected, param in izip(manual, model.get_params()):
        assert np.allclose(expected, param.get_value())
def get_ae_trainer(model, dataset, save_path, epochs=5):
    """Build a Train object that fits an autoencoder (AE).

    Uses SGD with an RMSProp learning rule and a mean-squared
    reconstruction error cost, stopping after `epochs` epochs and
    saving the model to `save_path` every epoch.
    """
    algorithm = SGD(
        learning_rate=1e-2,
        learning_rule=RMSProp(),
        train_iteration_mode='shuffled_sequential',
        batch_size=250,
        monitoring_dataset=dataset,
        cost=MeanSquaredReconstructionError(),
        termination_criterion=EpochCounter(max_epochs=epochs),
    )
    return Train(model=model, algorithm=algorithm, dataset=dataset,
                 save_path=save_path, save_freq=1)
def get_rbm_trainer(model, dataset, save_path, epochs=5):
    """Build a Train object that fits a Restricted Boltzmann Machine (RBM).

    Uses SGD with an RMSProp learning rule and a stochastic maximum
    likelihood (SML) cost, stopping after `epochs` epochs and saving
    the model to `save_path` every epoch.
    """
    algorithm = SGD(
        learning_rate=1e-2,
        learning_rule=RMSProp(),
        train_iteration_mode='shuffled_sequential',
        batch_size=250,
        monitoring_dataset=dataset,
        cost=SML(250, 1),
        termination_criterion=EpochCounter(max_epochs=epochs),
    )
    return Train(model=model, algorithm=algorithm, dataset=dataset,
                 save_path=save_path, save_freq=1)
def get_trainer(model, trainset, validset, save_path):
    """Build a Train object for supervised SGD/RMSProp training.

    Monitors both the training and validation sets, stops when the
    'valid_y_misclass' channel stops improving, saves the best model
    (by that channel) to `save_path`, and linearly decays the learning
    rate over the first 200 epochs.
    """
    termination = MonitorBased(channel_name='valid_y_misclass',
                               prop_decrease=.001,
                               N=100)
    extensions = [
        MonitorBasedSaveBest(channel_name='valid_y_misclass',
                             save_path=save_path),
        LinearDecayOverEpoch(start=1, saturate=200, decay_factor=0.01),
    ]
    algorithm = SGD(
        learning_rate=.01,
        learning_rule=RMSProp(),
        train_iteration_mode='shuffled_sequential',
        batch_size=1200,
        monitoring_dataset={'valid': validset, 'train': trainset},
        monitor_iteration_mode='shuffled_sequential',
        termination_criterion=termination,
    )
    return Train(model=model, algorithm=algorithm, dataset=trainset,
                 extensions=extensions)
def __init__(self, layers, random_state=None, learning_rule='sgd',
             learning_rate=0.01, learning_momentum=0.9, dropout=False,
             batch_size=1, n_iter=None, n_stable=50, f_stable=0.001,
             valid_set=None, valid_size=0.0, verbose=False, **params):
    """Configure the estimator and translate the string `learning_rule`
    into a concrete learning-rule object, then call ``self._setup()``.

    Parameters
    ----------
    layers : list of Layer
        Network layers; unnamed layers are labelled ``hidden%i`` except
        the last, which is labelled ``output%i``.
    random_state : optional
        Seed/state forwarded to the backend (stored only here).
    learning_rule : str
        One of 'sgd', 'adadelta', 'momentum', 'nesterov' or 'rmsprop'.
    learning_rate, learning_momentum : float
        Optimizer hyper-parameters; momentum is used only by the
        'momentum' and 'nesterov' rules.
    dropout : bool or float
        False disables dropout (rate 0.0); True enables it at 0.5; a
        float is used as the rate directly.
    batch_size, n_iter, n_stable, f_stable, valid_set, valid_size, verbose
        Stored as-is for later use by the training machinery.
    **params
        May contain per-layer entries injected by sklearn's
        double-underscore syntax; anything else is rejected.

    Raises
    ------
    NotImplementedError
        If `learning_rule` names an unsupported rule.
    """
    self.layers = []
    for i, layer in enumerate(layers):
        assert isinstance(layer, Layer), \
            "Specify each layer as an instance of a `sknn.mlp.Layer` object."

        # Layer names are optional, if not specified then generate one.
        if layer.name is None:
            label = "hidden" if i < len(layers) - 1 else "output"
            layer.name = "%s%i" % (label, i)

        # sklearn may pass layers in as additional named parameters, remove them.
        if layer.name in params:
            del params[layer.name]

        self.layers.append(layer)

    # Don't support any additional parameters that are not in the constructor.
    # These are specified only so `get_params()` can return named layers, for
    # double-underscore syntax to work.
    assert len(params) == 0, \
        "The specified additional parameters are unknown."

    self.random_state = random_state
    self.learning_rule = learning_rule
    self.learning_rate = learning_rate
    self.learning_momentum = learning_momentum
    # Normalize dropout: explicit float rate passes through; booleans map
    # to the conventional 0.5 (on) or 0.0 (off).
    # FIX: use isinstance() rather than `type(...) is float` so float
    # subclasses (e.g. numpy.float64) are accepted as rates too.
    self.dropout = dropout if isinstance(dropout, float) else (
        0.5 if dropout else 0.0)
    self.batch_size = batch_size
    self.n_iter = n_iter
    self.n_stable = n_stable
    self.f_stable = f_stable
    self.valid_set = valid_set
    self.valid_size = valid_size
    self.verbose = verbose

    # Backend state, populated later by _setup()/training.
    self.unit_counts = None
    self.input_space = None
    self.mlp = None
    self.weights = None
    self.vs = None
    self.ds = None
    self.trainer = None
    self.f = None
    self.train_set = None
    self.best_valid_error = float("inf")

    self.cost = "Dropout" if dropout else None

    # Map the rule name onto a pylearn2 learning-rule instance; plain
    # 'sgd' is represented by None (no special rule).
    if learning_rule == 'sgd':
        self._learning_rule = None
    elif learning_rule == 'adadelta':
        self._learning_rule = AdaDelta()
    elif learning_rule == 'momentum':
        self._learning_rule = Momentum(learning_momentum)
    elif learning_rule == 'nesterov':
        self._learning_rule = Momentum(learning_momentum,
                                       nesterov_momentum=True)
    elif learning_rule == 'rmsprop':
        self._learning_rule = RMSProp()
    else:
        raise NotImplementedError(
            "Learning rule type `%s` is not supported." % learning_rule)

    self._setup()