def get_trainer(model, trainset, validset, epochs=20, batch_size=200):
    """Build a Train object: SGD with momentum, dropout on h0-h3 and y,
    and an exponentially decaying learning rate."""
    monitoring_batches = None if validset is None else 20
    train_algo = SGD(batch_size=batch_size,
                     init_momentum=0.5,
                     learning_rate=0.1,
                     monitoring_batches=monitoring_batches,
                     monitoring_dataset=validset,
                     cost=Dropout(input_include_probs={'h0': 0.8, 'h1': 0.8,
                                                       'h2': 0.8, 'h3': 0.8,
                                                       'y': 0.5},
                                  input_scales={'h0': 1. / 0.8, 'h1': 1. / 0.8,
                                                'h2': 1. / 0.8, 'h3': 1. / 0.8,
                                                'y': 1. / 0.5},
                                  default_input_include_prob=0.5,
                                  default_input_scale=1. / 0.5),
                     termination_criterion=EpochCounter(epochs),
                     update_callbacks=ExponentialDecay(decay_factor=1.0001,
                                                       min_lr=0.001))
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_freq=0,
                 save_path='epoch',
                 extensions=[MomentumAdjustor(final_momentum=0.9,
                                              start=0,
                                              saturate=int(epochs * 0.8))])
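# A minimal usage sketch for the trainer above (an illustration, not part of the
# original code): the layer names and sizes are assumptions chosen to match the
# dropout keys 'h0'..'h3' and 'y'; validset is left as None so no monitoring
# channels are needed.
import numpy as np
import theano
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.models.mlp import MLP, RectifiedLinear, Softmax

rng = np.random.RandomState(0)
X = rng.randn(1000, 50).astype(theano.config.floatX)
y = np.zeros((1000, 10), dtype=theano.config.floatX)
y[np.arange(1000), rng.randint(0, 10, 1000)] = 1.    # one-hot targets
trainset = DenseDesignMatrix(X=X, y=y)

model = MLP(nvis=50,
            layers=[RectifiedLinear(layer_name='h0', dim=100, irange=0.05),
                    RectifiedLinear(layer_name='h1', dim=100, irange=0.05),
                    RectifiedLinear(layer_name='h2', dim=100, irange=0.05),
                    RectifiedLinear(layer_name='h3', dim=100, irange=0.05),
                    Softmax(layer_name='y', n_classes=10, irange=0.05)])

trainer = get_trainer(model, trainset, validset=None, epochs=2)
trainer.main_loop()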
def get_trainer(model, trainset, validset, epochs=50):
    """Build a Train object: plain supervised cost (cost_from_X), SGD with
    momentum and an exponentially decaying learning rate."""
    monitoring_batches = None if validset is None else 50
    train_algo = SGD(batch_size=200,
                     init_momentum=0.5,
                     learning_rate=0.5,
                     monitoring_batches=monitoring_batches,
                     monitoring_dataset=validset,
                     cost=MethodCost(method='cost_from_X', supervised=1),
                     termination_criterion=EpochCounter(epochs),
                     update_callbacks=ExponentialDecay(decay_factor=1.0005,
                                                       min_lr=0.001))
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_freq=0,
                 save_path='epoch',
                 extensions=[MomentumAdjustor(final_momentum=0.95,
                                              start=0,
                                              saturate=int(epochs * 0.8))])
def get_trainer(model, trainset, validset, epochs=20, batch_size=100):
    """Build a Train object: SGD with momentum and dropout applied to the
    first hidden layer (default dropout settings elsewhere)."""
    monitoring_batches = None if validset is None else 20
    train_algo = SGD(batch_size=batch_size,
                     init_momentum=0.5,
                     learning_rate=0.05,
                     monitoring_batches=monitoring_batches,
                     monitoring_dataset=validset,
                     cost=Dropout(input_include_probs={'h0': 0.8},
                                  input_scales={'h0': 1.},
                                  default_input_include_prob=0.5,
                                  default_input_scale=1. / 0.5),
                     # termination_criterion=MonitorBased(
                     #     channel_name='y_misclass', prop_decrease=0., N=50),
                     termination_criterion=EpochCounter(epochs),
                     update_callbacks=ExponentialDecay(decay_factor=1.00002,
                                                       min_lr=0.0001))
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_freq=0,
                 save_path='epoch',
                 extensions=[MomentumAdjustor(final_momentum=0.7,
                                              start=0,
                                              saturate=int(0.8 * epochs))])
def test_correctness():
    """
    Test that the cost function works with float64
    """
    x_train, y_train, x_valid, y_valid = create_dataset()

    trainset = DenseDesignMatrix(X=np.array(x_train), y=y_train)
    validset = DenseDesignMatrix(X=np.array(x_valid), y=y_valid)

    n_inputs = trainset.X.shape[1]
    n_outputs = 1
    n_hidden = 10

    hidden_istdev = 4 * (6 / float(n_inputs + n_hidden)) ** 0.5
    output_istdev = 4 * (6 / float(n_hidden + n_outputs)) ** 0.5

    model = MLP(layers=[Sigmoid(dim=n_hidden, layer_name='hidden',
                                istdev=hidden_istdev),
                        Sigmoid(dim=n_outputs, layer_name='output',
                                istdev=output_istdev)],
                nvis=n_inputs, seed=[2013, 9, 16])

    termination_criterion = And([EpochCounter(max_epochs=1),
                                 MonitorBased(prop_decrease=1e-7, N=2)])

    cost = SumOfCosts([(0.99, Default()),
                       (0.01, L1WeightDecay({}))])

    algo = SGD(1e-1,
               update_callbacks=[ExponentialDecay(decay_factor=1.00001,
                                                  min_lr=1e-10)],
               cost=cost,
               monitoring_dataset=validset,
               termination_criterion=termination_criterion,
               monitor_iteration_mode='even_shuffled_sequential',
               batch_size=2)

    train = Train(model=model, dataset=trainset, algorithm=algo)
    train.main_loop()
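# create_dataset() is not defined in this snippet. A hypothetical stand-in that
# matches how it is used above (the name, sizes, and shapes are assumptions for
# illustration only): a small synthetic set with binary targets of shape (n, 1),
# split into train and validation halves.
import numpy as np

def create_dataset(n=40, n_features=3, seed=42):
    rng = np.random.RandomState(seed)
    X = rng.randn(n, n_features)
    y = (X.sum(axis=1, keepdims=True) > 0.).astype(float)  # binary targets, shape (n, 1)
    half = n // 2
    return X[:half], y[:half], X[half:], y[half:]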
def run_sgd(mode):
    # Must be seeded the same both times run_sgd is called
    disturb_mem.disturb_mem()
    rng = np.random.RandomState([2012, 11, 27])

    batch_size = 5
    train_batches = 3
    valid_batches = 4
    num_features = 2

    # Synthesize dataset with a linear decision boundary
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        disturb_mem.disturb_mem()
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = np.zeros((m, 1))
        y[:, 0] = np.dot(X, w) > 0.

        rval = DenseDesignMatrix(X=X, y=y)
        rval.yaml_src = ""  # suppress no yaml_src warning

        X = rval.get_batch_design(batch_size)
        assert X.shape == (batch_size, num_features)

        return rval

    train = make_dataset(train_batches)
    valid = make_dataset(valid_batches)

    num_chunks = 10
    chunk_width = 2

    class ManyParamsModel(Model):
        """
        Make a model with lots of parameters, so that there are many
        opportunities for their updates to get accidentally re-ordered
        non-deterministically. This makes non-determinism bugs manifest
        more frequently.
        """

        def __init__(self):
            self.W1 = [sharedX(rng.randn(num_features, chunk_width))
                       for i in xrange(num_chunks)]
            disturb_mem.disturb_mem()
            self.W2 = [sharedX(rng.randn(chunk_width))
                       for i in xrange(num_chunks)]
            self._params = safe_union(self.W1, self.W2)
            self.input_space = VectorSpace(num_features)
            self.output_space = VectorSpace(1)

    disturb_mem.disturb_mem()
    model = ManyParamsModel()
    disturb_mem.disturb_mem()

    class LotsOfSummingCost(Cost):
        """
        Make a cost whose gradient on the parameters involves summing many
        terms together, so that T.grad is more likely to sum things in a
        random order.
        """

        supervised = True

        def expr(self, model, data, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X, Y = data
            disturb_mem.disturb_mem()

            def mlp_pred(non_linearity):
                Z = [T.dot(X, W) for W in model.W1]
                H = map(non_linearity, Z)
                Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                pred = sum(Z)
                return pred

            nonlinearity_predictions = map(
                mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
            pred = sum(nonlinearity_predictions)
            disturb_mem.disturb_mem()

            return abs(pred - Y[:, 0]).sum()

        def get_data_specs(self, model):
            data = CompositeSpace(
                (model.get_input_space(), model.get_output_space()))
            source = (model.get_input_source(), model.get_target_source())
            return (data, source)

    cost = LotsOfSummingCost()

    disturb_mem.disturb_mem()

    algorithm = SGD(cost=cost,
                    batch_size=batch_size,
                    init_momentum=.5,
                    learning_rate=1e-3,
                    monitoring_dataset={'train': train, 'valid': valid},
                    update_callbacks=[ExponentialDecay(decay_factor=2.,
                                                       min_lr=.0001)],
                    termination_criterion=EpochCounter(max_epochs=5))

    disturb_mem.disturb_mem()

    train_object = Train(dataset=train,
                         model=model,
                         algorithm=algorithm,
                         extensions=[PolyakAveraging(start=0),
                                     MomentumAdjustor(final_momentum=.9,
                                                      start=1,
                                                      saturate=5)],
                         save_freq=0)

    disturb_mem.disturb_mem()

    train_object.main_loop()
print "[MESSAGE] The model is built"; ### build algorithm algorithm=SGD(batch_size=100, learning_rate=0.05, monitoring_dataset={'train':valid_data, 'valid':valid_data, 'test':test_data}, termination_criterion=Or(criteria=[MonitorBased(channel_name="valid_objective", prop_decrease=0.00001, N=40), EpochCounter(max_epochs=200)]), cost = Dropout(input_include_probs={'hidden_0':1., 'hidden_1':1., 'y':0.5}, input_scales={ 'hidden_0': 1., 'hidden_1':1., 'y':2.}), update_callbacks=ExponentialDecay(decay_factor=1.0000003, min_lr=.000001)); print "[MESSAGE] Training algorithm is built"; ### build training idpath = os.path.splitext(os.path.abspath(__file__))[0]; # ID for output files. save_path = idpath + '.pkl'; train=Train(dataset=train_data, model=model, algorithm=algorithm, save_path=save_path, save_freq=100); print "[MESSAGE] Trainer is built";