Ejemplo n.º 1
0
                                               d_p_0=dropout_p_h_0,
                                               d_p_1=dropout_p_h_1,
                                               init=init,
                                               quasi_ortho_init=True))
    # Disabled variant of the output head that additionally stacks BatchNormalization.
    #model.stack(BatchNormalization(),Dropout(p=dropout_p_2), Dense(10, init=init))
    # Output head: Dropout with probability dropout_p_2 followed by a Dense layer of size 10
    # (presumably one unit per MNIST digit class -- TODO confirm).
    model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

    # Initial learning rate passed to the trainer configuration below.
    learning_rate_start = 3e-3
    # Disabled: constant decay factor which, applied learning_rate_epochs times,
    # would take learning_rate_start down to learning_rate_target.
    #learning_rate_target = 3e-7
    #learning_rate_epochs = 100
    #learning_rate_decay  = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)
    conf = TrainerConfig()
    # The rate is wrapped by LearningRateAnnealer.learning_rate(...) rather than stored as a
    # plain float (presumably so the annealer controller can update it -- TODO confirm).
    conf.learning_rate = LearningRateAnnealer.learning_rate(
        learning_rate_start)
    #conf.gradient_clipping = 1
    # NOTE(review): exact semantics of patience / avoid_nan / min_improvement are defined by
    # TrainerConfig, which is not visible here -- confirm before changing these values.
    conf.patience = 20
    #conf.gradient_tolerance = 5
    conf.avoid_nan = True
    conf.min_improvement = 1e-10

    # Adam is the optimizer in use; the momentum trainer is kept as a disabled alternative.
    #trainer = MomentumTrainer(model)
    trainer = AdamTrainer(model, conf)

    # MNIST served in mini-batches of 100 examples.
    mnist = MiniBatches(MnistDataset(), batch_size=100)
    #mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

    # Train with a LearningRateAnnealer controller attached to the trainer.
    # NOTE(review): the meaning of the positional arguments 3 and 14 is not visible here --
    # check them against LearningRateAnnealer's signature.
    #trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
    trainer.run(mnist, controllers=[LearningRateAnnealer(trainer, 3, 14)])
    logging.info('Setting best parameters for testing.')
    # Load the parameters the trainer recorded in best_params, then evaluate on the test set.
    trainer.set_params(*trainer.best_params)
    # NOTE(review): _run_test is a private-named trainer method; the meaning of the first
    # argument (-1) is not visible here.
    trainer._run_test(-1, mnist.test_set())
    # Stack T highway layers, each built with the same hyper-parameters.
    for _ in range(T):
        # Disabled alternative: the non-"Diag" highway layer variant.
        #model.stack(HighwayLayerLRDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d, d_p_0 = dropout_p_h_0, d_p_1 = dropout_p_h_1, init=init))
        model.stack(HighwayLayerLRDiagDropoutBatchNorm(activation=activation, gate_bias=gate_bias, projection_dim=d, d_p_0 = dropout_p_h_0, d_p_1 = dropout_p_h_1, init=init, quasi_ortho_init=True))
    # Disabled variant of the output head that additionally stacks BatchNormalization.
    #model.stack(BatchNormalization(),Dropout(p=dropout_p_2), Dense(10, init=init))
    # Output head: Dropout with probability dropout_p_2 followed by a Dense layer of size 10
    # (presumably one unit per MNIST digit class -- TODO confirm).
    model.stack(Dropout(p=dropout_p_2), Dense(10, init=init))

    
    # Initial learning rate passed to the trainer configuration below.
    learning_rate_start  = 3e-3
    # Disabled: constant decay factor which, applied learning_rate_epochs times,
    # would take learning_rate_start down to learning_rate_target.
    #learning_rate_target = 3e-7
    #learning_rate_epochs = 100
    #learning_rate_decay  = (learning_rate_target / learning_rate_start) ** (1.0 / learning_rate_epochs)
    conf = TrainerConfig()
    # The rate is wrapped by LearningRateAnnealer.learning_rate(...) rather than stored as a
    # plain float (presumably so the annealer controller can update it -- TODO confirm).
    conf.learning_rate = LearningRateAnnealer.learning_rate(learning_rate_start)
    #conf.gradient_clipping = 1
    # NOTE(review): exact semantics of patience / avoid_nan / min_improvement are defined by
    # TrainerConfig, which is not visible here -- confirm before changing these values.
    conf.patience = 20
    #conf.gradient_tolerance = 5
    conf.avoid_nan = True
    conf.min_improvement = 1e-10

    # Adam is the optimizer in use; the momentum trainer is kept as a disabled alternative.
    #trainer = MomentumTrainer(model)
    trainer = AdamTrainer(model, conf)

    # MNIST served in mini-batches of 100 examples.
    mnist = MiniBatches(MnistDataset(), batch_size=100)
    #mnist = MiniBatches(MnistDatasetSmallValid(), batch_size=100)

    # Train with a LearningRateAnnealer controller attached to the trainer.
    # NOTE(review): the meaning of the positional arguments 3 and 14 is not visible here --
    # check them against LearningRateAnnealer's signature.
    #trainer.run(mnist, controllers=[IncrementalLearningRateAnnealer(trainer, 0, learning_rate_decay)])
    trainer.run(mnist, controllers=[LearningRateAnnealer(trainer, 3, 14)])
    logging.info('Setting best parameters for testing.')
    # Load the parameters the trainer recorded in best_params, then evaluate on the test set.
    trainer.set_params(*trainer.best_params)
    # NOTE(review): _run_test is a private-named trainer method; the meaning of the first
    # argument (-1) is not visible here.
    trainer._run_test(-1, mnist.test_set())
Ejemplo n.º 3
0
if __name__ == '__main__':
    # Script entry point: build an IRNN regressor, optionally resume from saved
    # parameters, train it with SGD, save the parameters back to the same path and
    # print the diagonal of the first layer's W_h matrix.

    # --model: path of the parameter file to load (if present) and to save to.
    # Defaults to models/sequence_adding_100_2.gz next to this script.
    ap = ArgumentParser()
    ap.add_argument("--model", default=os.path.join(os.path.dirname(__file__), "models", "sequence_adding_100_2.gz"))
    args = ap.parse_args()

    # Sequence-in / single-output recurrent layer followed by a one-unit Dense layer.
    # NOTE(review): input_tensor=3 presumably declares a 3-D input tensor -- confirm
    # against NeuralRegressor.
    model = NeuralRegressor(input_dim=2, input_tensor=3)
    model.stack(IRNN(hidden_size=100, input_type="sequence",
                     output_type="one"),
                      Dense(1))

    # Resume from previously saved parameters when the file already exists.
    if os.path.exists(args.model):
        model.load_params(args.model)

    conf = TrainerConfig()
    conf.learning_rate = LearningRateAnnealer.learning_rate(0.01)
    conf.gradient_clipping = 3
    conf.patience = 50
    conf.gradient_tolerance = 5
    conf.avoid_nan = False
    trainer = SGDTrainer(model, conf)

    annealer = LearningRateAnnealer(patience=20)

    # NOTE(review): batch_set is not defined inside this block; it has to be provided
    # at module level elsewhere in the file -- verify before running.
    trainer.run(batch_set, controllers=[annealer])

    model.save_params(args.model)
    # Single-argument print(...) calls give the same output on Python 2 and Python 3;
    # the former print statements were a SyntaxError on Python 3.
    print("Identity matrix weight:")
    print(model.first_layer().W_h.get_value().diagonal())