# Bug fix: the original declared --random_glimpse with `default=False` but no
# action, which makes it a *value-taking* option whose parsed result is a
# string — and any string (even "False") is truthy.  `store_true` gives the
# intended boolean-flag semantics.
ap.add_argument("--random_glimpse", action="store_true", default=False,
                help="pick the first glimpse location at random")
args = ap.parse_args()

# MNIST wrapped in minibatches of size 1 (one image per parameter update).
mnist = MiniBatches(MnistDataset(), batch_size=1)

model_path = args.model
network = get_network(model_path,
                      std=args.variance,
                      disable_reinforce=args.disable_reinforce,
                      random_glimpse=args.random_glimpse)

# Trainer configuration: mild L2 regularization on both the weights and the
# hidden activations; REINFORCE/backprop can be disabled independently.
trainer_conf = TrainerConfig()
trainer_conf.learning_rate = args.learning_rate
trainer_conf.weight_l2 = 0.0001
trainer_conf.hidden_l2 = 0.0001
trainer_conf.method = args.method
trainer_conf.disable_reinforce = args.disable_reinforce
trainer_conf.disable_backprop = args.disable_backprop

trainer = AttentionTrainer(network, network.layers[0], config=trainer_conf)
trainer_conf.report()

timer = Timer()
# Drain the training generator; nothing is done per epoch here.  (The
# original wrapped the generator in list(), materializing every yielded
# value for no reason — iterating directly avoids that.)
for _ in trainer.train(mnist.train_set(), mnist.valid_set(), mnist.test_set()):
    pass
timer.end()

network.save_params(model_path)
timer.report()
model_path = args.model

network = get_network(model_path,
                      std=args.variance,
                      disable_reinforce=args.disable_reinforce,
                      random_glimpse=args.random_glimpse)

# Trainer configuration: annealed learning rate plus mild L2 regularization
# on weights and hidden activations.
trainer_conf = TrainerConfig()
trainer_conf.learning_rate = LearningRateAnnealer.learning_rate(args.learning_rate)
trainer_conf.weight_l2 = 0.0001
trainer_conf.hidden_l2 = 0.0001
trainer_conf.method = args.method

trainer = FirstGlimpseTrainer(network, network.layers[0], config=trainer_conf)

# Anneal the learning rate when validation stalls for 5 epochs in a row.
annealer = LearningRateAnnealer(trainer, patience=5)

timer = Timer()
for _ in trainer.train(mnist.train_set(), mnist.valid_set(), mnist.test_set()):
    # Stop training as soon as the annealer signals convergence.
    if annealer.invoke():
        break
timer.end()

network.save_params(model_path)
timer.report()
batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    # 26-dimensional sequential input (3-tensor: batch x time x features).
    model = NeuralClassifier(input_dim=26, input_tensor=3)

    # Three sequence-to-sequence recurrent layers, a final sequence-to-one
    # recurrent layer, then a dense 4-way softmax classifier.
    stacked = [RNN(hidden_size=30, input_type="sequence",
                   output_type="sequence", vector_core=core)
               for core in (0.1, 0.3, 0.6)]
    stacked.append(RNN(hidden_size=30, input_type="sequence",
                       output_type="one", vector_core=0.9))
    stacked += [Dense(4), Softmax()]
    model.stack(*stacked)

    trainer = SGDTrainer(model)
    annealer = LearningRateAnnealer()
    trainer.run(batch_set.train_set(), batch_set.valid_set(),
                controllers=[annealer])
# NOTE(review): this chunk opens mid-expression — the lines below close a
# theano.function(...) call (building the compiled training step) that
# begins before the visible portion of the file.
outputs=train_monitors.values(), updates=gradient_updates, allow_input_downcast=True)

# Compiled validation step: forward pass only, no parameter updates.
valid_iteration = theano.function(inputs=model.input_variables,
                                  outputs=test_monitors.values(),
                                  allow_input_downcast=True)

max_epochs = 10
# MNIST wrapped in minibatches of 20 samples each.
mnist = MiniBatches(MnistDataset(), batch_size=20)

for i in range(max_epochs):
    # Training
    cost_matrix = []
    for inputs in mnist.train_set():
        costs = train_iteration(*inputs)
        cost_matrix.append(costs)
    # Average each monitored cost over the epoch's minibatches and pair it
    # with its monitor name.  (Python 2 dict: keys() and values() of the
    # same unmodified dict iterate in matching order.)
    train_costs = list(zip(train_monitors.keys(), np.mean(cost_matrix, axis=0)))
    print "train", i, train_costs
    # Test with valid data
    cost_matrix = []
    for inputs in mnist.valid_set():
        costs = valid_iteration(*inputs)
        cost_matrix.append(costs)
    valid_costs = list(zip(test_monitors.keys(), np.mean(cost_matrix, axis=0)))
    print "valid", i, valid_costs

# Persist the learned parameters after the final epoch.
model.save_params(model_path)
# NOTE(review): this chunk opens mid-expression — the lines below close a
# theano.function(...) call (building the compiled training step) that
# begins before the visible portion of the file.
updates=gradient_updates, allow_input_downcast=True)

# Compiled validation step: forward pass only, no parameter updates.
valid_iteration = theano.function(inputs=model.input_variables,
                                  outputs=test_monitors.values(),
                                  allow_input_downcast=True)

max_epochs = 10
# MNIST wrapped in minibatches of 20 samples each.
mnist = MiniBatches(MnistDataset(), batch_size=20)

for i in range(max_epochs):
    # Training
    cost_matrix = []
    for inputs in mnist.train_set():
        costs = train_iteration(*inputs)
        cost_matrix.append(costs)
    # Average each monitored cost over the epoch's minibatches and pair it
    # with its monitor name.  (Python 2 dict: keys() and values() of the
    # same unmodified dict iterate in matching order.)
    train_costs = list(
        zip(train_monitors.keys(), np.mean(cost_matrix, axis=0)))
    print "train", i, train_costs
    # Test with valid data
    cost_matrix = []
    for inputs in mnist.valid_set():
        costs = valid_iteration(*inputs)
        cost_matrix.append(costs)
    valid_costs = list(
        zip(test_monitors.keys(), np.mean(cost_matrix, axis=0)))
    print "valid", i, valid_costs

# Persist the learned parameters after the final epoch.
model.save_params(model_path)
# Deterministic shuffle (fixed seed 3), then carve off 15% for validation.
random.Random(3).shuffle(data)
valid_size = int(len(data) * 0.15)
train_set, valid_set = data[valid_size:], data[:valid_size]

dataset = SequentialDataset(train_set, valid=valid_set)
dataset.pad_left(20)   # left-pad sequences to a common length of 20
dataset.report()

batch_set = MiniBatches(dataset)

if __name__ == '__main__':
    # 26-dimensional sequential input (3-tensor: batch x time x features).
    model = NeuralClassifier(input_dim=26, input_tensor=3)

    # Three sequence-to-sequence recurrent layers, a final sequence-to-one
    # recurrent layer, then a dense 4-way softmax classifier.
    stacked = [RNN(hidden_size=30, input_type="sequence",
                   output_type="sequence", vector_core=core)
               for core in (0.1, 0.3, 0.6)]
    stacked.append(RNN(hidden_size=30, input_type="sequence",
                       output_type="one", vector_core=0.9))
    stacked += [Dense(4), Softmax()]
    model.stack(*stacked)

    trainer = SGDTrainer(model)
    annealer = LearningRateAnnealer(trainer)
    trainer.run(batch_set.train_set(), batch_set.valid_set(),
                controllers=[annealer])