def learning_word_embeddings_with_the_embedding_layer_cntk():
    x_train, y_train, x_test, y_test = load_from_files()
    max_features = 10000
    maxlen = 20
    embedding_dim = 8

    x = cntk.input_variable(shape=(maxlen,), dtype=np.float32)
    y = cntk.input_variable(shape=(1,), dtype=np.float32)
    model = cntk.one_hot(x, num_classes=max_features, sparse_output=True)
    model = cntk.layers.Embedding(embedding_dim)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 30
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function), [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)
    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator)
def train(streamf):
    global net
    minibatch_size = 1024
    max_epochs = 2000
    epoch_size = 50000

    net = nn(input_var)
    loss = cntk.losses.binary_cross_entropy(net, label_var)
    error = cntk.classification_error(net, label_var)

    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = cntk.learning_rate_schedule(lr_per_minibatch, cntk.UnitType.minibatch)
    momentum_as_time_constant = cntk.momentum_as_time_constant_schedule(700)
    learner = cntk.adam(net.parameters,
                        lr_schedule,
                        momentum=momentum_as_time_constant,
                        gradient_clipping_threshold_per_sample=15,
                        gradient_clipping_with_truncation=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)

    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }

    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
        trainer.summarize_training_progress()
    return trainer
def create_trainer(network, epoch_size, num_quantization_bits, warm_up, progress_writers):
    print('Creating the trainer.')

    # Train only the last layers
    lr_schedule = C.learning_rate_schedule([0.01] * 10 + [0.001] * 20 + [0.0001] * 30,
                                           unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.0001
    learner = C.adam(network['output'].parameters, lr_schedule, mm_schedule,
                     l2_regularization_weight=l2_reg_weight, unit_gain=False)

    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))

    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            learner, progress_writers)
    return trainer
def implementing_1d_convnet_cntk():
    max_features = 10000  # number of words to consider as features
    max_len = 500  # cut texts after this number of words (among top max_features most common words)
    x_train, y_train, x_test, y_test = load_data(max_features, max_len)

    model = build_model_cntk(max_features, max_len)
    x = cntk.input_variable(shape=(max_len,), dtype=np.float32)
    y = cntk.input_variable(shape=(1,), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function), [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)
    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator)
def train(self, report_freq=500, as_policy=True):
    # loss = C.ops.minus(0, C.ops.argmin(self.model) - C.ops.argmin(self.model) + C.ops.minus(self.label_var, 0))
    loss = C.squared_error(self.model, self.label_var)
    evaluation = C.squared_error(self.model, self.label_var)
    schedule = C.momentum_schedule(self.hp.learning_rate)
    progress_printer = C.logging.ProgressPrinter(num_epochs=self.hp.epochs / self.hp.minibatch_size)
    learner = C.adam(self.model.parameters,
                     C.learning_rate_schedule(self.hp.learning_rate, C.UnitType.minibatch),
                     momentum=schedule,
                     l1_regularization_weight=self.hp.l1reg,
                     l2_regularization_weight=self.hp.l2reg)
    trainer = C.Trainer(self.model, (loss, evaluation), learner, progress_printer)

    self.plotdata = {"loss": []}
    for epoch in range(self.hp.epochs):
        indata, label, total_reward = self.get_next_data(self.hp.minibatch_size, as_policy)
        data = {self.input_var: indata, self.label_var: label}
        trainer.train_minibatch(data)
        loss = trainer.previous_minibatch_loss_average
        if not (loss == "NA"):
            self.plotdata["loss"].append(loss)
        if epoch % report_freq == 0:
            print()
            print("last epoch total reward: {}".format(total_reward))
            trainer.summarize_training_progress()
            print()
        # if self.hp.stop_loss > loss:
        #     break

    print()
    trainer.summarize_training_progress()
def create_trainer(network, epoch_size, num_quantization_bits, warm_up, progress_writers):
    '''Create Trainer'''
    print('Creating the trainer.')

    # Differential Learning rate scheduler
    lr_schedule = C.learning_rate_schedule([2.5], unit=C.UnitType.minibatch)
    mm_schedule = C.momentum_schedule(0.9)
    l2_reg_weight = 0.001

    # Create the Adam learners
    learner = C.adam(network['output'].parameters, lr_schedule, mm_schedule,
                     l2_regularization_weight=l2_reg_weight, unit_gain=False)

    # Compute the number of workers
    num_workers = C.distributed.Communicator.num_workers()
    print('Number of workers: {}'.format(num_workers))

    if num_workers > 1:
        parameter_learner = C.train.distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=num_quantization_bits)
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            parameter_learner, progress_writers)
    else:
        trainer = C.Trainer(network['output'], (network['ce'], network['pe']),
                            learner, progress_writers)
    return trainer
def create_trainer():
    loss, label_error = create_criterion_function_preferred(dec, y)

    schedule_step = print_freq
    lr_per_sample = [2e-3] * 2 * schedule_step + [1e-3] * 2 * schedule_step + [5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, C.UnitType.minibatch, epoch_size)

    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    if restore:
        trainer.restore_from_checkpoint("model-5.cntk")
    C.logging.log_number_of_parameters(dec)
    return trainer
def run_cntk():
    text, chars, char_indices, x_train, y_train = get_data(one_hot_encode_features=False)
    alphabet_size = len(chars)
    print('alphabet_size=', alphabet_size)

    model = build_model_cntk(alphabet_size=alphabet_size)
    model_filename = 'ch8-1_cntk.model'
    model.save(model_filename)
    model = None
    model = cntk.load_model(model_filename)

    x = cntk.sequence.input_variable(shape=(), dtype=np.float32)
    y = cntk.input_variable(shape=(), dtype=np.float32)
    model.replace_placeholders({model.placeholders[0]: x})
    y_oneHot = cntk.one_hot(y, num_classes=alphabet_size)
    loss_function = cntk.cross_entropy_with_softmax(model.output, y_oneHot)
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.001),
                        cntk.learning_parameter_schedule_per_sample(0.9))
    trainer = cntk.Trainer(model, (loss_function, loss_function), [learner])

    for epoch in range(1, 60):
        print('epoch', epoch)
        cntk_train(x, y, x_train, y_train, max_epochs=32, batch_size=128, trainer=trainer)

    model_filename = 'final_ch8-1_cntk.model'
    model.save(model_filename)
    generate_text_cntk(char_indices, chars, model, text)
def main(params):
    # Create output and log directories if they don't exist
    if not os.path.isdir(params['output_folder']):
        os.makedirs(params['output_folder'])
    if not os.path.isdir(params['log_folder']):
        os.makedirs(params['log_folder'])

    # Create the network
    network = create_network()

    # Create readers
    train_reader = cbf_reader(os.path.join(params['input_folder'], 'train{}.cbf'.format(params['prefix'])),
                              is_training=True, max_samples=cntk.io.INFINITELY_REPEAT)
    cv_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])),
                           is_training=False, max_samples=cntk.io.FULL_DATA_SWEEP)
    test_reader = cbf_reader(os.path.join(params['input_folder'], 'test{}.cbf'.format(params['prefix'])),
                             is_training=False, max_samples=cntk.io.FULL_DATA_SWEEP)

    input_map = {
        network['input']: train_reader.streams.front,
        network['target']: train_reader.streams.label
    }

    # Create learner
    mm_schedule = momentum_schedule(0.90)
    lr_schedule = learning_parameter_schedule([(40, 0.1), (40, 0.01)], minibatch_size=params['minibatch_size'])
    learner = cntk.adam(network['model'].parameters, lr_schedule, mm_schedule,
                        l2_regularization_weight=0.0005,
                        epoch_size=params['epoch_size'],
                        minibatch_size=params['minibatch_size'])

    # Use TensorBoard for visual logging
    log_file = os.path.join(params['log_folder'], 'log.txt')
    pp_writer = cntk.logging.ProgressPrinter(freq=10, tag='Training',
                                             num_epochs=params['max_epochs'], log_to_file=log_file)
    tb_writer = cntk.logging.TensorBoardProgressWriter(freq=10, log_dir=params['log_folder'],
                                                       model=network['model'])

    # Create trainer and training session
    trainer = Trainer(network['model'], (network['loss'], network['metric']), [learner],
                      [pp_writer, tb_writer])
    test_config = TestConfig(minibatch_source=test_reader,
                             minibatch_size=params['minibatch_size'],
                             model_inputs_to_streams=input_map)
    cv_config = CrossValidationConfig(minibatch_source=cv_reader,
                                      frequency=(1, DataUnit.sweep),
                                      minibatch_size=params['minibatch_size'],
                                      model_inputs_to_streams=input_map)
    checkpoint_config = CheckpointConfig(os.path.join(params['output_folder'], model_name),
                                         frequency=(10, DataUnit.sweep),
                                         restore=params['restore'])
    session = training_session(trainer=trainer,
                               mb_source=train_reader,
                               mb_size=params['minibatch_size'],
                               model_inputs_to_streams=input_map,
                               max_samples=params['epoch_size'] * params['max_epochs'],
                               progress_frequency=(1, DataUnit.sweep),
                               checkpoint_config=checkpoint_config,
                               cv_config=cv_config,
                               test_config=test_config)

    cntk.logging.log_number_of_parameters(network['model'])
    session.train()

    # Save the trained model
    path = os.path.join(params['output_folder'], 'final_model.dnn')
    network['model'].save(path)
    print('Saved final model to', path)
def train(reader, model_func, max_epochs=10):

    # Instantiate the model function; x is the input (feature) variable
    model = model_func(x)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000  # 18000 samples is half the dataset size
    minibatch_size = 70

    # LR schedule over epochs
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    # (we don't run this many epochs, but if we did, these are good values)
    lr_per_sample = [0.003] * 4 + [0.0015] * 24 + [0.0003]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, C.UnitType.minibatch, epoch_size)

    # Momentum schedule
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(700)

    # We use the Adam optimizer which is known to work well on this dataset
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.adam(parameters=model.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)

    # Uncomment below for more detailed logging
    # progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)

    t = 0
    for epoch in range(max_epochs):                     # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:                            # loop over minibatches on the epoch
            data = reader.next_minibatch(minibatch_size, input_map={  # fetch minibatch
                x: reader.streams.query,
                y: reader.streams.slot_labels
            })
            trainer.train_minibatch(data)               # update model with it
            t += data[y].num_samples                    # samples so far
        trainer.summarize_training_progress()
def train(reader, model_func, max_epochs=10, task='slot_tagging'):

    # Create the containers for input feature (x) and the label (y)
    x = C.sequence.input_variable(vocab_size)
    y = C.sequence.input_variable(num_labels)

    # Instantiate the model function; x is the input (feature) variable
    model = model_func(x)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function_preferred(model, y)

    # training config
    epoch_size = 18000  # 18000 samples is half the dataset size
    minibatch_size = 70

    # LR schedule over epochs
    # In CNTK, an epoch is how often we get out of the minibatch loop to
    # do other stuff (e.g. checkpointing, adjust learning rate, etc.)
    lr_per_sample = [3e-4] * 4 + [1.5e-4]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch, epoch_size=epoch_size)

    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595, minibatch_size=minibatch_size)

    # We use the Adam optimizer which is known to work well on this dataset
    # Feel free to try other optimizers from
    # https://www.cntk.ai/pythondocs/cntk.learner.html#module-cntk.learner
    learner = C.adam(parameters=model.parameters,
                     lr=lr_schedule,
                     momentum=momentums,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)

    # Uncomment below for more detailed logging
    # progress_printer = ProgressPrinter(freq=100, first=10, tag='Training', num_epochs=max_epochs)

    # Instantiate the trainer
    trainer = C.Trainer(model, (loss, label_error), learner, progress_printer)

    # process minibatches and perform model training
    C.logging.log_number_of_parameters(model)

    # Assign the data fields to be read from the input
    if task == 'slot_tagging':
        data_map = {x: reader.streams.query, y: reader.streams.slot_labels}
    else:
        data_map = {x: reader.streams.query, y: reader.streams.intent}

    t = 0
    for epoch in range(max_epochs):                     # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:                            # loop over minibatches on the epoch
            data = reader.next_minibatch(minibatch_size, input_map=data_map)  # fetch minibatch
            trainer.train_minibatch(data)               # update model with it
            t += data[y].num_samples                    # samples so far
        trainer.summarize_training_progress()
def test_ctc_encoder_train_and_network_output_to_labels():
    # test CTC encoder in training loop and CTCEncoder.network_output_to_labels
    a = C.sequence.input_variable(10)
    labels = ['a', 'b', 'c']
    encoder = CTCEncoder(labels)

    labels_tensor = C.sequence.input_variable(len(encoder.classes_))  # number of classes = 4
    input_tensor = C.sequence.input_variable(100)

    prediction_tensor = Dense(4)(Recurrence(LSTM(100))(C.ones_like(input_tensor)))

    labels_graph = C.labels_to_graph(labels_tensor)
    fb = C.forward_backward(labels_graph, prediction_tensor, blankTokenId=encoder.blankTokenId)

    ground_truth = ['a', 'b', 'b', 'b', 'c']
    seq_length = 10  # must be the same length as the sequence length in network_out

    pred = np.array([
        [0., 2., 0., 0.],
        [0., 2., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 2., 0.],
        [2., 0., 0., 0.],
        [0., 0., 0., 2.],
        [0., 0., 0., 2.],
    ]).astype(np.float32)

    n = np.random.random((10, 100)).astype(np.float32)

    # result = fb.eval({labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
    #                   input_tensor: [n]})
    # print(result)

    adam = C.adam(prediction_tensor.parameters, 0.01, 0.912)
    trainer = C.Trainer(prediction_tensor, (fb,), [adam])

    for i in range(300):
        trainer.train_minibatch({
            labels_tensor: [encoder.transform(ground_truth, seq_length=seq_length)],
            input_tensor: [n]
        })
        # print(trainer.previous_minibatch_loss_average)

    result = prediction_tensor.eval({input_tensor: [n]})
    assert encoder.network_output_to_labels(result[0], squash_repeat=True) == ground_truth
def train(self, X1_train, X2_train, Y_train, X1_val, X2_val, Y_val, batch_size=128, epochs=10):
    assert X1_train.shape == X2_train.shape
    assert len(X1_train) == len(Y_train)
    assert X1_val.shape == X2_val.shape
    assert len(X1_val) == len(Y_val)

    if cntk.try_set_default_device(cntk.gpu(0)):
        print("GPU Training enabled")
    else:
        print("CPU Training :(")

    input_shape = (X1_train.shape[1], X1_train.shape[2], X1_train.shape[3])
    self.siamese_net = self.build_network(input_shape)

    lr_per_minibatch = cntk.learning_rate_schedule(0.1, cntk.UnitType.minibatch)
    pp = cntk.logging.ProgressPrinter()
    out = input_variable((1))
    loss = cntk.binary_cross_entropy(self.out, out)
    learner = cntk.adam(self.out.parameters, lr=lr_per_minibatch, momentum=0.9)
    trainer = cntk.Trainer(self.out, (loss, loss), [learner], [pp])

    cntk.logging.log_number_of_parameters(self.out)

    for epoch in range(epochs):
        # perm = np.random.permutation(len(Y_train))
        for i in range(0, len(Y_train), batch_size):
            max_n = min(i + batch_size, len(Y_train))
            # x1 = X1_train[perm[i:max_n]]
            # x2 = X2_train[perm[i:max_n]]
            # y = Y_train[perm[i:max_n]]
            x1 = X1_train[i:max_n]
            x2 = X2_train[i:max_n]
            y = Y_train[i:max_n]
            trainer.train_minibatch({
                self.left_input: x1,
                self.right_input: x2,
                out: y
            })
            pp.update_with_trainer(trainer, with_metric=True)
            print('.')
        pp.epoch_summary(with_metric=False)
def create_learner(model):
    '''Create the optimized method'''
    lr_per_minibatch = C.learning_parameter_schedule(opt.lr)
    momentum_schedule = C.momentum_schedule_per_sample(0.9990913221888589)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_schedule)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
def create_learner(model):
    '''Create the optimized method'''
    lr_per_sample = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_sample)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_sample, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_sample)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
def create_learner(model):
    '''Create the optimized method'''
    lr_per_minibatch = C.learning_rate_schedule(opt.lr, C.UnitType.minibatch)
    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    if opt.optim == 'sgd':
        return C.sgd(model.parameters, lr=lr_per_minibatch)
    elif opt.optim == 'adam':
        return C.adam(model.parameters, lr=lr_per_minibatch, momentum=momentum_time_constant)
    elif opt.optim == 'adagrad':
        return C.adagrad(model.parameters, lr=lr_per_minibatch)
    else:
        raise RuntimeError("Invalid optim method: " + opt.optim)
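# Editor's note: minimal self-contained sketch (not taken from the snippets above) of the
# C.adam call pattern the create_learner variants share; the tiny model and random data
# below are illustrative assumptions only.
import numpy as np
import cntk as C

def _adam_smoke_test():
    x = C.input_variable(2)
    y = C.input_variable(1)
    model = C.layers.Dense(1)(x)
    loss = C.squared_error(model, y)
    learner = C.adam(model.parameters,
                     lr=C.learning_parameter_schedule(0.01),  # per-minibatch rate, newer API
                     momentum=C.momentum_schedule(0.9))
    trainer = C.Trainer(model, (loss, loss), [learner])
    features = np.random.rand(32, 2).astype(np.float32)
    targets = np.random.rand(32, 1).astype(np.float32)
    trainer.train_minibatch({x: features, y: targets})
    return trainer.previous_minibatch_loss_average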
def _create_learner(self):
    lr_per_sample = [3e-5] * 10 + [1.5e-5] * 20 + [1e-5]
    lr_per_minibatch = [lr * self.minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, C.UnitType.minibatch, self.epoch_size)
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(20)
    learner = C.adam(parameters=self.model.parameters,
                     lr=3e-4,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=0.21,
                     gradient_clipping_with_truncation=True)
    return learner
def create_trainer():
    masked_dec = dec * C.ops.clip(C.ops.argmax(y), 0, 1)
    loss, label_error = criterion(masked_dec, y)
    loss *= C.ops.clip(C.ops.argmax(y), 0, 1)

    lr_schedule = C.learning_parameter_schedule_per_sample([1e-3] * 2 + [5e-4] * 2 + [1e-4],
                                                           epoch_size=int(epoch_size))
    momentum_as_time_constant = C.momentum_as_time_constant_schedule(1000)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = C.Trainer(dec, (loss, label_error), learner, progress_printer)
    C.logging.log_number_of_parameters(dec)
    return trainer
def train(model, reader):
    y_pre = model(x)
    loss, label_error = create_criterion_function(model, y_pre, y, True)

    lr_per_minibatch = [lr] + [lr / 2] + [lr / 4]
    # lr_per_minibatch = [lr * batch_size for lr in lr_per_sample]
    lr_schedule = C.learning_parameter_schedule(lr_per_minibatch, epoch_size=epoch_size)

    # Momentum schedule
    momentums = C.momentum_schedule(0.9048374180359595, minibatch_size=batch_size)

    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epoch)

    # learner = C.sgd(model.parameters, lr_schedule)
    learner = C.adam(y_pre.parameters,
                     lr_schedule,
                     momentum=momentums,
                     gradient_clipping_threshold_per_sample=15)
    trainer = C.Trainer(y_pre, (loss, label_error), learner, progress_printer)

    C.logging.log_number_of_parameters(y_pre)  # print # parameters and # tensor

    loss_summary = []
    step = 0
    data_map = {x: reader.streams.query, y: reader.streams.intent}

    t = 0
    for epoch in range(max_epoch):  # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            data = reader.next_minibatch(batch_size, input_map=data_map)  # fetch minibatch
            # print(data)
            trainer.train_minibatch(data)  # update model with it
            t += data[y].num_samples
            if t % 6000 == 0:
                training_loss = trainer.previous_minibatch_loss_average
                error = trainer.previous_minibatch_evaluation_average
                print("epoch: {}, step: {}, loss: {:.5f}, error {:.5f}".format(
                    epoch, t, training_loss, error))
        trainer.summarize_training_progress()
def create_trainer():
    loss, label_error = create_criterion_function_preferred(dec, y)

    schedule_step = 1 * print_freq
    lr_per_sample = [1e-3]
    lr_per_minibatch = [lr * minibatch_size for lr in lr_per_sample]
    lr_schedule = C.learning_rate_schedule(lr_per_minibatch, C.UnitType.minibatch, epoch_size)

    momentum_as_time_constant = C.momentum_as_time_constant_schedule(0)
    learner = C.adam(parameters=dec.parameters,
                     lr=lr_schedule,
                     momentum=momentum_as_time_constant,
                     gradient_clipping_threshold_per_sample=15,
                     gradient_clipping_with_truncation=True)

    trainer = C.Trainer(dec, (loss, label_error), learner)
    trainer.restore_from_checkpoint(model_file)
    return trainer
def train(train_reader, model_func, num_sweeps_to_train_with=10):
    # Instantiate the model function; x is the input (feature) variable
    # We will scale the input image pixels within the 0-1 range by dividing all input values by 255.
    model = model_func(x / 255)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function(model, y)

    # Instantiate the trainer object to drive the model training
    learning_rate = 0.001
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    learner = C.adam(z.parameters, lr_schedule, momentum=0.9)
    trainer = C.Trainer(z, (loss, label_error), [learner])

    # Initialize the parameters for the trainer
    minibatch_size = 100
    num_samples_per_sweep = 60000
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size

    # Map the data streams to the input and labels.
    input_map = {
        y: train_reader.streams.labels,
        x: train_reader.streams.features
    }

    # Uncomment below for more detailed logging
    # training_progress_output_freq = 500

    # Start a timer
    start = time.time()

    for i in range(0, int(num_minibatches_to_train)):
        # Read a mini batch from the training data file
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        # print_training_progress(trainer, i, training_progress_output_freq, verbose=1)

    # Print training time
    end = time.time()
    print(f'{end - start:.6f}')
    return trainer
def train_mse_cntk(x, y, model, train_gen, val_gen, epochs, val_steps):
    loss_function = cntk.squared_error(model, y)
    accuracy_function = loss_function

    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.001),
                        cntk.learning_parameter_schedule_per_sample(0.9))
    trainer = cntk.Trainer(model, (loss_function, accuracy_function), [learner])
    evaluator = cntk.Evaluator(accuracy_function)

    history = fit_generator(x, y, model=model, trainer=trainer, evaluator=evaluator,
                            train_gen=train_gen, steps_per_epoch=500, epochs=epochs,
                            val_gen=val_gen, validation_steps=val_steps)
    plot_results(history)
def build_SRResNet_graph(lr_image_shape, hr_image_shape, net):
    inp_dynamic_axes = [C.Axis.default_batch_axis()]
    real_X = C.input(lr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_X")
    real_Y = C.input(hr_image_shape, dynamic_axes=inp_dynamic_axes, name="real_Y")

    real_X_scaled = real_X / 255
    real_Y_scaled = real_Y / 255

    genG = net(real_X_scaled)

    G_loss = C.reduce_mean(C.square(real_Y_scaled - genG))

    G_optim = C.adam(G_loss.parameters,
                     lr=C.learning_rate_schedule([(1, 0.01), (1, 0.001), (98, 0.0001)],
                                                 C.UnitType.minibatch, 10000),
                     momentum=C.momentum_schedule(0.9),
                     gradient_clipping_threshold_per_sample=1.0)

    G_G_trainer = C.Trainer(genG, (G_loss, None), G_optim)

    return (real_X, real_Y, genG, real_X_scaled, real_Y_scaled, G_optim, G_G_trainer)
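# Editor's note: small illustrative aside (an assumption, not part of build_SRResNet_graph)
# on the (count, value) pair syntax used in the schedule above: each pair holds its value
# for count * epoch_size samples. Shown with the newer learning_parameter_schedule API.
import cntk as C

sched = C.learning_parameter_schedule([(1, 0.01), (1, 0.001), (98, 0.0001)], epoch_size=10000)
assert sched[0] == 0.01        # first 10,000 samples
assert sched[10000] == 0.001   # next 10,000 samples
assert sched[20000] == 0.0001  # remainder of training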
def Loss(self):
    # Evaluating old actions and values:
    logprobs, state_value, dist_entropy = self.policy.evaluate()

    # Finding the ratio (pi_theta / pi_theta__old):
    # (importance sampling)
    c_old_logprobs = C.input_variable(logprobs.shape, name='old_log_probs')
    ratios = C.exp(logprobs - C.stop_gradient(c_old_logprobs))

    c_rewards = C.input_variable(1, name='rewards')
    advantages = c_rewards - C.stop_gradient(state_value)

    # Finding Surrogate Loss:
    surr1 = ratios * advantages
    surr2 = C.clip(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages
    neglog_loss = -C.element_min(surr1, surr2)
    entropy_loss = -0.01 * dist_entropy
    actor_loss = C.reduce_mean(neglog_loss + entropy_loss)
    critic_loss = 0.5 * C.reduce_mean(C.square(state_value - c_rewards))
    loss = actor_loss + critic_loss

    chunk = {
        'neglog_loss': neglog_loss,
        'entropy_loss': entropy_loss,
        'actor_loss': actor_loss,
        'critic_loss': critic_loss
    }

    trainer = C.Trainer(loss, (loss, None),
                        C.adam(loss.parameters,
                               C.learning_parameter_schedule_per_sample(self.lr),
                               C.momentum_schedule_per_sample(self.betas[0]),
                               variance_momentum=C.momentum_schedule_per_sample(self.betas[1])))
    # trainer = C.Trainer(loss, (loss, None), C.adam(loss.parameters, C.learning_parameter_schedule(10),
    #                     C.momentum_schedule(0.9), variance_momentum=C.momentum_schedule(0.999)))  # higher learning rate

    return loss, chunk, trainer
def _create_model(self, input_dim, output_dim, hidden_dims):
    c_in = C.input_variable(input_dim, name='state')
    model = c_in
    for h in hidden_dims:
        model = C.layers.Dense(h, activation=C.relu)(model)
    model = C.layers.Dense(output_dim, activation=C.softmax)(model)

    c_action_prob = model
    c_action_onehot = C.input_variable(output_dim, name='action_onehot')
    c_reward = C.input_variable(1, name='reward')

    action_prob = C.reduce_sum(c_action_prob * c_action_onehot)
    log_action_prog = C.log(action_prob)
    loss = -log_action_prog * c_reward
    loss = C.reduce_mean(loss)

    lr = 1e-2
    lr_schedule = C.learning_parameter_schedule(lr)
    learner = C.adam(model.parameters, lr_schedule, C.momentum_schedule(0.9))
    trainer = C.Trainer(model, (loss, None), learner)

    return model, loss, trainer
def use_glove_word_embeddings_cntk(preload_weights=False):
    tokenizer, x_train, y_train, x_val, y_val = from_raw_text_to_word_embeddings()

    x = cntk.input_variable(shape=(Constants.maxlen,), dtype=np.float32)
    y = cntk.input_variable(shape=(1,), dtype=np.float32)

    model = cntk.one_hot(x, num_classes=Constants.max_words, sparse_output=True)
    if preload_weights is True:
        embedding_matrix = compute_embedding_matrix(tokenizer)
        assert (Constants.embedding_dim == embedding_matrix.shape[0]) or (Constants.embedding_dim == embedding_matrix.shape[1])
        model = cntk.layers.Embedding(weights=embedding_matrix)(model)
    else:
        model = cntk.layers.Embedding(Constants.embedding_dim)(model)
    model = cntk.layers.Dense(32, activation=cntk.relu)(model)
    model = cntk.layers.Dense(1, activation=cntk.sigmoid)(model)

    loss_function = cntk.binary_cross_entropy(model.output, y)
    round_predictions = cntk.round(model.output)
    equal_elements = cntk.equal(round_predictions, y)
    accuracy_function = cntk.reduce_mean(equal_elements, axis=0)

    max_epochs = 10
    batch_size = 32
    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.0001),
                        cntk.learning_parameter_schedule_per_sample(0.99))
    progress_printer = cntk.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(model, (loss_function, accuracy_function), [learner], progress_printer)
    evaluator = cntk.Evaluator(accuracy_function)
    cntk_train(x, y, x_train, y_train, max_epochs, batch_size, trainer, evaluator)
    ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
    (([0.2, 0.4], 0, 5), [0.2] * 5 + [0.4] * 20, 0),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 0, 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20, 0),
]

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_parameter_schedule(1)),
    lambda params: C.adam(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_parameter_schedule(1), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_parameter_schedule(1)),
    lambda params: C.momentum_sgd(params, lr=learning_parameter_schedule(1), momentum=C.momentum_schedule(0.9))]


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
D_real = dcgan_discriminator(x_real)
D_fake = D_real.clone(method="share", substitutions={x_real.output: G_fake.output})

#
# loss function
#
G_loss = -C.log(D_fake)
D_loss = -(C.log(D_real) + C.log(1.0 - D_fake))

#
# optimizer
#
G_learner = C.adam(G_fake.parameters,
                   lr=C.learning_parameter_schedule_per_sample(1e-4),
                   momentum=0.5,
                   gradient_clipping_threshold_per_sample=minibatch_size,
                   gradient_clipping_with_truncation=True)
D_learner = C.adam(D_real.parameters,
                   lr=C.learning_parameter_schedule_per_sample(1e-4),
                   momentum=0.5,
                   gradient_clipping_threshold_per_sample=minibatch_size,
                   gradient_clipping_with_truncation=True)

G_progress_printer = C.logging.ProgressPrinter(tag="Generator")
D_progress_printer = C.logging.ProgressPrinter(tag="Discriminator")

if not os.path.exists("./dcgan_image"):
    os.mkdir("./dcgan_image")

G_trainer = C.Trainer(G_fake, (G_loss, None), [G_learner], [G_progress_printer])
    ((0.2, 0), [0.2, 0.2, 0.2, 0.2], 0),
    (([0.2, 0.4], 0, 5), [0.2] * 5 + [0.4] * 20, 0),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 0, 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20, 0),
]

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
    lambda params: C.sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.momentum_sgd(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9))]


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS_LEGACY)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    l = learning_rate_schedule(*params)
    assert l.minibatch_size == minibatch_size
    assert [l[i] for i in range(len(expectation))] == expectation


@pytest.mark.parametrize("params, expectation, minibatch_size", LR_SCHEDULE_PARAMS)
def test_learning_parameter_schedule(params, expectation, minibatch_size):
    l = learning_parameter_schedule(*params)
result = np.zeros((3, 256, 256))
result[0, :, :] = x * 100
result[1, :, :] = out[0, :, :]
result[2, :, :] = out[1, :, :]
result = np.transpose(result, (2, 0, 1))
result = np.transpose(result, (2, 0, 1))
imsave("img_result.png", lab2rgb(result))
imsave("img_gray_version.png", rgb2gray(lab2rgb(result)))


if __name__ == '__main__':
    features, labels = image_processing()

    input_var = input_variable((1, image_size, image_size))
    label_var = input_variable((2, image_size, image_size))

    z = create_model(input_var)
    loss = mse(z, label_var)
    ev = mse(z, label_var)

    lr_rate = [0.001]
    lr_per_minibatch = c.learning_parameter_schedule(lr_rate, epoch_size=1)
    progress_printer = c.logging.ProgressPrinter()
    learner = c.adam(z.parameters, lr_per_minibatch, momentum=0.75)
    trainer = c.Trainer(z, (loss, ev), [learner], progress_printer)

    cntk.logging.log_number_of_parameters(z)
    learn()
    colorize('test.jpg')
def test_learner_init():
    i = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w = parameter(shape=(1,))

    res = i * w

    # test new API: learning_parameter_schedule

    # explicitly specify reference minibatch size and learning rate as a number:
    learner = sgd(res.parameters, lr=0.1, minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch
    # with direct learning rate number specification, the learning rate schedule gets the reference minibatch size from the learner parameters:
    assert learner._learning_rate_schedule.minibatch_size == 25
    assert learner.learning_rate() == 0.1

    # no explicit specification of reference minibatch size; learning rate is a number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=25)
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == 25  # the learner's reference minibatch
    # the schedule keeps its own reference minibatch size:
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20))
    assert learner.is_compatible_mode() == False
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # no explicit specification of reference minibatch size; learning rate is a number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1))
    assert learner.is_compatible_mode() == False
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    # no explicit specification of reference minibatch size; learning rate is a number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1, 20), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == 20
    assert learner.learning_rate() == 0.1

    # no explicit specification of reference minibatch size; learning rate is a number:
    learner = sgd(res.parameters, lr=learning_parameter_schedule(0.1), minibatch_size=C.learners.IGNORE)
    assert learner.is_compatible_mode() == True
    assert learner.minibatch_size == C.learners.IGNORE  # the learner's reference minibatch
    assert learner._learning_rate_schedule.minibatch_size == C.learners.IGNORE
    assert learner.learning_rate() == 0.1

    mysgd = C.sgd(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd.learning_rate() == 0.4

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum.learning_rate() == 0.4

    myadadelta = C.adadelta(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta.learning_rate() == 0.4

    myadam = C.adam(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam.learning_rate() == 0.4

    myadagrad = C.adagrad(parameters=res.parameters, lr=0.4, minibatch_size=32)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad.learning_rate() == 0.4

    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=0.4, momentum=0.9, variance_momentum=0.9, minibatch_size=32)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad.learning_rate() == 0.4

    mynesterov = C.nesterov(parameters=res.parameters, lr=0.4, momentum=0.9, minibatch_size=32)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov.learning_rate() == 0.4

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=0.4, gamma=0.5, inc=1.2, dec=0.7, max=10, min=1e-8, minibatch_size=32)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop.learning_rate() == 0.4

    mysgd = C.sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert mysgd.minibatch_size == 32
    assert mysgd._learning_rate_schedule.minibatch_size == 32
    assert mysgd._learning_rate_schedule[0] == 0.4
    assert mysgd._learning_rate_schedule[512] == 0.1
    assert mysgd._learning_rate_schedule[512 * 2] == 0.001

    mymomentum = C.momentum_sgd(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                                minibatch_size=32, epoch_size=512)
    assert mymomentum.minibatch_size == 32
    assert mymomentum._learning_rate_schedule.minibatch_size == 32
    assert mymomentum._learning_rate_schedule[0] == 0.4
    assert mymomentum._learning_rate_schedule[512] == 0.1
    assert mymomentum._learning_rate_schedule[512 * 2] == 0.001

    myadadelta = C.adadelta(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadadelta.minibatch_size == 32
    assert myadadelta._learning_rate_schedule.minibatch_size == 32
    assert myadadelta._learning_rate_schedule[0] == 0.4
    assert myadadelta._learning_rate_schedule[512] == 0.1
    assert myadadelta._learning_rate_schedule[512 * 2] == 0.001

    myadam = C.adam(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9, 0.1, 0.001],
                    variance_momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert myadam.minibatch_size == 32
    assert myadam._learning_rate_schedule.minibatch_size == 32
    assert myadam._learning_rate_schedule[0] == 0.4
    assert myadam._learning_rate_schedule[512] == 0.1
    assert myadam._learning_rate_schedule[512 * 2] == 0.001

    myadagrad = C.adagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], minibatch_size=32, epoch_size=512)
    assert myadagrad.minibatch_size == 32
    assert myadagrad._learning_rate_schedule.minibatch_size == 32
    assert myadagrad._learning_rate_schedule[0] == 0.4
    assert myadagrad._learning_rate_schedule[512] == 0.1
    assert myadagrad._learning_rate_schedule[512 * 2] == 0.001

    # note: the original asserted on myadagrad here, apparently a copy-paste slip; check the fsadagrad learner instead
    myfsadagrad = C.fsadagrad(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                              variance_momentum=[0.9], minibatch_size=32, epoch_size=512)
    assert myfsadagrad.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule.minibatch_size == 32
    assert myfsadagrad._learning_rate_schedule[0] == 0.4
    assert myfsadagrad._learning_rate_schedule[512] == 0.1
    assert myfsadagrad._learning_rate_schedule[512 * 2] == 0.001

    mynesterov = C.nesterov(parameters=res.parameters, lr=[0.4, 0.1, 0.001], momentum=[0.9],
                            minibatch_size=32, epoch_size=512)
    assert mynesterov.minibatch_size == 32
    assert mynesterov._learning_rate_schedule.minibatch_size == 32
    assert mynesterov._learning_rate_schedule[0] == 0.4
    assert mynesterov._learning_rate_schedule[512] == 0.1
    assert mynesterov._learning_rate_schedule[512 * 2] == 0.001

    myrmsrop = C.rmsprop(parameters=res.parameters, lr=[0.4, 0.1, 0.001], gamma=0.5, inc=1.2, dec=0.7,
                         max=10, min=1e-8, minibatch_size=32, epoch_size=512)
    assert myrmsrop.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule.minibatch_size == 32
    assert myrmsrop._learning_rate_schedule[0] == 0.4
    assert myrmsrop._learning_rate_schedule[512] == 0.1
    assert myrmsrop._learning_rate_schedule[512 * 2] == 0.001

    learner_parameter = learner.parameters
    from cntk.variables import Parameter
    param = learner_parameter[0]
    assert isinstance(param, Parameter)

    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    momentum = C.momentum_schedule(0.999, minibatch_size=1)
    lr_per_sample = learning_parameter_schedule(0.1, minibatch_size=1)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.momentum_sgd(res.parameters, lr_per_sample, momentum, unit_gain=unit_gain_value)

    C.set_default_unit_gain_value(False)
    unit_gain_value = C.default_unit_gain_value()
    assert not unit_gain_value

    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.nesterov(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.nesterov(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    lr_per_sample = learning_parameter_schedule([0.1] * 3 + [0.2] * 2 + [0.3], minibatch_size=1)
    C.adagrad(res.parameters, lr=lr_per_sample, need_ave_multiplier=True)

    C.set_default_unit_gain_value(True)
    unit_gain_value = C.default_unit_gain_value()
    assert unit_gain_value

    lr_per_sample = learning_parameter_schedule([(3, 0.1), (2, 0.2), (1, 0.3)], minibatch_size=1)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum)
    C.fsadagrad(res.parameters, lr_per_sample, momentum, unit_gain_value)
    C.fsadagrad(res.parameters, lr=lr_per_sample, momentum=momentum, unit_gain=unit_gain_value)

    gamma, inc, dec, max, min = [0.5, 1.2, 0.7, 10, 1e-8]
    lr_per_sample = learning_parameter_schedule([0.1, 0.2], minibatch_size=1, epoch_size=100)
    C.rmsprop(res.parameters, lr_per_sample, gamma, inc, dec, max, min, True)

    C.adadelta(res.parameters, lr_per_sample)
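# Editor's note: small illustrative helper (an assumption, not part of the test above) relating
# the momentum_as_time_constant_schedule values used in several snippets to a plain per-sample
# momentum: a time constant t corresponds to a per-sample momentum of exp(-1/t).
import math
import cntk as C

def momentum_from_time_constant(time_constant):
    # e.g. time_constant=700 gives a per-sample momentum of roughly 0.99857
    return math.exp(-1.0 / time_constant)

per_sample_momentum = C.momentum_schedule_per_sample(momentum_from_time_constant(700))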
psi = h_prime(C.dot(w, z_prev) + b) * w
det_jacob = C.abs(1 + C.dot(u, psi))
sum_log_det_jacob += C.log(EPS + det_jacob)
z_prev = z_prev + u * h(C.dot(w, z_prev) + b)

z_k = z_prev

log_q_k = C.log(base_dist.pdf(z_0)) - sum_log_det_jacob
log_p = C.log(EPS + true_density(z_k))
kl = C.reduce_mean(log_q_k - log_p)

#%%
lr = 1
lr_schedule = C.learning_parameter_schedule(lr)
learner = C.adam(kl.parameters, lr_schedule, 0.9)
trainer = C.Trainer(kl, (kl, None), learner)

#%%
for i in range(1, 2000 + 1):
    s = base_dist.sample(500).astype(np.float32)
    trainer.train_minibatch({kl.arguments[0]: s})
    if i % 100 == 0:
        print(trainer.previous_minibatch_loss_average)
    # if i % 500 == 0:
    #     v = z_k.eval({z_k.arguments[0]: s})
    #     plt.scatter(v[:, 0], v[:, 1], alpha=0.7)
    #     plt.show()

v = z_k.eval({z_k.arguments[0]: s})
plt.scatter(v[:, 0], v[:, 1], alpha=0.5, c='green')
     (1, 0.8)], UnitType.sample, 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

MOMENTUM_SCHEDULE_PARAMS = [
    ((0.2,), [0.2]),
    ((0.2,), [0.2, 0.2, 0.2, 0.2]),
    (([0.2, 0.4], 5), [0.2] * 5 + [0.4] * 20),
    (([(3, 0.2), (2, 0.4), (1, 0.8)], 5), [0.2] * 15 + [0.4] * 10 + [0.8] * 20),
]

LEARNER_LAMBDAS = [
    lambda params: C.adadelta(params),
    lambda params: C.adagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch)),
    lambda params: C.adam(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.fsadagrad(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.nesterov(params, lr=learning_rate_schedule(1, UnitType.minibatch), momentum=C.momentum_schedule(0.9)),
    lambda params: C.rmsprop(params, lr=learning_rate_schedule(1, UnitType.minibatch), gamma=0.1, inc=3.0, dec=0.1, max=np.inf, min=1e-8),
def driver(self):
    np.random.seed(0)

    # Define the data dimensions
    image_shape = (1, 28, 28)
    input_dim = int(np.prod(image_shape, dtype=int))
    output_dim = 10
    num_train_samples = 60000
    num_test_samples = 10000

    # The local path where the training and test data might be found or will be downloaded to.
    training_data_path = os.path.join(os.getcwd(), "MNIST_data", "Train-28x28_cntk_text.txt")
    testing_data_path = os.path.join(os.getcwd(), "MNIST_data", "Test-28x28_cntk_text.txt")

    # Download the data if they don't already exist
    url_train_image = "train-images-idx3-ubyte.gz"
    url_train_labels = "train-labels-idx1-ubyte.gz"
    if not os.path.exists(training_data_path):
        url_train_image = "train-images-idx3-ubyte.gz"
        url_train_labels = "train-labels-idx1-ubyte.gz"
        print("Loading training data")
        saved_data_dir = os.path.join(os.getcwd(), "MNIST_data")
        train = self.load_mnist_data(url_train_image, url_train_labels, num_train_samples,
                                     local_data_dir=saved_data_dir)
        print("Writing training data text file...")
        self.save_as_txt(training_data_path, train)
        print("[Done]")

    url_test_image = "t10k-images-idx3-ubyte.gz"
    url_test_labels = "t10k-labels-idx1-ubyte.gz"
    if not os.path.exists(testing_data_path):
        url_test_image = "t10k-images-idx3-ubyte.gz"
        url_test_labels = "t10k-labels-idx1-ubyte.gz"
        print("Loading testing data")
        saved_data_dir = os.path.join(os.getcwd(), "MNIST_data2")
        test = self.load_mnist_data(url_test_image, url_test_labels, num_test_samples, saved_data_dir)
        print("Writing testing data text file...")
        self.save_as_txt(testing_data_path, test)
        print("[Done]")

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    # Convert to CNTK MinibatchSource
    # original as below deprecated------------
    # train_minibatch_source = cntk.text_format_minibatch_source(training_data_path, [
    #     cntk.StreamConfiguration(feature_stream_name, input_dim),
    #     cntk.StreamConfiguration(labels_stream_name, output_dim)])
    # ------------------------------------------------------------------
    train_minibatch_source = MinibatchSource(
        CTFDeserializer(
            training_data_path,
            StreamDefs(features=StreamDef(field='features', shape=input_dim, is_sparse=False),
                       labels=StreamDef(field='labels', shape=output_dim, is_sparse=False))))
    training_features = train_minibatch_source[feature_stream_name]
    training_labels = train_minibatch_source[labels_stream_name]
    print("Training data from file %s successfully read." % training_data_path)

    # test_minibatch_source = cntk.text_format_minibatch_source(testing_data_path, [
    #     cntk.StreamConfiguration(feature_stream_name, input_dim),
    #     cntk.StreamConfiguration(labels_stream_name, output_dim)])
    test_minibatch_source = MinibatchSource(
        CTFDeserializer(
            testing_data_path,
            StreamDefs(features=StreamDef(field='features', shape=input_dim, is_sparse=False),
                       labels=StreamDef(field='labels', shape=output_dim, is_sparse=False))))
    test_features = test_minibatch_source[feature_stream_name]
    test_labels = test_minibatch_source[labels_stream_name]
    print("Test data from file %s successfully read." % testing_data_path)

    # Define the input to the neural network
    input_vars = cntk.ops.input_variable(image_shape, np.float32)

    # Create the convolutional neural network
    output = self.create_convolutional_neural_network(input_vars, output_dim, dropout_prob=0.5)

    # ----------------------
    # Setting up the trainer
    # ----------------------

    # Define the label as the other input parameter of the trainer
    labels = cntk.ops.input_variable(output_dim, np.float32)

    # Initialize the parameters for the trainer
    train_minibatch_size = 50
    learning_rate = 1e-4
    momentum = 0.9

    # Define the loss function
    # loss = cntk.ops.cross_entropy_with_softmax(output, labels)
    loss = cntk.cross_entropy_with_softmax(output, labels)

    # Define the function that calculates classification error
    # label_error = cntk.ops.classification_error(output, labels)
    label_error = cntk.classification_error(output, labels)

    # Instantiate the trainer object to drive the model training
    # learner = cntk.adam_sgd(output.parameters, learning_rate, momentum)
    learner = cntk.adam(output.parameters,
                        learning_rate_schedule(learning_rate, UnitType.sample),
                        momentum_schedule(momentum))
    trainer = cntk.Trainer(output, (loss, label_error), [learner])

    # -----------------------------------------
    # Training the Convolutional Neural Network
    # -----------------------------------------
    num_training_epoch = 1
    training_progress_output_freq = 100

    for epoch in range(num_training_epoch):
        sample_count = 0
        num_minibatch = 0

        # loop over minibatches in the epoch
        while sample_count < num_train_samples:
            minibatch = train_minibatch_source.next_minibatch(
                min(train_minibatch_size, num_train_samples - sample_count))

            # Specify the mapping of input variables in the model to actual minibatch data to be trained with
            data = {
                input_vars: minibatch[training_features],
                labels: minibatch[training_labels]
            }
            trainer.train_minibatch(data)
            sample_count += data[labels].num_samples
            num_minibatch += 1

            # Print the training progress data
            if num_minibatch % training_progress_output_freq == 0:
                # training_loss = cntk.get_train_loss(trainer)
                training_loss = trainer.previous_minibatch_loss_average
                # eval_error = cntk.get_train_eval_criterion(trainer)
                eval_error = trainer.previous_minibatch_evaluation_average
                print("Epoch %d | # of Samples: %6d | Loss: %.6f | Error: %.6f"
                      % (epoch, sample_count, training_loss, eval_error))

    print("Training Completed.", end="\n\n")

    # -------------------
    # Classification Test
    # -------------------
    test_minibatch_size = 1000
    sample_count = 0
    test_results = []

    while sample_count < num_test_samples:
        minibatch = test_minibatch_source.next_minibatch(
            min(test_minibatch_size, num_test_samples - sample_count))

        # Specify the mapping of input variables in the model to actual minibatch data to be tested with
        data = {
            input_vars: minibatch[test_features],
            labels: minibatch[test_labels]
        }
        eval_error = trainer.test_minibatch(data)
        test_results.append(eval_error)
        sample_count += data[labels].num_samples

    # Printing the average of evaluation errors of all test minibatches
    print("Average errors of all test minibatches: %.3f%%" % (float(np.mean(test_results, dtype=float)) * 100))

    a = 5
# loss = (mkld)

# _q_prime = C.tanh(q)
# _mu = C.reduce_mean(_q_prime, axis=C.Axis.default_batch_axis())
# _sigma = C.reduce_mean(C.square(_q_prime - _mu), axis=C.Axis.default_batch_axis())
# loss += C.reduce_mean(C.square(_mu)) + C.reduce_mean(C.square(_sigma - 0.615))
# # _log_mu = C.reduce_mean(C.log(C.abs(q)), axis=C.Axis.default_batch_axis())
# # loss += C.reduce_mean(C.square(_log_mu + 0.57))

from IPython import embed; embed()
exit()

lr_rate = 1e-3
learner = C.adam(loss.parameters,
                 C.learning_parameter_schedule_per_sample(lr_rate),
                 C.momentum_schedule(0.99))
trainer = C.Trainer(loss, (loss, None), [learner])

for i in tqdm(range(10000)):
    # v = np.random.uniform(size=(1, 2))
    v = datasets.make_moons(n_samples=1000, noise=.05)[0].astype(np.float32)
    trainer.train_minibatch({loss.arguments[0]: v})
    # from IPython import embed; embed()
    if i % 100 == 0:
        print('\n', trainer.previous_minibatch_loss_average)
        if len(bn) > 0:  # batch norm
            result = C.combine(bn).eval({loss.arguments[0]: v})
            result = list(result.values())

momentum = C.Constant(0.9)