def create_trainer(loss, pred_error, lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train, cfg):
    """Build a Trainer that uses a separate, scaled learning rate for biases.

    Parameters whose name equals ``'b'`` or contains ``'.b'`` are treated as
    biases and get their own momentum-SGD learner whose per-sample learning
    rates are ``lr_per_sample`` multiplied by ``cfg["CNTK"].BIAS_LR_MULT``.
    Every other parameter is trained with ``lr_per_sample`` unchanged.

    Args:
        loss: Training loss; a ``C.Variable`` is wrapped with ``C.combine``.
        pred_error: Evaluation metric passed to the Trainer.
        lr_per_sample: Sequence of per-sample learning rates.
        mm_schedule: Momentum schedule shared by both learners.
        l2_reg_weight: L2 regularization weight shared by both learners.
        epochs_to_train: Not used by this function.
        cfg: Configuration mapping; only ``cfg["CNTK"].BIAS_LR_MULT`` is read.

    Returns:
        A ``Trainer`` over ``(loss, pred_error)`` driven by both learners.
    """
    if isinstance(loss, C.Variable):
        loss = C.combine([loss])

    # Split the parameters into biases and everything else.
    all_params = loss.parameters
    bias_params = [prm for prm in all_params if prm.name == 'b' or '.b' in prm.name]
    weight_params = [prm for prm in all_params if prm not in bias_params]

    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    def _build_learner(parameters, per_sample_rates):
        # Both learners differ only in parameter set and learning rates.
        schedule = learning_rate_schedule(per_sample_rates, unit=UnitType.sample)
        return momentum_sgd(parameters, schedule, mm_schedule,
                            l2_regularization_weight=l2_reg_weight,
                            unit_gain=False, use_mean_gradient=True)

    weight_learner = _build_learner(weight_params, lr_per_sample)
    scaled_bias_rates = [rate * bias_lr_mult for rate in lr_per_sample]
    bias_learner = _build_learner(bias_params, scaled_bias_rates)

    return Trainer(None, (loss, pred_error), [weight_learner, bias_learner])
def train():
    """Train the word-embedding model from the pickled corpus.

    Loads the corpus from ``tmp_textdata.pickle``, runs ``do_subsampling`` on
    it, builds the model with ``create_model`` and trains it with plain SGD
    (gradient clipping with truncation) inside a checkpointed
    ``training_session``.

    Reads the module-level names ``max_window_size``, ``hidden_dim`` and
    ``clipping_threshold_per_sample``.
    """
    print('Unpickling data (this could take a short while)')
    # NOTE(review): pickle.load can execute arbitrary code from the file; this
    # assumes the pickle was produced locally by the preprocessing step.
    # The context manager closes the handle that a bare open() would leak.
    with open('tmp_textdata.pickle', 'rb') as pickle_file:
        training_data = pickle.load(pickle_file)

    print('Preprocessing data (this could take a LONG while)...')
    do_subsampling(training_data, subsampling=4e-5, prog_freq=1e7)
    print('Preprocessing is done. Final # of training words: {}'.format(len(training_data.text_as_id_list)))

    mb_source = WordMinibatchSource(training_data, max_window_size)
    mb_num_samples = 128
    mb_size = minibatch_size_schedule(mb_num_samples)

    freq_list = training_data.id2freq
    vocab_dim = len(freq_list)
    print(vocab_dim)

    input_vector, label_vector = create_inputs(vocab_dim)
    z, cross_entropy, error = create_model(input_vector, label_vector, freq_list, vocab_dim, hidden_dim)

    # Per-sample learning rate starts at 3e-3 and is multiplied by 0.8 for
    # each of 10 steps; a step lasts half the number of training words.
    lr_schedule2 = learning_rate_schedule([(3e-3)*(0.8**i) for i in range(10)],
                                          UnitType.sample,
                                          epoch_size=len(training_data.text_as_id_list)//2)

    gradient_clipping_with_truncation = True
    learner = C.learners.sgd(z.parameters,
                             lr=lr_schedule2,
                             gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                             gradient_clipping_with_truncation=gradient_clipping_with_truncation)

    progress_printer = C.logging.ProgressPrinter(freq=200, tag='Training')
    checkpoint_config = CheckpointConfig(frequency=100000*mb_num_samples,
                                         filename=os.path.join(os.getcwd(), "word2vec_checkpoint"),
                                         restore=False)

    trainer = Trainer(z, (cross_entropy, error), [learner], progress_writers=[progress_printer])

    input_map = {
        input_vector: mb_source.fsi,
        label_vector: mb_source.lsi
    }

    session = training_session(trainer, mb_source, mb_size, input_map,
                               progress_frequency=len(training_data.text_as_id_list),
                               max_samples=None,
                               checkpoint_config=checkpoint_config,
                               cv_config=None,
                               test_config=None)

    C.logging.log_number_of_parameters(z)
    print()
    session.train()
def init_trainer(config, text_lines, slot_value_lines):
    """Create the data reader, model, trainer and input variables.

    Args:
        config: Object providing ``hidden_dim``, ``segment_begin``,
            ``segment_end``, ``learning_rate``, ``momentum_as_time_constant``
            and ``clipping_threshold_per_sample``.
        text_lines: Text lines handed to ``DataReader``.
        slot_value_lines: Slot-value lines handed to ``DataReader``.

    Returns:
        Tuple ``(data, z, trainer, inputs)`` where ``inputs`` is the list
        ``[input_sequence, sv_pair, label_sequence, inputH, inputC]``.
    """
    hidden_dim = config.hidden_dim
    data = DataReader(text_lines, slot_value_lines, config.segment_begin, config.segment_end)

    # Create model nodes for the source and target inputs
    vocab_dim = data.vocab_dim
    sv_dim = data.sv_dim
    input_sequence, sv_pair, label_sequence, inputH, inputC = create_inputs(hidden_dim, sv_dim, vocab_dim)

    network = create_model(hidden_dim, sv_dim, vocab_dim)
    z = network(input_sequence, inputH, inputC, sv_pair)

    # Training criterion (ce) and evaluation metric (err)
    ce, err = cross_entropy_with_full_softmax(z, label_sequence, sv_dim, vocab_dim)

    learning_rate = config.learning_rate
    momentum_as_time_constant = config.momentum_as_time_constant
    clipping_threshold_per_sample = config.clipping_threshold_per_sample

    per_sample_lr = learning_rate_schedule(learning_rate, UnitType.sample)
    momentum = momentum_as_time_constant_schedule(momentum_as_time_constant)

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, per_sample_lr, momentum,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=True)
    trainer = Trainer(z, (ce, err), learner)

    inputs = [input_sequence, sv_pair, label_sequence, inputH, inputC]
    return data, z, trainer, inputs
def test_learner_logging():
    """Check that learning-rate and momentum changes reach the progress writer.

    Trains ten minibatches with schedules that change after every sample and
    asserts that the writer logged the values in interleaved order
    (lr, momentum, lr, momentum, ...), followed by the final learning rate 0.
    """
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    lr_sched = learning_rate_schedule(lr_values, UnitType.sample, 1)
    m_sched = C.momentum_schedule(m_values, 1)
    learner = C.momentum_sgd(z.parameters, lr_sched, m_sched)
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for _ in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    assert len(writer.log_output) == len(lr_values + m_values)

    # Interleave lr/momentum pairs; the unmatched last lr value (0) comes last.
    expected = [value for pair in zip(lr_values, m_values) for value in pair] + [0]

    for position, expected_value in enumerate(expected):
        assert (expected_value == writer.log_output[position])
def create_trainer(network, epoch_size, num_epochs, minibatch_size, num_quantization_bits, progress_printer):
    """Create a data-parallel distributed Trainer with a stepwise decaying LR.

    The per-minibatch learning rate starts at ``0.45 * minibatch_size / 32``
    and is multiplied by 0.94 every 2 epochs.

    Args:
        network: Mapping with the entries ``'output'``, ``'ce'`` and ``'pe'``.
        epoch_size: Epoch size used by the learning-rate schedule.
        num_epochs: Number of epochs the schedule must cover.
        minibatch_size: Minibatch size used to scale the initial rate.
        num_quantization_bits: Passed to the data-parallel learner.
        progress_printer: Progress writer(s) passed to the Trainer.

    Returns:
        The configured ``Trainer``.
    """
    # CNTK weights new gradient by (1-momentum) for unit gain,
    # thus we divide Caffe's learning rate by (1-momentum)
    base_rate = 0.45 * (minibatch_size / 32)  # 0.45 is equal to 0.045 in caffe
    adjust_interval = 2
    decrease_factor = 0.94

    # One entry per epoch; the rate is held for `adjust_interval` epochs.
    lr_per_mb = []
    current_rate = base_rate
    for _ in range(0, num_epochs, adjust_interval):
        lr_per_mb += [current_rate] * adjust_interval
        current_rate *= decrease_factor

    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9)
    l2_reg_weight = 0.0001  # CNTK L2 regularization is per sample, thus same as Caffe

    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)
    parameter_learner = data_parallel_distributed_learner(
        local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=0)

    criterion = (network['ce'], network['pe'])
    return Trainer(network['output'], criterion, parameter_learner, progress_printer)
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    """Create a distributed Trainer for the ResNet CIFAR models.

    Args:
        network: Mapping with the entries ``'name'`` (``'resnet20'`` or
            ``'resnet110'``), ``'output'``, ``'ce'`` and ``'pe'``.
        minibatch_size: Minibatch size; converts the per-minibatch learning
            rates to per-sample rates and sets the momentum time constant.
        epoch_size: Epoch size used by the learning-rate schedule.
        num_quantization_bits: Gradient quantization bits for the
            data-parallel learner; must be 32 when ``block_size`` is given.
        block_size: Block size for block momentum, or ``None`` for plain
            data-parallel SGD.
        warm_up: ``distributed_after`` value for the data-parallel learner.
        progress_printer: Progress writer(s) passed to the Trainer.

    Returns:
        The configured ``Trainer``.

    Raises:
        RuntimeError: If the model name is unknown, or block momentum is
            combined with quantization.
    """
    # Validate the configuration before building any CNTK objects.
    model_name = network['name']
    if model_name == 'resnet20':
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif model_name == 'resnet110':
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        # The exception must be raised; returning it would hand callers an
        # exception instance in place of a Trainer.
        raise RuntimeError("Unknown model name!")

    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    # NOTE(review): presumably chosen so the momentum per minibatch is 0.9 - confirm.
    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
def create_trainer(network, epoch_size, num_epochs, minibatch_size, progress_writers):
    """Create a momentum-SGD Trainer with a stepwise decaying learning rate.

    The per-minibatch learning rate starts at ``2.0 * minibatch_size / 128``
    and is multiplied by 0.94 every 2 epochs.

    Args:
        network: Mapping with the entries ``'output'``, ``'ce'`` and ``'pe'``.
        epoch_size: Epoch size used by the learning-rate schedule.
        num_epochs: Number of epochs the schedule must cover.
        minibatch_size: Minibatch size used to scale the initial rate.
        progress_writers: Progress writer(s) passed to the Trainer.

    Returns:
        The configured ``Trainer``.
    """
    # CNTK weights new gradient by (1-momentum) for unit gain,
    # thus we divide Caffe's learning rate by (1-momentum)
    base_rate = 2.0 * (minibatch_size / 128)  # 2.0 is equal to 0.2 in caffe
    adjust_interval = 2
    decrease_factor = 0.94

    # One entry per epoch; the rate is held for `adjust_interval` epochs.
    lr_per_mb = []
    current_rate = base_rate
    for _ in range(0, num_epochs, adjust_interval):
        lr_per_mb += [current_rate] * adjust_interval
        current_rate *= decrease_factor

    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9)
    l2_reg_weight = 0.0001  # CNTK L2 regularization is per sample, thus same as Caffe

    learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)

    criterion = (network['ce'], network['pe'])
    return Trainer(network['output'], criterion, learner, progress_writers)
def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up):
    """Create a distributed momentum-SGD Trainer with gradient clipping.

    Args:
        network: Mapping with the entries ``'output'``, ``'ce'`` and ``'pe'``.
        epoch_size: Not used by this function.
        num_quantization_bits: Gradient quantization bits for the
            data-parallel learner; must equal the module-level
            ``default_quantization_bits`` when ``block_size`` is given.
        block_size: Block size for block momentum, or ``None`` for plain
            data-parallel SGD.
        warm_up: ``distributed_after`` value for the data-parallel learner.

    Returns:
        The configured ``Trainer``.

    Raises:
        RuntimeError: If block momentum is combined with quantization.
    """
    use_block_momentum = block_size is not None

    # Reject the invalid combination before building any CNTK objects.
    if use_block_momentum and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    local_learner = momentum_sgd(
        network['output'].parameters,
        lr_per_minibatch, momentum_time_constant,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)

    if use_block_momentum:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner)
def test_learner_update():
    """Check that the learning rate follows its schedule across updates.

    The schedule changes after 50 samples, so one update with a sample count
    of 100 must move the learner to the second value, both for the initial
    schedule and after ``reset_learning_rate``.
    """
    inp = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    res = inp * w

    initial_schedule = learning_rate_schedule([0.1]*50 + [0.2]*50, UnitType.sample, 1)
    learner = sgd(res.parameters, lr=initial_schedule)
    assert learner.learning_rate() == 0.1

    learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.2
    assert w.value < w_init

    replacement_schedule = learning_rate_schedule([0.3]*50 + [0.4]*50, UnitType.sample, 1)
    learner.reset_learning_rate(replacement_schedule)
    assert learner.learning_rate() == 0.3

    learner.update({w: np.asarray([[2.]], dtype=np.float32)}, 100)
    assert learner.learning_rate() == 0.4
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    """Create a distributed Nesterov Trainer with a fixed learning rate.

    Args:
        network: Mapping with the entries ``'output'``, ``'ce'`` and ``'errs'``.
        minibatch_size: Not used by this function.
        epoch_size: Epoch size used by the learning-rate schedule.
        num_quantization_bits: Gradient quantization bits for the
            data-parallel learner; must be 32 when ``block_size`` is given.
        block_size: Block size for block momentum, or ``None`` for plain
            data-parallel SGD.
        warm_up: ``distributed_after`` value for the data-parallel learner.
        progress_printer: Progress writer(s) passed to the Trainer.

    Returns:
        The configured ``Trainer``.

    Raises:
        RuntimeError: If block momentum is combined with quantization.
    """
    use_block_momentum = block_size is not None

    # Reject the invalid combination before building any CNTK objects.
    if use_block_momentum and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    # Constant per-minibatch rate; the stepped schedule
    # [1.0]*30 + [0.1]*30 + [0.01]*20 + [0.001] and the minibatch-size
    # scaling (factor minibatch_size / 256) are not applied here.
    lr_per_mb = [0.1]
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_schedule = learning_rate_schedule(lr_per_mb, epoch_size=epoch_size, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(0.9)

    local_learner = nesterov(network['output'].parameters, lr_schedule, mm_schedule,
                             l2_regularization_weight=l2_reg_weight)

    if use_block_momentum:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['errs']), learner, progress_printer)
def test_noise_injection_with_checkpointing():
    """Check that a learner checkpoint also reproduces the injected noise.

    Three identically initialised parameters are each trained by their own
    noisy momentum-SGD learner. Learners 1 and 2 must diverge after an
    update, while learner 3, restored from learner 1's checkpoint taken just
    before the update, must end up with the same weights as learner 1.
    """
    from cntk import initializer

    shape = (100, 100)
    w1, w2, w3 = [parameter(shape=shape, init=initializer.glorot_uniform(seed=123))
                  for _ in range(3)]

    lr = learning_rate_schedule(0.5, UnitType.sample)
    m = C.momentum_schedule(0.99)

    learner1, learner2, learner3 = [
        C.momentum_sgd([weights], lr, m, gaussian_noise_injection_std_dev=0.5)
        for weights in (w1, w2, w3)
    ]

    assert np.allclose(w1.value, w2.value) and np.allclose(w1.value, w3.value)

    for _ in range(10):
        checkpoint = learner1.create_checkpoint()

        gradient = np.float32(np.random.rand(100, 100))

        learner1.update({w1: gradient}, 1)
        learner2.update({w2: gradient}, 1)
        assert not np.allclose(w1.value, w2.value)

        learner3.restore_from_checkpoint(checkpoint)
        learner3.update({w3: gradient}, 1)
        assert np.allclose(w1.value, w3.value)
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    """Train ``model`` with momentum SGD and save it after every epoch.

    Args:
        reader: Minibatch source exposing ``next_minibatch`` and
            ``streams.features`` / ``streams.labels``.
        model: Network whose parameters are trained and which is saved.
        criterion: Criterion function; its first two arguments are bound to
            the feature and label minibatch data respectively.
        epoch_size: Number of samples per epoch.
        max_epochs: Number of epochs to train.

    Returns:
        Tuple ``(loss, metric)`` from the summary of the last epoch.
    """
    minibatch_size = 64

    # learning parameters
    lr_schedule = learning_rate_schedule(
        [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625],
        unit=UnitType.sample, epoch_size=epoch_size)
    mm_schedule = momentum_as_time_constant_schedule([0]*20 + [600]*20 + [1200], epoch_size=epoch_size)
    learner = momentum_sgd(model.parameters,
                           lr=lr_schedule,
                           momentum=mm_schedule,
                           l2_regularization_weight=0.002)

    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):
        samples_seen = 0
        while samples_seen < epoch_size:
            # Never request more samples than remain in this epoch.
            remaining = epoch_size - samples_seen
            mb = reader.next_minibatch(min(minibatch_size, remaining))
            label_data = mb[reader.streams.labels]
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features],
                                     criterion.arguments[1]: label_data})
            samples_seen += label_data.num_samples
            progress_printer.update_with_trainer(trainer, with_metric=True)
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # values from the last epoch
    return loss, metric
def train(input_images, target_masks, use_existing=False):
    """Train the U-Net model with Adam on the negated Dice coefficient.

    Args:
        input_images: Indexable collection of images with a ``shape``
            attribute; sliced along the first axis into minibatches.
        target_masks: Target masks, sliced the same way as the images.
        use_existing: When true, ``z.load_model`` is called on the
            checkpoint file before training.

    Returns:
        The ``C.Trainer`` used for training.
    """
    shape = input_images[0].shape
    data_size = input_images.shape[0]

    x = C.input_variable(shape)
    y = C.input_variable(shape)

    z = cntk_unet.create_model(x)
    dice_coef = cntk_unet.dice_coefficient(z, y)

    checkpoint_file = "cntk-unet.dnn"
    if use_existing:
        z.load_model(checkpoint_file)

    # Prepare model and trainer
    lr = learning_rate_schedule(0.00001, UnitType.sample)
    momentum = C.learners.momentum_as_time_constant_schedule(0)
    adam_learner = C.learners.adam(z.parameters, lr=lr, momentum=momentum)
    # The negated Dice coefficient is both the loss and the metric.
    trainer = C.Trainer(z, (-dice_coef, -dice_coef), adam_learner)

    # Get minibatches of training data and perform model training
    minibatch_size = 2
    num_epochs = 10
    num_mb_per_epoch = int(data_size / minibatch_size)

    for _ in range(0, num_epochs):
        for mb_index in range(0, num_mb_per_epoch):
            start = mb_index * minibatch_size
            stop = (mb_index + 1) * minibatch_size
            trainer.train_minibatch({x: input_images[start:stop],
                                     y: target_masks[start:stop]})

        # NOTE(review): checkpoint placed once per epoch; the nesting was
        # reconstructed from a flattened source - confirm.
        trainer.save_checkpoint(checkpoint_file)

    return trainer
def create_adam_learner(learn_params, learning_rate=0.0005, gradient_clipping_threshold_per_sample=0.001):
    """Create an Adam learner with gradient clipping and log its settings.

    Args:
        learn_params: Parameters the learner should update.
        learning_rate: Per-sample learning rate.
        gradient_clipping_threshold_per_sample: Per-sample clipping threshold;
            clipping is done with truncation.

    Returns:
        The learner returned by ``learners.adam``.
    """
    lr_schedule = learners.learning_rate_schedule(learning_rate, learners.UnitType.sample)
    momentum = learners.momentum_schedule(0.90)
    momentum_var = learners.momentum_schedule(0.999)

    # NOTE(review): the fourth positional argument (True) is presumably
    # `unit_gain`; confirm against the installed CNTK version.
    learner = learners.adam(
        learn_params,
        lr_schedule,
        momentum,
        True,
        momentum_var,
        gradient_clipping_threshold_per_sample=gradient_clipping_threshold_per_sample,
        gradient_clipping_with_truncation=True)

    learner_desc = 'Alg: Adam, learning rate: {0}, momentum: {1}, gradient clip: {2}'.format(
        learning_rate, momentum[0], gradient_clipping_threshold_per_sample)
    logger.log("Create learner. {0}".format(learner_desc))
    return learner
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    """Create a distributed Nesterov Trainer with a stepped learning rate.

    The per-minibatch schedule ``[1.0]*30 + [0.1]*30 + [0.01]*20 + [0.001]``
    is scaled by ``minibatch_size / 256`` when the minibatch size is not 256.

    Args:
        network: Mapping with the entries ``'output'``, ``'ce'`` and ``'errs'``.
        minibatch_size: Minibatch size used to scale the learning rates.
        epoch_size: Epoch size used by the learning-rate schedule.
        num_quantization_bits: Gradient quantization bits for the
            data-parallel learner; must be 32 when ``block_size`` is given.
        block_size: Block size for block momentum, or ``None`` for plain
            data-parallel SGD.
        warm_up: ``distributed_after`` value for the data-parallel learner.
        progress_printer: Progress writer(s) passed to the Trainer.

    Returns:
        The configured ``Trainer``.

    Raises:
        RuntimeError: If block momentum is combined with quantization.
    """
    use_block_momentum = block_size is not None

    # Reject the invalid combination before building any CNTK objects.
    if use_block_momentum and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    lr_per_mb = [1.0]*30 + [0.1]*30 + [0.01]*20 + [0.001]
    l2_reg_weight = 0.0001

    # adjust LR with minibatch size
    if minibatch_size != 256:
        scale = minibatch_size / 256
        lr_per_mb = [rate * scale for rate in lr_per_mb]

    # Set learning parameters
    lr_schedule = learning_rate_schedule(lr_per_mb, epoch_size=epoch_size, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(0.9)

    local_learner = nesterov(network['output'].parameters, lr_schedule, mm_schedule,
                             l2_regularization_weight=l2_reg_weight)

    if use_block_momentum:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['errs']), learner, progress_printer)
def test_learner_update_legacy():
    """Check learning-rate progression using ``learning_rate_schedule``.

    Each schedule switches value after 50 samples; an update with a sample
    count of 100 must therefore advance the learner to the second value.
    """
    inp = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    res = inp * w

    def _gradient():
        # Same gradient value for both update steps.
        return np.asarray([[2.]], dtype=np.float32)

    learner = sgd(res.parameters,
                  lr=learning_rate_schedule([0.1]*50 + [0.2]*50, UnitType.sample, 1))
    assert learner.learning_rate() == 0.1

    learner.update({w: _gradient()}, 100)
    assert learner.learning_rate() == 0.2
    assert w.value < w_init

    learner.reset_learning_rate(learning_rate_schedule([0.3]*50 + [0.4]*50, UnitType.sample, 1))
    assert learner.learning_rate() == 0.3

    learner.update({w: _gradient()}, 100)
    assert learner.learning_rate() == 0.4
def train_lm(testing=False):
    """Train the language model and return the last measured cross entropy.

    Reads its configuration from module-level names (``token_to_id_path``,
    ``hidden_dim``, ``learning_rate``, ``num_epochs``, ...).

    Args:
        testing: When true, the model is not saved after each epoch.

    Returns:
        The most recent average cross entropy computed with the full softmax,
        or 0 if no progress report was produced.
    """
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: this is used training criterion
    # error: this a binary indicator if the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # For measurement we use the (build in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z)
    print()

    # Instantiate the trainer object to drive the model training
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_as_time_constant)
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=True)
    trainer = Trainer(z, (cross_entropy, error), learner)

    # Counters for the training loop
    total_samples = 0
    samples_since_report = 0
    last_avg_ce = 0

    for epoch_count in range(num_epochs):
        batches = data.minibatch_generator(train_file_path, sequence_length, sequences_per_batch)
        for features, labels, token_count in batches:
            arguments = {input_sequence: features, label_sequence: labels}

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples
            report_due = (samples_since_report >= num_samples_between_progress_report
                          or total_samples == 0)
            if report_due:
                av_ce = average_cross_entropy(full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, total_samples, t_start)
                samples_since_report = 0
                last_avg_ce = av_ce

            total_samples += token_count
            samples_since_report += token_count

        if not testing:
            # after each epoch save the model
            model_filename = "models/lm_epoch%d.dnn" % epoch_count
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

    return last_avg_ce
def __init__(self, input_shape, nb_actions,
             gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000),
             learning_rate=0.00025, momentum=0.95, minibatch_size=32,
             memory_size=500000, train_after=10000, train_interval=4,
             target_update_interval=10000, monitor=True):
    """Set up the Q-network, its frozen target copy, and the Adam trainer.

    Args:
        input_shape: Shape of the network input.
        nb_actions: Number of actions (size of the output layer).
        gamma: Discount factor applied to the target network's maximum value.
        explorer: Exploration policy object.
        learning_rate: Per-minibatch learning rate for Adam.
        momentum: Momentum for Adam.
        minibatch_size: Stored as ``_minibatch_size``.
        memory_size: Size passed to ``RepMem``.
        train_after: Stored as ``_train_after``.
        train_interval: Stored as ``_train_interval``.
        target_update_interval: Stored as ``_target_update_interval``.
        monitor: When true, a ``TensorBoardProgressWriter`` logging to
            ``'metrics'`` is attached to the trainer.
    """
    # NOTE(review): the default `explorer` is created once when the function
    # is defined, so instances relying on the default share one object.
    self.input_shape = input_shape
    self.nb_actions = nb_actions
    self.gamma = gamma

    self._train_after = train_after
    self._train_interval = train_interval
    self._target_update_interval = target_update_interval

    self._explorer = explorer
    self._minibatch_size = minibatch_size
    self._history = History(input_shape)
    self._memory = RepMem(memory_size, input_shape[1:], 4)
    self._num_actions_taken = 0

    # Metrics accumulator
    self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

    # Action-value network: two hidden dense layers and a linear output.
    with default_options(activation=relu, init=he_uniform()):
        self._action_value_net = Sequential([
            Dense(input_shape, init=he_uniform(scale=0.01)),
            Dense(input_shape),
            Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))])
    self._action_value_net.update_signature(Tensor[input_shape])

    # Frozen clone of the action-value network, used to compute the targets.
    self._target_net = self._action_value_net.clone(CloneMethod.freeze)

    @Function
    @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def compute_q_targets(post_states, rewards, terminals):
        # Terminal transitions use the reward alone; the others add the
        # discounted maximum value predicted by the target network.
        discounted = gamma * reduce_max(self._target_net(post_states), axis=0) + rewards
        return element_select(terminals, rewards, discounted)

    @Function
    @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
               post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def criterion(pre_states, actions, post_states, rewards, terminals):
        q_targets = compute_q_targets(post_states, rewards, terminals)
        # `actions` weights the per-action values before summing.
        # NOTE(review): presumably one-hot encoded - confirm.
        q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)
        return huber_loss(q_targets, q_acted, 1.0)

    # Adam based learner
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
    m_schedule = momentum_schedule(momentum)
    vm_schedule = momentum_schedule(0.999)
    adam_learner = adam(self._action_value_net.parameters, lr_schedule,
                        momentum=m_schedule, variance_momentum=vm_schedule)

    if monitor:
        self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics', model=criterion)
    else:
        self._metrics_writer = None

    self._learner = adam_learner
    self._trainer = Trainer(criterion, (criterion, None), adam_learner, self._metrics_writer)
def train(train_images, train_masks, val_images, val_masks, base_model_file, freeze=False):
    """Train the transfer-learning FCN and track errors after each epoch.

    Args:
        train_images: Training images (array-like with ``shape``).
        train_masks: Training masks; ``train_masks.shape[1]`` is passed to the
            model factory.
        val_images: Validation images.
        val_masks: Validation masks.
        base_model_file: Base model file passed to the model factory.
        freeze: Passed through to ``create_transfer_learning_model``.

    Returns:
        Tuple ``(trainer, training_errors, test_errors)``; the two lists hold
        one ``measure_error`` result per epoch.
    """
    shape = train_images[0].shape
    data_size = train_images.shape[0]

    test_data = (val_images, val_masks)
    training_data = (train_images, train_masks)

    # Create model
    x = C.input_variable(shape)
    y = C.input_variable(train_masks[0].shape)

    z = cntk_resnet_fcn.create_transfer_learning_model(x, train_masks.shape[1], base_model_file, freeze)
    dice_coef = cntk_resnet_fcn.dice_coefficient(z, y)

    # Prepare model and trainer
    lr_mb = [0.0001] * 10 + [0.00001] * 10 + [0.000001] * 10 + [0.0000001] * 10
    lr = learning_rate_schedule(lr_mb, UnitType.sample)
    momentum = C.learners.momentum_as_time_constant_schedule(0.9)
    adam_learner = C.learners.adam(z.parameters, lr=lr, momentum=momentum)
    # The negated Dice coefficient is both the loss and the metric.
    trainer = C.Trainer(z, (-dice_coef, -dice_coef), adam_learner)

    # Get minibatches of training data and perform model training
    minibatch_size = 8
    num_epochs = 60

    training_errors = []
    test_errors = []

    for epoch in range(0, num_epochs):
        for mb_index in range(0, int(len(training_data[0]) / minibatch_size)):
            data_x, data_y = slice_minibatch(training_data[0], training_data[1], mb_index, minibatch_size)
            trainer.train_minibatch({z.arguments[0]: data_x, y: data_y})

        # Measure training error
        training_errors.append(
            measure_error(training_data[0], training_data[1], z.arguments[0], y, trainer, minibatch_size))

        # Measure test error
        test_errors.append(
            measure_error(test_data[0], test_data[1], z.arguments[0], y, trainer, minibatch_size))

        print("epoch #{}: training_error={}, test_error={}".format(
            epoch, training_errors[-1], test_errors[-1]))

    return trainer, training_errors, test_errors
def entrenar(checkpoint, entrRuedas, entrOperaciones, input_dim, num_output_classes, testRuedas, testOperaciones):
    """Train the classifier with SGD and return its average test error.

    Args:
        checkpoint: Checkpoint path; restored from if the file exists, written
            to during training, and used (with ``.log`` appended) as log file.
        entrRuedas: Training features.
        entrOperaciones: Training labels.
        input_dim: Input dimension of the network.
        num_output_classes: Number of output classes.
        testRuedas: Test features.
        testOperaciones: Test labels.

    Returns:
        The mean of ``trainer.test_minibatch`` over the full test minibatches.
    """
    minibatch_size = 100
    epocs = 900
    train_iterations = int(len(entrOperaciones) / minibatch_size)

    # Input variables denoting the features and label data
    feature = input(input_dim, np.float32)
    label = input(num_output_classes, np.float32)

    netout = crearRed(input_dim, num_output_classes, feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.25, UnitType.minibatch)

    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(log_to_file=checkpoint+".log", num_epochs=epocs)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    if os.path.isfile(checkpoint):
        trainer.restore_from_checkpoint(checkpoint)

    train_features = np.array(entrRuedas).astype(np.float32)
    train_labels = np.array(entrOperaciones).astype(np.float32)

    # Iterate once per epoch
    for _ in range(0, epocs):
        # Shuffle features and labels with the same permutation.
        order = np.random.permutation(len(entrRuedas))
        train_labels = train_labels[order]
        train_features = train_features[order]

        # Split the data into minibatches and train
        for j in range(0, train_iterations):
            start, stop = j*minibatch_size, (j+1)*minibatch_size
            trainer.train_minibatch({feature: train_features[start:stop],
                                     label: train_labels[start:stop]})

        # NOTE(review): summary and checkpoint placed once per epoch; the
        # nesting was reconstructed from a flattened source - confirm.
        trainer.summarize_training_progress()
        trainer.save_checkpoint(checkpoint)

    test_iterations = int(len(testOperaciones) / minibatch_size)
    avg_error = 0
    for j in range(0, test_iterations):
        start, stop = j*minibatch_size, (j+1)*minibatch_size
        test_features = np.array(testRuedas[start:stop]).astype(np.float32)
        test_labels = np.array(testOperaciones[start:stop]).astype(np.float32)
        batch_error = trainer.test_minibatch({feature: test_features, label: test_labels})
        avg_error = avg_error + (batch_error / test_iterations)

    return avg_error
def test_sgd_with_noise():
    """Smoke-test SGD with Gaussian noise injection.

    Runs a network where the number of parameters is odd in some layers.
    This tests that cuRand library will not complain about generating an
    odd number of random values.
    """
    np.random.seed(98052)

    def learner(params):
        return sgd(params,
                   lr=learning_rate_schedule(0.125, UnitType.minibatch),
                   gaussian_noise_injection_std_dev=0.01)

    ffnet(learner)

    # We just verify that we did not crash
    assert(True)
def Evaluator(criterion):
    """Wrap a criterion in a Trainer so that it can be evaluated.

    The Trainer is built with a placeholder momentum-SGD learner over the
    parameters of the loss and, when present, of the metric.

    Args:
        criterion: Criterion accepted by ``Trainer._get_loss_metric``.

    Returns:
        A ``Trainer`` over ``(loss, metric)``.
    """
    loss, metric = Trainer._get_loss_metric(criterion)

    # Collect each parameter once, even if loss and metric share it.
    unique_params = set(loss.parameters)
    if metric:
        unique_params.update(set(metric.parameters))

    placeholder_lr = learning_rate_schedule(1, UnitType.minibatch)
    placeholder_momentum = momentum_as_time_constant_schedule(0)
    dummy_learner = momentum_sgd(tuple(unique_params),
                                 lr=placeholder_lr,
                                 momentum=placeholder_momentum)
    return Trainer(None, (loss, metric), dummy_learner)
def test_universal():
    """Compare a hand-written universal SGD update with the built-in SGD.

    Both runs start from the same random seed; the universal learner's errors
    must be less than or equal to those of the built-in learner.
    """
    def builtin_sgd(params):
        return sgd(params, lr=learning_rate_schedule(0.125, UnitType.minibatch))

    def my_sgd(ps, gs):
        # One assign per (parameter, gradient) pair, combined into one function.
        return C.combine([C.assign(p, p - 0.125/25 * g) for p, g in zip(ps, gs)])

    def universal_sgd(params):
        return universal(my_sgd, params)

    np.random.seed(98052)
    builtin_last_avg_error, builtin_avg_error, _ = ffnet(builtin_sgd)

    np.random.seed(98052)
    my_last_avg_error, my_avg_error, _ = ffnet(universal_sgd)

    assert np.all(np.less_equal(my_last_avg_error, builtin_last_avg_error))
    assert np.all(np.less_equal(my_avg_error, builtin_avg_error))
def test_universal():
    """Compare a per-parameter universal SGD update with the built-in SGD.

    Both runs start from the same random seed; the resulting errors must be
    close to each other (``np.allclose``).
    """
    def builtin_sgd(params):
        return sgd(params, lr=learning_rate_schedule(0.125, UnitType.minibatch))

    def my_sgd(p, g):
        # Update rule for a single parameter and its gradient.
        return C.assign(p, p - 0.125/25 * g)

    def universal_sgd(params):
        return universal(my_sgd, params)

    np.random.seed(98052)
    builtin_last_avg_error, builtin_avg_error = ffnet(builtin_sgd)

    np.random.seed(98052)
    my_last_avg_error, my_avg_error = ffnet(universal_sgd)

    assert np.allclose(my_last_avg_error, builtin_last_avg_error)
    assert np.allclose(my_avg_error, builtin_avg_error)
def train_lm(training_file, epochs, max_num_minibatches):
    """Train the character-level language model and save it after each epoch.

    Reads the module-level name ``minibatch_size``.

    Args:
        training_file: File passed to ``load_data_and_vocab``.
        epochs: Number of epochs to train.
        max_num_minibatches: Upper bound on the total number of minibatches;
            it is divided evenly over the epochs.
    """
    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model and apply it to the input sequence
    model = create_model(vocab_dim)
    z = model(input_sequence)

    # setup the criterions (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                           gradient_clipping_threshold_per_sample=5.0,
                           gradient_clipping_with_truncation=True)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)

    # print out some useful training information
    log_number_of_parameters(z)
    print("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
    print()

    for epoch in range(0, epochs):
        for batch in range(0, minibatches_per_epoch):
            # get the data
            features, labels = get_data(batch, minibatch_size, data, char_to_ix, vocab_dim)

            # Specify the mapping of input variables in the model to actual minibatch data to be trained with
            # If it's the start of the data, we specify that we are looking at a new sequence (True)
            mask = [batch == 0]
            arguments = ({input_sequence: features, label_sequence: labels}, mask)
            trainer.train_minibatch(arguments)

            global_minibatch = epoch*minibatches_per_epoch + batch
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (epoch+1)
        z.save(model_filename)
        print("Saved model to '%s'" % model_filename)
def train_sequence_classifier(debug_output=False):
    """Train the LSTM sequence classifier for 251 minibatches.

    Args:
        debug_output: When true, the progress reporting interval is divided
            by three.

    Returns:
        Tuple ``(evaluation_average, loss_average)`` of the last minibatch.
    """
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input(shape=input_dim, is_sparse=True)
    label = input(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10

    if debug_output:
        # Floor division keeps the interval an integer on Python 3; true
        # division would give 3.33..., a non-integer reporting interval.
        training_progress_output_freq = training_progress_output_freq // 3

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    import copy

    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def train_fast_rcnn(debug_output=False, model_path=model_file):
    """Train the Fast R-CNN model and return the trained network.

    Args:
        debug_output: when true, plot the network graph and save the model
            after every epoch under ``abs_path/Output``.
        model_path: forwarded to ``frcn_predictor``.

    Returns:
        The trained ``frcn_output`` function.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Minibatch source for the training split.
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Inputs: images, regions of interest and per-ROI labels.
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # Reader streams -> network inputs.
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels,
    }

    # Prediction model plus loss and error criteria.
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters.
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Trainer.
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)

    for epoch in range(max_epochs):
        samples_seen = 0
        while samples_seen < epoch_size:
            # Never request more samples than are left in this epoch.
            request = min(mb_size, epoch_size - samples_seen)
            data = minibatch_source.next_minibatch(request, input_map=input_map)
            trainer.train_minibatch(data)
            samples_seen += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch + 1)))

    return frcn_output
def train_model(debug_output=False):
    """Train the image classifier and save the model after every epoch.

    Args:
        debug_output: accepted but not used by this function.
    """
    # Minibatch source built from the module-level map file.
    minibatch_source = create_reader(map_file)

    # Inputs: images and labels.
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable(num_classes)

    # Reader streams -> network inputs.
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels,
    }

    # Model returned by modify_model, plus loss and error criteria.
    model = modify_model(image_input, num_classes)
    ce = cross_entropy_with_softmax(model, label_input)
    pe = classification_error(model, label_input)

    # Learning parameters.
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    momentum_time_constant = 10
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Trainer.
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    learner = momentum_sgd(model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(model, (ce, pe), learner, progress_writers)

    print("Training image classifier for %s epochs." % max_epochs)
    log_number_of_parameters(model)

    for epoch in range(max_epochs):
        samples_seen = 0
        while samples_seen < epoch_size:
            request = min(mb_size, epoch_size - samples_seen)
            data = minibatch_source.next_minibatch(request, input_map=input_map)
            trainer.train_minibatch(data)
            samples_seen += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        checkpoint_name = 'withcrops_{}.dnn'.format(epoch + 1)
        model.save(os.path.join(output_model_folder, checkpoint_name))

    return
def train_fast_rcnn(debug_output=False):
    """Train the Fast R-CNN model and return the trained network.

    Args:
        debug_output: when true, plot the network graph and save the model
            after every epoch under ``abs_path/Output``.

    Returns:
        The trained ``frcn_output`` function.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Minibatch source for the training split.
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Inputs: images, regions of interest and per-ROI labels.
    image_input = input((num_channels, image_height, image_width))
    roi_input = input((num_rois, 4))
    label_input = input((num_rois, num_classes))

    # Reader streams -> network inputs.
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels,
    }

    # Prediction model plus loss and error criteria.
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters.
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Trainer.
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)

    for epoch in range(max_epochs):
        samples_seen = 0
        while samples_seen < epoch_size:
            # Never request more samples than are left in this epoch.
            request = min(mb_size, epoch_size - samples_seen)
            data = minibatch_source.next_minibatch(request, input_map=input_map)
            trainer.train_minibatch(data)
            samples_seen += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch + 1)))

    return frcn_output
def Evaluator(model, criterion):
    """Build a ``Trainer`` around ``model`` and ``criterion`` with a dummy learner.

    The learner is given every parameter of the loss, plus those of the
    model and the metric when they are truthy.

    Args:
        model: model function handed to ``Trainer``; may be falsy.
        criterion: passed to ``Trainer._get_loss_metric`` to obtain the
            loss and metric.

    Returns:
        The constructed ``Trainer``.
    """
    from cntk import Trainer
    from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule

    loss, metric = Trainer._get_loss_metric(criterion)

    # Collect the union of all parameters involved.
    parameters = set(loss.parameters)
    if model:
        parameters.update(model.parameters)
    if metric:
        parameters.update(metric.parameters)

    lr = learning_rate_schedule(1, UnitType.minibatch)
    momentum = momentum_as_time_constant_schedule(0)
    dummy_learner = momentum_sgd(tuple(parameters), lr=lr, momentum=momentum)
    return Trainer(model, (loss, metric), dummy_learner)
def ffnet():
    """Train a small feed-forward classifier on random data and test it once.

    Returns:
        ``(last_avg_error, avg_error)``: the sample-weighted average training
        loss over all minibatches, and the result of ``test_minibatch`` on
        one freshly generated minibatch.
    """
    inputs = 3
    outputs = 3
    layers = 2
    hidden_dimension = 3

    # Input variables for the features and the labels.
    features = C.input((inputs), np.float32)
    label = C.input((outputs), np.float32)

    # Feed-forward classification model and its criteria.
    my_model = Sequential([
        Dense(hidden_dimension, activation=C.sigmoid),
        Dense(outputs),
    ])
    z = my_model(features)
    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Trainer driven by plain SGD with Gaussian noise injection.
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    progress_printer = ProgressPrinter(0)
    learner = sgd(z.parameters, lr=lr_per_minibatch,
                  gaussian_noise_injection_std_dev=0.01)
    trainer = C.Trainer(z, (ce, pe), [learner], [progress_printer])

    minibatch_size = 25
    num_minibatches_to_train = 100
    aggregate_loss = 0.0
    for _ in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        trainer.train_minibatch({features: train_features, label: labels})
        # Weight each minibatch's average loss by its sample count.
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
def train(reader, model, max_epochs):
    """Train ``model`` on minibatches drawn from ``reader``.

    Args:
        reader: minibatch source exposing ``streams.query`` and
            ``streams.slot_labels``.
        model: the model to train; its signature is updated in place.
        max_epochs: number of epochs, each of 36000 samples.

    Returns:
        ``(loss, metric)`` taken from the progress printer's summary of the
        last epoch.
    """
    # Declare the model's input dimension so that the saved model is usable.
    model.update_signature(Sequence[SparseTensor[vocab_size]])

    # criterion: (model args, labels) -> (loss, metric)
    # here:      (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)
    labels = reader.streams.slot_labels

    # Iteration parameters; defined here because the learner schedule needs them.
    epoch_size = 36000
    minibatch_size = 70

    # SGD parameters.
    lr_schedule = learning_rate_schedule(
        [0.003] * 2 + [0.0015] * 12 + [0.0003], UnitType.sample, epoch_size)
    momentum = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9))
    learner = fsadagrad(
        criterion.parameters,
        lr=lr_schedule,
        momentum=momentum,
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True,
    )

    trainer = Trainer(None, criterion, learner)

    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training')

    t = 0  # running count of samples processed across all epochs
    for epoch in range(max_epochs):
        peek(model, epoch)  # log some interesting info
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            # BUGBUG (carried over from the original): changing the
            # minibatch size requested here has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(min(minibatch_size, epoch_end - t))
            query_arg, label_arg = criterion.arguments[0], criterion.arguments[1]
            trainer.train_minibatch({
                query_arg: data[reader.streams.query],
                label_arg: data[labels],
            })
            t += data[labels].num_samples
            progress_printer.update_with_trainer(trainer, with_metric=True)

        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric  # values from the last epoch
def train_and_test(s2smodel, train_reader, test_reader, block_size,
                   num_quantization_bits, max_epochs, epoch_size,
                   minibatch_size, progress_printer, warm_up):
    """Run a distributed training session with cross-validation.

    Args:
        s2smodel: model passed to ``create_model_train``.
        train_reader: minibatch source for training.
        test_reader: minibatch source for cross-validation.
        block_size: when not ``None``, a block-momentum distributed learner
            is used; otherwise a data-parallel learner.
        num_quantization_bits: quantization bits for the data-parallel learner.
        max_epochs: accepted but not used by this function.
        epoch_size: sample unit for the learning-rate schedule, progress and
            checkpoint frequency.
        minibatch_size: minibatch size for training and cross-validation.
        progress_printer: progress writer handed to ``Trainer``.
        warm_up: ``distributed_after`` value for the data-parallel learner.

    Raises:
        RuntimeError: if block momentum is combined with a non-default
            number of quantization bits.
    """
    from Sequence2Sequence import create_criterion_function, create_model_train

    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Block momentum and quantization are mutually exclusive.
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option."
        )

    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    local_learner = fsadagrad(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)

    # Identity comparison with None, consistent with the guard above.
    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner,
                                                     block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {
        criterion.arguments[0]: train_reader.streams.features,
        criterion.arguments[1]: train_reader.streams.labels,
    }

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(
            frequency=epoch_size,
            filename=os.path.join(model_path, "SequenceToSequence"),
            restore=False),
        cv_config=CrossValidationConfig(source=test_reader,
                                        mb_size=minibatch_size)).train()
def run_model(create_model_fn, **params):
    """Build, train and test a model produced by ``create_model_fn``.

    Args:
        create_model_fn: called as
            ``create_model_fn(scaled_input, num_classes, **params)``.
        **params: optional overrides for NUM_TRAIN_SAMPLES, NUM_TEST_SAMPLES,
            INPUT_DIM_MODEL, INPUT_DIM, NUM_OUTPUT_CLASSES, LEARNING_RATE,
            MINIBATCH_SIZE, NUM_SAMPLES_PER_SWEEP and NUM_SWEEP_TO_TRAIN;
            the whole mapping is also forwarded to ``create_model_fn``.

    Returns:
        The model ``z``.
    """
    num_train_samples = params.get('NUM_TRAIN_SAMPLES', 60000)
    num_test_samples = params.get('NUM_TEST_SAMPLES', 10000)
    input_dim_model = params.get('INPUT_DIM_MODEL', 28 * 28)
    input_dim = params.get('INPUT_DIM', 28 * 28)
    num_output_classes = params.get('NUM_OUTPUT_CLASSES', 10)
    learning_rate = params.get('LEARNING_RATE', 0.2)
    minibatch_size = params.get('MINIBATCH_SIZE', 64)
    num_samples_per_sweep = params.get('NUM_SAMPLES_PER_SWEEP', 60000)
    num_sweep_to_train = params.get('NUM_SWEEP_TO_TRAIN', 10)

    train_file, test_file = load_and_save(num_train_samples, num_test_samples)

    feature_var = C.input_variable(input_dim_model)
    label = C.input_variable(num_output_classes)

    # The model receives the input divided by 255.
    z = create_model_fn(feature_var / 255.0, num_output_classes, **params)

    loss = C.cross_entropy_with_softmax(z, label)
    label_error = C.classification_error(z, label)

    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
    learner = sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, label_error), [learner])

    # Reader for the training set, mapped onto the input and label variables.
    reader_train = create_reader(train_file, True, input_dim, num_output_classes)
    input_map = {
        label: reader_train.streams.labels,
        feature_var: reader_train.streams.features,
    }

    tr = Trainer(minibatch_size, num_samples_per_sweep, num_sweep_to_train,
                 trainer, reader_train)
    plotdata = tr.train(input_map)
    plot_learning(plotdata)

    # Reader for the test set.
    reader_test = create_reader(test_file, False, input_dim, num_output_classes)
    test_input_map = {
        label: reader_test.streams.labels,
        feature_var: reader_test.streams.features,
    }
    test_model(test_input_map, reader_test, trainer, num_test_samples, 512)

    return z
def train_sequence_classifier():
    """Train the LSTM sequence classifier for 255 minibatches.

    Returns:
        ``(evaluation_average, loss_average)`` of the last trained minibatch,
        both converted to ``float``.
    """
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables for the features and the labels.
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Sequence classification model and its criteria.
    classifier_output = LSTM_sequence_classifier_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)
    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    # Training data, located relative to this file.
    rel_path = "../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    here = os.path.dirname(os.path.abspath(__file__))
    reader = create_reader(os.path.join(here, rel_path), True,
                           input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels,
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)

    # Trainer driven by plain SGD.
    progress_printer = ProgressPrinter(0)
    learner = sgd(classifier_output.parameters, lr=lr_per_sample)
    trainer = Trainer(classifier_output, (ce, pe), learner, progress_printer)

    minibatch_size = 200
    for _ in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
def ffnet(optimizer, num_minibatches_to_train):
    """Train a seeded feed-forward classifier with the given optimizer.

    Args:
        optimizer: learner factory called as
            ``optimizer(parameters, lr_schedule)``.
        num_minibatches_to_train: number of random minibatches to train on.

    Returns:
        The parameters of the trained model.
    """
    inputs = 2
    outputs = 2
    hidden_dimension = 50

    # Input variables for the features and the labels.
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Feed-forward model with seeded Glorot-uniform initialisation.
    hidden_layer = Dense(hidden_dimension, activation=C.sigmoid,
                         init=C.glorot_uniform(seed=SEED))
    output_layer = Dense(outputs, init=C.glorot_uniform(seed=SEED))
    my_model = Sequential([hidden_layer, output_layer])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Trainer using the caller-supplied optimizer.
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    progress_printer = ProgressPrinter(0)
    learner = optimizer(z.parameters, lr_per_minibatch)
    trainer = C.Trainer(z, (ce, pe), [learner], progress_printer)

    minibatch_size = 25
    for _ in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        trainer.train_minibatch({features: train_features, label: labels})

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return z.parameters
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes,
                train_map_file, num_epochs, max_images=-1, freeze=False):
    """Train a transfer-learning model and return it.

    Args:
        base_model_file, feature_node_name, last_hidden_node_name:
            forwarded to ``create_model``.
        image_width, image_height, num_channels: shape of the image input.
        num_classes: number of output classes.
        train_map_file: map file; its line count is the epoch size.
        num_epochs: number of training epochs.
        max_images: when positive, caps the epoch size.
        freeze: forwarded to ``create_model``.

    Returns:
        The trained ``tl_model``.
    """
    # One line of the map file per sample; the context manager closes the
    # file instead of leaking the handle.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables.
    minibatch_source = create_mb_source(train_map_file, image_width, image_height,
                                        num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs.
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name],
    }

    # Instantiate the transfer learning model and loss function.
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object.
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training.
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes,
                train_map_file, num_epochs, max_images=-1, freeze=False):
    """Train a transfer-learning model and return it.

    Args:
        base_model_file, feature_node_name, last_hidden_node_name:
            forwarded to ``create_model``.
        image_width, image_height, num_channels: shape of the image input.
        num_classes: number of output classes.
        train_map_file: map file; its line count is the epoch size.
        num_epochs: number of training epochs.
        max_images: when positive, caps the epoch size.
        freeze: forwarded to ``create_model``.

    Returns:
        The trained ``tl_model``.
    """
    # One line of the map file per sample; the context manager closes the
    # file instead of leaking the handle.
    with open(train_map_file) as map_file:
        epoch_size = sum(1 for _ in map_file)
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables.
    minibatch_source = create_mb_source(train_map_file, image_width, image_height,
                                        num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs.
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name],
    }

    # Instantiate the transfer learning model and loss function.
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object.
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training.
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
def train_model(reader, reader_test, model, epoch_size=50000, max_epochs=80):
    """Train ``model`` with momentum SGD, saving it after every epoch.

    Args:
        reader: minibatch source exposing ``streams.features`` and
            ``streams.labels``.
        reader_test: accepted but not used by this function.
        model: the model to train; its signature is updated in place.
        epoch_size: number of samples per epoch.
        max_epochs: number of epochs.

    Returns:
        ``(loss, metric)`` taken from the progress printer's summary of the
        last epoch.
    """
    # Declare the model's input dimension. Training does not require this,
    # but it is needed for deployment.
    model.update_signature((num_channels, image_height, image_width))

    # Criterion function: this is what is actually being trained. The model
    # is sandwiched between a normalisation step (not part of the model
    # proper) and the criterion.
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)

    minibatch_size = 64

    # Learning parameters.
    lr_values = ([0.0015625] * 20 + [0.00046875] * 20 + [0.00015625] * 20
                 + [0.000046875] * 10 + [0.000015625])
    lr = learning_rate_schedule(lr_values, unit=UnitType.sample, epoch_size=epoch_size)
    momentum = momentum_as_time_constant_schedule([0] * 20 + [600] * 20 + [1200],
                                                  epoch_size=epoch_size)
    learner = momentum_sgd(model.parameters, lr=lr, momentum=momentum,
                           l2_regularization_weight=0.002)

    trainer = Trainer(None, criterion, learner)

    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):
        samples_seen = 0
        while samples_seen < epoch_size:
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - samples_seen))
            feature_batch = mb[reader.streams.features]
            label_batch = mb[reader.streams.labels]
            trainer.train_minibatch({
                criterion.arguments[0]: feature_batch,
                criterion.arguments[1]: label_batch,
            })
            samples_seen += label_batch.num_samples
            progress_printer.update_with_trainer(trainer, with_metric=True)

        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    return loss, metric  # values from the last epoch
def ffnet():
    """Train a two-hidden-layer feed-forward net on random data.

    Returns:
        The result of ``test_minibatch`` on one freshly generated minibatch.
    """
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables for the features and the labels.
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    # Hidden sigmoid layers followed by a linear output layer.
    network = Sequential([
        For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
        Dense(num_output_classes),
    ])
    netout = network(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)

    # Trainer driven by plain SGD.
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    minibatch_size = 25
    for _ in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim,
                                                num_output_classes)
        trainer.train_minibatch({feature: features, label: labels})

    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({feature: test_features, label: test_labels})
    return avg_error
def ffnet():
    """Train a small feed-forward classifier on random data and test it once.

    Returns:
        ``(last_avg_error, avg_error)``: the sample-weighted average training
        loss over all minibatches, and the result of ``test_minibatch`` on
        one freshly generated minibatch.
    """
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # Input variables for the features and the labels.
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Feed-forward classification model and its criteria.
    my_model = Sequential([
        Dense(hidden_dimension, activation=C.sigmoid),
        Dense(outputs),
    ])
    z = my_model(features)
    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Trainer driven by plain SGD.
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    progress_printer = ProgressPrinter(0)
    learner = sgd(z.parameters, lr=lr_per_minibatch)
    trainer = C.Trainer(z, (ce, pe), [learner], [progress_printer])

    minibatch_size = 25
    num_minibatches_to_train = 1024
    aggregate_loss = 0.0
    for _ in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        trainer.train_minibatch({features: train_features, label: labels})
        # Weight each minibatch's average loss by its sample count.
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
def ffnet(optimizer, num_minibatches_to_train):
    """Train a seeded feed-forward classifier with the given optimizer.

    Args:
        optimizer: learner factory called as
            ``optimizer(parameters, lr_schedule)``.
        num_minibatches_to_train: number of random minibatches to train on.

    Returns:
        The parameters of the trained model.
    """
    inputs = 2
    outputs = 2
    hidden_dimension = 50

    # Input variables for the features and the labels.
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Feed-forward model with seeded Glorot-uniform initialisation.
    hidden_layer = Dense(hidden_dimension, activation=C.sigmoid,
                         init=C.glorot_uniform(seed=SEED))
    output_layer = Dense(outputs, init=C.glorot_uniform(seed=SEED))
    my_model = Sequential([hidden_layer, output_layer])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Trainer using the caller-supplied optimizer.
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    progress_printer = ProgressPrinter(0)
    learner = optimizer(z.parameters, lr_per_minibatch)
    trainer = C.Trainer(z, (ce, pe), [learner], progress_printer)

    minibatch_size = 25
    for _ in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        trainer.train_minibatch({features: train_features, label: labels})

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return z.parameters
def ffnet(data, labels):
    """Train a two-hidden-layer feed-forward net on one batch of stock data.

    Args:
        data: accepted but not read by this function.
        labels: accepted but immediately replaced by the labels returned
            from ``generate_stock_data``.

    Returns:
        The result of ``test_minibatch`` on one minibatch produced by
        ``generate_random_data``.
    """
    input_dim = 800
    num_output_classes = 3
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables for the features and the labels.
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    # Hidden sigmoid layers followed by a linear output layer.
    network = Sequential([
        For(range(num_hidden_layers),
            lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
        Dense(num_output_classes),
    ])
    netout = network(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)

    # Trainer driven by plain SGD.
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    minibatch_size = 25
    # The batch is generated once and reused for every training step.
    features, labels = generate_stock_data(minibatch_size)
    training_batch = {feature: features, label: labels}
    for _ in range(1024):
        trainer.train_minibatch(training_batch)

    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim,
                                                      num_output_classes)
    avg_error = trainer.test_minibatch({feature: test_features, label: test_labels})
    return avg_error
def build_trainer(self):
    """Create the value and policy trainers and their TensorBoard writers.

    Sets ``self.tensorboard_v_writer``, ``self.tensorboard_pi_writer``,
    ``self.trainer_v`` and ``self.trainer_pi``.
    """
    # Learning rate and the two momentum schedules for the Adam optimizer.
    lr = learning_rate_schedule(self.lr, UnitType.minibatch)
    beta1 = momentum_schedule(0.9)
    beta2 = momentum_schedule(0.99)

    # Value loss: squared error between R and v.
    loss_on_v = cntk.squared_error(self.R, self.v)

    # Policy loss: -(log pi(a|s) * (R - v_calc) + 0.01 * pi . log(pi)).
    pi_a_s = cntk.log(cntk.times_transpose(self.pi, self.action))
    minus_one = cntk.variables.Constant(-1)
    advantage = cntk.minus(self.R, self.v_calc)
    weighted_log_prob = cntk.times(pi_a_s, advantage)
    pi_log_pi_term = 0.01 * cntk.times_transpose(self.pi, cntk.log(self.pi))
    loss_on_pi = minus_one * cntk.plus(weighted_log_prob, pi_log_pi_term)

    # TensorBoard writers; view with e.g.
    #   tensorboard --logdir=tensorboard_pi_logs   (http://localhost:6006/)
    #   tensorboard --logdir=tensorboard_v_logs    (http://localhost:6006/)
    self.tensorboard_v_writer = TensorBoardProgressWriter(
        freq=10, log_dir="tensorboard_v_logs", model=self.v)
    self.tensorboard_pi_writer = TensorBoardProgressWriter(
        freq=10, log_dir="tensorboard_pi_logs", model=self.pi)

    # Create the trainers.
    v_learner = adam(self.pms_v, lr, beta1,
                     variance_momentum=beta2,
                     gradient_clipping_threshold_per_sample=2,
                     l2_regularization_weight=0.01)
    self.trainer_v = cntk.Trainer(self.v, loss_on_v, [v_learner],
                                  self.tensorboard_v_writer)

    pi_learner = adam(self.pms_pi, lr, beta1,
                      variance_momentum=beta2,
                      gradient_clipping_threshold_per_sample=2,
                      l2_regularization_weight=0.01)
    self.trainer_pi = cntk.Trainer(self.pi, loss_on_pi, [pi_learner],
                                   self.tensorboard_pi_writer)
def cargarRedDesdeArchivo(archivo):
    """Rebuild the network and restore its state from a trainer checkpoint.

    Args:
        archivo: checkpoint file passed to ``Trainer.restore_from_checkpoint``.

    Returns:
        The network output function ``netout`` after the restore.
    """
    input_dim = 800
    num_output_classes = 3

    # Input variables for the features and the labels.
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    # Network with three output classes, plus its criteria.
    netout = crearRed(input_dim, 3, feature)
    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # A trainer is built only so that the checkpoint can be restored through it.
    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(1)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)
    trainer.restore_from_checkpoint(archivo)

    return netout
def train_and_test(s2smodel, train_reader, test_reader, block_size,
                   num_quantization_bits, max_epochs, epoch_size,
                   minibatch_size, progress_printer, warm_up):
    """Run a distributed training session with cross-validation.

    Args:
        s2smodel: model passed to ``create_model_train``.
        train_reader: minibatch source for training.
        test_reader: minibatch source for cross-validation.
        block_size: when not ``None``, a block-momentum distributed learner
            is used; otherwise a data-parallel learner.
        num_quantization_bits: quantization bits for the data-parallel learner.
        max_epochs: accepted but not used by this function.
        epoch_size: sample unit for the learning-rate schedule, progress and
            checkpoint frequency.
        minibatch_size: minibatch size for training and cross-validation.
        progress_printer: progress writer handed to ``Trainer``.
        warm_up: ``distributed_after`` value for the data-parallel learner.

    Raises:
        RuntimeError: if block momentum is combined with a non-default
            number of quantization bits.
    """
    from Sequence2Sequence import create_criterion_function, create_model_train

    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Block momentum and quantization are mutually exclusive.
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    local_learner = fsadagrad(
        model_train.parameters,
        lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                  UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)

    # Identity comparison with None, consistent with the guard above.
    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner,
                                                     block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(
            local_learner,
            num_quantization_bits=num_quantization_bits,
            distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {
        criterion.arguments[0]: train_reader.streams.features,
        criterion.arguments[1]: train_reader.streams.labels,
    }

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(
            frequency=epoch_size,
            filename=os.path.join(model_path, "SequenceToSequence"),
            restore=False),
        cv_config=CrossValidationConfig(source=test_reader,
                                        mb_size=minibatch_size)
    ).train()
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None,
                       model_dir=None, log_dir=None, tensorboard_logdir=None, gen_heartbeat=False):
    """Train a CIFAR-10 ResNet and return its error on the test reader.

    Reads the module-level names ``num_channels``, ``image_height``,
    ``image_width`` and ``num_classes``.

    Args:
        reader_train: Minibatch source for training; its ``streams.features``
            and ``streams.labels`` define the input map.
        reader_test: Minibatch source used for the final evaluation.
        network_name: ``'resnet20'`` or ``'resnet110'``.
        epoch_size: Number of samples per training epoch.
        max_epochs: Number of training epochs.
        profiler_dir: If set, the profiler is started before training and
            stopped afterwards.
        model_dir: If set, the model is saved there after every epoch.
        log_dir: Forwarded to ``ProgressPrinter`` as ``log_to_file``.
        tensorboard_logdir: If not ``None``, a ``TensorBoardProgressWriter``
            is added and per-parameter means are logged after each epoch.
        gen_heartbeat: Forwarded to ``ProgressPrinter``.

    Returns:
        The sample-weighted average of ``trainer.test_minibatch`` over the
        evaluated test samples (10000 requested).

    Raises:
        RuntimeError: If ``network_name`` is not a supported model.
    """
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    minibatch_size = 128
    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # progress writers
    progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs,
                                        gen_heartbeat=gen_heartbeat)]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            # fetch minibatch, never requesting more than the epoch still needs
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)

        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # test_minibatch yields a per-sample average over the samples that
        # were actually delivered, so weight it by that count rather than by
        # the requested size; the two differ when the reader returns a short
        # minibatch.
        delivered = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * delivered
        metric_denom += delivered

        # Keep track of the number of samples processed so far.
        sample_count += delivered

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer/metric_denom
def __init__(self, input_shape, nb_actions, gamma=0.99, explorer=LinearEpsilonAnnealingExplorer(1, 0.1, 1000000), learning_rate=0.00025, momentum=0.95, minibatch_size=32, memory_size=500000, train_after=10000, train_interval=4, target_update_interval=10000, monitor=True):
    """Build the action-value network, its frozen target copy, the loss and the trainer.

    Args:
        input_shape: Shape of the network input; also passed to ``History``,
            and ``input_shape[1:]`` is passed to ``ReplayMemory``.
        nb_actions: Number of units in the output layer and size of the
            1-hot ``actions`` input of the criterion.
        gamma: Factor applied to the target network's maximum Q-value when
            computing the Q-targets.
        explorer: Stored as ``self._explorer``. The default instance is
            created once, when the function is defined, and is therefore
            shared by every object built with the default.
        learning_rate: Per-minibatch learning rate for Adam.
        momentum: Momentum for Adam (variance momentum is fixed at 0.999).
        minibatch_size: Stored as ``self._minibatch_size``.
        memory_size: First argument to ``ReplayMemory``.
        train_after: Stored as ``self._train_after``.
        train_interval: Stored as ``self._train_interval``.
        target_update_interval: Stored as ``self._target_update_interval``.
        monitor: When truthy, a ``TensorBoardProgressWriter`` logging to
            ``'metrics'`` is created and attached to the trainer.
    """
    self.input_shape = input_shape
    self.nb_actions = nb_actions
    self.gamma = gamma

    # Stored here; not otherwise used in this method.
    self._train_after = train_after
    self._train_interval = train_interval
    self._target_update_interval = target_update_interval

    self._explorer = explorer
    self._minibatch_size = minibatch_size
    self._history = History(input_shape)
    # NOTE(review): the literal 4 is presumably the history length -- confirm against ReplayMemory.
    self._memory = ReplayMemory(memory_size, input_shape[1:], 4)
    self._num_actions_taken = 0

    # Metrics accumulator
    self._episode_rewards, self._episode_q_means, self._episode_q_stddev = [], [], []

    # Action Value model (used by agent to interact with the environment)
    with default_options(activation=relu, init=he_uniform()):
        self._action_value_net = Sequential([
            Convolution2D((8, 8), 16, strides=4),
            Convolution2D((4, 4), 32, strides=2),
            Convolution2D((3, 3), 32, strides=1),
            Dense(256, init=he_uniform(scale=0.01)),
            Dense(nb_actions, activation=None, init=he_uniform(scale=0.01))
        ])
    self._action_value_net.update_signature(Tensor[input_shape])

    # Target model used to compute the target Q-values in training, updated
    # less frequently for increased stability.
    self._target_net = self._action_value_net.clone(CloneMethod.freeze)

    # Function computing Q-values targets as part of the computation graph
    @Function
    @Signature(post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def compute_q_targets(post_states, rewards, terminals):
        # Where `terminals` selects the first branch the target is the bare
        # reward; otherwise it is the discounted max target-net value plus reward.
        return element_select(
            terminals,
            rewards,
            gamma * reduce_max(self._target_net(post_states), axis=0) + rewards,
        )

    # Define the loss, using Huber Loss (more robust to outliers)
    @Function
    @Signature(pre_states=Tensor[input_shape], actions=Tensor[nb_actions],
               post_states=Tensor[input_shape], rewards=Tensor[()], terminals=Tensor[()])
    def criterion(pre_states, actions, post_states, rewards, terminals):
        # Compute the q_targets
        q_targets = compute_q_targets(post_states, rewards, terminals)

        # actions is a 1-hot encoding of the action done by the agent
        q_acted = reduce_sum(self._action_value_net(pre_states) * actions, axis=0)

        # Define training criterion as the Huber Loss function
        return huber_loss(q_targets, q_acted, 1.0)

    # Adam based SGD
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
    m_schedule = momentum_schedule(momentum)
    vm_schedule = momentum_schedule(0.999)
    l_sgd = adam(self._action_value_net.parameters, lr_schedule,
                 momentum=m_schedule, variance_momentum=vm_schedule)

    # The writer is None when monitoring is disabled.
    self._metrics_writer = TensorBoardProgressWriter(freq=1, log_dir='metrics', model=criterion) if monitor else None
    self._learner = l_sgd
    # The criterion is the loss; no separate evaluation metric is supplied.
    self._trainer = Trainer(criterion, (criterion, None), l_sgd, self._metrics_writer)
def train_fast_rcnn(cfg):
    """Train a Fast R-CNN model, or load the existing one in make-mode.

    When a file already exists at ``cfg['MODEL_PATH']`` and
    ``cfg["CNTK"].MAKE_MODE`` is set, that model is loaded and returned.
    Otherwise the training graph is built, trained for
    ``cfg["CNTK"].MAX_EPOCHS`` epochs with separate momentum-SGD learners
    for bias and non-bias parameters, converted to an evaluation model,
    saved to ``cfg['MODEL_PATH']`` and returned.

    Args:
        cfg: Configuration object supporting both attribute access and the
            ``"CNTK"``, ``"DATA"``, ``"MODEL"``, ``"TRAIN"`` sub-sections.

    Returns:
        The loaded model, or the freshly trained evaluation model.
    """
    model_path = cfg['MODEL_PATH']

    # Train only if no model exists yet
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)

    def batch_input(shape, **kwargs):
        # Every network input uses only the default batch axis.
        return input_variable(shape, dynamic_axes=[Axis.default_batch_axis()], **kwargs)

    num_rois = cfg.NUM_ROI_PROPOSALS
    num_classes = cfg["DATA"].NUM_CLASSES

    # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
    image_input = batch_input((cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                              name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_proposals = batch_input((num_rois, 4), name="roi_proposals")
    label_targets = batch_input((num_rois, num_classes))
    bbox_targets = batch_input((num_rois, 4*num_classes))
    bbox_inside_weights = batch_input((num_rois, 4*num_classes))

    # Instantiate the Fast R-CNN prediction model and loss function
    loss, pred_error = create_fast_rcnn_model(image_input, roi_proposals, label_targets,
                                              bbox_targets, bbox_inside_weights, cfg)
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
        graph_file = os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train." + cfg["CNTK"].GRAPH_TYPE)
        plot(loss, graph_file)

    # Set learning parameters
    lr_factor = cfg["CNTK"].LR_FACTOR
    lr_per_sample_scaled = [rate * lr_factor for rate in cfg["CNTK"].LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    epochs_to_train = cfg["CNTK"].MAX_EPOCHS

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample: {}".format(lr_per_sample_scaled))

    # --- train ---
    # Split parameters so that biases can use their own learning rate.
    all_params = loss.parameters
    bias_params = [p for p in all_params if '.b' in p.name or 'b' == p.name]
    weight_params = [p for p in all_params if p not in bias_params]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    weight_lr_schedule = learning_rate_schedule(lr_per_sample_scaled, unit=UnitType.sample)
    weight_learner = momentum_sgd(weight_params, weight_lr_schedule, mm_schedule,
                                  l2_regularization_weight=l2_reg_weight,
                                  unit_gain=False, use_mean_gradient=True)

    # NOTE(review): the bias rate is derived from the unscaled LR_PER_SAMPLE,
    # so LR_FACTOR does not affect biases -- confirm this is intended.
    bias_lr_per_sample = [rate * bias_lr_mult for rate in cfg["CNTK"].LR_PER_SAMPLE]
    bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
    bias_learner = momentum_sgd(bias_params, bias_lr_schedule, mm_schedule,
                                l2_regularization_weight=l2_reg_weight,
                                unit_gain=False, use_mean_gradient=True)

    trainer = Trainer(None, (loss, pred_error), [weight_learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        proposal_provider = ProposalProvider.fromfile(cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE,
                                                      cfg.NUM_ROI_PROPOSALS)
    else:
        proposal_provider = ProposalProvider.fromconfig(cfg)

    num_train_images = cfg["DATA"].NUM_TRAIN_IMAGES
    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE,
        cfg["DATA"].TRAIN_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=num_train_images,
        num_classes=cfg["DATA"].NUM_CLASSES,
        proposal_provider=proposal_provider,
        provide_targets=True,
        proposal_iou_threshold=cfg.BBOX_THRESH,
        normalize_means=cfg.BBOX_NORMALIZE_MEANS if cfg.BBOX_NORMALIZE_TARGETS else None,
        normalize_stds=cfg.BBOX_NORMALIZE_STDS if cfg.BBOX_NORMALIZE_TARGETS else None)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.proposals_si: roi_proposals,
        od_minibatch_source.label_targets_si: label_targets,
        od_minibatch_source.bbox_targets_si: bbox_targets,
        od_minibatch_source.bbiw_si: bbox_inside_weights,
    }

    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train, gen_heartbeat=True)
    for _ in range(epochs_to_train):  # loop over epochs
        seen = 0
        while seen < num_train_images:  # loop over minibatches in the epoch
            request = min(cfg.MB_SIZE, num_train_images - seen)
            batch = od_minibatch_source.next_minibatch(request, input_map=input_map)
            trainer.train_minibatch(batch)  # update model with it
            seen += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            if seen % 100 == 0:
                print("Processed {} samples".format(seen))

        progress_printer.epoch_summary(with_metric=True)

    eval_model = create_fast_rcnn_eval_model(loss, image_input, roi_proposals, cfg)
    eval_model.save(cfg['MODEL_PATH'])
    return eval_model
def sweep_based_schedule_fails():
    """Expect ``learning_rate_schedule`` to raise for a per-sample schedule with ``epoch_size=0``.

    NOTE(review): the name has no ``test_`` prefix, so pytest's default
    discovery will not collect it unless collection is customised.
    """
    with pytest.raises(Exception):
        rates = [1]
        learning_rate_schedule(rates, unit=UnitType.sample, epoch_size=0)
def test_learning_rate_schedule(params, expectation, minibatch_size):
    """Check a schedule's ``minibatch_size`` and its leading values.

    Args:
        params: Positional arguments forwarded to ``learning_rate_schedule``.
        expectation: Expected values at indices ``0 .. len(expectation) - 1``.
        minibatch_size: Expected ``minibatch_size`` attribute of the schedule.
    """
    schedule = learning_rate_schedule(*params)
    assert schedule.minibatch_size == minibatch_size

    # Index one element at a time, as far as the expectation reaches.
    observed = [schedule[index] for index in range(len(expectation))]
    assert observed == expectation
def test_learner_empy_parameters_list():
    """Creating an SGD learner with an empty parameter list must raise ``ValueError``."""
    per_sample_rate = learning_rate_schedule(0.1, UnitType.sample)

    with pytest.raises(ValueError):
        # The result is irrelevant; only the failure at construction matters.
        C.sgd([], per_sample_rate)