def test_model_not_criterion_subset():
    input_dim = 2
    proj_dim = 11
    model1_dim = 3
    model2_dim = 4
    x = input_variable((input_dim,))

    core = Embedding(proj_dim)

    model1 = Dense(model1_dim)(sequence.last(core(x)))
    model1_label = input_variable((model1_dim,), dynamic_axes=[Axis.default_batch_axis()])
    ce_model1 = cross_entropy_with_softmax(model1, model1_label)
    pe_model1 = classification_error(model1, model1_label)

    model2 = Dense(model2_dim)(core(x))
    model2_label = input_variable((model2_dim,))
    ce_model2 = cross_entropy_with_softmax(model2, model2_label)
    pe_model2 = classification_error(model2, model2_label)

    ce = 0.5 * sequence.reduce_sum(ce_model2) + 0.5 * ce_model1

    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    trainer_multitask = Trainer(model1, (ce, pe_model1), sgd(ce.parameters, lr=lr_schedule))

    x_data = np.asarray([[2., 1.], [1., 2.]], np.float32)
    model1_label_data = np.asarray([1., 0., 0.], np.float32)
    model2_label_data = np.asarray([[0., 1., 0., 0.], [0., 0., 0., 1.]], np.float32)
    trainer_multitask.train_minibatch({x: [x_data], model1_label: [model1_label_data], model2_label: [model2_label_data]})
def create_bn_inception():
    # Input variables denoting the features and label data
    feature_var = input_variable((NUM_CHANNELS, IMAGE_HEIGHT, IMAGE_WIDTH))
    label_var = input_variable((NUM_CLASSES))

    bn_time_const = 4096
    z = bn_inception_cifar_model(feature_var, NUM_CLASSES, bn_time_const)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }
def test_trainer(tmpdir, no_eval_function):
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (ce, errs),
                      [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], Learner)
def test_trainer_with_some_params_not_learned():
    input_dim = 2
    proj_dim = 2
    x = input_variable(shape=(input_dim,))
    W = parameter(shape=(input_dim, proj_dim), init=glorot_uniform())
    B = parameter(shape=(proj_dim,), init=glorot_uniform())
    t = times(x, W)
    z = t + B

    W_orig_value = W.value
    B_orig_value = B.value

    labels = input_variable(shape=(proj_dim,))
    ce = cross_entropy_with_softmax(z, labels)
    pe = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule(0.1, UnitType.sample)
    trainer = Trainer(z, (ce, pe), sgd([W], lr_per_sample))

    x_value = [[1, 1], [2, 2]]
    label_value = [[0, 1], [1, 0]]
    arguments = {x: x_value, labels: label_value}

    num_iters = 3
    for i in range(num_iters):
        trainer.train_minibatch(arguments)

        assert np.array_equal(B.value, B_orig_value)
        assert not np.array_equal(W.value, W_orig_value)
        W_orig_value = W.value

    trainer.test_minibatch(arguments)
def entrenar(checkpoint, entrRuedas, entrOperaciones, input_dim, num_output_classes, testRuedas, testOperaciones):
    minibatch_size = 100
    epocs = 900
    minibatchIteraciones = int(len(entrOperaciones) / minibatch_size)

    # Input variables denoting the features and label data
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    netout = crearRed(input_dim, num_output_classes, feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.25, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(log_to_file=checkpoint + ".log", num_epochs=epocs)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    if os.path.isfile(checkpoint):
        trainer.restore_from_checkpoint(checkpoint)

    npentrRuedas = np.array(entrRuedas).astype(np.float32)
    npentrOperaciones = np.array(entrOperaciones).astype(np.float32)

    # iterate once per "epoch"
    for i in range(0, epocs):
        p = np.random.permutation(len(entrRuedas))
        npentrOperaciones = npentrOperaciones[p]
        npentrRuedas = npentrRuedas[p]

        # now split the data into minibatches and train
        for j in range(0, minibatchIteraciones):
            features = npentrRuedas[j * minibatch_size:(j + 1) * minibatch_size]
            labels = npentrOperaciones[j * minibatch_size:(j + 1) * minibatch_size]
            trainer.train_minibatch({feature: features, label: labels})
        trainer.summarize_training_progress()
        trainer.save_checkpoint(checkpoint)

    minibatchIteraciones = int(len(testOperaciones) / minibatch_size)
    avg_error = 0
    for j in range(0, minibatchIteraciones):
        test_features = np.array(testRuedas[j * minibatch_size:(j + 1) * minibatch_size]).astype(np.float32)
        test_labels = np.array(testOperaciones[j * minibatch_size:(j + 1) * minibatch_size]).astype(np.float32)
        # test_features = np.array(entrRuedas[0:minibatch_size]).astype(np.float32)
        # test_labels = np.array(entrOperaciones[0:minibatch_size]).astype(np.float32)
        avg_error = avg_error + (trainer.test_minibatch({feature: test_features, label: test_labels}) / minibatchIteraciones)

    return avg_error
def train_lm(training_file, epochs, max_num_minibatches):
    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)

    # and apply it to the input sequence
    z = model(input_sequence)

    # setup the criterions (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_parameter_schedule_per_sample(0.001)
    momentum_schedule = momentum_schedule_per_sample(0.9990913221888589)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_per_sample, momentum_schedule,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)

    # print out some useful training information
    log_number_of_parameters(z)
    print("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
    print()

    for e in range(0, epochs):
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data
            features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim)
            arguments = ({input_sequence: features, label_sequence: labels}, mask)
            mask = [False]
            trainer.train_minibatch(arguments)

            global_minibatch = e * minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e + 1)
        z.save(model_filename)
        print("Saved model to '%s'" % model_filename)
def create_sample_model(device, writer=None,
                        lr_per_sample=C.learning_parameter_schedule_per_sample([0.3, 0.2, 0.1, 0.0])):
    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10, device=device)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    learner = C.sgd(z.parameters, lr_per_sample)
    trainer = C.Trainer(z, (ce, errs), [learner], writer)
    return (trainer, in1, labels)
def create_fast_rcnn_model(features, roi_proposals, label_targets, bbox_targets, bbox_inside_weights, cfg):
    # Load the pre-trained classification net and clone layers
    base_model = load_model(cfg['BASE_MODEL_PATH'])
    conv_layers = clone_conv_layers(base_model, cfg)
    fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME], [cfg["MODEL"].LAST_HIDDEN_NODE_NAME],
                            clone_method=CloneMethod.clone)

    # Normalization and conv layers
    feat_norm = features - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
    conv_out = conv_layers(feat_norm)

    # Fast RCNN and losses
    cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, roi_proposals, fc_layers, cfg)
    detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred,
                                               roi_proposals, bbox_targets, bbox_inside_weights, cfg)
    pred_error = classification_error(cls_score, label_targets, axis=1)

    return detection_losses, pred_error
def test_model_one_output_of_multi_output_function():
    input_dim = 2
    proj_dim = 11
    x = input_variable((input_dim,))

    x_placeholder = placeholder_variable()
    w = parameter((input_dim, proj_dim))
    b = parameter((proj_dim,))
    proj = times(x_placeholder, w)
    proj_plus_bias = proj + b
    combined_model = as_block(combine([proj, proj_plus_bias]), [(x_placeholder, x)], 'dense_op')

    labels = input_variable((proj_dim,))
    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    ce = cross_entropy_with_softmax(combined_model.outputs[0], labels)
    pe = classification_error(combined_model.outputs[0], labels)
    trainer_multitask = Trainer(combined_model.outputs[0], (ce, pe), sgd(ce.parameters, lr=lr_schedule))
def test_output_to_retain():
    in1 = input_variable(shape=(1,))
    labels = input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (ce, errs),
                      [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[[1]], [[2]]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
    assert np.allclose(var_map[z_output], np.asarray(in1_value) + 20)
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
def test_output_to_retain():
    in1 = input(shape=(1,))
    labels = input(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    lr_per_sample = learning_rate_schedule(0.007, UnitType.sample)
    trainer = Trainer(z, (ce, errs),
                      [momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])
    assert np.allclose(var_map[z_output], np.asarray(in1_value) + 20)
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    feature = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    netout = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
                         Dense(num_output_classes)])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_parameter_schedule(0.5)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({feature: features, label: labels})

    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch({feature: test_features, label: test_labels})
    return avg_error
def test_model_one_output_of_multi_output_function():
    input_dim = 2
    proj_dim = 11
    x = C.input_variable((input_dim,))

    x_placeholder = C.placeholder()
    w = parameter((input_dim, proj_dim))
    b = parameter((proj_dim,))
    proj = times(x_placeholder, w)
    proj_plus_bias = proj + b
    combined_model = as_block(C.combine([proj, proj_plus_bias]), [(x_placeholder, x)], 'dense_op')

    labels = C.input_variable((proj_dim,))
    lr_schedule = C.learning_parameter_schedule(0.003, minibatch_size=1)
    ce = cross_entropy_with_softmax(combined_model.outputs[0], labels)
    pe = classification_error(combined_model.outputs[0], labels)
    trainer_multitask = C.Trainer(combined_model.outputs[0], (ce, pe), C.sgd(ce.parameters, lr=lr_schedule))
def run_distributed_training(tmpdir, create_func):
    in1 = sequence.input_variable(shape=1)
    labels = sequence.input_variable(shape=1)
    p = parameter(shape=2, init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_rate_schedule(0.007, C.UnitType.sample)
    dist_learner = create_func(C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True))

    communicator = dist_learner.communicator()
    workers = communicator.workers()
    current_worker = communicator.current_worker()
    found_rank = False
    for wk in workers:
        if current_worker.global_rank == wk.global_rank:
            found_rank = True
    assert found_rank

    trainer = C.Trainer(z, (ce, errs), [dist_learner])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    trainer.save_checkpoint(p)
    trainer.restore_from_checkpoint(p)

    communicator.barrier()

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
def test_model_one_output_of_multi_output_function():
    input_dim = 2
    proj_dim = 11
    x = input((input_dim,))

    x_placeholder = placeholder()
    w = parameter((input_dim, proj_dim))
    b = parameter((proj_dim,))
    proj = times(x_placeholder, w)
    proj_plus_bias = proj + b
    combined_model = as_block(combine([proj, proj_plus_bias]), [(x_placeholder, x)], 'dense_op')

    labels = input((proj_dim,))
    lr_schedule = learning_rate_schedule(0.003, UnitType.sample)
    ce = cross_entropy_with_softmax(combined_model.outputs[0], labels)
    pe = classification_error(combined_model.outputs[0], labels)
    trainer_multitask = Trainer(combined_model.outputs[0], (ce, pe), sgd(ce.parameters, lr=lr_schedule))
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    input = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(1024):
        features, labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({input: features, label: labels})

    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch({input: test_features, label: test_labels})
    return avg_error
def create_network(num_convolution_layers):
    """ Create network """
    # Input variables denoting the features and label data
    input_var = cntk.input_variable((_NUM_CHANNELS, _IMAGE_HEIGHT, _IMAGE_WIDTH))
    label_var = cntk.input_variable((_NUM_CLASSES))

    # create model, and configure learning parameters
    # Instantiate the feedforward classification model
    input_removemean = minus(input_var, constant(128))
    scaled_input = element_times(constant(0.00390625), input_removemean)

    print('Creating NN model')
    with layers.default_options(activation=relu, pad=True):
        model = layers.Sequential([
            layers.For(range(num_convolution_layers), lambda: [
                layers.Convolution2D((3, 3), 64),
                layers.Convolution2D((3, 3), 64),
                layers.MaxPooling((3, 3), (2, 2))
            ]),
            layers.For(range(2), lambda i: [
                layers.Dense([256, 128][i]),
                layers.Dropout(0.5)
            ]),
            layers.Dense(_NUM_CLASSES, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cross_entropy_with_softmax(model, label_var)
    pe = classification_error(model, label_var)

    return {
        'name': 'convnet',
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'output': model
    }
def test_trainer(tmpdir, no_eval_function):
    in1 = C.input_variable(shape=(1,))
    labels = C.input_variable(shape=(1,))
    p = parameter(shape=(2,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    if no_eval_function:
        errs = None
    else:
        errs = classification_error(z, labels)

    momentum_time_constant = C.momentum_as_time_constant_schedule(1100)
    lr_per_sample = C.learning_parameter_schedule(0.007, minibatch_size=1)
    trainer = C.Trainer(z, (ce, errs),
                        [C.momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant, True)])
    in1_value = [[1], [2]]
    label_value = [[0], [1]]
    arguments = {in1: in1_value, labels: label_value}
    z_output = z.output
    updated, var_map = trainer.train_minibatch(arguments, outputs=[z_output])

    p = str(tmpdir / 'checkpoint.dat')
    external_state = {"additional external state": math.pi, "nested dict": {"a": "b"}, "list": [1, 2, 3]}
    trainer.save_checkpoint(p, external_state)
    restored_state = trainer.restore_from_checkpoint(p)
    assert external_state == restored_state

    assert trainer.model.name == 'z'

    # Ensure that Swig is not leaking raw types
    assert isinstance(trainer.model, Function)
    assert trainer.model.__doc__
    assert isinstance(trainer.parameter_learners[0], C.Learner)
def ffnet(data, labels):
    input_dim = 800
    num_output_classes = 3
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    netout = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=sigmoid)),
                         Dense(num_output_classes)])(feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(128)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    features, labels = generate_stock_data(minibatch_size)

    for i in range(1024):
        # features, labels = generate_random_data(
        #     minibatch_size, input_dim, num_output_classes)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({feature: features, label: labels})
    trainer.summarize_training_progress()

    test_features, test_labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch({feature: test_features, label: test_labels})
    return avg_error
def cargarRedDesdeArchivo(archivo):
    input_dim = 800
    num_output_classes = 3

    feature = input((input_dim), np.float32)
    label = input((num_output_classes), np.float32)

    netout = crearRed(input_dim, 3, feature)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    # Instantiate the trainer object to drive the model training
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    progress_printer = ProgressPrinter(1)
    trainer = Trainer(netout, (ce, pe), learner, progress_printer)

    trainer.restore_from_checkpoint(archivo)
    return netout
def test_eval():
    input_dim = 2
    proj_dim = 2
    x = C.input_variable(shape=(input_dim,))
    W = C.parameter(shape=(input_dim, proj_dim), init=[[1, 0], [0, 1]])
    B = C.parameter(shape=(proj_dim,), init=[[0, 1]])
    t = C.times(x, W)
    z = t + B

    labels = C.input_variable(shape=(proj_dim,))
    pe = classification_error(z, labels)

    tester = C.eval.Evaluator(pe)

    x_value = [[0, 1], [2, 2]]
    label_value = [[0, 1], [1, 0]]
    arguments = {x: x_value, labels: label_value}

    eval_error = tester.test_minibatch(arguments)

    assert np.allclose(eval_error, .5)
def create_faster_rcnn_model(features, scaled_gt_boxes, dims_input, cfg):
    # Load the pre-trained classification net and clone layers
    base_model = load_model(cfg['BASE_MODEL_PATH'])
    conv_layers = clone_conv_layers(base_model, cfg)
    fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME], [cfg["MODEL"].LAST_HIDDEN_NODE_NAME],
                            clone_method=CloneMethod.clone)

    # Normalization and conv layers
    feat_norm = features - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
    conv_out = conv_layers(feat_norm)

    # RPN and prediction targets
    rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_input, cfg)
    rois, label_targets, bbox_targets, bbox_inside_weights = \
        create_proposal_target_layer(rpn_rois, scaled_gt_boxes, cfg)

    # Fast RCNN and losses
    cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg)
    detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois,
                                               bbox_targets, bbox_inside_weights, cfg)
    loss = rpn_losses + detection_losses
    pred_error = classification_error(cls_score, label_targets, axis=1)

    return loss, pred_error
def create_faster_rcnn_predictor(base_model_file_name, features, scaled_gt_boxes, dims_input):
    # Load the pre-trained classification net and clone layers
    base_model = load_model(base_model_file_name)
    conv_layers = clone_conv_layers(base_model)
    fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], clone_method=CloneMethod.clone)

    # Normalization and conv layers
    feat_norm = features - normalization_const
    conv_out = conv_layers(feat_norm)

    # RPN and prediction targets
    rpn_rois, rpn_losses = \
        create_rpn(conv_out, scaled_gt_boxes, dims_input, proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
    rois, label_targets, bbox_targets, bbox_inside_weights = \
        create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes=globalvars['num_classes'])

    # Fast RCNN and losses
    cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
    detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred,
                                               bbox_targets, bbox_inside_weights)
    loss = rpn_losses + detection_losses
    pred_error = classification_error(cls_score, label_targets, axis=1)

    return loss, pred_error
def simple_mnist(tensorboard_logdir=None):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = input(input_dim, np.float32)
    label = input(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)
    z = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST")

    path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt"))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    # training_progress_output_freq = 100
    progress_writers = [ProgressPrinter(
        # freq=training_progress_output_freq,
        tag='Training',
        num_epochs=num_sweeps_to_train_with)]

    if tensorboard_logdir is not None:
        progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(z, (ce, pe), adadelta(z.parameters), progress_writers)

    training_session(
        trainer=trainer,
        mb_source=reader_train,
        mb_size=minibatch_size,
        var_to_stream=input_map,
        max_samples=num_samples_per_sweep * num_sweeps_to_train_with,
        progress_frequency=num_samples_per_sweep
    ).train()

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
def criterion(query, labels):
    z = model(query)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)
    return (ce, errs)
def train_faster_rcnn_alternating(cfg):
    '''
    4-Step Alternating Training scheme from the Faster R-CNN paper:
    # Create initial network, only rpn, without detection network
    # --> train only the rpn (and conv3_1 and up for VGG16)
    # buffer region proposals from rpn
    # Create full network, initialize conv layers with imagenet, use buffered proposals
    # --> train only detection network (and conv3_1 and up for VGG16)
    # Keep conv weights from detection network and fix them
    # --> train only rpn
    # buffer region proposals from rpn
    # Keep conv and rpn weights from step 3 and fix them
    # --> train only detection network
    '''

    # setting pre- and post-nms top N to training values since buffered proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
    cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
    cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

    # Learning parameters
    rpn_lr_factor = cfg["MODEL"].RPN_LR_FACTOR
    rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
    frcn_lr_factor = cfg["MODEL"].FRCN_LR_FACTOR
    frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]
    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    rpn_epochs = cfg["CNTK"].RPN_EPOCHS
    frcn_epochs = cfg["CNTK"].FRCN_EPOCHS

    feature_node_name = cfg["MODEL"].FEATURE_NODE_NAME
    last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))

    debug_output = cfg["CNTK"].DEBUG_OUTPUT
    if debug_output:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)

    # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    feat_norm = image_input - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
    roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # base image classification model (e.g. VGG16 or AlexNet)
    base_model = load_model(cfg['BASE_MODEL_PATH'])

    print("stage 1a - rpn")
    if True:
        # Create initial network, only rpn, without detection network
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      init new            yes
        # frcn:     -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, cfg)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage1_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input, cfg)

    print("stage 1b - frcn")
    if True:
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      stage1a rpn model   no --> use buffered proposals
        # frcn:     base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, cfg)

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME], [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg)
        detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois,
                                                   bbox_targets, bbox_inside_weights, cfg)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output:
            plot(stage1_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        buffered_proposals_s1 = None

    print("stage 2a - rpn")
    if True:
        # Keep conv weights from detection network and fix them
        #           initial weights     train?
        # conv:     stage1b frcn model  no
        # rpn:      stage1a rpn model   yes
        # frcn:     -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"],
                          ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage2_rpn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input, cfg)

    print("stage 2b - frcn")
    if True:
        # Keep conv and rpn weights from step 3 and fix them
        #           initial weights     train?
        # conv:     stage2a rpn model   no
        # rpn:      stage2a rpn model   no --> use buffered proposals
        # frcn:     stage1b frcn model  yes

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"],
                           CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output:
            plot(stage2_frcn_network, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        buffered_proposals_s2 = None

    # resetting config values to original test values
    cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
    cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return create_faster_rcnn_eval_model(stage2_frcn_network, image_input, dims_input, cfg, rpn_model=stage2_rpn_network)
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 2
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)

    # z = Sequential([
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(hidden_layers_dim, activation=relu),
    #     Dense(num_output_classes)])(scaled_input)

    with default_options(activation=relu, init=C.glorot_uniform()):
        z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim)),
                        Dense(num_output_classes, activation=None)])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    # setup the data
    path = abs_path + "\Train-28x28_cntk_text.txt"

    reader_train = MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=num_output_classes))))

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    progress_writers = [ProgressPrinter(
        tag='Training',
        num_epochs=num_sweeps_to_train_with)]

    # Instantiate the trainer object to drive the model training
    lr = learning_rate_schedule(1, UnitType.sample)
    trainer = Trainer(z, (ce, pe), [adadelta(z.parameters, lr)], progress_writers)

    training_session(
        trainer=trainer,
        mb_source=reader_train,
        mb_size=minibatch_size,
        model_inputs_to_streams=input_map,
        max_samples=num_samples_per_sweep * num_sweeps_to_train_with,
        progress_frequency=num_samples_per_sweep
    ).train()

    # Load test data
    path = abs_path + "\Test-28x28_cntk_text.txt"

    reader_test = MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=num_output_classes))))

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size
    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
def train_faster_rcnn_alternating(base_model_file_name, debug_output=False):
    '''
    4-Step Alternating Training scheme from the Faster R-CNN paper:
    # Create initial network, only rpn, without detection network
    # --> train only the rpn (and conv3_1 and up for VGG16)
    # buffer region proposals from rpn
    # Create full network, initialize conv layers with imagenet, use buffered proposals
    # --> train only detection network (and conv3_1 and up for VGG16)
    # Keep conv weights from detection network and fix them
    # --> train only rpn
    # buffer region proposals from rpn
    # Keep conv and rpn weights from step 3 and fix them
    # --> train only detection network
    '''

    # Learning parameters
    rpn_lr_factor = globalvars['rpn_lr_factor']
    rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
    frcn_lr_factor = globalvars['frcn_lr_factor']
    frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]
    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(globalvars['momentum_per_mb'])
    rpn_epochs = globalvars['rpn_epochs']
    frcn_epochs = globalvars['frcn_epochs']

    print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
    print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))
    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])

    # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    feat_norm = image_input - normalization_const
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # base image classification model (e.g. VGG16 or AlexNet)
    base_model = load_model(base_model_file_name)

    print("stage 1a - rpn")
    if True:
        # Create initial network, only rpn, without detection network
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      init new            yes
        # frcn:     -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node,
                                          proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage1_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input)

    print("stage 1b - frcn")
    if True:
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #           initial weights     train?
        # conv:     base_model          only conv3_1 and up
        # rpn:      stage1a rpn model   no --> use buffered proposals
        # frcn:     base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes'])

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
        detection_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred,
                                                   bbox_targets, bbox_inside_weights)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output:
            plot(stage1_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        buffered_proposals_s1 = None

    print("stage 2a - rpn")
    if True:
        # Keep conv weights from detection network and fix them
        #           initial weights     train?
        # conv:     stage1b frcn model  no
        # rpn:      stage1a rpn model   yes
        # frcn:     -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"],
                          ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage2_rpn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input)

    print("stage 2b - frcn")
    if True:
        # Keep conv and rpn weights from step 3 and fix them
        #           initial weights     train?
        # conv:     stage2a rpn model   no
        # rpn:      stage2a rpn model   no --> use buffered proposals
        # frcn:     stage1b frcn model  yes

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"],
                           CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output:
            plot(stage2_frcn_network, os.path.join(globalvars['output_path'], "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        buffered_proposals_s2 = None

    return create_eval_model(stage2_frcn_network, image_input, dims_input, rpn_model=stage2_rpn_network)
    Dense(300, activation=relu),
    Dense(K, activation=None),
])

# define the inputs and labels
inputs = C.input_variable(D, np.float32, name='inputs')
labels = C.input_variable(K, np.float32, name='labels')

# get the output
logits = model(inputs)

# define loss / metrics
# like Tensorflow the softmax is done
# internally (if needed), so all we need are the logits
ce = cross_entropy_with_softmax(logits, labels)
pe = classification_error(logits, labels)

# training config
batch_size = 32
epochs = 15
n_batches = len(Xtrain) // batch_size

# do the training
# specify the training algorithm
trainer = Trainer(logits, (ce, pe), adam(logits.parameters, lr=1e-2, momentum=0.9))

# helper function
def get_output(node, X, Y):
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
from cntk.ops import input_variable
from cntk.layers import Dense
from cntk.ops import relu
from cntk.metrics import classification_error
from cntk.losses import cross_entropy_with_softmax

path = "dataset.txt"
featuresShapeValue = 6
labelsShapeValue = 1

featuresShape = input_variable(featuresShapeValue)
labelsShape = input_variable(labelsShapeValue)

ctfdResult = CTFDeserializer(path, StreamDefs(
    features=StreamDef(field='features', shape=featuresShapeValue),
    labels=StreamDef(field='labels', shape=labelsShapeValue)))

reader = MinibatchSource(ctfdResult)

hiddenLayerDimension = 4
hiddenLayerOne = Dense(hiddenLayerDimension, activation=relu)(featuresShape)
outputLayer = Dense(labelsShapeValue, activation=relu)(hiddenLayerOne)

crossEntropy = cross_entropy_with_softmax(outputLayer, labelsShape)
classificationError = classification_error(outputLayer, labelsShape)
def simple_mnist(tensorboard_logdir=None):
    input_dim = 19
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 1024

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    # scaled_input = element_times(constant(0.00390625), feature)

    z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)),
                    Dense(num_output_classes)])(feature)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = r"."

    path = os.path.normpath(os.path.join(data_dir, "train.ctf"))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 512
    num_samples_per_sweep = 1825000
    num_sweeps_to_train_with = 100

    # Instantiate progress writers.
    progress_writers = [ProgressPrinter(
        tag='Training',
        num_epochs=num_sweeps_to_train_with)]

    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # Instantiate the trainer object to drive the model training
    lr = learning_parameter_schedule_per_sample(0.001)
    learner = create_learner(model=z)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    num_minibatches_to_train = int(num_samples_per_sweep / minibatch_size * num_sweeps_to_train_with)

    model_dir = "model"

    for i in range(num_minibatches_to_train):
        mb = reader_train.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

        freq = int(num_samples_per_sweep / minibatch_size)
        if i > 0 and i % freq == 0:
            timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")
            current_trainer_cp = os.path.join(model_dir, timestamp + "_epoch_" + str(freq) + ".trainer")
            trainer.save_checkpoint(current_trainer_cp)

            train_error = get_error_rate(os.path.join(data_dir, "train_subset.ctf"), input_map,
                                         input_dim, num_output_classes, trainer)
            valid_error = get_error_rate(os.path.join(data_dir, "validation.ctf"), input_map,
                                         input_dim, num_output_classes, trainer)

            if train_error > 0:
                tensorboard_writer.write_value("train_error", train_error, i)
            if valid_error > 0:
                tensorboard_writer.write_value("valid_error", valid_error, i)

    feat_path = os.path.normpath(os.path.join(data_dir, "test.ctf"))
    return get_error_rate(feat_path, input_map, input_dim, num_output_classes, trainer)
    return {input_sequences: batch_inputs, labels: batch_outputs}

data, char_to_ix, ix_to_char, data_size, VOCAB_SIZE_VAR = load_data_and_vocab(TRAINING_FILE_PATH)

input_sequences, labels = create_model_placeholders()
if USE_SAVED_MODEL:
    model = load_model(MODEL_FILE_PATH)
else:
    model = create_model()
z = model(input_sequences)

ce = cross_entropy_with_softmax(z, labels)
errs = classification_error(z, labels)

momentum_schedule = momentum_schedule_per_sample(0.9990913221888589)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = adam(z.parameters, 0.001, momentum_schedule)
trainer = Trainer(z, (ce, errs), [learner])

for e in range(EPOCHS):
    arguments = get_random_batch()
    if TRAIN:
        trainer.train_minibatch(arguments)
    if e % LOG_FREQUENCY == 0:
        print('Epoch: ' + str(e) + ', Average Classification Error: {:,.0%}'.format(
            pd.DataFrame(errs.eval(arguments))[0].mean()))
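# Only the final `return` of `get_random_batch` survives above; the rest of its body is not
# shown. The sketch below is a guess at what such a helper could look like for a character-level
# model: pick a random window of the corpus and one-hot encode the inputs and the next-character
# targets. `SEQ_LENGTH` is a hypothetical constant; `data`, `data_size`, `char_to_ix`, and
# `VOCAB_SIZE_VAR` are assumed to come from `load_data_and_vocab` as above.
import random
import numpy as np

SEQ_LENGTH = 100  # hypothetical window size

def get_random_batch():
    start = random.randint(0, data_size - SEQ_LENGTH - 2)
    window = [char_to_ix[ch] for ch in data[start:start + SEQ_LENGTH + 1]]
    batch_inputs = np.zeros((SEQ_LENGTH, VOCAB_SIZE_VAR), dtype=np.float32)
    batch_outputs = np.zeros((SEQ_LENGTH, VOCAB_SIZE_VAR), dtype=np.float32)
    for i in range(SEQ_LENGTH):
        batch_inputs[i, window[i]] = 1       # current character
        batch_outputs[i, window[i + 1]] = 1  # next character
    return {input_sequences: batch_inputs, labels: batch_outputs}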
def train_faster_rcnn_alternating(base_model_file_name, debug_output=False):
    '''
    4-Step Alternating Training scheme from the Faster R-CNN paper:

    # Create initial network, only rpn, without detection network
    #   --> train only the rpn (and conv3_1 and up for VGG16)
    # buffer region proposals from rpn
    # Create full network, initialize conv layers with imagenet, use buffered proposals
    #   --> train only detection network (and conv3_1 and up for VGG16)
    # Keep conv weights from detection network and fix them
    #   --> train only rpn
    # buffer region proposals from rpn
    # Keep conv and rpn weights from step 3 and fix them
    #   --> train only detection network
    '''

    # Learning parameters
    rpn_lr_factor = globalvars['rpn_lr_factor']
    rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
    frcn_lr_factor = globalvars['frcn_lr_factor']
    frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]

    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(globalvars['momentum_per_mb'])
    rpn_epochs = globalvars['rpn_epochs']
    frcn_epochs = globalvars['frcn_epochs']

    print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
    print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))
    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])

    # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    feat_norm = image_input - normalization_const
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4), dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # base image classification model (e.g. VGG16 or AlexNet)
    base_model = load_model(base_model_file_name)

    print("stage 1a - rpn")
    if True:
        # Create initial network, only rpn, without detection network
        #            initial weights     train?
        # conv:      base_model          only conv3_1 and up
        # rpn:       init new            yes
        # frcn:      -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node,
                                          proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage1_rpn_network, os.path.join(globalvars['output_path'],
                                                  "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input, roi_input, dims_input)

    print("stage 1b - frcn")
    if True:
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #            initial weights     train?
        # conv:      base_model          only conv3_1 and up
        # rpn:       stage1a rpn model   no --> use buffered proposals
        # frcn:      base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes'])

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers)
        detection_losses = create_detection_losses(cls_score, label_targets, rois,
                                                   bbox_pred, bbox_targets, bbox_inside_weights)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output:
            plot(stage1_frcn_network, os.path.join(globalvars['output_path'],
                                                   "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        buffered_proposals_s1 = None

    print("stage 2a - rpn")
    if True:
        # Keep conv weights from detection network and fix them
        #            initial weights     train?
        # conv:      stage1b frcn model  no
        # rpn:       stage1a rpn model   yes
        # frcn:      -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"],
                          ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage2_rpn_network, os.path.join(globalvars['output_path'],
                                                  "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=rpn_epochs)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input, roi_input, dims_input)

    print("stage 2b - frcn")
    if True:
        # Keep conv and rpn weights from step 3 and fix them
        #            initial weights     train?
        # conv:      stage2a rpn model   no
        # rpn:       stage2a rpn model   no --> use buffered proposals
        # frcn:      stage1b frcn model  yes

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name], [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"],
                           CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output:
            plot(stage2_frcn_network, os.path.join(globalvars['output_path'],
                                                   "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, epochs_to_train=frcn_epochs,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        buffered_proposals_s2 = None

    return create_eval_model(stage2_frcn_network, image_input, dims_input, rpn_model=stage2_rpn_network)
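# A minimal usage sketch for the alternating scheme above. The base-model path and the output
# file name are hypothetical; in the original toolkit the base model is derived from
# cfg["CNTK"].BASE_MODEL and the output path from globalvars['output_path'].
base_model_path = "PretrainedModels/VGG16_ImageNet_Caffe.model"   # hypothetical path
eval_model = train_faster_rcnn_alternating(base_model_path, debug_output=True)
eval_model.save("Output/faster_rcnn_eval_alternating.model")      # hypothetical path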
    return X, Y

get_sample(0)

input_text = C.input_variable((nchars, vocab_size))
output_char = C.input_variable(shape=vocab_size)

model = Sequential([Dense(3000, activation=C.relu),
                    Dense(vocab_size, activation=None)])
z = model(input_text)

ce = cross_entropy_with_softmax(z, output_char)
errs = classification_error(z, output_char)

lr_per_minibatch = learning_rate_schedule(0.01, UnitType.minibatch)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
learner = C.learners.adam(z.parameters, lr_per_minibatch, momentum=momentum_time_constant)
progress_printer = ProgressPrinter(freq=100, tag='Training')
trainer = Trainer(z, (ce, errs), learner, progress_printer)

log_number_of_parameters(z)

for ep in range(1):
    print("Epoch={}".format(ep))
    for mb in range(0, data_size - nchars - 1):
        feat, lab = get_sample(mb)
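# `get_sample` is assumed but not shown above (only its trailing `return X, Y` survives), and
# the inner loop breaks off right after calling it. The sketch below is hypothetical: it one-hot
# encodes `nchars` consecutive characters as the input window and the following character as the
# label, assuming `data` and `char_to_ix` exist in the surrounding script. The commented line
# shows the training step that would typically follow `get_sample(mb)`.
def get_sample(p):
    X = np.zeros((nchars, vocab_size), dtype=np.float32)
    for i in range(nchars):
        X[i, char_to_ix[data[p + i]]] = 1       # one-hot window of input characters
    Y = np.zeros(vocab_size, dtype=np.float32)
    Y[char_to_ix[data[p + nchars]]] = 1         # one-hot next character (label)
    return X, Y

# inside the loop above, training would then typically continue with:
#     trainer.train_minibatch({input_text: [feat], output_char: [lab]})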
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = input((num_channels, image_height, image_width))
    label_input = input(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
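# A hypothetical call to the transfer-learning routine above. The model file, node names,
# image size, class count, and map file are all assumptions here; the correct values depend
# on the base model and dataset actually used.
trained = train_model("ResNet_18.model",                # hypothetical base model file
                      feature_node_name="features",     # assumed node name
                      last_hidden_node_name="z.x",      # assumed node name
                      image_width=224, image_height=224, num_channels=3,
                      num_classes=102,
                      train_map_file="train_map.txt",   # hypothetical map file
                      num_epochs=20, freeze=True)
trained.save("TransferLearning.model")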
def simple_mnist(tensorboard_logdir=None):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)

    z = Sequential([For(range(num_hidden_layers),
                        lambda i: Dense(hidden_layers_dim, activation=relu)),
                    Dense(num_output_classes)])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST")

    path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt"))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    # training_progress_output_freq = 100
    progress_writers = [ProgressPrinter(
        # freq=training_progress_output_freq,
        tag='Training',
        num_epochs=num_sweeps_to_train_with)]

    if tensorboard_logdir is not None:
        progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))

    # Instantiate the trainer object to drive the model training
    lr = learning_parameter_schedule_per_sample(1)
    trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr), progress_writers)

    training_session(
        trainer=trainer,
        mb_source=reader_train,
        mb_size=minibatch_size,
        model_inputs_to_streams=input_map,
        max_samples=num_samples_per_sweep * num_sweeps_to_train_with,
        progress_frequency=num_samples_per_sweep
    ).train()

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    C.debugging.start_profiler()
    C.debugging.enable_profiler()
    C.debugging.set_node_timing(True)
    # C.cntk_py.disable_cpueval_optimization()  # uncomment this to check CPU eval perf without optimization

    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size

    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    C.debugging.stop_profiler()
    trainer.print_node_timing()

    # Average of evaluation errors of all test minibatches
    return test_result / num_minibatches_to_test
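# `create_reader` above is a helper that is not shown. For the CNTK text-format MNIST files
# used here it would typically wrap a CTFDeserializer; a minimal sketch, assuming the stream
# fields in the data files are named 'features' and 'labels':
def create_reader(path, is_training, input_dim, num_label_classes):
    return C.io.MinibatchSource(
        C.io.CTFDeserializer(path, C.io.StreamDefs(
            features=C.io.StreamDef(field='features', shape=input_dim),
            labels=C.io.StreamDef(field='labels', shape=num_label_classes))),
        randomize=is_training,
        max_sweeps=C.io.INFINITELY_REPEAT if is_training else 1)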
input_sequence = sequence.input_variable(shape=vocab_size)
label_sequence = sequence.input_variable(shape=vocab_size)

model = Sequential([
    For(range(2), lambda: Sequential([Stabilizer(), Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)
])
z = model(input_sequence)
z_sm = cntk.softmax(z)

ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                       gradient_clipping_with_truncation=gradient_clipping_with_truncation)
progress_printer = ProgressPrinter(freq=100, tag='Training')
trainer = Trainer(z, (ce, errs), learner, progress_printer)

log_number_of_parameters(z)
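# `z_sm` (the softmax of the network output) is defined above but not used in this fragment;
# it is typically what a character-sampling routine would evaluate. Below is a minimal,
# hypothetical sampler that feeds one-hot characters through the model and draws the next
# character from the softmax distribution; `ix_to_char` and a seed character index are assumed
# to exist in the surrounding script, and `np` is numpy as elsewhere in this file.
def sample_text(num_chars=100, seed_ix=0):
    generated = [seed_ix]
    for _ in range(num_chars):
        onehots = np.eye(vocab_size, dtype=np.float32)[generated]    # (len, vocab_size) one-hot sequence
        probs = z_sm.eval({input_sequence: [onehots]})[0][-1]        # distribution for the last step
        next_ix = np.random.choice(vocab_size, p=probs / probs.sum())
        generated.append(int(next_ix))
    return ''.join(ix_to_char[ix] for ix in generated)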
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height, num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name, num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_parameter_schedule(lr_per_mb)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', log_to_file=log_file_name, num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    batch_index = 0
    plot_data = {'batchindex': list(), 'loss': list(), 'error': list()}
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            # if sample_count % (100 * mb_size) == 0:
            #     print("Processed {0} samples".format(sample_count))

            # For visualization...
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)
            batch_index += 1

        trainer.summarize_training_progress()

    # Visualize training result:
    window_width = 32
    loss_cumsum = np.cumsum(np.insert(plot_data['loss'], 0, 0))
    error_cumsum = np.cumsum(np.insert(plot_data['error'], 0, 0))

    # Moving average.
    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss'] = (loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error'] = (error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width

    plt.figure(1)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss')
    # plt.show()
    plt.savefig(output_figure_loss, bbox_inches='tight')

    plt.figure(2)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error')
    # plt.show()
    plt.savefig(output_figure_error, bbox_inches='tight')

    return tl_model
def simple_mnist(tensorboard_logdir=None):
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 2
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    feature = C.input_variable(input_dim, np.float32)
    label = C.input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), feature)

    z = Sequential([
        For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)),
        Dense(num_output_classes)
    ])(scaled_input)

    ce = cross_entropy_with_softmax(z, label)
    pe = classification_error(z, label)

    data_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(data_dir, 'Train-28x28_cntk_text.txt')

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        feature: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Training config
    minibatch_size = 64
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 10

    # Instantiate progress writers.
    # training_progress_output_freq = 100
    progress_writers = [
        ProgressPrinter(
            # freq=training_progress_output_freq,
            tag='Training',
            num_epochs=num_sweeps_to_train_with)
    ]

    if tensorboard_logdir is not None:
        progress_writers.append(
            TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z))

    # Instantiate the trainer object to drive the model training
    lr = 0.001
    trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr), progress_writers)

    training_session(trainer=trainer,
                     mb_source=reader_train,
                     mb_size=minibatch_size,
                     model_inputs_to_streams=input_map,
                     max_samples=num_samples_per_sweep * num_sweeps_to_train_with,
                     progress_frequency=num_samples_per_sweep).train()

    # Load test data
    path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt"))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        feature: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    C.debugging.start_profiler()
    C.debugging.enable_profiler()
    C.debugging.set_node_timing(True)

    test_minibatch_size = 1024
    num_samples = 10000
    num_minibatches_to_test = num_samples / test_minibatch_size

    test_result = 0.0
    for i in range(0, int(num_minibatches_to_test)):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        eval_error = trainer.test_minibatch(mb)
        test_result = test_result + eval_error

    C.debugging.stop_profiler()
    trainer.print_node_timing()

    # Average of evaluation errors of all test minibatches
    return test_result * 100 / num_minibatches_to_test
def fill(l):
    # left-pad each word's character indices to a fixed length
    if len(l) < input_size:
        return [0] * (input_size - len(l)) + l
    else:
        return l

words_arr = [to_onehot(fill(list(map(char_to_num, list(w))))) for w in words]
labels_arr = [np.array([0, 1], dtype=np.float32) if x == -1 else np.array([1, 0], dtype=np.float32)
              for x in labels]

input_var = C.input_variable((input_size, vocab_size))
label_var = C.input_variable((2))

model = Sequential([Dense(500, activation=C.ops.relu), Dense(2, activation=None)])
z = model(input_var)

ce = cross_entropy_with_softmax(z, label_var)
errs = classification_error(z, label_var)

lr_per_minibatch = learning_rate_schedule(0.02, UnitType.minibatch)
learner = C.learners.sgd(z.parameters, lr_per_minibatch)
progress_printer = ProgressPrinter(freq=100, tag='Training')
trainer = Trainer(z, (ce, errs), learner, progress_printer)

log_number_of_parameters(z)

minibatch_size = 10
for ep in range(20):
    print("Epoch={}".format(ep))
    for mb in range(0, len(words), minibatch_size):
        trainer.train_minibatch({input_var: words_arr[mb:mb + minibatch_size],
                                 label_var: labels_arr[mb:mb + minibatch_size]})
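# `to_onehot`, `char_to_num`, `words`, and `labels` are assumed to be defined earlier in the
# script. Below is a minimal, hypothetical pair of helpers consistent with the usage above:
# a character-to-index mapping (0 reserved for padding) and a routine that turns a fixed-length
# index list into an (input_size, vocab_size) one-hot matrix. The actual mapping and vocabulary
# in the original may differ.
def char_to_num(ch):
    # hypothetical mapping: letters map to 1..26, anything else to the padding index 0
    return ord(ch.lower()) - ord('a') + 1 if ch.isalpha() else 0

def to_onehot(indices):
    onehot = np.zeros((input_size, vocab_size), dtype=np.float32)
    for pos, ix in enumerate(indices):
        onehot[pos, ix] = 1
    return onehot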