def create_criterion(network):
    '''Build the combined training criterion for the two-headed model.

    The network dict supplies a model with two outputs plus matching
    row/col label variables; loss and metric are summed over both heads.
    '''
    model = network['model']
    row_label = network['row_label']
    col_label = network['col_label']

    # Per-head cross-entropy losses and classification-error metrics.
    ce_row = C.cross_entropy_with_softmax(model.outputs[0], row_label)
    ce_col = C.cross_entropy_with_softmax(model.outputs[1], col_label)
    pe_row = C.classification_error(model.outputs[0], row_label)
    pe_col = C.classification_error(model.outputs[1], col_label)

    # Combined criterion: total loss and total error across both heads.
    return (ce_row + ce_col, pe_row + pe_col)
def create_resnet_network(network_name, fp16):
    '''Create an ImageNet ResNet model plus loss and top-1/top-5 metrics.

    Args:
        network_name (str): one of 'resnet18/34/50/101/152'.
        fp16 (bool): when True the graph computes in float16; inputs are
            cast down and the criteria cast back to float32 so the trainer
            aggregates in full precision.

    Returns:
        dict with input/label variables, loss ('ce'), top-1 ('errs') and
        top-5 ('top5Errs') error metrics, and the network output.

    Raises:
        RuntimeError: if network_name is not a known ResNet variant.
    '''
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        stride1x1 = (1, 1)
        stride3x3 = (2, 2)

        # create model, and configure learning parameters
        if network_name == 'resnet18':
            z = create_imagenet_model_basic(graph_input, [2, 1, 1, 2], num_classes)
        elif network_name == 'resnet34':
            z = create_imagenet_model_basic(graph_input, [3, 3, 5, 2], num_classes)
        elif network_name == 'resnet50':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 5, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet101':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 22, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet152':
            z = create_imagenet_model_bottleneck(graph_input, [2, 7, 35, 2], num_classes, stride1x1, stride3x3)
        else:
            # BUG FIX: the exception was previously *returned*, silently
            # handing callers an exception object instead of failing.
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        errs = classification_error(z, graph_label, topN=1)
        top5Errs = classification_error(z, graph_label, topN=5)

    if fp16:
        # Cast criteria back to float32 for stable aggregation.
        ce = C.cast(ce, dtype=np.float32)
        errs = C.cast(errs, dtype=np.float32)
        top5Errs = C.cast(top5Errs, dtype=np.float32)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'errs' : errs,
        'top5Errs' : top5Errs,
        'output': z
    }
def criterion(input, labels):
    '''Sequence criterion: loss and error of the model against the labels
    with the leading sentence-start token removed.'''
    # Drop <s> so targets align with the decoder output:
    # <s> A B C </s>  -->  A B C </s>
    targets = sequence.slice(labels, 1, 0)
    z = model(input, targets)
    loss = cross_entropy_with_softmax(z, targets)
    metric = classification_error(z, targets)
    return (loss, metric)
def test_learner_logging():
    """Verify that per-sample learning-rate and momentum schedule values
    are reported to the progress writer in the expected interleaved order."""
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    # Minimal one-parameter linear model: z = w * a.
    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w

    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter();
    # One schedule entry per sample (epoch size 1), so the values advance
    # on every minibatch.
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                             learning_rate_schedule(lr_values, UnitType.sample, 1),
                             C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    # One log entry per schedule value (lr and momentum combined).
    assert len(writer.log_output) == len(lr_values + m_values)

    # Expected order: lr/momentum values interleaved pairwise, then the
    # final lr value 0 (m_values is one element shorter).
    values = [j for i in zip(lr_values,m_values) for j in i] + [0]

    for i in range(len(values)):
        assert (values[i] == writer.log_output[i])
def criterion(input:InputSequence[C.layers.Tensor[input_vocab_dim]],
              labels:LabelSequence[C.layers.Tensor[label_vocab_dim]]):
    '''Training criterion for the sequence model: cross-entropy loss and
    classification error, computed against labels without the <s> token.'''
    # Strip the sentence-start token: <s> A B C </s> --> A B C </s>
    shifted = C.sequence.slice(labels, 1, 0)
    z = model(input, shifted)
    loss = C.cross_entropy_with_softmax(z, shifted)
    metric = C.classification_error(z, shifted)
    return (loss, metric)
def create_resnet_network(network_name):
    '''Create a CIFAR-10 ResNet model together with its loss and metric.

    Args:
        network_name (str): 'resnet20' or 'resnet110'.

    Returns:
        dict with input/label variables, cross-entropy loss ('ce'),
        classification error ('pe') and the network output.

    Raises:
        RuntimeError: if network_name is not recognized.
    '''
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
    else:
        # BUG FIX: previously the exception was returned, not raised.
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'output': z
    }
def classification_error(output_vector, target_vector, name=''):
    '''
    This operation computes the prediction error. It finds the index of the highest
    value in the output_vector and compares it to the actual ground truth label
    (the index of the hot bit in the target vector). The result is a scalar
    (i.e., one by one matrix). This is often used as an evaluation criterion.
    It cannot be used as a training criterion though since the gradient is not
    defined for it.

    Example:
        >>> C.eval(C.classification_error([1., 2., 3., 4.], [0., 0., 0., 1.]))
        #[0.]

        >>> C.eval(C.classification_error([1., 2., 3., 4.], [0., 0., 1., 0.]))
        #[1.]

    Args:
        output_vector: the output values from the network
        target_vector: it is one-hot vector where the hot bit corresponds to the label index
        name (str): the name of the node in the network

    Returns:
        :class:`cntk.Function`
    '''
    # Import the core op locally: this wrapper shares its name, so a
    # module-level import would be shadowed.
    from cntk import classification_error
    # Sanitize each operand using the other's data type so both end up
    # with a consistent dtype; the order of these two calls matters
    # (the second call sees the already-sanitized output_vector).
    output_vector = sanitize_input(output_vector, get_data_type(target_vector))
    target_vector = sanitize_input(target_vector, get_data_type(output_vector))
    return classification_error(output_vector, target_vector, name).output()
def test_factor_dense_for_prediction():
    """Train a small dense model, factor (rank-reduce) its dense layers,
    and check the factored model's prediction accuracy stays within 50%
    of the original model's accuracy."""
    input_dim = 2
    num_output_classes = 2
    hidden_layer_dim = 50
    num_minibatches_to_train = 2000
    minibatch_size = 25
    learning_rate = 0.5

    input = C.input_variable(input_dim)
    label = C.input_variable(num_output_classes)

    z = _create_model_dense(input, input_dim, hidden_layer_dim, num_output_classes)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    # Run the trainer and perform model training
    training_progress_output_freq = 20
    plotdata = {"batchsize":[], "loss":[], "error":[]}

    for i in range(0, int(num_minibatches_to_train)):
        features, labels = _generate_random_data_sample(minibatch_size, input_dim, num_output_classes)
        # Specify the input variables mapping in the model to actual minibatch data for training
        trainer.train_minibatch({input : features, label : labels})

    # generate some data to predict
    features, labels = _generate_random_data_sample(10, 2, 2)

    # factor the model.
    newz = nc.factor_dense(z, projection_function=_get_rank_reduced_size, filter_function = _filter)
    original_out = C.softmax(z)
    factored_out = C.softmax(newz)

    original_labels_probs = original_out.eval({input : features})
    predicted_label_probs = factored_out.eval({input : features})

    original_prediction_percentage = _percentage_match(labels, original_labels_probs)

    # reduced model should have at leat 50% match compared to the original
    # For the test, we reduced the training minibatches, thus the match is lower.
    assert(original_prediction_percentage * 0.5 <= _percentage_match(labels, predicted_label_probs))
def test_debug_multi_output():
    """Run the debug model over a multi-output combine and check the
    forward/backward traces written to the output stream."""
    input_dim = 2
    num_output_classes = 2

    f_input = input_variable(input_dim, np.float32,
                             needs_gradient=True, name='features')
    p = parameter(shape=(input_dim,), init=10, name='p')

    comb = combine([f_input, p])

    # Answer 'n' (next) to every debugger prompt so training proceeds.
    ins = InStream(['n', 'n', 'n', 'n', 'n'])
    outs = OutStream()

    z = times(comb.outputs[0], comb.outputs[1], name='z')
    z = debug_model(z, ins, outs)

    l_input = input_variable(num_output_classes, np.float32, name='labels')
    loss = cross_entropy_with_softmax(z, l_input)
    eval_error = classification_error(z, l_input)

    _train(z, loss, eval_error,
           loss.find_by_name('features'),
           loss.find_by_name('labels'),
           num_output_classes, 1)

    # outs.written contains something like
    # =================================== forward  ===================================
    # Parameter('p', [], [2]) with uid 'Parameter4'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # =================================== backward ===================================
    # Times: Output('UserDefinedFunction12_Output_0', [#, *], [2]), Output('UserDefinedFunction15_Output_0', [], [2]) -> Output('z', [#, *], [2 x 2]) with uid 'Times21'
    # Input('features', [#, *], [2]) with uid 'Input3'
    # Parameter('p', [], [2]) with uid 'Parameter4'

    # BUG FIX: removed `assert outs.written == out_stuff` — `out_stuff` is
    # not defined anywhere, so the line raised NameError before any real
    # check ran; the structural asserts below cover the intended check.
    assert len(outs.written) == 8

    v_p = "Parameter('p', "
    v_i = "Input('features'"
    v_t = 'Times: '

    assert outs.written[0].startswith('=') and 'forward' in outs.written[0]
    line_1, line_2, line_3 = outs.written[1:4]

    assert outs.written[4].startswith('=') and 'backward' in outs.written[4]
    line_5, line_6, line_7 = outs.written[5:8]
    assert line_5.startswith(v_t)
    assert line_6.startswith(v_p) and line_7.startswith(v_i) or \
           line_6.startswith(v_i) and line_7.startswith(v_p)
def train(nonlinearity, num_hidden_layers, device_id, minibatch_size=10, num_samples=1000):
    """Train a fully-connected classifier on random data on the given
    device; returns (losses, errors) sampled at each progress interval."""
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))

    # Fixed seed so runs are reproducible across devices.
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

    hidden_layers_dim = 50

    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    training_progress_output_freq = 20

    losses = []
    errors = []

    for i in range(num_minibatches_to_train):
        features, labels = generate_random_data_sample(minibatch_size,
                                                       input_dim,
                                                       num_output_classes)

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels},
                                device=cntk_device(device_id))

        batchsize, loss, error = print_training_progress(trainer, i,
                                                         training_progress_output_freq)

        # "NA" is returned between print intervals; only record real values.
        if not (loss == "NA" or error == "NA"):
            losses.append(loss)
            errors.append(error)

    return losses, errors
def test_htk_deserializers():
    """Smoke test: train a small LSTM acoustic model for a few minibatches
    from HTK feature/MLF label deserializers and verify nothing crashes."""
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    # Deserializer paths are relative, so run from the data directory
    # (restored at the end).
    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features = StreamDef(shape=feature_dim,
                                     context=(context,context),
                                     scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels = StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd,ld])

    # Feature input spans the context window on both sides of each frame.
    features = C.input_variable(((2*context+1)*feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential([For(range(3), lambda : Recurrence(LSTM(256))),
                        Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error (z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map={
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
def create_resnet_network(network_name):
    '''Create an ImageNet ResNet model plus loss and top-1/top-5 metrics.

    Args:
        network_name (str): one of 'resnet18/34/50/101/152'.

    Returns:
        dict with input/label variables, loss ('ce'), top-1 ('errs') and
        top-5 ('top5Errs') error metrics, and the network output.

    Raises:
        RuntimeError: if network_name is not a known ResNet variant.
    '''
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    stride1x1 = (1, 1)
    stride3x3 = (2, 2)

    # create model, and configure learning parameters
    if network_name == 'resnet18':
        z = create_imagenet_model_basic(input_var, [2, 1, 1, 2], num_classes)
    elif network_name == 'resnet34':
        z = create_imagenet_model_basic(input_var, [3, 3, 5, 2], num_classes)
    elif network_name == 'resnet50':
        z = create_imagenet_model_bottleneck(input_var, [2, 3, 5, 2], num_classes, stride1x1, stride3x3)
    elif network_name == 'resnet101':
        z = create_imagenet_model_bottleneck(input_var, [2, 3, 22, 2], num_classes, stride1x1, stride3x3)
    elif network_name == 'resnet152':
        z = create_imagenet_model_bottleneck(input_var, [2, 7, 35, 2], num_classes, stride1x1, stride3x3)
    else:
        # BUG FIX: previously returned the exception instead of raising it.
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    errs = classification_error(z, label_var, topN=1)
    top5Errs = classification_error(z, label_var, topN=5)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'errs' : errs,
        'top5Errs' : top5Errs,
        'output': z
    }
def create_model(self, frame_mode=False):
    '''Build the network and its criteria on this instance.

    In frame mode a feed-forward dense net over single frames is built;
    otherwise an LSTM recurrence over sequences. Sets self.feat,
    self.label, self.output, self.ce and self.err.
    '''
    if frame_mode:
        # Frame mode: plain (non-sequence) inputs and a dense stack.
        self.feat = cntk.input_variable(shape=(feat_dim,))
        self.label = cntk.input_variable((label_dim,))
        dense_net = cntk.layers.Sequential([
            cntk.layers.Dense(cell_dim),
            cntk.layers.Dense(label_dim)])
        self.output = dense_net(self.feat)
    else:
        # Sequence mode: sequence inputs feeding a single LSTM recurrence.
        self.feat = cntk.sequence.input_variable(shape=(feat_dim,))
        self.label = cntk.sequence.input_variable((label_dim,))
        recurrent_net = cntk.layers.Sequential([
            cntk.layers.Recurrence(
                cntk.layers.LSTM(shape=label_dim, cell_shape=(cell_dim,)))])
        self.output = recurrent_net(self.feat)

    # Shared criteria for both modes.
    self.ce = cntk.cross_entropy_with_softmax(self.output, self.label)
    self.err = cntk.classification_error(self.output, self.label)
def train_sequence_classifier():
    """Train an LSTM sequence classifier on the CTF-format text data and
    return (evaluation average, loss average) of the last minibatch."""
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(
        features, num_output_classes, embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    # Training data lives in the repository's end-to-end test tree,
    # resolved relative to this file.
    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_parameter_schedule_per_sample(0.0005)
    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    """Drive a training session from a UserMinibatchSource and verify the
    expected sample count is consumed; optionally also verify that the
    checkpoint-capable source's restore hook is exercised by CV."""
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
            classification_error, learning_parameter_schedule_per_sample, sgd, Trainer, \
            training_session, times

    # Minimal linear model over the summed sequence features.
    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    #having a large learning rate to prevent the model from converging earlier where not all the intended samples are fed
    #note that training session can end earlier if there is no updates
    lr_per_sample = learning_parameter_schedule_per_sample(0.3)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }

    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config = C.CrossValidationConfig(minibatch_source=mbs_cv, max_samples=10,
            minibatch_size=2)
    )
    session.train()

    # max_samples=20 must be consumed exactly.
    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
def ffnet(learner, trainer=None):
    """Train a tiny feedforward classifier on random data.

    When trainer is None a fresh model and trainer are built using the
    given learner factory; otherwise training continues on the provided
    trainer. Returns (last average training loss, test error, trainer).
    """
    inputs = 5
    outputs = 3
    layers = 2
    hidden_dimension = 3

    if trainer is None:
        # input variables denoting the features and label data
        features = C.input_variable((inputs), np.float32)
        label = C.input_variable((outputs), np.float32)

        # Instantiate the feedforward classification model
        my_model = Sequential ([
                        Dense(hidden_dimension, activation=C.sigmoid,
                              init=C.glorot_uniform(seed=98052)),
                        Dense(outputs, init=C.glorot_uniform(seed=98052))])
        z = my_model(features)

        ce = C.cross_entropy_with_softmax(z, label)
        pe = C.classification_error(z, label)

        # Instantiate the trainer object to drive the model training
        progress_printer = ProgressPrinter(0)
        trainer = C.Trainer(z, (ce, pe), [learner(z.parameters)], [progress_printer])
    else:
        # NOTE(review): assumes the loss function's arguments are ordered
        # (features, label) — confirm for any externally supplied trainer.
        features = trainer.loss_function.arguments[0]
        label = trainer.loss_function.arguments[1]

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 100

    aggregate_loss = 0.0
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features : train_features, label : labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error, trainer
def create_binary_convolution_model():
    '''Build the binary-convolution CIFAR network.

    Returns a combined Function over [output, loss, metric]; the loss
    includes a binary regularization term (ala Gang Hua) that pushes
    filter weights toward +/-1.
    '''
    # Input variables denoting the features and label data.
    # FIX: C.input is a deprecated alias — use C.input_variable, which is
    # what every other model builder in this codebase uses.
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # apply model to input: scale pixel values from [0, 255] to [0, 1)
    scaled_input = C.element_times(C.constant(0.00390625), feature_var)

    # first layer is ok to be full precision
    z = C.layers.Convolution((3, 3), 32, pad=True, activation=C.relu)(scaled_input)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution(z, (3,3), 128, channels=32, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution(z, (3,3), 128, channels=128, pad=True)
    z = C.layers.MaxPooling((3,3), strides=(2,2))(z)
    z = C.layers.BatchNormalization(map_rank=1)(z)

    z = BinaryConvolution(z, (1,1), num_classes, channels=128, pad=True)
    z = C.layers.AveragePooling((z.shape[1], z.shape[2]))(z)
    z = C.reshape(z, (num_classes,))

    # Add binary regularization (ala Gang Hua)
    weight_sum = C.constant(0)
    for p in z.parameters:
        if (p.name == "filter"):
            weight_sum = C.plus(weight_sum, C.reduce_sum(C.minus(1, C.square(p))))
    bin_reg = C.element_times(.000005, weight_sum)

    # After the last layer, we need to apply a learnable scale
    SP = C.parameter(shape=z.shape, init=0.001)
    z = C.element_times(z, SP)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    ce = C.plus(ce, bin_reg)
    pe = C.classification_error(z, label_var)

    return C.combine([z, ce, pe])
def create_recurrent_network():
    '''Build the 3-layer LSTM acoustic model plus its loss and metric.

    Returns a dict exposing the feature/label sequence inputs, the
    cross-entropy loss ('ce'), classification error ('errs') and the
    network output.
    '''
    # Input variables denoting the features and label data; the feature
    # input covers the context window on both sides of each frame.
    feature_seq = sequence.input_variable(((2*context+1)*feature_dim))
    label_seq = sequence.input_variable((num_classes))

    # Three stacked LSTM recurrences followed by a dense classifier.
    lstm_stack = Sequential([
        For(range(3), lambda : Recurrence(LSTM(256))),
        Dense(num_classes)])
    z = lstm_stack(feature_seq)

    ce = cross_entropy_with_softmax(z, label_seq)
    errs = classification_error(z, label_seq)

    return {
        'feature': feature_seq,
        'label': label_seq,
        'ce': ce,
        'errs': errs,
        'output': z
    }
def ffnet(optimizer, num_minibatches_to_train, learning_rate_func, lr_args, learner_kwargs):
    """Train a two-layer sigmoid classifier using the supplied optimizer
    and learning-rate schedule factory; returns the trained parameters.

    Args:
        optimizer: learner factory (e.g. C.sgd, C.adam).
        num_minibatches_to_train: number of training minibatches.
        learning_rate_func: builds the lr schedule from (0.125, *lr_args).
        lr_args: extra positional args for learning_rate_func.
        learner_kwargs: extra kwargs, applied only when optimizer is sgd.
    """
    inputs = 2
    outputs = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential([
        Dense(hidden_dimension, activation=C.sigmoid,
              init=C.glorot_uniform(seed=SEED)),
        Dense(outputs, init=C.glorot_uniform(seed=SEED))])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr= learning_rate_func(0.125, *lr_args)
    progress_printer = ProgressPrinter(0)
    # sgd is special-cased so learner_kwargs can be forwarded to it.
    learner = optimizer(z.parameters, lr) if optimizer != sgd \
              else sgd(z.parameters, lr, **learner_kwargs)
    trainer = C.Trainer(z, (ce, pe), [learner], progress_printer)

    # Get minibatches of training data and perform model training
    minibatch_size = 25

    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(
            minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})

    test_features, test_labels = generate_random_data(
        minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch(
        {features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return z.parameters
def ffnet():
    '''Train a small two-layer sigmoid classifier on random data.

    Returns (average training loss, error rate on an unseen minibatch).
    '''
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    net = Sequential([
        Dense(hidden_dimension, activation=C.sigmoid),
        Dense(outputs)])
    z = net(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = C.learning_parameter_schedule(0.125)
    progress_printer = ProgressPrinter(0)
    trainer = C.Trainer(z, (ce, pe),
                        [sgd(z.parameters, lr=lr_per_minibatch)],
                        [progress_printer])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    aggregate_loss = 0.0
    for _ in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})
        sample_count = trainer.previous_minibatch_sample_count
        aggregate_loss += trainer.previous_minibatch_loss_average * sample_count

    last_avg_error = aggregate_loss / trainer.total_number_of_samples_seen

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features: test_features, label: test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
def test_udf_checkpointing(tmpdir):
    """Checkpoint a trainer over a model containing a user-defined
    function and verify the saved checkpoint dictionary is non-empty."""
    dev, w_value, c1_value, c2_value, op = build_test_function()

    label = C.constant(np.asarray([[1, 2], [3, 4]]).astype(np.float32))

    loss = C.cross_entropy_with_softmax(op, label)
    eval_error = C.classification_error(op, label)

    lr_schedule = C.learning_rate_schedule(0.5, C.UnitType.minibatch)
    learner = C.sgd(op.parameters, lr_schedule)
    trainer = C.Trainer(op, (loss, eval_error), [learner])

    # Run one update so the trainer carries real state to serialize.
    minibatch = np.random.random((2, 2)).astype(np.float32)
    trainer.train_minibatch({op.arguments[0]: minibatch}, device=dev)

    filepath = str(tmpdir / 'test_checkpointing.out')

    trainer.save_checkpoint(filepath, external_state={'test': 'test'})

    d = C.cntk_py.Dictionary.load(filepath)
    assert len(d.keys()) != 0
def ffnet():
    '''Train a small two-layer sigmoid classifier on random data.

    Returns (average training loss since start, error rate on an unseen
    minibatch).
    '''
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential ([
                    Dense(hidden_dimension, activation=C.sigmoid),
                    Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    # FIX: Trainer takes the loss and metric as a single (loss, metric)
    # tuple; the previous positional form `Trainer(z, ce, pe, ...)` was the
    # legacy signature and is inconsistent with the other ffnet variants
    # in this file.
    trainer = C.Trainer(z, (ce, pe), [sgd(z.parameters, lr=lr_per_minibatch)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    pp = ProgressPrinter(0)
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features : train_features, label : labels})
        pp.update_with_trainer(trainer)
    last_avg_error = pp.avg_loss_since_start()

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({features : test_features, label : test_labels})
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
def create_resnet_network(network_name, fp16):
    '''Create a CIFAR-10 ResNet model plus loss/metric, optionally in fp16.

    When fp16 is True the graph computes in float16 (inputs cast down,
    criteria cast back to float32 for stable aggregation).

    Raises:
        RuntimeError: if network_name is not 'resnet20' or 'resnet110'.
    '''
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        # create model, and configure learning parameters
        if network_name == 'resnet20':
            z = create_cifar10_model(graph_input, 3, num_classes)
        elif network_name == 'resnet110':
            z = create_cifar10_model(graph_input, 18, num_classes)
        else:
            # BUG FIX: previously returned the exception instead of raising.
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        pe = classification_error(z, graph_label)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        pe = C.cast(pe, dtype=np.float32)

    return {
        'name' : network_name,
        'feature': input_var,
        'label': label_var,
        'ce' : ce,
        'pe' : pe,
        'output': z
    }
def create_conv_network():
    '''Build the ConvNet CIFAR-10 model with its loss and metric nodes.

    Returns a dict with the feature/label input variables, cross-entropy
    loss ('ce'), classification error ('pe') and the network output.
    '''
    # Input variables denoting the features and label data
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # Scale raw pixel values by 1/256 before feeding the conv stack.
    normalized = C.element_times(C.constant(0.00390625), feature_var)
    z = create_convnet_cifar10_model(num_classes)(normalized)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    C.logging.log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'output': z
    }
def train_and_evaluate(reader_train, reader_test, max_epochs, model_func):
    """Train model_func on the train reader, evaluate on the test reader,
    plot smoothed loss/error curves, and return the softmax-wrapped model."""
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # Normalize the input
    feature_scale = 1.0 / 256.0
    input_var_norm = C.element_times(feature_scale, input_var)

    # apply model to input
    z = model_func(input_var_norm, out_dims=10)

    #
    # Training action
    #

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    epoch_size = 50000
    minibatch_size = 64

    # Set training parameters: stepped lr decay, fixed momentum, L2 decay.
    lr_per_minibatch = C.learning_parameter_schedule([0.01]*10 + [0.003]*10 + [0.001],
                                                     epoch_size = epoch_size)
    momentums = C.momentum_schedule(0.9, minibatch_size = minibatch_size)
    l2_reg_weight = 0.001

    # trainer object
    learner = C.momentum_sgd(z.parameters,
                             lr = lr_per_minibatch,
                             momentum = momentums,
                             l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), [learner], [progress_printer])

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z) ; print()

    # perform model training
    batch_index = 0
    plot_data = {'batchindex':[], 'loss':[], 'error':[]}
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)                            # update model with it

            sample_count += data[label_var].num_samples              # count samples processed so far

            # For visualization...
            plot_data['batchindex'].append(batch_index)
            plot_data['loss'].append(trainer.previous_minibatch_loss_average)
            plot_data['error'].append(trainer.previous_minibatch_evaluation_average)

            batch_index += 1
        trainer.summarize_training_progress()

    #
    # Evaluation action
    #
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch

        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.1f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    # Visualize training result: moving average over a 32-batch window.
    window_width = 32
    loss_cumsum = np.cumsum(np.insert(plot_data['loss'], 0, 0))
    error_cumsum = np.cumsum(np.insert(plot_data['error'], 0, 0))

    # Moving average.
    plot_data['batchindex'] = np.insert(plot_data['batchindex'], 0, 0)[window_width:]
    plot_data['avg_loss'] = (loss_cumsum[window_width:] - loss_cumsum[:-window_width]) / window_width
    plot_data['avg_error'] = (error_cumsum[window_width:] - error_cumsum[:-window_width]) / window_width

    plt.figure(1)
    plt.subplot(211)
    plt.plot(plot_data["batchindex"], plot_data["avg_loss"], 'b--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Loss')
    plt.title('Minibatch run vs. Training loss ')
    plt.show()

    plt.subplot(212)
    plt.plot(plot_data["batchindex"], plot_data["avg_error"], 'r--')
    plt.xlabel('Minibatch number')
    plt.ylabel('Label Prediction Error')
    plt.title('Minibatch run vs. Label Prediction Error ')
    plt.show()

    return C.softmax(z)
def criterion(x, y):
    '''Loss/metric pair: cross-entropy with softmax and classification
    error of the model applied to the normalized input.'''
    logits = model(normalize(x))
    loss = cross_entropy_with_softmax(logits, y)
    metric = classification_error (logits, y)
    return (loss, metric)
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    """Train and evaluate a simple 3D-convolution video classifier on UCF11.

    train_reader / test_reader: project reader objects exposing geometry
    attributes (height, width, channel_count, sequence_length, label_count),
    size(), reset(), has_more() and next_minibatch().
    Returns the average test error (metric_numer / metric_denom).
    """
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data.
    input_var = C.input_variable((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = C.input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with C.default_options (activation=C.relu):
        z = C.layers.Sequential([
            C.layers.Convolution3D((3,3,3), 64, pad=True),
            # Pool only spatially in the first stage, preserving time depth.
            C.layers.MaxPooling((1,2,2), (1,2,2)),
            C.layers.For(range(3), lambda i: [
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.Convolution3D((3,3,3), [96, 128, 128][i], pad=True),
                C.layers.MaxPooling((2,2,2), (2,2,2))
            ]),
            C.layers.For(range(2), lambda : [
                C.layers.Dense(1024),
                C.layers.Dropout(0.5)
            ]),
            C.layers.Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error.
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    train_epoch_size = train_reader.size()
    train_minibatch_size = 2

    # Set learning parameters: per-sample LR schedule stepped down twice.
    lr_per_sample = [0.01]*10+[0.001]*10+[0.0001]
    lr_schedule = C.learning_rate_schedule(lr_per_sample, epoch_size=train_epoch_size, unit=C.UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule = C.momentum_as_time_constant_schedule([momentum_time_constant])

    # Instantiate the trainer object to drive the model training
    learner = C.momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z) ; print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()
        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(train_minibatch_size)
            trainer.train_minibatch({input_var : videos, label_var : labels})
        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size = test_reader.size()
    test_minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(test_minibatch_size)
        # minibatch data to be trained with; weight the batch metric by its size.
        metric_numer += trainer.test_minibatch({input_var : videos, label_var : labels}) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom
# Network inputs. NOTE(review): `input` shadows the builtin, but it is kept
# because surrounding code refers to these module-level names.
input = cntk.input_variable(inp_dim)
labels = cntk.input_variable(output_classes)

def net_model(feature):
    """Single linear (softmax-trained) layer mapping features to class scores."""
    with default_options(init=cntk.glorot_uniform()):
        layers = Dense(output_classes, activation=None)(feature)
        return layers

# Scale raw pixel values into [0, 1] before feeding the model.
input_layer = input / 255.0
net = net_model(input_layer)

# Loss / metric and SGD trainer wiring.
loss = cntk.cross_entropy_with_softmax(net, labels)
error = cntk.classification_error(net, labels)

learning_rate = 0.2
learning_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
learner = cntk.sgd(net.parameters, learning_schedule)
trainer = cntk.Trainer(net, (loss, error), [learner])

def cumulative_avg(arr, diff=5):
    """Smooth `arr` with a trailing moving average of window `diff`.

    The first `diff` entries are returned unchanged; each later entry is the
    mean of the preceding `diff` values (same convention as the file's
    `moving_average` helper). Returns a new list/copy, never `arr` itself.

    Bug fixed: the original computed `np.cumsum(arr, axis=None) / 5` — a whole
    cumulative-sum ARRAY (not a scalar window mean) — and hard-coded 5 instead
    of using the `diff` parameter.
    """
    if len(arr) < diff:
        return arr[:]  # copy, so callers can't mutate the original
    return [val if ids < diff else sum(arr[(ids - diff):ids]) / diff
            for ids, val in enumerate(arr)]
def mem_leak_check(nonlinearity, num_hidden_layers, device_id,
                   minibatch_size=1, num_samples=10000):
    """Train a small classifier repeatedly and fail if memory usage keeps growing.

    nonlinearity -- activation passed to fully_connected_classifier_net.
    num_hidden_layers -- depth of the test network.
    device_id -- CNTK device selector (resolved via cntk_device).
    Raises ValueError when both the fraction of memory-increasing iterations
    and the absolute memory growth exceed their tolerances.
    """
    from cntk.cntk_py import always_allow_setting_default_device
    always_allow_setting_default_device()
    C.try_set_default_device(cntk_device(device_id))
    # Fixed seed so the synthetic data (and thus the run) is deterministic.
    np.random.seed(0)

    learning_rate = 0.5
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    hidden_layers_dim = 50

    # input_dim / num_output_classes come from module scope.
    inp = C.input_variable((input_dim), np.float32)
    label = C.input_variable((num_output_classes), np.float32)

    z = fully_connected_classifier_net(inp, num_output_classes, hidden_layers_dim,
                                       num_hidden_layers, nonlinearity)

    loss = C.cross_entropy_with_softmax(z, label)
    eval_error = C.classification_error(z, label)

    learner = C.sgd(z.parameters, lr_schedule)
    trainer = C.Trainer(z, (loss, eval_error), [learner])

    num_minibatches_to_train = int(num_samples / minibatch_size)

    # One memory sample per minibatch, taken just before training it.
    mem = np.zeros(num_minibatches_to_train)

    features, labels = generate_random_data_sample(minibatch_size,
                                                   input_dim,
                                                   num_output_classes)

    # Set a maximum fraction of iterations, in which the memory is allowed to
    # increase. Most likely these will be the first training runs.
    # Long-term this test needs to be run in a separate process over a longer
    # period of time.
    MEM_INCREASE_FRACTION_TOLERANCE = 0.01
    # Set a maximum allowed memory increase. This is required because the
    # pytest process involves some memory fluctuations.
    MEM_INCREASE_TOLERANCE = 1024*1024

    dev = cntk_device(device_id)
    i = 0
    while i < num_minibatches_to_train:
        mem[i] = mem_used()

        # Specify the input variables mapping in the model to actual minibatch
        # data for training.
        trainer.train_minibatch({inp: features, label: labels}, device=dev)
        i += 1

    mem_deltas = np.diff(mem)
    iterations_with_mem_increase = (mem_deltas > 0).sum()
    mem_inc_fraction = iterations_with_mem_increase/num_minibatches_to_train

    # NOTE(review): baseline is minibatch #10 ("after the dust settled");
    # assumes num_minibatches_to_train > 10 — true for the defaults.
    mem_diff = mem[-1] - mem[10]

    if mem_inc_fraction > MEM_INCREASE_FRACTION_TOLERANCE and \
            mem_diff > MEM_INCREASE_TOLERANCE:
        # For the rough leak estimation we take the memory footprint after the
        # dust of the first train_minibatch runs has settled.
        mem_changes = mem_deltas[mem_deltas != 0]
        raise ValueError('Potential memory leak of ~ %i KB (%i%% of MBs '
                         'increased memory usage) detected with %s:\n%s' %
                         (int(mem_diff/1024), int(mem_inc_fraction*100), nonlinearity, mem_changes))
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs,
                       profiler_dir=None, model_dir=None, log_dir=None,
                       tensorboard_logdir=None, gen_heartbeat=False, fp16=False):
    """Train a CIFAR-10 ResNet ('resnet20' or 'resnet110') and evaluate it.

    reader_train / reader_test -- minibatch sources exposing .streams.features/.labels.
    epoch_size -- samples per training epoch; max_epochs -- epochs to run.
    Optional dirs enable profiling, model checkpoints, file logging, TensorBoard.
    fp16 -- build the graph in float16, casting loss/metric back to float32.
    Returns the average test error (metric_numer / metric_denom).
    """
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        # Cast inputs to half precision; the raw float32 vars stay bound to the readers.
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        # create model, and configure learning parameters
        if network_name == 'resnet20':
            z = create_cifar10_model(graph_input, 3, num_classes)
            lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01]
        elif network_name == 'resnet110':
            z = create_cifar10_model(graph_input, 18, num_classes)
            # Warm-up minibatch at 0.1 before the main 1.0 phase.
            lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01]
        else:
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        pe = classification_error(z, graph_label)

    if fp16:
        # Aggregate loss/metric in float32 for numeric stability of reporting.
        ce = C.cast(ce, dtype=np.float32)
        pe = C.cast(pe, dtype=np.float32)

    # shared training parameters
    minibatch_size = 128
    l2_reg_weight = 0.0001

    # Set learning parameters: convert the per-minibatch schedule to per-sample.
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size)

    # progress writers
    progress_writers = [
        ProgressPrinter(tag='Training', log_to_file=log_dir,
                        num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)
    ]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(
                os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 9312
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with; weight the metric by batch size.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer / metric_denom
def test_sweep_based_schedule(tmpdir, device_id):
    """Verify that a sweep-based LR schedule advances exactly at sweep boundaries.

    Builds a 2-sequence CTF file, trains minibatch-by-minibatch, and asserts
    the learner's rate steps through [0.3, 0.2, 0.1, 0.0] as sweeps complete.
    """
    from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs
    from cntk import cross_entropy_with_softmax, classification_error, plus, reduce_sum, sequence
    from cntk import Trainer

    input_dim = 69

    # Two sequences (ids 0 and 2); one full pass over both = one sweep.
    ctf_data = '''\
0   |S0 3:1 |S1 3:1 |# <s>
0   |S0 4:1 |# A    |S1 32:1 |# ~AH
0   |S0 5:1 |# B    |S1 36:1 |# ~B
0   |S0 4:1 |# A    |S1 31:1 |# ~AE
0   |S0 7:1 |# D    |S1 38:1 |# ~D
0   |S0 12:1 |# I   |S1 47:1 |# ~IY
0   |S0 1:1 |# </s> |S1 1:1 |# </s>
2   |S0 60:1 |# <s> |S1 3:1 |# <s>
2   |S0 61:1 |# A   |S1 32:1 |# ~AH
'''
    ctf_file = str(tmpdir/'2seqtest.txt')
    with open(ctf_file, 'w') as f:
        f.write(ctf_data)

    # randomize=False so sweep boundaries are hit deterministically.
    mbs = MinibatchSource(CTFDeserializer(ctf_file, StreamDefs(
        features  = StreamDef(field='S0', shape=input_dim,  is_sparse=True),
        labels    = StreamDef(field='S1', shape=input_dim,  is_sparse=True)
    )), randomize=False)

    in1 = sequence.input_variable(shape=(input_dim,))
    labels = sequence.input_variable(shape=(input_dim,))
    p = parameter(shape=(input_dim,), init=10)
    z = plus(in1, reduce_sum(p), name='z')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])

    input_map = {
        in1  : mbs.streams.features,
        labels : mbs.streams.labels
    }

    # fetch minibatch (first sequence)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.3

    # fetch minibatch (second sequence, sweep ends at this point)
    data = mbs.next_minibatch(1, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.2

    # fetch minibatch (both sequences -- entire sweep in one go)
    data = mbs.next_minibatch(9, input_map=input_map)
    trainer.train_minibatch(data)
    assert learner.learning_rate() == 0.1

    # fetch minibatch (multiple sweeps)
    data = mbs.next_minibatch(30, input_map=input_map)
    trainer.train_minibatch(data, outputs=[z.output])
    assert learner.learning_rate() == 0.0
def trainNet(args):
    """Train the policy/value network from generator-fed minibatches.

    args -- parsed CLI namespace; uses .fp16, .heatMap, .lr, .cycleLr,
    .cycleMax, .optLr. Relies on module-level config (featurePath, labelPath,
    batchSize, maxEpochs, netFilters, resBlockCount, saveDir, netName).
    Saves a checkpoint per epoch named with accuracy and mean loss.
    """
    # Crash doesn't seem to occur with this flag,
    # unfortunatly, it reduces training speed by about 35%
    #os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

    # Instantiate generators for both training and
    # validation datasets. Grab their generator functions
    # TODO: Command line args
    # TODO: Better system for using files testing/validation than ranges?
    tFileShp = (1, 598)
    vFileShp = (0, 1)
    gen = Generator(featurePath, labelPath, tFileShp, batchSize, loadSize=3)
    valGen = Generator(featurePath, labelPath, vFileShp, batchSize, loadSize=1)
    g = gen.generator()
    vg = valGen.generator()

    inputVar = cntk.ops.input_variable((BoardDepth, BoardLength, BoardLength), name='features')
    policyVar = cntk.ops.input_variable((BoardSize))
    valueVar = cntk.ops.input_variable((2))

    if args.fp16:
        # NOTE(review): these cast results are discarded, so the casts have no
        # effect on the graph — compare the head's pattern where the cast
        # output is assigned (graph_input = C.cast(...)). Confirm intent.
        cntk.cast(inputVar, dtype=np.float16)
        cntk.cast(policyVar, dtype=np.float16)
        cntk.cast(valueVar, dtype=np.float16)

    net, epochOffset = loadModel(args, inputVar, netFilters, resBlockCount)

    # Show a heatmap of network outputs
    # over an input board state
    if args.heatMap:
        hmap = NetHeatMap(net, g)
        hmap.genHeatmap(args.heatMap)

    # Loss and accuracy: two-headed network (policy = outputs[0], value = outputs[1]).
    policyLoss = cntk.cross_entropy_with_softmax(net.outputs[0], policyVar)
    valueLoss = cntk.cross_entropy_with_softmax(net.outputs[1], valueVar)
    loss = policyLoss + valueLoss

    # TODO: Figure out how to display/report both errors
    # element_not turns error-rate into accuracy for reporting.
    policyError = cntk.element_not(cntk.classification_error(net.outputs[0], policyVar))
    valueError = cntk.element_not(cntk.classification_error(net.outputs[1], valueVar))
    #error = (valueError + policyError) / 2
    #error = valueError
    error = policyError

    if args.fp16:
        loss = cntk.cast(loss, dtype=np.float32)
        error = cntk.cast(error, dtype=np.float32)

    # Learning-rate selection: fixed, cyclical, or LR-finder sweep.
    lrc = args.lr
    if args.cycleLr[0]:
        lrc = learningRateCycles(*args.cycleLr, gen.stepsPerEpoch, args.cycleMax)
        lrc = lrc * maxEpochs
    elif args.optLr:
        lrc = findOptLr(maxEpochs, *args.optLr, gen.stepsPerEpoch)

    lrc = cntk.learners.learning_parameter_schedule(lrc, batchSize, batchSize)
    learner = cntk.adam(net.parameters, lrc, momentum=0.9, minibatch_size=batchSize, l2_regularization_weight=0.0001)
    #learner = cntk.adadelta(net.parameters, lrc, l2_regularization_weight=0.0001) # Test adelta out!

    # TODO: Figure out how to write multiple 'metrics'
    tbWriter = cntk.logging.TensorBoardProgressWriter(freq=1, log_dir='./TensorBoard/', model=net)
    progressPrinter = cntk.logging.ProgressPrinter(tag='Training', num_epochs=maxEpochs)
    trainer = cntk.Trainer(net, (loss, error), learner, [progressPrinter, tbWriter])

    # TODO: Replace model load with loading/saving checkpoints!
    # So we can store learners state et al
    #trainer.restore_from_checkpoint(findLatestModel('latest'))
    #checkpointFreq = gen.stepsPerEpoch // checkpointFreq

    ls = []          # per-minibatch losses within the current epoch
    losses = []      # [epoch, mean loss] per epoch
    #valueAccs = []
    #policyAccs = []

    for epoch in range(maxEpochs):
        miniBatches = 0
        while miniBatches < gen.stepsPerEpoch:
            X, Y, W = next(g)
            miniBatches += 1
            trainer.train_minibatch({net.arguments[0]: X, policyVar: Y, valueVar: W})
            ls.append(trainer.previous_minibatch_loss_average)

        trainer.summarize_training_progress()
        policyAcc, valueAcc = printAccuracy(net, 'Validation Acc %', vg, valGen.stepsPerEpoch)

        losses.append([epoch, sum(ls) / gen.stepsPerEpoch])
        ls.clear()
        #policyAccs.append([epoch, policyAcc])
        #valueAccs.append([epoch, valueAcc])

        # Checkpoint name encodes epoch (offset by loaded model), accuracies, mean loss.
        net.save(saveDir + netName + '_{}_{}_{}_{:.3f}.dnn'.format(
            epoch + 1 + epochOffset, policyAcc, valueAcc, losses[epoch][1]))
def create_network(input_vocab_dim, label_vocab_dim):
    """Build an encoder-decoder (seq2seq) LSTM translation network.

    Returns a dict with raw inputs, criterion nodes ('ce', 'pe'), the
    greedy-decoding clone 'ng' (ground truth replaced by network output),
    and the raw output 'output'.
    """
    # network complexity; initially low for faster testing
    hidden_dim = 256
    num_layers = 1

    # Source and target inputs to the model: separate dynamic axes so the
    # source and target sequences may have different lengths.
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')
    raw_input = sequence.input(shape=(input_vocab_dim), sequence_axis=input_seq_axis, name='raw_input')
    raw_labels = sequence.input(shape=(label_vocab_dim), sequence_axis=label_seq_axis, name='raw_labels')

    # Instantiate the sequence to sequence translation model
    input_sequence = raw_input

    # Drop the sentence start token from the label, for decoder training
    label_sequence = sequence.slice(raw_labels, 1, 0)  # <s> A B C </s> --> A B C </s>
    label_sentence_start = sequence.first(raw_labels)  # <s>

    is_first_label = sequence.is_first(label_sequence)  # <s> 0 0 0 ...
    label_sentence_start_scattered = sequence.scatter(label_sentence_start, is_first_label)

    # Encoder: runs right-to-left (future_value recurrence hooks).
    encoder_outputH = stabilize(input_sequence)
    for i in range(0, num_layers):
        (encoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            encoder_outputH.output, hidden_dim, hidden_dim, future_value, future_value)

    # Final encoder state, broadcast along the label sequence axis so it can
    # seed the decoder at the first step.
    thought_vectorH = sequence.first(encoder_outputH)
    thought_vectorC = sequence.first(encoder_outputC)
    thought_vector_broadcastH = sequence.broadcast_as(thought_vectorH, label_sequence)
    thought_vector_broadcastC = sequence.broadcast_as(thought_vectorC, label_sequence)

    # Decoder
    decoder_history_hook = alias(label_sequence, name='decoder_history_hook')  # copy label_sequence
    # At the first step feed <s>; afterwards feed the previous history element.
    decoder_input = element_select(is_first_label, label_sentence_start_scattered, past_value(decoder_history_hook))

    decoder_outputH = stabilize(decoder_input)
    for i in range(0, num_layers):
        if (i > 0):
            recurrence_hookH = past_value
            recurrence_hookC = past_value
        else:
            # First layer: at the first step, recur from the encoder's thought
            # vector instead of the (empty) past value.
            isFirst = sequence.is_first(label_sequence)
            recurrence_hookH = lambda operand: element_select(
                isFirst, thought_vector_broadcastH, past_value(operand))
            recurrence_hookC = lambda operand: element_select(
                isFirst, thought_vector_broadcastC, past_value(operand))

        # NOTE(review): the cell state is assigned to `encoder_outputC` here,
        # overwriting the encoder's variable; the decoder's cell output is
        # otherwise unused, so behavior is unaffected, but the name looks like
        # a typo for `decoder_outputC` — confirm against upstream sample.
        (decoder_outputH, encoder_outputC) = LSTMP_component_with_self_stabilization(
            decoder_outputH.output, hidden_dim, hidden_dim, recurrence_hookH, recurrence_hookC)

    decoder_output = decoder_outputH

    # Softmax output layer
    z = linear_layer(stabilize(decoder_output), label_vocab_dim)

    # Criterion nodes
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # network output for decoder history
    net_output = hardmax(z)

    # make a clone of the graph where the ground truth is replaced by the network output
    ng = z.clone(CloneMethod.share, {decoder_history_hook.output: net_output.output})

    return {
        'raw_input': raw_input,
        'raw_labels': raw_labels,
        'ce': ce,
        'pe': errs,
        'ng': ng,
        'output': z
    }
def train():
    """Train the LSTM sequence classifier and validate each epoch.

    Reads module-level config (numFeatures, numClasses, lstmLayers, lstmSize,
    batchSize, timeSteps, timeShift, threshold, numEpochs) and the intel CTF
    data files. Prints progress via ProgressPrinter.

    Fix: the validation loop iterated `range(minibatches)` (the TRAINING batch
    count) even though `validBatches` was computed for exactly this purpose,
    so test_minibatch consumed more validation batches per epoch than exist.
    It now iterates `range(validBatches)`.
    """
    # TODO: Need to add a method that reads exact sample size when
    # we're loading data that's already been converted
    #convertData(dataPath, 'intel', threshold, timeSteps, timeShift, seqDist)

    # NOTE: `input` shadows the builtin; kept for consistency with the file.
    input = cntk.sequence.input_variable((numFeatures), name='features')
    label = cntk.input_variable((numClasses), name='label')

    trainReader = createReader('./data/intel_train.ctf', True, numFeatures, numClasses)
    validReader = createReader('./data/intel_valid.ctf', False, numFeatures, numClasses)

    trainInputMap = {
        input: trainReader.streams.features,
        label: trainReader.streams.labels
    }
    validInputMap = {
        input: validReader.streams.features,
        label: validReader.streams.labels
    }

    model = createModel(input, numClasses, lstmLayers, lstmSize)
    z = model(input)

    loss = cntk.cross_entropy_with_softmax(z, label)
    # element_not converts the error rate into accuracy for reporting.
    accy = cntk.element_not(cntk.classification_error(z, label))  # Print accuracy %, not error!

    lr = cntk.learning_parameter_schedule(0.05, batchSize)
    learner = cntk.adam(z.parameters, lr, 0.9)
    #, l2_regularization_weight=0.00001, gradient_clipping_threshold_per_sample=5.0
    #tbWriter = cntk.logging.TensorBoardProgressWriter(1, './Tensorboard/', model=model)
    printer = cntk.logging.ProgressPrinter(100, tag='Training')
    trainer = cntk.Trainer(z, (loss, accy), learner, [printer])

    # TODO: These should be automatically detected!
    samplesPerSeq = timeSteps
    sequences = 8709
    validSeqs = 968
    minibatchSize = batchSize * samplesPerSeq
    minibatches = sequences // batchSize
    validBatches = validSeqs // batchSize

    cntk.logging.log_number_of_parameters(z)
    print(
        "Input days: {}; Looking for +- {:.1f}% change {} days ahead;".format(
            samplesPerSeq, threshold * 100.0, timeShift))
    print("Total Sequences: {}; {} epochs; {} minibatches per epoch;".format(
        sequences + validSeqs, numEpochs, minibatches + validBatches))

    # Testing out custom data reader
    reader = DataReader('./data/intel_train.ctf', numFeatures, numClasses,
                        batchSize, timeSteps, False)
    testReader = DataReader('./data/intel_valid.ctf', numFeatures, numClasses,
                            batchSize, timeSteps, False)

    for e in range(numEpochs):
        # Train network
        for b in range(minibatches):
            X, Y = next(reader)
            trainer.train_minibatch({z.arguments[0]: X, label: Y})
        trainer.summarize_training_progress()

        # Look at data we've not trained on (validation)
        # FIX: iterate the validation batch count, not the training one.
        for b in range(validBatches):
            X, Y = next(testReader)
            trainer.test_minibatch({z.arguments[0]: X, label: Y})
        trainer.summarize_test_progress()
C.sigmoid) def create_model(features): with C.layers.default_options(init=C.layers.glorot_uniform(), activation=C.sigmoid): h = features for _ in range(num_hidden_layers): h = C.layers.Dense(hidden_layers_dim)(h) last_layer = C.layers.Dense(num_output_classes, activation=None) return last_layer(h) z = create_model(input) loss = C.cross_entropy_with_softmax(z, label) eval_error = C.classification_error(z, label) # Instantiate the trainer object to drive the model training learning_rate = 0.5 lr_schedule = C.learning_parameter_schedule(learning_rate) learner = C.sgd(z.parameters, lr_schedule) trainer = C.Trainer(z, (loss, eval_error), [learner]) ################################################################################################################## # Define a utility function to compute the moving average sum. # A more efficient implementation is possible with np.cumsum() function def moving_average(a, w=10): if len(a) < w: return a[:] # Need to send a copy of the array return [ val if idx < w else sum(a[(idx - w):idx]) / w
def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_epochs=80):
    """Train a small ConvNet with local response normalization on CIFAR-10.

    reader_train / reader_test -- minibatch sources with .streams.features/.labels.
    Saves a model per epoch to model_path and returns the average test error.
    """
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = cntk.input((num_channels, image_height, image_width))
    label_var = cntk.input((num_classes))

    # apply model to input: scale pixels by 1/256 into [0, 1).
    scaled_input = cntk.element_times(cntk.constant(0.00390625), input_var)

    with cntk.layers.default_options(activation=cntk.relu, pad=True):
        z = cntk.layers.Sequential([
            cntk.layers.For(
                range(2), lambda: [
                    cntk.layers.Convolution2D((3, 3), 64),
                    cntk.layers.Convolution2D((3, 3), 64),
                    # LocalResponseNormalization is a project-local helper.
                    LocalResponseNormalization(1.0, 4, 0.001, 0.75),
                    cntk.layers.MaxPooling((3, 3), (2, 2))
                ]),
            cntk.layers.For(
                range(2),
                lambda i: [cntk.layers.Dense([256, 128][i]),
                           cntk.layers.Dropout(0.5)]),
            cntk.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cntk.cross_entropy_with_softmax(z, label_var)
    pe = cntk.classification_error(z, label_var)

    # training config
    minibatch_size = 64

    # Set learning parameters: staged per-sample LR and momentum schedules.
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [
        0.00015625
    ] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = cntk.learning_rate_schedule(
        lr_per_sample, unit=cntk.learners.UnitType.sample, epoch_size=epoch_size)
    mm_time_constant = [0] * 20 + [600] * 20 + [1200]
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
        mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = cntk.learners.momentum_sgd(
        z.parameters, lr_schedule, mm_schedule,
        unit_gain=True, l2_regularization_weight=l2_reg_weight)
    progress_printer = cntk.logging.ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = cntk.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    cntk.logging.log_number_of_parameters(z)
    print()

    # perform model training
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()
        z.save(
            os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with; weight metric by batch size.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
def main():
    """Train a 12-20-2 NN on heart-disease data, export ONNX, and evaluate.

    Reads training/test files relative to this script, trains with plain SGD
    for a fixed iteration count, reports accuracy, saves the softmax model as
    ONNX, then evaluates on the 91-item test set and one unlabeled sample.
    """
    print("\nBegin binary classification (two-node technique) \n")
    print("Using CNTK version = " + str(C.__version__) + "\n")

    dirname = os.path.dirname(__file__)
    input_dim = 12
    hidden_dim = 20
    output_dim = 2

    # NOTE(review): backslash path literals ("..\H...", "input\T...") rely on
    # Python tolerating invalid escapes and are Windows-only; left unchanged
    # because changing them alters runtime strings — confirm target platform.
    onnx_path = os.path.join(dirname, "..\HeartDiseasePrediction\Assets")
    train_file = os.path.join(dirname, "input\TrainingData.txt")
    test_file = os.path.join(dirname, "input\TestData.txt")

    # 1. create network
    X = C.ops.input_variable(input_dim, np.float32)
    Y = C.ops.input_variable(output_dim, np.float32)

    print("Creating a 12-20-2 tanh-softmax NN ")
    with C.layers.default_options(init=C.initializer.uniform(scale=0.01, seed=1)):
        hLayer = C.layers.Dense(hidden_dim, activation=C.ops.tanh, name='hidLayer')(X)
        oLayer = C.layers.Dense(output_dim, activation=None, name='outLayer')(hLayer)
    # Train on the raw logits; softmax is applied only in the exported model.
    nnet = oLayer
    model = C.ops.softmax(nnet)

    # 2. create learner and trainer
    print("Creating a cross entropy batch=10 SGD LR=0.005 Trainer ")
    tr_loss = C.cross_entropy_with_softmax(nnet, Y)
    tr_clas = C.classification_error(nnet, Y)

    max_iter = 5000
    batch_size = 10
    learn_rate = 0.005
    learner = C.sgd(nnet.parameters, learn_rate)
    trainer = C.Trainer(nnet, (tr_loss, tr_clas), [learner])

    # 3. create reader for train data
    rdr = create_reader(train_file, input_dim, output_dim,
                        rnd_order=True, sweeps=C.io.INFINITELY_REPEAT)
    heart_input_map = {
        X : rdr.streams.x_src,
        Y : rdr.streams.y_src
    }

    # 4. train
    print("\nStarting training \n")
    for i in range(0, max_iter):
        curr_batch = rdr.next_minibatch(batch_size, input_map=heart_input_map)
        trainer.train_minibatch(curr_batch)
        # Report loss/accuracy ten times over the run.
        if i % int(max_iter/10) == 0:
            mcee = trainer.previous_minibatch_loss_average
            macc = (1.0 - trainer.previous_minibatch_evaluation_average) * 100
            print("batch %4d: mean loss = %0.4f, accuracy = %0.2f%% " % (i, mcee, macc))
    trainer.summarize_training_progress()
    print("\nTraining complete")

    # Export as ONNX (the softmax-wrapped model, not the raw logits).
    model.save(os.path.join(onnx_path, "Heart.onnx"), format=C.ModelFormat.ONNX)

    # 5. evaluate model using all data
    print("\nEvaluating accuracy using built-in test_minibatch() \n")
    rdr = create_reader(test_file, input_dim, output_dim, rnd_order=False, sweeps=1)
    heart_input_map = {
        X : rdr.streams.x_src,
        Y : rdr.streams.y_src
    }
    num_test = 91
    all_test = rdr.next_minibatch(num_test, input_map=heart_input_map)
    acc = (1.0 - trainer.test_minibatch(all_test)) * 100
    print("Classification accuracy on the %d data items = %0.2f%%" % (num_test,acc))

    unknown = np.array([1, 0, 0, 0, 1, 2, 0.0370370373, 0, 0.832061052, 0, 1, 0.6458333], dtype=np.float32)
    predicted = model.eval(unknown)
    print(predicted)  # (use trained model to make prediction)

    print("\nEnd Cleveland Heart Disease classification ")
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    """Train/evaluate a 3D-convolution video classifier (fixed epoch sizes).

    Variant of the reader-driven version above: epoch sizes are hard-coded
    (1322 train / 332 test). Returns the average test error.
    """
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = input_variable(
        (num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with default_options(activation=relu):
        z = Sequential([
            Convolution3D((3, 3, 3), 64, pad=True),
            # Pool only spatially at first, preserving the temporal depth.
            MaxPooling((1, 2, 2), (1, 2, 2)),
            For(
                range(3), lambda i: [
                    Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                    Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                    MaxPooling((2, 2, 2), (2, 2, 2))
                ]),
            For(range(2), lambda: [Dense(1024), Dropout(0.5)]),
            Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error.
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 1322  # for now we manually specify epoch size
    minibatch_size = 4

    # Set learning parameters
    lr_per_sample = [0.01] * 10 + [0.001] * 10 + [0.0001]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule = momentum_as_time_constant_schedule([momentum_time_constant], epoch_size=epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z)
    print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()
        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(
                minibatch_size)
            trainer.train_minibatch({input_var: videos, label_var: labels})
        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size = 332
    minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(
            minibatch_size)
        # minibatch data to be trained with; weight metric by batch size.
        metric_numer += trainer.test_minibatch({
            input_var: videos,
            label_var: labels
        }) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
def create_criterion_function(model, labels):
    """Bundle the training criterion for `model` against `labels`.

    Returns (cross-entropy-with-softmax loss, classification-error metric).
    """
    return (C.cross_entropy_with_softmax(model, labels),
            C.classification_error(model, labels))
def create_criterion_function_preferred(model, labels):
    """(model, labels) -> (loss, error metric)."""
    # Loss drives the gradient; the error rate is reporting only.
    loss = C.cross_entropy_with_softmax(model, labels)
    metric = C.classification_error(model, labels)
    return loss, metric
# Set up NN input_dim = 4 hidden_dim = 50 num_output_classes = 3 input = cntk.input_variable(input_dim) label = cntk.input_variable(num_output_classes) # create a reader to read from the file reader_train = create_reader( "D:/Users/Sachit/source/repos/SamplesRepo/IrisData/IrisData/iris-data/trainData_cntk.txt", True, input_dim, num_output_classes) # Create the model z = create_model(input, hidden_dim, num_output_classes) loss = cntk.cross_entropy_with_softmax(z, label) label_error = cntk.classification_error(z, label) learning_rate = 0.2 lr_schedule = cntk.learning_parameter_schedule(learning_rate) learner = cntk.sgd(z.parameters, lr_schedule) trainer = cntk.Trainer(z, (loss, label_error), [learner]) #Init the params for trainer minibatch_size = 120 num_iterations = 20 # Map the data streams to input and labels input_map = { label: reader_train.streams.labels, input: reader_train.streams.features }
def create_vgg16():
    """Construct a VGG-16 graph together with its training criteria.

    Returns a dict exposing the raw feature/label input variables, the
    cross-entropy loss ('ce'), top-1 / top-5 error metrics ('pe', 'pe5')
    and the network output ('output').
    """
    # Input variables denoting the features and label data
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # Subtract the per-channel mean value from the raw input image.
    mean_removed = minus(feature_var,
                         constant([[[104]], [[117]], [[124]]]),
                         name='mean_removed_input')

    def conv_stage(stage, repeat, filters):
        # One VGG stage: `repeat` (conv, relu) pairs. Convolution and ReLU are
        # kept as separate named layers so the pre-activation output can be
        # tapped for feature extraction.
        return For(range(repeat), lambda i: [
            Convolution2D((3, 3), filters, name='conv{}_{}'.format(stage, i)),
            Activation(activation=relu, name='relu{}_{}'.format(stage, i)),
        ])

    with default_options(activation=None, pad=True, bias=True):
        stages = []
        # Standard VGG-16 configuration: (repeat, filters) per stage.
        for stage, (repeat, filters) in enumerate(
                [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)], start=1):
            stages.append(conv_stage(stage, repeat, filters))
            stages.append(MaxPooling((2, 2), (2, 2),
                                     name='pool{}'.format(stage)))
        classifier = [
            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, name='fc8')
        ]
        z = Sequential(stages + classifier)(mean_removed)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)
    pe5 = C.classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }
def create_criterion_function(model):
    """Wrap *model* into a combined criterion Function.

    A 'labels' placeholder is created so that the criterion can later be
    bound to actual label data; the result maps
    (features, labels) -> (loss, metric).
    """
    labels = C.placeholder(name='labels')
    return C.combine([C.cross_entropy_with_softmax(model, labels),
                      C.classification_error(model, labels)])
def create_model(self):
    """Build the action-prediction network, criterion, learner and trainer.

    Pipeline: a convolutional encoder over ``self._input`` (observations)
    and a dense encoder over ``self._target`` are spliced together, passed
    through an LSTM recurrence, then split into two heads: a 360-way
    softmax ``direction`` and a scalar ``velocity``.

    Returns:
        (model, loss, learner, trainer) tuple.
    """
    # Convolutional + dense encoder for the observation input.
    modeli = C.layers.Sequential([
        # Convolution layers
        C.layers.Convolution2D((1, 3), num_filters=8, pad=True,
                               reduction_rank=0, activation=C.ops.tanh,
                               name='conv_a'),
        C.layers.Convolution2D((1, 3), num_filters=16, pad=True,
                               reduction_rank=1, activation=C.ops.tanh,
                               name='conv2_a'),
        C.layers.Convolution2D((1, 3), num_filters=32, pad=False,
                               reduction_rank=1, activation=C.ops.tanh,
                               name='conv3_a'),
        ######
        # Dense layers
        #C.layers.Dense(128, activation=C.ops.relu,name='dense1_a'),
        #C.layers.Dense(64, activation=C.ops.relu,name='dense2_a'),
        C.layers.Dense(361, activation=C.ops.relu, name='dense3_a')
    ])(self._input)
    ### target
    modelt = C.layers.Sequential(
        [C.layers.Dense(360, activation=C.ops.relu,
                        name='dense4_a')])(self._target)
    ### concatenate both processed target and observations
    inputs = C.ops.splice(modeli, modelt)
    ### Use input to predict next hidden state, and generate
    ### next observation
    model = C.layers.Sequential([
        ######
        C.layers.Dense(720, activation=C.ops.relu, name='dense5_a'),
        # Recurrence
        C.layers.Recurrence(C.layers.LSTM(2048, init=C.glorot_uniform()),
                            name='lstm_a'),
        C.layers.Dense(1024, activation=None)
    ])(inputs)
    ######
    # Prediction heads: 360-way direction distribution and scalar velocity.
    direction = C.layers.Sequential([
        C.layers.Dense(720, activation=None, name='dense6_a'),
        C.layers.Dense(360, activation=C.ops.softmax, name='dense7_a')
    ])(model)
    velocity = C.layers.Sequential([
        C.layers.Dense(128, activation=C.ops.relu),
        C.layers.Dense(64, activation=None),
        C.layers.Dense(1, activation=None)
    ])(model)
    # Combined output: 360 direction probabilities followed by velocity.
    model = C.ops.splice(direction, velocity)
    if self._load_model:
        # Resume from a previously saved model; slice its combined output
        # back into the direction (0..359) and velocity (index 360) parts.
        model = C.load_model('dnns/action_predicter_f.dnn')
        direction = model[0:360]
        velocity = model[360]
    print(model)
    # NOTE(review): the loss applies squared error to the softmax direction
    # head (rather than cross-entropy) and the "error" metric mixes
    # classification error with squared error — confirm this is intended.
    loss = C.squared_error(direction, self._output) + C.squared_error(
        velocity, self._output_velocity)
    error = C.classification_error(direction, self._output) + C.squared_error(
        velocity, self._output_velocity)
    learner = C.adadelta(model.parameters, l2_regularization_weight=0.001)
    progress_printer = C.logging.ProgressPrinter(tag='Training')
    trainer = C.Trainer(model, (loss, error), learner, progress_printer)
    return model, loss, learner, trainer
def criterion(x, y):
    """Forward pass on normalized input; return named (loss, metric) outputs."""
    scores = model(normalize(x))
    loss_out = Function.NamedOutput(loss=cross_entropy_with_softmax(scores, y))
    metric_out = Function.NamedOutput(metric=classification_error(scores, y))
    return (loss_out, metric_out)
def criterion(data, label_one_hot):
    """Return (loss, metric) for *data* against one-hot labels.

    The model emits non-normalized log-probabilities; softmax is applied
    internally by cross_entropy_with_softmax.
    """
    logits = model(data)
    return (C.cross_entropy_with_softmax(logits, label_one_hot),
            C.classification_error(logits, label_one_hot))
def create_criterion_function_preferred(model, labels):
    """Hand-written cross-entropy loss plus classification-error metric.

    NOTE(review): the manual -sum(labels * log(model)) form assumes *model*
    already outputs normalized probabilities (e.g. ends in a softmax) —
    confirm against the model definition.
    """
    log_likelihood = C.reduce_sum(labels * C.ops.log(model))
    return -log_likelihood, C.classification_error(model, labels)
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None, model_dir=None, log_dir=None, tensorboard_logdir=None, gen_heartbeat=False):
    """Train a CIFAR-10 ResNet and evaluate it on the test reader.

    Args:
        reader_train / reader_test: minibatch sources with 'features'/'labels' streams.
        network_name: 'resnet20' or 'resnet110' (anything else raises RuntimeError).
        epoch_size: number of training samples per epoch.
        max_epochs: number of training epochs.
        profiler_dir / model_dir / log_dir / tensorboard_logdir: optional output dirs.
        gen_heartbeat: forwarded to ProgressPrinter.

    Returns:
        Average classification error over the 10000 test samples.

    Raises:
        RuntimeError: for an unknown network_name.
    """
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0]*80+[0.1]*40+[0.01]
    elif network_name == 'resnet110':
        # resnet110 uses a one-epoch warm-up at lr 0.1 before the main rate.
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1]*1+[1.0]*80+[0.1]*40+[0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    minibatch_size = 128
    # Time constant chosen so per-minibatch momentum is 0.9.
    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters (convert per-minibatch rates to per-sample).
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # progress writers
    progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight = l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    minibatch_size = 16  # NOTE: reuses/overwrites the training minibatch_size.

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer/metric_denom
inputs = C.input_variable(shape=(num_features), dtype=np.float32, name="features") # Z is the model; a composition of operation. Maps [(input_dim) -> (num_classes)] num_hidden_layers = 2 hidden_layers_dim = 10 # Choose your model z = create_model(inputs, num_hidden_layers, hidden_layers_dim) z = fully_connected_classifier_net(inputs, num_classes, hidden_layers_dim, num_hidden_layers, C.sigmoid) print(z.parameters) label = C.input_variable(1, dtype=np.float32, name="label") onehot = C.one_hot(label, num_classes) loss = C.cross_entropy_with_softmax(z, onehot) eval_error = C.classification_error(z, onehot) # Instantiate the trainer object to drive the model training learning_rate = 0.5 lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch) learner = C.sgd(z.parameters, lr_schedule) trainer = C.Trainer(z, (loss, eval_error), [learner]) # Define a utility that prints the training progress def print_training_progress(trainer, mb, frequency, verbose=1): training_loss, eval_error = "NA", "NA" if mb % frequency == 0: training_loss = trainer.previous_minibatch_loss_average eval_error = trainer.previous_minibatch_evaluation_average if verbose:
def TrainAndValidate(trainfile):
    """Train the query/passage CNN ranker and validate after each epoch.

    Reads (query, passage, label) triples from *trainfile*, trains the
    `cnn_network` model, saves a checkpoint per epoch, and scores the
    module-level validation vectors after every epoch.

    Args:
        trainfile: path to the CTF training file consumed by create_reader.

    Returns:
        The trained model Function.
    """
    #*****Hyper-Parameters******
    q_max_words = 12
    p_max_words = 50
    emb_dim = 50
    num_classes = 3
    minibatch_size = 250
    epoch_size = 500000  #No.of samples in training set
    total_epochs = 20  #Total number of epochs to run
    query_total_dim = q_max_words * emb_dim
    label_total_dim = num_classes
    passage_total_dim = p_max_words * emb_dim

    #****** Create placeholders for reading Training Data ***********
    query_input_var = C.ops.input_variable((1, q_max_words, emb_dim),
                                           np.float32,
                                           is_sparse=False)
    passage_input_var = C.ops.input_variable((1, p_max_words, emb_dim),
                                             np.float32,
                                             is_sparse=False)
    output_var = C.input_variable(num_classes, np.float32, is_sparse=False)
    train_reader = create_reader(trainfile, True, query_total_dim,
                                 passage_total_dim, label_total_dim)
    input_map = {
        query_input_var: train_reader.streams.queryfeatures,
        passage_input_var: train_reader.streams.passagefeatures,
        output_var: train_reader.streams.labels
    }

    # ********* Model configuration *******
    model_output = cnn_network(query_input_var, passage_input_var, num_classes)
    # NOTE(review): binary_cross_entropy over a 3-class one-hot label — this
    # treats each class output independently; confirm cnn_network ends in
    # sigmoid (not softmax) so this criterion is intended.
    loss = C.binary_cross_entropy(model_output, output_var)
    pe = C.classification_error(model_output, output_var)
    lr_per_minibatch = C.learning_rate_schedule(0.03, C.UnitType.minibatch)
    learner = C.adagrad(model_output.parameters, lr=lr_per_minibatch)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=total_epochs)

    #************Create Trainer with model_output object, learner and loss parameters*************
    trainer = C.Trainer(model_output, (loss, pe), learner, progress_printer)
    C.logging.log_number_of_parameters(model_output)
    print()

    # **** Train the model in batchwise mode *****
    for epoch in range(total_epochs):  # loop over epochs
        print("Epoch : ", epoch)
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = train_reader.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # training step
            sample_count += data[
                output_var].num_samples  # count samples processed so far
        trainer.summarize_training_progress()
        model_output.save(
            "CNN_{}.dnn".format(epoch))  # Save the model for every epoch

        #*** Find metrics on validation set after every epoch ******#
        # (Note : you can skip doing this for every epoch instead to optimize
        # the time, do it after every k epochs)
        predicted_labels = []
        for i in range(len(validation_query_vectors)):
            queryVec = np.array(validation_query_vectors[i],
                                dtype="float32").reshape(
                                    1, q_max_words, emb_dim)
            passageVec = np.array(validation_passage_vectors[i],
                                  dtype="float32").reshape(
                                      1, p_max_words, emb_dim)
            scores = model_output(
                queryVec, passageVec)[0]  # do forward-prop on model to get score
            # Argmax over the three scores, encoded as labels 1..3.
            # NOTE(review): confirm validation_labels uses the same 1..3 encoding.
            if scores[0] > scores[1] and scores[0] > scores[2]:
                predictLabel = 1
            elif scores[1] > scores[2]:
                predictLabel = 2
            else:
                predictLabel = 3
            # predictLabel = 1 if scores[1]>=scores[0] else 0
            predicted_labels.append(predictLabel)
        metrics = precision_recall_fscore_support(np.array(validation_labels),
                                                  np.array(predicted_labels),
                                                  average='weighted')
        #print("precision : "+str(metrics[0])+" recall : "+str(metrics[1])+" f1 : "+str(metrics[2])+"\n")

    return model_output
def criterion(x, y):
    """Compute (cross-entropy loss, classification error) for one batch."""
    outputs = model(normalize(x))
    return (cross_entropy_with_softmax(outputs, y),
            classification_error(outputs, y))
def convnetlrn_cifar10_dataaug(reader_train, reader_test, epoch_size=50000, max_epochs=80):
    """Train a small NIN-style convnet on CIFAR-10 and evaluate it.

    Data augmentation is assumed to be handled by the readers themselves.

    Args:
        reader_train: minibatch source with 'features'/'labels' training streams.
        reader_test: minibatch source for evaluation.
        epoch_size: number of training samples per epoch (default 50000).
        max_epochs: number of training epochs (default 80).

    Returns:
        Average classification-error rate over the 10000 evaluated test samples.
    """
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # input normalization: scale raw pixels by 1/256 = 0.00390625
    # (comment fixed: it previously said 0.00396025, which did not match the constant)
    scaled_input = C.element_times(C.constant(0.00390625), input_var)

    # FIX: removed an unused GlobalAveragePooling layer that was constructed
    # (and given a signature via update_signature) but never attached to the
    # network — it had no effect on the model.

    with C.layers.default_options():
        z = C.layers.Sequential([
            C.layers.For(range(1), lambda: [
                C.layers.Convolution2D((3, 3), 32, strides=(1, 1), pad=True),
                C.layers.Activation(activation=C.relu),
                C.layers.Convolution2D((1, 1), 64, strides=(1, 1), pad=False),
                C.layers.MaxPooling((3, 3), strides=(2, 2), pad=True)
            ]),
            C.layers.For(range(1), lambda: [
                C.layers.Convolution2D((3, 3), 128, strides=(1, 1), pad=True),
                C.layers.Activation(activation=C.relu),
                C.layers.Convolution2D((1, 1), 256, strides=(1, 1), pad=False),
                C.layers.Activation(activation=C.relu),
                C.layers.MaxPooling((3, 3), strides=(2, 2), pad=True)
            ]),
            C.layers.For(range(1), lambda: [
                C.layers.Convolution2D((3, 3), 256, strides=(1, 1), pad=True),
                C.layers.Activation(activation=C.relu),
                C.layers.Convolution2D((1, 1), 256, strides=(1, 1), pad=False),
                C.layers.Activation(activation=C.relu),
                # 8x8 average pooling collapses the remaining spatial dims.
                C.layers.AveragePooling((8, 8), strides=(1, 1), pad=False)
            ]),
            C.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)

    # training config
    minibatch_size = 64

    # Set learning parameters
    # learning rate: per-sample schedule stepped down over the epochs
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [
        0.00015625
    ] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size=epoch_size)
    # momentum: ramped up after the first 20 epochs
    mms = [0] * 20 + [0.9983347214509387] * 20 + [0.9991670137924583]
    mm_schedule = C.learners.momentum_schedule_per_sample(
        mms, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = C.learners.momentum_sgd(z.parameters,
                                      lr_schedule,
                                      mm_schedule,
                                      unit_gain=True,
                                      l2_regularization_weight=l2_reg_weight)
    progress_printer = C.logging.ProgressPrinter(tag='Training',
                                                 num_epochs=max_epochs)
    trainer = C.Trainer(z, (ce, pe), learner, progress_printer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    C.logging.log_number_of_parameters(z)
    print()

    # perform model training
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)  # fetch minibatch.
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
        trainer.summarize_training_progress()

        # save model (the same file is overwritten after every epoch)
        modelname = "NIN_test4.dnn"
        z.save(os.path.join(model_path, modelname))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        data = reader_test.next_minibatch(current_minibatch,
                                          input_map=input_map)
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        sample_count += current_minibatch
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom,
        metric_denom))
    print("")

    return metric_numer / metric_denom
def train_model(base_model_file, train_map_file, test_map_file, input_resolution,
                num_epochs, mb_size, max_train_images, lr_per_mb, momentum_per_mb,
                l2_reg_weight, dropout_rate, freeze_weights, num_channels=3):
    """Fine-tune a transfer-learning model on the images listed in the map files.

    Args:
        base_model_file: path to the pre-trained base model.
        train_map_file / test_map_file: image-path/label map files.
        input_resolution: square input size (width == height).
        num_epochs, mb_size, max_train_images: training schedule controls.
        lr_per_mb, momentum_per_mb, l2_reg_weight: learner hyper-parameters.
        dropout_rate, freeze_weights: forwarded to create_model.
        num_channels: image channels (default 3).

    Returns:
        The trained CNTK model Function.
    """
    #init
    image_width = input_resolution
    image_height = input_resolution
    epoch_size_test = len(readTable(test_map_file))
    epoch_size_train = len(readTable(train_map_file))
    epoch_size_train = min(epoch_size_train, max_train_images)
    # Class count = 1 + maximum label id seen in either map file (column 1).
    num_classes = max(
        ToIntegers(getColumn(readTable(train_map_file), 1)) +
        ToIntegers(getColumn(readTable(test_map_file), 1))) + 1

    # Create the minibatch source
    minibatch_source_train = create_mb_source(train_map_file, image_width,
                                              image_height, num_channels,
                                              num_classes, True)
    minibatch_source_test = create_mb_source(test_map_file, image_width,
                                             image_height, num_channels,
                                             num_classes, False)

    # Define mapping from reader streams to network inputs
    label_input = input_variable(num_classes)
    image_input = input_variable((num_channels, image_height, image_width),
                                 name="input")
    input_map = {
        image_input: minibatch_source_train['features'],
        label_input: minibatch_source_train['labels']
    }

    # Instantiate the transfer learning model and loss function
    cntkModel = create_model(base_model_file, image_input, num_classes,
                             dropout_rate, freeze_weights)
    ce = cross_entropy_with_softmax(cntkModel, label_input)
    pe = classification_error(cntkModel, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(cntkModel.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=num_epochs)]
    trainer = Trainer(cntkModel, (ce, pe), learner, progress_writers)

    # Run training epochs
    print(
        "Training transfer learning model for {0} epochs (epoch_size_train = {1})."
        .format(num_epochs, epoch_size_train))
    errsTest = []
    errsTrain = []
    log_number_of_parameters(cntkModel)
    for epoch in range(num_epochs):
        # Train model
        err_numer = 0
        sample_counts = 0
        while sample_counts < epoch_size_train:  # Loop over minibatches in the epoch
            sample_count = min(mb_size, epoch_size_train - sample_counts)
            data = minibatch_source_train.next_minibatch(sample_count,
                                                         input_map=input_map)
            trainer.train_minibatch(data)  # Update model with it
            sample_counts += sample_count  # Count samples processed so far
            # Accumulate the (weighted) per-minibatch error for the epoch average.
            err_numer += trainer.previous_minibatch_evaluation_average * sample_count
            if sample_counts % (100 * mb_size) == 0:
                print("Training: processed {0} samples".format(sample_counts))

            # Visualize training images
            # img_data = data[image_input].asarray()
            # for i in range(len(img_data)):
            #     debugImg = img_data[i].squeeze().swapaxes(0, 1).swapaxes(1, 2) / 255.0
            #     imshow(debugImg)

        # Compute accuracy on training and test sets
        errsTrain.append(err_numer / float(sample_counts))
        trainer.summarize_training_progress()
        errsTest.append(
            cntkComputeTestError(trainer, minibatch_source_test, mb_size,
                                 epoch_size_test, input_map))
        trainer.summarize_test_progress()

        # Plot training progress
        # NOTE(review): indentation reconstructed — plotting appears to run
        # once per epoch; confirm against the original layout.
        plt.plot(errsTrain, 'b-', errsTest, 'g-')
        plt.xlabel('Epoch number')
        plt.ylabel('Error')
        plt.title('Training error (blue), test error (green)')
        plt.draw()
    return cntkModel
# XOR training set: columns 0-1 are the inputs, columns 2-3 the one-hot label
# ([1,0] = class 0, [0,1] = class 1).
training_set = numpy.array([[0,0,1,0],[0,1,0,1],[1,0,0,1],[1,1,1,0]],dtype=float32)

x = cntk.input_variable(2)
y = cntk.input_variable(2)

def getNetwork(_x):
    """Two hidden Dense(4) layers (relu) and a linear 2-way output layer."""
    with cntk.layers.default_options(init=cntk.layers.glorot_uniform(), activation=cntk.relu):
        res = _x
        res = cntk.layers.Dense(4, name="l1")(res)
        res = cntk.layers.Dense(4, name="l2")(res)
        # Final layer is linear; softmax is applied by the loss / at eval time.
        res = cntk.layers.Dense(2, name="lo", activation=None)(res)
        return res

fnn = getNetwork(x)
loss = cntk.cross_entropy_with_softmax(fnn, y)
errs = cntk.classification_error(fnn, y)
trainer = cntk.Trainer(fnn, (loss, errs), [cntk.sgd(fnn.parameters, cntk.learning_rate_schedule(0.03, cntk.UnitType.minibatch))])

# Train for 1000 passes over the four samples, one sample per minibatch.
for times in range(1000):
    for data in training_set:
        batch = {x: numpy.array(data[:2],dtype=float32).reshape(2), y:numpy.array(data[2:],dtype = float32).reshape(2)}
        trainer.train_minibatch(batch)
    print("\r"+str(times), end="")
print("")
#print(fnn.lo.b.value)

# Evaluate: apply softmax to the linear outputs and print the predicted class.
out = cntk.softmax(fnn)
print(numpy.argmax(out.eval({x: numpy.array([[0,0]],dtype=float32).reshape(2)})))
print(numpy.argmax(out.eval({x: numpy.array([[0,1]],dtype=float32).reshape(2)})))
print(numpy.argmax(out.eval({x: numpy.array([[1,0]],dtype=float32).reshape(2)})))
# Linear (logistic-regression style) model: data @ W + b.
data = cntk.input_variable(input_dim)
W = cntk.Parameter((input_dim, num_classes),
                   init=cntk.glorot_uniform(),
                   name='W')
b = cntk.Parameter((num_classes, ), init=0, name='b')
model = cntk.times(data, W) + b

# Define the CNTK criterion function. A criterion function maps
# (input vectors, labels) to a loss function and an optional additional
# metric. The loss function is used to train the model parameters.
# We use cross entropy as a loss function.
label_one_hot = cntk.input_variable(num_classes, is_sparse=True)
loss = cntk.cross_entropy_with_softmax(
    model, label_one_hot)  # this applies softmax to model's output under the hood
metric = cntk.classification_error(model, label_one_hot)
criterion = cntk.combine(
    [loss, metric])  # criterion is a tuple-valued function (loss, metric)

# Learner object. The learner implements the update algorithm, in this case plain SGD.
learning_rate = 0.1
learner = cntk.sgd(model.parameters,
                   cntk.learning_parameter_schedule(learning_rate))

# Trainer.
minibatch_size = 32
progress_writer = cntk.logging.ProgressPrinter(
    50)  # helper for logging progress; log every 50 minibatches
# NOTE(review): the model argument is None here — the criterion alone is
# given to the Trainer; confirm this is the intended construction.
trainer = cntk.Trainer(None, criterion, [learner], [progress_writer])

# Train!
for _ in range(num_hidden_layers): input = cntk.layers.Dense(hidden_layers_dim)(input) r = cntk.layers.Dense(num_output_classes, activation = None)(input) return r # Scale the input to 0-1 range by dividing each pixel by 255. input_s_normalized = input/255.0 input_s_squared = cntk.square(input_s_normalized) input_s_sqrt = cntk.sqrt(input_s_normalized) z_model = create_model(input_s_normalized) # Define the loss function for is_training loss = cntk.cross_entropy_with_softmax(z_model, label) # Classification error evaluation label_error = cntk.classification_error(z_model, label) # Configure training parameters # Instantiate the trainer object to drive the model training learning_rate = 0.2 lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch) # Schoastic Gradient Descent learner learner = cntk.sgd(z_model.parameters, lr_schedule) trainer = cntk.Trainer(z_model, (loss, label_error), [learner]) # Define a utility function to compute the moving average sum. # A more efficient implementation is possible with np.cumsum() function def moving_average(a, w=5): if len(a) < w: return a[:] # Need to send a copy of the array