def create_model(input_sequence, label_sequence, vocab_dim, hidden_dim):
    # Create the rnn that computes the latent representation for the next token.
    rnn_with_latent_output = Sequential([
        C.Embedding(hidden_dim),
        For(range(num_layers), lambda: Sequential([
            Stabilizer(),
            Recurrence(LSTM(hidden_dim), go_backwards=False)
        ])),
    ])

    # Apply it to the input sequence.
    latent_vector = rnn_with_latent_output(input_sequence)

    # Connect the latent output to (sampled/full) softmax.
    if use_sampled_softmax:
        weights = load_sampling_weights(token_frequencies_file_path)
        smoothed_weights = np.float32(np.power(weights, alpha))
        sampling_weights = C.reshape(C.Constant(smoothed_weights), shape=(1, vocab_dim))
        z, ce, errs = cross_entropy_with_sampled_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim,
            softmax_sample_size, sampling_weights)
    else:
        z, ce, errs = cross_entropy_with_full_softmax(
            latent_vector, label_sequence, vocab_dim, hidden_dim)

    return z, ce, errs
def create_model(output_dim):
    return Sequential([
        LayerStack(num_layers, lambda: Sequential([
            Stabilizer(),
            Recurrence(LSTM(hidden_dim), go_backwards=False)
        ])),
        Dense(output_dim)
    ])
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))), Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
def create_pooling_neural_network(input_vars, out_dims):
    hidden_layer_1 = Dense(2, activation=cntk.ops.relu)
    hidden_layer_2 = Dense(16, activation=cntk.ops.relu)
    output_layer = Dense(out_dims, activation=None)

    model = Sequential([hidden_layer_1, hidden_layer_2, output_layer])(input_vars)
    return model
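# --- Usage sketch (illustrative only) ---------------------------------------
# A minimal, hypothetical example of wiring create_pooling_neural_network to a
# CNTK input variable. The observation dimension (4) and action count (2) are
# assumed values chosen for illustration, not taken from the original example.
def _example_pooling_network_usage():
    import numpy as np
    import cntk
    state = cntk.input_variable((4,), np.float32)        # hypothetical state vector
    q_values = create_pooling_neural_network(state, 2)   # 2 hypothetical actions
    # The resulting CNTK Function maps a 4-dim state to 2 action values.
    print(q_values.output.shape)                          # expected: (2,)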
def _build_model(self):
    with default_options(init=he_uniform(), activation=relu, bias=True):
        model = Sequential([
            Convolution((8, 8), 32, strides=(4, 4)),
            Convolution((4, 4), 64, strides=(2, 2)),
            Convolution((3, 3), 64, strides=(1, 1)),
            Dense(512, init=he_uniform(0.01)),
            Dense(self._nb_actions, activation=None, init=he_uniform(0.01))
        ])
        return model
def create_multi_layer_neural_network(input_vars, out_dims, num_hidden_layers):
    input_dims = input_vars.shape[0]
    num_hidden_neurons = input_dims**3

    hidden_layer = lambda: Dense(num_hidden_neurons, activation=cntk.ops.relu)
    output_layer = Dense(out_dims, activation=None)

    model = Sequential([LayerStack(num_hidden_layers, hidden_layer),
                        output_layer])(input_vars)
    return model
def create_convolutional_neural_network(input_vars, out_dims):
    convolutional_layer_1 = Convolution((5, 5), 32, strides=1,
                                        activation=cntk.ops.relu, pad=True,
                                        init=glorot_normal(), init_bias=0.1)
    pooling_layer_1 = MaxPooling((2, 2), strides=(2, 2), pad=True)

    convolutional_layer_2 = Convolution((5, 5), 64, strides=1,
                                        activation=cntk.ops.relu, pad=True,
                                        init=glorot_normal(), init_bias=0.1)
    pooling_layer_2 = MaxPooling((2, 2), strides=(2, 2), pad=True)

    convolutional_layer_3 = Convolution((5, 5), 128, strides=1,
                                        activation=cntk.ops.relu, pad=True,
                                        init=glorot_normal(), init_bias=0.1)
    pooling_layer_3 = MaxPooling((2, 2), strides=(2, 2), pad=True)

    fully_connected_layer = Dense(1024, activation=cntk.ops.relu,
                                  init=glorot_normal(), init_bias=0.1)
    output_layer = Dense(out_dims, activation=None,
                         init=glorot_normal(), init_bias=0.1)

    model = Sequential([
        convolutional_layer_1, pooling_layer_1,
        convolutional_layer_2, pooling_layer_2,
        # convolutional_layer_3, pooling_layer_3,
        fully_connected_layer,
        output_layer
    ])(input_vars)
    return model
def create_vgg9_model(input, num_classes):
    with default_options(activation=relu):
        model = Sequential([
            LayerStack(3, lambda i: [
                Convolution((3, 3), [64, 96, 128][i], init=glorot_uniform(), pad=True),
                Convolution((3, 3), [64, 96, 128][i], init=glorot_uniform(), pad=True),
                MaxPooling((3, 3), strides=(2, 2))
            ]),
            LayerStack(2, lambda: [
                Dense(1024, init=glorot_uniform())
            ]),
            Dense(num_classes, init=glorot_uniform(), activation=None)
        ])
    return model(input)
def ffnet():
    inputs = 2
    outputs = 2
    layers = 2
    hidden_dimension = 50

    # input variables denoting the features and label data
    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)

    # Instantiate the feedforward classification model
    my_model = Sequential(
        [Dense(hidden_dimension, activation=C.sigmoid), Dense(outputs)])
    z = my_model(features)

    ce = C.cross_entropy_with_softmax(z, label)
    pe = C.classification_error(z, label)

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.125, UnitType.minibatch)
    trainer = C.Trainer(z, ce, pe, [sgd(z.parameters, lr=lr_per_minibatch)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_minibatches_to_train = 1024

    pp = ProgressPrinter(0)
    for i in range(num_minibatches_to_train):
        train_features, labels = generate_random_data(minibatch_size, inputs, outputs)
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        trainer.train_minibatch({features: train_features, label: labels})
        pp.update_with_trainer(trainer)
    last_avg_error = pp.avg_loss_since_start()

    test_features, test_labels = generate_random_data(minibatch_size, inputs, outputs)
    avg_error = trainer.test_minibatch({
        features: test_features,
        label: test_labels
    })
    print(' error rate on an unseen minibatch: {}'.format(avg_error))
    return last_avg_error, avg_error
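# A small, hypothetical driver for ffnet() above. It relies only on names the
# example already uses (numpy's np and generate_random_data); the seed value is
# arbitrary and merely makes repeated runs comparable.
def _example_run_ffnet():
    np.random.seed(0)
    last_avg_error, avg_error = ffnet()
    print('training loss: {}, test error: {}'.format(last_avg_error, avg_error))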
def create_recurrent_network():
    # Input variables denoting the features and label data
    features = input_variable(((2 * context + 1) * feature_dim))
    labels = input_variable((num_classes))

    # create network
    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))), Dense(num_classes)])
    z = model(features)
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    return {
        'feature': features,
        'label': labels,
        'ce': ce,
        'errs': errs,
        'output': z
    }
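# Hypothetical wiring sketch: how the dictionary returned by
# create_recurrent_network() could be handed to a Trainer, mirroring the HTK
# test earlier in this file. The learning rate (0.001) and the use of plain sgd
# are assumptions made for illustration; reader and input_map setup is omitted.
def _example_recurrent_network_training_setup():
    network = create_recurrent_network()
    lr_schedule = learning_rate_schedule(0.001, UnitType.sample)
    learner = sgd(network['output'].parameters, lr=lr_schedule)
    trainer = Trainer(network['output'], (network['ce'], network['errs']), learner)
    return trainer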
def convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs=80):
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # apply model to input
    scaled_input = element_times(constant(0.00390625), input_var)
    with default_options(activation=relu, pad=True):
        z = Sequential([
            LayerStack(2, lambda: [
                Convolution((3, 3), 64),
                Convolution((3, 3), 64),
                MaxPooling((3, 3), (2, 2))
            ]),
            LayerStack(2, lambda i: [Dense([256, 128][i]), Dropout(0.5)]),
            Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 50000  # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [0.00015625] * 20 + [0.000046875] * 10 + [0.000015625]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size)
    momentum_time_constant = [0] * 20 + [600] * 20 + [1200]
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, ce, pe, learner, distributed_trainer)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()
    progress_printer = ProgressPrinter(tag='Training')

    # perform model training
    for epoch in range(max_epochs):        # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:   # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)                                 # fetch minibatch.
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        progress_printer.epoch_summary(with_metric=True)
        if distributed_trainer.communicator().current_worker().global_rank == 0:
            persist.save_model(
                z, os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += trainer.previous_minibatch_sample_count
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
def convnet_cifar10(debug_output=False):
    set_computation_network_trace_level(0)

    image_height = 32
    image_width = 32
    num_channels = 3
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width), np.float32)
    label_var = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    input_removemean = minus(input_var, constant(128))
    scaled_input = element_times(constant(0.00390625), input_removemean)
    with default_options(activation=relu, pad=True):
        z = Sequential([
            LayerStack(2, lambda: [
                Convolution((3, 3), 64),
                Convolution((3, 3), 64),
                MaxPooling((3, 3), (2, 2))
            ]),
            LayerStack(2, lambda i: [Dense([256, 128][i]), Dropout(0.5)]),
            Dense(num_output_classes, activation=None)
        ])(scaled_input)

    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    reader_train = create_reader(
        os.path.join(data_path, 'Train_cntk_text.txt'), True,
        input_dim, num_output_classes)

    # training config
    epoch_size = 50000  # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample = [0.0015625] * 10 + [0.00046875] * 10 + [0.00015625]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    momentum_time_constant = [0] * 20 + [-minibatch_size / np.log(0.9)]
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, ce, pe, learner)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()
    progress_printer = ProgressPrinter(tag='Training')

    # Get minibatches of images to train with and perform model training
    max_epochs = 30
    for epoch in range(max_epochs):        # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:   # loop over minibatches in the epoch
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)                                 # fetch minibatch.
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        progress_printer.epoch_summary(with_metric=True)
        persist.save_model(
            z, os.path.join(model_path, "ConvNet_CIFAR10_{}.dnn".format(epoch)))

    # Load test data
    reader_test = create_reader(os.path.join(data_path, 'Test_cntk_text.txt'), False,
                                input_dim, num_output_classes)

    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
def create_alexnet():
    # Input variables denoting the features and label data
    feature_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # apply model to input
    # remove mean value
    input = minus(feature_var, constant(114), name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
            Convolution((11, 11), 96, init=normal(0.01), pad=False, strides=(4, 4), name='conv1'),
            Activation(activation=relu, name='relu1'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm1'),
            MaxPooling((3, 3), (2, 2), name='pool1'),

            Convolution((5, 5), 192, init=normal(0.01), init_bias=0.1, name='conv2'),
            Activation(activation=relu, name='relu2'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm2'),
            MaxPooling((3, 3), (2, 2), name='pool2'),

            Convolution((3, 3), 384, init=normal(0.01), name='conv3'),
            Activation(activation=relu, name='relu3'),
            Convolution((3, 3), 384, init=normal(0.01), init_bias=0.1, name='conv4'),
            Activation(activation=relu, name='relu4'),
            Convolution((3, 3), 256, init=normal(0.01), init_bias=0.1, name='conv5'),
            Activation(activation=relu, name='relu5'),
            MaxPooling((3, 3), (2, 2), name='pool5'),

            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, init=normal(0.01), name='fc8')
        ])(input)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'output': z
    }
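# Hypothetical feature-extraction sketch: because each AlexNet layer above is
# given a name, an intermediate output (e.g. the pre-ReLU 'fc6' activations)
# can be looked up by name on the model returned by create_alexnet().
# find_by_name and combine are standard CNTK Function operations; the node
# name 'fc6' comes from the layer names above.
def _example_alexnet_feature_layer(alexnet):
    fc6 = alexnet['output'].find_by_name('fc6')
    return combine([fc6])   # a Function whose output is the fc6 features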
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = input_variable(
        (num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = input_variable(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with default_options(activation=relu):
        z = Sequential([
            Convolution((3, 3, 3), 64, pad=True),
            MaxPooling((1, 2, 2), (1, 2, 2)),
            LayerStack(3, lambda i: [
                Convolution((3, 3, 3), [96, 128, 128][i], pad=True),
                Convolution((3, 3, 3), [96, 128, 128][i], pad=True),
                MaxPooling((2, 2, 2), (2, 2, 2))
            ]),
            LayerStack(2, lambda: [Dense(1024), Dropout(0.5)]),
            Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error.
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 1322  # for now we manually specify epoch size
    minibatch_size = 4

    # Set learning parameters
    lr_per_sample = [0.01] * 10 + [0.001] * 10 + [0.0001]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    trainer = Trainer(z, ce, pe, learner)

    log_number_of_parameters(z)
    print()
    progress_printer = ProgressPrinter(tag='Training')

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()
        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(minibatch_size)
            trainer.train_minibatch({input_var: videos, label_var: labels})
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        progress_printer.epoch_summary(with_metric=True)

    # Test data for trained model
    epoch_size = 332
    minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({
            input_var: videos,
            label_var: labels
        }) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
def create_vgg19():
    # Input variables denoting the features and label data
    feature_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # apply model to input
    # remove mean value
    input = minus(feature_var, constant([[[104]], [[117]], [[124]]]), name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU)
            LayerStack(2, lambda i: [
                Convolution2D((3, 3), 64, name='conv1_{}'.format(i)),
                Activation(activation=relu, name='relu1_{}'.format(i)),
            ]),
            MaxPooling((2, 2), (2, 2), name='pool1'),

            LayerStack(2, lambda i: [
                Convolution2D((3, 3), 128, name='conv2_{}'.format(i)),
                Activation(activation=relu, name='relu2_{}'.format(i)),
            ]),
            MaxPooling((2, 2), (2, 2), name='pool2'),

            LayerStack(4, lambda i: [
                Convolution2D((3, 3), 256, name='conv3_{}'.format(i)),
                Activation(activation=relu, name='relu3_{}'.format(i)),
            ]),
            MaxPooling((2, 2), (2, 2), name='pool3'),

            LayerStack(4, lambda i: [
                Convolution2D((3, 3), 512, name='conv4_{}'.format(i)),
                Activation(activation=relu, name='relu4_{}'.format(i)),
            ]),
            MaxPooling((2, 2), (2, 2), name='pool4'),

            LayerStack(4, lambda i: [
                Convolution2D((3, 3), 512, name='conv5_{}'.format(i)),
                Activation(activation=relu, name='relu5_{}'.format(i)),
            ]),
            MaxPooling((2, 2), (2, 2), name='pool5'),

            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, name='fc8')
        ])(input)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)
    pe5 = classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z
    }