def inception_v3_norm_model(input, labelDim, dropRate, bnTimeConst):
    """Build the Inception-v3 model on a normalized copy of ``input``.

    The input is shifted by 128 and multiplied by 1/128 before it is passed
    to ``inception_v3_model``. Assuming 8-bit pixel values (0..255), this
    maps them to roughly [-1, 1) -- confirm against the reader's output.

    Args:
        input: Input node of the network. The name shadows the builtin but
            is kept so keyword callers continue to work.
        labelDim: Forwarded unchanged to ``inception_v3_model``.
        dropRate: Forwarded unchanged to ``inception_v3_model``.
        bnTimeConst: Forwarded unchanged to ``inception_v3_model``.

    Returns:
        The value returned by ``inception_v3_model`` for the normalized input.
    """
    featMean = 128
    # Float literal on purpose: under Python 2 (without
    # ``from __future__ import division``) ``1 / 128`` is integer division
    # and evaluates to 0, which would zero out every input value.
    featScale = 1.0 / 128

    input_subtracted = minus(input, featMean)
    input_scaled = element_times(input_subtracted, featScale)
    return inception_v3_model(input_scaled, labelDim, dropRate, bnTimeConst)
def inception_v3_norm_model(input, labelDim, dropRate, bnTimeConst):
    """Normalize ``input`` and feed it to ``inception_v3_model``.

    Normalization is ``(input - 128) * (1 / 128)``. For inputs in 0..255
    (presumably raw 8-bit pixels -- verify against the caller) the result
    lies in approximately [-1, 1).

    Args:
        input: Network input node (parameter name kept for compatibility
            even though it shadows the ``input`` builtin).
        labelDim: Passed through to ``inception_v3_model``.
        dropRate: Passed through to ``inception_v3_model``.
        bnTimeConst: Passed through to ``inception_v3_model``.

    Returns:
        The result of ``inception_v3_model`` applied to the normalized input.
    """
    featMean = 128
    # Must be a true division: with Python 2 integer semantics ``1/128``
    # is 0, so the scaled input would be all zeros. A float operand makes
    # the result 0.0078125 on every Python version.
    featScale = 1.0 / 128

    input_subtracted = minus(input, featMean)
    input_scaled = element_times(input_subtracted, featScale)
    return inception_v3_model(input_scaled, labelDim, dropRate, bnTimeConst)
def create_model(self):
    """Build the network and store it in ``self.model``.

    The layout is AlexNet-style: five convolutions (with local response
    normalization after the first two and max pooling after the first,
    second and fifth), followed by three dense layers. A constant of 114
    is subtracted from ``self.input`` before the first layer. The final
    dense layer has ``self.number_labels`` outputs and no activation.

    Nothing is returned; the result is assigned to ``self.model``.
    """
    centered_input = minus(self.input, constant(114), name='mean_removed_input')

    # Both normalization layers use the same hyper-parameters.
    lrn_args = (1.0, 2, 0.0001, 0.75)
    pool_window = (3, 3)
    pool_stride = (2, 2)

    # Layers are created inside the default_options scope so they pick up
    # activation=None, pad=True and bias=True unless overridden.
    with default_options(activation=None, pad=True, bias=True):
        layer_stack = [
            # Block 1: the only convolution without padding.
            Convolution2D((11, 11), 96, init=normal(0.01), pad=False, name='conv1'),
            Activation(activation=relu, name='relu1'),
            self.__local_response_normalization(*lrn_args, name='norm1'),
            MaxPooling(pool_window, pool_stride, name='pool1'),

            # Block 2
            Convolution2D((5, 5), 192, init=normal(0.01), init_bias=0.1, name='conv2'),
            Activation(activation=relu, name='relu2'),
            self.__local_response_normalization(*lrn_args, name='norm2'),
            MaxPooling(pool_window, pool_stride, name='pool2'),

            # Blocks 3-5: plain convolution + ReLU, pooled once at the end.
            Convolution2D((3, 3), 384, init=normal(0.01), name='conv3'),
            Activation(activation=relu, name='relu3'),
            Convolution2D((3, 3), 384, init=normal(0.01), init_bias=0.1, name='conv4'),
            Activation(activation=relu, name='relu4'),
            Convolution2D((3, 3), 256, init=normal(0.01), init_bias=0.1, name='conv5'),
            Activation(activation=relu, name='relu5'),
            MaxPooling(pool_window, pool_stride, name='pool5'),

            # Classifier head
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(self.number_labels, init=normal(0.01), name='fc8'),
        ]
        self.model = Sequential(layer_stack)(centered_input)
def create_network(num_convolution_layers):
    """Create the convolutional classification network and its criteria.

    The input is centered by subtracting 128 and scaled by 0.00390625
    (1/256). It then passes through ``num_convolution_layers`` repetitions
    of two 3x3/64 convolutions plus a 3x3 max pooling with stride 2, two
    dense layers (256 and 128 units, each followed by dropout of 0.5) and
    a final dense layer with ``_NUM_CLASSES`` outputs and no activation.

    Args:
        num_convolution_layers: Number of times the convolution/pooling
            group is repeated.

    Returns:
        dict with the keys ``'name'``, ``'feature'``, ``'label'``, ``'ce'``
        (cross entropy with softmax), ``'pe'`` (classification error) and
        ``'output'`` (the model function).
    """
    # Input variables denoting the features and label data
    features = cntk.input_variable((_NUM_CHANNELS, _IMAGE_HEIGHT, _IMAGE_WIDTH))
    labels = cntk.input_variable((_NUM_CLASSES))

    # Center and scale the raw input before it reaches the first layer.
    centered = minus(features, constant(128))
    normalized = element_times(constant(0.00390625), centered)

    print('Creating NN model')

    dense_widths = [256, 128]

    def conv_group():
        # One repetition of the convolutional part of the network.
        return [
            layers.Convolution2D((3, 3), 64),
            layers.Convolution2D((3, 3), 64),
            layers.MaxPooling((3, 3), (2, 2)),
        ]

    def dense_group(i):
        # i-th hidden dense layer followed by dropout.
        return [layers.Dense(dense_widths[i]), layers.Dropout(0.5)]

    with layers.default_options(activation=relu, pad=True):
        network = layers.Sequential([
            layers.For(range(num_convolution_layers), conv_group),
            layers.For(range(2), dense_group),
            layers.Dense(_NUM_CLASSES, activation=None),
        ])(normalized)

    # loss and metric
    loss = cross_entropy_with_softmax(network, labels)
    error = classification_error(network, labels)

    result = {}
    result['name'] = 'convnet'
    result['feature'] = features
    result['label'] = labels
    result['ce'] = loss
    result['pe'] = error
    result['output'] = network
    return result
def convnet_cifar10(debug_output=False):
    """Train a small convolutional network on CIFAR-10 and evaluate it.

    The function builds the network, trains it for 30 epochs of 50000
    samples with momentum SGD, saves the model after every epoch under
    ``model_path`` and finally evaluates on 10000 test samples.

    Args:
        debug_output: Accepted for interface compatibility; this function
            does not read it.

    Returns:
        The classification error over the test samples, i.e. the
        per-minibatch results of ``trainer.test_minibatch`` averaged with
        each minibatch weighted by the number of samples it contained.
    """
    set_computation_network_trace_level(0)

    image_height = 32
    image_width = 32
    num_channels = 3
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width), np.float32)
    label_var = input_variable(num_output_classes, np.float32)

    # Center the input around 128 and scale it by 0.00390625 (1/256).
    input_removemean = minus(input_var, constant(128))
    scaled_input = element_times(constant(0.00390625), input_removemean)

    # Instantiate the feedforward classification model
    with default_options(activation=relu, pad=True):
        z = Sequential([
            LayerStack(2, lambda: [
                Convolution((3, 3), 64),
                Convolution((3, 3), 64),
                MaxPooling((3, 3), (2, 2)),
            ]),
            LayerStack(2, lambda i: [Dense([256, 128][i]), Dropout(0.5)]),
            Dense(num_output_classes, activation=None),
        ])(scaled_input)

    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    reader_train = create_reader(
        os.path.join(data_path, 'Train_cntk_text.txt'),
        True, input_dim, num_output_classes)

    # training config
    epoch_size = 50000  # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters: the learning rate drops after epochs 10 and
    # 20; momentum is off (time constant 0) for the first 20 epochs.
    lr_per_sample = [0.0015625] * 10 + [0.00046875] * 10 + [0.00015625]
    lr_schedule = learning_rate_schedule(
        lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    momentum_time_constant = [0] * 20 + [-minibatch_size / np.log(0.9)]
    mm_schedule = momentum_as_time_constant_schedule(
        momentum_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, ce, pe, learner)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels,
    }

    log_number_of_parameters(z)
    print()
    progress_printer = ProgressPrinter(tag='Training')

    # Get minibatches of images to train with and perform model training
    max_epochs = 30
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            # Never request more than what is left of the epoch.
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            # count samples processed so far
            sample_count += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        progress_printer.epoch_summary(with_metric=True)
        persist.save_model(
            z, os.path.join(model_path, "ConvNet_CIFAR10_{}.dnn".format(epoch)))

    # Load test data
    reader_test = create_reader(
        os.path.join(data_path, 'Test_cntk_text.txt'),
        False, input_dim, num_output_classes)

    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels,
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # Weight the minibatch result by the number of samples actually
        # returned, not the number requested, so that the numerator, the
        # denominator and the progress counter always agree.
        num_samples = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * num_samples
        metric_denom += num_samples

        # Keep track of the number of samples processed so far.
        sample_count += num_samples
        minibatch_index += 1

    print("")
    # minibatch_index already equals the number of evaluated minibatches,
    # so it is the upper bound of the 1-based range being reported.
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
def create_vgg19():
    """Build the VGG-19 network together with its loss and error metrics.

    A per-channel constant (104, 117, 124) is subtracted from the input.
    The network consists of five groups of 3x3 convolutions (2, 2, 4, 4 and
    4 layers with 64, 128, 256, 512 and 512 filters), each group followed
    by a 2x2 max pooling with stride 2, and then three dense layers.

    Returns:
        dict with the keys ``'feature'``, ``'label'``, ``'ce'`` (cross
        entropy with softmax), ``'pe'`` (classification error), ``'pe5'``
        (classification error with ``topN=5``) and ``'output'``.
    """
    # Input variables denoting the features and label data
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # remove mean value
    channel_means = constant([[[104]], [[117]], [[124]]])
    centered = minus(feature_var, channel_means, name='mean_removed_input')

    def conv_relu_group(depth, filters, conv_fmt, relu_fmt):
        # Convolution and ReLU are kept as separate layers so that the
        # output before the ReLU can be addressed by name for feature
        # extraction.
        return For(range(depth), lambda i: [
            Convolution2D((3, 3), filters, name=conv_fmt.format(i)),
            Activation(activation=relu, name=relu_fmt.format(i)),
        ])

    pool_shape = (2, 2)
    pool_stride = (2, 2)

    # apply model to input
    with default_options(activation=None, pad=True, bias=True):
        z = Sequential([
            conv_relu_group(2, 64, 'conv1_{}', 'relu1_{}'),
            MaxPooling(pool_shape, pool_stride, name='pool1'),

            conv_relu_group(2, 128, 'conv2_{}', 'relu2_{}'),
            MaxPooling(pool_shape, pool_stride, name='pool2'),

            conv_relu_group(4, 256, 'conv3_{}', 'relu3_{}'),
            MaxPooling(pool_shape, pool_stride, name='pool3'),

            conv_relu_group(4, 512, 'conv4_{}', 'relu4_{}'),
            MaxPooling(pool_shape, pool_stride, name='pool4'),

            conv_relu_group(4, 512, 'conv5_{}', 'relu5_{}'),
            MaxPooling(pool_shape, pool_stride, name='pool5'),

            Dense(4096, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, name='fc8'),
        ])(centered)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)
    pe5 = C.classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'ce': ce,
        'pe': pe,
        'pe5': pe5,
        'output': z,
    }
def convnet_cifar10(debug_output=False):
    """Train and evaluate a small convolutional network on CIFAR-10.

    Training runs for 30 epochs of 50000 samples using momentum SGD; the
    model is saved under ``model_path`` at the end of every epoch.
    Evaluation afterwards covers 10000 test samples.

    Args:
        debug_output: Not read by this function; kept so existing callers
            that pass it continue to work.

    Returns:
        The test classification error: the values returned by
        ``trainer.test_minibatch`` averaged over all evaluated samples,
        each minibatch weighted by its actual sample count.
    """
    set_computation_network_trace_level(0)

    image_height = 32
    image_width = 32
    num_channels = 3
    input_dim = image_height * image_width * num_channels
    num_output_classes = 10

    # Input variables denoting the features and label data
    input_var = input_variable((num_channels, image_height, image_width), np.float32)
    label_var = input_variable(num_output_classes, np.float32)

    # Subtract 128 and multiply by 0.00390625 (1/256) to normalize the input.
    input_removemean = minus(input_var, constant(128))
    scaled_input = element_times(constant(0.00390625), input_removemean)

    # Instantiate the feedforward classification model
    with default_options(activation=relu, pad=True):
        z = Sequential([
            LayerStack(2, lambda: [
                Convolution((3, 3), 64),
                Convolution((3, 3), 64),
                MaxPooling((3, 3), (2, 2)),
            ]),
            LayerStack(2, lambda i: [
                Dense([256, 128][i]),
                Dropout(0.5),
            ]),
            Dense(num_output_classes, activation=None),
        ])(scaled_input)

    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    reader_train = create_reader(
        os.path.join(data_path, 'Train_cntk_text.txt'),
        True, input_dim, num_output_classes)

    # training config
    epoch_size = 50000  # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters. The learning rate is lowered after epochs
    # 10 and 20; the momentum time constant is 0 for the first 20 epochs.
    lr_per_sample = [0.0015625] * 10 + [0.00046875] * 10 + [0.00015625]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size)
    momentum_time_constant = [0] * 20 + [-minibatch_size / np.log(0.9)]
    mm_schedule = momentum_as_time_constant_schedule(
        momentum_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, ce, pe, learner)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels,
    }

    log_number_of_parameters(z)
    print()
    progress_printer = ProgressPrinter(tag='Training')

    # Get minibatches of images to train with and perform model training
    max_epochs = 30
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            # The last request of an epoch is trimmed to the remaining samples.
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count),
                input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += data[label_var].num_samples  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        progress_printer.epoch_summary(with_metric=True)
        persist.save_model(
            z, os.path.join(model_path, "ConvNet_CIFAR10_{}.dnn".format(epoch)))

    # Load test data
    reader_test = create_reader(
        os.path.join(data_path, 'Test_cntk_text.txt'),
        False, input_dim, num_output_classes)

    input_map = {
        input_var: reader_test.streams.features,
        label_var: reader_test.streams.labels,
    }

    # Test data for trained model
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)

        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)

        # Use the number of samples the reader actually delivered as the
        # weight; the requested size and the delivered size are otherwise
        # free to disagree and would skew the weighted average.
        num_samples = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * num_samples
        metric_denom += num_samples

        # Keep track of the number of samples processed so far.
        sample_count += num_samples
        minibatch_index += 1

    print("")
    # After the loop minibatch_index is the count of evaluated minibatches,
    # which is exactly the last index of the 1-based range printed below.
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(
        minibatch_index, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
def create_vgg19():
    """Assemble the VGG-19 model, its loss and its error metrics.

    The input has the per-channel constants 104, 117 and 124 subtracted.
    Five convolutional stages follow (each a run of 3x3 convolution + ReLU
    pairs closed by a 2x2/stride-2 max pooling) and then the dense layers
    ``fc6``, ``fc7`` and ``fc8``.

    Returns:
        dict mapping ``'feature'`` and ``'label'`` to the input variables,
        ``'ce'`` to the cross entropy with softmax, ``'pe'`` and ``'pe5'``
        to the classification error (top-1 and ``topN=5``) and
        ``'output'`` to the model.
    """
    # Input variables denoting the features and label data
    feature_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    # remove mean value
    mean_removed = minus(feature_var,
                         constant([[[104]], [[117]], [[124]]]),
                         name='mean_removed_input')

    # (number of conv layers, filters, conv name, relu name, pool name)
    conv_stages = (
        (2, 64, 'conv1_{}', 'relu1_{}', 'pool1'),
        (2, 128, 'conv2_{}', 'relu2_{}', 'pool2'),
        (4, 256, 'conv3_{}', 'relu3_{}', 'pool3'),
        (4, 512, 'conv4_{}', 'relu4_{}', 'pool4'),
        (4, 512, 'conv5_{}', 'relu5_{}', 'pool5'),
    )

    def build_stage(depth, filters, conv_fmt, relu_fmt, pool_name):
        # We separate Convolution and ReLU to name the output for feature
        # extraction (usually before ReLU).
        conv_run = For(range(depth), lambda i: [
            Convolution2D((3, 3), filters, name=conv_fmt.format(i)),
            Activation(activation=relu, name=relu_fmt.format(i)),
        ])
        pooling = MaxPooling((2, 2), (2, 2), name=pool_name)
        return [conv_run, pooling]

    # apply model to input
    with default_options(activation=None, pad=True, bias=True):
        stack = []
        for stage in conv_stages:
            stack.extend(build_stage(*stage))

        stack.append(Dense(4096, name='fc6'))
        stack.append(Activation(activation=relu, name='relu6'))
        stack.append(Dropout(0.5, name='drop6'))
        stack.append(Dense(4096, name='fc7'))
        stack.append(Activation(activation=relu, name='relu7'))
        stack.append(Dropout(0.5, name='drop7'))
        stack.append(Dense(num_classes, name='fc8'))

        z = Sequential(stack)(mean_removed)

    # loss and metric
    ce = C.cross_entropy_with_softmax(z, label_var)
    pe = C.classification_error(z, label_var)
    pe5 = C.classification_error(z, label_var, topN=5)

    log_number_of_parameters(z)
    print()

    outputs = dict()
    outputs['feature'] = feature_var
    outputs['label'] = label_var
    outputs['ce'] = ce
    outputs['pe'] = pe
    outputs['pe5'] = pe5
    outputs['output'] = z
    return outputs
reader_test = create_reader(test_map_image, data_mean_file, False, image_height=image_height, image_width=image_width, num_channels=num_channels, num_classes=num_classes) #============================================================================== # ### #============================================================================== # Normalize the inputs feature_scale = 1.0 / 256.0 input_var = input_variable((num_channels, image_height, image_width)) input_var_mean = minus(input_var, 128) input_var_norm = element_times(feature_scale, input_var_mean) label_var = input_variable((num_classes)) # apply model to input z = model_func(input_var_norm, out_dims=num_classes) """ Training action """ # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) #pe5 = classification_error(z, label_var, topN=5)
def __init__(self, state_dim, action_dim, gamma=0.99, learning_rate=1e-4, momentum=0.95):
    """Build the online network, the loss graph and the trainer.

    Args:
        state_dim: Shape of a single state; used for the model signature
            and for the ``pre_states`` / ``post_states`` inputs.
        action_dim: Number of outputs of the final dense layer and size of
            the ``actions`` input (presumably a one-hot action encoding,
            since it is multiplied with the Q-values and summed -- confirm
            against the caller).
        gamma: Factor applied to the next-state value in the target.
        learning_rate: Per-minibatch learning rate for Adam.
        momentum: Momentum for Adam; the variance momentum is fixed at 0.999.
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.gamma = gamma

    with default_options(activation=relu, init=he_uniform()):
        # Convolution filter counts were halved to save on memory (no GPU).
        online_layers = [
            Convolution2D((8, 8), 16, strides=4, name='conv1'),
            Convolution2D((4, 4), 32, strides=2, name='conv2'),
            Convolution2D((3, 3), 32, strides=1, name='conv3'),
            Dense(256, init=he_uniform(scale=0.01), name='dense1'),
            Dense(action_dim, activation=None, init=he_uniform(scale=0.01), name='actions'),
        ]
        self.model = Sequential(online_layers)
        self.model.update_signature(Tensor[state_dim])

    # Create the target model as a copy of the online model;
    # update_target() is expected to populate self.target_model.
    self.target_model = None
    self.update_target()

    # Inputs of the training graph.
    self.pre_states = input_variable(state_dim, name='pre_states')
    self.actions = input_variable(action_dim, name='actions')
    self.post_states = input_variable(state_dim, name='post_states')
    self.rewards = input_variable((), name='rewards')
    self.terminals = input_variable((), name='terminals')
    self.is_weights = input_variable((), name='is_weights')

    # Q-value of the action that was taken in the pre-state.
    pre_state_q = self.model(self.pre_states)
    predicted_q = reduce_sum(pre_state_q * self.actions, axis=0)

    # DDQN - calculate target q values: the online model selects the
    # action, the target model supplies its value. (Plain DQN would take
    # reduce_max over the target model's output instead.)
    online_post_q = self.model(self.post_states)
    greedy_action = argmax(online_post_q, axis=0)
    online_selection = one_hot(greedy_action, self.action_dim)
    target_post_q = self.target_model(self.post_states)
    selected_q = reduce_sum(target_post_q * online_selection, axis=0)

    # The next-state value is multiplied by (1 - terminals), so it drops
    # out wherever terminals is 1.
    bootstrap_q = (1.0 - self.terminals) * selected_q
    target_q = stop_gradient(self.rewards + self.gamma * bootstrap_q)

    # Huber loss: quadratic below delta, linear above it.
    delta = 1.0
    self.td_error = minus(predicted_q, target_q, name='td_error')
    abs_error = abs(self.td_error)
    is_small = less(abs_error, delta)
    quadratic_part = square(self.td_error) * 0.5
    linear_part = delta * (abs_error - 0.5 * delta)
    errors = element_select(is_small, quadratic_part, linear_part)

    # Each sample's error is scaled by its is_weights input.
    loss = errors * self.is_weights

    # Adam based SGD
    lr_sched = learning_rate_schedule(learning_rate, UnitType.minibatch)
    momentum_sched = momentum_schedule(momentum)
    variance_sched = momentum_schedule(0.999)
    self._learner = adam(self.model.parameters, lr_sched, momentum_sched,
                         variance_momentum=variance_sched)

    self.writer = TensorBoardProgressWriter(log_dir='metrics', model=self.model)
    self.trainer = Trainer(self.model, (loss, None), [self._learner], self.writer)