def get_projection_map(out_dim, in_dim): if in_dim > out_dim: raise ValueError("Can only project from lower to higher dimensionality") projection_map_values = np.zeros(in_dim * out_dim, dtype=np.float32) for i in range(0, in_dim): projection_map_values[(i * out_dim) + i] = 1.0 shape = (in_dim, 1, 1, out_dim) return constant(value=projection_map_values.reshape(shape))
def get_projection_map(out_dim, in_dim): if in_dim > out_dim: raise ValueError( "Can only project from lower to higher dimensionality") projection_map_values = np.zeros(in_dim * out_dim, dtype=np.float32) for i in range(0, in_dim): projection_map_values[(i * out_dim) + i] = 1.0 shape = (in_dim, 1, 1, out_dim) return constant(value=projection_map_values.reshape(shape))
def create_model(self): mean_removed_features = minus(self.input, constant(114), name='mean_removed_input') with default_options(activation=None, pad=True, bias=True): self.model = Sequential([ Convolution2D((11, 11), 96, init=normal(0.01), pad=False, name='conv1'), Activation(activation=relu, name='relu1'), self.__local_response_normalization(1.0, 2, 0.0001, 0.75, name='norm1'), MaxPooling((3, 3), (2, 2), name='pool1'), Convolution2D((5, 5), 192, init=normal(0.01), init_bias=0.1, name='conv2'), Activation(activation=relu, name='relu2'), self.__local_response_normalization(1.0, 2, 0.0001, 0.75, name='norm2'), MaxPooling((3, 3), (2, 2), name='pool2'), Convolution2D((3, 3), 384, init=normal(0.01), name='conv3'), Activation(activation=relu, name='relu3'), Convolution2D((3, 3), 384, init=normal(0.01), init_bias=0.1, name='conv4'), Activation(activation=relu, name='relu4'), Convolution2D((3, 3), 256, init=normal(0.01), init_bias=0.1, name='conv5'), Activation(activation=relu, name='relu5'), MaxPooling((3, 3), (2, 2), name='pool5'), Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'), Activation(activation=relu, name='relu6'), Dropout(0.5, name='drop6'), Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'), Activation(activation=relu, name='relu7'), Dropout(0.5, name='drop7'), Dense(self.number_labels, init=normal(0.01), name='fc8') ])(mean_removed_features)
def eval(node, clean_up=True): """ It evaluates a node that has taken a numpy array as input. Note that sequences are not supported yet by this method Examples: Plus with two matrices >>> print (cntk.eval(cntk.ops.plus([[-30.,40.], [1.,2.]], [[-30.,40.], [1.,2.]]))) # [array([[[-60., 80.], [2., 4.]]])] Times with broadcast of a scalar over a matrix >>> print (cntk.eval(cntk.ops.element_times([[-30.,40.], [1.,2.]], 5))) # [array([[[-150., 200.], [5., 10.]]])] Args: node (:class:`cntk.graph.ComputationNode`): the node to evaluate clean_up (bool): whether the temporary directory should be removed when the context is left Returns: NumPy array containing the result """ from cntk.context import get_new_context from cntk.ops import input_numpy, constant from cntk.graph import ComputationNode, _InputComputationNodeBase import numpy as np # call a helper method to get a context with get_new_context() as ctx: ctx.clean_up = clean_up first = True # The params are passed as arryas, e.g. plus([1,2], [3,4]), and we need to # wrap them with input and parameter nodes. if node.params: for p in node.params: if p in node.inputs: val = getattr(node, p) if not isinstance(val, ComputationNode): # One param needs to be an Input() node. This will be fixed in # CNTK soon, so that we can remove this workaround and evaluate a # network with no inputs. if first: ir = input_numpy([val], alias=p, name=p) setattr(node, p, ir) first = False else: setattr(node, p, constant(getattr(node, p), name=p)) else: if isinstance(val, _InputComputationNodeBase) and first: first = False return ctx.eval(node)
def create_network(num_convolution_layers): """ Create network """ # Input variables denoting the features and label data input_var = cntk.input_variable( (_NUM_CHANNELS, _IMAGE_HEIGHT, _IMAGE_WIDTH)) label_var = cntk.input_variable((_NUM_CLASSES)) # create model, and configure learning parameters # Instantiate the feedforward classification model input_removemean = minus(input_var, constant(128)) scaled_input = element_times(constant(0.00390625), input_removemean) print('Creating NN model') with layers.default_options(activation=relu, pad=True): model = layers.Sequential([ layers.For( range(num_convolution_layers), lambda: [ layers.Convolution2D((3, 3), 64), layers.Convolution2D((3, 3), 64), layers.MaxPooling((3, 3), (2, 2)) ]), layers.For( range(2), lambda i: [layers.Dense([256, 128][i]), layers.Dropout(0.5)]), layers.Dense(_NUM_CLASSES, activation=None) ])(scaled_input) # loss and metric ce = cross_entropy_with_softmax(model, label_var) pe = classification_error(model, label_var) return { 'name': 'convnet', 'feature': input_var, 'label': label_var, 'ce': ce, 'pe': pe, 'output': model }
def test_simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 epoch_size = sys.maxsize minibatch_size = 32 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 3 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size lr = cntk_py.learning_rates_per_sample(0.003125) input = variable((input_dim,), np.float32, needs_gradient=False, name="features") scaled_input = element_times(constant((), 0.00390625), input) label = variable((num_output_classes,), np.float32, needs_gradient=False, name="labels") dev = cntk_py.DeviceDescriptor.cpudevice() netout = fully_connected_classifier_net(scaled_input.output(), num_output_classes, hidden_layers_dim, num_hidden_layers, dev, sigmoid) ce = cross_entropy_with_softmax(netout.output(), label) pe = classification_error(netout.output(), label) ffnet = combine([ce, pe, netout], "classifier_model") cm = create_mb_source(input_dim, num_output_classes, epoch_size) stream_infos = cm.stream_infos() for si in stream_infos: if si.m_name == 'features': features_si = si elif si.m_name == 'labels': labels_si = si minibatch_size_limits = dict() minibatch_size_limits[features_si] = (0,minibatch_size) minibatch_size_limits[labels_si] = (0,minibatch_size) trainer = cntk_py.Trainer(ffnet, ce.output(), [cntk_py.sgdlearner(ffnet.parameters(), lr)]) for i in range(0,int(num_minibatches_to_train)): mb=cm.get_next_minibatch(minibatch_size_limits, dev) arguments = dict() arguments[input] = mb[features_si].m_data arguments[label] = mb[labels_si].m_data trainer.train_minibatch(arguments, dev) freq = 20 if i % freq == 0: training_loss = get_train_loss(trainer) print(str(i+freq) + ": " + str(training_loss)) #TODO: move the testing code into a separate test module ? assert np.allclose(training_loss, 0.6142425537109375, atol=TOLERANCE_ABSOLUTE)
def eval(node): """ It evaluates a node that has taken a numpy array as input. Note that sequences are not supported yet by this method Examples: Plus with two matrices >>> print (cntk.eval(cntk.ops.plus([[-30.,40.], [1.,2.]], [[-30.,40.], [1.,2.]]))) # [array([[[-60., 80.], [2., 4.]]])] Times with broadcast of a scalar over a matrix >>> print (cntk.eval(cntk.ops.element_times([[-30.,40.], [1.,2.]], 5))) # [array([[[-150., 200.], [5., 10.]]])] Args: node (:class:`cntk.graph.ComputationNode`): the node to evaluate Returns: NumPy array containing the result """ from cntk.context import get_context from cntk.ops import input_numpy, constant from cntk.graph import ComputationNode # call a helper method to get a context ctx = get_context() first = True # The params are passed as arryas, e.g. plus([1,2], [3,4]), and we need to # wrap them with input and parameter nodes. if node.params: for p in node.params: if p in node.inputs: val = getattr(node, p) if not isinstance(val, ComputationNode): # One param needs to be an Input() node. This will being fixed in # CNTK soon, so that we can remove this workaround and evaluate a # network with no inputs. if first: if not isinstance(val, list): # inputs have the outmost dimension for sequences val = [val] ir = input_numpy([val], alias=p, name=p) setattr(node, p, ir) first = False else: setattr(node, p, constant(getattr(node, p), name=p)) else: if val.op_name == 'CNTK2.Input' and first: first = False return ctx.eval(node)
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant((), 0.00390625), input) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) rel_path = r"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_dim), StreamConfiguration(labels_stream_name, num_output_classes) ]) features_si = mb_source.stream_info(feature_stream_name) labels_si = mb_source.stream_info(labels_stream_name) # Instantiate the trainer object to drive the model training lr = learning_rates_per_sample(0.003125) trainer = Trainer(netout, ce, pe, [sgd_learner(netout.owner.parameters(), lr)]) # Get minibatches of images to train with and perform model training minibatch_size = 32 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 1 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 20 for i in range(0, int(num_minibatches_to_train)): mb = mb_source.get_next_minibatch(minibatch_size) # Specify the mapping of input variables in the model to actual minibatch data to be trained with arguments = { input: mb[features_si].m_data, label: mb[labels_si].m_data } trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq)
def sanitize_input(arg, fallback_dtype=np.float32, reshape=None): """sanitize_input(arg, fallback_dtype=np.float32, reshape=None) Convert to :class:`~cntk.variables.Variable` so that it can be passed as Variable to the CNTK operators. * If ``arg`` is a NumPy array and its type is neither `np.float32` nor `np.float64`, it sets it to `np.float32`. * If ``arg`` is an op, it is assumed that it has only one output, which will be returned. Args: arg (number, NumPy array, :class:`~cntk.variables.Variable`, or :class:`~cntk.ops.functions.Function`): input fallback_dtype (NumPy dtype): fallback dtype in case ``arg`` is a list Returns: Leaves Constant, Parameter, and Variable as is. Returns Constant, if ``arg`` is a number or NumPy array. Variable otherwise. """ from cntk.ops.functions import UserFunction from cntk.variables import Constant, Variable, Parameter from cntk.ops.functions import Function from cntk.ops import constant from ..core import asarray # is it a Variable or a Function? if isinstance(arg, (Constant, cntk_py.Constant, Variable, cntk_py.Variable, Parameter, cntk_py.Parameter, Function, cntk_py.Function)): return arg if isinstance(arg, Variable._Type): raise ValueError("Input is a type object (" + str(arg) + "). Did you mean to pass 'input(" + str(arg) + ")'?") # maybe a Python list that we can interpret as a NumPy array? if isinstance(arg, list) and not arg: raise ValueError('input is empty') if not isinstance(arg, np.ndarray) or arg.dtype != fallback_dtype: # TODO: check whether Values can be ingested directly arg = asarray(arg, fallback_dtype) if arg.shape == (): arg.shape = (1,) if reshape: arg = np.reshape(arg, reshape) return constant(value=arg)
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant((), 0.00390625), input) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) rel_path = os.path.join(*"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) if not os.path.exists(path): readme_file = os.path.normpath(os.path.join(os.path.dirname(path), "..", "README.md")) raise RuntimeError("File '%s' does not exist. Please follow the instructions at %s to download and prepare it."%(path, readme_file)) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration( feature_stream_name, input_dim ), StreamConfiguration( labels_stream_name, num_output_classes) ]) features_si = mb_source.stream_info(feature_stream_name) labels_si = mb_source.stream_info(labels_stream_name) # Instantiate the trainer object to drive the model training lr = learning_rates_per_sample(0.003125) trainer = Trainer(netout, ce, pe, [sgd_learner(netout.owner.parameters(), lr)]) # Get minibatches of images to train with and perform model training minibatch_size = 32 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 1 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 20 for i in range(0, int(num_minibatches_to_train)): mb = mb_source.get_next_minibatch(minibatch_size) # Specify the mapping of input variables in the model to actual minibatch data to be trained with arguments = {input : mb[features_si].m_data, label : mb[labels_si].m_data} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq)
def sanitize_input(arg, fallback_dtype=np.float32, reshape=None): """sanitize_input(arg, fallback_dtype=np.float32, reshape=None) Convert to :class:`~cntk.variables.Variable` so that it can be passed as Variable to the CNTK operators. * If ``arg`` is a NumPy array and its type is not among (`np.float32`, `np.float64`, `np.float16`), it sets it to `np.float32`. * If ``arg`` is an op, it is assumed that it has only one output, which will be returned. Args: arg (number, NumPy array, :class:`~cntk.variables.Variable`, or :class:`~cntk.ops.functions.Function`): input fallback_dtype (NumPy dtype): fallback dtype in case ``arg`` is a list Returns: Leaves Constant, Parameter, and Variable as is. Returns Constant, if ``arg`` is a number or NumPy array. Variable otherwise. """ from cntk.ops.functions import UserFunction from cntk.variables import Constant, Variable, Parameter from cntk.ops.functions import Function from cntk.ops import constant from ..core import asarray # is it a Variable or a Function? if isinstance(arg, (Constant, cntk_py.Constant, Variable, cntk_py.Variable, Parameter, cntk_py.Parameter, Function, cntk_py.Function)): return arg if isinstance(arg, Variable._Type): raise ValueError("Input is a type object (" + str(arg) + "). Did you mean to pass 'input_variable(**" + str(arg) + ")'?") # maybe a Python list that we can interpret as a NumPy array? if isinstance(arg, list) and not arg: raise ValueError('input is empty') if not isinstance(arg, np.ndarray) or arg.dtype != fallback_dtype: # TODO: check whether Values can be ingested directly arg = asarray(arg, fallback_dtype) if reshape: arg = np.reshape(arg, reshape) return constant(value=arg)
def sanitize_input(arg, fallback_dtype=np.float32, reshape=None): """ Convert to :class:`~cntk.ops.variables.Variable` so that it can be passed as Variable to the CNTK operators. * If ``arg`` is a NumPy array and its type is neither `np.float32` nor `np.float64`, it sets it to `np.float32`. * If ``arg`` is an op, it is assumed that it has only one output, which will be returned. Args: arg (number, NumPy array, :class:`~cntk.ops.variables.Variable`, or :class:`~cntk.ops.functions.Function`): input fallback_dtype (NumPy dtype): fallback dtype in case ``arg`` is a list Returns: Leaves Constant, Parameter, and Variable as is. Returns Constant, if ``arg`` is a number or NumPy array. Variable otherwise. """ from cntk.ops.variables import Constant, Variable, Parameter from cntk.ops import constant # is it a Variable? if isinstance(arg, (Constant, cntk_py.Constant, Variable, cntk_py.Variable, Parameter, cntk_py.Parameter)): return arg # or a Function? if isinstance(arg, cntk_py.Function): try: return arg.output except RuntimeError: raise ValueError( 'the argument has more than one output, please provide the one you want') # maybe a Python list that we can interpret as a NumPy array? if isinstance(arg, list) and not arg: raise ValueError('input is empty') if not isinstance(arg, np.ndarray) or arg.dtype!=fallback_dtype: arg = np.asarray(arg, dtype=fallback_dtype) if arg.shape == (): arg.shape = (1,) if reshape: arg = np.reshape(arg, reshape) return constant(value=arg)
def batch_norm(cntk_layer, inputs): ''' Setup batch normalization op with given parameters Args: cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`): the layer definition of batch normalization op inputs (list): a list contains all :class:`~cntk.ops.functions.Function` or :class:`~cntk.input` Return: :func:`~cntk.ops.functions.Function`: instaced cntk batch normalization op ''' sanitize_input = internal.sanitize_input(inputs[0]) parameter_tensor = (sanitize_input.shape[0], ) scale_init = 1 bias_init = 0 mean_init = 1 var_init = 0 if cntk_layer.parameter_tensor: if len(cntk_layer.parameter_tensor) < 3: raise AssertionError('At least three tensors (saved_mean, saved_variance and scale) are needed') mean_tensor = cntk_layer.parameter_tensor[0] variance_tensor = cntk_layer.parameter_tensor[1] global_scale = cntk_layer.parameter_tensor[2].data[0] moving_average_factor = 1 / global_scale if global_scale != 0 else 0 mean_init = np.asarray(mean_tensor.data, dtype=np.float32) * moving_average_factor var_init = np.asarray(variance_tensor.data, dtype=np.float32) * moving_average_factor if len(cntk_layer.parameter_tensor) == 5: scale_tensor = cntk_layer.parameter_tensor[3] bias_tensor = cntk_layer.parameter_tensor[4] scale_init = np.asarray(scale_tensor.data, dtype=np.float32) bias_init = np.asarray(bias_tensor.data, dtype=np.float32) scale_parameters = ops.parameter(parameter_tensor, init=scale_init, name='.'.join((cntk_layer.op_name, 'scale'))) bias_parameters = ops.parameter(parameter_tensor, init=bias_init, name='.'.join((cntk_layer.op_name, 'bias'))) mean_parameters = ops.parameter(parameter_tensor, init=mean_init, name='.'.join((cntk_layer.op_name, 'mean'))) var_parameters = ops.parameter(parameter_tensor, init=var_init, name='.'.join((cntk_layer.op_name, 'var'))) epsilon = cntk_layer.parameters.epsilon return ops.batch_normalization(sanitize_input, scale_parameters, bias_parameters, mean_parameters, var_parameters, True, use_cudnn_engine=False, epsilon=epsilon, running_count=ops.constant(0), name=cntk_layer.op_name)
def sanitize_input(arg, fallback_dtype=np.float32): """ Convert to Variable or Constant so that it can be passed as Variable to the CNTK operators. * If `arg` is a NumPy array and its type is neither `np.float32` nor `np.float64`, it sets it to `np.float32`. * If `arg` is an op, it is assumed that it has only one output, which will be returned. Args: arg (number, NumPy array, `Variable`, or `Function`): input fallback_dtype (numpy dtype): fallback dtype in case `arg` is a list Returns: Constant, if `arg` was a number or NumPy array. Variable otherwise. """ from cntk.ops.variables import Constant, Variable from cntk.ops import constant # is it a Variable? if isinstance(arg, (Constant, Variable, cntk_py.Constant, cntk_py.Variable)): return arg # or a Function? # FIXME soon to be replaced by Function #if isinstance(arg, (Function, cntk_py.Function)): if isinstance(arg, cntk_py.Function): try: return arg.output() except RuntimeError: raise ValueError( 'the argument has more than one output, please provide the one you want' ) # maybe a Python list that we can interpret as a NumPy array? if isinstance(arg, list) and not arg: raise ValueError('input is empty') if not isinstance(arg, np.ndarray): arg = np.asarray(arg, dtype=fallback_dtype) return constant(value=arg)
def sanitize_input(arg, fallback_dtype=np.float32): """ Convert to Variable or Constant so that it can be passed as Variable to the CNTK operators. * If `arg` is a NumPy array and its type is neither `np.float32` nor `np.float64`, it sets it to `np.float32`. * If `arg` is an op, it is assumed that it has only one output, which will be returned. Args: arg (number, NumPy array, `Variable`, or `Function`): input fallback_dtype (numpy dtype): fallback dtype in case `arg` is a list Returns: Constant, if `arg` was a number or NumPy array. Variable otherwise. """ from cntk.ops.variables import Constant, Variable from cntk.ops import constant # is it a Variable? if isinstance(arg, (Constant, Variable, cntk_py.Constant, cntk_py.Variable)): return arg # or a Function? if isinstance(arg, cntk_py.Function): try: return arg.output() except RuntimeError: raise ValueError( 'the argument has more than one output, please provide the one you want') # maybe a Python list that we can interpret as a NumPy array? if isinstance(arg, list) and not arg: raise ValueError('input is empty') if not isinstance(arg, np.ndarray): arg = np.asarray(arg, dtype=fallback_dtype) return constant(value=arg)
def sanitize_input(arg, fallback_dtype=np.float32): """ Convert to Variable or Constant so that it can be passed as Variable to the CNTK operators. * If `arg` is a NumPy array and its type is neither `np.float32` nor `np.float64`, it sets it to `np.float32`. * If `arg` is an op, it is assumed that it has only one output, which will be returned. Args: arg (number, NumPy array, `Variable`, or `Function`): input fallback_dtype (numpy dtype): fallback dtype in case `arg` is a list Returns: Constant, if `arg` was a number or NumPy array. Variable otherwise. """ from cntk.ops.variables import Constant, Variable from cntk.ops import constant if isinstance(arg, (Constant, Variable, cntk_py.Constant, cntk_py.Variable)): return arg try: var_output = arg.output() if isinstance(var_output, (Variable, cntk_py.Variable)): return var_output else: raise ValueError( 'Cannot convert argument of type "%s" to Variable' % type(arg)) except AttributeError: # no function or function with more then one output pass if isinstance(arg, list) and not arg: raise ValueError('input is empty') if not isinstance(arg, np.ndarray): arg = np.asarray(arg, dtype=fallback_dtype) return constant(value=arg)
def sanitize_input(arg, fallback_dtype=np.float32): """ Convert to Variable or Constant so that it can be passed as Variable to the CNTK operators. * If `arg` is a NumPy array and its type is neither `np.float32` nor `np.float64`, it sets it to `np.float32`. * If `arg` is an op, it is assumed that it has only one output, which will be returned. Args: arg (number, NumPy array, `Variable`, or `Function`): input fallback_dtype (numpy dtype): fallback dtype in case `arg` is a list Returns: Constant, if `arg` was a number or NumPy array. Variable otherwise. """ from cntk.ops.variables import Constant, Variable from cntk.ops import constant if isinstance(arg, (Constant, Variable, cntk_py.Constant, cntk_py.Variable)): return arg try: var_output = arg.output() if isinstance(var_output, (Variable, cntk_py.Variable)): return var_output else: raise ValueError('Cannot convert argument of type "%s" to Variable'%type(arg)) except AttributeError: # no function or function with more then one output pass if isinstance(arg, list) and not arg: raise ValueError('input is empty') if not isinstance(arg, np.ndarray): arg = np.asarray(arg, dtype=fallback_dtype) return constant(value=arg)
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data features = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), features) netout = fully_connected_classifier_net( scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) try: rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join(*"../Image/DataSets/MNIST/Train-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { features: reader_train.streams.features, label: reader_train.streams.labels } # Instantiate progress writers. logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mnist_log") tensorboard_writer = TensorBoardProgressWriter(freq=1, log_dir=logdir, model=netout) progress_printer = ProgressPrinter(freq=10, tag='Training') # Instantiate the trainer object to drive the model training lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch) learner = sgd(netout.parameters, lr=lr_per_minibatch) trainer = Trainer(netout, (ce, pe), learner, [tensorboard_writer, progress_printer]) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 6000 num_sweeps_to_train_with = 2 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size for minibatch_idx in range(0, int(num_minibatches_to_train)): trainer.train_minibatch(reader_train.next_minibatch(minibatch_size, input_map=input_map)) # Log max/min/mean of each parameter tensor, so that we can confirm that the parameters change indeed. # Don't want to do that very often though, otherwise will spend too much time computing min/max/mean. if minibatch_idx % 10 == 9: for p in netout.parameters: tensorboard_writer.write_value(p.uid + "/max", reduce_max(p).eval(), minibatch_idx) tensorboard_writer.write_value(p.uid + "/min", reduce_min(p).eval(), minibatch_idx) tensorboard_writer.write_value(p.uid + "/mean", reduce_mean(p).eval(), minibatch_idx) trainer.summarize_training_progress() # Load test data try: rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join(*"../Image/DataSets/MNIST/Test-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { features: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) test_result += trainer.test_minibatch(mb) # Average of evaluation errors of all test minibatches trainer.summarize_test_progress() return test_result / num_minibatches_to_test
def convnet_cifar10_dataaug(reader_train, reader_test, max_epochs = 80): set_computation_network_trace_level(0) # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # apply model to input scaled_input = element_times(constant(0.00390625), input_var) with default_options (activation=relu, pad=True): z = Sequential([ LayerStack(2, lambda : [ Convolution((3,3), 64), Convolution((3,3), 64), MaxPooling((3,3), (2,2)) ]), LayerStack(2, lambda i: [ Dense([256,128][i]), Dropout(0.5) ]), Dense(num_classes, activation=None) ])(scaled_input) # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # training config epoch_size = 50000 # for now we manually specify epoch size minibatch_size = 64 # Set learning parameters lr_per_sample = [0.0015625]*20+[0.00046875]*20+[0.00015625]*20+[0.000046875]*10+[0.000015625] lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size) mm_time_constant = [0]*20+[600]*20+[1200] mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size) l2_reg_weight = 0.002 # trainer object learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight = l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # perform model training for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model(z, os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch))) ### Evaluation action epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") return metric_numer/metric_denom
def simple_mnist(tensorboard_logdir=None): input_dim = 784 num_output_classes = 10 num_hidden_layers = 2 hidden_layers_dim = 200 # Input variables denoting the features and label data feature = C.input_variable(input_dim, np.float32) label = C.input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), feature) z = Sequential([ For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)), Dense(num_output_classes) ])(scaled_input) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.dirname(os.path.abspath(__file__)) path = os.path.join(data_dir, 'Train-28x28_cntk_text.txt') reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { feature: reader_train.streams.features, label: reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. # training_progress_output_freq = 100 progress_writers = [ ProgressPrinter( # freq=training_progress_output_freq, tag='Training', num_epochs=num_sweeps_to_train_with) ] if tensorboard_logdir is not None: progress_writers.append( TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)) # Instantiate the trainer object to drive the model training lr = 0.001 trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr), progress_writers) training_session(trainer=trainer, mb_source=reader_train, mb_size=minibatch_size, model_inputs_to_streams=input_map, max_samples=num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep).train() # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { feature: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model C.debugging.start_profiler() C.debugging.enable_profiler() C.debugging.set_node_timing(True) test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error C.debugging.stop_profiler() trainer.print_node_timing() # Average of evaluation errors of all test minibatches return test_result * 100 / num_minibatches_to_test
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 epoch_size = sys.maxsize minibatch_size = 32 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 1 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size lr = learning_rates_per_sample(0.003125) input = variable(input_dim, np.float32, needs_gradient=False, name="features") scaled_input = element_times(constant((), 0.00390625), input) label = variable(num_output_classes, np.float32, needs_gradient=False, name="labels") dev = -1 cntk_dev = cntk_device(dev) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, dev, sigmoid) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) #TODO: add save and load module code ffnet = combine([ce, pe, netout], "classifier_model") rel_path = r"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt" path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path) cm = create_text_mb_source(path, input_dim, num_output_classes, epoch_size) stream_infos = cm.stream_infos() for si in stream_infos: if si.m_name == 'features': features_si = si elif si.m_name == 'labels': labels_si = si trainer = Trainer(netout, ce, pe, [sgdlearner(netout.owner.parameters(), lr)]) for i in range(0, int(num_minibatches_to_train)): mb = cm.get_next_minibatch(minibatch_size, cntk_dev) arguments = dict() arguments[input] = mb[features_si].m_data arguments[label] = mb[labels_si].m_data trainer.train_minibatch(arguments, cntk_dev) freq = 20 if i % freq == 0: training_loss = get_train_loss(trainer) eval_crit = get_train_eval_criterion(trainer) print( "Minibatch: {}, Train Loss: {}, Train Evaluation Criterion: {}" .format(i, training_loss, eval_crit))
def Constant(init, shape=None, name=''): p = constant(init, shape, name=name) return _name_node( p, 'constant') # these are factory methods for things with state
def convnet_cifar10(debug_output=False): set_computation_network_trace_level(0) image_height = 32 image_width = 32 num_channels = 3 input_dim = image_height * image_width * num_channels num_output_classes = 10 # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width), np.float32) label_var = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model input_removemean = minus(input_var, constant(128)) scaled_input = element_times(constant(0.00390625), input_removemean) with default_options (activation=relu, pad=True): z = Sequential([ LayerStack(2, lambda : [ Convolution((3,3), 64), Convolution((3,3), 64), MaxPooling((3,3), (2,2)) ]), LayerStack(2, lambda i: [ Dense([256,128][i]), Dropout(0.5) ]), Dense(num_output_classes, activation=None) ])(scaled_input) ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) reader_train = create_reader(os.path.join(data_path, 'Train_cntk_text.txt'), True, input_dim, num_output_classes) # training config epoch_size = 50000 # for now we manually specify epoch size minibatch_size = 64 # Set learning parameters lr_per_sample = [0.0015625]*10+[0.00046875]*10+[0.00015625] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size) momentum_time_constant = [0]*20+[-minibatch_size/np.log(0.9)] mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size) l2_reg_weight = 0.002 # Instantiate the trainer object to drive the model training learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight = l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var : reader_train.streams.features, label_var : reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # Get minibatches of images to train with and perform model training max_epochs = 30 for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[label_var].num_samples # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model(z, os.path.join(model_path, "ConvNet_CIFAR10_{}.dnn".format(epoch))) # Load test data reader_test = create_reader(os.path.join(data_path, 'Test_cntk_text.txt'), False, input_dim, num_output_classes) input_map = { input_var : reader_test.streams.features, label_var : reader_test.streams.labels } # Test data for trained model epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. 
sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") return metric_numer/metric_denom
def simple_mnist(tensorboard_logdir=None): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data feature = C.input_variable(input_dim, np.float32) label = C.input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), feature) z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim, activation=relu)), Dense(num_output_classes)])(scaled_input) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST") path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { feature : reader_train.streams.features, label : reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. #training_progress_output_freq = 100 progress_writers = [ProgressPrinter( #freq=training_progress_output_freq, tag='Training', num_epochs=num_sweeps_to_train_with)] if tensorboard_logdir is not None: progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)) # Instantiate the trainer object to drive the model training lr = learning_parameter_schedule_per_sample(1) trainer = Trainer(z, (ce, pe), adadelta(z.parameters, lr), progress_writers) training_session( trainer=trainer, mb_source = reader_train, mb_size = minibatch_size, model_inputs_to_streams = input_map, max_samples = num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep ).train() # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { feature : reader_test.streams.features, label : reader_test.streams.labels } # Test data for trained model C.debugging.start_profiler() C.debugging.enable_profiler() C.debugging.set_node_timing(True) #C.cntk_py.disable_cpueval_optimization() # uncomment this to check CPU eval perf without optimization test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error C.debugging.stop_profiler() trainer.print_node_timing() # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 2 hidden_layers_dim = 200 # Input variables denoting the features and label data feature = C.input_variable(input_dim) label = C.input_variable(num_output_classes) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), feature) # z = Sequential([ # Dense(hidden_layers_dim, activation=relu), # Dense(hidden_layers_dim, activation=relu), # Dense(num_output_classes)])(scaled_input) with default_options(activation=relu, init=C.glorot_uniform()): z = Sequential([For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim)), Dense(num_output_classes, activation=None)])(scaled_input) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) # setup the data path = abs_path + "\Train-28x28_cntk_text.txt" reader_train = MinibatchSource(CTFDeserializer(path, StreamDefs( features=StreamDef(field='features', shape=input_dim), labels=StreamDef(field='labels', shape=num_output_classes)))) input_map = { feature: reader_train.streams.features, label: reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. progress_writers = [ProgressPrinter( tag='Training', num_epochs=num_sweeps_to_train_with)] # Instantiate the trainer object to drive the model training lr = learning_rate_schedule(1, UnitType.sample) trainer = Trainer(z, (ce, pe), [adadelta(z.parameters, lr)], progress_writers) training_session( trainer=trainer, mb_source=reader_train, mb_size=minibatch_size, model_inputs_to_streams=input_map, max_samples=num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep ).train() # Load test data path = abs_path + "\Test-28x28_cntk_text.txt" reader_test = MinibatchSource(CTFDeserializer(path, StreamDefs( features=StreamDef(field='features', shape=input_dim), labels=StreamDef(field='labels', shape=num_output_classes)))) input_map = { feature: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def convnet_mnist(debug_output=False): image_height = 28 image_width = 28 num_channels = 1 input_dim = image_height * image_width * num_channels num_output_classes = 10 # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width), np.float32) label_var = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input_var) with default_options (activation=relu, pad=False): conv1 = Convolution((5,5), 32, pad=True)(scaled_input) pool1 = MaxPooling((3,3), (2,2))(conv1) conv2 = Convolution((3,3), 48)(pool1) pool2 = MaxPooling((3,3), (2,2))(conv2) conv3 = Convolution((3,3), 64)(pool2) f4 = Dense(96)(conv3) drop4 = Dropout(0.5)(f4) z = Dense(num_output_classes, activation=None)(drop4) ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) reader_train = create_reader(os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True, input_dim, num_output_classes) # training config epoch_size = 60000 # for now we manually specify epoch size minibatch_size = 128 # Set learning parameters lr_per_sample = [0.001]*10+[0.0005]*10+[0.0001] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size) momentum_time_constant = [0]*5+[1024] # Instantiate the trainer object to drive the model training learner = momentum_sgd(z.parameters, lr_schedule, momentum_time_constant) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var : reader_train.streams.features, label_var : reader_train.streams.labels } log_number_of_parameters(z) ; print() progress_printer = ProgressPrinter(tag='Training') # Get minibatches of images to train with and perform model training max_epochs = 40 for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[label_var].num_samples # count samples processed so far progress_printer.update_with_trainer(trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model(z, os.path.join(model_path, "ConvNet_MNIST_{}.dnn".format(epoch))) # Load test data reader_test = create_reader(os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False, input_dim, num_output_classes) input_map = { input_var : reader_test.streams.features, label_var : reader_test.streams.labels } # Test data for trained model epoch_size = 10000 minibatch_size = 1024 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom)) print("") return metric_numer/metric_denom
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 2 hidden_layers_dim = 200 # Input variables denoting the features and label data feature = C.input_variable(input_dim) label = C.input_variable(num_output_classes) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), feature) # z = Sequential([ # Dense(hidden_layers_dim, activation=relu), # Dense(hidden_layers_dim, activation=relu), # Dense(num_output_classes)])(scaled_input) with default_options(activation=relu, init=C.glorot_uniform()): z = Sequential([ For(range(num_hidden_layers), lambda i: Dense(hidden_layers_dim)), Dense(num_output_classes, activation=None) ])(scaled_input) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) # setup the data path = abs_path + "\Train-28x28_cntk_text.txt" reader_train = MinibatchSource( CTFDeserializer( path, StreamDefs(features=StreamDef(field='features', shape=input_dim), labels=StreamDef(field='labels', shape=num_output_classes)))) input_map = { feature: reader_train.streams.features, label: reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. progress_writers = [ ProgressPrinter(tag='Training', num_epochs=num_sweeps_to_train_with) ] # Instantiate the trainer object to drive the model training lr = learning_rate_schedule(1, UnitType.sample) trainer = Trainer(z, (ce, pe), [adadelta(z.parameters, lr)], progress_writers) training_session(trainer=trainer, mb_source=reader_train, mb_size=minibatch_size, model_inputs_to_streams=input_map, max_samples=num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep).train() # Load test data path = abs_path + "\Test-28x28_cntk_text.txt" reader_test = MinibatchSource( CTFDeserializer( path, StreamDefs(features=StreamDef(field='features', shape=input_dim), labels=StreamDef(field='labels', shape=num_output_classes)))) input_map = { feature: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def simple_mnist(debug_output=False): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) z = fully_connected_classifier_net( scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) try: rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join(abs_path, "..", "..", "..", "..", "..", "Examples", "Image", "DataSets", "MNIST", "Train-28x28_cntk_text.txt") path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { input : reader_train.streams.features, label : reader_train.streams.labels } lr_per_minibatch=learning_rate_schedule(0.2, UnitType.minibatch) # Instantiate the trainer object to drive the model training trainer = Trainer(z, ce, pe, sgd(z.parameters, lr=lr_per_minibatch)) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 500 if debug_output: training_progress_output_freq = training_progress_output_freq/4 for i in range(0, int(num_minibatches_to_train)): mb = reader_train.next_minibatch(minibatch_size, input_map=input_map) trainer.train_minibatch(mb) print_training_progress(trainer, i, training_progress_output_freq) # Load test data try: rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join(abs_path, "..", "..", "..", "..", "..", "Examples", "Image", "DataSets", "MNIST", "Test-28x28_cntk_text.txt") path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { input : reader_test.streams.features, label : reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def create_vgg19(): # Input variables denoting the features and label data feature_var = C.input_variable((num_channels, image_height, image_width)) label_var = C.input_variable((num_classes)) # apply model to input # remove mean value input = minus(feature_var, constant([[[104]], [[117]], [[124]]]), name='mean_removed_input') with default_options(activation=None, pad=True, bias=True): z = Sequential([ # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU) For( range(2), lambda i: [ Convolution2D((3, 3), 64, name='conv1_{}'.format(i)), Activation(activation=relu, name='relu1_{}'.format(i)), ]), MaxPooling((2, 2), (2, 2), name='pool1'), For( range(2), lambda i: [ Convolution2D((3, 3), 128, name='conv2_{}'.format(i)), Activation(activation=relu, name='relu2_{}'.format(i)), ]), MaxPooling((2, 2), (2, 2), name='pool2'), For( range(4), lambda i: [ Convolution2D((3, 3), 256, name='conv3_{}'.format(i)), Activation(activation=relu, name='relu3_{}'.format(i)), ]), MaxPooling((2, 2), (2, 2), name='pool3'), For( range(4), lambda i: [ Convolution2D((3, 3), 512, name='conv4_{}'.format(i)), Activation(activation=relu, name='relu4_{}'.format(i)), ]), MaxPooling((2, 2), (2, 2), name='pool4'), For( range(4), lambda i: [ Convolution2D((3, 3), 512, name='conv5_{}'.format(i)), Activation(activation=relu, name='relu5_{}'.format(i)), ]), MaxPooling((2, 2), (2, 2), name='pool5'), Dense(4096, name='fc6'), Activation(activation=relu, name='relu6'), Dropout(0.5, name='drop6'), Dense(4096, name='fc7'), Activation(activation=relu, name='relu7'), Dropout(0.5, name='drop7'), Dense(num_classes, name='fc8') ])(input) # loss and metric ce = C.cross_entropy_with_softmax(z, label_var) pe = C.classification_error(z, label_var) pe5 = C.classification_error(z, label_var, topN=5) log_number_of_parameters(z) print() return { 'feature': feature_var, 'label': label_var, 'ce': ce, 'pe': pe, 'pe5': pe5, 'output': z }
def simple_mnist(debug_output=False): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( * "../../../../Examples/Image/DataSets/MNIST/Train-28x28_cntk_text.txt" .split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { input: reader_train.streams.features, label: reader_train.streams.labels } # Instantiate the trainer object to drive the model training trainer = Trainer(netout, ce, pe, sgd(netout.parameters, lr=0.003125)) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 500 if debug_output: training_progress_output_freq = training_progress_output_freq / 4 for i in range(0, int(num_minibatches_to_train)): mb = reader_train.next_minibatch(minibatch_size, input_map=input_map) trainer.train_minibatch(mb) print_training_progress(trainer, i, training_progress_output_freq) # Load test data try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( * "../../../../Examples/Image/DataSets/MNIST/Test-28x28_cntk_text.txt" .split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { input: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def simple_mnist(debug_output=False): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant((), 0.00390625), input) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( *"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt". split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) feature_stream_name = 'features' labels_stream_name = 'labels' mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_dim), StreamConfiguration(labels_stream_name, num_output_classes) ]) features_si = mb_source.stream_info(feature_stream_name) labels_si = mb_source.stream_info(labels_stream_name) # Instantiate the trainer object to drive the model training trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.003125)]) # Get minibatches of images to train with and perform model training minibatch_size = 32 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 1 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 20 for i in range(0, int(num_minibatches_to_train)): mb = mb_source.get_next_minibatch(minibatch_size) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = { input: mb[features_si].m_data, label: mb[labels_si].m_data } trainer.train_minibatch(arguments) if debug_output: print_training_progress(trainer, i, training_progress_output_freq) # Load test data try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( *"../../../../Examples/Image/MNIST/Data/Test-28x28_cntk_text.txt". split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) test_mb_source = text_format_minibatch_source(path, [ StreamConfiguration(feature_stream_name, input_dim), StreamConfiguration(labels_stream_name, num_output_classes) ]) features_si = test_mb_source.stream_info(feature_stream_name) labels_si = test_mb_source.stream_info(labels_stream_name) # Test data for trained model test_minibatch_size = 512 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = test_mb_source.get_next_minibatch(test_minibatch_size) # Specify the mapping of input variables in the model to actual # minibatch data to be tested with arguments = { input: mb[features_si].m_data, label: mb[labels_si].m_data } eval_error = trainer.test_minibatch(arguments) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def convnet_cifar10(debug_output=False): set_computation_network_trace_level(0) image_height = 32 image_width = 32 num_channels = 3 input_dim = image_height * image_width * num_channels num_output_classes = 10 # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width), np.float32) label_var = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model input_removemean = minus(input_var, constant(128)) scaled_input = element_times(constant(0.00390625), input_removemean) with default_options(activation=relu, pad=True): z = Sequential([ LayerStack( 2, lambda: [ Convolution((3, 3), 64), Convolution((3, 3), 64), MaxPooling((3, 3), (2, 2)) ]), LayerStack(2, lambda i: [Dense([256, 128][i]), Dropout(0.5)]), Dense(num_output_classes, activation=None) ])(scaled_input) ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) reader_train = create_reader( os.path.join(data_path, 'Train_cntk_text.txt'), True, input_dim, num_output_classes) # training config epoch_size = 50000 # for now we manually specify epoch size minibatch_size = 64 # Set learning parameters lr_per_sample = [0.0015625] * 10 + [0.00046875] * 10 + [0.00015625] lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample) momentum_time_constant = [0] * 20 + [-minibatch_size / np.log(0.9)] mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size) l2_reg_weight = 0.002 # Instantiate the trainer object to drive the model training learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) print() progress_printer = ProgressPrinter(tag='Training') # Get minibatches of images to train with and perform model training max_epochs = 30 for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch( min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer( trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) persist.save_model( z, os.path.join(model_path, "ConvNet_CIFAR10_{}.dnn".format(epoch))) # Load test data reader_test = create_reader(os.path.join(data_path, 'Test_cntk_text.txt'), False, input_dim, num_output_classes) input_map = { input_var: reader_test.streams.features, label_var: reader_test.streams.labels } # Test data for trained model epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. 
sample_count += data[label_var].num_samples minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") return metric_numer / metric_denom
import cntk as C from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs from cntk.ops import relu, element_times, constant from cntk.layers import Dense from cntk.learners import learning_rate_schedule, UnitType, adadelta from cntk import cross_entropy_with_softmax, classification_error, Trainer from cntk.logging import ProgressPrinter # define network input_dim = 784 num_output_classes = 10 hidden_layers_dim = 200 feature = C.input_variable(input_dim) label = C.input_variable(num_output_classes) scaled_input = element_times(constant(0.00390625), feature) # define network topology h1 = Dense(hidden_layers_dim, activation=relu)(scaled_input) h2 = Dense(hidden_layers_dim, activation=relu)(h1) z = Dense(num_output_classes, activation=None)(h2) # define loss and error functions ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) # Data sources for training and testing path_train = "D:/MachineLearning/CNTK-2.0/Examples/Image/DataSets/MNIST/Train-28x28_cntk_text.txt" path_test = "D:/MachineLearning/CNTK-2.0/Examples/Image/DataSets/MNIST/Test-28x28_cntk_text.txt"
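The script above stops after defining the model, loss and data paths; the imported MinibatchSource, CTFDeserializer and adadelta are not yet used. The sketch below shows one plausible way the remaining wiring could look, assuming the usual CTF layout of the MNIST text files; the names reader_train and input_map and the minibatch counts are illustrative, not part of the original script.

# Plausible continuation: reader, trainer and a short training loop (illustrative names).
reader_train = MinibatchSource(
    CTFDeserializer(path_train, StreamDefs(
        features=StreamDef(field='features', shape=input_dim),
        labels=StreamDef(field='labels', shape=num_output_classes))),
    randomize=True)

input_map = {feature: reader_train.streams.features,
             label: reader_train.streams.labels}

# adadelta runs with its default hyperparameters here, as in the other MNIST examples.
trainer = Trainer(z, (ce, pe), adadelta(z.parameters), ProgressPrinter(freq=100))

for _ in range(1000):                                   # a short demonstration run
    mb = reader_train.next_minibatch(64, input_map=input_map)
    trainer.train_minibatch(mb)
trainer.summarize_training_progress()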
def test_op_times(left_operand, right_operand, device_id, precision, left_matrix_type, right_matrix_type): if right_matrix_type == 'sparse': pytest.skip('second operator of times() has to be dense') dt = PRECISION_TO_TYPE[precision] # Forward pass test #================== # we compute the expected output for the forward pass # we need two surrounding brackets # the first for sequences (length=1, since we have dynamic_axis='') # the second for batch of one sample expected = [[np.dot(AA(left_operand, dtype=dt), AA(right_operand, dtype=dt))]] if left_matrix_type == 'sparse': a = SI(*batch_dense_to_sparse([left_operand])) else: a = I([left_operand]) b = I([right_operand]) from cntk.ops import times, constant left_as_input = times(a, constant(right_operand)) right_as_input = times(constant(left_operand), b) unittest_helper(left_as_input, None, expected, device_id=device_id, precision=precision, clean_up=True, backward_pass=False) unittest_helper(right_as_input, None, expected, device_id=device_id, precision=precision, clean_up=True, backward_pass=False) unittest_helper(times(a, b), None, expected, device_id=device_id, precision=precision, clean_up=True, backward_pass=False) # Backward pass test #================== def op_grad(A, B): ''' Compute derivative of A with respect to B. For simplicity, assume A and B to be matrices. Let A be 2x2 and B be 2x1, then we have [a11 a12] [b11] = [ a11 b11 + a12 b21 ] [a21 a22] [b21] [ a21 b11 + a22 b21 ] The derivative for A with respect to B is [b11 b21] [b11 b21] The derivative for B with respect to A: [a11 + a12] [a21 + a22] ''' assert len(A.shape) == len(B.shape) == 2 D = np.zeros_like(A) D[:,:] = B.sum(axis=1) return D if 'sparse' not in [left_matrix_type, right_matrix_type]: # FIXME: disabling until the Pass node supports sparse expected_left = [[op_grad(AA(left_operand, dtype=dt), AA(right_operand, dtype=dt))]] expected_right = [[op_grad(AA(right_operand, dtype=dt).T, AA(left_operand, dtype=dt).T).T]] unittest_helper(left_as_input, None, expected_left, device_id=device_id, precision=precision, clean_up=True, backward_pass=True, input_node=a) # BUG: Fails because of Pass node? unittest_helper(right_as_input, None, expected_right, device_id=device_id, precision=precision, clean_up=True, backward_pass=True, input_node=b)
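The op_grad docstring walks through the gradient of a matrix product; the small NumPy check below (not part of the test) makes that concrete. What op_grad(A, B) actually builds is the gradient of sum(A @ B) with respect to A: every row of A receives the row-sums of B, which equals ones @ B.T.

# Numerical check of the op_grad construction on a 2x2 times 2x1 example.
import numpy as np

A = np.array([[1., 2.], [3., 4.]])
B = np.array([[5.], [6.]])

grad_wrt_A = np.ones((A.shape[0], B.shape[1])) @ B.T    # [[5., 6.], [5., 6.]]

D = np.zeros_like(A)
D[:, :] = B.sum(axis=1)                                 # same construction as op_grad
assert np.allclose(D, grad_wrt_A)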
def simple_mnist(tensorboard_logdir=None): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data feature = input(input_dim, np.float32) label = input(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), feature) z = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST") path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { feature: reader_train.streams.features, label: reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. #training_progress_output_freq = 100 progress_writers = [ ProgressPrinter( #freq=training_progress_output_freq, tag='Training', num_epochs=num_sweeps_to_train_with) ] if tensorboard_logdir is not None: progress_writers.append( TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)) # Instantiate the trainer object to drive the model training trainer = Trainer(z, (ce, pe), adadelta(z.parameters), progress_writers) training_session(trainer=trainer, mb_source=reader_train, mb_size=minibatch_size, var_to_stream=input_map, max_samples=num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep).train() # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { feature: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def convnet_cifar10_dataaug(reader_train, reader_test, distributed_trainer, max_epochs=80): set_computation_network_trace_level(0) # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width)) label_var = input_variable((num_classes)) # apply model to input scaled_input = element_times(constant(0.00390625), input_var) with default_options(activation=relu, pad=True): z = Sequential([ LayerStack( 2, lambda: [ Convolution((3, 3), 64), Convolution((3, 3), 64), MaxPooling((3, 3), (2, 2)) ]), LayerStack(2, lambda i: [Dense([256, 128][i]), Dropout(0.5)]), Dense(num_classes, activation=None) ])(scaled_input) # loss and metric ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) # training config epoch_size = 50000 # for now we manually specify epoch size minibatch_size = 64 # Set learning parameters lr_per_sample = [0.0015625] * 20 + [0.00046875] * 20 + [ 0.00015625 ] * 20 + [0.000046875] * 10 + [0.000015625] lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample, epoch_size=epoch_size) momentum_time_constant = [0] * 20 + [600] * 20 + [1200] mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant, epoch_size=epoch_size) l2_reg_weight = 0.002 # trainer object learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight) trainer = Trainer(z, ce, pe, learner, distributed_trainer) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) print() progress_printer = ProgressPrinter(tag='Training') # perform model training for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch( min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += trainer.previous_minibatch_sample_count # count samples processed so far progress_printer.update_with_trainer( trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) if distributed_trainer.communicator().current_worker( ).global_rank == 0: persist.save_model( z, os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch))) ### Evaluation action epoch_size = 10000 minibatch_size = 16 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. sample_count += trainer.previous_minibatch_sample_count minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") return metric_numer / metric_denom
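convnet_cifar10_dataaug expects its readers and distributed trainer to be handed in from outside. The sketch below shows how an augmenting CIFAR-10 image reader for it might be built; it assumes the cntk.io.transforms API (CNTK 2.x style), the map-file/mean-file paths and transform parameters are illustrative, and the same globals the function uses (image_width, image_height, num_channels, num_classes) are assumed to be defined.

# Illustrative augmenting image reader of the kind convnet_cifar10_dataaug expects.
import cntk.io.transforms as xforms
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs

def create_image_reader(map_file, mean_file, is_training):
    transforms = []
    if is_training:
        # random-side cropping is the data augmentation the function name refers to
        transforms += [xforms.crop(crop_type='randomside', side_ratio=0.8)]
    transforms += [
        xforms.scale(width=image_width, height=image_height,
                     channels=num_channels, interpolations='linear'),
        xforms.mean(mean_file)]
    return MinibatchSource(
        ImageDeserializer(map_file, StreamDefs(
            features=StreamDef(field='image', transforms=transforms),
            labels=StreamDef(field='label', shape=num_classes))),
        randomize=is_training)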
def simple_mnist(tensorboard_logdir=None): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) z = fully_connected_classifier_net( scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST") path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { input : reader_train.streams.features, label : reader_train.streams.labels } # Training config minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 # Instantiate progress writers. #training_progress_output_freq = 100 progress_writers = [ProgressPrinter( #freq=training_progress_output_freq, tag='Training', num_epochs=num_sweeps_to_train_with)] if tensorboard_logdir is not None: progress_writers.append(TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)) # Instantiate the trainer object to drive the model training lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch) trainer = Trainer(z, (ce, pe), sgd(z.parameters, lr=lr_per_minibatch), progress_writers) training_session( trainer=trainer, mb_source = reader_train, mb_size = minibatch_size, var_to_stream = input_map, max_samples = num_samples_per_sweep * num_sweeps_to_train_with, progress_frequency=num_samples_per_sweep ).train() # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { input : reader_test.streams.features, label : reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
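The training_session call above only drives training and progress logging; the same helper can also checkpoint periodically so an interrupted run can resume. Below is a sketch of that variant, intended to replace the call inside simple_mnist and therefore using the same in-scope names (trainer, reader_train, input_map, data_dir); it assumes a CheckpointConfig is available alongside this training_session API, and the checkpoint file name and frequency are illustrative.

# Illustrative variant of the training_session call above with periodic checkpointing.
from cntk.train import CheckpointConfig, training_session

training_session(
    trainer=trainer,
    mb_source=reader_train,
    mb_size=minibatch_size,
    var_to_stream=input_map,
    max_samples=num_samples_per_sweep * num_sweeps_to_train_with,
    progress_frequency=num_samples_per_sweep,
    checkpoint_config=CheckpointConfig(
        filename=os.path.join(data_dir, "simple_mnist_checkpoint"),
        frequency=num_samples_per_sweep,   # one checkpoint per sweep
        restore=True)                      # resume from the latest checkpoint if present
).train()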
def simple_mnist(debug_output=False): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant((), 0.00390625), input) netout = fully_connected_classifier_net( scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, sigmoid ) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) try: rel_path = os.path.join( os.environ["CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY"], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/") ) except KeyError: rel_path = os.path.join(*"../../../../Examples/Image/Datasets/MNIST/Train-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) feature_stream_name = "features" labels_stream_name = "labels" mb_source = text_format_minibatch_source( path, [ StreamConfiguration(feature_stream_name, input_dim), StreamConfiguration(labels_stream_name, num_output_classes), ], ) features_si = mb_source[feature_stream_name] labels_si = mb_source[labels_stream_name] # Instantiate the trainer object to drive the model training trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.003125)]) # Get minibatches of images to train with and perform model training minibatch_size = 32 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 1 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 80 if debug_output: training_progress_output_freq = training_progress_output_freq / 4 for i in range(0, int(num_minibatches_to_train)): mb = mb_source.get_next_minibatch(minibatch_size) # Specify the mapping of input variables in the model to actual # minibatch data to be trained with arguments = {input: mb[features_si], label: mb[labels_si]} trainer.train_minibatch(arguments) print_training_progress(trainer, i, training_progress_output_freq) # Load test data try: rel_path = os.path.join( os.environ["CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY"], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/") ) except KeyError: rel_path = os.path.join(*"../../../../Examples/Image/Datasets/MNIST/Test-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) test_mb_source = text_format_minibatch_source( path, [ StreamConfiguration(feature_stream_name, input_dim), StreamConfiguration(labels_stream_name, num_output_classes), ], randomize=False, ) features_si = test_mb_source[feature_stream_name] labels_si = test_mb_source[labels_stream_name] # Test data for trained model test_minibatch_size = 512 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = test_mb_source.get_next_minibatch(test_minibatch_size) # Specify the mapping of input variables in the model to actual # minibatch data to be tested with arguments = {input: mb[features_si], label: mb[labels_si]} eval_error = trainer.test_minibatch(arguments) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
def create_vgg19(): # Input variables denoting the features and label data feature_var = C.input_variable((num_channels, image_height, image_width)) label_var = C.input_variable((num_classes)) # apply model to input # remove mean value input = minus(feature_var, constant([[[104]], [[117]], [[124]]]), name='mean_removed_input') with default_options(activation=None, pad=True, bias=True): z = Sequential([ # we separate Convolution and ReLU to name the output for feature extraction (usually before ReLU) For(range(2), lambda i: [ Convolution2D((3,3), 64, name='conv1_{}'.format(i)), Activation(activation=relu, name='relu1_{}'.format(i)), ]), MaxPooling((2,2), (2,2), name='pool1'), For(range(2), lambda i: [ Convolution2D((3,3), 128, name='conv2_{}'.format(i)), Activation(activation=relu, name='relu2_{}'.format(i)), ]), MaxPooling((2,2), (2,2), name='pool2'), For(range(4), lambda i: [ Convolution2D((3,3), 256, name='conv3_{}'.format(i)), Activation(activation=relu, name='relu3_{}'.format(i)), ]), MaxPooling((2,2), (2,2), name='pool3'), For(range(4), lambda i: [ Convolution2D((3,3), 512, name='conv4_{}'.format(i)), Activation(activation=relu, name='relu4_{}'.format(i)), ]), MaxPooling((2,2), (2,2), name='pool4'), For(range(4), lambda i: [ Convolution2D((3,3), 512, name='conv5_{}'.format(i)), Activation(activation=relu, name='relu5_{}'.format(i)), ]), MaxPooling((2,2), (2,2), name='pool5'), Dense(4096, name='fc6'), Activation(activation=relu, name='relu6'), Dropout(0.5, name='drop6'), Dense(4096, name='fc7'), Activation(activation=relu, name='relu7'), Dropout(0.5, name='drop7'), Dense(num_classes, name='fc8') ])(input) # loss and metric ce = C.cross_entropy_with_softmax(z, label_var) pe = C.classification_error(z, label_var) pe5 = C.classification_error(z, label_var, topN=5) log_number_of_parameters(z) ; print() return { 'feature': feature_var, 'label': label_var, 'ce' : ce, 'pe' : pe, 'pe5': pe5, 'output': z }
def simple_mnist(): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data features = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), features) netout = fully_connected_classifier_net(scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(netout, label) pe = classification_error(netout, label) try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( *"../Image/DataSets/MNIST/Train-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { features: reader_train.streams.features, label: reader_train.streams.labels } # Instantiate progress writers. logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mnist_log") tensorboard_writer = TensorBoardProgressWriter(freq=1, log_dir=logdir, model=netout) progress_printer = ProgressPrinter(freq=10, tag='Training') # Instantiate the trainer object to drive the model training lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch) learner = sgd(netout.parameters, lr=lr_per_minibatch) trainer = Trainer(netout, (ce, pe), learner, [tensorboard_writer, progress_printer]) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 6000 num_sweeps_to_train_with = 2 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size for minibatch_idx in range(0, int(num_minibatches_to_train)): trainer.train_minibatch( reader_train.next_minibatch(minibatch_size, input_map=input_map)) # Log max/min/mean of each parameter tensor, so that we can confirm that the parameters change indeed. # Don't want to do that very often though, otherwise will spend too much time computing min/max/mean. if minibatch_idx % 10 == 9: for p in netout.parameters: tensorboard_writer.write_value(p.uid + "/max", reduce_max(p).eval(), minibatch_idx) tensorboard_writer.write_value(p.uid + "/min", reduce_min(p).eval(), minibatch_idx) tensorboard_writer.write_value(p.uid + "/mean", reduce_mean(p).eval(), minibatch_idx) trainer.summarize_training_progress() # Load test data try: rel_path = os.path.join( os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'], *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/")) except KeyError: rel_path = os.path.join( *"../Image/DataSets/MNIST/Test-28x28_cntk_text.txt".split("/")) path = os.path.normpath(os.path.join(abs_path, rel_path)) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { features: reader_test.streams.features, label: reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) test_result += trainer.test_minibatch(mb) # Average of evaluation errors of all test minibatches trainer.summarize_test_progress() return test_result / num_minibatches_to_test
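The TensorBoardProgressWriter above writes training curves and per-parameter statistics into mnist_log; after the run they can be viewed by pointing TensorBoard at that directory (tensorboard --logdir mnist_log). As a small standalone sketch of the same writer API (the directory, tag and value below are made up), values can also be written and flushed explicitly:

# Standalone sketch of the TensorBoardProgressWriter API used above (made-up values).
from cntk.logging import TensorBoardProgressWriter

writer = TensorBoardProgressWriter(freq=1, log_dir="mnist_log", model=None)
writer.write_value("demo/accuracy", 0.97, 1)   # (tag, value, step)
writer.flush()                                 # make sure the event file is on disk
writer.close()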
def batch_norm(cntk_layer, inputs): ''' Setup batch normalization op with given parameters Args: cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`): the layer definition of batch normalization op inputs (list): a list contains all :class:`~cntk.ops.functions.Function` or :class:`~cntk.input` Return: :func:`~cntk.ops.functions.Function`: instaced cntk batch normalization op ''' sanitize_input = internal.sanitize_input(inputs[0]) parameter_tensor = (sanitize_input.shape[0], ) scale_init = 1 bias_init = 0 mean_init = 1 var_init = 0 if cntk_layer.parameter_tensor: if len(cntk_layer.parameter_tensor) < 3: raise AssertionError( 'At least three tensors (saved_mean, saved_variance and scale) are needed' ) mean_tensor = cntk_layer.parameter_tensor[0] variance_tensor = cntk_layer.parameter_tensor[1] global_scale = cntk_layer.parameter_tensor[2].data[0] moving_average_factor = 1 / global_scale if global_scale != 0 else 0 mean_init = np.asarray(mean_tensor.data, dtype=np.float32) * moving_average_factor var_init = np.asarray(variance_tensor.data, dtype=np.float32) * moving_average_factor if len(cntk_layer.parameter_tensor) == 5: scale_tensor = cntk_layer.parameter_tensor[3] bias_tensor = cntk_layer.parameter_tensor[4] scale_init = np.asarray(scale_tensor.data, dtype=np.float32) bias_init = np.asarray(bias_tensor.data, dtype=np.float32) scale_parameters = ops.parameter(parameter_tensor, init=scale_init, name='.'.join( (cntk_layer.op_name, 'scale'))) bias_parameters = ops.parameter(parameter_tensor, init=bias_init, name='.'.join( (cntk_layer.op_name, 'bias'))) mean_parameters = ops.parameter(parameter_tensor, init=mean_init, name='.'.join( (cntk_layer.op_name, 'mean'))) var_parameters = ops.parameter(parameter_tensor, init=var_init, name='.'.join( (cntk_layer.op_name, 'var'))) epsilon = cntk_layer.parameters.epsilon return ops.batch_normalization(sanitize_input, scale_parameters, bias_parameters, mean_parameters, var_parameters, True, use_cudnn_engine=False, epsilon=epsilon, running_count=ops.constant(0), name=cntk_layer.op_name)
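The moving_average_factor = 1 / global_scale step mirrors how Caffe's BatchNorm layer stores its statistics: the saved mean and variance blobs are accumulated values that must be divided by a separately stored scale factor before they can be used as running statistics. A tiny NumPy illustration with made-up numbers:

# Made-up numbers: recover usable running statistics from Caffe-style (value, scale-factor)
# storage, mirroring the moving_average_factor logic above.
import numpy as np

saved_mean = np.array([10.0, 20.0], dtype=np.float32)
saved_variance = np.array([4.0, 8.0], dtype=np.float32)
global_scale = 2.0                                         # scale-factor blob

moving_average_factor = 1 / global_scale if global_scale != 0 else 0
mean_init = saved_mean * moving_average_factor             # -> [ 5., 10.]
var_init = saved_variance * moving_average_factor          # -> [ 2.,  4.]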
def convnet_mnist(debug_output=False): image_height = 28 image_width = 28 num_channels = 1 input_dim = image_height * image_width * num_channels num_output_classes = 10 # Input variables denoting the features and label data input_var = input_variable((num_channels, image_height, image_width), np.float32) label_var = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input_var) with default_options(activation=relu, pad=False): conv1 = Convolution((5, 5), 32, pad=True)(scaled_input) pool1 = MaxPooling((3, 3), (2, 2))(conv1) conv2 = Convolution((3, 3), 48)(pool1) pool2 = MaxPooling((3, 3), (2, 2))(conv2) conv3 = Convolution((3, 3), 64)(pool2) f4 = Dense(96)(conv3) drop4 = Dropout(0.5)(f4) z = Dense(num_output_classes, activation=None)(drop4) ce = cross_entropy_with_softmax(z, label_var) pe = classification_error(z, label_var) reader_train = create_reader( os.path.join(data_path, 'Train-28x28_cntk_text.txt'), True, input_dim, num_output_classes) # training config epoch_size = 60000 # for now we manually specify epoch size minibatch_size = 128 # Set learning parameters lr_per_sample = [0.001] * 10 + [0.0005] * 10 + [0.0001] lr_schedule = learning_rate_schedule(lr_per_sample, UnitType.sample, epoch_size) mm_time_constant = [0] * 5 + [1024] mm_schedule = momentum_as_time_constant_schedule(mm_time_constant, epoch_size) # Instantiate the trainer object to drive the model training learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule) trainer = Trainer(z, ce, pe, learner) # define mapping from reader streams to network inputs input_map = { input_var: reader_train.streams.features, label_var: reader_train.streams.labels } log_number_of_parameters(z) print() progress_printer = ProgressPrinter(tag='Training') # Get minibatches of images to train with and perform model training max_epochs = 40 for epoch in range(max_epochs): # loop over epochs sample_count = 0 while sample_count < epoch_size: # loop over minibatches in the epoch data = reader_train.next_minibatch( min(minibatch_size, epoch_size - sample_count), input_map=input_map) # fetch minibatch. trainer.train_minibatch(data) # update model with it sample_count += data[ label_var].num_samples # count samples processed so far progress_printer.update_with_trainer( trainer, with_metric=True) # log progress progress_printer.epoch_summary(with_metric=True) z.save_model( os.path.join(model_path, "ConvNet_MNIST_{}.dnn".format(epoch))) # Load test data reader_test = create_reader( os.path.join(data_path, 'Test-28x28_cntk_text.txt'), False, input_dim, num_output_classes) input_map = { input_var: reader_test.streams.features, label_var: reader_test.streams.labels } # Test data for trained model epoch_size = 10000 minibatch_size = 1024 # process minibatches and evaluate the model metric_numer = 0 metric_denom = 0 sample_count = 0 minibatch_index = 0 while sample_count < epoch_size: current_minibatch = min(minibatch_size, epoch_size - sample_count) # Fetch next test min batch. data = reader_test.next_minibatch(current_minibatch, input_map=input_map) # minibatch data to be trained with metric_numer += trainer.test_minibatch(data) * current_minibatch metric_denom += current_minibatch # Keep track of the number of samples processed so far. 
sample_count += trainer.previous_minibatch_sample_count minibatch_index += 1 print("") print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format( minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom)) print("") return metric_numer / metric_denom
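The training loop above saves one model file per epoch via z.save_model. Below is a reloading sketch, assuming a CNTK 2.x-style load_model and that model_path is still in scope; the epoch number in the file name and the random input are illustrative.

# Illustrative: reload one of the per-epoch models saved above and score a single image.
import numpy as np
import cntk as C

loaded = C.load_model(os.path.join(model_path, "ConvNet_MNIST_39.dnn"))
img = np.random.rand(1, 28, 28).astype(np.float32)        # (channels, height, width)
scores = loaded.eval({loaded.arguments[0]: [img]})
predicted_class = np.argmax(scores)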
def simple_mnist(debug_output=False): input_dim = 784 num_output_classes = 10 num_hidden_layers = 1 hidden_layers_dim = 200 # Input variables denoting the features and label data input = input_variable(input_dim, np.float32) label = input_variable(num_output_classes, np.float32) # Instantiate the feedforward classification model scaled_input = element_times(constant(0.00390625), input) z = fully_connected_classifier_net( scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu) ce = cross_entropy_with_softmax(z, label) pe = classification_error(z, label) data_dir = os.path.join(abs_path, "..", "..", "..", "DataSets", "MNIST") path = os.path.normpath(os.path.join(data_dir, "Train-28x28_cntk_text.txt")) check_path(path) reader_train = create_reader(path, True, input_dim, num_output_classes) input_map = { input : reader_train.streams.features, label : reader_train.streams.labels } lr_per_minibatch=learning_rate_schedule(0.2, UnitType.minibatch) # Instantiate the trainer object to drive the model training trainer = Trainer(z, ce, pe, sgd(z.parameters, lr=lr_per_minibatch)) # Get minibatches of images to train with and perform model training minibatch_size = 64 num_samples_per_sweep = 60000 num_sweeps_to_train_with = 10 num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size training_progress_output_freq = 500 if debug_output: training_progress_output_freq = training_progress_output_freq/4 for i in range(0, int(num_minibatches_to_train)): mb = reader_train.next_minibatch(minibatch_size, input_map=input_map) trainer.train_minibatch(mb) print_training_progress(trainer, i, training_progress_output_freq) # Load test data path = os.path.normpath(os.path.join(data_dir, "Test-28x28_cntk_text.txt")) check_path(path) reader_test = create_reader(path, False, input_dim, num_output_classes) input_map = { input : reader_test.streams.features, label : reader_test.streams.labels } # Test data for trained model test_minibatch_size = 1024 num_samples = 10000 num_minibatches_to_test = num_samples / test_minibatch_size test_result = 0.0 for i in range(0, int(num_minibatches_to_test)): mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map) eval_error = trainer.test_minibatch(mb) test_result = test_result + eval_error # Average of evaluation errors of all test minibatches return test_result / num_minibatches_to_test
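After training, the same z Function can be evaluated directly on NumPy data, outside the reader pipeline. The sketch below is meant to run inside simple_mnist after the training loop, where z and input are in scope; the random vector stands in for a real flattened 28x28 image.

# Classify a single sample with the trained network (runs in the scope of simple_mnist).
import numpy as np
from cntk.ops import softmax

sample = np.random.rand(input_dim).astype(np.float32)     # one flattened 28x28 image
probabilities = softmax(z).eval({input: [sample]})        # forward pass on one sample
predicted_class = np.argmax(probabilities)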