def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # to become invariant to initialization order, which is a valid change

    # test of the example itself
    # this emulates the main code in the PY file
    reader = create_reader(data_dir + "/atis.train.ctf")
    model = create_model()
    loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        reader = create_reader(data_dir + "/atis.train.ctf")
        model = create_test_model()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        log_number_of_parameters(model, trace_level=1)
        print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
def test_ffnet_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    avg_error = ffnet(debug_output=False)
    expected_avg_error = 0.12
    assert np.allclose(avg_error, expected_avg_error, atol=TOLERANCE_ABSOLUTE)
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    evaluation_avg, loss_avg = slu_hands_on()
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
def test_sequence_to_sequence(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    error = sequence_to_sequence_translator()
    expected_error = 0.8596881547969316
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
def test_simple_mnist_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    test_error = simple_mnist()
    expected_test_error = 0.09
    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    evaluation_avg, loss_avg = train_sequence_classifier()
    expected_avg = [0.55, 1.53099]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
def test_initializer_init(device_id):
    from cntk.utils import cntk_device
    from cntk import DeviceDescriptor, cntk_py
    cntk_py.always_allow_setting_default_device()
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    _check(uniform(scale=10), 'uniform')
    _check(gaussian(output_rank=1, filter_rank=2, scale=10), 'gaussian')
    _check(xavier(output_rank=1, filter_rank=2, scale=10), 'xavier')
    _check(glorot_uniform(output_rank=1, filter_rank=2, scale=10), 'glorot_uniform')
    _check(glorot_normal(output_rank=1, filter_rank=2, scale=10), 'glorot_normal')
    _check(he_uniform(output_rank=1, filter_rank=2, scale=10), 'he_uniform')
    _check(he_normal(output_rank=1, filter_rank=2, scale=10), 'he_normal')
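# Hedged usage sketch (not part of the test above): initializer objects like
# these are normally passed as the `init` argument of a learnable parameter.
# The `parameter` factory and `cntk.initializer` module paths below are
# assumptions about this CNTK API version, for illustration only.
def _example_initializer_usage():
    from cntk import parameter
    from cntk.initializer import glorot_uniform
    # a 25x10 weight matrix drawn from the Glorot/Xavier uniform distribution
    return parameter(shape=(25, 10), init=glorot_uniform(scale=1.0))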
def _sanitize_value(shape, value, dtype, device, is_param=False):
    np_dtype = utils.sanitize_dtype_numpy(dtype)
    cntk_dtype = utils.sanitize_dtype_cntk(dtype)

    if value is None:
        if shape is None:
            raise ValueError('you need to specify at least shape or value')
        shape = utils.sanitize_shape(shape)
        if is_param:
            # TODO: expose the initialization params
            ndav = NDArrayView.random_uniform_float(shape, -0.05, 0.05, 1, device)
        else:
            ndav = utils.create_NDArrayView(shape, cntk_dtype, device)
    else:
        if not isinstance(value, np.ndarray) or value.dtype != np_dtype:
            value = np.asarray(value, dtype=np_dtype)

        # TODO: check whether this CPU-to-GPU copy operation is actually needed
        if device.type() != 0:
            ndav_cpu = utils.create_NDArrayView_from_NumPy(value, dev=DeviceDescriptor.cpu_device())
            ndav = utils.create_NDArrayView(value.shape, data_type=cntk_dtype, dev=device)
            ndav.copy_from(ndav_cpu)
        else:
            ndav = utils.create_NDArrayView_from_NumPy(value, device)

    return ndav
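# Hedged usage sketch for the private helper above, assuming the same module
# scope (np, utils, DeviceDescriptor already imported). It exercises both
# branches: allocating a fresh view from a shape, and wrapping a NumPy value.
def _sanitize_value_examples():
    dev = DeviceDescriptor.cpu_device()
    # shape only + is_param=True: fresh NDArrayView, uniformly initialized in [-0.05, 0.05]
    a = _sanitize_value((2, 3), None, np.float32, dev, is_param=True)
    # explicit NumPy value: wrapped directly on CPU (staged through a CPU copy on GPU)
    b = _sanitize_value(None, np.eye(3, dtype=np.float32), np.float32, dev)
    return a, b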
def forward(self, arguments, outputs, keep_for_backward=None, device=None):
    '''
    Computes the values of the specified variables in ``outputs``, using the
    values provided in ``arguments`` that correspond to each input `Variable`
    of the function whose ``is_input`` is `True`.

    Example:
        >>> v = C.input_variable(shape=(3,))
        >>> f = C.reciprocal(v)
        >>> _, fv = f.forward({v:[[1, 2, 4]]}, [f.output])
        >>> list(fv.values())[0]
        array([[[ 1.  ,  0.5 ,  0.25]]], dtype=float32)

    Args:
        arguments: maps variables to their input data. The interpretation
         depends on the input type:

           * dict: keys are input variables or names, and values are the input data.
           * any other type: if the node has a unique input, ``arguments`` is
             mapped to this input. For nodes with more than one input, only
             dict is allowed.

         In both cases, every sample in the data will be interpreted as a new
         sequence. To mark samples as continuations of the previous sequence,
         specify ``arguments`` as ``tuple``: the first element will be used as
         ``arguments``, and the second one will be used as a list of bools,
         denoting whether a sequence is a new one (`True`) or a continuation of
         the previous one (`False`). Data should be either NumPy arrays or a
         :class:`~cntk.io.MinibatchData` instance.
        outputs (iterable): outputs to fetch values for.
        keep_for_backward (set, default `None`): the subset of the Function's
         output variables for which gradients shall be calculated in a
         subsequent backward call. If `None`, the returned state will be `None`
         and a subsequent call to :func:`backward` will not be possible.
        device (:class:`~cntk.device.DeviceDescriptor`, default `None`): the
         device descriptor that contains the type and id of the device on which
         the computation is performed. If `None`, the default device is used.

    Returns:
        A tuple (BackpropState, map of outputs to NumPy arrays). The
        BackpropState is a handle taken by :func:`backward`.
    '''
    if device is None:
        from cntk import DeviceDescriptor
        device = DeviceDescriptor.use_default_device()

    in_var_map = sanitize_var_map(self.arguments, arguments, None, device)
    output_map = {v: None for v in outputs}
    keep_for_backward = set(keep_for_backward or {})

    state = super(Function, self)._forward(in_var_map, output_map, device,
                                           keep_for_backward)

    for k in output_map:
        output_map[k] = value_to_seq(output_map[k])

    return state, output_map
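# Hedged sketch of the forward/backward round trip described in the docstring
# above: pass keep_for_backward to obtain a BackpropState, then hand that state
# to backward() together with root gradients. Names and shapes here are
# illustrative assumptions, not code from the original source.
def _example_forward_backward():
    import numpy as np
    import cntk as C
    v = C.input_variable(shape=(3,), needs_gradient=True)
    f = C.reciprocal(v)
    state, outputs = f.forward({v: [[1., 2., 4.]]}, [f.output],
                               keep_for_backward={f.output})
    # seed the backward pass with ones; d(1/v)/dv = -1/v**2
    root_gradients = {f.output: np.ones((1, 1, 3), dtype=np.float32)}
    grads = f.backward(state, root_gradients, {v})
    return outputs, grads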
def forward(self, arguments, outputs, keep_for_backward=None, device=None):
    '''
    Computes and stores the values of the specified variables in `outputs`,
    using the `arguments` values provided for each leaf `Variable` of the
    function whose `is_input()` is true.

    Args:
        arguments (`dict` or `list` or `tuple`): maps variables to their input
         data. The interpretation depends on the input type:

           * `dict`: keys are input variables or names, and values are the input data.
           * `list`: elements are input data in the order their respective
             variables have been defined in the network.

         In both cases, every sample in the data will be interpreted as a new
         sequence. To mark samples as continuations of the previous sequence,
         specify `arguments` as `tuple`: the first element will be used as
         `arguments`, and the second one will be used as a list of bools,
         denoting whether a sequence is a new one (`True`) or a continuation of
         the previous one (`False`). Data should be either NumPy arrays or a
         :class:`cntk.io.MinibatchData` instance.
        outputs (iterable): outputs to fetch values for.
        keep_for_backward (`set`, default `None`): the subset of the Function's
         output variables for which gradients shall be calculated in a
         subsequent backward call. If `None`, the returned state will be `None`
         and a subsequent call to `backward` will not be possible.
        device (:class:`cntk.DeviceDescriptor`, default `None`): the device
         descriptor that contains the type and id of the device on which the
         computation is performed. If `None`, the default device is used.

    Returns:
        A tuple (`BackpropState`, `map` of outputs to NumPy arrays). The
        BackpropState is a handle taken by :func:`backward`.
    '''
    if device is None:
        from cntk import DeviceDescriptor
        device = DeviceDescriptor.use_default_device()

    in_var_map = sanitize_var_map(self.arguments(), arguments, None, device)
    output_map = {v: None for v in outputs}
    keep_for_backward = set(keep_for_backward or {})

    state = super(Function, self)._forward(in_var_map, output_map, device,
                                           keep_for_backward)

    for k in output_map:
        output_map[k] = value_to_seq(output_map[k])

    return state, output_map
def test_cifar_resnet_error(device_id):
    target_device = DeviceDescriptor.gpu_device(0)
    DeviceDescriptor.set_default_device(target_device)

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/Miscellaneous/CIFAR-10/cifar-10-batches-py".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    test_error = cifar_resnet(base_path)
    expected_test_error = 0.7
    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)
def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/Datasets/CIFAR-10/cifar-10-batches-py".split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    test_error = cifar_resnet(base_path)
    expected_test_error = 0.7
    assert np.allclose(test_error, expected_test_error, atol=TOLERANCE_ABSOLUTE)
def test_get_data_type():
    assert get_data_type(constant(value=2), constant(value=1)) == np.float32
    assert get_data_type(input_variable(shape=(2, 3)), constant(value=1)) == np.float32

    ndav32 = create_NDArrayView_from_NumPy(
        np.asarray([[1, 2]], dtype=np.float32))
    assert get_data_type(input_variable(shape=(2, 3), data_type=np.float64),
                         ndav32) == np.float64

    ndav64 = create_NDArrayView_from_NumPy(
        np.asarray([[1, 2]], dtype=np.float64))
    assert get_data_type(input_variable(shape=(2, 3), data_type=np.float64),
                         ndav64) == np.float64

    val32 = create_Value_from_NumPy(np.asarray([[1, 2]], dtype=np.float32),
                                    dev=DeviceDescriptor.default_device())
    assert get_data_type(val32, ndav64) == np.float64
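# Hedged summary of the precedence the assertions above exercise: get_data_type
# appears to pick the widest dtype among its arguments, with float32 as the
# default for untyped constants and inputs, and float64 winning once any
# argument carries double precision. Illustrative only, same scope as the test.
def _example_get_data_type_precedence():
    assert get_data_type(constant(value=2), constant(value=1)) == np.float32
    assert get_data_type(constant(value=2),
                         input_variable(shape=(2,), data_type=np.float64)) == np.float64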
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))
    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # to become invariant to initialization order, which is a valid change
    # BUGBUG: This ^^ currently seems to have no impact; the two BN models below should be identical in training

    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # change to intent classifier --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(Placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(label_dim)
        #    ]), [0.084, 0.407364])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model, with shared BN
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #with default_options(dtype=np.float64):  # test this with double precision since single precision is too little for reproducible aggregation
        # ^^ This test requires the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test('replace lookahead by bidirectional model, with shared BN', Sequential([
                Embedding(emb_dim),
                BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=True),
                #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(label_dim)
            ]), [0.0579573500457558, 0.3214986774820327])
        # values with normalization_time_constant=-1 and double precision:
        # [0.0583178503091983, 0.3199431143304898]
        """ with normalization_time_constant=-1:
        Minibatch[ 1- 1]: loss = 5.945220 * 67, metric = 100.0% * 67
        Minibatch[ 2- 2]: loss = 4.850601 * 63, metric = 79.4% * 63
        Minibatch[ 3- 3]: loss = 3.816031 * 68, metric = 57.4% * 68
        Minibatch[ 4- 4]: loss = 2.213172 * 70, metric = 41.4% * 70
        Minibatch[ 5- 5]: loss = 2.615342 * 65, metric = 40.0% * 65
        Minibatch[ 6- 6]: loss = 2.360896 * 62, metric = 25.8% * 62
        Minibatch[ 7- 7]: loss = 1.452822 * 58, metric = 27.6% * 58
        Minibatch[ 8- 8]: loss = 0.947210 * 70, metric = 10.0% * 70
        Minibatch[ 9- 9]: loss = 0.595654 * 59, metric = 10.2% * 59
        Minibatch[ 10- 10]: loss = 1.515479 * 64, metric = 23.4% * 64
        Minibatch[ 11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
        Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
        Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
        Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
        Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
        Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
        --> 0.057818696098277916 0.3214128415043278
        Minibatch[ 1- 1]: loss = 0.000000 * 991, metric = 2.5% * 991
        Minibatch[ 2- 2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
        Minibatch[ 3- 3]: loss = 0.000000 * 992, metric = 4.0% * 992
        Minibatch[ 4- 4]: loss = 0.000000 * 989, metric = 3.0% * 989
        Minibatch[ 5- 5]: loss = 0.000000 * 998, metric = 3.8% * 998
        Minibatch[ 6- 6]: loss = 0.000000 * 995, metric = 1.5% * 995
        Minibatch[ 7- 7]: loss = 0.000000 * 998, metric = 2.5% * 998
        Minibatch[ 8- 8]: loss = 0.000000 * 992, metric = 1.6% * 992
        Minibatch[ 9- 9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
        Minibatch[ 10- 10]: loss = 0.000000 * 996, metric = 7.9% * 996
        Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
        --> 0.03159140568099053 0.0
        """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('BatchNorm global-corpus aggregation', Sequential([
                Embedding(emb_dim),
                BatchNormalization(normalization_time_constant=-1),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(label_dim)
            ]), [0.05662627214996811, 0.2968516879905391])
        """
        Minibatch[ 1- 1]: loss = 5.745576 * 67, metric = 100.0% * 67
        Minibatch[ 2- 2]: loss = 4.684151 * 63, metric = 90.5% * 63
        Minibatch[ 3- 3]: loss = 3.957423 * 68, metric = 63.2% * 68
        Minibatch[ 4- 4]: loss = 2.286908 * 70, metric = 41.4% * 70
        Minibatch[ 5- 5]: loss = 2.733978 * 65, metric = 38.5% * 65
        Minibatch[ 6- 6]: loss = 2.189765 * 62, metric = 30.6% * 62
        Minibatch[ 7- 7]: loss = 1.427890 * 58, metric = 25.9% * 58
        Minibatch[ 8- 8]: loss = 1.501557 * 70, metric = 18.6% * 70
        Minibatch[ 9- 9]: loss = 0.632599 * 59, metric = 13.6% * 59
        Minibatch[ 10- 10]: loss = 1.516047 * 64, metric = 23.4% * 64
        Minibatch[ 11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
        Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
        Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
        Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
        Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
        Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
        --> 0.05662627214996811 0.2968516879905391
        Minibatch[ 1- 1]: loss = 0.000000 * 991, metric = 1.8% * 991
        Minibatch[ 2- 2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
        Minibatch[ 3- 3]: loss = 0.000000 * 992, metric = 3.9% * 992
        Minibatch[ 4- 4]: loss = 0.000000 * 989, metric = 4.1% * 989
        Minibatch[ 5- 5]: loss = 0.000000 * 998, metric = 4.0% * 998
        Minibatch[ 6- 6]: loss = 0.000000 * 995, metric = 1.2% * 995
        Minibatch[ 7- 7]: loss = 0.000000 * 998, metric = 2.8% * 998
        Minibatch[ 8- 8]: loss = 0.000000 * 992, metric = 2.9% * 992
        Minibatch[ 9- 9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
        Minibatch[ 10- 10]: loss = 0.000000 * 996, metric = 8.2% * 996
        Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
        --> 0.035050983248361256 0.0
        """

        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus BatchNorm', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus lookahead', Sequential([
                Embedding(emb_dim),
                with_lookahead(),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True, use_peepholes=True):
            run_model_test('alternate paths', Sequential([
                Stabilizer(),
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim+50), go_backwards=True),
                BatchNormalization(map_rank=1),
                Dense(label_dim)
            ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
    model = create_model()
    loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1) ; print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

        # Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
        tb_files = 0
        for tb_file in os.listdir(tb_logdir):
            assert tb_file.startswith("events.out.tfevents")
            tb_files += 1
        assert tb_files == 1
lr = learning_rates_per_sample(0.007)
momentum_time_constant = 1100
momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant))
clipping_threshold_per_sample = 2.3
gradient_clipping_with_truncation = True
trainer = Trainer(z, ce, errs,
                  [momentum_sgd_learner(z.owner.parameters(), lr, momentum_per_sample,
                                        clipping_threshold_per_sample,
                                        gradient_clipping_with_truncation)])

# Get minibatches of sequences to train with and perform model training
minibatch_size = 72
training_progress_output_freq = 10
i = 0  # minibatch counter; missing in the original fragment but used below
while True:
    mb = mb_source.get_next_minibatch(minibatch_size)
    if len(mb) == 0:
        break

    # Specify the mapping of input variables in the model to actual minibatch data to be trained with
    arguments = {raw_input: mb[features_si].m_data,
                 raw_labels: mb[labels_si].m_data}
    trainer.train_minibatch(arguments)

    print_training_progress(trainer, i, training_progress_output_freq)
    i += 1

if __name__ == '__main__':
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.cpu_device()
    DeviceDescriptor.set_default_device(target_device)

    train_sequence_to_sequence_translator()
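# Worked check of the momentum conversion used above: a momentum time constant
# of 1100 samples maps to a per-sample momentum of exp(-1/1100), i.e. a
# gradient's contribution decays by a factor of e after about 1100 samples.
import math
assert abs(math.exp(-1.0 / 1100) - 0.999091) < 1e-6  # per-sample momentum ~0.999091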
# Instantiate the resnet classification model
classifier_output = resnet_classifer(image_input, num_classes)

ce = cross_entropy_with_softmax(classifier_output, label_var)
pe = classification_error(classifier_output, label_var)

# Instantiate the trainer object to drive the model training
lr = learning_rates_per_sample(0.0078125)
trainer = Trainer(classifier_output, ce, pe,
                  [sgd_learner(classifier_output.owner.parameters(), lr)])

# Get minibatches of images to train with and perform model training
mb_size = 32
training_progress_output_freq = 20
num_mbs = 1000
for i in range(0, num_mbs):
    mb = minibatch_source.get_next_minibatch(mb_size)

    # Specify the mapping of input variables in the model to actual minibatch data to be trained with
    arguments = {image_input: mb[features_si].m_data,
                 label_var: mb[labels_si].m_data}
    trainer.train_minibatch(arguments)

    print_training_progress(trainer, i, training_progress_output_freq)

if __name__ == '__main__':
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.gpu_device(0)
    DeviceDescriptor.set_default_device(target_device)

    cifar_resnet()
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.try_set_default_device(cntk_device(device_id))
    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1)  # to become invariant to initialization order, which is a valid change
    # BUGBUG: This ^^ currently seems to have no impact; the two BN models below should be identical in training
    force_deterministic_algorithms()

    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # change to intent classifier --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(num_labels)
        #    ]), [0.084, 0.407364])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model, with shared BN
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #with default_options(dtype=np.float64):  # test this with double precision since single precision is too little for reproducible aggregation
        # ^^ This test requires the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test('replace lookahead by bidirectional model, with shared BN', Sequential([
                Embedding(emb_dim),
                BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=True),
                #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])
        # values with normalization_time_constant=-1 and double precision:
        # [0.0583178503091983, 0.3199431143304898]
        """ with normalization_time_constant=-1:
        Minibatch[ 1- 1]: loss = 5.945220 * 67, metric = 100.0% * 67
        Minibatch[ 2- 2]: loss = 4.850601 * 63, metric = 79.4% * 63
        Minibatch[ 3- 3]: loss = 3.816031 * 68, metric = 57.4% * 68
        Minibatch[ 4- 4]: loss = 2.213172 * 70, metric = 41.4% * 70
        Minibatch[ 5- 5]: loss = 2.615342 * 65, metric = 40.0% * 65
        Minibatch[ 6- 6]: loss = 2.360896 * 62, metric = 25.8% * 62
        Minibatch[ 7- 7]: loss = 1.452822 * 58, metric = 27.6% * 58
        Minibatch[ 8- 8]: loss = 0.947210 * 70, metric = 10.0% * 70
        Minibatch[ 9- 9]: loss = 0.595654 * 59, metric = 10.2% * 59
        Minibatch[ 10- 10]: loss = 1.515479 * 64, metric = 23.4% * 64
        Minibatch[ 11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
        Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
        Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
        Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
        Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
        Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
        --> 0.057818696098277916 0.3214128415043278
        Minibatch[ 1- 1]: loss = 0.000000 * 991, metric = 2.5% * 991
        Minibatch[ 2- 2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
        Minibatch[ 3- 3]: loss = 0.000000 * 992, metric = 4.0% * 992
        Minibatch[ 4- 4]: loss = 0.000000 * 989, metric = 3.0% * 989
        Minibatch[ 5- 5]: loss = 0.000000 * 998, metric = 3.8% * 998
        Minibatch[ 6- 6]: loss = 0.000000 * 995, metric = 1.5% * 995
        Minibatch[ 7- 7]: loss = 0.000000 * 998, metric = 2.5% * 998
        Minibatch[ 8- 8]: loss = 0.000000 * 992, metric = 1.6% * 992
        Minibatch[ 9- 9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
        Minibatch[ 10- 10]: loss = 0.000000 * 996, metric = 7.9% * 996
        Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
        --> 0.03159140568099053 0.0
        """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('BatchNorm global-corpus aggregation', Sequential([
                Embedding(emb_dim),
                BatchNormalization(normalization_time_constant=-1),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])
        """
        Minibatch[ 1- 1]: loss = 5.745576 * 67, metric = 100.0% * 67
        Minibatch[ 2- 2]: loss = 4.684151 * 63, metric = 90.5% * 63
        Minibatch[ 3- 3]: loss = 3.957423 * 68, metric = 63.2% * 68
        Minibatch[ 4- 4]: loss = 2.286908 * 70, metric = 41.4% * 70
        Minibatch[ 5- 5]: loss = 2.733978 * 65, metric = 38.5% * 65
        Minibatch[ 6- 6]: loss = 2.189765 * 62, metric = 30.6% * 62
        Minibatch[ 7- 7]: loss = 1.427890 * 58, metric = 25.9% * 58
        Minibatch[ 8- 8]: loss = 1.501557 * 70, metric = 18.6% * 70
        Minibatch[ 9- 9]: loss = 0.632599 * 59, metric = 13.6% * 59
        Minibatch[ 10- 10]: loss = 1.516047 * 64, metric = 23.4% * 64
        Minibatch[ 11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
        Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
        Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
        Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
        Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
        Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
        --> 0.05662627214996811 0.2968516879905391
        Minibatch[ 1- 1]: loss = 0.000000 * 991, metric = 1.8% * 991
        Minibatch[ 2- 2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
        Minibatch[ 3- 3]: loss = 0.000000 * 992, metric = 3.9% * 992
        Minibatch[ 4- 4]: loss = 0.000000 * 989, metric = 4.1% * 989
        Minibatch[ 5- 5]: loss = 0.000000 * 998, metric = 4.0% * 998
        Minibatch[ 6- 6]: loss = 0.000000 * 995, metric = 1.2% * 995
        Minibatch[ 7- 7]: loss = 0.000000 * 998, metric = 2.8% * 998
        Minibatch[ 8- 8]: loss = 0.000000 * 992, metric = 2.9% * 992
        Minibatch[ 9- 9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
        Minibatch[ 10- 10]: loss = 0.000000 * 996, metric = 8.2% * 996
        Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
        --> 0.035050983248361256 0.0
        """

        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus BatchNorm', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus lookahead', Sequential([
                Embedding(emb_dim),
                with_lookahead(),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True, use_peepholes=True):
            run_model_test('alternate paths', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim+50), go_backwards=True),
                BatchNormalization(map_rank=1),
                Dense(num_labels)
            ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    if device_id >= 0:  # sparse FSAdagrad currently does not run on CPU --TODO: fix this test once it does
        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_model_function()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        expected_avg = [0.09698114255561419, 0.5290531086061565]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

        # test
        reader = create_reader(data_dir + "/atis.test.ctf", is_training=False)
        evaluate(reader, model)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        # TODO: update example to support tensorboard, or decide to not show it in all examples (in upcoming update of examples)
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)  #, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1) ; print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
trainer = Trainer(classifier_output, ce, pe,
                  [sgd_learner(classifier_output.owner.parameters(), lr)])

# Get minibatches of sequences to train with and perform model training
minibatch_size = 200
training_progress_output_freq = 10
i = 0
while True:
    mb = mb_source.get_next_minibatch(minibatch_size)
    if len(mb) == 0:
        break

    # Specify the mapping of input variables in the model to actual minibatch data to be trained with
    arguments = {features: mb[features_si].m_data,
                 label: mb[labels_si].m_data}
    trainer.train_minibatch(arguments)

    print_training_progress(trainer, i, training_progress_output_freq)
    i += 1

if __name__ == '__main__':
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.cpu_device()
    DeviceDescriptor.set_default_device(target_device)

    train_sequence_classifier()
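# Hedged sketch (not the original helper) of the print_training_progress
# function the training loops above call. It assumes the Trainer exposes
# previous_minibatch_loss_average and previous_minibatch_evaluation_average
# accessors, as in this generation of the CNTK API; the real helper may differ.
def print_training_progress(trainer, mb, frequency):
    if mb % frequency == 0:
        loss = trainer.previous_minibatch_loss_average
        error = trainer.previous_minibatch_evaluation_average
        print("Minibatch: {0}, Train Loss: {1:.4f}, Train Error: {2:.4f}".format(
            mb, loss, error))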