Example No. 1
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    set_computation_network_trace_level(1)
    set_fixed_random_seed(
        1
    )  # to become invariant to initialization order, which is a valid change

    # test of the example itself
    # this emulates the main code in the PY file
    reader = create_reader(data_dir + "/atis.train.ctf")
    model = create_model()
    loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg],
                       expected_avg,
                       atol=TOLERANCE_ABSOLUTE)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0:  # BatchNormalization currently does not run on CPU
        reader = create_reader(data_dir + "/atis.train.ctf")
        model = create_test_model()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        log_number_of_parameters(model, trace_level=1)
        print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg],
                           expected_avg,
                           atol=TOLERANCE_ABSOLUTE)
Example No. 2
def test_ffnet_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    avg_error = ffnet(debug_output=False)
    expected_avg_error = 0.12
    assert np.allclose(avg_error, expected_avg_error, atol=TOLERANCE_ABSOLUTE)
Example No. 3
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    evaluation_avg, loss_avg = slu_hands_on()

    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
Example No. 4
def test_sequence_to_sequence(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    error = sequence_to_sequence_translator()

    expected_error = 0.8596881547969316
    assert np.allclose(error, expected_error, atol=TOLERANCE_ABSOLUTE)
Example No. 5
def test_simple_mnist_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    test_error = simple_mnist()
    expected_test_error = 0.09

    assert np.allclose(test_error, expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example No. 6
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    evaluation_avg, loss_avg = train_sequence_classifier()

    expected_avg = [0.55, 1.53099]
    assert np.allclose([evaluation_avg, loss_avg],
                       expected_avg,
                       atol=TOLERANCE_ABSOLUTE)
Example No. 7
def test_initializer_init(device_id):
    from cntk.utils import cntk_device
    from cntk import DeviceDescriptor, cntk_py
    cntk_py.always_allow_setting_default_device()
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    _check(uniform(scale=10), 'uniform')
    _check(gaussian(output_rank=1, filter_rank=2, scale=10), 'gaussian')
    _check(xavier(output_rank=1, filter_rank=2, scale=10), 'xavier')
    _check(glorot_uniform(output_rank=1, filter_rank=2, scale=10),
           'glorot_uniform')
    _check(glorot_normal(output_rank=1, filter_rank=2, scale=10),
           'glorot_normal')
    _check(he_uniform(output_rank=1, filter_rank=2, scale=10), 'he_uniform')
    _check(he_normal(output_rank=1, filter_rank=2, scale=10), 'he_normal')
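
Each `_check` call above only constructs an initializer descriptor; in real code the descriptor is handed to a learnable parameter. A minimal sketch of that step, assuming this CNTK version exposes a `parameter` factory that accepts an `init=` argument (the import paths, shape, and variable name below are illustrative, not taken from the test):

from cntk import parameter
from cntk.initializer import glorot_uniform

# create a 50x20 weight matrix whose initial values are drawn by the
# glorot_uniform initializer exercised in the test above
W = parameter(shape=(50, 20),
              init=glorot_uniform(output_rank=1, filter_rank=2, scale=10))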
Example No. 8
def _sanitize_value(shape, value, dtype, device, is_param=False):
    np_dtype = utils.sanitize_dtype_numpy(dtype)
    cntk_dtype = utils.sanitize_dtype_cntk(dtype)
    if value is None:
        if shape is None:
            raise ValueError('you need to specify at least shape or value')
        shape = utils.sanitize_shape(shape)

        if is_param:
            # TODO: expose the initialization params
            ndav = NDArrayView.random_uniform_float(shape, -0.05, 0.05, 1, device)
        else:
            ndav = utils.create_NDArrayView(shape, cntk_dtype, device)

    else:
        if not isinstance(value, np.ndarray) or value.dtype != np_dtype:
            value = np.asarray(value, dtype=np_dtype)

        # TODO: check whether this copy operation from CPU to GPU can be avoided
        if device.type() != 0:
            ndav_cpu = utils.create_NDArrayView_from_NumPy(value, dev=DeviceDescriptor.cpu_device())
            ndav = utils.create_NDArrayView(value.shape, data_type=cntk_dtype, dev=device)
            ndav.copy_from(ndav_cpu)
        else:
            ndav = utils.create_NDArrayView_from_NumPy(value, device)

    return ndav
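
`_sanitize_value` either allocates a fresh NDArrayView (random-uniform when `is_param` is set) or wraps the given NumPy data, routing through a CPU view first when the target device is a GPU. A minimal usage sketch under those assumptions (the call below is hypothetical and only illustrates the signature):

import numpy as np
from cntk import DeviceDescriptor

# wrap an existing NumPy array as an NDArrayView on the CPU device;
# shape can stay None because it is taken from the value in this branch
ndav = _sanitize_value(shape=None,
                       value=np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32),
                       dtype=np.float32,
                       device=DeviceDescriptor.cpu_device())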
Example No. 9
    def forward(self, arguments, outputs, keep_for_backward=None, device=None):
        '''
        Computes the values of specified variables in ``outputs``, using values
        provided in ``arguments`` that correspond to each input `Variable` of
        the function whose ``is_input`` is `True`.

        Example:
            >>> v = C.input_variable(shape=(3,))
            >>> f = C.reciprocal(v)
            >>> _, fv = f.forward({v:[[1, 2, 4]]}, [f.output])
            >>> list(fv.values())[0]
            array([[[ 1.  ,  0.5 ,  0.25]]], dtype=float32)

        Args:
            arguments: maps variables to their
             input data. The interpretation depends on the input type:

               * dict: keys are input variables or names, and values are the input data.
               * any other type: if the node has a unique input, ``arguments`` is mapped to this input.
                 For nodes with more than one input, only dict is allowed.
              In both cases, every sample in the data will be interpreted
             as a new sequence. To mark samples as continuations of the
             previous sequence, specify ``arguments`` as ``tuple``: the
             first element will be used as ``arguments``, and the second one will
             be used as a list of bools, denoting whether a sequence is a new
             one (`True`) or a continuation of the previous one (`False`).
             Data should be either NumPy arrays or a
             :class:`~cntk.io.MinibatchData` instance.
            outputs (iterable): outputs to fetch values for.
            keep_for_backward (set, default `None`): the subset of the
             Function's output variables for which gradients shall be calculated
             in a subsequent backward call. If `None`, the returned state will
             be `None` and a subsequent call to :func:`backward` will not be
             possible.
            device (:class:`~cntk.device.DeviceDescriptor`, default `None`): the device
             descriptor that contains the type and id of the device on which the
             computation is. If `None`, the default device is used.

        Returns:
             A tuple (BackpropState, map of outputs to NumPy arrays). The
             BackpropState is a handle taken by :func:`backward`.
        '''
        if device is None:
            from cntk import DeviceDescriptor
            device = DeviceDescriptor.use_default_device()

        in_var_map = sanitize_var_map(self.arguments, arguments,
                                      None, device)
        output_map = {v: None for v in outputs}
        keep_for_backward = set(keep_for_backward or {})

        state = super(Function, self)._forward(in_var_map, output_map, device,
                                             keep_for_backward)

        for k in output_map:
            output_map[k] = value_to_seq(output_map[k])

        return state, output_map
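
The Returns section above notes that the BackpropState is a handle consumed by :func:`backward`, and that ``keep_for_backward`` selects the outputs whose gradients can later be requested. A minimal round-trip sketch of that contract, assuming the companion Function.backward(state, root_gradients, variables) signature (illustrative only, not part of the example above):

import numpy as np
import cntk as C

v = C.input_variable(shape=(3,), needs_gradient=True)
f = C.reciprocal(v)

# forward pass; keep state for f.output so that backward() is possible
state, outputs = f.forward({v: [np.array([1.0, 2.0, 4.0], dtype=np.float32)]},
                           [f.output], keep_for_backward={f.output})

# backward pass: seed the root gradient with ones and ask for the gradient w.r.t. v
root_gradients = {f.output: np.ones_like(outputs[f.output])}
gradients = f.backward(state, root_gradients, {v})
print(gradients[v])  # d(1/x)/dx = -1/x**2 for each element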
Example No. 10
    def forward(self, arguments, outputs, keep_for_backward=None, device=None):
        '''
        Computes the values of specified variables in ``outputs``, using values
        provided in ``arguments`` that correspond to each input `Variable` of
        the function whose ``is_input`` is `True`.

        Example:
            >>> v = C.input_variable(shape=(3,))
            >>> f = C.reciprocal(v)
            >>> _, fv = f.forward({v:[[1, 2, 4]]}, [f.output])
            >>> list(fv.values())[0]
            array([[[ 1.  ,  0.5 ,  0.25]]], dtype=float32)

        Args:
            arguments: maps variables to their
             input data. The interpretation depends on the input type:

               * dict: keys are input variables or names, and values are the input data.
               * any other type: if the node has a unique input, ``arguments`` is mapped to this input.
                 For nodes with more than one input, only dict is allowed.
              In both cases, every sample in the data will be interpreted
             as a new sequence. To mark samples as continuations of the
             previous sequence, specify ``arguments`` as ``tuple``: the
             first element will be used as ``arguments``, and the second one will
             be used as a list of bools, denoting whether a sequence is a new
             one (`True`) or a continuation of the previous one (`False`).
             Data should be either NumPy arrays or a
             :class:`~cntk.io.MinibatchData` instance.
            outputs (iterable): outputs to fetch values for.
            keep_for_backward (set, default `None`): the subset of the
             Function's output variables for which gradients shall be calculated
             in a subsequent backward call. If `None`, the returned state will
             be `None` and a subsequent call to :func:`backward` will not be
             possible.
            device (:class:`~cntk.device.DeviceDescriptor`, default `None`): the device
             descriptor that contains the type and id of the device on which the
             computation is. If `None`, the default device is used.

        Returns:
             A tuple (BackpropState, map of outputs to NumPy arrays). The
             BackpropState is a handle taken by :func:`backward`.
        '''
        if device is None:
            from cntk import DeviceDescriptor
            device = DeviceDescriptor.use_default_device()

        in_var_map = sanitize_var_map(self.arguments, arguments,
                                      None, device)
        output_map = {v: None for v in outputs}
        keep_for_backward = set(keep_for_backward or {})

        state = super(Function, self)._forward(in_var_map, output_map, device,
                                             keep_for_backward)

        for k in output_map:
            output_map[k] = value_to_seq(output_map[k])

        return state, output_map
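
The ``tuple`` form of ``arguments`` described above lets consecutive forward calls feed chunks of one longer sequence: the second tuple element carries one boolean per sequence, `True` for a fresh sequence and `False` for a continuation. A small sketch of the calling convention only (illustrative; the flags matter for stateful/recurrent graphs, and the names below are invented):

import numpy as np
import cntk as C

x = C.input_variable(shape=(2,))
f = C.sigmoid(x)

chunk_a = [np.array([[0.0, 1.0]], dtype=np.float32)]   # one sequence, one sample
chunk_b = [np.array([[2.0, 3.0]], dtype=np.float32)]   # next chunk of the same sequence

# first call: True marks the (single) sequence as new
_, out_a = f.forward(({x: chunk_a}, [True]), [f.output])
# second call: False marks it as a continuation of the previous call's sequence
_, out_b = f.forward(({x: chunk_b}, [False]), [f.output])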
Example No. 11
    def forward(self, arguments, outputs, keep_for_backward=None, device=None):
        '''
        Computes and stores the values of specified variables in `outputs`,
        using provided `arguments` values corresponding to each leaf `Variable`
        of the function whose is_input() is true. 

        Args:
            arguments (`dict` or `list` or `tuple`): maps variables to their
             input data. The interpretation depends on the input type:
               * `dict`: keys are input variables or names, and values are the input data.
               * `list`: elements are input data in the order their respective variables have been defined in the network.
              In both cases, every sample in the data will be interpreted
             as a new sequence. To mark samples as continuations of the
             previous sequence, specify `arguments` as `tuple`: the
             first element will be used as `arguments`, and the second one will
             be used as a list of bools, denoting whether a sequence is a new
             one (`True`) or a continuation of the previous one (`False`).
             Data should be either NumPy arrays or a
             :class:`cntk.io.MinibatchData` instance.
            outputs (iterable): outputs to fetch values for.
            keep_for_backward (`set`, default `None`): the subset of the
             Function's output variables for which gradients shall be calculated
             in a subsequent backward call. If `None`, the returned state will
             be `None` and a subsequent call to `backward` will not be
             possible.
            device (:class:`cntk.DeviceDescriptor`, default `None`): the device
             descriptor that contains the type and id of the device on which the
             computation is. If `None`, the default device is used.

        Returns: 
             A tuple (`BackpropState`, `map` of outputs to NumPy arrays). The
             BackpropState is a handle taken by :func:`backward`.
        '''
        if device is None:
            from cntk import DeviceDescriptor
            device = DeviceDescriptor.use_default_device()

        in_var_map = sanitize_var_map(self.arguments(), arguments,
                                      None, device)
        output_map = {v: None for v in outputs}
        keep_for_backward = set(keep_for_backward or {})

        state = super(Function, self)._forward(in_var_map, output_map, device,
                                             keep_for_backward)

        for k in output_map:
            output_map[k] = value_to_seq(output_map[k])

        return state, output_map
Example No. 12
def test_cifar_resnet_error(device_id):
    target_device = DeviceDescriptor.gpu_device(0)
    DeviceDescriptor.set_default_device(target_device)

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/Miscellaneous/CIFAR-10/cifar-10-batches-py"
            .split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    test_error = cifar_resnet(base_path)
    expected_test_error = 0.7

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example No. 13
def test_cifar_resnet_error(device_id):
    if cntk_device(device_id).type() != DeviceKind_GPU:
        pytest.skip('test only runs on GPU')
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    try:
        base_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/CIFAR/v0/cifar-10-batches-py".split("/"))
    except KeyError:
        base_path = os.path.join(
            *"../../../../Examples/Image/Datasets/CIFAR-10/cifar-10-batches-py"
            .split("/"))

    base_path = os.path.normpath(base_path)
    os.chdir(os.path.join(base_path, '..'))

    test_error = cifar_resnet(base_path)
    expected_test_error = 0.7

    assert np.allclose(test_error,
                       expected_test_error,
                       atol=TOLERANCE_ABSOLUTE)
Example No. 14
    def forward(self, arguments, outputs, keep_for_backward=None, device=None):
        '''
        Computes and stores the values of specified variables in `outputs`,
        using provided `arguments` values corresponding to each leaf `Variable`
        of the function whose is_input() is true. 

        Args:
            arguments (`dict` or `list` or `tuple`): maps variables to their
             input data. The interpretation depends on the input type:
               * `dict`: keys are input variables or names, and values are the input data.
               * `list`: elements are input data in the order their respective variables have been defined in the network.
              In both cases, every sample in the data will be interpreted
             as a new sequence. To mark samples as continuations of the
             previous sequence, specify `arguments` as `tuple`: the
             first element will be used as `arguments`, and the second one will
             be used as a list of bools, denoting whether a sequence is a new
             one (`True`) or a continuation of the previous one (`False`).
             Data should be either NumPy arrays or a
             :class:`cntk.io.MinibatchData` instance.
            outputs (iterable): outputs to fetch values for.
            keep_for_backward (`set`, default `None`): the subset of the
             Function's output variables for which gradients shall be calculated
             in a subsequent backward call. If `None`, the returned state will
             be `None` and a subsequent call to `backward` will not be
             possible.
            device (:class:`cntk.DeviceDescriptor`, default `None`): the device
             descriptor that contains the type and id of the device on which the
             computation is. If `None`, the default device is used.

        Returns: 
             A tuple (`BackpropState`, `map` of outputs to NumPy arrays). The
             BackpropState is a handle taken by :func:`backward`.
        '''
        if device is None:
            from cntk import DeviceDescriptor
            device = DeviceDescriptor.use_default_device()

        in_var_map = sanitize_var_map(self.arguments(), arguments, None,
                                      device)
        output_map = {v: None for v in outputs}
        keep_for_backward = set(keep_for_backward or {})

        state = super(Function, self)._forward(in_var_map, output_map, device,
                                               keep_for_backward)

        for k in output_map:
            output_map[k] = value_to_seq(output_map[k])

        return state, output_map
Example No. 15
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    set_computation_network_trace_level(1)
    set_fixed_random_seed(1) # to become invariant to initialization order, which is a valid change

    # test of the example itself
    # this emulates the main code in the PY file
    reader = create_reader(data_dir + "/atis.train.ctf")
    model = create_model()
    loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0: # BatchNormalization currently does not run on CPU
        reader = create_reader(data_dir + "/atis.train.ctf")
        model = create_test_model()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        log_number_of_parameters(model, trace_level=1) ; print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
Example No. 16
def test_get_data_type():
    assert get_data_type(constant(value=2), constant(value=1)) == np.float32
    assert get_data_type(input_variable(shape=(2, 3)),
                         constant(value=1)) == np.float32

    ndav32 = create_NDArrayView_from_NumPy(
        np.asarray([[1, 2]], dtype=np.float32))
    assert get_data_type(input_variable(shape=(2, 3), data_type=np.float64),
                         ndav32) == np.float64

    ndav64 = create_NDArrayView_from_NumPy(
        np.asarray([[1, 2]], dtype=np.float64))
    assert get_data_type(input_variable(shape=(2, 3), data_type=np.float64),
                         ndav64) == np.float64

    val32 = create_Value_from_NumPy(np.asarray([[1, 2]], dtype=np.float32),
                                    dev=DeviceDescriptor.default_device())
    assert get_data_type(val32, ndav64) == np.float64
Example No. 17
def test_seq_classification_error(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    evaluation_avg, loss_avg = train_sequence_classifier()
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1) # to become invariant to initialization order, which is a valid change
    # BUGBUG: This ^^ currently seems to have no impact; the two BN models below should be identical in training

    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # change to intent classifier   --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(Placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(label_dim)
        #    ]), [0.084, 0.407364])


        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
          #with default_options(dtype=np.float64):  # test this with double precision since single precision is not sufficient for reproducible aggregation
          # ^^ This test requires the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test('replace lookahead by bidirectional model, with shared BN', Sequential([
                Embedding(emb_dim),
                BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=True),
                #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(label_dim)
            ]), [0.0579573500457558, 0.3214986774820327])
            # values with normalization_time_constant=-1 and double precision:
            # [0.0583178503091983, 0.3199431143304898]
            """ with normalization_time_constant=-1:
             Minibatch[   1-   1]: loss = 5.945220 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.850601 * 63, metric = 79.4% * 63
             Minibatch[   3-   3]: loss = 3.816031 * 68, metric = 57.4% * 68
             Minibatch[   4-   4]: loss = 2.213172 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.615342 * 65, metric = 40.0% * 65
             Minibatch[   6-   6]: loss = 2.360896 * 62, metric = 25.8% * 62
             Minibatch[   7-   7]: loss = 1.452822 * 58, metric = 27.6% * 58
             Minibatch[   8-   8]: loss = 0.947210 * 70, metric = 10.0% * 70
             Minibatch[   9-   9]: loss = 0.595654 * 59, metric = 10.2% * 59
             Minibatch[  10-  10]: loss = 1.515479 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
             Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
             Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
             Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
             Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
            --> 0.057818696098277916 0.3214128415043278
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 2.5% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 4.0% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 3.0% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 3.8% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.5% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.5% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 1.6% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 7.9% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
            --> 0.03159140568099053 0.0
            """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('BatchNorm global-corpus aggregation', Sequential([
                Embedding(emb_dim),
                BatchNormalization(normalization_time_constant=-1),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(label_dim)
            ]), [0.05662627214996811, 0.2968516879905391])
            """
             Minibatch[   1-   1]: loss = 5.745576 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.684151 * 63, metric = 90.5% * 63
             Minibatch[   3-   3]: loss = 3.957423 * 68, metric = 63.2% * 68
             Minibatch[   4-   4]: loss = 2.286908 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.733978 * 65, metric = 38.5% * 65
             Minibatch[   6-   6]: loss = 2.189765 * 62, metric = 30.6% * 62
             Minibatch[   7-   7]: loss = 1.427890 * 58, metric = 25.9% * 58
             Minibatch[   8-   8]: loss = 1.501557 * 70, metric = 18.6% * 70
             Minibatch[   9-   9]: loss = 0.632599 * 59, metric = 13.6% * 59
             Minibatch[  10-  10]: loss = 1.516047 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
             Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
             Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
             Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
             Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
            --> 0.05662627214996811 0.2968516879905391
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 1.8% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 3.9% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 4.1% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 4.0% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.2% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.8% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 2.9% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 8.2% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
            --> 0.035050983248361256 0.0
            """


        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus BatchNorm', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus lookahead', Sequential([
                Embedding(emb_dim),
                with_lookahead(),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(label_dim)
            ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True, use_peepholes=True):
            run_model_test('alternate paths', Sequential([
                Stabilizer(),
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim+50), go_backwards=True),
                BatchNormalization(map_rank=1),
                Dense(label_dim)
            ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
    model = create_model()
    loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
    expected_avg = [0.15570838301766451, 0.7846451368305728]
    assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1) ; print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

        # Ensure that the TensorBoard log directory was created and contains exactly one file with the expected name.
        tb_files = 0
        for tb_file in os.listdir(tb_logdir):
            assert tb_file.startswith("events.out.tfevents")
            tb_files += 1
        assert tb_files == 1
Example No. 19
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.0078125)
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    mb_size = 32
    training_progress_output_freq = 20
    num_mbs = 1000
    for i in range(0, num_mbs):
        mb = minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            image_input: mb[features_si].m_data,
            label_var: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)


if __name__ == '__main__':
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.gpu_device(0)
    DeviceDescriptor.set_default_device(target_device)

    cifar_resnet()
Example No. 20
    lr = learning_rates_per_sample(0.007)
    momentum_time_constant = 1100
    momentum_per_sample = momentums_per_sample(math.exp(-1.0 / momentum_time_constant))
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    trainer = Trainer(z, ce, errs,
                      [momentum_sgd_learner(z.owner.parameters(), lr, momentum_per_sample,
                                            clipping_threshold_per_sample,
                                            gradient_clipping_with_truncation)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 72
    training_progress_output_freq = 10
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {raw_input : mb[features_si].m_data, raw_labels : mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

        i += 1

if __name__ == '__main__':
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.cpu_device()
    DeviceDescriptor.set_default_device(target_device)

    train_sequence_to_sequence_translator()
Example No. 21
    # Instantiate the resnet classification model
    classifier_output = resnet_classifer(image_input, num_classes)

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.0078125)
    trainer = Trainer(classifier_output, ce, pe, [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    mb_size = 32
    training_progress_output_freq = 20
    num_mbs = 1000
    for i in range(0, num_mbs):
        mb = minibatch_source.get_next_minibatch(mb_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {image_input : mb[features_si].m_data, label_var : mb[labels_si].m_data}
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

if __name__ == '__main__':
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.gpu_device(0)
    DeviceDescriptor.set_default_device(target_device)

    cifar_resnet()
Example No. 22
def test_language_understanding(device_id):
    from cntk.ops.tests.ops_test_utils import cntk_device
    DeviceDescriptor.try_set_default_device(cntk_device(device_id))

    from _cntk_py import set_computation_network_trace_level, set_fixed_random_seed, force_deterministic_algorithms
    #set_computation_network_trace_level(1)
    set_fixed_random_seed(1) # to become invariant to initialization order, which is a valid change
    # BUGBUG: This ^^ currently seems to have no impact; the two BN models below should be identical in training
    force_deterministic_algorithms()

    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # change to intent classifier   --moved up here since this fails, as repro
        # BUGBUG: Broken, need to pass new criterion to train().
        #with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
        #    select_last = slice(placeholder(), Axis.default_dynamic_axis(), -1, 0)
        #    # BUGBUG: Fails with "RuntimeError: The specified dynamic axis named defaultDynamicAxis does not match any of the dynamic axes of the operand"
        #    run_model_test('change to intent classifier', Sequential([
        #        Embedding(emb_dim),
        #        with_lookahead(),
        #        BatchNormalization(),
        #        BiRecurrence(LSTM(hidden_dim)),
        #        BatchNormalization(),
        #        select_last,  # fails here with an axis problem
        #        Dense(num_labels)
        #    ]), [0.084, 0.407364])


        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
          #with default_options(dtype=np.float64):  # test this with double precision since single precision is not sufficient for reproducible aggregation
          # ^^ This test requires the #if 1 in Functions.cpp PopulateNetworkInputs() to be changed to #if 0.
            run_model_test('replace lookahead by bidirectional model, with shared BN', Sequential([
                Embedding(emb_dim),
                BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=True),
                #BNBiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim), test_dual=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])
            # values with normalization_time_constant=-1 and double precision:
            # [0.0583178503091983, 0.3199431143304898]
            """ with normalization_time_constant=-1:
             Minibatch[   1-   1]: loss = 5.945220 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.850601 * 63, metric = 79.4% * 63
             Minibatch[   3-   3]: loss = 3.816031 * 68, metric = 57.4% * 68
             Minibatch[   4-   4]: loss = 2.213172 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.615342 * 65, metric = 40.0% * 65
             Minibatch[   6-   6]: loss = 2.360896 * 62, metric = 25.8% * 62
             Minibatch[   7-   7]: loss = 1.452822 * 58, metric = 27.6% * 58
             Minibatch[   8-   8]: loss = 0.947210 * 70, metric = 10.0% * 70
             Minibatch[   9-   9]: loss = 0.595654 * 59, metric = 10.2% * 59
             Minibatch[  10-  10]: loss = 1.515479 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.686744 * 5654, metric = 10.4% * 5654
             Minibatch[ 101- 200]: loss = 0.289059 * 6329, metric = 5.8% * 6329
             Minibatch[ 201- 300]: loss = 0.218765 * 6259, metric = 4.7% * 6259
             Minibatch[ 301- 400]: loss = 0.182855 * 6229, metric = 3.5% * 6229
             Minibatch[ 401- 500]: loss = 0.156745 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.321413 * 36061, metric = 5.8% * 36061
            --> 0.057818696098277916 0.3214128415043278
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 2.5% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 2.8% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 4.0% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 3.0% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 3.8% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.5% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.5% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 1.6% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 1.6% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 7.9% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.2% * 10984
            --> 0.03159140568099053 0.0
            """

        # BatchNorm test case for global-corpus aggregation
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('BatchNorm global-corpus aggregation', Sequential([
                Embedding(emb_dim),
                BatchNormalization(normalization_time_constant=-1),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(normalization_time_constant=-1),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])
            """
             Minibatch[   1-   1]: loss = 5.745576 * 67, metric = 100.0% * 67
             Minibatch[   2-   2]: loss = 4.684151 * 63, metric = 90.5% * 63
             Minibatch[   3-   3]: loss = 3.957423 * 68, metric = 63.2% * 68
             Minibatch[   4-   4]: loss = 2.286908 * 70, metric = 41.4% * 70
             Minibatch[   5-   5]: loss = 2.733978 * 65, metric = 38.5% * 65
             Minibatch[   6-   6]: loss = 2.189765 * 62, metric = 30.6% * 62
             Minibatch[   7-   7]: loss = 1.427890 * 58, metric = 25.9% * 58
             Minibatch[   8-   8]: loss = 1.501557 * 70, metric = 18.6% * 70
             Minibatch[   9-   9]: loss = 0.632599 * 59, metric = 13.6% * 59
             Minibatch[  10-  10]: loss = 1.516047 * 64, metric = 23.4% * 64
             Minibatch[  11- 100]: loss = 0.580329 * 5654, metric = 9.8% * 5654
             Minibatch[ 101- 200]: loss = 0.280317 * 6329, metric = 5.6% * 6329
             Minibatch[ 201- 300]: loss = 0.188372 * 6259, metric = 4.1% * 6259
             Minibatch[ 301- 400]: loss = 0.170403 * 6229, metric = 3.9% * 6229
             Minibatch[ 401- 500]: loss = 0.159605 * 6289, metric = 3.4% * 6289
            Finished Epoch [1]: [Training] loss = 0.296852 * 36061, metric = 5.7% * 36061
            --> 0.05662627214996811 0.2968516879905391
             Minibatch[   1-   1]: loss = 0.000000 * 991, metric = 1.8% * 991
             Minibatch[   2-   2]: loss = 0.000000 * 1000, metric = 3.4% * 1000
             Minibatch[   3-   3]: loss = 0.000000 * 992, metric = 3.9% * 992
             Minibatch[   4-   4]: loss = 0.000000 * 989, metric = 4.1% * 989
             Minibatch[   5-   5]: loss = 0.000000 * 998, metric = 4.0% * 998
             Minibatch[   6-   6]: loss = 0.000000 * 995, metric = 1.2% * 995
             Minibatch[   7-   7]: loss = 0.000000 * 998, metric = 2.8% * 998
             Minibatch[   8-   8]: loss = 0.000000 * 992, metric = 2.9% * 992
             Minibatch[   9-   9]: loss = 0.000000 * 1000, metric = 2.0% * 1000
             Minibatch[  10-  10]: loss = 0.000000 * 996, metric = 8.2% * 996
            Finished Epoch [1]: [Evaluation] loss = 0.000000 * 10984, metric = 3.5% * 10984
            --> 0.035050983248361256 0.0
            """


        # plus BatchNorm
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus BatchNorm', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.05662627214996811, 0.2968516879905391])

        # plus lookahead
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('plus lookahead', Sequential([
                Embedding(emb_dim),
                with_lookahead(),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim), go_backwards=False),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.057901888466764646, 0.3044637752807047])

        # replace lookahead by bidirectional model
        with default_options(initial_state=0.1):  # inject an option to mimic the BS version identically; remove some day
            run_model_test('replace lookahead by bidirectional model', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                BiRecurrence(LSTM(hidden_dim), LSTM(hidden_dim)),
                BatchNormalization(),
                Dense(num_labels)
            ]), [0.0579573500457558, 0.3214986774820327])

        # test of a config like in the example but with additions to test many code paths
        with default_options(enable_self_stabilization=True, use_peepholes=True):
            run_model_test('alternate paths', Sequential([
                Embedding(emb_dim),
                BatchNormalization(),
                Recurrence(LSTM(hidden_dim, cell_shape=hidden_dim+50), go_backwards=True),
                BatchNormalization(map_rank=1),
                Dense(num_labels)
            ]), [0.08574360112032389, 0.41847621578367716])

    # test of the example itself
    # this emulates the main code in the PY file
    if device_id >= 0: # sparse FSAdagrad currently does not run on CPU  --TODO: fix this test once it does
        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_model_function()
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1)
        expected_avg = [0.09698114255561419, 0.5290531086061565]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)

        # test
        reader = create_reader(data_dir + "/atis.test.ctf", is_training=False)
        evaluate(reader, model)

    # test of a config like in the example but with additions to test many code paths
    if device_id >= 0: # BatchNormalization currently does not run on CPU
        # Create a path to TensorBoard log directory and make sure it does not exist.
        abs_path = os.path.dirname(os.path.abspath(__file__))
        tb_logdir = os.path.join(abs_path, 'language_understanding_test_log')
        if os.path.exists(tb_logdir):
            shutil.rmtree(tb_logdir)

        reader = create_reader(data_dir + "/atis.train.ctf", is_training=True)
        model = create_test_model()
        # TODO: update example to support tensorboard, or decide to not show it in all examples (in upcoming update of examples)
        loss_avg, evaluation_avg = train(reader, model, max_epochs=1) #, tensorboard_logdir=tb_logdir)
        log_number_of_parameters(model, trace_level=1) ; print()
        expected_avg = [0.084, 0.407364]
        assert np.allclose([evaluation_avg, loss_avg], expected_avg, atol=TOLERANCE_ABSOLUTE)
Example No. 23
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            features: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)

        i += 1


if __name__ == '__main__':
    # Specify the target device to be used for computing
    target_device = DeviceDescriptor.cpu_device()
    DeviceDescriptor.set_default_device(target_device)

    train_sequence_classifier()
Example No. 24
def test_sequence_to_sequence(device_id):
    from cntk.utils import cntk_device
    DeviceDescriptor.set_default_device(cntk_device(device_id))

    error = sequence_to_sequence_translator()