def test_rnn(device_id):
    if device_id == -1:
        pytest.skip('Test only runs on GPU')

    batch_size = 8
    sequence_len = 100
    vocab_dim = 20
    embed_dim = 10
    hidden_dim = 7
    input = C.cast(C.sequence.input_variable(()), np.float16)
    with C.default_options(dtype=np.float16):
        embed = C.layers.Embedding(embed_dim)(C.one_hot(input, num_classes=vocab_dim, sparse_output=False))
        z = C.layers.Recurrence(C.layers.LSTM(hidden_dim))(embed)

    feed = np.floor(np.random.rand(batch_size, sequence_len).astype(np.float32) * (vocab_dim - 1))
    z.grad(feed, wrt=z.parameters)

    num_layers = 2
    W = C.parameter((C.InferredDimension, embed_dim), init=C.glorot_uniform(), dtype=np.float16)
    with C.default_options(dtype=np.float16):
        z = C.optimized_rnnstack(embed, W, hidden_dim, num_layers)

    feed = np.floor(np.random.rand(batch_size, sequence_len).astype(np.float32) * (vocab_dim - 1))
    z.grad(feed, wrt=z.parameters)
def test_GRU(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        def MakeGRUNameFromConfig(backward, initial_state, activation):
            model_name = 'GRU.' + activation.__name__
            if initial_state != 0:
                model_name += '.initial'
            if backward:
                model_name += '.backward'
            else:
                model_name += '.forward'
            return model_name

        direction_options = [False, True]
        activation_options = [C.tanh]
        initial_state_options = [0]

        input_dim = 2
        cell_dim = 3
        batch_size = 1
        sequence_len = 5

        for config in list(product(direction_options, initial_state_options, activation_options)):
            model_filename = MakeGRUNameFromConfig(*config)
            print(model_filename)
            backward, initial_state, activation = config

            x = C.input_variable(input_dim, dynamic_axes=[C.Axis.default_batch_axis(), C.Axis('sequenceAxis')])
            GRUModel = C.layers.Recurrence(C.layers.GRU(cell_dim, activation=activation),
                                           initial_state=initial_state,
                                           go_backwards=backward)(x)
            data = np.random.uniform(low=0.0, high=1.0, size=(batch_size, sequence_len, input_dim)).astype('f')
            verify_one_input(GRUModel, data, tmpdir, model_filename)
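# The `verify_no_input` / `verify_one_input` / `verify_two_input` helpers used throughout
# these tests come from the shared test utilities and are not shown here. Below is a
# minimal sketch of what they presumably do (export to ONNX, load back, and compare
# evaluations); the real helpers may marshal data and choose tolerances differently.
import os
import numpy as np
import cntk as C

def verify_no_input_sketch(model, tmpdir, name):
    # Round-trip a constant model (no free inputs) through ONNX and compare outputs.
    filename = os.path.join(str(tmpdir), name + '.onnx')
    model.save(filename, format=C.ModelFormat.ONNX)
    loaded = C.Function.load(filename, format=C.ModelFormat.ONNX)
    assert np.allclose(model.eval(), loaded.eval())

def verify_one_input_sketch(model, data, tmpdir, name, device=None):
    # Round-trip a one-input model through ONNX and compare outputs on `data`.
    filename = os.path.join(str(tmpdir), name + '.onnx')
    model.save(filename, format=C.ModelFormat.ONNX)
    loaded = C.Function.load(filename, format=C.ModelFormat.ONNX)
    assert np.allclose(model.eval({model.arguments[0]: data}, device=device),
                       loaded.eval({loaded.arguments[0]: data}, device=device))

def verify_two_input_sketch(model, data1, data2, tmpdir, name):
    # The same round-trip check for a model with two inputs.
    filename = os.path.join(str(tmpdir), name + '.onnx')
    model.save(filename, format=C.ModelFormat.ONNX)
    loaded = C.Function.load(filename, format=C.ModelFormat.ONNX)
    assert np.allclose(model.eval({model.arguments[0]: data1, model.arguments[1]: data2}),
                       loaded.eval({loaded.arguments[0]: data1, loaded.arguments[1]: data2}))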
def test_sequence_unpack_backprop(device_id):
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input_variable(input_vocab_size, is_sparse=True, name='features')
    label_input = C.input_variable(num_labels, is_sparse=True, name='labels')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = C.sequence.last(C.layers.Recurrence(C.plus)(model))
    ce = C.cross_entropy_with_softmax(z, label_input)

    seq1_data = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    seq2_data = [[0, 0, 1], [0, 1, 1]]
    label_data = _to_csr([[0, 1], [1, 0]])
    param_grads_1, loss_result_1 = ce.grad({x_seq_input: [_to_csr(seq1_data), _to_csr(seq2_data)], label_input: label_data},
                                           wrt=ce.parameters, outputs=[ce], as_numpy=False)

    z = C.sequence.reduce_sum(model)
    ce = C.cross_entropy_with_softmax(z, label_input)
    param_grads_2, loss_result_2 = ce.grad({x_seq_input: [_to_csr(seq1_data), _to_csr(seq2_data)], label_input: label_data},
                                           wrt=ce.parameters, outputs=[ce], as_numpy=False)

    assert np.allclose(loss_result_1.asarray(), loss_result_2.asarray())

    for param in param_grads_1:
        if not param_grads_1[param].is_sparse:
            reference_grad_value = param_grads_1[param].asarray()
            grad_value = param_grads_2[param].asarray()
            assert np.allclose(reference_grad_value, grad_value)
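# `_to_csr` is another shared helper not shown here. A plausible minimal sketch, assuming
# it simply converts a dense list-of-lists into a SciPy CSR matrix of float32 values
# (the layout CNTK expects for sparse sequence inputs):
import numpy as np
from scipy.sparse import csr_matrix

def _to_csr_sketch(data):
    # One row per sequence step; zeros are stored implicitly in the CSR format.
    return csr_matrix(np.asarray(data, dtype=np.float32))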
def test_BatchNormalization(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        sample = [  # 5 samples having 4 classes
            [1, 1, 2, 3],
            [0, 0, 0, 0],
            [3, 3, 4, 4],
            [1000, 1000, 1000, 1000],
            [10000, 10000, 10000, 10000]]

        epsilon = 0.00001
        t = np.asarray(sample, dtype=dtype).reshape(-1, 1)
        mean = 1
        var = 2
        init_scale = 3
        init_bias = 4

        scale = C.Parameter(init=np.asarray([init_scale], dtype=dtype), dtype=dtype)
        bias = C.Parameter(init=np.asarray([init_bias], dtype=dtype), dtype=dtype)
        run_mean = C.ops.constant(mean, shape=(1), dtype=dtype)
        run_variance = C.ops.constant(var, shape=(1), dtype=dtype)
        run_count = C.ops.constant(0, dtype=dtype)

        a = C.input_variable(shape=(1), dtype=dtype, needs_gradient=False, name='a')
        op_node = C.batch_normalization(a, scale, bias, run_mean, run_variance,
                                        running_count=run_count, spatial=False, epsilon=epsilon)
        verify_one_input(op_node, t, tmpdir, 'BatchNormalization')
def test_LayerNormalization(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")

    # This test point exercises the LayerNormalization round trip with the default epsilon.
    # We always lose the epsilon value when exporting to ONNX (because ONNX
    # MeanVarianceNormalization does not have an epsilon attribute). When loading back from
    # ONNX, CNTK always uses the default epsilon value (0.00001). That's why the test below
    # uses the default epsilon value. It is not expected to pass with any other epsilon
    # value until something changes.
    with C.default_options(dtype=dtype):
        test_shapes = [(3, 5, 7), (10,), (20, 31)]
        for shape in test_shapes:
            data = np.reshape(np.arange(np.prod(shape), dtype=dtype), shape)
            input_operand = C.input_variable(shape=shape)
            model0 = C.layers.LayerNormalization(initial_scale=1, initial_bias=2, epsilon=0.00001)(input_operand)
            verify_one_input(model0, data, tmpdir, 'LayerNorm_0')

        # This test point specifically uses epsilon = 0, because that creates a graph with a
        # different number of ops. However, we don't expect the numbers to match in the round
        # trip, because we only support the default epsilon (0.00001) when loading from ONNX.
        # Therefore, this is just a load/save test.
        model1 = C.layers.LayerNormalization(epsilon=0.0)(input_operand)
        filename = os.path.join(str(tmpdir), R'LayerNorm_1.onnx')
        model1.save(filename, format=C.ModelFormat.ONNX)
        loaded_model = C.Function.load(filename, format=C.ModelFormat.ONNX)
        assert model1.shape == loaded_model.shape
def test_MeanVarianceNormalization(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (3, 5, 7)
        data = np.reshape(np.arange(np.prod(shape), dtype=dtype), shape)
        input_operand = C.input_variable(shape=shape)

        model0 = C.mean_variance_normalization(input_operand, use_stats_across_channels=False, do_variance_scaling=True)
        verify_one_input(model0, data, tmpdir, 'MVN_0')

        model1 = C.mean_variance_normalization(input_operand, use_stats_across_channels=False, do_variance_scaling=False)
        verify_one_input(model1, data, tmpdir, 'MVN_1')

        model2 = C.mean_variance_normalization(input_operand, use_stats_across_channels=True, do_variance_scaling=True)
        verify_one_input(model2, data, tmpdir, 'MVN_2')

        # The test below exercises the round trip with epsilon. We always lose the epsilon
        # value when exporting to ONNX (because ONNX MeanVarianceNormalization does not have
        # an epsilon attribute). When loading back from ONNX, CNTK always uses the default
        # epsilon value (0.00001). That's why the test below uses the default epsilon value.
        # It is not expected to pass with any other epsilon value until something changes.
        model3 = C.mean_variance_normalization(input_operand, epsilon=0.00001, use_stats_across_channels=False, do_variance_scaling=True)
        verify_one_input(model3, data, tmpdir, 'MVN_3')
def test_convolution_transpose(tmpdir, dtype, device_id):
    pytest.skip('Needs to be fixed after removal of batch axis change.')
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test only runs on GPU')
    device = cntk_device(device_id)
    with C.default_options(dtype=dtype):
        img_shape = (1, 3, 3)
        img = np.asarray(np.random.uniform(-1, 1, img_shape), dtype=dtype)
        x = C.input_variable(img.shape)
        filter = np.reshape(np.array([2, -1, -1, 2], dtype=dtype), (1, 2, 2))
        kernel = C.constant(value=filter)
        root_node = C.convolution_transpose(kernel, x, auto_padding=[False], output_shape=(1, 4, 4))

        filename = os.path.join(str(tmpdir), R'conv_transpose.onnx')
        root_node.save(filename, format=C.ModelFormat.ONNX)
        loaded_node = C.Function.load(filename, format=C.ModelFormat.ONNX)
        assert root_node.shape == loaded_node.shape

        x_ = loaded_node.arguments[0]
        assert np.allclose(loaded_node.eval({x_: [img]}, device=device),
                           root_node.eval({x: [img]}, device=device))
def test_ConvTranspose(tmpdir, dtype, device_id):
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test is skipped on CPU with float16 data')
    device = cntk_device(device_id)
    with C.default_options(dtype=dtype):
        # Keep the shapes below as they are, because this tests an earlier bug.
        input_shape = (48, 16, 16)
        img = np.reshape(np.arange(np.prod(input_shape), dtype=dtype), input_shape)
        x = C.input_variable(input_shape)

        kernel_shape = (48, 32, 3, 3)  # For convolution_transpose the shape is (I x O x W x H).
        kernel = C.constant(value=np.ones(shape=kernel_shape, dtype=dtype))

        conv_trans_model = C.convolution_transpose(kernel, x, strides=(2, 2),
                                                   output_shape=(32, 32, 32),
                                                   auto_padding=[False, True, True])
        verify_one_input(conv_trans_model, img, tmpdir, 'ConvTranspose_0', device)
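# `cntk_device` also comes from the shared test utilities. A minimal sketch, assuming the
# usual convention in these tests that device_id == -1 selects the CPU and any other id
# selects the corresponding GPU:
import cntk as C

def cntk_device_sketch(device_id):
    # Map the numeric test parameter to a CNTK DeviceDescriptor.
    if device_id == -1:
        return C.device.cpu()
    return C.device.gpu(device_id)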
def test_ReduceSumSquare(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]], dtype=dtype)
        model = C.reduce_sum_square(data, 0)
        verify_no_input(model, tmpdir, 'ReduceSumSquare_0')
def test_ReduceSum(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.array([[[5, 1], [20, 2]], [[30, 1], [40, 2]], [[55, 1], [60, 2]]], dtype=dtype)
        model = C.reduce_sum(data, 0)
        verify_no_input(model, tmpdir, 'ReduceSum_0')
def test_MaxRoiPool(tmpdir, dtype):
    pytest.skip('MaxRoiPool is failing with ONNX shape inference (input rois). RuntimeError: [ShapeInferenceError] RoIs tensor must have 2 dimensions')
    with C.default_options(dtype=dtype):
        input_map = [[[1., 2., 3.],  # (1, 3, 3) input operand (conv feature map)
                      [4., 5., 6.],
                      [7., 8., 9.]]]
        input_rois = [[1, 1, 2, 2]]

        conv_input = np.asarray(input_map, dtype=dtype)
        roi_input = np.asarray(input_rois, dtype=dtype)

        a = C.input_variable(shape=conv_input.shape, dtype=dtype, needs_gradient=True, name='a')
        b = C.input_variable(shape=roi_input.shape, dtype=dtype, needs_gradient=False, name='b')

        # adding batch and sequence axis
        conv_input.shape = (1,) + conv_input.shape
        roi_input.shape = (1,) + roi_input.shape

        model = C.roipooling(a, b, C.MAX_POOLING, (3, 3), 1.)
        verify_two_input(model, conv_input, roi_input, tmpdir, 'MaxRoiPool_1')
def test_ArgMin(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (4, 5)
        data = np.random.rand(*shape).astype(dtype)
        model = C.argmin(data, 0)
        verify_no_input(model, tmpdir, 'ArgMin_0')
def create_resnet_network(network_name, fp16):
    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width))
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    with C.default_options(dtype=dtype):
        stride1x1 = (1, 1)
        stride3x3 = (2, 2)

        # create model, and configure learning parameters
        if network_name == 'resnet18':
            z = create_imagenet_model_basic(graph_input, [2, 1, 1, 2], num_classes)
        elif network_name == 'resnet34':
            z = create_imagenet_model_basic(graph_input, [3, 3, 5, 2], num_classes)
        elif network_name == 'resnet50':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 5, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet101':
            z = create_imagenet_model_bottleneck(graph_input, [2, 3, 22, 2], num_classes, stride1x1, stride3x3)
        elif network_name == 'resnet152':
            z = create_imagenet_model_bottleneck(graph_input, [2, 7, 35, 2], num_classes, stride1x1, stride3x3)
        else:
            raise RuntimeError("Unknown model name!")

        # loss and metric
        ce = cross_entropy_with_softmax(z, graph_label)
        errs = classification_error(z, graph_label, topN=1)
        top5Errs = classification_error(z, graph_label, topN=5)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        errs = C.cast(errs, dtype=np.float32)
        top5Errs = C.cast(top5Errs, dtype=np.float32)

    return {
        'name': network_name,
        'feature': input_var,
        'label': label_var,
        'ce': ce,
        'errs': errs,
        'top5Errs': top5Errs,
        'output': z
    }
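# A hedged usage sketch for create_resnet_network: wiring the returned dictionary into a
# CNTK Trainer. The learning rate, momentum, and minibatch size below are illustrative
# placeholders, not the settings from the original training script.
import cntk as C

def create_trainer_sketch(network, minibatch_size=256):
    lr_schedule = C.learning_parameter_schedule(0.1, minibatch_size=minibatch_size)
    mm_schedule = C.momentum_schedule(0.9, minibatch_size=minibatch_size)
    learner = C.momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule)
    # Train against the cross-entropy loss, reporting top-1 error as the metric.
    return C.Trainer(network['output'], (network['ce'], network['errs']), [learner])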
def test_Floor(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.asarray([0.2, 1.3, 4., 5.5, 0.0], dtype=dtype)
        model = C.floor(data)
        verify_no_input(model, tmpdir, 'Floor_0')

        x = C.input_variable(data.shape)
        model = C.floor(x)
        verify_one_input(model, data, tmpdir, 'Floor_1')
def test_Exp(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.asarray([0., 1.], dtype=dtype)
        model = C.exp(data)
        verify_no_input(model, tmpdir, 'Exp_0')

        x = C.input_variable(data.shape)
        model = C.exp(x)
        verify_one_input(model, data, tmpdir, 'Exp_1')
def test_ReduceL1(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]], dtype=dtype)
        model = C.reduce_l1(data, 1)
        verify_no_input(model, tmpdir, 'ReduceL1_0')

        x = C.input_variable(np.shape(data))
        model = C.reduce_l1(x, 1)
        verify_one_input(model, data, tmpdir, 'ReduceL1_1')
def test_Dropout(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.asarray([[10, 20], [30, 40], [50, 60]], dtype=dtype)
        model = C.dropout(data, 0.5)
        verify_no_input(model, tmpdir, 'Dropout_0')

        x = C.input_variable(data.shape)
        model = C.dropout(x, 0.5)
        verify_one_input(model, data, tmpdir, 'Dropout_1')
def test_MaxPool(tmpdir, dtype, device_id):
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test is skipped on CPU with float16 data')
    device = cntk_device(device_id)
    with C.default_options(dtype=dtype):
        img = np.reshape(np.arange(16, dtype=dtype), [1, 4, 4])
        x = C.input_variable(img.shape)
        model = C.pooling(x, C.MAX_POOLING, (2, 2), (3, 3))
        verify_one_input(model, img, tmpdir, 'MaxPool_1', device)
def test_Elu(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.asarray([[-1, -0.5, 0, 1, 2]], dtype=dtype)
        model = C.elu(data)
        verify_no_input(model, tmpdir, 'Elu_0')

        x1 = C.input_variable(data.shape)
        model = C.elu(x1)
        verify_one_input(model, data, tmpdir, 'Elu_1')

        x2 = C.input_variable(data.shape)
        model = C.elu(x2, alpha=2.0)
        verify_one_input(model, data, tmpdir, 'Elu_2')
def test_Flatten(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (2, 3, 4, 5)
        data = np.reshape(np.arange(np.prod(shape), dtype=dtype), shape)
        model = C.flatten(data, 1)
        verify_no_input(model, tmpdir, 'Flatten_0')

        x = C.input_variable(data.shape)
        model = C.flatten(x, 1)
        verify_one_input(model, data, tmpdir, 'Flatten_1')
def test_Mean(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        in1 = C.input_variable((4,))
        in2 = C.input_variable((4,))
        model = C.mean([in1, in2])
        in1_data = np.asarray([[1., 2., 3., 4.]], dtype=dtype)
        in2_data = np.asarray([[0., 5., -3., 2.]], dtype=dtype)
        verify_two_input(model, in1_data, in2_data, tmpdir, 'Mean_2')
def test_Sum(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        in1_data = np.asarray([[1., 2., 3., 4.]], dtype=dtype)
        in2_data = np.asarray([[0., 5., -3., 2.]], dtype=dtype)
        in1 = C.input_variable(np.shape(in1_data))
        in2 = C.input_variable(np.shape(in2_data))
        model = C.sum([in1, in2])
        verify_two_input(model, in1_data, in2_data, tmpdir, 'Sum_2')
def test_Gather(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        c = np.asarray([[[0], [1]], [[4], [5]]]).astype(dtype)
        x = C.input_variable((2, 1))
        d = np.arange(12).reshape(6, 2).astype(dtype)
        y = C.constant(d)
        model = C.gather(y, x)
        verify_one_input(model, c, tmpdir, 'Gather_1')
def test_HardSigmoid(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (2, 3)
        x = C.input_variable(shape=shape, dtype=dtype)
        alpha = 1.2
        beta = 2.5
        model = C.hard_sigmoid(x, alpha, beta, 'hardSigmoid')
        data = np.random.rand(*shape).astype(dtype)
        verify_one_input(model, data, tmpdir, 'HardSigmoid_1')
def test_Less(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        data0 = np.asarray([41., 42., 43.], dtype=dtype)
        data1 = np.asarray([42., 42., 42.], dtype=dtype)
        model = C.less(data0, data1)
        verify_no_input(model, tmpdir, 'Less_0')
def create_autoencoder(input_dim, output_dim, hidden_dim, feature_input):
    """
    Create a model with the layers library.
    """
    with C.default_options(init=C.glorot_uniform()):
        encode = Dense(input_dim, sigmoid)(feature_input)
        # conv = Convolution((3,3))(feature_input)
        decode = Dense(output_dim, sigmoid)(encode)
    return decode
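# A hedged usage sketch for create_autoencoder: reconstructing the input under a squared
# error criterion. The 784-dimensional input (flattened 28x28 images) and the hidden
# dimension below are illustrative assumptions, not taken from the original script.
import cntk as C

input_dim = 784
feature = C.input_variable(input_dim)
ae = create_autoencoder(input_dim, output_dim=input_dim, hidden_dim=64, feature_input=feature)
loss = C.squared_error(ae, feature)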
def test_LRN(tmpdir, dtype, device_id):
    if device_id == -1 and dtype == np.float16:
        pytest.skip('Test is skipped on CPU with float16 data, because it uses convolution.')
    device = cntk_device(device_id)
    with C.default_options(dtype=dtype):
        img_shape = (64, 32, 32)
        img = np.asarray(np.random.uniform(-1, 1, img_shape), dtype=dtype)
        x_r = C.input_variable(shape=img_shape, dtype=dtype)
        model = C.local_response_normalization(x_r, 2, 1.0, 0.0001, 0.75)
        verify_one_input(model, img, tmpdir, 'LRN_1', device)
def test_Not(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data1 = np.asarray([[1, 1, 0, 0], [1, 1, 1, 1]]).astype(dtype)
        model = C.element_not(data1)
        verify_no_input(model, tmpdir, 'Not_0')

        x = C.input_variable(np.shape(data1))
        model = C.element_not(x)
        verify_one_input(model, data1, tmpdir, 'Not_1')
def test_ArgMax(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (4, 5)
        data = np.random.rand(*shape).astype(dtype)
        model = C.argmax(data, 0)
        verify_no_input(model, tmpdir, 'ArgMax_0')

        x = C.input_variable(shape)
        model = C.argmax(x, 0)
        verify_one_input(model, data, tmpdir, 'ArgMax_1')
def test_Gather_With_Axis(tmpdir, dtype):
    if dtype == np.float16:
        pytest.skip("TO BE FIXED")
    with C.default_options(dtype=dtype):
        data = np.asarray([[[111, 112], [121, 122], [131, 132]],
                           [[211, 212], [221, 222], [231, 232]]]).astype(dtype)
        indices = np.asarray([[0, 1, 1], [1, 1, 1]])
        x = C.input_variable(np.shape(data))
        y = C.input_variable(np.shape(indices))
        axis = 1
        model = C.gather(data, y, axis)
        verify_one_input(model, indices, tmpdir, 'Gather_With_Axis_1')
def test_Abs(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (4, 5)
        data = np.random.rand(*shape).astype(dtype)
        model = C.abs(data)
        verify_no_input(model, tmpdir, 'Abs_0')

        x = C.input_variable(shape)
        model = C.abs(x)
        verify_one_input(model, data, tmpdir, 'Abs_1')
def test_DepthToSpace(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        num_channels = 9
        block_size = 3
        image_shape = (4, 5)
        input_val = np.array(np.reshape(range(num_channels), (num_channels, 1, 1)), dtype=dtype)
        input_val = np.tile(input_val, (1,) + image_shape)
        input_val.shape = (1,) + input_val.shape
        img = C.input_variable((num_channels,) + image_shape, dtype=dtype)
        model = C.depth_to_space(img, block_size)
        verify_one_input(model, input_val, tmpdir, 'DepthToSpace')
def test_LSTM(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        def CreateLSTMModel(activation, peepholes, self_stabilization, cell_dim, initial_state):
            return C.layers.Sequential([
                C.layers.Recurrence(C.layers.LSTM(cell_dim,
                                                  use_peepholes=peepholes,
                                                  activation=activation,
                                                  enable_self_stabilization=self_stabilization),
                                    initial_state=initial_state)
            ])

        def MakeLSTMNameFromConfig(use_peepholes, enable_self_stabilization, initial_state, activation):
            model_name = 'LSTM.' + activation.__name__
            if use_peepholes:
                model_name += '.peephole'
            if enable_self_stabilization:
                model_name += '.stabilize'
            if initial_state != 0:
                model_name += '.initial'
            return model_name

        # LSTM attributes
        use_peepholes_options = [False]
        enable_self_stabilization_options = [False]
        activation_options = [C.tanh]

        # Recurrence attributes
        initial_state_options = [0, 0.23]

        input_dim = 2
        cell_dim = 3
        batch_size = 1
        sequence_len = 5

        for config in list(product(use_peepholes_options, enable_self_stabilization_options,
                                   initial_state_options, activation_options)):
            model_filename = MakeLSTMNameFromConfig(*config)
            use_peepholes, enable_self_stabilization, initial_state, activation = config

            x = C.input_variable(input_dim, dynamic_axes=[C.Axis.default_batch_axis(), C.Axis('sequenceAxis')])
            LSTMmodel = CreateLSTMModel(peepholes=use_peepholes,
                                        activation=activation,
                                        initial_state=initial_state,
                                        cell_dim=cell_dim,
                                        self_stabilization=enable_self_stabilization)(x)
            data = np.random.uniform(low=0.0, high=1.0, size=(batch_size, sequence_len, input_dim)).astype('f')
            verify_one_input(LSTMmodel, data, tmpdir, model_filename)
def test_Pad(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        shape = (4, 5)
        data = np.random.rand(*shape).astype(dtype)

        model = C.pad(data, pattern=[(1, 1), (2, 2)], mode=C.ops.CONSTANT_PAD, constant_value=1)
        verify_no_input(model, tmpdir, 'Pad_0')

        x = C.input_variable(shape)
        model = C.pad(x, pattern=[(1, 1), (2, 2)], mode=C.ops.REFLECT_PAD)
        verify_one_input(model, data, tmpdir, 'Pad_1')
def test_Slice(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.asarray([[1, 2, -3], [4, 5, 6]], dtype=dtype)
        x1 = C.input_variable((2, 3))

        model = C.slice(data, 0, 1, 2)
        verify_no_input(model, tmpdir, 'Slice_0')

        model = C.slice(x1, 0, 1, 2)
        verify_one_input(model, data, tmpdir, 'Slice_1')

        model = C.slice(x1, [0, 1], [1, 0], [2, 1])
        verify_one_input(model, data, tmpdir, 'Slice2_1')
def test_Transpose(tmpdir, dtype):
    with C.default_options(dtype=dtype):
        data = np.arange(24).reshape(2, 3, 4).astype(dtype)
        x = C.input_variable(np.shape(data))

        model = C.transpose(data, perm=(2, 0, 1))
        verify_no_input(model, tmpdir, 'Transpose_0')

        model = C.transpose(x, perm=(2, 0, 1))
        verify_one_input(model, data, tmpdir, 'Transpose_1')

        model = C.transpose(x, perm=(0, 2, 1))
        verify_one_input(model, data, tmpdir, 'Transpose_1_2')
def test_to_sequence_backprop(device_id):
    dev = cntk_device(device_id)
    input_vocab_size = 3
    emb_dim = 2
    hidden_dim = 2
    num_labels = 2
    x_seq_input = C.sequence.input_variable(input_vocab_size, is_sparse=True, name='features')
    with C.default_options(initial_state=0.1):
        model = C.layers.Embedding(emb_dim, name='embed')(x_seq_input)
        model = C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=False)(model)
        model = C.layers.Dense(num_labels, name='classify')(model)

    z = model
    label_seq_input = C.sequence.input_variable(num_labels, is_sparse=True, name='labels')
    ce = C.cross_entropy_with_softmax(z, label_seq_input)

    seq1_data = [[0, 1, 1], [0, 1, 0], [1, 0, 0]]
    seq2_data = [[0, 0, 1], [0, 1, 1]]
    seq1_label_data = [[0, 1], [0, 1], [1, 0]]
    seq2_label_data = [[1, 0], [0, 1]]
    label_seq_data = [_to_csr(seq1_label_data), _to_csr(seq2_label_data)]
    param_grads_1, loss_result_1 = ce.grad({x_seq_input: [_to_csr(seq1_data), _to_csr(seq2_data)], label_seq_input: label_seq_data},
                                           wrt=ce.parameters, outputs=[ce], as_numpy=False)

    # Create a clone of the model that uses a non-sequence input
    # and converts it to a sequence using to_sequence
    x_non_seq_input = C.input_variable((C.FreeDimension, input_vocab_size), is_sparse=True, name='non_seq_features')
    x_seq_lens = C.input_variable((), name='sequence_lengths')
    x_seq = C.to_sequence(x_non_seq_input, x_seq_lens)
    x_seq = C.reconcile_dynamic_axes(C.times(x_seq, np.eye(input_vocab_size, dtype=np.float32)), label_seq_input)
    ce_clone = ce.clone('share', {x_seq_input: x_seq})

    x_non_seq_data = C.NDArrayView.from_csr(_to_csr([seq1_data, seq2_data + [[0, 0, 0]]]), shape=(2, 3, 3))
    x_seq_lens_data = np.asarray([3, 2], dtype=np.float32)

    x_non_seq_input = next(argument for argument in ce_clone.arguments if argument.name == 'non_seq_features')
    label_seq_input = next(argument for argument in ce_clone.arguments if argument.name == 'labels')
    x_seq_lens = next(argument for argument in ce_clone.arguments if argument.name == 'sequence_lengths')
    param_grads_2, loss_result_2 = ce_clone.grad({x_non_seq_input: x_non_seq_data, x_seq_lens: x_seq_lens_data, label_seq_input: label_seq_data},
                                                 wrt=ce_clone.parameters, outputs=[ce_clone], as_numpy=False)

    assert np.array_equal(loss_result_1.as_sequences()[0], loss_result_2.as_sequences()[0])
    assert np.array_equal(loss_result_1.as_sequences()[1], loss_result_2.as_sequences()[1])

    for param in param_grads_1:
        if not param_grads_1[param].is_sparse:
            reference_grad_value = param_grads_1[param].asarray()
            grad_value = param_grads_2[param].asarray()
            assert np.array_equal(reference_grad_value, grad_value)