def __call__(self, num_classes=10, act_type=relu, mdl_conv1a_nf=40, mdl_conv1b_nf=60,
             mdl_conv2a_nf=50, mdl_conv2b_nf=75, mdl_fc1_nh=75, mdl_drop2a_p=0.033,
             mdl_drop2b_p=0.097, mdl_drop3_p=0.412, **kwargs):
    """Build the convolutional network graph and return its end points.

    Layer order: conv1a -> conv1b -> pool1 -> conv2a -> drop2a -> conv2b ->
    drop2b -> pool2 -> fc1 -> drop3 -> fc2.

    Args:
        num_classes: Output size of the final dense layer ``fc2``.
        act_type: Activation passed to every convolution and to ``fc1``.
        mdl_conv1a_nf, mdl_conv1b_nf, mdl_conv2a_nf, mdl_conv2b_nf: Filter
            counts of the four convolutions; each is cast with ``int()``.
        mdl_fc1_nh: Size of ``fc1``; cast with ``int()``.
        mdl_drop2a_p, mdl_drop2b_p, mdl_drop3_p: Dropout probabilities of
            ``drop2a``, ``drop2b`` and ``drop3``.
        **kwargs: Accepted and ignored.

    Returns:
        Tuple ``(input_var, label_var, fc2)``.
    """
    input_var = input_variable((1, self.img_h, self.img_w), np.float32)
    label_var = input_variable((self.n_dim), np.float32)

    conv1a = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv1a_nf), activation=act_type,
                         init=glorot_uniform(), pad=True, name='conv1a')(input_var)
    conv1b = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv1b_nf), activation=act_type,
                         init=glorot_uniform(), pad=True, name='conv1b')(conv1a)
    pool1 = MaxPooling(filter_shape=(2, 2), strides=(2, 2), name='pool1')(conv1b)

    conv2a = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv2a_nf), activation=act_type,
                         init=glorot_uniform(), pad=True, name='conv2a')(pool1)
    drop2a = Dropout(prob=mdl_drop2a_p, name="drop2a")(conv2a)
    conv2b = Convolution(filter_shape=(3, 3), num_filters=int(mdl_conv2b_nf), activation=act_type,
                         init=glorot_uniform(), pad=True, name='conv2b')(drop2a)
    # Fixed copy/paste slip: this layer previously reused mdl_drop2a_p and the
    # name "drop2a", leaving mdl_drop2b_p unused and two nodes with one name.
    drop2b = Dropout(prob=mdl_drop2b_p, name="drop2b")(conv2b)
    pool2 = MaxPooling(filter_shape=(2, 2), strides=(2, 2), name='pool2')(drop2b)

    fc1 = Dense(shape=int(mdl_fc1_nh), init=glorot_uniform(), activation=act_type, name='fc1')(pool2)
    drop3 = Dropout(prob=mdl_drop3_p, name="drop3")(fc1)
    # fc2 is built with activation=None; an earlier variant used activation=softmax here.
    fc2 = Dense(shape=num_classes, init=glorot_uniform(), activation=None, name='fc2')(drop3)

    return input_var, label_var, fc2
def create_basic_model(input, out_dims):
    """Chain three 16-filter convolution / max-pooling stages and a dense head.

    Args:
        input: Node the first convolution is applied to.
        out_dims: Output size of the final dense layer.

    Returns:
        Output of the final dense layer, which is built with ``activation=None``.
    """
    def conv16(kernel):
        # The three convolutions differ only in their kernel shape.
        return Convolution(kernel, 16, init=glorot_uniform(), activation=relu,
                           pad=True, strides=(1, 1))

    net = conv16((5, 5))(input)
    net = MaxPooling((2, 2), strides=(1, 1))(net)

    net = conv16((5, 5))(net)
    net = MaxPooling((3, 3), strides=(2, 2))(net)

    net = conv16((9, 9))(net)
    net = MaxPooling((3, 3), strides=(2, 2))(net)

    net = Dense(256, init=glorot_uniform())(net)
    net = Dropout(0.5)(net)
    return Dense(out_dims, init=glorot_uniform(), activation=None)(net)
def _build_model(self):
    """Assemble three convolutions and two dense layers into a ``Sequential``.

    All layers are created under
    ``default_options(init=he_uniform(), activation=relu, bias=True)``. Both
    dense layers override ``init`` with ``he_normal(0.01)``; the last one is
    sized by ``self._nb_actions`` and built with ``activation=None``.

    Returns:
        The ``Sequential`` model (not applied to any input here).
    """
    # (filter shape, number of filters, strides) for each convolution, in order.
    conv_specs = (
        ((8, 8), 32, (4, 4)),
        ((4, 4), 64, (2, 2)),
        ((3, 3), 64, (1, 1)),
    )
    with default_options(init=he_uniform(), activation=relu, bias=True):
        layers = [Convolution(kernel, filters, strides=step)
                  for kernel, filters, step in conv_specs]
        layers.append(Dense(512, init=he_normal(0.01)))
        layers.append(Dense(self._nb_actions, activation=None, init=he_normal(0.01)))
        return Sequential(layers)
def _build_model(self):
    """Assemble two named convolutions and two named dense layers into a ``Sequential``.

    All layers are created under
    ``default_options(init=he_uniform(), activation=relu, bias=True)``. The
    dense layers override ``init`` with ``he_normal(0.01)``; the final layer,
    named ``'qvalues'``, is sized by ``self._nb_actions`` and built with
    ``activation=None``.

    Returns:
        The ``Sequential`` model (not applied to any input here).
    """
    with default_options(init=he_uniform(), activation=relu, bias=True):
        conv1 = Convolution((4, 4), 64, strides=(2, 2), name='conv1')
        conv2 = Convolution((3, 3), 64, strides=(1, 1), name='conv2')
        dense1 = Dense(512, name='dense1', init=he_normal(0.01))
        qvalues = Dense(self._nb_actions, activation=None, init=he_normal(0.01),
                        name='qvalues')
        return Sequential([conv1, conv2, dense1, qvalues])
def create_convolutional_neural_network(input_vars, out_dims):
    """Build a two-stage convolution / pooling network with a dense head.

    Args:
        input_vars: Node the assembled ``Sequential`` model is applied to.
        out_dims: Output size of the final dense layer.

    Returns:
        The ``Sequential`` model applied to ``input_vars``.
    """
    def conv_stage(num_filters):
        # One 5x5 convolution followed by a 2x2, stride-2 max pooling.
        conv = Convolution((5, 5), num_filters, strides=1, activation=cntk.ops.relu,
                           pad=True, init=glorot_normal(), init_bias=0.1)
        pool = MaxPooling((2, 2), strides=(2, 2), pad=True)
        return conv, pool

    conv1, pool1 = conv_stage(32)
    conv2, pool2 = conv_stage(64)
    # The third stage is still constructed but is not part of the model below.
    # NOTE(review): in the flattened source it is ambiguous whether the
    # commented-out list entry covered only the convolution or the pooling
    # layer as well; both are treated as commented out here - confirm.
    conv3, pool3 = conv_stage(128)

    hidden = Dense(1024, activation=cntk.ops.relu, init=glorot_normal(), init_bias=0.1)
    scores = Dense(out_dims, activation=None, init=glorot_normal(), init_bias=0.1)

    network = Sequential([
        conv1, pool1,
        conv2, pool2,
        #conv3, pool3,
        hidden,
        scores,
    ])
    return network(input_vars)
def create_vgg9_model(input, num_classes):
    """Build three conv-conv-pool groups, two 1024-unit dense layers and a classifier.

    Layers are created under ``default_options(activation=relu)``; the final
    dense layer overrides this with ``activation=None``.

    Args:
        input: Node the model is applied to.
        num_classes: Output size of the final dense layer.

    Returns:
        The model applied to ``input``.
    """
    filter_counts = [64, 96, 128]

    def conv_group(i):
        # Filter count depends on the stack index i.
        return [
            Convolution((3, 3), filter_counts[i], init=glorot_uniform(), pad=True),
            Convolution((3, 3), filter_counts[i], init=glorot_uniform(), pad=True),
            MaxPooling((3, 3), strides=(2, 2)),
        ]

    def dense_group():
        return [Dense(1024, init=glorot_uniform())]

    with default_options(activation=relu):
        network = Sequential([
            LayerStack(3, conv_group),
            LayerStack(2, dense_group),
            Dense(num_classes, init=glorot_uniform(), activation=None),
        ])
    return network(input)
def create_transfer_learning_model(input, num_classes, model_file, freeze=False):
    """Build a segmentation head on top of a pretrained network loaded from disk.

    The loaded model is reduced to the graph ending at the node named
    ``last_hidden_node_name``, cloned with its ``feature_node_name`` node
    replaced by a placeholder, and applied to a new input variable of shape
    ``(num_channels, image_height, image_width)``. Four nodes are looked up by
    name, each passed through ``OneByOneConvAndUpSample``; three of the results
    are spliced along axis 0, reduced by a 1x1 sigmoid convolution and passed
    through ``UpSampling2DPower(..., 2)``.

    Relies on module-level ``feature_node_name``, ``last_hidden_node_name``,
    ``num_channels``, ``image_height`` and ``image_width``.

    Args:
        input: Not used by this function.
        num_classes: Number of filters for every 1x1 convolution created here.
        model_file: Path handed to ``load_model``.
        freeze: If true, clone with ``C.CloneMethod.freeze``; otherwise with
            ``C.CloneMethod.clone``.

    Returns:
        The upsampled output node.
    """
    pretrained = load_model(model_file)
    pretrained = C.as_composite(pretrained[3].owner)

    # Locate the feature and last hidden nodes of the pretrained net.
    feature_node = C.logging.find_by_name(pretrained, feature_node_name)
    last_node = C.logging.find_by_name(pretrained, last_hidden_node_name)

    if freeze:
        clone_method = C.CloneMethod.freeze
    else:
        clone_method = C.CloneMethod.clone
    pretrained = C.combine([last_node.owner]).clone(
        clone_method, {feature_node: C.placeholder(name='features')})
    pretrained = pretrained(C.input_variable((num_channels, image_height, image_width)))

    tap_names = (
        "z.x.x.r",
        "z.x.x.x.x.r",
        "z.x.x.x.x.x.x.r",
        "z.x.x.x.x.x.x.x.x.r",
    )
    taps = [C.logging.find_by_name(pretrained, tap_name) for tap_name in tap_names]

    # Upsampling powers 3, 2, 1, 0 are paired with the taps in the order above.
    upsampled = [OneByOneConvAndUpSample(tap, power, num_classes)
                 for tap, power in zip(taps, (3, 2, 1, 0))]

    # Only the first three branches are merged (first, third, second); the
    # fourth is created but not used.
    merged = C.splice(upsampled[0], upsampled[2], upsampled[1], axis=0)

    fcn_out = Convolution((1, 1), num_classes, init=he_normal(), activation=sigmoid,
                          pad=True)(merged)
    return UpSampling2DPower(fcn_out, 2)
def create_convolutional_neural_network(input_vars, out_dims, dropout_prob=0.0):
    """Build a three-stage convolution / pooling network with a dense head.

    Args:
        input_vars: Node the first convolution is applied to.
        out_dims: Output size of the final dense layer.
        dropout_prob: Value passed to the ``Dropout`` layer placed between the
            two dense layers.

    Returns:
        Output of the final dense layer, which is built with ``activation=None``.
    """
    net = input_vars
    for num_filters in (32, 64, 128):
        # 5x5 relu convolution followed by 2x2, stride-2 max pooling.
        net = Convolution((5, 5), num_filters, strides=1, activation=cntk.ops.relu,
                          pad=True, init=gaussian(), init_bias=0.1)(net)
        net = MaxPooling((2, 2), strides=(2, 2), pad=True)(net)

    net = Dense(1024, activation=cntk.ops.relu, init=gaussian(), init_bias=0.1)(net)
    net = Dropout(dropout_prob)(net)
    return Dense(out_dims, activation=None, init=gaussian(), init_bias=0.1)(net)
def clone_cntk_layer(self, feature):
    """Returns a clone of the CNTK layer for per-layer forward prop validation.

    The clone is a ``Convolution`` whose filter shape and filter count come
    from ``self.weights_parameter.shape``, whose padding comes from
    ``self.attributes['autoPadding']`` and whose activation is chosen from
    the op names found in the original layer's nodes. Weights (and bias, when
    present) are copied from this object's parameters.

    Args:
        feature: Node the cloned convolution is applied to.

    Returns:
        The cloned layer applied to ``feature``.
    """
    activation = None
    nodes = utilities.get_model_layers(self.layer.block_root)
    # Identity checks against None: ``!= None`` would go through any
    # ``__ne__`` defined by the returned node object.
    if utilities.find_node_by_op_name(nodes, 'ReLU') is not None:
        activation = relu
    elif utilities.find_node_by_op_name(nodes, 'Sigmoid') is not None:
        activation = sigmoid
    elif utilities.find_node_by_op_name(nodes, 'LeakyReLU') is not None:
        activation = leaky_relu

    weightsShape = self.weights_parameter.shape
    pad = self.attributes['autoPadding'][0] or (
        self.attributes['autoPadding'][1] and self.attributes['autoPadding'][2])
    bias = (self.bias_parameter is not None)

    layer = Convolution((weightsShape[2], weightsShape[3]), weightsShape[0],
                        pad=pad, activation=activation, bias=bias)(feature)

    # Copy the trained values into the freshly created parameters.
    layer.parameters[0].value = self.weights_parameter.value
    if bias:
        layer.parameters[1].value = self.bias_parameter.value
    return layer
def test_convolution_layer(self):
    """Test a model with a single CNTK Convolution layer against the
    equivalent ELL predictor.

    This verifies that the import functions reshape and reorder values
    appropriately and that the equivalent ELL layer produces comparable
    output.
    """
    # Create a Convolution CNTK layer with no bias or activation,
    # auto-padding, stride of 1
    convolutionLayer = Convolution((3, 3), 5, pad=(True, True), strides=1,
                                   bias=False, init=0)
    x = input((2, 3, 4))  # Input order for CNTK is channels, rows, columns
    cntkModel = convolutionLayer(x)

    # Create a test set of weights to use for both CNTK and ELL layers
    # CNTK has these in filters, channels, rows, columns order
    # np.float64 is used instead of the np.float_ alias, which NumPy 2.0 removed.
    weightValues = np.arange(90, dtype=np.float64).reshape(5, 2, 3, 3)

    # Set the weights
    convolutionLayer.parameters[0].value = weightValues

    # create an ELL Tensor from the cntk weights, which re-orders the
    # weights and produces an appropriately dimensioned tensor
    weightTensor = cntk_converters.get_float_tensor_from_cntk_convolutional_weight_parameter(
        convolutionLayer.parameters[0])

    # Create the equivalent ELL predictor
    layerParameters = ell.LayerParameters(
        # Input order for ELL is rows, columns, channels. Account for
        # padding.
        ell.TensorShape(3 + 2, 4 + 2, 2),
        ell.ZeroPadding(1),
        ell.TensorShape(3, 4, 5),
        ell.NoPadding())

    convolutionalParameters = ell.ConvolutionalParameters(3, 1, 0, 5)

    layer = ell.FloatConvolutionalLayer(
        layerParameters, convolutionalParameters, weightTensor)
    predictor = ell.FloatNeuralNetworkPredictor([layer])

    # Get the results for both
    inputValues = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
    cntkResults = cntkModel(inputValues)
    orderedCntkResults = cntk_converters.get_float_vector_from_cntk_array(
        cntkResults)

    orderedInputValues = cntk_converters.get_float_vector_from_cntk_array(
        inputValues)
    ellResults = predictor.Predict(orderedInputValues)

    # Compare the results
    np.testing.assert_array_equal(
        orderedCntkResults, ellResults,
        'results for Convolution layer do not match!')

    # now run same over ELL compiled model
    self.verify_compiled(
        predictor, orderedInputValues, orderedCntkResults, "convolution", "test")
def test_depth_first_search_blocks(depth, prefix_count):
    """Check how many nodes a depth-first search finds, grouped by string prefix.

    A small model (convolution, max pooling, dense, block-wrapped dense) is
    applied to a ``(3, 256, 256)`` input named ``'image'`` and searched with
    an accept-all predicate at the given ``depth``.

    Args:
        depth: Value forwarded to ``depth_first_search``.
        prefix_count: Mapping from a string prefix to the expected number of
            found nodes whose ``str()`` starts with that prefix; the counts
            must sum to the total number of found nodes.
    """
    from cntk.layers import Sequential, Convolution, MaxPooling, Dense
    from cntk.default_options import default_options

    def Blocked_Dense(dim, activation=None):
        # Wrap a Dense layer in a block function named 'blocked_dense'.
        inner = Dense(dim, activation=activation)

        @C.layers.BlockFunction('blocked_dense', 'blocked_dense')
        def func(x):
            return inner(x)
        return func

    with default_options(activation=C.relu):
        image_to_vec = Sequential([
            Convolution((5, 5), 32, pad=True),
            MaxPooling((3, 3), strides=(2, 2)),
            Dense(10, activation=None),
            Blocked_Dense(10),
        ])

    in1 = C.input_variable(shape=(3, 256, 256), name='image')
    img = image_to_vec(in1)

    found = C.logging.graph.depth_first_search(img, lambda x: True, depth=depth)
    found_str = [str(node) for node in found]

    assert len(found) == sum(prefix_count.values())
    for prefix, count in prefix_count.items():
        assert sum(text.startswith(prefix) for text in found_str) == count
def create_model(input, num_classes):
    """Build a three-level residual network with a fused multi-scale output.

    The outputs of the three residual stacks are each passed through
    ``OneByOneConvAndUpSample`` (powers 0, 1 and 2), spliced along axis 0 and
    reduced by a 1x1 sigmoid convolution with ``num_classes`` filters.

    Args:
        input: Node the first ``conv_bn_relu`` is applied to.
        num_classes: Number of filters of every 1x1 convolution created here.

    Returns:
        Output of the final 1x1 convolution.
    """
    widths = [16, 32, 64]
    stack_depth = 3

    stem = conv_bn_relu(input, (3, 3), widths[0])
    level1 = resnet_basic_stack(stem, stack_depth, widths[0])

    level2 = resnet_basic_inc(level1, widths[1])
    level2 = resnet_basic_stack(level2, stack_depth - 1, widths[1])

    level3 = resnet_basic_inc(level2, widths[2])
    level3 = resnet_basic_stack(level3, stack_depth - 1, widths[2])

    up1 = OneByOneConvAndUpSample(level1, 0, num_classes)
    up2 = OneByOneConvAndUpSample(level2, 1, num_classes)
    up3 = OneByOneConvAndUpSample(level3, 2, num_classes)

    # Splice order is level 1, level 3, level 2.
    merged = C.splice(up1, up3, up2, axis=0)

    return Convolution((1, 1), num_classes, init=he_normal(), activation=sigmoid,
                       pad=True)(merged)
def test_layers_conv_pool_unpool_deconv():
    """Run convolution -> max pooling -> max unpooling -> transposed convolution.

    Uses a 1x4x4 input holding 0..15, all-ones 2x2 kernels and 2x2 pooling
    with stride 2. Checks that the output shape equals the input shape and
    compares the lower-right 3x3 part of the result with fixed values.
    """
    channels, height, width = 1, 4, 4
    y = input((channels, height, width))

    num_maps = 1
    use_padding = True
    kernel_init = 1
    window = (2, 2)
    pool_strides = (2, 2)

    dat = np.arange(0, 16, dtype=np.float32).reshape(1, 1, 4, 4)

    conv = Convolution(window, num_maps, pad=use_padding, init=kernel_init,
                       activation=None)(y)
    pool = MaxPooling(window, pool_strides)(conv)
    unpool = MaxUnpooling(window, pool_strides)(pool, conv)
    z = ConvolutionTranspose(window, num_maps, init=kernel_init, pad=use_padding)(unpool)

    assert z.shape == y.shape

    res = z(dat)
    expected_res = np.asarray([[30, 64, 34],
                               [76, 160, 84],
                               [46, 96, 50]], np.float32)

    np.testing.assert_array_almost_equal(
        res[0][0][1:, 1:], expected_res, decimal=6,
        err_msg="Wrong values in conv/pooling/unpooling/conv_transposed")
def create_shallow_model(input, out_dims):
    """Build three 32-filter convolutions, two poolings and a three-layer dense head.

    Args:
        input: Node the first convolution is applied to.
        out_dims: Output size of the final dense layer.

    Returns:
        Output of the final dense layer, which is built with ``activation=None``.
    """
    def conv32(kernel):
        # All convolutions share every setting except the kernel shape.
        return Convolution(kernel, 32, init=glorot_uniform(), activation=relu,
                           pad=True, strides=(1, 1))

    net = conv32((7, 7))(input)
    net = conv32((25, 25))(net)
    net = MaxPooling((25, 25), strides=(5, 5))(net)

    net = conv32((3, 3))(net)
    net = MaxPooling((2, 2), strides=(2, 2))(net)

    net = Dense(512, init=glorot_uniform())(net)
    net = Dense(128, init=glorot_uniform())(net)
    net = Dropout(0.5)(net)
    return Dense(out_dims, init=glorot_uniform(), activation=None)(net)
def OneByOneConvAndUpSample(x, k_power, num_channels):
    """Apply a 1x1 relu convolution to ``x`` and upsample the result.

    Args:
        x: Node the convolution is applied to.
        k_power: Second argument forwarded to ``UpSampling2DPower``.
        num_channels: Number of filters of the 1x1 convolution.

    Returns:
        Result of ``UpSampling2DPower`` on the convolution output.
    """
    pointwise = Convolution((1, 1), num_channels, init=he_normal(), activation=relu,
                            pad=True)
    return UpSampling2DPower(pointwise(x), k_power)
def convolution_bn(input, filter_size, num_filters, strides=(1,1), init=he_normal(), activation=relu):
    """Apply a bias-free padded convolution, batch normalization and an activation.

    Args:
        input: Node the convolution is applied to.
        filter_size: Filter shape of the convolution.
        num_filters: Number of filters of the convolution.
        strides: Strides of the convolution.
        init: Initializer of the convolution.
        activation: Callable applied after batch normalization; ``None``
            returns the batch-normalized node unchanged.

    Returns:
        The activated (or, for ``activation=None``, batch-normalized) node.
    """
    conv = Convolution(filter_size, num_filters, strides=strides, init=init,
                       activation=None, pad=True, bias=False)(input)
    normalized = BatchNormalization(map_rank=1)(conv)
    if activation is None:
        return normalized
    return activation(normalized)
def create_advanced_model(input, out_dims):
    """Build two convolution groups, two dense/dropout groups and an output layer.

    Layers are created under ``default_options(activation=relu)``; the final
    dense layer overrides this with ``activation=None``.

    Args:
        input: Node the model is applied to.
        out_dims: Output size of the final dense layer.

    Returns:
        The model applied to ``input``.
    """
    depths = [32, 64]

    def conv_group(i):
        # Takes the loop index: the filter count depends on i.
        return [
            Convolution((3, 3), depths[i], pad=True),
            Convolution((5, 5), depths[i], pad=True),
            Convolution((9, 9), depths[i], pad=True),
            MaxPooling((3, 3), strides=(2, 2)),
        ]

    def dense_group():
        # Takes no parameter: every repetition is identical.
        return [Dense(512), Dropout(0.5)]

    with default_options(activation=relu):
        network = Sequential([
            For(range(2), conv_group),
            For(range(2), dense_group),
            Dense(out_dims, activation=None),
        ])
    return network(input)
def conv_bn(input, filter_size, num_filters, strides=(1, 1), init=he_normal()):
    """Apply a bias-free padded convolution followed by batch normalization.

    Args:
        input: Node the convolution is applied to.
        filter_size: Filter shape of the convolution.
        num_filters: Number of filters of the convolution.
        strides: Strides of the convolution.
        init: Initializer of the convolution.

    Returns:
        The batch-normalized node.
    """
    convolve = Convolution(filter_size, num_filters, activation=None, init=init,
                           pad=True, strides=strides, bias=False)
    normalize = BatchNormalization(map_rank=1, normalization_time_constant=4096,
                                   use_cntk_engine=False)
    return normalize(convolve(input))
def test_sequential_convolution_without_reduction_dim():
    """Exercise sequential ``Convolution`` with ``reduction_rank=0``.

    Two value checks use a 3-tap kernel ``[4, 2, 1]`` on the sequence
    ``2, 6, 4, 8, 6`` (once as scalars, once as dim-1 vectors). The remaining
    statements only construct graphs on top of an ``Embedding`` and make no
    assertions.
    """
    error_text = 'Error in sequential convolution without reduction dimension'

    def make_conv():
        kernel = np.array([4., 2., 1.], dtype=np.float32)
        return Convolution(3, init=kernel, sequential=True, pad=False,
                           reduction_rank=0, bias=False)

    # input is a sequence of scalars
    scalar_conv = make_conv()
    scalar_conv.update_signature(Sequence[Tensor[()]])
    # like a short audio sequence, in the dynamic dimension
    scalar_data = [np.array([2., 6., 4., 8., 6.])]
    scalar_out = scalar_conv(scalar_data)
    np.testing.assert_array_equal(scalar_out, [[24., 40., 38.]], err_msg=error_text)

    # input is a sequence of dim-1 vectors
    vector_conv = make_conv()
    vector_conv.update_signature(Sequence[Tensor[1]])
    vector_data = [np.array([[2.], [6], [4.], [8.], [6.]])]
    vector_out = vector_conv(vector_data)
    # not reducing; hence, output is also a sequence of dim-1 vectors
    np.testing.assert_array_equal(vector_out, [[[24.], [40.], [38]]], err_msg=error_text)

    # these cases failed before
    emb_dim = 10
    x = C.input_variable(**Sequence[Tensor[20]])
    embedded = Embedding(emb_dim)(x)
    embedded = Convolution(filter_shape=3, sequential=True)(embedded)

    # this one still fails
    # Reshape: Operand (sub-)dimensions '[3]' incompatible with desired replacement (sub-)dimensions '[]'. Number of elements must be the same..
    embedded = Embedding(emb_dim)(x)
    embedded = reshape(embedded, (emb_dim, 1))
    embedded = Convolution(filter_shape=(3, 1), num_filters=13, pad=True,
                           sequential=True)(embedded)

    embedded = Embedding(emb_dim)(x)
    embedded = Convolution(filter_shape=3, pad=True, sequential=True)(embedded)
def conv_dw(input, fillter_size, num_filters, strides=(1, 1), init=he_normal()):
    """Apply a bias-free padded convolution with ``groups=1``, then relu.

    Prints the shape of the convolution output before applying relu.

    NOTE(review): the name suggests a depthwise convolution, but ``groups=1``
    is passed - confirm this is intended.

    Args:
        input: Node the convolution is applied to.
        fillter_size: Filter shape of the convolution.
        num_filters: Number of filters of the convolution.
        strides: Strides of the convolution.
        init: Initializer of the convolution.

    Returns:
        ``relu`` applied to the convolution output.
    """
    convolve = Convolution(fillter_size, num_filters, activation=None, init=init,
                           pad=True, strides=strides, bias=False, groups=1)
    conv_out = convolve(input)
    print('r.shape ', conv_out.shape)
    return relu(conv_out)
def create_model(feature_dimensions, classes):
    """Build three conv / batch-norm / pool groups and a dense classifier.

    Layers are created under
    ``default_options(activation=relu, init=glorot_uniform())``; the final
    dense layer overrides the activation with ``None``.

    Args:
        feature_dimensions: Node the model is applied to.
        classes: Sized collection; its length is the output size of the
            final dense layer.

    Returns:
        The model applied to ``feature_dimensions``.
    """
    filter_counts = [32, 32, 64]

    def conv_group(i):
        # Filter count depends on the loop index i.
        return [
            Convolution((5, 5), filter_counts[i], pad=True),
            BatchNormalization(map_rank=1),
            MaxPooling((3, 3), strides=(2, 2)),
        ]

    with default_options(activation=relu, init=glorot_uniform()):
        network = Sequential([
            For(range(3), conv_group),
            Dense(64),
            BatchNormalization(map_rank=1),
            Dense(len(classes), activation=None),
        ])
    return network(feature_dimensions)
def test_convolution_consistency_in_different_evals():
    """Evaluate the same convolution twice on the same data and compare results.

    Uses a 1x4x4 input holding 0..15 and an unpadded 2x2 convolution named
    ``'foo'`` with no activation.
    """
    channels, height, width = 1, 4, 4
    y = input((channels, height, width))
    num_maps = 1

    dat = np.arange(0, 16, dtype=np.float32).reshape(1, 1, 4, 4)

    conv = Convolution((2, 2), num_maps, pad=False, activation=None, name='foo')(y)

    first_eval_result = conv(dat)
    second_eval_result = conv(dat)

    np.testing.assert_array_almost_equal(
        second_eval_result, first_eval_result, decimal=5,
        err_msg="Error in convolution consistency, different results for two runs")
def clone_cntk_layer(self, feature):
    """Returns a clone of the CNTK layer for per-layer forward prop validation.

    The clone is a ``Convolution`` whose filter shape and filter count come
    from ``self.weights_parameter.shape``, whose padding comes from
    ``self.attributes['autoPadding']`` and whose activation is obtained via
    ``utilities.get_cntk_activation_op``. Weights (and bias, when present)
    are copied from this object's parameters.

    Args:
        feature: Node the cloned convolution is applied to.

    Returns:
        The cloned layer applied to ``feature``.
    """
    layer_nodes = utilities.get_model_layers(self.layer.block_root)
    activation = utilities.get_cntk_activation_op(layer_nodes)

    num_filters = self.weights_parameter.shape[0]
    kernel = (self.weights_parameter.shape[2], self.weights_parameter.shape[3])

    auto_padding = self.attributes['autoPadding']
    pad = auto_padding[0] or (auto_padding[1] and auto_padding[2])

    has_bias = self.bias_parameter is not None

    clone = Convolution(kernel, num_filters, pad=pad, activation=activation,
                        bias=has_bias)(feature)

    # Copy the trained values into the freshly created parameters.
    clone.parameters[0].value = self.weights_parameter.value
    if has_bias:
        clone.parameters[1].value = self.bias_parameter.value
    return clone
def conv_bn_relu_layer(input, num_filters, filter_size, strides=(1, 1), pad=True, bnTimeConst=4096, init=he_normal()):
    """Apply a bias-free convolution, batch normalization and relu.

    Args:
        input: Node the convolution is applied to.
        num_filters: Number of filters of the convolution.
        filter_size: Filter shape of the convolution.
        strides: Strides of the convolution.
        pad: Padding setting of the convolution.
        bnTimeConst: ``normalization_time_constant`` of the batch normalization.
        init: Initializer of the convolution.

    Returns:
        ``relu`` applied to the batch-normalized convolution output.
    """
    convolve = Convolution(filter_size, num_filters, activation=None, init=init,
                           pad=pad, strides=strides, bias=False)
    normalize = BatchNormalization(map_rank=1, normalization_time_constant=bnTimeConst,
                                   use_cntk_engine=False)
    return relu(normalize(convolve(input)))
def test_layers_name():
    """Check that the ``name`` given to a layer shows up on the applied result.

    Covers ``Dense``, ``Convolution`` and ``Embedding`` (with a non-empty and
    an empty name), each applied to one shared placeholder named ``'input'``.
    """
    from cntk import placeholder

    def check_name(node, expected):
        # Both the node and its root function must carry the layer name.
        assert node.name == expected
        assert node.root_function.name == expected

    I = placeholder(name='input')

    dense_out = Dense(10, name='dense10')(I)
    assert dense_out.name == 'dense10'
    assert I.name == 'input'
    assert dense_out.root_function.name == 'dense10'

    conv_out = Convolution((3, 3), 3, name='conv33')(I)
    check_name(conv_out, 'conv33')

    emb_out = Embedding(0, name='emb')(I)
    check_name(emb_out, 'emb')

    unnamed_out = Embedding(0, name='')(I)
    check_name(unnamed_out, '')
def test_depth_first_search_blocks(depth, prefix_count):
    """Check how many nodes a depth-first search finds, grouped by string prefix.

    A small model (convolution, max pooling, dense) is applied to a
    ``(3, 256, 256)`` input named ``'image'`` and searched with an accept-all
    predicate at the given ``depth``.

    Args:
        depth: Value forwarded to ``depth_first_search``.
        prefix_count: Mapping from a string prefix to the expected number of
            found nodes whose ``str()`` starts with that prefix; the counts
            must sum to the total number of found nodes.
    """
    from cntk.layers import Sequential, Convolution, MaxPooling, Dense
    from cntk.default_options import default_options

    with default_options(activation=relu):
        layers = [
            Convolution((5, 5), 32, pad=True),
            MaxPooling((3, 3), strides=(2, 2)),
            Dense(10, activation=None),
        ]
        image_to_vec = Sequential(layers)

    in1 = input(shape=(3, 256, 256), name='image')
    img = image_to_vec(in1)

    found = depth_first_search(img, lambda x: True, depth=depth)
    found_str = [str(node) for node in found]

    assert len(found) == sum(prefix_count.values())
    for prefix, count in prefix_count.items():
        assert sum(text.startswith(prefix) for text in found_str) == count
def conv_bn(layer_input, filter_size, num_filters, strides, init=he_normal(), name=''):
    """
    Returns a convolutional layer followed by a batch normalization layer

    The convolution is padded, has a bias and no activation, and carries
    ``name``; the batch normalization is named ``name`` plus ``'_bn'``.

    Args:
        layer_input: Node the convolution is applied to.
        filter_size: Filter shape of the convolution.
        num_filters: Number of filters of the convolution.
        strides: Strides of the convolution.
        init: Initializer of the convolution.
        name: Name of the convolution layer.

    Returns:
        The batch-normalized node.
    """
    convolve = Convolution(filter_size, num_filters, activation=None, init=init,
                           pad=True, strides=strides, bias=True, name=name)
    conv_out = convolve(layer_input)

    bn_name = '{}_bn'.format(name)
    normalize = BatchNormalization(map_rank=1, normalization_time_constant=4096,
                                   name=bn_name)
    return normalize(conv_out)
def create_model_ext(input, ext_values, out_dims):
    """Build a VGG-style convolutional network whose dense head also sees ``ext_values``.

    Style reference: https://www.cs.toronto.edu/~frossard/post/vgg16/

    Seven relu convolutions and four max poolings are followed by a 512-unit
    dense layer with dropout; its output is spliced with ``ext_values`` along
    axis 0 and fed through dense layers of 256 and 128 units, dropout, and
    the output layer.

    Args:
        input: Node the first convolution is applied to.
        ext_values: Node spliced onto the first dense layer's (dropped-out) output.
        out_dims: Output size of the final dense layer.

    Returns:
        Output of the final dense layer, which is built with ``activation=None``.
    """
    def conv(kernel, filters):
        # All convolutions share every setting except kernel shape and filter count.
        return Convolution(kernel, filters, init=glorot_uniform(), activation=relu,
                           pad=True, strides=(1, 1))

    # Block 1
    net = conv((3, 3), 16)(input)
    net = conv((5, 5), 32)(net)
    net = MaxPooling((2, 2), strides=(2, 2))(net)

    # Block 2
    net = conv((3, 3), 32)(net)
    net = conv((7, 7), 64)(net)
    net = MaxPooling((2, 2), strides=(1, 1))(net)

    # Block 3
    net = conv((3, 3), 64)(net)
    net = conv((7, 7), 96)(net)
    net = MaxPooling((2, 2), strides=(1, 1))(net)

    # Block 4
    net = conv((3, 3), 96)(net)
    net = MaxPooling((2, 2), strides=(1, 1))(net)

    # Dense head; the external values join after the first dense + dropout.
    net = Dense(512, init=glorot_uniform())(net)
    net = Dropout(0.5)(net)
    net = splice(net, ext_values, axis=0)

    net = Dense(256, init=glorot_uniform())(net)
    net = Dense(128, init=glorot_uniform())(net)
    net = Dropout(0.5)(net)
    return Dense(out_dims, init=glorot_uniform(), activation=None)(net)
# Dump the function `r` (defined earlier in the script) before and after its
# signature is updated.
debughelpers.dump_function(r)
r.update_signature(1)
debughelpers.dump_function(r)
data = [   # simple sequence
    array([[2], [6], [4], [8], [6]])
]
# Evaluation of `r` is disabled because of the failure quoted below.
#out = r(data)  # BUGBUG: fails with "ValueError: Variable(Plus5_output) with unknown shape detected when compiling the Function graph!"
#print(out)

# ----------------------------------------------
# sequential convolution without reduction dimension
# ----------------------------------------------

from cntk.layers import Convolution
# 3-tap kernel [4, 2, 1], no padding, no bias, reduction_rank=0.
c = Convolution(3, init=array([4, 2, 1]), sequential=True, pad=False, reduction_rank=0, bias=False)
debughelpers.dump_function(c)
c.update_signature(1)
debughelpers.dump_function(c)
data = [   # audio sequence
    array([[2], [6], [4], [8], [6]])
]
out = c(data)
print(out)  # [[[[ 24. 40. 38.]]]]

# ----------------------------------------------
# 1D convolution without reduction dimension
# ----------------------------------------------

from cntk.layers import Convolution
def test_failing_convolution():
    """Expect a ``ValueError`` from building a (3, 3) convolution and calling
    ``update_signature(5)`` on it.

    Both statements sit inside the ``pytest.raises`` block, so the error may
    come from either of them.
    """
    with pytest.raises(ValueError):
        layer = Convolution((3, 3), 1)
        layer.update_signature(5)
def test_sequential_convolution_without_reduction_dim():
    """Exercise sequential ``Convolution`` with ``reduction_rank=0``.

    Two value checks use a 3-tap kernel ``[4, 2, 1]`` on the sequence
    ``2, 6, 4, 8, 6`` (once as scalars, once as dim-1 vectors). The remaining
    statements only construct graphs on top of an ``Embedding`` and make no
    assertions.
    """
    failure_message = 'Error in sequential convolution without reduction dimension'

    # Case 1: input is a sequence of scalars
    conv = Convolution(3, init=np.array([4., 2., 1.], dtype=np.float32),
                       sequential=True, pad=False, reduction_rank=0, bias=False)
    conv.update_signature(Sequence[Tensor[()]])
    # like a short audio sequence, in the dynamic dimension
    sequence = [np.array([2., 6., 4., 8., 6.])]
    result = conv(sequence)
    expected = [[24., 40., 38.]]
    np.testing.assert_array_equal(result, expected, err_msg=failure_message)

    # Case 2: input is a sequence of dim-1 vectors
    conv = Convolution(3, init=np.array([4., 2., 1.], dtype=np.float32),
                       sequential=True, pad=False, reduction_rank=0, bias=False)
    conv.update_signature(Sequence[Tensor[1]])
    sequence = [np.array([[2.], [6], [4.], [8.], [6.]])]
    result = conv(sequence)
    # not reducing; hence, output is also a sequence of dim-1 vectors
    expected = [[[24.], [40.], [38]]]
    np.testing.assert_array_equal(result, expected, err_msg=failure_message)

    # these cases failed before
    emb_dim = 10
    x = input(**Sequence[Tensor[20]])
    graph = Embedding(emb_dim)(x)
    graph = Convolution(filter_shape=3, sequential=True)(graph)

    # this one still fails
    # Reshape: Operand (sub-)dimensions '[3]' incompatible with desired replacement (sub-)dimensions '[]'. Number of elements must be the same..
    graph = Embedding(emb_dim)(x)
    graph = reshape(graph, (emb_dim, 1))
    graph = Convolution(filter_shape=(3, 1), num_filters=13, pad=True,
                        sequential=True)(graph)

    graph = Embedding(emb_dim)(x)
    graph = Convolution(filter_shape=3, pad=True, sequential=True)(graph)
def create_alexnet():
    """Build an AlexNet-style network together with its loss and error nodes.

    Relies on module-level ``num_channels``, ``image_height``, ``image_width``
    and ``num_classes``. The constant 114 is subtracted from the feature
    input before the network is applied. The parameter count is logged via
    ``log_number_of_parameters`` and an empty line is printed.

    Returns:
        Dict with keys ``'feature'``, ``'label'``, ``'ce'``
        (``cross_entropy_with_softmax``), ``'pe'`` (``classification_error``)
        and ``'output'`` (the network output).
    """
    # Input variables denoting the features and label data
    feature_var = input_variable((num_channels, image_height, image_width))
    label_var = input_variable((num_classes))

    # remove mean value
    centered = minus(feature_var, constant(114), name='mean_removed_input')

    with default_options(activation=None, pad=True, bias=True):
        # Convolution and ReLU are kept as separate named layers so that the
        # output before the ReLU can be addressed for feature extraction.
        layers = [
            Convolution((11, 11), 96, init=normal(0.01), pad=False, strides=(4, 4), name='conv1'),
            Activation(activation=relu, name='relu1'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm1'),
            MaxPooling((3, 3), (2, 2), name='pool1'),

            Convolution((5, 5), 192, init=normal(0.01), init_bias=0.1, name='conv2'),
            Activation(activation=relu, name='relu2'),
            LocalResponseNormalization(1.0, 2, 0.0001, 0.75, name='norm2'),
            MaxPooling((3, 3), (2, 2), name='pool2'),

            Convolution((3, 3), 384, init=normal(0.01), name='conv3'),
            Activation(activation=relu, name='relu3'),
            Convolution((3, 3), 384, init=normal(0.01), init_bias=0.1, name='conv4'),
            Activation(activation=relu, name='relu4'),
            Convolution((3, 3), 256, init=normal(0.01), init_bias=0.1, name='conv5'),
            Activation(activation=relu, name='relu5'),
            MaxPooling((3, 3), (2, 2), name='pool5'),

            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc6'),
            Activation(activation=relu, name='relu6'),
            Dropout(0.5, name='drop6'),
            Dense(4096, init=normal(0.005), init_bias=0.1, name='fc7'),
            Activation(activation=relu, name='relu7'),
            Dropout(0.5, name='drop7'),
            Dense(num_classes, init=normal(0.01), name='fc8'),
        ]
        # apply model to input
        z = Sequential(layers)(centered)

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    log_number_of_parameters(z)
    print()

    result = {}
    result['feature'] = feature_var
    result['label'] = label_var
    result['ce'] = ce
    result['pe'] = pe
    result['output'] = z
    return result
def create_rpn(conv_out, scaled_gt_boxes, im_info, cfg, add_loss_functions=True):
    '''
    Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
        Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
        "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"

    Outputs object detection proposals by applying estimated bounding-box
    transformations to a set of regular boxes (called "anchors").

    Args:
        conv_out:        The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
        scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
        im_info:         A CNTK variable or constant containing
                         (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
                         e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
        cfg:             The configuration dictionary
        add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses

    Returns:
        rpn_rois - the proposed ROIs
        rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
    '''

    # RPN network
    # init = 'normal', initValueScale = 0.01, initBias = 0.1
    num_channels = cfg["MODEL"].RPN_NUM_CHANNELS
    rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
                               init=normal(scale=0.01), init_bias=0.0)(conv_out)
    rpn_cls_score = Convolution(
        (1, 1), 18, activation=None, name="rpn_cls_score",
        init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3)  # 2(bg/fg) * 9(anchors)
    rpn_bbox_pred = Convolution(
        (1, 1), 36, activation=None, name="rpn_bbox_pred",
        init=normal(scale=0.01), init_bias=0.0)(rpn_conv_3x3)  # 4(coords) * 9(anchors)

    # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
    num_predictions = int(rpn_cls_score.shape[0] / 2)
    rpn_cls_score_rshp = reshape(
        rpn_cls_score,
        (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]),
        name="rpn_cls_score_rshp")
    # The softmax is built on a placeholder and wrapped with as_block, which
    # maps the placeholder to the reshaped scores.
    p_rpn_cls_score_rshp = cntk.placeholder()
    rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
    rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob')
    rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")

    # proposal layer
    rpn_rois = create_proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg)

    rpn_losses = None
    if (add_loss_functions):
        # RPN targets
        # Comment: rpn_cls_score is only passed to AnchorTargetLayer below
        # to get width and height of the conv feature map ...
        proposal_layer_params = "'feat_stride': {}\n'scales':\n - {}". \
            format(cfg["MODEL"].FEATURE_STRIDE, "\n - ".join([str(v) for v in cfg["DATA"].PROPOSAL_LAYER_SCALES]))
        atl = user_function(
            AnchorTargetLayer(
                rpn_cls_score, scaled_gt_boxes, im_info,
                rpn_batch_size=cfg["TRAIN"].RPN_BATCHSIZE,
                rpn_fg_fraction=cfg["TRAIN"].RPN_FG_FRACTION,
                clobber_positives=cfg["TRAIN"].RPN_CLOBBER_POSITIVES,
                positive_overlap=cfg["TRAIN"].RPN_POSITIVE_OVERLAP,
                negative_overlap=cfg["TRAIN"].RPN_NEGATIVE_OVERLAP,
                param_str=proposal_layer_params))
        rpn_labels = atl.outputs[0]
        rpn_bbox_targets = atl.outputs[1]
        rpn_bbox_inside_weights = atl.outputs[2]

        # classification loss
        p_rpn_labels = cntk.placeholder()
        # A fresh placeholder: the one created above was already bound by the
        # softmax block.
        p_rpn_cls_score_rshp = cntk.placeholder()

        # keeps is the result of (label >= 0); it is multiplied into both the
        # labels and the per-position cross entropy.
        keeps = cntk.greater_equal(p_rpn_labels, 0.0)
        fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
        bg_labels = minus(1, fg_labels, name="bg_targets")
        rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
        rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
        rpn_loss_cls = element_times(rpn_ce, keeps)

        # The terms that are accounted for in the cls loss are those that have a label >= 0
        cls_num_terms = reduce_sum(keeps)
        cls_normalization_factor = 1.0 / cls_num_terms
        normalized_rpn_cls_loss = reduce_sum(
            rpn_loss_cls) * cls_normalization_factor

        reduced_rpn_loss_cls = cntk.as_block(
            normalized_rpn_cls_loss,
            [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
            'CE_with_ignore', 'norm_rpn_cls_loss')

        # regression loss
        p_rpn_bbox_pred = cntk.placeholder()
        p_rpn_bbox_targets = cntk.placeholder()
        p_rpn_bbox_inside_weights = cntk.placeholder()
        # NOTE(review): cfg is read by attribute here (cfg.SIGMA_RPN_L1) but by
        # key everywhere else (cfg["MODEL"], cfg["TRAIN"], cfg["DATA"]) -
        # confirm the config object supports both.
        rpn_loss_bbox = SmoothL1Loss(cfg.SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets,
                                     p_rpn_bbox_inside_weights, 1.0)
        # The bbox loss is normalized by the rpn batch size
        bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
        normalized_rpn_bbox_loss = reduce_sum(
            rpn_loss_bbox) * bbox_normalization_factor

        reduced_rpn_loss_bbox = cntk.as_block(
            normalized_rpn_bbox_loss,
            [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets),
             (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
            'SmoothL1Loss', 'norm_rpn_bbox_loss')

        # Total RPN loss = classification loss + bbox regression loss.
        rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")

    return rpn_rois, rpn_losses