def fully_connected_layer(input, output_dim, device_id, nonlinearity):
    """Build one dense layer: ``nonlinearity(bias + input * weights)``.

    Args:
        input: operand whose ``shape()[0]`` is used as the input dimension.
        output_dim: number of output units of the layer.
        device_id: accepted for interface compatibility; not referenced here.
        nonlinearity: callable applied to the output of the affine transform.

    Returns:
        Whatever ``nonlinearity`` returns for the affine output.
    """
    fan_in = input.shape()[0]

    # Weights are created before the bias, matching the original creation order.
    weights = parameter(shape=(fan_in, output_dim))
    projected = times(input, weights)

    bias = parameter(shape=(output_dim,))
    affine = plus(bias, projected.output())

    return nonlinearity(affine.output())
def linear(output_shape, input_shape, scale_init, bias_init, name):
    '''
    Create a linear op (called "full connection" in Caffe) wrapped as a block.

    Args:
        output_shape (tuple): the output channel size
        input_shape (tuple): the input channel size
        scale_init (`np.array`): initial values of the weight ("scale") tensor
        bias_init (`np.array`): initial values of the bias tensor
        name (str): the name of the op; also used as the prefix of the
            parameter names ``<name>.sc`` and ``<name>.b``

    Return:
        :func:`~cntk.ops.as_block`: the function containing the linear op
    '''
    weights_name = '.'.join((name, 'sc'))
    bias_name = '.'.join((name, 'b'))

    # The weight shape is the concatenation of the input and output shapes.
    weights = ops.parameter(shape=input_shape + output_shape,
                            init=scale_init,
                            name=weights_name)
    bias = ops.parameter(shape=output_shape, init=bias_init, name=bias_name)

    @BlockFunction('linear', name)
    def _linear(x):
        return ops.times(x, weights) + bias

    return _linear
def frcn_predictor(features, rois, n_classes):
    """Assemble a Fast R-CNN predictor on top of a pretrained network.

    Reads the module-level settings ``model_file``, ``feature_node_name``,
    ``last_conv_node_name``, ``pool_node_name``, ``last_hidden_node_name``
    and ``roi_dim``.

    Args:
        features: image input; the constant 114 is subtracted from it before
            the cloned convolutional layers are applied.
        rois: regions of interest passed to ``roipooling``.
        n_classes: number of outputs of the final linear layer.

    Returns:
        ``times(fc_out, W) + b`` with ``W`` of shape ``(4096, n_classes)``.
    """
    # Load the pretrained classification net and locate the nodes to cut at.
    pretrained = load_model(model_file)
    feature_node = find_by_name(pretrained, feature_node_name)
    conv_node = find_by_name(pretrained, last_conv_node_name)
    pool_node = find_by_name(pretrained, pool_node_name)
    last_node = find_by_name(pretrained, last_hidden_node_name)

    # Clone the conv layers with frozen weights and the fully connected
    # layers with trainable copies.
    conv_layers = combine([conv_node.owner]).clone(
        CloneMethod.freeze, {feature_node: Placeholder()})
    fc_layers = combine([last_node.owner]).clone(
        CloneMethod.clone, {pool_node: Placeholder()})

    # Create the Fast R-CNN model.
    centered = features - Constant(114)
    pooled_rois = roipooling(conv_layers(centered), rois, (roi_dim, roi_dim))
    fc_out = fc_layers(pooled_rois)

    # The output layer is built by hand: Dense(..., map_rank=1) was noted as
    # not yet supported.
    W = parameter(shape=(4096, n_classes), init=glorot_uniform())
    b = parameter(shape=n_classes, init=0)
    return times(fc_out, W) + b
def resnet_classifer(input, num_classes):
    """Build a ResNet classifier with three stages of three residual nodes.

    The stages use 16, 32 and 64 feature maps. The first node of the second
    and third stage is a ``resnet_node2_inc`` fed with a projection map from
    ``get_projection_map``; all other nodes are ``resnet_node2``.

    Args:
        input: network input handed to the first ``conv_bn_relu_layer``.
        num_classes: size of the final linear layer's output.

    Returns:
        The pooled features multiplied by the output weights, plus the bias.
    """
    # Declared by the original code but not referenced in this function.
    conv_w_scale = 7.07
    fc1_w_scale = 0.4
    fc1_b_value = 0

    # Settings shared by every convolutional layer below.
    conv_b_value = 0
    sc_value = 1
    bn_time_const = 4096
    kernel_width = 3
    kernel_height = 3
    conv1_w_scale = 0.26

    def plain_node(prev, n_maps):
        return resnet_node2(prev, n_maps, kernel_width, kernel_height,
                            conv1_w_scale, conv_b_value, sc_value,
                            bn_time_const)

    def widening_node(prev, n_maps, prev_maps):
        # The projection map is created before the node, as in the original.
        projection = get_projection_map(n_maps, prev_maps)
        return resnet_node2_inc(prev, n_maps, kernel_width, kernel_height,
                                conv1_w_scale, conv_b_value, sc_value,
                                bn_time_const, projection)

    c_map1 = 16
    net = conv_bn_relu_layer(input, c_map1, kernel_width, kernel_height, 1, 1,
                             conv1_w_scale, conv_b_value, sc_value,
                             bn_time_const)
    for _ in range(3):
        net = plain_node(net, c_map1)

    c_map2 = 32
    net = widening_node(net, c_map2, c_map1)
    for _ in range(2):
        net = plain_node(net, c_map2)

    c_map3 = 64
    net = widening_node(net, c_map3, c_map2)
    for _ in range(2):
        net = plain_node(net, c_map3)

    # Global average pooling
    poolw = 8
    poolh = 8
    poolh_stride = 1
    poolv_stride = 1
    pool = pooling(net, AVG_POOLING, (1, poolh, poolw),
                   (1, poolv_stride, poolh_stride))

    out_times_params = parameter(shape=(c_map3, 1, 1, num_classes),
                                 init=glorot_uniform())
    out_bias_params = parameter(shape=(num_classes), init=0)
    return times(pool, out_times_params) + out_bias_params
def fully_connected_classifier_net(input, num_output_classes, hidden_layer_dim,
                                   num_hidden_layers, device, nonlinearity):
    """Stack fully connected hidden layers and add a linear output layer.

    One hidden layer is always created; further layers are added while the
    layer count is below ``num_hidden_layers``.

    Args:
        input: operand fed to the first hidden layer.
        num_output_classes: size of the linear output layer.
        hidden_layer_dim: number of units in every hidden layer.
        num_hidden_layers: requested number of hidden layers.
        device: forwarded to ``fully_connected_layer``.
        nonlinearity: activation forwarded to ``fully_connected_layer``.

    Returns:
        The result of ``plus(bias, times(hidden, weights).output())``; no
        nonlinearity is applied to the output layer.
    """
    hidden = fully_connected_layer(input, hidden_layer_dim, device,
                                   nonlinearity)
    for _ in range(num_hidden_layers - 1):
        hidden = fully_connected_layer(hidden.output(), hidden_layer_dim,
                                       device, nonlinearity)

    out_weights = parameter(shape=(hidden_layer_dim, num_output_classes))
    out_bias = parameter(shape=(num_output_classes,))
    projected = times(hidden.output(), out_weights)
    return plus(out_bias, projected.output())
def batch_norm(cntk_layer, inputs):
    '''
    Create a batch normalization op from a layer definition.

    Args:
        cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
            the layer definition of the batch normalization op
        inputs (list): a list of :class:`~cntk.ops.functions.Function` or
            :class:`~cntk.input`; only the first element is used

    Return:
        :func:`~cntk.ops.functions.Function`: the instantiated cntk batch
        normalization op

    Raises:
        AssertionError: if the layer carries parameter tensors but fewer
            than three of them.
    '''
    data = internal.sanitize_input(inputs[0])
    # One value per entry along the first axis of the input.
    param_shape = (data.shape[0], )

    # Fallback initial values used when the layer carries no tensors.
    # NOTE(review): mean defaults to 1 and variance to 0 - confirm intended.
    inits = {'scale': 1, 'bias': 0, 'mean': 1, 'var': 0}

    tensors = cntk_layer.parameter_tensor
    if tensors:
        if len(tensors) < 3:
            raise AssertionError('At least three tensors (saved_mean, saved_variance and scale) are needed')
        # The third tensor holds a global scale; the saved statistics are
        # multiplied by its reciprocal (or by 0 when the scale is 0).
        global_scale = tensors[2].data[0]
        factor = 1 / global_scale if global_scale != 0 else 0
        inits['mean'] = np.asarray(tensors[0].data, dtype=np.float32) * factor
        inits['var'] = np.asarray(tensors[1].data, dtype=np.float32) * factor
        if len(tensors) == 5:
            inits['scale'] = np.asarray(tensors[3].data, dtype=np.float32)
            inits['bias'] = np.asarray(tensors[4].data, dtype=np.float32)

    def _make_parameter(kind):
        return ops.parameter(param_shape, init=inits[kind],
                             name='.'.join((cntk_layer.op_name, kind)))

    scale_parameters = _make_parameter('scale')
    bias_parameters = _make_parameter('bias')
    mean_parameters = _make_parameter('mean')
    var_parameters = _make_parameter('var')

    epsilon = cntk_layer.parameters.epsilon
    return ops.batch_normalization(data, scale_parameters, bias_parameters,
                                   mean_parameters, var_parameters, True,
                                   use_cudnn_engine=False, epsilon=epsilon,
                                   running_count=ops.constant(0),
                                   name=cntk_layer.op_name)
def _weights_parameter(output_channels, init, group_name):
    """Create a convolution weight parameter, zero-expanded for dilation.

    Dilation is simulated by enlarging the kernel: zeros are inserted between
    the original taps so that tap ``i`` lands at position ``i * dilation``.
    ``kernel`` and ``dilation`` are not parameters; they are read from the
    surrounding scope.

    Args:
        output_channels: number of output channels of the parameter.
        init: array of initial weights; it is copied, never modified.
        group_name: name given to the created parameter.

    Returns:
        The result of ``ops.parameter`` with shape ``(output_channels,
        InferredDimension) + expanded kernel``.
    """
    dilation_kernel = [(k - 1) * d + 1 for k, d in zip(kernel, dilation)]
    # expand kernel to simulate dilation
    used_init = init.copy()
    if dilation_kernel != kernel:
        for axis in range(len(dilation)):
            kept_positions = {tap * dilation[axis]
                              for tap in range(kernel[axis])}
            # Positions of the expanded kernel that hold no original tap.
            # They must be ascending for the offset correction below; set
            # iteration order is not guaranteed, so sort explicitly.
            zero_positions = sorted(
                set(range(dilation_kernel[axis])) ^ kept_positions)
            # np.insert indexes into the *unexpanded* array, so each target
            # position is shifted back by the number of zeros before it.
            insert_lines = [position - rank
                            for rank, position in enumerate(zero_positions)]
            # `kernel`/`dilation` axis 0 maps to the last axis of the weights.
            used_init = np.insert(used_init, insert_lines, 0,
                                  axis=len(init.shape) - axis - 1)
    return ops.parameter(
        shape=(output_channels, cntk.InferredDimension) +
        ops.sanitize_shape(dilation_kernel),
        init=used_init, name=group_name)
def frcn_predictor(features, rois, n_classes, base_path):
    """Assemble a Fast R-CNN predictor on top of a pretrained AlexNet model.

    Args:
        features: image input; the constant 114 is subtracted from it before
            the cloned convolutional layers are applied.
        rois: regions of interest passed to ``roipooling``.
        n_classes: number of outputs of the final linear layer.
        base_path: directory the relative model path is appended to.

    Returns:
        A tuple ``(z, fc_out)``: the class scores and the output of the
        cloned fully connected layers.
    """
    # Model specific variables for AlexNet.
    model_file = base_path + "/../../../resources/cntk/AlexNet.model"
    roi_dim = 6
    node_names = ("features", "conv5.y", "pool3", "h2_d")

    # Load the pretrained classification net and locate the nodes to cut at.
    print("Loading pre-trained model...")
    pretrained = load_model(model_file)
    print("Loading pre-trained model... DONE.")
    feature_node, conv_node, pool_node, last_node = [
        find_by_name(pretrained, node_name) for node_name in node_names
    ]

    # Clone the conv layers with frozen weights and the fully connected
    # layers with trainable copies.
    conv_layers = combine([conv_node.owner]).clone(
        CloneMethod.freeze, {feature_node: placeholder()})
    fc_layers = combine([last_node.owner]).clone(
        CloneMethod.clone, {pool_node: placeholder()})

    # Create the Fast R-CNN model.
    centered = features - constant(114)
    pooled_rois = roipooling(conv_layers(centered), rois, (roi_dim, roi_dim))
    fc_out = fc_layers(pooled_rois)

    # The output layer is built by hand: Dense(..., map_rank=1) was noted as
    # not yet supported.
    W = parameter(shape=(4096, n_classes), init=glorot_uniform())
    b = parameter(shape=n_classes, init=0)
    z = times(fc_out, W) + b
    return z, fc_out
def frcn_predictor(features, rois, n_classes):
    """Assemble a Fast R-CNN predictor on top of a pretrained network.

    Reads the module-level settings ``model_file``, ``feature_node_name``,
    ``last_conv_node_name``, ``pool_node_name``, ``last_hidden_node_name``
    and ``roi_dim``.

    Args:
        features: image input; the constant 114 is subtracted from it before
            the cloned convolutional layers are applied.
        rois: regions of interest passed to ``roipooling``.
        n_classes: number of outputs of the final linear layer.

    Returns:
        ``times(fc_out, W) + b`` with ``W`` of shape ``(4096, n_classes)``.
    """
    # Load the pretrained classification net and locate the nodes to cut at.
    source_net = load_model(model_file)
    feature_node = find_by_name(source_net, feature_node_name)
    conv_node = find_by_name(source_net, last_conv_node_name)
    pool_node = find_by_name(source_net, pool_node_name)
    last_node = find_by_name(source_net, last_hidden_node_name)

    # Conv layers are cloned frozen; fully connected layers stay trainable.
    conv_root = combine([conv_node.owner])
    conv_layers = conv_root.clone(CloneMethod.freeze,
                                  {feature_node: Placeholder()})
    fc_root = combine([last_node.owner])
    fc_layers = fc_root.clone(CloneMethod.clone, {pool_node: Placeholder()})

    # Create the Fast R-CNN model.
    feat_norm = features - Constant(114)
    conv_out = conv_layers(feat_norm)
    roi_out = roipooling(conv_out, rois, (roi_dim, roi_dim))
    fc_out = fc_layers(roi_out)

    # The output layer is built by hand: Dense(..., map_rank=1) was noted as
    # not yet supported.
    W = parameter(shape=(4096, n_classes), init=glorot_uniform())
    b = parameter(shape=n_classes, init=0)
    return times(fc_out, W) + b
def _weights_parameter(output_channels, init, group_name):
    """Create a convolution weight parameter, zero-expanded for dilation.

    Dilation is simulated by enlarging the kernel: zeros are inserted between
    the original taps so that tap ``i`` lands at position ``i * dilation``.
    ``kernel`` and ``dilation`` are not parameters; they are read from the
    surrounding scope.

    Args:
        output_channels: number of output channels of the parameter.
        init: array of initial weights; it is copied, never modified.
        group_name: name given to the created parameter.

    Returns:
        The result of ``ops.parameter`` with shape ``(output_channels,
        InferredDimension) + expanded kernel``.
    """
    dilation_kernel = [(k - 1) * d + 1 for k, d in zip(kernel, dilation)]
    # expand kernel to simulate dilation
    used_init = init.copy()
    if dilation_kernel != kernel:
        weight_rank = len(init.shape)
        for axis in range(len(dilation)):
            kernel_sequence = [x * dilation[axis]
                               for x in range(kernel[axis])]
            # Expanded-kernel positions that receive a zero. Sorted because
            # the offset correction below requires ascending positions and
            # set iteration order is not guaranteed.
            gaps = sorted(
                set(range(dilation_kernel[axis])) ^ set(kernel_sequence))
            # np.insert indexes into the *unexpanded* array, so shift each
            # position back by the number of zeros inserted before it.
            insert_lines = [gap - offset for offset, gap in enumerate(gaps)]
            # `kernel`/`dilation` axis 0 maps to the last axis of the weights.
            used_init = np.insert(used_init, insert_lines, 0,
                                  axis=weight_rank - axis - 1)
    return ops.parameter(
        shape=(output_channels, cntk.InferredDimension) +
        ops.sanitize_shape(dilation_kernel),
        init=used_init,
        name=group_name)
def create_model():
    """Build the sequence-to-sequence LSTM encoder/decoder network.

    Reads the module-level settings ``input_vocab_dim``, ``label_vocab_dim``,
    ``num_layers`` and ``hidden_dim``. The encoder runs its recurrence with
    ``future_value``; the first decoder layer is primed with the first step
    of the encoder's output and cell state.

    Returns:
        The linear output layer ``plus(B, times(stabilize(h), W))``.
    """
    # Source and target inputs to the model
    batch_axis = Axis.default_batch_axis()
    input_seq_axis = Axis('inputAxis')
    label_seq_axis = Axis('labelAxis')

    raw_input = input_variable(shape=(input_vocab_dim),
                               dynamic_axes=[batch_axis, input_seq_axis],
                               name='raw_input')
    raw_labels = input_variable(shape=(label_vocab_dim),
                                dynamic_axes=[batch_axis, label_seq_axis],
                                name='raw_labels')

    # Drop the sentence start token from the label, for decoder training
    # <s> A B C </s> --> A B C </s>
    label_sequence = sequence.slice(raw_labels, 1, 0, name='label_sequence')
    sentence_start = sequence.first(raw_labels)  # <s>

    # Setup primer for decoder
    is_first_label = sequence.is_first(label_sequence)  # 1 0 0 0 ...
    sentence_start_scattered = sequence.scatter(sentence_start, is_first_label)

    # Encoder
    stabilize = Stabilizer()
    encoder_h = stabilize(raw_input)
    for _ in range(num_layers):
        encoder_h, encoder_c = LSTM_layer(encoder_h.output, hidden_dim,
                                          future_value, future_value)

    # Prepare encoder output to be used in decoder
    thought_h = sequence.first(encoder_h)
    thought_c = sequence.first(encoder_c)
    thought_broadcast_h = sequence.broadcast_as(thought_h, label_sequence)
    thought_broadcast_c = sequence.broadcast_as(thought_c, label_sequence)

    # Decoder
    # copy label_sequence
    history_hook = alias(label_sequence, name='decoder_history_hook')
    decoder_input = element_select(is_first_label, sentence_start_scattered,
                                   past_value(history_hook))

    def primed_with(thought_vector):
        # Recurrence that yields the thought vector on the first label step
        # and the delayed operand on every other step.
        def hook(operand):
            return element_select(is_first_label, thought_vector,
                                  past_value(operand))
        return hook

    decoder_h = stabilize(decoder_input)
    for layer in range(num_layers):
        if layer == 0:
            hook_h = primed_with(thought_broadcast_h)
            hook_c = primed_with(thought_broadcast_c)
        else:
            hook_h = past_value
            hook_c = past_value
        decoder_h, decoder_c = LSTM_layer(decoder_h.output, hidden_dim,
                                          hook_h, hook_c)

    # Linear output layer
    W = parameter(shape=(decoder_h.shape[0], label_vocab_dim),
                  init=glorot_uniform())
    B = parameter(shape=(label_vocab_dim), init=0)
    return plus(B, times(stabilize(decoder_h), W))
def convolution(output, kernel, stride, pad, kernel_init, bias_init, group, dilation, name):
    '''
    Implement convolution ops

    Args:
        output (int): the output channel size
        kernel (list): the kernel size of filter, with format [width, height]
        stride (list): the stride of convolution, with format [w_stride, h_stride]
        pad (bool): auto padding or not
        kernel_init (`np.array`): the tensor saving initialize values of filter
        bias_init (`np.array`): the tensor saving initialize values of bias;
            when ``None`` no bias is created or added
        group (int): the group size in the convolution
        dilation (list): the dilation of convolution, with format [w_dilation, h_dilation]
        name (str): the name of ops

    Return:
        :func:`~cntk.ops.as_block`: the function contains convolution ops
    '''
    def _conv_ops(weights, data):
        return ops.convolution(weights, data,
                               strides=(cntk.InferredDimension, ) +
                               ops.sanitize_shape(stride),
                               auto_padding=[False, pad, pad])

    def _weights_parameter(output_channels, init, group_name):
        # Create one weight parameter, zero-expanded to simulate dilation.
        dilation_kernel = [(k - 1) * d + 1 for k, d in zip(kernel, dilation)]
        # expand kernel to simulate dilation
        used_init = init.copy()
        if dilation_kernel != kernel:
            for axis in range(len(dilation)):
                kernel_sequence = [x * dilation[axis]
                                   for x in range(kernel[axis])]
                # Expanded-kernel positions that receive a zero. Sorted
                # because the offset correction below requires ascending
                # positions and set iteration order is not guaranteed.
                zero_positions = sorted(
                    set(range(dilation_kernel[axis])) ^ set(kernel_sequence))
                # np.insert indexes into the *unexpanded* array, so shift
                # each position back by the number of zeros before it.
                insert_lines = [position - rank for rank, position
                                in enumerate(zero_positions)]
                # kernel axis 0 maps to the last axis of the weights.
                used_init = np.insert(used_init, insert_lines, 0,
                                      axis=len(init.shape) - axis - 1)
        return ops.parameter(
            shape=(output_channels, cntk.InferredDimension) +
            ops.sanitize_shape(dilation_kernel),
            init=used_init, name=group_name)

    if group == 1:
        w = _weights_parameter(output, kernel_init, '.'.join((name, 'W')))
    else:
        # Grouped convolution: one weight parameter per group, each applied
        # to its own slice of the input channels.
        sub_output_channels = int(output / group)
        groups_kernel_init = np.split(kernel_init, group)
        groups_kernel = [
            _weights_parameter(sub_output_channels, groups_kernel_init[i],
                               '.'.join((name, str(i), 'W')))
            for i in range(0, group)
        ]
        sub_input_channels = groups_kernel[0].shape[1]
    if bias_init is not None:
        b = ops.parameter(shape=(output, ), init=bias_init,
                          name='.'.join((name, 'b')))

    @BlockFunction('Convolution', name)
    def _convolution(x):
        if group == 1:
            apply_x = _conv_ops(w, x)
        else:
            groups_data = [
                ops.slice(x, axis=0,
                          begin_index=i * sub_input_channels,
                          end_index=(i + 1) * sub_input_channels)
                for i in range(0, group)
            ]
            apply_sub = [
                _conv_ops(group_kernel, group_data)
                for group_kernel, group_data in zip(groups_kernel, groups_data)
            ]
            # Concatenate the per-group results along the channel axis.
            apply_x = ops.splice(*apply_sub, axis=0)
        if bias_init is not None:
            apply_x += b
        return apply_x
    return _convolution
def resnet_classifer(input, num_classes, device, output_name):
    """Build a ResNet classifier with three stages of three residual nodes.

    The stages use 16, 32 and 64 feature maps. The first node of the second
    and third stage is a ``resnet_node2_inc`` fed with a projection map from
    ``get_projection_map``; all other nodes are ``resnet_node2``.

    Args:
        input: network input handed to the first ``conv_bn_relu_layer``.
        num_classes: size of the final linear layer's output.
        device: forwarded to every layer helper and parameter.
        output_name: passed as the third argument of the final ``plus``.

    Returns:
        The result of ``plus`` over the projected pooled features and the
        output bias.
    """
    # Declared by the original code but not referenced in this function.
    conv_w_scale = 7.07
    fc1_w_scale = 0.4
    fc1_b_value = 0

    # Settings shared by every convolutional layer below.
    conv_b_value = 0
    sc_value = 1
    bn_time_const = 4096
    kernel_width = 3
    kernel_height = 3
    conv1_w_scale = 0.26

    def plain_node(prev, n_maps):
        return resnet_node2(prev.output(), n_maps, kernel_width,
                            kernel_height, conv1_w_scale, conv_b_value,
                            sc_value, bn_time_const, device)

    def widening_node(prev, n_maps, prev_maps):
        # The projection map is created before the node, as in the original.
        projection = get_projection_map(n_maps, prev_maps, device)
        return resnet_node2_inc(prev.output(), n_maps, kernel_width,
                                kernel_height, conv1_w_scale, conv_b_value,
                                sc_value, bn_time_const, projection, device)

    c_map1 = 16
    net = conv_bn_relu_layer(input, c_map1, kernel_width, kernel_height, 1, 1,
                             conv1_w_scale, conv_b_value, sc_value,
                             bn_time_const, device)
    for _ in range(3):
        net = plain_node(net, c_map1)

    c_map2 = 32
    net = widening_node(net, c_map2, c_map1)
    for _ in range(2):
        net = plain_node(net, c_map2)

    c_map3 = 64
    net = widening_node(net, c_map3, c_map2)
    for _ in range(2):
        net = plain_node(net, c_map3)

    # Global average pooling
    poolw = 8
    poolh = 8
    poolh_stride = 1
    poolv_stride = 1
    pool = pooling(net.output(), AVG_POOLING, (1, poolh, poolw),
                   (1, poolv_stride, poolh_stride))

    out_times_params = parameter(shape=(c_map3, 1, 1, num_classes),
                                 device_id=device)
    out_bias_params = parameter(shape=(num_classes, ), device_id=device)
    projected = times(pool.output(), out_times_params)
    return plus(projected.output(), out_bias_params, output_name)
def Parameter(shape, init, name=''):
    """Create a parameter and register it through ``_name_node``.

    Args:
        shape: shape forwarded to ``parameter``.
        init: initial value or initializer; must not be ``None``.
        name: optional name forwarded to ``parameter``.

    Returns:
        The result of ``_name_node(p, 'parameter')`` for the new parameter.

    Raises:
        ValueError: if ``init`` is ``None``.
    """
    if init is None:
        # Raising a bare string is itself a TypeError in Python 3 and hides
        # the message, so raise a real exception carrying it.
        raise ValueError("Parameter: init cannot be None")
    p = parameter(shape, init=init, name=name)
    # these are factory methods for things with state
    return _name_node(p, 'parameter')
if (i > 0): recurrence_hook_h = past_value recurrence_hook_c = past_value else: recurrence_hook_h = lambda operand: element_select( is_first_label, thought_vector_broadcast_h, past_value(operand)) recurrence_hook_c = lambda operand: element_select( is_first_label, thought_vector_broadcast_c, past_value(operand)) (decoder_output_h, decoder_output_c) = LSTM_layer(decoder_output_h.output, hidden_dim, recurrence_hook_h, recurrence_hook_c) # 1. # Add the linear layer W = parameter(shape=(decoder_output_h.shape[0], label_vocab_dim), init=glorot_uniform()) B = parameter(shape=(label_vocab_dim), init=0) z = plus(B, times(decoder_output_h, W)) def create_model(): # Source and target inputs to the model batch_axis = Axis.default_batch_axis() input_seq_axis = Axis('inputAxis') label_seq_axis = Axis('labelAxis') input_dynamic_axes = [batch_axis, input_seq_axis] raw_input = input_variable(shape=(input_vocab_dim), dynamic_axes=input_dynamic_axes, name='raw_input')
def convolution(output, kernel, stride, pad, kernel_init, bias_init, group, dilation, name):
    '''
    Implement convolution ops

    Args:
        output (int): the output channel size
        kernel (list): the kernel size of filter, with format [width, height]
        stride (list): the stride of convolution, with format [w_stride, h_stride]
        pad (bool): auto padding or not
        kernel_init (`np.array`): the tensor saving initialize values of filter
        bias_init (`np.array`): the tensor saving initialize values of bias;
            when ``None`` no bias is created or added
        group (int): the group size in the convolution
        dilation (list): the dilation of convolution, with format [w_dilation, h_dilation]
        name (str): the name of ops

    Return:
        :func:`~cntk.ops.as_block`: the function contains convolution ops
    '''
    def _conv_ops(weights, data):
        return ops.convolution(weights,
                               data,
                               strides=(cntk.InferredDimension, ) +
                               ops.sanitize_shape(stride),
                               auto_padding=[False, pad, pad])

    def _weights_parameter(output_channels, init, group_name):
        # Create one weight parameter, zero-expanded to simulate dilation.
        dilation_kernel = [(k - 1) * d + 1 for k, d in zip(kernel, dilation)]
        # expand kernel to simulate dilation
        used_init = init.copy()
        if dilation_kernel != kernel:
            weight_rank = len(init.shape)
            for axis in range(len(dilation)):
                kernel_sequence = [
                    x * dilation[axis] for x in range(kernel[axis])
                ]
                # Expanded-kernel positions that receive a zero. Sorted
                # because the offset correction below requires ascending
                # positions and set iteration order is not guaranteed.
                gaps = sorted(
                    set(range(dilation_kernel[axis])) ^ set(kernel_sequence))
                # np.insert indexes into the *unexpanded* array, so shift
                # each position back by the number of zeros before it.
                insert_lines = [
                    gap - offset for offset, gap in enumerate(gaps)
                ]
                # kernel axis 0 maps to the last axis of the weights.
                used_init = np.insert(used_init,
                                      insert_lines,
                                      0,
                                      axis=weight_rank - axis - 1)
        return ops.parameter(
            shape=(output_channels, cntk.InferredDimension) +
            ops.sanitize_shape(dilation_kernel),
            init=used_init,
            name=group_name)

    if group == 1:
        w = _weights_parameter(output, kernel_init, '.'.join((name, 'W')))
    else:
        # Grouped convolution: one weight parameter per group, each applied
        # to its own slice of the input channels.
        sub_output_channels = int(output / group)
        groups_kernel_init = np.split(kernel_init, group)
        groups_kernel = [
            _weights_parameter(sub_output_channels, groups_kernel_init[i],
                               '.'.join((name, str(i), 'W')))
            for i in range(0, group)
        ]
        sub_input_channels = groups_kernel[0].shape[1]
    if bias_init is not None:
        b = ops.parameter(shape=(output, ),
                          init=bias_init,
                          name='.'.join((name, 'b')))

    @BlockFunction('Convolution', name)
    def _convolution(x):
        if group == 1:
            apply_x = _conv_ops(w, x)
        else:
            groups_data = [
                ops.slice(x,
                          axis=0,
                          begin_index=i * sub_input_channels,
                          end_index=(i + 1) * sub_input_channels)
                for i in range(0, group)
            ]
            apply_sub = [
                _conv_ops(group_kernel, group_data)
                for group_kernel, group_data in zip(groups_kernel,
                                                    groups_data)
            ]
            # Concatenate the per-group results along the channel axis.
            apply_x = ops.splice(*apply_sub, axis=0)
        if bias_init is not None:
            apply_x += b
        return apply_x

    return _convolution
def batch_norm(cntk_layer, inputs):
    '''
    Create a batch normalization op from a layer definition.

    Args:
        cntk_layer (:class:`~cntk.contrib.crosstalkcaffe.unimodel.cntkmodel.CntkLayersDefinition`):
            the layer definition of the batch normalization op
        inputs (list): a list of :class:`~cntk.ops.functions.Function` or
            :class:`~cntk.input`; only the first element is used

    Return:
        :func:`~cntk.ops.functions.Function`: the instantiated cntk batch
        normalization op

    Raises:
        AssertionError: if the layer carries parameter tensors but fewer
            than three of them.
    '''
    def _as_float32(tensor):
        return np.asarray(tensor.data, dtype=np.float32)

    operand = internal.sanitize_input(inputs[0])
    # One value per entry along the first axis of the input.
    stats_shape = (operand.shape[0], )

    # Fallback initial values used when the layer carries no tensors.
    # NOTE(review): mean defaults to 1 and variance to 0 - confirm intended.
    scale_init, bias_init = 1, 0
    mean_init, var_init = 1, 0

    saved = cntk_layer.parameter_tensor
    if saved:
        if len(saved) < 3:
            raise AssertionError(
                'At least three tensors (saved_mean, saved_variance and scale) are needed'
            )
        # The third tensor holds a global scale; the saved statistics are
        # multiplied by its reciprocal (or by 0 when the scale is 0).
        global_scale = saved[2].data[0]
        if global_scale != 0:
            moving_average_factor = 1 / global_scale
        else:
            moving_average_factor = 0
        mean_init = _as_float32(saved[0]) * moving_average_factor
        var_init = _as_float32(saved[1]) * moving_average_factor
        if len(saved) == 5:
            scale_init = _as_float32(saved[3])
            bias_init = _as_float32(saved[4])

    op_name = cntk_layer.op_name
    scale_parameters = ops.parameter(stats_shape, init=scale_init,
                                     name='.'.join((op_name, 'scale')))
    bias_parameters = ops.parameter(stats_shape, init=bias_init,
                                    name='.'.join((op_name, 'bias')))
    mean_parameters = ops.parameter(stats_shape, init=mean_init,
                                    name='.'.join((op_name, 'mean')))
    var_parameters = ops.parameter(stats_shape, init=var_init,
                                   name='.'.join((op_name, 'var')))

    epsilon = cntk_layer.parameters.epsilon
    return ops.batch_normalization(operand,
                                   scale_parameters,
                                   bias_parameters,
                                   mean_parameters,
                                   var_parameters,
                                   True,
                                   use_cudnn_engine=False,
                                   epsilon=epsilon,
                                   running_count=ops.constant(0),
                                   name=cntk_layer.op_name)