def resblock_basic(inp, num_filters):
    c1 = C.layers.Convolution((3, 3), num_filters, init=C.he_normal(), pad=True, bias=False)(inp)
    c1 = C.layers.BatchNormalization(map_rank=1)(c1)
    c1 = C.param_relu(C.Parameter(c1.shape, init=C.he_normal()), c1)

    c2 = C.layers.Convolution((3, 3), num_filters, init=C.he_normal(), pad=True, bias=False)(c1)
    c2 = C.layers.BatchNormalization(map_rank=1)(c2)

    return inp + c2
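# Hedged sketch (not part of the original excerpt): SRResNet below calls
# resblock_basic_stack(h1, 16, 64), which is not defined in this listing. A plausible
# helper, mirroring resnet_basic_stack further down, simply chains num_stack residual
# blocks of the same width. The argument order follows the call site in SRResNet.
def resblock_basic_stack(inp, num_stack, num_filters):
    assert num_stack > 0
    r = inp
    for _ in range(num_stack):
        r = resblock_basic(r, num_filters)
    return r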
def create_model(input, net_type="gru", encoder_type=1, model_file=None, e3cloning=False):
    if encoder_type == 1:
        h = audio_encoder(input)
        if net_type.lower() != "cnn":
            h = flatten(h)
    elif encoder_type == 2:
        h = audio_encoder_2(input)
        # pooling
        h = C.layers.GlobalAveragePooling(name="avgpool")(h)
        h = C.squeeze(h)
    elif encoder_type == 3:
        h = audio_encoder_3(input, model_file, e3cloning)
        if net_type.lower() != "cnn":
            h = flatten(h)
    else:
        raise ValueError("encoder type {:d} not supported".format(encoder_type))

    if net_type.lower() == "cnn":
        h = C.layers.Dense(1024, init=C.he_normal(), activation=C.tanh)(h)
    elif net_type.lower() == "gru":
        h = C.layers.Recurrence(step_function=C.layers.GRU(256), go_backwards=False, name="rnn")(h)
    elif net_type.lower() == "lstm":
        h = C.layers.Recurrence(step_function=C.layers.LSTM(256), go_backwards=False, name="rnn")(h)
    elif net_type.lower() == "bigru":
        # bi-directional GRU
        h = bi_recurrence(h, C.layers.GRU(128), C.layers.GRU(128), name="bigru")
    elif net_type.lower() == "bilstm":
        # bi-directional LSTM
        h = bi_recurrence(h, C.layers.LSTM(128), C.layers.LSTM(128), name="bilstm")
    h = C.layers.Dropout(0.2)(h)

    # output
    y = C.layers.Dense(label_dim, activation=C.sigmoid, init=C.he_normal(), name="output")(h)
    return y
def conv_bn_relu(input, filter_size, num_filters, strides=(1, 1), init=C.he_normal()):
    r = conv_bn(input, filter_size, num_filters, strides, init, 1)  # final positional arg is bn_init_scale
    return C.relu(r)
def create_network(feature_dim=40, num_classes=256, feature_mean_file=None, feature_inv_stddev_file=None,
                   feature_norm_files=None, label_prior_file=None, context=(0, 0), model_type=None):

    def MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file):
        m = C.reshape(load_ascii_vector(feature_mean_file, 'feature_mean'), shape=(1, feature_dim))
        s = C.reshape(load_ascii_vector(feature_inv_stddev_file, 'feature_invstddev'), shape=(1, feature_dim))
        def _func(operand):
            return C.reshape(
                C.element_times(C.reshape(operand, shape=(1 + context[0] + context[1], feature_dim)) - m, s),
                shape=operand.shape)
        return _func

    def MyDNNLayer(hidden_size=128, num_layers=2):
        return C.layers.Sequential([
            C.layers.For(range(num_layers), lambda: C.layers.Dense(hidden_size, activation=C.sigmoid))
        ])

    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0), name='rnn_parameters')
        def _func(operand):
            return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size,
                                        num_layers=num_layers, bidirectional=True, recurrent_op='lstm')
        return _func

    # Input variables denoting the features and label data
    feature_var = C.sequence.input_variable(feature_dim * (1 + context[0] + context[1]))
    label_var = C.sequence.input_variable(num_classes)

    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)

    if model_type == "DNN":
        net = MyDNNLayer(512, 4)(feature_norm)
    elif model_type == "BLSTM":
        net = MyBLSTMLayer(512, 2)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")

    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)

    # loss and metric
    ce = C.cross_entropy_with_softmax(out, label_var)
    pe = C.classification_error(out, label_var)
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        'final_hidden': net  # last hidden layer output for future use in the CTC tutorial
    }
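# Hedged usage sketch (not in the original): one plausible way to wire the dictionary
# returned by create_network into a CNTK Trainer. The file names and hyper-parameters
# below are placeholders for illustration only.
network = create_network(feature_dim=40, num_classes=256,
                         feature_mean_file='glob_mean.ascii',        # placeholder path
                         feature_inv_stddev_file='glob_stddev.ascii',  # placeholder path
                         label_prior_file='label_prior.ascii',         # placeholder path
                         model_type='BLSTM')
lr = C.learning_parameter_schedule(0.001)
mom = C.momentum_schedule(0.9)
learner = C.momentum_sgd(network['output'].parameters, lr, mom)
trainer = C.Trainer(network['output'], (network['ce'], network['pe']), [learner])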
def SRResNet(h0):
    print('Generator inp shape: ', h0.shape)
    with C.layers.default_options(init=C.he_normal(), bias=False):
        h1 = C.layers.Convolution((9, 9), 64, pad=True)(h0)
        h1 = C.param_relu(C.Parameter(h1.shape, init=C.he_normal()), h1)

        h2 = resblock_basic_stack(h1, 16, 64)

        h3 = C.layers.Convolution((3, 3), 64, activation=None, pad=True)(h2)
        h3 = C.layers.BatchNormalization(map_rank=1)(h3)

        h4 = h1 + h3  # skip connection around the residual block stack

        h5 = C.layers.ConvolutionTranspose2D((3, 3), 64, pad=True, strides=(2, 2), output_shape=(224, 224))(h4)
        h5 = C.param_relu(C.Parameter(h5.shape, init=C.he_normal()), h5)

        h6 = C.layers.Convolution((3, 3), 3, pad=True)(h5)
        return h6
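# Hedged usage sketch (not in the original): the stride-2 transposed convolution targets
# a 224 x 224 output, which suggests 112 x 112 low-resolution RGB inputs.
lr_image = C.input_variable((3, 112, 112), name='lr_image')  # assumed input size
generator = SRResNet(lr_image)  # expected output shape: (3, 224, 224)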
def conv_bn_lrelu(input, filter_shape, num_filters, strides=(1, 1), init=C.he_normal(), name=""):
    return conv_bn(input, filter_shape, num_filters, strides, init, activation=C.leaky_relu, name=name)
def create_resnet_model(input, out_dims):
    conv = convolution_bn(input, (3, 3), 16)

    r1_1 = resnet_basic_stack(conv, 16, 3)

    r2_1 = resnet_basic_inc(r1_1, 32)
    r2_2 = resnet_basic_stack(r2_1, 32, 2)

    r3_1 = resnet_basic_inc(r2_2, 64)
    r3_2 = resnet_basic_stack(r3_1, 64, 2)

    pool = C.layers.AveragePooling(filter_shape=(8, 8), strides=(1, 1))(r3_2)
    net = C.layers.Dense(out_dims, init=C.he_normal(), activation=None)(pool)

    return net
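# Hedged usage sketch (not in the original): the final 8 x 8 average pooling after two
# stride-2 stages implies 32 x 32 inputs, e.g. CIFAR-10-sized images with 10 classes.
input_var = C.input_variable((3, 32, 32), name='image')  # assumed input size
z = create_resnet_model(input_var, out_dims=10)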
def MyBLSTMLayer(hidden_size=128, num_layers=2):
    W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0), name='rnn_parameters')
    def _func(operand):
        return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size,
                                    num_layers=num_layers, bidirectional=True, recurrent_op='lstm')
    return _func
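# Hedged usage sketch (not in the original): applying the factory to a sequence input.
# The 40-dimensional feature size is a placeholder; note that C.optimized_rnnstack is
# backed by cuDNN and therefore only runs on a GPU.
features = C.sequence.input_variable(40)
blstm_out = MyBLSTMLayer(hidden_size=128, num_layers=2)(features)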
def convolution_bn(input, filter_size, num_filters, strides=(1, 1), init=C.he_normal(), activation=C.relu):
    if activation is None:
        activation = lambda x: x

    r = C.layers.Convolution(filter_size, num_filters, strides=strides, init=init,
                             activation=None, pad=True, bias=False)(input)
    r = C.layers.BatchNormalization(map_rank=1)(r)
    r = activation(r)

    return r
def conv_bn(input, filter_shape, num_filters, strides=(1, 1), init=C.he_normal(), activation=None, name=""):
    x = conv(input, filter_shape, num_filters, strides, init, name=name + "_conv" if name else "")
    x = bn(x, activation, name=name)
    return x
def conv(input, filter_shape, num_filters, strides=(1, 1), init=C.he_normal(), activation=None, pad=True, name=""):
    return C.layers.Convolution(filter_shape, num_filters, strides=strides, pad=pad,
                                activation=activation, init=init, bias=False, name=name)(input)
def create_network(feature_dim=40, num_classes=256, feature_mean_file=None, feature_inv_stddev_file=None,
                   feature_norm_files=None, label_prior_file=None, context=(0, 0), model_type=None):
    # NOTE: the enclosing def is not part of this excerpt; its signature is assumed to
    # match create_network above. MyMeanVarNorm is also defined there.

    def MyDNNLayer(hidden_size=128, num_layers=2):
        return C.layers.Sequential([
            C.layers.For(range(num_layers),
                         lambda: C.layers.Dense(hidden_size) >> C.layers.BatchNormalization() >> C.sigmoid >> C.layers.Dropout(.3))
        ])

    def MyBLSTMLayer(hidden_size=128, num_layers=2):
        # 'C.Parameter' creates a learnable parameter tensor that holds the RNN weights
        W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0), name='rnn_parameters')
        def _func(operand):  # operand is the input sequence
            return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size,
                                        num_layers=num_layers, bidirectional=True, recurrent_op='lstm')
        return _func

    # Input variables denoting the features and label data: the places in the network
    # where feature and label data are fed in
    feature_var = C.sequence.input_variable(feature_dim * (1 + context[0] + context[1]))
    label_var = C.sequence.input_variable(num_classes)

    # first layer: mean/variance normalization of the features (feature_var is the operand of MyMeanVarNorm's _func)
    feature_norm = MyMeanVarNorm(feature_mean_file, feature_inv_stddev_file)(feature_var)
    label_prior = load_ascii_vector(label_prior_file, 'label_prior')
    log_prior = C.log(label_prior)  # element-wise natural logarithm of label_prior

    if model_type == "DNN":
        net = MyDNNLayer(512, 4)(feature_norm)
    elif model_type == "BLSTM":
        net = MyBLSTMLayer(512, 3)(feature_norm)
    else:
        raise RuntimeError("model_type must be DNN or BLSTM")

    # output layer for both network types; C.he_normal initializes the weights from a
    # Gaussian with mean 0 and a scaled standard deviation
    out = C.layers.Dense(num_classes, init=C.he_normal(scale=1/3))(net)

    # loss and metric
    ce = C.cross_entropy_with_softmax(out, label_var)  # training objective
    pe = C.classification_error(out, label_var)        # evaluation metric
    ScaledLogLikelihood = C.minus(out, log_prior, name='ScaledLogLikelihood')

    # talk to the user
    C.logging.log_number_of_parameters(out)  # print the number of parameters in the whole model
    print()

    return {
        'feature': feature_var,
        'label': label_var,
        'output': out,
        'ScaledLogLikelihood': ScaledLogLikelihood,
        'ce': ce,
        'pe': pe,
        'final_hidden': net  # last hidden layer output for future use in the CTC tutorial
    }
def conv_bn(input, filter_size, num_filters, strides=(1, 1), init=C.he_normal(), bn_init_scale=1):
    c = Convolution2D(filter_size, num_filters, activation=None, init=init, pad=True, strides=strides, bias=False)(input)
    r = BatchNormalization(map_rank=1, normalization_time_constant=4096, use_cntk_engine=False,
                           init_scale=bn_init_scale, disable_regularization=True)(c)
    return r
def GaussianWindowAttention(nb_mixtures, activation=C.softplus, init=C.he_normal(), name=''):
    """
    Implementation of the attention model found in "Generating sequences with recurrent neural networks"
    by Alex Graves. Gaussian window attention uses a directional mixture of gaussian kernels as the
    convolution/attention window. For more details, the paper can be found at https://arxiv.org/abs/1308.0850

    Note:
        There is a slight deviation from the original implementation: softplus is used as the
        activation function instead of exp, since the exp activation causes some minor instability.

    Example:
        seq1 = C.Axis.new_unique_dynamic_axis('seq1')
        seq2 = C.Axis.new_unique_dynamic_axis('seq2')
        encoded = C.sequence.input_variable(30, sequence_axis=seq1)
        query = C.sequence.input_variable(28, sequence_axis=seq2)

        a = GaussianWindowAttention(10)(encoded, query)

        assert a.shape == (30, )

    Arguments:
        nb_mixtures (int): number of gaussian mixtures to use for the attention model

    Returns:
        :class:`~cntk.ops.functions.Function`:
    """
    dense = Dense(shape=3 * nb_mixtures, activation=activation, init=init, name="GravesAttention")

    def window_weight(a, b, k, u):
        """
        Calculate phi, the window weight of the character sequence at position u and time t.
        Function tested to be correct on 2018-02-25 using a numpy equivalent.

        math:
            phi = summation over mixtures { a * exp(-b * (k - u) ^ 2) }

        Args:
            a: importance of the window within the mixture. Not normalised and doesn't sum to one.
            b: width of the attention window
            k: location of the window
            u: integer position of each item in the sequence. Values from 1 to seq_length. (rank 2 tensor) [-3, 1]

        Returns:
            :class:`~cntk.ops.functions.Function`
        """
        # print(f"k shape: {k.shape}, u shape: {u.shape}")
        phi = a * C.exp(-1 * b * C.square(k - u))
        # print("internal phi shape:", phi.shape)
        phi = C.swapaxes(C.reduce_sum(phi, axis=0))  # reduce-sum over the mixture axis
        # phi: [#, n] [*-c, 1]
        return phi

    @C.typemap
    def gaussian_windows_attention_coefficients(abk, nb_mixtures):
        """ Split into 3 equal tensors of dim nb_mixtures """
        a = C.slice(abk, 0, 0, nb_mixtures)
        b = C.slice(abk, 0, nb_mixtures, 2 * nb_mixtures)
        k = C.slice(abk, 0, 2 * nb_mixtures, 0)
        k = Recurrence(C.plus)(k)

        a = C.expand_dims(a, axis=-1)
        b = C.expand_dims(b, axis=-1)
        k = C.expand_dims(k, axis=-1)
        return a, b, k

    @C.BlockFunction('GaussianWindowAttention', name)
    def attention(encoded, network):
        abk = dense(network)
        a, b, k = gaussian_windows_attention_coefficients(abk, nb_mixtures)
        # print("abk shape:", a.shape, b.shape, k.shape)
        # a, b, k: [#, n] [nb_mixture, 1]
        # context: [#, c] [char_ohe]

        encoded_unpacked = C.sequence.unpack(encoded, padding_value=0, no_mask_output=True)
        # context_unpacked: [#] [*=c, char_ohe]
        u = Cx.sequence.position(encoded)  # position gives shape=(1, )
        # u: [#, c], [1]
        u_values, u_valid = C.sequence.unpack(u, padding_value=999_999).outputs
        # u_values: [#] [*=c, 1]
        # u_valid: [#] [*=c]
        u_values_broadcast = C.swapaxes(C.sequence.broadcast_as(u_values, k))
        # u_values_broadcast: [#, n] [1, *=c]
        u_valid_broadcast = C.sequence.broadcast_as(C.reshape(u_valid, (1,), 1), k)
        # u_valid_broadcast: [#, n] [*=c, 1] ~ shape verified correct at this point

        # print("u_values_broadcast shape:", u_values_broadcast.shape)
        # print("abk shape:", a.shape, b.shape, k.shape)
        phi = window_weight(a, b, k, u_values_broadcast)
        # phi: [#, n] [*=c, 1]
        zero = C.constant(0)
        phi = C.element_select(u_valid_broadcast, phi, zero, name="phi")
        # phi: [#, n] [*=c, 1]
        attended = C.reduce_sum(phi * C.sequence.broadcast_as(encoded_unpacked, phi), axis=0)
        # [#, n] [1, char_ohe]
        # print("attended shape:", attended.shape)
        output = C.squeeze(attended, name="GaussianWindowAttention")
        # [#, n] [char_ohe]
        return output

    return attention
def MyBLSTMLayer(hidden_size=128, num_layers=2):
    # 'C.Parameter' creates a learnable parameter tensor that holds the RNN weights
    W = C.Parameter((C.InferredDimension, hidden_size), init=C.he_normal(1.0), name='rnn_parameters')
    def _func(operand):  # operand is the input sequence
        return C.optimized_rnnstack(operand, weights=W, hidden_size=hidden_size,
                                    num_layers=num_layers, bidirectional=True, recurrent_op='lstm')
    return _func
def resnet_basic_stack(input, num_filters, num_stack):
    assert (num_stack > 0)
    r = input
    for _ in range(num_stack):
        r = resnet_basic(r, num_filters)
    return r

def create_resnet_model(input, out_dims):
    conv = convolution_bn(input, (3, 3), 16)

    r1_1 = resnet_basic_stack(conv, 16, 3)

    r2_1 = resnet_basic_inc(r1_1, 32)
    r2_2 = resnet_basic_stack(r2_1, 32, 2)

    r3_1 = resnet_basic_inc(r2_2, 64)
    r3_2 = resnet_basic_stack(r3_1, 64, 2)

    # Global average pooling
    pool = C.layers.AveragePooling(filter_shape=(8, 8), strides=(1, 1))(r3_2)
    net = C.layers.Dense(out_dims, init=C.he_normal(), activation=None)(pool)

    return net

pred_resnet = train_and_evaluate(reader_train, reader_test, max_epochs=5, model_func=create_resnet_model)