def test_vgg_deconv():
    if K.image_data_format() == 'channels_first':
        x1 = K.variable(np.random.random((1, 512, 8, 8)))
        y1_shape = (1, 21, 18, 18)
        x2 = K.variable(np.random.random((1, 512, 27, 27)))
        y2_shape = (1, 21, 38, 38)
        x3 = K.variable(np.random.random((1, 256, 53, 53)))
        y3_shape = (1, 21, 312, 312)
    else:
        x1 = K.variable(np.random.random((1, 8, 8, 512)))
        y1_shape = (1, 18, 18, 21)
        x2 = K.variable(np.random.random((1, 27, 27, 512)))
        y2_shape = (1, 38, 38, 21)
        x3 = K.variable(np.random.random((1, 53, 53, 256)))
        y3_shape = (1, 312, 312, 21)

    upscore1 = vgg_deconv(classes=21)(x1, None)
    assert K.int_shape(upscore1) == y1_shape
    assert not np.any(np.isnan(K.eval(upscore1)))

    upscore2 = vgg_deconv(classes=21)(x2, upscore1)
    assert K.int_shape(upscore2) == y2_shape
    assert not np.any(np.isnan(K.eval(upscore2)))

    upscore3 = vgg_deconv(classes=21, kernel_size=(16, 16),
                          strides=(8, 8))(x3, upscore2)
    assert K.int_shape(upscore3) == y3_shape
    assert not np.any(np.isnan(K.eval(upscore3)))
def shortcut(input, residual):
    """Adds a shortcut between input and residual block and merges them with "sum"."""
    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (width, height).
    # Should be int if network architecture is correctly configured.
    ROW_AXIS = 1
    COL_AXIS = 2
    CHANNEL_AXIS = 3
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    # Projection conv if shape is different, else identity.
    # (Note: a 2x2 kernel is used here rather than the canonical 1x1.)
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        # kernel_regularizer = l2(1e-5)
        # kernel_regularizer = l2(1e-6)
        kernel_regularizer = None
        shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                          kernel_size=(2, 2),
                          # kernel_size=(1, 1),
                          strides=(stride_width, stride_height),
                          padding="valid",
                          kernel_initializer="he_normal",
                          kernel_regularizer=kernel_regularizer)(input)

    return add([shortcut, residual])
def _shortcut(input, residual):
    """Adds a shortcut between input and residual block and merges them with "sum"."""
    # Expand channels of shortcut to match residual.
    # Stride appropriately to match residual (width, height).
    # Should be int if network architecture is correctly configured.
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)
    stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
    stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
    equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

    shortcut = input
    # Only project if the shape is different.
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        if SHORTCUT_OPTION == 'B':
            # 1x1 convolution to match dimension
            shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS],
                              kernel_size=(1, 1),
                              strides=(stride_width, stride_height),
                              padding="valid",
                              kernel_initializer="he_normal",
                              kernel_regularizer=l2(0.0001))(input)
        elif SHORTCUT_OPTION == 'A':
            # spatial pooling with padded identity mapping
            x = AveragePooling2D(pool_size=(1, 1),
                                 strides=(stride_width, stride_height))(input)
            # multiply every element of x by 0 to get a zero matrix
            mul_zero = Lambda(lambda val: val * 0.0,
                              output_shape=K.int_shape(x)[1:])(x)
            shortcut = concatenate([x, mul_zero], axis=CHANNEL_AXIS)

    return add([shortcut, residual])
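# Hedged usage sketch for `_shortcut` (not from the source): a minimal basic
# residual block that downsamples by 2 and relies on `_shortcut` to reconcile
# the skip path. Assumes ROW_AXIS/COL_AXIS/CHANNEL_AXIS and SHORTCUT_OPTION
# are configured as in the surrounding module; `basic_block_sketch` is a
# hypothetical name.
def basic_block_sketch(input, filters):
    conv1 = Conv2D(filters, (3, 3), strides=(2, 2), padding="same",
                   kernel_initializer="he_normal")(input)
    conv2 = Conv2D(filters, (3, 3), padding="same",
                   kernel_initializer="he_normal")(conv1)
    # `_shortcut` detects the stride-2 shape change and projects the input
    return _shortcut(input, conv2)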
def upsampling_block(input_tensor, skip_tensor, filters, padding='same',
                     batchnorm=True, dropout=0.0):
    x = Conv2DTranspose(filters, kernel_size=(2, 2), strides=(2, 2))(input_tensor)

    # compute amount of cropping needed for skip_tensor
    _, x_height, x_width, _ = K.int_shape(x)
    _, s_height, s_width, _ = K.int_shape(skip_tensor)
    h_crop = s_height - x_height
    w_crop = s_width - x_width
    assert h_crop >= 0
    assert w_crop >= 0
    if h_crop == 0 and w_crop == 0:
        y = skip_tensor
    else:
        cropping = ((h_crop // 2, h_crop - h_crop // 2),
                    (w_crop // 2, w_crop - w_crop // 2))
        y = Cropping2D(cropping=cropping)(skip_tensor)

    x = Concatenate()([x, y])
    x = Conv2D(filters, kernel_size=(3, 3), padding=padding)(x)
    x = BatchNormalization()(x) if batchnorm else x
    x = Activation('relu')(x)
    x = Dropout(dropout)(x) if dropout > 0 else x
    x = Conv2D(filters, kernel_size=(3, 3), padding=padding)(x)
    x = BatchNormalization()(x) if batchnorm else x
    x = Activation('relu')(x)
    x = Dropout(dropout)(x) if dropout > 0 else x
    return x
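# Hedged usage sketch (not from the source): chaining `upsampling_block`
# through a U-Net style decoder. `c1`..`c4` are assumed encoder feature maps
# at decreasing resolution and `bottleneck` is the deepest tensor; all names
# are illustrative only.
x = upsampling_block(bottleneck, c4, 256)
x = upsampling_block(x, c3, 128)
x = upsampling_block(x, c2, 64)
x = upsampling_block(x, c1, 32)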
def call(self, x):
    assert isinstance(x, list)
    inp_a, inp_b = x

    outp_a = K.l2_normalize(inp_a, -1)
    outp_b = K.l2_normalize(inp_b, -1)
    alpha = K.batch_dot(outp_b, outp_a, axes=[2, 2])
    alpha = K.l2_normalize(alpha, 1)
    alpha = K.one_hot(K.argmax(alpha, 1), K.int_shape(inp_a)[1])
    hmax = K.batch_dot(alpha, outp_b, axes=[1, 1])
    kcon = K.eye(K.int_shape(inp_a)[1], dtype='float32')

    m = []
    for i in range(self.output_dim):
        outp_a = inp_a * self.W[i]
        outp_hmax = hmax * self.W[i]
        outp_a = K.l2_normalize(outp_a, -1)
        outp_hmax = K.l2_normalize(outp_hmax, -1)
        outp = K.batch_dot(outp_hmax, outp_a, axes=[2, 2])
        outp = K.sum(outp * kcon, -1, keepdims=True)
        m.append(outp)
    if self.output_dim > 1:
        persp = K.concatenate(m, 2)
    else:
        persp = m[0]  # single perspective: unwrap the one-element list
    return [persp, persp]
def duplet_model(self):
    duplet = self.duplet
    c_shape = K.int_shape(duplet.inputs[0])
    r_shape = K.int_shape(duplet.inputs[1])
    c = Input(batch_shape=c_shape)
    r = Input(batch_shape=r_shape)
    score = duplet([c, r])
    score = Lambda(lambda x: 1. - x)(score)
    model = Model([c, r], score)
    return model
def myLoss(y_true, y_pred):
    p1 = K.mean(K.abs(y_pred - y_true), axis=-1)
    print("Shape: " + str(K.int_shape(y_pred)))
    # t2 = tf.slice(y_pred, 2, -1)
    yy = y_true - y_pred
    t2 = yy[:, 2:, :]
    t3 = yy[:, 1:-1, :]
    # t3 = tf.slice(y_pred, 1, -2)
    print("Shape2: " + str(K.int_shape(t2)[1]))
    print("Shape3: " + str(K.int_shape(t3)[1]))
    return p1 + K.sum(K.abs(t3 - t2)) / K.int_shape(t3)[1]
def softmax_sparse_crossentropy_ignoring_last_label(y_true, y_pred):
    y_pred = K.reshape(y_pred, (-1, K.int_shape(y_pred)[-1]))
    log_softmax = tf.nn.log_softmax(y_pred)

    y_true = K.one_hot(tf.to_int32(K.flatten(y_true)),
                       K.int_shape(y_pred)[-1] + 1)
    unpacked = tf.unstack(y_true, axis=-1)
    y_true = tf.stack(unpacked[:-1], axis=-1)

    cross_entropy = -K.sum(y_true * log_softmax, axis=1)
    cross_entropy_mean = K.mean(cross_entropy)
    return cross_entropy_mean
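# Hedged usage sketch (not from the source): compiling a segmentation model
# with this loss. `model` is a hypothetical network whose last axis has
# `num_classes` channels; integer labels equal to `num_classes` fall into the
# extra one-hot column that is dropped above, so they are ignored.
model.compile(optimizer='adam',
              loss=softmax_sparse_crossentropy_ignoring_last_label)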
def triplet_model(self):
    duplet = self.duplet
    c_shape = K.int_shape(duplet.inputs[0])
    r_shape = K.int_shape(duplet.inputs[1])
    c1 = Input(batch_shape=c_shape)
    r1 = Input(batch_shape=r_shape)
    c2 = Input(batch_shape=c_shape)
    r2 = Input(batch_shape=r_shape)
    score1 = duplet([c1, r1])
    score2 = duplet([c2, r2])
    score_diff = Subtract()([score2, score1])
    model = Model([c1, r1, c2, r2], score_diff)
    return model
def __call__(self, inputs, initial_state=None, constants=None, **kwargs):
    inputs, initial_state, constants = self._standardize_args(
        inputs, initial_state, constants, self._num_constants)

    if initial_state is None and constants is None:
        return super(ExternalAttentionRNNWrapper, self).__call__(inputs, **kwargs)

    # If any of `initial_state` or `constants` are specified and are Keras
    # tensors, then add them to the inputs and temporarily modify the
    # input_spec to include them.

    additional_inputs = []
    additional_specs = []
    if initial_state is not None:
        kwargs['initial_state'] = initial_state
        additional_inputs += initial_state
        self.state_spec = [InputSpec(shape=K.int_shape(state))
                           for state in initial_state]
        additional_specs += self.state_spec
    if constants is not None:
        kwargs['constants'] = constants
        additional_inputs += constants
        self.constants_spec = [InputSpec(shape=K.int_shape(constant))
                               for constant in constants]
        self._num_constants = len(constants)
        additional_specs += self.constants_spec
    # at this point additional_inputs cannot be empty
    is_keras_tensor = K.is_keras_tensor(additional_inputs[0])
    for tensor in additional_inputs:
        if K.is_keras_tensor(tensor) != is_keras_tensor:
            raise ValueError('The initial state or constants of an ExternalAttentionRNNWrapper'
                             ' layer cannot be specified with a mix of'
                             ' Keras tensors and non-Keras tensors'
                             ' (a "Keras tensor" is a tensor that was'
                             ' returned by a Keras layer, or by `Input`)')

    if is_keras_tensor:
        # Compute the full input spec, including state and constants
        full_input = inputs + additional_inputs
        full_input_spec = self.input_spec + additional_specs
        # Perform the call with temporarily replaced input_spec
        original_input_spec = self.input_spec
        self.input_spec = full_input_spec
        output = super(ExternalAttentionRNNWrapper, self).__call__(full_input, **kwargs)
        self.input_spec = self.input_spec[:len(original_input_spec)]
        return output
    else:
        return super(ExternalAttentionRNNWrapper, self).__call__(inputs, **kwargs)
def get_num_filters(layer):
    """Determines the number of filters within the given `layer`.

    Args:
        layer: The keras layer to use.

    Returns:
        Total number of filters within `layer`.
        For `keras.layers.Dense` layer, this is the total number of outputs.
    """
    # Handle layers with no channels.
    if K.ndim(layer.output) == 2:
        return K.int_shape(layer.output)[-1]

    channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
    return K.int_shape(layer.output)[channel_idx]
def test_bilinear_upsampling_2d():
    num_samples = 2
    stack_size = 2
    input_len_dim1 = 5
    input_len_dim2 = 5
    target_len_dim1 = 8
    target_len_dim2 = 8

    for data_format in ['channels_first', 'channels_last']:
        if data_format == 'channels_first':
            inputs = np.random.rand(num_samples, stack_size,
                                    input_len_dim1, input_len_dim2)
            target = np.random.rand(num_samples, stack_size,
                                    target_len_dim1, target_len_dim2)
            expected_output_shape = (2, 2, 8, 8)
        else:
            inputs = np.random.rand(num_samples, input_len_dim1,
                                    input_len_dim2, stack_size)
            target = np.random.rand(num_samples, target_len_dim1,
                                    target_len_dim2, stack_size)
            expected_output_shape = (2, 8, 8, 2)

        # shape test
        layer = BilinearUpSampling2D(target_shape=target.shape,
                                     data_format=data_format)
        output = layer(K.variable(inputs))
        assert K.int_shape(output) == expected_output_shape
def sampling(args):
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))
    return z_mean + K.exp(0.5 * z_log_var) * epsilon
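# Hedged usage sketch (not from the source): the reparameterization trick is
# typically wired into a VAE encoder through a Lambda layer. `z_mean`,
# `z_log_var` and `latent_dim` are assumed to come from the surrounding
# encoder definition.
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])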
def sampling(args): """Reparameterization trick by sampling fr an isotropic unit Gaussian. # Arguments: args (tensor): mean and log of variance of Q(z|X) # Returns: z (tensor): sampled latent vector """ print("args") print(args) # print("z_mean") # print(z_mean) # print("z_log_var") # print(z_log_var) z_mean, z_log_var = args batch = K.shape(z_mean)[0] print("batch") print(z_mean.shape[0]) print(batch) dim = K.int_shape(z_mean)[1] print("dim") print(z_mean.shape[1]) print(dim) # by default, random_normal has mean=0 and std=1.0 epsilon = K.random_normal(shape=(batch, dim)) return z_mean + K.exp(0.5 * z_log_var) * epsilon
def VGGUpsampler(pyramid, scales, classes, weight_decay=0.):
    """A Functional upsampler for the VGG Nets.

    :param pyramid: A list of features in the pyramid, scaling from large
        receptive field to small receptive field. The bottom of the pyramid
        is the input image.
    :param scales: A list of weights for each feature map in the pyramid,
        sorted in the same order as the pyramid.
    :param classes: Integer, number of classes.
    """
    if len(scales) != len(pyramid) - 1:
        raise ValueError('`scales` needs to match the length of '
                         '`pyramid` - 1.')
    blocks = []

    for i in range(len(pyramid) - 1):
        block_name = 'feat{}'.format(i + 1)
        block = vgg_upsampling(classes=classes,
                               target_shape=K.int_shape(pyramid[i + 1]),
                               scale=scales[i],
                               weight_decay=weight_decay,
                               block_name=block_name)
        blocks.append(block)

    return Decoder(pyramid=pyramid[:-1], blocks=blocks)
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                              K.dtype(self.decay))))
    # momentum
    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + moments
    for p, g, m in zip(params, grads, moments):
        if p.name in self.lr_mult:
            multiplied_lr = lr * self.lr_mult[p.name]
        else:
            multiplied_lr = lr

        v = self.momentum * m - multiplied_lr * g  # velocity
        self.updates.append(K.update(m, v))

        if self.nesterov:
            new_p = p + self.momentum * v - multiplied_lr * g
        else:
            new_p = p + v

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def call(self, x, mask=None):
    assert self.built, 'Layer must be built before being called'
    input_shape = K.int_shape(x)

    reduction_axes = list(range(len(input_shape)))
    del reduction_axes[self.axis]
    broadcast_shape = [1] * len(input_shape)
    broadcast_shape[self.axis] = input_shape[self.axis]

    # list() is needed for Python 3, where range() is not a list
    if sorted(reduction_axes) == list(range(K.ndim(x)))[:-1]:
        x_normed = K.batch_normalization(
            x, self.running_mean, self.running_std,
            self.beta, self.gamma,
            epsilon=self.epsilon)
    else:
        # need broadcasting
        broadcast_running_mean = K.reshape(self.running_mean, broadcast_shape)
        broadcast_running_std = K.reshape(self.running_std, broadcast_shape)
        broadcast_beta = K.reshape(self.beta, broadcast_shape)
        broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
        x_normed = K.batch_normalization(
            x, broadcast_running_mean, broadcast_running_std,
            broadcast_beta, broadcast_gamma,
            epsilon=self.epsilon)

    return x_normed
def call(self, inputs):
    input_shape = K.int_shape(inputs)
    if len(input_shape) != 4:
        raise ValueError('Inputs should have rank ' + str(4) +
                         '; Received input shape:', str(input_shape))

    if self.data_format == 'channels_first':
        batch_size, c, h, w = input_shape
        if batch_size is None:
            batch_size = -1
        rh, rw = self.size
        oh, ow = h * rh, w * rw
        oc = c // (rh * rw)

        out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w))
        out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2))
        out = K.reshape(out, (batch_size, oc, oh, ow))
        return out

    elif self.data_format == 'channels_last':
        batch_size, h, w, c = input_shape
        if batch_size is None:
            batch_size = -1
        rh, rw = self.size
        oh, ow = h * rh, w * rw
        oc = c // (rh * rw)

        out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc))
        out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5))
        out = K.reshape(out, (batch_size, oh, ow, oc))
        return out
def additive_self_attention(units, n_hidden=None, n_output_features=None, activation=None):
    """Compute additive self attention for time series of vectors (with batch dimension)
    using the formula: score(h_i, h_j) = <v, tanh(W_1 h_i + W_2 h_j)>,
    where v is a learnable vector of n_hidden dimensionality and
    W_1 and W_2 are learnable [n_hidden, n_input_features] matrices.

    Args:
        units: tf tensor with dimensionality [batch_size, time_steps, n_input_features]
        n_hidden: number of units in the hidden representation of the similarity measure
        n_output_features: number of features in the output dense layer
        activation: activation at the output

    Returns:
        output: self-attended tensor with dimensionality
            [batch_size, time_steps, n_output_features]
    """
    n_input_features = K.int_shape(units)[2]
    if n_hidden is None:
        n_hidden = n_input_features
    if n_output_features is None:
        n_output_features = n_input_features
    exp1 = Lambda(lambda x: expand_tile(x, axis=1))(units)
    exp2 = Lambda(lambda x: expand_tile(x, axis=2))(units)
    units_pairs = Concatenate(axis=3)([exp1, exp2])
    query = Dense(n_hidden, activation="tanh")(units_pairs)
    attention = Dense(1, activation=lambda x: softmax(x, axis=2))(query)
    attended_units = Lambda(lambda x: K.sum(attention * x, axis=2))(exp1)
    output = Dense(n_output_features, activation=activation)(attended_units)
    return output
def multiplicative_self_attention(units, n_hidden=None, n_output_features=None, activation=None):
    """Compute multiplicative self attention for time series of vectors (with batch dimension)
    using the formula: score(h_i, h_j) = <W_1 h_i, W_2 h_j>,
    where W_1 and W_2 are learnable matrices with dimensionality
    [n_hidden, n_input_features].

    Args:
        units: tf tensor with dimensionality [batch_size, time_steps, n_input_features]
        n_hidden: number of units in the hidden representation of the similarity measure
        n_output_features: number of features in the output dense layer
        activation: activation at the output

    Returns:
        output: self-attended tensor with dimensionality
            [batch_size, time_steps, n_output_features]
    """
    n_input_features = K.int_shape(units)[2]
    if n_hidden is None:
        n_hidden = n_input_features
    if n_output_features is None:
        n_output_features = n_input_features
    exp1 = Lambda(lambda x: expand_tile(x, axis=1))(units)
    exp2 = Lambda(lambda x: expand_tile(x, axis=2))(units)
    queries = Dense(n_hidden)(exp1)
    keys = Dense(n_hidden)(exp2)
    scores = Lambda(lambda x: K.sum(queries * x, axis=3, keepdims=True))(keys)
    attention = Lambda(lambda x: softmax(x, axis=2))(scores)
    mult = Multiply()([attention, exp1])
    attended_units = Lambda(lambda x: K.sum(x, axis=2))(mult)
    output = Dense(n_output_features, activation=activation)(attended_units)
    return output
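# Hedged usage sketch (not from the source): applying the attention to a
# recurrent encoder's outputs inside a functional model. The LSTM layer and
# all shapes here are illustrative assumptions, not part of the original code.
inp = Input(shape=(50, 300))
hidden = LSTM(128, return_sequences=True)(inp)
attended = multiplicative_self_attention(hidden, n_hidden=64,
                                         n_output_features=128,
                                         activation='relu')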
def f(x, y):
    def scaling(xx, ss=1):
        return xx * ss

    scaled = Lambda(scaling, arguments={'ss': scale},
                    name='scale_{}'.format(block_name))(x)
    score = Conv2D(filters=classes, kernel_size=(1, 1),
                   activation='linear',
                   kernel_initializer='he_normal',
                   kernel_regularizer=l2(weight_decay),
                   name='score_{}'.format(block_name))(scaled)
    if y is None:
        upscore = Conv2DTranspose(filters=classes, kernel_size=kernel_size,
                                  strides=strides, padding='valid',
                                  kernel_initializer='he_normal',
                                  kernel_regularizer=l2(weight_decay),
                                  use_bias=False,
                                  name='upscore_{}'.format(block_name))(score)
    else:
        crop = CroppingLike2D(target_shape=K.int_shape(y),
                              offset=crop_offset,
                              name='crop_{}'.format(block_name))(score)
        merge = add([y, crop])
        upscore = Conv2DTranspose(filters=classes, kernel_size=kernel_size,
                                  strides=strides, padding='valid',
                                  kernel_initializer='he_normal',
                                  kernel_regularizer=l2(weight_decay),
                                  use_bias=False,
                                  name='upscore_{}'.format(block_name))(merge)
    return upscore
def test_multi_output_mask():
    """Fixes #7589"""
    class ArbitraryMultiOutputLayer(Layer):
        def __init__(self, **kwargs):
            super(ArbitraryMultiOutputLayer, self).__init__(**kwargs)

        def call(self, inputs, **kwargs):
            return [K.abs(inputs), K.abs(inputs)]

        def compute_output_shape(self, input_shape):
            out_shape = super(ArbitraryMultiOutputLayer, self).compute_output_shape(input_shape)
            return [out_shape, out_shape]

    class ArbitraryMultiInputLayer(Layer):
        def __init__(self, **kwargs):
            super(ArbitraryMultiInputLayer, self).__init__(**kwargs)

        def call(self, inputs, **kwargs):
            negative, positive = inputs
            return negative + positive

    input_layer = Input(shape=(16, 16, 3))
    x, y = ArbitraryMultiOutputLayer()(input_layer)
    z = ArbitraryMultiInputLayer()([x, y])
    _ = Model(inputs=input_layer, outputs=z)
    assert K.int_shape(z)[1:] == (16, 16, 3)
def _inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
    channel_axis = 1 if K.image_data_format() == 'channels_first' else 3
    if block_idx is None:
        prefix = None
    else:
        prefix = '_'.join((block_type, str(block_idx)))
    name_fmt = partial(_generate_layer_name, prefix=prefix)

    if block_type == 'Block35':
        branch_0 = conv2d_bn(x, 32, 1, name=name_fmt('Conv2d_1x1', 0))
        branch_1 = conv2d_bn(x, 32, 1, name=name_fmt('Conv2d_0a_1x1', 1))
        branch_1 = conv2d_bn(branch_1, 32, 3, name=name_fmt('Conv2d_0b_3x3', 1))
        branch_2 = conv2d_bn(x, 32, 1, name=name_fmt('Conv2d_0a_1x1', 2))
        branch_2 = conv2d_bn(branch_2, 32, 3, name=name_fmt('Conv2d_0b_3x3', 2))
        branch_2 = conv2d_bn(branch_2, 32, 3, name=name_fmt('Conv2d_0c_3x3', 2))
        branches = [branch_0, branch_1, branch_2]
    elif block_type == 'Block17':
        branch_0 = conv2d_bn(x, 128, 1, name=name_fmt('Conv2d_1x1', 0))
        branch_1 = conv2d_bn(x, 128, 1, name=name_fmt('Conv2d_0a_1x1', 1))
        branch_1 = conv2d_bn(branch_1, 128, [1, 7], name=name_fmt('Conv2d_0b_1x7', 1))
        branch_1 = conv2d_bn(branch_1, 128, [7, 1], name=name_fmt('Conv2d_0c_7x1', 1))
        branches = [branch_0, branch_1]
    elif block_type == 'Block8':
        branch_0 = conv2d_bn(x, 192, 1, name=name_fmt('Conv2d_1x1', 0))
        branch_1 = conv2d_bn(x, 192, 1, name=name_fmt('Conv2d_0a_1x1', 1))
        branch_1 = conv2d_bn(branch_1, 192, [1, 3], name=name_fmt('Conv2d_0b_1x3', 1))
        branch_1 = conv2d_bn(branch_1, 192, [3, 1], name=name_fmt('Conv2d_0c_3x1', 1))
        branches = [branch_0, branch_1]
    else:
        raise ValueError('Unknown Inception-ResNet block type. '
                         'Expects "Block35", "Block17" or "Block8", '
                         'but got: ' + str(block_type))

    mixed = Concatenate(axis=channel_axis, name=name_fmt('Concatenate'))(branches)
    up = conv2d_bn(mixed, K.int_shape(x)[channel_axis], 1,
                   activation=None, use_bias=True,
                   name=name_fmt('Conv2d_1x1'))
    up = Lambda(scaling,
                output_shape=K.int_shape(up)[1:],
                arguments={'scale': scale})(up)
    x = add([x, up])
    if activation is not None:
        x = Activation(activation, name=name_fmt('Activation'))(x)
    return x
def test_reset_states_with_values(layer_class):
    num_states = 2 if layer_class is recurrent.LSTM else 1
    layer = layer_class(units, stateful=True)
    layer.build((num_samples, timesteps, embedding_dim))
    layer.reset_states()
    assert len(layer.states) == num_states
    assert layer.states[0] is not None
    np.testing.assert_allclose(K.eval(layer.states[0]),
                               np.zeros(K.int_shape(layer.states[0])),
                               atol=1e-4)
    state_shapes = [K.int_shape(state) for state in layer.states]
    values = [np.ones(shape) for shape in state_shapes]
    layer.reset_states(values)
    np.testing.assert_allclose(K.eval(layer.states[0]),
                               np.ones(K.int_shape(layer.states[0])),
                               atol=1e-4)
def test_vgg_conv():
    if K.image_data_format() == 'channels_first':
        x = Input(shape=(3, 224, 224))
        y1_shape = (None, 64, 112, 112)
        y2_shape = (None, 128, 56, 56)
    else:
        x = Input(shape=(224, 224, 3))
        y1_shape = (None, 112, 112, 64)
        y2_shape = (None, 56, 56, 128)

    block1 = vgg_conv(filters=64, convs=2, block_name='block1')
    y = block1(x)
    assert K.int_shape(y) == y1_shape

    block2 = vgg_conv(filters=128, convs=2, block_name='block2')
    y = block2(y)
    assert K.int_shape(y) == y2_shape
def set_output_shape(self, model):
    """ Set the output shape for use in training and convert """
    logger.debug("Setting output shape")
    out = [K.int_shape(tensor)[-3:] for tensor in model.outputs]
    if not out:
        raise ValueError("No outputs found! Check your model.")
    self.output_shape = tuple(out[0])
    logger.debug("Added output shape: %s", self.output_shape)
def __call__(self, x):
    xshape = K.int_shape(x)
    if self.division_idx is None:
        # integer division: this value is used as a slice index below
        self.division_idx = xshape[-1] // 2
    x = K.reshape(x, (-1, xshape[-1]))
    x /= K.sqrt(K.sum(K.square(x), axis=0, keepdims=True))
    xx = K.sum(x[:, :self.division_idx] * x[:, self.division_idx:], axis=0)
    return self.gamma * K.sqrt(K.sum(K.square(xx)) + K.epsilon())
def expand_tile(units, axis):
    """Expand and tile tensor along given axis.

    Args:
        units: tf tensor with dimensions [batch_size, time_steps, n_input_features]
        axis: axis along which to expand and tile. Must be 1 or 2.
    """
    assert axis in (1, 2)
    n_time_steps = K.int_shape(units)[1]
    repetitions = [1, 1, 1, 1]
    repetitions[axis] = n_time_steps
    if axis == 1:
        expanded = Reshape(target_shape=((1,) + K.int_shape(units)[1:]))(units)
    else:
        expanded = Reshape(target_shape=(K.int_shape(units)[1:2] + (1,) +
                                         K.int_shape(units)[2:]))(units)
    return K.tile(expanded, repetitions)
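# Hedged shape check (not from the source): for a (batch, T, F) tensor,
# expand_tile along axis=1 yields (batch, T, T, F), so positions (i, j)
# index the pair (h_i, h_j) used by the attention scores above. The
# dimensions below are illustrative only.
units = Input(shape=(5, 8))  # T=5, F=8
tiled = Lambda(lambda t: expand_tile(t, axis=1))(units)
assert K.int_shape(tiled) == (None, 5, 5, 8)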
def store_input_shapes(self, model):
    """ Store the input and output shapes to state """
    logger.debug("Adding input shapes to state for model")
    inputs = {tensor.name: K.int_shape(tensor)[-3:]
              for tensor in model.inputs}
    if not any(inp for inp in inputs.keys() if inp.startswith("face")):
        raise ValueError("No input named 'face' was found. Check your input naming. "
                         "Current input names: {}".format(inputs))
    self.state.inputs = inputs
    logger.debug("Added input shapes: %s", self.state.inputs)
def __call__(self, x):
    xshape = K.int_shape(x)
    if self.division_idx is None:
        # integer division: this value is used as a slice index below
        self.division_idx = xshape[-1] // 2
    x = K.reshape(x, (-1, xshape[-1]))
    x /= K.sqrt(K.sum(K.square(x), axis=0, keepdims=True))
    # xx = K.dot(K.transpose(x), x)
    xx = K.sum(x[:, :self.division_idx] * x[:, self.division_idx:], axis=0)
    return self.gamma * K.sum(K.log(1.0 + K.exp(self.lam * (xx - 1.0))))
def fpn_orientation_graph(rois, feature_maps, mrcnn_probs,
                          mrcnn_bbox, image_meta,
                          pool_size, train_bn=True):
    """Builds the computation graph of the feature pyramid network
    orientation heads.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    mrcnn_probs: classifier probabilities.
    mrcnn_bbox: Deltas to apply to proposal boxes.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns:
        logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax)
        probs: [batch, num_rois, NUM_CLASSES] classifier probabilities
    """
    # ROI Pooling
    # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
    x = model.PyramidROIAlign(
        [pool_size, pool_size],
        name="roi_align_orientation")([rois, image_meta] + feature_maps)

    x = KL.TimeDistributed(KL.Conv2D(256, (5, 5), padding="valid"),
                           name="mrcnn_orientation_conv1")(x)
    x = KL.TimeDistributed(model.BatchNorm(),
                           name='mrcnn_orientation_bn1')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="valid"),
                           name="mrcnn_orientation_conv2")(x)
    x = KL.TimeDistributed(model.BatchNorm(),
                           name='mrcnn_orientation_bn2')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="valid"),
                           name="mrcnn_orientation_conv3")(x)
    x = KL.TimeDistributed(model.BatchNorm(),
                           name='mrcnn_orientation_bn3')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    # Two 1024 FC layers (implemented with Conv2D for consistency)
    # First layer
    x = KL.TimeDistributed(KL.Conv2D(1024, (6, 6), padding="valid"),
                           name="mrcnn_orientation_conv4")(x)
    x = KL.TimeDistributed(model.BatchNorm(),
                           name='mrcnn_orientation_bn4')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    # Second layer
    x = KL.TimeDistributed(KL.Conv2D(1024, (1, 1)),
                           name="mrcnn_orientation_conv5")(x)
    x = KL.TimeDistributed(model.BatchNorm(),
                           name='mrcnn_orientation_bn5')(x, training=train_bn)
    x = KL.Activation('relu')(x)

    # Squeezed feature maps
    # [batch, num_rois, fc_layers_size]
    shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
                       name="pool_squeeze_orientation")(x)

    # Add class probabilities
    shared = KL.Concatenate(axis=2)([shared, mrcnn_probs])

    # Add detected bounding box
    s = K.int_shape(mrcnn_bbox)
    mrcnn_bbox = KL.Reshape((s[1], s[2] * s[3]))(mrcnn_bbox)
    shared = KL.Concatenate(axis=2)([shared, mrcnn_bbox])

    logits = []
    probs = []
    res = []

    '''
    for angle in range(0, 3):
        for bin in range(0, 2):
            bin_logits, bin_prob, bin_res = bin_block(shared, angle, bin, train_bn)
            logits.append(bin_logits)
            probs.append(bin_prob)
            res.append(bin_res)
    '''
    for angle in range(0, 3):
        bin_logits, bin_prob, bin_res = angle_block(shared, angle, train_bn)
        logits.append(bin_logits)
        probs.append(bin_prob)
        res.append(bin_res)

    logits = KL.Concatenate(axis=2)(logits)
    probs = KL.Concatenate(axis=2)(probs)
    res = KL.Concatenate(axis=2)(res)

    # logits, probs, res = full_block(shared, train_bn)

    return logits, probs, res
def __init__(self, model, bounds, channel_axis=3,
             preprocessing=(0, 1), predicts='probabilities'):
    super(KerasModel, self).__init__(bounds=bounds,
                                     channel_axis=channel_axis,
                                     preprocessing=preprocessing)

    from keras import backend as K

    if predicts == 'probs':
        predicts = 'probabilities'
    assert predicts in ['probabilities', 'logits']

    images_input = model.input
    label_input = K.placeholder(shape=(1,))

    predictions = model.output
    if predicts == 'probabilities':
        predictions_are_logits = False
    elif predicts == 'logits':
        predictions_are_logits = True

    shape = K.int_shape(predictions)
    _, num_classes = shape
    assert num_classes is not None
    self._num_classes = num_classes

    loss = K.sparse_categorical_crossentropy(
        label_input, predictions, from_logits=predictions_are_logits)

    # sparse_categorical_crossentropy returns 1-dim tensor,
    # gradients wants 0-dim tensor (for some backends)
    loss = K.squeeze(loss, axis=0)

    grads = K.gradients(loss, images_input)
    if K.backend() == 'tensorflow':
        # tensorflow backend returns a list with the gradient
        # as the only element, even if loss is a single scalar
        # tensor;
        # theano always returns the gradient itself (and requires
        # that loss is a single scalar tensor)
        assert isinstance(grads, list)
        assert len(grads) == 1
        grad = grads[0]
    elif K.backend() == 'cntk':  # pragma: no cover
        assert isinstance(grads, list)
        assert len(grads) == 1
        grad = grads[0]
        grad = K.reshape(grad, (1,) + grad.shape)
    else:
        assert not isinstance(grads, list)
        grad = grads

    self._loss_fn = K.function([images_input, label_input], [loss])
    self._batch_pred_fn = K.function([images_input], [predictions])
    self._pred_grad_fn = K.function([images_input, label_input],
                                    [predictions, grad])

    self._predictions_are_logits = predictions_are_logits
def sparse_crossentropy_ignoring_last_label(y_true, y_pred):
    nb_classes = K.int_shape(y_pred)[-1]
    y_true = K.one_hot(tf.to_int32(y_true[:, :, 0]), nb_classes + 1)[:, :, :-1]
    return K.categorical_crossentropy(y_true, y_pred)
def compute_output_shape(self, input_shape):
    return (None,) + K.int_shape(self.result)[1:]
def TK_TCN_regression(n_classes,
                      feat_dim,
                      max_len,
                      gap=1,
                      dropout=0.0,
                      W_regularizer=l1(1.e-4),
                      activation="relu"):
    """TCN regression model. num_block = 2. initial_conv_num = 64.
    The last layer is fully-connected instead of softmax.

    Args:
        n_classes: number of classes for this kind of label.
        feat_dim: the dimension of the feature.
        max_len: the number of frames for each video.

    Returns:
        model: uncompiled model.
    """
    ROW_AXIS = 1
    CHANNEL_AXIS = 2

    initial_conv_len = 8
    initial_conv_num = 64

    config = [
        [(1, 8, 64)],
        [(1, 8, 64)],
        [(1, 8, 64)],
        [(2, 8, 128)],
        [(1, 8, 128)],
        [(1, 8, 128)],
    ]

    input = Input(shape=(max_len, feat_dim))
    model = input

    model = Convolution1D(initial_conv_num,
                          initial_conv_len,
                          init="he_normal",
                          border_mode="same",
                          subsample_length=1,
                          W_regularizer=W_regularizer)(model)

    for depth in range(0, len(config)):
        blocks = []
        for stride, filter_dim, num in config[depth]:
            # residual block
            bn = BatchNormalization(mode=0, axis=CHANNEL_AXIS)(model)
            relu = Activation(activation)(bn)
            dr = Dropout(dropout)(relu)
            conv = Convolution1D(num,
                                 filter_dim,
                                 init="he_normal",
                                 border_mode="same",
                                 subsample_length=stride,
                                 W_regularizer=W_regularizer)(dr)

            # potential downsample
            conv_shape = K.int_shape(conv)
            model_shape = K.int_shape(model)
            if conv_shape[CHANNEL_AXIS] != model_shape[CHANNEL_AXIS]:
                model = Convolution1D(num,
                                      1,
                                      init="he_normal",
                                      border_mode="same",
                                      subsample_length=2,
                                      W_regularizer=W_regularizer)(model)

            # merge block
            model = merge([model, conv], mode='sum', concat_axis=CHANNEL_AXIS)

    # final bn+relu
    bn = BatchNormalization(mode=0, axis=CHANNEL_AXIS)(model)
    model = Activation(activation)(bn)

    if gap:
        pool_window_shape = K.int_shape(model)
        gap = AveragePooling1D(pool_window_shape[ROW_AXIS], stride=1)(model)
        flatten = Flatten()(gap)
    else:
        flatten = Flatten()(model)

    dense = Dense(output_dim=n_classes, init="he_normal",
                  activation="softmax")(flatten)
    dense = Dense(output_dim=1, init="normal")(dense)

    model = Model(input=input, output=dense)
    # optimizer = SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=True)
    # model.compile(loss='mean_absolute_error', optimizer='adam')
    return model
def SSD300(input_shape, num_classes=21):
    # 300, 300, 3
    input_tensor = Input(shape=input_shape)
    img_size = (input_shape[1], input_shape[0])

    # SSD structure: `net` is a dict of named layer outputs
    net = mobilenet(input_tensor)

    # ------------- Process the extracted backbone features -------------
    num_priors = 4
    # Box regression head.
    # num_priors is the number of prior boxes per grid cell;
    # 4 is the (x, y, h, w) adjustment.
    net['conv4_3_loc'] = Conv2D(num_priors * 4, kernel_size=(3, 3),
                                padding='same',
                                name='conv4_3_loc')(net['conv4_3'])
    net['conv4_3_loc_flat'] = Flatten(name='conv4_3_loc_flat')(
        net['conv4_3_loc'])
    # num_priors is the number of prior boxes per grid cell;
    # num_classes is the number of classes being predicted.
    net['conv4_3_conf'] = Conv2D(num_priors * num_classes, kernel_size=(3, 3),
                                 padding='same',
                                 name='conv4_3_conf')(net['conv4_3'])
    net['conv4_3_conf_flat'] = Flatten(name='conv4_3_conf_flat')(
        net['conv4_3_conf'])
    priorbox = PriorBox(img_size, 30.0, max_size=60.0, aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv4_3_priorbox')
    net['conv4_3_priorbox'] = priorbox(net['conv4_3'])

    # Process the fc7 layer
    num_priors = 6
    # num_priors is the number of prior boxes per grid cell;
    # 4 is the (x, y, h, w) adjustment.
    net['fc7_mbox_loc'] = Conv2D(num_priors * 4, kernel_size=(3, 3),
                                 padding='same',
                                 name='fc7_mbox_loc')(net['fc7'])
    net['fc7_mbox_loc_flat'] = Flatten(name='fc7_mbox_loc_flat')(
        net['fc7_mbox_loc'])
    # num_priors is the number of prior boxes per grid cell;
    # num_classes is the number of classes being predicted.
    net['fc7_mbox_conf'] = Conv2D(num_priors * num_classes, kernel_size=(3, 3),
                                  padding='same',
                                  name='fc7_mbox_conf')(net['fc7'])
    net['fc7_mbox_conf_flat'] = Flatten(name='fc7_mbox_conf_flat')(
        net['fc7_mbox_conf'])
    priorbox = PriorBox(img_size, 60.0, max_size=111.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='fc7_mbox_priorbox')
    net['fc7_mbox_priorbox'] = priorbox(net['fc7'])

    # Process conv6_2
    num_priors = 6
    x = Conv2D(num_priors * 4, kernel_size=(3, 3), padding='same',
               name='conv6_2_mbox_loc')(net['conv6_2'])
    net['conv6_2_mbox_loc'] = x
    net['conv6_2_mbox_loc_flat'] = Flatten(name='conv6_2_mbox_loc_flat')(
        net['conv6_2_mbox_loc'])
    x = Conv2D(num_priors * num_classes, kernel_size=(3, 3), padding='same',
               name='conv6_2_mbox_conf')(net['conv6_2'])
    net['conv6_2_mbox_conf'] = x
    net['conv6_2_mbox_conf_flat'] = Flatten(name='conv6_2_mbox_conf_flat')(
        net['conv6_2_mbox_conf'])
    priorbox = PriorBox(img_size, 111.0, max_size=162.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv6_2_mbox_priorbox')
    net['conv6_2_mbox_priorbox'] = priorbox(net['conv6_2'])

    # Process conv7_2
    num_priors = 6
    x = Conv2D(num_priors * 4, kernel_size=(3, 3), padding='same',
               name='conv7_2_mbox_loc')(net['conv7_2'])
    net['conv7_2_mbox_loc'] = x
    net['conv7_2_mbox_loc_flat'] = Flatten(name='conv7_2_mbox_loc_flat')(
        net['conv7_2_mbox_loc'])
    x = Conv2D(num_priors * num_classes, kernel_size=(3, 3), padding='same',
               name='conv7_2_mbox_conf')(net['conv7_2'])
    net['conv7_2_mbox_conf'] = x
    net['conv7_2_mbox_conf_flat'] = Flatten(name='conv7_2_mbox_conf_flat')(
        net['conv7_2_mbox_conf'])
    priorbox = PriorBox(img_size, 162.0, max_size=213.0, aspect_ratios=[2, 3],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv7_2_mbox_priorbox')
    net['conv7_2_mbox_priorbox'] = priorbox(net['conv7_2'])

    # Process conv8_2
    num_priors = 4
    x = Conv2D(num_priors * 4, kernel_size=(3, 3), padding='same',
               name='conv8_2_mbox_loc')(net['conv8_2'])
    net['conv8_2_mbox_loc'] = x
    net['conv8_2_mbox_loc_flat'] = Flatten(name='conv8_2_mbox_loc_flat')(
        net['conv8_2_mbox_loc'])
    x = Conv2D(num_priors * num_classes, kernel_size=(3, 3), padding='same',
               name='conv8_2_mbox_conf')(net['conv8_2'])
    net['conv8_2_mbox_conf'] = x
    net['conv8_2_mbox_conf_flat'] = Flatten(name='conv8_2_mbox_conf_flat')(
        net['conv8_2_mbox_conf'])
    priorbox = PriorBox(img_size, 213.0, max_size=264.0, aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv8_2_mbox_priorbox')
    net['conv8_2_mbox_priorbox'] = priorbox(net['conv8_2'])

    # Process conv9_2
    num_priors = 4
    x = Conv2D(num_priors * 4, kernel_size=(3, 3), padding='same',
               name='conv9_2_mbox_loc')(net['conv9_2'])
    net['conv9_2_mbox_loc'] = x
    net['conv9_2_mbox_loc_flat'] = Flatten(name='conv9_2_mbox_loc_flat')(
        net['conv9_2_mbox_loc'])
    x = Conv2D(num_priors * num_classes, kernel_size=(3, 3), padding='same',
               name='conv9_2_mbox_conf')(net['conv9_2'])
    net['conv9_2_mbox_conf'] = x
    net['conv9_2_mbox_conf_flat'] = Flatten(name='conv9_2_mbox_conf_flat')(
        net['conv9_2_mbox_conf'])
    priorbox = PriorBox(img_size, 264.0, max_size=315.0, aspect_ratios=[2],
                        variances=[0.1, 0.1, 0.2, 0.2],
                        name='conv9_2_mbox_priorbox')
    net['conv9_2_mbox_priorbox'] = priorbox(net['conv9_2'])

    # Stack all head outputs
    net['mbox_loc'] = concatenate([net['conv4_3_loc_flat'],
                                   net['fc7_mbox_loc_flat'],
                                   net['conv6_2_mbox_loc_flat'],
                                   net['conv7_2_mbox_loc_flat'],
                                   net['conv8_2_mbox_loc_flat'],
                                   net['conv9_2_mbox_loc_flat']],
                                  axis=1, name='mbox_loc')
    net['mbox_conf'] = concatenate([net['conv4_3_conf_flat'],
                                    net['fc7_mbox_conf_flat'],
                                    net['conv6_2_mbox_conf_flat'],
                                    net['conv7_2_mbox_conf_flat'],
                                    net['conv8_2_mbox_conf_flat'],
                                    net['conv9_2_mbox_conf_flat']],
                                   axis=1, name='mbox_conf')
    net['mbox_priorbox'] = concatenate([net['conv4_3_priorbox'],
                                        net['fc7_mbox_priorbox'],
                                        net['conv6_2_mbox_priorbox'],
                                        net['conv7_2_mbox_priorbox'],
                                        net['conv8_2_mbox_priorbox'],
                                        net['conv9_2_mbox_priorbox']],
                                       axis=1, name='mbox_priorbox')

    if hasattr(net['mbox_loc'], '_keras_shape'):
        num_boxes = net['mbox_loc']._keras_shape[-1] // 4
    elif hasattr(net['mbox_loc'], 'int_shape'):
        num_boxes = K.int_shape(net['mbox_loc'])[-1] // 4

    # 8732, 4
    net['mbox_loc'] = Reshape((num_boxes, 4),
                              name='mbox_loc_final')(net['mbox_loc'])
    # 8732, 21
    net['mbox_conf'] = Reshape((num_boxes, num_classes),
                               name='mbox_conf_logits')(net['mbox_conf'])
    net['mbox_conf'] = Activation('softmax',
                                  name='mbox_conf_final')(net['mbox_conf'])

    net['predictions'] = concatenate([net['mbox_loc'],
                                      net['mbox_conf'],
                                      net['mbox_priorbox']],
                                     axis=2, name='predictions')
    model = Model(input_tensor, net['predictions'])
    return model
def build(input_shape, num_outputs, block_fn, repetitions):
    """Builds a custom ResNet like architecture.

    Args:
        input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols)
        num_outputs: The number of outputs at final softmax layer
        block_fn: The block function to use. This is either `basic_block` or
            `bottleneck`. The original paper used basic_block for layers < 50
        repetitions: Number of repetitions of various block units.
            At each block unit, the number of filters are doubled
            and the input size is halved

    Returns:
        The keras `Model`.
    """
    _handle_dim_ordering()
    if len(input_shape) != 3:
        raise Exception(
            "Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

    # Permute dimension order if necessary
    # if K.image_dim_ordering() == 'tf':
    #     input_shape = (input_shape[1], input_shape[2], input_shape[0])

    # Load function from str if needed.
    block_fn = _get_block(block_fn)

    input = Input(shape=input_shape)
    asninput = _bn_relu(input)
    if input_shape[0] == 32:
        # for CIFAR
        nb_filter = 16
        pool1 = _conv_bn_relu(nb_filter=nb_filter, nb_row=3, nb_col=3,
                              subsample=(1, 1))(asninput)
        repetitions = repetitions[:-1]
    else:
        nb_filter = 64
        conv1 = _conv_bn_relu(nb_filter=nb_filter, nb_row=7, nb_col=7,
                              subsample=(2, 2))(asninput)
        pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2),
                             border_mode="same")(conv1)

    block = pool1
    for i, r in enumerate(repetitions):
        block = _residual_block(block_fn, nb_filter=nb_filter,
                                repetitions=r, is_first_layer=(i == 0))(block)
        nb_filter *= 2

    # `!=` instead of `is not`: identity comparison with a string literal
    # is unreliable
    if block_fn.__name__ != 'basic_block_v0':
        # Last activation
        block = _bn_relu(block)

    block_norm = BatchNormalization(mode=0, axis=CHANNEL_AXIS)(block)
    block_output = Activation(activation='relu')(block_norm)

    # Classifier block
    block_shape = K.int_shape(block)
    pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS],
                                        block_shape[COL_AXIS]),
                             strides=(1, 1))(block_output)
    flatten1 = Flatten()(pool2)
    dense = Dense(output_dim=num_outputs, init="he_normal",
                  activation="softmax")(flatten1)

    model = Model(input=input, output=dense)
    return model
def SSD(input_shape, num_classes):
    """SSD300 architecture.

    # Arguments
        input_shape: Shape of the input image, expected to be either
            (300, 300, 3) or (3, 300, 300) (not tested).
        num_classes: Number of classes including background.

    # References
        https://arxiv.org/abs/1512.02325
    """
    alpha = 1.0
    img_size = (input_shape[1], input_shape[0])
    input_shape = (input_shape[1], input_shape[0], 3)
    mobilenetv2_input_shape = (224, 224, 3)

    Input0 = Input(input_shape)
    mobilenetv2 = MobileNetV2(input_shape=mobilenetv2_input_shape,
                              include_top=False, weights="imagenet")
    FeatureExtractor = Model(
        inputs=mobilenetv2.input,
        outputs=mobilenetv2.get_layer("res_connect_12").output)
    # get_3rd_layer_output = K.function([mobilenetv2.layers[114].input, K.learning_phase()],
    #                                   [mobilenetv2.layers[147].output])

    x = FeatureExtractor(Input0)
    x, pwconv3 = _isb4conv13(x, filters=160, alpha=alpha, stride=1,
                             expansion=6, block_id=13)
    # x = get_3rd_layer_output([x, 1])[0]
    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
                            expansion=6, block_id=14)
    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1,
                            expansion=6, block_id=15)
    x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1,
                            expansion=6, block_id=16)
    x, pwconv4 = Conv(x, 1280)
    x, pwconv5 = LiteConv(x, 5, 512)
    x, pwconv6 = LiteConv(x, 6, 256)
    x, pwconv7 = LiteConv(x, 7, 128)
    x, pwconv8 = LiteConv(x, 8, 128)

    pwconv3_mbox_loc_flat, pwconv3_mbox_conf_flat, pwconv3_mbox_priorbox = prediction(
        pwconv3, 3, 3, 60.0, None, [2], num_classes, img_size)
    pwconv4_mbox_loc_flat, pwconv4_mbox_conf_flat, pwconv4_mbox_priorbox = prediction(
        pwconv4, 4, 6, 105.0, 150.0, [2, 3], num_classes, img_size)
    pwconv5_mbox_loc_flat, pwconv5_mbox_conf_flat, pwconv5_mbox_priorbox = prediction(
        pwconv5, 5, 6, 150.0, 195.0, [2, 3], num_classes, img_size)
    pwconv6_mbox_loc_flat, pwconv6_mbox_conf_flat, pwconv6_mbox_priorbox = prediction(
        pwconv6, 6, 6, 195.0, 240.0, [2, 3], num_classes, img_size)
    pwconv7_mbox_loc_flat, pwconv7_mbox_conf_flat, pwconv7_mbox_priorbox = prediction(
        pwconv7, 7, 6, 240.0, 285.0, [2, 3], num_classes, img_size)
    pwconv8_mbox_loc_flat, pwconv8_mbox_conf_flat, pwconv8_mbox_priorbox = prediction(
        pwconv8, 8, 6, 285.0, 300.0, [2, 3], num_classes, img_size)

    # Gather all predictions
    mbox_loc = concatenate(
        [
            pwconv3_mbox_loc_flat,
            pwconv4_mbox_loc_flat,
            pwconv5_mbox_loc_flat,
            pwconv6_mbox_loc_flat,
            pwconv7_mbox_loc_flat,
            pwconv8_mbox_loc_flat,
        ],
        axis=1,
        name="mbox_loc",
    )
    mbox_conf = concatenate(
        [
            pwconv3_mbox_conf_flat,
            pwconv4_mbox_conf_flat,
            pwconv5_mbox_conf_flat,
            pwconv6_mbox_conf_flat,
            pwconv7_mbox_conf_flat,
            pwconv8_mbox_conf_flat,
        ],
        axis=1,
        name="mbox_conf",
    )
    mbox_priorbox = concatenate(
        [
            pwconv3_mbox_priorbox,
            pwconv4_mbox_priorbox,
            pwconv5_mbox_priorbox,
            pwconv6_mbox_priorbox,
            pwconv7_mbox_priorbox,
            pwconv8_mbox_priorbox,
        ],
        axis=1,
        name="mbox_priorbox",
    )

    if hasattr(mbox_loc, "_keras_shape"):
        num_boxes = mbox_loc._keras_shape[-1] // 4
    elif hasattr(mbox_loc, "int_shape"):
        num_boxes = K.int_shape(mbox_loc)[-1] // 4

    mbox_loc = Reshape((num_boxes, 4), name="mbox_loc_final")(mbox_loc)
    mbox_conf = Reshape((num_boxes, num_classes),
                        name="mbox_conf_logits")(mbox_conf)
    mbox_conf = Activation("softmax", name="mbox_conf_final")(mbox_conf)

    predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox],
                              axis=2, name="predictions")
    model = Model(inputs=Input0, outputs=predictions)
    return model
def interpolate_(self, image, sampled_grids, output_size):
    batch_size = K.shape(image)[0]
    height = K.shape(image)[1]
    width = K.shape(image)[2]
    num_channels = K.shape(image)[3]

    x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
    y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')

    # map normalized [-1, 1] grid coordinates to pixel coordinates
    x = 0.5 * (x + 1.0) * K.cast(width, dtype='float32')
    y = 0.5 * (y + 1.0) * K.cast(height, dtype='float32')

    x_0 = K.cast(x, 'int32')
    x_1 = x_0 + 1
    y_0 = K.cast(y, 'int32')
    y_1 = y_0 + 1

    x_max = int(K.int_shape(image)[2] - 1)
    y_max = int(K.int_shape(image)[1] - 1)

    x_0 = K.clip(x_0, 0, x_max)
    y_0 = K.clip(y_0, 0, y_max)
    x_1 = K.clip(x_1, 0, x_max)
    y_1 = K.clip(y_1, 0, y_max)

    pixels_batch = K.arange(0, batch_size) * (height * width)
    pixels_batch = K.expand_dims(pixels_batch, axis=-1)
    flat_output_size = output_size[0] * output_size[1]
    base = K.repeat_elements(pixels_batch, flat_output_size, axis=1)
    base = K.flatten(base)

    y0_base = y_0 * width
    y0_base = base + y0_base
    y1_base = y_1 * width
    y1_base = y1_base + base

    index_a = y0_base + x_0
    index_b = y1_base + x_0
    index_c = y0_base + x_1
    index_d = y1_base + x_1

    flat_image = K.reshape(image, shape=(-1, num_channels))
    flat_image = K.cast(flat_image, dtype='float32')
    pixel_vals_a = K.gather(flat_image, index_a)
    pixel_vals_b = K.gather(flat_image, index_b)
    pixel_vals_c = K.gather(flat_image, index_c)
    pixel_vals_d = K.gather(flat_image, index_d)

    x_0 = K.cast(x_0, 'float32')
    x_1 = K.cast(x_1, 'float32')
    y_0 = K.cast(y_0, 'float32')
    y_1 = K.cast(y_1, 'float32')

    # bilinear interpolation weights: each corner is weighted by the area
    # of the opposing sub-rectangle
    area_a = K.expand_dims(((x_1 - x) * (y_1 - y)), 1)
    area_b = K.expand_dims(((x_1 - x) * (y - y_0)), 1)
    area_c = K.expand_dims(((x - x_0) * (y_1 - y)), 1)
    area_d = K.expand_dims(((x - x_0) * (y - y_0)), 1)

    a_vals = area_a * pixel_vals_a
    b_vals = area_b * pixel_vals_b
    c_vals = area_c * pixel_vals_c
    d_vals = area_d * pixel_vals_d
    return a_vals + b_vals + c_vals + d_vals
def call(self, inputs):
    if self.pattern is None:
        in_dim = K.int_shape(inputs)[-1]
        self.pattern = [in_dim // 2, in_dim - in_dim // 2]
    partition = [0] + list(np.cumsum(self.pattern))
    return [inputs[..., i:j] for i, j in zip(partition, partition[1:])]
def _create_all_weights(self, params):
    shapes = [backend.int_shape(p) for p in params]
    accumulators = [backend.zeros(shape) for shape in shapes]
    delta_accumulators = [backend.zeros(shape) for shape in shapes]
    self.weights = accumulators + delta_accumulators
    return accumulators, delta_accumulators
def call(self, inputs):
    self.pattern = [K.int_shape(i)[-1] for i in inputs]
    return K.concatenate(inputs, -1)
def attention(self, pre_q, pre_v, pre_k, out_seq_len: int, d_model: int,
              training=None):
    """
    Calculates the output of the attention once the affine transformations
    of the inputs are done. Here are the shapes of the arguments:
    :param pre_q: (batch_size, q_seq_len, num_heads, d_model // num_heads)
    :param pre_v: (batch_size, v_seq_len, num_heads, d_model // num_heads)
    :param pre_k: (batch_size, k_seq_len, num_heads, d_model // num_heads)
    :param out_seq_len: the length of the output sequence
    :param d_model: dimensionality of the model (by the paper)
    :param training: Passed by Keras. Should not be defined manually.
        Optional scalar tensor indicating if we're in training
        or inference phase.
    """
    # shaping Q and V into (batch_size, num_heads, seq_len, d_model//heads)
    q = K.permute_dimensions(pre_q, [0, 2, 1, 3])
    v = K.permute_dimensions(pre_v, [0, 2, 1, 3])

    if self.compression_window_size is None:
        k_transposed = K.permute_dimensions(pre_k, [0, 2, 3, 1])
    else:
        # Memory-compressed attention described in paper
        # "Generating Wikipedia by Summarizing Long Sequences"
        # (https://arxiv.org/pdf/1801.10198.pdf)
        # It compresses keys and values using 1D-convolution which reduces
        # the size of Q * K_transposed from roughly seq_len^2
        # to convoluted_seq_len^2. If we use strided convolution with
        # window size = 3 and stride = 3, memory requirements of such
        # memory-compressed attention will be 9 times smaller than
        # that of the original version.
        if self.use_masking:
            raise NotImplementedError(
                "Masked memory-compressed attention has not "
                "been implemented yet")
        k = K.permute_dimensions(pre_k, [0, 2, 1, 3])
        k, v = [
            K.reshape(
                # Step 3: Return the result to its original dimensions
                # (batch_size, num_heads, seq_len, d_model//heads)
                K.bias_add(
                    # Step 3: ... and add bias
                    K.conv1d(
                        # Step 2: we "compress" K and V using strided conv
                        K.reshape(
                            # Step 1: we reshape K and V to
                            # (batch + num_heads, seq_len, d_model//heads)
                            item,
                            (-1,
                             K.int_shape(item)[-2],
                             d_model // self.num_heads)),
                        kernel,
                        strides=self.compression_window_size,
                        padding='valid', data_format='channels_last'),
                    bias,
                    data_format='channels_last'),
                # new shape
                K.concatenate(
                    [K.shape(item)[:2],
                     [-1, d_model // self.num_heads]]))
            for item, kernel, bias in (
                (k, self.k_conv_kernel, self.k_conv_bias),
                (v, self.v_conv_kernel, self.v_conv_bias))
        ]
        k_transposed = K.permute_dimensions(k, [0, 1, 3, 2])

    # shaping K into (batch_size, num_heads, d_model//heads, seq_len)
    # for further matrix multiplication
    sqrt_d = K.constant(np.sqrt(d_model // self.num_heads),
                        dtype=K.floatx())
    q_shape = K.int_shape(q)
    k_t_shape = K.int_shape(k_transposed)
    v_shape = K.int_shape(v)

    # before performing batch_dot all tensors are being converted to 3D
    # shape (batch_size * num_heads, rows, cols) to make sure batch_dot
    # performs identically on all backends
    attention_heads = K.reshape(
        K.batch_dot(
            self.apply_dropout_if_needed(
                K.softmax(
                    self.mask_attention_if_needed(
                        K.batch_dot(
                            K.reshape(q, (-1,) + q_shape[-2:]),
                            K.reshape(k_transposed, (-1,) + k_t_shape[-2:]))
                        / sqrt_d)),
                training=training),
            K.reshape(v, (-1,) + v_shape[-2:])),
        (-1, self.num_heads, q_shape[-2], v_shape[-1]))

    attention_heads_merged = K.reshape(
        K.permute_dimensions(attention_heads, [0, 2, 1, 3]),
        (-1, d_model))
    attention_out = K.reshape(
        K.dot(attention_heads_merged, self.output_weights),
        (-1, out_seq_len, d_model))
    return attention_out
def _create_all_weights(self, params):
    shapes = [backend.int_shape(p) for p in params]
    moments = [backend.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + moments
    return moments
def __call__(self, x, mask=None):
    args = ['input', 'ground_truth', 'initial_readout', 'states']
    if type(x) is dict:
        x = list(map(x.get, args))
    elif type(x) not in [list, tuple]:
        x = [x, None, None, None]

    self.input_format = []
    input_tensors = []
    for i in range(3):
        if x[i] is not None:
            self.input_format += [args[i]]
            input_tensors += [x[i]]
    if x[3] is not None:
        self.input_format += [args[3]]
        states = []
        self.state_indices = []
        for i in range(len(x[3])):
            if x[3][i] is not None:
                states += [x[3][i]]
                self.state_indices += [i]
        input_tensors += states

    if not self.built:
        self.assert_input_compatibility(x)
        input_shapes = []
        for x_elem in input_tensors:
            if hasattr(x_elem, '_keras_shape'):
                input_shapes.append(x_elem._keras_shape)
            elif hasattr(K, 'int_shape'):
                input_shapes.append(K.int_shape(x_elem))
            elif x_elem is not None:
                raise Exception('You tried to call layer "' + self.name +
                                '". This layer has no information'
                                ' about its expected input shape, '
                                'and thus cannot be built. '
                                'You can build it manually via: '
                                '`layer.build(batch_input_shape)`')
        self.build(input_shapes[0])
        self.built = True

    self.assert_input_compatibility(x[0])

    input_added = False
    inbound_layers = []
    node_indices = []
    tensor_indices = []
    self.ignore_indices = []
    for i in range(len(input_tensors)):
        input_tensor = input_tensors[i]
        if hasattr(input_tensor, '_keras_history') and input_tensor._keras_history:
            previous_layer, node_index, tensor_index = input_tensor._keras_history
            inbound_layers.append(previous_layer)
            node_indices.append(node_index)
            tensor_indices.append(tensor_index)
        else:
            inbound_layers = None
            break

    if inbound_layers:
        self.add_inbound_node(inbound_layers, node_indices, tensor_indices)
        input_added = True

    if input_added:
        outputs = self.inbound_nodes[-1].output_tensors
        if len(outputs) == 1:
            return outputs[0]
        else:
            return outputs
    else:
        return self.call(x, mask)
def _main(args):
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    weights_file = open(weights_path, 'rb')
    major, minor, revision = np.ndarray(shape=(3, ),
                                        dtype='int32',
                                        buffer=weights_file.read(12))
    if (major * 10 + minor) >= 2 and major < 1000 and minor < 1000:
        seen = np.ndarray(shape=(1, ),
                          dtype='int64',
                          buffer=weights_file.read(8))
    else:
        seen = np.ndarray(shape=(1, ),
                          dtype='int32',
                          buffer=weights_file.read(4))
    print('Weights Header: ', major, minor, revision, seen)

    print('Parsing Darknet config.')
    unique_config_file = unique_config_sections(config_path)
    cfg_parser = configparser.ConfigParser()
    cfg_parser.read_file(unique_config_file)

    print('Creating Keras model.')
    input_layer = Input(shape=(None, None, 3))
    prev_layer = input_layer
    all_layers = []

    weight_decay = float(cfg_parser['net_0']['decay']
                         ) if 'net_0' in cfg_parser.sections() else 5e-4
    count = 0
    out_index = []
    for section in cfg_parser.sections():
        print('Parsing section {}'.format(section))
        if section.startswith('convolutional'):
            filters = int(cfg_parser[section]['filters'])
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            pad = int(cfg_parser[section]['pad'])
            activation = cfg_parser[section]['activation']
            batch_normalize = 'batch_normalize' in cfg_parser[section]

            padding = 'same' if pad == 1 and stride == 1 else 'valid'

            # Setting weights.
            # Darknet serializes convolutional weights as:
            # [bias/beta, [gamma, mean, variance], conv_weights]
            prev_layer_shape = K.int_shape(prev_layer)

            weights_shape = (size, size, prev_layer_shape[-1], filters)
            darknet_w_shape = (filters, weights_shape[2], size, size)
            weights_size = np.product(weights_shape)

            print('conv2d', 'bn' if batch_normalize else '  ', activation,
                  weights_shape)

            conv_bias = np.ndarray(shape=(filters, ),
                                   dtype='float32',
                                   buffer=weights_file.read(filters * 4))
            count += filters

            if batch_normalize:
                bn_weights = np.ndarray(shape=(3, filters),
                                        dtype='float32',
                                        buffer=weights_file.read(filters * 12))
                count += 3 * filters

                bn_weight_list = [
                    bn_weights[0],  # scale gamma
                    conv_bias,      # shift beta
                    bn_weights[1],  # running mean
                    bn_weights[2]   # running var
                ]

            conv_weights = np.ndarray(
                shape=darknet_w_shape,
                dtype='float32',
                buffer=weights_file.read(weights_size * 4))
            count += weights_size

            # DarkNet conv_weights are serialized Caffe-style:
            # (out_dim, in_dim, height, width)
            # We would like to set these to Tensorflow order:
            # (height, width, in_dim, out_dim)
            conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
            conv_weights = [conv_weights] if batch_normalize else [
                conv_weights, conv_bias
            ]

            # Handle activation.
            act_fn = None
            if activation == 'leaky':
                pass  # Add advanced activation later.
            elif activation != 'linear':
                raise ValueError(
                    'Unknown activation function `{}` in section {}'.format(
                        activation, section))

            # Create Conv2D layer
            if stride > 1:
                # Darknet uses left and top padding instead of 'same' mode
                prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)
            conv_layer = (Conv2D(filters, (size, size),
                                 strides=(stride, stride),
                                 kernel_regularizer=l2(weight_decay),
                                 use_bias=not batch_normalize,
                                 weights=conv_weights,
                                 activation=act_fn,
                                 padding=padding))(prev_layer)

            if batch_normalize:
                conv_layer = (BatchNormalization(
                    weights=bn_weight_list))(conv_layer)
            prev_layer = conv_layer

            if activation == 'linear':
                all_layers.append(prev_layer)
            elif activation == 'leaky':
                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                prev_layer = act_layer
                all_layers.append(act_layer)

        elif section.startswith('route'):
            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
            layers = [all_layers[i] for i in ids]
            if len(layers) > 1:
                print('Concatenating route layers:', layers)
                concatenate_layer = Concatenate()(layers)
                all_layers.append(concatenate_layer)
                prev_layer = concatenate_layer
            else:
                skip_layer = layers[0]  # only one layer to route
                all_layers.append(skip_layer)
                prev_layer = skip_layer

        # Our changes for tiny-YOLO conversion
        elif section.startswith('maxpool'):
            size = int(cfg_parser[section]['size'])
            stride = int(cfg_parser[section]['stride'])
            all_layers.append(
                MaxPooling2D(pool_size=(size, size),
                             strides=(stride, stride),
                             padding='same')(prev_layer))
            prev_layer = all_layers[-1]
        # End of our changes

        elif section.startswith('shortcut'):
            index = int(cfg_parser[section]['from'])
            activation = cfg_parser[section]['activation']
            assert activation == 'linear', 'Only linear activation supported.'
            all_layers.append(Add()([all_layers[index], prev_layer]))
            prev_layer = all_layers[-1]

        elif section.startswith('upsample'):
            stride = int(cfg_parser[section]['stride'])
            assert stride == 2, 'Only stride=2 supported.'
            all_layers.append(UpSampling2D(stride)(prev_layer))
            prev_layer = all_layers[-1]

        elif section.startswith('yolo'):
            out_index.append(len(all_layers) - 1)
            all_layers.append(None)
            prev_layer = all_layers[-1]

        elif section.startswith('net'):
            pass

        else:
            raise ValueError(
                'Unsupported section header type: {}'.format(section))

    # Create and save model.
    model = Model(inputs=input_layer,
                  outputs=[all_layers[i] for i in out_index])
    print(model.summary())
    model.save('{}'.format(output_path))
    print('Saved Keras model to {}'.format(output_path))

    # Check to see if all weights have been read.
    remaining_weights = len(weights_file.read()) / 4
    weights_file.close()
    print('Read {} of {} from Darknet weights.'.format(
        count, count + remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
def compute_output_shape(self, input_shape): # `input_shape` is already a shape tuple (or a list of shape tuples), not a tensor, so return it directly rather than passing it through K.int_shape. if isinstance(input_shape, list): return input_shape[0] return input_shape
def build(input_shape, num_outputs, block_fn, repetitions, base_filters=64, shortcut_option='B', downsampling_top=True): """Builds a custom ResNet-like architecture. Args: input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols) num_outputs: The number of outputs at final softmax layer block_fn: The block function to use. This is either `basic_block` or `bottleneck`. The original paper used basic_block for layers < 50 repetitions: Number of repetitions of various block units. At each block unit, the number of filters is doubled and the input size is halved base_filters: The number of filters that the first residual block has. shortcut_option: The shortcut option to use in the original paper. Either 'A' (identity map with padded zeros) or 'B' (convolutional map). downsampling_top: Whether to include the max pooling after the first convolutional layer (that layer also has stride of 2 if this is set to True) Returns: The keras `Model`. """ _handle_dim_ordering() _handle_shortcut_option(shortcut_option) if len(input_shape) != 3: raise Exception("Input shape should be a tuple (nb_channels, " "nb_rows, nb_cols)") # Permute dimension order if necessary if K.image_data_format() == 'channels_last': input_shape = (input_shape[1], input_shape[2], input_shape[0]) # Load function from str if needed. block_fn = _get_block(block_fn) input = Input(shape=input_shape) # set up first layer if downsampling_top: # This is based on the original ResNet for Tiny ImageNet conv1 = _conv_bn_relu(filters=base_filters, kernel_size=(7, 7), strides=(2, 2))(input) pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1) block = pool1 else: # This is based on the ResNet for CIFAR-10, which does not contain # the pooling layer conv1 = _conv_bn_relu(filters=base_filters, kernel_size=(3, 3), strides=(1, 1))(input) block = conv1 # add residual blocks filters = base_filters for i, r in enumerate(repetitions): block = _residual_block(block_fn, filters=filters, repetitions=r, is_first_layer=(i == 0))(block) filters *= 2 # Last activation block = _bn_relu(block) # Classifier block block_shape = K.int_shape(block) pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]), strides=(1, 1))(block) flatten1 = Flatten()(pool2) dense = Dense(units=num_outputs, kernel_initializer="he_normal", activation="softmax")(flatten1) model = Model(inputs=input, outputs=dense) return model
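# Usage sketch, assuming `_get_block` resolves the string 'basic_block' as the
# docstring implies; a CIFAR-10-style ResNet-20 (3 stages of 3 basic blocks,
# 16 base filters, option-A shortcuts, no downsampling top) would be:
model = build(input_shape=(3, 32, 32), num_outputs=10,
              block_fn='basic_block', repetitions=[3, 3, 3],
              base_filters=16, shortcut_option='A',
              downsampling_top=False)
model.summary()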
def call(self, inputs, mask=None): """ Calculate the probability of each answer option. Parameters ---------- inputs: List of Tensors The inputs to the layer must be passed in as a list to the ``call`` function. The inputs expected are a Tensor of document indices, a Tensor of document probabilities, and a Tensor of options (in that order). The document indices tensor is a 2D tensor of shape (batch size, document_length). The document probabilities tensor is a 2D Tensor of shape (batch size, document_length). The options tensor is of shape (batch size, num_options, option_length). mask: Tensor or None, optional (default=None) Tensor of shape (batch size, max number of options) representing which options are padding and thus have a 0 in the associated mask position. Returns ------- options_probabilities : Tensor Tensor with shape (batch size, max number of options) with floats, where each float is the normalized probability of the option as calculated based on ``self.multiword_option_mode``. """ document_indices, document_probabilities, options = inputs # This takes `document_indices` from (batch_size, document_length) to # (batch_size, num_options, option_length, document_length), with the # original indices repeated, so that we can create a mask indicating # which options are used in the probability computation. We do the # same thing for `document_probabilities` to select the probability # values corresponding to the words in the options. expanded_indices = K.expand_dims(K.expand_dims(document_indices, 1), 1) tiled_indices = K.repeat_elements(K.repeat_elements( expanded_indices, K.int_shape(options)[1], axis=1), K.int_shape(options)[2], axis=2) expanded_probabilities = K.expand_dims( K.expand_dims(document_probabilities, 1), 1) tiled_probabilities = K.repeat_elements(K.repeat_elements( expanded_probabilities, K.int_shape(options)[1], axis=1), K.int_shape(options)[2], axis=2) expanded_options = K.expand_dims(options, 3) tiled_options = K.repeat_elements(expanded_options, K.int_shape(document_indices)[-1], axis=3) # This generates a binary tensor of the same shape as tiled_options / # tiled_indices that indicates whether each index is an option word or padding. options_words_mask = K.cast(K.equal(tiled_options, tiled_indices), "float32") # This applies a mask to the probabilities to select the # indices for probabilities that correspond with option words. selected_probabilities = options_words_mask * tiled_probabilities # This sums up the probabilities to get the aggregate probability for # each option's constituent words. options_word_probabilities = K.sum(selected_probabilities, axis=3) sum_option_words_probabilities = K.sum(options_word_probabilities, axis=2) if self.multiword_option_mode == "mean": # This block figures out how many words (excluding # padding) are in each option. # Here we generate the mask on the input option. option_mask = K.cast(K.not_equal(options, K.zeros_like(options)), "float32") # This tensor stores the number of words in each option. divisor = K.sum(option_mask, axis=2) # If the divisor is zero at a position, we add epsilon to it. is_zero_divisor = K.equal(divisor, K.zeros_like(divisor)) divisor = switch(is_zero_divisor, K.ones_like(divisor) * K.epsilon(), divisor) else: # Since we're taking the sum, we divide all sums by 1. divisor = K.ones_like(sum_option_words_probabilities) # Now we divide the sums by the divisor we generated above. option_probabilities = sum_option_words_probabilities / divisor return option_probabilities
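# A toy NumPy illustration of the selection logic above (not part of the layer):
# an option word's probability is the sum of document probabilities at every
# position where the document index matches that word.
import numpy as np

document_indices = np.array([3, 7, 3, 9])               # (document_length,)
document_probabilities = np.array([0.1, 0.4, 0.2, 0.3])
option = np.array([3, 9])                               # one option's word indices

word_probs = [(document_probabilities * (document_indices == w)).sum()
              for w in option]                          # [0.3, 0.3]
print(np.sum(word_probs))   # "sum" mode  -> 0.6
print(np.mean(word_probs))  # "mean" mode -> 0.3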
def SSD300(input_shape=(300, 300, 3), num_classes=21): """SSD300 architecture. # Arguments input_shape: Shape of the input image, expected to be either (300, 300, 3) or (3, 300, 300) (not tested). num_classes: Number of classes including background. # References https://arxiv.org/abs/1512.02325 """ input_layer = Input(shape=input_shape) # Block 1 conv1_1 = Conv2D(64, (3, 3), name='conv1_1', padding='same', activation='relu')(input_layer) conv1_2 = Conv2D(64, (3, 3), name='conv1_2', padding='same', activation='relu')(conv1_1) pool1 = MaxPooling2D( name='pool1', pool_size=(2, 2), strides=(2, 2), padding='same', )(conv1_2) # Block 2 conv2_1 = Conv2D(128, (3, 3), name='conv2_1', padding='same', activation='relu')(pool1) conv2_2 = Conv2D(128, (3, 3), name='conv2_2', padding='same', activation='relu')(conv2_1) pool2 = MaxPooling2D(name='pool2', pool_size=(2, 2), strides=(2, 2), padding='same')(conv2_2) # Block 3 conv3_1 = Conv2D(256, (3, 3), name='conv3_1', padding='same', activation='relu')(pool2) conv3_2 = Conv2D(256, (3, 3), name='conv3_2', padding='same', activation='relu')(conv3_1) conv3_3 = Conv2D(256, (3, 3), name='conv3_3', padding='same', activation='relu')(conv3_2) pool3 = MaxPooling2D(name='pool3', pool_size=(2, 2), strides=(2, 2), padding='same')(conv3_3) # Block 4 conv4_1 = Conv2D(512, (3, 3), name='conv4_1', padding='same', activation='relu')(pool3) conv4_2 = Conv2D(512, (3, 3), name='conv4_2', padding='same', activation='relu')(conv4_1) conv4_3 = Conv2D(512, (3, 3), name='conv4_3', padding='same', activation='relu')(conv4_2) pool4 = MaxPooling2D(name='pool4', pool_size=(2, 2), strides=(2, 2), padding='same')(conv4_3) # Block 5 conv5_1 = Conv2D(512, (3, 3), name='conv5_1', padding='same', activation='relu')(pool4) conv5_2 = Conv2D(512, (3, 3), name='conv5_2', padding='same', activation='relu')(conv5_1) conv5_3 = Conv2D(512, (3, 3), name='conv5_3', padding='same', activation='relu')(conv5_2) pool5 = MaxPooling2D(name='pool5', pool_size=(3, 3), strides=(1, 1), padding='same')(conv5_3) # FC6 fc6 = Conv2D(1024, (3, 3), name='fc6', dilation_rate=(6, 6), padding='same', activation='relu')(pool5) # x = Dropout(0.5, name='drop6')(x) # FC7 fc7 = Conv2D(1024, (1, 1), name='fc7', padding='same', activation='relu')(fc6) # x = Dropout(0.5, name='drop7')(x) # Block 6 conv6_1 = Conv2D(256, (1, 1), name='conv6_1', padding='same', activation='relu')(fc7) conv6_2 = Conv2D(512, (3, 3), name='conv6_2', strides=(2, 2), padding='same', activation='relu')(conv6_1) # Block 7 conv7_1 = Conv2D(128, (1, 1), name='conv7_1', padding='same', activation='relu')(conv6_2) conv7_1z = ZeroPadding2D(name='conv7_1z')(conv7_1) conv7_2 = Conv2D(256, (3, 3), name='conv7_2', padding='valid', strides=(2, 2), activation='relu')(conv7_1z) # Block 8 conv8_1 = Conv2D(128, (1, 1), name='conv8_1', padding='same', activation='relu')(conv7_2) conv8_2 = Conv2D(256, (3, 3), name='conv8_2', padding='same', strides=(2, 2), activation='relu')(conv8_1) # Last Pool pool6 = GlobalAveragePooling2D(name='pool6')(conv8_2) # Prediction from conv4_3 num_priors = 3 img_size = (input_shape[1], input_shape[0]) name = 'conv4_3_norm_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) conv4_3_norm = Normalize(20, name='conv4_3_norm')(conv4_3) conv4_3_norm_mbox_loc = Conv2D(num_priors * 4, (3, 3), name='conv4_3_norm_mbox_loc', padding='same')(conv4_3_norm) conv4_3_norm_mbox_loc_flat = Flatten( name='conv4_3_norm_mbox_loc_flat')(conv4_3_norm_mbox_loc) conv4_3_norm_mbox_conf = Conv2D(num_priors * num_classes, (3, 3), name=name,
padding='same')(conv4_3_norm) conv4_3_norm_mbox_conf_flat = Flatten( name='conv4_3_norm_mbox_conf_flat')(conv4_3_norm_mbox_conf) conv4_3_norm_mbox_priorbox = PriorBox(img_size, 30.0, name='conv4_3_norm_mbox_priorbox', aspect_ratios=[2], variances=[0.1, 0.1, 0.2, 0.2])(conv4_3_norm) # Prediction from fc7 num_priors = 6 name = 'fc7_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) fc7_mbox_conf = Conv2D(num_priors * num_classes, (3, 3), padding='same', name=name)(fc7) fc7_mbox_conf_flat = Flatten(name='fc7_mbox_conf_flat')(fc7_mbox_conf) fc7_mbox_loc = Conv2D(num_priors * 4, (3, 3), name='fc7_mbox_loc', padding='same')(fc7) fc7_mbox_loc_flat = Flatten(name='fc7_mbox_loc_flat')(fc7_mbox_loc) fc7_mbox_priorbox = PriorBox(img_size, 60.0, name='fc7_mbox_priorbox', max_size=114.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2])(fc7) # Prediction from conv6_2 num_priors = 6 name = 'conv6_2_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) conv6_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3), padding='same', name=name)(conv6_2) conv6_2_mbox_conf_flat = Flatten( name='conv6_2_mbox_conf_flat')(conv6_2_mbox_conf) conv6_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), name='conv6_2_mbox_loc', padding='same')(conv6_2) conv6_2_mbox_loc_flat = Flatten( name='conv6_2_mbox_loc_flat')(conv6_2_mbox_loc) conv6_2_mbox_priorbox = PriorBox(img_size, 114.0, max_size=168.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv6_2_mbox_priorbox')(conv6_2) # Prediction from conv7_2 num_priors = 6 name = 'conv7_2_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) conv7_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3), padding='same', name=name)(conv7_2) conv7_2_mbox_conf_flat = Flatten( name='conv7_2_mbox_conf_flat')(conv7_2_mbox_conf) conv7_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same', name='conv7_2_mbox_loc')(conv7_2) conv7_2_mbox_loc_flat = Flatten( name='conv7_2_mbox_loc_flat')(conv7_2_mbox_loc) conv7_2_mbox_priorbox = PriorBox(img_size, 168.0, max_size=222.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv7_2_mbox_priorbox')(conv7_2) # Prediction from conv8_2 num_priors = 6 name = 'conv8_2_mbox_conf' if num_classes != 21: name += '_{}'.format(num_classes) conv8_2_mbox_conf = Conv2D(num_priors * num_classes, (3, 3), padding='same', name=name)(conv8_2) conv8_2_mbox_conf_flat = Flatten( name='conv8_2_mbox_conf_flat')(conv8_2_mbox_conf) conv8_2_mbox_loc = Conv2D(num_priors * 4, (3, 3), padding='same', name='conv8_2_mbox_loc')(conv8_2) conv8_2_mbox_loc_flat = Flatten( name='conv8_2_mbox_loc_flat')(conv8_2_mbox_loc) conv8_2_mbox_priorbox = PriorBox(img_size, 222.0, max_size=276.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='conv8_2_mbox_priorbox')(conv8_2) # Prediction from pool6 num_priors = 6 name = 'pool6_mbox_conf_flat' if num_classes != 21: name += '_{}'.format(num_classes) if K.image_data_format() == 'channels_last': target_shape = (1, 1, 256) else: target_shape = (256, 1, 1) pool6_mbox_loc_flat = Dense(num_priors * 4, name='pool6_mbox_loc_flat')(pool6) pool6_mbox_conf_flat = Dense(num_priors * num_classes, name=name)(pool6) pool6_reshaped = Reshape(target_shape, name='pool6_reshaped')(pool6) pool6_mbox_priorbox = PriorBox(img_size, 276.0, max_size=330.0, aspect_ratios=[2, 3], variances=[0.1, 0.1, 0.2, 0.2], name='pool6_mbox_priorbox')(pool6_reshaped) # Gather all predictions mbox_loc = concatenate([ conv4_3_norm_mbox_loc_flat, fc7_mbox_loc_flat, conv6_2_mbox_loc_flat, conv7_2_mbox_loc_flat,
conv8_2_mbox_loc_flat, pool6_mbox_loc_flat ], axis=1, name='mbox_loc') mbox_conf = concatenate([ conv4_3_norm_mbox_conf_flat, fc7_mbox_conf_flat, conv6_2_mbox_conf_flat, conv7_2_mbox_conf_flat, conv8_2_mbox_conf_flat, pool6_mbox_conf_flat ], axis=1, name='mbox_conf') mbox_priorbox = concatenate([ conv4_3_norm_mbox_priorbox, fc7_mbox_priorbox, conv6_2_mbox_priorbox, conv7_2_mbox_priorbox, conv8_2_mbox_priorbox, pool6_mbox_priorbox ], axis=1, name='mbox_priorbox') if hasattr(mbox_loc, '_keras_shape'): num_boxes = mbox_loc._keras_shape[-1] // 4 elif hasattr(K, 'int_shape'): num_boxes = K.int_shape(mbox_loc)[-1] // 4 mbox_loc = Reshape((num_boxes, 4), name='mbox_loc_final')(mbox_loc) mbox_conf = Reshape((num_boxes, num_classes), name='mbox_conf_logits')(mbox_conf) mbox_conf = Activation('softmax', name='mbox_conf_final')(mbox_conf) predictions = concatenate([mbox_loc, mbox_conf, mbox_priorbox], axis=2, name='predictions') model = Model(inputs=input_layer, outputs=predictions) return model
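# Quick smoke test for the graph above, assuming the custom Normalize and
# PriorBox layers are importable in scope; with num_classes=21 each detection
# row is 4 loc + 21 conf + 8 priorbox (4 coordinates + 4 variances) = 33 values.
ssd = SSD300(input_shape=(300, 300, 3), num_classes=21)
print(ssd.output_shape)  # expected: (None, num_boxes, 33)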
def Conv_VAE3D(n_epochs=2, batch_size=10, learning_rate=0.001, decay_rate=0.0, latent_dim=8, name='stats.pickle'): # Prepare session: K.clear_session() # Number of samples to use for training and validation: n_train = 1500 n_val = 1000 # ENCODER: --------------------------------------------------------------- input_img = Input(shape=(50, 50, 50, 4), name="Init_Input") x = layers.Conv3D(32, (3, 3, 3), padding="same", activation='relu', name='E_Conv1')(input_img) x = layers.MaxPooling3D((2, 2, 2), name='E_MP1')(x) x = layers.Conv3D(64, (3, 3, 3), padding="same", activation='relu', name='E_Conv2')(x) x = layers.MaxPooling3D((2, 2, 2), name='E_MP2')(x) x = layers.Conv3D(64, (3, 3, 3), padding="valid", activation='relu', name='E_Conv3')(x) x = layers.MaxPooling3D((2, 2, 2), name='E_MP3')(x) x = layers.Conv3D(128, (3, 3, 3), padding="same", activation='relu', name='E_Conv4')(x) shape_before_flattening = K.int_shape(x) x = layers.Flatten()(x) x = layers.Dense(32, activation='relu')(x) encoder = Model(input_img, x) encoder.summary() # VARIATIONAL LAYER: ------------------------------------------------------ z_mean = layers.Dense(latent_dim, name='V_Mean')(x) z_log_var = layers.Dense(latent_dim, name='V_Sig')(x) def sampling(args): z_mean, z_log_var = args epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., stddev=1.) return z_mean + K.exp(z_log_var) * epsilon z = layers.Lambda(sampling, name='V_Var')([z_mean, z_log_var]) variation = Model(input_img, z) variation.summary() # DECODER: --------------------------------------------------------------- decoder_input = layers.Input(shape=(latent_dim, ), name='D_Input') x = layers.Dense(np.prod(shape_before_flattening[1:]), activation='relu', name='D_Dense')(decoder_input) x = layers.Reshape(shape_before_flattening[1:], name='D_UnFlatten')(x) x = layers.Conv3DTranspose(32, 3, padding='same', activation='relu', name='D_DeConv1')(x) x = layers.UpSampling3D((2, 2, 2))(x) x = layers.Conv3D(4, 3, padding='same', activation='sigmoid', name='D_Conv1')(x) x = layers.UpSampling3D((5, 5, 5))(x) x = layers.Conv3D(4, 3, padding='same', activation='sigmoid', name='D_Conv2')(x) decoder = Model(decoder_input, x) decoder.summary() # CALLBACKS: -------------------------------------------------------------- class TimeHistory(keras.callbacks.Callback): start = [] end = [] times = [] def on_epoch_begin(self, batch, logs=None): self.start = time.time() def on_epoch_end(self, batch, logs=None): self.end = time.time() self.times.append(self.end - self.start) # CUSTOM LAYERS: ---------------------------------------------------------- class CustomVariationalLayer(keras.layers.Layer): def vae_loss(self, x, z_decoded): x = K.flatten(x) z_decoded = K.flatten(z_decoded) xent_loss = keras.metrics.binary_crossentropy(x, z_decoded) kl_loss = -5e-4 * K.mean( 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) return K.mean(xent_loss + kl_loss) def call(self, inputs): x = inputs[0] z_decoded = inputs[1] loss = self.vae_loss(x, z_decoded) self.add_loss(loss, inputs=inputs) return x # DEFINE FINAL MODEL: ---------------------------------------------------- z_encoded = variation(input_img) z_decoded = decoder(z_encoded) # Construct Final Model: y = CustomVariationalLayer()([input_img, z_decoded]) vae = Model(input_img, y) vae.summary() # Define Optimizer: vae_optimizer = keras.optimizers.Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, decay=decay_rate, amsgrad=False) vae.compile(optimizer=vae_optimizer,
loss=None) # The VAE loss is added inside CustomVariationalLayer via add_loss, so no external loss is passed to compile. # Define time callback: time_callback = TimeHistory() steps = n_train // batch_size val_steps = n_val // batch_size # FIT MODEL: -------------------------------------------------------------- history = vae.fit_generator( gen_batches(batch_size), shuffle=True, epochs=n_epochs, steps_per_epoch=steps, callbacks=[time_callback], validation_data=gen_batches_validation(batch_size), validation_steps=val_steps) # OUTPUTS: ------------------------------------------------------------- history_dict = history.history loss_values = history_dict['loss'] val_loss_values = history_dict['val_loss'] times = time_callback.times data = { 'train_loss': loss_values, 'val_loss': val_loss_values, 'epoch_time': times } pickle_out = open(name, "wb") pickle.dump(data, pickle_out) pickle_out.close() K.clear_session() return history_dict
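# Illustrative dry run (an assumption, not part of the trainer): Conv_VAE3D
# resolves `gen_batches`/`gen_batches_validation` by name and appears to expect
# them to yield batches of shape (batch_size, 50, 50, 50, 4), so a dummy
# generator is enough for a smoke test.
import numpy as np

def gen_batches(batch_size):
    while True:
        yield np.random.random((batch_size, 50, 50, 50, 4))

gen_batches_validation = gen_batches

history = Conv_VAE3D(n_epochs=1, batch_size=2, latent_dim=8,
                     name='smoke_test.pickle')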
def test_recursion(): #################################################### # test recursion a = Input(shape=(32, ), name='input_a') b = Input(shape=(32, ), name='input_b') dense = Dense(16, name='dense_1') a_2 = dense(a) b_2 = dense(b) merged = merge([a_2, b_2], mode='concat', name='merge') c = Dense(64, name='dense_2')(merged) d = Dense(5, name='dense_3')(c) model = Model(input=[a, b], output=[c, d], name='model') e = Input(shape=(32, ), name='input_e') f = Input(shape=(32, ), name='input_f') g, h = model([e, f]) # g2, h2 = model([e, f]) assert g._keras_shape == c._keras_shape assert h._keras_shape == d._keras_shape # test separate manipulation of different layer outputs i = Dense(7, name='dense_4')(h) final_model = Model(input=[e, f], output=[i, g], name='final') assert len(final_model.inputs) == 2 assert len(final_model.outputs) == 2 assert len(final_model.layers) == 4 # we don't check names of first 2 layers (inputs) because # ordering of same-level layers is not fixed print('final_model layers:', [layer.name for layer in final_model.layers]) assert [layer.name for layer in final_model.layers][2:] == ['model', 'dense_4'] print(model.compute_mask([e, f], [None, None])) assert model.compute_mask([e, f], [None, None]) == [None, None] print(final_model.get_output_shape_for([(10, 32), (10, 32)])) assert final_model.get_output_shape_for([(10, 32), (10, 32)]) == [(10, 7), (10, 64)] # run recursive model fn = K.function(final_model.inputs, final_model.outputs) input_a_np = np.random.random((10, 32)) input_b_np = np.random.random((10, 32)) fn_outputs = fn([input_a_np, input_b_np]) assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)] # test serialization model_config = final_model.get_config() print(json.dumps(model_config, indent=4)) recreated_model = Model.from_config(model_config) fn = K.function(recreated_model.inputs, recreated_model.outputs) input_a_np = np.random.random((10, 32)) input_b_np = np.random.random((10, 32)) fn_outputs = fn([input_a_np, input_b_np]) assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)] #################################################### # test multi-input multi-output j = Input(shape=(32, ), name='input_j') k = Input(shape=(32, ), name='input_k') m, n = model([j, k]) o = Input(shape=(32, ), name='input_o') p = Input(shape=(32, ), name='input_p') q, r = model([o, p]) assert n._keras_shape == (None, 5) assert q._keras_shape == (None, 64) s = merge([n, q], mode='concat', name='merge_nq') assert s._keras_shape == (None, 64 + 5) # test with single output as 1-elem list multi_io_model = Model([j, k, o, p], [s]) fn = K.function(multi_io_model.inputs, multi_io_model.outputs) fn_outputs = fn([ np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32)) ]) assert [x.shape for x in fn_outputs] == [(10, 69)] # test with single output as tensor multi_io_model = Model([j, k, o, p], s) fn = K.function(multi_io_model.inputs, multi_io_model.outputs) fn_outputs = fn([ np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32)) ]) # note that the output of the K.function will still be a 1-elem list assert [x.shape for x in fn_outputs] == [(10, 69)] # test serialization print('multi_io_model.layers:', multi_io_model.layers) print('len(model.inbound_nodes):', len(model.inbound_nodes)) print('len(model.outbound_nodes):', len(model.outbound_nodes)) model_config = multi_io_model.get_config() print(model_config) print(json.dumps(model_config, indent=4)) recreated_model = 
Model.from_config(model_config) fn = K.function(recreated_model.inputs, recreated_model.outputs) fn_outputs = fn([ np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32)), np.random.random((10, 32)) ]) # note that the output of the K.function will still be a 1-elem list assert [x.shape for x in fn_outputs] == [(10, 69)] config = model.get_config() new_model = Model.from_config(config) model.summary() json_str = model.to_json() new_model = model_from_json(json_str) yaml_str = model.to_yaml() new_model = model_from_yaml(yaml_str) #################################################### # test invalid graphs # input is not an Input tensor j = Input(shape=(32, ), name='input_j') j = Dense(32)(j) k = Input(shape=(32, ), name='input_k') m, n = model([j, k]) with pytest.raises(Exception): invalid_model = Model([j, k], [m, n]) # disconnected graph j = Input(shape=(32, ), name='input_j') k = Input(shape=(32, ), name='input_k') m, n = model([j, k]) with pytest.raises(Exception) as e: invalid_model = Model([j], [m, n]) # redundant outputs j = Input(shape=(32, ), name='input_j') k = Input(shape=(32, ), name='input_k') m, n = model([j, k]) # this should work lol # TODO: raise a warning invalid_model = Model([j, k], [m, n, n]) # redundant inputs j = Input(shape=(32, ), name='input_j') k = Input(shape=(32, ), name='input_k') m, n = model([j, k]) with pytest.raises(Exception): invalid_model = Model([j, k, j], [m, n]) # I have no idea what I'm doing: garbage as inputs/outputs j = Input(shape=(32, ), name='input_j') k = Input(shape=(32, ), name='input_k') m, n = model([j, k]) with pytest.raises(Exception): invalid_model = Model([j, k], [m, n, 0]) #################################################### # test calling layers/models on TF tensors if K._BACKEND == 'tensorflow': import tensorflow as tf j = Input(shape=(32, ), name='input_j') k = Input(shape=(32, ), name='input_k') m, n = model([j, k]) tf_model = Model([j, k], [m, n]) # magic j_tf = tf.placeholder(dtype=K.floatx()) k_tf = tf.placeholder(dtype=K.floatx()) m_tf, n_tf = tf_model([j_tf, k_tf]) assert not hasattr(m_tf, '_keras_shape') assert not hasattr(n_tf, '_keras_shape') assert K.int_shape(m_tf) == (None, 64) assert K.int_shape(n_tf) == (None, 5) # test merge o_tf = merge([j_tf, k_tf], mode='concat', concat_axis=1) # test tensor input x = tf.placeholder(shape=(None, 2), dtype=K.floatx()) input_layer = InputLayer(input_tensor=x) x = Input(tensor=x) y = Dense(2)(x)
def call(self, x, mask=None): if hasattr(x, '_keras_shape'): input_shape = x._keras_shape elif hasattr(K, 'int_shape'): input_shape = K.int_shape(x) # --------------------------------- # # Width and height of the incoming feature map, # e.g. 38x38 # --------------------------------- # layer_width = input_shape[self.waxis] layer_height = input_shape[self.haxis] # --------------------------------- # # Width and height of the input image, # e.g. 300x300 # --------------------------------- # img_width = self.img_size[1] img_height = self.img_size[0] box_widths = [] box_heights = [] # --------------------------------- # # self.aspect_ratios usually takes one of two forms: # [1, 1, 2, 1/2] # [1, 1, 2, 1/2, 3, 1/3] # --------------------------------- # for ar in self.aspect_ratios: # First add a small square box if ar == 1 and len(box_widths) == 0: box_widths.append(self.min_size) box_heights.append(self.min_size) # Then add a larger square box elif ar == 1 and len(box_widths) > 0: box_widths.append(np.sqrt(self.min_size * self.max_size)) box_heights.append(np.sqrt(self.min_size * self.max_size)) # Then add the rectangular boxes elif ar != 1: box_widths.append(self.min_size * np.sqrt(ar)) box_heights.append(self.min_size / np.sqrt(ar)) # --------------------------------- # # Half the width and height of every prior box # --------------------------------- # box_widths = 0.5 * np.array(box_widths) box_heights = 0.5 * np.array(box_heights) # --------------------------------- # # Step size (stride) of this feature map # --------------------------------- # step_x = img_width / layer_width step_y = img_height / layer_height # --------------------------------- # # Generate the grid centers # --------------------------------- # linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x, layer_width) liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y, layer_height) centers_x, centers_y = np.meshgrid(linx, liny) centers_x = centers_x.reshape(-1, 1) centers_y = centers_y.reshape(-1, 1) # Each prior box needs two (centers_x, centers_y) pairs: the first is used to compute the top-left corner, the second the bottom-right corner num_priors_ = len(self.aspect_ratios) prior_boxes = np.concatenate((centers_x, centers_y), axis=1) prior_boxes = np.tile(prior_boxes, (1, 2 * num_priors_)) # Top-left and bottom-right corners of the prior boxes prior_boxes[:, ::4] -= box_widths prior_boxes[:, 1::4] -= box_heights prior_boxes[:, 2::4] += box_widths prior_boxes[:, 3::4] += box_heights # --------------------------------- # # Normalize the prior boxes to # fractional (0-1) coordinates # --------------------------------- # prior_boxes[:, ::2] /= img_width prior_boxes[:, 1::2] /= img_height prior_boxes = prior_boxes.reshape(-1, 4) prior_boxes = np.minimum(np.maximum(prior_boxes, 0.0), 1.0) num_boxes = len(prior_boxes) if len(self.variances) == 1: variances = np.ones((num_boxes, 4)) * self.variances[0] elif len(self.variances) == 4: variances = np.tile(self.variances, (num_boxes, 1)) else: raise Exception('Must provide one or four variances.') prior_boxes = np.concatenate((prior_boxes, variances), axis=1) prior_boxes_tensor = K.expand_dims(K.variable(prior_boxes), 0) pattern = [tf.shape(x)[0], 1, 1] prior_boxes_tensor = tf.tile(prior_boxes_tensor, pattern) return prior_boxes_tensor
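# Sanity check on the box count produced above (plain arithmetic, outside the
# layer): a 38x38 feature map with aspect_ratios [1, 1, 2, 1/2] yields
# 38 * 38 * 4 priors, each row holding 4 coordinates plus 4 variances.
layer_width = layer_height = 38
num_priors_ = 4
num_boxes = layer_width * layer_height * num_priors_
print(num_boxes)  # 5776, so prior_boxes has shape (5776, 8)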
def __call__(self, w): m = K.dot(K.transpose(w), w) - K.eye(K.int_shape(w)[1]) return self.reg_weight * frobenius_norm(m)
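# Sketch of how this penalty might be attached to a layer, assuming the method
# above belongs to a regularizer class like the hypothetical one below and that
# frobenius_norm(m) is K.sqrt(K.sum(K.square(m))).
from keras import backend as K
from keras.layers import Dense

class OrthogonalityRegularizer:
    def __init__(self, reg_weight=1e-3):
        self.reg_weight = reg_weight

    def __call__(self, w):
        # Penalize the deviation of W^T W from the identity matrix.
        m = K.dot(K.transpose(w), w) - K.eye(K.int_shape(w)[1])
        return self.reg_weight * K.sqrt(K.sum(K.square(m)))

dense = Dense(64, kernel_regularizer=OrthogonalityRegularizer(1e-3))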
def SAR_Net(input_shape, ctc_enable = False, ar_enable = True, disc_enable = False, res_type="res18", res_filters=64, hidden_dim=256, bn_dim=0, bpe_classes=1000, accent_classes=8, max_ctc_len=72, mto=None, vlad_clusters=8, ghost_clusters=2, metric_loss='cosface', margin=0.3, raw_model=None, lr=0.01, gpus = 1, mode="train", name=None): # ========================= # INPUT (2D Spectrogram) # ========================= if mode=="train": inputs = Input(shape=input_shape,name="x_data") else: inputs = Input(shape=[None,input_shape[1],input_shape[2]], name="x_data") if disc_enable: disc_labels = Input(shape=(accent_classes,), name="x_accent") # ============================== # SHARED ENCODER (Res + BiGRU) # ============================== if res_type == "res18": cnn = resnet18_(inputs, filters=res_filters) elif res_type == "res34": cnn = resnet34_(inputs, filters=res_filters) elif res_type == "res50": cnn = resnet50_(inputs, filters=res_filters) elif res_type == "res101": cnn = resnet101_(inputs, filters=res_filters) elif res_type == "res152": cnn = resnet152_(inputs, filters=res_filters) else: raise ValueError("please specify res_type as one of res-[18, 34, 50, 101, 152]") cnn = Reshape([-1,K.int_shape(cnn)[-1]],name="CNN2SEQ")(cnn) cnn = DS(hidden_dim, activation='tanh', name="CNN_LIN")(cnn) cnn = LN(name="CNN_LIN_LN")(cnn) crnn = BIGRU(hidden_dim, name="CRNN")(cnn) crnn = LN(name="CRNN_LN")(crnn) # ========================= # ASR Branch # ========================= if ctc_enable: asr = crnn asr = BIGRU(hidden_dim, name="CTC_BIGRU")(asr) asr = LN(name="CTC_BIGRU_LN")(asr) asr = DS(hidden_dim, activation='tanh', name='CTC_DS')(asr) asr = LN(name='CTC_DS_LN')(asr) ctc_pred = DS(bpe_classes, activation="softmax", name='ctc_pred')(asr) ctc_loss, ctc_labels, ctc_input_len, ctc_label_len = ctc_module(ctc_pred, max_ctc_len) # ========================= # AR Branch # ========================= if ar_enable: # ========================= # AR Branch: Integration # ========================= ar = DS(hidden_dim,activation='tanh',name='AR_DS')(crnn) ar = LN(name='AR_DS_LN')(ar) ar = integration(ar, hidden_dim=hidden_dim, mto=mto, vlad_clusters=vlad_clusters, ghost_clusters=ghost_clusters) ar = BN(name='AR_BN1')(ar) # ar = DP(0.5,name="AR_DP")(ar) ar = DS(hidden_dim, activation=None, name="AR_EMBEDDING")(ar) # Global Feature ar = BN(name='AR_BN2')(ar) # ======================================= # AR Branch: Classification # ======================================= ar1 = DS(64, activation='relu',name="AR_CF_DS1")(ar) ar1 = DS(64, activation='relu',name="AR_CF_DS2")(ar1) ar1 = DS(accent_classes, activation='softmax', name='y_accent')(ar1) # =================================== # AR Branch: Discriminative loss # =================================== if disc_enable: ar2 = disc_loss(ar, accent_label=disc_labels, accent_classes=accent_classes, loss=metric_loss, margin=margin, name="y_disc") # ========================================== # AR Branch: Visual BottleNeck feature (*) # ========================================== if disc_enable and bn_dim: bn = DS(64, activation='relu',name="AR_BN_DS")(ar) bn = BN(name='AR_BN3')(bn) bn = DS(bn_dim, activation=None, name="bottleneck")(bn) bn = BN(name='AR_BN4')(bn) bn = disc_loss(bn, accent_label=disc_labels, accent_classes=accent_classes, loss=metric_loss, margin=margin, name="y_disc_bn") # ============================== # Model # ============================== input_set = [inputs] output_set = [] if ar_enable: output_set += [ar1] if disc_enable: input_set += [disc_labels] output_set +=
[ar2] if ctc_enable: input_set += [ctc_labels, ctc_input_len, ctc_label_len] output_set += [ctc_loss] if disc_enable and bn_dim: output_set += [bn] model = build(inputs=input_set,outputs=output_set,raw=raw_model,name=name) # ============================== # Compile # ============================== loss = {} loss_weights = {} metrics = {} alpha = 0.4 beta = 0.01 if ar_enable: loss["y_accent"] = 'categorical_crossentropy' loss_weights["y_accent"] = beta if disc_enable else 1.0 metrics["y_accent"] = "accuracy" if disc_enable: loss["y_disc"] = 'categorical_crossentropy' if metric_loss != 'circleloss' \ else lambda y, x: ls.circle_loss(y, x, gamma=256, margin=margin) loss_weights["y_disc"] = 1-alpha if ctc_enable else 1.0 metrics["y_disc"] = "accuracy" if ctc_enable: loss["y_ctc_loss"] = lambda y_true, y_pred: y_pred loss_weights["y_ctc_loss"] = 1-alpha if not disc_enable else beta if disc_enable and bn_dim: loss["y_disc_bn"] = 'categorical_crossentropy' if metric_loss != 'circleloss' \ else lambda y, x: ls.circle_loss(y, x, gamma=256, margin=margin) loss_weights["y_disc_bn"] = 0.1 metrics['y_disc_bn'] = 'accuracy' train_model = compile(model,gpus,lr=lr,loss=loss,loss_weights=loss_weights,metrics=metrics) print(loss_weights) return model,train_model
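# Illustrative construction call (the input layout is an assumption; it is
# whatever resnet18_ and friends expect, e.g. frames x mel-bins x channels):
model, train_model = SAR_Net(input_shape=(800, 64, 1),
                             ctc_enable=False, ar_enable=True,
                             disc_enable=False, res_type='res18',
                             accent_classes=8, mode='train')
model.summary()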
def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): """Adds an Inception-ResNet block. This function builds 3 types of Inception-ResNet blocks mentioned in the paper, controlled by the `block_type` argument (which is the block name used in the official TF-slim implementation): - Inception-ResNet-A: `block_type='block35'` - Inception-ResNet-B: `block_type='block17'` - Inception-ResNet-C: `block_type='block8'` # Arguments x: input tensor. scale: scaling factor to scale the residuals (i.e., the output of passing `x` through an inception module) before adding them to the shortcut branch. Let `r` be the output from the residual branch, the output of this block will be `x + scale * r`. block_type: `'block35'`, `'block17'` or `'block8'`, determines the network structure in the residual branch. block_idx: an `int` used for generating layer names. The Inception-ResNet blocks are repeated many times in this network. We use `block_idx` to identify each of the repetitions. For example, the first Inception-ResNet-A block will have `block_type='block35', block_idx=0`, and the layer names will have a common prefix `'block35_0'`. activation: activation function to use at the end of the block (see [activations](../activations.md)). When `activation=None`, no activation is applied (i.e., "linear" activation: `a(x) = x`). # Returns Output tensor for the block. # Raises ValueError: if `block_type` is not one of `'block35'`, `'block17'` or `'block8'`. """ if block_type == 'block35': branch_0 = conv2d_bn(x, 32, 1) branch_1 = conv2d_bn(x, 32, 1) branch_1 = conv2d_bn(branch_1, 32, 3) branch_2 = conv2d_bn(x, 32, 1) branch_2 = conv2d_bn(branch_2, 48, 3) branch_2 = conv2d_bn(branch_2, 64, 3) branches = [branch_0, branch_1, branch_2] elif block_type == 'block17': branch_0 = conv2d_bn(x, 192, 1) branch_1 = conv2d_bn(x, 128, 1) branch_1 = conv2d_bn(branch_1, 160, [1, 7]) branch_1 = conv2d_bn(branch_1, 192, [7, 1]) branches = [branch_0, branch_1] elif block_type == 'block8': branch_0 = conv2d_bn(x, 192, 1) branch_1 = conv2d_bn(x, 192, 1) branch_1 = conv2d_bn(branch_1, 224, [1, 3]) branch_1 = conv2d_bn(branch_1, 256, [3, 1]) branches = [branch_0, branch_1] else: raise ValueError('Unknown Inception-ResNet block type. ' 'Expects "block35", "block17" or "block8", ' 'but got: ' + str(block_type)) block_name = block_type + '_' + str(block_idx) channel_axis = 1 if K.image_data_format() == 'channels_first' else 3 mixed = Concatenate(axis=channel_axis, name=block_name + '_mixed')(branches) up = conv2d_bn(mixed, K.int_shape(x)[channel_axis], 1, activation=None, use_bias=True, name=block_name + '_conv') x = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale, output_shape=K.int_shape(x)[1:], arguments={'scale': scale}, name=block_name)([x, up]) if activation is not None: x = Activation(activation, name=block_name + '_ac')(x) return x
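# In Inception-ResNet-v2 these blocks are stacked with a fixed per-type scale;
# for example, the ten Inception-ResNet-A repetitions look like this (assuming
# `x` is the tensor leaving the preceding mixed/reduction block):
for block_idx in range(1, 11):
    x = inception_resnet_block(x, scale=0.17, block_type='block35',
                               block_idx=block_idx)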
def inverse(self): in_dim = K.int_shape(self.idxs)[0] reverse_idxs = tf.nn.top_k(self.idxs, in_dim)[1][::-1] layer = Permute() layer.idxs = reverse_idxs return layer
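# The top_k expression above is an inverse-permutation trick: taking the
# positions of the indices sorted in descending order and then reversing them
# is equivalent to np.argsort. A quick NumPy check of the idea:
import numpy as np

idxs = np.array([2, 0, 3, 1])
x = np.array([10.0, 11.0, 12.0, 13.0])
reverse_idxs = np.argsort(idxs)  # what top_k(idxs, n)[1][::-1] computes
assert (x[idxs][reverse_idxs] == x).all()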
layer_filters = [32, 64] # Build the Autoencoder Model # First build the Encoder Model inputs = Input(shape=input_shape, name='encoder_input') x = inputs # Stack of Conv2D blocks # Notes: # 1) Use Batch Normalization before ReLU on deep networks # 2) Use MaxPooling2D as alternative to strides>1 # - faster but not as good as strides>1 for filters in layer_filters: x = Conv2D(filters=filters, kernel_size=kernel_size, strides=2, activation='relu', padding='same')(x) # Shape info needed to build Decoder Model shape = K.int_shape(x) # Generate the latent vector x = Flatten()(x) latent = Dense(latent_dim, name='latent_vector')(x) # Instantiate Encoder Model encoder = Model(inputs, latent, name='encoder') encoder.summary() # Build the Decoder Model latent_inputs = Input(shape=(latent_dim,), name='decoder_input') x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) x = Reshape((shape[1], shape[2], shape[3]))(x) # Stack of Transposed Conv2D blocks
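# A sketch of how the decoder stack typically continues from here: mirror the
# encoder with Conv2DTranspose layers over the reversed filter list, then add a
# final reconstruction layer (the single-channel sigmoid output is an
# assumption matching grayscale inputs).
for filters in layer_filters[::-1]:
    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        strides=2,
                        activation='relu',
                        padding='same')(x)

outputs = Conv2DTranspose(filters=1,
                          kernel_size=kernel_size,
                          activation='sigmoid',
                          padding='same',
                          name='decoder_output')(x)
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()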
def TCN_V4(n_classes, feat_dim, max_len, gap=1, dropout=0.0, activation="relu"): """TCN model. num_block = 2. initial_conv_num=64, block_size = 3. Args: n_classes: number of classes for this kind of label. feat_dim: the dimension of the feature. max_len: the number of frames for each video. Returns: model: uncompiled model.""" ROW_AXIS = 1 CHANNEL_AXIS = 2 initial_conv_len = 4 initial_conv_num = 64 config = [ [(1, 4, 64)], [(1, 4, 64)], [(1, 4, 64)], [(2, 4, 128)], [(1, 4, 128)], [(1, 4, 128)], ] input = Input(shape=(max_len, feat_dim)) model = input model = Convolution1D(initial_conv_num, initial_conv_len, init="he_normal", border_mode="same", subsample_length=1)(model) for depth in range(0, len(config)): blocks = [] for stride, filter_dim, num in config[depth]: ## residual block bn = BatchNormalization(mode=0, axis=CHANNEL_AXIS)(model) relu = Activation(activation)(bn) dr = Dropout(dropout)(relu) conv = Convolution1D(num, filter_dim, init="he_normal", border_mode="same", subsample_length=stride)(dr) #dr = Dropout(dropout)(conv) ## potential downsample conv_shape = K.int_shape(conv) model_shape = K.int_shape(model) if conv_shape[CHANNEL_AXIS] != model_shape[CHANNEL_AXIS]: model = Convolution1D(num, 1, init="he_normal", border_mode="same", subsample_length=2)(model) ## merge block model = merge([model, conv], mode='sum', concat_axis=CHANNEL_AXIS) ## final bn+relu bn = BatchNormalization(mode=0, axis=CHANNEL_AXIS)(model) model = Activation(activation)(bn) if gap: pool_window_shape = K.int_shape(model) gap = AveragePooling1D(pool_window_shape[ROW_AXIS], stride=1)(model) flatten = Flatten()(gap) else: flatten = Flatten()(model) dense = Dense(output_dim=n_classes, init="he_normal", activation="softmax")(flatten) model = Model(input=input, output=dense) # optimizer = SGD(lr=0.01, momentum=0.9, decay=0.0, nesterov=True) # model.compile(loss='categorical_crossentropy', optimizer=optimizer,metrics=['accuracy']) return model
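# Illustrative call (Keras 1 API, matching the code above): e.g. 20 classes and
# 75-frame clips with 1024-dim per-frame features; the model comes back
# uncompiled, as the docstring notes.
model = TCN_V4(n_classes=20, feat_dim=1024, max_len=75, dropout=0.1)
model.compile(loss='categorical_crossentropy', optimizer='sgd',
              metrics=['accuracy'])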
def call(self, inputs): self.in_shape = [i or -1 for i in K.int_shape(inputs)] if self.shape is None: self.shape = [-1, np.prod(self.in_shape[1:])] return K.reshape(inputs, self.shape)