def defineFullModel(self):
    """Build the inference model and store it in ``self.model_full``.

    The raw output chunks come from ``self.defineModel()``.  Their
    concatenation (or the single chunk) is kept in ``self.raw_output``.
    Each chunk is then passed through ``params_dict['output_activation']``,
    the merged result is decoded with ``self.outputDecoder`` and finally
    turned into probabilities by a softmax.  ``self.TRAIN_FLAG`` is set to 0.

    The model input is ``self.input`` unless ``params_dict['base_model']`` is
    set, in which case the base model's input is used.
    """

    def merge(chunks):
        # A single chunk is used as is; several chunks are concatenated.
        return Concatenate()(chunks) if len(chunks) > 1 else chunks[0]

    self.TRAIN_FLAG = 0
    outputs = self.defineModel()
    self.raw_output = merge(outputs)

    # Pass the output logits through the configured activation.  The list is
    # still updated in place, as before, in case defineModel() keeps a
    # reference to it.
    outputs[:] = [
        Lambda(self.params_dict['output_activation'])(chunk)
        for chunk in outputs
    ]

    x = merge(outputs)
    x = Lambda(self.outputDecoder)(x)  # logits
    x = Activation('softmax')(x)  # return probs

    base_model = self.params_dict['base_model']
    # Identity check: `== None` can be hijacked by objects overriding __eq__.
    model_input = self.input if base_model is None else base_model.input
    self.model_full = KerasModel(inputs=model_input, outputs=x)
def line_lstm(input_shape, output_shape, window_width=20, window_stride=14,
              decoder_dim=None, encoder_dim=None):
    """Build an encoder/decoder LSTM line recogniser over sliding windows.

    Parameters
    ----------
    input_shape : tuple
        ``(image_height, image_width)`` of the input image.
    output_shape : tuple
        ``(output_length, num_classes)`` of the prediction.
    window_width, window_stride : int
        Geometry of the sliding window passed to ``slide_window``.
    decoder_dim, encoder_dim : int, optional
        LSTM sizes; both default to 128 when ``None``.

    Returns
    -------
    KerasModel
        Model mapping an image to per-position softmax outputs.
    """
    # Here is another way to pass arguments to the Keras Lambda function
    def slide_window_bound(image, window_width=window_width, window_stride=window_stride):
        return slide_window(image, window_width, window_stride)

    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    if encoder_dim is None:
        encoder_dim = 128
    if decoder_dim is None:
        decoder_dim = 128

    image_input = Input(shape=input_shape)
    # (image_height, image_width)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)
    image_patches = Lambda(slide_window_bound)(image_reshaped)
    # (num_windows, image_height, window_width, 1)

    # Reuse LeNet as a per-window feature extractor, cut at its second-to-last layer.
    convnet = lenet((image_height, window_width, 1), (num_classes, ))
    convnet = KerasModel(inputs=convnet.inputs, outputs=convnet.layers[-2].output)
    # (image_height, window_width, 1) -> (128,)
    convnet_outputs = TimeDistributed(convnet)(image_patches)
    # (num_windows, 128)

    # Look for an actual GPU device instead of counting devices: the device
    # list may also contain XLA pseudo-devices, which made the old
    # `len(...) > 1` test pick CuDNNLSTM on CPU-only machines.
    gpu_present = any(
        device.device_type == 'GPU'
        for device in device_lib.list_local_devices()
    )
    lstm = CuDNNLSTM if gpu_present else LSTM

    ##### Your code below (Lab 3)
    encoder_output = lstm(encoder_dim, return_sequences=False, go_backwards=True)(convnet_outputs)
    # (encoder_dim)
    repeated_encoding = RepeatVector(output_length)(encoder_output)
    # (max_length, encoder_dim)
    decoder_output = lstm(decoder_dim, return_sequences=True)(repeated_encoding)
    # (output_length, decoder_dim)
    ##### Your code above (Lab 3)

    softmax_output = TimeDistributed(Dense(num_classes, activation='softmax'))(decoder_output)
    # (max_length, num_classes)

    model = KerasModel(inputs=image_input, outputs=softmax_output)
    return model
def line_cnn_sliding_window(input_shape: Tuple[int, ...],
                            output_shape: Tuple[int, ...],
                            window_width: float = 16,
                            window_stride: float = 10) -> KerasModel:
    """Build a fully convolutional line recogniser based on sliding windows.

    Input is an image with shape (image_height, image_width).
    Output is of shape (output_length, num_classes).

    Parameters
    ----------
    input_shape : tuple
        ``(image_height, image_width)``.
    output_shape : tuple
        ``(output_length, num_classes)``.
    window_width, window_stride :
        Sliding-window geometry.  They are used as layer dimensions, so
        integer values are expected despite the annotation.

    Raises
    ------
    ValueError
        If the window geometry yields fewer windows than ``output_length``,
        which would make the second sliding window zero-wide.
    """
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    # Width of the second sliding window, which collapses the per-window
    # features into (roughly) output_length positions.
    num_windows = int((image_width - window_width) / window_stride) + 1
    width = int(num_windows / output_length)
    if width < 1:
        # Fail early with a clear message instead of handing Conv2D a
        # zero-sized kernel (same check the CTC models in this project do).
        raise ValueError(
            f'Window width/stride need to generate >= {output_length} windows '
            f'(currently {num_windows})'
        )

    image_input = Input(shape=input_shape)
    # (image_height, image_width)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)
    image_patches = Lambda(slide_window,
                           arguments={
                               'window_width': window_width,
                               'window_stride': window_stride
                           })(image_reshaped)
    # (num_windows, image_height, window_width, 1)

    # Make a LeNet and cut it at its second-to-last layer so it yields features.
    convnet = lenet((image_height, window_width, 1), (num_classes, ))
    convnet = KerasModel(inputs=convnet.inputs, outputs=convnet.layers[-2].output)
    convnet_outputs = TimeDistributed(convnet)(image_patches)
    # (num_windows, 128)

    # Now we have to get to (output_length, num_classes) shape. One way to do
    # it is another sliding window with width = floor(num_windows / output_length).
    # This will likely produce too many items in the output sequence, so only
    # the first output_length are kept.  A width below 2 means only the first
    # part of the line can be predicted.

    ##### Your code below (Lab 2)
    convnet_outputs_extra_dim = Lambda(lambda x: tf.expand_dims(x, -1))(convnet_outputs)
    # The kernel spans `width` windows and all 128 features of each window.
    conved_convnet_outputs = Conv2D(num_classes, (width, 128), (width, 1),
                                    activation='softmax')(convnet_outputs_extra_dim)
    squeezed_conved_convnet_outputs = Lambda(lambda x: tf.squeeze(x, 2))(conved_convnet_outputs)
    softmax_output = Lambda(lambda x: x[:, :output_length, :])(squeezed_conved_convnet_outputs)
    ##### Your code above (Lab 2)

    model = KerasModel(inputs=image_input, outputs=softmax_output)
    model.summary()
    return model
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14):  # pylint: disable=too-many-locals
    """Build a sliding-window CNN + LSTM line model with CTC loss and decoding.

    The model takes four inputs (``image``, ``y_true``, ``input_length``,
    ``label_length``) and produces two outputs: the CTC loss (layer
    ``ctc_loss``) and the decoded label sequence (layer ``ctc_decoded``).

    Raises ``ValueError`` when the window geometry yields fewer windows than
    ``output_length``.
    """
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
    if output_length > num_windows:
        raise ValueError(f"Window width/stride need to generate >= {output_length} windows (currently {num_windows})")

    image_input = Input(shape=input_shape, name="image")
    y_true = Input(shape=(output_length,), name="y_true")
    input_length = Input(shape=(1,), name="input_length")
    label_length = Input(shape=(1,), name="label_length")

    # Add a channel axis, then cut the line image into overlapping windows.
    with_channel = Reshape((image_height, image_width, 1))(image_input)
    window_kwargs = {"window_width": window_width, "window_stride": window_stride}
    windows = Lambda(slide_window, arguments=window_kwargs)(with_channel)

    # LeNet truncated at its second-to-last layer acts as the per-window
    # feature extractor.
    full_lenet = lenet((image_height, window_width, 1), (num_classes,))
    feature_extractor = KerasModel(inputs=full_lenet.inputs, outputs=full_lenet.layers[-2].output)
    window_features = TimeDistributed(feature_extractor)(windows)

    # One LSTM over the window sequence, then a per-step softmax.
    lstm_output = LSTM(128, return_sequences=True)(window_features)
    softmax_output = Dense(num_classes, activation="softmax", name="softmax_output")(lstm_output)

    # Scale the input_length input by the number of windows.
    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={"num_windows": num_windows},
    )(input_length)

    loss_inputs = [y_true, softmax_output, input_length_processed, label_length]
    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]),
        name="ctc_loss",
    )(loss_inputs)

    decode_inputs = [softmax_output, input_length_processed]
    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name="ctc_decoded",
    )(decode_inputs)

    return KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output],
    )
def __init__(self, image_width, learning_rate=0.001):
    """Build and compile the binary image classifier.

    Parameters
    ----------
    image_width : int
        Side length of the square RGB input, i.e. the input shape is
        ``(image_width, image_width, 3)``.
    learning_rate : float
        Learning rate handed to the Adam optimizer.

    The compiled model is stored in ``self.model``.
    """

    def normalise_and_pool(tensor, pool):
        # Batch-normalise, then down-sample with a square max-pool window.
        tensor = BatchNormalization()(tensor)
        return MaxPooling2D(pool_size=(pool, pool), padding='same')(tensor)

    # initialize model structure
    x = Input(shape=(image_width, image_width, 3))

    # Two inception stages followed by two plain 5x5 convolution stages.
    x1 = normalise_and_pool(self.inception_layer(1, 4, 4, 2)(x), 2)
    x2 = normalise_and_pool(self.inception_layer(2, 4, 4, 2)(x1), 2)
    x3 = normalise_and_pool(
        Conv2D(16, (5, 5), padding='same', activation='relu')(x2), 2)
    x4 = normalise_and_pool(
        Conv2D(16, (5, 5), padding='same', activation='relu')(x3), 4)

    # Classification head with dropout and a single sigmoid unit.
    y = Flatten()(x4)
    y = Dropout(0.5)(y)
    y = Dense(16)(y)
    y = LeakyReLU(alpha=0.1)(y)
    y = Dropout(0.5)(y)
    y = Dense(1, activation='sigmoid')(y)

    self.model = KerasModel(inputs=x, outputs=y)
    # `lr` is a deprecated alias of `learning_rate` in tf.keras optimizers.
    self.model.compile(optimizer=Adam(learning_rate=learning_rate),
                       loss=tensorflow.keras.losses.BinaryCrossentropy(),
                       metrics=[tensorflow.keras.metrics.AUC()])
def _gradient_backprop_eager(self, grad_fn, layer_name, images, mode='max',
                             output_index=0, loss_fn=None):
    """Return the gradient of a layer-based loss with respect to ``images``.

    A clone of ``self.model`` (with the same weights) is created, and every
    layer activation whose ``__name__`` contains ``'relu'`` is replaced by
    ``grad_fn``.  The loss is computed by ``self._get_backprop_loss`` on the
    output of layer ``layer_name``; its gradient with respect to the input
    images is returned as a NumPy array.
    """
    # Work on a copy so the original model keeps its activations.
    cloned = clone_model(self.model)
    cloned.set_weights(self.model.get_weights())

    for layer in cloned.layers:
        has_activation = 'activation' in layer.get_config()
        if has_activation and 'relu' in layer.activation.__name__:
            layer.activation = grad_fn

    target_output = cloned.get_layer(layer_name).output
    guided = KerasModel(cloned.inputs, target_output)

    inputs = tf.Variable(tf.cast(images, K.floatx()))
    with tf.GradientTape() as tape:
        tape.watch(inputs)
        layer_output = guided(inputs)
        loss = self._get_backprop_loss(layer_output, mode, output_index, loss_fn)
    gradients = tape.gradient(loss, inputs)

    # Drop the temporary models before returning.
    del guided
    del cloned
    return gradients.numpy()
def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]:
    """Predict the text on a single image.

    Returns the decoded string and a confidence value, computed as
    ``exp(-log_prob)`` from the greedy CTC decoding result.
    """
    image_tensor = self.network.get_layer("image").input
    softmax_tensor = self.network.get_layer("softmax_output").output
    softmax_output_fn = KerasModel(inputs=[image_tensor], outputs=[softmax_tensor])

    # uint8 images are rescaled to float32 in [0, 1].
    if image.dtype == np.uint8:
        image = (image / 255).astype(np.float32)

    # Add the batch axis and run the softmax sub-model.
    batch = np.expand_dims(image, 0)
    probabilities = softmax_output_fn.predict(batch)

    # Greedy CTC decoding over all time steps of the single sample.
    sequence_lengths = [probabilities.shape[1]]
    decoded, log_prob = K.ctc_decode(probabilities, sequence_lengths, greedy=True)

    labels = K.eval(decoded[0])[0]
    characters = (self.data.mapping[label] for label in labels)
    pred = "".join(characters).strip()

    neg_sum_logit = K.eval(log_prob)[0][0]
    conf = np.exp(-neg_sum_logit)
    return pred, conf
def init_model(self):
    """Build the four-stage convolutional binary classifier.

    Input shape is ``(IMGWIDTH, IMGWIDTH, 3)``; the output is a single
    sigmoid unit.  The model is returned uncompiled.
    """
    # (filters, kernel size, pool size) for each conv -> batch-norm -> pool stage.
    stages = (
        (8, 3, 2),
        (8, 5, 2),
        (16, 5, 2),
        (16, 5, 4),
    )

    model_input = Input(shape=(IMGWIDTH, IMGWIDTH, 3))
    features = model_input
    for filters, kernel, pool in stages:
        features = Conv2D(filters, (kernel, kernel), padding='same', activation='relu')(features)
        features = BatchNormalization()(features)
        features = MaxPooling2D(pool_size=(pool, pool), padding='same')(features)

    # Classification head.
    head = Flatten()(features)
    head = Dropout(0.5)(head)
    head = Dense(16)(head)
    head = LeakyReLU(alpha=0.1)(head)
    head = Dropout(0.5)(head)
    head = Dense(1, activation='sigmoid')(head)

    return KerasModel(inputs=model_input, outputs=head)
def decoder(self):
    """ DFL H128 Decoder.

    Builds a face branch (3 output channels, layer ``face_out``) and, when
    the ``learn_mask`` config option is set, a second mask branch (1 output
    channel, layer ``mask_out``) from the same input.
    """
    input_ = Input(shape=(16, 16, self.encoder_dim))
    upscale_filters = (self.encoder_dim,
                       self.encoder_dim // 2,
                       self.encoder_dim // 4)

    def build_branch(channels, name):
        # Three upscale blocks followed by a sigmoid output convolution.
        tensor = input_
        for filters in upscale_filters:
            tensor = self.blocks.upscale(tensor, filters)
        return self.blocks.conv2d(tensor,
                                  channels,
                                  kernel_size=5,
                                  padding="same",
                                  activation="sigmoid",
                                  name=name)

    # Face
    outputs = [build_branch(3, "face_out")]

    # Mask (optional)
    if self.config.get("learn_mask", False):
        outputs.append(build_branch(1, "mask_out"))

    return KerasModel(input_, outputs=outputs)
def evaluate(self, x, y, batch_size: int=16, verbose=True) -> float:
    """Evaluate the model and return the mean character accuracy.

    Predictions come from the ``ctc_decoded`` layer of ``self.network``.
    Labels are mapped to characters through ``self.data.mapping`` and the
    characters ``' |_'`` are stripped from both ends.  Character accuracy is
    ``1 - edit_distance / len(true_string)``.

    Parameters
    ----------
    x, y :
        Evaluation inputs and targets (``np.argmax(y, -1)`` is taken as the
        true label sequence).
    batch_size : int
        Batch size of the evaluation sequence.
    verbose : bool
        When true, print the least accurate, most accurate and some random
        predictions.
    """
    test_sequence = DatasetSequence(x, y, batch_size, format_fn=self.batch_format_fn)

    # We can use the `ctc_decoded` layer that is part of our model here.
    decoding_model = KerasModel(inputs=self.network.input,
                                outputs=self.network.get_layer('ctc_decoded').output)
    # NOTE(review): predict_generator is deprecated in recent tf.keras in
    # favour of predict(); kept because the targeted version is not visible.
    preds = decoding_model.predict_generator(test_sequence)
    trues = np.argmax(y, -1)

    def to_string(labels):
        # Unknown labels map to the empty string.
        return ''.join(self.data.mapping.get(label, '') for label in labels).strip(' |_')

    pred_strings = [to_string(pred) for pred in preds]
    true_strings = [to_string(true) for true in trues]

    # max(..., 1) guards against a ZeroDivisionError when a true string is
    # empty after stripping; the accuracy is then 1 minus the edit distance.
    char_accuracies = [
        1 - editdistance.eval(true_string, pred_string) / max(len(true_string), 1)
        for pred_string, true_string in zip(pred_strings, true_strings)
    ]

    if verbose:
        sorted_ind = np.argsort(char_accuracies)
        print("\nLeast accurate predictions:")
        for ind in sorted_ind[:5]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nMost accurate predictions:")
        for ind in sorted_ind[-5:]:
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')
        print("\nRandom predictions:")
        for ind in np.random.randint(0, len(char_accuracies), 5):
            print(f'True: {true_strings[ind]}')
            print(f'Pred: {pred_strings[ind]}')

    mean_accuracy = np.mean(char_accuracies)
    return mean_accuracy
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14,
                  conv_dim=128, lstm_dim=256):
    """Build a strided-convolution + stacked-LSTM line model with CTC loss.

    A single ``Conv2D`` whose kernel spans the full image height replaces the
    explicit sliding window: it produces one ``conv_dim`` feature vector per
    horizontal window.  Four LSTM layers follow, the last two fed with the
    sum of the two preceding LSTM outputs.

    Parameters
    ----------
    input_shape : tuple
        ``(image_height, image_width)``.
    output_shape : tuple
        ``(output_length, num_classes)``.
    window_width, window_stride : int
        Convolution kernel width and horizontal stride.
    conv_dim, lstm_dim : int
        Number of convolution filters / LSTM units.

    Returns
    -------
    KerasModel
        Inputs ``[image, y_true, input_length, label_length]``; outputs the
        ``ctc_loss`` and ``ctc_decoded`` layers.

    Raises
    ------
    ValueError
        If fewer than ``output_length`` windows would be generated.
    """
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
    if num_windows < output_length:
        raise ValueError(f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})')

    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length,), name='y_true')
    input_length = Input(shape=(1,), name='input_length')
    label_length = Input(shape=(1,), name='label_length')

    # Look for an actual GPU device instead of counting devices, which
    # misfires when XLA pseudo-devices are listed on CPU-only machines.
    gpu_present = any(
        device.device_type == 'GPU'
        for device in device_lib.list_local_devices()
    )
    lstm_fn = CuDNNLSTM if gpu_present else LSTM

    # Note that lstms expect a input of shape (num_batch_size, num_timesteps, feature_length).

    ##### Your code below (Lab 3)
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    # Full-height kernel: the height axis collapses to size 1 and is squeezed away.
    conv = Conv2D(conv_dim, (image_height, window_width), (1, window_stride),
                  activation='relu')(image_reshaped)
    conv_squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv)

    # The first LSTM consumes the squeezed convolution features.  The previous
    # code referenced an undefined `convnet_outputs` here (NameError).
    lstm_output1 = lstm_fn(lstm_dim, return_sequences=True)(conv_squeezed)
    lstm_output2 = lstm_fn(lstm_dim, return_sequences=True)(lstm_output1)
    # Residual-style sums of the two preceding LSTM outputs.
    lstm_output3 = lstm_fn(lstm_dim, return_sequences=True)(lstm_output2 + lstm_output1)
    lstm_output4 = lstm_fn(lstm_dim, return_sequences=True)(lstm_output3 + lstm_output2)

    softmax_output = Dense(num_classes, activation='softmax', name='softmax_output')(lstm_output4)
    # (num_windows, num_classes)
    ##### Your code above (Lab 3)

    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows}
    )(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]),
        name='ctc_loss'
    )([y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded'
    )([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output]
    )
    return model
def fprop(self, x):
    """
    Exposes all the layers of the model returned by get_layer_names.

    :param x: A symbolic representation of the network input
    :return: A dictionary mapping layer names to the symbolic
             representation of their output.
    """
    from tensorflow.keras.models import Model as KerasModel

    # Lazily build (and cache) a model that outputs every layer's activation.
    if self.keras_model is None:
        model_input = self.model.get_input_at(0)
        layer_outputs = [layer.output for layer in self.model.layers]
        self.keras_model = KerasModel(model_input, layer_outputs)

    # Evaluate that model on the input x.
    outputs = self.keras_model(x)

    # A one-layer model yields a bare output rather than a list; wrap it so
    # the zip below pairs it with its layer name.
    single_layer = len(self.model.layers) == 1
    if single_layer:
        outputs = [outputs]

    return dict(zip(self.get_layer_names(), outputs))
def decoder(self):
    """ Decoder Network.

    Builds a face branch (3 output channels, layer ``face_out``) and, when
    the ``learn_mask`` config option is set, a second mask branch (1 output
    channel, layer ``mask_out``) from the same ``(8, 8, 512)`` input.
    """
    input_ = Input(shape=(8, 8, 512))

    def build_branch(channels, name):
        # Three upscale blocks followed by a sigmoid output convolution.
        tensor = input_
        for filters in (256, 128, 64):
            tensor = self.blocks.upscale(tensor, filters)
        return self.blocks.conv2d(tensor,
                                  channels,
                                  kernel_size=5,
                                  padding="same",
                                  activation="sigmoid",
                                  name=name)

    outputs = [build_branch(3, "face_out")]
    if self.config.get("learn_mask", False):
        outputs.append(build_branch(1, "mask_out"))

    return KerasModel(input_, outputs=outputs)
def build(self):
    """Build and compile an InceptionV3-based binary classifier.

    The ImageNet-pretrained InceptionV3 convolutional base (without its top
    layers) is extended with global average pooling, a 256-unit ReLU layer,
    dropout and a single sigmoid unit.  The compiled model is stored in
    ``self._model``.
    """
    # Build the Inception V3 network with pretrained ImageNet weights;
    # include_top=False removes the top fully connected layers.
    # NOTE(review): the shape is passed as (img_width, img_height, 3); confirm
    # this ordering matches the data pipeline if the images are not square.
    base_model = applications.InceptionV3(weights='imagenet',
                                          include_top=False,
                                          input_shape=(self.img_width, self.img_height, 3))

    # Classifier to put on top of the convolutional model: global average
    # pooling, a fully connected layer with 256 nodes, dropout and a sigmoid.
    model_top = Sequential()
    # A stray trailing comma used to follow this call, turning the statement
    # into a discarded one-element tuple.
    model_top.add(
        GlobalAveragePooling2D(input_shape=base_model.output_shape[1:],
                               data_format=None))
    model_top.add(Dense(256, activation='relu'))
    model_top.add(Dropout(0.5))
    model_top.add(Dense(1, activation='sigmoid'))

    model = KerasModel(inputs=base_model.input,
                       outputs=model_top(base_model.output))

    # Compile with Adam and binary cross entropy; the optimizer settings come
    # from the instance (a low learning rate is intended for transfer learning).
    model.compile(optimizer=Adam(lr=self.learning_rate,
                                 beta_1=self.beta_1,
                                 beta_2=self.beta_2,
                                 epsilon=self.epsilon,
                                 decay=self.decay),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    self._model = model
def Decoder(self):
    """Build the decoder: three upscale blocks and a sigmoid RGB convolution.

    Takes an ``(8, 8, 512)`` input and returns the uncompiled Keras model.
    """
    decoder_input = Input(shape=(8, 8, 512))

    tensor = decoder_input
    for filters in (256, 128, 64):
        tensor = self.upscale(filters)(tensor)

    image = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(tensor)
    return KerasModel(decoder_input, image)
def build_autoencoders(self, inputs):
    """ Initialize original model.

    For each side ("a" and "b") the shared encoder is applied to the first
    input and chained into that side's decoder; the resulting model is
    registered through ``self.add_predictor``.
    """
    logger.debug("Initializing model")
    for side in ("a", "b"):
        logger.debug("Adding Autoencoder. Side: %s", side)
        side_decoder = self.networks["decoder_{}".format(side)].network
        # Only the first input feeds the encoder; the model still exposes all inputs.
        encoded = self.networks["encoder"].network(inputs[0])
        reconstruction = side_decoder(encoded)
        self.add_predictor(side, KerasModel(inputs, reconstruction))
    logger.debug("Initialized model")
def init_model(self, dl_rate):
    """Build a small dilated-convolution binary classifier.

    ``dl_rate`` is the dilation rate of the first 3x3 convolution.  Input
    shape is ``(IMGWIDTH, IMGWIDTH, 3)``; output is a single sigmoid unit.
    The model is returned uncompiled.
    """
    model_input = Input(shape=(IMGWIDTH, IMGWIDTH, 3))

    # Layers are listed in order and folded over the input below.
    pipeline = [
        Conv2D(16, (3, 3), dilation_rate=dl_rate, strides=1,
               padding='same', activation='relu'),
        Conv2D(4, (1, 1), padding='same', activation='relu'),
        BatchNormalization(),
        MaxPooling2D(pool_size=(8, 8), padding='same'),
        Flatten(),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]

    tensor = model_input
    for layer in pipeline:
        tensor = layer(tensor)

    return KerasModel(inputs=model_input, outputs=tensor)
def Encoder(self):
    """Build the encoder: four conv blocks, a dense bottleneck and one upscale.

    Takes an ``IMAGE_SHAPE`` input; the flattened features are squeezed to
    ``ENCODER_DIM`` units, expanded and reshaped to ``(4, 4, 1024)``, then
    upscaled with 512 filters.  Returns the uncompiled Keras model.
    """
    encoder_input = Input(shape=IMAGE_SHAPE)

    tensor = encoder_input
    for filters in (128, 256, 512, 1024):
        tensor = self.conv(filters)(tensor)

    # Dense bottleneck.
    bottleneck = Dense(ENCODER_DIM)
    tensor = bottleneck(Flatten()(tensor))
    tensor = Dense(4 * 4 * 1024)(tensor)
    tensor = Reshape((4, 4, 1024))(tensor)

    tensor = self.upscale(512)(tensor)
    return KerasModel(encoder_input, tensor)
def SP_ResNet(num_classes,
              input_shape,
              depths=[2, 2, 2, 2],
              filters=[64, 128, 256, 512],
              pool_at=[0, 1, 2, 3],
              squeeze_ratio=16,
              use_residuals=True,
              dense_layers=[],
              dropout_rate=None):
    """Build a ResNet-style classifier with bilinear pooling side outputs.

    After an entry convolution and max-pool, each stage ``i`` stacks
    ``depths[i]`` ``SP_block`` blocks with ``filters[i]`` filters.  For the
    stages listed in ``pool_at`` the block's second output is bilinear-pooled
    and collected.  The collected features are concatenated with the globally
    average-pooled final feature map, passed through ``make_dense_layers``
    and classified by a softmax layer with ``num_classes`` units.

    NOTE(review): the list defaults are never mutated here, but they are
    shared between calls; do not modify them in place.
    """
    input_img = Input(shape=input_shape, name='input')

    # entry conv + pool
    x = Conv2D(filters[0], (7, 7),
               strides=(2, 2),
               padding='same',
               activation=None,
               name='entry_conv')(input_img)
    x = BatchNormalization(name='entry_bn')(x)
    x = Activation('relu', name='entry_relu')(x)
    x = MaxPool2D((3, 3), strides=(2, 2), padding='same')(x)

    pooling_outputs = []
    for stage, (n_filters, n_blocks) in enumerate(zip(filters, depths)):
        for block in range(n_blocks):
            # The first block of each stage is built with downsample=True.
            x, z = SP_block(x,
                            n_filters,
                            str(stage) + '_' + str(block),
                            ratio=squeeze_ratio,
                            residual=use_residuals,
                            downsample=(block == 0))

        # only pool at last block in depth
        # NOTE(review): nesting reconstructed from the comment above; confirm
        # the pooling sits after the inner loop rather than inside it.
        if stage in pool_at and z is not None:
            z = Lambda(bilinear_pooling, name='bilinear_pooling' + str(stage))([z, z])
            pooling_outputs.append(z)
            print(z.get_shape().as_list())

    x = GlobalAveragePooling2D(name='global_pooling_top')(x)
    pooling_outputs.append(x)

    x = Concatenate(name='feature_concat')(pooling_outputs)
    x = make_dense_layers(dense_layers, dropout=dropout_rate)(x)
    pred = Dense(num_classes, activation='softmax')(x)

    return KerasModel(inputs=input_img, outputs=pred)
def encoder(self):
    """ DFL H128 Encoder.

    Four conv blocks, a dense bottleneck of ``self.encoder_dim`` units, a
    reshape to ``(8, 8, self.encoder_dim)`` and one upscale block.
    """
    input_ = Input(shape=self.input_shape)

    tensor = input_
    for filters in (128, 256, 512, 1024):
        tensor = self.blocks.conv(tensor, filters)

    # Dense bottleneck.
    bottleneck = Dense(self.encoder_dim)
    tensor = bottleneck(Flatten()(tensor))
    tensor = Dense(8 * 8 * self.encoder_dim)(tensor)
    tensor = Reshape((8, 8, self.encoder_dim))(tensor)

    tensor = self.blocks.upscale(tensor, self.encoder_dim)
    return KerasModel(input_, tensor)
def encoder(self):
    """ Encoder Network.

    Three conv blocks (plus a fourth 1024-filter block unless the ``lowmem``
    config option is set), a dense bottleneck of ``self.encoder_dim`` units,
    a reshape to ``(4, 4, 1024)`` and one 512-filter upscale block.
    """
    input_ = Input(shape=self.input_shape)

    conv_filters = [128, 256, 512]
    tensor = input_
    for filters in conv_filters:
        tensor = self.blocks.conv(tensor, filters)

    # The widest conv block is skipped in low-memory mode.
    low_memory = self.config.get("lowmem", False)
    if not low_memory:
        tensor = self.blocks.conv(tensor, 1024)

    # Dense bottleneck.
    bottleneck = Dense(self.encoder_dim)
    tensor = bottleneck(Flatten()(tensor))
    tensor = Dense(4 * 4 * 1024)(tensor)
    tensor = Reshape((4, 4, 1024))(tensor)

    tensor = self.blocks.upscale(tensor, 512)
    return KerasModel(input_, tensor)
def initModel(self):
    """Create and compile the two autoencoders sharing one encoder.

    ``self.autoencoder_A`` / ``self.autoencoder_B`` chain the shared encoder
    into decoder A / decoder B.  Two-GPU replicas are stored in
    ``self.autoencoder_A_multi`` / ``self.autoencoder_B_multi``.  When the
    multi-GPU setup fails, each ``*_multi`` attribute falls back to its own
    single-device model.
    """
    optimizer = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
    x = Input(shape=IMAGE_SHAPE)

    self.autoencoder_A = KerasModel(x, self.decoder_A(self.encoder(x)))
    self.autoencoder_B = KerasModel(x, self.decoder_B(self.encoder(x)))

    try:
        self.autoencoder_A_multi = multi_gpu_model(self.autoencoder_A, gpus=2)
        self.autoencoder_B_multi = multi_gpu_model(self.autoencoder_B, gpus=2)
        self.autoencoder_A_multi.compile(optimizer=optimizer, loss='mean_absolute_error')
        self.autoencoder_B_multi.compile(optimizer=optimizer, loss='mean_absolute_error')
    except Exception:  # multi-GPU is best effort; KeyboardInterrupt etc. still propagate
        self.autoencoder_A_multi = self.autoencoder_A
        # Previously this assigned autoencoder_A, so side B silently trained
        # and predicted with decoder A whenever the fallback was taken.
        self.autoencoder_B_multi = self.autoencoder_B

    # NOTE(review): the single-device models are compiled on both paths; the
    # collapsed source does not show whether this was meant for the fallback only.
    self.autoencoder_A.compile(optimizer=optimizer, loss='mean_absolute_error')
    self.autoencoder_B.compile(optimizer=optimizer, loss='mean_absolute_error')
def load(self):
    """
    Build the model by chaining the configured layers after the input.

    Returns
    -------
    tensorflow.keras.models.Model
        A neural network of sequential layers from the configured layer
        list.
    """
    model_input = Input(**self._input_params)

    # Each configured layer is applied to the output of the previous one.
    tensor = model_input
    for layer_config in self._layers:
        layer = layer_from_config(layer_config)
        tensor = layer(tensor)

    return KerasModel(inputs=model_input, outputs=tensor)
def evaluate(self, x, y, batch_size: int = 16, verbose: bool = True) -> float:
    """Evaluate the model and return the mean character accuracy.

    Predictions come from the ``ctc_decoded`` layer of ``self.network``.
    Labels are mapped to characters through ``self.data.mapping`` and the
    characters ``" |_"`` are stripped from both ends.  Character accuracy is
    ``1 - edit_distance / len(true_string)``.

    Parameters
    ----------
    x, y :
        Evaluation inputs and targets (``np.argmax(y, -1)`` is taken as the
        true label sequence).
    batch_size : int
        Batch size of the evaluation sequence.
    verbose : bool
        When true, print the least accurate, most accurate and some random
        predictions.
    """
    test_sequence = DatasetSequence(x, y, batch_size, format_fn=self.batch_format_fn)

    # We can use the `ctc_decoded` layer that is part of our model here.
    decoding_model = KerasModel(
        inputs=self.network.input,
        outputs=self.network.get_layer("ctc_decoded").output)
    preds = decoding_model.predict(test_sequence)
    trues = np.argmax(y, -1)

    def to_string(labels):
        # Unknown labels map to the empty string.
        return "".join(self.data.mapping.get(label, "") for label in labels).strip(" |_")

    pred_strings = [to_string(pred) for pred in preds]
    true_strings = [to_string(true) for true in trues]

    # max(..., 1) guards against a ZeroDivisionError when a true string is
    # empty after stripping; the accuracy is then 1 minus the edit distance.
    char_accuracies = [
        1 - editdistance.eval(true_string, pred_string) / max(len(true_string), 1)
        for pred_string, true_string in zip(pred_strings, true_strings)
    ]

    if verbose:
        sorted_ind = np.argsort(char_accuracies)
        print("\nLeast accurate predictions:")
        for ind in sorted_ind[:5]:
            print(f"True: {true_strings[ind]}")
            print(f"Pred: {pred_strings[ind]}")
        print("\nMost accurate predictions:")
        for ind in sorted_ind[-5:]:
            print(f"True: {true_strings[ind]}")
            print(f"Pred: {pred_strings[ind]}")
        print("\nRandom predictions:")
        random_ind = np.random.randint(0, len(char_accuracies), 5)
        for ind in random_ind:  # pylint: disable=not-an-iterable
            print(f"True: {true_strings[ind]}")
            print(f"Pred: {pred_strings[ind]}")

    mean_accuracy = np.mean(char_accuracies)
    return mean_accuracy
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14):  # pylint: disable=too-many-locals
    """Lab 3 template: sliding-window CNN + LSTM line model with CTC loss.

    The feature-extraction section is intentionally left blank for the lab;
    it must define ``softmax_output`` before the CTC layers below can be
    built.

    Parameters
    ----------
    input_shape : tuple
        ``(image_height, image_width)``.
    output_shape : tuple
        ``(output_length, num_classes)``.
    window_width, window_stride : int
        Sliding-window geometry.

    Raises
    ------
    ValueError
        If fewer than ``output_length`` windows would be generated.
    """
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
    if num_windows < output_length:
        raise ValueError(
            f'Window width/stride need to generate >= {output_length} windows (currently {num_windows})'
        )

    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length, ), name='y_true')
    input_length = Input(shape=(1, ), name='input_length')
    label_length = Input(shape=(1, ), name='label_length')

    # Look for an actual GPU device instead of counting devices: the count
    # depends on how many XLA pseudo-devices TensorFlow lists, so the old
    # `len(...) > 2` test could miss a GPU.
    gpu_present = any(
        device.device_type == 'GPU'
        for device in device_lib.list_local_devices()
    )
    lstm_fn = CuDNNLSTM if gpu_present else LSTM

    # Your code should use slide_window and extract image patches from image_input.
    # Pass a convolutional model over each image patch to generate a feature vector per window.
    # Pass these features through one or more LSTM layers.
    # Convert the lstm outputs to softmax outputs.
    # Note that lstms expect a input of shape (num_batch_size, num_timesteps, feature_length).

    # Your code below (Lab 3)
    # NOTE(review): `softmax_output` must be defined in this section; until it
    # is, the code below raises NameError.
    # Your code above (Lab 3)

    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows})(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]),
        name='ctc_loss')(
            [y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded')([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output])
    return model
def sub_model(self, layer_name):
    """
    Create a sub-model that shares the inputs of the deoxys model and
    outputs the activation of one of its layers.

    Parameters
    ----------
    layer_name : str
        Name of the layer whose output becomes the sub-model output

    Returns
    -------
    tensorflow.keras.models.Model
        Model, whose outputs are of the layer_name
    """
    target_layer = self.layers[layer_name]
    return KerasModel(inputs=self.model.inputs, outputs=target_layer.output)
def deepten(num_classes,
            input_shape,
            backbone_cnn=None,
            encode_K=32,
            conv1x1=128,
            dense_layers=[],
            dropout_rate=None):
    '''Combine a backbone CNN + Encoding layer + Dense layers into a DeepTEN.

    Parameters
    ----------
    num_classes : int
        Number of classes for the softmax output layer.
    input_shape : tuple of int
        Shape of the input image, forwarded to `make_backbone`.
    backbone_cnn : KerasModel or str
        Feature extraction network. If KerasModel, should output features
        (N, H, W, C). If str, loads the corresponding ImageNet model via
        `make_backbone`. Must not be None.
    encode_K : int, optional
        Number of codewords to learn, default=32.
    conv1x1 : int, optional
        Add a 1x1 conv to reduce the number of filters in
        backbone_cnn.output before the Encoding layer, default=128.
    dense_layers : iterable of int, optional
        Sizes for additional Dense layers between Encoding.output and
        softmax, default=[].
    dropout_rate : float, optional
        Dropout rate for the Encoding and Dense layers.

    Returns
    -------
    DeepTEN : KerasModel
        Deep Texture Encoding Network
    '''
    assert backbone_cnn is not None

    backbone_model = make_backbone(backbone_cnn, input_shape)
    features = backbone_model.output

    # Optional 1x1 bottleneck before the Encoding layer.
    # NOTE(review): BatchNormalization is placed inside this branch; the
    # collapsed source does not show its nesting -- confirm.
    if conv1x1 is not None:
        features = Conv2D(conv1x1, (1, 1), activation='relu')(features)
        features = BatchNormalization()(features)

    encoded = Encoding(encode_K, dropout=dropout_rate)(features)
    hidden = make_dense_layers(dense_layers, dropout=dropout_rate)(encoded)
    pred = Dense(num_classes, activation='softmax')(hidden)

    return KerasModel(inputs=backbone_model.input, outputs=pred)
def make_backbone(backbone_cnn, input_shape):
    '''Validate an existing backbone Model or load a pretrained one by name.

    Returns None for None, the model itself for a KerasModel with a 4D
    output, or the `keras_apps` model registered under the given string
    (built with include_top=False). Any other type raises ValueError.
    '''
    if backbone_cnn is None:
        return None

    if isinstance(backbone_cnn, KerasModel):
        n_dims = len(backbone_cnn.output_shape)
        assert n_dims == 4, 'backbone_cnn.output must output a 4D Tensor'
        return backbone_cnn

    if not isinstance(backbone_cnn, str):
        raise ValueError('input to make_backbone() has invalid type')

    assert backbone_cnn in keras_apps, 'Invalid keras.applications string'
    builder = keras_apps[backbone_cnn]
    model = builder(include_top=False, input_shape=input_shape)

    # resnet50 ends with a 7x7 pooling, which collapses conv to 1x1 for
    # 224x224 input, so the model is cut one layer earlier.
    if backbone_cnn == 'resnet50':
        model = KerasModel(inputs=model.input, outputs=model.layers[-2].output)
    return model
def defineModel(self, maxlen, max_features=20000, embed_size=128,
                number_of_classes=1):
    """Build and compile an Embedding + LSTM text classifier.

    Parameters
    ----------
    maxlen : int
        Length of the input token sequences.
    max_features : int
        Vocabulary size of the embedding layer.
    embed_size : int
        Embedding dimension.
    number_of_classes : int
        Number of output units.

    Returns
    -------
    KerasModel
        Model compiled with binary cross entropy, Adam and accuracy.
    """
    # Input layer
    inp = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size)(inp)

    # LSTM layer
    x = LSTM(60, return_sequences=True, name='lstm_layer')(x)
    x = GlobalMaxPool1D()(x)
    x = Dropout(0.1)(x)
    x = Dense(50, activation="relu")(x)
    x = Dropout(0.1)(x)

    # Softmax over a single unit always outputs 1.0, so the default
    # one-output model could never learn; use a sigmoid in that case.
    # NOTE(review): with several outputs and binary_crossentropy a sigmoid
    # (multi-label) head may also be intended -- softmax is kept there to
    # preserve existing behaviour.
    output_activation = "sigmoid" if number_of_classes == 1 else "softmax"
    x = Dense(number_of_classes, activation=output_activation)(x)

    model = KerasModel(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
# lstm_output = Bidirectional(lstm_fn(256, return_sequences=True))(lstm_output) # lstm_output = Dropout(0.5)(lstm_output) lstm_output = BatchNormalization()(lstm_output) lstm_output = Conv1D(256, 3, activation='relu', padding='SAME')(lstm_output) lstm_output = Dropout(0.5)(lstm_output) softmax_output = Dense(num_classes, activation='softmax', name='softmax_output')(lstm_output) # (num_windows, num_classes) ##### Your code above (Lab 3) input_length_processed = Lambda( lambda x, num_windows=None: x * num_windows, arguments={'num_windows': num_windows} )(input_length) ctc_loss_output = Lambda( lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]), name='ctc_loss' )([y_true, softmax_output, input_length_processed, label_length]) ctc_decoded_output = Lambda( lambda x: ctc_decode(x[0], x[1], output_length), name='ctc_decoded' )([softmax_output, input_length_processed]) model = KerasModel( inputs=[image_input, y_true, input_length, label_length], outputs=[ctc_loss_output, ctc_decoded_output] ) return model