def make_resize():
    """Returns the image resize model."""
    input_image = Input(shape=(64, 64, 3))
    resized_image = Lambda(
        lambda x: K.resize_images(x, height_factor=3, width_factor=3,
                                  data_format='channels_last'))(input_image)
    return Model(inputs=[input_image], outputs=[resized_image])
def _resize_nearest_neighbour(self, input_tensor, size):
    """ Resize a tensor using nearest neighbor interpolation.

    Notes
    -----
    Tensorflow has a bug that resizes the image incorrectly if
    :attr:`align_corners` is not set to ``True``. Keras Backend does not set
    this flag, so we explicitly call the Tensorflow operation for non-amd
    backends.

    Parameters
    ----------
    input_tensor: tensor
        The tensor to be resized
    size: tuple
        The (`h`, `w`) that the tensor should be resized to (used for
        non-amd backends only)

    Returns
    -------
    tensor
        The input tensor resized to the given size
    """
    if get_backend() == "amd":
        retval = K.resize_images(input_tensor, self.scale, self.scale,
                                 "channels_last", interpolation="nearest")
    else:
        retval = tf.image.resize_nearest_neighbor(input_tensor, size=size,
                                                  align_corners=True)
    logger.debug("Input Tensor: %s, Output Tensor: %s", input_tensor, retval)
    return retval
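# A minimal sketch (not from the source) illustrating the align_corners
# difference the docstring above warns about; assumes TensorFlow 1.x, where
# tf.image.resize_nearest_neighbor and Sessions are available.
import numpy as np
import tensorflow as tf

image = np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1)

with tf.Session() as sess:
    default = tf.image.resize_nearest_neighbor(image, size=(4, 4))
    aligned = tf.image.resize_nearest_neighbor(image, size=(4, 4),
                                               align_corners=True)
    # The two upsamplings place the source pixels differently; the Keras
    # backend uses the default, hence the explicit TF call in the wrapper.
    print(sess.run(default)[0, :, :, 0])
    print(sess.run(aligned)[0, :, :, 0])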
def _refine_boundary(self, low_features, features):
    """Refine segmentation boundary.

    Parameters
    ----------
    low_features: Tensor
        Image input tensor.
    features: Tensor
        Encoder's output tensor.

    Returns
    -------
    Tensor
        Refined features.
    """
    low_features = self.base(low_features)
    low_features = Conv2D(48,
                          kernel_size=1,
                          padding='same',
                          use_bias=False,
                          kernel_regularizer=regularizers.l2(
                              self.hps['weight_decay']))(low_features)
    low_features = BatchNormalization(
        momentum=self.hps['bn_momentum'],
        scale=self.hps['bn_scale'])(low_features)
    low_features = Activation('relu')(low_features)

    # Resize low_features, features. K.resize_images expects integer scale
    # factors, so use floor division.
    output_stride = self.nn_arch['output_stride']
    low_features = Lambda(
        lambda x: K.resize_images(x,
                                  output_stride // 2,
                                  output_stride // 2,
                                  "channels_last",
                                  interpolation='bilinear'))(low_features)  # ?
    features = Lambda(
        lambda x: K.resize_images(x,
                                  output_stride // 2,
                                  output_stride // 2,
                                  "channels_last",
                                  interpolation='bilinear'))(features)  # ?

    x = Concatenate(axis=-1)([low_features, features])
    return x
def build_image_resizer(input_shape=(64, 64, 3)):
    input_layer = Input(shape=input_shape)

    resized_images = Lambda(
        lambda x: K.resize_images(x, height_factor=3, width_factor=3,
                                  data_format='channels_last'))(input_layer)

    model = Model(inputs=[input_layer], outputs=[resized_images])
    return model
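# A quick usage example (the random batch is purely illustrative): each
# spatial dimension is scaled by the factor of 3 baked into the Lambda.
import numpy as np

resizer = build_image_resizer()
batch = np.random.rand(2, 64, 64, 3).astype(np.float32)
print(resizer.predict(batch).shape)  # (2, 192, 192, 3)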
def aspp(inputs):
    depth = inputs.shape[-1]
    # Atrous (dilated) convolution rates [1, 6, 12, 18]; the 1x1 branch
    # below covers rate 1.
    atrous_rates = [6, 12, 18]
    conv_1x1 = Conv2D(depth, 1, strides=1)(inputs)
    conv_3x3_1 = Conv2D(depth, 3, strides=1,
                        dilation_rate=atrous_rates[0], padding='same')(inputs)
    conv_3x3_2 = Conv2D(depth, 3, strides=1,
                        dilation_rate=atrous_rates[1], padding='same')(inputs)
    conv_3x3_3 = Conv2D(depth, 3, strides=1,
                        dilation_rate=atrous_rates[2], padding='same')(inputs)

    # Global average pooling branch.
    averagepooling = GlobalAveragePooling2D()(inputs)
    averagepooling = Reshape((1, 1, inputs.shape[-1]))(averagepooling)
    averagepooling_conv_1x1 = Conv2D(depth, 1, strides=1)(averagepooling)
    # The pooled map is 1x1, so the scale factors equal the target size.
    averagepooling_resize = K.resize_images(averagepooling_conv_1x1,
                                            inputs.shape[1],
                                            inputs.shape[2],
                                            data_format='channels_last',
                                            interpolation='bilinear')

    return K.concatenate((conv_1x1, conv_3x3_1, conv_3x3_2, conv_3x3_3,
                          averagepooling_resize), -1)
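# A standalone check (illustrative, not from the source) of the point made in
# the comment above: for a 1x1 pooled map, K.resize_images' scale factors
# coincide with the absolute target size. Assumes a Keras version whose
# K.resize_images accepts the interpolation keyword.
import numpy as np
from keras import backend as K

pooled = K.constant(np.ones((1, 1, 1, 8), dtype=np.float32))
upsampled = K.resize_images(pooled, 16, 16, data_format='channels_last',
                            interpolation='bilinear')
print(K.int_shape(upsampled))  # (1, 16, 16, 8)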
def call(self, input_tensor, training=None):
    input_transposed = tf.transpose(input_tensor, [3, 0, 1, 2, 4])
    input_shape = K.shape(input_transposed)
    input_tensor_reshaped = K.reshape(input_transposed, [
        input_shape[1] * input_shape[0], self.input_height,
        self.input_width, self.input_num_atoms])
    input_tensor_reshaped.set_shape((None, self.input_height,
                                     self.input_width, self.input_num_atoms))

    if self.upsamp_type == 'resize':
        upsamp = K.resize_images(input_tensor_reshaped, self.scaling,
                                 self.scaling, 'channels_last')
        outputs = K.conv2d(upsamp, kernel=self.W, strides=(1, 1),
                           padding=self.padding, data_format='channels_last')
    elif self.upsamp_type == 'subpix':
        conv = K.conv2d(input_tensor_reshaped, kernel=self.W,
                        strides=(1, 1), padding='same',
                        data_format='channels_last')
        outputs = tf.depth_to_space(conv, self.scaling)
    else:
        batch_size = input_shape[1] * input_shape[0]

        # Infer the dynamic output shape:
        out_height = deconv_length(self.input_height, self.scaling,
                                   self.kernel_size, self.padding,
                                   output_padding=None)
        out_width = deconv_length(self.input_width, self.scaling,
                                  self.kernel_size, self.padding,
                                  output_padding=None)
        output_shape = (batch_size, out_height, out_width,
                        self.num_capsule * self.num_atoms)

        outputs = K.conv2d_transpose(input_tensor_reshaped, self.W,
                                     output_shape,
                                     (self.scaling, self.scaling),
                                     padding=self.padding,
                                     data_format='channels_last')

    votes_shape = K.shape(outputs)
    _, conv_height, conv_width, _ = outputs.get_shape()

    votes = K.reshape(outputs, [input_shape[1], input_shape[0],
                                votes_shape[1], votes_shape[2],
                                self.num_capsule, self.num_atoms])
    votes.set_shape((None, self.input_num_capsule, conv_height, conv_width,
                     self.num_capsule, self.num_atoms))

    logit_shape = K.stack([input_shape[1], input_shape[0],
                           votes_shape[1], votes_shape[2], self.num_capsule])
    biases_replicated = K.tile(self.b, [votes_shape[1], votes_shape[2], 1, 1])

    activations = update_routing(
        votes=votes,
        biases=biases_replicated,
        logit_shape=logit_shape,
        num_dims=6,
        input_dim=self.input_num_capsule,
        output_dim=self.num_capsule,
        num_routing=self.routings)

    return activations
def call(self, x, **kwargs):
    means = K.pool2d(x, self.tile_size, strides=self.tile_size,
                     padding="same", pool_mode="avg",
                     data_format="channels_last")
    mean_matrix = K.resize_images(
        means, self.tile_size[0], self.tile_size[1],
        data_format="channels_last")[:, 0:K.shape(x)[1], 0:K.shape(x)[2], :]
    quad_diff = (x - mean_matrix) ** 2
    return K.pool2d(quad_diff, self.tile_size, strides=self.tile_size,
                    padding="same", pool_mode="avg")
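# For context, a sketch of a layer this per-tile-variance call method could
# live in. The class name TileVariance and the compute_output_shape are
# assumptions, not from the source; the output shape follows from the
# "same"-padded pooling with stride equal to the tile size.
from keras import backend as K
from keras.layers import Layer

class TileVariance(Layer):
    """Mean squared deviation of each tile from its own mean."""

    def __init__(self, tile_size=(2, 2), **kwargs):
        super(TileVariance, self).__init__(**kwargs)
        self.tile_size = tuple(tile_size)

    def call(self, x, **kwargs):
        means = K.pool2d(x, self.tile_size, strides=self.tile_size,
                         padding="same", pool_mode="avg",
                         data_format="channels_last")
        mean_matrix = K.resize_images(
            means, self.tile_size[0], self.tile_size[1],
            data_format="channels_last")[:, 0:K.shape(x)[1],
                                         0:K.shape(x)[2], :]
        quad_diff = (x - mean_matrix) ** 2
        return K.pool2d(quad_diff, self.tile_size, strides=self.tile_size,
                        padding="same", pool_mode="avg")

    def compute_output_shape(self, input_shape):
        # "same" padding with stride == tile size means ceil division.
        rows = (None if input_shape[1] is None
                else -(-input_shape[1] // self.tile_size[0]))
        cols = (None if input_shape[2] is None
                else -(-input_shape[2] // self.tile_size[1]))
        return (input_shape[0], rows, cols, input_shape[3])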
def resize_image(inp, s, data_format):
    try:
        return Lambda(
            lambda x: K.resize_images(x, height_factor=s[0],
                                      width_factor=s[1],
                                      data_format=data_format,
                                      interpolation='bilinear'))(inp)
    except Exception:
        # If Keras is old, rely on the tf function... sorry theano/cntk users.
        assert data_format == 'channels_last'
        assert IMAGE_ORDERING == 'channels_last'

        import tensorflow as tf
        return Lambda(
            lambda x: tf.image.resize_images(
                x, (K.int_shape(x)[1] * s[0],
                    K.int_shape(x)[2] * s[1])))(inp)
def build_fr_combined_network(encoder, generator, fr_model,
                              input_shape=(64, 64, 3), label_shape=(6,)):
    input_image = Input(shape=input_shape)
    input_label = Input(shape=label_shape)

    latent0 = encoder(input_image)
    gen_images = generator([latent0, input_label])

    fr_model.trainable = False

    # Resize the Lambda's own argument rather than closing over gen_images,
    # so the layer operates on whatever tensor it is called on.
    resized_images = Lambda(
        lambda x: K.resize_images(x, height_factor=2, width_factor=2,
                                  data_format='channels_last'))(gen_images)
    embeddings = fr_model(resized_images)

    model = Model(inputs=[input_image, input_label], outputs=[embeddings])
    return model
def _make_decoder(self):
    """Make decoder."""
    assert hasattr(self, 'base') and hasattr(self, 'encoder')

    inputs = self.encoder.outputs
    features = Input(shape=K.int_shape(inputs[0])[1:])

    if self.nn_arch['boundary_refinement']:
        # Refine boundary.
        low_features = Input(shape=K.int_shape(self.encoder.inputs[0])[1:])
        x = self._refine_boundary(low_features, features)
    else:
        x = features

    # Upsampling & softmax.
    x = Conv2D(self.nn_arch['num_classes'],
               kernel_size=3,
               padding='same',
               use_bias=False,
               kernel_regularizer=regularizers.l2(
                   self.hps['weight_decay']))(x)  # Kernel size?

    output_stride = self.nn_arch['output_stride']

    if self.nn_arch['boundary_refinement']:
        # Integer factors, as K.resize_images requires.
        output_stride = (output_stride // 8 if output_stride == 16
                         else output_stride // 4)

    x = Lambda(lambda x: K.resize_images(x,
                                         output_stride,
                                         output_stride,
                                         "channels_last",
                                         interpolation='bilinear'))(x)  # ?
    outputs = Activation('softmax')(x)

    self.decoder = Model(inputs=[low_features, features], outputs=outputs) \
        if self.nn_arch['boundary_refinement'] \
        else Model(inputs=[features], outputs=outputs)
    self.decoder._init_set_name('decoder')
def call(self, inputs, training=None, mask=None):
    assert (len(inputs) == 2)

    img, rois = inputs[0], inputs[1]
    # tf.shape() returns a dynamic tensor that cannot be used in the static
    # K.reshape below, so the channel count is hard-coded instead.
    nb_channels = 2048

    outputs = []
    for roi_idx in range(self.num_rois):
        x = K.cast(rois[0, roi_idx, 0], 'int32')
        y = K.cast(rois[0, roi_idx, 1], 'int32')
        w = K.cast(rois[0, roi_idx, 2], 'int32')
        h = K.cast(rois[0, roi_idx, 3], 'int32')

        rs = K.resize_images(img[:, y:y + h, x:x + w, :],
                             self.pool_size, self.pool_size,
                             K.image_data_format())
        outputs.append(rs)

    final_output = K.concatenate(outputs, axis=0)
    final_output = K.reshape(
        final_output,
        (1, self.num_rois, self.pool_size, self.pool_size, nb_channels))
    return final_output
def _make_encoder(self):
    """Make encoder."""
    assert hasattr(self, 'base')

    # Inputs.
    input_image = Input(shape=(self.nn_arch['image_size'],
                               self.nn_arch['image_size'], 3),
                        name='input_image')

    # Extract feature.
    x = self.base(input_image)

    # Conduct dilated convolution pooling.
    pooled_outputs = []
    for conf in self.nn_arch["encoder_middle_conf"]:
        if conf['input'] == -1:
            x2 = x  # ?
        else:
            x2 = pooled_outputs[conf['input']]

        if conf['op'] == 'conv':
            if conf['kernel'] == 1:
                x2 = Conv2D(self.nn_arch['reduction_size'],
                            kernel_size=1,
                            padding='same',
                            use_bias=False,
                            kernel_regularizer=regularizers.l2(
                                self.hps['weight_decay']))(x2)
                x2 = BatchNormalization(momentum=self.hps['bn_momentum'],
                                        scale=self.hps['bn_scale'])(x2)
                x2 = Activation('relu')(x2)
            else:
                # Split separable conv2d.
                x2 = SeparableConv2D(
                    self.nn_arch['reduction_size'],  # ?
                    conf['kernel'],
                    depth_multiplier=1,
                    dilation_rate=(conf['rate'][0]
                                   * self.nn_arch['conv_rate_multiplier'],
                                   conf['rate'][1]
                                   * self.nn_arch['conv_rate_multiplier']),
                    padding='same',
                    use_bias=False,
                    kernel_initializer=initializers.TruncatedNormal())(x2)
                x2 = BatchNormalization(momentum=self.hps['bn_momentum'],
                                        scale=self.hps['bn_scale'])(x2)
                x2 = Activation('relu')(x2)
                x2 = Conv2D(self.nn_arch['reduction_size'],
                            kernel_size=1,
                            padding='same',
                            use_bias=False,
                            kernel_initializer=initializers.TruncatedNormal(),
                            kernel_regularizer=regularizers.l2(
                                self.hps['weight_decay']))(x2)
                x2 = BatchNormalization(momentum=self.hps['bn_momentum'],
                                        scale=self.hps['bn_scale'])(x2)
                x2 = Activation('relu')(x2)
        elif conf['op'] == 'pyramid_pooling':
            x2 = AveragePooling2D(pool_size=conf['kernel'],
                                  padding='valid')(x2)
            x2 = Conv2D(self.nn_arch['reduction_size'],
                        kernel_size=1,
                        padding='same',
                        use_bias=False,
                        kernel_regularizer=regularizers.l2(
                            self.hps['weight_decay']))(x2)
            x2 = BatchNormalization(momentum=self.hps['bn_momentum'],
                                    scale=self.hps['bn_scale'])(x2)
            x2 = Activation('relu')(x2)

            target_size = conf['target_size_factor']  # ?
            # Bind target_size as a default argument: a plain closure is
            # late-bound, so every Lambda created in this loop would see
            # the final iteration's value.
            x2 = Lambda(
                lambda x, ts=target_size: K.resize_images(
                    x, ts[0], ts[1], "channels_last",
                    interpolation='bilinear'))(x2)  # ?
        else:
            raise ValueError('Invalid operation.')

        pooled_outputs.append(x2)

    # Concatenate pooled tensors.
    x3 = Concatenate(axis=-1)(pooled_outputs)
    x3 = Dropout(rate=self.nn_arch['dropout_rate'])(x3)
    x3 = Conv2D(self.nn_arch['concat_channels'],
                kernel_size=1,
                padding='same',
                use_bias=False,
                kernel_regularizer=regularizers.l2(
                    self.hps['weight_decay']))(x3)
    x3 = BatchNormalization(momentum=self.hps['bn_momentum'],
                            scale=self.hps['bn_scale'])(x3)
    x3 = Activation('relu')(x3)
    # output = Dropout(rate=self.nn_arch['dropout_rate'])(x3)
    output = x3

    self.encoder = Model(input_image, output)
    self.encoder._init_set_name('encoder')
def upsample_to_size(x):
    y = im_size // x.shape[2]
    x = K.resize_images(x, y, y, "channels_last", interpolation='bilinear')
    return x
def upsample(x):
    return K.resize_images(x, 2, 2, "channels_last", interpolation='bilinear')
def upsample_to_size(x):
    # import pdb; pdb.set_trace()
    y = im_size // x.shape[2]
    # Note: the int32 cast truncates fractional pixel values before the
    # bilinear resize; drop it if the input should stay floating point.
    x = tf.cast(x, tf.int32)
    x = K.resize_images(x, y, y, "channels_last", interpolation='bilinear')
    return x
def upsample_to_size(x):  # changed
    # Integer scale factors relative to the global target shape.
    y_h = to_shape[0] // x.shape[1]
    y_w = to_shape[1] // x.shape[2]
    x = K.resize_images(x, y_h, y_w, "channels_last",
                        interpolation='bilinear')
    return x
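# These upsample helpers close over module-level names (im_size, to_shape).
# A hedged alternative (a sketch, not from the source) binds the target
# shape via a default argument, which also avoids Python's late-binding
# behaviour when such functions are created inside a loop:
from keras import backend as K
from keras.layers import Lambda

def make_upsampler(to_shape):
    def upsample_to_size(x, shape=to_shape):
        # Default-argument binding freezes `shape` at definition time.
        y_h = int(shape[0]) // int(x.shape[1])
        y_w = int(shape[1]) // int(x.shape[2])
        return K.resize_images(x, y_h, y_w, "channels_last",
                               interpolation='bilinear')
    return Lambda(upsample_to_size)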
def build_origin(self, print_summary=False, num_classes=5,
                 image_size=(352, 640, 3)):
    input_tensor = keras.layers.Input(image_size)
    conv_0 = self.build_conv2D_block(input_tensor, filters=24,
                                     kernel_size=1, strides=1)
    # conv stage 1
    conv_1 = self.build_conv2D_block(conv_0, filters=64, kernel_size=3,
                                     strides=1)
    conv_1 = self.build_conv2D_block(conv_1, filters=64, kernel_size=3,
                                     strides=1)
    # pool stage 1
    pool1 = MaxPooling2D()(conv_1)
    # conv stage 2
    conv_2 = self.build_conv2D_block(pool1, filters=128, kernel_size=3,
                                     strides=1)
    conv_2 = self.build_conv2D_block(conv_2, filters=128, kernel_size=3,
                                     strides=1)
    # pool stage 2
    pool2 = MaxPooling2D()(conv_2)
    # conv stage 3
    conv_3 = self.build_conv2D_block(pool2, filters=256, kernel_size=3,
                                     strides=1)
    conv_3 = self.build_conv2D_block(conv_3, filters=256, kernel_size=3,
                                     strides=1)
    conv_3 = self.build_conv2D_block(conv_3, filters=256, kernel_size=3,
                                     strides=1)
    # pool stage 3
    pool3 = MaxPooling2D()(conv_3)
    # conv stage 4
    conv_4 = self.build_conv2D_block(pool3, filters=512, kernel_size=3,
                                     strides=1)
    conv_4 = self.build_conv2D_block(conv_4, filters=512, kernel_size=3,
                                     strides=1)
    conv_4 = self.build_conv2D_block(conv_4, filters=512, kernel_size=3,
                                     strides=1)
    # pool4 = MaxPooling2D()(conv_4)

    ### add dilated convolution ###
    # conv stage 5_1
    conv_5 = self.build_conv2D_block(conv_4, filters=512, kernel_size=3,
                                     strides=1, dilation_rate=2)
    conv_5 = self.build_conv2D_block(conv_5, filters=512, kernel_size=3,
                                     strides=1, dilation_rate=2)
    conv_5 = self.build_conv2D_block(conv_5, filters=512, kernel_size=3,
                                     strides=1, dilation_rate=2)

    # added part of SCNN #
    conv_6_4 = self.build_conv2D_block(conv_5, filters=1024, kernel_size=3,
                                       strides=1, dilation_rate=4)
    conv_6_5 = self.build_conv2D_block(conv_6_4, filters=128, kernel_size=1,
                                       strides=1)  # 8 x 36 x 100 x 128

    # add message passing #
    # top to down #
    feature_list_new = self.space_cnn_part(conv_6_5)

    #######################
    dropout_output = Dropout(0.9)(feature_list_new)
    conv_output = K.resize_images(
        dropout_output,
        height_factor=self.IMG_HEIGHT // dropout_output.shape[1],
        width_factor=self.IMG_WIDTH // dropout_output.shape[2],
        data_format="channels_last")
    ret_prob_output = Conv2D(filters=num_classes, kernel_size=1,
                             activation='softmax',
                             name='ctg_out_1')(conv_output)

    ### add lane existence prediction branch ###
    # spatial softmax #
    features = ret_prob_output  # N x H x W x C
    softmax = Activation('softmax')(features)
    avg_pool = AvgPool2D(strides=2)(softmax)
    _, H, W, C = avg_pool.get_shape().as_list()
    reshape_output = tf.reshape(avg_pool, [-1, H * W * C])
    fc_output = Dense(128)(reshape_output)
    relu_output = ReLU(max_value=6)(fc_output)
    existence_output = Dense(4, name='ctg_out_2')(relu_output)

    self.model = Model(inputs=input_tensor,
                       outputs=[ret_prob_output, existence_output])
    # print(self.model.summary())
    adam = optimizers.Adam(lr=0.001)
    sgd = optimizers.SGD(lr=0.001)
    if num_classes == 1:
        self.model.compile(optimizer=sgd,
                           loss="binary_crossentropy",
                           metrics=['accuracy'])
    else:
        self.model.compile(optimizer=sgd,
                           loss={'ctg_out_1': 'categorical_crossentropy',
                                 'ctg_out_2': 'binary_crossentropy'},
                           loss_weights={'ctg_out_1': 1.,
                                         'ctg_out_2': 0.2},
                           metrics=['accuracy', 'mse'])
conv17_2 = Conv2D(filters=512, kernel_size=[3, 3], strides=[1, 1],
                  padding="same", dilation_rate=[1, 1],
                  kernel_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/608_mean_Fused_Mul_1426714269_const.npy'.format(ds, height, width)).transpose(2, 3, 1, 0)),
                  bias_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/data_add_1330113306_copy_const.npy'.format(ds, height, width)).flatten()))(conv17_1)
add17_1 = Add()([conv17_2, relu16_1])
relu17_1 = ReLU()(add17_1)

# Block_18 ##########################################################
conv18_1 = Conv2D(filters=256, kernel_size=[3, 3], strides=[1, 1],
                  padding="same", dilation_rate=[1, 1], activation='elu',
                  kernel_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/onnx_initializer_node_mask_decoder.block1.pre_concat_conv.conv1.weight_Output_0_Data__const.npy'.format(ds, height, width)).transpose(2, 3, 1, 0)),
                  bias_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/612_Dims7877_copy_const.npy'.format(ds, height, width)).flatten()))(relu17_1)
conv18_2 = Conv2D(filters=256, kernel_size=[3, 3], strides=[1, 1],
                  padding="same", dilation_rate=[1, 1], activation='elu',
                  kernel_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/onnx_initializer_node_mask_decoder.block1.pre_concat_conv.conv2.weight_Output_0_Data__const.npy'.format(ds, height, width)).transpose(2, 3, 1, 0)),
                  bias_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/615_Dims7733_copy_const.npy'.format(ds, height, width)).flatten()))(conv18_1)
resize18_1 = resize_images(conv18_2, 2, 2, 'channels_last',
                           interpolation='nearest')
concat18_1 = Concatenate()([resize18_1, relu14_1])
conv18_3 = Conv2D(filters=256, kernel_size=[3, 3], strides=[1, 1],
                  padding="same", dilation_rate=[1, 1], activation='elu',
                  kernel_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/onnx_initializer_node_mask_decoder.block1.post_concat_conv.conv1.weight_Output_0_Data__const.npy'.format(ds, height, width)).transpose(2, 3, 1, 0)),
                  bias_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/628_Dims7787_copy_const.npy'.format(ds, height, width)).flatten()))(concat18_1)
conv18_4 = Conv2D(filters=256, kernel_size=[3, 3], strides=[1, 1],
                  padding="same", dilation_rate=[1, 1], activation='elu',
                  kernel_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/onnx_initializer_node_mask_decoder.block1.post_concat_conv.conv2.weight_Output_0_Data__const.npy'.format(ds, height, width)).transpose(2, 3, 1, 0)),
                  bias_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/631_Dims7859_copy_const.npy'.format(ds, height, width)).flatten()))(conv18_3)
conv18_5 = Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1],
                  padding="same", dilation_rate=[1, 1], activation='elu',
                  kernel_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/onnx_initializer_node_mask_decoder.block2.pre_concat_conv.conv1.weight_Output_0_Data__const.npy'.format(ds, height, width)).transpose(2, 3, 1, 0)),
                  bias_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/634_Dims7805_copy_const.npy'.format(ds, height, width)).flatten()))(conv18_4)
conv18_6 = Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1],
                  padding="same", dilation_rate=[1, 1], activation='elu',
                  kernel_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/onnx_initializer_node_mask_decoder.block2.pre_concat_conv.conv2.weight_Output_0_Data__const.npy'.format(ds, height, width)).transpose(2, 3, 1, 0)),
                  bias_initializer=Constant(np.load(
                      'weights/{}_{}x{}/FP32/637_Dims7913_copy_const.npy'.format(ds, height, width)).flatten()))(conv18_5)
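# The repeated transpose(2, 3, 1, 0) above converts the IR's
# (out_channels, in_channels, kH, kW) weight layout into the
# (kH, kW, in_channels, out_channels) layout Keras Conv2D expects.
# A quick sanity check of that mapping, with a dummy array standing in
# for the .npy files:
import numpy as np

ir_weights = np.zeros((512, 256, 3, 3), dtype=np.float32)  # (O, I, kH, kW)
keras_kernel = ir_weights.transpose(2, 3, 1, 0)            # (kH, kW, I, O)
print(keras_kernel.shape)  # (3, 3, 256, 512)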
def resize(img):
    # Takes an absolute target size, matching the tf.image.resize_images
    # signature rather than K.resize_images' integer scale factors.
    return resize_images(img, [66, 200])
def NNresize(x, scale=scale):
    # Nearest-neighbour (K.resize_images' default interpolation) upsampling.
    return K.resize_images(x, scale, scale, 'channels_last')
def upsample_to_size(x):
    # x[0] is the image, x[1] is the target size in HxW.
    img, size = x[0], x[1]
    # Integer scale factor; scales the same in both directions.
    y = size[0] // img.shape[2]
    return K.resize_images(img, y, y, "channels_last",
                           interpolation='bilinear')
def model(self, im, future_im, image_encoder, pose_encoder, renderer):
    im_dev, pose_dev, render_dev = None, None, None

    max_size = list(future_im.shape)[1:3]
    assert max_size[0] == max_size[1]
    max_size = max_size[0]

    # determine sizes for rendering
    render_sizes = []
    size = max_size
    stride = self._config.renderer_stride
    while True:
        render_sizes.append(size)
        if size <= self._config.min_res:
            break
        size = size // stride

    embeddings = dev_wrap(lambda: image_encoder(im), im_dev)
    gauss_pt, pose_embeddings = dev_wrap(
        lambda: pose_encoder(future_im, map_sizes=render_sizes), pose_dev)

    # helper
    def group_by_size(embeddings):
        # process image embeddings
        grouped_embeddings = defaultdict(list)
        for embedding in embeddings:
            size = list(embedding.shape)[1:3]
            assert size[0] == size[1]
            size = int(size[0])
            grouped_embeddings[size].append(embedding)
        return grouped_embeddings

    grouped_embeddings = group_by_size(embeddings)

    # down sample
    for render_size in render_sizes:
        if render_size not in grouped_embeddings:
            # find closest larger size and resize
            embedding_size = None
            embedding_sizes = sorted(list(grouped_embeddings.keys()))
            for embedding_size in embedding_sizes:
                if embedding_size >= render_size:
                    break
            resized_embeddings = []
            for embedding in grouped_embeddings[embedding_size]:
                # K.resize_images only scales by integer factors, so fall
                # back to the TF op, which takes an absolute target size
                # (render_size may be smaller than embedding_size here).
                resized_embeddings.append(
                    tf.image.resize_images(embedding,
                                           (render_size, render_size)))
            grouped_embeddings[render_size] += resized_embeddings

    # process pose embeddings
    grouped_pose_embeddings = group_by_size(pose_embeddings)

    # concatenate embeddings
    joint_embeddings = {}
    for rs in render_sizes:
        joint_embeddings[rs] = K.concatenate(
            grouped_embeddings[rs] + grouped_pose_embeddings[rs], axis=1)

    future_im_pred = dev_wrap(lambda: renderer(joint_embeddings), render_dev)

    workaround_channels = 0
    color_channels = list(future_im_pred.shape)[3] - workaround_channels
    # np.split takes split points, so a single index yields the first
    # color_channels channels and the (possibly empty) remainder.
    future_im_pred_mu, _ = np.split(future_im_pred, [color_channels], axis=3)

    return future_im_pred_mu, gauss_pt, pose_embeddings
def _loss_mask(map, mask):
    # K.resize_images scales by integer factors rather than to an absolute
    # size, so derive the factors from the two spatial shapes (assuming
    # channels_last data).
    height_factor = int(map.shape[1]) // int(mask.shape[1])
    width_factor = int(map.shape[2]) // int(mask.shape[2])
    mask = K.resize_images(mask, height_factor, width_factor, "channels_last")
    return map * mask
def call(self, inputs):
    return K.resize_images(inputs, self.factor, self.factor,
                           self.data_format)
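# A sketch of the complete layer this call method suggests; the class name
# ResizeImages, get_config, and compute_output_shape are assumptions, not
# from the source.
from keras import backend as K
from keras.layers import Layer

class ResizeImages(Layer):
    """Upsamples by an integer factor via K.resize_images."""

    def __init__(self, factor=2, data_format='channels_last', **kwargs):
        super(ResizeImages, self).__init__(**kwargs)
        self.factor = factor
        self.data_format = data_format

    def call(self, inputs):
        return K.resize_images(inputs, self.factor, self.factor,
                               self.data_format)

    def compute_output_shape(self, input_shape):
        def scaled(dim):
            return None if dim is None else dim * self.factor
        if self.data_format == 'channels_last':
            return (input_shape[0], scaled(input_shape[1]),
                    scaled(input_shape[2]), input_shape[3])
        return (input_shape[0], input_shape[1],
                scaled(input_shape[2]), scaled(input_shape[3]))

    def get_config(self):
        config = super(ResizeImages, self).get_config()
        config.update({'factor': self.factor,
                       'data_format': self.data_format})
        return config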
# Block_08
conv8_1 = Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1],
                 padding="same", dilation_rate=[1, 1], activation='relu',
                 kernel_initializer=Constant(np.load(
                     'weights/256x256/FP32/173_mean_Fused_Mul_50425044_const.npy').transpose(2, 3, 1, 0)),
                 bias_initializer=Constant(np.load(
                     'weights/256x256/FP32/data_add_47174722_copy_const.npy').flatten()))(relu7_1)
conv8_2 = Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1],
                 padding="same", dilation_rate=[1, 1],
                 kernel_initializer=Constant(np.load(
                     'weights/256x256/FP32/176_mean_Fused_Mul_50465048_const.npy').transpose(2, 3, 1, 0)),
                 bias_initializer=Constant(np.load(
                     'weights/256x256/FP32/data_add_47254730_copy_const.npy').flatten()))(conv8_1)
add8_1 = Add()([relu7_1, conv8_2])
relu8_1 = ReLU()(add8_1)

# Block_09
conv9_1 = Conv2D(filters=128, kernel_size=[3, 3], strides=[1, 1],
                 padding="same", dilation_rate=[1, 1], activation='relu',
                 kernel_initializer=Constant(np.load(
                     'weights/256x256/FP32/180_mean_Fused_Mul_50505052_const.npy').transpose(2, 3, 1, 0)),
                 bias_initializer=Constant(np.load(
                     'weights/256x256/FP32/data_add_47334738_copy_const.npy').flatten()))(relu8_1)
resize9_1 = resize_images(conv9_1, 2, 2, 'channels_last',
                          interpolation='nearest')
conv9_2 = Conv2D(filters=96, kernel_size=[3, 3], strides=[1, 1],
                 padding="same", dilation_rate=[1, 1], activation='relu',
                 kernel_initializer=Constant(np.load(
                     'weights/256x256/FP32/192_mean_Fused_Mul_50545056_const.npy').transpose(2, 3, 1, 0)),
                 bias_initializer=Constant(np.load(
                     'weights/256x256/FP32/data_add_47414746_copy_const.npy').flatten()))(resize9_1)
add9_1 = Add()([conv3_2, conv9_2])

# Block_10
conv10_1 = Conv2D(filters=96, kernel_size=[3, 3], strides=[1, 1],
                  padding="same", dilation_rate=[1, 1], activation='relu',
                  kernel_initializer=Constant(np.load(
                      'weights/256x256/FP32/196_mean_Fused_Mul_50585060_const.npy').transpose(2, 3, 1, 0)),
                  bias_initializer=Constant(np.load(
                      'weights/256x256/FP32/data_add_47494754_copy_const.npy').flatten()))(add9_1)
resize10_1 = resize_images(conv10_1, 2, 2, 'channels_last',
                           interpolation='nearest')
conv10_2 = Conv2D(filters=64, kernel_size=[3, 3], strides=[1, 1],
                  padding="same", dilation_rate=[1, 1], activation='relu',
                  kernel_initializer=Constant(np.load(
                      'weights/256x256/FP32/208_mean_Fused_Mul_50625064_const.npy').transpose(2, 3, 1, 0)),
                  bias_initializer=Constant(np.load(
                      'weights/256x256/FP32/data_add_47574762_copy_const.npy').flatten()))(resize10_1)
add10_1 = Add()([conv2_2, conv10_2])
X_test = X_test / 255.0

y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

####################################
#         Model definition         #
####################################
vgg16 = VGG16(weights="imagenet", include_top=False,
              input_shape=(224, 224, 3))
vgg16.trainable = False

model = Sequential()
# Upscale the 32x32 inputs by 7x to the 224x224 resolution VGG16 expects.
model.add(Lambda(lambda x: resize_images(x, 7, 7, "channels_last"),
                 input_shape=(32, 32, 3), trainable=False))
model.add(vgg16)
model.add(Flatten(trainable=False))
model.add(Dense(4096))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(4096))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.3))
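# A quick shape check (illustrative, not from the source) that the
# resize_images Lambda really produces the 224x224 input VGG16 expects:
import numpy as np

probe = Sequential([
    Lambda(lambda x: resize_images(x, 7, 7, "channels_last"),
           input_shape=(32, 32, 3)),
])
print(probe.predict(np.zeros((1, 32, 32, 3))).shape)  # (1, 224, 224, 3)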
generator.load_weights('generator.h5')

resize_model = make_resize()
resize_model.compile(loss='binary_crossentropy',
                     optimizer=tf.keras.optimizers.Adam())

face_rec = make_face_recognition(fr_image_shape)
face_rec.compile(loss='binary_crossentropy',
                 optimizer=tf.keras.optimizers.Adam())
face_rec.trainable = False

image_input = Input(shape=(64, 64, 3))
conditioning_variable = Input(shape=(6,))

latent_approximation = encoder(image_input)
reconstruction = generator([latent_approximation, conditioning_variable])

# fr_resized_images = tf.image.resize(reconstruction, [192, 192],
#                                     method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
fr_resized_images = Lambda(
    lambda x: K.resize_images(x, height_factor=3, width_factor=3,
                              data_format='channels_last'))(reconstruction)
embeddings = face_rec(fr_resized_images)

fr_adversarial = Model(inputs=[image_input, conditioning_variable],
                       outputs=[embeddings])
fr_adversarial.compile(loss=euclidean_loss, optimizer=adv_opt)

for epoch in range(epochs):
    print(f'Epoch: {epoch}')

    num_batches = int(len(loaded_images) / batch_size)
    for i in range(num_batches):
        batch = loaded_images[i * batch_size:(i + 1) * batch_size]
        batch = batch / 127.5 - 1.
        batch = batch.astype(np.float32)

        y_batch = y[i * batch_size:(i + 1) * batch_size]
def main():
    # Define hyperparameters
    # data_dir = "/content/data/UTKFaces2"
    data_dir = "./data/processed/UTKFaces2/"
    # source_file_path = "/content/data/source_file.txt"
    source_file_path = "./data/source_file.txt"
    # results_path = "/content/drive/MyDrive/face_aging/age_cgan/results"
    results_path = "./results"
    # saved_models_path = "/content/drive/MyDrive/face_aging/age_cgan/saved_models"
    saved_models_path = "./saved_models"
    images_path = "./data/images.npy"

    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if not os.path.exists(saved_models_path):
        os.makedirs(saved_models_path)

    # wiki_dir = os.path.join(data_dir, "wiki_crop1")
    epochs = 500
    batch_size = 2
    image_shape = (128, 128, 3)
    z_shape = 100
    TRAIN_GAN = True
    TRAIN_ENCODER = False
    TRAIN_GAN_WITH_FR = False
    fr_image_shape = (192, 192, 3)

    # Define optimizers
    dis_optimizer = Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=10e-8)
    gen_optimizer = Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=10e-8)
    adversarial_optimizer = Adam(lr=0.0002, beta_1=0.5, beta_2=0.999,
                                 epsilon=10e-8)

    """
    Build and compile networks
    """
    # Build and compile the discriminator network
    discriminator = build_discriminator(input_shape=image_shape)
    discriminator.compile(loss=['binary_crossentropy'],
                          optimizer=dis_optimizer)

    # Build and compile the generator network
    generator = build_generator()
    generator.compile(loss=['binary_crossentropy'], optimizer=gen_optimizer)

    # Build and compile the adversarial model
    discriminator.trainable = False
    input_z_noise = Input(shape=(100,))
    input_label = Input(shape=(6,))
    recons_images = generator([input_z_noise, input_label])
    valid = discriminator([recons_images, input_label])
    adversarial_model = Model(inputs=[input_z_noise, input_label],
                              outputs=[valid])
    adversarial_model.compile(loss=['binary_crossentropy'],
                              optimizer=gen_optimizer)

    tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()))
    tensorboard.set_model(generator)
    tensorboard.set_model(discriminator)

    """
    Load the dataset
    """
    # images, age_list = load_data(data_dir=data_dir, dataset="wiki")
    images, age_list = load_data(source_file_path)
    # age_cat = age_to_category(age_list)
    age_cat = age_list
    final_age_cat = np.reshape(np.array(age_cat), [len(age_cat), 1])
    classes = len(set(age_cat))
    y = to_categorical(final_age_cat, num_classes=len(set(age_cat)))

    try:
        loaded_images = np.load(images_path, allow_pickle=True)
    except Exception:
        loaded_images = load_images(data_dir, images,
                                    (image_shape[0], image_shape[1]))
        np.save(images_path, loaded_images, allow_pickle=True)

    # Implement label smoothing (note: multiplying zeros by 0.1 leaves the
    # fake labels at 0.0, so only the real labels are actually smoothed).
    real_labels = np.ones((batch_size, 1), dtype=np.float32) * 0.9
    fake_labels = np.zeros((batch_size, 1), dtype=np.float32) * 0.1

    """
    Train the generator and the discriminator network
    """
    if TRAIN_GAN:
        for epoch in range(epochs):
            print("Epoch:{}".format(epoch))

            gen_losses = []
            dis_losses = []

            number_of_batches = int(len(loaded_images) / batch_size)
            print("Number of batches:", number_of_batches)
            for index in range(number_of_batches):
                print("Batch:{}".format(index + 1))

                images_batch = loaded_images[index * batch_size:
                                             (index + 1) * batch_size]
                images_batch = images_batch / 127.5 - 1.0
                images_batch = images_batch.astype(np.float32)

                y_batch = y[index * batch_size:(index + 1) * batch_size]
                z_noise = np.random.normal(0, 1, size=(batch_size, z_shape))

                """
                Train the discriminator network
                """
                # Generate fake images
                initial_recon_images = generator.predict_on_batch(
                    [z_noise, y_batch])

                d_loss_real = discriminator.train_on_batch(
                    [images_batch, y_batch], real_labels)
                d_loss_fake = discriminator.train_on_batch(
                    [initial_recon_images, y_batch], fake_labels)

                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
                print("d_loss:{}".format(d_loss))

                """
                Train the generator network
                """
                z_noise2 = np.random.normal(0, 1, size=(batch_size, z_shape))
                random_labels = np.random.randint(0, 6,
                                                  batch_size).reshape(-1, 1)
                random_labels = to_categorical(random_labels, 6)

                g_loss = adversarial_model.train_on_batch(
                    [z_noise2, random_labels], [1] * batch_size)
                print("g_loss:{}".format(g_loss))

                gen_losses.append(g_loss)
                dis_losses.append(d_loss)

            # Write losses to Tensorboard
            write_log(tensorboard, 'g_loss', np.mean(gen_losses), epoch)
            write_log(tensorboard, 'd_loss', np.mean(dis_losses), epoch)

            """
            Generate images after every 10th epoch
            """
            if epoch % 10 == 0:
                images_batch = loaded_images[0:batch_size]
                images_batch = images_batch / 127.5 - 1.0
                images_batch = images_batch.astype(np.float32)

                y_batch = y[0:batch_size]
                z_noise = np.random.normal(0, 1, size=(batch_size, z_shape))

                gen_images = generator.predict_on_batch([z_noise, y_batch])

                for i, img in enumerate(gen_images[:5]):
                    save_rgb_img(img,
                                 path="results/img_{}_{}.png".format(epoch, i))

        # Save networks
        try:
            generator.save_weights("generator.h5")
            discriminator.save_weights("discriminator.h5")
        except Exception as e:
            print("Error:", e)

    """
    Train encoder
    """
    if TRAIN_ENCODER:
        # Build and compile encoder
        encoder = build_encoder(input_shape=image_shape)
        encoder.compile(loss=euclidean_distance_loss, optimizer='adam')

        # Load the generator network's weights
        try:
            generator.load_weights(
                os.path.join(saved_models_path, "generator.h5"))
        except Exception as e:
            print("Error:", e)

        z_i = np.random.normal(0, 1, size=(5000, z_shape))

        y = np.random.randint(low=0, high=6, size=(5000,), dtype=np.int64)
        num_classes = len(set(y))
        y = np.reshape(np.array(y), [len(y), 1])
        y = to_categorical(y, num_classes=num_classes)

        for epoch in range(epochs):
            print("Epoch:", epoch)

            encoder_losses = []

            number_of_batches = int(z_i.shape[0] / batch_size)
            print("Number of batches:", number_of_batches)
            for index in range(number_of_batches):
                print("Batch:", index + 1)

                z_batch = z_i[index * batch_size:(index + 1) * batch_size]
                y_batch = y[index * batch_size:(index + 1) * batch_size]

                generated_images = generator.predict_on_batch(
                    [z_batch, y_batch])

                # Train the encoder model
                encoder_loss = encoder.train_on_batch(generated_images,
                                                      z_batch)
                print("Encoder loss:", encoder_loss)

                encoder_losses.append(encoder_loss)

            # Write the encoder loss to Tensorboard
            write_log(tensorboard, "encoder_loss", np.mean(encoder_losses),
                      epoch)

        # Save the encoder model
        encoder.save_weights(os.path.join(saved_models_path, "encoder.h5"))

    """
    Optimize the encoder and the generator network
    """
    if TRAIN_GAN_WITH_FR:
        # Load the encoder network
        encoder = build_encoder()
        encoder.load_weights(os.path.join(saved_models_path, "encoder.h5"))

        # Load the generator network
        generator.load_weights(os.path.join(saved_models_path,
                                            "generator.h5"))

        image_resizer = build_image_resizer()
        image_resizer.compile(loss=['binary_crossentropy'], optimizer='adam')

        # Face recognition model
        fr_model = build_fr_model(input_shape=fr_image_shape)
        fr_model.compile(loss=['binary_crossentropy'], optimizer="adam")

        # Make the face recognition network as non-trainable
        fr_model.trainable = False

        # Input layers
        input_image = Input(shape=(64, 64, 3))
        input_label = Input(shape=(6,))

        # Use the encoder and the generator network
        latent0 = encoder(input_image)
        gen_images = generator([latent0, input_label])

        # Resize images to the desired shape
        # (The Lambda resizes its own argument rather than closing over
        # gen_images.)
        resized_images = Lambda(
            lambda x: K.resize_images(x, height_factor=3, width_factor=3,
                                      data_format='channels_last'))(gen_images)
        embeddings = fr_model(resized_images)

        # Create a Keras model and specify the inputs and outputs for the
        # network
        fr_adversarial_model = Model(inputs=[input_image, input_label],
                                     outputs=[embeddings])

        # Compile the model
        fr_adversarial_model.compile(loss=euclidean_distance_loss,
                                     optimizer=adversarial_optimizer)

        for epoch in range(epochs):
            print("Epoch:", epoch)

            reconstruction_losses = []

            number_of_batches = int(len(loaded_images) / batch_size)
            print("Number of batches:", number_of_batches)
            for index in range(number_of_batches):
                print("Batch:", index + 1)

                images_batch = loaded_images[index * batch_size:
                                             (index + 1) * batch_size]
                images_batch = images_batch / 127.5 - 1.0
                images_batch = images_batch.astype(np.float32)

                y_batch = y[index * batch_size:(index + 1) * batch_size]

                images_batch_resized = image_resizer.predict_on_batch(
                    images_batch)

                real_embeddings = fr_model.predict_on_batch(
                    images_batch_resized)

                reconstruction_loss = fr_adversarial_model.train_on_batch(
                    [images_batch, y_batch], real_embeddings)
                print("Reconstruction loss:", reconstruction_loss)

                reconstruction_losses.append(reconstruction_loss)

            # Write the reconstruction loss to Tensorboard
            write_log(tensorboard, "reconstruction_loss",
                      np.mean(reconstruction_losses), epoch)

            """
            Generate images
            """
            if epoch % 10 == 0:
                images_batch = loaded_images[0:batch_size]
                images_batch = images_batch / 127.5 - 1.0
                images_batch = images_batch.astype(np.float32)

                y_batch = y[0:batch_size]
                z_noise = np.random.normal(0, 1, size=(batch_size, z_shape))

                gen_images = generator.predict_on_batch([z_noise, y_batch])

                for i, img in enumerate(gen_images[:5]):
                    output_path = os.path.join(
                        results_path, "img_opt_{}_{}.png".format(epoch, i))
                    save_rgb_img(img, path=output_path)

        # Save improved weights for both of the networks
        generator.save_weights(
            os.path.join(saved_models_path, "generator_optimized.h5"))
        encoder.save_weights(
            os.path.join(saved_models_path, "encoder_optimized.h5"))
def upsample_to_size(x, y=ss):
    x = K.resize_images(x, y, y, "channels_last", interpolation='bilinear')
    return x
def resizeImage(self, x, height_factor, width_factor,
                interpolation='nearest', data_format='channels_last'):
    return Lambda(
        lambda x: resize_images(x, height_factor=height_factor,
                                width_factor=width_factor,
                                interpolation=interpolation,
                                data_format=data_format))(x)
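# Usage sketch (builder is a hypothetical instance of the class defining
# resizeImage, not from the source): bilinear 2x upsampling wrapped as a
# Keras layer.
from keras import backend as K
from keras.layers import Input

inp = Input(shape=(32, 32, 64))
up = builder.resizeImage(inp, height_factor=2, width_factor=2,
                         interpolation='bilinear')
print(K.int_shape(up))  # (None, 64, 64, 64)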