def stem(self, inp):
    """Build the Inception-v4 style stem as a nested ``Stem`` model and apply it.

    A fresh ``Input`` with the non-batch shape of ``inp`` is created, the stem
    graph is built on top of it, wrapped in a ``Model`` named ``'Stem'`` and
    that model is then called on ``inp``.

    Shapes noted by the original author (not checked here):
    input 256 x 256 x 3, output 32 x 32 x 576.

    Args:
        inp: tensor the stem is applied to; must expose ``get_shape()``.

    Returns:
        The output tensor of the ``Stem`` model applied to ``inp``.
    """
    stem_input = Input(shape=inp.get_shape().as_list()[1:])  # 256 x 256 x 3

    # Plain 3x3 convolutions; only the first one is strided.
    net = layers.conv_bn_act(stem_input, 32, (3, 3), strides=(2, 2))
    for n_filters in (32, 64):
        net = layers.conv_bn_act(net, n_filters, (3, 3))

    # Strided convolution in parallel with max-pooling, then merged.
    conv_branch = layers.conv_bn_act(net, 96, (3, 3), strides=(2, 2))
    pool_branch = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(net)
    net = concatenate([conv_branch, pool_branch])

    # Short branch: 1x1 followed by 3x3.
    short_branch = layers.conv_bn_act(net, 64, (1, 1))
    short_branch = layers.conv_bn(short_branch, 96, (3, 3))

    # Long branch: 1x1, then a 5x1 / 1x5 pair, then 3x3.
    long_branch = layers.conv_bn_act(net, 64, (1, 1))
    for kernel in ((5, 1), (1, 5)):
        long_branch = layers.conv_bn_act(long_branch, 64, kernel)
    long_branch = layers.conv_bn(long_branch, 96, (3, 3))
    net = concatenate([short_branch, long_branch])

    # Second strided-conv / max-pool pair, merged again.
    conv_branch = layers.act_conv_bn(net, 192, (3, 3), strides=(2, 2))
    pool_branch = MaxPooling2D((2, 2), strides=(2, 2))(net)
    net = concatenate([conv_branch, pool_branch])

    net = blocks.sepconv_residual(net, 3 * 192, name='sepconv1')

    stem_model = Model(stem_input, net, name='Stem')
    return stem_model(inp)
def build(self, input_shape):
    """Assemble the decoder network and store it in ``self._model``.

    Shapes noted by the original author (not checked here):
    input is the concatenation of z_a and z_p, 16 x 16 x 2048;
    output is the reconstructed image, 256 x 256 x 3.

    Args:
        input_shape: shape (without batch dimension) passed to ``Input``.
    """
    decoder_input = Input(shape=input_shape)

    # One entry per stage: a ``layers.up`` call followed by one 3x3
    # ``conv_bn_act`` per listed filter count. The trailing comments are the
    # spatial sizes the original author expected after each ``up``.
    stage_filters = (
        (1024, 512, 256),  # 32 x 32
        (256, 256, 128),   # 64 x 64
        (128, 64),         # 128 x 128
        (3,),              # 256 x 256
    )

    net = decoder_input
    for filters_in_stage in stage_filters:
        net = layers.up(net)
        for n_filters in filters_in_stage:
            net = layers.conv_bn_act(net, n_filters, (3, 3))

    # Final 1x1 projection to 3 channels.
    # NOTE(review): the original author expected this output to have shape
    # (None, 3, 256, 256), i.e. channels first - unverified.
    # TODO: decide whether to permute the axes here or to feed the input
    # already formatted channels-first.
    i_hat = layers.conv_bn(net, 3, (1, 1))

    self._model = Model(inputs=decoder_input, outputs=i_hat, name='decoder')
def build(self, input_shape):
    """Assemble the older decoder variant and store it in ``self._model``.

    Per the original author's note the input is, for now, only the z_a part
    (8 x 8 x 2048); this is not checked here.

    Args:
        input_shape: shape (without batch dimension) passed to ``Input``.
    """
    decoder_input = Input(shape=input_shape)

    # One entry per stage: a ``layers.up`` call followed by one 3x3
    # ``conv_bn_act`` per listed filter count. The trailing comments are the
    # spatial sizes the original author expected after each ``up``.
    stage_filters = (
        (512, 512, 512),  # 16 x 16
        (512, 512, 256),  # 32 x 32
        (256, 256, 128),  # 64 x 64
        (128, 64),        # 128 x 128
        (3,),             # 256 x 256
    )

    net = decoder_input
    for filters_in_stage in stage_filters:
        net = layers.up(net)
        for n_filters in filters_in_stage:
            net = layers.conv_bn_act(net, n_filters, (3, 3))

    # Final 1x1 projection to 3 channels.
    # NOTE(review): the original author expected this output to have shape
    # (None, 3, 256, 256), i.e. channels first - unverified.
    # TODO: decide whether to permute the axes here or to feed the input
    # already formatted channels-first.
    i_hat = layers.conv_bn(net, 3, (1, 1))

    self._model = Model(inputs=decoder_input, outputs=i_hat, name='old_decoder')
def stem(self, inp):
    """Build the Inception-v4 style stem directly on top of ``inp``.

    Unlike a nested-model approach, the layers are attached straight to
    ``inp`` and every layer receives an explicit name (``stem1`` ..
    ``stem16`` plus ``sepconv1``).

    Shapes noted by the original author (not checked here):
    input 256 x 256 x 3, output 32 x 32 x 576.

    Args:
        inp: input tensor the stem layers are applied to.

    Returns:
        The output tensor of the last stem layer.
    """
    print("BUILDING STEM")

    # Plain 3x3 convolutions; only the first one is strided.
    net = layers.conv_bn_act(inp, 32, (3, 3), strides=(2, 2), name="stem1")
    net = layers.conv_bn_act(net, 32, (3, 3), name="stem2")
    net = layers.conv_bn_act(net, 64, (3, 3), name="stem3")

    # Strided convolution in parallel with max-pooling, then merged.
    conv_branch = layers.conv_bn_act(
        net, 96, (3, 3), strides=(2, 2), name="stem4"
    )
    pool_branch = MaxPooling2D(
        (3, 3), strides=(2, 2), padding='same', name="stem5"
    )(net)
    net = concatenate([conv_branch, pool_branch], name="stem6")

    # Short branch: 1x1 followed by 3x3.
    short_branch = layers.conv_bn_act(net, 64, (1, 1), name="stem7")
    short_branch = layers.conv_bn(short_branch, 96, (3, 3), name="stem8")

    # Long branch: 1x1, then a 5x1 / 1x5 pair, then 3x3.
    long_branch = layers.conv_bn_act(net, 64, (1, 1), name="stem9")
    long_branch = layers.conv_bn_act(long_branch, 64, (5, 1), name="stem10")
    long_branch = layers.conv_bn_act(long_branch, 64, (1, 5), name="stem11")
    long_branch = layers.conv_bn(long_branch, 96, (3, 3), name="stem12")
    net = concatenate([short_branch, long_branch], name="stem13")

    # Second strided-conv / max-pool pair, merged again.
    conv_branch = layers.act_conv_bn(
        net, 192, (3, 3), strides=(2, 2), name="stem14"
    )
    pool_branch = MaxPooling2D((2, 2), strides=(2, 2), name="stem15")(net)
    net = concatenate([conv_branch, pool_branch], name="stem16")

    return blocks.sepconv_residual(net, 3 * 192, name='sepconv1')
def build(self):
    """Build, compile and summarise the ResNet50-encoder / conv-decoder model.

    Reads ``self.input_shape`` and ``self.start_lr``; stores the compiled
    model in ``self.model`` and prints its summary.
    """
    image_in = Input(shape=self.input_shape)

    # Encoder: ResNet50 without its classification head, ImageNet weights.
    encoder = ResNet50(
        include_top=False, weights='imagenet', input_tensor=image_in
    )
    z_a = encoder.output  # 8 x 8 x 2048 per the original author's note

    # Decoder. One entry per stage: a ``layers.up`` call followed by one 3x3
    # ``conv_bn_act`` per listed filter count. The trailing comments are the
    # spatial sizes the original author expected after each ``up``.
    stage_filters = (
        (512, 512, 512),  # 16 x 16
        (512, 512, 256),  # 32 x 32
        (256, 256, 128),  # 64 x 64
        (128, 64),        # 128 x 128
        (3,),             # 256 x 256
    )

    net = z_a
    for filters_in_stage in stage_filters:
        net = layers.up(net)
        for n_filters in filters_in_stage:
            net = layers.conv_bn_act(net, n_filters, (3, 3))

    # Final 1x1 projection to 3 channels.
    # NOTE(review): the original author expected this output to have shape
    # (None, 3, 256, 256), i.e. channels first - unverified.
    # TODO: decide whether to permute the axes here or to feed the input
    # already formatted channels-first.
    i_hat = layers.conv_bn(net, 3, (1, 1))

    self.model = Model(inputs=image_in, outputs=i_hat)

    # Plain MSE for now; a combined loss (losses.combined_loss) was considered
    # by the original author but is not wired in.
    self.model.compile(
        loss=mean_squared_error, optimizer=RMSprop(lr=self.start_lr)
    )
    self.model.summary()