def sam_resnet(x):
    # Dilated Convolutional Network
    dcn = dcn_resnet(input_tensor=x[0])
    conv_feat = Convolution2D(512, 3, 3, border_mode='same', activation='relu')(dcn.output)

    # Attentive Convolutional LSTM
    att_convlstm = Lambda(repeat, repeat_shape)(conv_feat)
    att_convlstm = AttentiveConvLSTM(nb_filters_in=512, nb_filters_out=512, nb_filters_att=512,
                                     nb_cols=3, nb_rows=3)(att_convlstm)

    # Learned Prior (1)
    priors1 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)(x[1])
    concatenated = merge([att_convlstm, priors1], mode='concat', concat_axis=1)
    learned_priors1 = AtrousConvolution2D(512, 5, 5, border_mode='same', activation='relu',
                                          atrous_rate=(4, 4))(concatenated)

    # Learned Prior (2)
    priors2 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)(x[1])
    concatenated = merge([learned_priors1, priors2], mode='concat', concat_axis=1)
    learned_priors2 = AtrousConvolution2D(512, 5, 5, border_mode='same', activation='relu',
                                          atrous_rate=(4, 4))(concatenated)

    # Final Convolutional Layer
    outs = Convolution2D(1, 1, 1, border_mode='same', activation='relu')(learned_priors2)
    outs_up = Lambda(upsampling, upsampling_shape)(outs)

    return [outs_up, outs_up, outs_up]
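
# A minimal wiring sketch (not from the source): how sam_resnet's two inputs
# and triple output would typically be hooked up in this Keras 1 era API.
# The shape variables and loss functions below are assumptions taken from the
# surrounding repo, not definitions.
#
# from keras.layers import Input
# from keras.models import Model
#
# x = Input(shape=(3, shape_r, shape_c))                       # RGB image
# x_maps = Input(shape=(nb_gaussian, shape_r_gt, shape_c_gt))  # prior input
# m = Model(input=[x, x_maps], output=sam_resnet([x, x_maps]))
# m.compile(optimizer='rmsprop',
#           loss=[kl_divergence, correlation_coefficient, nss])  # one loss per output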
Example #3
def gaussian_prior_match(tensor, fdm):
    # Learned Prior (1)
    priors1 = LearningPrior(64, nb_gaussian=nb_gaussian)(fdm[1])
    concatenated = concatenate([tensor, priors1], axis=1)
    # Conv2D with dilation_rate replaces the removed Keras 1 AtrousConvolution2D.
    learned_priors1 = Conv2D(64, (5, 5),
                             padding='same',
                             activation='relu',
                             dilation_rate=(4, 4))(concatenated)

    # Learned Prior (2)
    priors2 = LearningPrior(64, nb_gaussian=nb_gaussian)(fdm[1])
    concatenated = concatenate([learned_priors1, priors2], axis=1)
    learned_priors2 = Conv2D(64, (5, 5),
                             padding='same',
                             activation='relu',
                             dilation_rate=(4, 4))(concatenated)
    return learned_priors2
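
# Usage sketch (assumption, not from the source): `tensor` is a channels_first
# feature map and `fdm` is a list whose second element feeds the learned
# priors, as implied by the axis=1 concatenation and the fdm[1] indexing.
#
# feat = Input(shape=(64, 30, 40))               # assumed feature shape
# prior_in = Input(shape=(nb_gaussian, 30, 40))  # assumed prior-input shape
# refined = gaussian_prior_match(feat, [None, prior_in])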
Example #4
def TD_model_prior_masks(input_tensors=None, f1_train=True, stateful=False):
    f1 = Feature_dcross_res_matt_res_ds_masks()
    f1.trainable = f1_train
    
    if input_tensors is None:
        xgaus_shape = (shape_r_gaus, shape_c_gaus, nb_gaussian)
        ximgs_ops_shape = (None, shape_r, shape_c, 3+2*opt_num)
        input_tensors = [Input(shape=xgaus_shape) for _ in range(num_frames)]
        input_tensors.append(Input(shape=ximgs_ops_shape))

    Ximgs_ops = input_tensors[-1] 
    Xgaus = input_tensors[:-1]
            
    features_out = TimeDistributed(f1)(Ximgs_ops)
    
    frame_features, aux_out1, aux_out2, aux_out3, mask3, mask4, mask5 \
        = Lambda(Slice_outputs_mask, output_shape=Slice_outs_shape_mask)(features_out)
    
    #print('frame_features', K.int_shape(frame_features))

    outs = ConvGRU2D(filters=256, kernel_size=(3, 3),
                     padding='same', return_sequences=True, stateful=stateful,
                     name='ConvGRU2D')(frame_features)

    outs = TimeDistributed(BatchNormalization(name='ConvGRU2D_BN'))(outs) # previously 256    
    
    prior_layer1 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)
    priors1 = [Lambda(Expand_gaus)(prior_layer1(x)) for x in Xgaus]
    priors1_merged = Concatenate(axis=-4)(priors1)
    
    sal_concat1 = Concatenate(axis=-1)([outs, priors1_merged])

    outs = TimeDistributed(Conv2D(1, (1, 1), padding='same', activation='sigmoid'))(sal_concat1)
    outs = TimeDistributed(BilinearUpSampling2D((8,8)))(outs)

    aux_out1 = TimeDistributed(BilinearUpSampling2D((8,8)))(aux_out1)
    aux_out2 = TimeDistributed(BilinearUpSampling2D((8,8)))(aux_out2)
    aux_out3 = TimeDistributed(BilinearUpSampling2D((8,8)))(aux_out3)

    # the mask outputs are included for visualization
    model = Model(inputs=input_tensors,
                  outputs=[outs,
                           aux_out1,
                           aux_out2,
                           aux_out3,
                           mask3,
                           mask4,
                           mask5],
                  name='TD_model_prior')
    
    return model
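
# Compile sketch (assumption, not from the source): the model returns seven
# outputs (saliency, three auxiliary maps, three masks), so Keras expects one
# loss entry per output; the losses and weights here are placeholders.
#
# model = TD_model_prior_masks()
# model.compile(optimizer='adam',
#               loss=['kld', 'kld', 'kld', 'kld', 'mse', 'mse', 'mse'],
#               loss_weights=[1.0, 0.5, 0.5, 0.5, 0.1, 0.1, 0.1])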
Example #5
def sam_vgg(data):
    # conv_1
    trainable = True
    conv_1_out = Conv2D(64, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block1_conv1',
                        trainable=trainable)(data[0])
    conv_1_out = Conv2D(64, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block1_conv2',
                        trainable=trainable)(conv_1_out)

    ds_conv_1_out = MaxPooling2D((2, 2), strides=(2, 2),
                                 name='block1_pool')(conv_1_out)

    # conv_2
    conv_2_out = Conv2D(128, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block2_conv1',
                        trainable=trainable)(ds_conv_1_out)
    conv_2_out = Conv2D(128, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block2_conv2',
                        trainable=trainable)(conv_2_out)

    ds_conv_2_out = MaxPooling2D((2, 2), strides=(2, 2),
                                 name='block2_pool')(conv_2_out)

    # conv_3
    conv_3_out = Conv2D(256, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block3_conv1',
                        trainable=trainable)(ds_conv_2_out)
    conv_3_out = Conv2D(256, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block3_conv2',
                        trainable=trainable)(conv_3_out)
    conv_3_out = Conv2D(256, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block3_conv3',
                        trainable=trainable)(conv_3_out)

    ds_conv_3_out = MaxPooling2D((2, 2),
                                 strides=(2, 2),
                                 name='block3_pool',
                                 padding='same')(conv_3_out)

    # conv_4
    conv_4_out = Conv2D(512, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block4_conv1',
                        trainable=trainable)(ds_conv_3_out)
    conv_4_out = Conv2D(512, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block4_conv2',
                        trainable=trainable)(conv_4_out)
    conv_4_out = Conv2D(512, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block4_conv3',
                        trainable=trainable)(conv_4_out)

    ds_conv_4_out = MaxPooling2D((2, 2),
                                 strides=(2, 2),
                                 name='block4_pool',
                                 padding='same')(conv_4_out)

    # conv_5 #
    conv_5_out = Conv2D(512, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block5_conv1',
                        trainable=trainable)(ds_conv_4_out)
    conv_5_out = Conv2D(512, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block5_conv2',
                        trainable=trainable)(conv_5_out)
    conv_5_out = Conv2D(512, (3, 3),
                        activation='relu',
                        padding='same',
                        name='block5_conv3',
                        trainable=trainable)(conv_5_out)

    s_conv_5_out = Conv2D(64, (3, 3),
                          padding='same',
                          activation='relu',
                          name='s_conv_5',
                          trainable=True)(conv_5_out)
    s_saliency_conv_5 = Conv2D(1, (1, 1),
                               activation='sigmoid',
                               name='s_saliency_conv_5',
                               trainable=True)(s_conv_5_out)

    # attention from conv_5 #
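    # Flatten + RepeatVector + Reshape replicates the static conv_5 features
    # nb_timestep times, adding the time axis the ConvLSTM consumes.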
    attention_conv_5_out = Flatten()(conv_5_out)
    attention_conv_5_out = RepeatVector(nb_timestep)(attention_conv_5_out)
    attention_conv_5_out = Reshape(
        (nb_timestep, 14, 14, 512))(attention_conv_5_out)
    attention_conv_5 = (ConvLSTM2D(filters=512,
                                   kernel_size=(3, 3),
                                   padding='same',
                                   return_sequences=False,
                                   stateful=False,
                                   name='conv5_lstm1',
                                   trainable=trainable))(attention_conv_5_out)
    priors1 = LearningPrior(nb_gaussian=nb_gaussian,
                            init=gaussian_priors_init)(data[1])
    attention_conv_5 = Concatenate()([attention_conv_5, priors1])
    attention_conv_5 = Conv2D(64, (3, 3),
                              padding='same',
                              activation='relu',
                              name='merge_att_conv5',
                              trainable=trainable)(attention_conv_5)
    attention_conv_5 = Conv2D(1, (1, 1),
                              activation='sigmoid',
                              name='att_conv5',
                              trainable=trainable)(attention_conv_5)

    conv_5_out = Concatenate()([s_conv_5_out, attention_conv_5])
    conv_5_out = Flatten()(conv_5_out)
    conv_5_out = RepeatVector(nb_timestep)(conv_5_out)
    conv_5_out = Reshape((nb_timestep, 14, 14, 65))(conv_5_out)
    saliency_conv_5 = (ConvLSTM2D(filters=64,
                                  kernel_size=(3, 3),
                                  padding='same',
                                  return_sequences=False,
                                  stateful=False,
                                  name='conv5_lstm2',
                                  trainable=True))(conv_5_out)
    saliency_conv_5 = Conv2D(1, (1, 1),
                             activation='sigmoid',
                             name='sal_conv5',
                             trainable=True)(saliency_conv_5)

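    # saliency from conv_4 #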
    conv_4_out = Conv2D(64, (1, 1),
                        padding='same',
                        name='conv_4_out',
                        trainable=trainable)(conv_4_out)
    conv_4_out = BatchNormalization()(conv_4_out)
    conv_4_out = Activation('sigmoid')(conv_4_out)
    up_saliency_conv_5 = UpSampling2D(size=(2, 2))(saliency_conv_5)
    conv_4_out = Concatenate()([conv_4_out, up_saliency_conv_5])
    conv_4_out = Flatten()(conv_4_out)
    conv_4_out = RepeatVector(nb_timestep)(conv_4_out)
    conv_4_out = Reshape((nb_timestep, 28, 28, 65))(conv_4_out)

    saliency_conv_4 = (
        ConvLSTM2D(
            filters=64,
            kernel_size=(3, 3),
            padding='same',
            return_sequences=False,
            stateful=False,  #True
            name='conv4_lstm2',
            trainable=True))(conv_4_out)
    saliency_conv_4 = Conv2D(1, (1, 1),
                             activation='sigmoid',
                             name='sal_conv4',
                             trainable=True)(saliency_conv_4)

    # saliency from conv_3 #
    conv_3_out = Conv2D(64, (1, 1),
                        padding='same',
                        name='conv_3_out',
                        trainable=True)(conv_3_out)
    conv_3_out = BatchNormalization()(conv_3_out)
    conv_3_out = Activation('sigmoid')(conv_3_out)
    up_saliency_conv_4 = UpSampling2D(size=(2, 2))(saliency_conv_4)
    conv_3_out = Concatenate()([conv_3_out, up_saliency_conv_4])
    conv_3_out = Flatten()(conv_3_out)
    conv_3_out = RepeatVector(nb_timestep)(conv_3_out)
    conv_3_out = Reshape((nb_timestep, 56, 56, 65))(conv_3_out)
    saliency_conv_3 = (ConvLSTM2D(filters=64,
                                  kernel_size=(3, 3),
                                  padding='same',
                                  return_sequences=False,
                                  stateful=False,
                                  name='conv3_lstm',
                                  trainable=True))(conv_3_out)
    saliency_conv_3 = Conv2D(1, (1, 1),
                             activation='sigmoid',
                             name='sal_conv3',
                             trainable=True)(saliency_conv_3)

    # saliency from conv_2 #
    conv_2_out = Conv2D(64, (1, 1),
                        padding='same',
                        name='conv_2_out',
                        trainable=True)(conv_2_out)
    conv_2_out = BatchNormalization()(conv_2_out)
    conv_2_out = Activation('sigmoid')(conv_2_out)
    up_saliency_conv_3 = UpSampling2D(size=(2, 2))(saliency_conv_3)
    conv_2_out = Concatenate()([conv_2_out, up_saliency_conv_3])
    conv_2_out = Flatten()(conv_2_out)
    conv_2_out = RepeatVector(nb_timestep)(conv_2_out)
    conv_2_out = Reshape((nb_timestep, 112, 112, 65))(conv_2_out)
    saliency_conv_2 = (ConvLSTM2D(filters=64,
                                  kernel_size=(3, 3),
                                  padding='same',
                                  return_sequences=False,
                                  stateful=False,
                                  name='conv2_lstm',
                                  trainable=True))(conv_2_out)
    saliency_conv_2 = Conv2D(1, (1, 1),
                             activation='sigmoid',
                             name='sal_conv2',
                             trainable=True)(saliency_conv_2)

    # saliency from conv_1 #
    conv_1_out = Conv2D(32, (1, 1),
                        padding='same',
                        name='conv_1_out',
                        trainable=True)(conv_1_out)
    conv_1_out = BatchNormalization()(conv_1_out)
    conv_1_out = Activation('sigmoid')(conv_1_out)
    up_saliency_conv_2 = UpSampling2D(size=(2, 2))(saliency_conv_2)
    conv_1_out = Concatenate()([conv_1_out, up_saliency_conv_2])
    conv_1_out = Flatten()(conv_1_out)
    conv_1_out = RepeatVector(nb_timestep)(conv_1_out)
    conv_1_out = Reshape((nb_timestep, 224, 224, 33))(conv_1_out)
    saliency_conv_1 = (ConvLSTM2D(filters=32,
                                  kernel_size=(3, 3),
                                  padding='same',
                                  return_sequences=False,
                                  stateful=False,
                                  name='conv1_lstm',
                                  trainable=True))(conv_1_out)
    saliency_conv_1 = Conv2D(1, (1, 1),
                             activation='sigmoid',
                             name='sal_conv1',
                             trainable=True)(saliency_conv_1)

    return [
        attention_conv_5, s_saliency_conv_5, saliency_conv_5, saliency_conv_4,
        saliency_conv_3, saliency_conv_2, saliency_conv_1
    ]
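
# Build sketch (assumption, not from the source): the Reshape calls above pin
# the image input to 224x224 RGB; the prior-input shape is hypothetical. The
# seven returned maps are supervised jointly, so supply seven losses.
#
# img_in = Input(shape=(224, 224, 3))
# prior_in = Input(shape=(nb_gaussian,))  # hypothetical prior-input shape
# model = Model(inputs=[img_in, prior_in], outputs=sam_vgg([img_in, prior_in]))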
Example #6
class SAMNET:
    """The class representing the SAM-Net based on the VGG16 model. It
       implements a definition of the computational graph, as well as
       functions related to network training.
    """
    def __init__(self):
        self._output = None
        self._mapping = {}

        self._nb_gaussian = config.PARAMS["nb_gaussian"]
        self.nb_timestep = config.PARAMS["nb_timestep"]
        self.shape_r_gt = config.PARAMS["shape_r_gt"]
        self.shape_c_gt = config.PARAMS["shape_c_gt"]
        self.shape_r_out = config.DIMS["image_out_size_salicon"][0]
        self.shape_c_out = config.DIMS["image_out_size_salicon"][1]

        if config.PARAMS["device"] == "gpu":
            self._data_format = "channels_first"
            self._channel_axis = 1
            self._dims_axis = (2, 3)
        elif config.PARAMS["device"] == "cpu":
            self._data_format = "channels_last"
            self._channel_axis = 3
            self._dims_axis = (1, 2)

        self.attention_convlstm = AttentiveConvLSTM(
            [self.shape_r_gt, self.shape_c_gt],
            3,
            512,
            512,
            512,
            data_format=self._data_format)
        self.prior_learning1 = LearningPrior(self._nb_gaussian,
                                             self.shape_r_gt,
                                             self.shape_c_gt,
                                             name='w1')
        self.prior_learning2 = LearningPrior(self._nb_gaussian,
                                             self.shape_r_gt,
                                             self.shape_c_gt,
                                             name='w2')

    def _encoder(self, images):
        """The encoder of the model consists of a pretrained VGG16 architecture
           with 13 convolutional layers. All dense layers are discarded and the
           last 3 layers are dilated at a rate of 2 to account for the omitted
           downsampling.
        Args:
            images (tensor, float32): A 4D tensor that holds the RGB image
                                      batches used as input to the network.
        """

        imagenet_mean = tf.constant([103.939, 116.779, 123.68])
        imagenet_mean = tf.reshape(imagenet_mean, [1, 1, 1, 3])

        images -= imagenet_mean

        if self._data_format == "channels_first":
            images = tf.transpose(images, (0, 3, 1, 2))

        features = vgg_net(images, self._data_format)
        self._output = features

    def _attention_convlstm(self, features):
        """The attentive ConvLSTM model. The input of the LSTM layer is
           computed at each timestep (i.e. at each iteration) through an
           attentive mechanism that selectively focuses on different regions
           of the image, so the convolutional LSTM iteratively refines the
           predicted saliency map around the most salient regions.

        Args:
            features (tensor, float32): A 4D tensor that holds the features
                                        extracted by the encoder network.
        """

        # Tile the static encoder features nb_timestep times and reshape to
        # restore the time axis expected by the recurrent attention module.
        x_tile = tf.tile(tf.layers.Flatten()(features), [1, self.nb_timestep])
        x_tile = tf.reshape(
            x_tile,
            [-1, self.nb_timestep, 512, self.shape_r_gt, self.shape_c_gt])
        initial_state = self.attention_convlstm.get_initial_states(x_tile)
        _, state = dynamic_rnn(self.attention_convlstm,
                               x_tile,
                               initial_state=initial_state,
                               dtype=x_tile.dtype)
        self._output = state.h

    def _prior_learning(self, features):
        """This module learns a set of prior maps, generated with Gaussian
           functions, to tackle the center bias present in human eye
           fixations. The entire prior-learning module is replicated twice.

        Args:
            features (tensor, float32): A 4D tensor that holds the features
                                        refined by the convolutional LSTM.
        """

        priors1 = self.prior_learning1.forword(features)
        # Concatenate along the channel axis, which depends on the data format.
        concatenated = tf.concat([features, priors1], axis=self._channel_axis)
        learned_priors1 = tf.layers.conv2d(concatenated,
                                           512,
                                           5,
                                           padding="same",
                                           activation=tf.nn.relu,
                                           dilation_rate=4,
                                           data_format=self._data_format,
                                           name="conv/priors1")

        priors2 = self.prior_learning2.forword(learned_priors1)
        concatenated = tf.concat([learned_priors1, priors2],
                                 axis=self._channel_axis)
        learned_priors2 = tf.layers.conv2d(concatenated,
                                           512,
                                           5,
                                           padding="same",
                                           activation=tf.nn.relu,
                                           dilation_rate=4,
                                           data_format=self._data_format,
                                           name="conv/priors2")

        # Final Convolutional Layer
        outs = tf.layers.conv2d(learned_priors2,
                                1,
                                1,
                                padding="same",
                                activation=tf.nn.relu,
                                data_format=self._data_format,
                                name="conv/decoder")

        b_s = tf.shape(features)[0]
        # Build the reference shape in the layout of the active data format.
        if self._data_format == "channels_first":
            ref_shape = [b_s, 1, self.shape_r_out, self.shape_c_out]
        else:
            ref_shape = [b_s, self.shape_r_out, self.shape_c_out, 1]
        outs = self._upsample(outs, ref_shape, 1)

        if self._data_format == "channels_first":
            outs = tf.transpose(outs, (0, 2, 3, 1))

        self._output = outs

    def _upsample(self, stack, shape, factor):
        """This function resizes the input to a desired shape via the
           bilinear upsampling method.

        Args:
            stack (tensor, float32): A 4D tensor with the function input.
            shape (tensor, int32): A 1D tensor with the reference shape.
            factor (scalar, int): An integer denoting the upsampling factor.

        Returns:
            tensor, float32: A 4D tensor that holds the activations after
                             bilinear upsampling of the input.
        """

        if self._data_format == "channels_first":
            stack = tf.transpose(stack, (0, 2, 3, 1))

        stack = tf.image.resize_bilinear(stack,
                                         (shape[self._dims_axis[0]] * factor,
                                          shape[self._dims_axis[1]] * factor))

        if self._data_format == "channels_first":
            stack = tf.transpose(stack, (0, 3, 1, 2))

        return stack

    def _normalize(self, maps, eps=1e-7):
        """This function normalizes the output values to a range
           between 0 and 1 per saliency map.

        Args:
            maps (tensor, float32): A 4D tensor that holds the model output.
            eps (scalar, float, optional): A small factor to avoid numerical
                                           instabilities. Defaults to 1e-7.
        """

        min_per_image = tf.reduce_min(maps, axis=(1, 2, 3), keep_dims=True)
        maps -= min_per_image

        max_per_image = tf.reduce_max(maps, axis=(1, 2, 3), keep_dims=True)
        maps = tf.divide(maps, eps + max_per_image, name="output")

        self._output = maps

    def _pretraining(self):
        """Variables 2 through 27 of the model are the 26 kernels and biases
           of the 13 VGG16 convolutional layers. Their names are matched to
           the ones of the pretrained VGG16 checkpoint for correct
           initialization.
        """
        for var in tf.global_variables()[2:28]:
            key = var.name.split("/", 1)[1]
            key = key.replace("kernel:0", "weights")
            key = key.replace("bias:0", "biases")
            self._mapping[key] = var

    def forward(self, images):
        """Public method to forward RGB images through the whole network
           architecture and retrieve the resulting output.

        Args:
            images (tensor, float32): A 4D tensor that holds the values of the
                                      raw input images.

        Returns:
            tensor, float32: A 4D tensor that holds the values of the
                             predicted saliency maps.
        """

        self._encoder(images)
        self._attention_convlstm(self._output)
        self._prior_learning(self._output)
        self._normalize(self._output)

        return self._output

    def train(self, ground_truth_map, ground_truth_fixation, predicted_maps,
              learning_rate):
        """Public method to define the loss function and optimization
           algorithm for training the model.

        Args:
            ground_truth_map (tensor, float32): A 4D tensor with the ground truth saliency maps.
            ground_truth_fixation (tensor, float32): A 4D tensor with the ground truth fixation maps.
            predicted_maps (tensor, float32): A 4D tensor with the predictions.
            learning_rate (scalar, float): Defines the learning rate.

        Returns:
            object: The optimizer element used to train the model.
            tensor, float32: A 0D tensor that holds the averaged error.
        """

        kld = loss.kld(ground_truth_map, predicted_maps)
        cc = loss.correlation_coefficient(ground_truth_map, predicted_maps)
        nss = loss.nss(ground_truth_fixation, predicted_maps)
        error = 10 * kld + 2 * cc + nss
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        optimizer = optimizer.minimize(error)

        return optimizer, error
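
    # Usage sketch (assumption, not from the source):
    #   model = SAMNET()
    #   predicted = model.forward(images)
    #   optimizer, error = model.train(gt_maps, gt_fixations, predicted, lr)
    #   _, err = sess.run([optimizer, error], feed_dict={...})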

    def save(self, saver, sess, dataset, path, device):
        """This saves a model checkpoint to disk and creates
           the folder if it doesn't exist yet.

        Args:
            saver (object): An object for saving the model.
            sess (object): The current TF training session.
            dataset (str): The dataset used for training.
            path (str): The path used for saving the model.
            device (str): Represents either "cpu" or "gpu".
        """

        os.makedirs(path, exist_ok=True)

        saver.save(sess,
                   path + "model_%s_%s.ckpt" % (dataset, device),
                   write_meta_graph=False,
                   write_state=False)

    def restore(self, sess, dataset, paths, device):
        """This function allows continued training from a prior checkpoint and
           training from scratch with the pretrained VGG16 weights. In case the
           dataset is either CAT2000 or MIT1003, a prior checkpoint based on
           the SALICON dataset is required.

        Args:
            sess (object): The current TF training session.
            dataset (str): The dataset used for training.
            paths (dict, str): A dictionary with all path elements.
            device (str): Represents either "cpu" or "gpu".

        Returns:
            object: A saver object for saving the model.
        """

        model_name = "model_%s_%s" % (dataset, device)
        salicon_name = "model_salicon_%s" % device
        vgg16_name = "vgg16_hybrid"

        ext1 = ".ckpt.data-00000-of-00001"
        ext2 = ".ckpt.index"

        saver = tf.train.Saver()

        if os.path.isfile(paths["latest"] + model_name + ext1) and \
           os.path.isfile(paths["latest"] + model_name + ext2):
            saver.restore(sess, paths["latest"] + model_name + ".ckpt")
        elif dataset in ("mit1003", "cat2000", "dutomron", "pascals", "osie",
                         "fiwi"):
            if os.path.isfile(paths["best"] + salicon_name + ext1) and \
               os.path.isfile(paths["best"] + salicon_name + ext2):
                saver.restore(sess, paths["best"] + salicon_name + ".ckpt")
            else:
                raise FileNotFoundError("Train model on SALICON first")
        else:
            if not (os.path.isfile(paths["weights"] + vgg16_name + ext1)
                    and os.path.isfile(paths["weights"] + vgg16_name + ext2)):
                download.download_pretrained_weights(paths["weights"],
                                                     "vgg16_hybrid")
            self._pretraining()

            loader = tf.train.Saver(self._mapping)
            loader.restore(sess, paths["weights"] + vgg16_name + ".ckpt")

        return saver

    def optimize(self, sess, dataset, path, device):
        """The best performing model is frozen, optimized for inference
           by removing unneeded training operations, and written to disk.

        Args:
            sess (object): The current TF training session.
            dataset (str): The dataset used for training.
            path (str): The path used for saving the model.
            device (str): Represents either "cpu" or "gpu".

        .. seealso:: https://bit.ly/2VBBdqQ and https://bit.ly/2W7YqBa
        """

        model_name = "model_%s_%s" % (dataset, device)
        model_path = path + model_name

        tf.train.write_graph(sess.graph.as_graph_def(), path,
                             model_name + ".pbtxt")

        freeze_graph.freeze_graph(model_path + ".pbtxt", "", False,
                                  model_path + ".ckpt", "output",
                                  "save/restore_all", "save/Const:0",
                                  model_path + ".pb", True, "")

        os.remove(model_path + ".pbtxt")

        graph_def = tf.GraphDef()

        with tf.gfile.Open(model_path + ".pb", "rb") as file:
            graph_def.ParseFromString(file.read())

        transforms = [
            "remove_nodes(op=Identity)", "merge_duplicate_nodes",
            "strip_unused_nodes", "fold_constants(ignore_errors=true)"
        ]

        optimized_graph_def = TransformGraph(graph_def, ["input"], ["output"],
                                             transforms)

        tf.train.write_graph(optimized_graph_def,
                             logdir=path,
                             as_text=False,
                             name=model_name + ".pb")
Example #7
def sam_resnet(x):
    # x = [x, x_maps]
    # Dilated Convolutional Network
    print("Starting sam_resnet")
    print("Starting dcn_resnet...")
    dcn = dcn_resnet(input_tensor=x[0])
    # Permute to channels_first (NCHW), as in the original code.
    aux = K.permute_dimensions(dcn.output, (0, 3, 1, 2))

    # Input shape (None, 2048, 30, 40), output shape (None, 512, 30, 40).
    # GPU alternative:
    # conv_feat = Conv2D(512, (3, 3), padding='same', activation='relu',
    #                    data_format="channels_first")(aux)
    conv_feat = Conv2D_NCHW(aux, 512,
                            (3, 3),
                            padding='same',
                            activation='relu')  # CPU version (NCHW)

    # Attentive Convolutional LSTM
    print("Starting att_convlstm...")
    att_convlstm = Lambda(repeat, repeat_shape)(conv_feat)  # output shape (1, 4, 512, 30, 40)
    att_convlstm = AttentiveConvLSTM(nb_filters_in=512,     # output shape (1, 512, 30, 40)
                                     nb_filters_out=512,
                                     nb_filters_att=512,
                                     nb_cols=3,
                                     nb_rows=3)(att_convlstm)

    # Learned Prior (1)
    # Input shape (None, 16, 30, 40), output shape (1, 16, 30, 40).
    print("Starting LearningPrior 1...")
    priors1 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)(x[1])

    # An earlier workaround multiplied both tensors by 1 before concatenating
    # to dodge a Keras bug.
    print("Concatenating...")
    concatenated = concatenate([att_convlstm, priors1], axis=1)

    # GPU alternative:
    # learned_priors1 = Conv2D(512, (5, 5), dilation_rate=(4, 4), activation='relu',
    #                          data_format="channels_first", padding='same')(concatenated)
    learned_priors1 = Conv2D_NCHW(concatenated, 512,
                                  (5, 5),
                                  dilation_rate=(4, 4),
                                  activation='relu',
                                  padding='same')  # CPU version (NCHW)

    # Learned Prior (2)
    print("Starting LearningPrior 2...")
    priors2 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)(x[1])

    print("Concatenating...")
    concatenated = concatenate([learned_priors1, priors2], axis=1)

    learned_priors2 = Conv2D_NCHW(concatenated, 512,
                                  (5, 5),
                                  dilation_rate=(4, 4),
                                  activation='relu',
                                  padding='same')  # CPU version (NCHW)

    # Final Convolutional Layer
    print("Final Convolutional Layer")
    outs = Conv2D_NCHW(learned_priors2, 1,
                       (1, 1),
                       padding='same',
                       activation='relu')  # CPU version, output shape (1, 1, 30, 40)

    # TODO: validate this function.
    outs_up = Lambda(upsampling, upsampling_shape)(outs)  # (1, 1, 30, 40) -> (1, 1, 480, 640)

    print("Finished sam_resnet")
    # When passing a list as loss, it should have one entry per model output.
    return [outs_up, outs_up, outs_up]