def sam_resnet(x):
    # Dilated Convolutional Network
    dcn = dcn_resnet(input_tensor=x[0])
    conv_feat = Convolution2D(512, 3, 3, border_mode='same', activation='relu')(dcn.output)

    # Attentive Convolutional LSTM
    att_convlstm = Lambda(repeat, repeat_shape)(conv_feat)
    att_convlstm = AttentiveConvLSTM(nb_filters_in=512, nb_filters_out=512,
                                     nb_filters_att=512, nb_cols=3, nb_rows=3)(att_convlstm)

    # Learned Prior (1)
    priors1 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)(x[1])
    concateneted = merge([att_convlstm, priors1], mode='concat', concat_axis=1)
    learned_priors1 = AtrousConvolution2D(512, 5, 5, border_mode='same', activation='relu',
                                          atrous_rate=(4, 4))(concateneted)

    # Learned Prior (2)
    priors2 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)(x[1])
    concateneted = merge([learned_priors1, priors2], mode='concat', concat_axis=1)
    learned_priors2 = AtrousConvolution2D(512, 5, 5, border_mode='same', activation='relu',
                                          atrous_rate=(4, 4))(concateneted)

    # Final Convolutional Layer
    outs = Convolution2D(1, 1, 1, border_mode='same', activation='relu')(learned_priors2)
    outs_up = Lambda(upsampling, upsampling_shape)(outs)

    return [outs_up, outs_up, outs_up]
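# Minimal usage sketch (assumed, not part of the listing above): wiring sam_resnet into a
# Keras 1.x Model with its two inputs, the channels-first RGB image and the Gaussian prior
# maps. The shape constants (shape_r, shape_c, shape_r_gt, shape_c_gt, nb_gaussian) and the
# loss functions are assumed to come from the surrounding project; treat this as a sketch
# of the wiring, not a verified training script.
from keras.layers import Input
from keras.models import Model
from keras.optimizers import RMSprop

x = Input((3, shape_r, shape_c))                      # RGB image, channels first
x_maps = Input((nb_gaussian, shape_r_gt, shape_c_gt))  # learned-prior input

m = Model(input=[x, x_maps], output=sam_resnet([x, x_maps]))
# One loss per model output; the three outputs above are the same upsampled map.
m.compile(RMSprop(lr=1e-4), loss=[kl_divergence, correlation_coefficient, nss])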
def gaussian_prior_match(tensor, fdm):
    # Learned Prior (1)
    priors1 = LearningPrior(64, nb_gaussian=nb_gaussian)(fdm[1])
    concateneted = concatenate([tensor, priors1], axis=1)
    learned_priors1 = AtrousConvolution2D(64, [5, 5], padding='same', activation='relu',
                                          atrous_rate=(4, 4))(concateneted)

    # Learned Prior (2)
    priors2 = LearningPrior(64, nb_gaussian=nb_gaussian)(fdm[1])
    concateneted = concatenate([learned_priors1, priors2], axis=1)
    learned_priors2 = AtrousConvolution2D(64, [5, 5], padding='same', activation='relu',
                                          atrous_rate=(4, 4))(concateneted)

    return learned_priors2
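# Illustrative sketch (the LearningPrior layer itself is not shown in this listing): the
# kind of 2D Gaussian prior maps such a layer produces, built here in NumPy from per-map
# means and standard deviations. A learned-prior layer would keep mu/sigma as trainable
# weights and evaluate the same expression with backend ops; this helper and its argument
# names are illustrative assumptions.
import numpy as np

def gaussian_prior_maps(mu_x, mu_y, sigma_x, sigma_y, rows, cols):
    """Return an array of shape (nb_gaussian, rows, cols) with one 2D Gaussian per map."""
    ys = np.linspace(0.0, 1.0, rows)[None, :, None]   # (1, rows, 1)
    xs = np.linspace(0.0, 1.0, cols)[None, None, :]   # (1, 1, cols)
    mu_x = np.asarray(mu_x)[:, None, None]
    mu_y = np.asarray(mu_y)[:, None, None]
    sigma_x = np.asarray(sigma_x)[:, None, None]
    sigma_y = np.asarray(sigma_y)[:, None, None]
    return np.exp(-((xs - mu_x) ** 2 / (2 * sigma_x ** 2) +
                    (ys - mu_y) ** 2 / (2 * sigma_y ** 2)))

# Example: four centered Gaussians with increasing spread on a 30x40 grid.
priors = gaussian_prior_maps([0.5] * 4, [0.5] * 4,
                             [0.1, 0.2, 0.3, 0.4], [0.1, 0.2, 0.3, 0.4], 30, 40)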
def TD_model_prior_masks(input_tensors=None, f1_train=True, stateful=False):
    f1 = Feature_dcross_res_matt_res_ds_masks()
    f1.trainable = f1_train

    if input_tensors is None:
        xgaus_shape = (shape_r_gaus, shape_c_gaus, nb_gaussian)
        ximgs_ops_shape = (None, shape_r, shape_c, 3 + 2 * opt_num)
        input_tensors = [Input(shape=xgaus_shape) for i in range(0, num_frames)]
        input_tensors.append(Input(shape=ximgs_ops_shape))

    Ximgs_ops = input_tensors[-1]
    Xgaus = input_tensors[:-1]

    features_out = TimeDistributed(f1)(Ximgs_ops)
    (frame_features, aux_out1, aux_out2, aux_out3,
     mask3, mask4, mask5) = Lambda(Slice_outputs_mask,
                                   output_shape=Slice_outs_shape_mask)(features_out)
    # print('frame_features', K.int_shape(frame_features))

    outs = ConvGRU2D(filters=256, kernel_size=(3, 3),
                     padding='same', return_sequences=True,
                     stateful=stateful, name='ConvGRU2D')(frame_features)
    outs = TimeDistributed(BatchNormalization(name='ConvGRU2D_BN'))(outs)  # previously 256

    prior_layer1 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)
    priors1 = [Lambda(Expand_gaus)(prior_layer1(x)) for x in Xgaus]
    priors1_merged = Concatenate(axis=-4)(priors1)
    sal_concat1 = Concatenate(axis=-1)([outs, priors1_merged])

    outs = TimeDistributed(Conv2D(1, (1, 1), padding='same', activation='sigmoid'))(sal_concat1)
    outs = TimeDistributed(BilinearUpSampling2D((8, 8)))(outs)
    aux_out1 = TimeDistributed(BilinearUpSampling2D((8, 8)))(aux_out1)
    aux_out2 = TimeDistributed(BilinearUpSampling2D((8, 8)))(aux_out2)
    aux_out3 = TimeDistributed(BilinearUpSampling2D((8, 8)))(aux_out3)  # for visualization

    model = Model(inputs=input_tensors,
                  outputs=[outs, aux_out1, aux_out2, aux_out3, mask3, mask4, mask5],
                  name='TD_model_prior')
    return model
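# Minimal shape-check sketch (assumed, not from the source): the model takes num_frames
# Gaussian-prior inputs followed by one stacked image/optical-flow input, matching the
# Input list built above, and returns seven output tensors. The zero arrays are placeholders
# for real data, and the config names (num_frames, shape_r_gaus, shape_c_gaus, nb_gaussian,
# shape_r, shape_c, opt_num) are assumed module-level constants.
import numpy as np

model = TD_model_prior_masks(f1_train=True, stateful=False)
gaus_batch = [np.zeros((1, shape_r_gaus, shape_c_gaus, nb_gaussian), np.float32)
              for _ in range(num_frames)]
imgs_ops_batch = np.zeros((1, num_frames, shape_r, shape_c, 3 + 2 * opt_num), np.float32)
preds = model.predict(gaus_batch + [imgs_ops_batch])  # list of 7 outputs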
def sam_vgg(data):
    # conv_1
    trainable = True
    conv_1_out = Conv2D(64, (3, 3), activation='relu', padding='same',
                        name='block1_conv1', trainable=trainable)(data[0])
    conv_1_out = Conv2D(64, (3, 3), activation='relu', padding='same',
                        name='block1_conv2', trainable=trainable)(conv_1_out)
    ds_conv_1_out = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(conv_1_out)

    # conv_2
    conv_2_out = Conv2D(128, (3, 3), activation='relu', padding='same',
                        name='block2_conv1', trainable=trainable)(ds_conv_1_out)
    conv_2_out = Conv2D(128, (3, 3), activation='relu', padding='same',
                        name='block2_conv2', trainable=trainable)(conv_2_out)
    ds_conv_2_out = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(conv_2_out)

    # conv_3
    conv_3_out = Conv2D(256, (3, 3), activation='relu', padding='same',
                        name='block3_conv1', trainable=trainable)(ds_conv_2_out)
    conv_3_out = Conv2D(256, (3, 3), activation='relu', padding='same',
                        name='block3_conv2', trainable=trainable)(conv_3_out)
    conv_3_out = Conv2D(256, (3, 3), activation='relu', padding='same',
                        name='block3_conv3', trainable=trainable)(conv_3_out)
    ds_conv_3_out = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool',
                                 padding='same')(conv_3_out)

    # conv_4
    conv_4_out = Conv2D(512, (3, 3), activation='relu', padding='same',
                        name='block4_conv1', trainable=trainable)(ds_conv_3_out)
    conv_4_out = Conv2D(512, (3, 3), activation='relu', padding='same',
                        name='block4_conv2', trainable=trainable)(conv_4_out)
    conv_4_out = Conv2D(512, (3, 3), activation='relu', padding='same',
                        name='block4_conv3', trainable=trainable)(conv_4_out)
    ds_conv_4_out = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool',
                                 padding='same')(conv_4_out)

    # conv_5 #
    conv_5_out = Conv2D(512, (3, 3), activation='relu', padding='same',
                        name='block5_conv1', trainable=trainable)(ds_conv_4_out)
    conv_5_out = Conv2D(512, (3, 3), activation='relu', padding='same',
                        name='block5_conv2', trainable=trainable)(conv_5_out)
    conv_5_out = Conv2D(512, (3, 3), activation='relu', padding='same',
                        name='block5_conv3', trainable=trainable)(conv_5_out)

    s_conv_5_out = Conv2D(64, (3, 3), padding='same', activation='relu',
                          name='s_conv_5', trainable=True)(conv_5_out)
    s_saliency_conv_5 = Conv2D(1, (1, 1), activation='sigmoid',
                               name='s_saliency_conv_5', trainable=True)(s_conv_5_out)

    # attention from conv_5 #
    attention_conv_5_out = Flatten()(conv_5_out)
    attention_conv_5_out = RepeatVector(nb_timestep)(attention_conv_5_out)
    attention_conv_5_out = Reshape((nb_timestep, 14, 14, 512))(attention_conv_5_out)
    attention_conv_5 = ConvLSTM2D(filters=512, kernel_size=(3, 3),
                                  padding='same', return_sequences=False,
                                  stateful=False, name='conv5_lstm1',
                                  trainable=trainable)(attention_conv_5_out)
    priors1 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)(data[1])
    attention_conv_5 = Concatenate()([attention_conv_5, priors1])
    attention_conv_5 = Conv2D(64, (3, 3), padding='same', activation='relu',
                              name='merge_att_conv5', trainable=trainable)(attention_conv_5)
    attention_conv_5 = Conv2D(1, (1, 1), activation='sigmoid',
                              name='att_conv5', trainable=trainable)(attention_conv_5)

    conv_5_out = Concatenate()([s_conv_5_out, attention_conv_5])
    conv_5_out = Flatten()(conv_5_out)
    conv_5_out = RepeatVector(nb_timestep)(conv_5_out)
    conv_5_out = Reshape((nb_timestep, 14, 14, 65))(conv_5_out)
    saliency_conv_5 = ConvLSTM2D(filters=64, kernel_size=(3, 3),
                                 padding='same', return_sequences=False,
                                 stateful=False, name='conv5_lstm2',
                                 trainable=True)(conv_5_out)
    saliency_conv_5 = Conv2D(1, (1, 1), activation='sigmoid', name='sal_conv5',
                             trainable=True)(saliency_conv_5)

    conv_4_out = Conv2D(64, (1, 1), padding='same', name='conv_4_out',
                        trainable=trainable)(conv_4_out)
    conv_4_out = BatchNormalization()(conv_4_out)
    conv_4_out = Activation('sigmoid')(conv_4_out)
    up_saliency_conv_5 = UpSampling2D(size=(2, 2))(saliency_conv_5)
    conv_4_out = Concatenate()([conv_4_out, up_saliency_conv_5])
    conv_4_out = Flatten()(conv_4_out)
    conv_4_out = RepeatVector(nb_timestep)(conv_4_out)
    conv_4_out = Reshape((nb_timestep, 28, 28, 65))(conv_4_out)
    saliency_conv_4 = ConvLSTM2D(filters=64, kernel_size=(3, 3),
                                 padding='same', return_sequences=False,
                                 stateful=False,  # True
                                 name='conv4_lstm2', trainable=True)(conv_4_out)
    saliency_conv_4 = Conv2D(1, (1, 1), activation='sigmoid', name='sal_conv4',
                             trainable=True)(saliency_conv_4)

    # saliency from conv_3 #
    conv_3_out = Conv2D(64, (1, 1), padding='same', name='conv_3_out',
                        trainable=True)(conv_3_out)
    conv_3_out = BatchNormalization()(conv_3_out)
    conv_3_out = Activation('sigmoid')(conv_3_out)
    up_saliency_conv_4 = UpSampling2D(size=(2, 2))(saliency_conv_4)
    conv_3_out = Concatenate()([conv_3_out, up_saliency_conv_4])
    conv_3_out = Flatten()(conv_3_out)
    conv_3_out = RepeatVector(nb_timestep)(conv_3_out)
    conv_3_out = Reshape((nb_timestep, 56, 56, 65))(conv_3_out)
    saliency_conv_3 = ConvLSTM2D(filters=64, kernel_size=(3, 3),
                                 padding='same', return_sequences=False,
                                 stateful=False, name='conv3_lstm',
                                 trainable=True)(conv_3_out)
    saliency_conv_3 = Conv2D(1, (1, 1), activation='sigmoid', name='sal_conv3',
                             trainable=True)(saliency_conv_3)

    # saliency from conv_2 #
    conv_2_out = Conv2D(64, (1, 1), padding='same', name='conv_2_out',
                        trainable=True)(conv_2_out)
    conv_2_out = BatchNormalization()(conv_2_out)
    conv_2_out = Activation('sigmoid')(conv_2_out)
    up_saliency_conv_3 = UpSampling2D(size=(2, 2))(saliency_conv_3)
    conv_2_out = Concatenate()([conv_2_out, up_saliency_conv_3])
    conv_2_out = Flatten()(conv_2_out)
    conv_2_out = RepeatVector(nb_timestep)(conv_2_out)
    conv_2_out = Reshape((nb_timestep, 112, 112, 65))(conv_2_out)
    saliency_conv_2 = ConvLSTM2D(filters=64, kernel_size=(3, 3),
                                 padding='same', return_sequences=False,
                                 stateful=False, name='conv2_lstm',
                                 trainable=True)(conv_2_out)
    saliency_conv_2 = Conv2D(1, (1, 1), activation='sigmoid', name='sal_conv2',
                             trainable=True)(saliency_conv_2)

    # saliency from conv_1 #
    conv_1_out = Conv2D(32, (1, 1), padding='same', name='conv_1_out',
                        trainable=True)(conv_1_out)
    conv_1_out = BatchNormalization()(conv_1_out)
    conv_1_out = Activation('sigmoid')(conv_1_out)
    up_saliency_conv_2 = UpSampling2D(size=(2, 2))(saliency_conv_2)
    conv_1_out = Concatenate()([conv_1_out, up_saliency_conv_2])
    conv_1_out = Flatten()(conv_1_out)
    conv_1_out = RepeatVector(nb_timestep)(conv_1_out)
    conv_1_out = Reshape((nb_timestep, 224, 224, 33))(conv_1_out)
    saliency_conv_1 = ConvLSTM2D(filters=32, kernel_size=(3, 3),
                                 padding='same', return_sequences=False,
                                 stateful=False, name='conv1_lstm',
                                 trainable=True)(conv_1_out)
    saliency_conv_1 = Conv2D(1, (1, 1), activation='sigmoid', name='sal_conv1',
                             trainable=True)(saliency_conv_1)

    return [attention_conv_5, s_saliency_conv_5, saliency_conv_5,
            saliency_conv_4, saliency_conv_3, saliency_conv_2, saliency_conv_1]
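# Minimal usage sketch (assumed, not from the source): sam_vgg expects a 224x224 RGB batch
# plus Gaussian prior maps at the conv_5 resolution (14x14, given the Reshape above) and
# returns seven attention/saliency outputs, so a matching list of losses is needed when
# compiling. nb_gaussian is assumed to be a module-level constant.
from keras.layers import Input
from keras.models import Model

img_in = Input(shape=(224, 224, 3))
prior_in = Input(shape=(14, 14, nb_gaussian))
model = Model(inputs=[img_in, prior_in], outputs=sam_vgg([img_in, prior_in]))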
class SAMNET:
    """The class representing SAM-Net, based on the VGG16 model. It implements
       a definition of the computational graph, as well as functions related
       to network training.
    """

    def __init__(self):
        self._output = None
        self._mapping = {}
        self._nb_gaussian = config.PARAMS["nb_gaussian"]
        self.nb_timestep = config.PARAMS["nb_timestep"]
        self.shape_r_gt = config.PARAMS["shape_r_gt"]
        self.shape_c_gt = config.PARAMS["shape_c_gt"]
        self.shape_r_out = config.DIMS["image_out_size_salicon"][0]
        self.shape_c_out = config.DIMS["image_out_size_salicon"][1]

        if config.PARAMS["device"] == "gpu":
            self._data_format = "channels_first"
            self._channel_axis = 1
            self._dims_axis = (2, 3)
        elif config.PARAMS["device"] == "cpu":
            self._data_format = "channels_last"
            self._channel_axis = 3
            self._dims_axis = (1, 2)

        self.attionconvlstm = AttentiveConvLSTM([self.shape_r_gt, self.shape_c_gt],
                                                3, 512, 512, 512,
                                                data_format=self._data_format)
        self.priorlearing1 = LearningPrior(self._nb_gaussian, self.shape_r_gt,
                                           self.shape_c_gt, name='w1')
        self.priorlearing2 = LearningPrior(self._nb_gaussian, self.shape_r_gt,
                                           self.shape_c_gt, name='w2')

    def _encoder(self, images):
        """The encoder of the model consists of a pretrained VGG16 architecture
           with 13 convolutional layers. All dense layers are discarded and the
           last 3 layers are dilated at a rate of 2 to account for the omitted
           downsampling.

        Args:
            images (tensor, float32): A 4D tensor that holds the RGB image
                                      batches used as input to the network.
        """
        imagenet_mean = tf.constant([103.939, 116.779, 123.68])
        imagenet_mean = tf.reshape(imagenet_mean, [1, 1, 1, 3])

        images -= imagenet_mean

        if self._data_format == "channels_first":
            images = tf.transpose(images, (0, 3, 1, 2))

        features = vgg_net(images, self._data_format)
        self._output = features

    def _attenion_convlstm(self, features):
        """The attentive ConvLSTM module. The input of the LSTM layer is
           computed at each timestep (i.e. at each iteration) through an
           attentive mechanism that selectively focuses on different regions
           of the image. The convolutional LSTM focuses on the most salient
           regions of the input image to iteratively refine the predicted
           saliency map.

        Args:
            features (tensor, float32): A 4D tensor that holds the features
                                        extracted by the encoder network.
        """
        x_tile = tf.tile(tf.layers.Flatten()(features), [1, self.nb_timestep])
        x_tile = tf.reshape(x_tile, [-1, self.nb_timestep, 512,
                                     self.shape_r_gt, self.shape_c_gt])

        initial_state = self.attionconvlstm.get_initial_states(x_tile)
        _, state = dynamic_rnn(self.attionconvlstm, x_tile,
                               initial_state=initial_state, dtype=x_tile.dtype)
        self._output = state.h

    def _prior_learing(self, features):
        """This module learns a set of prior maps generated with Gaussian
           functions to tackle the center bias present in human eye fixations.
           The entire learned-prior module is replicated two times.

        Args:
            features (tensor, float32): A 4D tensor that holds the features
                                        refined by the convolutional LSTM
                                        network.
""" priors1 = self.priorlearing1.forword(features) concateneted = tf.concat([features, priors1], axis=1) learned_priors1 = tf.layers.conv2d(concateneted, 512, 5, padding="same", activation=tf.nn.relu, dilation_rate=4, data_format=self._data_format, name="conv/priors1") priors2 = self.priorlearing2.forword(learned_priors1) concateneted = tf.concat([learned_priors1, priors2], axis=1) learned_priors2 = tf.layers.conv2d(concateneted, 512, 5, padding="same", activation=tf.nn.relu, dilation_rate=4, data_format=self._data_format, name="conv/priors2") # Final Convolutional Layer outs = tf.layers.conv2d(learned_priors2, 1, 1, padding="same", activation=tf.nn.relu, data_format=self._data_format, name="conv/decoder") b_s = tf.shape(features)[0] outs = self._upsample(outs, [b_s, 1, self.shape_r_out, self.shape_c_out], 1) if self._data_format == "channels_first": outs = tf.transpose(outs, (0, 2, 3, 1)) self._output = outs def _upsample(self, stack, shape, factor): """This function resizes the input to a desired shape via the bilinear upsampling method. Args: stack (tensor, float32): A 4D tensor with the function input. shape (tensor, int32): A 1D tensor with the reference shape. factor (scalar, int): An integer denoting the upsampling factor. Returns: tensor, float32: A 4D tensor that holds the activations after bilinear upsampling of the input. """ if self._data_format == "channels_first": stack = tf.transpose(stack, (0, 2, 3, 1)) stack = tf.image.resize_bilinear(stack, (shape[self._dims_axis[0]] * factor, shape[self._dims_axis[1]] * factor)) if self._data_format == "channels_first": stack = tf.transpose(stack, (0, 3, 1, 2)) return stack def _normalize(self, maps, eps=1e-7): """This function normalizes the output values to a range between 0 and 1 per saliency map. Args: maps (tensor, float32): A 4D tensor that holds the model output. eps (scalar, float, optional): A small factor to avoid numerical instabilities. Defaults to 1e-7. """ min_per_image = tf.reduce_min(maps, axis=(1, 2, 3), keep_dims=True) maps -= min_per_image max_per_image = tf.reduce_max(maps, axis=(1, 2, 3), keep_dims=True) maps = tf.divide(maps, eps + max_per_image, name="output") self._output = maps def _pretraining(self): """The first 26 variables of the model here are based on the VGG16 network. Therefore, their names are matched to the ones of the pretrained VGG16 checkpoint for correct initialization. """ for var in tf.global_variables()[2:28]: key = var.name.split("/", 1)[1] key = key.replace("kernel:0", "weights") key = key.replace("bias:0", "biases") self._mapping[key] = var def forward(self, images): """Public method to forward RGB images through the whole network architecture and retrieve the resulting output. Args: images (tensor, float32): A 4D tensor that holds the values of the raw input images. Returns: tensor, float32: A 4D tensor that holds the values of the predicted saliency maps. """ self._encoder(images) self._attenion_convlstm(self._output) self._prior_learing(self._output) self._normalize(self._output) return self._output def train(self, ground_truth_map, ground_truth_fixation, predicted_maps, learning_rate): """Public method to define the loss function and optimization algorithm for training the model. Args: ground_truth_map (tensor, float32): A 4D tensor with the ground truth map. ground_truth_fixation (tensor, float32): A 4D tensor with the ground truth fixation. predicted_maps (tensor, float32): A 4D tensor with the predictions. learning_rate (scalar, float): Defines the learning rate. 
        Returns:
            object: The optimizer element used to train the model.
            tensor, float32: A 0D tensor that holds the averaged error.
        """
        kld = loss.kld(ground_truth_map, predicted_maps)
        cc = loss.correlation_coefficient(ground_truth_map, predicted_maps)
        nss = loss.nss(ground_truth_fixation, predicted_maps)
        error = 10 * kld + 2 * cc + nss

        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        optimizer = optimizer.minimize(error)

        return optimizer, error

    def save(self, saver, sess, dataset, path, device):
        """This saves a model checkpoint to disk and creates the folder if it
           doesn't exist yet.

        Args:
            saver (object): An object for saving the model.
            sess (object): The current TF training session.
            dataset (str): The dataset used for training.
            path (str): The path used for saving the model.
            device (str): Represents either "cpu" or "gpu".
        """
        os.makedirs(path, exist_ok=True)

        saver.save(sess, path + "model_%s_%s.ckpt" % (dataset, device),
                   write_meta_graph=False, write_state=False)

    def restore(self, sess, dataset, paths, device):
        """This function allows continued training from a prior checkpoint and
           training from scratch with the pretrained VGG16 weights. In case the
           dataset is either CAT2000 or MIT1003, a prior checkpoint based on
           the SALICON dataset is required.

        Args:
            sess (object): The current TF training session.
            dataset (str): The dataset used for training.
            paths (dict, str): A dictionary with all path elements.
            device (str): Represents either "cpu" or "gpu".

        Returns:
            object: A saver object for saving the model.
        """
        model_name = "model_%s_%s" % (dataset, device)
        salicon_name = "model_salicon_%s" % device
        vgg16_name = "vgg16_hybrid"

        ext1 = ".ckpt.data-00000-of-00001"
        ext2 = ".ckpt.index"

        saver = tf.train.Saver()

        if os.path.isfile(paths["latest"] + model_name + ext1) and \
           os.path.isfile(paths["latest"] + model_name + ext2):
            saver.restore(sess, paths["latest"] + model_name + ".ckpt")
        elif dataset in ("mit1003", "cat2000", "dutomron",
                         "pascals", "osie", "fiwi"):
            if os.path.isfile(paths["best"] + salicon_name + ext1) and \
               os.path.isfile(paths["best"] + salicon_name + ext2):
                saver.restore(sess, paths["best"] + salicon_name + ".ckpt")
            else:
                raise FileNotFoundError("Train model on SALICON first")
        else:
            if not (os.path.isfile(paths["weights"] + vgg16_name + ext1) or
                    os.path.isfile(paths["weights"] + vgg16_name + ext2)):
                download.download_pretrained_weights(paths["weights"],
                                                     "vgg16_hybrid")
            self._pretraining()

            loader = tf.train.Saver(self._mapping)
            loader.restore(sess, paths["weights"] + vgg16_name + ".ckpt")

        return saver

    def optimize(self, sess, dataset, path, device):
        """The best performing model is frozen, optimized for inference by
           removing unneeded training operations, and written to disk.

        Args:
            sess (object): The current TF training session.
            dataset (str): The dataset used for training.
            path (str): The path used for saving the model.
            device (str): Represents either "cpu" or "gpu".
        .. seealso:: https://bit.ly/2VBBdqQ and https://bit.ly/2W7YqBa
        """
        model_name = "model_%s_%s" % (dataset, device)
        model_path = path + model_name

        tf.train.write_graph(sess.graph.as_graph_def(), path,
                             model_name + ".pbtxt")

        freeze_graph.freeze_graph(model_path + ".pbtxt", "", False,
                                  model_path + ".ckpt", "output",
                                  "save/restore_all", "save/Const:0",
                                  model_path + ".pb", True, "")

        os.remove(model_path + ".pbtxt")

        graph_def = tf.GraphDef()

        with tf.gfile.Open(model_path + ".pb", "rb") as file:
            graph_def.ParseFromString(file.read())

        transforms = ["remove_nodes(op=Identity)",
                      "merge_duplicate_nodes",
                      "strip_unused_nodes",
                      "fold_constants(ignore_errors=true)"]

        optimized_graph_def = TransformGraph(graph_def, ["input"],
                                             ["output"], transforms)

        tf.train.write_graph(optimized_graph_def, logdir=path,
                             as_text=False, name=model_name + ".pb")
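# Minimal TF1-style usage sketch (assumed, not part of the original class): build the graph,
# forward a batch of images, and create the training op via the public methods above. The
# placeholder named "input" matches the node name expected by optimize(); the concrete sizes
# (240x320 in, 480x640 out) are example values standing in for whatever config.DIMS defines.
import tensorflow as tf

model = SAMNET()
images = tf.placeholder(tf.float32, (None, 240, 320, 3), name="input")
gt_maps = tf.placeholder(tf.float32, (None, 480, 640, 1))
gt_fixations = tf.placeholder(tf.float32, (None, 480, 640, 1))

predictions = model.forward(images)  # normalized saliency maps, node name "output"
train_op, error = model.train(gt_maps, gt_fixations, predictions, learning_rate=1e-5)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # sess.run([train_op, error], feed_dict={images: ..., gt_maps: ..., gt_fixations: ...})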
def sam_resnet(x):  # x = [x, x_maps]
    # Dilated Convolutional Network
    print("Starting sam_resnet")
    print("Starting dcn_resnet...")
    dcn = dcn_resnet(input_tensor=x[0])  # Ready!

    # Added to switch to channels_first, as in the original code.
    aux = K.permute_dimensions(dcn.output, (0, 3, 1, 2))

    # conv_feat = Convolution2D(512, 3, 3, border_mode='same', activation='relu')(dcn.output)
    # New version. Input shape = (None, 2048, 30, 40), output shape = (None, 512, 30, 40)
    # conv_feat = Conv2D(512,
    #                    (3, 3),
    #                    padding='same',
    #                    activation='relu',
    #                    data_format="channels_first")(aux)  # GPU new version
    conv_feat = Conv2D_NCHW(aux, 512, (3, 3), padding='same', activation='relu')  # CPU new version, NCHW

    # Attentive Convolutional LSTM
    print("Starting att_convlstm...")
    att_convlstm = Lambda(repeat, repeat_shape)(conv_feat)  # Output shape = (1, 4, 512, 30, 40)
    # x = att_convlstm
    # l = AttentiveConvLSTM(nb_filters_in=512, nb_filters_out=512, nb_filters_att=512, nb_cols=3, nb_rows=3)
    # l(x)
    att_convlstm = AttentiveConvLSTM(nb_filters_in=512,   # Output shape = (1, 512, 30, 40)
                                     nb_filters_out=512,
                                     nb_filters_att=512,
                                     nb_cols=3,
                                     nb_rows=3)(att_convlstm)

    # Learned Prior (1). Input shape = (None, 16, 30, 40), output shape = (1, 16, 30, 40)
    print("Starting LearningPrior 1...")
    priors1 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)(x[1])
    # concateneted = merge([att_convlstm, priors1], mode='concat', concat_axis=1)
    # attentive = att_convlstm * 1  # working around a bug
    # prior = priors1 * 1           # working around a bug
    # concateneted = concatenate([attentive, prior], axis=1)  # new version without the bug
    print("Concatenating...")
    concateneted = concatenate([att_convlstm, priors1], axis=1)  # version with the bug
    # learned_priors1 = AtrousConvolution2D(512, 5, 5, border_mode='same', activation='relu',
    #                                       atrous_rate=(4, 4))(concateneted)
    # learned_priors1 = Conv2D(512, (5, 5), dilation_rate=(4, 4), activation='relu',
    #                          data_format="channels_first", padding='same')(concateneted)  # new version for GPU
    learned_priors1 = Conv2D_NCHW(concateneted, 512, (5, 5), dilation_rate=(4, 4),
                                  activation='relu', padding='same')  # new version for CPU

    # Learned Prior (2)
    print("Starting LearningPrior 2...")
    priors2 = LearningPrior(nb_gaussian=nb_gaussian, init=gaussian_priors_init)(x[1])
    # learned_priors1 = learned_priors1 * 1  # working around a bug
    # priors2 = priors2 * 1                  # working around a bug
    # concateneted = merge([learned_priors1, priors2], mode='concat', concat_axis=1)
    print("Concatenating...")
    concateneted = concatenate([learned_priors1, priors2], axis=1)
    # learned_priors2 = AtrousConvolution2D(512, 5, 5, border_mode='same', activation='relu',
    #                                       atrous_rate=(4, 4))(concateneted)
    # learned_priors2 = Conv2D(512, (5, 5), dilation_rate=(4, 4), activation='relu',
    #                          data_format="channels_first", padding='same')(concateneted)  # new version for GPU
    learned_priors2 = Conv2D_NCHW(concateneted, 512, (5, 5), dilation_rate=(4, 4),
                                  activation='relu', padding='same')  # new version for CPU

    # Final Convolutional Layer
    print("Final Convolutional Layer")
    # outs = Convolution2D(1, 1, 1, border_mode='same', activation='relu')(learned_priors2)
    # outs = Conv2D(1, (1, 1), padding='same', data_format="channels_first",
    #               activation='relu')(learned_priors2)  # new version for GPU, output shape = (1, 1, 30, 40)
    outs = Conv2D_NCHW(learned_priors2, 1, (1, 1), padding='same',
                       activation='relu')  # new version for CPU, output shape = (1, 1, 30, 40)

    # VALIDATE THIS FUNCTION
    outs_up = Lambda(upsampling, upsampling_shape)(outs)  # Input shape = (1, 1, 30, 40)
    # print(outs_up.shape)  # (1, 1, 480, 640)

    print("Finished sam_resnet")
    # When passing a list as loss, it should have one entry per model output.
    return [outs_up, outs_up, outs_up]
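# Hypothetical sketch of the Conv2D_NCHW helper used above (its definition is not part of
# this listing). The assumption is that it lets channels-first tensors be convolved on CPU,
# where a channels_first Conv2D is typically unsupported, by permuting to NHWC, applying a
# standard channels-last Conv2D, and permuting back to NCHW.
from keras.layers import Conv2D, Permute

def Conv2D_NCHW(x, filters, kernel_size, **conv_kwargs):
    """Apply a channels-last Conv2D to a channels-first tensor via transposes."""
    x = Permute((2, 3, 1))(x)                        # NCHW -> NHWC
    x = Conv2D(filters, kernel_size, **conv_kwargs)(x)
    return Permute((3, 1, 2))(x)                     # NHWC -> NCHW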