def body(self, features):
  """Computes answer logits from an image and a question.

  Pipeline: embed (or pass through) the image features, flatten to 3-D,
  run a self-attention image encoder, encode the question, attend over
  image positions with the question as query, then classify the
  concatenated [image, question] vector with an MLP.  Per-tensor norms
  are recorded in the "norms" collection for summaries.

  Args:
    features: dict of tensors with "inputs" (raw image pixels when
      hp.image_input_type == "image", otherwise precomputed image
      features) and "question" (question token ids).

  Returns:
    Output logits expanded to rank 4: [batch, 1, 1, answer_dim],
    as the surrounding framework expects.
  """
  hp = self.hparams
  # Resolve the image model function without eval() for the common
  # default; eval is kept only for non-default hparam strings.
  # NOTE(review): eval on hp.image_model_fn is unsafe if the hparam can
  # ever come from untrusted input — confirm configs are trusted.
  model_fn = resnet_v1_152
  if hp.image_model_fn != "resnet_v1_152":
    model_fn = eval(hp.image_model_fn)  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    # Raw pixels: run the (optionally trainable) CNN backbone.
    image_feat = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=model_fn,
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    # Precomputed image features are used as-is.
    image_feat = features["inputs"]

  image_feat = common_layers.flatten4d3d(image_feat)

  image_feat = tf.nn.dropout(image_feat, keep_prob=1. - hp.dropout)
  # Self-attention encoder over the image feature sequence.
  image_feat = image_encoder(image_feat, hp)
  utils.collect_named_outputs("norms", "image_feat_encoded",
                              tf.norm(image_feat, axis=-1))
  image_feat = common_layers.l2_norm(image_feat)
  utils.collect_named_outputs("norms", "image_feat_encoded_l2",
                              tf.norm(image_feat, axis=-1))

  query = question_encoder(features["question"], hp)
  utils.collect_named_outputs("norms", "query", tf.norm(query, axis=-1))

  # Question-conditioned attention pooling over image positions.
  image_ave = attn(image_feat, query, hp)
  utils.collect_named_outputs("norms", "image_ave",
                              tf.norm(image_ave, axis=-1))

  image_question = tf.concat([image_ave, query], axis=1)
  utils.collect_named_outputs("norms", "image_question",
                              tf.norm(image_question, axis=-1))

  image_question = tf.nn.dropout(image_question, 1. - hp.dropout)

  output = mlp(image_question, hp)
  utils.collect_named_outputs("norms", "output",
                              tf.norm(output, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2 so the result is rank 4.
  return tf.expand_dims(tf.expand_dims(output, axis=1), axis=2)
def body(self, features):
  """Runs the image+question attention classifier.

  Embeds the image (or consumes precomputed features), optionally
  projects to hp.image_feat_size, l2-normalizes with dropout, encodes
  the question, attends over image positions, and classifies the
  concatenated representation with an MLP.  Tensor norms are collected
  under the "norms" collection for summary logging.

  Args:
    features: dict with "inputs" (image pixels or features) and
      "question" (question token ids).

  Returns:
    Rank-4 logits tensor of shape [batch, 1, 1, answer_dim].
  """
  hp = self.hparams
  # Avoid eval() for the common default backbone.
  model_fn = (resnet_v1_152 if hp.image_model_fn == "resnet_v1_152"
              else eval(hp.image_model_fn))  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    img = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=model_fn,
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    img = features["inputs"]

  if hp.image_feat_size:
    img = common_layers.dense(img, hp.image_feat_size)

  # apply layer normalization and dropout on image_feature
  utils.collect_named_outputs("norms", "image_feat_before_l2",
                              tf.norm(img, axis=-1))
  img = common_layers.l2_norm(img)
  utils.collect_named_outputs("norms", "image_feat_after_l2",
                              tf.norm(img, axis=-1))
  img = tf.nn.dropout(img, keep_prob=1. - hp.dropout)

  q = question_encoder(features["question"], hp)
  utils.collect_named_outputs("norms", "query", tf.norm(q, axis=-1))

  pooled = attn(img, q, hp)
  utils.collect_named_outputs("norms", "image_ave",
                              tf.norm(pooled, axis=-1))

  joint = tf.concat([pooled, q], axis=1)
  utils.collect_named_outputs("norms", "image_question",
                              tf.norm(joint, axis=-1))
  joint = tf.nn.dropout(joint, 1. - hp.dropout)

  logits = mlp(joint, hp)
  utils.collect_named_outputs("norms", "output",
                              tf.norm(logits, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2 to produce the expected rank-4 output.
  return tf.expand_dims(tf.expand_dims(logits, axis=1), axis=2)
def body(self, features):
  """Computes answer logits via question-guided attention over the image.

  Same pipeline as the sibling variant: embed (or pass through) image
  features, optionally project to hp.image_feat_size, l2-normalize with
  dropout, encode the question, attend over image positions, and run the
  concatenated [image, question] vector through an MLP.  Per-tensor
  norms go into the "norms" collection for summaries.

  Args:
    features: dict with "inputs" (raw image pixels when
      hp.image_input_type == "image", else precomputed features) and
      "question" (question token ids).

  Returns:
    Logits expanded to rank 4: [batch, 1, 1, answer_dim].
  """
  hp = self.hparams
  # Resolve the image model function without eval() for the common
  # default, matching the sibling body() that already guards this.
  # NOTE(review): eval on hp.image_model_fn is unsafe for untrusted
  # configs — confirm hparams are always trusted.
  model_fn = resnet_v1_152
  if hp.image_model_fn != "resnet_v1_152":
    model_fn = eval(hp.image_model_fn)  # pylint: disable=eval-used
  if hp.image_input_type == "image":
    image_feat = vqa_layers.image_embedding(
        features["inputs"],
        model_fn=model_fn,
        trainable=hp.train_resnet,
        is_training=hp.mode == tf.estimator.ModeKeys.TRAIN)
  else:
    image_feat = features["inputs"]

  if hp.image_feat_size:
    image_feat = common_layers.dense(image_feat, hp.image_feat_size)

  # apply layer normalization and dropout on image_feature
  utils.collect_named_outputs("norms", "image_feat_before_l2",
                              tf.norm(image_feat, axis=-1))
  image_feat = common_layers.l2_norm(image_feat)
  utils.collect_named_outputs("norms", "image_feat_after_l2",
                              tf.norm(image_feat, axis=-1))
  image_feat = tf.nn.dropout(image_feat, keep_prob=1. - hp.dropout)

  query = question_encoder(features["question"], hp)
  utils.collect_named_outputs("norms", "query", tf.norm(query, axis=-1))

  # Question-conditioned attention pooling over image positions.
  image_ave = attn(image_feat, query, hp)
  utils.collect_named_outputs("norms", "image_ave",
                              tf.norm(image_ave, axis=-1))

  image_question = tf.concat([image_ave, query], axis=1)
  utils.collect_named_outputs("norms", "image_question",
                              tf.norm(image_question, axis=-1))

  image_question = tf.nn.dropout(image_question, 1. - hp.dropout)

  output = mlp(image_question, hp)
  utils.collect_named_outputs("norms", "output",
                              tf.norm(output, axis=-1))

  norm_tensors = utils.convert_collection_to_dict("norms")
  vqa_layers.summarize_tensors(norm_tensors, tag="norms/")

  # Expand dimension 1 and 2 so the result is rank 4.
  return tf.expand_dims(tf.expand_dims(output, axis=1), axis=2)