Ejemplo n.º 1
0
    def _build_obj_encoding(self):
        """Create the layers that embed object appearance and location.

        Builds a Faster R-CNN fc7 ``ImageEncoder``, registers it in
        ``self.finetune_modules`` together with ``config.lr_scale_frcn``,
        and creates the linear projections, layer norms and dropout that
        bring object features and 4-dim boxes to the MMT hidden size.
        """
        # Appearance branch: Faster R-CNN fc7 encoder.
        frcn_fc7 = ImageEncoder(
            encoder_type='finetune_faster_rcnn_fpn_fc7',
            in_dim=2048,
            weights_file='detectron/fc6/fc7_w.pkl',
            bias_file='detectron/fc6/fc7_b.pkl',
            model_data_dir=self.config["model_data_dir"],
        )
        self.obj_faster_rcnn_fc7 = frcn_fc7

        # The fc7 encoder gets its own learning-rate scale (meant to be
        # smaller than the base rate, per the original author's note).
        finetune_entry = {
            'module': frcn_fc7,
            'lr_scale': self.config.lr_scale_frcn,
        }
        self.finetune_modules.append(finetune_entry)

        hidden_dim = self.mmt_config.hidden_size
        self.linear_obj_feat_to_mmt_in = nn.Linear(
            self.config.obj.mmt_in_dim, hidden_dim)

        # Location branch: relative bounding box coordinates (4-dim).
        self.linear_obj_bbox_to_mmt_in = nn.Linear(4, hidden_dim)

        self.obj_feat_layer_norm = BertLayerNorm(hidden_dim)
        self.obj_bbox_layer_norm = BertLayerNorm(hidden_dim)
        self.obj_drop = nn.Dropout(self.config.obj.dropout_prob)
Ejemplo n.º 2
0
    def _build_ocr_encoding(self):
        """Create the layers that embed OCR token features.

        Reads five optional ``remove_ocr_*`` flags from ``config.ocr``
        (each defaults to ``False`` when absent), builds a Faster R-CNN fc7
        ``ImageEncoder`` registered in ``self.finetune_modules``, and
        creates the projections, layer norms and dropout that map OCR
        features and 4-dim boxes to the MMT hidden size.
        """
        ocr_cfg = self.config.ocr

        # Each flag is stored on the model under the same name it has in
        # the config; a missing entry means False.
        flag_names = (
            'remove_ocr_fasttext',
            'remove_ocr_phoc',
            'remove_ocr_frcn',
            'remove_ocr_semantics',
            'remove_ocr_bbox',
        )
        for flag_name in flag_names:
            setattr(self, flag_name, getattr(ocr_cfg, flag_name, False))

        # Appearance branch: Faster R-CNN fc7 encoder.
        frcn_fc7 = ImageEncoder(
            encoder_type='finetune_faster_rcnn_fpn_fc7',
            in_dim=2048,
            weights_file='detectron/fc6/fc7_w.pkl',
            bias_file='detectron/fc6/fc7_b.pkl',
            model_data_dir=self.config["model_data_dir"],
        )
        self.ocr_faster_rcnn_fc7 = frcn_fc7
        finetune_entry = {
            'module': frcn_fc7,
            'lr_scale': self.config.lr_scale_frcn,
        }
        self.finetune_modules.append(finetune_entry)

        hidden_dim = self.mmt_config.hidden_size
        self.linear_ocr_feat_to_mmt_in = nn.Linear(
            self.config.ocr.mmt_in_dim, hidden_dim)

        # Location branch: relative bounding box coordinates (4-dim).
        self.linear_ocr_bbox_to_mmt_in = nn.Linear(4, hidden_dim)

        self.ocr_feat_layer_norm = BertLayerNorm(hidden_dim)
        self.ocr_bbox_layer_norm = BertLayerNorm(hidden_dim)
        self.ocr_drop = nn.Dropout(self.config.ocr.dropout_prob)
Ejemplo n.º 3
0
    def __init__(self, config):
        """Create position embeddings, layer norms and dropout.

        Args:
            config: object providing ``hidden_size``, ``layer_norm_eps``
                and ``hidden_dropout_prob``.
        """
        super().__init__()

        # Number of rows in the position-embedding table.
        max_positions = 100
        width = config.hidden_size
        eps = config.layer_norm_eps

        self.position_embeddings = nn.Embedding(max_positions, width)

        self.ans_layer_norm = BertLayerNorm(width, eps=eps)
        self.emb_layer_norm = BertLayerNorm(width, eps=eps)
        self.emb_dropout = nn.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 4
0
    def __init__(self, hidden_size, vocab_size, hidden_act="gelu", task_name="lm", **kwargs):
        """Set up the language-modelling prediction head.

        Args:
            hidden_size: width of the incoming hidden states.
            vocab_size: number of output tokens (also stored as
                ``num_labels``).
            hidden_act: key into ``ACT2FN`` selecting the activation.
            task_name: name stored on the head.
            **kwargs: accepted but not used by this constructor.
        """
        super(BertLMHead, self).__init__()

        # Plain attributes are set before generate_config() is called,
        # as in the original ordering.
        self.hidden_size = hidden_size
        self.hidden_act = hidden_act
        self.vocab_size = vocab_size
        self.loss_fct = CrossEntropyLoss(reduction="none", ignore_index=-1)
        self.num_labels = vocab_size
        # TODO: check whether explicit weight initialisation is needed
        # (e.g. self.apply(self.init_bert_weights)).
        self.ph_output_type = "per_token"
        self.model_type = "language_modelling"
        self.task_name = task_name
        self.generate_config()

        # "transform" part: dense -> activation -> layer norm.
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.transform_act_fn = ACT2FN[hidden_act]
        self.LayerNorm = BertLayerNorm(hidden_size, eps=1e-12)

        # "decoder" part: bias-free projection to the vocabulary plus a
        # separate per-token output bias initialised to zeros.
        self.decoder = nn.Linear(hidden_size, vocab_size, bias=False)
        self.bias = nn.Parameter(torch.zeros(vocab_size))
Ejemplo n.º 5
0
    def __init__(self, config):
        """Create the dense layer, layer norm, decoder and output bias.

        Args:
            config: object providing ``hidden_size``, ``vocab_size`` and
                ``layer_norm_eps``.
        """
        super(RobertaLMHead, self).__init__()
        width = config.hidden_size
        n_tokens = config.vocab_size

        self.dense = nn.Linear(width, width)
        self.layer_norm = BertLayerNorm(width, eps=config.layer_norm_eps)

        # Bias-free projection to the vocabulary; the bias is a separate
        # zero-initialised parameter.
        self.decoder = nn.Linear(width, n_tokens, bias=False)
        self.bias = nn.Parameter(torch.zeros(n_tokens))
Ejemplo n.º 6
0
 def __init__(self, config):
     """Create the image feature, location and type embedding layers.

     Args:
         config: object providing ``hidden_size`` and
             ``hidden_dropout_prob``.
     """
     super(BertImageEmbeddings, self).__init__()
     width = config.hidden_size

     # 2048-dim image features and 5-dim location inputs are both
     # projected to the hidden width.
     self.image_embeddings = nn.Linear(2048, width)
     self.image_location_embeddings = nn.Linear(5, width)
     # Embedding table with a single row.
     self.image_type_embeddings = nn.Embedding(1, width)
     self.LayerNorm = BertLayerNorm(width, eps=1e-12)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 7
0
 def __init__(self, config):
     """Create the dense layer, layer norm and dropout of the output block.

     The dense layer is obtained from ``quantized_linear_setup`` under the
     name ``"ffn_output"`` with sizes ``config.intermediate_size`` and
     ``config.hidden_size``.
     """
     super(BertOutput, self).__init__()
     inner_dim = config.intermediate_size
     width = config.hidden_size

     self.dense = quantized_linear_setup(
         config, "ffn_output", inner_dim, width)
     self.LayerNorm = BertLayerNorm(width, eps=config.layer_norm_eps)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 8
0
 def __init__(self, config):
     """Create the dense layer, layer norm and dropout of the block.

     The dense layer is obtained from ``quantized_linear_setup`` under the
     name ``'attention_output'`` with both sizes equal to
     ``config.hidden_size``.
     """
     super(BertSelfOutput, self).__init__()
     width = config.hidden_size

     self.dense = quantized_linear_setup(
         config, 'attention_output', width, width)
     self.LayerNorm = BertLayerNorm(width, eps=config.layer_norm_eps)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 9
0
    def __init__(self, config):
        """Create position/type embeddings, layer norms and dropout.

        Args:
            config: object providing ``hidden_size``, ``layer_norm_eps``
                and ``hidden_dropout_prob``.
        """
        super().__init__()

        # Sizes of the two embedding tables.
        max_positions = 100
        max_token_types = 5
        width = config.hidden_size
        eps = config.layer_norm_eps

        self.position_embeddings = nn.Embedding(max_positions, width)
        self.token_type_embeddings = nn.Embedding(max_token_types, width)

        self.ans_layer_norm = BertLayerNorm(width, eps=eps)
        self.ocr_layer_norm = BertLayerNorm(width, eps=eps)
        self.emb_layer_norm = BertLayerNorm(width, eps=eps)
        # The original author notes a default value of 0.1 is used here.
        self.emb_dropout = nn.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 10
0
 def __init__(self, in_dim, hid_dim, out_dim, dropout):
     """Build the ``logit_fc`` stack: Linear -> GeLU -> LayerNorm -> Linear.

     Args:
         in_dim: input feature size of the first linear layer.
         hid_dim: size of the intermediate representation.
         out_dim: output size of the final linear layer.
         dropout: accepted but not used by this constructor.
     """
     super().__init__()
     stack = [
         nn.Linear(in_dim, hid_dim),
         GeLU(),
         BertLayerNorm(hid_dim, eps=1e-12),
         nn.Linear(hid_dim, out_dim),
     ]
     self.logit_fc = nn.Sequential(*stack)
Ejemplo n.º 11
0
    def _build_ocr_encoding(self):
        """Create the layers that embed OCR token features.

        Only ``frcn_encoder_type == "default"`` is supported; anything
        else trips the assertion below. Sizes and the dropout probability
        are read from ``self.mmt_config``.
        """
        # (YK): Todo -- other encoder types are not handled yet.
        assert self.frcn_encoder_type == "default"

        # Appearance branch: Faster R-CNN encoder.
        encoder_kwargs = dict(
            encoder_type=self.frcn_encoder_type,
            in_dim=2048,
            weights_file="detectron/fc6/fc7_w.pkl",
            bias_file="detectron/fc6/fc7_b.pkl",
            model_data_dir=None,
        )
        self.ocr_faster_rcnn_fc7 = ImageEncoder(**encoder_kwargs)

        mmt_cfg = self.mmt_config
        self.linear_ocr_feat_to_mmt_in = nn.Linear(
            mmt_cfg.ocr_feature_size, mmt_cfg.hidden_size)

        # Location branch: relative bounding box coordinates (4-dim).
        self.linear_ocr_bbox_to_mmt_in = nn.Linear(4, mmt_cfg.hidden_size)

        self.ocr_feat_layer_norm = BertLayerNorm(mmt_cfg.hidden_size)
        self.ocr_bbox_layer_norm = BertLayerNorm(mmt_cfg.hidden_size)
        self.ocr_drop = nn.Dropout(mmt_cfg.ocr_drop)
Ejemplo n.º 12
0
    def _build_obj_encoding(self):
        """Create the layers that embed object appearance and location.

        Only ``frcn_encoder_type == "default"`` is supported; anything
        else trips the assertion below. Sizes and the dropout probability
        are read from ``self.mmt_config``.
        """
        # (YK) Todo: support for last-layer finetuning.
        assert self.frcn_encoder_type == "default"

        # Appearance branch: Faster R-CNN encoder.
        self.obj_faster_rcnn_fc7 = ImageEncoder(
            encoder_type=self.frcn_encoder_type, in_dim=2048)
        # NOTE: the encoder is not added to self.finetune_modules here; a
        # registration using config.lr_scale_frcn was disabled in the
        # original code.

        mmt_cfg = self.mmt_config
        self.linear_obj_feat_to_mmt_in = nn.Linear(
            mmt_cfg.obj_feature_size, mmt_cfg.hidden_size)

        # Location branch: relative bounding box coordinates (4-dim).
        self.linear_obj_bbox_to_mmt_in = nn.Linear(4, mmt_cfg.hidden_size)

        self.obj_feat_layer_norm = BertLayerNorm(mmt_cfg.hidden_size)
        self.obj_bbox_layer_norm = BertLayerNorm(mmt_cfg.hidden_size)
        self.obj_drop = nn.Dropout(mmt_cfg.obj_drop)
Ejemplo n.º 13
0
Archivo: cnmt.py Proyecto: wzk1015/CNMT
    def _build_ocr_encoding(self):
        """Create the layers that embed OCR features, boxes and confidences.

        Builds a Faster R-CNN fc7 ``ImageEncoder`` registered in
        ``self.finetune_modules`` with a fixed ``lr_scale`` of 0.1, then
        three linear projections (3002-dim features, 4-dim boxes, 1-dim
        confidence) to 768 dimensions, each with its own layer norm, and
        one dropout layer.
        """
        # Appearance branch: Faster R-CNN fc7 encoder.
        frcn_fc7 = ImageEncoder(
            encoder_type='finetune_faster_rcnn_fpn_fc7',
            in_dim=2048,
            weights_file='detectron/fc6/fc7_w.pkl',
            bias_file='detectron/fc6/fc7_b.pkl',
            model_data_dir=self.config["model_data_dir"],
        )
        self.ocr_faster_rcnn_fc7 = frcn_fc7
        self.finetune_modules.append({'module': frcn_fc7, 'lr_scale': 0.1})

        # All projections share this fixed output width.
        out_dim = 768
        self.linear_ocr_feat_to_mmt_in = nn.Linear(3002, out_dim)

        # Location branch: relative bounding box coordinates (4-dim).
        self.linear_ocr_bbox_to_mmt_in = nn.Linear(4, out_dim)

        # Confidence branch: a single scalar per OCR token.
        self.linear_ocr_conf_to_mmt_in = nn.Linear(1, out_dim)

        self.ocr_feat_layer_norm = BertLayerNorm(out_dim)
        self.ocr_bbox_layer_norm = BertLayerNorm(out_dim)
        self.ocr_conf_layer_norm = BertLayerNorm(out_dim)
        self.ocr_drop = nn.Dropout(0.1)
Ejemplo n.º 14
0
    def __init__(self, config):
        """Create word, position and token-type embeddings plus norm/dropout.

        Args:
            config: object providing ``vocab_size``, ``hidden_size``,
                ``max_position_embeddings``, ``type_vocab_size``,
                ``layer_norm_eps`` and ``hidden_dropout_prob``.
        """
        super(BertEmbeddings, self).__init__()
        width = config.hidden_size

        # Index 0 is the padding index of the word table.
        self.word_embeddings = nn.Embedding(
            config.vocab_size, width, padding_idx=0)
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings, width)
        self.token_type_embeddings_modified = nn.Embedding(
            config.type_vocab_size, width)

        # The attribute keeps the TensorFlow-style name "LayerNorm" (not
        # snake case) so TensorFlow checkpoint files can be loaded.
        self.LayerNorm = BertLayerNorm(width, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 15
0
    def __init__(self, config):
        """Create the three embedding tables plus layer norm and dropout.

        Each table is obtained from ``quantized_embedding_setup`` under
        its own name; the word table is requested with ``padding_idx=0``.
        """
        super(BertEmbeddings, self).__init__()
        width = config.hidden_size

        self.word_embeddings = quantized_embedding_setup(
            config, 'word_embeddings', config.vocab_size, width,
            padding_idx=0)
        self.position_embeddings = quantized_embedding_setup(
            config, 'position_embeddings',
            config.max_position_embeddings, width)
        self.token_type_embeddings = quantized_embedding_setup(
            config, 'token_type_embeddings', config.type_vocab_size, width)

        self.LayerNorm = BertLayerNorm(width, eps=config.layer_norm_eps)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 16
0
 def __init__(self, config, newly_added_config):
     """Create the four linear maps, layer norm and dropout of the module.

     Args:
         config: object providing ``hidden_size`` and
             ``hidden_dropout_prob``.
         newly_added_config: mapping providing ``d_visual_in``,
             ``d_acoustic_in``, ``h_merge_sent`` and ``beta_shift``.
     """
     super(MAG, self).__init__()
     visual_dim = newly_added_config["d_visual_in"]
     merge_dim = newly_added_config["h_merge_sent"]
     acoustic_dim = newly_added_config["d_acoustic_in"]

     # W_hv / W_ha take an input of size (modality dim + merge dim).
     self.W_hv = nn.Linear(visual_dim + merge_dim, merge_dim)
     self.W_ha = nn.Linear(acoustic_dim + merge_dim, merge_dim)
     # W_v / W_a take the modality input alone.
     self.W_v = nn.Linear(visual_dim, merge_dim)
     self.W_a = nn.Linear(acoustic_dim, merge_dim)

     self.beta = newly_added_config["beta_shift"]
     self.newly_added_config = newly_added_config
     self.LayerNorm = BertLayerNorm(config.hidden_size, eps=1e-6)
     self.final_dropout = nn.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 17
0
 def __init__(self, config):
     """Create the embedding tables, layer norm and dropout.

     Besides word, position and token-type tables this adds a 10-row
     extension table for extra segment ids and a 50-row table for
     separator tokens. ``config`` is also stored on the instance.
     """
     super(BertEmbeddingsDialog, self).__init__(config)
     width = config.hidden_size

     self.word_embeddings = nn.Embedding(config.vocab_size, width)
     self.position_embeddings = nn.Embedding(
         config.max_position_embeddings, width)
     self.token_type_embeddings = nn.Embedding(
         config.type_vocab_size, width)
     # Additional segment embeddings; 10 extra entries for now.
     self.token_type_embeddings_extension = nn.Embedding(10, width)
     # Specialised embeddings for sep tokens.
     self.sep_embeddings = nn.Embedding(50, width)
     # The attribute keeps the TensorFlow-style name "LayerNorm" (not
     # snake case) so TensorFlow checkpoint files can be loaded.
     self.LayerNorm = BertLayerNorm(width, eps=1e-12)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.config = config
Ejemplo n.º 18
0
 def __init__(self, config):
     """Create word/position/token-type embeddings plus norm and dropout.

     The position table has ``padding_idx + max_position_embeddings + 1``
     rows. ``token_type_embeddings`` is ``None`` when
     ``config.type_vocab_size`` is not positive.
     """
     super(RobertaEmbeddings, self).__init__()
     width = config.hidden_size
     self.padding_idx = config.padding_idx

     self.word_embeddings = nn.Embedding(
         config.vocab_size, width, padding_idx=config.padding_idx)

     num_positions = self.padding_idx + config.max_position_embeddings + 1
     self.position_embeddings = nn.Embedding(
         num_positions, width, padding_idx=config.padding_idx)

     # The token-type table is optional.
     self.token_type_embeddings = (
         nn.Embedding(config.type_vocab_size, width)
         if config.type_vocab_size > 0 else None
     )
     self.LayerNorm = BertLayerNorm(width, eps=config.layer_norm_eps)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
Ejemplo n.º 19
0
 def __init__(self, d_model=768, nhead=12, num_layers=4, image_encoder=None):
     """Create the transformer decoder, image encoders and update gate.

     Args:
         d_model: model width used by the decoder, the second image
             encoder and the update gate.
         nhead: number of attention heads per decoder layer.
         num_layers: number of stacked decoder layers.
         image_encoder: optional module used as ``image_encoder1``. When
             ``None``, a default ``Linear(2048, 2048)`` + ``ELU`` stack
             is created. ``image_encoder2`` always expects 2048-dim
             input.
     """
     super().__init__()
     decoder_layer = nn.TransformerDecoderLayer(d_model=d_model, nhead=nhead)
     self.transformer = nn.TransformerDecoder(decoder_layer, num_layers=num_layers)
     # Compare against None explicitly: container modules such as an
     # empty nn.Sequential() define __len__ and are falsy, so a plain
     # truthiness test would discard an encoder the caller supplied.
     if image_encoder is not None:
         self.image_encoder1 = image_encoder
     else:
         self.image_encoder1 = nn.Sequential(
             nn.Linear(2048, 2048),
             nn.ELU(inplace=True)
         )
     self.image_encoder2 = nn.Sequential(
         nn.Linear(2048, d_model),
         BertLayerNorm(d_model)
     )
     # Takes an input of width 2 * d_model and maps it back to d_model.
     self.update_gate = nn.Sequential(
         nn.Linear(2 * d_model, d_model),
         nn.ELU(inplace=True),
     )
Ejemplo n.º 20
0
 def build(self):
     """Instantiate every sub-module of the model.

     Creates the MMT and its input projections, the answer processor and
     OCR pointer network, the text/object/OCR encoders, the "image" and
     "context" feature embeddings, the combine layer and the classifier.
     Attributes are created in the same order as before, so module
     registration order is unchanged.
     """
     self.mmt_config = BertConfig(**self.config.mmt)
     self.mmt = MMT(self.mmt_config)
     hidden_size = self.mmt_config.hidden_size

     # "so" / "st" inputs: 3 * 1536-dim vectors projected to hidden size.
     self.so_to_mmt_in = nn.Linear(3 * 1536, hidden_size)
     self.st_to_mmt_in = nn.Linear(3 * 1536, hidden_size)
     self.so_layer_norm = BertLayerNorm(hidden_size)
     self.st_layer_norm = BertLayerNorm(hidden_size)
     self.so_drop = nn.Dropout(0.1)
     self.st_drop = nn.Dropout(0.1)

     # "go" (2048-dim) / "gt" (300-dim) inputs projected to hidden size.
     self.linear_go_to_mmt_in = nn.Linear(2048, hidden_size)
     self.linear_gt_to_mmt_in = nn.Linear(300, hidden_size)
     self.go_layer_norm = BertLayerNorm(hidden_size)
     self.gt_layer_norm = BertLayerNorm(hidden_size)
     self.go_drop = nn.Dropout(0.1)
     self.gt_drop = nn.Dropout(0.1)

     # Updated OCR inputs (300-dim) projected to hidden size.
     self.linear_updated_ocr_to_mmt_in = nn.Linear(300, hidden_size)
     self.updated_ocr_layer_norm = BertLayerNorm(hidden_size)
     self.updated_ocr_drop = nn.Dropout(self.config.ocr.dropout_prob)

     self.linear_joint = nn.Linear(1536, 768)
     self.answer_processor = registry.get(self._datasets[0] +
                                          "_answer_processor")
     self.ocr_ptr_net = OcrPtrNet(**self.config.classifier.ocr_ptr_net)

     # modules requiring custom learning rates (usually for finetuning)
     self.finetune_modules = []
     self._build_txt_encoding()
     self._build_obj_encoding()
     self._build_ocr_encoding()
     self._init_text_embeddings("text")

     # Feature embedding for "image". Plain attribute access replaces
     # getattr/setattr with literal names, and the output dimension is
     # computed locally instead of in a scratch attribute that was
     # deleted afterwards.
     self.image_feature_dim = self.config["image_feature_dim"]
     image_attn_params = self.config["image_feature_embeddings"][0]
     image_embedding = ImageEmbedding(self.image_feature_dim,
                                      self.text_embeddings_out_dim,
                                      **image_attn_params)
     self.image_feature_embeddings_out_dim = (
         image_embedding.out_dim * self.image_feature_dim)
     self.image_feature_embedding = image_embedding

     # Feature embedding for "context", built the same way.
     self.context_feature_dim = self.config["context_feature_dim"]
     context_attn_params = self.config["context_feature_embeddings"][0]
     context_embedding = ImageEmbedding(self.context_feature_dim,
                                        self.text_embeddings_out_dim,
                                        **context_attn_params)
     self.context_feature_embeddings_out_dim = (
         context_embedding.out_dim * self.context_feature_dim)
     self.context_feature_embedding = context_embedding

     self._init_combine_layer("image", "text")

     num_choices = registry.get(self._datasets[0] + "_num_final_outputs")
     # NOTE(review): the 50 subtracted here presumably corresponds to
     # output slots handled by ocr_ptr_net rather than the classifier;
     # confirm against the dataset configuration.
     self.classifier = ClassifierLayer(
         self.config["classifier"]["type"],
         in_dim=768,
         out_dim=num_choices - 50,
         **self.config["classifier"]["params"])