Example #1
    def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
        """
        Load a pretrained model by supplying

        * the name of a remote model on s3 ("bert-base-cased" ...)
        * OR a local path of a model trained via transformers ("some_dir/huggingface_model")
        * OR a local path of a model trained via FARM ("some_dir/farm_model")

        :param pretrained_model_name_or_path: The path of the saved pretrained model or its name.
        :type pretrained_model_name_or_path: str

        """

        bert = cls()
        if "farm_lm_name" in kwargs:
            bert.name = kwargs["farm_lm_name"]
        else:
            bert.name = pretrained_model_name_or_path
        # We need to differentiate between loading model using FARM format and Pytorch-Transformers format
        farm_lm_config = Path(pretrained_model_name_or_path) / "language_model_config.json"
        if os.path.exists(farm_lm_config):
            # FARM style
            bert_config = BertConfig.from_pretrained(farm_lm_config)
            farm_lm_model = Path(pretrained_model_name_or_path) / "language_model.bin"
            bert.model = BertModel.from_pretrained(farm_lm_model, config=bert_config, **kwargs)
            bert.language = bert.model.config.language
        else:
            # Pytorch-transformer Style
            bert.model = BertModel.from_pretrained(str(pretrained_model_name_or_path), **kwargs)
            bert.language = cls._get_or_infer_language_from_name(language, pretrained_model_name_or_path)
        return bert
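
A minimal usage sketch for the factory above. Only the method body is shown, so the enclosing class name (written here as Bert) is a hypothetical stand-in:

    # Hypothetical usage of the load() factory; "Bert" stands in for the real wrapper class.
    lm = Bert.load("bert-base-cased", language="en")   # remote model by name
    lm = Bert.load("some_dir/farm_model")              # local FARM-format directory
    print(lm.language, type(lm.model))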
Example #2
    def __init__(self, config):
        super().__init__(config)

        # parallel, adapter-BERT
        self.parabert = BertModel(config.parabert_config)

        # freezing the pre-trained BERT
        self.freeze_original_params()
Example #3
 def from_torch(model: TorchBertModel,
                device: Optional[torch.device] = None):
     if device is not None and 'cuda' in device.type and torch.cuda.is_available():
         model.to(device)
     embeddings = BertEmbeddings.from_torch(model.embeddings)
     encoder = BertEncoder.from_torch(model.encoder)
     return BertModelNoPooler(embeddings, encoder)
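
A short conversion sketch for the method above, assuming it lives on the BertModelNoPooler class (as Example #29 below suggests) and that turbo_transformers provides the wrapped BertEmbeddings/BertEncoder used internally:

    import torch
    from transformers import BertModel as TorchBertModel

    torch_model = TorchBertModel.from_pretrained("bert-base-uncased")
    torch_model.eval()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    no_pooler = BertModelNoPooler.from_torch(torch_model, device)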
Example #4
 def __init__(self, config: DPRConfig):
     super().__init__(config)
     self.bert_model = BertModel(config)
     assert self.bert_model.config.hidden_size > 0, "Encoder hidden_size can't be zero"
     self.projection_dim = config.projection_dim
     if self.projection_dim > 0:
         self.encode_proj = nn.Linear(self.bert_model.config.hidden_size,
                                      config.projection_dim)
     self.init_weights()
Example #5
class DPREncoder(PreTrainedModel):

    base_model_prefix = "bert_model"

    def __init__(self, config: DPRConfig):
        super().__init__(config)
        self.bert_model = BertModel(config)
        assert self.bert_model.config.hidden_size > 0, "Encoder hidden_size can't be zero"
        self.projection_dim = config.projection_dim
        if self.projection_dim > 0:
            self.encode_proj = nn.Linear(self.bert_model.config.hidden_size,
                                         config.projection_dim)
        self.init_weights()

    def forward(
        self,
        input_ids: Tensor,
        attention_mask: Optional[Tensor] = None,
        token_type_ids: Optional[Tensor] = None,
        inputs_embeds: Optional[Tensor] = None,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_tuple: bool = True,
    ) -> Union[BaseModelOutputWithPooling, Tuple[Tensor, ...]]:
        outputs = self.bert_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )
        sequence_output, pooled_output = outputs[:2]
        pooled_output = sequence_output[:, 0, :]
        if self.projection_dim > 0:
            pooled_output = self.encode_proj(pooled_output)

        if return_tuple:
            return (sequence_output, pooled_output) + outputs[2:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    @property
    def embeddings_size(self) -> int:
        if self.projection_dim > 0:
            return self.encode_proj.out_features
        return self.bert_model.config.hidden_size

    def init_weights(self):
        self.bert_model.init_weights()
        if self.projection_dim > 0:
            self.encode_proj.apply(self.bert_model._init_weights)
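
A usage sketch for the encoder above, assuming a transformers DPRConfig (which carries projection_dim) and a standard BERT tokenizer; the weights are randomly initialized here, so the output is only useful for checking shapes:

    import torch
    from transformers import DPRConfig, BertTokenizer

    config = DPRConfig(projection_dim=128)
    encoder = DPREncoder(config)
    encoder.eval()

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    inputs = tokenizer("what is dense passage retrieval?", return_tensors="pt")
    with torch.no_grad():
        sequence_output, pooled_output = encoder(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"])[:2]
    print(pooled_output.shape)  # torch.Size([1, 128]) with projection_dim=128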
Example #6
    def __init__(self, config):
        super(BertForSequenceClassificationNq, self).__init__(config)
        self.num_labels = config.num_labels
        # config.output_hidden_states = True
        bert_later_dropout = 0.3
        self.dropout = nn.Dropout(bert_later_dropout)
        self.later_model_type = config.later_model_type

        if self.later_model_type == 'linear':
            self.bert = BertModel(config)
            self.projection = nn.Linear(config.hidden_size * 3,
                                        config.hidden_size)
            self.projection_dropout = nn.Dropout(0.1)
            self.projection_activation = nn.Tanh()
            self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        elif self.later_model_type == '1bert_layer':
            config.num_hidden_layers = 1
            self.bert = BertModel(config)
            self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        elif self.later_model_type == 'bilinear':
            self.bert = BertModel(config)
            lstm_layers = 2
            self.qemb_match = SeqAttnMatch(config.hidden_size)
            doc_input_size = 2 * config.hidden_size
            # RNN document encoder
            self.doc_rnn = StackedBRNN(
                input_size=doc_input_size,
                hidden_size=config.hidden_size,
                num_layers=lstm_layers,
                dropout_rate=bert_later_dropout,
                dropout_output=bert_later_dropout,
                concat_layers=True,
                rnn_type=nn.LSTM,
                padding=False,
            )

            self.bilinear_dropout = nn.Dropout(bert_later_dropout)
            self.bilinear_size = 128
            self.doc_proj = nn.Linear(lstm_layers * 2 * config.hidden_size,
                                      self.bilinear_size)
            self.qs_proj = nn.Linear(config.hidden_size, self.bilinear_size)
            self.bilinear = nn.Bilinear(self.bilinear_size, self.bilinear_size,
                                        self.bilinear_size)
            self.classifier = nn.Linear(self.bilinear_size, config.num_labels)
        elif self.later_model_type == 'transformer':
            self.copy_from_bert_layer_num = 11
            self.bert = BertModel(config)
            self.bert_position_emb = nn.Embedding(
                config.max_position_embeddings, config.hidden_size)
            self.bert_type_id_emb = nn.Embedding(config.type_vocab_size,
                                                 config.hidden_size)

            self.bert_layer = BertLayer(config)
            self.bert_pooler_qd = BertPoolerQD(config)
            self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()
Example #7
 def __init__(self):
     config = BertConfig.from_json_file(join(BERT_PATH, 'bert_config.json'))
     self.tokenizer = BertTokenizer(vocab_file=join(BERT_PATH, 'vocab.txt'))
     self.model = BertModel(config, add_pooling_layer=False)
     load_tf_weights_in_bert(self.model,
                             tf_checkpoint_path=join(
                                 BERT_PATH, 'bert_model.ckpt'),
                             strip_bert=True)
     self.model.to(PT_DEVICE)
     self.model.eval()
Example #8
    def _bert_encode_article(self,
                             max_seq_length=128,
                             sequence_a_segment_id=0,
                             sequence_b_segment_id=1,
                             cls_token_segment_id=1,
                             pad_token_segment_id=0,
                             mask_padding_with_zero=True):
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                                  do_lower_case=True)
        bert_config = BertConfig.from_pretrained('bert-base-uncased')
        model = BertModel(bert_config)

        all_input_ids, all_input_mask, all_segment_ids = [], [], []
        for header, article in zip(self.df_url['header'],
                                   self.df_url['article']):
            text = header + '. ' + article
            tokens = tokenizer.tokenize(text)
            special_tokens_count = 2
            if len(tokens) > max_seq_length - special_tokens_count:
                tokens = tokens[:(max_seq_length - special_tokens_count)]
            segment_ids = [sequence_a_segment_id] * len(tokens)
            tokens = [tokenizer.cls_token] + tokens + [tokenizer.sep_token]
            segment_ids = [cls_token_segment_id] + segment_ids + [sequence_a_segment_id]
            input_ids = tokenizer.convert_tokens_to_ids(tokens)
            input_mask = [1 if mask_padding_with_zero else 0] * len(input_ids)

            # Padding
            padding_length = max_seq_length - len(input_ids)
            pad_token = tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0]
            input_ids = input_ids + ([pad_token] * padding_length)
            input_mask = input_mask + [0] * padding_length
            segment_ids = segment_ids + ([pad_token_segment_id] * padding_length)

            assert len(input_ids) == max_seq_length
            assert len(input_mask) == max_seq_length
            assert len(segment_ids) == max_seq_length
            all_input_ids.append(input_ids)
            all_input_mask.append(input_mask)
            all_segment_ids.append(segment_ids)

        all_input_ids = torch.tensor(all_input_ids)
        all_input_mask = torch.tensor(all_input_mask)
        all_segment_ids = torch.tensor(all_segment_ids)

        model.eval()
        outputs = model(all_input_ids,
                        attention_mask=all_input_mask,
                        token_type_ids=all_segment_ids)
        embedding = outputs[1].data.numpy()
        del model
        return embedding
Example #9
 def from_torch(
         model: TorchBertModel,  # implementation of from_torch
         device: Optional[torch.device] = None):
     if device is not None and 'cuda' in device.type and torch.cuda.is_available():
         model.to(device)
     bertmodel = turbo_transformers.BertModel.from_torch(model.bert)
     # The following code can be copied without changes.
     # Note: classifier is a class member of BertForSequenceClassification.
     # If other class members are defined, they must be handled here as well.
     return BertForSequenceClassification(bertmodel, model.classifier)
Example #10
    def __init__(self, bert_model_config: BertConfig):
        super(DocumentBertLinear, self).__init__(bert_model_config)
        self.bert = BertModel(bert_model_config)
        self.bert_batch_size = self.bert.config.bert_batch_size
        self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)

        #self.transformer_encoder = TransformerEncoder(encoder_layer, num_layers=6, norm=nn.LayerNorm(bert_model_config.hidden_size))
        self.classifier = nn.Sequential(
            nn.Dropout(p=bert_model_config.hidden_dropout_prob),
            nn.Linear(bert_model_config.hidden_size * self.bert_batch_size,
                      bert_model_config.num_labels), nn.Tanh())
Example #11
class BertMultiTask:
    def __init__(self, job_config, use_pretrain, tokenizer, cache_dir, device, write_log, summary_writer):
        self.job_config = job_config

        if not use_pretrain:
            model_config = self.job_config.get_model_config()
            bert_config = BertConfig(**model_config)
            bert_config.vocab_size = len(tokenizer.vocab)

            self.bert_encoder = BertModel(bert_config)
        # Use pretrained bert weights
        else:
            self.bert_encoder = BertModel.from_pretrained(self.job_config.get_model_file_type())
            bert_config = self.bert_encoder.config
        self.bert_encoder.to(device)

        self.network = MTLRouting(self.bert_encoder, write_log=write_log, summary_writer=summary_writer)

        #config_data=self.config['data']
        loss_calculation = BertPretrainingLoss(self.bert_encoder, bert_config)
        loss_calculation.to(device)
        # Pretrain Dataset
        self.network.register_batch(BatchType.PRETRAIN_BATCH, "pretrain_dataset", loss_calculation=loss_calculation)

        self.device = device
        # self.network = self.network.float()
        # print(f"Bert ID: {id(self.bert_encoder)}  from GPU: {dist.get_rank()}")

    def save(self, filename: str):
        network = self.network.module
        return torch.save(network.state_dict(), filename)

    def load(self, model_state_dict: str):
        return self.network.module.load_state_dict(torch.load(model_state_dict, map_location=lambda storage, loc: storage))

    def move_batch(self, batch, non_blocking=False):
        return batch.to(self.device, non_blocking=non_blocking)

    def eval(self):
        self.network.eval()

    def train(self):
        self.network.train()

    def save_bert(self, filename: str):
        return torch.save(self.bert_encoder.state_dict(), filename)

    def to(self, device):
        assert isinstance(device, torch.device)
        self.network.to(device)

    def half(self):
        self.network.half()
Example #12
 def __init__(self, bert_model_config: BertConfig):
     super(DocumentBertLSTM, self).__init__(bert_model_config)
     self.bert = BertModel(bert_model_config)
     self.bert_batch_size = self.bert.config.bert_batch_size
     self.dropout = nn.Dropout(p=bert_model_config.hidden_dropout_prob)
     self.lstm = LSTM(
         bert_model_config.hidden_size,
         bert_model_config.hidden_size,
     )
     self.classifier = nn.Sequential(
         nn.Dropout(p=bert_model_config.hidden_dropout_prob),
         nn.Linear(bert_model_config.hidden_size,
                   bert_model_config.num_labels), nn.Tanh())
Example #13
class TestBertModel(unittest.TestCase):
    def init_data(self, use_cuda) -> None:
        torch.set_grad_enabled(False)
        torch.set_num_threads(4)
        turbo_transformers.set_num_threads(4)
        self.test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')

        self.cfg = BertConfig()
        self.torch_model = BertModel(self.cfg)
        self.torch_model.eval()

        if torch.cuda.is_available():
            self.torch_model.to(self.test_device)

        self.turbo_model = turbo_transformers.BertModel.from_torch(
            self.torch_model, self.test_device)

    def check_torch_and_turbo(self, use_cuda):
        self.init_data(use_cuda)
        num_iter = 1
        device_name = "GPU" if use_cuda else "CPU"
        input_ids = torch.randint(low=0,
                                  high=self.cfg.vocab_size - 1,
                                  size=(1, 10),
                                  dtype=torch.long,
                                  device=self.test_device)

        torch_model = lambda: self.torch_model(input_ids)
        torch_result, torch_qps, torch_time = \
            test_helper.run_model(torch_model, use_cuda, num_iter)
        print(f'BertModel PyTorch({device_name}) QPS {torch_qps}')

        turbo_model = (lambda: self.turbo_model(input_ids))

        with turbo_transformers.pref_guard("bert_perf") as perf:
            turbo_result, turbo_qps, turbo_time = \
                test_helper.run_model(turbo_model, use_cuda, num_iter)
        print(f'BertModel TurboTransformer({device_name}) QPS {turbo_qps}')

        self.assertTrue(
            numpy.allclose(torch_result[0][:, 0].cpu(),
                           turbo_result[0].cpu(),
                           atol=1e-3,
                           rtol=1e-3))

    def test_bert_model(self):
        if torch.cuda.is_available() and \
            turbo_transformers.config.is_compiled_with_cuda():
            self.check_torch_and_turbo(use_cuda=True)
        self.check_torch_and_turbo(use_cuda=False)
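
To run this correctness and throughput check as a script, the standard unittest entry point is enough (assuming the imports used above are in place):

    if __name__ == '__main__':
        unittest.main()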
Example #14
    def __init__(self, config):

        super(BertForMultiLable, self).__init__(config)
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()
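
The forward pass of BertForMultiLable is not shown above. A typical multi-label forward for this kind of head (a sketch under that assumption, not the repository's code) pools the [CLS] representation, applies dropout and the classifier, and leaves the sigmoid to the loss function:

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        # Assumed forward pass for a multi-label head; not taken from the source repo.
        outputs = self.bert(input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        pooled_output = self.dropout(outputs[1])  # pooled [CLS] representation
        logits = self.classifier(pooled_output)
        return logits  # pair with nn.BCEWithLogitsLoss for multi-label targets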
Example #15
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, BertModel],
        embedding_dropout: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        label_smoothing: float = None,
        ignore_span_metric: bool = False,
        srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        if isinstance(bert_model, str):
            self.bert_model = BertModel.from_pretrained(bert_model)
        else:
            self.bert_model = bert_model

        self.num_classes = self.vocab.get_vocab_size("labels")
        if srl_eval_path is not None:
            # For the span based evaluation, we don't want to consider labels
            # for verb, because the verb index is provided to the model.
            self.span_metric = SrlEvalScorer(srl_eval_path,
                                             ignore_classes=["V"])
        else:
            self.span_metric = None
        self.tag_projection_layer = Linear(self.bert_model.config.hidden_size,
                                           self.num_classes)

        self.embedding_dropout = Dropout(p=embedding_dropout)
        self._label_smoothing = label_smoothing
        self.ignore_span_metric = ignore_span_metric
        initializer(self)
Example #16
    def __init__(self, config):
        super(LFESM, self).__init__(config)
        self.bert = BertModel(config)
        # self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # self.seq_relationship = nn.Linear(config.hidden_size, 2)
        self.init_weights()

        # dropout = 0.5
        # self._rnn_dropout = RNNDropout(p=dropout)

        feature_size = 28
        self._feature = nn.Linear(feature_size, config.hidden_size)

        self._attention = SoftmaxAttention()
        self._projection = nn.Sequential(nn.Linear(4 * config.hidden_size, config.hidden_size),
                                         nn.ReLU())
        self._composition = Seq2SeqEncoder(nn.LSTM,
                                           config.hidden_size,
                                           config.hidden_size,
                                           bidirectional=True)
        self._classification = nn.Sequential(nn.Dropout(p=config.hidden_dropout_prob),  # p=dropout
                                             nn.Linear(4 * 2 * config.hidden_size, config.hidden_size),
                                             nn.Tanh(),
                                             nn.Dropout(p=config.hidden_dropout_prob),  # p=dropout
                                             nn.Linear(config.hidden_size, 2))
        self.apply(self.init_esim_weights)
Example #17
    def init_data(self, use_cuda) -> None:
        torch.set_grad_enabled(False)
        torch.set_num_threads(4)
        turbo_transformers.set_num_threads(4)
        self.test_device = torch.device('cuda:0') if use_cuda else \
            torch.device('cpu:0')

        self.cfg = BertConfig()
        self.torch_model = BertModel(self.cfg)
        self.torch_model.eval()

        if torch.cuda.is_available():
            self.torch_model.to(self.test_device)

        self.turbo_model = turbo_transformers.BertModel.from_torch(
            self.torch_model, self.test_device, "turbo")
Example #18
    def __init__(self, config):
        """Initialize the model with config dict.

        Args:
            config: a Python dict that must contain the attributes below:
                config.bert_model_path: pretrained model path or model type
                    e.g. 'bert-base-chinese'
                config.hidden_size: The same as BERT model, usually 768
                config.num_classes: int, e.g. 2
                config.dropout: float between 0 and 1
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(config.bert_model_path)
        for param in self.bert.parameters():
            param.requires_grad = True

        hidden_size = config.fc_hidden
        target_class = config.num_classes
        # self.resnet = resnet18(num_classes=hidden_size)
        #self.resnet = ResNet(block=BasicBlock, layers=[1, 1, 1, 1], num_classes=hidden_size)
        # self.resnet = ResNet(config.in_channels, 18)
        self.fpn = FPN([256] * 4, 4)

        self.fpn_seq = FPN([128, 128, 128, 70], 4)
        # The CNN feature map has a total of 228 dimensions.
        self.dropout = nn.Dropout(config.dropout)
        self.fc1 = nn.Linear(hidden_size, target_class)
        self.num_classes = config.num_classes
Example #19
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config)

        self.crf = CRF(config.num_labels, batch_first=True)

        self.classifier_bienc = nn.Linear(2 * config.hidden_size,
                                          config.num_labels)

        N = 4  # number of layers
        h = 4  # number of attention heads
        dropout_value = 0.1
        d_model = config.hidden_size
        d_ff = 2048
        c = copy.deepcopy
        attn = MultiHeadedAttention(h, d_model, dropout=dropout_value)
        ff = PositionwiseFeedForward(d_model, d_ff, dropout=dropout_value)
        self.encoder = Encoder(
            EncoderLayer(d_model, c(attn), c(ff), dropout_value), N)
        self.decoder = Decoder(
            DecoderLayer(d_model,
                         c(attn),
                         c(attn),
                         c(ff),
                         dropout=dropout_value), N)

        self.init_weights()
Example #20
    def __init__(self, config: BertConfig, **kwargs: Any):
        """The classification init is a super set of LM init"""
        super().__init__(config, **kwargs)
        self.config = config
        self.bert = BertModel(config=self.config)

        self.lm_head = BertOnlyMLMHead(self.config)
        self.lm_head.apply(self._init_weights)

        self.qa_head = BertOnlyMLMHead(self.config)
        self.qa_head.apply(self._init_weights)

        self.dropout = nn.Dropout(self.config.hidden_dropout_prob)
        self.classifier = nn.Linear(self.config.hidden_size,
                                    self.config.num_labels)
        self.classifier.apply(self._init_weights)
Example #21
    def __init__(self, config):
        super(ImageBertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels
        self.config = config
        if config.img_feature_dim > 0:
            self.bert = BertImgModel(config)
        else:
            self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        if hasattr(config, 'classifier'):
            if not hasattr(config, 'cls_hidden_scale'):
                config.cls_hidden_scale = 2

            if config.classifier == 'linear':
                self.classifier = nn.Linear(config.hidden_size,
                                            self.config.num_labels)
            elif config.classifier == 'mlp':
                self.classifier = nn.Sequential(
                    nn.Linear(config.hidden_size,
                              config.hidden_size * config.cls_hidden_scale),
                    nn.ReLU(),
                    nn.Linear(config.hidden_size * config.cls_hidden_scale,
                              self.config.num_labels))
        else:
            self.classifier = nn.Linear(config.hidden_size,
                                        self.config.num_labels)  # original
        self.apply(self._init_weights)
Example #22
 def __init__(self, config, args, tokenizer):
     super(DecoderWithLoss, self).__init__()
     # model components
     print("initializing decoder with params {}".format(args))
     self.bert = BertModel(config)
     self.lm_head = BertOnlyMLMHead(config)
     self.span_b_proj = nn.ModuleList([
         HighwayLayer(config.hidden_size) for _ in range(args.num_highway)
     ])
     self.span_e_proj = nn.ModuleList([
         HighwayLayer(config.hidden_size) for _ in range(args.num_highway)
     ])
     # predict text span beginning and end
     self.text_span_start_head = nn.Linear(config.hidden_size,
                                           config.hidden_size)
     self.text_span_end_head = nn.Linear(config.hidden_size,
                                         config.hidden_size)
     # loss functions
     if args.node_label_smoothing > 0:
         self.lm_ce_loss = LabelSmoothingLoss(
             args.node_label_smoothing,
             config.vocab_size,
             ignore_index=tokenizer.pad_token_id)
     else:
         self.lm_ce_loss = torch.nn.CrossEntropyLoss(
             ignore_index=tokenizer.pad_token_id, reduction="none")
     self.span_ce_loss = torch.nn.CrossEntropyLoss(ignore_index=-1,
                                                   reduction="none")
     self.span_loss_lb = args.lambda_span_loss
     self.text_span_loss = torch.nn.CrossEntropyLoss(ignore_index=-1,
                                                     reduction="none")
     self.tree_to_text = args.tree_to_text
Example #23
    def __init__(self, config):
        super(BertForSequentialSentenceSelector, self).__init__(config)

        self.bert = BertModel(config)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        # Initial state
        self.s = Parameter(
            torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))

        # Scaling factor for weight norm
        self.g = Parameter(torch.FloatTensor(1).fill_(1.0))

        # RNN weight
        self.rw = nn.Linear(2 * config.hidden_size, config.hidden_size)

        # EOE and output bias
        self.eos = Parameter(
            torch.FloatTensor(config.hidden_size).uniform_(-0.1, 0.1))
        self.bias = Parameter(torch.FloatTensor(1).zero_())

        # self.apply(self.init_bert_weights)
        self.init_weights()
        self.cpu = torch.device('cpu')
Example #24
 def from_scratch(cls, vocab_size, name="bert", language="en"):
     bert = cls()
     bert.name = name
     bert.language = language
     config = BertConfig(vocab_size=vocab_size)
     bert.model = BertModel(config)
     return bert
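
A brief sketch of using this factory to build a randomly initialized BERT whose vocabulary size matches an existing tokenizer (the enclosing class name Bert is a hypothetical stand-in, as in Example #1):

    from transformers import BertTokenizer

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    lm = Bert.from_scratch(vocab_size=tokenizer.vocab_size)
    print(lm.model.config.vocab_size)  # matches the tokenizer's vocabulary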
Example #25
    def __init__(self,
                 config,
                 action_num,
                 recur_type="gated",
                 allow_yes_no=False):
        super(RCMBert, self).__init__(config)
        self.bert = BertModel(config)
        self.recur_type = recur_type
        self.allow_yes_no = allow_yes_no
        if recur_type == "gated":
            self.recur_network = recurGatedNetwork(config.hidden_size,
                                                   config.hidden_size)
        elif recur_type == "lstm":
            self.recur_network = recurLSTMNetwork(config.hidden_size,
                                                  config.hidden_size)
        else:
            print("Invalid recur_type: {}".format(recur_type))
            sys.exit(0)
        self.action_num = action_num
        self.stop_network = stopNetwork(config.hidden_size)
        self.move_stride_network = moveStrideNetwork(config.hidden_size,
                                                     self.action_num)

        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        if self.allow_yes_no:
            self.yes_no_flag_outputs = nn.Linear(config.hidden_size, 2)
            self.yes_no_ans_outputs = nn.Linear(config.hidden_size, 2)
        self.qa_outputs = nn.Linear(config.hidden_size, 2)

        self.init_weights()
Example #26
    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config)
        self.cls = BertPreTrainingHeads(config)
        self.qa_outputs = torch.nn.Linear(config.hidden_size, 2)

        self.init_weights()
Example #27
 def __init__(self, config, num_classes, vocab) -> None:
     super(PairwiseClassifier, self).__init__(config)
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, num_classes)
     self.vocab = vocab
     self.init_weights()
Example #28
    def __init__(self, config):
        super(BertForDST, self).__init__(config)
        self.slot_list = config.dst_slot_list
        self.class_types = config.dst_class_types
        self.class_labels = config.dst_class_labels
        self.token_loss_for_nonpointable = config.dst_token_loss_for_nonpointable
        self.refer_loss_for_nonpointable = config.dst_refer_loss_for_nonpointable
        self.class_aux_feats_inform = config.dst_class_aux_feats_inform
        self.class_aux_feats_ds = config.dst_class_aux_feats_ds
        self.class_loss_ratio = config.dst_class_loss_ratio

        # Only use refer loss if refer class is present in dataset.
        if 'refer' in self.class_types:
            self.refer_index = self.class_types.index('refer')
        else:
            self.refer_index = -1

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.dst_dropout_rate)
        self.dropout_heads = nn.Dropout(config.dst_heads_dropout_rate)

        if self.class_aux_feats_inform:
            self.add_module("inform_projection", nn.Linear(len(self.slot_list), len(self.slot_list)))
        if self.class_aux_feats_ds:
            self.add_module("ds_projection", nn.Linear(len(self.slot_list), len(self.slot_list)))

        aux_dims = len(self.slot_list) * (self.class_aux_feats_inform + self.class_aux_feats_ds) # second term is 0, 1 or 2

        for slot in self.slot_list:
            self.add_module("class_" + slot, nn.Linear(config.hidden_size + aux_dims, self.class_labels))
            self.add_module("token_" + slot, nn.Linear(config.hidden_size, 2))
            self.add_module("refer_" + slot, nn.Linear(config.hidden_size + aux_dims, len(self.slot_list) + 1))

        self.init_weights()
Example #29
 def from_pretrained(model_id_or_path: str,
                     device: Optional[torch.device] = None):
     torch_model = TorchBertModel.from_pretrained(model_id_or_path)
     model = BertModelNoPooler.from_torch(torch_model, device)
     model.config = torch_model.config
     model._torch_model = torch_model  # keep a reference so the torch model is not destroyed
     return model
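
A short usage sketch, assuming turbo_transformers is installed and that this loader is a method of the BertModelNoPooler class from Example #3:

    import torch

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    turbo_bert = BertModelNoPooler.from_pretrained("bert-base-uncased", device)
    print(turbo_bert.config.hidden_size)  # config is copied from the underlying torch model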
Example #30
    def __init__(self, config, feature=None, use_lstm=False, device="cpu"):
        super(NerModel, self).__init__(config)
        self.num_labels = config.num_labels
        self.use_feature = False
        self.use_lstm = False
        self.hidden_size = config.hidden_size
        self.bert = BertModel(config)
        self.ferep = None

        if feature is not None:
            self.ferep = FeatureRep(feature, device)
            self.use_feature = True
            self.hidden_size += self.ferep.feature_dim

        if use_lstm:
            self.use_lstm = True
            self.lstm = nn.LSTM(self.hidden_size,
                                config.hidden_size,
                                batch_first=True,
                                num_layers=1)
            self.hidden_size = config.hidden_size

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.classifier = nn.Linear(self.hidden_size, config.num_labels)

        self.init_weights()