Example #1
    def load(cls, pretrained_model_name_or_path, language=None, **kwargs):
        """
        Load a pretrained model by supplying

        * the name of a remote model on s3 ("bert-base-cased" ...)
        * OR a local path of a model trained via transformers ("some_dir/huggingface_model")
        * OR a local path of a model trained via FARM ("some_dir/farm_model")

        :param pretrained_model_name_or_path: The path of the saved pretrained model or its name.
        :type pretrained_model_name_or_path: str

        """

        bert = cls()
        if "farm_lm_name" in kwargs:
            bert.name = kwargs["farm_lm_name"]
        else:
            bert.name = pretrained_model_name_or_path
        # We need to differentiate between loading a model in FARM format and in pytorch-transformers format
        farm_lm_config = Path(pretrained_model_name_or_path) / "language_model_config.json"
        if os.path.exists(farm_lm_config):
            # FARM style
            bert_config = BertConfig.from_pretrained(farm_lm_config)
            farm_lm_model = Path(pretrained_model_name_or_path) / "language_model.bin"
            bert.model = BertModel.from_pretrained(farm_lm_model, config=bert_config, **kwargs)
            bert.language = bert.model.config.language
        else:
            # Pytorch-transformer Style
            bert.model = BertModel.from_pretrained(str(pretrained_model_name_or_path), **kwargs)
            bert.language = cls._get_or_infer_language_from_name(language, pretrained_model_name_or_path)
        return bert
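A brief usage sketch for the loader above; the wrapper class name Bert is an assumption, standing in for whichever FARM language-model class defines this load() classmethod.
# Hypothetical usage; `Bert` stands in for the class that defines load().
lm = Bert.load("bert-base-cased")                  # remote model by name
# lm = Bert.load("some_dir/farm_model")            # local FARM-style directory
# lm = Bert.load("some_dir/huggingface_model")     # local transformers-style directory
print(lm.language)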
Example #2
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, BertModel],
        embedding_dropout: float = 0.0,
        initializer: InitializerApplicator = InitializerApplicator(),
        label_smoothing: float = None,
        ignore_span_metric: bool = False,
        srl_eval_path: str = DEFAULT_SRL_EVAL_PATH,
        **kwargs,
    ) -> None:
        super().__init__(vocab, **kwargs)

        if isinstance(bert_model, str):
            self.bert_model = BertModel.from_pretrained(bert_model)
        else:
            self.bert_model = bert_model

        self.num_classes = self.vocab.get_vocab_size("labels")
        if srl_eval_path is not None:
            # For the span based evaluation, we don't want to consider labels
            # for verb, because the verb index is provided to the model.
            self.span_metric = SrlEvalScorer(srl_eval_path,
                                             ignore_classes=["V"])
        else:
            self.span_metric = None
        self.tag_projection_layer = Linear(self.bert_model.config.hidden_size,
                                           self.num_classes)

        self.embedding_dropout = Dropout(p=embedding_dropout)
        self._label_smoothing = label_smoothing
        self.ignore_span_metric = ignore_span_metric
        initializer(self)
Example #3
    def __init__(self, config):
        """Initialize the model with config dict.

        Args:
            config: python dict that must contain the attributes below:
                config.bert_model_path: pretrained model path or model type
                    e.g. 'bert-base-chinese'
                config.hidden_size: The same as BERT model, usually 768
                config.num_classes: int, e.g. 2
                config.dropout: float between 0 and 1
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(config.bert_model_path)
        for param in self.bert.parameters():
            param.requires_grad = True

        hidden_size = config.fc_hidden
        target_class = config.num_classes
        # self.resnet = resnet18(num_classes=hidden_size)
        #self.resnet = ResNet(block=BasicBlock, layers=[1, 1, 1, 1], num_classes=hidden_size)
        # self.resnet = ResNet(config.in_channels, 18)
        self.fpn = FPN([256]* 4, 4)

        self.fpn_seq = FPN([128,128,128,70], 4)
        #cnn feature map has a total number of 228 dimensions.
        self.dropout = nn.Dropout(config.dropout)
        self.fc1 = nn.Linear(hidden_size, target_class)
        self.num_classes = config.num_classes
Example #4
    def from_pretrained(model_id_or_path: str,
                        device: Optional[torch.device] = None):
        torch_model = TorchBertModel.from_pretrained(model_id_or_path)
        model = BertModelNoPooler.from_torch(torch_model, device)
        model.config = torch_model.config
        model._torch_model = torch_model  # keep a reference so the torch model is not destroyed
        return model
Example #5
def bertModel(*args, **kwargs):
    """
    BertModel is the basic BERT Transformer model with a layer of summed token,
    position and sequence embeddings followed by a series of identical
    self-attention blocks (12 for BERT-base, 24 for BERT-large).

    Example:
        # Load the tokenizer
        >>> import torch
        >>> tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'bertTokenizer', 'bert-base-cased', do_basic_tokenize=False)
        #  Prepare tokenized input
        >>> text = "[CLS] Who was Jim Henson ? [SEP] Jim Henson was a puppeteer [SEP]"
        >>> tokenized_text = tokenizer.tokenize(text)
        >>> indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        >>> segments_ids = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
        >>> tokens_tensor = torch.tensor([indexed_tokens])
        >>> segments_tensors = torch.tensor([segments_ids])
        # Load bertModel
        >>> model = torch.hub.load('huggingface/pytorch-transformers', 'bertModel', 'bert-base-cased')
        >>> model.eval()
        # Predict hidden states features for each layer
        >>> with torch.no_grad():
                encoded_layers, _ = model(tokens_tensor, segments_tensors)
    """
    model = BertModel.from_pretrained(*args, **kwargs)
    return model
Example #6
    def __init__(self,
                 input_path: str = None,
                 model: str = None,
                 tokenizer: Any = None,
                 num_classes: int = 2,
                 cuda_device: int = 0,
                 batch_size: int = 4,
                 num_workers: int = 0,
                 lr: float = 2e-5,
                 weight_decay: float = 0.1,
                 warm_up: int = 500):
        super(BertClassificationModel, self).__init__()

        self.num_classes = num_classes
        self.cuda_device = cuda_device
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.lr = lr
        self.weight_decay = weight_decay
        self.warm_up = warm_up

        self.save_hyperparameters()

        self.dataset = BertDataset(input_path, tokenizer)

        self.text_embedding = BertModel.from_pretrained(
            model, output_attentions=False, output_hidden_states=True)

        self.classifier_hidden_size = self.text_embedding.config.hidden_size
        self.classifier = nn.Linear(self.classifier_hidden_size,
                                    self.num_classes)
Example #7
    def __init__(self, config):
        """Initialize the model with config dict.

        Args:
            config: python dict that must contain the attributes below:
                config.bert_model_path: pretrained model path or model type
                    e.g. 'bert-base-chinese'
                config.hidden_size: The same as BERT model, usually 768
                config.num_classes: int, e.g. 2
                config.dropout: float between 0 and 1
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(config.bert_model_path)
        for param in self.bert.parameters():
            param.requires_grad = True

        hidden_size = config.num_fc_hidden_size
        target_class = config.num_classes
        # self.resnet = resnet18(num_classes=hidden_size)
        self.resnet = resnet_pool[config.resnet_type](num_classes=hidden_size)

        #cnn feature map has a total number of 228 dimensions.
        self.dropout = nn.Dropout(config.dropout)
        self.fc1 = nn.Linear(hidden_size, target_class)
        self.num_classes = config.num_classes
Example #8
    def __init__(self, decoder, src_pad_idx, trg_pad_idx, bert_config, device):
        super().__init__()

        self.bert_encoder = BertModel.from_pretrained('bert-base-uncased')
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device
Example #9
    def load(cls, model_name: str, cache_model: bool = True) -> BertModel:
        if model_name in cls._cache:
            return PretrainedBertModel._cache[model_name]

        model = BertModel.from_pretrained(model_name)
        if cache_model:
            cls._cache[model_name] = model

        return model
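A short usage sketch of the caching behaviour above: the second call returns the object stored in the class-level _cache instead of loading the weights again.
model_a = PretrainedBertModel.load("bert-base-uncased")
model_b = PretrainedBertModel.load("bert-base-uncased")
assert model_a is model_b  # same instance, served from PretrainedBertModel._cache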
Example #10
def main():
    if len(sys.argv) != 3:
        print(
            "Usage: \n"
            "    convert_huggingface_bert_to_npz model_name (bert-base-uncased) output_file"
        )
        exit(0)
    torch.set_grad_enabled(False)

    model_name = sys.argv[1]
    model = BertModel.from_pretrained(model_name)
    arrays = {k: v.detach() for k, v in model.named_parameters()}

    q_weight_key = 'self.query.weight'
    k_weight_key = 'self.key.weight'
    v_weight_key = 'self.value.weight'

    q_bias_key = 'self.query.bias'
    k_bias_key = 'self.key.bias'
    v_bias_key = 'self.value.bias'

    numpy_dict = {}
    for k in arrays.keys():
        if k.endswith(q_weight_key):
            v = torch.clone(
                torch.t(
                    torch.cat([
                        arrays[k],
                        arrays[k[:-len(q_weight_key)] + k_weight_key],
                        arrays[k[:-len(q_weight_key)] + v_weight_key]
                    ], 0).contiguous()).contiguous())
            numpy_dict[k[:-len(q_weight_key)] + "qkv.weight"] = v.numpy()
        elif k.endswith(q_bias_key):
            v = torch.cat([
                arrays[k], arrays[k[:-len(q_bias_key)] + k_bias_key],
                arrays[k[:-len(q_bias_key)] + v_bias_key]
            ], 0).numpy()
            numpy_dict[k[:-len(q_bias_key)] + 'qkv.bias'] = v
        elif any((k.endswith(suffix) for suffix in (k_weight_key, v_weight_key,
                                                    k_bias_key, v_bias_key))):
            continue
        elif (k.endswith("attention.output.dense.weight")
              or k.endswith("pooler.dense.weight")
              or (k.endswith("output.dense.weight")
                  or k.endswith("intermediate.dense.weight"))):
            numpy_dict[k] = torch.clone(torch.t(
                arrays[k]).contiguous()).numpy()
        else:
            numpy_dict[k] = arrays[k].numpy()
    del arrays
    del model
    numpy.savez_compressed(sys.argv[2], **numpy_dict)
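A minimal, self-contained sketch of the Q/K/V fusion performed above, using dummy tensors with bert-base shapes; the real script applies the same cat-then-transpose to the checkpoint's attention weights.
import torch

hidden = 768
q = torch.randn(hidden, hidden)  # ...self.query.weight
k = torch.randn(hidden, hidden)  # ...self.key.weight
v = torch.randn(hidden, hidden)  # ...self.value.weight

# concatenate along the output dimension, then transpose for the npz layout
qkv_weight = torch.t(torch.cat([q, k, v], dim=0).contiguous()).contiguous()
print(qkv_weight.shape)  # torch.Size([768, 2304])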
Example #11
    def __init__(self, pretrained_bert_model_dir: str = 'bert-base-uncased'):
        super().__init__()
        self.num_target = 3
        self.target = 'target'
        self.class_map = {'negative': 0, 'neutral': 1, 'positive': 2}

        self.bert = BertModel.from_pretrained(pretrained_bert_model_dir,
                                              output_hidden_states=True,
                                              output_attentions=False)

        self.hidden_size = self.bert.config.hidden_size
        self.batch_norm = nn.BatchNorm1d(num_features=3 * self.hidden_size,
                                         momentum=0.1)
        self.linear = nn.Linear(self.hidden_size, self.num_target)
Example #12
    def __init__(self, config, num_classes):

        super().__init__()

        self.num_classes = num_classes

        self.bert = BertModel.from_pretrained(config.bert_model_dir)

        for param in self.bert.parameters():
            param.requires_grad = True

        self.dropout = nn.Dropout(config.hidden_dropout_prob)

        self.classifier = nn.Linear(config.hidden_size, num_classes)
Example #13
def initialize_bertgraph(BERT_NAME_OR_PATH,
                         layernorm_key=False,
                         layernorm_value=False,
                         input_label_graph=False,
                         input_unlabel_graph=False,
                         label_size=None):
    bertgconfig = BertGraphConfig.from_pretrained(BERT_NAME_OR_PATH)
    init_bert = BertModel.from_pretrained(BERT_NAME_OR_PATH)
    bertgconfig.add_graph_par(layernorm_key, layernorm_value,
                              input_label_graph, input_unlabel_graph,
                              label_size)
    model = BertGraphModel(bertgconfig)
    model.load_state_dict(init_bert.state_dict(), strict=False)
    return model
Example #14
    def __init__(self, bert_model: str, label_size: int, hidden_size: int = 256, layers: int = 1,
                 lstm_dropout: float = 0.50, fine_tune: bool = False) -> None:
        super(BiRecurrentConvCRF4NestedNER, self).__init__()

        self.bert: BertModel = BertModel.from_pretrained(bert_model)
        self.bert.embeddings.dropout = VarDropout(self.bert.embeddings.dropout.p)
        for l in range(len(self.bert.encoder.layer)):
            self.bert.encoder.layer[l].attention.output.dropout \
                = VarDropout(self.bert.encoder.layer[l].attention.output.dropout.p)
            self.bert.encoder.layer[l].output.dropout \
                = VarDropout(self.bert.encoder.layer[l].output.dropout.p)
        self.fine_tune: bool = fine_tune
        if fine_tune:
            self.bert.embeddings.word_embeddings.weight.requires_grad = False
            self.bert.embeddings.position_embeddings.weight.requires_grad = False
            self.bert.embeddings.token_type_embeddings.weight.requires_grad = False
        else:
            for name, parameter in self.bert.named_parameters():
                parameter.requires_grad = False
            self.bert.encoder.output_hidden_states = True
        # standard dropout
        self.dropout_out: nn.Dropout2d = nn.Dropout2d(p=lstm_dropout)

        if fine_tune:
            self.rnn: VarMaskedFastLSTM = VarMaskedFastLSTM(self.bert.config.hidden_size, hidden_size,
                                                            num_layers=layers, batch_first=True, bidirectional=True,
                                                            dropout=(lstm_dropout, lstm_dropout))
        else:
            self.bert_layers: int = 8
            self.rnn: VarMaskedFastLSTM = VarMaskedFastLSTM(self.bert.config.hidden_size * self.bert_layers,
                                                            hidden_size, num_layers=layers,
                                                            batch_first=True, bidirectional=True,
                                                            dropout=(lstm_dropout, lstm_dropout))

        self.reset_parameters()

        self.all_crfs: List[ChainCRF4NestedNER] = []

        for label in range(label_size):
            crf = ChainCRF4NestedNER(hidden_size * 2, 1)
            self.all_crfs.append(crf)
            self.add_module('crf%d' % label, crf)

        self.b_id: int = 0
        self.i_id: int = 1
        self.e_id: int = 2
        self.s_id: int = 3
        self.o_id: int = 4
        self.eos_id: int = 5
Example #15
    def __init__(self, config, bert_type_or_path, vocab_size):
        super().__init__(config)
        self.config = config
        self.vocab_size = vocab_size
        self.main_encoder = BertModel.from_pretrained(
            bert_type_or_path
        )  # outputs: last hidden state, pooled output, all hidden states, attentions (optional)
        self.mlp_input_size = config.hidden_size  # CR
        self.mlp = MLPWithLayerNorm(config, self.mlp_input_size)
        self.decoder = nn.Linear(self.config.hidden_size,
                                 self.config.vocab_size,
                                 bias=False)
        # initialize the decoder with the encoder's input embeddings (weight tying)
        self.decoder.weight = self.main_encoder.get_input_embeddings().weight
        self.init_weights()
Example #16
    def test_model_from_pretrained(self):
        logging.basicConfig(level=logging.INFO)
        for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            config = BertConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, PretrainedConfig)

            model = BertModel.from_pretrained(model_name)
            model, loading_info = BertModel.from_pretrained(
                model_name, output_loading_info=True)
            self.assertIsNotNone(model)
            self.assertIsInstance(model, PreTrainedModel)
            for value in loading_info.values():
                self.assertEqual(len(value), 0)

            config = BertConfig.from_pretrained(model_name,
                                                output_attentions=True,
                                                output_hidden_states=True)
            model = BertModel.from_pretrained(model_name,
                                              output_attentions=True,
                                              output_hidden_states=True)
            self.assertEqual(model.config.output_attentions, True)
            self.assertEqual(model.config.output_hidden_states, True)
            self.assertEqual(model.config, config)
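For reference, output_loading_info=True makes from_pretrained also return a dict describing how the checkpoint mapped onto the model; the exact keys depend on the transformers version, but they typically include missing_keys, unexpected_keys and error_msgs, which is what the loop above asserts are empty.
model, loading_info = BertModel.from_pretrained("bert-base-cased", output_loading_info=True)
print(loading_info)  # e.g. {'missing_keys': [], 'unexpected_keys': [], 'error_msgs': []}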
Example #17
    def __init__(self, config):
        """Initialize the model with config dict.

        Args:
            config: python dict that must contain the attributes below:
                config.bert_model_path: pretrained model path or model type
                    e.g. 'bert-base-chinese'
                config.hidden_size: The same as BERT model, usually 768
                config.num_classes: int, e.g. 2
                config.dropout: float between 0 and 1
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(config.bert_model_path)
        for param in self.bert.parameters():
            param.requires_grad = True
        self.dropout = nn.Dropout(config.dropout)
        self.linear = nn.Linear(4, config.num_classes)
        self.num_classes = config.num_classes

        self.dim_capsule = config.dim_capsule
        self.num_compressed_capsule = config.num_compressed_capsule
        self.ngram_size = [2, 4, 8]
        self.convs_doc = nn.ModuleList([
            nn.Conv1d(config.max_seq_len, 32, K, stride=2)
            for K in self.ngram_size
        ])
        torch.nn.init.xavier_uniform_(self.convs_doc[0].weight)
        torch.nn.init.xavier_uniform_(self.convs_doc[1].weight)
        torch.nn.init.xavier_uniform_(self.convs_doc[2].weight)

        self.primary_capsules_doc = PrimaryCaps(num_capsules=self.dim_capsule,
                                                in_channels=32,
                                                out_channels=32,
                                                kernel_size=1,
                                                stride=1)

        self.flatten_capsules = FlattenCaps()

        self.W_doc = nn.Parameter(
            torch.FloatTensor(147328, self.num_compressed_capsule))
        torch.nn.init.xavier_uniform_(self.W_doc)

        self.fc_capsules_doc_child = FCCaps(
            config,
            output_capsule_num=config.num_classes,
            input_capsule_num=self.num_compressed_capsule,
            in_channels=self.dim_capsule,
            out_channels=self.dim_capsule)
Example #18
    def __init__(self, batch_size=256, num_workers=8):
        super().__init__()
        self.model = BertModel.from_pretrained(
            'cl-tohoku/bert-base-japanese-whole-word-masking')
        self.linear = nn.Linear(768, 9)
        self.batch_size = batch_size
        self.num_workers = num_workers

        for param in self.model.parameters():
            param.requires_grad = False

        for param in self.model.encoder.layer[-1].parameters():
            param.requires_grad = True

        # unfreeze the classification head (self.linear defined above; BertModel has no .linear attribute)
        for param in self.linear.parameters():
            param.requires_grad = True
Example #19
    def __init__(self,
                 param_path='bert-base-uncased',
                 aggregation: Union[Callable, str] = 'cls'):
        super(BasicBertEncoder, self).__init__()
        self._encoder = BertModel.from_pretrained(param_path)
        if isinstance(aggregation, str):
            if aggregation == 'cls':
                self.aggregation_layer = lambda x: x[:, 0]
            elif aggregation == 'mean':
                self.aggregation_layer = lambda x: torch.sum(x, dim=1)
            else:
                raise Exception("Aggregation Layer doesn't support %s!" %
                                aggregation)
        elif isinstance(aggregation, Callable):
            self.aggregation_layer = aggregation
        else:
            raise Exception("Aggregation Layer doesn't support this!")
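Hypothetical instantiation of the encoder above, showing the two built-in aggregation modes plus a custom callable (the argument values are illustrative):
cls_encoder = BasicBertEncoder(aggregation='cls')    # first-token pooling
sum_encoder = BasicBertEncoder(aggregation='mean')   # note: implemented as a sum over tokens
custom_encoder = BasicBertEncoder(aggregation=lambda x: x.max(dim=1).values)  # any callable is accepted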
Example #20
    def test_from_pytorch(self):
        with torch.no_grad():
            with self.subTest("bert-base-cased"):
                tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")
                fx_model = FlaxBertModel.from_pretrained("bert-base-cased")
                pt_model = BertModel.from_pretrained("bert-base-cased")

                # Check for simple input
                pt_inputs = tokenizer.encode_plus("This is a simple input", return_tensors=TensorType.PYTORCH)
                fx_inputs = tokenizer.encode_plus("This is a simple input", return_tensors=TensorType.JAX)
                pt_outputs = pt_model(**pt_inputs).to_tuple()
                fx_outputs = fx_model(**fx_inputs)

                self.assertEqual(len(fx_outputs), len(pt_outputs), "Output lengths differ between Flax and PyTorch")

                for fx_output, pt_output in zip(fx_outputs, pt_outputs):
                    self.assert_almost_equals(fx_output, pt_output.numpy(), 5e-4)
Example #21
    def __init__(self, config):
        """Initialize the model with config dict.

        Args:
            config: python dict that must contain the attributes below:
                config.bert_model_path: pretrained model path or model type
                    e.g. 'bert-base-chinese'
                config.hidden_size: The same as BERT model, usually 768
                config.num_classes: int, e.g. 2
                config.dropout: float between 0 and 1
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(config.bert_model_path)
        for param in self.bert.parameters():
            param.requires_grad = True
        self.linear = nn.Linear(config.hidden_size, config.num_classes)
        self.dropout = nn.Dropout(config.dropout)
        self.num_classes = config.num_classes
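A sketch of the config object that the docstring describes; types.SimpleNamespace and the class name BertClassifier are assumptions standing in for whatever config object and module name the project actually uses.
from types import SimpleNamespace

config = SimpleNamespace(
    bert_model_path='bert-base-chinese',  # pretrained model path or model type
    hidden_size=768,                      # must match the BERT hidden size
    num_classes=2,
    dropout=0.1,
)
model = BertClassifier(config)  # hypothetical name for the module defined above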
Example #22
    def __init__(self, vocab_size, embed_dim, enc_hid_dim, dec_hid_dim,
                 dropout, embedding_matrix):
        super().__init__()

        self.vocab_size = vocab_size
        self.dec_hid_dim = dec_hid_dim
        self.dropout = nn.Dropout(dropout)
        self.word_embedding = nn.Embedding(vocab_size, embed_dim)
        self.word_embedding.weight.data.copy_(
            torch.from_numpy(embedding_matrix))

        # model_name_or_path is assumed to be defined elsewhere (e.g. a module-level
        # constant such as 'bert-base-uncased'); it is not a constructor argument.
        self.encoder = BertModel.from_pretrained(model_name_or_path)
        for param in self.encoder.parameters():
            param.requires_grad = True

        # self.transform = nn.Linear()

        self.decoder = Decoder(vocab_size, embed_dim, enc_hid_dim, dec_hid_dim)
Example #23
    def __init__(self, config, gpu_list, *args, **params):
        super(BertXQA, self).__init__()
        self.bert = BertModel.from_pretrained(config.get("model", "bert_path"))
        self.dropout = nn.Dropout(0.2)
        self.criterion = nn.CrossEntropyLoss()
        # self.multi = config.getboolean("data", "multi_choice")
        # self.multi_module = nn.Linear(4, 15)
        # self.softmax = nn.Softmax(dim=-1)
        # (b, 4, 768) -> conv(b, 4, 768) -> mp(b, 3, 6)
        self.conv_module = nn.Sequential(
            nn.Conv2d(1, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
            nn.BatchNorm2d(1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 32), stride=(2, 32), padding=(1, 1)))
        self.linear = nn.Linear(18, config.getint("model", "num_classes"))

        self.accuracy_function = single_label_top1_accuracy
        self.bn = nn.BatchNorm1d(config.getint("model", "num_classes"))
        self.num_classes = config.getint("model", "num_classes")
Example #24
    def __init__(self, config):
        """Initialize the model with config dict.

        Args:
            config: python dict that must contain the attributes below:
                config.bert_model_path: pretrained model path or model type
                    e.g. 'bert-base-chinese'
                config.hidden_size: The same as BERT model, usually 768
                config.num_classes: int, e.g. 2
                config.dropout: float between 0 and 1
        """
        super().__init__()
        self.bert = BertModel.from_pretrained(config.bert_model_path)
        for param in self.bert.parameters():
            param.requires_grad = True

        num_conv_filters = config.num_conv_filters
        output_channel = config.output_channel
        hidden_size = config.num_fc_hidden_size
        target_class = config.num_classes
        input_channel = config.hidden_size
        # data(b, 512, 768) -> conv(b, 511,767) -> bn -> mp(b, 4, 6)
        self.conv1 = nn.Conv1d(input_channel, num_conv_filters, kernel_size=7)
        self.conv2 = nn.Conv1d(num_conv_filters,
                               num_conv_filters,
                               kernel_size=7)
        self.conv3 = nn.Conv1d(num_conv_filters,
                               num_conv_filters,
                               kernel_size=5)
        self.conv4 = nn.Conv1d(num_conv_filters,
                               num_conv_filters,
                               kernel_size=5)
        self.conv5 = nn.Conv1d(num_conv_filters,
                               num_conv_filters,
                               kernel_size=3)
        self.conv6 = nn.Conv1d(num_conv_filters, output_channel, kernel_size=3)

        #cnn feature map has a total number of 228 dimensions.
        self.dropout = nn.Dropout(config.dropout)
        self.fc1 = nn.Linear(output_channel, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, target_class)

        self.num_classes = config.num_classes
Example #25
    def __init__(
        self,
        vocab: Vocabulary,
        bert_model: Union[str, BertModel],
        feedforward: Optional[FeedForward] = None,
        dropout: float = None,
        num_labels: int = None,
        label_namespace: str = "labels",
        initializer: InitializerApplicator = InitializerApplicator(),
    ) -> None:

        super().__init__(vocab)

        if isinstance(bert_model, str):
            self.bert_model = BertModel.from_pretrained(bert_model)
        else:
            self.bert_model = bert_model

        self._feedforward = feedforward
        if feedforward is not None:
            self._classifier_input_dim = self._feedforward.get_output_dim()
        else:
            self._classifier_input_dim = self.bert_model.config.hidden_size

        if dropout:
            self._dropout = torch.nn.Dropout(dropout)
        else:
            self._dropout = None
        self._label_namespace = label_namespace

        if num_labels:
            self._num_labels = num_labels
        else:
            self._num_labels = vocab.get_vocab_size(
                namespace=self._label_namespace)

        # classification layer
        self._classification_layer = torch.nn.Linear(
            self._classifier_input_dim, self._num_labels)
        self._accuracy = CategoricalAccuracy()
        self._loss = torch.nn.CrossEntropyLoss()
        initializer(self)
Example #26
    def __init__(self, config, args):
        super().__init__(config)
        self.args = args

        if args.bert_model == "albert-base-v2":
            bert = AlbertModel.from_pretrained(args.bert_model)
        elif args.bert_model == "emilyalsentzer/Bio_ClinicalBERT":
            bert = AutoModel.from_pretrained(args.bert_model)
        elif args.bert_model == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12":
            bert = AutoModel.from_pretrained(args.bert_model)
        elif args.bert_model == "bert-small-scratch":
            config = BertConfig.from_pretrained(
                "google/bert_uncased_L-4_H-512_A-8")
            bert = BertModel(config)
        elif args.bert_model == "bert-base-scratch":
            config = BertConfig.from_pretrained("bert-base-uncased")
            bert = BertModel(config)
        else:
            bert = BertModel.from_pretrained(
                args.bert_model)  # bert-base-uncased, small, tiny

        self.txt_embeddings = bert.embeddings
        self.img_embeddings = ImageBertEmbeddings(args, self.txt_embeddings)

        if args.img_encoder == 'ViT':
            img_size = args.img_size
            patch_sz = 32 if img_size == 512 else 16
            self.img_encoder = Img_patch_embedding(image_size=img_size,
                                                   patch_size=patch_sz,
                                                   dim=2048)
        else:
            self.img_encoder = ImageEncoder_cnn(args)
            for p in self.img_encoder.parameters():
                p.requires_grad = False
            for c in list(self.img_encoder.children())[5:]:
                for p in c.parameters():
                    p.requires_grad = True

        self.encoder = bert.encoder
        self.pooler = bert.pooler
Example #27
    def __init__(self, args, tok=None):
        super().__init__()
        cfg = BertConfig.from_json_file(args.config_path)
        cfg.hidden_size = args.hidden_dim
        cfg.vocab_size = 3  # [SEP], [CLS], [PAD]
        cfg.type_vocab_size = 3  # seq 0 vid, seq 1 vid, text
        self.video_transformer = VideoTransformer(cfg, args, tok)
        self.clip_prediction = VideoTransformerHead(d_in=args.hidden_dim, d_out=3, hidden_act=cfg.hidden_act)
        self.next_seq_prediction = VideoTransformerHead(d_in=args.hidden_dim, d_out=2, hidden_act=cfg.hidden_act,
                                                        pool='first')
        self.args = args
        if self.args.svo:
            self.svo_decoder_head = nn.Sequential(nn.GELU(), nn.Linear(args.hidden_dim, args.svo_dim * 3))
            self.svo_decoder_embs = nn.Linear(args.svo_dim, args.svo_vocab_size, bias=False)
            if self.args.svo_pretrained_embs:
                tok = BertTokenizer.from_pretrained('bert-base-uncased')
                bert = BertModel.from_pretrained('bert-base-uncased')
                # initialize each SVO vocabulary entry with the mean of its BERT word-piece embeddings
                self.svo_decoder_embs.weight.data = torch.stack([
                    bert.embeddings.word_embeddings(
                        torch.tensor(tok.encode(_, add_special_tokens=False) if _ else [0])).mean(dim=0)
                    for _ in args.svo_vocab
                ])
                del bert
                del tok
Example #28
    def __init__(self, data):
        super(BertNER, self).__init__()

        self.gpu = data.HP_gpu
        self.use_bert = data.use_bert
        self.bertpath = data.bertpath

        char_feature_dim = 768
        print('total char_feature_dim is {}'.format(char_feature_dim))

        self.bert_encoder = BertModel.from_pretrained(self.bertpath)

        self.hidden2tag = nn.Linear(char_feature_dim,
                                    data.label_alphabet_size + 2)
        self.drop = nn.Dropout(p=data.HP_dropout)

        self.crf = CRF(data.label_alphabet_size, self.gpu)

        if self.gpu:
            self.bert_encoder = self.bert_encoder.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            self.crf = self.crf.cuda()
Example #29
    def __init__(self, config, args):
        super().__init__(config)
        self.args = args

        if args.bert_model == "emilyalsentzer/Bio_ClinicalBERT":
            bert = AutoModel.from_pretrained(args.bert_model)
        elif args.bert_model == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12":
            bert = AutoModel.from_pretrained(args.bert_model)
        elif args.bert_model == "bert-small-scratch":
            config = BertConfig.from_pretrained(
                "google/bert_uncased_L-4_H-512_A-8")
            bert = BertModel(config)
        elif args.bert_model == "bert-base-scratch":
            config = BertConfig.from_pretrained("bert-base-uncased")
            bert = BertModel(config)
        else:
            bert = BertModel.from_pretrained(
                args.bert_model)  # bert-base-uncased, small, tiny

        self.txt_embeddings = bert.embeddings

        self.encoder = bert.encoder
        self.pooler = bert.pooler
Example #30
    def __init__(self, hidden_size, dropout, device):
        super(BERTEncoder, self).__init__()

        self.device = device
        # Load config and pre-trained model
        pre_trained_model = BertModel.from_pretrained(
            args['bert_model'],
            cache_dir=PYTORCH_PRETRAINED_BERT_CACHE /
            'distributed_{}'.format(-1))
        bert_config = pre_trained_model.config

        # modify config if you want
        bert_config.num_hidden_layers = args['num_bert_layers']

        self.bert = BertModel(bert_config)

        # load desired layers from pre-trained model
        self.bert.load_state_dict(pre_trained_model.state_dict(), strict=False)

        self.proj = nn.Linear(bert_config.hidden_size, hidden_size)

        self.dropout = dropout
        self.dropout_layer = nn.Dropout(dropout)