# Assumed module-level imports for this snippet (transformers / AllenNLP):
from typing import Optional

from allennlp.data.token_indexers import PretrainedTransformerIndexer
from transformers import AutoModel, AutoTokenizer


def __init__(self, model_name: str, max_length: Optional[int] = None) -> None:
    super().__init__()
    self.transformer_model = AutoModel.from_pretrained(model_name)
    self._max_length = max_length
    # I'm not sure if this works for all models; open an issue on GitHub if you find a case
    # where it doesn't work.
    self.output_dim = self.transformer_model.config.hidden_size

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    (
        self._num_added_start_tokens,
        self._num_added_end_tokens,
    ) = PretrainedTransformerIndexer.determine_num_special_tokens_added(tokenizer)
    self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens
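For context, here is a minimal sketch of what determine_num_special_tokens_added could do, assuming it infers the counts by encoding a single dummy token with and without special tokens and locating where the real token lands. The function name and strategy here are illustrative, not necessarily AllenNLP's actual implementation:

from transformers import AutoTokenizer, PreTrainedTokenizer


def determine_num_special_tokens_added_sketch(tokenizer: PreTrainedTokenizer):
    # Encode one dummy token with and without the tokenizer's special tokens.
    plain = tokenizer.encode("a", add_special_tokens=False)
    wrapped = tokenizer.encode("a", add_special_tokens=True)
    # The dummy token's offset gives the number of prepended specials;
    # anything left over after it must have been appended.
    num_start = wrapped.index(plain[0])
    num_end = len(wrapped) - num_start - len(plain)
    return num_start, num_end


# bert-base-cased yields (1, 1): one [CLS] before, one [SEP] after.
print(determine_num_special_tokens_added_sketch(AutoTokenizer.from_pretrained("bert-base-cased")))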
# PretrainedAutoModel, PretrainedAutoTokenizer, and ScalarMixWithDropout are
# project-specific helpers; InputVariationalDropout comes from allennlp.modules.
def __init__(
    self,
    model_name: str,
    max_length: Optional[int] = None,
    layer_dropout: float = 0.0,
    bert_dropout: float = 0.0,
    dropout: float = 0.0,
    combine_layers: str = "mix",
    adapter_size: int = 8,
    pretrained: bool = True,
) -> None:
    super().__init__()
    # The tokenizer name is the last underscore-separated component of model_name.
    placeholder = model_name.split("_")
    tokenizer_name = placeholder[-1]
    self.transformer_model = PretrainedAutoModel.load(
        model_name, tokenizer_name, adapter_size=adapter_size, pretrained=pretrained
    )
    self._max_length = max_length
    # I'm not sure if this works for all models; open an issue on GitHub if you find a case
    # where it doesn't work.
    self.output_dim = self.transformer_model.config.hidden_size

    self.combine_layers = combine_layers
    if self.combine_layers == "mix":
        # Learn a weighted mixture over all hidden layers instead of taking only the last one.
        self._scalar_mix = ScalarMixWithDropout(
            self.transformer_model.config.num_hidden_layers,
            do_layer_norm=False,
            dropout=layer_dropout,
        )
    else:
        self._scalar_mix = None
    self._bert_dropout = InputVariationalDropout(bert_dropout)
    self.set_dropout(dropout)

    tokenizer = PretrainedAutoTokenizer.load(tokenizer_name)
    (
        self._num_added_start_tokens,
        self._num_added_end_tokens,
    ) = PretrainedTransformerIndexer.determine_num_special_tokens_added(tokenizer)
    self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens
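The combine_layers == "mix" branch learns a weighted combination of all transformer layers rather than using only the final one. A minimal sketch of that idea, assuming ScalarMixWithDropout behaves like AllenNLP's ScalarMix (softmax-normalized learned weights plus a global scale; the per-weight layer dropout is omitted here):

import torch


class ScalarMixSketch(torch.nn.Module):
    def __init__(self, num_layers: int):
        super().__init__()
        # One learnable weight per layer, plus a global scaling factor.
        self.weights = torch.nn.Parameter(torch.zeros(num_layers))
        self.gamma = torch.nn.Parameter(torch.ones(1))

    def forward(self, layers):
        # layers: sequence of num_layers tensors, each (batch, seq_len, hidden_size).
        normed = torch.softmax(self.weights, dim=0)
        return self.gamma * sum(w * layer for w, layer in zip(normed, layers))


# Usage: mix the per-layer hidden states returned by a transformer.
mix = ScalarMixSketch(num_layers=12)
hidden_states = [torch.randn(2, 5, 768) for _ in range(12)]
mixed = mix(hidden_states)  # (2, 5, 768)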
def test_determine_num_special_tokens_added(self):
    tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
    assert PretrainedTransformerIndexer.determine_num_special_tokens_added(tokenizer) == (1, 1)
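If the helper is meant to generalize beyond BERT, the same check can be parametrized over other tokenizers. The expected counts below are assumptions based on each model's standard single-sequence template ([CLS] ... [SEP] for BERT, <s> ... </s> for RoBERTa):

import pytest
from allennlp.data.token_indexers import PretrainedTransformerIndexer
from transformers import AutoTokenizer


@pytest.mark.parametrize(
    "model_name, expected",
    [
        ("bert-base-cased", (1, 1)),
        ("roberta-base", (1, 1)),
    ],
)
def test_special_token_counts_parametrized(model_name, expected):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    assert PretrainedTransformerIndexer.determine_num_special_tokens_added(tokenizer) == expected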