def load(cls, model_name: str, tokenizer_name: str, cache_model: bool = True,
         adapter_size: int = 8, pretrained: bool = True) -> AutoModel:
    has_adapter = False
    if model_name.startswith("adapter"):
        has_adapter = True
        _, model_name = model_name.split("_")
    if model_name in cls._cache:
        return PretrainedAutoModel._cache[model_name]

    pretrained_config = AutoConfig.from_pretrained(
        model_name, output_hidden_states=True)
    if has_adapter:
        from src.modules.modeling_adapter_bert import AdapterBertModel
        pretrained_config.adapter_size = adapter_size
        model = AdapterBertModel.from_pretrained(model_name, config=pretrained_config)
    else:
        if pretrained:
            model = AutoModel.from_pretrained(model_name, config=pretrained_config)
        else:
            model = AutoModel.from_config(config=pretrained_config)
    if cache_model:
        cls._cache[model_name] = model
    return model
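
# A minimal usage sketch of the loader above (assumption: `load` is exposed as a
# classmethod on the `PretrainedAutoModel` registry class it references, and the
# adapter variant is requested via an "adapter_<hf-model-name>" string).
plain = PretrainedAutoModel.load("bert-base-uncased", "bert-base-uncased")
adapted = PretrainedAutoModel.load(
    "adapter_bert-base-uncased", "bert-base-uncased", adapter_size=16
)
# The second call routes through AdapterBertModel and sets
# pretrained_config.adapter_size before the pretrained weights are loaded.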
def __init__(self, config: Config, *args, **kwargs):
    super().__init__()
    self.config = config
    hf_params = {"config": self._build_encoder_config(config)}
    should_random_init = self.config.get("random_init", False)

    # For BERT models, initialize using Jit version
    if self.config.bert_model_name.startswith("bert-"):
        if should_random_init:
            self.module = BertModelJit(**hf_params)
        else:
            self.module = BertModelJit.from_pretrained(
                self.config.bert_model_name, **hf_params
            )
    else:
        if should_random_init:
            self.module = AutoModel.from_config(**hf_params)
        else:
            self.module = AutoModel.from_pretrained(
                self.config.bert_model_name, **hf_params
            )

    self.embeddings = self.module.embeddings
    self.original_config = self.config
    self.config = self.module.config
    self._init_segment_embeddings()
def __init__(
    self,
    model_name: str,
    max_length: int = None,
    sub_module: str = None,
    train_parameters: bool = True,
) -> None:
    super().__init__()
    self.transformer_model = AutoModel.from_pretrained(model_name)
    self.config = self.transformer_model.config
    if sub_module:
        assert hasattr(self.transformer_model, sub_module)
        self.transformer_model = getattr(self.transformer_model, sub_module)
    self._max_length = max_length
    # I'm not sure if this works for all models; open an issue on github if you find a case
    # where it doesn't work.
    self.output_dim = self.config.hidden_size

    tokenizer = PretrainedTransformerTokenizer(model_name)
    self._num_added_start_tokens = len(tokenizer.single_sequence_start_tokens)
    self._num_added_end_tokens = len(tokenizer.single_sequence_end_tokens)
    self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens

    if not train_parameters:
        for param in self.transformer_model.parameters():
            param.requires_grad = False
def __init__(self, model_name: str, reduced_num: int, max_length: int = None) -> None:
    super().__init__()
    assert 'bert' in model_name, "Selecting a subset of encoder layers is currently only supported for BERT models"
    self.transformer_model = AutoModel.from_pretrained(model_name)
    self._max_length = max_length
    # I'm not sure if this works for all models; open an issue on github if you find a case
    # where it doesn't work.
    # for bert model
    self.output_dim = self.transformer_model.config.hidden_size

    # Keep only the first `reduced_num` encoder layers.
    assert 1 <= reduced_num <= 12
    self.transformer_model.config.num_hidden_layers = reduced_num
    self.transformer_model.encoder.layer = torch.nn.ModuleList(
        list(self.transformer_model.encoder.layer)[:reduced_num])

    import torch.nn as nn

    class Lambda(nn.Module):
        def __init__(self, f):
            super().__init__()
            self.f = f
            # self.layernorm = nn.LayerNorm(768)

        def forward(self, x):
            # x = self.layernorm(x)
            return self.f(x)

    # Replace the pooler with an identity mapping so the sequence output is returned unpooled.
    self.transformer_model.pooler = Lambda(lambda x: x)

    tokenizer = PretrainedTransformerTokenizer(model_name)
    self._num_added_start_tokens = tokenizer.num_added_start_tokens
    self._num_added_end_tokens = tokenizer.num_added_end_tokens
    self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens
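
# A minimal usage sketch of the layer-truncation pattern above (assumption: the
# constructor belongs to a hypothetical AllenNLP-style token embedder, here
# called ReducedLayerTransformerEmbedder; the class name is illustrative).
embedder = ReducedLayerTransformerEmbedder("bert-base-uncased", reduced_num=4)
# Only the first 4 encoder layers are kept, and the pooler is replaced by an
# identity mapping, so the module yields the truncated encoder's sequence output.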
def __init__(self, config, model_name_or_path):
    super().__init__()
    self.bert = AutoModel.from_pretrained(model_name_or_path)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.model_teacher = Teacher(config)
    self.model_student = Student(config)
def __init__(self,
             model_name: str,
             max_length: int = None,
             sub_module: str = None,
             train_parameters: bool = False,
             if_top_layers: bool = False,
             if_normalize: bool = False,
             map_path: str = None,
             iter_norm: int = None) -> None:
    super().__init__()
    self.if_top_layers = if_top_layers
    self.if_normalize = if_normalize
    self.map_path = map_path
    self.iter_norm = iter_norm
    if self.iter_norm:
        self.mean_emb_train = []
        self.mean_emb_eval = []
        self.is_train = None

    if if_top_layers:
        config = AutoConfig.from_pretrained(model_name, output_hidden_states=True)
        self.transformer_model = AutoModel.from_pretrained(model_name, config=config)
    else:
        self.transformer_model = AutoModel.from_pretrained(model_name)
    self.config = self.transformer_model.config
    if sub_module:
        assert hasattr(self.transformer_model, sub_module)
        self.transformer_model = getattr(self.transformer_model, sub_module)
    if not train_parameters:
        self.transformer_model.eval()
    self._max_length = max_length
    # I'm not sure if this works for all models; open an issue on github if you find a case
    # where it doesn't work.
    self.output_dim = self.config.hidden_size
    self._train_parameters = train_parameters

    tokenizer = PretrainedTransformerTokenizer(model_name)
    self._num_added_start_tokens = len(tokenizer.single_sequence_start_tokens)
    self._num_added_end_tokens = len(tokenizer.single_sequence_end_tokens)
    self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens
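
# A minimal sketch of what the `output_hidden_states=True` flag used above exposes
# (assumption: plain Hugging Face usage; the model name and input are illustrative).
import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer

config = AutoConfig.from_pretrained("bert-base-uncased", output_hidden_states=True)
model = AutoModel.from_pretrained("bert-base-uncased", config=config)
tok = AutoTokenizer.from_pretrained("bert-base-uncased")

with torch.no_grad():
    out = model(**tok("a short example", return_tensors="pt"))
# out.hidden_states is a tuple of (num_layers + 1) tensors, one per layer plus the
# embedding output, each of shape (batch, seq_len, hidden_size); the last entry
# equals out.last_hidden_state.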
def __init__(self, config, *args, **kwargs):
    super().__init__()
    self.config = config
    self.module = AutoModel.from_pretrained(
        self.config.bert_model_name,
        config=self._build_encoder_config(config),
        cache_dir=os.path.join(get_mmf_cache_dir(), "distributed_{}".format(-1)),
    )
    self.embeddings = self.module.embeddings
    self.config = self.module.config
def __init__(self, model_name: str, max_length: int = None) -> None:
    super().__init__()
    self.transformer_model = AutoModel.from_pretrained(model_name)
    self._max_length = max_length
    # I'm not sure if this works for all models; open an issue on github if you find a case
    # where it doesn't work.
    self.output_dim = self.transformer_model.config.hidden_size

    tokenizer = PretrainedTransformerTokenizer(model_name)
    self._num_added_start_tokens = tokenizer.num_added_start_tokens
    self._num_added_end_tokens = tokenizer.num_added_end_tokens
    self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens
def __init__(self, config, args): super().__init__(config) self.args = args if args.bert_model == "albert-base-v2": bert = AlbertModel.from_pretrained(args.bert_model) elif args.bert_model == "emilyalsentzer/Bio_ClinicalBERT": bert = AutoModel.from_pretrained(args.bert_model) elif args.bert_model == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12": bert = AutoModel.from_pretrained(args.bert_model) elif args.bert_model == "bert-small-scratch": config = BertConfig.from_pretrained( "google/bert_uncased_L-4_H-512_A-8") bert = BertModel(config) elif args.bert_model == "bert-base-scratch": config = BertConfig.from_pretrained("bert-base-uncased") bert = BertModel(config) else: bert = BertModel.from_pretrained( args.bert_model) # bert-base-uncased, small, tiny self.txt_embeddings = bert.embeddings self.img_embeddings = ImageBertEmbeddings(args, self.txt_embeddings) if args.img_encoder == 'ViT': img_size = args.img_size patch_sz = 32 if img_size == 512 else 16 self.img_encoder = Img_patch_embedding(image_size=img_size, patch_size=patch_sz, dim=2048) else: self.img_encoder = ImageEncoder_cnn(args) for p in self.img_encoder.parameters(): p.requires_grad = False for c in list(self.img_encoder.children())[5:]: for p in c.parameters(): p.requires_grad = True self.encoder = bert.encoder self.pooler = bert.pooler
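
# A minimal sketch of the partial-freezing pattern used above, in isolation
# (assumption: `backbone` is any torchvision-style CNN; the ResNet-50 choice and
# the slice index are illustrative, not taken from the snippet).
import torchvision

backbone = torchvision.models.resnet50(weights=None)
for p in backbone.parameters():
    p.requires_grad = False            # freeze everything first
for child in list(backbone.children())[5:]:
    for p in child.parameters():
        p.requires_grad = True         # then unfreeze the later blocks only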
def __init__(
    self, pretrained_model: str, requires_grad: bool = True, dropout: float = 0.0
) -> None:
    super().__init__()
    model = AutoModel.from_pretrained(pretrained_model)

    self._dropout = torch.nn.Dropout(p=dropout)

    self.pooler = model.pooler
    for param in self.pooler.parameters():
        param.requires_grad = requires_grad
    self._embedding_dim = model.config.hidden_size
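
# A minimal sketch of reusing only the pooler submodule, as the constructor above
# does (assumption: plain Hugging Face BERT; tensor shapes are illustrative).
import torch
from transformers import AutoModel

bert = AutoModel.from_pretrained("bert-base-uncased")
pooler = bert.pooler                        # dense + tanh over the first ([CLS]) position
hidden = torch.randn(2, 7, bert.config.hidden_size)
pooled = pooler(hidden)                     # shape: (2, hidden_size)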
def __init__(self, config, args): super().__init__(config) self.args = args if args.bert_model == "emilyalsentzer/Bio_ClinicalBERT": bert = AutoModel.from_pretrained(args.bert_model) elif args.bert_model == "bionlp/bluebert_pubmed_mimic_uncased_L-12_H-768_A-12": bert = AutoModel.from_pretrained(args.bert_model) elif args.bert_model == "bert-small-scratch": config = BertConfig.from_pretrained( "google/bert_uncased_L-4_H-512_A-8") bert = BertModel(config) elif args.bert_model == "bert-base-scratch": config = BertConfig.from_pretrained("bert-base-uncased") bert = BertModel(config) else: bert = BertModel.from_pretrained( args.bert_model) # bert-base-uncased, small, tiny self.txt_embeddings = bert.embeddings self.encoder = bert.encoder self.pooler = bert.pooler
def __init__(self,
             model_name: str,
             layers_to_merge: List,
             max_length: int = None,
             layer_merger: Callable[[List[Tensor]], Tensor] = sum) -> None:
    super().__init__()
    config = AutoConfig.from_pretrained(model_name, output_hidden_states=True)
    self.transformer_model = AutoModel.from_pretrained(model_name, config=config)
    self.layers_to_merge = layers_to_merge
    self._max_length = max_length
    # I'm not sure if this works for all models; open an issue on github if you find a case
    # where it doesn't work.
    self.output_dim = self.transformer_model.config.hidden_size
    self.layer_merger = layer_merger

    tokenizer = PretrainedTransformerTokenizer(model_name)
    self._num_added_start_tokens = len(tokenizer.single_sequence_start_tokens)
    self._num_added_end_tokens = len(tokenizer.single_sequence_end_tokens)
    self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens
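
# A minimal usage sketch of the layer-merging constructor above (assumption: the
# class is a hypothetical LayerMergingTransformerEmbedder; the name, the layer
# indices, and the averaging merger are illustrative).
import torch

def mean_merger(layers):
    # Average the selected hidden-state tensors element-wise.
    return torch.stack(list(layers), dim=0).mean(dim=0)

embedder = LayerMergingTransformerEmbedder(
    "bert-base-uncased",
    layers_to_merge=[-1, -2, -3, -4],   # e.g. the last four hidden states
    layer_merger=mean_merger,           # the constructor defaults to `sum`
)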
def from_huggingface_model_name(cls, vocab: Vocabulary, model_name: str,
                                ffn_activation: str, ffn_dropout: float,
                                attention: Attention, num_labels: int,
                                seq_classif_dropout: float):
    transformer = AutoModel.from_pretrained(model_name)
    embeddings = deepcopy(transformer.embeddings)
    encoder = DistilBertEncoder.from_huggingface_model(
        model=transformer,
        ffn_activation=ffn_activation,
        ffn_dropout=ffn_dropout,
        attention=attention)
    return cls(vocab=vocab,
               embeddings=embeddings,
               encoder=encoder,
               num_labels=num_labels,
               seq_classif_dropout=seq_classif_dropout)
def __init__(
    self,
    vocab: Vocabulary,
    model_name: str,
    num_labels: int,
    translation_factor: float = 0.5,
    seq_decoder: SeqDecoder = None,
    decoding_dim: int = 512,
    target_embedding_dim: int = 512,
    load_classifier: bool = False,
    transformer_trainable: bool = True,
    classifier_trainable: bool = True,
    dropout: float = 0.1,
    index: str = "transformer",
    label_namespace: str = "label",
    initializer: InitializerApplicator = InitializerApplicator(),
    regularizer: Optional[RegularizerApplicator] = None,
) -> None:
    super().__init__(vocab, regularizer)

    if not num_labels:
        num_labels = vocab.get_vocab_size(namespace=label_namespace)
    config = AutoConfig.from_pretrained(model_name)
    config.num_labels = num_labels
    self.transformer = AutoModel.from_pretrained(model_name, config=config)
    for param in self.transformer.parameters():
        param.requires_grad = transformer_trainable

    # Only BERT supports loading the classifier layer currently
    if load_classifier:
        self.classifier = BertForNextSentencePrediction.from_pretrained(
            model_name, config=config).cls
        for param in self.classifier.parameters():
            param.requires_grad = classifier_trainable
    else:
        classifier = torch.nn.Linear(config.hidden_size, config.num_labels)
        initializer(classifier)
        self.classifier = torch.nn.Sequential(torch.nn.Dropout(dropout), classifier)

    self._accuracy = CategoricalAccuracy()
    self._loss = torch.nn.CrossEntropyLoss()
    self._index = index
    self._label_namespace = label_namespace
    self._translation_factor = translation_factor
    self._seq_decoder = seq_decoder
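
# A minimal sketch of the two classifier branches above, in isolation
# (assumption: `model_name` is a BERT checkpoint; values are illustrative).
import torch
from transformers import AutoConfig, BertForNextSentencePrediction

model_name = "bert-base-uncased"
config = AutoConfig.from_pretrained(model_name)
config.num_labels = 2

# load_classifier=True path: reuse the pretrained next-sentence-prediction head.
nsp_head = BertForNextSentencePrediction.from_pretrained(model_name, config=config).cls

# load_classifier=False path: a freshly initialized dropout + linear classifier.
classifier = torch.nn.Sequential(
    torch.nn.Dropout(0.1),
    torch.nn.Linear(config.hidden_size, config.num_labels),
)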
def __init__(self, model_name: str, reduced_num: int, max_length: int = None) -> None:
    super().__init__()
    assert 'bert' in model_name, "Selecting a subset of encoder layers is currently only supported for BERT models"
    self.transformer_model = AutoModel.from_pretrained(model_name)
    self._max_length = max_length
    # I'm not sure if this works for all models; open an issue on github if you find a case
    # where it doesn't work.
    self.output_dim = self.transformer_model.config.hidden_size

    # Keep only the first `reduced_num` encoder layers.
    assert 1 <= reduced_num <= 12
    self.transformer_model.config.num_hidden_layers = reduced_num
    self.transformer_model.encoder.layer = torch.nn.ModuleList(
        list(self.transformer_model.encoder.layer)[:reduced_num])

    tokenizer = PretrainedTransformerTokenizer(model_name)
    self._num_added_start_tokens = tokenizer.num_added_start_tokens
    self._num_added_end_tokens = tokenizer.num_added_end_tokens
    self._num_added_tokens = self._num_added_start_tokens + self._num_added_end_tokens
def __init__(self, model_name: str) -> None:
    super().__init__()
    self.transformer_model = AutoModel.from_pretrained(model_name)
    # I'm not sure if this works for all models; open an issue on github if you find a case
    # where it doesn't work.
    self.output_dim = self.transformer_model.config.hidden_size