import os
from typing import Optional, Tuple, Union

from transformers import (
    AutoConfig,
    AutoTokenizer,
    PretrainedConfig,
    PreTrainedModel,
    PreTrainedTokenizerBase,
)


def get_hf_objects(
    pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
    model_cls: PreTrainedModel,
    config: Union[PretrainedConfig, str, os.PathLike] = None,
    tokenizer_cls: PreTrainedTokenizerBase = None,
    config_kwargs: dict = None,
    tokenizer_kwargs: dict = None,
    model_kwargs: dict = None,
    cache_dir: Union[str, os.PathLike] = None,
) -> Tuple[str, PretrainedConfig, PreTrainedTokenizerBase, PreTrainedModel]:
    """
    Given at minimum a `pretrained_model_name_or_path` and a `model_cls` (such as
    `AutoModelForSequenceClassification`), this method returns all the Hugging Face
    objects you need to train a model using Blurr.
    """
    # Default the kwargs to fresh dicts (avoids the mutable-default-argument pitfall)
    config_kwargs = config_kwargs or {}
    tokenizer_kwargs = tokenizer_kwargs or {}
    model_kwargs = model_kwargs or {}

    # config
    if config is None:
        config = AutoConfig.from_pretrained(pretrained_model_name_or_path, cache_dir=cache_dir, **config_kwargs)

    # tokenizer (gpt2, roberta, bart (and maybe others) tokenizers require a prefix space)
    if any(s in pretrained_model_name_or_path for s in ["gpt2", "roberta", "bart", "longformer"]):
        tokenizer_kwargs = {**{"add_prefix_space": True}, **tokenizer_kwargs}

    if tokenizer_cls is None:
        tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, cache_dir=cache_dir, **tokenizer_kwargs)
    else:
        tokenizer = tokenizer_cls.from_pretrained(pretrained_model_name_or_path, cache_dir=cache_dir, **tokenizer_kwargs)

    # model
    model = model_cls.from_pretrained(pretrained_model_name_or_path, config=config, cache_dir=cache_dir, **model_kwargs)

    # arch: the third segment of the model's module path, e.g. "bert" for
    # `transformers.models.bert.modeling_bert`
    try:
        arch = model.__module__.split(".")[2]
    except IndexError:
        arch = "unknown"

    return (arch, config, tokenizer, model)
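# A minimal usage sketch for `get_hf_objects`. The checkpoint name
# "bert-base-uncased", the `AutoModelForSequenceClassification` head, and the
# two-label setup are illustrative assumptions, not requirements of the function
# (any Hub checkpoint and compatible model class should work). Note that a
# label-count override belongs in `config_kwargs`, since the model is then built
# from the already-loaded config.
if __name__ == "__main__":
    from transformers import AutoModelForSequenceClassification

    arch, config, tokenizer, model = get_hf_objects(
        "bert-base-uncased",
        model_cls=AutoModelForSequenceClassification,
        config_kwargs={"num_labels": 2},
    )
    print(arch)  # -> "bert"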
from typing import List

import funcy as fp
from torch import nn
from transformers import PreTrainedModel


class TransformerClassifier(nn.Module):

    def __init__(self,
                 pretrained_model_class: PreTrainedModel,
                 pretrained_model_name: str,
                 extra_layers: List[int],
                 dropout_layers: List[float] = None,
                 freeze: bool = False):
        """
        @param pretrained_model_class: a pretrained model class (e.g., BertModel)
        @param pretrained_model_name: a pretrained model path (e.g., 'neuralmind/bert-base-portuguese-cased')
        @param extra_layers: hidden sizes of the fully connected layers stacked on top of the transformer
        @param dropout_layers: dropout probability applied after each extra layer (defaults to no dropout)
        @param freeze (bool): whether the pretrained weights should be frozen (True) or fine-tuned (False)
        """
        super(TransformerClassifier, self).__init__()

        # Instantiate the pretrained transformer body
        self.model = pretrained_model_class.from_pretrained(pretrained_model_name)

        dropout_layers = dropout_layers or [0. for _ in extra_layers]
        assert len(extra_layers) == len(dropout_layers), \
            'Extra Layers and Dropout Layers should have the same length'

        # Layer sizes: transformer hidden size -> extra layers -> 3 output classes
        all_layers = [self.model.config.hidden_size] + extra_layers + [3]
        # No dropout before the first extra layer nor after the output layer
        dropout_layers = [0.] + dropout_layers + [0.]

        # Build a [Linear, ReLU, (Dropout)] block for each consecutive pair of sizes
        layers_instances = fp.lflatten(
            [[nn.Linear(prev, layer), nn.ReLU()] +
             ([nn.Dropout(dropout_layers[i])] if dropout_layers[i] > 0 else [])
             for i, (layer, prev) in enumerate(fp.with_prev(all_layers))
             if prev])
        layers_instances = layers_instances[:-1]  # Remove the trailing ReLU after the output layer

        self.classifier = nn.Sequential(*layers_instances)

        # Optionally freeze the transformer so only the classifier head is trained
        if freeze:
            for param in self.model.parameters():
                param.requires_grad = False
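# A minimal usage sketch. The snippet above defines no `forward`, so the wiring
# below (feeding the transformer's first, [CLS]-position hidden state into the
# classifier head) is an assumed convention, not part of the original class.
if __name__ == "__main__":
    from transformers import BertModel, BertTokenizer

    clf = TransformerClassifier(
        BertModel,
        "neuralmind/bert-base-portuguese-cased",
        extra_layers=[256],
        dropout_layers=[0.1],
        freeze=True,
    )

    tokenizer = BertTokenizer.from_pretrained("neuralmind/bert-base-portuguese-cased")
    batch = tokenizer(["um exemplo de frase"], return_tensors="pt")

    # Assumed forward pass: pool the [CLS] hidden state, then classify
    hidden = clf.model(**batch).last_hidden_state[:, 0]
    logits = clf.classifier(hidden)
    print(logits.shape)  # -> torch.Size([1, 3])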