Example #1
File: utils.py  Project: ohmeow/blurr
def get_hf_objects(
    pretrained_model_name_or_path: Optional[Union[str, os.PathLike]],
    model_cls: PreTrainedModel,
    config: Union[PretrainedConfig, str, os.PathLike] = None,
    tokenizer_cls: PreTrainedTokenizerBase = None,
    config_kwargs: dict = {},
    tokenizer_kwargs: dict = {},
    model_kwargs: dict = {},
    cache_dir: Union[str, os.PathLike] = None
) -> Tuple[str, PretrainedConfig, PreTrainedTokenizerBase, PreTrainedModel]:
    """
    Given at minimum a `pretrained_model_name_or_path` and `model_cls (such as
    `AutoModelForSequenceClassification"), this method returns all the Hugging Face objects you need to train
    a model using Blurr
    """
    # config
    if config is None:
        config = AutoConfig.from_pretrained(pretrained_model_name_or_path,
                                            cache_dir=cache_dir,
                                            **config_kwargs)

    # tokenizer (gpt2, roberta, bart (and maybe others) tokenizers require a prefix space)
    if any(s in pretrained_model_name_or_path
           for s in ["gpt2", "roberta", "bart", "longformer"]):
        tokenizer_kwargs = {**{"add_prefix_space": True}, **tokenizer_kwargs}

    if tokenizer_cls is None:
        tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path,
            cache_dir=cache_dir,
            **tokenizer_kwargs)
    else:
        tokenizer = tokenizer_cls.from_pretrained(
            pretrained_model_name_or_path,
            cache_dir=cache_dir,
            **tokenizer_kwargs)

    # model
    model = model_cls.from_pretrained(pretrained_model_name_or_path,
                                      config=config,
                                      cache_dir=cache_dir,
                                      **model_kwargs)

    # arch: e.g. "transformers.models.roberta.modeling_roberta" -> "roberta"
    try:
        arch = model.__module__.split(".")[2]
    except Exception:
        arch = "unknown"

    return (arch, config, tokenizer, model)
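
A minimal usage sketch of the function above (the model id and keyword overrides are illustrative; the exact import path of get_hf_objects depends on the blurr version):

from transformers import AutoModelForSequenceClassification

arch, config, tokenizer, model = get_hf_objects(
    "distilroberta-base",                        # any Hugging Face model id or local path
    model_cls=AutoModelForSequenceClassification,
    tokenizer_kwargs={"model_max_length": 512},  # merged into the AutoTokenizer call
)
print(arch)  # e.g. "roberta"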
Example #2

class TransformerClassifier(nn.Module):
    # Note: `nn` is torch.nn; `fp` below is assumed to be the funcy library (provides lflatten / with_prev).
    def __init__(self,
                 pretrained_model_class: PreTrainedModel,
                 pretrained_model_name: str,
                 extra_layers: List[int],
                 dropout_layers: List[float] = None,
                 freeze: bool = False):
        """
        @param  pretrained_model_class: an object of a pre trained model class (e.g., BertModel)
        @param  pretrained_model_name: a pretrained model path (e.g., 'neuralmind/bert-base-portuguese-cased')
        @param  freeze (bool): whether the model should be fine tuned (True) or not (False).
        """
        super(TransformerClassifier, self).__init__()
        # Instantiate the pre-trained model
        self.model = pretrained_model_class.from_pretrained(
            pretrained_model_name)

        dropout_layers = dropout_layers or [0. for _ in extra_layers]
        assert len(extra_layers) == len(
            dropout_layers
        ), 'Extra Layers and Dropout Layers should have the same length'

        # Full layer-size chain: encoder hidden size, the extra layers, and the 3-class output layer
        all_layers = [self.model.config.hidden_size] + extra_layers + [3]
        dropout_layers = [0.] + dropout_layers + [0.]
        # Build a (Linear, ReLU[, Dropout]) block for each consecutive pair of sizes;
        # fp.with_prev yields (current, previous) pairs, with previous=None for the first item.
        layers_instances = fp.lflatten(
            [[nn.Linear(prev, layer), nn.ReLU()] +
             ([nn.Dropout(dropout_layers[i])] if dropout_layers[i] > 0 else [])
             for i, (layer, prev) in enumerate(fp.with_prev(all_layers))
             if prev])
        layers_instances = layers_instances[:-1]  # Drop the trailing ReLU after the output layer.
        self.classifier = nn.Sequential(*layers_instances)

        if freeze:
            for param in self.model.parameters():
                param.requires_grad = False
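
A hypothetical instantiation of the class above (the model name is taken from the docstring; the layer sizes and dropout rates are illustrative):

from transformers import BertModel

clf = TransformerClassifier(
    pretrained_model_class=BertModel,
    pretrained_model_name="neuralmind/bert-base-portuguese-cased",
    extra_layers=[256, 64],      # two hidden layers between the encoder and the 3-class output
    dropout_layers=[0.3, 0.1],   # one dropout rate per extra layer
    freeze=True,                 # keep encoder weights fixed; only the classifier head trains
)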