Ejemplos de AutoConfig en Python, ejemplos de transformers.models.auto.configuration_auto.AutoConfig en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: test_tokenization_auto.py Proyecto: techthiyanes/transformers

    def test_new_tokenizer_fast_registration(self):
        """Register a custom slow/fast tokenizer pair and reload it through ``AutoTokenizer``.

        Registration is exercised both incrementally (slow class, then fast class) and in a
        single call. The ``finally`` clause removes the custom entries from the auto mappings
        again, whether or not the assertions passed.
        """
        try:
            AutoConfig.register("custom", CustomConfig)

            # Incremental registration: the slow class alone leaves the fast slot empty ...
            AutoTokenizer.register(CustomConfig, slow_tokenizer_class=CustomTokenizer)
            self.assertEqual(TOKENIZER_MAPPING[CustomConfig], (CustomTokenizer, None))
            # ... and a second call fills in the fast class.
            AutoTokenizer.register(CustomConfig, fast_tokenizer_class=CustomTokenizerFast)
            self.assertEqual(TOKENIZER_MAPPING[CustomConfig], (CustomTokenizer, CustomTokenizerFast))

            # Drop the entry so that both classes can be registered again in one call.
            del TOKENIZER_MAPPING._extra_content[CustomConfig]
            AutoTokenizer.register(
                CustomConfig,
                slow_tokenizer_class=CustomTokenizer,
                fast_tokenizer_class=CustomTokenizerFast,
            )
            self.assertEqual(TOKENIZER_MAPPING[CustomConfig], (CustomTokenizer, CustomTokenizerFast))

            # Registering something that already exists in the Transformers library raises an error.
            with self.assertRaises(ValueError):
                AutoTokenizer.register(BertConfig, fast_tokenizer_class=BertTokenizerFast)

            # Go through a saved fast BERT tokenizer, because there is no slow-to-fast converter
            # for the new tokenizer and that model does not have a tokenizer.json.
            with tempfile.TemporaryDirectory() as seed_dir:
                BertTokenizerFast.from_pretrained(SMALL_MODEL_IDENTIFIER).save_pretrained(seed_dir)
                custom_tokenizer = CustomTokenizerFast.from_pretrained(seed_dir)

            with tempfile.TemporaryDirectory() as export_dir:
                custom_tokenizer.save_pretrained(export_dir)

                fast_reload = AutoTokenizer.from_pretrained(export_dir)
                self.assertIsInstance(fast_reload, CustomTokenizerFast)

                slow_reload = AutoTokenizer.from_pretrained(export_dir, use_fast=False)
                self.assertIsInstance(slow_reload, CustomTokenizer)

        finally:
            # Undo the registrations so the auto mappings are left as they were found.
            for mapping, key in ((CONFIG_MAPPING, "custom"), (TOKENIZER_MAPPING, CustomConfig)):
                if key in mapping._extra_content:
                    del mapping._extra_content[key]

Ejemplo n.º 2

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: huggingface/transformers

 def test_configuration_not_found(self):
     """Loading a config from a repo without ``config.json`` raises ``EnvironmentError``."""
     # The message is matched as a regular expression against the raised error.
     expected_message = (
         "hf-internal-testing/no-config-test-repo does not appear to have a file named config.json."
     )
     with self.assertRaisesRegex(EnvironmentError, expected_message):
         AutoConfig.from_pretrained("hf-internal-testing/no-config-test-repo")

Ejemplo n.º 3

0

Mostrar archivo

Archivo: test_tokenization_auto.py Proyecto: cccntu/transformers

 def test_tokenizer_from_tokenizer_class(self):
     """The tokenizer class may differ from the one implied by the config's model type."""
     model_id = DUMMY_DIFF_TOKENIZER_IDENTIFIER

     loaded_config = AutoConfig.from_pretrained(model_id)
     self.assertIsInstance(loaded_config, RobertaConfig)

     # Check that tokenizer_type differs from model_type: a RoBERTa config, a BERT tokenizer.
     loaded_tokenizer = AutoTokenizer.from_pretrained(model_id, config=loaded_config)
     bert_tokenizer_classes = (BertTokenizer, BertTokenizerFast)
     self.assertIsInstance(loaded_tokenizer, bert_tokenizer_classes)
     self.assertEqual(loaded_tokenizer.vocab_size, 12)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: huggingface/transformers

 def test_revision_not_found(self):
     """Requesting a revision that does not exist raises ``EnvironmentError``."""
     # Parentheses are escaped because the message is matched as a regular expression.
     expected_message = r"aaaaaa is not a valid git identifier \(branch name, tag name or commit id\)"
     with self.assertRaisesRegex(EnvironmentError, expected_message):
         AutoConfig.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER, revision="aaaaaa")

Ejemplo n.º 5

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: yulinggu-cs/transformers

 def test_pattern_matching_fallback(self):
     """An empty ``config.json`` in a folder named ``fake-roberta`` loads as ``RobertaConfig``."""
     with tempfile.TemporaryDirectory() as scratch_dir:
         # This model name contains bert and roberta, but roberta ends up being picked.
         model_dir = os.path.join(scratch_dir, "fake-roberta")
         os.makedirs(model_dir, exist_ok=True)

         config_path = os.path.join(model_dir, "config.json")
         with open(config_path, "w") as config_file:
             json.dump({}, config_file)

         loaded_config = AutoConfig.from_pretrained(model_dir)
         self.assertEqual(type(loaded_config), RobertaConfig)

Ejemplo n.º 6

0

Mostrar archivo

Archivo: test_tokenization_auto.py Proyecto: peteriz/transformers

    def test_new_tokenizer_registration(self):
        """Register a new slow tokenizer class and reload it through ``AutoTokenizer``.

        The ``finally`` clause removes the custom entries from the auto mappings again,
        whether or not the assertions passed.
        """
        try:
            AutoConfig.register("new-model", NewConfig)
            AutoTokenizer.register(NewConfig, slow_tokenizer_class=NewTokenizer)

            # Registering something that already exists in the Transformers library raises an error.
            with self.assertRaises(ValueError):
                AutoTokenizer.register(BertConfig, slow_tokenizer_class=BertTokenizer)

            source_tokenizer = NewTokenizer.from_pretrained(SMALL_MODEL_IDENTIFIER)
            with tempfile.TemporaryDirectory() as export_dir:
                source_tokenizer.save_pretrained(export_dir)

                reloaded = AutoTokenizer.from_pretrained(export_dir)
                self.assertIsInstance(reloaded, NewTokenizer)

        finally:
            # Undo the registrations so the auto mappings are left as they were found.
            for mapping, key in ((CONFIG_MAPPING, "new-model"), (TOKENIZER_MAPPING, NewConfig)):
                if key in mapping._extra_content:
                    del mapping._extra_content[key]

Ejemplo n.º 7

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: huggingface/transformers

    def test_new_config_registration(self):
        """Register a custom config class and reload a saved instance through ``AutoConfig``.

        The ``finally`` clause removes the ``"custom"`` entry from ``CONFIG_MAPPING`` again,
        whether or not the assertions passed.
        """
        try:
            AutoConfig.register("custom", CustomConfig)

            # Two registrations that must be rejected: a wrong model type for the custom
            # class, and a config that already exists in the Transformers library.
            rejected_registrations = (("model", CustomConfig), ("bert", BertConfig))
            for model_type, config_class in rejected_registrations:
                with self.assertRaises(ValueError):
                    AutoConfig.register(model_type, config_class)

            # Once registered, the config can be used like any other config with the auto-API.
            custom_config = CustomConfig()
            with tempfile.TemporaryDirectory() as export_dir:
                custom_config.save_pretrained(export_dir)
                reloaded = AutoConfig.from_pretrained(export_dir)
                self.assertIsInstance(reloaded, CustomConfig)

        finally:
            registered = CONFIG_MAPPING._extra_content
            if "custom" in registered:
                del registered["custom"]

Ejemplo n.º 8

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: huggingface/transformers

 def test_repo_not_found(self):
     """An identifier that is neither a local folder nor a valid model id raises ``EnvironmentError``."""
     # The message is matched as a regular expression against the raised error.
     expected_message = "bert-base is not a local folder and is not a valid model identifier"
     with self.assertRaisesRegex(EnvironmentError, expected_message):
         AutoConfig.from_pretrained("bert-base")

Ejemplo n.º 9

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: huggingface/transformers

 def test_config_for_model_str(self):
     """``AutoConfig.for_model("roberta")`` returns a ``RobertaConfig`` instance."""
     self.assertIsInstance(AutoConfig.for_model("roberta"), RobertaConfig)

Ejemplo n.º 10

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: huggingface/transformers

 def test_config_model_type_from_model_identifier(self):
     """The config loaded for ``DUMMY_UNKNOWN_IDENTIFIER`` is a ``RobertaConfig``."""
     loaded_config = AutoConfig.from_pretrained(DUMMY_UNKNOWN_IDENTIFIER)
     self.assertIsInstance(loaded_config, RobertaConfig)

Ejemplo n.º 11

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: huggingface/transformers

 def test_config_model_type_from_local_file(self):
     """The config loaded from ``SAMPLE_ROBERTA_CONFIG`` is a ``RobertaConfig``."""
     loaded_config = AutoConfig.from_pretrained(SAMPLE_ROBERTA_CONFIG)
     self.assertIsInstance(loaded_config, RobertaConfig)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: huggingface/transformers

 def test_config_from_model_shortcut(self):
     """The ``"bert-base-uncased"`` shortcut name loads as a ``BertConfig``."""
     self.assertIsInstance(AutoConfig.from_pretrained("bert-base-uncased"), BertConfig)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: prune.py Proyecto: cr1m5onk1ng/text_similarity

    configuration = config.Configuration(
        model_parameters=model_config,
        model=args.model,
        save_path=args.output_dir,
        sequence_max_len=args.seq_len,
        batch_size=args.batch_size,
        epochs=args.epochs,
        device=torch.device(args.device),
        tokenizer=tokenizer,
    )

    valid_data_loader = SmartParaphraseDataloader.build_batches(
        valid_dataset, 16, mode="sequence", config=configuration)
    autoconfig = AutoConfig.from_pretrained(
        args.pretrained_model_path,
        output_attentions=True,
    )
    autoconfig.num_labels = len(LABELS_TO_ID)
    model = AutoModelForSequenceClassification.from_pretrained(
        args.pretrained_model_path, config=autoconfig)
    """
        model = TransformerWrapper.load_pretrained(
            args.pretrained_model_path, 
            params=configuration,
            pooler = BertPoolingStrategy(configuration),
            loss = SoftmaxLoss(configuration))
            
        
        model_config = config.ModelParameters(
        model_name = args.config_name,
        hidden_size = args.embed_dim,

Ejemplo n.º 14

0

Mostrar archivo

Archivo: __init__.py Proyecto: zzzhacker/onnx-trasformers2

def pipeline(
    task: str,
    model: Optional = None,
    config: Optional[Union[str, PretrainedConfig]] = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
    framework: Optional[str] = None,
    revision: Optional[str] = None,
    onnx_model_dir : Optional[str] = None,
    use_fast: bool = True,
    model_kwargs: Optional[Dict[str, Any]] = None,
    onnx: bool = True,
    optimization_level : str = 'all',
    **kwargs
) -> Pipeline:
    """
    Utility factory method to build a :class:`~transformers.Pipeline`.

    Pipelines are made of:

        - A :doc:`tokenizer <tokenizer>` in charge of mapping raw textual input to token.
        - A :doc:`model <model>` to make predictions from the inputs.
        - Some (optional) post processing for enhancing model's output.

    Args:
        task (:obj:`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - :obj:`"feature-extraction"`: will return a :class:`~transformers.FeatureExtractionPipeline`.
            - :obj:`"sentiment-analysis"`: will return a :class:`~transformers.TextClassificationPipeline`.
            - :obj:`"ner"`: will return a :class:`~transformers.TokenClassificationPipeline`.
            - :obj:`"question-answering"`: will return a :class:`~transformers.QuestionAnsweringPipeline`.
            - :obj:`"fill-mask"`: will return a :class:`~transformers.FillMaskPipeline`.
            - :obj:`"summarization"`: will return a :class:`~transformers.SummarizationPipeline`.
            - :obj:`"translation_xx_to_yy"`: will return a :class:`~transformers.TranslationPipeline`.
            - :obj:`"text2text-generation"`: will return a :class:`~transformers.Text2TextGenerationPipeline`.
            - :obj:`"text-generation"`: will return a :class:`~transformers.TextGenerationPipeline`.
            - :obj:`"zero-shot-classification"`: will return a :class:`~transformers.ZeroShotClassificationPipeline`.
            - :obj:`"conversational"`: will return a :class:`~transformers.ConversationalPipeline`.
        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`):
            The model that will be used by the pipeline to make predictions. This can be a model identifier or an
            actual instance of a pretrained model inheriting from :class:`~transformers.PreTrainedModel` (for PyTorch)
            or :class:`~transformers.TFPreTrainedModel` (for TensorFlow).

            If not provided, the default for the :obj:`task` will be loaded.
        config (:obj:`str` or :obj:`~transformers.PretrainedConfig`, `optional`):
            The configuration that will be used by the pipeline to instantiate the model. This can be a model
            identifier or an actual pretrained model configuration inheriting from
            :class:`~transformers.PretrainedConfig`.

            If not provided, the default configuration file for the requested model will be used. That means that if
            :obj:`model` is given, its default configuration will be used. However, if :obj:`model` is not supplied,
            this :obj:`task`'s default model's config is used instead.
        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained tokenizer inheriting from :class:`~transformers.PreTrainedTokenizer`.

            If not provided, the default tokenizer for the given :obj:`model` will be loaded (if it is a string). If
            :obj:`model` is not specified or not a string, then the default tokenizer for :obj:`config` is loaded (if
            it is a string). However, if :obj:`config` is also not given or not a string, then the default tokenizer
            for the given :obj:`task` will be loaded.
        framework (:obj:`str`, `optional`):
            The framework to use, either :obj:`"pt"` for PyTorch or :obj:`"tf"` for TensorFlow. The specified framework
            must be installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified and
            both frameworks are installed, will default to the framework of the :obj:`model`, or to PyTorch if no model
            is provided.
        revision(:obj:`str`, `optional`, defaults to :obj:`"main"`):
            When passing a task name or a string model identifier: The specific model version to use. It can be a
            branch name, a tag name, or a commit id, since we use a git-based system for storing models and other
            artifacts on huggingface.co, so ``revision`` can be any identifier allowed by git.
        onnx_model_dir (:obj:`str`, `optional`):
            Root directory under which the ONNX graph file is looked up. If not provided, a ``.onnx`` folder in the
            parent of this module's directory is used.
        use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Whether or not to use a Fast tokenizer if possible (a :class:`~transformers.PreTrainedTokenizerFast`).
        model_kwargs (:obj:`Dict[str, Any]`, `optional`):
            Additional dictionary of keyword arguments passed along to the model's :obj:`from_pretrained(.,
            **model_kwargs)` function. The dictionary is copied, so the caller's object is never modified.
        onnx (:obj:`bool`, `optional`, defaults to :obj:`True`):
            Forwarded to the pipeline class. When :obj:`True` and the graph file already exists, the model
            instantiation step of this function is skipped.
        optimization_level (:obj:`str`, `optional`, defaults to :obj:`"all"`):
            Forwarded unchanged to the pipeline class (presumably the ONNX graph optimization level; see the
            pipeline class for accepted values).
        kwargs:
            Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
            corresponding pipeline class for possible values).

    Returns:
        :class:`~transformers.Pipeline`: A suitable pipeline for the task.

    Raises:
        Exception: If no tokenizer is given and neither :obj:`model` nor :obj:`config` is a string to infer it from.
        ValueError: If the model has to be instantiated but the task has no implementation for :obj:`framework`.

    Examples::

        >>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

        >>> # Sentiment analysis pipeline
        >>> pipeline('sentiment-analysis')

        >>> # Question answering pipeline, specifying the checkpoint identifier
        >>> pipeline('question-answering', model='distilbert-base-cased-distilled-squad', tokenizer='bert-base-cased')

        >>> # Named entity recognition pipeline, passing in a specific model and tokenizer
        >>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
        >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
        >>> pipeline('ner', model=model, tokenizer=tokenizer)
    """
    # A literal ``{}`` default would be shared between calls and polluted by the
    # ``from_tf`` / ``from_pt`` flags set below, so work on a private copy instead.
    model_kwargs = {} if model_kwargs is None else dict(model_kwargs)

    # Retrieve the task
    targeted_task, task_options = check_task(task)

    # Use default model/config/tokenizer for the task if no model is provided
    if model is None:
        # At that point framework might still be undetermined
        model = get_default_model(targeted_task, framework, task_options)

    # Try to infer tokenizer from model or config name (if provided as str)
    if tokenizer is None:
        if isinstance(model, str):
            tokenizer = model
        elif isinstance(config, str):
            tokenizer = config
        else:
            # Impossible to guess what is the right tokenizer here
            raise Exception(
                "Impossible to guess which tokenizer to use. "
                "Please provided a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
            )

    modelcard = None
    # Try to infer modelcard from model or config name (if provided as str)
    if isinstance(model, str):
        modelcard = model
    elif isinstance(config, str):
        modelcard = config

    # Instantiate config: from its own identifier when given as a string,
    # otherwise from the model identifier when no config was supplied at all.
    if isinstance(config, str):
        config = AutoConfig.from_pretrained(config, revision=revision)
    elif config is None:
        config = AutoConfig.from_pretrained(model, revision=revision)

    if onnx_model_dir:
        ONNX_CACHE_DIR = Path(onnx_model_dir)
    else:
        ONNX_CACHE_DIR = Path(os.path.dirname(__file__)).parent.joinpath(".onnx")

    # NOTE(review): the two lines below require `model` to be a string (or path-like);
    # an already instantiated model object would make `os.path.basename` raise.
    graph_name = f"{os.path.basename(model)}_{task}.onnx"
    graph_path = ONNX_CACHE_DIR.joinpath(model, graph_name)

    # Infer the framework from the model
    if framework is None:
        framework, model = infer_framework_from_model(
            model, targeted_task, revision=revision, task=task, onnx=onnx, graph_path=graph_path
        )

    task_class, model_class = targeted_task["impl"], targeted_task[framework]

    # Instantiate tokenizer if needed
    if isinstance(tokenizer, (str, tuple)):
        if isinstance(tokenizer, tuple):
            # For tuple we have (tokenizer name, {kwargs})
            use_fast = tokenizer[1].pop("use_fast", use_fast)
            tokenizer = AutoTokenizer.from_pretrained(
                tokenizer[0], use_fast=use_fast, revision=revision, **tokenizer[1]
            )
        else:
            tokenizer = AutoTokenizer.from_pretrained(
                tokenizer, revision=revision, use_fast=use_fast
            )

    # Instantiate model if needed: always without ONNX, and with ONNX only
    # while the graph file does not exist yet.
    if (onnx and not os.path.exists(graph_path)) or not onnx:
        if isinstance(model, str):
            # Handle transparent TF/PT model conversion
            if framework == "pt" and model.endswith(".h5"):
                model_kwargs["from_tf"] = True
                logger.warning(
                    "Model might be a TensorFlow model (ending with `.h5`) but TensorFlow is not available. "
                    "Trying to load the model with PyTorch."
                )
            elif framework == "tf" and model.endswith(".bin"):
                model_kwargs["from_pt"] = True
                logger.warning(
                    "Model might be a PyTorch model (ending with `.bin`) but PyTorch is not available. "
                    "Trying to load the model with Tensorflow."
                )

            if model_class is None:
                raise ValueError(
                    f"Pipeline using {framework} framework, but this framework is not supported by this pipeline."
                )

            model = model_class.from_pretrained(
                model, config=config, revision=revision, **model_kwargs
            )

        # A bare "translation" task is resolved to the first task-specific
        # "translation*" entry found in the model's configuration.
        if task == "translation" and model.config.task_specific_params:
            for key in model.config.task_specific_params:
                if key.startswith("translation"):
                    task = key
                    warnings.warn(
                        f'"translation" task was used, instead of "translation_XX_to_YY", defaulting to "{task}"',
                        UserWarning,
                    )
                    break

    return task_class(model=model, tokenizer=tokenizer,config=config, modelcard=modelcard, framework=framework, task=task, onnx=onnx,graph_path=graph_path,optimization_level=optimization_level,**kwargs)

Ejemplo n.º 15

0

Mostrar archivo

Archivo: transformers.py Proyecto: aarnphm/BentoML

def load(
    tag: t.Union[str, Tag],
    from_tf: bool = False,
    from_flax: bool = False,
    *,
    return_config: bool = False,
    model_store: "ModelStore" = Provide[BentoMLContainer.model_store],
    **kwargs: t.Any,
) -> t.Union["ext.TransformersPipeline",
             t.Tuple["ext.PretrainedConfig", "ext.TransformersModelType",
                     t.Union["ext.TransformersTokenizerType",
                             "ext.PreTrainedFeatureExtractor"], ], t.Tuple[
                                 None, "ext.TransformersModelType", t.Union[
                                     "ext.TransformersTokenizerType",
                                     "ext.PreTrainedFeatureExtractor"], ], ]:
    """
    Load a model from BentoML local modelstore with given name.

    Args:
        tag (:code:`Union[str, Tag]`):
            Tag of a saved model in BentoML local modelstore.
        model_store (:mod:`~bentoml._internal.models.store.ModelStore`, default to :mod:`BentoMLContainer.model_store`):
            BentoML modelstore, provided by DI Container.
        from_tf (:code:`bool`, `optional`, defaults to :code:`False`):
            Load the model weights from a TensorFlow checkpoint save file.
        from_flax (:code:`bool`, `optional`, defaults to :code:`False`):
            Load the model weights from a Flax checkpoint save file
        return_config (:code:`bool`, `optional`, default to :code:`False`):
            Whether or not to return configuration of the Transformers model.
        config_kwargs (:code:`Dict[str, Any]`, `optional`):
            Kwargs to pass into :code:`Config` object.
        model_kwargs (:code:`Dict[str, Any]`, `optional`):
            Kwargs to pass into :code:`Model` object.
        tokenizer_kwargs (:code:`Dict[str, Any]`, `optional`):
            Kwargs to pass into :code:`Tokenizer` object.
        feature_extractor_kwargs (:code:`Dict[str, Any]`, `optional`):
            Kwargs to pass into :code:`FeatureExtractor` object.
        kwargs (:code:`Dict[str, Any]`, `optional`):
            Other kwargs that can be parsed to transformers that is neither configs, model, tokenizer, and feature extractor.

        .. warning::
            Make sure to add the corresponding kwargs for your Transformers :code:`Model`, :code:`Tokenizer`, :code:`Config`, :code:`FeatureExtractor` to the correct kwargs dict.

        .. warning::
            Currently :code:`kwargs` accepts all kwargs for corresponding Pipeline.

    Returns:
        :obj:`Union[Pipeline, Tuple[Optional[PretrainedConfig], Union[PreTrainedModel, TFPreTrainedModel, FlaxPreTrainedModel], Optional[Union[PreTrainedTokenizer, PreTrainedTokenizerFast, PreTrainedFeatureExtractor]]]]`: either returning a
        pipeline or a tuple containing :obj:`PretrainedConfig`, :obj:`Model` class object defined by :obj:`transformers`, with an optional :obj:`Tokenizer` class, or :obj:`FeatureExtractor` class for the given model saved in BentoML modelstore.
        The tuple always has three items; its first item is :obj:`None` unless :code:`return_config=True`.

    Raises:
        BentoMLException: If the model stored under :code:`tag` was saved by a different module.

    Examples:

    .. code-block:: python

        import bentoml
        _, model, tokenizer = bentoml.transformers.load('custom_gpt2')

    If you also want the config object to be returned:

    .. code-block:: python

        import bentoml
        config, model, tokenizer = bentoml.transformers.load('custom_gpt2', return_config=True, tokenizer_kwargs={"use_fast":True})

    If the pipeline is saved with :code:`bentoml.transformers.save()`, then :code:`load()` will return pipeline objects:

    .. code-block:: python

        import bentoml
        pipeline = bentoml.transformers.load("roberta_text_classification", return_all_scores=True)
    """  # noqa
    check_flax_supported()  # pragma: no cover
    model = model_store.get(tag)
    if model.info.module not in (MODULE_NAME, __name__):
        raise BentoMLException(
            f"Model {tag} was saved with module {model.info.module}, failed loading with {MODULE_NAME}."
        )

    if model.info.context["pipeline"]:
        # Saved as a pipeline: rebuild it from the stored task name and model
        # directory, forwarding every remaining keyword argument.
        _tasks = model.info.context["task"]
        return transformers.pipeline(_tasks, model.path, **kwargs)
    else:
        config_kwargs = kwargs.pop("config_kwargs", {})
        config: "ext.PretrainedConfig" = AutoConfig.from_pretrained(
            model.path, **config_kwargs)

        model_kwargs = kwargs.pop("model_kwargs", {})

        # Class names recorded at save time; ``False`` marks a component that
        # was not saved alongside the model.
        _model, _tokenizer = (
            model.info.options["model"],
            model.info.options["tokenizer"],
        )
        _feature_extractor = model.info.options["feature_extractor"]

        if _tokenizer is False:
            tokenizer: t.Optional["ext.TransformersTokenizerType"] = None
        else:
            tokenizer_kwargs = kwargs.pop("tokenizer_kwargs", {})
            # NOTE(review): from_tf / from_flax are forwarded to the tokenizer
            # here but not to the model below - confirm this is intended.
            tokenizer = getattr(import_module("transformers"),
                                _tokenizer).from_pretrained(
                                    model.path,
                                    from_tf=from_tf,
                                    from_flax=from_flax,
                                    **tokenizer_kwargs)
        if _feature_extractor is False:
            feature_extractor: t.Optional[
                "ext.PreTrainedFeatureExtractor"] = None
        else:
            # The key must match the documented name exactly; a stray trailing
            # space here would silently discard the caller's options.
            feature_extractor_kwargs = kwargs.pop("feature_extractor_kwargs",
                                                  {})
            feature_extractor = getattr(import_module("transformers"),
                                        _feature_extractor).from_pretrained(
                                            model.path,
                                            **feature_extractor_kwargs)

        # The tokenizer takes precedence; the feature extractor is returned
        # only when no tokenizer was loaded.
        tfe = tokenizer if tokenizer is not None else feature_extractor

        tmodel: "ext.TransformersModelType" = getattr(
            import_module("transformers"),
            _model).from_pretrained(  # type: ignore[reportUnknownMemberType]
                model.path,
                config=config,
                **model_kwargs,
            )

        if return_config:
            return config, tmodel, tfe  # type: ignore
        return None, tmodel, tfe  # type: ignore

Ejemplo n.º 16

0

Mostrar archivo

Archivo: test_configuration_auto.py Proyecto: huggingface/transformers

 def test_from_pretrained_dynamic_config(self):
     """With ``trust_remote_code=True`` the loaded config class is named ``NewModelConfig``."""
     repo_id = "hf-internal-testing/test_dynamic_model"
     loaded_config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
     # Compared by class name; this method does not reference the class object itself.
     self.assertEqual(loaded_config.__class__.__name__, "NewModelConfig")

Ejemplo n.º 17

0

Mostrar archivo

Archivo: encoders.py Proyecto: naykun/mmf

 def _build_encoder_config(self, config: Config):
     """Build the encoder's config via ``AutoConfig`` from ``config.bert_model_name``.

     Every entry of ``config`` (converted to a plain container by ``OmegaConf``) is
     forwarded to ``AutoConfig.from_pretrained`` as a keyword argument.
     """
     model_name = config.bert_model_name
     overrides = OmegaConf.to_container(config)
     return AutoConfig.from_pretrained(model_name, **overrides)

Ejemplo n.º 18

0

Mostrar archivo

def main(args):
    """Fine-tune a sequence-classification model on one GLUE task and save its metrics.

    Args:
        args: Parsed command-line options. The attributes read here are ``task_name``,
            ``output_dir``, ``wsc_trick``, ``model_name_or_path``, ``max_seq_length``,
            ``double_unordered``, ``num_workers``, ``do_train``, ``weight_decay``,
            ``device``, ``learning_rate``, ``layer_lr_decay``, ``adam_bias_correction``,
            ``schedule`` and ``seed``.
    """

    task = GLUE_TASKS[args.task_name]

    # prepare output dirs
    if not os.path.isdir(args.output_dir):
        print('Prepare output dir "{}"'.format(args.output_dir))
        os.makedirs(args.output_dir)

    # the datasets will be stored in <args_output_dir>/datasets/
    datasets_cache_dir = os.path.join(args.output_dir, "datasets")
    if not os.path.isdir(datasets_cache_dir):
        os.makedirs(datasets_cache_dir)

    # the checkpoints will be stored in <args_output_dir>/checkpoints/
    # NOTE(review): checkpoints_dir is created but not referenced again in this function.
    checkpoints_dir = os.path.join(args.output_dir, "checkpoints")
    if not os.path.isdir(checkpoints_dir):
        os.makedirs(checkpoints_dir)

    # task params
    tasks_param = setup_tasks(args.wsc_trick)

    # tokenizer & model
    print('Loading weights and tokenizer from "{}"'.format(
        args.model_name_or_path))
    config = AutoConfig.from_pretrained(args.model_name_or_path)
    tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path)
    # NOTE(review): the model is loaded without passing `config`; the config object
    # is only handed to the dataset and learner helpers below.
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_name_or_path)
    print("Model config: ", config)

    # datasets setup
    dataloaders = store_datasets(
        task,
        tasks_param,
        hf_tokenizer=tokenizer,
        config=config,
        cache_dir=datasets_cache_dir,
        max_length=args.max_seq_length,
        double_unordered=args.double_unordered,
        num_workers=args.num_workers,
    )

    # finetune
    if args.do_train:

        # get_glue_learner returns the learner together with a callable that runs the fit.
        learn, fit_fc = get_glue_learner(
            task,
            tasks_param,
            config,
            model,
            dataloaders,
            args.weight_decay,
            tokenizer,
            args.wsc_trick,
            args.device,
            args.learning_rate,
            args.layer_lr_decay,
            args.adam_bias_correction,
            args.schedule,
            seed=args.seed,
            run_name=args.output_dir,
            inference=True,
        )
        fit_fc()
        learn.save(f"{task}_{args.seed}")

        # save measures
        # Pair each metric name with the entry at the same index of the recorder's log.
        measures = [(measure, str(learn.recorder.log[i]))
                    for i, measure in enumerate(learn.recorder.metric_names)]
        print('Saving in "{}"'.format(args.output_dir))
        # One "name = value" line per metric, written to eval_results_<task>.txt.
        with open(
                os.path.join(args.output_dir,
                             "eval_results_{}.txt".format(task)), "w") as f:
            f.write("\n".join(" = ".join(m) for m in measures))