def test_model_card_from_and_save_pretrained(self):
    model_card_first = ModelCard.from_dict(self.inputs_dict)

    with tempfile.TemporaryDirectory() as tmpdirname:
        model_card_first.save_pretrained(tmpdirname)
        model_card_second = ModelCard.from_pretrained(tmpdirname)

    self.assertEqual(model_card_second.to_dict(), model_card_first.to_dict())
def test_model_card_to_json_file(self):
    model_card_first = ModelCard.from_dict(self.inputs_dict)

    with tempfile.TemporaryDirectory() as tmpdirname:
        filename = os.path.join(tmpdirname, "modelcard.json")
        model_card_first.to_json_file(filename)
        model_card_second = ModelCard.from_json_file(filename)

    self.assertEqual(model_card_second.to_dict(), model_card_first.to_dict())
def test_model_card_common_properties(self):
    modelcard = ModelCard.from_dict(self.inputs_dict)
    self.assertTrue(hasattr(modelcard, "model_details"))
    self.assertTrue(hasattr(modelcard, "intended_use"))
    self.assertTrue(hasattr(modelcard, "factors"))
    self.assertTrue(hasattr(modelcard, "metrics"))
    self.assertTrue(hasattr(modelcard, "evaluation_data"))
    self.assertTrue(hasattr(modelcard, "training_data"))
    self.assertTrue(hasattr(modelcard, "quantitative_analyses"))
    self.assertTrue(hasattr(modelcard, "ethical_considerations"))
    self.assertTrue(hasattr(modelcard, "caveats_and_recommendations"))
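# Fixture sketch (an assumption, not the actual test fixture): the tests in this class
# read self.inputs_dict, presumably populated in setUp(). Any dict whose keys map to
# ModelCard sections should work; the keys below mirror the attributes checked in
# test_model_card_common_properties, and all values are illustrative.
def setUp(self):
    self.inputs_dict = {
        "model_details": {"Organization": "testing", "Model version": "v0.0"},
        "metrics": "accuracy",
        "evaluation_data": {"Datasets": "a small evaluation split"},
        "training_data": {"Datasets": "a small training split"},
        "quantitative_analyses": {"accuracy": 1.0},
    }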
def pipeline(task: str,
             model: Optional = None,
             config: Optional[Union[str, PretrainedConfig]] = None,
             tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
             modelcard: Optional[Union[str, ModelCard]] = None,
             device=torch.device("cpu"),
             **kwargs) -> Pipeline:
    """
    Utility factory method to build a pipeline.

    Pipelines are made of:

        - A Tokenizer instance in charge of mapping raw textual input to tokens
        - A Model instance
        - Some (optional) post processing for enhancing model's output

    Examples:
        pipeline('sentiment-analysis')
    """
    # Register all the supported tasks here
    SUPPORTED_TASKS = {
        "sentiment-analysis": {
            "impl": TextClassificationPipelineMod,
            "pt": AutoModelForSequenceClassification,  # if is_torch_available() else None,
            "default": {
                "model": {
                    "pt": "distilbert-base-uncased-finetuned-sst-2-english",
                },
                "config": "distilbert-base-uncased-finetuned-sst-2-english",
                "tokenizer": "distilbert-base-uncased",
            },
        },
    }

    # Retrieve the task
    if task not in SUPPORTED_TASKS:
        raise KeyError("Unknown task {}, available tasks are {}".format(task, list(SUPPORTED_TASKS.keys())))

    framework = "pt"  # get_framework(model)

    targeted_task = SUPPORTED_TASKS[task]
    task, model_class = targeted_task["impl"], targeted_task[framework]

    # Use default model/config/tokenizer for the task if no model is provided
    if model is None:
        models, config, tokenizer = tuple(targeted_task["default"].values())
        model = models[framework]

    # Try to infer tokenizer from model or config name (if provided as str)
    if tokenizer is None:
        if isinstance(model, str) and model in ALL_PRETRAINED_CONFIG_ARCHIVE_MAP:
            tokenizer = model
        elif isinstance(config, str) and config in ALL_PRETRAINED_CONFIG_ARCHIVE_MAP:
            tokenizer = config
        else:
            # Impossible to guess what the right tokenizer is here
            raise Exception(
                "Impossible to guess which tokenizer to use. "
                "Please provide a PreTrainedTokenizer class or a path/url/shortcut name to a pretrained tokenizer."
            )

    # Try to infer modelcard from model or config name (if provided as str)
    if modelcard is None:
        # Try to fall back on one of the provided strings for model or config (will replace the suffix)
        if isinstance(model, str):
            modelcard = model
        elif isinstance(config, str):
            modelcard = config

    # Instantiate tokenizer if needed
    if isinstance(tokenizer, str):
        tokenizer = AutoTokenizer.from_pretrained(tokenizer)

    # Instantiate config if needed
    if isinstance(config, str):
        config = AutoConfig.from_pretrained(config)

    # Instantiate modelcard if needed
    if isinstance(modelcard, str):
        modelcard = ModelCard.from_pretrained(modelcard)

    # Instantiate model if needed
    if isinstance(model, str):
        # Handle transparent TF/PT model conversion
        model_kwargs = {}
        if framework == "pt" and model.endswith(".h5"):
            model_kwargs["from_tf"] = True
            logger.warning(
                "Model might be a TensorFlow model (ending with `.h5`) but TensorFlow is not available. "
                "Trying to load the model with PyTorch."
            )
        model = model_class.from_pretrained(model, config=config, **model_kwargs)

    model = model.to(device)
    model.device = device
    return task(model=model, tokenizer=tokenizer, modelcard=modelcard, framework=framework, **kwargs)
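# Usage sketch (illustrative, not part of the module): the factory above registers only
# the "sentiment-analysis" task and returns a TextClassificationPipelineMod, which is
# assumed to follow the standard Pipeline __call__ interface on raw text. The example
# sentence and the explicit CPU device are assumptions for illustration.
if __name__ == "__main__":
    sentiment = pipeline("sentiment-analysis", device=torch.device("cpu"))
    print(sentiment("This library keeps getting better!"))
    # Any task other than "sentiment-analysis" raises a KeyError listing the available tasks.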
def test_model_card_to_json_string(self):
    modelcard = ModelCard.from_dict(self.inputs_dict)
    obj = json.loads(modelcard.to_json_string())
    for key, value in self.inputs_dict.items():
        self.assertEqual(obj[key], value)
def pipeline(task: str,
             model: Optional = None,
             config: Optional[Union[str, PretrainedConfig]] = None,
             tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
             framework: Optional[str] = None,
             **kwargs) -> Pipeline:
    """
    Utility factory method to build a :class:`~transformers.Pipeline`.

    Pipelines are made of:

        - A :doc:`tokenizer <tokenizer>` in charge of mapping raw textual input to tokens.
        - A :doc:`model <model>` to make predictions from the inputs.
        - Some (optional) post processing for enhancing model's output.

    Args:
        task (:obj:`str`):
            The task defining which pipeline will be returned. Currently accepted tasks are:

            - :obj:`"feature-extraction"`: will return a :class:`~transformers.FeatureExtractionPipeline`.
            - :obj:`"sentiment-analysis"`: will return a :class:`~transformers.TextClassificationPipeline`.
            - :obj:`"ner"`: will return a :class:`~transformers.TokenClassificationPipeline`.
            - :obj:`"question-answering"`: will return a :class:`~transformers.QuestionAnsweringPipeline`.
            - :obj:`"fill-mask"`: will return a :class:`~transformers.FillMaskPipeline`.
            - :obj:`"summarization"`: will return a :class:`~transformers.SummarizationPipeline`.
            - :obj:`"translation_xx_to_yy"`: will return a :class:`~transformers.TranslationPipeline`.
            - :obj:`"text-generation"`: will return a :class:`~transformers.TextGenerationPipeline`.
            - :obj:`"conversation"`: will return a :class:`~transformers.ConversationalPipeline`.
        model (:obj:`str` or :obj:`~transformers.PreTrainedModel` or :obj:`~transformers.TFPreTrainedModel`, `optional`):
            The model that will be used by the pipeline to make predictions. This can be a model identifier or an
            actual instance of a pretrained model inheriting from :class:`~transformers.PreTrainedModel` (for
            PyTorch) or :class:`~transformers.TFPreTrainedModel` (for TensorFlow).

            If not provided, the default for the :obj:`task` will be loaded.
        config (:obj:`str` or :obj:`~transformers.PretrainedConfig`, `optional`):
            The configuration that will be used by the pipeline to instantiate the model. This can be a model
            identifier or an actual pretrained model configuration inheriting from
            :class:`~transformers.PretrainedConfig`.

            If not provided, the default for the :obj:`task` will be loaded.
        tokenizer (:obj:`str` or :obj:`~transformers.PreTrainedTokenizer`, `optional`):
            The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
            identifier or an actual pretrained tokenizer inheriting from :class:`~transformers.PreTrainedTokenizer`.

            If not provided, the default for the :obj:`task` will be loaded.
        framework (:obj:`str`, `optional`):
            The framework to use, either :obj:`"pt"` for PyTorch or :obj:`"tf"` for TensorFlow. The specified
            framework must be installed.

            If no framework is specified, will default to the one currently installed. If no framework is specified
            and both frameworks are installed, will default to the framework of the :obj:`model`, or to PyTorch if
            no model is provided.
        kwargs:
            Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
            corresponding pipeline class for possible values).

    Returns:
        :class:`~transformers.Pipeline`: A suitable pipeline for the task.
    Examples::

        >>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer

        >>> # Sentiment analysis pipeline
        >>> pipeline('sentiment-analysis')

        >>> # Question answering pipeline, specifying the checkpoint identifier
        >>> pipeline('question-answering', model='distilbert-base-cased-distilled-squad', tokenizer='bert-base-cased')

        >>> # Named entity recognition pipeline, passing in a specific model and tokenizer
        >>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
        >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
        >>> pipeline('ner', model=model, tokenizer=tokenizer)
    """
    framework = 'pt'

    task_class, model_class = QuestionAnsweringPipeline, AutoModelForQuestionAnswering

    # Use default model/config/tokenizer for the task if no model is provided
    if model is None:
        model = 'distilbert-base-cased-distilled-squad'

    # Try to infer tokenizer from model or config name (if provided as str)
    if tokenizer is None:
        if isinstance(model, str):
            tokenizer = model
        elif isinstance(config, str):
            tokenizer = config
        else:
            # Impossible to guess what the right tokenizer is here
            raise Exception(
                "Impossible to guess which tokenizer to use. "
                "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
            )

    modelcard = None
    # Try to infer modelcard from model or config name (if provided as str)
    if isinstance(model, str):
        modelcard = model
    elif isinstance(config, str):
        modelcard = config

    # Instantiate tokenizer if needed
    if isinstance(tokenizer, (str, tuple)):
        if isinstance(tokenizer, tuple):
            # For tuple we have (tokenizer name, {kwargs})
            tokenizer = AutoTokenizer.from_pretrained(tokenizer[0], **tokenizer[1])
        else:
            tokenizer = AutoTokenizer.from_pretrained(tokenizer)

    # Instantiate config if needed
    if isinstance(config, str):
        config = AutoConfig.from_pretrained(config)

    # Instantiate modelcard if needed
    if isinstance(modelcard, str):
        modelcard = ModelCard.from_pretrained(modelcard)

    # Instantiate model if needed
    if isinstance(model, str):
        # Handle transparent TF/PT model conversion
        model_kwargs = {}
        if framework == "pt" and model.endswith(".h5"):
            model_kwargs["from_tf"] = True
            logger.warning(
                "Model might be a TensorFlow model (ending with `.h5`) but TensorFlow is not available. "
                "Trying to load the model with PyTorch."
            )
        elif framework == "tf" and model.endswith(".bin"):
            model_kwargs["from_pt"] = True
            logger.warning(
                "Model might be a PyTorch model (ending with `.bin`) but PyTorch is not available. "
                "Trying to load the model with TensorFlow."
            )
        model = model_class.from_pretrained(model, config=config, **model_kwargs)

    return task_class(model=model, tokenizer=tokenizer, modelcard=modelcard, framework=framework, task=task, **kwargs)
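# Usage sketch (illustrative, not part of the module): unlike its docstring, this
# simplified factory always builds a QuestionAnsweringPipeline backed by
# AutoModelForQuestionAnswering, regardless of the task string (which is only forwarded
# to the pipeline's constructor), and defaults to the 'distilbert-base-cased-distilled-squad'
# checkpoint when no model is given. The question/context strings are assumptions for
# illustration.
if __name__ == "__main__":
    qa = pipeline("question-answering")
    answer = qa(
        question="Which checkpoint does the factory default to?",
        context="The factory defaults to distilbert-base-cased-distilled-squad when no model is given.",
    )
    print(answer)  # expected: a dict with 'answer', 'score', 'start', and 'end' keys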