def create_whitespace_tokenizer(
    config: Optional[Dict[Text, Any]] = None,
) -> WhitespaceTokenizerGraphComponent:
    """Build a whitespace tokenizer, overlaying `config` onto the default config.

    Args:
        config: Optional overrides; keys here win over the component defaults.

    Returns:
        A `WhitespaceTokenizerGraphComponent` configured with the merged settings.
    """
    merged = dict(WhitespaceTokenizerGraphComponent.get_default_config())
    merged.update(config or {})
    return WhitespaceTokenizerGraphComponent(merged)
def create_whitespace_tokenizer(
    config: Optional[Dict] = None,
) -> WhitespaceTokenizerGraphComponent:
    """Create a whitespace tokenizer from defaults plus optional overrides.

    Args:
        config: Optional config overrides; `None` means "defaults only".

    Returns:
        A `WhitespaceTokenizerGraphComponent` built from the merged config.
    """
    overrides = {} if not config else config
    return WhitespaceTokenizerGraphComponent(
        {
            **WhitespaceTokenizerGraphComponent.get_default_config(),
            **overrides,
        }
    )
def test_model_data_signature_with_entities(
    messages: List[Message],
    entity_expected: bool,
    create_diet: Callable[..., DIETClassifierGraphComponent],
):
    """The model-data signature contains an "entities" key iff entities are expected."""
    classifier = create_diet({"BILOU_flag": False})
    training_data = TrainingData(messages)

    # DIET's entity parsing requires tokens, so tokenize the training data first.
    tokenizer = WhitespaceTokenizerGraphComponent(
        WhitespaceTokenizerGraphComponent.get_default_config()
    )
    tokenizer.process_training_data(training_data)

    model_data = classifier.preprocess_train_data(training_data)
    assert ("entities" in model_data.get_signature()) == entity_expected
def whitespace_tokenizer() -> WhitespaceTokenizerGraphComponent:
    """Return a whitespace tokenizer built from its default configuration."""
    default_config = WhitespaceTokenizerGraphComponent.get_default_config()
    return WhitespaceTokenizerGraphComponent(default_config)