def test_char_lstm(self, resource_loader):
    """Fit a char-tokenized LSTM text classifier, first without and then with terminals.

    Every variant is created from its config, given its resources, fitted on the
    intent-labeled queries, and must classify "hi" as either "greet" or "exit".
    """
    queries = self.labeled_data.queries()
    intents = self.labeled_data.intents()
    config = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "lstm"},
        "params": {"emb_dim": 30, "tokenizer_type": "char-tokenizer"},
    }

    # Overrides are cumulative: each one is layered on top of the previous params.
    # NOTE(review): "add_terminals" is passed as the string "True" here, whereas
    # other tests in this file pass the boolean True -- confirm which is intended.
    param_overrides = (
        {},
        {"add_terminals": "True"},
    )
    for extra_params in param_overrides:
        config = {**config, "params": {**config["params"], **extra_params}}
        model = ModelFactory.create_model_from_config(ModelConfig(**config))
        model.initialize_resources(resource_loader, queries, intents)
        model.fit(queries, intents)
        predicted_intent = model.predict([markup.load_query("hi").query])[0]
        assert predicted_intent in ["greet", "exit"]
def test_default_embedder(self, resource_loader):
    """Fit the default embedder tagger with its default params and with the CRF layer off.

    After each fit the shared ``model_predictions_assertions`` helper is run
    against the fitted model.
    """
    queries = self.labeled_data.queries()
    entities = self.labeled_data.entities()
    config = {
        "model_type": "tagger",
        "example_type": ENTITY_EXAMPLE_TYPE,
        "label_type": ENTITIES_LABEL_TYPE,
        "model_settings": {"classifier_type": "embedder"},
        "params": {"emb_dim": 5},
    }

    # First pass uses the params as given; second pass disables the CRF layer.
    for extra_params in ({}, {"use_crf_layer": False}):
        config = {**config, "params": {**config["params"], **extra_params}}
        model = ModelFactory.create_model_from_config(ModelConfig(**config))
        model.initialize_resources(resource_loader, queries, entities)
        model.fit(queries, entities)
        model_predictions_assertions(model)
def test_char_embedder(self, resource_loader):
    """Fit a char-tokenized embedder tagger across a series of cumulative param variants.

    The variants add, in order: terminals, "mean" token-span pooling, and a
    disabled CRF layer. Each fitted model is passed to
    ``model_predictions_assertions``.
    """
    queries = self.labeled_data.queries()
    entities = self.labeled_data.entities()
    config = {
        "model_type": "tagger",
        "example_type": ENTITY_EXAMPLE_TYPE,
        "label_type": ENTITIES_LABEL_TYPE,
        "model_settings": {"classifier_type": "embedder"},
        # default token_spans_pooling_type is "first"
        "params": {"emb_dim": 30, "tokenizer_type": "char-tokenizer"},
    }

    # Each override is applied on top of everything applied before it.
    # NOTE(review): "add_terminals" is the string "True" here, whereas other
    # tests in this file pass the boolean True -- confirm which is intended.
    param_overrides = (
        {},
        {"add_terminals": "True"},
        {"token_spans_pooling_type": "mean"},
        {"use_crf_layer": False},
    )
    for extra_params in param_overrides:
        config = {**config, "params": {**config["params"], **extra_params}}
        model = ModelFactory.create_model_from_config(ModelConfig(**config))
        model.initialize_resources(resource_loader, queries, entities)
        model.fit(queries, entities)
        model_predictions_assertions(model)
def test_create_model_from_incomplete_config_without_model_settings():
    """Creating a model from a dict config that has no "model_settings" raises TypeError."""
    incomplete_config = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
    }
    # The call itself is expected to raise, so its result is never bound:
    # anything placed after it inside the block would not run when the test passes.
    with pytest.raises(TypeError):
        ModelFactory.create_model_from_config(model_config=incomplete_config)
def test_default_embedder(self, resource_loader):
    """Fit the default embedder text classifier under several output pooling types.

    The first run leaves ``embedder_output_pooling_type`` unset; the following
    runs set it to "first" and then "last". After every fit, the query "hi"
    must be classified as "greet" or "exit".
    """
    queries = self.labeled_data.queries()
    intents = self.labeled_data.intents()
    config = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "embedder"},
        # default embedder_output_pooling_type is "mean"
        "params": {"emb_dim": 5},
    }

    param_overrides = (
        {},
        {"embedder_output_pooling_type": "first"},
        {"embedder_output_pooling_type": "last"},
    )
    for extra_params in param_overrides:
        config = {**config, "params": {**config["params"], **extra_params}}
        model = ModelFactory.create_model_from_config(ModelConfig(**config))
        model.initialize_resources(resource_loader, queries, intents)
        model.fit(queries, intents)
        predicted_intent = model.predict([markup.load_query("hi").query])[0]
        assert predicted_intent in ["greet", "exit"]
def test_create_model_from_config_object():
    """A ``ModelConfig`` object with an "lstm" classifier produces a ``PytorchTextModel``."""
    model_config = ModelConfig(
        model_type="text",
        example_type=QUERY_EXAMPLE_TYPE,
        label_type=CLASS_LABEL_TYPE,
        model_settings={"classifier_type": "lstm"},
    )
    created = ModelFactory.create_model_from_config(model_config=model_config)
    assert isinstance(created, PytorchTextModel)
def test_create_model_from_incomplete_config_without_params():
    """Creating a model from this dict config, which has no "params", raises ValueError."""
    incomplete_config = {
        "model_type": "tagger",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "lstm"},
    }
    # The call itself is expected to raise, so its result is never bound:
    # anything placed after it inside the block would not run when the test passes.
    with pytest.raises(ValueError):
        ModelFactory.create_model_from_config(model_config=incomplete_config)
def test_bert_embedder_frozen_params(self, resource_loader):
    """Fit a DistilBERT embedder classifier with ``update_embeddings`` off and reload it.

    Checks that:
      * fewer weights require gradients than exist in total after fitting,
      * the underlying classifier can be dumped to and loaded from
        ``GENERATED_TMP_FOLDER`` without raising,
      * the model still classifies "hi" as "greet" or "exit" once the loaded
        classifier has been swapped in.
    """
    config = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "embedder"},
        "params": {
            # default embedder_output_pooling_type for bert is "first"
            "embedder_type": "bert",
            "pretrained_model_name_or_path": "distilbert-base-uncased",
            "embedder_output_pooling_type": "mean",
            "update_embeddings": False,
        },
    }
    examples = self.labeled_data.queries()
    labels = self.labeled_data.intents()

    # fit the model
    model = ModelFactory.create_model_from_config(ModelConfig(**config))
    model.initialize_resources(resource_loader, examples, labels)
    model.fit(examples, labels)

    # assert only some weights are trainable
    clf = model._clf
    n_requires_grad, n_total = get_num_weights_of_model(clf)
    # A real message string: print() would evaluate to None and leave the
    # AssertionError without any detail.
    assert n_requires_grad < n_total, (
        f"expected fewer trainable weights than total weights, "
        f"got {n_requires_grad} trainable out of {n_total}"
    )

    # check if dumping and loading partial state dict logs required messages
    # & throws no errors; the folder is removed even when dump/load fails so a
    # failing run does not leave generated files behind
    os.makedirs(GENERATED_TMP_FOLDER, exist_ok=True)
    try:
        clf.dump(GENERATED_TMP_FOLDER)
        new_clf = clf.load(GENERATED_TMP_FOLDER)
    finally:
        shutil.rmtree(GENERATED_TMP_FOLDER)

    # do predictions with loaded model
    model._clf = new_clf
    assert model.predict([markup.load_query("hi").query])[0] in ["greet", "exit"]
def test_glove_cnn(self, resource_loader):
    """Fit a "cnn" text classifier configured with the "glove" embedder type.

    After fitting on the intent-labeled queries, "hi" must be classified as
    "greet" or "exit".
    """
    queries = self.labeled_data.queries()
    intents = self.labeled_data.intents()
    model_config = ModelConfig(
        model_type="text",
        example_type=QUERY_EXAMPLE_TYPE,
        label_type=CLASS_LABEL_TYPE,
        model_settings={"classifier_type": "cnn"},
        params={"embedder_type": "glove"},
    )

    model = ModelFactory.create_model_from_config(model_config)
    model.initialize_resources(resource_loader, queries, intents)
    model.fit(queries, intents)

    predicted_intent = model.predict([markup.load_query("hi").query])[0]
    assert predicted_intent in ["greet", "exit"]
def test_bert_lstm(self, resource_loader):
    """Combining the "bert" embedder type with the "lstm" classifier raises ValueError.

    The error may surface at any point of the create / initialize / fit
    sequence, so all three steps run inside the ``pytest.raises`` block.
    """
    queries = self.labeled_data.queries()
    intents = self.labeled_data.intents()
    model_config_kwargs = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "lstm"},
        "params": {
            "embedder_type": "bert",
            "pretrained_model_name_or_path": "bert-base-cased",
        },
    }

    # To use an embedder_type 'bert', classifier_type must be 'embedder'.
    with pytest.raises(ValueError):
        model = ModelFactory.create_model_from_config(
            ModelConfig(**model_config_kwargs)
        )
        model.initialize_resources(resource_loader, queries, intents)
        model.fit(queries, intents)
def test_create_model_from_config_dict():
    """A plain dict config with a "logreg" classifier produces a ``TextModel``."""
    feature_extractors = {
        "bag-of-words": {"lengths": [1]},
        "freq": {"bins": 5},
        "length": {},
    }
    config_dict = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "logreg"},
        "params": {"fit_intercept": True, "C": 100},
        "features": feature_extractors,
    }
    created = ModelFactory.create_model_from_config(model_config=config_dict)
    assert isinstance(created, TextModel)
def test_bert_embedder(self, resource_loader):
    """Fit the embedder text classifier over several pooling types and checkpoints.

    Two groups of configurations are exercised, and after every fit the query
    "hi" must be classified as "greet" or "exit":
      1. "bert-base-cased" with the pooling type left unset, then set to
         "mean", "last", "max" and "mean_sqrt";
      2. a series of other pretrained checkpoints, the last of which is fitted
         a second time with "mean" pooling.
    """
    examples = self.labeled_data.queries()
    labels = self.labeled_data.intents()
    shared_config = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "embedder"},
    }

    def fit_and_check(params):
        # Build, fit and sanity-check one model using the given "params" dict.
        model = ModelFactory.create_model_from_config(
            ModelConfig(**{**shared_config, "params": params})
        )
        model.initialize_resources(resource_loader, examples, labels)
        model.fit(examples, labels)
        assert model.predict([markup.load_query("hi").query])[0] in ["greet", "exit"]

    # test different configurations for bert-base-cased model
    # default embedder_output_pooling_type for bert is "first"
    params = {
        "embedder_type": "bert",
        "pretrained_model_name_or_path": "bert-base-cased",
    }
    fit_and_check(params)
    for pooling_type in ("mean", "last", "max", "mean_sqrt"):
        params = {**params, "embedder_output_pooling_type": pooling_type}
        fit_and_check(params)

    # test for different pretrained transformers
    # The params dict is replaced wholesale here, so only the checkpoint name
    # is passed ("embedder_type" is no longer part of the params).
    pretrained_names = (
        "distilbert-base-uncased",
        "roberta-base",
        "albert-base-v2",
        "sentence-transformers/all-mpnet-base-v2",
    )
    for pretrained_name in pretrained_names:
        params = {"pretrained_model_name_or_path": pretrained_name}
        fit_and_check(params)

    # Final run: the last checkpoint above, now with "mean" pooling added.
    params = {**params, "embedder_output_pooling_type": "mean"}
    fit_and_check(params)
def test_bert_embedder(self, resource_loader):
    """Fit the "bert" embedder tagger over several checkpoints and one rejected one.

    "bert-base-cased" is fitted with terminals, with and without "mean"
    token-span pooling; three further checkpoints are fitted with only the
    checkpoint name set. Each fitted model goes through
    ``model_predictions_assertions``. "roberta-base" is expected to raise
    ValueError somewhere in the same sequence.
    """
    examples = self.labeled_data.queries()
    labels = self.labeled_data.entities()
    shared_config = {
        "model_type": "tagger",
        "example_type": ENTITY_EXAMPLE_TYPE,
        "label_type": ENTITIES_LABEL_TYPE,
        "model_settings": {"classifier_type": "embedder"},
    }

    def fit_and_check(params):
        # Build, fit and run the shared prediction assertions for one "params" dict.
        model = ModelFactory.create_model_from_config(
            ModelConfig(**{**shared_config, "params": params})
        )
        model.initialize_resources(resource_loader, examples, labels)
        model.fit(examples, labels)
        model_predictions_assertions(model)

    bert_cased_params = {
        "embedder_type": "bert",
        "pretrained_model_name_or_path": "bert-base-cased",
        "add_terminals": True,
    }
    fit_and_check(bert_cased_params)
    fit_and_check({**bert_cased_params, "token_spans_pooling_type": "mean"})

    # test for different pretrained transformers
    supported_names = (
        "distilbert-base-uncased",
        "albert-base-v2",
        "sentence-transformers/all-mpnet-base-v2",
    )
    for pretrained_name in supported_names:
        fit_and_check(
            {
                "embedder_type": "bert",
                "pretrained_model_name_or_path": pretrained_name,
            }
        )

    # The whole create / initialize / fit / assert sequence sits inside the
    # raises block; whatever follows the raising step is not executed.
    with pytest.raises(ValueError):
        fit_and_check(
            {
                "embedder_type": "bert",
                "pretrained_model_name_or_path": "roberta-base",
            }
        )
def test_char_cnn_word_lstm(self, resource_loader):
    """Exercise the "cnn-lstm" tagger with rejected and accepted configurations.

    Three param combinations must raise ValueError during create / initialize /
    fit: terminals enabled, the char tokenizer, and the "bert" embedder type.
    The base config, the base config with the CRF layer disabled, and a
    "glove"-only params dict are then fitted and passed to
    ``model_predictions_assertions``.
    """
    examples = self.labeled_data.queries()
    labels = self.labeled_data.entities()
    config = {
        "model_type": "tagger",
        "example_type": ENTITY_EXAMPLE_TYPE,
        "label_type": ENTITIES_LABEL_TYPE,
        "model_settings": {"classifier_type": "cnn-lstm"},
        "params": {"emb_dim": 5},
    }

    def build_and_fit(model_config_kwargs):
        # Create a model from the given config dict, initialize it and fit it.
        model = ModelFactory.create_model_from_config(
            ModelConfig(**model_config_kwargs)
        )
        model.initialize_resources(resource_loader, examples, labels)
        model.fit(examples, labels)
        return model

    # Each of these is added on top of the base params independently (not cumulatively).
    rejected_param_sets = (
        {"add_terminals": True},
        {"tokenizer_type": "char-tokenizer"},
        {
            "embedder_type": "bert",
            "pretrained_model_name_or_path": "bert-base-cased",
        },
    )
    for rejected_params in rejected_param_sets:
        incorrect_config = {
            **config,
            "params": {**config["params"], **rejected_params},
        }
        with pytest.raises(ValueError):
            build_and_fit(incorrect_config)

    # Base configuration.
    model_predictions_assertions(build_and_fit(config))

    # Same configuration without the CRF layer.
    config = {**config, "params": {**config["params"], "use_crf_layer": False}}
    model_predictions_assertions(build_and_fit(config))

    # Params replaced wholesale: only the embedder type is passed.
    glove_config = {**config, "params": {"embedder_type": "glove"}}
    model_predictions_assertions(build_and_fit(glove_config))