def test_fit_predict(self, resource_loader):
    """Fits a logreg text model on ``self.labeled_data`` and checks predictions.

    The model uses bag-of-words, frequency and length features and is then
    asked to classify the queries "hi" and "bye".
    """
    settings = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "logreg"},
        "params": {"fit_intercept": True, "C": 100},
        "features": {
            "bag-of-words": {"lengths": [1]},
            "freq": {"bins": 5},
            "length": {},
        },
    }
    model = TextModel(ModelConfig(**settings))

    examples = self.labeled_data.queries()
    labels = self.labeled_data.intents()
    model.initialize_resources(resource_loader, examples, labels)
    model.fit(examples, labels)

    # NOTE(review): the whole predict() result for a one-example batch is
    # compared to a label string -- presumably an array-like of length one;
    # confirm this is intended rather than indexing the first prediction.
    for text, expected_intent in (("hi", "greet"), ("bye", "exit")):
        assert model.predict([markup.load_query(text).query]) == expected_intent
def test_with_role(self):
    """Tests groups when roles are explicitly specified in the config.

    The first query expects the size and the option to attach to the dish;
    the two remaining queries expect the dish and the option to stay unlinked.
    """
    linked = markup.load_query(
        '{large|size} {latte|dish|beverage} {ice|option|beverage}')
    parsed = self.parser.parse_entities(linked.query, linked.entities)
    assert len(parsed) == 3
    size, dish, option = parsed
    assert size.parent == dish
    assert dish.children == (size, option)

    unlinked_texts = (
        'I’d like a {muffin|dish|baked_good} with {no sugar|option|beverage}',
        'I’d like a {latte|dish|beverage} with {maple syrup|option|general}',
    )
    for text in unlinked_texts:
        query = markup.load_query(text)
        parsed = self.parser.parse_entities(query.query, query.entities)
        assert len(parsed) == 2
        dish, option = parsed
        assert dish.children is None
        assert option.parent is None
def test_char_lstm(self, resource_loader):
    """Checks that an "lstm" classifier with a char tokenizer fits and predicts.

    The fit/predict cycle runs twice: once with the base params and once with
    ``add_terminals`` added to them.
    """
    base_config = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "lstm"},
        "params": {"emb_dim": 30, "tokenizer_type": "char-tokenizer"},
    }
    # NOTE(review): "add_terminals" is given as the string "True", not a
    # boolean -- confirm that is what the model expects.
    terminals_config = dict(
        base_config,
        params=dict(base_config["params"], add_terminals="True"),
    )

    examples = self.labeled_data.queries()
    labels = self.labeled_data.intents()

    for settings in (base_config, terminals_config):
        model = ModelFactory.create_model_from_config(ModelConfig(**settings))
        model.initialize_resources(resource_loader, examples, labels)
        model.fit(examples, labels)
        prediction = model.predict([markup.load_query("hi").query])[0]
        assert prediction in ["greet", "exit"]
def test_fit_predict(self, resource_loader):
    """Trains a logreg text model on the class's labeled queries and predicts.

    Examples and labels are taken from the ``query`` and ``intent`` attributes
    of each item in ``self.labeled_data``.
    """
    features = {
        'bag-of-words': {'lengths': [1]},
        'freq': {'bins': 5},
        'length': {},
    }
    config = ModelConfig(
        model_type='text',
        example_type=QUERY_EXAMPLE_TYPE,
        label_type=CLASS_LABEL_TYPE,
        model_settings={'classifier_type': 'logreg'},
        params={'fit_intercept': True, 'C': 100},
        features=features,
    )
    model = TextModel(config)

    examples = [item.query for item in self.labeled_data]
    labels = [item.intent for item in self.labeled_data]
    model.initialize_resources(resource_loader, examples, labels)
    model.fit(examples, labels)

    # NOTE(review): the full predict() result is compared to a string here;
    # presumably a length-one array-like -- confirm.
    hello = markup.load_query('hi').query
    assert model.predict([hello]) == 'greet'
    goodbye = markup.load_query('bye').query
    assert model.predict([goodbye]) == 'exit'
def test_extract_features(self, resource_loader):
    """Checks the unigram features extracted for "hi there" after fitting.

    Only the bag-of-words feature (length 1) is enabled; the expected result
    has one entry for "hi" and one for "OOV".
    """
    settings = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "logreg"},
        "params": {"fit_intercept": True, "C": 100},
        "features": {"bag-of-words": {"lengths": [1]}},
    }
    model = TextModel(ModelConfig(**settings))

    examples = self.labeled_data.queries()
    labels = self.labeled_data.intents()
    model.initialize_resources(resource_loader, examples, labels)
    model.fit(examples, labels)

    probe = markup.load_query("hi there").query
    assert model.view_extracted_features(probe) == {
        "bag_of_words|length:1|ngram:hi": 1,
        "bag_of_words|length:1|ngram:OOV": 1,
    }
def test_load_system_role(query_factory):
    """Tests loading a query whose system entities each carry a role."""
    text = (
        "What stores are open between {3|sys_time|open_hours} and "
        "{5|sys_time|close_hours}"
    )
    loaded = markup.load_query(text, query_factory)
    assert len(loaded.entities) == 2

    # (character position, entity text, role) expected for each entity in order
    expectations = (
        (29, "3", "open_hours"),
        (35, "5", "close_hours"),
    )
    for index, (position, digit, role) in enumerate(expectations):
        entity = loaded.entities[index]
        assert entity.span.start == position
        assert entity.span.end == position
        assert entity.normalized_text == digit
        assert entity.entity.type == "sys_time"
        assert entity.entity.text == digit
        assert entity.entity.role == role
def test_entity_values(query_factory, query, expected_values):
    """Checks the ``alternate_values`` of the first entity loaded from ``query``."""
    # timestamp is for 2020-06-12T11:00:39.000-07:00
    options = {"timestamp": 1591984839}
    loaded = markup.load_query(query, query_factory, query_options=options)
    first_entity = loaded.entities[0]
    assert "alternate_values" in first_entity.entity.value
    assert first_entity.entity.value['alternate_values'] == expected_values
def test_load_system(
    query_factory,
    query,
    entity_value,
    entity_type,
    entity_start_span,
    value,
    unit_or_grain,
):
    """Tests loading a query that contains exactly one system entity.

    For "sys_amount-of-money" the resolved value must equal a unit/value
    dict; for every other type the grain must match and ``value`` must be
    contained in the resolved "value" entry.
    """
    # We added a pm timestamp to bias duckling time resolution to resolve to pm times
    options = {"timestamp": 1592002800}
    loaded = markup.load_query(query, query_factory, query_options=options)

    assert loaded
    assert len(loaded.entities) == 1

    entity = loaded.entities[0]
    assert entity.text == entity_value
    assert entity.entity.type == entity_type
    assert entity.span.start == entity_start_span
    assert not isinstance(entity.entity.value, str)

    if entity_type != "sys_amount-of-money":
        assert entity.entity.value["grain"] == unit_or_grain
        assert value in entity.entity.value["value"]
    else:
        assert entity.entity.value == {"unit": unit_or_grain, "value": value}
def test_parser_timeout():
    """Tests that the parser raises ParserTimeout on a very ambiguous query.

    ``handle_timeout=False`` is passed so the exception reaches the test.
    """
    group_config = {
        "name": {
            "form": {"max_instances": 1},
            "size": {"max_instances": 1},
            "number": {"max_instances": 1, "right": False},
            "option": {"linking_words": ["with"]},
        }
    }
    ambiguous_text = (
        "{venti|size} {jade citrus|name} with {one|number} bag of "
        "{peach tranquility|name} and {one|number} bag {jade citrus|name} "
        "{2 pumps peppermint|option} {no hot water|option} sub {steamed|option} "
        "{lemonade|option} {4|number} {honeys|option}"
    )
    parser = Parser(config=group_config)
    query = markup.load_query(ambiguous_text)

    with pytest.raises(ParserTimeout):
        parser.parse_entities(query.query, query.entities, handle_timeout=False)
def setup_class(cls):
    """Builds the labeled queries stored on the class as ``labeled_data``.

    Each utterance is loaded through ``markup.load_query`` with its intent
    ("greet" or "exit") and the resulting list is assigned to
    ``cls.labeled_data``.
    """
    # Every utterance must be its own list item: a missing comma between two
    # adjacent string literals silently concatenates them into one string.
    data_dict = {
        'greet': [
            'Hello',
            'Hello!',
            'hey',
            "what's up",
            'greetings',
            'yo',
            'hi',
            'hey, how are you?',
            'hola',
            'start',
        ],
        'exit': [
            'bye',
            'goodbye',
            'until next time',
            'see ya later',
            'ttyl',
            'talk to you later',
            'later',
            'have a nice day',
            'finish',
            'gotta go',
            "I'm leaving",
            "I'm done",
            "that's all",
        ],
    }

    labeled_data = []
    for intent, texts in data_dict.items():
        labeled_data.extend(
            markup.load_query(text, intent=intent) for text in texts)

    cls.labeled_data = labeled_data
def test_load_nested_4(query_factory):
    """Tests loading a query with multiple nested system entities."""
    text = "show me houses {between {600,000|sys_number} and {1,000,000|sys_number} dollars|price}"
    loaded = markup.load_query(text, query_factory)

    assert loaded
    assert len(loaded.entities) == 1

    price = loaded.entities[0]
    assert price.text == "between 600,000 and 1,000,000 dollars"
    assert price.entity.type == "price"
    assert price.span == Span(15, 51)
    assert not isinstance(price.entity.value, str)
    assert "children" in price.entity.value
    assert len(price.entity.value["children"]) == 2

    # The child spans asserted below are offsets within the parent's text.
    low_bound, high_bound = price.entity.value["children"]

    assert low_bound.text == "600,000"
    assert low_bound.entity.value == {"value": 600000}
    assert low_bound.span == Span(8, 14)

    assert high_bound.text == "1,000,000"
    assert high_bound.entity.value == {"value": 1000000}
    assert high_bound.span == Span(20, 28)
def setup_class(cls):
    """Builds the labeled queries stored on the class as ``labeled_data``.

    Each utterance is loaded through ``markup.load_query`` with its intent
    ("greet" or "exit"); the loaded queries are wrapped with
    ``ProcessedQueryList.from_in_memory_list`` and assigned to
    ``cls.labeled_data``.
    """
    # Every utterance must be its own list item: a missing comma between two
    # adjacent string literals silently concatenates them into one string.
    data_dict = {
        "greet": [
            "Hello",
            "Hello!",
            "hey",
            "what's up",
            "greetings",
            "yo",
            "hi",
            "hey, how are you?",
            "hola",
            "start",
        ],
        "exit": [
            "bye",
            "goodbye",
            "until next time",
            "see ya later",
            "ttyl",
            "talk to you later",
            "later",
            "have a nice day",
            "finish",
            "gotta go",
            "I'm leaving",
            "I'm done",
            "that's all",
        ],
    }

    labeled_data = []
    for intent, texts in data_dict.items():
        labeled_data.extend(
            markup.load_query(text, intent=intent) for text in texts
        )

    cls.labeled_data = ProcessedQueryList.from_in_memory_list(labeled_data)
def test_load_basic_query(query_factory):
    """Checks that text without entity markup loads into a truthy processed query."""
    loaded = markup.load_query("This is a test query string", query_factory)
    assert loaded
    assert loaded.query
def test_parser_timeout():
    """Tests that parsing a very ambiguous query raises ParserTimeout.

    The parser is called with ``handle_timeout=False`` so the exception is
    visible to the test.
    """
    name_dependents = {
        'form': {'max_instances': 1},
        'size': {'max_instances': 1},
        'number': {'max_instances': 1, 'right': False},
        'option': {'linking_words': ['with']},
    }
    parser = Parser(config={'name': name_dependents})

    segments = [
        '{venti|size} {jade citrus|name} with {one|number} bag of ',
        '{peach tranquility|name} and {one|number} bag {jade citrus|name} ',
        '{2 pumps peppermint|option} {no hot water|option} sub {steamed|option} ',
        '{lemonade|option} {4|number} {honeys|option}',
    ]
    query = markup.load_query(''.join(segments))

    with pytest.raises(ParserTimeout):
        parser.parse_entities(query.query, query.entities, handle_timeout=False)
def test_extract_features(self, resource_loader):
    """Checks the unigram features extracted for 'hi there' after fitting.

    Only the bag-of-words feature (length 1) is enabled; the expected result
    has one entry for 'hi' and one for 'there'.
    """
    config = ModelConfig(
        model_type='text',
        example_type=QUERY_EXAMPLE_TYPE,
        label_type=CLASS_LABEL_TYPE,
        model_settings={'classifier_type': 'logreg'},
        params={'fit_intercept': True, 'C': 100},
        features={'bag-of-words': {'lengths': [1]}},
    )
    model = TextModel(config)

    examples = [item.query for item in self.labeled_data]
    labels = [item.intent for item in self.labeled_data]
    model.initialize_resources(resource_loader, examples, labels)
    model.fit(examples, labels)

    probe = markup.load_query('hi there').query
    extracted = model.view_extracted_features(probe)
    assert extracted == {
        'bag_of_words|length:1|ngram:hi': 1,
        'bag_of_words|length:1|ngram:there': 1,
    }
def test_right(self):
    """Checks that a dependent to the right of its head is attached to it."""
    query = markup.load_query('{Hello|head} {there|dependent}')
    parsed = self.parser.parse_entities(query.query, query.entities)

    assert len(parsed) == 2
    head, dependent = parsed
    assert head.children == (dependent, )
    assert dependent.parent == head
def test_standalone_option(self):
    """Checks that an option forms its own group, with the size as its child."""
    query = markup.load_query('{light|size} {ice|option}')
    parsed = self.parser.parse_entities(query.query, query.entities)

    assert len(parsed) == 2
    size, option = parsed
    assert size.parent == option
    assert option.children == (size, )
def test_generic(self):
    """Tests groups where no roles are specified in the config.

    The option is expected to attach to the dish even though the dish is
    marked up with a role.
    """
    text = '{noodles|dish|main_course} with {tofu|option}'
    query = markup.load_query(text)
    parsed = self.parser.parse_entities(query.query, query.entities)

    assert len(parsed) == 2
    dish, option = parsed
    assert dish.children == (option, )
    assert option.parent == dish
def test_singleton(self):
    """Checks that a lone head comes back unchanged, i.e. without any group.

    The parser is called with ``timeout=None``.
    """
    query = markup.load_query('Hello {there|head}')
    parsed = self.parser.parse_entities(
        query.query,
        query.entities,
        timeout=None,
    )
    assert parsed == query.entities
def test_distance(self):
    """Checks that a dependent between two heads attaches to the nearer one."""
    text = '{Hello|head} {there|dependent} my {friend|head}'
    query = markup.load_query(text)
    parsed = self.parser.parse_entities(query.query, query.entities)

    assert len(parsed) == 3
    near_head, dependent, far_head = parsed
    assert near_head.children == (dependent, )
    assert dependent.parent == near_head
    assert far_head.children is None
def test_default_embedder(self, resource_loader):
    """Checks that an "embedder" classifier fits and predicts.

    The fit/predict cycle runs three times: with the base params, then with
    ``embedder_output_pooling_type`` set to "first", then to "last".
    """
    examples = self.labeled_data.queries()
    labels = self.labeled_data.intents()

    def fit_and_check(settings):
        # Build a fresh model from the settings, train it and classify "hi".
        model = ModelFactory.create_model_from_config(ModelConfig(**settings))
        model.initialize_resources(resource_loader, examples, labels)
        model.fit(examples, labels)
        prediction = model.predict([markup.load_query("hi").query])[0]
        assert prediction in ["greet", "exit"]

    config = {
        "model_type": "text",
        "example_type": QUERY_EXAMPLE_TYPE,
        "label_type": CLASS_LABEL_TYPE,
        "model_settings": {"classifier_type": "embedder"},
        "params": {"emb_dim": 5},  # default embedder_output_pooling_type is "mean"
    }
    fit_and_check(config)

    for pooling_type in ("first", "last"):
        config = {
            **config,
            "params": {
                **config["params"],
                "embedder_output_pooling_type": pooling_type,
            },
        }
        fit_and_check(config)
def test_link_word(self):
    """Tests that link words override the default distance calculation.

    The option after "with" must attach to the pizza; the other two dishes
    are expected to have no children.
    """
    query = markup.load_query(
        'A {pizza|dish} with {olives|option}, {breadsticks|dish} and a {coke|dish}')
    parsed = self.parser.parse_entities(query.query, query.entities)

    assert len(parsed) == 4
    pizza, olives, breadsticks, coke = parsed
    assert pizza.children == (olives, )
    assert olives.parent == pizza
    assert breadsticks.children is None
    assert coke.children is None
def test_nested(self):
    """Checks nested groups: an option with its own size attaches to the dish."""
    query = markup.load_query(
        '{large|size} {latte|dish} {light|size} {ice|option}')
    parsed = self.parser.parse_entities(query.query, query.entities)

    assert len(parsed) == 4
    dish_size, dish, option_size, option = parsed
    assert dish_size.parent == dish
    assert dish.children == (dish_size, option)
    assert option_size.parent == option
    assert option.children == (option_size, )
def test_load_dump_groups_roles(query_factory):
    """Checks that dumping a loaded query with groups and roles returns the input."""
    original = (
        "Order [{one|sys_number|quantity} {large|size} {Tesora|product|dish} with "
        "[{medium|size} {cream|option|addin}|option]|product]"
    )
    loaded = markup.load_query(original, query_factory)
    round_tripped = markup.dump_query(loaded)
    assert original == round_tripped
def test_max_instances(self):
    """Tests that the parser respects the max instances constraint.

    Of the two sizes, only the one next to the dish is expected to attach.
    """
    query = markup.load_query('{light|size} {medium|size} {latte|dish}')
    parsed = self.parser.parse_entities(query.query, query.entities)

    assert len(parsed) == 3
    far_size, near_size, dish = parsed
    assert far_size.parent is None
    assert near_size.parent == dish
    assert dish.children == (near_size, )
def test_load_dump_groups(query_factory):
    """Checks that dumping a loaded query with entity groups returns the input."""
    original = (
        "Order [{one|quantity} {large|size} {Tesora|product} with "
        "[{medium|size} {cream|option}|option]|product] from "
        "[{Philz|store} in {Downtown Sunnyvale|location}|store]"
    )
    loaded = markup.load_query(original, query_factory)
    round_tripped = markup.dump_query(loaded)
    assert original == round_tripped
def test_load_special_chars_6(query_factory):
    """Tests loading a nested entity whose text contains periods."""
    loaded = markup.load_query(
        "what's on {after {8 p.m.|sys_time}|range}?", query_factory
    )
    entities = loaded.entities
    assert len(entities) == 1

    time_range = entities[0]
    assert time_range.text == "after 8 p.m."
    assert time_range.normalized_text == "after 8 p m"
    assert time_range.span == Span(10, 21)
def test_link_word_negative(self):
    """Tests that link words are not applied to other dependent types.

    The size following "with" must attach to the coke rather than to the
    pizza, leaving exactly one entity that has children and no parent.
    """
    query = markup.load_query(
        'A {pepperoni pizza|dish} with {large|size} {coke|dish}')
    entities = self.parser.parse_entities(query.query, query.entities)

    group_heads = sum(
        1 for e in entities
        if e.parent is None and e.children is not None
    )
    assert group_heads == 1

    pizza = entities[0]
    assert pizza.children is None
    size = entities[1]
    coke = entities[2]
    assert size.parent == coke
    assert coke.children == (size, )
def test_load_special_chars_5(query_factory):
    """Tests loading a query with a stray comma and a doubly nested entity."""
    loaded = markup.load_query(
        "what christmas movies are , showing at {{8pm|sys_time}|range}",
        query_factory,
    )
    assert len(loaded.entities) == 1

    time_range = loaded.entities[0]
    assert time_range.span == Span(42, 44)
    assert time_range.normalized_text == "8pm"
def test_load_special_chars(query_factory):
    """Tests loading an entity whose text contains periods."""
    loaded = markup.load_query("play {s.o.b.|track}", query_factory)
    entities = loaded.entities
    assert len(entities)

    track = entities[0]
    assert track.text == "s.o.b."
    assert track.normalized_text == "s o b"
    assert track.span.start == 5
    assert track.span.end == 10