def test_slot_case_inside_substitution(self):
    """Check that lower-casing input words does not break a group substitution.

    The ``$test`` slot is filled with a group of alternatives that is
    substituted as a whole with "barorbaz"; the input is recognized with
    ``word_transform=str.lower``.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a ($test){value} """
    slot_values = [Sentence.parse("(Bar:bar | Baz:baz):barorbaz")]
    replacements = {"$test": slot_values}
    intents = parse_ini(ini_text)
    graph = intents_to_graph(intents, replacements)

    results = recognize(
        "this is a bar",
        graph,
        fuzzy=False,
        word_transform=str.lower,
    )
    recognitions = zero_times(results)
    self.assertEqual(len(recognitions), 1)

    match = recognitions[0]
    self.assertIsNotNone(match.intent)

    # Substituted text versus the text as it was given
    self.assertEqual(match.text, "this is a barorbaz")
    self.assertEqual(match.raw_text, "this is a bar")

    # Exactly one entity, carrying both substituted and raw values
    self.assertEqual(len(match.entities), 1)
    entity = match.entities[0]
    self.assertEqual(entity.entity, "value")
    self.assertEqual(entity.value, "barorbaz")
    self.assertEqual(entity.raw_value, "bar")
    self.assertEqual(entity.source, "test")
def test_rules(self):
    """Verify that local and remote rule references work.

    Both intents are expected to recognize "this is a test" with a
    confidence of 1.0, in the order Intent1, Intent2.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers, rules and sentences; confirm this is intended.
    ini_text = """ [Intent1] rule = a test this is <rule> [Intent2] rule = this is <rule> <Intent1.rule> """
    intents = parse_ini(ini_text)
    graph = intents_to_graph(intents)

    recognitions = zero_times(recognize("this is a test", graph))

    words = ["this", "is", "a", "test"]
    expected = [
        Recognition(
            intent=Intent(name=intent_name, confidence=1.0),
            text="this is a test",
            raw_text="this is a test",
            tokens=list(words),
            raw_tokens=list(words),
        )
        for intent_name in ("Intent1", "Intent2")
    ]
    self.assertEqual(recognitions, expected)
def test_single_sentence(self):
    """Single intent, single sentence, recognized with ``fuzzy=False``.

    An exact match must produce one recognition with confidence 1.0.
    Inputs with an extra token or a missing token must produce an empty
    (falsy) result.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    intents = parse_ini(""" [TestIntent] this is a test """)
    graph = intents_to_graph(intents)

    # Exact
    recognitions = zero_times(
        recognize("this is a test", graph, fuzzy=False))
    self.assertEqual(
        recognitions,
        [
            Recognition(
                intent=Intent(name="TestIntent", confidence=1.0),
                text="this is a test",
                raw_text="this is a test",
                tokens=["this", "is", "a", "test"],
                raw_tokens=["this", "is", "a", "test"],
            )
        ],
    )

    # Too many tokens (failure: nothing is recognized with fuzzy=False)
    recognitions = zero_times(
        recognize("this is a bad test", graph, fuzzy=False))
    self.assertFalse(recognitions)

    # Too few tokens (failure)
    recognitions = zero_times(recognize("this is a", graph, fuzzy=False))
    self.assertFalse(recognitions)
def test_converters(self):
    """Check a sentence whose words carry converters.

    Expects "test" to come back upper-cased and "ten" to come back as 100,
    with the custom "square" converter supplied through
    ``extra_converters``.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a test!upper ten:10!int!square """
    intents = parse_ini(ini_text)
    graph = intents_to_graph(intents)

    def square(*args):
        return [x**2 for x in args]

    # "test" -> "TEST"; "ten" -> 10 -> 100
    results = recognize(
        "this is a test ten",
        graph,
        fuzzy=False,
        extra_converters={"square": square},
    )
    recognitions = zero_times(results)

    expected = [
        Recognition(
            intent=Intent(name="TestIntent", confidence=1.0),
            text="this is a TEST 100",
            raw_text="this is a test ten",
            tokens=["this", "is", "a", "TEST", 100],
            raw_tokens=["this", "is", "a", "test", "ten"],
        )
    ]
    self.assertEqual(recognitions, expected)
def test_stop_words(self):
    """Check stop-word handling with ``fuzzy=False``.

    The input contains the extra word "abcd": it must not be recognized
    unless "abcd" is passed as a stop word.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a test """
    graph = intents_to_graph(parse_ini(ini_text))
    spoken = "this is a abcd test"

    # Without stop words nothing is recognized
    without_stop_words = zero_times(recognize(spoken, graph, fuzzy=False))
    self.assertFalse(without_stop_words)

    # With "abcd" as a stop word the sentence is recognized
    with_stop_words = zero_times(
        recognize(spoken, graph, stop_words={"abcd"}, fuzzy=False)
    )
    words = ["this", "is", "a", "test"]
    expected = [
        Recognition(
            intent=Intent(name="TestIntent", confidence=1.0),
            text="this is a test",
            raw_text="this is a test",
            tokens=list(words),
            raw_tokens=list(words),
        )
    ]
    self.assertEqual(with_stop_words, expected)
def test_escape(self):
    """Check the parse result for a sentence that starts with an escaped bracket."""
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent1] \\[this] is a test """
    parsed = parse_ini(ini_text)

    bracketed = Sequence(
        text="this",
        type=SequenceType.ALTERNATIVE,
        items=[Word("this"), Word("")],
    )
    expected_sentence = Sentence(
        text="[this] is a test",
        items=[bracketed, Word("is"), Word("a"), Word("test")],
    )
    self.assertEqual(parsed, {"TestIntent1": [expected_sentence]})
def test_multiple_sentences(self):
    """Recognize one sentence that is shared by two different intents.

    Exactly two recognitions are expected, one per intent, each with a
    confidence of 1.0.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers and sentences; confirm this is intended.
    ini_text = """ [TestIntent1] this is a test [TestIntent2] this is a test """
    graph = intents_to_graph(parse_ini(ini_text))

    recognitions = zero_times(recognize("this is a test", graph))
    self.assertEqual(len(recognitions), 2)

    # Each intent must appear among the recognitions
    words = ["this", "is", "a", "test"]
    for intent_name in ("TestIntent1", "TestIntent2"):
        expected = Recognition(
            intent=Intent(name=intent_name, confidence=1.0),
            text="this is a test",
            raw_text="this is a test",
            tokens=list(words),
            raw_tokens=list(words),
        )
        self.assertIn(expected, recognitions)
def test_stop_words(self):
    """Check how stop words affect confidence with default recognition settings.

    The extra word "abcd" lowers the confidence to ``1 - (1 / 4)``; passing
    it as a stop word raises the expected confidence to ``1 - (0.1 / 4)``.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a test """
    graph = intents_to_graph(parse_ini(ini_text))
    spoken = "this is a abcd test"

    # No stop words: one recognition, lower confidence
    plain = zero_times(recognize(spoken, graph))
    self.assertEqual(len(plain), 1)
    self.assertEqual(plain[0].intent.confidence, 1 - (1 / 4))

    # "abcd" as a stop word: higher confidence
    with_stop_words = zero_times(recognize(spoken, graph, stop_words={"abcd"}))
    words = ["this", "is", "a", "test"]
    expected = [
        Recognition(
            intent=Intent(name="TestIntent", confidence=float(1 - (0.1 / 4))),
            text="this is a test",
            raw_text="this is a test",
            tokens=list(words),
            raw_tokens=list(words),
        )
    ]
    self.assertEqual(with_stop_words, expected)
def test_intent_filter(self):
    """Recognize a sentence shared by two intents while filtering by name.

    Only the recognition for "TestIntent1" is expected when the filter
    accepts that name alone.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers and sentences; confirm this is intended.
    ini_text = """ [TestIntent1] this is a test [TestIntent2] this is a test """
    graph = intents_to_graph(parse_ini(ini_text))

    def only_first_intent(name):
        return name == "TestIntent1"

    recognitions = zero_times(
        recognize("this is a test", graph, intent_filter=only_first_intent)
    )

    words = ["this", "is", "a", "test"]
    expected = [
        Recognition(
            intent=Intent(name="TestIntent1", confidence=1.0),
            text="this is a test",
            raw_text="this is a test",
            tokens=list(words),
            raw_tokens=list(words),
        )
    ]
    self.assertEqual(recognitions, expected)
def test_single_sentence(self):
    """Single intent, single sentence, recognized with trained examples.

    An exact input must give a confidence of 1; inputs with misspellings or
    an extra token must still give one recognition, with confidence
    below 1.0.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a test? """
    graph = intents_to_graph(parse_ini(ini_text))
    examples = train(graph)

    # Exact
    exact = zero_times(recognize("this is a test", graph, examples))
    expected = [
        Recognition(
            intent=Intent(name="TestIntent", confidence=1),
            text="this is a test?",
            raw_text="this is a test",
            tokens=["this", "is", "a", "test?"],
            raw_tokens=["this", "is", "a", "test"],
        )
    ]
    self.assertEqual(exact, expected)

    # Misspellings, too many tokens (lower confidence)
    for sentence in ["this is a bad test", "this iz b tst"]:
        inexact = zero_times(recognize(sentence, graph, examples))
        self.assertEqual(len(inexact), 1)
        matched_intent = inexact[0].intent
        self.assertIsNotNone(matched_intent)
        self.assertLess(matched_intent.confidence, 1.0)
def test_intent_filter(self):
    """Check that ``parse_ini`` drops intents rejected by ``intent_filter``.

    The filter rejects "TestIntent2", so only "TestIntent1" is expected in
    the parsed result.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers and sentences; confirm this is intended.
    ini_text = """ [TestIntent1] this is a test [TestIntent2] this is another test """

    def skip_second_intent(n):
        return n != "TestIntent2"

    parsed = parse_ini(ini_text, intent_filter=skip_second_intent)

    expected_sentence = Sentence(
        text="this is a test",
        items=[Word("this"), Word("is"), Word("a"), Word("test")],
    )
    self.assertEqual(parsed, {"TestIntent1": [expected_sentence]})
def test_optional_entity(self):
    """Check that an entity placed inside an optional is recognized.

    Both the "book" entity and the optional "zone" entity must be present
    with the values taken from their slot replacements.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [playBook] read me ($audio-book-name){book} in [the] [($assistant-zones){zone}] """
    replacements = {
        "$audio-book-name": [Sentence.parse("the hound of the baskervilles")],
        "$assistant-zones": [Sentence.parse("bedroom")],
    }
    intents = parse_ini(ini_text)
    graph = intents_to_graph(intents, replacements)

    results = recognize(
        "read me the hound of the baskervilles in the bedroom",
        graph,
        fuzzy=False,
    )
    recognitions = zero_times(results)
    self.assertEqual(len(recognitions), 1)

    match = recognitions[0]
    self.assertTrue(match.intent)

    # Index the entities by name so each one can be checked on its own
    by_name = {}
    for found in match.entities:
        by_name[found.entity] = found

    self.assertIn("book", by_name)
    self.assertEqual(by_name["book"].value, "the hound of the baskervilles")

    self.assertIn("zone", by_name)
    self.assertEqual(by_name["zone"].value, "bedroom")
def test_converter_args(self):
    """Check a converter that receives arguments.

    The custom "pow" converter is written with the argument 3, so "ten" is
    expected to come back as 1000.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a test ten:10!int!pow,3 """
    graph = intents_to_graph(parse_ini(ini_text))

    def pow_converter(*args, converter_args=None):
        # Without converter arguments the values are left unchanged (power 1)
        if converter_args:
            exponent = int(converter_args[0])
        else:
            exponent = 1
        return [x**exponent for x in args]

    # "ten" -> 10 -> 1000
    results = recognize(
        "this is a test ten",
        graph,
        fuzzy=False,
        extra_converters={"pow": pow_converter},
    )
    recognitions = zero_times(results)

    expected = [
        Recognition(
            intent=Intent(name="TestIntent", confidence=1.0),
            text="this is a test 1000",
            raw_text="this is a test ten",
            tokens=["this", "is", "a", "test", 1000],
            raw_tokens=["this", "is", "a", "test", "ten"],
        )
    ]
    self.assertEqual(recognitions, expected)
def test_entity_converters_both(self):
    """Check an entity converter combined with converters inside the entity.

    The "number" entity is expected to have the value 4.2 for the input
    "four point two".
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a test (four:4 point: two:2){number!floatify} """
    graph = intents_to_graph(parse_ini(ini_text))

    def floatify(a, b):
        return [float(f"{a}.{b}")]

    # "four point two" -> 4.2
    results = recognize(
        "this is a test four point two",
        graph,
        fuzzy=False,
        extra_converters={"floatify": floatify},
    )
    recognitions = zero_times(results)
    self.assertEqual(len(recognitions), 1)

    match = recognitions[0]
    self.assertTrue(match.intent)

    by_name = {}
    for found in match.entities:
        by_name[found.entity] = found

    self.assertIn("number", by_name)
    self.assertEqual(by_name["number"].value, 4.2)
def test_word_case_preservation(self):
    """Check that the original word casing survives in the raw text.

    The input "TEST" is recognized with ``word_transform=str.lower``; the
    raw text and raw entity value must keep the upper-case spelling.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a (test){value} """
    intents = parse_ini(ini_text)
    graph = intents_to_graph(intents)

    results = recognize(
        "this is a TEST",
        graph,
        fuzzy=False,
        word_transform=str.lower,
    )
    recognitions = zero_times(results)
    self.assertEqual(len(recognitions), 1)

    match = recognitions[0]
    self.assertIsNotNone(match.intent)

    # Recognized text is lower case; raw text keeps the input casing
    self.assertEqual(match.text, "this is a test")
    self.assertEqual(match.raw_text, "this is a TEST")

    self.assertEqual(len(match.entities), 1)
    entity = match.entities[0]
    self.assertEqual(entity.entity, "value")
    self.assertEqual(entity.value, "test")
    self.assertEqual(entity.raw_value, "TEST")
def test_final_optional_entity(self):
    """Check that an optional entity at the end of a sentence is matched.

    For "display bottom layer" both the "location" and the optional
    "layout" entities must be present.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [ChangeDisplay] display (top | bottom){location} [(page | layer){layout}] """
    intents = parse_ini(ini_text)
    graph = intents_to_graph(intents)

    recognitions = zero_times(recognize("display bottom layer", graph))
    self.assertEqual(len(recognitions), 1)

    match = recognitions[0]
    self.assertIsNotNone(match.intent)

    by_name = {}
    for found in match.entities:
        by_name[found.entity] = found

    self.assertIn("location", by_name)
    self.assertEqual(by_name["location"].value, "bottom")

    self.assertIn("layout", by_name)
    self.assertEqual(by_name["layout"].value, "layer")
def test_multiple_sentences(self):
    """Check ``graph_to_fsts`` output for two intents.

    Each intent gets its own FST text, and the three symbol tables are
    expected to be identical.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers and sentences; confirm this is intended.
    ini_text = """ [TestIntent1] this is a test [TestIntent2] this is another test """
    graph = intents_to_graph(parse_ini(ini_text))
    fsts = graph_to_fsts(graph)

    first_fst = "".join(
        [
            "0 1 this this 0\n",
            "1 2 is is 0\n",
            "2 3 a a 0\n",
            "3 4 test test 0\n",
            "4 5 <eps> <eps> 0\n",
            "5\n",
        ]
    )
    second_fst = "".join(
        [
            "0 1 this this 0\n",
            "1 2 is is 0\n",
            "2 3 another another 0\n",
            "3 4 test test 0\n",
            "4 5 <eps> <eps> 0\n",
            "5\n",
        ]
    )
    vocabulary = {
        "<eps>": 0,
        "this": 1,
        "is": 2,
        "a": 3,
        "test": 4,
        "another": 5,
    }
    expected = GraphFsts(
        intent_fsts={"TestIntent1": first_fst, "TestIntent2": second_fst},
        symbols=dict(vocabulary),
        input_symbols=dict(vocabulary),
        output_symbols=dict(vocabulary),
    )
    self.assertEqual(fsts, expected)
def test_multiple_weights(self):
    """Multiple intents should have balanced weights.

    With two intents, each intent label edge in the single FST is expected
    to carry a weight of 0.5.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers and sentences; confirm this is intended.
    intents = parse_ini(""" [TestIntent1] this is a test [TestIntent2] this is a test """)
    graph = intents_to_graph(intents)
    fst = graph_to_fst(graph)
    self.assertEqual(
        fst,
        GraphFst(
            intent_fst="0 1 <eps> __label__TestIntent1 0.5\n"
            "1 2 this this 0\n"
            "2 3 is is 0\n"
            "3 4 a a 0\n"
            "4 5 test test 0\n"
            "5 6 <eps> <eps> 0\n"
            "0 7 <eps> __label__TestIntent2 0.5\n"
            "7 8 this this 0\n"
            "8 9 is is 0\n"
            "9 10 a a 0\n"
            "10 11 test test 0\n"
            "11 6 <eps> <eps> 0\n"
            "6\n",
            symbols={
                "<eps>": 0,
                "__label__TestIntent1": 1,
                "this": 2,
                "is": 3,
                "a": 4,
                "test": 5,
                "__label__TestIntent2": 6,
            },
            # Intent labels appear only on the output side
            input_symbols={
                "<eps>": 0,
                "this": 2,
                "is": 3,
                "a": 4,
                "test": 5,
            },
            output_symbols={
                "<eps>": 0,
                "__label__TestIntent1": 1,
                "this": 2,
                "is": 3,
                "a": 4,
                "test": 5,
                "__label__TestIntent2": 6,
            },
        ),
    )
def test_multiple_weights(self):
    """Check that two intents get balanced weights in the single FST.

    Each intent label edge is expected to carry a weight of 0.5.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers and sentences; confirm this is intended.
    ini_text = """ [TestIntent1] this is a test [TestIntent2] this is a test """
    graph = intents_to_graph(parse_ini(ini_text))
    fst = graph_to_fst(graph)

    expected_text = "".join(
        [
            "0 1 <eps> __label__TestIntent1 0.5\n",
            "0 2 <eps> __label__TestIntent2 0.5\n",
            "1 3 this this 0\n",
            "2 4 this this 0\n",
            "3 5 is is 0\n",
            "4 6 is is 0\n",
            "5 7 a a 0\n",
            "6 8 a a 0\n",
            "7 9 test test 0\n",
            "8 10 test test 0\n",
            "9 11 <eps> <eps> 0\n",
            "10 11 <eps> <eps> 0\n",
            "11\n",
        ]
    )
    # Symbols that appear on the input side
    plain_symbols = {"<eps>": 0, "this": 3, "is": 4, "a": 5, "test": 6}
    # The full table adds the intent labels
    all_symbols = {
        "__label__TestIntent1": 1,
        "__label__TestIntent2": 2,
        **plain_symbols,
    }
    expected = GraphFst(
        intent_fst=expected_text,
        symbols=dict(all_symbols),
        input_symbols=dict(plain_symbols),
        output_symbols=dict(all_symbols),
    )
    self.assertEqual(fst, expected)
def test_nested_remote_rule(self):
    """Check a nested rule reference that comes from another intent's grammar.

    There are no assertions: the test passes as long as building the graph
    does not raise.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers, rules and sentences; confirm this is intended.
    ini_text = """ [TestIntent1] test_rule_1 = <test_rule_2> test_rule_2 = test this is a test [TestIntent2] this is another <TestIntent1.test_rule_1> """
    parsed = parse_ini(ini_text)

    # Will fail here if nested rule references are broken
    intents_to_graph(parsed)
def test_intent_filter_single_fst(self):
    """Test multiple intents, single FST with an intent filter.

    Only "TestIntent1" passes the filter, so the expected FST text and
    symbol tables contain nothing from "TestIntent2".
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers and sentences; confirm this is intended.
    intents = parse_ini(""" [TestIntent1] this is a test [TestIntent2] this is another test """)
    graph = intents_to_graph(intents)
    fst = graph_to_fst(
        graph, intent_filter=lambda intent: intent == "TestIntent1")
    self.assertEqual(
        fst,
        GraphFst(
            # The remaining intent label edge is expected to keep weight 0.5
            intent_fst="0 1 <eps> __label__TestIntent1 0.5\n"
            "1 2 this this 0\n"
            "2 3 is is 0\n"
            "3 4 a a 0\n"
            "4 5 test test 0\n"
            "5 6 <eps> <eps> 0\n"
            "6\n",
            symbols={
                "<eps>": 0,
                "__label__TestIntent1": 1,
                "this": 2,
                "is": 3,
                "a": 4,
                "test": 5,
            },
            input_symbols={
                "<eps>": 0,
                "this": 2,
                "is": 3,
                "a": 4,
                "test": 5,
            },
            output_symbols={
                "<eps>": 0,
                "__label__TestIntent1": 1,
                "this": 2,
                "is": 3,
                "a": 4,
                "test": 5,
            },
        ),
    )
def test_walk(self):
    """Check ``walk_expression`` over a sentence with rule and slot references.

    The visitor rewrites the digit words "2" and "3" in place; afterwards
    the ``$minute`` replacement must hold the spelled-out words with the
    digits as substitutions.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header, the rule and the sentence; confirm this is intended.
    ini_text = """ [SetAlarm] minutes = $minute minutes set alarm for <minutes> """
    parsed = parse_ini(ini_text)
    sentences, replacements = split_rules(parsed)
    replacements["$minute"] = [Sentence.parse("2 | 3")]

    spelled_out = {2: "two", 3: "three"}

    def num2words(word):
        # Only plain words are rewritten; everything else is left alone
        if isinstance(word, Word):
            try:
                number = int(word.text)
                if number in spelled_out:
                    word.text = spelled_out[number]
                    word.substitution = str(number)
            except ValueError:
                # Not a number
                pass

    for sentence in sentences["SetAlarm"]:
        walk_expression(sentence, num2words, replacements)

    # Verify minute digits were replaced
    minute = replacements["$minute"][0]
    expected = Sentence(
        text="2 | 3",
        type=SequenceType.GROUP,
        items=[
            Sequence(
                text="2 | 3",
                type=SequenceType.ALTERNATIVE,
                items=[
                    Word("two", substitution="2"),
                    Word("three", substitution="3"),
                ],
            )
        ],
    )
    self.assertEqual(minute, expected)
def test_intent_filter_multiple_fsts(self):
    """Check ``graph_to_fsts`` for two intents when an intent filter is given.

    Only "TestIntent1" passes the filter, so only its FST and words are
    expected in the result.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers and sentences; confirm this is intended.
    ini_text = """ [TestIntent1] this is a test [TestIntent2] this is another test """
    graph = intents_to_graph(parse_ini(ini_text))

    def keep_first_intent(intent):
        return intent == "TestIntent1"

    fsts = graph_to_fsts(graph, intent_filter=keep_first_intent)

    fst_text = "".join(
        [
            "0 1 this this 0\n",
            "1 2 is is 0\n",
            "2 3 a a 0\n",
            "3 4 test test 0\n",
            "4 5 <eps> <eps> 0\n",
            "5\n",
        ]
    )
    vocabulary = {"<eps>": 0, "this": 1, "is": 2, "a": 3, "test": 4}
    expected = GraphFsts(
        intent_fsts={"TestIntent1": fst_text},
        symbols=dict(vocabulary),
        input_symbols=dict(vocabulary),
        output_symbols=dict(vocabulary),
    )
    self.assertEqual(fsts, expected)
def test_optional(self):
    """Check the FST produced for one sentence that has an optional word.

    The expected FST text has two arcs leaving state 2: one for the word
    "a" and one ``<eps>`` arc.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is [a] test """
    graph = intents_to_graph(parse_ini(ini_text))
    fsts = graph_to_fsts(graph)

    fst_text = "".join(
        [
            "0 1 this this 0\n",
            "1 2 is is 0\n",
            "2 3 a a 0\n",
            "2 4 <eps> <eps> 0\n",
            "3 5 <eps> <eps> 0\n",
            "4 5 <eps> <eps> 0\n",
            "5 6 test test 0\n",
            "6 7 <eps> <eps> 0\n",
            "7\n",
        ]
    )
    vocabulary = {"<eps>": 0, "this": 1, "is": 2, "a": 3, "test": 4}
    expected = GraphFsts(
        intent_fsts={"TestIntent": fst_text},
        symbols=dict(vocabulary),
        input_symbols=dict(vocabulary),
        output_symbols=dict(vocabulary),
    )
    self.assertEqual(fsts, expected)
def test_substitution(self):
    """Check the FST produced for one sentence that contains a substitution.

    "test" is expected only among the input symbols and "sub" only among
    the output symbols; both appear in the combined symbol table.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a test:sub """
    graph = intents_to_graph(parse_ini(ini_text))
    fsts = graph_to_fsts(graph)

    fst_text = "".join(
        [
            "0 1 this this 0\n",
            "1 2 is is 0\n",
            "2 3 a a 0\n",
            "3 4 test <eps> 0\n",
            "4 5 <eps> sub 0\n",
            "5 6 <eps> <eps> 0\n",
            "6\n",
        ]
    )
    shared = {"<eps>": 0, "this": 1, "is": 2, "a": 3}
    expected = GraphFsts(
        intent_fsts={"TestIntent": fst_text},
        symbols={**shared, "test": 4, "sub": 5},
        input_symbols={**shared, "test": 4},
        output_symbols={**shared, "sub": 5},
    )
    self.assertEqual(fsts, expected)
def test_single_sentence(self):
    """Check ``graph_to_fsts`` output for one intent with one sentence."""
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a test """
    graph = intents_to_graph(parse_ini(ini_text))
    fsts = graph_to_fsts(graph)

    fst_text = "".join(
        [
            "0 1 this this 0\n",
            "1 2 is is 0\n",
            "2 3 a a 0\n",
            "3 4 test test 0\n",
            "4 5 <eps> <eps> 0\n",
            "5\n",
        ]
    )
    vocabulary = {"<eps>": 0, "this": 1, "is": 2, "a": 3, "test": 4}
    expected = GraphFsts(
        intent_fsts={"TestIntent": fst_text},
        symbols=dict(vocabulary),
        input_symbols=dict(vocabulary),
        output_symbols=dict(vocabulary),
    )
    self.assertEqual(fsts, expected)
def test_one_weight(self):
    """Check that a single intent gets an intent label edge with weight 0."""
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [TestIntent] this is a test """
    graph = intents_to_graph(parse_ini(ini_text))
    fst = graph_to_fst(graph)

    expected_text = "".join(
        [
            "0 1 <eps> __label__TestIntent 0\n",
            "1 2 this this 0\n",
            "2 3 is is 0\n",
            "3 4 a a 0\n",
            "4 5 test test 0\n",
            "5 6 <eps> <eps> 0\n",
            "6\n",
        ]
    )
    # Symbols that appear on the input side
    plain_symbols = {"<eps>": 0, "this": 2, "is": 3, "a": 4, "test": 5}
    # The full table adds the intent label
    all_symbols = {"__label__TestIntent": 1, **plain_symbols}
    expected = GraphFst(
        intent_fst=expected_text,
        symbols=dict(all_symbols),
        input_symbols=dict(plain_symbols),
        output_symbols=dict(all_symbols),
    )
    self.assertEqual(fst, expected)
def test_final_optional_entity(self):
    """Check that a final optional entity keeps its tag when parsed.

    The expected parse nests a tagged group, holding the "page"/"layer"
    alternatives, inside an alternative whose other item is an empty word.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [ChangeDisplay] display [(page | layer){layout}] """
    parsed = parse_ini(ini_text)

    # Expected parts are built in the order they occur in the sentence
    display_word = Word("display")
    layout_tag = Tag(tag_text="layout")
    choices = Sequence(
        text="page | layer",
        type=SequenceType.ALTERNATIVE,
        items=[Word("page"), Word("layer")],
    )
    tagged_group = Sequence(
        text="page | layer",
        type=SequenceType.GROUP,
        tag=layout_tag,
        items=[choices],
    )
    optional_part = Sequence(
        text="(page | layer){layout}",
        type=SequenceType.ALTERNATIVE,
        items=[tagged_group, Word("")],
    )
    expected_sentence = Sentence(
        text="display [(page | layer){layout}]",
        items=[display_word, optional_part],
    )
    self.assertEqual(parsed, {"ChangeDisplay": [expected_sentence]})
def test_slot_sequence_replacement(self):
    """Check that word sequences coming from a slot can be substituted.

    "rock" and "hard rock" must both be recognized as "Hard Rock", and
    "classical" as "Classical Music"; every entity must have
    "music_genre" as its source.
    """
    # NOTE(review): the INI literal below has no line breaks between the
    # section header and the sentence; confirm this is intended.
    ini_text = """ [PlayMusic] play me ($music_genre){genre} """
    replacements = {
        "$music_genre": [
            Sentence.parse("(rock | hard rock):(Hard Rock)"),
            Sentence.parse("classical:(Classical Music)"),
        ]
    }
    intents = parse_ini(ini_text)
    graph = intents_to_graph(intents, replacements)

    cases = [
        ("play me rock", "play me Hard Rock"),
        ("play me hard rock", "play me Hard Rock"),
        ("play me classical", "play me Classical Music"),
    ]
    for spoken, expected_text in cases:
        recognitions = zero_times(recognize(spoken, graph, fuzzy=False))
        self.assertEqual(len(recognitions), 1)

        match = recognitions[0]
        self.assertIsNotNone(match.intent)

        # Check sequence substitution
        self.assertEqual(match.text, expected_text)

        # Check entity source
        self.assertEqual(len(match.entities), 1)
        genre = match.entities[0]
        self.assertEqual(genre.source, "music_genre")
def test_intent_counts(self):
    """Check the per-intent sentence counts from ``get_intent_counts``.

    The expected counts are written as products of the number of choices
    at each position, summed over the intent's sentences.
    """
    # NOTE(review): the INI literal below has no line breaks between section
    # headers and sentences; confirm this is intended.
    ini_text = """ [TestIntent1] this [is] [a] test this is [another] test [TestIntent2] this is (my | your| another) test """
    parsed = parse_ini(ini_text)
    counts = get_intent_counts(parsed)

    expected = {
        "TestIntent1": (1 * 2 * 2 * 1) + (1 * 1 * 2 * 1),
        "TestIntent2": (1 * 1 * 3 * 1),
    }
    self.assertEqual(counts, expected)