コード例 #1
0
    def test_fit_predict(self, resource_loader):
        """Fit a text model on the labeled queries and verify two predictions.

        The model is configured with classifier_type "logreg" and the
        bag-of-words, freq and length features. After fitting, "hi" must be
        predicted as "greet" and "bye" as "exit".
        """
        settings = {
            "model_type": "text",
            "example_type": QUERY_EXAMPLE_TYPE,
            "label_type": CLASS_LABEL_TYPE,
            "model_settings": {"classifier_type": "logreg"},
            "params": {"fit_intercept": True, "C": 100},
            "features": {
                "bag-of-words": {"lengths": [1]},
                "freq": {"bins": 5},
                "length": {},
            },
        }
        model = TextModel(ModelConfig(**settings))
        examples = self.labeled_data.queries()
        labels = self.labeled_data.intents()
        model.initialize_resources(resource_loader, examples, labels)
        model.fit(examples, labels)

        # Each probe utterance is paired with the intent it must resolve to.
        for utterance, intent in (("hi", "greet"), ("bye", "exit")):
            probe = markup.load_query(utterance).query
            assert model.predict([probe]) == intent
コード例 #2
0
    def test_with_role(self):
        """Check entity grouping when the marked-up entities carry roles.

        The first query must produce one group headed by the dish; the two
        remaining queries must leave the dish and the option unlinked.
        """

        def parse(marked_up):
            # Load the annotated text, then run the parser over its entities.
            loaded = markup.load_query(marked_up)
            return self.parser.parse_entities(loaded.query, loaded.entities)

        grouped = parse('{large|size} {latte|dish|beverage} {ice|option|beverage}')

        assert len(grouped) == 3
        size, dish, option = grouped
        assert size.parent == dish
        assert dish.children == (size, option)

        unlinked_queries = (
            'I’d like a {muffin|dish|baked_good} with {no sugar|option|beverage}',
            'I’d like a {latte|dish|beverage} with {maple syrup|option|general}',
        )
        for marked_up in unlinked_queries:
            pair = parse(marked_up)

            assert len(pair) == 2
            assert pair[0].children is None
            assert pair[1].parent is None
コード例 #3
0
    def test_char_lstm(self, resource_loader):
        """Fit an "lstm" classifier with the char tokenizer, twice.

        The first run uses the base params; the second adds
        "add_terminals". In both runs the prediction for "hi" must be one
        of the two known intents.
        """
        base_params = {"emb_dim": 30, "tokenizer_type": "char-tokenizer"}
        base_settings = {
            "model_type": "text",
            "example_type": QUERY_EXAMPLE_TYPE,
            "label_type": CLASS_LABEL_TYPE,
            "model_settings": {"classifier_type": "lstm"},
            "params": base_params,
        }
        examples = self.labeled_data.queries()
        labels = self.labeled_data.intents()

        def fit_and_predict(settings):
            # Build, fit and query a fresh model for the given settings.
            model = ModelFactory.create_model_from_config(ModelConfig(**settings))
            model.initialize_resources(resource_loader, examples, labels)
            model.fit(examples, labels)
            return model.predict([markup.load_query("hi").query])[0]

        assert fit_and_predict(base_settings) in ["greet", "exit"]

        terminal_settings = dict(
            base_settings, params=dict(base_params, add_terminals="True")
        )
        assert fit_and_predict(terminal_settings) in ["greet", "exit"]
コード例 #4
0
    def test_fit_predict(self, resource_loader):
        """Train a "logreg" text model and check the intents it predicts.

        Examples and labels are read from the ``query`` and ``intent``
        attributes of every item in ``self.labeled_data``.
        """
        feature_spec = {
            'bag-of-words': {'lengths': [1]},
            'freq': {'bins': 5},
            'length': {},
        }
        config = ModelConfig(
            model_type='text',
            example_type=QUERY_EXAMPLE_TYPE,
            label_type=CLASS_LABEL_TYPE,
            model_settings={'classifier_type': 'logreg'},
            params={'fit_intercept': True, 'C': 100},
            features=feature_spec,
        )
        model = TextModel(config)
        examples = [item.query for item in self.labeled_data]
        labels = [item.intent for item in self.labeled_data]
        model.initialize_resources(resource_loader, examples, labels)
        model.fit(examples, labels)

        greeting = markup.load_query('hi').query
        assert model.predict([greeting]) == 'greet'
        farewell = markup.load_query('bye').query
        assert model.predict([farewell]) == 'exit'
コード例 #5
0
ファイル: test_text_models.py プロジェクト: deanp70/mindmeld
    def test_extract_features(self, resource_loader):
        """Fit a unigram bag-of-words model and inspect the extracted features.

        For the query "hi there" the test expects one feature for the ngram
        "hi" and one for the ngram "OOV".
        """
        settings = dict(
            model_type="text",
            example_type=QUERY_EXAMPLE_TYPE,
            label_type=CLASS_LABEL_TYPE,
            model_settings={"classifier_type": "logreg"},
            params={"fit_intercept": True, "C": 100},
            features={"bag-of-words": {"lengths": [1]}},
        )
        model = TextModel(ModelConfig(**settings))
        examples = self.labeled_data.queries()
        labels = self.labeled_data.intents()
        model.initialize_resources(resource_loader, examples, labels)
        model.fit(examples, labels)

        probe = markup.load_query("hi there").query
        assert model.view_extracted_features(probe) == {
            "bag_of_words|length:1|ngram:hi": 1,
            "bag_of_words|length:1|ngram:OOV": 1,
        }
コード例 #6
0
def test_load_system_role(query_factory):
    """Load a query whose two sys_time entities each carry a role.

    Both entities are single characters, so each span starts and ends on
    the same index of the raw text.
    """
    text = (
        "What stores are open between {3|sys_time|open_hours} and "
        "{5|sys_time|close_hours}"
    )

    processed_query = markup.load_query(text, query_factory)

    assert len(processed_query.entities) == 2

    # (character index, entity text, role) for each entity, in order.
    expectations = ((29, "3", "open_hours"), (35, "5", "close_hours"))
    for index, (position, digit, role) in enumerate(expectations):
        entity = processed_query.entities[index]
        assert entity.span.start == position
        assert entity.span.end == position
        assert entity.normalized_text == digit
        assert entity.entity.type == "sys_time"
        assert entity.entity.text == digit
        assert entity.entity.role == role
コード例 #7
0
def test_entity_values(query_factory, query, expected_values):
    """Check that the first entity's value exposes the expected alternate values.

    ``query`` and ``expected_values`` are presumably supplied by a
    parametrize decorator outside this view.
    """
    # The fixed timestamp corresponds to 2020-06-12T11:00:39.000-07:00.
    options = {"timestamp": 1591984839}
    loaded = markup.load_query(query, query_factory, query_options=options)
    resolved = loaded.entities[0].entity.value
    assert "alternate_values" in resolved
    assert resolved['alternate_values'] == expected_values
コード例 #8
0
def test_load_system(
    query_factory,
    query,
    entity_value,
    entity_type,
    entity_start_span,
    value,
    unit_or_grain,
):
    """Load a query holding one system entity and verify how it resolved.

    The arguments other than ``query_factory`` are presumably supplied by
    a parametrize decorator outside this view. For "sys_amount-of-money"
    the resolved value must equal a unit/value mapping; for every other
    type the grain must match and ``value`` must occur in the resolved
    value.
    """
    # A pm timestamp biases duckling's time resolution towards pm times.
    options = {"timestamp": 1592002800}
    loaded = markup.load_query(query, query_factory, query_options=options)

    assert loaded
    assert len(loaded.entities) == 1

    found = loaded.entities[0]
    assert found.text == entity_value
    assert found.entity.type == entity_type
    assert found.span.start == entity_start_span
    assert not isinstance(found.entity.value, str)

    if entity_type != "sys_amount-of-money":
        assert found.entity.value["grain"] == unit_or_grain
        assert value in found.entity.value["value"]
    else:
        assert found.entity.value == {"unit": unit_or_grain, "value": value}
コード例 #9
0
ファイル: test_parser.py プロジェクト: wjnbreu/mindmeld
def test_parser_timeout():
    """Expect ParserTimeout when parsing is not allowed to handle timeouts.

    The query below packs many entities into one utterance (the original
    note describes such queries as very ambiguous and slow to parse);
    with ``handle_timeout=False`` the parser must raise.
    """
    dependents = {
        "form": dict(max_instances=1),
        "size": dict(max_instances=1),
        "number": dict(max_instances=1, right=False),
        "option": dict(linking_words=["with"]),
    }
    parser = Parser(config={"name": dependents})

    text = (
        "{venti|size} {jade citrus|name} with {one|number} bag of "
        "{peach tranquility|name} and {one|number} bag {jade citrus|name} "
        "{2 pumps peppermint|option} {no hot water|option} sub {steamed|option} "
        "{lemonade|option} {4|number} {honeys|option}"
    )
    loaded = markup.load_query(text)

    with pytest.raises(ParserTimeout):
        parser.parse_entities(loaded.query, loaded.entities, handle_timeout=False)
コード例 #10
0
 def setup_class(cls):
     """Build the labeled "greet"/"exit" queries shared by the tests.

     Every utterance is loaded through ``markup.load_query`` with its
     intent, and the resulting list is stored on ``cls.labeled_data``.
     """
     utterances_by_intent = {
         'greet': [
             'Hello',
             'Hello!',
             'hey',
             "what's up",
             'greetings',
             'yo',
             'hi',
             'hey, how are you?',
             'hola',
             'start',
         ],
         # Every entry needs its own trailing comma: adjacent string
         # literals without one are silently concatenated into a single
         # utterance (previously 'talk to you laterlater' and
         # "gotta goI'm leaving").
         'exit': [
             'bye',
             'goodbye',
             'until next time',
             'see ya later',
             'ttyl',
             'talk to you later',
             'later',
             'have a nice day',
             'finish',
             'gotta go',
             "I'm leaving",
             "I'm done",
             "that's all",
         ],
     }
     cls.labeled_data = [
         markup.load_query(text, intent=intent)
         for intent, utterances in utterances_by_intent.items()
         for text in utterances
     ]
コード例 #11
0
def test_load_nested_4(query_factory):
    """Load a query whose price entity nests two sys_number entities.

    The outer entity's span is checked against the full text, while the
    children's spans are checked against offsets inside the outer entity.
    """
    text = "show me houses {between {600,000|sys_number} and {1,000,000|sys_number} dollars|price}"
    loaded = markup.load_query(text, query_factory)

    assert loaded
    assert len(loaded.entities) == 1

    price = loaded.entities[0]
    assert price.text == "between 600,000 and 1,000,000 dollars"
    assert price.entity.type == "price"
    assert price.span == Span(15, 51)

    assert not isinstance(price.entity.value, str)
    assert "children" in price.entity.value
    assert len(price.entity.value["children"]) == 2
    lower, upper = price.entity.value["children"]

    # (child, expected text, expected number, expected span) per bound.
    bounds = (
        (lower, "600,000", 600000, Span(8, 14)),
        (upper, "1,000,000", 1000000, Span(20, 28)),
    )
    for child, raw_text, number, span in bounds:
        assert child.text == raw_text
        assert child.entity.value == {"value": number}
        assert child.span == span
コード例 #12
0
ファイル: test_text_models.py プロジェクト: deanp70/mindmeld
 def setup_class(cls):
     """Build the labeled "greet"/"exit" queries shared by the tests.

     Every utterance is loaded through ``markup.load_query`` with its
     intent; the loaded queries are wrapped with
     ``ProcessedQueryList.from_in_memory_list`` and stored on
     ``cls.labeled_data``.
     """
     utterances_by_intent = {
         "greet": [
             "Hello",
             "Hello!",
             "hey",
             "what's up",
             "greetings",
             "yo",
             "hi",
             "hey, how are you?",
             "hola",
             "start",
         ],
         # Every entry needs its own trailing comma: adjacent string
         # literals without one are silently concatenated into a single
         # utterance (previously "talk to you laterlater" and
         # "gotta goI'm leaving").
         "exit": [
             "bye",
             "goodbye",
             "until next time",
             "see ya later",
             "ttyl",
             "talk to you later",
             "later",
             "have a nice day",
             "finish",
             "gotta go",
             "I'm leaving",
             "I'm done",
             "that's all",
         ],
     }
     loaded_queries = [
         markup.load_query(text, intent=intent)
         for intent, utterances in utterances_by_intent.items()
         for text in utterances
     ]
     cls.labeled_data = ProcessedQueryList.from_in_memory_list(loaded_queries)
コード例 #13
0
def test_load_basic_query(query_factory):
    """Load plain text without entity markup and expect a truthy result.

    Both the loaded object and its ``query`` attribute must be truthy.
    """
    loaded = markup.load_query("This is a test query string", query_factory)
    assert loaded
    assert loaded.query
コード例 #14
0
def test_parser_timeout():
    """Expect ParserTimeout when parsing is not allowed to handle timeouts.

    The query below packs many entities into one utterance (the original
    note describes such queries as very ambiguous and slow to parse);
    with ``handle_timeout=False`` the parser must raise.
    """
    name_dependents = {}
    name_dependents['form'] = {'max_instances': 1}
    name_dependents['size'] = {'max_instances': 1}
    name_dependents['number'] = {'max_instances': 1, 'right': False}
    name_dependents['option'] = {'linking_words': ['with']}
    parser = Parser(config={'name': name_dependents})

    text = (
        '{venti|size} {jade citrus|name} with {one|number} bag of '
        '{peach tranquility|name} and {one|number} bag {jade citrus|name} '
        '{2 pumps peppermint|option} {no hot water|option} sub {steamed|option} '
        '{lemonade|option} {4|number} {honeys|option}'
    )
    loaded = markup.load_query(text)

    with pytest.raises(ParserTimeout):
        parser.parse_entities(loaded.query, loaded.entities, handle_timeout=False)
コード例 #15
0
    def test_extract_features(self, resource_loader):
        """Fit a unigram bag-of-words model and inspect the extracted features.

        For the query 'hi there' the test expects one feature for the ngram
        'hi' and one for the ngram 'there'.
        """
        config = ModelConfig(
            model_type='text',
            example_type=QUERY_EXAMPLE_TYPE,
            label_type=CLASS_LABEL_TYPE,
            model_settings={'classifier_type': 'logreg'},
            params={'fit_intercept': True, 'C': 100},
            features={'bag-of-words': {'lengths': [1]}},
        )
        model = TextModel(config)
        examples = [item.query for item in self.labeled_data]
        labels = [item.intent for item in self.labeled_data]
        model.initialize_resources(resource_loader, examples, labels)
        model.fit(examples, labels)

        probe = markup.load_query('hi there').query
        assert model.view_extracted_features(probe) == {
            'bag_of_words|length:1|ngram:hi': 1,
            'bag_of_words|length:1|ngram:there': 1,
        }
コード例 #16
0
    def test_right(self):
        """A dependent to the right of a head must become that head's child."""
        loaded = markup.load_query('{Hello|head} {there|dependent}')
        parsed = self.parser.parse_entities(loaded.query, loaded.entities)

        assert len(parsed) == 2
        head, dependent = parsed
        assert head.children == (dependent, )
        assert dependent.parent == head
コード例 #17
0
    def test_standalone_option(self):
        """An option with no dish present must still head its own group."""
        loaded = markup.load_query('{light|size} {ice|option}')
        parsed = self.parser.parse_entities(loaded.query, loaded.entities)

        assert len(parsed) == 2
        size, option = parsed
        assert size.parent == option
        assert option.children == (size, )
コード例 #18
0
    def test_generic(self):
        """Check grouping for a query where only the dish entity has a role.

        The option must be attached to the dish as its single child.
        """
        marked_up = '{noodles|dish|main_course} with {tofu|option}'
        loaded = markup.load_query(marked_up)
        parsed = self.parser.parse_entities(loaded.query, loaded.entities)

        assert len(parsed) == 2
        dish, option = parsed
        assert dish.children == (option, )
        assert option.parent == dish
コード例 #19
0
    def test_singleton(self):
        """A lone head with no dependents must come back unchanged.

        The parser is invoked with ``timeout=None`` and its result must
        equal the entities that were loaded from the markup.
        """
        loaded = markup.load_query('Hello {there|head}')

        parsed = self.parser.parse_entities(
            loaded.query, loaded.entities, timeout=None
        )

        assert parsed == loaded.entities
コード例 #20
0
    def test_distance(self):
        """A dependent sitting between two heads must attach to the first one.

        The second head, separated from the dependent by an extra word,
        must end up with no children.
        """
        marked_up = '{Hello|head} {there|dependent} my {friend|head}'
        loaded = markup.load_query(marked_up)
        parsed = self.parser.parse_entities(loaded.query, loaded.entities)

        assert len(parsed) == 3
        near_head, dependent, far_head = parsed
        assert near_head.children == (dependent, )
        assert dependent.parent == near_head
        assert far_head.children is None
コード例 #21
0
    def test_default_embedder(self, resource_loader):
        """Fit the "embedder" classifier under three pooling settings.

        The first run leaves "embedder_output_pooling_type" unset (the
        original comment states the default is "mean"); the next two runs
        set it to "first" and then "last". Every run must predict one of
        the two known intents for "hi".
        """
        settings = {
            "model_type": "text",
            "example_type": QUERY_EXAMPLE_TYPE,
            "label_type": CLASS_LABEL_TYPE,
            "model_settings": {"classifier_type": "embedder"},
            "params": {"emb_dim": 5},
        }
        examples = self.labeled_data.queries()
        labels = self.labeled_data.intents()

        def fit_and_predict(current):
            # Build, fit and query a fresh model for the given settings.
            model = ModelFactory.create_model_from_config(ModelConfig(**current))
            model.initialize_resources(resource_loader, examples, labels)
            model.fit(examples, labels)
            return model.predict([markup.load_query("hi").query])[0]

        assert fit_and_predict(settings) in ["greet", "exit"]

        for pooling in ("first", "last"):
            # Each round layers the pooling type over the previous settings.
            settings = {
                **settings,
                "params": {
                    **settings["params"],
                    "embedder_output_pooling_type": pooling,
                },
            }
            assert fit_and_predict(settings) in ["greet", "exit"]
コード例 #22
0
    def test_link_word(self):
        """The option after "with" must attach to the pizza, not a nearer dish.

        The two remaining dishes must end up with no children.
        """
        marked_up = 'A {pizza|dish} with {olives|option}, {breadsticks|dish} and a {coke|dish}'
        loaded = markup.load_query(marked_up)
        parsed = self.parser.parse_entities(loaded.query, loaded.entities)

        assert len(parsed) == 4
        pizza, olives, breadsticks, coke = parsed
        assert pizza.children == (olives, )
        assert olives.parent == pizza
        assert breadsticks.children is None
        assert coke.children is None
コード例 #23
0
    def test_nested(self):
        """An option that heads its own group must also nest under a dish.

        The dish gets the first size and the option as children, while the
        second size is attached to the option.
        """
        marked_up = '{large|size} {latte|dish} {light|size} {ice|option}'
        loaded = markup.load_query(marked_up)
        parsed = self.parser.parse_entities(loaded.query, loaded.entities)

        assert len(parsed) == 4
        dish_size, dish, option_size, option = parsed
        assert dish_size.parent == dish
        assert dish.children == (dish_size, option)
        assert option_size.parent == option
        assert option.children == (option_size, )
コード例 #24
0
def test_load_dump_groups_roles(query_factory):
    """Round-trip markup with groups and roles through load and dump.

    Dumping the loaded query must reproduce the original markup text.
    """
    text = (
        "Order [{one|sys_number|quantity} {large|size} {Tesora|product|dish} with "
        "[{medium|size} {cream|option|addin}|option]|product]"
    )

    loaded = markup.load_query(text, query_factory)
    dumped = markup.dump_query(loaded)

    assert text == dumped
コード例 #25
0
    def test_max_instances(self):
        """Only one of the two sizes may be attached to the dish.

        The first size must stay without a parent, while the second size
        becomes the dish's only child.
        """
        loaded = markup.load_query('{light|size} {medium|size} {latte|dish}')

        parsed = self.parser.parse_entities(loaded.query, loaded.entities)

        assert len(parsed) == 3
        spare_size, attached_size, dish = parsed
        assert spare_size.parent is None
        assert attached_size.parent == dish
        assert dish.children == (attached_size, )
コード例 #26
0
def test_load_dump_groups(query_factory):
    """Round-trip markup with nested groups through load and dump.

    Dumping the loaded query must reproduce the original markup text.
    """
    text = (
        "Order [{one|quantity} {large|size} {Tesora|product} with "
        "[{medium|size} {cream|option}|option]|product] from "
        "[{Philz|store} in {Downtown Sunnyvale|location}|store]"
    )

    loaded = markup.load_query(text, query_factory)
    dumped = markup.dump_query(loaded)

    assert text == dumped
コード例 #27
0
def test_load_special_chars_6(query_factory):
    """Load a nested entity whose text contains periods.

    The single resulting entity must keep its raw text, drop the periods
    in its normalized text, and span the whole "after 8 p.m." phrase.
    """
    loaded = markup.load_query(
        "what's on {after {8 p.m.|sys_time}|range}?", query_factory
    )
    found = loaded.entities

    assert len(found) == 1

    time_range = found[0]
    assert time_range.text == "after 8 p.m."
    assert time_range.normalized_text == "after 8 p m"
    assert time_range.span == Span(10, 21)
コード例 #28
0
    def test_link_word_negative(self):
        """The word "with" must not pull a size onto the preceding dish.

        Exactly one entity may be a parentless head with children: the size
        attaches to the dish that follows it, and the first dish stays
        without children.
        """
        marked_up = 'A {pepperoni pizza|dish} with {large|size} {coke|dish}'
        loaded = markup.load_query(marked_up)
        parsed = self.parser.parse_entities(loaded.query, loaded.entities)

        group_heads = [
            item
            for item in parsed
            if item.parent is None and item.children is not None
        ]
        assert len(group_heads) == 1
        assert parsed[0].children is None
        assert parsed[1].parent == parsed[2]
        assert parsed[2].children == (parsed[1], )
コード例 #29
0
def test_load_special_chars_5(query_factory):
    """Load a query with repeated spaces, a stray comma and a nested entity.

    The single resulting entity must span the characters of "8pm" in the
    raw text.
    """
    loaded = markup.load_query(
        "what christmas movies   are  , showing at {{8pm|sys_time}|range}",
        query_factory,
    )

    assert len(loaded.entities) == 1

    time_range = loaded.entities[0]

    assert time_range.span == Span(42, 44)
    assert time_range.normalized_text == "8pm"
コード例 #30
0
def test_load_special_chars(query_factory):
    """Load an entity whose text contains periods.

    The raw text must be kept as written, the normalized text must have
    the periods removed, and the span must cover the raw entity text.
    """
    loaded = markup.load_query("play {s.o.b.|track}", query_factory)
    found = loaded.entities

    assert len(found)
    track = found[0]
    assert track.text == "s.o.b."
    assert track.normalized_text == "s o b"
    assert track.span.start == 5
    assert track.span.end == 10