def testConsistentExpandWithSameOverlapMultipleTimes(self):
    """Expanding the same overlapping tags should give a stable best parse.

    example: play season 1 of the big bang theory
             play season one of the big bang theory

    series should contain two instances of the big bang theory
    """
    utterance = "play season 1 of the big bang theory"
    tags = self.tagger.tag(utterance)

    def score_clique(clique):
        # confidence-weighted share of the utterance covered by the clique
        denominator = len(utterance) + 1
        total = 0.0
        for entity_tag in clique:
            entity = entity_tag.get(
                'entities', [{'confidence': 0.0, 'match': ''}])[0]
            total += (entity.get('confidence') *
                      len(entity.get('match')) / denominator)
        return total

    expander = BronKerboschExpander(self.tokenizer)
    parse_results = list(
        expander.expand(tags, clique_scoring_func=score_clique))
    assert len(parse_results) == 6

    best = parse_results[0]
    result_text = ' '.join(tag.get('entities')[0].get('key') for tag in best)
    result_parse = ', '.join(
        tag.get('entities')[0].get('data')[0][1] for tag in best)
    assert result_text == 'play season 1 the big bang theory'
def parse(self, utterance, context=None, N=1):
    """Parse an utterance into scored tag cliques.

    Args:
        utterance(str): conversational piece given by the user
        context(list): a list of entities; each entity is a dict with
            a 'data' list of (value, type) pairs and an optional
            'confidence'
        N(int): maximum number of parse results to yield

    Yields:
        dict with 'utterance', 'tags', 'time' and 'confidence' fields
        describing one candidate parse of the utterance.
    """
    start = time.time()
    context_trie = None
    if context and isinstance(context, list):
        # sort by confidence in ascending order, so
        # highest confidence for an entity is last.
        # see comment on TrieNode ctor
        # default to 0.0 so a missing 'confidence' doesn't raise a
        # TypeError when sorting (None is not orderable in Python 3)
        context.sort(key=lambda x: x.get('confidence', 0.0))
        context_trie = Trie()
        for entity in context:
            entity_value, entity_type = entity.get('data')[0]
            context_trie.insert(entity_value.lower(),
                                data=(entity_value, entity_type),
                                weight=entity.get('confidence'))

    tagged = self._tagger.tag(utterance.lower(), context_trie=context_trie)
    self.emit("tagged_entities", {
        'utterance': utterance,
        'tags': list(tagged),
        'time': time.time() - start
    })
    start = time.time()
    bke = BronKerboschExpander(self._tokenizer)

    def score_clique(clique):
        # confidence-weighted share of the utterance covered by the clique
        score = 0.0
        for tagged_entity in clique:
            ec = tagged_entity.get(
                'entities', [{'confidence': 0.0}])[0].get('confidence')
            score += ec * len(tagged_entity.get(
                'entities', [{'match': ''}])[0].get('match')) / (
                    len(utterance) + 1)
        return score

    parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
    count = 0
    for result in parse_results:
        count += 1
        parse_confidence = 0.0
        for tag in result:
            sample_entity = tag['entities'][0]
            # default '' keeps len() safe if an entity lacks 'match'
            entity_confidence = sample_entity.get('confidence', 0.0) * float(
                len(sample_entity.get('match', ''))) / len(utterance)
            parse_confidence += entity_confidence
        yield {
            'utterance': utterance,
            'tags': result,
            'time': time.time() - start,
            'confidence': parse_confidence
        }
        if count >= N:
            break
def testConsistentExpandWithSameOverlapMultipleTimes(self):
    """Repeated overlaps expand deterministically to the same top parse.

    example: play season 1 of the big bang theory
             play season one of the big bang theory

    series should contain two instances of the big bang theory
    """
    utterance = "play season 1 of the big bang theory"
    tags = self.tagger.tag(utterance)
    denom = len(utterance) + 1

    def score_clique(clique):
        # sum of confidence * match-length, normalized by utterance length
        clique_score = 0.0
        for member in clique:
            first = member.get('entities',
                               [{'confidence': 0.0, 'match': ''}])[0]
            clique_score += (first.get('confidence') *
                             len(first.get('match')) / denom)
        return clique_score

    expander = BronKerboschExpander(self.tokenizer)
    parse_results = list(
        expander.expand(tags, clique_scoring_func=score_clique))
    assert len(parse_results) == 6

    top_parse = parse_results[0]
    result_text = ' '.join(
        [tag.get('entities')[0].get('key') for tag in top_parse])
    result_parse = ', '.join(
        [tag.get('entities')[0].get('data')[0][1] for tag in top_parse])
    assert result_text == 'play season 1 the big bang theory'
def parse(self, utterance, relevance_store=None, N=1):
    """Parse an utterance into scored tag cliques.

    Args:
        utterance(str): conversational piece given by the user
        relevance_store: unused; kept for interface compatibility
        N(int): maximum number of parse results to yield

    Yields:
        dict with 'utterance', 'tags', 'time' and 'confidence' fields
        describing one candidate parse of the utterance.
    """
    start = time.time()
    tagged = self._tagger.tag(utterance.lower())
    self.emit("tagged_entities", {"utterance": utterance, "tags": list(tagged), "time": time.time() - start})
    start = time.time()
    bke = BronKerboschExpander(self._tokenizer)

    def score_clique(clique):
        # confidence-weighted share of the utterance covered by the clique
        score = 0.0
        for tagged_entity in clique:
            ec = tagged_entity.get("entities", [{"confidence": 0.0}])[0].get("confidence")
            score += ec * len(tagged_entity.get("entities", [{"match": ""}])[0].get("match")) / (len(utterance) + 1)
        return score

    parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
    count = 0
    for result in parse_results:
        count += 1
        parse_confidence = 0.0
        for tag in result:
            sample_entity = tag["entities"][0]
            # default "" keeps len() safe if an entity lacks "match"
            entity_confidence = (
                sample_entity.get("confidence", 0.0) * float(len(sample_entity.get("match", ""))) / len(utterance)
            )
            parse_confidence += entity_confidence
        yield {"utterance": utterance, "tags": result, "time": time.time() - start, "confidence": parse_confidence}
        if count >= N:
            break
def testExpandWithRegexAndLiteralTokenMatch(self):
    """Two tags on the same token with different confidence expand to
    two cliques."""
    def spell_tag(confidence, data):
        # both tags cover token 0 with the key u'spell'
        return {
            'end_token': 0,
            'start_token': 0,
            'key': u'spell',
            'match': u'spell',
            'entities': [{
                'confidence': confidence,
                'data': [data],
                'match': u'spell',
                'key': u'spell'
            }]
        }

    tags = [
        spell_tag(0.5, u'SearchTerms'),
        spell_tag(1.0, u'SpellingKeyword'),
    ]
    expander = BronKerboschExpander(self.tokenizer)
    cliques = list(expander._sub_expand(tags))
    assert len(cliques) == 2
def parse(self, utterance, relevance_store=None, N=1):
    """Parse an utterance into scored tag cliques.

    Args:
        utterance(str): conversational piece given by the user
        relevance_store: unused; kept for interface compatibility
        N(int): maximum number of parse results to yield

    Yields:
        dict with 'utterance', 'tags', 'time' and 'confidence' fields
        describing one candidate parse of the utterance.
    """
    start = time.time()
    tagged = self._tagger.tag(utterance.lower())
    self.emit(
        "tagged_entities", {
            'utterance': utterance,
            'tags': list(tagged),
            'time': time.time() - start
        })
    start = time.time()
    bke = BronKerboschExpander(self._tokenizer)

    def score_clique(clique):
        # confidence-weighted share of the utterance covered by the clique
        score = 0.0
        for tagged_entity in clique:
            ec = tagged_entity.get('entities', [{
                'confidence': 0.0
            }])[0].get('confidence')
            score += ec * len(
                tagged_entity.get('entities', [{
                    'match': ''
                }])[0].get('match')) / (len(utterance) + 1)
        return score

    parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
    count = 0
    for result in parse_results:
        count += 1
        parse_confidence = 0.0
        for tag in result:
            sample_entity = tag['entities'][0]
            # default '' keeps len() safe if an entity lacks 'match'
            entity_confidence = sample_entity.get(
                'confidence', 0.0) * float(len(
                    sample_entity.get('match', ''))) / len(utterance)
            parse_confidence += entity_confidence
        yield {
            'utterance': utterance,
            'tags': result,
            'time': time.time() - start,
            'confidence': parse_confidence
        }
        if count >= N:
            break
def parse(self, utterance, context=None, N=1):
    """Parse an utterance into scored tag cliques.

    Args:
        utterance(str): conversational piece given by the user
        context(list): a list of entities; each entity is a dict with
            a 'data' list of (value, type, metadata) triples and an
            optional 'confidence'
        N(int): maximum number of parse results to yield

    Yields:
        dict with 'utterance', 'tags', 'time' and 'confidence' fields
        describing one candidate parse of the utterance.
    """
    start = time.time()
    context_trie = None
    if context and isinstance(context, list):
        # sort by confidence in ascending order, so
        # highest confidence for an entity is last.
        # see comment on TrieNode ctor
        # default to 0.0 so a missing 'confidence' doesn't raise a
        # TypeError when sorting (None is not orderable in Python 3)
        context.sort(key=lambda x: x.get('confidence', 0.0))
        context_trie = Trie()
        for entity in context:
            entity_value, entity_type, metadata = entity.get('data')[0]
            context_trie.insert(entity_value.lower(),
                                data=(entity_value, entity_type, metadata),
                                weight=entity.get('confidence'))

    tagged = self._tagger.tag(utterance.lower(), context_trie=context_trie)
    self.emit(
        "tagged_entities", {
            'utterance': utterance,
            'tags': list(tagged),
            'time': time.time() - start
        })
    start = time.time()
    bke = BronKerboschExpander(self._tokenizer)

    def score_clique(clique):
        # confidence-weighted share of the utterance covered by the clique
        score = 0.0
        for tagged_entity in clique:
            ec = tagged_entity.get('entities', [{
                'confidence': 0.0
            }])[0].get('confidence')
            score += ec * len(
                tagged_entity.get('entities', [{
                    'match': ''
                }])[0].get('match')) / (len(utterance) + 1)
        return score

    parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
    count = 0
    for result in parse_results:
        count += 1
        parse_confidence = 0.0
        for tag in result:
            sample_entity = tag['entities'][0]
            # default '' keeps len() safe if an entity lacks 'match'
            entity_confidence = sample_entity.get(
                'confidence', 0.0) * float(len(
                    sample_entity.get('match', ''))) / len(utterance)
            parse_confidence += entity_confidence
        yield {
            'utterance': utterance,
            'tags': result,
            'time': time.time() - start,
            'confidence': parse_confidence
        }
        if count >= N:
            break
def testExpandedResult(self):
    """Tagging 'season 1' expands to exactly one parse with two tags."""
    expanded = list(
        BronKerboschExpander(self.tokenizer).expand(
            self.tagger.tag("season 1")))
    assert len(expanded) == 1
    assert len(expanded[0]) == 2
def testExpander(self):
    """With exact matches only, the sample utterance yields six parses."""
    # disable fuzzy matching so only literal matches are tagged
    self.tagger.trie.max_edit_distance = 0
    expanded = list(
        BronKerboschExpander(self.tokenizer).expand(
            self.tagger.tag("play season 1 of the big bang theory")))
    assert len(expanded) == 6
def parse(self, utterance, context=None, N=1):
    """Used to find tags within utterance with a given confidence

    Args:
        utterance(str): conversational piece given by the user
        context(list): a list of entities
        N(int): number of results

    Returns:
        yield an object with the following fields
            utterance(str): the value passed in
            tags(list) : a list of tags found in utterance
            time(time) : duration since call of function
            confidence(float) : float indicating how confident of a match
                to the utterance. This might be used to determine the
                most likely intent.
    """
    start = time.time()
    context_trie = None
    if context and isinstance(context, list):
        # sort by confidence in ascending order, so
        # highest confidence for an entity is last.
        # see comment on TrieNode ctor
        # default to 0.0 so a missing 'confidence' doesn't raise a
        # TypeError when sorting (None is not orderable in Python 3)
        context.sort(key=lambda x: x.get('confidence', 0.0))
        context_trie = Trie()
        for entity in context:
            entity_value, entity_type = entity.get('data')[0]
            context_trie.insert(entity_value.lower(),
                                data=(entity_value, entity_type),
                                weight=entity.get('confidence'))

    tagged = self._tagger.tag(utterance.lower(), context_trie=context_trie)
    self.emit(
        "tagged_entities", {
            'utterance': utterance,
            'tags': list(tagged),
            'time': time.time() - start
        })
    start = time.time()
    bke = BronKerboschExpander(self._tokenizer)

    def score_clique(clique):
        # confidence-weighted share of the utterance covered by the clique
        score = 0.0
        for tagged_entity in clique:
            ec = tagged_entity.get('entities', [{
                'confidence': 0.0
            }])[0].get('confidence')
            score += ec * len(
                tagged_entity.get('entities', [{
                    'match': ''
                }])[0].get('match')) / (len(utterance) + 1)
        return score

    parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
    count = 0
    for result in parse_results:
        count += 1
        parse_confidence = 0.0
        for tag in result:
            sample_entity = tag['entities'][0]
            # default '' keeps len() safe if an entity lacks 'match'
            entity_confidence = sample_entity.get(
                'confidence', 0.0) * float(len(
                    sample_entity.get('match', ''))) / len(utterance)
            parse_confidence += entity_confidence
        yield {
            'utterance': utterance,
            'tags': result,
            'time': time.time() - start,
            'confidence': parse_confidence
        }
        if count >= N:
            break
def parse(self, utterance, context=None, N=1):
    """Used to find tags within utterance with a given confidence

    Args:
        utterance(str): conversational piece given by the user
        context(list): a list of entities
        N(int): number of results

    Returns:
        yield an object with the following fields
            utterance(str): the value passed in
            tags(list) : a list of tags found in utterance
            time(time) : duration since call of function
            confidence(float) : float indicating how confident of a match
                to the utterance. This might be used to determine the
                most likely intent.
    """
    start = time.time()
    context_trie = None
    if context and isinstance(context, list):
        # sort by confidence in ascending order, so
        # highest confidence for an entity is last.
        # see comment on TrieNode ctor
        # default to 0.0 so a missing 'confidence' doesn't raise a
        # TypeError when sorting (None is not orderable in Python 3)
        context.sort(key=lambda x: x.get('confidence', 0.0))
        context_trie = Trie()
        for entity in context:
            entity_value, entity_type = entity.get('data')[0]
            context_trie.insert(entity_value.lower(),
                                data=(entity_value, entity_type),
                                weight=entity.get('confidence'))

    tagged = self._tagger.tag(utterance.lower(), context_trie=context_trie)
    self.emit("tagged_entities", {
        'utterance': utterance,
        'tags': list(tagged),
        'time': time.time() - start
    })
    start = time.time()
    bke = BronKerboschExpander(self._tokenizer)

    def score_clique(clique):
        # confidence-weighted share of the utterance covered by the clique
        score = 0.0
        for tagged_entity in clique:
            ec = tagged_entity.get(
                'entities', [{'confidence': 0.0}])[0].get('confidence')
            score += ec * len(tagged_entity.get(
                'entities', [{'match': ''}])[0].get('match')) / (
                    len(utterance) + 1)
        return score

    parse_results = bke.expand(tagged, clique_scoring_func=score_clique)
    count = 0
    for result in parse_results:
        count += 1
        parse_confidence = 0.0
        for tag in result:
            sample_entity = tag['entities'][0]
            # default '' keeps len() safe if an entity lacks 'match'
            entity_confidence = sample_entity.get('confidence', 0.0) * float(
                len(sample_entity.get('match', ''))) / len(utterance)
            parse_confidence += entity_confidence
        yield {
            'utterance': utterance,
            'tags': result,
            'time': time.time() - start,
            'confidence': parse_confidence
        }
        if count >= N:
            break