def tag_pattern(patterns, utterances):
    """Re-tag each utterance's mapping with any regex-pattern matches.

    For every utterance record, runs ``regex_checker`` over the raw
    utterance text, merges the resulting pattern tags into the existing
    tags from the JSON-encoded ``mapping``, de-duplicates, and sorts the
    tags by their ``start`` offset before re-serializing the mapping.

    Args:
        patterns: pattern definitions forwarded to ``regex_checker``.
        utterances: iterable of utterance records; each record yields
            "utterance", "case_converted_utterance" and a JSON "mapping"
            (with "tokens", "text", "tags", "intent") via ``get``.

    Returns:
        list of dicts with keys "utterance", "case_converted_utterance"
        and "mapping" (JSON string with the merged, sorted tags).
    """
    final_utterances = []
    for record in utterances:  # renamed from `iter`, which shadows the builtin
        utterance = get(record, "utterance")
        case_converted_utterance = get(record, "case_converted_utterance")
        mapping = json.loads(get(record, "mapping"))
        tokens = get(mapping, "tokens")
        text = get(mapping, "text")
        tags = get(mapping, "tags")
        intent = get(mapping, "intent")

        matched = regex_checker(utterance, patterns)
        # Copy instead of aliasing: the original `final_tags = tags` appended
        # into the same list that `check(tags, pat)` inspects mid-loop.
        final_tags = list(tags)
        for pat in matched:
            final_tags.append(check(tags, pat))
        final_tags = remove_duplicates(final_tags)
        final_tags = sorted(final_tags, key=itemgetter('start'))

        mappings = {
            "tokens": tokens,
            "text": text,
            "tags": final_tags,
            "intent": intent
        }
        final_utterances.append({
            "utterance": utterance,
            "case_converted_utterance": case_converted_utterance,
            "mapping": json.dumps(mappings)
        })
    return final_utterances
def test_regex_checker_with_data1(mocker):
    """regex_checker tags a dd-mm-yyyy date in free text with its custom entity."""
    patterns = regex_checker(
        'my birthdate is 27-06-1995',
        [{
            "entity": "custom_date",
            # Raw string: "\s"/"\d" in a plain literal are invalid escape
            # sequences (DeprecationWarning today, an error in future
            # CPython). The resulting pattern text is byte-identical.
            "pattern": r"\s\d{4}-\d{2}-\d{2}|\d{2}-\d{2}-\d{4}"
        }])
    # NOTE(review): regex_checker appears to put the matched text under
    # 'entity' and the entity name under 'tag' — confirm that is intended.
    assert patterns == [{
        'end': 4,
        'entity': '27-06-1995',
        'start': 3,
        'tag': 'custom_date'
    }]
def get_entities_for_ds(self, serviceid, text, datasources_map):
    """Collect pattern, phrase, and predefined entity results for *text*.

    Args:
        serviceid: service identifier (unused here; kept for interface
            compatibility with callers).
        text: the input text to scan.
        datasources_map: dict that may contain "predefined_entities",
            "patterns" and "phrases" lists; missing keys mean none.

    Returns:
        (pattern_response, phrase_response, predefined_tags) — matcher
        output for patterns and phrases (empty lists when there is
        nothing to match against) plus the predefined entity list.
    """
    # dict.get with a default replaces the original `d[k] if k in d else []`
    # double-lookup; behavior is identical.
    predefined_tags = datasources_map.get("predefined_entities", [])
    pattern_entities = datasources_map.get("patterns", [])
    phrase_entities = datasources_map.get("phrases", [])

    # Only invoke the matchers when there is something to match against.
    pattern_response = regex_checker(text, pattern_entities) if pattern_entities else []
    phrase_response = phrase_checker(text, phrase_entities) if phrase_entities else []
    return pattern_response, phrase_response, predefined_tags
def test_regex_checker_with_null(mocker):
    """regex_checker returns no tags for empty text and an empty pattern list."""
    # The original test patched ice_commons.store.util.remove_duplicates
    # AFTER regex_checker had already run (so the patch had no effect) and
    # compared the result to the patch's return value instead of to the
    # expected output. Assert the contract directly instead.
    patterns = regex_checker('', [])
    assert patterns == []