def _parse_raw_user_utterance(
    self, step: Dict[Text, Any]
) -> Optional[UserUttered]:
    """Build a ``UserUttered`` event from a raw user step.

    The intent name and full retrieval intent name come from
    ``self._user_intent_from_step``; the intent confidence is fixed at 1.0.

    Args:
        step: The raw step. If it contains ``KEY_USER_MESSAGE``, the
            message text and its entities are taken from that value;
            otherwise entities are read from ``KEY_ENTITIES``.

    Returns:
        The resulting ``UserUttered`` event. Its text is ``None`` when the
        step carries no user message.
    """
    from rasa.shared.nlu.interpreter import RegexInterpreter

    name, retrieval_name = self._user_intent_from_step(step)
    intent = {
        INTENT_NAME_KEY: name,
        FULL_RETRIEVAL_INTENT_NAME_KEY: retrieval_name,
        PREDICTED_CONFIDENCE_KEY: 1.0,
    }

    if KEY_USER_MESSAGE not in step:
        # Only the intent was provided in the stories, so there is no
        # message text; entities come from the step's own entity list.
        listed_entities = self._parse_raw_entities(step.get(KEY_ENTITIES, []))
        return UserUttered(None, intent, listed_entities)

    annotated_message = step[KEY_USER_MESSAGE].strip()
    entities = entities_parser.find_entities_in_training_example(
        annotated_message
    )
    text = entities_parser.replace_entities(annotated_message)

    if text.startswith(INTENT_MESSAGE_PREFIX):
        # Messages starting with the intent prefix get their entities from
        # the regex interpreter instead of the annotation parser.
        parsed = RegexInterpreter().synchronous_parse(text)
        entities = parsed.get(ENTITIES, [])

    return UserUttered(text, intent, entities)
def _parse_intent(self, intent_data: Dict[Text, Any]) -> None:
    """Convert one intent entry into training examples on this instance.

    An entry with no name under ``KEY_INTENT`` raises a warning and is
    skipped. Otherwise every example yielded by
    ``self._parse_training_examples`` is appended to
    ``self.training_examples`` as a ``Message``, and its entities are handed
    to the synonyms parser together with ``self.entity_synonyms``.

    Args:
        intent_data: The intent entry; read for its name
            (``KEY_INTENT``), examples (``KEY_INTENT_EXAMPLES``) and
            metadata (``KEY_METADATA``).
    """
    import rasa.shared.nlu.training_data.entities_parser as entities_parser
    import rasa.shared.nlu.training_data.synonyms_parser as synonyms_parser

    intent_name = intent_data.get(KEY_INTENT, "")
    if not intent_name:
        warning_text = (
            f"Issue found while processing '{self.filename}': "
            f"The intent has an empty name. "
            f"Intents should have a name defined under the {KEY_INTENT} key. "
            f"It will be skipped."
        )
        rasa.shared.utils.io.raise_warning(
            warning_text, docs=DOCS_URL_TRAINING_DATA
        )
        return

    raw_examples = intent_data.get(KEY_INTENT_EXAMPLES, "")
    shared_metadata = intent_data.get(KEY_METADATA)
    parsed_examples = self._parse_training_examples(raw_examples, intent_name)

    for annotated, found_entities, own_metadata in parsed_examples:
        text = entities_parser.replace_entities(annotated)
        synonyms_parser.add_synonyms_from_entities(
            text, found_entities, self.entity_synonyms
        )
        self.training_examples.append(
            Message.build(
                text,
                intent_name,
                found_entities,
                shared_metadata,
                own_metadata,
            )
        )
def test_markdown_entity_regex(
    example: Text,
    expected_entities: List[Dict[Text, Any]],
    expected_text: Text,
):
    """Check the entities found in ``example`` and the text after replacement.

    The entities returned by ``find_entities_in_training_example`` must equal
    ``expected_entities``, and the output of ``replace_entities`` must equal
    ``expected_text``.
    """
    assert (
        entities_parser.find_entities_in_training_example(example)
        == expected_entities
    )
    assert entities_parser.replace_entities(example) == expected_text
def _parse_raw_user_utterance(
    self, step: Dict[Text, Any]
) -> Optional[UserUttered]:
    """Build a ``UserUttered`` event from a raw user step.

    The intent name comes from ``self._user_intent_from_step``; the intent
    confidence is fixed at 1.0.

    Args:
        step: The raw step. If it contains ``KEY_USER_MESSAGE``, the
            message text and its entities are taken from that value;
            otherwise entities are read from ``KEY_ENTITIES``.

    Returns:
        The resulting ``UserUttered`` event. Its text is ``None`` when the
        step carries no user message.
    """
    intent = {"name": self._user_intent_from_step(step), "confidence": 1.0}

    if KEY_USER_MESSAGE not in step:
        # Only the intent was provided in the stories, so there is no
        # message text; entities come from the step's own entity list.
        listed_entities = self._parse_raw_entities(step.get(KEY_ENTITIES, []))
        return UserUttered(None, intent, listed_entities)

    annotated_message = step[KEY_USER_MESSAGE].strip()
    entities = entities_parser.find_entities_in_training_example(
        annotated_message
    )
    text = entities_parser.replace_entities(annotated_message)

    if text.startswith(INTENT_MESSAGE_PREFIX):
        # Messages starting with the intent prefix get their entities from
        # the regex interpreter instead of the annotation parser.
        parsed = RegexInterpreter().synchronous_parse(text)
        entities = parsed.get(ENTITIES, [])

    return UserUttered(text, intent, entities)
dir_path = r"F:\Documents\stopansko\masters\thesis\sig-detect\data\clean\enron_random_clean_signatures" full_d = [] for root, dirs, filenames in os.walk(dir_path): if ".idea" in root: continue for i, filename in enumerate(filenames): # d = defaultdict(list) file_features = [] print(f"{i}. {filename} ...") with open(os.path.join(root, filename), encoding="utf-8") as f: lines = f.readlines() for line in lines: entities = find_entities_in_training_example(line) plain_text = replace_entities(line) doc = nlp(plain_text) for t in doc: low = t.orth_.lower() curr_d = { "token": t.orth_, "filename": filename, "label": get_label(t.idx, t.orth_, entities), "email": t.like_email, "url": t.like_url, "num": t.like_num, "stop": t.is_stop, "alpha": t.is_alpha, "title": t.is_title, "first": low in first_names,