Example #1
    def _parse_raw_user_utterance(
            self, step: Dict[Text, Any]) -> Optional[UserUttered]:
        from rasa.shared.nlu.interpreter import RegexInterpreter

        intent_name, full_retrieval_intent = self._user_intent_from_step(step)
        intent = {
            INTENT_NAME_KEY: intent_name,
            FULL_RETRIEVAL_INTENT_NAME_KEY: full_retrieval_intent,
            PREDICTED_CONFIDENCE_KEY: 1.0,
        }

        if KEY_USER_MESSAGE in step:
            user_message = step[KEY_USER_MESSAGE].strip()
            entities = entities_parser.find_entities_in_training_example(
                user_message)
            plain_text = entities_parser.replace_entities(user_message)

            if plain_text.startswith(INTENT_MESSAGE_PREFIX):
                entities = (
                    RegexInterpreter().synchronous_parse(plain_text).get(
                        ENTITIES, []))
        else:
            raw_entities = step.get(KEY_ENTITIES, [])
            entities = self._parse_raw_entities(raw_entities)
            # set plain_text to None because only intent was provided in the stories
            plain_text = None
        return UserUttered(plain_text, intent, entities)
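For orientation, a sketch of the story-step dicts this method consumes; the key names mirror the KEY_USER_MESSAGE/KEY_ENTITIES constants above (assumed to be "user" and "entities"), and the values are illustrative:

# Step carrying a full user message; entities are annotated inline
# and recovered by entities_parser.
step_with_message = {
    "intent": "book_flight",
    "user": 'I want to fly from [Berlin]{"entity": "city"}',
}

# Step carrying only an intent; entities come as a separate list,
# so plain_text stays None.
step_intent_only = {
    "intent": "book_flight",
    "entities": [{"city": "Berlin"}],
}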
Example #2
def test_markdown_entity_regex(example: Text,
                               expected_entities: List[Dict[Text, Any]],
                               expected_text: Text):
    # Parametrized in the original test suite with
    # (example, expected_entities, expected_text) tuples.
    result = entities_parser.find_entities_in_training_example(example)
    assert result == expected_entities

    replaced_text = entities_parser.replace_entities(example)
    assert replaced_text == expected_text
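For reference, a minimal sketch of what these two helpers produce for an annotated example (the outputs shown in comments are illustrative; offsets refer to the de-annotated text):

import rasa.shared.nlu.training_data.entities_parser as entities_parser

example = 'I want to fly from [Berlin]{"entity": "city", "role": "from"}'

# Extracts annotations as dicts with start/end offsets, the surface
# value, the entity type, and role/group when given, e.g.
# [{"start": 19, "end": 25, "value": "Berlin", "entity": "city", "role": "from"}]
entities = entities_parser.find_entities_in_training_example(example)

# Strips the annotation markup, leaving the plain user message:
# "I want to fly from Berlin"
plain_text = entities_parser.replace_entities(example)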
Example #3
    async def replace_placeholders(
        self,
        example: Message,
        faker_: Faker,
        matches: List[Tuple[Any, ...]],
        count: int,
    ) -> AsyncIterator[Message]:
        original_text = await self.rebuild_original_text(example)
        for _ in range(count):
            text = await self.replace_placeholders_in_text(
                example.data.get("text"), faker_, matches
            )
            original_text = await self.replace_placeholders_in_text(
                original_text, faker_, matches
            )
            entities = find_entities_in_training_example(original_text)
            new_message = Message.build(text, example.get("intent"), entities)
            yield new_message
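Since replace_placeholders is an async generator, it has to be driven with `async for`. A minimal consumption sketch, where `augmenter`, `example`, and `matches` are assumptions standing in for the snippet's surrounding objects:

import asyncio

from faker import Faker


async def collect_augmented(augmenter, example, matches, count=5):
    # Drain the async generator into a list of augmented Messages.
    return [
        message
        async for message in augmenter.replace_placeholders(
            example, Faker(), matches, count
        )
    ]


# messages = asyncio.run(collect_augmented(augmenter, example, matches))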
Example #4
    def _parse_training_examples(
        self, examples: Union[Text, List[Dict[Text, Any]]], intent: Text
    ) -> List[Tuple[Text, List[Dict[Text, Any]], Optional[Any]]]:
        import rasa.shared.nlu.training_data.entities_parser as entities_parser

        if isinstance(examples, list):
            example_tuples = [
                (
                    # pytype: disable=attribute-error
                    example.get(KEY_INTENT_TEXT, "").strip(STRIP_SYMBOLS),
                    example.get(KEY_METADATA),
                )
                for example in examples
                if example
            ]
        # pytype: enable=attribute-error
        elif isinstance(examples, str):
            example_tuples = [
                (example, None)
                for example in self._parse_multiline_example(intent, examples)
            ]
        else:
            rasa.shared.utils.io.raise_warning(
                f"Unexpected block found in '{self.filename}' "
                f"while processing intent '{intent}':\n"
                f"{examples}\n"
                f"This block will be skipped.",
                docs=DOCS_URL_TRAINING_DATA_NLU,
            )
            return []

        if not example_tuples:
            rasa.shared.utils.io.raise_warning(
                f"Issue found while processing '{self.filename}': "
                f"Intent '{intent}' has no examples.",
                docs=DOCS_URL_TRAINING_DATA_NLU,
            )

        results = []
        for example, metadata in example_tuples:
            entities = entities_parser.find_entities_in_training_example(example)
            results.append((example, entities, metadata))

        return results
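For context, the two `examples` shapes this method accepts correspond roughly to the following forms (a sketch; KEY_INTENT_TEXT and KEY_METADATA are assumed to be "text" and "metadata"):

# List-of-dicts form: each item carries its text plus optional metadata.
examples_as_list = [
    {"text": "book a [table](object) for two", "metadata": {"source": "crowd"}},
]

# Plain-string form: one multiline block that _parse_multiline_example
# splits into individual examples.
examples_as_string = (
    "- book a [table](object) for two\n"
    "- reserve a [room](object)\n"
)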
Example #5
    def _parse_raw_user_utterance(self, step: Dict[Text, Any]) -> Optional[UserUttered]:
        intent_name = self._user_intent_from_step(step)
        intent = {"name": intent_name, "confidence": 1.0}

        if KEY_USER_MESSAGE in step:
            user_message = step[KEY_USER_MESSAGE].strip()
            entities = entities_parser.find_entities_in_training_example(user_message)
            plain_text = entities_parser.replace_entities(user_message)

            if plain_text.startswith(INTENT_MESSAGE_PREFIX):
                entities = (
                    RegexInterpreter().synchronous_parse(plain_text).get(ENTITIES, [])
                )
        else:
            raw_entities = step.get(KEY_ENTITIES, [])
            entities = self._parse_raw_entities(raw_entities)
            # set plain_text to None because only intent was provided in the stories
            plain_text = None
        return UserUttered(plain_text, intent, entities)
Example #6
def test_markdown_entity_regex_error_handling_wrong_schema():
    with pytest.raises(SchemaValidationError):
        entities_parser.find_entities_in_training_example(
            # Schema error: "entiti" instead of "entity"
            'I want to fly from [Berlin]{"entiti": "city", "role": "from"}')
Example #7
def test_markdown_entity_regex_error_handling_not_json():
    with pytest.raises(InvalidEntityFormatException):
        entities_parser.find_entities_in_training_example(
            # JSON syntax error: missing closing " for `role`
            'I want to fly from [Berlin]{"entity": "city", "role: "from"}')
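These two tests cover the parser's failure modes; callers handling untrusted annotations can catch both explicitly. A sketch, assuming the exception classes are importable from rasa.shared.exceptions (verify the paths against your rasa version):

import rasa.shared.nlu.training_data.entities_parser as entities_parser

# Import paths are an assumption; adjust to your rasa version.
from rasa.shared.exceptions import (
    InvalidEntityFormatException,
    SchemaValidationError,
)


def safe_find_entities(example: str):
    # Returns [] instead of raising on malformed inline annotations.
    try:
        return entities_parser.find_entities_in_training_example(example)
    except (SchemaValidationError, InvalidEntityFormatException):
        return []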
Example #8
dir_path = r"F:\Documents\stopansko\masters\thesis\sig-detect\data\clean\enron_random_clean_signatures"
full_d = []
for root, dirs, filenames in os.walk(dir_path):
    if ".idea" in root:
        continue
    for i, filename in enumerate(filenames):
        file_features = []
        print(f"{i}. {filename} ...")
        with open(os.path.join(root, filename), encoding="utf-8") as f:
            lines = f.readlines()

        for line in lines:
            # Pull out the inline entity annotations, then strip the markup
            # so spaCy tokenizes the plain text.
            entities = find_entities_in_training_example(line)
            plain_text = replace_entities(line)
            doc = nlp(plain_text)
            for t in doc:
                low = t.orth_.lower()
                curr_d = {
                    "token": t.orth_,
                    "filename": filename,
                    # get_label is a helper defined elsewhere in this script;
                    # it maps the token's offset to an entity label.
                    "label": get_label(t.idx, t.orth_, entities),

                    "email": t.like_email,
                    "url": t.like_url,
                    "num": t.like_num,
                    "stop": t.is_stop,
                    "alpha": t.is_alpha,
                    "title": t.is_title,