Python DataUtility Examples, kairon.shared.data.utils.DataUtility Python Examples

Example #1

0

Show file

    def __prepare_nlu(intent: str, training_examples: list):
        """
        Lazily builds rasa ``Message`` objects for an intent from a random
        sample of its training examples.

        Sampling rules (limits are read from ``Utility.environment['model']['test']``):

        * 100 or more examples: keep ``dataset_percentage`` percent of them
          (falls back to 10 when the setting is missing or falsy).
        * fewer than 100 examples but more than ``dataset_threshold``
          (falls back to 10): keep ``dataset_threshold`` examples.
        * otherwise every example is kept.

        The sampled examples are passed through
        ``TestDataGenerator.augment_sentences`` and each augmented sentence is
        converted into a message carrying the intent, the plain text and, when
        present, the extracted entities.

        :param intent: intent name stored on every generated message
        :param training_examples: training example texts for the intent
        :return: generator of ``Message`` objects; it yields nothing when
                 there are no training examples
        """
        from rasa.shared.nlu.training_data.message import Message
        from kairon.shared.data.constant import TRAINING_EXAMPLE
        from rasa.shared.nlu.constants import TEXT
        from kairon import Utility

        if not training_examples:
            # This function is a generator, so a returned value would never
            # reach a caller iterating over it; just end the iteration.
            return

        test_settings = Utility.environment['model']['test']
        if len(training_examples) >= 100:
            sample_percentage = test_settings.get('dataset_percentage') or 10
            sample_fraction = sample_percentage / 100
            num_samples = int(len(training_examples) * sample_fraction)
            training_examples = random.sample(training_examples, num_samples)
        else:
            test_data_threshold = test_settings.get('dataset_threshold') or 10
            if len(training_examples) > test_data_threshold:
                training_examples = random.sample(training_examples, test_data_threshold)

        for example in TestDataGenerator.augment_sentences(training_examples):
            plain_text, entities = DataUtility.extract_text_and_entities(example)
            message = Message()
            message.data = {TRAINING_EXAMPLE.INTENT.value: intent, TEXT: plain_text}
            if entities:
                message.data[TRAINING_EXAMPLE.ENTITIES.value] = entities
            yield message

Example #2

0

Show file

File: file_validator.py Project: udit-pandey/kairon

    def validate_rasa_config(config: Dict):
        """
        Checks bot config.yml content for invalid pipeline components and policies.

        A pipeline component is accepted when it is a registered rasa NLU
        component or one of the two custom components listed below. A policy
        is accepted when its name is in ``DataUtility.get_rasa_core_policies()``.

        :param config: configuration
        :return: list of error messages, empty when nothing invalid was found
        """
        from rasa.nlu.registry import registered_components as nlu_components

        custom_components = ["custom.ner.SpacyPatternNER", "custom.fallback.FallbackIntentFilter"]
        errors = []

        pipeline = config.get('pipeline')
        if not pipeline:
            errors.append("You didn't define any pipeline")
        else:
            for component in pipeline:
                component_name = component['name']
                if component_name not in nlu_components and component_name not in custom_components:
                    errors.append("Invalid component " + component_name)

        policies = config.get('policies')
        if not policies:
            errors.append("You didn't define any policies")
        else:
            known_policies = DataUtility.get_rasa_core_policies()
            for policy in policies:
                if policy['name'] not in known_policies:
                    errors.append("Invalid policy " + policy['name'])

        return errors

Example #3

0

Show file

File: data_objects.py Project: udit-pandey/kairon

    def validate(self, clean=True):
        """
        Validates the channel configuration and, for telegram connectors,
        registers the channel endpoint as the telegram webhook.

        :param clean: not used by this implementation
        :return: None
        """
        from kairon.shared.data.utils import DataUtility

        Utility.validate_channel_config(self.connector_type, self.config, ValidationError)
        if self.connector_type != "telegram":
            return

        # Only telegram needs its webhook registered at validation time.
        channel_details = {
            'bot': self.bot, 'user': self.user, 'connector_type': self.connector_type
        }
        webhook_url = DataUtility.get_channel_endpoint(channel_details)
        access_token = Utility.decrypt_message(self.config['access_token'])
        Utility.register_telegram_webhook(access_token, webhook_url)

Example #4

0

Show file

File: file_validator.py Project: udit-pandey/kairon

    def verify_utterances_in_stories(self, raise_exception: bool = True):
        """
        Validates utterances in stories.

        Two checks are performed after ``verify_utterances`` has run:

        1. every ``utter_`` action used in a story must be a known utterance
           action or a system triggered action;
        2. every known utterance action must be used in some story, unless it
           is a system triggered action or the configured fallback action.

        Failures of check 1 are appended to ``self.summary['stories']`` and
        failures of check 2 to ``self.summary['utterances']``.

        @param raise_exception: Set this flag to false to prevent raising exceptions.
        @return: None
        @raise AppException: on the first failure when raise_exception is true.
        """
        utterance_mismatch_summary = []
        story_utterance_not_found_in_domain = []
        self.verify_utterances(raise_exception)

        utterance_actions = self.validator._gather_utterance_actions()
        fallback_action = DataUtility.parse_fallback_action(self.config)
        system_triggered_actions = DEFAULT_ACTIONS.union(SYSTEM_TRIGGERED_UTTERANCES)

        # set.union() iterates its argument, so a plain string would be split
        # into single characters and the fallback action itself would never be
        # excluded. Treat a string as one action name; other iterables are
        # used as they are. NOTE(review): confirm what parse_fallback_action
        # returns.
        if isinstance(fallback_action, str):
            fallback_action = {fallback_action}
        # Built once here instead of on every iteration of the loop below.
        allowed_unused_utterances = system_triggered_actions.union(fallback_action or ())

        stories_utterances = set()

        for story in self.story_graph.story_steps:
            for event in story.events:
                if not isinstance(event, ActionExecuted):
                    continue
                if not event.action_name.startswith(UTTER_PREFIX):
                    # we are only interested in utter actions
                    continue

                if event.action_name in stories_utterances:
                    # we already processed this one before, we only want to warn once
                    continue

                if event.action_name not in utterance_actions and event.action_name not in system_triggered_actions:
                    msg = f"The action '{event.action_name}' is used in the stories, " \
                          f"but is not a valid utterance action. Please make sure " \
                          f"the action is listed in your domain and there is a " \
                          f"template defined with its name."
                    if raise_exception:
                        raise AppException(msg)
                    story_utterance_not_found_in_domain.append(msg)
                stories_utterances.add(event.action_name)

        for utterance in utterance_actions:
            if utterance not in stories_utterances and utterance not in allowed_unused_utterances:
                msg = f"The utterance '{utterance}' is not used in any story."
                if raise_exception:
                    raise AppException(msg)
                utterance_mismatch_summary.append(msg)

        # A missing or empty summary entry is replaced by a fresh list before
        # the new messages are added.
        self.summary['utterances'] = (self.summary.get('utterances') or []) + utterance_mismatch_summary
        self.summary['stories'] = (self.summary.get('stories') or []) + story_utterance_not_found_in_domain

Example #5

0

Show file

File: augment.py Project: udit-pandey/kairon

async def paraphrases(request_data: ParaphrasesRequest,
                      current_user: User = Depends(
                          Authentication.get_current_user)):
    """
    Generates other similar text by augmenting original text.

    Entity annotations are removed from each input sentence before the
    plain sentences are posted to the configured paraphrase service; the
    service's JSON response is returned as is.
    """
    plain_text_data = []
    for data in request_data.data:
        # extract_text_and_entities returns the plain text first
        plain_text_data.append(DataUtility.extract_text_and_entities(data)[0])

    paraphrase_url = Utility.environment["augmentation"]["paraphrase_url"]
    response = requests.post(paraphrase_url, json=plain_text_data)
    return response.json()

Example #6

0

Show file

    def __augment_entities(input_text: list, stopwords: list, entity_names: list):
        """
        Produces entity-annotated variants of the given sentences.

        For every sentence and every stopword contained in it, one variant is
        created with the stopword written as ``[stopword](entity)`` and one
        more for each synonym from ``DataUtility.generate_synonym`` written as
        ``[synonym](entity)``. The entity name is taken from ``entity_names``
        at the stopword's index.

        :param input_text: sentences to augment
        :param stopwords: words to be annotated as entities
        :param entity_names: entity names, index-aligned with ``stopwords``
        :return: list of annotated sentences; ``input_text`` itself when
                 either ``input_text`` or ``stopwords`` is empty
        """
        if not (input_text and stopwords):
            return input_text

        annotated_sentences = []
        for sentence in input_text:
            for position, stopword in enumerate(stopwords):
                if stopword not in sentence:
                    continue
                entity = entity_names[position]
                annotated_sentences.append(sentence.replace(stopword, f'[{stopword}]({entity})'))
                annotated_sentences.extend([
                    sentence.replace(stopword, f'[{synonym}]({entity})')
                    for synonym in DataUtility.generate_synonym(stopword)
                ])
        return annotated_sentences

Example #7

0

Show file

async def visitor_hit_fallback(month: int = Query(default=1, ge=1, le=6),
                               current_user: User = Security(
                                   Authentication.get_current_user_and_bot,
                                   scopes=TESTER_ACCESS)):
    """
    Fetches the number of times the agent hit a fallback (ie. not able to answer) to user queries
    """
    action_fallback, nlu_fallback = DataUtility.load_fallback_actions(
        current_user.get_bot())
    # The bot's fallback action names are sent along with the month value.
    request_body = {
        'month': month,
        'action_fallback': action_fallback,
        'nlu_fallback': nlu_fallback
    }
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/metrics/fallback',
        request_body)

Example #8

0

Show file

async def fallback_dropoff(month: int = Query(default=1, ge=1, le=6),
                           current_user: User = Security(
                               Authentication.get_current_user_and_bot,
                               scopes=TESTER_ACCESS)):
    """
    Fetches the list of users that dropped off after encountering fallback
    """
    action_fallback, nlu_fallback = DataUtility.load_fallback_actions(
        current_user.get_bot())
    # The bot's fallback action names are sent along with the month value.
    request_body = {
        'month': month,
        'action_fallback': action_fallback,
        'nlu_fallback': nlu_fallback
    }
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/metrics/fallback/dropoff',
        request_body)

Example #9

0

Show file

async def fallback_trend(month: int = Query(default=6, ge=1, le=6),
                         current_user: User = Security(
                             Authentication.get_current_user_and_bot,
                             scopes=TESTER_ACCESS)):
    """
    Fetches the fallback count of the bot for previous months
    """
    action_fallback, nlu_fallback = DataUtility.load_fallback_actions(
        current_user.get_bot())
    # The bot's fallback action names are sent along with the month value.
    request_body = {
        'month': month,
        'action_fallback': action_fallback,
        'nlu_fallback': nlu_fallback
    }
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/trends/fallback',
        request_body)

Example #10

0

Show file

async def unsuccessful_session_count(
        month: int = Query(default=1, ge=1, le=6),
        current_user: User = Security(Authentication.get_current_user_and_bot,
                                      scopes=TESTER_ACCESS)):
    """
    Fetches the count of sessions that encountered a fallback for a particular user.
    """
    action_fallback, nlu_fallback = DataUtility.load_fallback_actions(
        current_user.get_bot())
    # The bot's fallback action names are sent along with the month value.
    request_body = {
        'month': month,
        'action_fallback': action_fallback,
        'nlu_fallback': nlu_fallback
    }
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/metrics/sessions/unsuccessful',
        request_body)

Example #11

0

Show file

async def complete_conversations(month: int = Query(default=1, ge=1, le=6),
                                 current_user: User = Security(
                                     Authentication.get_current_user_and_bot,
                                     scopes=TESTER_ACCESS)):
    """
    Fetches the number of successful conversations of the bot, which had no fallback
    """
    action_fallback, nlu_fallback = DataUtility.load_fallback_actions(
        current_user.get_bot())
    # The bot's fallback action names are sent along with the month value.
    request_body = {
        'month': month,
        'action_fallback': action_fallback,
        'nlu_fallback': nlu_fallback
    }
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/metrics/conversation/success',
        request_body)

Example #12

0

Show file

 def test_prepare_nlu_text_with_entities(self):
     """Entity-annotated text must survive an extract/prepare round trip unchanged."""
     annotated = "n=[8](n), p=1[8](n), k=2[8](n) ec=[14](ec), ph=[3](p)"
     plain_text, found_entities = DataUtility.extract_text_and_entities(annotated)
     rebuilt = DataUtility.prepare_nlu_text(plain_text, found_entities)
     assert annotated == rebuilt

Example #13

0

Show file

 def test_validate_path_not_found(self):
     """Validating the path below must raise AppException."""
     missing_path = '/tests/path_not_found'
     with pytest.raises(AppException):
         DataUtility.validate_and_get_requirements(missing_path)

Example #14

0

Show file

 def test_get_interpreter_with_no_model(self):
     """get_interpreter must return None for the model file name used here."""
     assert DataUtility.get_interpreter("test.tar.gz") is None

Example #15

0

Show file

 def test_prepare_nlu_text(self):
     """Text without entity annotations must survive an extract/prepare round trip unchanged."""
     sentence = "India is beautiful"
     plain_text, found_entities = DataUtility.extract_text_and_entities(sentence)
     rebuilt = DataUtility.prepare_nlu_text(plain_text, found_entities)
     assert sentence == rebuilt

Example #16

0

Show file

 def __augment_sentences_with_mistakes_and_entities(input_text: str, stopwords, entity_names):
     """
     Augments a single sentence in two steps: first with
     ``DataUtility.augment_sentences``, then with entity annotations.

     :param input_text: sentence to augment
     :param stopwords: passed to both augmentation steps
     :param entity_names: entity names passed to the entity augmentation step
     :return: result of the entity augmentation step
     """
     sentence_variants = list(DataUtility.augment_sentences([input_text], stopwords))
     return TestDataGenerator.__augment_entities(sentence_variants, stopwords, entity_names)

Example #17

0

Show file

 def test_validate_files(self, resource_validate_files):
     """With the resource_validate_files fixture active, no requirements must be reported."""
     data_home = pytest.bot_data_home_dir
     requirements = DataUtility.validate_and_get_requirements(data_home)
     assert not requirements

Example #18

0

Show file

 def test_validate_only_config(self, resource_validate_only_config):
     """With the resource_validate_only_config fixture active, every other file type must be reported."""
     expected_requirements = {'rules', 'actions', 'domain', 'stories', 'nlu'}
     requirements = DataUtility.validate_and_get_requirements(
         pytest.bot_data_home_dir, True)
     assert expected_requirements == requirements

Example #19

0

Show file

 def test_validate_only_stories_and_nlu(
         self, resource_validate_only_stories_and_nlu):
     """With the stories-and-nlu fixture active, actions, config and domain must be reported."""
     expected_requirements = {'actions', 'config', 'domain'}
     requirements = DataUtility.validate_and_get_requirements(
         pytest.bot_data_home_dir, True)
     assert expected_requirements == requirements

Example #20

0

Show file

 def test_validate_no_files_delete_dir(
         self, resource_validate_no_training_files_delete_dir):
     """Validation must raise AppException and the data directory must no longer exist afterwards."""
     data_home = pytest.bot_data_home_dir
     with pytest.raises(AppException):
         DataUtility.validate_and_get_requirements(data_home, True)
     assert not os.path.exists(pytest.bot_data_home_dir)