Ejemplo n.º 1
0
    def _process_augmentations(self, augmentor) -> AudioAugmentor:
        """Build an ``AudioAugmentor`` from a mapping of perturbation configs.

        Args:
            augmentor: Mapping of perturbation name to a dict of keyword
                arguments. Each dict must contain a non-``None`` ``prob``
                entry; every other entry is forwarded to the perturbation
                constructor looked up in ``perturbation_types``.

        Returns:
            An ``AudioAugmentor`` constructed with a list of
            ``[prob, perturbation]`` pairs. Entries without ``prob`` or with
            an unknown name are logged and skipped.

        Note:
            The input mapping is left untouched, so the same configuration
            can be processed more than once with identical results.
        """
        augmentations = []
        for augment_name, augment_kwargs in augmentor.items():
            prob = augment_kwargs.get('prob', None)

            if prob is None:
                logging.error(
                    f'Augmentation "{augment_name}" will not be applied as '
                    f'keyword argument "prob" was not defined for this augmentation.'
                )
                continue

            # Only the registry lookup is guarded: a KeyError raised inside a
            # perturbation constructor must not be reported as a bad name.
            try:
                perturbation_cls = perturbation_types[augment_name]
            except KeyError:
                logging.error(
                    f"Invalid perturbation name. Allowed values : {perturbation_types.keys()}"
                )
                continue

            # Strip "prob" into a fresh dict instead of popping it from the
            # caller's configuration.
            init_kwargs = {
                key: value
                for key, value in augment_kwargs.items() if key != 'prob'
            }
            augmentations.append([prob, perturbation_cls(**init_kwargs)])

        return AudioAugmentor(perturbations=augmentations)
Ejemplo n.º 2
0
    def __call__(self):
        """Create a TensorRT network and populate it from the ONNX model.

        The model is obtained by calling ``self.onnx_loader()`` and is handed
        to the parser in serialized form. When parsing fails, every parser
        error is logged, followed by one critical message; this method does
        not raise on its own.

        Returns:
            A ``(network, parser)`` tuple.
        """
        network = TensorRTRunnerV2.create_network(
            explicit_precision=self.explicit_precision)
        parser = trt.OnnxParser(network, TRT_LOGGER)

        serialized_model = self.onnx_loader().SerializeToString()
        parsed_ok = parser.parse(serialized_model)
        if parsed_ok:
            return network, parser

        # Report each parser error individually before the summary message.
        for error_idx in range(parser.num_errors):
            logging.error(parser.get_error(error_idx))
        logging.critical("Could not parse ONNX correctly")
        return network, parser
Ejemplo n.º 3
0
    def _generate_dial_turns(self, turns, dial_id):
        """Generate the dialog turns and the services mentioned in the dialogue.

        Turns are consumed as (user, system) pairs. For every pair a user turn
        and a system turn dict are produced, the user turn receiving one frame
        per service.

        Args:
            turns: Sequence of turn dicts, alternating user then system. Each
                turn is read for 'text' and, when present, 'dialog_act'; each
                system turn is additionally read for 'metadata'.
            dial_id: Dialogue identifier; only used in the error log message.

        Returns:
            A tuple ``(converted_turns, services)``. ``converted_turns`` is a
            list of turn dicts with the keys 'utterance', 'speaker', 'frames'
            and 'turn_id' (user turn followed by system turn for each pair).
            ``services`` is the list of service names seen in any dialog state.

        Raises:
            ValueError: If ``turns`` has an odd number of elements.
        """
        # State carried across turn pairs.
        prev_dialog_states = collections.defaultdict(dict)
        corrected_slot_values = collections.defaultdict(dict)
        converted_turns = []
        appear_services = set()
        # Every user turn must be followed by a system turn.
        if len(turns) % 2 != 0:
            raise ValueError('dialog ended by user')
        for i in range(len(turns))[::2]:
            user_info = turns[i]
            sys_info = turns[i + 1]
            user_utt = self._basic_text_process(user_info['text'], False)
            sys_utt = self._basic_text_process(sys_info['text'], False)
            # Fall back to empty defaultdicts when a turn carries no 'dialog_act'.
            user_actions = collections.defaultdict(list)
            sys_actions = collections.defaultdict(list)
            if 'dialog_act' in user_info:
                user_actions = self._generate_actions(user_info['dialog_act'])
            if 'dialog_act' in sys_info:
                sys_actions = self._generate_actions(sys_info['dialog_act'])

            sys_turn = {
                'utterance': sys_utt,
                'speaker': 'SYSTEM',
                'frames': [],
                'turn_id': str(i + 1)
            }
            user_turn = {
                'utterance': user_utt,
                'speaker': 'USER',
                'frames': [],
                'turn_id': str(i)
            }
            # The dialog state for this pair is derived from the system turn's
            # 'metadata' together with the corrections collected so far.
            dialog_states, _ = self._generate_dialog_states(
                sys_info['metadata'], corrected_slot_values)
            appear_services.update(dialog_states.keys())

            # Fill in slot spans in the user turn and the previous system turn for
            # the non categorical slots.
            user_slots = collections.defaultdict(list)
            sys_slots = collections.defaultdict(list)
            # Presumably the slots whose values changed relative to the previous
            # pair -- confirm in _get_update_states.
            update_states = self._get_update_states(prev_dialog_states,
                                                    dialog_states)
            # The last converted turn is the previous system turn; there is none
            # for the first pair, hence the empty string.
            prev_sys_utt = converted_turns[-1][
                'utterance'] if converted_turns else ''
            for service_name, slot_values_dict in update_states.items():
                new_service_name = self._new_service_name(service_name)
                service_schema = self._schemas.get_service_schema(
                    new_service_name)
                for slot, slot_value in slot_values_dict.items():
                    assert slot_value, 'slot values shouls not be empty'
                    # Only the first value of the list is used from here on.
                    slot_value = slot_value[0]
                    if slot in service_schema.categorical_slots:
                        if slot_value not in service_schema.get_categorical_slot_values(
                                slot) and slot_value not in [_DONT_CARE]:
                            logging.error(
                                'Value %s not contained in slot %s, dial_id %s, ',
                                slot_value, slot, dial_id)
                            # The unrecognised value is still kept in the state,
                            # reduced to that single value.
                            dialog_states[service_name][slot] = [slot_value]
                    else:
                        # Counted before the _DONT_CARE check, so "don't care"
                        # updates are included in this total.
                        self._slot_spans_num += 1
                        if slot_value == _DONT_CARE:
                            continue
                        user_slot_ann, slot_value_from_user = self._generate_slot_annotation(
                            user_utt, slot, slot_value)
                        sys_slot_ann, slot_value_from_sys = self._generate_slot_annotation(
                            prev_sys_utt, slot, slot_value)
                        # Values from user utterance has a higher priority than values from
                        # sys utterance. We correct the slot value of non-categorical slot
                        # first based on user utterance, then system utterance.
                        if user_slot_ann and slot_value_from_user != slot_value:
                            if sys_slot_ann and (slot_value_from_sys
                                                 == slot_value):
                                # The system utterance yields the exact value, so
                                # the differing user annotation is discarded.
                                user_slot_ann = None
                            else:
                                self._update_corrected_slot_values(
                                    corrected_slot_values, service_name, slot,
                                    slot_value, slot_value_from_user)
                                dialog_states[service_name][slot] = list(
                                    corrected_slot_values[service_name][slot]
                                    [slot_value])
                        if not user_slot_ann and sys_slot_ann and slot_value_from_sys != slot_value:
                            self._update_corrected_slot_values(
                                corrected_slot_values, service_name, slot,
                                slot_value, slot_value_from_sys)
                            dialog_states[service_name][slot] = list(
                                corrected_slot_values[service_name][slot]
                                [slot_value])
                        if user_slot_ann:
                            user_slots[service_name].extend(user_slot_ann)
                        if sys_slot_ann:
                            sys_slots[service_name].extend(sys_slot_ann)
                        if not user_slot_ann and not sys_slot_ann:
                            # First check if it exists in the previous dialogue states.
                            from_service_name, from_slot, from_slot_values = exists_in_prev_dialog_states(
                                slot_value, converted_turns)
                            if from_service_name is not None:
                                self._unfound_slot_spans_num[
                                    'copy_from_prev_dialog_state'] += 1
                                # `args` is not defined in this method; it has to
                                # come from an enclosing (module-level) scope.
                                if args.annotate_copy_slots:
                                    user_slots[service_name].append({
                                        'slot':
                                        slot,
                                        'copy_from':
                                        from_slot,
                                        'value':
                                        from_slot_values
                                    })
                                continue
                            # Second, trace back the dialogue history to find the span.
                            # The slice walks backwards from the second-to-last
                            # converted turn, skipping the previous system turn
                            # that was already searched via prev_sys_utt.
                            for prev_turn in converted_turns[-2::-1]:
                                prev_utt = prev_turn['utterance']
                                prev_slot_ann, prev_slot_value = self._generate_slot_annotation(
                                    prev_utt, slot, slot_value)
                                if prev_slot_ann:
                                    if prev_slot_value != slot_value:
                                        self._update_corrected_slot_values(
                                            corrected_slot_values,
                                            service_name, slot, slot_value,
                                            prev_slot_value)
                                        dialog_states[service_name][
                                            slot] = list(corrected_slot_values[
                                                service_name][slot]
                                                         [slot_value])
                                    self._insert_slots_annotations_to_turn(
                                        prev_turn, prev_slot_ann, service_name)
                                    break
                            # NOTE(review): this counter is incremented whether or
                            # not the loop above found a span (there is no
                            # for/else) -- confirm that this is intended.
                            self._unfound_slot_spans_num[slot] += 1
                            continue
            # Fill in slot annotations for the system turn.
            # NOTE(review): converted_turns[-1] is the previous system turn; on
            # the first pair the list is empty, so this relies on sys_slots being
            # empty then (prev_sys_utt is '') -- confirm.
            for service_name in sys_slots:
                if not sys_slots[service_name]:
                    continue
                self._insert_slots_annotations_to_turn(converted_turns[-1],
                                                       sys_slots[service_name],
                                                       service_name)
            # Generate user frames from dialog_states.
            # The update is recomputed because dialog_states may have been
            # modified by the value corrections above.
            latest_update_states = self._get_update_states(
                prev_dialog_states, dialog_states)
            for service_name, slot_values_dict in dialog_states.items():
                user_intent = self._get_intent_from_actions(
                    latest_update_states[service_name],
                    sys_actions[service_name], user_actions[service_name])
                # Fill in values.
                user_turn['frames'].append({
                    'slots': user_slots[service_name],
                    'state': {
                        'slot_values':
                        {k: v
                         for k, v in slot_values_dict.items() if v},
                        'requested_slots':
                        self._get_requested_slots_from_action(
                            user_actions[service_name]),
                        'active_intent':
                        user_intent,
                    },
                    'service': service_name,
                })
            # Schema services that have not appeared in any dialog state so far
            # (appear_services accumulates across pairs) get a frame with no
            # slots and no slot values.
            non_active_services = set(self._schemas.services) - appear_services
            for service_name in non_active_services:
                user_intent = self._get_intent_from_actions(
                    {}, sys_actions[service_name], user_actions[service_name])
                user_turn['frames'].append({
                    'service': service_name,
                    'slots': [],
                    'state': {
                        'active_intent':
                        user_intent,
                        'requested_slots':
                        self._get_requested_slots_from_action(
                            user_actions[service_name]),
                        'slot_values': {},
                    },
                })
            converted_turns.extend([user_turn, sys_turn])
            # The state of this pair becomes the baseline for the next one.
            prev_dialog_states = dialog_states
        return converted_turns, list(appear_services)
Ejemplo n.º 4
0
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================

import json

from nemo import logging
from nemo.collections.nlp.nm.trainables.common.huggingface.huggingface_utils import *

# Megatron support is optional: attempt the imports and record the outcome in a
# module-level flag that get_pretrained_lm_models_list() consults.
try:
    __megatron_utils_satisfied = True
    from nemo.collections.nlp.nm.trainables.common.megatron.megatron_bert_nm import MegatronBERT
    from nemo.collections.nlp.nm.trainables.common.megatron.megatron_utils import *

except Exception as e:
    # Any failure during the imports is logged and the flag is cleared, so only
    # the HuggingFace models are listed.
    logging.error('Failed to import Megatron Neural Module and utils: `{}` ({})'.format(str(e), type(e)))
    __megatron_utils_satisfied = False


__all__ = ['get_pretrained_lm_models_list', 'get_pretrained_lm_model']


def get_pretrained_lm_models_list():
    """Return the list of supported pretrained language models.

    The HuggingFace models are always included. The Megatron models are
    placed in front of them when the Megatron imports at the top of this
    module succeeded.
    """
    if not __megatron_utils_satisfied:
        return get_huggingface_lm_models_list()

    megatron_models = get_megatron_lm_models_list()
    return megatron_models + get_huggingface_lm_models_list()