Ejemplo n.º 1
0
 def input_types(self):
     """Return the typed definitions of this module's input ports.

     The mapping declares two ports: ``input_signal`` with axes
     ``('B', 'D', 'T')`` and ``SpectrogramType`` elements, and ``length``
     with the single axis ``'B'`` and ``LengthsType`` elements.
     """
     signal_port = NeuralType(('B', 'D', 'T'), SpectrogramType())
     length_port = NeuralType(('B',), LengthsType())
     return {"input_signal": signal_port, "length": length_port}
Ejemplo n.º 2
0
 def input_types(self) -> Optional[Dict[str, NeuralType]]:
     """Return the input port definitions; every port is marked optional.

     The raw-audio element type carries the preprocessor's ``_sample_rate``
     as ``freq`` when that attribute exists; otherwise a default-constructed
     ``AudioSignal`` is used.
     """
     audio_eltype = (
         AudioSignal(freq=self.preprocessor._sample_rate)
         if hasattr(self.preprocessor, '_sample_rate')
         else AudioSignal()
     )

     port_types = {}
     port_types["input_signal"] = NeuralType(('B', 'T'), audio_eltype, optional=True)
     port_types["input_signal_length"] = NeuralType(('B',), LengthsType(), optional=True)
     port_types["processed_signal"] = NeuralType(('B', 'D', 'T'), SpectrogramType(), optional=True)
     port_types["processed_signal_length"] = NeuralType(('B',), LengthsType(), optional=True)
     return port_types
 def input_ports(self):
     """Return the typed definitions of this module's input ports.

     Declares ``text``, ``text_pos``, ``mel_true`` and ``dur_true``, in that
     order, each as a ``NeuralType``.
     """
     ports = {
         "text": NeuralType(('B', 'T'), EmbeddedTextType()),
         "text_pos": NeuralType(('B', 'T'), MaskType()),
         "mel_true": NeuralType(('B', 'D', 'T'), MelSpectrogramType()),
         "dur_true": NeuralType(('B', 'T'), LengthsType()),
     }
     return ports
Ejemplo n.º 4
0
Archivo: rnnt.py Proyecto: sycomix/NeMo
 def output_types(self):
     """Return the typed definitions of this module's output ports.

     ``states`` is a one-element list wrapping an optional ``NeuralType``;
     it is inserted last because it must always be the final entry.
     """
     port_types = {}
     port_types["outputs"] = NeuralType(('B', 'D', 'T'), EmbeddedTextType())
     port_types["prednet_lengths"] = NeuralType(('B',), LengthsType())
     # must always be last
     port_types["states"] = [NeuralType(('D', 'B', 'D'), ElementType(), optional=True)]
     return port_types
Ejemplo n.º 5
0
 def input_types(self):
     """Return the typed definitions of this module's input ports.

     ``encoder_outputs`` and ``decoder_outputs`` are required; the remaining
     ports are optional. ``compute_wer`` is declared with no axes or element
     type, only ``optional=True``.
     """
     return dict(
         encoder_outputs=NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()),
         decoder_outputs=NeuralType(('B', 'D', 'T'), EmbeddedTextType()),
         encoder_lengths=NeuralType(('B',), LengthsType(), optional=True),
         transcripts=NeuralType(('B', 'T'), LabelsType(), optional=True),
         transcript_lengths=NeuralType(('B',), LengthsType(), optional=True),
         compute_wer=NeuralType(optional=True),
     )
Ejemplo n.º 6
0
Archivo: rnnt.py Proyecto: sycomix/NeMo
 def input_types(self):
     """Return the typed definitions of this module's input ports.

     ``states`` is a one-element list wrapping an optional ``NeuralType``;
     it is inserted last because it must always be the final entry.
     """
     port_types = {}
     port_types["targets"] = NeuralType(('B', 'T'), LabelsType())
     port_types["target_length"] = NeuralType(('B',), LengthsType())
     # must always be last
     port_types["states"] = [NeuralType(('D', 'B', 'D'), ElementType(), optional=True)]
     return port_types
Ejemplo n.º 7
0
Archivo: rnnt.py Proyecto: NVIDIA/NeMo
 def input_types(self):
     """Return an ``OrderedDict`` of this module's input port definitions.

     Ports, in insertion order: ``encoder_output``, ``targets``,
     ``target_lengths``.
     """
     return OrderedDict(
         [
             ("encoder_output", NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())),
             ("targets", NeuralType(('B', 'T'), LabelsType())),
             ("target_lengths", NeuralType(('B',), LengthsType())),
         ]
     )
Ejemplo n.º 8
0
 def output_types(self) -> Optional[Dict[str, NeuralType]]:
     """Return the typed definitions of this module's output ports.

     Declares ``spectrograms``, ``spec_masks``, ``encoded`` and
     ``encoded_len``, in that order.
     """
     port_types = {}
     port_types["spectrograms"] = NeuralType(('B', 'D', 'T'), SpectrogramType())
     port_types["spec_masks"] = NeuralType(('B', 'D', 'T'), SpectrogramType())
     port_types["encoded"] = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())
     port_types["encoded_len"] = NeuralType(('B',), LengthsType())
     return port_types
Ejemplo n.º 9
0
 def input_types(self):
     """Return the typed definitions of this module's input ports.

     ``partial_hypotheses`` is a one-element list wrapping an optional
     ``NeuralType`` with ``HypothesisType`` elements and no axes; it is
     inserted last because it must always be the final entry.
     """
     port_types = {}
     port_types["encoder_output"] = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())
     port_types["encoded_lengths"] = NeuralType(('B',), LengthsType())
     # must always be last
     port_types["partial_hypotheses"] = [NeuralType(elements_type=HypothesisType(), optional=True)]
     return port_types
Ejemplo n.º 10
0
 def output_types(self) -> Optional[Dict[str, NeuralType]]:
     """Return the typed definitions of this module's output ports.

     The ``audio_signal`` element type carries ``self._sample_rate`` as
     ``freq`` when ``self`` is not ``None`` and has that attribute;
     otherwise a default-constructed ``AudioSignal`` is used.
     """
     if self is not None and hasattr(self, '_sample_rate'):
         audio_eltype = AudioSignal(freq=self._sample_rate)
     else:
         audio_eltype = AudioSignal()

     return dict(
         audio_signal=NeuralType(('B', 'T'), audio_eltype),
         a_sig_length=NeuralType(('B',), LengthsType()),
         label=NeuralType(('B',), LabelsType()),
         label_length=NeuralType(('B',), LengthsType()),
     )
Ejemplo n.º 11
0
 def input_types(self):
     """Return the typed definitions of this module's input ports.

     ``input_signal`` uses an ``AudioSignal`` element type built with
     ``freq=self._sample_rate``. Note that ``length`` should be given in
     samples, not seconds.
     """
     signal_port = NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate))
     length_port = NeuralType(('B',), LengthsType())  # in samples, not seconds
     return {"input_signal": signal_port, "length": length_port}
Ejemplo n.º 12
0
 def input_types(self):
     """Return an ``OrderedDict`` of this module's input port definitions.

     Ports, in insertion order: ``audio_signal`` then ``length``.
     """
     port_types = OrderedDict()
     port_types["audio_signal"] = NeuralType(('B', 'D', 'T'), SpectrogramType())
     port_types["length"] = NeuralType(('B',), LengthsType())
     return port_types
Ejemplo n.º 13
0
 def input_types(self) -> Optional[Dict[str, NeuralType]]:
     """Return the typed definitions of this module's input ports.

     The audio element type carries the preprocessor's ``_sample_rate`` as
     ``freq`` when that attribute exists; otherwise a default-constructed
     ``AudioSignal`` is used.
     """
     element_type = (
         AudioSignal(freq=self.preprocessor._sample_rate)
         if hasattr(self.preprocessor, '_sample_rate')
         else AudioSignal()
     )
     port_types = {}
     port_types["input_signal"] = NeuralType(('B', 'T'), element_type)
     port_types["input_signal_length"] = NeuralType(('B',), LengthsType())
     return port_types
    def output_ports(self):
        """Return the typed definitions of this module's output ports.

        Ports:
            src_ids: ids of input sequences
            src_lens: lengths of input sequences
            tgt_ids: labels for the generator output
            tgt_lens: lengths of the generator targets
            gating_labels: labels for the gating head
            turn_domain: list of the domains (declared as an untyped
                ``NeuralType()``)
        """
        ports = {}
        ports["src_ids"] = NeuralType(('B', 'T'), ChannelType())
        ports["src_lens"] = NeuralType(('B',), LengthsType())
        ports["tgt_ids"] = NeuralType(('B', 'D', 'T'), LabelsType())
        ports["tgt_lens"] = NeuralType(('B', 'D'), LengthsType())
        ports["gating_labels"] = NeuralType(('B', 'D'), LabelsType())
        ports["turn_domain"] = NeuralType()
        return ports
Ejemplo n.º 15
0
 def input_types(self):
     """Return the typed definitions of this module's input ports.

     ``input_signal`` uses an ``AudioSignal`` element type built with
     ``freq=self._sample_rate``; ``length`` has the single axis ``'B'``.
     """
     return dict(
         input_signal=NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate)),
         length=NeuralType(('B',), LengthsType()),
     )
Ejemplo n.º 16
0
 def input_types(self):
     """Return the typed definitions of this module's input ports.

     Declares ``encoder_output`` (axes ``('B', 'D', 'T')``) and
     ``encoded_lengths`` (single axis ``'B'``).
     """
     encoder_port = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())
     lengths_port = NeuralType(('B',), LengthsType())
     return {"encoder_output": encoder_port, "encoded_lengths": lengths_port}
Ejemplo n.º 17
0
 def output_ports(self):
     """Return the typed definitions of this module's output ports.

     Declares ``audio_signal`` (axes ``('B', 'T')``, ``AudioSignal``) and
     ``a_sig_length`` (single axis ``'B'``, ``LengthsType``).
     """
     ports = {}
     ports["audio_signal"] = NeuralType(('B', 'T'), AudioSignal())
     ports["a_sig_length"] = NeuralType(('B',), LengthsType())
     return ports
Ejemplo n.º 18
0
 def input_ports(self):
     """Return the typed definitions of this module's input ports.

     Declares ``encoder_hidden``, ``encoder_outputs``, ``input_lens``,
     ``src_ids`` and ``targets``, in that order.
     """
     return dict(
         encoder_hidden=NeuralType(('B', 'T', 'C'), ChannelType()),
         encoder_outputs=NeuralType(('B', 'T', 'C'), ChannelType()),
         input_lens=NeuralType(('B',), LengthsType()),
         src_ids=NeuralType(('B', 'T'), ChannelType()),
         targets=NeuralType(('B', 'D', 'T'), LabelsType()),
     )
Ejemplo n.º 19
0
Archivo: rnnt.py Proyecto: sycomix/NeMo
    def input_types(self):
        """Return the typed definitions of this module's input ports.

        Both ``input-states-1`` and ``input-states-2`` refer to the same
        ``NeuralType`` instance, which is created once up front.
        """
        shared_state = NeuralType(('D', 'B', 'D'), ElementType())
        return {
            "encoder_outputs": NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()),
            "targets": NeuralType(('B', 'T'), LabelsType()),
            "target_length": NeuralType(('B',), LengthsType()),
            "input-states-1": shared_state,
            "input-states-2": shared_state,
        }
Ejemplo n.º 20
0
 def output_ports(self):
     """Return the typed definitions of this module's output ports.

     The ``audio_signal`` element type carries ``self._sample_rate`` as
     ``freq`` when ``self`` is not ``None`` and ``self._sample_rate`` is not
     ``None``; otherwise a default-constructed ``AudioSignal`` is used.
     """
     if self is not None and self._sample_rate is not None:
         audio_eltype = AudioSignal(freq=self._sample_rate)
     else:
         audio_eltype = AudioSignal()

     return dict(
         audio_signal=NeuralType(("B", "T"), audio_eltype),
         a_sig_length=NeuralType(("B",), LengthsType()),
         transcripts=NeuralType(("B", "T"), LabelsType()),
         transcript_length=NeuralType(("B",), LengthsType()),
     )
Ejemplo n.º 21
0
 def input_types(self):
     """Return the input type definitions for Contrastive.

     ``decoder_lengths`` is the only optional port. Note that
     ``decoder_outputs`` uses axes ``("B", "T", "D")`` while the two
     spectrogram ports use ``("B", "D", "T")``.
     """
     port_types = {}
     port_types["spectrograms"] = NeuralType(("B", "D", "T"), SpectrogramType())
     port_types["spec_masks"] = NeuralType(("B", "D", "T"), SpectrogramType())
     port_types["decoder_outputs"] = NeuralType(("B", "T", "D"), AcousticEncodedRepresentation())
     port_types["decoder_lengths"] = NeuralType(("B",), LengthsType(), optional=True)
     return port_types
Ejemplo n.º 22
0
 def output_types(self):
     """Return the typed definitions of this module's output ports.

     Axis layout, as given in the legacy tag notation:
         processed_signal:
             0: AxisType(BatchTag)
             1: AxisType(MelSpectrogramSignalTag)
             2: AxisType(ProcessedTimeTag)
         processed_length:
             0: AxisType(BatchTag)
     """
     signal_port = NeuralType(('B', 'D', 'T'), MelSpectrogramType())
     length_port = NeuralType(('B',), LengthsType())
     return {"processed_signal": signal_port, "processed_length": length_port}
Ejemplo n.º 23
0
 def output_types(self):
     """Return the typed definitions of this module's output ports.

     For compatibility, processed features are treated as Spectrogram types.

     Axis layout, as given in the legacy tag notation:
         processed_signal:
             0: AxisType(BatchTag)
             1: AxisType(ChannelTag)
             2: AxisType(ProcessedTimeTag)
         processed_signal_length:
             0: AxisType(BatchTag)
     """
     return dict(
         processed_signal=NeuralType(('B', 'C', 'T'), SpectrogramType()),
         processed_signal_length=NeuralType(('B',), LengthsType()),
     )
Ejemplo n.º 24
0
 def input_types(self):
     """Return the typed definitions of this module's input ports.

     Features are treated as SpectrogramType for NeMo compatibility.

     Axis layout, as given in the legacy tag notation:
         audio_signal:
             0: AxisType(BatchTag)
             1: AxisType(ChannelTag)
             2: AxisType(ProcessedTimeTag)
         length:
             0: AxisType(BatchTag)
     """
     port_types = {}
     port_types["audio_signal"] = NeuralType(('B', 'C', 'T'), SpectrogramType())
     port_types["length"] = NeuralType(('B',), LengthsType())
     return port_types
    def output_ports(self):
        """Return the typed definitions of this module's output ports.

        Ports:
            example_id_num (int): example ids
            service_id (int): service ids
            is_real_example (bool): flag to determine if the example is valid
            utterance_ids (int): utterance ids
            utterance_segment (int): Denotes the identity of the sequence. Takes values 0 (system utterance) and 1 (user utterance)
            utterance_mask (int): Mask which takes the value 0 for padded tokens and 1 otherwise
            categorical_slot_status (int): The status of each categorical slot in the service
            cat_slot_status_mask (int): Masks out categorical status for padded cat slots, takes values 0 and 1
            categorical_slot_values (int): The index of the correct value for each categorical slot
            cat_slot_values_mask (int): Masks out categorical slots values for slots not used in the service, takes values 0 and 1
            noncategorical_slot_status (int): The status of each non-categorical slot in the service
            noncat_slot_status_mask (int): Masks out non-categorical status for padded cat slots, takes values 0 and 1
            noncategorical_slot_value_start (int): The index of the starting subword corresponding to the slot span for a non-categorical slot value
            noncategorical_slot_value_end (int): The index of the ending (inclusive) subword corresponding to the slot span for a non-categorical slot value
            start_char_idx (int): Start character indices in the original utterance corresponding to the tokens
            end_char_idx (int): Inclusive end character indices in the original utterance corresponding to the tokens
            num_slots (int): Total number of slots present in the service
            requested_slot_status (int): Takes value 1 if the corresponding slot is requested, 0 otherwise
            req_slot_mask (int): Masks requested slots not used for the particular service
            intent_status_mask (long): Masks out padded intents in the service, takes values 0 and 1
            intent_status_labels (int): Intent labels

        Returns:
            A dict mapping each port name above to its ``NeuralType``.
        """
        # Single-axis ports use a real one-element tuple ('B',): the bare
        # ('B') is just the string 'B' wrapped in redundant parentheses.
        return {
            "example_id_num": NeuralType(('B',), ChannelType()),
            "service_id": NeuralType(('B',), ChannelType()),
            "is_real_example": NeuralType(('B',), ChannelType()),
            "utterance_ids": NeuralType(('B', 'T'), ChannelType()),
            "utterance_segment": NeuralType(('B', 'T'), ChannelType()),
            "utterance_mask": NeuralType(('B', 'T'), ChannelType()),
            "categorical_slot_status": NeuralType(('B', 'T'), LabelsType()),
            "cat_slot_status_mask": NeuralType(('B', 'T'), ChannelType()),
            "categorical_slot_values": NeuralType(('B', 'T'), LabelsType()),
            "cat_slot_values_mask": NeuralType(('B', 'T', 'C'), ChannelType()),
            "noncategorical_slot_status": NeuralType(('B', 'T'), LabelsType()),
            "noncat_slot_status_mask": NeuralType(('B', 'T'), ChannelType()),
            "noncategorical_slot_value_start": NeuralType(('B', 'T'), LabelsType()),
            "noncategorical_slot_value_end": NeuralType(('B', 'T'), LabelsType()),
            "start_char_idx": NeuralType(('B', 'T'), LabelsType()),
            "end_char_idx": NeuralType(('B', 'T'), LabelsType()),
            "num_slots": NeuralType(('B',), LengthsType()),
            "requested_slot_status": NeuralType(('B', 'T'), LabelsType()),
            "req_slot_mask": NeuralType(('B', 'T'), ChannelType()),
            "intent_status_mask": NeuralType(('B', 'T'), ChannelType()),
            "intent_status_labels": NeuralType(('B',), LabelsType()),
        }
Ejemplo n.º 26
0
 def input_types(self):
     """Return the typed definitions of this module's input ports.

     Axis layout, as given in the legacy tag notation:
         input_signal:
             0: AxisType(BatchTag)
             1: AxisType(TimeTag)
         length:
             0: AxisType(BatchTag)

     Note: length is in number of samples, not seconds.
     """
     signal_port = NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate))
     length_port = NeuralType(('B',), LengthsType())
     return {"input_signal": signal_port, "length": length_port}
    def input_ports(self):
        """Return the typed definitions of this module's input ports.

        logits: 4d tensor of logits

        labels: 3d tensor of labels

        length_mask: presumably specifies the words to be considered in the
            loss calculation (the earlier description named this port
            ``loss_mask``) - confirm against the caller

        """
        ports = {}
        ports["logits"] = NeuralType(('B', 'T', 'D', 'D'), LogitsType())
        ports["labels"] = NeuralType(('B', 'D', 'T'), LabelsType())
        ports["length_mask"] = NeuralType(('B', 'D'), LengthsType())
        return ports
Ejemplo n.º 28
0
 def output_types(self):
     """Return the typed definitions of this module's output ports.

     NOTE(review): ``processed_signal`` is declared with
     ``AcousticEncodedRepresentation`` elements, although this method was
     previously described as using SpectrogramType - confirm which is
     intended.

     Axis layout, as given in the legacy tag notation:
         processed_signal:
             0: AxisType(BatchTag)
             1: AxisType(ChannelTag)
             2: AxisType(ProcessedTimeTag)
         processed_length:
             0: AxisType(BatchTag)
     """
     port_types = {}
     port_types["processed_signal"] = NeuralType(('B', 'C', 'T'), AcousticEncodedRepresentation())
     port_types["processed_length"] = NeuralType(('B',), LengthsType())
     return port_types
Ejemplo n.º 29
0
    def input_ports(self):
        """Return the typed definitions of this module's input ports.

        logits: 4d tensor of logits

        targets: 3d tensor of labels

        loss_mask: specifies the words to be considered in the loss calculation

        """
        return dict(
            logits=NeuralType(('B', 'T', 'D', 'D'), LogitsType()),
            targets=NeuralType(('B', 'D', 'T'), LabelsType()),
            loss_mask=NeuralType(('B', 'D'), LengthsType()),
        )
Ejemplo n.º 30
0
 def output_types(self) -> Optional[Dict[str, NeuralType]]:
     """Return the typed definitions of this module's output ports.

     Declares ``outputs``, ``encoded_lengths`` and ``greedy_predictions``,
     in that order.
     """
     port_types = {}
     port_types["outputs"] = NeuralType(('B', 'T', 'D'), LogprobsType())
     port_types["encoded_lengths"] = NeuralType(('B',), LengthsType())
     port_types["greedy_predictions"] = NeuralType(('B', 'T'), LabelsType())
     return port_types