def input_types(self):
    """Returns definitions of module input ports.

    input_signal: axes ('B', 'D', 'T'), SpectrogramType elements
    length: axes ('B',), LengthsType elements
    """
    port_types = {}
    port_types["input_signal"] = NeuralType(('B', 'D', 'T'), SpectrogramType())
    port_types["length"] = NeuralType(('B',), LengthsType())
    return port_types
def input_types(self) -> Optional[Dict[str, NeuralType]]:
    """Returns definitions of module input ports.

    Every port is declared optional. The audio element type carries the
    preprocessor's ``_sample_rate`` when the preprocessor has that attribute;
    otherwise a default ``AudioSignal()`` is used.
    """
    has_rate = hasattr(self.preprocessor, '_sample_rate')
    audio_eltype = AudioSignal(freq=self.preprocessor._sample_rate) if has_rate else AudioSignal()
    return dict(
        input_signal=NeuralType(('B', 'T'), audio_eltype, optional=True),
        input_signal_length=NeuralType(('B',), LengthsType(), optional=True),
        processed_signal=NeuralType(('B', 'D', 'T'), SpectrogramType(), optional=True),
        processed_signal_length=NeuralType(('B',), LengthsType(), optional=True),
    )
def input_ports(self):
    """Returns definitions of module input ports.

    text: axes ('B', 'T'), EmbeddedTextType elements
    text_pos: axes ('B', 'T'), MaskType elements
    mel_true: axes ('B', 'D', 'T'), MelSpectrogramType elements
    dur_true: axes ('B', 'T'), LengthsType elements
    """
    batch_time = ('B', 'T')
    return {
        "text": NeuralType(batch_time, EmbeddedTextType()),
        "text_pos": NeuralType(batch_time, MaskType()),
        "mel_true": NeuralType(('B', 'D', 'T'), MelSpectrogramType()),
        "dur_true": NeuralType(batch_time, LengthsType()),
    }
def output_types(self):
    """Returns definitions of module output ports.

    outputs: axes ('B', 'D', 'T'), EmbeddedTextType elements
    prednet_lengths: axes ('B',), LengthsType elements
    states: one-element list holding an optional ('D', 'B', 'D') ElementType
    """
    state_types = [NeuralType(('D', 'B', 'D'), ElementType(), optional=True)]
    port_types = dict(
        outputs=NeuralType(('B', 'D', 'T'), EmbeddedTextType()),
        prednet_lengths=NeuralType(('B',), LengthsType()),
    )
    # "states" must always be the last entry.
    port_types["states"] = state_types
    return port_types
def input_types(self):
    """Returns definitions of module input ports.

    ``encoder_outputs`` and ``decoder_outputs`` are required; the length,
    transcript and ``compute_wer`` ports are declared optional.
    """
    return dict(
        encoder_outputs=NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()),
        decoder_outputs=NeuralType(('B', 'D', 'T'), EmbeddedTextType()),
        encoder_lengths=NeuralType(('B',), LengthsType(), optional=True),
        transcripts=NeuralType(('B', 'T'), LabelsType(), optional=True),
        transcript_lengths=NeuralType(('B',), LengthsType(), optional=True),
        compute_wer=NeuralType(optional=True),
    )
def input_types(self):
    """Returns definitions of module input ports.

    targets: axes ('B', 'T'), LabelsType elements
    target_length: axes ('B',), LengthsType elements
    states: one-element list holding an optional ('D', 'B', 'D') ElementType
    """
    port_types = {}
    port_types["targets"] = NeuralType(('B', 'T'), LabelsType())
    port_types["target_length"] = NeuralType(('B',), LengthsType())
    # "states" must always be the last entry.
    port_types["states"] = [NeuralType(('D', 'B', 'D'), ElementType(), optional=True)]
    return port_types
def input_types(self):
    """Returns an OrderedDict with the definitions of module input ports.

    encoder_output: axes ('B', 'D', 'T'), AcousticEncodedRepresentation elements
    targets: axes ('B', 'T'), LabelsType elements
    target_lengths: axes ('B',), LengthsType elements
    """
    port_types = OrderedDict()
    port_types["encoder_output"] = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())
    port_types["targets"] = NeuralType(('B', 'T'), LabelsType())
    port_types["target_lengths"] = NeuralType(('B',), LengthsType())
    return port_types
def output_types(self) -> Optional[Dict[str, NeuralType]]:
    """Returns definitions of module output ports.

    spectrograms / spec_masks: axes ('B', 'D', 'T'), SpectrogramType elements
    encoded: axes ('B', 'D', 'T'), AcousticEncodedRepresentation elements
    encoded_len: axes ('B',), LengthsType elements
    """
    feature_axes = ('B', 'D', 'T')
    return dict(
        spectrograms=NeuralType(feature_axes, SpectrogramType()),
        spec_masks=NeuralType(feature_axes, SpectrogramType()),
        encoded=NeuralType(feature_axes, AcousticEncodedRepresentation()),
        encoded_len=NeuralType(('B',), LengthsType()),
    )
def input_types(self):
    """Returns definitions of module input ports.

    encoder_output: axes ('B', 'D', 'T'), AcousticEncodedRepresentation elements
    encoded_lengths: axes ('B',), LengthsType elements
    partial_hypotheses: one-element list holding an optional HypothesisType
    """
    port_types = dict(
        encoder_output=NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()),
        encoded_lengths=NeuralType(('B',), LengthsType()),
    )
    # "partial_hypotheses" must always be the last entry.
    port_types["partial_hypotheses"] = [NeuralType(elements_type=HypothesisType(), optional=True)]
    return port_types
def output_types(self) -> Optional[Dict[str, NeuralType]]:
    """Returns definitions of module output ports.

    The audio element type carries ``self._sample_rate`` when ``self`` is not
    None and has that attribute; otherwise a default ``AudioSignal()`` is used.
    """
    if self is not None and hasattr(self, '_sample_rate'):
        audio_eltype = AudioSignal(freq=self._sample_rate)
    else:
        audio_eltype = AudioSignal()

    port_types = {}
    port_types['audio_signal'] = NeuralType(('B', 'T'), audio_eltype)
    port_types['a_sig_length'] = NeuralType(('B',), LengthsType())
    port_types['label'] = NeuralType(('B',), LabelsType())
    port_types['label_length'] = NeuralType(('B',), LengthsType())
    return port_types
def input_types(self):
    """Returns definitions of module input ports.

    input_signal: axes ('B', 'T'), AudioSignal built with ``self._sample_rate``
    length: axes ('B',), LengthsType elements
    """
    signal_type = NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate))
    # Please note that length should be in samples, not seconds.
    length_type = NeuralType(('B',), LengthsType())
    return {"input_signal": signal_type, "length": length_type}
def input_types(self):
    """Returns an OrderedDict with the definitions of module input ports.

    audio_signal: axes ('B', 'D', 'T'), SpectrogramType elements
    length: axes ('B',), LengthsType elements
    """
    return OrderedDict(
        [
            ("audio_signal", NeuralType(('B', 'D', 'T'), SpectrogramType())),
            ("length", NeuralType(('B',), LengthsType())),
        ]
    )
def input_types(self) -> Optional[Dict[str, NeuralType]]:
    """Returns definitions of module input ports.

    The audio element type carries the preprocessor's ``_sample_rate`` when
    the preprocessor has that attribute; otherwise a default ``AudioSignal()``
    is used.
    """
    preprocessor = self.preprocessor
    signal_eltype = (
        AudioSignal(freq=preprocessor._sample_rate) if hasattr(preprocessor, '_sample_rate') else AudioSignal()
    )
    return dict(
        input_signal=NeuralType(('B', 'T'), signal_eltype),
        input_signal_length=NeuralType(('B',), LengthsType()),
    )
def output_ports(self):
    """Returns definitions of module output ports.

    src_ids: ids of input sequences
    src_lens: lengths of input sequences
    tgt_ids: labels for the generator output
    tgt_lens: lengths of the generator targets
    gating_labels: labels for the gating head
    turn_domain: list of the domains
    """
    return dict(
        src_ids=NeuralType(('B', 'T'), ChannelType()),
        src_lens=NeuralType(('B',), LengthsType()),
        tgt_ids=NeuralType(('B', 'D', 'T'), LabelsType()),
        tgt_lens=NeuralType(('B', 'D'), LengthsType()),
        gating_labels=NeuralType(('B', 'D'), LabelsType()),
        turn_domain=NeuralType(),
    )
def input_types(self):
    """Returns definitions of module input ports.

    input_signal: axes ('B', 'T'), AudioSignal built with ``self._sample_rate``
    length: axes ('B',), LengthsType elements
    """
    audio_eltype = AudioSignal(freq=self._sample_rate)
    return dict(
        input_signal=NeuralType(('B', 'T'), audio_eltype),
        length=NeuralType(('B',), LengthsType()),
    )
def input_types(self):
    """Returns definitions of module input ports.

    encoder_output: axes ('B', 'D', 'T'), AcousticEncodedRepresentation elements
    encoded_lengths: axes ('B',), LengthsType elements
    """
    encoder_output_type = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())
    encoded_lengths_type = NeuralType(('B',), LengthsType())
    return {"encoder_output": encoder_output_type, "encoded_lengths": encoded_lengths_type}
def output_ports(self):
    """Returns definitions of module output ports.

    audio_signal: axes ('B', 'T'), default AudioSignal elements
    a_sig_length: axes ('B',), LengthsType elements
    """
    return dict(
        audio_signal=NeuralType(('B', 'T'), AudioSignal()),
        a_sig_length=NeuralType(('B',), LengthsType()),
    )
def input_ports(self):
    """Returns definitions of module input ports.

    encoder_hidden / encoder_outputs: axes ('B', 'T', 'C'), ChannelType elements
    input_lens: axes ('B',), LengthsType elements
    src_ids: axes ('B', 'T'), ChannelType elements
    targets: axes ('B', 'D', 'T'), LabelsType elements
    """
    encoder_axes = ('B', 'T', 'C')
    return dict(
        encoder_hidden=NeuralType(encoder_axes, ChannelType()),
        encoder_outputs=NeuralType(encoder_axes, ChannelType()),
        input_lens=NeuralType(('B',), LengthsType()),
        src_ids=NeuralType(('B', 'T'), ChannelType()),
        targets=NeuralType(('B', 'D', 'T'), LabelsType()),
    )
def input_types(self):
    """Returns definitions of module input ports.

    Both ``input-states-*`` entries refer to the same NeuralType instance
    with axes ('D', 'B', 'D') and ElementType elements.
    """
    shared_state = NeuralType(('D', 'B', 'D'), ElementType())
    port_types = dict(
        encoder_outputs=NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation()),
        targets=NeuralType(('B', 'T'), LabelsType()),
        target_length=NeuralType(('B',), LengthsType()),
    )
    # These keys contain hyphens, so they cannot be passed as keyword arguments.
    for state_key in ('input-states-1', 'input-states-2'):
        port_types[state_key] = shared_state
    return port_types
def output_ports(self):
    """Returns definitions of module output ports.

    The audio element type carries ``self._sample_rate`` when ``self`` and
    that attribute are both not None; otherwise a default ``AudioSignal()``
    is used.
    """
    if self is None or self._sample_rate is None:
        audio_eltype = AudioSignal()
    else:
        audio_eltype = AudioSignal(freq=self._sample_rate)

    return dict(
        audio_signal=NeuralType(('B', 'T'), audio_eltype),
        a_sig_length=NeuralType(('B',), LengthsType()),
        transcripts=NeuralType(('B', 'T'), LabelsType()),
        transcript_length=NeuralType(('B',), LengthsType()),
    )
def input_types(self):
    """Input types definitions for Contrastive.

    spectrograms / spec_masks: axes ('B', 'D', 'T'), SpectrogramType elements
    decoder_outputs: axes ('B', 'T', 'D'), AcousticEncodedRepresentation elements
    decoder_lengths: axes ('B',), LengthsType elements, optional
    """
    spec_axes = ('B', 'D', 'T')
    return dict(
        spectrograms=NeuralType(spec_axes, SpectrogramType()),
        spec_masks=NeuralType(spec_axes, SpectrogramType()),
        decoder_outputs=NeuralType(('B', 'T', 'D'), AcousticEncodedRepresentation()),
        decoder_lengths=NeuralType(('B',), LengthsType(), optional=True),
    )
def output_types(self):
    """Returns definitions of module output ports.

    processed_signal:
        0: AxisType(BatchTag)
        1: AxisType(MelSpectrogramSignalTag)
        2: AxisType(ProcessedTimeTag)

    processed_length:
        0: AxisType(BatchTag)
    """
    port_types = {}
    port_types["processed_signal"] = NeuralType(('B', 'D', 'T'), MelSpectrogramType())
    port_types["processed_length"] = NeuralType(('B',), LengthsType())
    return port_types
def output_types(self):
    """Returns definitions of module output ports.

    For compatibility, processed features are treated as Spectrogram types.

    processed_signal:
        0: AxisType(BatchTag)
        1: AxisType(ChannelTag)
        2: AxisType(ProcessedTimeTag)

    processed_signal_length:
        0: AxisType(BatchTag)
    """
    return dict(
        processed_signal=NeuralType(('B', 'C', 'T'), SpectrogramType()),
        processed_signal_length=NeuralType(('B',), LengthsType()),
    )
def input_types(self):
    """Returns definitions of module input ports.

    We treat features as SpectrogramType for Nemo compatibility.

    audio_signal:
        0: AxisType(BatchTag)
        1: AxisType(ChannelTag)
        2: AxisType(ProcessedTimeTag)

    length:
        0: AxisType(BatchTag)
    """
    features_type = NeuralType(('B', 'C', 'T'), SpectrogramType())
    length_type = NeuralType(('B',), LengthsType())
    return {"audio_signal": features_type, "length": length_type}
def output_ports(self):
    """Returns definitions of module output ports.

    example_id_num (int): example ids
    service_id (int): service ids
    is_real_example (bool): flag to determine is the example is valid
    utterance_ids (int): utterance ids
    utterance_segment (int): Denotes the identity of the sequence. Takes values 0 (system utterance)
        and 1 (user utterance)
    utterance_mask (int): Mask which takes the value 0 for padded tokens and 1 otherwise
    categorical_slot_status (int): The status of each categorical slot in the service
    cat_slot_status_mask (int): Masks out categorical status for padded cat slots, takes values 0 and 1
    categorical_slot_values (int): The index of the correct value for each categorical slot
    cat_slot_values_mask (int): Masks out categorical slots values for slots not used in the service,
        takes values 0 and 1
    noncategorical_slot_status (int): The status of each non-categorical slot in the service
    noncat_slot_status_mask (int): Masks out non-categorical status for padded cat slots,
        takes values 0 and 1
    noncategorical_slot_value_start (int): The index of the starting subword corresponding to the
        slot span for a non-categorical slot value
    noncategorical_slot_value_end (int): The index of the ending (inclusive) subword corresponding to
        the slot span for a non-categorical slot value
    start_char_idx (int): Start character indices in the original utterance corresponding to the tokens
    end_char_idx (int): Inclusive end character indices in the original utterance corresponding to
        the tokens
    num_slots (int): Total number of slots present in the service
    requested_slot_status (int): Takes value 1 if the corresponding slot is requested, 0 otherwise
    req_slot_mask (int): Masks requested slots not used for the particular service
    intent_status_mask (long): Masks out padded intents in the service, takes values 0 and 1
    intent_status_labels (int): Intent labels
    """
    # NOTE: single-axis ports use the explicit 1-tuple ('B',). The previous
    # spelling ('B') is just the string 'B' in parentheses, not a tuple.
    return {
        "example_id_num": NeuralType(('B',), ChannelType()),
        "service_id": NeuralType(('B',), ChannelType()),
        "is_real_example": NeuralType(('B',), ChannelType()),
        "utterance_ids": NeuralType(('B', 'T'), ChannelType()),
        "utterance_segment": NeuralType(('B', 'T'), ChannelType()),
        "utterance_mask": NeuralType(('B', 'T'), ChannelType()),
        "categorical_slot_status": NeuralType(('B', 'T'), LabelsType()),
        "cat_slot_status_mask": NeuralType(('B', 'T'), ChannelType()),
        "categorical_slot_values": NeuralType(('B', 'T'), LabelsType()),
        "cat_slot_values_mask": NeuralType(('B', 'T', 'C'), ChannelType()),
        "noncategorical_slot_status": NeuralType(('B', 'T'), LabelsType()),
        "noncat_slot_status_mask": NeuralType(('B', 'T'), ChannelType()),
        "noncategorical_slot_value_start": NeuralType(('B', 'T'), LabelsType()),
        "noncategorical_slot_value_end": NeuralType(('B', 'T'), LabelsType()),
        "start_char_idx": NeuralType(('B', 'T'), LabelsType()),
        "end_char_idx": NeuralType(('B', 'T'), LabelsType()),
        "num_slots": NeuralType(('B',), LengthsType()),
        "requested_slot_status": NeuralType(('B', 'T'), LabelsType()),
        "req_slot_mask": NeuralType(('B', 'T'), ChannelType()),
        "intent_status_mask": NeuralType(('B', 'T'), ChannelType()),
        "intent_status_labels": NeuralType(('B',), LabelsType()),
    }
def input_types(self):
    """Returns definitions of module input ports.

    input_signal:
        0: AxisType(BatchTag)
        1: AxisType(TimeTag)

    length:
        0: AxisType(BatchTag)

    Note: length is in number of samples, not seconds
    """
    port_types = {}
    port_types["input_signal"] = NeuralType(('B', 'T'), AudioSignal(freq=self._sample_rate))
    port_types["length"] = NeuralType(('B',), LengthsType())
    return port_types
def input_ports(self):
    """Returns definitions of module input ports.

    logits: 4d tensor of logits
    labels: 3d tensor of labels
    length_mask: specifies the words to be considered in the loss calculation
    """
    return dict(
        logits=NeuralType(('B', 'T', 'D', 'D'), LogitsType()),
        labels=NeuralType(('B', 'D', 'T'), LabelsType()),
        length_mask=NeuralType(('B', 'D'), LengthsType()),
    )
def output_types(self):
    """Returns definitions of module output ports.

    The processed signal is declared with AcousticEncodedRepresentation
    elements.

    processed_signal:
        0: AxisType(BatchTag)
        1: AxisType(ChannelTag)
        2: AxisType(ProcessedTimeTag)

    processed_length:
        0: AxisType(BatchTag)
    """
    return dict(
        processed_signal=NeuralType(('B', 'C', 'T'), AcousticEncodedRepresentation()),
        processed_length=NeuralType(('B',), LengthsType()),
    )
def input_ports(self):
    """Returns definitions of module input ports.

    logits: 4d tensor of logits
    targets: 3d tensor of labels
    loss_mask: specifies the words to be considered in the loss calculation
    """
    port_types = {}
    port_types["logits"] = NeuralType(('B', 'T', 'D', 'D'), LogitsType())
    port_types["targets"] = NeuralType(('B', 'D', 'T'), LabelsType())
    port_types["loss_mask"] = NeuralType(('B', 'D'), LengthsType())
    return port_types
def output_types(self) -> Optional[Dict[str, NeuralType]]:
    """Returns definitions of module output ports.

    outputs: axes ('B', 'T', 'D'), LogprobsType elements
    encoded_lengths: axes ('B',), LengthsType elements
    greedy_predictions: axes ('B', 'T'), LabelsType elements
    """
    return dict(
        outputs=NeuralType(('B', 'T', 'D'), LogprobsType()),
        encoded_lengths=NeuralType(('B',), LengthsType()),
        greedy_predictions=NeuralType(('B', 'T'), LabelsType()),
    )