def output_ports(self):
    """
    Creates definitions of output ports.

    The image height and width are read from ``self._height`` and
    ``self._width``; the channel axis always has size 1.
    """
    ports = {}

    # One index per sample in the batch.
    ports["indices"] = NeuralType(tuple('B'), elements_type=Index())

    # Batch x Channel x Height x Width image tensor.
    image_axes = (
        AxisType(kind=AxisKind.Batch),
        AxisType(kind=AxisKind.Channel, size=1),
        AxisType(kind=AxisKind.Height, size=self._height),
        AxisType(kind=AxisKind.Width, size=self._width),
    )
    # Element values are floats in the <0-1> range.
    ports["images"] = NeuralType(axes=image_axes, elements_type=NormalizedImageValue())

    # Targets are integers.
    ports["targets"] = NeuralType(tuple('B'), elements_type=ClassificationTarget())

    # Labels are strings.
    ports["labels"] = NeuralType(tuple('B'), elements_type=StringLabel())

    return ports
def output_types(self):
    """
    Returns definitions of module output ports.

    A single "predictions" port with a batch axis and a dimension axis,
    holding log-probability elements.
    """
    prediction_axes = (
        AxisType(kind=AxisKind.Batch),
        AxisType(kind=AxisKind.Dimension),
    )
    predictions = NeuralType(axes=prediction_axes, elements_type=LogprobsType())
    return {"predictions": predictions}
def input_ports(self):
    """Returns definitions of module input ports.

    log_probs:
        0: AxisType(BatchTag)

        1: AxisType(TimeTag)

        2: AxisType(ChannelTag)

    targets:
        0: AxisType(BatchTag)

        1: AxisType(TimeTag)
    """
    ports = {}

    log_probs_axes = {
        0: AxisType(BatchTag),
        1: AxisType(TimeTag),
        2: AxisType(ChannelTag),
    }
    ports['log_probs'] = NeuralType(log_probs_axes)

    targets_axes = {0: AxisType(BatchTag), 1: AxisType(TimeTag)}
    ports['targets'] = NeuralType(targets_axes)

    return ports
def output_ports(self):
    """Returns definitions of module output ports.

    predictions:
        0: AxisType(BatchTag)

        1: AxisType(TimeTag)

    attention_weights:
        0: AxisType(BatchTag)

        1: AxisType(TimeTag)

        2: AxisType(TimeTag)
    """
    ports = {}

    predictions_axes = {0: AxisType(BatchTag), 1: AxisType(TimeTag)}
    ports['predictions'] = NeuralType(predictions_axes)

    attention_axes = {
        0: AxisType(BatchTag),
        1: AxisType(TimeTag),
        2: AxisType(TimeTag),
    }
    ports['attention_weights'] = NeuralType(attention_axes)

    return ports
def test_infer_caching(self):
    """
    Runs ``infer`` once with ``cache=True`` and then again with
    ``use_cache=True`` on a tensor that depends on a previously evaluated
    one, checking the evaluated values in both passes.
    """
    dl_out_type = NeuralType(
        (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)),
        ChannelType(),
    )
    data_source = nemo.backends.pytorch.common.ZerosDataLayer(
        size=1,
        dtype=torch.FloatTensor,
        batch_size=1,
        output_ports={"dl_out": dl_out_type},
    )
    add_ten = AddsTen()
    sub_ten = SubtractsTen()

    # Chain the same AddsTen module three times on top of the data layer.
    zeros = data_source()
    tens = add_ten(mod_in=zeros)
    twenties = add_ten(mod_in=tens)
    thirties = add_ten(mod_in=twenties)

    # First pass: evaluate and fill the cache.
    first_pass = self.nf.infer(tensors=[twenties, thirties], verbose=False, cache=True)
    self.assertEqual(first_pass[0][0].squeeze().data, 20)
    self.assertEqual(first_pass[1][0].squeeze().data, 30)

    # Second pass: a new tensor built on top of an already evaluated one,
    # this time asking infer to use the cache.
    back_to_ten = sub_ten(mod_in=twenties)
    second_pass = self.nf.infer(tensors=[back_to_ten], verbose=False, use_cache=True)
    self.assertEqual(second_pass[0][0].squeeze().data, 10)
def test_struct(self):
    """
    Two neural types with identical axes but element types that declare
    different fields must compare as INCOMPATIBLE.
    """

    class BoundingBox(ElementType):
        def __str__(self):
            return "bounding box from detection model"

        def fields(self):
            return ("X", "Y", "W", "H")

    # A new, user-defined axis kind.
    class AxisKind2(AxisKindAbstract):
        Image = 0

    def list_axes():
        # Fresh axis objects on every call so the two types share nothing.
        return (
            AxisType(kind=AxisKind.Batch, size=None, is_list=True),
            AxisType(kind=AxisKind2.Image, size=None, is_list=True),
        )

    T1 = NeuralType(elements_type=BoundingBox(), axes=list_axes())

    class BadBoundingBox(ElementType):
        def __str__(self):
            return "bad bounding box from detection model"

        def fields(self):
            # Lacks the "W" field that BoundingBox declares.
            return ("X", "Y", "H")

    T2 = NeuralType(elements_type=BadBoundingBox(), axes=list_axes())

    outcome = T2.compare(T1)
    assert outcome == NeuralTypeComparisonResult.INCOMPATIBLE
def output_ports(self):
    """
    Returns definitions of module output ports.

    A single "mod_out" port: a batch axis followed by a dimension axis of
    size 1, with ChannelType elements.
    """
    mod_out_axes = (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1))
    mod_out = NeuralType(mod_out_axes, ChannelType())
    return {"mod_out": mod_out}
def input_ports(self):
    """Returns definitions of module input ports.

    targets:
        0: AxisType(BatchTag)

        1: AxisType(TimeTag)

    encoder_outputs (optional):
        0: AxisType(BatchTag)

        1: AxisType(TimeTag)

        2: AxisType(ChannelTag)
    """
    ports = {}

    targets_axes = {0: AxisType(BatchTag), 1: AxisType(TimeTag)}
    ports['targets'] = NeuralType(targets_axes)

    encoder_axes = {
        0: AxisType(BatchTag),
        1: AxisType(TimeTag),
        2: AxisType(ChannelTag),
    }
    # This port is declared optional.
    ports['encoder_outputs'] = NeuralType(encoder_axes, optional=True)

    return ports
def create_ports():
    """
    Builds the port definitions.

    Returns:
        A ``(input_ports, output_ports)`` pair of dictionaries. Inputs are
        "preds" and "labels", each with one RegressionTag axis; the only
        output is "loss", declared as ``NeuralType(None)``.
    """
    inputs = {}
    inputs["preds"] = NeuralType({0: AxisType(RegressionTag)})
    inputs["labels"] = NeuralType({0: AxisType(RegressionTag)})

    outputs = {}
    outputs["loss"] = NeuralType(None)

    return inputs, outputs
def output_ports(self):
    """
    Returns definitions of module output ports.

    "mod_out" has a batch axis and a dimension axis of size 1, and carries
    ChannelType elements.
    """
    ports = {}
    batch_axis = AxisType(AxisKind.Batch)
    unit_dim_axis = AxisType(AxisKind.Dimension, 1)
    ports["mod_out"] = NeuralType((batch_axis, unit_dim_axis), ChannelType())
    return ports
def create_ports():
    """
    Builds the port definitions.

    Returns:
        A ``(input_ports, output_ports)`` pair of dictionaries. There are no
        inputs; the outputs are "audio_signal" (batch and time axes) and
        "a_sig_length" (batch axis only).
    """
    inputs = {}

    outputs = {}
    signal_axes = {0: AxisType(BatchTag), 1: AxisType(TimeTag)}
    outputs["audio_signal"] = NeuralType(signal_axes)
    length_axes = {0: AxisType(BatchTag)}
    outputs["a_sig_length"] = NeuralType(length_axes)

    return inputs, outputs
def output_ports(self):
    """Returns definitions of module output ports.

    predictions:
        0: AxisType(BatchTag)

        1: AxisType(TimeTag)
    """
    predictions_axes = {
        0: AxisType(BatchTag),
        1: AxisType(TimeTag),
    }
    ports = {"predictions": NeuralType(predictions_axes)}
    return ports
def output_ports(self):
    """
    Returns definitions of module output ports.

    The "outputs" port starts with a batch axis, followed by one axis of
    kind Any for every entry of ``self._output_sizes`` after the first,
    each carrying that entry as its size.
    """
    batch_axis = [AxisType(kind=AxisKind.Batch)]
    # The first entry of the sizes list is skipped; the batch axis takes its place.
    sized_axes = [AxisType(kind=AxisKind.Any, size=dim) for dim in self._output_sizes[1:]]
    return {"outputs": NeuralType(batch_axis + sized_axes, VoidType())}
def test_unspecified_dimensions(self):
    """
    Compares a neural type with explicit axis sizes against one declared
    with the short string notation (no sizes), in both directions.

    The expected results are the ones the original assertions named:
    comparing the unsized type to the sized one gives SAME, and the
    reverse direction gives DIM_INCOMPATIBLE.
    """
    t0 = NeuralType(
        (
            AxisType(AxisKind.Batch, 64),
            AxisType(AxisKind.Time, 10),
            AxisType(AxisKind.Dimension, 128),
        ),
        SpectrogramType(),
    )
    t1 = NeuralType(('B', 'T', 'C'), SpectrogramType())

    # Compare explicitly with ``==``. The form ``assert expr, expected``
    # uses ``expected`` only as the failure message, so it would check
    # nothing more than the truthiness of ``expr``.
    assert t1.compare(t0) == NeuralTypeComparisonResult.SAME
    assert t0.compare(t1) == NeuralTypeComparisonResult.DIM_INCOMPATIBLE
def create_ports():
    """
    Builds the port definitions.

    Returns:
        A ``(input_ports, output_ports)`` pair of dictionaries. The inputs
        "input_ids", "token_type_ids" and "attention_mask" each have batch
        and time axes; the output "hidden_states" has batch, time and
        channel axes.
    """
    # All three inputs have the same layout; each gets its own type object.
    input_names = ("input_ids", "token_type_ids", "attention_mask")
    inputs = {
        name: NeuralType({0: AxisType(BatchTag), 1: AxisType(TimeTag)})
        for name in input_names
    }

    hidden_axes = {
        0: AxisType(BatchTag),
        1: AxisType(TimeTag),
        2: AxisType(ChannelTag),
    }
    outputs = {"hidden_states": NeuralType(hidden_axes)}

    return inputs, outputs
def input_ports(self):
    """
    Returns definitions of module input ports.

    The "inputs" port starts with a batch axis, followed by one axis of
    kind Any for every entry of ``self._sizes`` after the first, each
    carrying that entry as its size.
    """
    batch_axis = [AxisType(kind=AxisKind.Batch)]
    # The first entry of the sizes list is skipped; the batch axis takes its place.
    sized_axes = [AxisType(kind=AxisKind.Any, size=dim) for dim in self._sizes[1:]]
    return {"inputs": NeuralType(batch_axis + sized_axes, VoidType())}
def output_ports(self):
    """Returns definitions of module output ports.

    output:
        0: AxisType(BatchTag)

        1: AxisType(TimeTag)

        2: AxisType(ChannelTag)
    """
    output_axes = {
        0: AxisType(BatchTag),
        1: AxisType(TimeTag),
        2: AxisType(ChannelTag),
    }
    ports = {}
    ports["output"] = NeuralType(output_axes)
    return ports
def output_ports(self):
    """
    Returns definitions of module output ports.

    The "outputs" port starts with a batch axis, followed by one axis of
    kind Any for every entry of ``self._sizes`` after the first, each
    carrying that entry as its size. Elements are declared as LogprobsType.
    """
    batch_axis = [AxisType(kind=AxisKind.Batch)]
    sized_axes = [AxisType(kind=AxisKind.Any, size=dim) for dim in self._sizes[1:]]
    # TODO: if self._type != "logsoftmax"
    return {"outputs": NeuralType(batch_axis + sized_axes, LogprobsType())}
def test_asr_with_zero_ds(self):
    """
    Wires a ZerosDataLayer into a Jasper encoder, a CTC decoder and a CTC
    loss, then trains the resulting graph for two epochs with SGD.

    The data layer emits already-processed signals, so no pre-processing
    module is part of the graph.
    """
    logging.info("Testing ASR NMs with ZeroDS and without pre-processing")

    # Load the model definition that lives next to the tests.
    tests_dir = os.path.dirname(__file__)
    path = os.path.abspath(os.path.join(tests_dir, "../data/jasper_smaller.yaml"))
    with open(path) as file:
        jasper_model_definition = self.yaml.load(file)

    # Data layer emitting zero tensors with the declared port types.
    dl = nemo.backends.pytorch.common.ZerosDataLayer(
        size=100,
        dtype=torch.FloatTensor,
        batch_size=4,
        output_ports={
            "processed_signal": NeuralType(
                (
                    AxisType(AxisKind.Batch),
                    AxisType(AxisKind.Dimension, 64),
                    AxisType(AxisKind.Time, 64),
                ),
                SpectrogramType(),
            ),
            "processed_length": NeuralType(tuple('B'), LengthsType()),
            "transcript": NeuralType(
                (AxisType(AxisKind.Batch), AxisType(AxisKind.Time, 64)),
                LabelsType(),
            ),
            "transcript_length": NeuralType(tuple('B'), LengthsType()),
        },
    )

    # Trainable modules.
    preprocessor_cfg = jasper_model_definition['AudioToMelSpectrogramPreprocessor']
    jasper_encoder = nemo_asr.JasperEncoder(
        feat_in=preprocessor_cfg['features'],
        **jasper_model_definition["JasperEncoder"],
    )
    jasper_decoder = nemo_asr.JasperDecoderForCTC(feat_in=1024, num_classes=len(self.labels))
    ctc_loss = nemo_asr.CTCLossNM(num_classes=len(self.labels))

    # DAG
    processed_signal, p_length, transcript, transcript_len = dl()
    encoded, encoded_len = jasper_encoder(audio_signal=processed_signal, length=p_length)
    log_probs = jasper_decoder(encoder_output=encoded)
    loss = ctc_loss(
        log_probs=log_probs,
        targets=transcript,
        input_length=encoded_len,
        target_length=transcript_len,
    )

    def log_train_loss(x):
        return logging.info(f'Train Loss: {str(x[0].item())}')

    callback = nemo.core.SimpleLossLoggerCallback(tensors=[loss], print_func=log_train_loss)

    # Instantiate an optimizer to perform the `train` action.
    training_params = {"num_epochs": 2, "lr": 0.0003}
    self.nf.train(
        [loss],
        callbacks=[callback],
        optimization_params=training_params,
        optimizer="sgd",
    )
def test_infer_errors(self):
    """
    Checks that ``infer`` raises ``ValueError`` for inconsistent use of the
    ``cache`` / ``use_cache`` flags: using an empty cache, caching into a
    non-empty cache, and setting both flags at once.
    """
    dl_out_type = NeuralType(
        (AxisType(AxisKind.Batch), AxisType(AxisKind.Dimension, 1)),
        ChannelType(),
    )
    data_source = nemo.backends.pytorch.common.ZerosDataLayer(
        size=1,
        dtype=torch.FloatTensor,
        batch_size=1,
        output_ports={"dl_out": dl_out_type},
    )
    add_ten = AddsTen()
    sub_ten = SubtractsTen()

    zeros = data_source()
    tens = add_ten(mod_in=zeros)
    twenties = add_ten(mod_in=tens)
    thirties = add_ten(mod_in=twenties)

    # Asking to use the cache before anything was cached must fail.
    with self.assertRaisesRegex(ValueError, "use_cache was set, but cache was empty"):
        self.nf.infer(tensors=[twenties, thirties], verbose=False, use_cache=True)

    # Populate the cache ...
    back_to_ten = sub_ten(mod_in=twenties)
    evaluated_tensors = self.nf.infer(tensors=[back_to_ten], verbose=False, cache=True)

    # ... after which caching again without clearing it must fail.
    with self.assertRaisesRegex(ValueError, "cache was set but was not empty"):
        self.nf.infer(tensors=[twenties, thirties], verbose=False, cache=True)

    # Once the cache is cleared, caching works again.
    self.nf.clear_cache()
    evaluated_tensors = self.nf.infer(tensors=[back_to_ten], verbose=False, cache=True)

    # Setting both flags in the same call is rejected.
    with self.assertRaisesRegex(ValueError, "cache and use_cache were both set."):
        self.nf.infer(
            tensors=[twenties, thirties], verbose=False, cache=True, use_cache=True
        )

    # The result of the last successful call is still intact.
    self.assertEqual(evaluated_tensors[0][0].squeeze().data, 10)
def input_types(self):
    """
    Returns definitions of module input ports.

    A single "images" port laid out as Batch x Channel x Height x Width,
    with one channel and a fixed 32 x 32 spatial size.
    """
    image_axes = (
        AxisType(kind=AxisKind.Batch),
        AxisType(kind=AxisKind.Channel, size=1),
        AxisType(kind=AxisKind.Height, size=32),
        AxisType(kind=AxisKind.Width, size=32),
    )
    images = NeuralType(axes=image_axes, elements_type=ImageValue())
    return {"images": images}
def output_ports(self):
    """
    Returns definitions of module output ports.

    The "outputs" port consists of a batch axis, one unsized axis of kind
    Any per element of ``range(self._dimensions)[1:-1]``, and a final axis
    of kind Any whose size is ``self._output_size``.
    """
    # Leading batch axis.
    axes = [AxisType(kind=AxisKind.Batch)]
    # The "additional dimensions" between the batch axis and the last axis.
    axes.extend(AxisType(kind=AxisKind.Any) for _ in range(self._dimensions)[1:-1])
    # Trailing axis: output_size.
    axes.append(AxisType(kind=AxisKind.Any, size=self._output_size))
    # Batch of outputs of "any type".
    return {"outputs": NeuralType(axes, VoidType())}
def input_ports(self):
    """Returns definitions of module input ports.

    preds:
        0: AxisType(RegressionTag)

    labels:
        0: AxisType(RegressionTag)
    """
    ports = {}
    ports["preds"] = NeuralType({0: AxisType(RegressionTag)})
    ports["labels"] = NeuralType({0: AxisType(RegressionTag)})
    return ports
def test_short_vs_long_version(self):
    """
    A neural type spelled out with explicit ``AxisType`` objects must
    compare as SAME, in both directions, with the equivalent type written
    in the short string notation.
    """
    explicit_axes = (
        AxisType(AxisKind.Batch, None),
        AxisType(AxisKind.Dimension, None),
        AxisType(AxisKind.Time, None),
    )
    long_version = NeuralType(
        axes=explicit_axes,
        elements_type=AcousticEncodedRepresentation(),
    )
    short_version = NeuralType(('B', 'D', 'T'), AcousticEncodedRepresentation())

    # The comparison has to hold both ways round.
    for lhs, rhs in ((long_version, short_version), (short_version, long_version)):
        self.assertEqual(lhs.compare(rhs), NeuralTypeComparisonResult.SAME)
def input_ports(self):
    """
    Returns definitions of module input ports.

    Batch of inputs, each represented as index [BATCH_SIZE x ... x INPUT_SIZE]:
    a batch axis, one unsized axis of kind Any per element of
    ``range(self._dimensions)[1:-1]``, and a final axis of kind Any whose
    size is ``self._input_size``.
    """
    # Leading batch axis.
    axes = [AxisType(kind=AxisKind.Batch)]
    # The "additional dimensions" between the batch axis and the last axis.
    axes.extend(AxisType(kind=AxisKind.Any) for _ in range(self._dimensions)[1:-1])
    # Trailing axis: input_size.
    axes.append(AxisType(kind=AxisKind.Any, size=self._input_size))
    return {"inputs": NeuralType(axes, VoidType())}
def create_ports():
    """
    Builds the port definitions.

    Returns:
        A ``(input_ports, output_ports)`` pair of dictionaries. The inputs
        are "log_probs" (batch, time and channel axes) and
        "log_probs_length" (batch axis only); the only output is
        "predictions", declared as ``NeuralType(None)``.
    """
    inputs = {}
    log_probs_axes = {
        0: AxisType(BatchTag),
        1: AxisType(TimeTag),
        2: AxisType(ChannelTag),
    }
    inputs["log_probs"] = NeuralType(log_probs_axes)
    inputs["log_probs_length"] = NeuralType({0: AxisType(BatchTag)})

    outputs = {"predictions": NeuralType(None)}

    return inputs, outputs
def create_ports():
    """
    Builds the port definitions.

    Returns:
        A ``(input_ports, output_ports)`` pair of dictionaries. The single
        input "encoder_outputs" (batch, time and channel axes) is optional;
        the outputs are "predictions" (batch and time axes) and
        "attention_weights" (batch, time and time axes).
    """
    inputs = {}
    encoder_axes = {
        0: AxisType(BatchTag),
        1: AxisType(TimeTag),
        2: AxisType(ChannelTag),
    }
    inputs['encoder_outputs'] = NeuralType(encoder_axes, optional=True)

    outputs = {}
    predictions_axes = {0: AxisType(BatchTag), 1: AxisType(TimeTag)}
    outputs['predictions'] = NeuralType(predictions_axes)
    attention_axes = {
        0: AxisType(BatchTag),
        1: AxisType(TimeTag),
        2: AxisType(TimeTag),
    }
    outputs['attention_weights'] = NeuralType(attention_axes)

    return inputs, outputs
def input_ports(self):
    """Returns definitions of module input ports.

    encoder_output:
        0: AxisType(BatchTag)

        1: AxisType(EncodedRepresentationTag)

        2: AxisType(ProcessedTimeTag)
    """
    encoder_axes = {
        0: AxisType(BatchTag),
        1: AxisType(EncodedRepresentationTag),
        2: AxisType(ProcessedTimeTag),
    }
    ports = {}
    ports["encoder_output"] = NeuralType(encoder_axes)
    return ports
def create_ports():
    """
    Builds the port definitions.

    Returns:
        A ``(input_ports, output_ports)`` pair of dictionaries. The inputs
        are "logits" (batch and channel axes) and "labels" (batch axis
        only); the only output is "loss", declared as ``NeuralType(None)``.
    """
    inputs = {}
    logits_axes = {0: AxisType(BatchTag), 1: AxisType(ChannelTag)}
    inputs["logits"] = NeuralType(logits_axes)
    labels_axes = {0: AxisType(BatchTag)}
    inputs["labels"] = NeuralType(labels_axes)

    outputs = {}
    outputs["loss"] = NeuralType(None)

    return inputs, outputs
def input_ports(self):
    """Returns definitions of module input ports.

    logits:
        0: AxisType(BatchTag)

        1: AxisType(ChannelTag)

        2: AxisType(ChannelTag)

    labels:
        0: AxisType(BatchTag)

        1: AxisType(ChannelTag)
    """
    ports = {}

    logits_axes = {
        0: AxisType(BatchTag),
        1: AxisType(ChannelTag),
        2: AxisType(ChannelTag),
    }
    ports["logits"] = NeuralType(logits_axes)

    labels_axes = {0: AxisType(BatchTag), 1: AxisType(ChannelTag)}
    ports["labels"] = NeuralType(labels_axes)

    return ports