Exemple #1
0
 def make_pipe():
     """Pad an externally supplied image along externally supplied axes.

     ``image_gen``, ``get_dynamic_axes`` and ``device`` are not defined in
     this function; they are looked up in the surrounding scope.

     Returns
     -------
     tuple
         ``(image, axes, shape, pad1, pad2, fill_value)`` where ``pad1`` is
         padded with ``axes`` only and ``pad2`` with both ``axes`` and
         ``shape``.
     """
     img = fn.external_source(source=image_gen)
     img = img.gpu() if device == 'gpu' else img
     # A single external source yields both the axes and the target shape.
     pad_axes, pad_shape = fn.external_source(source=get_dynamic_axes,
                                              num_outputs=2)
     fill = fn.random.uniform(device='cpu', range=[0.0, 255.0])
     padded_axes_only = fn.pad(img, axes=pad_axes, fill_value=fill)
     padded_axes_shape = fn.pad(img,
                                axes=pad_axes,
                                shape=pad_shape,
                                fill_value=fill)
     return (img, pad_axes, pad_shape, padded_axes_only, padded_axes_shape,
             fill)
Exemple #2
0
def check_pad_per_sample_shapes_and_alignment(device='cpu',
                                              batch_size=3,
                                              ndim=2,
                                              num_iter=3):
    """Check ``fn.pad`` with per-sample ``shape`` and ``align`` arguments.

    Random input data is padded three ways (explicit shape, alignment only,
    both) along axes ``(0, 1)`` and the resulting shapes are asserted
    sample by sample.

    Parameters
    ----------
    device : str
        When ``'gpu'`` the input data is moved to the GPU before padding.
    batch_size : int
        Pipeline batch size; also the number of samples checked per run.
    ndim : int
        Length of the randomly generated shape / alignment vectors.
    num_iter : int
        Number of pipeline runs to verify.
    """
    axes = (0, 1)
    pipe = Pipeline(batch_size=batch_size,
                    num_threads=3,
                    device_id=0,
                    seed=1234)

    def random_int_vector(low, high):
        # Uniform random vector of length `ndim`, cast to INT32.
        values = fn.random.uniform(range=(low, high), shape=(ndim, ))
        return fn.cast(values, dtype=types.INT32)

    with pipe:
        in_shape = random_int_vector(10, 20)
        in_data = fn.random.uniform(range=(0., 1.), shape=in_shape)
        if device == 'gpu':
            in_data = in_data.gpu()
        req_shape = random_int_vector(21, 30)
        req_align = random_int_vector(3, 5)
        out_pad_shape = fn.pad(in_data, axes=axes, align=None, shape=req_shape)
        out_pad_align = fn.pad(in_data, axes=axes, align=req_align, shape=None)
        out_pad_both = fn.pad(in_data,
                              axes=axes,
                              align=req_align,
                              shape=req_shape)
        pipe.set_outputs(in_shape, in_data, req_shape, req_align,
                         out_pad_shape, out_pad_align, out_pad_both)
    pipe.build()

    for _ in range(num_iter):
        # Bring GPU outputs back to the host so samples can be inspected.
        outs = []
        for out in pipe.run():
            outs.append(out.as_cpu() if isinstance(out, TensorListGPU) else out)

        for sample_idx in range(batch_size):
            (src_shape, src, want_shape, want_align, padded_shape,
             padded_align, padded_both) = [out.at(sample_idx) for out in outs]
            assert (src_shape == src.shape).all()

            # Pad to explicit shape
            assert (padded_shape.shape >= src_shape).all()
            assert (want_shape == padded_shape.shape).all()

            # Alignment only
            assert (padded_align.shape >= src_shape).all()
            assert is_aligned(padded_align.shape, want_align, axes)

            # Explicit shape + alignment
            assert (padded_both.shape >= src_shape).all()
            assert (want_shape <= padded_both.shape).all()
            assert is_aligned(padded_both.shape, want_align, axes)
Exemple #3
0
def many_input_pipeline(def_for_dataset, device, sources, input_names,
                        batches):
    """ Pipeline accepting multiple inputs via external source

    Every input is also shifted by 10 and cast to INT32; the inputs and the
    processed copies are then padded together.

    Parameters
    ----------
    def_for_dataset : bool
         True if this pipeline will be converted to TF Dataset
    device : str
        device that the Dataset will be placed ("cpu" or "gpu")
    sources : list of callables
        callbacks for the external sources in baseline pipeline otherwise None
    input_names : list of str
        Names of inputs placeholder for TF
    batches : list
        Per-input value forwarded as ``batch`` to the external source when
        ``def_for_dataset`` is true; the special value "dataset" is replaced
        with None.

    Returns
    -------
    tuple
        Padded inputs followed by the padded processed inputs.
    """

    def on_target_device(node):
        # Anything other than 'cpu' moves the node to the GPU.
        return node if device == 'cpu' else node.gpu()

    if def_for_dataset:
        inputs = [
            on_target_device(
                fn.external_source(
                    name=input_name,
                    # "dataset" is a special value used in tests, reroute it
                    # to the default.
                    batch=None if batch == "dataset" else batch))
            for input_name, batch in zip(input_names, batches)
        ]
    else:
        inputs = [
            on_target_device(fn.external_source(source=source, batch=False))
            for source in sources
        ]
    processed = [fn.cast(node + 10, dtype=dali.types.INT32) for node in inputs]
    return tuple(fn.pad(inputs + processed))
Exemple #4
0
def test_tf_experimental_source_disabled():
    """Create a ``DALIDataset`` from a pipeline using a ``source`` callback.

    NOTE(review): the name suggests the dataset construction is expected to
    fail, but no expectation is expressed in the visible code - presumably a
    decorator or caller handles it; confirm.
    """
    pipe = Pipeline(10, 4, 0)
    with pipe:
        source_node = fn.external_source(source=lambda: np.full((4, 4), 0),
                                         batch=False)
        padded = fn.pad(source_node)
        pipe.set_outputs(padded)
    dali_tf.DALIDataset(pipe, output_dtypes=tf.int32)
Exemple #5
0
 def make_pipe():
     """Pad constant data along randomly drawn axes.

     ``device`` and ``wrong_axes_range`` are taken from the surrounding
     scope. The axes are two INT32 values drawn from ``wrong_axes_range``.
     """
     data = fn.constant(idata=0,
                        shape=[10, 10, 3],
                        dtype=types.FLOAT,
                        device=device)
     random_axes = fn.random.uniform(range=wrong_axes_range,
                                     shape=(2, ),
                                     dtype=types.INT32)
     return fn.pad(data, axes=random_axes)
 def get_pipeline_desc(batch_size, num_threads, device, device_id, shard_id,
                       num_shards, def_for_dataset):
     """Build a pipeline that pads the output of one external source.

     ``es_device``, ``es_args`` and ``dtype`` come from the surrounding
     scope. ``shard_id``, ``num_shards`` and ``def_for_dataset`` are accepted
     but not used here.

     Returns
     -------
     tuple
         ``(pipe, None, dtype)``.
     """
     pipe = Pipeline(batch_size, num_threads, device_id)
     with pipe:
         # Our callbacks may have state, to be able to run it twice, once in
         # Dataset and once with baseline test, we need to make a copy to
         # preserve that state.
         source_kwargs = copy.deepcopy(es_args)
         node = fn.external_source(device=es_device, **source_kwargs)
         needs_transfer = device == "gpu" and es_device == "cpu"
         if needs_transfer:
             node = node.gpu()
         pipe.set_outputs(fn.pad(node, device=device))
     return pipe, None, dtype
Exemple #7
0
def setup_dali(
    image_file='/mnt/data/DATASETS/samples/images/image_110.jpg',
    image_dim=[800, 1600],
    batch_size=1,
    num_threads=4,
    device='mixed',
    device_id=0,
    output_dir='./out/',
):
    """Run the image preprocessing pipeline once and save the result.

    The pipeline reads ``image_file``, decodes it, resizes it to fit within
    ``image_dim``, pads it to ``image_dim``, transposes it to CHW, casts it
    to float and divides it by 255. The first sample of the first output is
    written to ``<output_dir>/dali_image.jpg``.

    Parameters
    ----------
    image_file : str
        Path of the image to read.
    image_dim : sequence of two ints
        Target ``(height, width)``. The default list is never mutated here.
    batch_size, num_threads, device_id
        Forwarded to the DALI ``Pipeline`` constructor.
    device : str
        Device passed to the image decoder; for any value other than
        ``'cpu'`` the output is copied to the host before being read.
    output_dir : str
        Directory that receives the dumped image; created if missing.
    """
    # Create the directory itself. `os.path.dirname(output_dir)` drops the
    # last path component when there is no trailing separator, so the
    # directory was never created in that case.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    pipeline = dali.pipeline.Pipeline(batch_size=batch_size,
                                      num_threads=num_threads,
                                      device_id=device_id)

    with pipeline:
        data, _ = fn.file_reader(files=[image_file])
        # image preprocess
        images = fn.image_decoder(data, device=device)
        images = fn.resize(images,
                           size=image_dim,
                           mode="not_larger",
                           max_size=image_dim)
        images = fn.pad(images,
                        fill_value=0,
                        shape=[image_dim[0], image_dim[1], 1])
        images = fn.transpose(images, perm=[2, 0, 1])
        images = fn.cast(images, dtype=dali.types.FLOAT)
        images = images / 255.
        # input shape
        input_shape = np.float32((image_dim[0], image_dim[1], 1))
        # original shape
        shapes = fn.peek_image_shape(data)
        shapes = fn.cast(shapes, dtype=dali.types.FLOAT)
        # gather outputs
        out = [images, input_shape, shapes]
        pipeline.set_outputs(*out)

    pipeline.build()
    output = pipeline.run()
    img = output[0].at(0) if device == 'cpu' else output[0].as_cpu().at(0)

    img = img.transpose(1, 2, 0)  # HWC
    img = img[:, :, ::-1]  # BGR
    # The pipeline scaled pixel values by 1/255; restore the 8-bit range so
    # the written file is not almost black.
    img = np.clip(np.rint(img * 255.), 0, 255).astype(np.uint8)
    # A leftover `print(img); quit()` used to stop the interpreter here,
    # which made the write below unreachable.
    cv2.imwrite(os.path.join(output_dir, 'dali_image.jpg'), img)
Exemple #8
0
def test_pad_cpu():
    """Run ``fn.pad`` for three iterations in a pipeline with no device.

    Random uint8 batches of shape ``[5, 4, 3]`` with layout "HWC" are padded
    along axis 0 to extent 10. ``batch_size`` is read from module scope.
    """
    pipe = Pipeline(batch_size=batch_size, num_threads=4, device_id=None)
    sample_shape = [5, 4, 3]

    def get_data():
        # One freshly generated random sample per batch entry.
        batch = []
        for _ in range(batch_size):
            batch.append(
                np.random.randint(0, 255, size=sample_shape, dtype=np.uint8))
        return batch

    source_node = fn.external_source(source=get_data, layout="HWC")
    padded = fn.pad(source_node, fill_value=-1, axes=(0, ), shape=(10, ))
    pipe.set_outputs(padded)
    pipe.build()
    for _ in range(3):
        pipe.run()
Exemple #9
0
def check_layout(kwargs, input_datasets, layout):
    """Run a DALI TF dataset whose pad operator names its axes by ``layout``.

    Parameters
    ----------
    kwargs : dict
        Keyword arguments forwarded to ``fn.external_source``.
    input_datasets
        Forwarded as ``input_datasets`` to ``DALIDatasetWithInputs``.
    layout : str
        Passed as ``axis_names`` to ``fn.pad``.
    """
    pipe = Pipeline(10, 4, 0)
    with pipe:
        source_node = fn.external_source(**kwargs)
        # Rely on the Pad internal check to ensure that External Source set layout
        padded = fn.pad(source_node, axis_names=layout)
        pipe.set_outputs(padded)

    dataset_kwargs = dict(input_datasets=input_datasets,
                          pipeline=pipe,
                          batch_size=pipe.max_batch_size,
                          output_shapes=None,
                          output_dtypes=tf.int64,
                          num_threads=pipe.num_threads,
                          device_id=pipe.device_id)
    with tf.device('/cpu:0'):
        dali_dataset = dali_tf.experimental.DALIDatasetWithInputs(
            **dataset_kwargs)

    run_dataset_eager_mode(dali_dataset, 10)
Exemple #10
0
def setup_dali(
    input_name='DALI_INPUT_0',
    image_dim=[896, 1536],
    batch_size=1,
    num_threads=4,
    device='cpu',
    device_id=0,
    output_dir='./out/',
):
    """Define the image preprocessing pipeline and serialize it to disk.

    The pipeline takes encoded images from an external source named
    ``input_name``, decodes them, resizes them to fit within ``image_dim``,
    pads them to ``image_dim``, transposes them to CHW, casts them to float
    and divides them by 255. The serialized pipeline is written to
    ``<output_dir>/model.dali``.

    Parameters
    ----------
    input_name : str
        Name given to the external source operator.
    image_dim : sequence of two ints
        Target ``(height, width)``. The default list is never mutated here.
    batch_size, num_threads, device_id
        Forwarded to the DALI ``Pipeline`` constructor.
    device : str
        Device passed to the image decoder.
    output_dir : str
        Directory that receives ``model.dali``; created if missing.
    """

    pipeline = dali.pipeline.Pipeline(batch_size=batch_size,
                                      num_threads=num_threads,
                                      device_id=device_id)

    with pipeline:
        data = fn.external_source(name=input_name, device="cpu")
        # image preprocess
        images = fn.image_decoder(data, device=device)
        images = fn.resize(images,
                           size=image_dim,
                           mode="not_larger",
                           max_size=image_dim)
        images = fn.pad(images,
                        fill_value=0,
                        shape=[image_dim[0], image_dim[1], 1])
        images = fn.transpose(images, perm=[2, 0, 1])
        images = fn.cast(images, dtype=dali.types.FLOAT)
        images = images / 255.
        # input shape
        input_shape = np.float32((image_dim[0], image_dim[1], 1))
        # original shape
        shapes = fn.peek_image_shape(data)
        shapes = fn.cast(shapes, dtype=dali.types.FLOAT)
        # gather outputs
        out = [images, input_shape, shapes]
        pipeline.set_outputs(*out)

    # Create the directory itself. `os.path.dirname(output_dir)` drops the
    # last path component when there is no trailing separator, so the
    # directory was never created and serialization failed in that case.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    pipeline.serialize(filename=os.path.join(output_dir, 'model.dali'))
Exemple #11
0
def one_input_pipeline(def_for_dataset, device, source, external_source_device,
                       no_copy, batch):
    """Pipeline accepting single input via external source

    The input is also shifted by 10 and cast to INT32; both the input and
    the processed copy are padded and returned.

    Parameters
    ----------
    def_for_dataset : bool
         True if this pipeline will be converted to TF Dataset
    device : str
        device that the Dataset will be placed ("cpu" or "gpu")
    source : callable
        callback for the external source in baseline pipeline otherwise None
    external_source_device : str
        Device that we want the external source in TF dataset to be placed
    no_copy : bool or None
        Forwarded to the external source when ``def_for_dataset`` is true;
        None means it is inferred as ``device == external_source_device``.
    batch
        Forwarded as ``batch`` to the external source when
        ``def_for_dataset`` is true; the special value "dataset" is replaced
        with None.

    Returns
    -------
    tuple
        ``(input_padded, processed_padded)``.
    """

    if def_for_dataset:
        if no_copy is None:
            # If no_copy is None, we infer it automatically and we use no_copy=True when
            # the input memory is matching the external source placement,
            # so the Dataset's placement is the same as external source's device,
            # otherwise for cross-backend we use False.
            no_copy = (device == external_source_device)
        source_kwargs = dict(
            name="input_placeholder",
            no_copy=no_copy,
            device=external_source_device,
            # "dataset" is a special value used in tests, reroute it to the
            # default.
            batch=None if batch == "dataset" else batch)
    else:
        source_kwargs = dict(name="actual_input",
                             source=source,
                             batch=False,
                             device=external_source_device)
    node = fn.external_source(**source_kwargs)
    if device != 'cpu':
        node = node.gpu()
    shifted = fn.cast(node + 10, dtype=dali.types.INT32)
    node_padded, shifted_padded = fn.pad([node, shifted])
    return node_padded, shifted_padded
def check_pad_to_square(device='cpu', batch_size=3, ndim=2, num_iter=3):
    """Check that padding "HW" data to ``max(H, W)`` yields zero-padded squares.

    Parameters
    ----------
    device : str
        When ``'gpu'`` the input data is moved to the GPU before padding.
    batch_size : int
        Pipeline batch size; also the number of samples checked per run.
    ndim : int
        Length of the randomly generated input shape vector.
    num_iter : int
        Number of pipeline runs to verify.
    """
    pipe = Pipeline(batch_size=batch_size,
                    num_threads=3,
                    device_id=0,
                    seed=1234)
    axes = (0, 1)
    with pipe:
        random_extents = fn.random.uniform(range=(10, 20), shape=(ndim, ))
        in_shape = fn.cast(random_extents, dtype=types.INT32)
        random_values = fn.random.uniform(range=(0., 1.), shape=in_shape)
        in_data = fn.reshape(random_values, layout="HW")
        # Extract height and width from the run-time shape and take the
        # larger one as the side of the square.
        shape = fn.shapes(in_data, dtype=types.INT32)
        h = fn.slice(shape, 0, 1, axes=[0])
        w = fn.slice(shape, 1, 1, axes=[0])
        side = math.max(h, w)
        if device == 'gpu':
            in_data = in_data.gpu()
        square_shape = fn.cat(side, side, axis=0)
        out_data = fn.pad(in_data, axis_names="HW", shape=square_shape)
        pipe.set_outputs(in_data, out_data)
    pipe.build()

    for _ in range(num_iter):
        # Bring GPU outputs back to the host so samples can be inspected.
        outs = []
        for out in pipe.run():
            outs.append(out.as_cpu() if isinstance(out, TensorListGPU) else out)

        for sample_idx in range(batch_size):
            sample, padded = [out.at(sample_idx) for out in outs]
            sample_shape = sample.shape
            longest = max(sample_shape)
            assert all(extent == longest for extent in padded.shape)
            # The original data sits in the top-left corner ...
            np.testing.assert_equal(
                padded[:sample_shape[0], :sample_shape[1]], sample)
            # ... and everything outside of it is zero.
            np.testing.assert_equal(padded[sample_shape[0]:, :], 0)
            np.testing.assert_equal(padded[:, sample_shape[1]:], 0)
Exemple #13
0
def dali_asr_pipeline(train_pipeline,  # True if training, False if validation
                      file_root,
                      file_list,
                      sample_rate,
                      silence_threshold,
                      resample_range,
                      discrete_resample_range,
                      window_size,
                      window_stride,
                      nfeatures,
                      nfft,
                      frame_splicing_factor,
                      dither_coeff,
                      pad_align,
                      preemph_coeff,
                      do_spectrogram_masking=False,
                      cutouts_generator=None,
                      shard_id=0,
                      n_shards=1,
                      preprocessing_device="gpu"):
    """Define the audio-to-log-mel feature graph for speech recognition.

    Stages, in order: file reading, audio decoding (optionally at a randomly
    perturbed sample rate), optional silence trimming, optional dither,
    pre-emphasis, spectrogram, mel filter bank, decibel conversion,
    normalization along axis 1, padding of axis 1 to a multiple of
    ``pad_align`` and, for training only, optional erasing of regions
    supplied by ``cutouts_generator``.

    Parameters
    ----------
    train_pipeline : bool
        True if training, False if validation. Enables shuffling after each
        epoch, speed perturbation and spectrogram masking.
    file_root, file_list, shard_id, n_shards
        Forwarded to ``fn.readers.file`` (``n_shards`` as ``num_shards``).
    sample_rate
        Target sample rate; also scales ``window_size`` / ``window_stride``
        into ``window_length`` / ``window_step``.
    silence_threshold
        ``cutoff_db`` for silence detection; None disables trimming.
    resample_range
        Two-element range of speed perturbation coefficients, or None.
    discrete_resample_range : bool
        If true, coefficients are drawn from
        ``[resample_range[0], 1.0, resample_range[1]]`` instead of the
        continuous range.
    window_size, window_stride, nfft, nfeatures
        Spectrogram and mel filter bank settings.
    frame_splicing_factor : int
        When not 1, the reported feature shape is ceil-divided by it.
    dither_coeff : float
        Scale of the normal noise added to the audio; 0 disables dither.
    pad_align
        ``align`` argument of the final ``fn.pad``.
    preemph_coeff
        Coefficient of the pre-emphasis filter.
    do_spectrogram_masking : bool
        Enables the erase stage for training pipelines.
    cutouts_generator
        External source callback yielding erase anchors and shapes.
    preprocessing_device : str
        When "gpu" the audio is moved to the GPU after silence trimming.

    Returns
    -------
    tuple
        ``(log_features, label, log_features_len)``, each moved to the GPU.
    """
    encoded, label = fn.readers.file(
        device="cpu", name="file_reader", file_root=file_root,
        file_list=file_list, shard_id=shard_id, num_shards=n_shards,
        shuffle_after_epoch=train_pipeline)

    # Random speed perturbation coefficients, if a range was requested.
    if resample_range is None:
        speed_coeffs = None
    elif discrete_resample_range:
        choices = [resample_range[0], 1.0, resample_range[1]]
        speed_coeffs = fn.random.uniform(device="cpu", values=choices)
    else:
        speed_coeffs = fn.random.uniform(device="cpu", range=resample_range)

    # Without a resample range decode at the nominal rate; with one, only
    # training pipelines perturb the rate and validation passes None.
    if speed_coeffs is None:
        decoder_rate = sample_rate
    elif train_pipeline:
        decoder_rate = speed_coeffs * sample_rate
    else:
        decoder_rate = None

    audio, _ = fn.decoders.audio(encoded, sample_rate=decoder_rate,
                                 dtype=types.FLOAT, downmix=True)
    if silence_threshold is not None:
        start, extent = fn.nonsilent_region(audio,
                                            cutoff_db=silence_threshold)
        audio = fn.slice(audio, start, extent, axes=[0])

    # Max duration drop is performed at DataLayer stage

    if preprocessing_device == "gpu":
        audio = audio.gpu()

    if dither_coeff != 0.:
        noise = fn.random.normal(audio)
        audio = audio + noise * dither_coeff

    audio = fn.preemphasis_filter(audio, preemph_coeff=preemph_coeff)

    spectrogram = fn.spectrogram(audio, nfft=nfft,
                                 window_length=window_size * sample_rate,
                                 window_step=window_stride * sample_rate)

    mel = fn.mel_filter_bank(spectrogram, sample_rate=sample_rate,
                             nfilter=nfeatures, normalize=True)

    features = fn.to_decibels(mel, multiplier=np.log(10),
                              reference=1.0, cutoff_db=math.log(1e-20))

    features_len = fn.shapes(features)
    if frame_splicing_factor != 1:
        # Ceiling division of the shape by the splicing factor.
        features_len = ((features_len + (frame_splicing_factor - 1))
                        // frame_splicing_factor)

    features = fn.normalize(features, axes=[1])
    features = fn.pad(features, axes=[1], fill_value=0, align=pad_align)

    if train_pipeline and do_spectrogram_masking:
        anchors, shapes = fn.external_source(source=cutouts_generator,
                                             num_outputs=2, cycle=True)
        features = fn.erase(features, anchor=anchors, shape=shapes,
                            axes=[0, 1], fill_value=0,
                            normalized_anchor=True)

    # When modifying DALI pipeline returns, make sure you update `output_map`
    # in DALIGenericIterator invocation
    return features.gpu(), label.gpu(), features_len.gpu()
        def dali_jasper_pipe():
            """Define the audio-to-log-mel feature graph and return its outputs.

            Configuration is read from ``self`` and from names of the
            enclosing scope (``is_triton_pipeline``, ``file_root``,
            ``resample_range``, ``sample_rate``, ...). Encoded audio comes
            from an external source named "DALI_INPUT_0" for Triton
            pipelines and from a file reader otherwise.

            Returns
            -------
            tuple
                ``(log_features, label, log_features_len)``, each moved to
                the GPU.

            Raises
            ------
            RuntimeError
                If a Triton pipeline is requested while
                ``torch.distributed`` is initialized.
            """
            if is_triton_pipeline:
                assert not self.train, "Pipeline for Triton shall be a validation pipeline"
                if torch.distributed.is_initialized():
                    raise RuntimeError(
                        "You're creating Triton pipeline, using multi-process mode. Please use single-process mode."
                    )
                encoded, label = fn.external_source(device="cpu",
                                                    name="DALI_INPUT_0",
                                                    no_copy=True)
            else:
                encoded, label = fn.readers.file(
                    device="cpu",
                    name="file_reader",
                    file_root=file_root,
                    file_list=file_list,
                    shard_id=shard_id,
                    num_shards=n_shards,
                    shuffle_after_epoch=train_pipeline)

            # Random speed perturbation coefficients, if a range was
            # requested.
            speed_perturbation_coeffs = None
            if resample_range is not None:
                if discrete_resample_range:
                    values = [
                        self.resample_range[0], 1.0, self.resample_range[1]
                    ]
                    speed_perturbation_coeffs = fn.random.uniform(
                        device="cpu", values=values)
                else:
                    speed_perturbation_coeffs = fn.random.uniform(
                        device="cpu", range=resample_range)

            # Training perturbs the decoding rate; without a resample range
            # the nominal rate is used; otherwise None is passed.
            if self.train and speed_perturbation_coeffs is not None:
                dec_sample_rate_arg = speed_perturbation_coeffs * self.sample_rate
            elif resample_range is None:
                dec_sample_rate_arg = self.sample_rate
            else:
                dec_sample_rate_arg = None

            audio, _ = fn.decoders.audio(encoded,
                                         sample_rate=dec_sample_rate_arg,
                                         dtype=types.FLOAT,
                                         downmix=True)

            if self.do_remove_silence:
                begin, length = fn.nonsilent_region(
                    audio, cutoff_db=silence_threshold)
                audio = fn.slice(audio, begin, length, axes=[0])

            # Max duration drop is performed at DataLayer stage

            if self.preprocessing_device == "gpu":
                audio = audio.gpu()

            if self.dither_coeff != 0.:
                # Pass `audio` as the shape-like input so that one noise
                # value is drawn per audio element. With neither an input
                # nor a `shape`, a single value per sample is generated and
                # the "dither" degenerates into a constant offset.
                audio = audio + fn.random.normal(audio) * self.dither_coeff

            audio = fn.preemphasis_filter(audio, preemph_coeff=preemph_coeff)

            spec = fn.spectrogram(audio,
                                  nfft=nfft,
                                  window_length=window_size * sample_rate,
                                  window_step=window_stride * sample_rate)

            mel_spec = fn.mel_filter_bank(spec,
                                          sample_rate=sample_rate,
                                          nfilter=self.nfeatures,
                                          normalize=True)

            log_features = fn.to_decibels(mel_spec,
                                          multiplier=np.log(10),
                                          reference=1.0,
                                          cutoff_db=math.log(1e-20))

            # The shape is captured before normalization and padding.
            log_features_len = fn.shapes(log_features)
            if self.frame_splicing_factor != 1:
                log_features_len = self._div_ceil(log_features_len,
                                                  self.frame_splicing_factor)

            log_features = fn.normalize(log_features, axes=[1])
            log_features = fn.pad(log_features,
                                  axes=[1],
                                  fill_value=0,
                                  align=pad_align)

            if self.train and self._do_spectrogram_masking():
                anchors, shapes = fn.external_source(
                    source=self._cutouts_generator, num_outputs=2, cycle=True)
                log_features = fn.erase(log_features,
                                        anchor=anchors,
                                        shape=shapes,
                                        axes=[0, 1],
                                        fill_value=0,
                                        normalized_anchor=True)

            # When modifying DALI pipeline returns, make sure you update `output_map` in DALIGenericIterator invocation
            return log_features.gpu(), label.gpu(), log_features_len.gpu()