def __init__(self, batch_size, num_threads, device_id, **kwargs):
    """Initialise the pipeline and cache the crop shape as DALI constants.

    All positional arguments and ``kwargs`` are forwarded to the parent
    initialiser. ``kwargs`` must contain the key ``"oversampling"``.
    """
    super().__init__(batch_size, num_threads, device_id, **kwargs)
    self.oversampling = kwargs["oversampling"]

    # self.patch_size is not assigned in this method; presumably the parent
    # initialiser provides it -- TODO confirm.
    patch = np.array(self.patch_size)
    # The same shape is kept both as an INT64 and as a FLOAT constant.
    self.crop_shape = types.Constant(patch, dtype=types.INT64)
    self.crop_shape_float = types.Constant(patch, dtype=types.FLOAT)
Example #2
0
 def __init__(self, batch_size, num_threads, device_id, **kwargs):
     """Set up the readers, crop constants and augmentation seed options.

     Required ``kwargs`` keys: ``dim``, ``oversampling``, ``num_device``,
     ``imgs``, ``lbls``, ``seed``, ``patch_size``, ``set_aug_seed`` and
     ``augment``; ``batch_size_2d`` is additionally read when ``dim == 2``.
     """
     super(TrainPipeline, self).__init__(batch_size, num_threads, device_id)
     self.dim = kwargs["dim"]
     self.oversampling = kwargs["oversampling"]

     # The two readers are configured identically except for the file list.
     for attr_name, files_key in (("input_x", "imgs"), ("input_y", "lbls")):
         reader = get_numpy_reader(
             num_shards=kwargs["num_device"],
             files=kwargs[files_key],
             seed=kwargs["seed"],
             shard_id=device_id,
             shuffle=True,
         )
         setattr(self, attr_name, reader)

     patch_size = kwargs["patch_size"]
     if self.dim == 2:
         # For 2D, batch_size_2d is prepended as an extra leading entry.
         patch_size = [kwargs["batch_size_2d"]] + patch_size
     self.patch_size = patch_size

     self.crop_shape = types.Constant(np.array(self.patch_size), dtype=types.INT64)
     self.crop_shape_float = types.Constant(np.array(self.patch_size), dtype=types.FLOAT)

     # LOCAL_RANK (default "0") offsets the augmentation seed.
     shard_id = int(os.getenv("LOCAL_RANK", "0"))
     if not kwargs['set_aug_seed']:
         self.aug_seed_kwargs = {}
         print("TrainPipeline WO augmentation seed")
     else:
         aug_seed = kwargs['seed'] + shard_id
         self.aug_seed_kwargs = {'seed': aug_seed}
         print("TrainPipeline augmentation seed: ", aug_seed)
     self.augment = kwargs['augment']
Example #3
0
 def __init__(self, batch_size, num_threads, device_id, **kwargs):
     """Create the TFRecord reader and the crop/layout attributes.

     Required ``kwargs`` keys: ``dim``, ``seed``, ``oversampling``,
     ``tfrecords``, ``tfrecords_idx``, ``gpus`` and ``patch_size``.
     """
     super(TFRecordTrain, self).__init__(batch_size, num_threads, device_id)
     self.dim = kwargs["dim"]
     self.seed = kwargs["seed"]
     self.oversampling = kwargs["oversampling"]

     # The two shape features have dim + 1 entries each.
     shape_len = self.dim + 1
     record_features = {
         "X_shape": tfrec.FixedLenFeature([shape_len], tfrec.int64, 0),
         "Y_shape": tfrec.FixedLenFeature([shape_len], tfrec.int64, 0),
         "X": tfrec.VarLenFeature([], tfrec.float32, 0.0),
         "Y": tfrec.FixedLenFeature([], tfrec.string, ""),
         "fname": tfrec.FixedLenFeature([], tfrec.string, ""),
     }
     self.input = ops.TFRecordReader(
         path=kwargs["tfrecords"],
         index_path=kwargs["tfrecords_idx"],
         features=record_features,
         num_shards=kwargs["gpus"],
         shard_id=device_id,
         random_shuffle=True,
         pad_last_batch=True,
         read_ahead=True,
         seed=self.seed,
     )

     self.patch_size = kwargs["patch_size"]
     self.crop_shape = types.Constant(np.array(self.patch_size), dtype=types.INT64)
     self.crop_shape_float = types.Constant(np.array(self.patch_size), dtype=types.FLOAT)

     # Layout strings depend only on whether the data is 3D.
     if self.dim == 3:
         self.layout, self.axis_name = "CDHW", "DHW"
     else:
         self.layout, self.axis_name = "CHW", "HW"
Example #4
0
 def __init__(self, batch_size, num_threads, device_id, **kwargs):
     """Set up the image/label readers and the crop shape constants.

     Required ``kwargs`` keys: ``dim``, ``oversampling``, ``gpus``, ``imgs``,
     ``lbls``, ``seed`` and ``patch_size``; ``batch_size_2d`` is additionally
     read when ``dim == 2``.
     """
     super(TrainPipeline, self).__init__(batch_size, num_threads, device_id)
     self.dim = kwargs["dim"]
     self.oversampling = kwargs["oversampling"]

     # Both readers use the same sharding, seed and shuffling; only the
     # list of files differs.
     for attr_name, files_key in (("input_x", "imgs"), ("input_y", "lbls")):
         setattr(
             self,
             attr_name,
             get_numpy_reader(
                 num_shards=kwargs["gpus"],
                 files=kwargs[files_key],
                 seed=kwargs["seed"],
                 shard_id=device_id,
                 shuffle=True,
             ),
         )

     patch_size = kwargs["patch_size"]
     if self.dim == 2:
         # For 2D, batch_size_2d is prepended as an extra leading entry.
         patch_size = [kwargs["batch_size_2d"]] + patch_size
     self.patch_size = patch_size

     self.crop_shape = types.Constant(np.array(self.patch_size),
                                      dtype=types.INT64)
     self.crop_shape_float = types.Constant(np.array(self.patch_size),
                                            dtype=types.FLOAT)
Example #5
0
def decoder_slice_pipe(decoder_op, file_root, device, use_fast_idct):
    """Read files from ``file_root`` and pass them through ``decoder_op``.

    ``decoder_op`` receives the encoded data followed by two constants,
    ``[0., 0.]`` and ``[0.5, 0.5]``, and is asked for RGB output on
    ``device`` with the given ``use_fast_idct`` setting. Its result is
    returned unchanged.
    """
    encoded_files, _unused = fn.readers.file(file_root=file_root)
    slice_start = types.Constant(np.array([0., 0.]))
    slice_end = types.Constant(np.array([0.5, 0.5]))
    return decoder_op(encoded_files,
                      slice_start,
                      slice_end,
                      device=device,
                      output_type=types.RGB,
                      use_fast_idct=use_fast_idct)
Example #6
0
 def define_graph(self):
     """Return two ``ops.Reshape`` outputs built from constant inputs."""
     device = self.device

     # no-op: the scalar constant 1.25 reshaped to shape [1]
     scalar_out = ops.Reshape(device=device, shape=[1])(types.Constant(1.25))

     # flatten with reshape op: a 2x2 uint16 constant reshaped to [4],
     # with the target shape itself supplied as a constant
     flatten = ops.Reshape(device=device)
     matrix = types.Constant(np.array([[1, 2], [3, 4]], dtype=np.uint16),
                             device=device)
     target_shape = types.Constant([4])
     flat_out = flatten(matrix, shape=target_shape)

     return [scalar_out, flat_out]
Example #7
0
    def __init__(self, params, num_threads, device_id):
        """Load the datasets as CPU constants and create the operators.

        ``params`` must provide ``batch_size``, ``data_path``,
        ``rotate_input`` and ``data_size``. The parent pipeline is created
        with a fixed seed of 12.
        """
        super(DaliPipeline, self).__init__(params.batch_size,
                                           num_threads,
                                           device_id,
                                           seed=12)

        with h5py.File(params.data_path, 'r') as f:
            # Each dataset is read in full, wrapped in a "DHWC" CPU
            # constant stored under the same attribute name, and the local
            # reference is dropped before the next dataset is read.
            for dataset_name in ('Hydro', 'Nbody'):
                volume = f[dataset_name][...]
                setattr(self,
                        dataset_name,
                        types.Constant(volume,
                                       shape=volume.shape,
                                       layout="DHWC",
                                       device="cpu"))
                del volume

        self.do_rotate = bool(params.rotate_input == 1)
        if self.do_rotate:
            print("Enable Rotation")
        else:
            print("Disable Rotation")

        # Random number sources and casts (CPU).
        self.rng_angle = ops.Uniform(device="cpu", range=[-1.5, 2.5])
        self.rng_pos = ops.Uniform(device="cpu", range=[0., 1.])
        self.icast = ops.Cast(device="cpu", dtype=types.INT32)
        self.fcast = ops.Cast(device="cpu", dtype=types.FLOAT)

        # Cubic crop with edge length params.data_size (CPU).
        edge = params.data_size
        self.crop = ops.Crop(device="cpu", crop_d=edge, crop_h=edge, crop_w=edge)

        # One linear-interpolation GPU rotation per coordinate axis.
        rotation_axes = (("rotate1", (1, 0, 0)),
                         ("rotate2", (0, 1, 0)),
                         ("rotate3", (0, 0, 1)))
        for attr_name, axis in rotation_axes:
            setattr(self,
                    attr_name,
                    ops.Rotate(device="gpu",
                               axis=axis,
                               interp_type=types.INTERP_LINEAR))

        self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])
 def get_operand(self, operand, kind, operand_type):
     """Return ``operand`` in the form requested by ``kind``.

     Args:
         operand: the value returned for the ``"cpu"`` and ``"gpu"`` kinds.
         kind: one of ``"const"``, ``"cpu"`` or ``"gpu"``.
         operand_type: key into ``np_types_to_dali``; only used for
             ``"const"``.

     Returns:
         ``"const"``: ``types.Constant(magic_number, <mapped DALI type>)``;
         ``"cpu"``: ``operand`` unchanged; ``"gpu"``: ``operand.gpu()``.

     Raises:
         ValueError: if ``kind`` is none of the values above.
     """
     if kind == "const":
         return types.Constant(magic_number, np_types_to_dali[operand_type])
     if kind == "cpu":
         return operand
     if kind == "gpu":
         return operand.gpu()
     # An unknown kind used to fall through and return None, which only
     # surfaced later as an unrelated error; fail here instead.
     raise ValueError(f"Unsupported operand kind: {kind!r}")
Example #9
0
 def make_pipe():
     """Build a slice graph and return its inputs together with the result.

     Uses ``image_gen``, ``device``, ``get_dynamic_axes`` and
     ``args_device``, which are defined outside this function. When
     ``args_device`` is ``"gpu"`` one sliced output is returned; otherwise
     two are returned, one built with keyword and one with positional
     ``rel_start`` / ``rel_shape`` arguments.
     """
     image = fn.external_source(source=image_gen)
     if device == "gpu":
         image = image.gpu()

     if not get_dynamic_axes:
         # Fixed axes, random relative start and shape.
         axes = types.Constant(np.array([0, 1], dtype=np.int32),
                               device="cpu")
         uniform_args = dict(shape=(2, ),
                             dtype=types.FLOAT,
                             device=args_device)
         rel_start = fn.random.uniform(range=(0.1, 0.2), **uniform_args)
         rel_shape = fn.random.uniform(range=(0.4, 0.6), **uniform_args)
     else:
         axes, rel_start, rel_shape = fn.external_source(
             source=get_dynamic_axes, num_outputs=3)

     common_outputs = (image, axes, rel_start, rel_shape)
     if args_device == "gpu":
         sliced = fn.slice(image, rel_start, rel_shape, axes=axes)
         return common_outputs + (sliced, )

     sliced_by_keyword = fn.slice(image,
                                  rel_start=rel_start,
                                  rel_shape=rel_shape,
                                  axes=axes)
     sliced_by_position = fn.slice(image, rel_start, rel_shape, axes=axes)
     return common_outputs + (sliced_by_keyword, sliced_by_position)
Example #10
0
 def pipe(device, interp_type, test_data=checkerboard, out_size=out_size):
     """Resize constant ``test_data`` to ``out_size`` and return the result.

     ``interp_type`` is used for both the minification and magnification
     filters, and the output is requested as FLOAT.
     """
     source = types.Constant(test_data, device=device)
     # An axis is inserted at index 2 before resizing and removed afterwards.
     expanded = fn.expand_dims(source, axes=[2])
     scaled = fn.resize(expanded,
                        dtype=types.FLOAT,
                        min_filter=interp_type,
                        mag_filter=interp_type,
                        size=out_size)
     return fn.squeeze(scaled, axes=[2])
def test_variable_batch():
    """Check that constant outputs follow the size of a variable batch.

    An external source cycles through batches of 2, 1 and 6 samples; on
    every run the CPU and GPU constant outputs must contain that many
    samples, each equal to the constant value.
    """
    pipe = Pipeline(6, 1, 0)
    batches = [
        [np.array(v) for v in (1, 2)],
        [np.array(1)],
        [np.array(v) for v in (1, 2, 3, 4, 5, 5)],
    ]
    dummy = fn.external_source(batches, cycle=True)
    val = np.float32([[1, 2], [3, 4]])
    pipe.set_outputs(types.Constant(val, device="cpu"),
                     types.Constant(val, device="gpu"),
                     dummy)
    pipe.build()

    for expected_batch in batches:
        cpu_out, gpu_out, _ = pipe.run()
        n_samples = len(expected_batch)
        assert len(cpu_out) == n_samples
        assert len(gpu_out) == n_samples
        # Copy the GPU output to the host before comparing samples.
        gpu_on_host = gpu_out.as_cpu()
        for idx in range(n_samples):
            assert np.array_equal(cpu_out.at(idx), val)
            assert np.array_equal(gpu_on_host.at(idx), val)
Example #12
0
def create_dali_pipe(channel_first, seq_len, interp, dtype, w, h,
                     batch_size=2):
    """Build a pipeline that resizes sequences on both CPU and GPU.

    The pipeline outputs, in order: the CPU-resized data, the GPU-resized
    data, a two-element slice of the input shape, and the saved attributes
    of the CPU and GPU resize. The pipeline is returned without being
    built.
    """
    pipe = dali.pipeline.Pipeline(batch_size, 1, 0, 0)
    with pipe:
        # The layout decides where the two-element shape slice starts.
        if channel_first:
            layout, hw_offset = "FCHW", 2
        else:
            layout, hw_offset = "FHWC", 1

        ext = fn.external_source(GetSequences(channel_first, seq_len,
                                              batch_size),
                                 layout=layout)

        resize_args = dict(resize_x=w,
                           resize_y=h,
                           interp_type=interp,
                           dtype=dtype,
                           save_attrs=True)
        dali_resized_cpu, size_cpu = fn.resize(ext, **resize_args)
        dali_resized_gpu, size_gpu = fn.resize(ext.gpu(),
                                               minibatch_size=4,
                                               **resize_args)

        # extract just HW part from the input shape
        shape_anchor = np.array([hw_offset], dtype=np.float32)
        shape_shape = np.array([2], dtype=np.float32)
        input_shape = fn.cast(fn.shapes(ext), dtype=types.INT32)
        anchor_const = types.Constant(shape_anchor, device="cpu")
        extent_const = types.Constant(shape_shape, device="cpu")
        ext_size = fn.slice(input_shape,
                            anchor_const,
                            extent_const,
                            normalized_anchor=False,
                            normalized_shape=False,
                            axes=[0])

        pipe.set_outputs(dali_resized_cpu, dali_resized_gpu, ext_size,
                         size_cpu, size_gpu)
    return pipe
 def define_graph(self):
     """Return seven constant nodes created on ``self.device``.

     The values cover a tuple of numbers, arrays built through
     ``self.array`` with int32, uint8 and float16 dtypes, two scalars
     given an explicit shape (one of them named "large"), and a list of
     booleans.
     """
     device = self.device

     def const(value, **extra):
         # Every constant in this graph shares the same device.
         return types.Constant(device=device, value=value, **extra)

     return [
         const((1.25, 2.5, 3)),
         const(self.array([[[1, 2]], [[3, 4]]], dtype=self.dtype('int32'))),
         const(self.array([0, 1, 2, 3, 4], dtype=self.dtype('uint8'))),
         const(self.array([0.25, 1.25, 2.25, 3.25, 4.25],
                          dtype=self.dtype('float16'))),
         const(5.5, shape=(100, 100), name="large"),
         const(-4, shape=(10, 20)),
         const([False, True, False]),
     ]
Example #14
0
    def define_graph(self):
        """Read, decode and warp images, returning the warp output.

        The decoded images are moved to the GPU when ``self.warp.device``
        is ``"gpu"`` and passed through ``self.cast`` when it is set. The
        warp receives a ``size`` constant of ``[240, 320]`` when
        ``self.use_dynamic_size`` is true (``None`` otherwise) and an extra
        transform input when ``self.use_input`` is true.
        """
        self.jpegs, self.labels = self.input(name="Reader")
        images = self.decode(self.jpegs)
        if self.warp.device == "gpu":
            images = images.gpu()
        if self.cast:
            images = self.cast(images)

        dynamic_size = None
        if self.use_dynamic_size:
            dynamic_size = types.Constant(np.array([240, 320], dtype=np.float32))

        # The transform, when used, is the second positional input.
        warp_inputs = [images]
        if self.use_input:
            warp_inputs.append(self.transform_source())
        return self.warp(*warp_inputs, size=dynamic_size)
Example #15
0
    def nonsilent_pipe(data_arr=None, window_size=256, cutoff_value=-10, reference_power=None):
        """Run ``fn.nonsilent_region`` on the same audio on CPU and GPU.

        When ``data_arr`` is given it is wrapped in a CPU constant and used
        as the audio; otherwise the files in ``audio_files`` (defined
        outside this function) are read and decoded as INT16 with
        downmixing. Returns ``(begin_cpu, len_cpu, begin_gpu, len_gpu)``.
        """
        if data_arr is not None:
            audio = types.Constant(device='cpu', value=data_arr)
        else:
            raw, _ = fn.readers.file(files=audio_files)
            audio, _ = fn.decoders.audio(raw, dtype=types.INT16, downmix=True)

        # Both backends are called with identical detection settings.
        region_args = dict(
            cutoff_db=cutoff_value,
            window_length=window_size,
            reference_power=reference_power,
        )
        begin_cpu, len_cpu = fn.nonsilent_region(audio, **region_args)
        begin_gpu, len_gpu = fn.nonsilent_region(audio.gpu(), **region_args)
        return begin_cpu, len_cpu, begin_gpu, len_gpu
Example #16
0
 def dali_pipe(data, label):
     """Return ``data`` and ``label`` wrapped in DALI constants, as a pair."""
     return types.Constant(data), types.Constant(label)
Example #17
0
def check_normal_distribution(device,
                              dtype,
                              shape=None,
                              use_shape_like_input=False,
                              variable_shape=False,
                              mean=0.0,
                              stddev=1.0,
                              variable_dist_params=False,
                              shape_gen_f=None,
                              niter=3,
                              batch_size=3,
                              device_id=0,
                              num_threads=3):
    """Run ``fn.random.normal`` in a pipeline and sanity-check its output.

    The output shape comes either from the ``shape`` argument or from a
    shape-like input (``use_shape_like_input``); with ``variable_shape`` it
    is produced per sample by ``shape_gen_f`` (default:
    ``random_shape(shape)``). With ``variable_dist_params`` the mean and
    standard deviation are drawn per sample instead of using ``mean`` and
    ``stddev``. At most one of ``shape`` and ``shape_gen_f`` may be given.

    For ``niter`` runs every non-scalar sample must have the reported
    shape; FLOAT/FLOAT64 samples with at least 100 elements are also
    checked against loose empirical-rule bounds.
    """
    pipe = Pipeline(batch_size=batch_size,
                    device_id=device_id,
                    num_threads=num_threads,
                    seed=123456)
    with pipe:
        shape_like_in = None
        shape_arg = None
        assert shape is None or shape_gen_f is None

        if not variable_shape:
            if use_shape_like_input:
                shape_like_in = np.zeros(shape)
            else:
                shape_arg = shape
            # Can't make an empty list constant
            if shape is not None and shape != ():
                reported_shape = shape
            else:
                reported_shape = (1, )
            shape_out = types.Constant(reported_shape, dtype=types.INT32)
        else:
            if shape_gen_f is None:

                def shape_gen_f():
                    return random_shape(shape)

            if use_shape_like_input:

                def zeros_with_generated_shape():
                    return np.zeros(shape_gen_f())

                shape_like_in = fn.external_source(
                    zeros_with_generated_shape,
                    device=device,
                    batch=False)
                shape_out = fn.shapes(shape_like_in)
            else:
                shape_arg = fn.external_source(shape_gen_f, batch=False)
                shape_out = shape_arg

        if variable_dist_params:

            def random_mean():
                return np.array(np.random.uniform(low=-100.0, high=100.0),
                                dtype=np.float32)

            def random_stddev():
                return np.array(np.random.uniform(low=1.0, high=100.0),
                                dtype=np.float32)

            mean_arg = fn.external_source(random_mean,
                                          device='cpu',
                                          batch=False)
            stddev_arg = fn.external_source(random_stddev,
                                            device='cpu',
                                            batch=False)
        else:
            mean_arg, stddev_arg = mean, stddev

        inputs = [] if shape_like_in is None else [shape_like_in]
        out = fn.random.normal(*inputs,
                               device=device,
                               shape=shape_arg,
                               mean=mean_arg,
                               stddev=stddev_arg,
                               dtype=dtype)
        pipe.set_outputs(out, shape_out, mean_arg, stddev_arg)
    pipe.build()

    for _ in range(niter):
        # Bring any GPU outputs to the host before inspecting them.
        host_outputs = [
            o.as_cpu() if isinstance(o, TensorListGPU) else o
            for o in pipe.run()
        ]
        out, shapes, means, stddevs = host_outputs

        for sample_idx in range(batch_size):
            sample = np.array(out[sample_idx])
            if sample.shape == ():
                continue
            sample_shape = np.array(shapes[sample_idx])
            sample_mean = np.array(means[sample_idx])
            sample_stddev = np.array(stddevs[sample_idx])
            assert (sample.shape == sample_shape
                    ).all(), f"{sample.shape} != {sample_shape}"

            data = sample.flatten()
            data_len = len(data)

            # Distribution checks only apply to large floating-point samples
            if data_len < 100 or dtype not in [types.FLOAT, types.FLOAT64]:
                continue

            def fraction_within(k):
                # Share of values strictly inside mean +/- k * stddev.
                inside = np.where((data > (sample_mean - k * sample_stddev))
                                  & (data < (sample_mean + k * sample_stddev)))
                return len(inside[0]) / data_len

            # Empirical rule:
            # ~68% of the observations within one standard deviation
            # ~95% of the observations within two standard deviations
            # ~99.7% of the observations within three standard deviations
            p1 = fraction_within(1)
            p2 = fraction_within(2)
            p3 = fraction_within(3)
            assert p3 > 0.9, f"{p3}"  # leave some room
            assert p2 > 0.8, f"{p2}"  # leave some room
            assert p1 > 0.5, f"{p1}"  # leave some room

            # It's not 100% mathematically correct, but makes do in case of this test
            # NOTE(review): per the SciPy docs the second element returned
            # by st.anderson is the array of critical values, not p-values,
            # so this assertion may not depend on the sample -- confirm.
            _, pvalues_anderson, _ = st.anderson(data, dist='norm')
            assert pvalues_anderson[2] > 0.5
Example #18
0
 def pipe():
     """Return random data and label arrays wrapped in DALI constants.

     The data stacks ten ``np.random.rand(1, 2)`` draws and the label ten
     ``np.random.rand(1, 3)`` draws.
     """
     n_draws = 10
     data = np.array([np.random.rand(1, 2) for _ in range(n_draws)])
     label = np.array([np.random.rand(1, 3) for _ in range(n_draws)])
     return types.Constant(data), types.Constant(label)
Example #19
0
def build_pipes(device, dim, batch_size, channel_first, mode, interp, dtype,
                w_input, h_input, d_input, use_size_arg, use_size_input,
                use_roi):
    """Build and return ``(dali_pipe, pil_pipe)`` for a resize comparison.

    ``dali_pipe`` loads images (Caffe reader plus decoder for ``dim == 2``,
    an external 3D source otherwise), optionally transposes them to
    channel-first, and resizes them through ``resize_dali``. The target
    size is either one ``size`` argument (``use_size_arg``) or separate
    ``w`` / ``h`` / ``d`` values; each can be a random per-sample input or
    a fixed number. Its outputs are the images, the resized images, the
    RoI bounds (if ``use_roi``) and every size argument that was used.

    ``pil_pipe`` takes images, sizes and RoI bounds from named external
    sources and resizes them through ``resize_PIL``. Both pipelines are
    built before being returned.
    """
    dali_pipe = Pipeline(batch_size=batch_size,
                         num_threads=8,
                         device_id=0,
                         seed=1234)
    with dali_pipe:
        if dim == 2:
            files, _ = dali.fn.readers.caffe(path=db_2d_folder,
                                             random_shuffle=True)
            images_cpu = dali.fn.decoders.image(files, device="cpu")
        else:
            images_cpu = dali.fn.external_source(
                source=random_3d_loader(batch_size), layout="DHWC")

        if device == "cpu":
            images_hwc = images_cpu
        else:
            images_hwc = images_cpu.gpu()

        images = images_hwc
        if channel_first:
            perm = [3, 0, 1, 2] if dim == 3 else [2, 0, 1]
            images = dali.fn.transpose(images_hwc,
                                       perm=perm,
                                       transpose_layout=True)

        roi_start = roi_end = None
        w = h = d = size = None

        minibatch_size = 2 if dim == 3 else 8

        if use_roi:
            # Calculate absolute RoI
            full_shape = fn.shapes(images_cpu)
            slice_begin = types.Constant(0, dtype=types.FLOAT, device="cpu")
            slice_extent = types.Constant(dim,
                                          dtype=types.FLOAT,
                                          device="cpu")
            in_size = fn.slice(full_shape,
                               slice_begin,
                               slice_extent,
                               axes=[0],
                               normalized_shape=False)
            roi_start = fn.random.uniform(range=(0, 0.4),
                                          shape=[dim]) * in_size
            roi_end = fn.random.uniform(range=(0.6, 1.0),
                                        shape=[dim]) * in_size

        size_range = (10, 200) if dim == 3 else (10, 1000)

        if use_size_arg:
            if use_size_input:
                mask = fn.cast(fn.random.uniform(range=(0.8, 1.9),
                                                 shape=[dim]),
                               dtype=types.INT32)
                size = fn.random.uniform(range=size_range, shape=[dim]) * mask
            else:
                size = [300, 400] if dim == 2 else [80, 100, 120]
        else:

            def random_extent():
                # Random extent multiplied by a coin flip (probability 0.8).
                keep = fn.random.coin_flip(probability=0.8)
                return fn.random.uniform(range=size_range) * keep

            w = random_extent() if w_input else 320  # some fixed value
            h = random_extent() if h_input else 240  # some other fixed value
            if dim >= 3:
                d = random_extent() if d_input else 31  # some other fixed value

        # Whichever of size / w / h / d was not set above is still None,
        # so one call covers both the size-argument and per-axis cases.
        resized = resize_dali(images,
                              channel_first,
                              dtype,
                              interp,
                              mode,
                              size,
                              w,
                              h,
                              d,
                              roi_start,
                              roi_end,
                              minibatch_size=minibatch_size,
                              max_size=max_size(dim))

        outputs = [images, resized]
        if roi_start is not None and roi_end is not None:
            outputs += [roi_start, roi_end]

        # Size arguments are appended as outputs; plain Python values are
        # wrapped in float32 constants first.
        outputs.extend(
            arg if isinstance(arg, _DataNode) else types.Constant(
                np.array(arg, dtype=np.float32))
            for arg in (d, h, w, size) if arg is not None)

        dali_pipe.set_outputs(*outputs)

    pil_pipe = Pipeline(batch_size=batch_size,
                        num_threads=8,
                        device_id=0,
                        exec_async=False,
                        exec_pipelined=False)
    with pil_pipe:
        pil_images = fn.external_source(name="images",
                                        layout=layout_str(dim, channel_first))
        pil_sizes = fn.external_source(name="size")
        pil_roi_start = fn.external_source(name="roi_start")
        pil_roi_end = fn.external_source(name="roi_end")
        pil_resized = resize_PIL(dim, channel_first, dtype, interp,
                                 pil_images, pil_sizes, pil_roi_start,
                                 pil_roi_end)
        pil_resized = fn.reshape(pil_resized,
                                 layout=layout_str(dim, channel_first))
        pil_pipe.set_outputs(pil_resized)

    dali_pipe.build()
    pil_pipe.build()

    return dali_pipe, pil_pipe
Example #20
0
 def get_dali_pipe():
     """Return a DALI constant holding the value 1."""
     return types.Constant(1)
Example #21
0
def get_dali_pipe(value):
    """Return ``value`` wrapped in a DALI constant."""
    return types.Constant(value)
Example #22
0
    def _prepare_graph(self, define_graph = None):
        """Create the backend pipeline and register the graph's operators.

        The outputs come from ``self._graph_out`` when it is set; otherwise
        they are obtained by calling ``define_graph`` (or
        ``self.define_graph`` when the argument is ``None``) inside
        ``with self``. Outputs that are not data nodes are turned into CPU
        constants. The graph is then walked backwards from the outputs and
        ``self._sinks`` to collect the operators, which are added to the
        backend pipeline.

        On completion ``self._ops`` lists the added operators,
        ``self._prepared`` is True and ``self._names_and_devices`` holds
        the ``(name, device)`` pair of every output.

        Raises:
            RuntimeError: if ``define_graph`` is passed although
                ``self._graph_out`` is already set, or if an edge without a
                source operator is encountered.
        """
        # Backend pipeline configured from the settings stored on self.
        self._pipe = b.Pipeline(self._max_batch_size,
                                self._num_threads,
                                self._device_id,
                                self._seed,
                                self._exec_pipelined,
                                self._prefetch_queue_depth,
                                self._exec_async,
                                self._bytes_per_sample,
                                self._set_affinity,
                                self._max_streams,
                                self._default_cuda_stream_priority)
        self._pipe.SetExecutionTypes(self._exec_pipelined, self._exec_separated, self._exec_async)
        self._pipe.SetQueueSizes(self._cpu_queue_size, self._gpu_queue_size)
        self._pipe.EnableExecutorMemoryStats(self._enable_memory_stats)

        if define_graph is not None:
            if self._graph_out is not None:
                raise RuntimeError("Duplicate graph definition - `define_graph` argument "
                    "should not be specified when graph was defined with a call to `set_outputs`.")
        else:
            define_graph = self.define_graph

        # NOTE(review): this is a truthiness test while the check above uses
        # `is not None`, so an empty `_graph_out` falls through to
        # `define_graph()` here -- confirm that this is intended.
        if self._graph_out:
            outputs = self._graph_out
        else:
            with self:
                outputs = define_graph()
        # Normalize the outputs to a list.
        if isinstance(outputs, tuple):
            outputs = list(outputs)
        elif not isinstance(outputs, list):
            outputs = [outputs]

        # Replace every output that is not a DataNode with a CPU constant
        # node (in place), then validate it.
        for i in range(len(outputs)):
            if isinstance(outputs[i], types.ScalarConstant):
                import nvidia.dali.ops
                outputs[i] = nvidia.dali.ops._instantiate_constant_node("cpu", outputs[i])
            elif not isinstance(outputs[i], DataNode):
                outputs[i] = types.Constant(outputs[i], device="cpu")
            _data_node._check(outputs[i])

        # Backtrack to construct the graph
        op_ids = set()
        edges = deque(list(outputs) + self._sinks)
        ops = []
        while edges:
            current_edge = edges.popleft()
            source_op = current_edge.source
            if source_op is None:
                raise RuntimeError(
                    "Pipeline encountered "
                    "Edge with no source op.")

            # To make sure we don't double count ops in
            # the case that they produce more than one
            # output, we keep track of the unique op ids
            # for each op we encounter and only add the
            # op if we have not already
            if source_op.id not in op_ids:
                op_ids.add(source_op.id)
                source_op.check_args()
                ops.append(source_op)
            else:
                # If the op was already added, we need to
                # change its position to the top of the list.
                # This ensures topological ordering of ops
                # when adding to the backend pipeline
                ops.remove(source_op)
                ops.append(source_op)
            # Queue the inputs of this op; an input may itself be a list
            # of edges.
            for edge in source_op.inputs:
                if isinstance(edge, list):
                    for e in edge:
                        edges.append(e)
                else:
                    edges.append(edge)

        # Add the ops to the graph and build the backend
        # Ops are popped from the end of the list, i.e. in reverse order of
        # collection. The value returned by AddOperator for the first op of
        # a relation_id is passed along when adding later ops that share it.
        related_logical_id = {}
        self._ops = []
        while ops:
            op = ops.pop()
            self._ops.append(op)
            if op.relation_id not in related_logical_id:
                related_logical_id[op.relation_id] = self._pipe.AddOperator(op.spec, op.name)
            else:
                self._pipe.AddOperator(op.spec, op.name, related_logical_id[op.relation_id])
        self._prepared = True
        self._setup_input_callbacks()
        self._names_and_devices = [(e.name, e.device) for e in outputs]