# Imports assumed by the snippets in this section (the originals come from
# several different files; check_batch is a helper from DALI's test_utils):
import os
import numpy as np
import tensorflow as tf
import nvidia.dali as dali
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import nvidia.dali.math as math
import nvidia.dali.tfrecord as tfrec
import nvidia.dali.plugin.tf as dali_tf
from nvidia.dali.math import log
from nvidia.dali.pipeline import Pipeline
from nvidia.dali.backend_impl import TensorListGPU
from test_utils import check_batch


def _run_test_cat(num_inputs, layout, ndim, axis, axis_name):
    num_iter = 3
    batch_size = 4
    if ndim is None:
        ndim = len(layout)
    if axis_name is not None:
        ref_axis = layout.find(axis_name)
    else:
        ref_axis = axis if axis is not None else 0
    assert ref_axis >= 0
    axis_arg = None if axis_name else axis
    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=3, device_id=0)
    with pipe:
        inputs = fn.external_source(
            input_generator(num_inputs, batch_size, ndim, ref_axis),
            num_outputs=num_inputs, layout=layout)
        out_cpu = fn.cat(*inputs, axis=axis_arg, axis_name=axis_name)
        out_gpu = fn.cat(*(x.gpu() for x in inputs), axis=axis_arg, axis_name=axis_name)
        pipe.set_outputs(out_cpu, out_gpu, *inputs)
    pipe.build()
    for _ in range(num_iter):
        o_cpu, o_gpu, *in_batches = pipe.run()
        ref = ref_cat(in_batches, ref_axis)
        check_batch(o_cpu, ref, batch_size, eps=0, expected_layout=layout)
        check_batch(o_gpu, ref, batch_size, eps=0, expected_layout=layout)
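# _run_test_cat relies on helpers that are not shown in this section. Below is
# a minimal, hypothetical sketch inferred from the call sites (not the original
# implementations): input_generator must yield one batch per fn.cat input, with
# shapes that agree everywhere except on the concatenation axis; ref_cat builds
# the reference output per sample with NumPy.
def input_generator(num_inputs, batch_size, ndim, axis):
    rng = np.random.RandomState(1234)

    def gen():
        # Per-sample base shapes; only the extent along `axis` differs
        # between the inputs being concatenated.
        base = [rng.randint(1, 6, size=ndim) for _ in range(batch_size)]
        out = []
        for _ in range(num_inputs):
            batch = []
            for s in base:
                shape = s.copy()
                shape[axis] = rng.randint(1, 6)
                batch.append(rng.uniform(size=shape).astype(np.float32))
            out.append(batch)
        return out

    return gen


def ref_cat(inputs, axis):
    # inputs: one TensorListCPU per fn.cat argument; concatenate sample-wise.
    batch_size = len(inputs[0])
    return [np.concatenate([inp.at(i) for inp in inputs], axis=axis)
            for i in range(batch_size)]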
def test_cat_empty_input():
    pipe = dali.pipeline.Pipeline(batch_size=1, num_threads=3, device_id=0)
    with pipe:
        src1 = dali.types.Constant(np.array([[1, 2, 3, 4],
                                             [5, 6, 7, 8],
                                             [9, 10, 11, 12]]))
        src2 = dali.types.Constant(np.array([[], [], []], dtype=np.int32))
        src3 = dali.types.Constant(np.array([[13, 14, 15],
                                             [16, 17, 18],
                                             [19, 20, 21]]))
        out_cpu = fn.cat(src1, src2, src3, axis=1)
        out_gpu = fn.cat(src1.gpu(), src2.gpu(), src3.gpu(), axis=1)
        pipe.set_outputs(out_cpu, out_gpu)
    pipe.build()
    o = list(pipe.run())
    o[1] = o[1].as_cpu()
    ref = np.array([[1, 2, 3, 4, 13, 14, 15],
                    [5, 6, 7, 8, 16, 17, 18],
                    [9, 10, 11, 12, 19, 20, 21]])
    assert np.array_equal(o[0].at(0), ref)
    assert np.array_equal(o[1].at(0), ref)
def define_graph(self):
    inputs, bboxes, labels, polygons, vertices = fn.readers.coco(
        file_root=self.file_root,
        annotations_file=self.annotation_file,
        skip_empty=True,
        shard_id=self.share_id,
        num_shards=self.num_gpus,
        ratio=True,
        ltrb=True,
        polygon_masks=True,
        random_shuffle=self.random_shuffle,
        shuffle_after_epoch=self.shuffle_after_epoch,
        name="Reader")

    input_shape = fn.slice(fn.cast(fn.peek_image_shape(inputs), dtype=types.INT32),
                           0, 2, axes=[0])
    h = fn.slice(input_shape, 0, 1, axes=[0], dtype=types.FLOAT)
    w = fn.slice(input_shape, 1, 1, axes=[0], dtype=types.FLOAT)
    short_side = math.min(w, h)
    scale = fn.random.uniform(range=[0.3, 1.])
    crop_side = fn.cast(math.ceil(scale * short_side), dtype=types.INT32)
    crop_shape = fn.cat(crop_side, crop_side)
    anchor_rel, shape_rel, bboxes, labels, bbox_indices = fn.random_bbox_crop(
        bboxes,
        labels,
        input_shape=input_shape,
        crop_shape=crop_shape,
        shape_layout="HW",
        thresholds=[0.],            # No minimum intersection-over-union, for demo purposes
        allow_no_crop=False,        # No-crop is disallowed, for demo purposes
        seed=-1,                    # -1 lets DALI derive the seed automatically
        bbox_layout="xyXY",         # left, top, right, bottom
        output_bbox_indices=True,   # Output indices of the filtered bounding boxes
        total_num_attempts=1024,
    )
    polygons, vertices = fn.segmentation.select_masks(bbox_indices, polygons, vertices)
    images = fn.decoders.image_slice(
        inputs, anchor_rel, shape_rel,
        normalized_anchor=False, normalized_shape=False, device='mixed')
    images = fn.color_space_conversion(images, image_type=types.RGB, output_type=types.BGR)
    MT_1_vertices = fn.transforms.crop(to_start=(0.0, 0.0), to_end=fn.cat(w, h))
    MT_2_vertices = fn.transforms.crop(
        from_start=anchor_rel, from_end=(anchor_rel + shape_rel),
        to_start=(0.0, 0.0), to_end=(1., 1.))
    vertices = fn.coord_transform(fn.coord_transform(vertices, MT=MT_1_vertices),
                                  MT=MT_2_vertices)
    targets = fn.cat(bboxes, fn.reshape(vertices, shape=[-1, 10]), axis=1)

    interp_methods = [types.INTERP_LINEAR, types.INTERP_CUBIC, types.INTERP_LANCZOS3,
                      types.INTERP_GAUSSIAN, types.INTERP_NN, types.INTERP_TRIANGULAR]
    interp_method = fn.random.uniform(values=[int(x) for x in interp_methods],
                                      dtype=types.INT32)
    interp_method = fn.reinterpret(interp_method, dtype=types.INTERP_TYPE)
    images = fn.resize(images, dtype=types.FLOAT, size=self.input_dim,
                       interp_type=interp_method)

    labels = labels.gpu()
    targets = targets.gpu()
    return (images, targets, labels)
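# define_graph above is a method of a Pipeline subclass. A hedged skeleton of
# the surrounding class, with attribute names inferred from the method body
# (hypothetical; the original class is not shown):
class COCOPipeline(Pipeline):
    def __init__(self, batch_size, num_threads, device_id, file_root,
                 annotation_file, num_gpus, input_dim,
                 random_shuffle=True, shuffle_after_epoch=False, share_id=0):
        super().__init__(batch_size, num_threads, device_id)
        self.file_root = file_root
        self.annotation_file = annotation_file
        self.share_id = share_id
        self.num_gpus = num_gpus
        self.input_dim = input_dim
        self.random_shuffle = random_shuffle
        self.shuffle_after_epoch = shuffle_after_epoch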
def test_cat_numpy_array():
    pipe = dali.pipeline.Pipeline(1, 1, 0)
    src = fn.external_source([[np.array([[10, 11], [12, 13]], dtype=np.float32)]])
    pipe.set_outputs(fn.cat(src, np.array([[20], [21]], dtype=np.float32), axis=1))
    pipe.build()
    o = pipe.run()
    assert np.array_equal(o[0].at(0), np.array([[10, 11, 20], [12, 13, 21]]))
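# For reference, the expected value computed directly with NumPy:
# np.concatenate([np.array([[10, 11], [12, 13]], dtype=np.float32),
#                 np.array([[20], [21]], dtype=np.float32)], axis=1)
# -> [[10. 11. 20.]
#     [12. 13. 21.]]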
def incorrect_input_sets_pipeline():
    jpegs, _ = fn.readers.file(file_root=file_root, seed=42, random_shuffle=True)
    images = fn.decoders.image(jpegs, seed=42)
    # Multiple input sets of mismatched lengths (3 vs. 2) - expected to be rejected
    output = fn.cat([images, images, images], [images, images])
    return tuple(output)
def kwargs_len_change():
    batch = [np.zeros(1)] * 8
    inputs = [batch] * 2
    kwargs = {}
    if kwargs_len_change.change:
        kwargs_len_change.change = False
        kwargs['axis'] = 0
    return fn.cat(*inputs, **kwargs)
def inputs_len_change():
    batch = [np.zeros(1)] * 8
    if inputs_len_change.change:
        inputs_len_change.change = False
        inputs = [batch]
    else:
        inputs = [batch] * 2
    return fn.cat(*inputs)
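# Both callables above use a function attribute as a one-shot flag, so the
# argument list passed to fn.cat differs between invocations of the definition
# function. A hedged driver sketch, assuming DALI's experimental debug mode,
# where the definition function is re-run on every pipe.run() and a changed
# input set should be reported as an error:
from nvidia.dali import pipeline_def

inputs_len_change.change = True
pipe = pipeline_def(inputs_len_change, batch_size=8, num_threads=1,
                    device_id=0)(debug=True)
pipe.build()
pipe.run()  # first iteration: fn.cat gets a single input
pipe.run()  # second iteration: two inputs - the changed signature should raise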
def multiple_input_sets_pipeline():
    # Lists of DataNodes form multiple input sets: each operator below runs
    # once per element of the list.
    jpegs = [
        fn.readers.file(file_root=file_root, seed=42, random_shuffle=True)[0]
        for _ in range(6)
    ]
    images = fn.decoders.image(jpegs, seed=42)
    cropped_images = fn.random_resized_crop(images, size=(224, 224), seed=42)
    output = fn.cat(cropped_images[:3], cropped_images[3:])
    return tuple(output)
def test_cat_cpu():
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=None)
    data = fn.external_source(source=get_data, layout="HWC")
    data2 = fn.external_source(source=get_data, layout="HWC")
    data3 = fn.external_source(source=get_data, layout="HWC")
    pixel_pos = fn.cat(data, data2, data3)
    pipe.set_outputs(pixel_pos)
    pipe.build()
    for _ in range(3):
        pipe.run()
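# test_cat_cpu relies on module-scope `batch_size` and `get_data`; a hedged
# sketch of compatible definitions (hypothetical, matching the "HWC" layout):
batch_size = 4

def get_data():
    return [np.random.randint(0, 255, size=(10, 10, 3), dtype=np.uint8)
            for _ in range(batch_size)]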
def test_cat_all_empty():
    pipe = dali.pipeline.Pipeline(batch_size=1, num_threads=3, device_id=0)
    with pipe:
        src1 = dali.types.Constant(np.array([[], [], []], dtype=np.int32))
        out_cpu = fn.cat(src1, src1, src1, axis=1)
        out_gpu = fn.cat(src1.gpu(), src1.gpu(), src1.gpu(), axis=1)
        pipe.set_outputs(out_cpu, out_gpu)
    pipe.build()
    o = list(pipe.run())
    o[1] = o[1].as_cpu()
    ref = np.array([[], [], []], dtype=np.int32)
    assert np.array_equal(o[0].at(0), ref)
    assert np.array_equal(o[1].at(0), ref)
def dali_frame_splicing_graph(x, nfeatures, x_len, stacking=1, subsampling=1):
    if stacking > 1:
        seq = [x]
        for n in range(1, stacking):
            f = fn.slice(x, n, x_len, axes=(1,),
                         out_of_bounds_policy='pad', fill_values=0)
            seq.append(f)
        x = fn.cat(*seq, axis=0)
        nfeatures = nfeatures * stacking

    if subsampling > 1:
        out_len = (x_len + subsampling - 1) // subsampling  # ceil division
        m = fn.transforms.scale(scale=[subsampling, 1], center=[0.5, 0])
        x = fn.reshape(x, rel_shape=[1, 1, -1], layout="HWC")  # layout required by WarpAffine
        size = fn.cat(nfeatures, out_len)
        x = fn.warp_affine(x, matrix=m, size=size, interp_type=types.INTERP_NN)
        x = fn.reshape(x, rel_shape=[1, 1], layout="ft")
    return x
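# For reference, a hedged NumPy sketch of the same frame-splicing transform on
# a (nfeatures, time) array: stack `stacking` zero-padded, time-shifted copies
# along the feature axis, then keep roughly every `subsampling`-th frame (the
# DALI graph implements the subsampling with a nearest-neighbor warp):
def np_frame_splicing(x, stacking=1, subsampling=1):
    if stacking > 1:
        padded = np.pad(x, ((0, 0), (0, stacking - 1)))
        x = np.concatenate([padded[:, n:n + x.shape[1]] for n in range(stacking)],
                           axis=0)
    if subsampling > 1:
        x = x[:, ::subsampling]
    return x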
def tfrecord_pipeline(dspath, batch_size, num_threads, device="cpu", device_id=None,
                      shard_id=0, num_shards=1, reader_name="Reader",
                      seq=True, chroms=False, chroms_vlog=False,
                      target=True, target_vlog=True, label=False,
                      random_shuffle=True):
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads, device_id=device_id)
    feature_description = {}
    feature_description["seq"] = tfrec.VarLenFeature(tfrec.float32, -1.0)
    feature_description["label"] = tfrec.FixedLenFeature([], tfrec.int64, -1)
    feature_description["target"] = tfrec.FixedLenFeature([], tfrec.float32, -1.0)
    for ct in dspath["chromatin_tracks"]:
        feature_description[ct] = tfrec.VarLenFeature(tfrec.float32, -1.0)
    with pipe:
        inputs = fn.readers.tfrecord(
            name=reader_name,
            path=dspath['TFRecord'],
            index_path=dspath['TFRecord_idx'],
            features=feature_description,
            shard_id=shard_id,
            num_shards=num_shards,
            random_shuffle=random_shuffle,
            read_ahead=True,
            prefetch_queue_depth=20,
            pad_last_batch=True)
        if device == "gpu":
            inputs['seq'] = inputs['seq'].gpu()
            for ct in dspath["chromatin_tracks"]:
                inputs[ct] = inputs[ct].gpu()
            inputs['target'] = inputs['target'].gpu()
            inputs['label'] = inputs['label'].gpu()
        seqdata = fn.expand_dims(inputs['seq'], axes=1, device=device)
        seqdata = fn.reshape(seqdata, shape=(4, -1), device=device)
        chromsdata = fn.cat(*[fn.expand_dims(inputs[ct], axes=0, device=device)
                              for ct in dspath["chromatin_tracks"]],
                            axis=0, device=device)

        sample = []
        if seq:
            sample.append(seqdata)
        if chroms:
            if chroms_vlog:
                sample.append(log(chromsdata + 1))
            else:
                sample.append(chromsdata)
        if target:
            if target_vlog:
                sample.append(log(inputs['target'] + 1))
            else:
                sample.append(inputs['target'])
        if label:
            sample.append(inputs['label'])
        pipe.set_outputs(*sample)
    return pipe
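# Hedged example of the `dspath` mapping expected by tfrecord_pipeline (keys
# inferred from the function body; file names and track names are placeholders):
dspath_example = {
    "TFRecord": ["data/train.tfrecord"],
    "TFRecord_idx": ["data/train.tfrecord.idx"],
    "chromatin_tracks": ["dnase", "h3k27ac"],
}
pipe = tfrecord_pipeline(dspath_example, batch_size=16, num_threads=4, device_id=0)
pipe.build()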
def dali_reflect_pad_graph(x, x_len, pad_amount):
    def flip_1d(x):
        # TODO(janton): remove the layout trick when Flip supports arbitrary data layouts
        x = fn.reshape(x, shape=(-1, 1, 1), layout="HWC")
        x = fn.flip(x, vertical=1)
        x = fn.reshape(x, shape=(-1,), layout="t")
        return x

    pad_start = fn.slice(x, 1, pad_amount, axes=(0,))
    pad_start = flip_1d(pad_start)
    pad_end = fn.slice(x, x_len - pad_amount - 1, pad_amount, axes=(0,))
    pad_end = flip_1d(pad_end)
    x = fn.cat(pad_start, x, pad_end, axis=0)
    return x
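# For reference, the equivalent NumPy operation (a hedged sketch): reflect-pad
# a 1-D signal by pad_amount on each side without repeating the edge samples.
def np_reflect_pad(x, pad_amount):
    return np.pad(x, pad_amount, mode='reflect')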
def check_random_mask_pixel(ndim=2, batch_size=3, min_extent=20, max_extent=50):
    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=4,
                                  device_id=0, seed=1234)
    with pipe:
        # Input mask
        in_shape_dims = [
            fn.cast(fn.random.uniform(range=(min_extent, max_extent + 1),
                                      shape=(1,), device='cpu'),
                    dtype=types.INT32)
            for _ in range(ndim)
        ]
        in_shape = fn.cat(*in_shape_dims, axis=0)
        in_mask = fn.cast(fn.random.uniform(range=(0, 2), device='cpu', shape=in_shape),
                          dtype=types.INT32)

        fg_pixel1 = fn.segmentation.random_mask_pixel(in_mask, foreground=1)  # > 0
        fg_pixel2 = fn.segmentation.random_mask_pixel(in_mask, foreground=1,
                                                      threshold=0.99)  # > 0.99
        fg_pixel3 = fn.segmentation.random_mask_pixel(in_mask, foreground=1,
                                                      value=2)  # == 2
        rnd_pixel = fn.segmentation.random_mask_pixel(in_mask, foreground=0)

        coin_flip = fn.random.coin_flip(probability=0.7)
        fg_biased = fn.segmentation.random_mask_pixel(in_mask, foreground=coin_flip)

        # Demo purposes: take a random pixel and produce a valid anchor to feed slice.
        # The large crop shape is meant to force the center adjustment.
        crop_shape = in_shape - 2
        anchor = fn.cast(fg_pixel1, dtype=types.INT32) - crop_shape // 2
        anchor = math.min(math.max(0, anchor), in_shape - crop_shape)
        out_mask = fn.slice(in_mask, anchor, crop_shape, axes=tuple(range(ndim)))

        pipe.set_outputs(in_mask, fg_pixel1, fg_pixel2, fg_pixel3, rnd_pixel,
                         coin_flip, fg_biased, anchor, crop_shape, out_mask)
    pipe.build()
    for _ in range(3):
        outputs = pipe.run()
        for idx in range(batch_size):
            in_mask = outputs[0].at(idx)
            fg_pixel1 = outputs[1].at(idx).tolist()
            fg_pixel2 = outputs[2].at(idx).tolist()
            fg_pixel3 = outputs[3].at(idx).tolist()
            rnd_pixel = outputs[4].at(idx).tolist()
            coin_flip = outputs[5].at(idx).tolist()
            fg_biased = outputs[6].at(idx).tolist()
            anchor = outputs[7].at(idx).tolist()
            crop_shape = outputs[8].at(idx).tolist()
            out_mask = outputs[9].at(idx)
            assert in_mask[tuple(fg_pixel1)] > 0
            assert in_mask[tuple(fg_pixel2)] > 0.99
            assert in_mask[tuple(fg_pixel3)] == 2
            assert in_mask[tuple(fg_biased)] > 0 or not coin_flip
            for d in range(ndim):
                assert 0 <= anchor[d] and anchor[d] + crop_shape[d] <= in_mask.shape[d]
            assert out_mask.shape == tuple(crop_shape)
def check_pad_to_square(device='cpu', batch_size=3, ndim=2, num_iter=3):
    pipe = Pipeline(batch_size=batch_size, num_threads=3, device_id=0, seed=1234)
    axes = (0, 1)
    with pipe:
        in_shape = fn.cast(fn.random.uniform(range=(10, 20), shape=(ndim,)),
                           dtype=types.INT32)
        in_data = fn.reshape(fn.random.uniform(range=(0., 1.), shape=in_shape),
                             layout="HW")
        shape = fn.shapes(in_data, dtype=types.INT32)
        h = fn.slice(shape, 0, 1, axes=[0])
        w = fn.slice(shape, 1, 1, axes=[0])
        side = math.max(h, w)
        if device == 'gpu':
            in_data = in_data.gpu()
        out_data = fn.pad(in_data, axis_names="HW", shape=fn.cat(side, side, axis=0))
        pipe.set_outputs(in_data, out_data)
    pipe.build()
    for _ in range(num_iter):
        outs = [out.as_cpu() if isinstance(out, TensorListGPU) else out
                for out in pipe.run()]
        for i in range(batch_size):
            in_data, out_data = [outs[out_idx].at(i) for out_idx in range(len(outs))]
            in_shape = in_data.shape
            max_side = max(in_shape)
            for s in out_data.shape:
                assert s == max_side
            np.testing.assert_equal(out_data[:in_shape[0], :in_shape[1]], in_data)
            np.testing.assert_equal(out_data[in_shape[0]:, :], 0)
            np.testing.assert_equal(out_data[:, in_shape[1]:], 0)
def check_bbox_random_crop_adjust_polygons(file_root, annotations_file, batch_size=3,
                                           num_iters=4, num_threads=4, device_id=0,
                                           seed=1234):
    pipe = Pipeline(batch_size=batch_size, num_threads=num_threads,
                    device_id=device_id, seed=seed)
    with pipe:
        # Read data from COCO.
        # ratio=True means both bboxes and masks coordinates will be
        # relative to the image dimensions (range [0.0, 1.0])
        inputs, in_bboxes, labels, in_polygons, in_vertices = fn.readers.coco(
            file_root=file_root, annotations_file=annotations_file,
            shard_id=0, num_shards=1, ratio=True, ltrb=True, polygon_masks=True)

        # Generate a random crop. out_bboxes are adjusted to the crop window.
        slice_anchor, slice_shape, out_bboxes, labels, bbox_indices = fn.random_bbox_crop(
            in_bboxes, labels,
            aspect_ratio=[0.5, 2.0],
            thresholds=[0, 0.1, 0.3, 0.5, 0.7, 0.9],
            scaling=[0.3, 1.0],
            bbox_layout='xyXY',
            output_bbox_indices=True)
        # Crop the image
        _ = fn.decoders.image_slice(inputs, slice_anchor, slice_shape,
                                    device='mixed', axis_names='WH')

        sel_polygons, sel_vertices = fn.segmentation.select_masks(
            bbox_indices, in_polygons, in_vertices)

        # Adjust masks coordinates to the coordinate space of the cropped image
        MT = fn.transforms.crop(from_start=slice_anchor,
                                from_end=(slice_anchor + slice_shape))
        out_vertices = fn.coord_transform(sel_vertices, MT=MT)

        # Converting to absolute coordinates (demo purposes)
        image_shape = fn.peek_image_shape(inputs, dtype=types.FLOAT)
        h = fn.slice(image_shape, 0, 1, axes=[0])
        w = fn.slice(image_shape, 1, 1, axes=[0])

        # Original bboxes
        bbox_x = fn.slice(in_bboxes, 0, 1, axes=[1])
        bbox_y = fn.slice(in_bboxes, 1, 1, axes=[1])
        bbox_X = fn.slice(in_bboxes, 2, 1, axes=[1])
        bbox_Y = fn.slice(in_bboxes, 3, 1, axes=[1])
        in_bboxes_abs = fn.cat(bbox_x * w, bbox_y * h, bbox_X * w, bbox_Y * h, axis=1)

        # Transform to convert relative coordinates to absolute
        scale_rel_to_abs = fn.transforms.scale(scale=fn.cat(w, h))

        # Selected vertices (relative coordinates)
        sel_vertices_abs = fn.coord_transform(out_vertices, MT=scale_rel_to_abs)

        # Output bboxes
        bbox2_x = fn.slice(out_bboxes, 0, 1, axes=[1])
        bbox2_y = fn.slice(out_bboxes, 1, 1, axes=[1])
        bbox2_X = fn.slice(out_bboxes, 2, 1, axes=[1])
        bbox2_Y = fn.slice(out_bboxes, 3, 1, axes=[1])
        out_bboxes_abs = fn.cat(bbox2_x * w, bbox2_y * h, bbox2_X * w, bbox2_Y * h, axis=1)

        # Output vertices (absolute coordinates)
        out_vertices_abs = fn.coord_transform(out_vertices, MT=scale_rel_to_abs)

        # Clamped coordinates
        out_vertices_clamped = math.clamp(out_vertices, 0.0, 1.0)
        out_vertices_clamped_abs = fn.coord_transform(out_vertices_clamped,
                                                      MT=scale_rel_to_abs)

        pipe.set_outputs(in_vertices, sel_vertices, sel_vertices_abs, out_vertices,
                         out_vertices_clamped, out_vertices_abs,
                         out_vertices_clamped_abs, in_bboxes, in_bboxes_abs,
                         out_bboxes, out_bboxes_abs, in_polygons, sel_polygons,
                         image_shape, slice_anchor, slice_shape, bbox_indices)
    pipe.build()
    # Enough iterations to see an example with more than one bounding box
    for i in range(num_iters):
        outs = pipe.run()
        for j in range(batch_size):
            (in_vertices, sel_vertices, sel_vertices_abs, out_vertices,
             out_vertices_clamped, out_vertices_abs, out_vertices_clamped_abs,
             in_bboxes, in_bboxes_abs, out_bboxes, out_bboxes_abs, in_polygons,
             sel_polygons, image_shape, slice_anchor, slice_shape,
             bbox_indices) = (outs[k].at(j) for k in range(len(outs)))

            # Checking that the output polygon descriptors are the ones associated
            # with the selected bounding boxes
            expected_polygons_list = []
            expected_vertices_list = []
            ver_count = 0
            for k in range(in_polygons.shape[0]):
                mask_id = in_polygons[k][0]
                in_ver_start_idx = in_polygons[k][1]
                in_ver_end_idx = in_polygons[k][2]
                pol_nver = in_ver_end_idx - in_ver_start_idx
                if mask_id in bbox_indices:
                    expected_polygons_list.append(
                        [mask_id, ver_count, ver_count + pol_nver])
                    for v_idx in range(in_ver_start_idx, in_ver_end_idx):
                        expected_vertices_list.append(in_vertices[v_idx])
                    ver_count = ver_count + pol_nver
            expected_sel_polygons = np.array(expected_polygons_list)
            np.testing.assert_equal(expected_sel_polygons, sel_polygons)

            # Checking the selected vertices correspond to the selected masks
            expected_sel_vertices = np.array(expected_vertices_list)
            np.testing.assert_equal(expected_sel_vertices, sel_vertices)

            # Check that the vertices are correctly mapped to the cropping window
            expected_out_vertices = np.copy(expected_sel_vertices)
            crop_x, crop_y = slice_anchor
            crop_w, crop_h = slice_shape
            for v in range(expected_out_vertices.shape[0]):
                expected_out_vertices[v, 0] = (expected_out_vertices[v, 0] - crop_x) / crop_w
                expected_out_vertices[v, 1] = (expected_out_vertices[v, 1] - crop_y) / crop_h
            np.testing.assert_allclose(expected_out_vertices, out_vertices, rtol=1e-4)

            # Checking the conversion to absolute coordinates
            h, w, _ = image_shape
            wh = np.array([w, h])
            whwh = np.array([w, h, w, h])
            expected_out_vertices_abs = expected_out_vertices * wh
            np.testing.assert_allclose(expected_out_vertices_abs, out_vertices_abs,
                                       rtol=1e-4)

            # Checking clamping of the relative coordinates
            expected_out_vertices_clamped = np.clip(expected_out_vertices,
                                                    a_min=0.0, a_max=1.0)
            np.testing.assert_allclose(expected_out_vertices_clamped,
                                       out_vertices_clamped, rtol=1e-4)

            # Checking clamping of the absolute coordinates
            expected_out_vertices_clamped_abs = np.clip(expected_out_vertices_abs, 0, wh)
            np.testing.assert_allclose(expected_out_vertices_clamped_abs,
                                       out_vertices_clamped_abs, rtol=1e-4)

            # Checking scaling of the bounding boxes
            expected_in_bboxes_abs = in_bboxes * whwh
            np.testing.assert_allclose(expected_in_bboxes_abs, in_bboxes_abs, rtol=1e-4)

            # Check box selection and mapping to the cropping window
            expected_out_bboxes = np.copy(in_bboxes[bbox_indices, :])
            for k in range(expected_out_bboxes.shape[0]):
                expected_out_bboxes[k, 0] = (expected_out_bboxes[k, 0] - crop_x) / crop_w
                expected_out_bboxes[k, 1] = (expected_out_bboxes[k, 1] - crop_y) / crop_h
                expected_out_bboxes[k, 2] = (expected_out_bboxes[k, 2] - crop_x) / crop_w
                expected_out_bboxes[k, 3] = (expected_out_bboxes[k, 3] - crop_y) / crop_h
            expected_out_bboxes = np.clip(expected_out_bboxes, a_min=0.0, a_max=1.0)
            np.testing.assert_allclose(expected_out_bboxes, out_bboxes, rtol=1e-4)
            expected_out_bboxes_abs = expected_out_bboxes * whwh
            np.testing.assert_allclose(expected_out_bboxes_abs, out_bboxes_abs, rtol=1e-4)
def create_image_pipeline(batch_size, num_threads, device_id,
                          image0_list, image1_list, flow_list, valBool):
    pipeline = Pipeline(batch_size, num_threads, device_id, seed=2)
    with pipeline:
        shuffleBool = not valBool
        """ READ FILES """
        image0, _ = fn.readers.file(
            file_root=args.data,
            files=image0_list,
            random_shuffle=shuffleBool,
            name="Reader",
            seed=1,
        )
        image1, _ = fn.readers.file(
            file_root=args.data,
            files=image1_list,
            random_shuffle=shuffleBool,
            seed=1,
        )
        flo = fn.readers.numpy(
            file_root=args.data,
            files=flow_list,
            random_shuffle=shuffleBool,
            seed=1,
        )
        """ DECODE AND RESHAPE """
        image0 = fn.decoders.image(image0, device="cpu")
        image0 = fn.reshape(image0, layout="HWC")
        image1 = fn.decoders.image(image1, device="cpu")
        image1 = fn.reshape(image1, layout="HWC")
        images = fn.cat(image0, image1, axis=2)
        flo = fn.reshape(flo, layout="HWC")
        if valBool:
            images = fn.resize(images, resize_x=162, resize_y=122)
        else:
            """ CO-TRANSFORM """
            # random translate
            # angle_rng = fn.random.uniform(range=(-90, 90))
            # images = fn.rotate(images, angle=angle_rng, fill_value=0)
            # flo = fn.rotate(flo, angle=angle_rng, fill_value=0)
            images = fn.random_resized_crop(
                images,
                size=[122, 162],
                random_aspect_ratio=[1.3, 1.4],
                random_area=[0.8, 0.9],
                seed=1,
            )
            flo = fn.random_resized_crop(
                flo,
                size=[122, 162],
                random_aspect_ratio=[1.3, 1.4],
                random_area=[0.8, 0.9],
                seed=1,
            )
            # coin1 = fn.random.coin_flip(dtype=types.DALIDataType.BOOL, seed=10)
            # coin1_n = coin1 ^ True
            # coin2 = fn.random.coin_flip(dtype=types.DALIDataType.BOOL, seed=20)
            # coin2_n = coin2 ^ True
            # images = (
            #     fn.flip(images, horizontal=1, vertical=1) * coin1 * coin2
            #     + fn.flip(images, horizontal=1) * coin1 * coin2_n
            #     + fn.flip(images, vertical=1) * coin1_n * coin2
            #     + images * coin1_n * coin2_n
            # )
            # flo = (
            #     fn.flip(flo, horizontal=1, vertical=1) * coin1 * coin2
            #     + fn.flip(flo, horizontal=1) * coin1 * coin2_n
            #     + fn.flip(flo, vertical=1) * coin1_n * coin2
            #     + flo * coin1_n * coin2_n
            # )
            # _flo = flo
            # flo_0 = fn.slice(_flo, axis_names="C", start=0, shape=1)
            # flo_1 = fn.slice(_flo, axis_names="C", start=1, shape=1)
            # flo_0 = flo_0 * coin1 * -1 + flo_0 * coin1_n
            # flo_1 = flo_1 * coin2 * -1 + flo_1 * coin2_n
            # flo = no flip + vertical flip + horizontal flip + both flips.
            # A horizontal flip is around the vertical axis (switches left and right),
            # so for a vertical flip coin1 is activated and needs to give +1 while
            # coin2 is activated and needs to give -1; for a horizontal flip coin1
            # needs to be -1 and coin2_n +1; for no flip, coin1_n +1 and coin2_n +1.
            # flo = fn.cat(flo_0, flo_1, axis_name="C")
        """ NORMALIZE """
        images = fn.crop_mirror_normalize(
            images, mean=[0, 0, 0, 0, 0, 0], std=[255, 255, 255, 255, 255, 255])
        images = fn.crop_mirror_normalize(
            images,
            mean=[0.45, 0.432, 0.411, 0.45, 0.432, 0.411],
            std=[1, 1, 1, 1, 1, 1],
        )
        flo = fn.crop_mirror_normalize(
            flo, mean=[0, 0], std=[args.div_flow, args.div_flow])
    pipeline.set_outputs(images, flo)
    return pipeline
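# The pipeline above reads `args` from module scope. A hedged sketch of the
# fields it expects (hypothetical argparse namespace; values are placeholders):
import argparse
args = argparse.Namespace(data="/path/to/dataset", div_flow=20.0)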
def load_tfrecord(directory, batch_size, training):
    tfrecord = []
    tfrecord_idx = []
    for f in os.listdir(directory):
        fullpath = os.path.join(directory, f)
        if not os.path.isfile(fullpath):
            continue
        if f.endswith(".tfrecord"):
            tfrecord.append(fullpath)
        if f.endswith(".idx"):
            tfrecord_idx.append(fullpath)
    tfrecord.sort()
    tfrecord_idx.sort()

    pipe = Pipeline(batch_size=batch_size, num_threads=32, device_id=0)
    with pipe:
        inputs = fn.tfrecord_reader(
            path=tfrecord,
            index_path=tfrecord_idx,
            features={
                "frame_one": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_two": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_three": tfrec.FixedLenFeature((), tfrec.string, ""),
                "frame_four": tfrec.FixedLenFeature((), tfrec.string, ""),
                "plus_one_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_one_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_two_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_position": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "plus_three_orientation": tfrec.FixedLenFeature([3], tfrec.float32, 0.0),
                "speed": tfrec.FixedLenFeature([], tfrec.float32, 0.0),
            })
        frame1 = inputs["frame_one"]
        frame1 = fn.image_decoder(frame1, device="mixed", output_type=types.RGB)
        # frame1 = fn.resize(frame1, device="gpu", resize_shorter=256.)
        frame1 = fn.crop_mirror_normalize(frame1, device="gpu", dtype=types.FLOAT,
                                          mean=[0., 0., 0.], std=[1., 1., 1.])
        frame1 = fn.transpose(frame1, device="gpu", perm=[1, 2, 0])

        frame2 = inputs["frame_two"]
        frame2 = fn.image_decoder(frame2, device="mixed", output_type=types.RGB)
        # frame2 = fn.resize(frame2, device="gpu", resize_shorter=256.)
        frame2 = fn.crop_mirror_normalize(frame2, device="gpu", dtype=types.FLOAT,
                                          mean=[0., 0., 0.], std=[1., 1., 1.])
        frame2 = fn.transpose(frame2, device="gpu", perm=[1, 2, 0])

        position = inputs["plus_one_position"].gpu()
        orientation = inputs["plus_one_orientation"].gpu()
        speed = inputs["speed"].gpu()

        image = fn.cat(frame1, frame2, device="gpu", axis=2)
        pose = fn.cat(position, orientation, device="gpu", axis=0)
        pipe.set_outputs(image, pose, speed)

    # Define shapes and types of the outputs. Note: after fn.cat the image is
    # HWC with 6 channels, so the declared image shape may also need a channel
    # dimension, e.g. (batch_size, 480, 640, 6).
    shapes = ((batch_size, 480, 640), (batch_size, 6), (batch_size,))
    dtypes = (tf.float32, tf.float32, tf.float32)

    # Create dataset
    return dali_tf.DALIDataset(
        pipeline=pipe,
        batch_size=batch_size,
        output_shapes=shapes,
        output_dtypes=dtypes,
        device_id=0)
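# Hedged usage sketch: the returned DALIDataset behaves like any tf.data
# pipeline (the directory is a placeholder):
ds = load_tfrecord("data/records", batch_size=8, training=True)
for image, pose, speed in ds.take(1):
    print(image.shape, pose.shape, speed.shape)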