Example #1
0
    def get_next_batch(self, batch_idx):
        """Build the multi-scale segmentation batch at position ``batch_idx``.

        The final batch (``batch_idx == self.num_batch - 1``) contains
        ``self.last_batch_size`` samples, every earlier one contains
        ``self.batch_size``.  For an index at or past ``self.num_batch`` a
        message is printed and ``None`` is returned.

        Returns:
            dict with keys 'images', 'depths', 'flows' (each a list holding
            one channels-last array per scale, with 3, 1 and 2 channels
            respectively) and 'seg', an array of shape
            ``(batch, output_height, output_width)``.
        """
        n_samples = self.batch_size
        full_size = self.orig_im_size
        if batch_idx == self.num_batch - 1:
            n_samples = self.last_batch_size
        elif batch_idx >= self.num_batch:
            print('batch index larger than the total number of batches')
            return None
        heights, widths, n_scales = self.im_heights, self.im_widths, self.num_scale
        out_h, out_w = self.output_height, self.output_width

        def empty_pyramid(channels):
            # One zero-filled (batch, height, width, channels) array per scale.
            return [
                np.zeros((n_samples, heights[s], widths[s], channels))
                for s in range(n_scales)
            ]

        images = empty_pyramid(3)
        depths = empty_pyramid(1)
        flows = empty_pyramid(2)
        label = np.zeros((n_samples, out_h, out_w))

        # Samples of a batch are consecutive entries of ``self.sample_index``.
        first = batch_idx * self.batch_size
        for slot in range(n_samples):
            data_idx = self.sample_index[first + slot]
            image = np.array(Image.open(self.meta['image'][data_idx])) / 255.0
            depth = flowlib.read_disp_png(self.meta['depth'][data_idx])
            # Only the first two flow channels are used here.
            flow = flowlib.read_flow_png(self.meta['flow'][data_idx])[:, :, 0:2]
            missing_rows = full_size[0] - flow.shape[0]
            if missing_rows > 0:
                # Too-short flow maps get zero rows added at the top.
                flow = np.pad(flow, ((missing_rows, 0), (0, 0), (0, 0)),
                              'constant')
            seg = np.array(Image.open(self.meta['seg'][data_idx]))

            if self.data_augment:
                image, depth, flow, seg = self.data_augmentation(
                    image, depth, flow, seg)

            for s in range(n_scales):
                target = (widths[s], heights[s])  # cv2 expects (width, height)
                images[s][slot] = cv2.resize(
                    image, target, interpolation=cv2.INTER_AREA)
                depths[s][slot, :, :, 0] = cv2.resize(
                    depth, target, interpolation=cv2.INTER_AREA)
                flows[s][slot] = cv2.resize(
                    flow, target, interpolation=cv2.INTER_AREA)
            # Nearest-neighbour resizing avoids blending segmentation values.
            label[slot] = cv2.resize(seg, (out_w, out_h),
                                     interpolation=cv2.INTER_NEAREST)

        return {
            'images': images,
            'depths': depths,
            'flows': flows,
            'seg': label
        }
Example #2
0
    def get_one_sample(self, image_name, depth_name, flow_name, seg_name):
        """Load a single sample from explicit file paths as a batch of size 1.

        Each input is cropped to at most 768 rows (the top rows are kept).
        The image, depth map and the first two flow channels are then resized
        to every scale in ``self.im_heights`` / ``self.im_widths``, and the
        segmentation map is resized to ``self.output_height`` x
        ``self.output_width`` with nearest-neighbour interpolation.

        Args:
            image_name: path of the image, opened with ``Image.open``.
            depth_name: path passed to ``flowlib.read_disp_png``.
            flow_name: path passed to ``flowlib.read_flow_png``.
            seg_name: path of the segmentation map, opened with ``Image.open``.

        Returns:
            dict with keys 'images', 'flows', 'depths' (lists with one
            channels-last array per scale) and 'seg', an array of shape
            ``(1, output_height, output_width)``.
        """
        batch_size = 1
        max_rows = 768  # rows beyond this are cropped off every input

        im_heights, im_widths, num_scale = self.im_heights, self.im_widths, self.num_scale
        ou_height, ou_width = self.output_height, self.output_width
        images = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 3))
            for i in range(num_scale)
        ]
        depths = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 1))
            for i in range(num_scale)
        ]
        flows = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 2))
            for i in range(num_scale)
        ]
        label = np.zeros((batch_size, ou_height, ou_width))

        image = np.array(Image.open(image_name))
        image = image / 255.0
        image = image[:max_rows, :, :]
        depth = flowlib.read_disp_png(depth_name)
        depth = depth[:max_rows, :]
        flow = flowlib.read_flow_png(flow_name)
        flow = flow[:max_rows, :, 0:2]
        seg = np.array(Image.open(seg_name))
        # Crop rows only.  The previous ``seg[:768, :, :]`` raised IndexError
        # for a 2-D map, which is the only rank that fits the 2-D ``label``
        # slice assigned below.
        seg = seg[:max_rows]

        for i in range(num_scale):
            size = (im_widths[i], im_heights[i])  # cv2 expects (width, height)
            images[i][0, :, :, :] = cv2.resize(
                image, size, interpolation=cv2.INTER_AREA)
            depths[i][0, :, :, 0] = cv2.resize(
                depth, size, interpolation=cv2.INTER_AREA)
            flows[i][0, :, :, :] = cv2.resize(
                flow, size, interpolation=cv2.INTER_AREA)
        label[0, :, :] = cv2.resize(seg, (ou_width, ou_height),
                                    interpolation=cv2.INTER_NEAREST)
        sample = {
            'images': images,
            'flows': flows,
            'depths': depths,
            'seg': label
        }
        return sample
Example #3
0
    def get_one_sample(self, image_name, depth_name, flow_name, box_name):
        """Load one sample and build multi-scale detection targets (batch of 1).

        Only rows of the box file whose first field is 'Car' are kept; the
        class name is mapped through ``self.class_map`` and columns 4..7 of
        each row are used as the box.  Box coordinates are normalised by the
        image width (columns 0 and 2) and height (columns 1 and 3).

        For every box and scale, the output cells around the box centre
        (floor/ceil in x and y) get ``label_maps == 1`` and ``offsets`` set to
        the box coordinates minus the normalised cell position.  Boxes
        narrower or shorter than two output cells are skipped at that scale.

        Args:
            image_name: path of the image, opened with ``Image.open``.
            depth_name: path passed to ``flowlib.read_disp_png``.
            flow_name: path passed to ``flowlib.read_flow_png``.
            box_name: path of a text file with space-separated fields per row.

        Returns:
            Tuple ``(images, orig_image, depths, orig_depth, flows, orig_flow,
            boxes, label_maps, offsets)``.  All arrays are transposed to
            (batch, channel, height, width); ``boxes`` is a one-element list
            holding the normalised box array, or ``[]`` when no box was kept.
        """
        image = np.array(Image.open(image_name))
        image = image / 255.0
        depth = flowlib.read_disp_png(depth_name)
        flow = flowlib.read_flow_png(flow_name)

        box_and_label = []
        with open(box_name) as txt_file:
            for row in txt_file:
                row = row.strip().split(' ')
                if row[0] not in ['Car']:
                    continue
                row[0] = self.class_map[row[0]]
                box_and_label.append(row)
        # Builtin ``float``: the ``np.float`` alias was removed in NumPy 1.24.
        box_and_label = np.array(box_and_label).astype(float)
        if box_and_label.shape[0] == 0:
            box = []
        else:
            box = box_and_label[:, 4:8]

        batch_size = 1
        orig_im_size = self.orig_im_size
        im_heights, im_widths, num_scale = self.im_heights, self.im_widths, self.num_scale
        ou_heights, ou_widths = self.output_heights, self.output_widths
        images = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 3))
            for i in range(num_scale)
        ]
        orig_image = np.zeros(
            (batch_size, orig_im_size[0], orig_im_size[1], 3))
        depths = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 1))
            for i in range(num_scale)
        ]
        orig_depth = np.zeros(
            (batch_size, orig_im_size[0], orig_im_size[1], 1))
        flows = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 3))
            for i in range(num_scale)
        ]
        orig_flow = np.zeros((batch_size, orig_im_size[0], orig_im_size[1], 3))
        boxes = []
        label_maps = [
            np.zeros((batch_size, ou_heights[i], ou_widths[i], 1))
            for i in range(num_scale)
        ]
        offsets = [
            np.zeros((batch_size, ou_heights[i], ou_widths[i], 4))
            for i in range(num_scale)
        ]

        im_height, im_width = image.shape[0], image.shape[1]

        # Flow channels 0:2 are area-averaged; channel 2 is resized with
        # nearest neighbour (presumably a validity mask -- TODO confirm).
        for i in range(num_scale):
            size = (im_widths[i], im_heights[i])  # cv2 expects (width, height)
            images[i][0, :, :, :] = cv2.resize(
                image, size, interpolation=cv2.INTER_AREA)
            depths[i][0, :, :, 0] = cv2.resize(
                depth, size, interpolation=cv2.INTER_AREA)
            flows[i][0, :, :, 0:2] = cv2.resize(
                flow[:, :, 0:2], size, interpolation=cv2.INTER_AREA)
            flows[i][0, :, :, 2] = cv2.resize(
                flow[:, :, 2], size, interpolation=cv2.INTER_NEAREST)
        orig_size = (orig_im_size[1], orig_im_size[0])
        orig_image[0, :, :, :] = cv2.resize(
            image, orig_size, interpolation=cv2.INTER_AREA)
        orig_depth[0, :, :, 0] = cv2.resize(
            depth, orig_size, interpolation=cv2.INTER_AREA)
        orig_flow[0, :, :, 0:2] = cv2.resize(
            flow[:, :, 0:2], orig_size, interpolation=cv2.INTER_AREA)
        orig_flow[0, :, :, 2] = cv2.resize(
            flow[:, :, 2], orig_size, interpolation=cv2.INTER_NEAREST)

        # (x, y) rounding for the four output cells around a box centre,
        # visited in this order.
        corner_rounders = ((np.floor, np.floor), (np.floor, np.ceil),
                           (np.ceil, np.floor), (np.ceil, np.ceil))
        if len(box) > 0:
            box[:, 0] = box[:, 0] * 1.0 / im_width
            box[:, 1] = box[:, 1] * 1.0 / im_height
            box[:, 2] = box[:, 2] * 1.0 / im_width
            box[:, 3] = box[:, 3] * 1.0 / im_height
            boxes.append(box)
            x = (box[:, 0] + box[:, 2]) * 1.0 / 2
            y = (box[:, 1] + box[:, 3]) * 1.0 / 2
            w = (box[:, 2] - box[:, 0]) * 1.0
            h = (box[:, 3] - box[:, 1]) * 1.0
            for k in range(box.shape[0]):
                for i in range(num_scale):
                    if w[k] < 2.0 / ou_widths[i] or h[k] < 2.0 / ou_heights[i]:
                        continue
                    for round_x, round_y in corner_rounders:
                        # Builtin ``int``: ``np.int`` was removed in NumPy 1.24.
                        x_c = int(round_x(x[k] * ou_widths[i]))
                        y_c = int(round_y(y[k] * ou_heights[i]))
                        if x_c < 0 or x_c >= ou_widths[i] \
                                or y_c < 0 or y_c >= ou_heights[i]:
                            # NOTE(review): an out-of-range cell also skips
                            # the remaining corners for this scale; kept
                            # as-is so the generated targets do not change.
                            break
                        cell_x = x_c * 1.0 / ou_widths[i]
                        cell_y = y_c * 1.0 / ou_heights[i]
                        label_maps[i][0, y_c, x_c, 0] = 1  # Only works for car detection
                        offsets[i][0, y_c, x_c, 0] = box[k, 0] - cell_x
                        offsets[i][0, y_c, x_c, 1] = box[k, 1] - cell_y
                        offsets[i][0, y_c, x_c, 2] = box[k, 2] - cell_x
                        offsets[i][0, y_c, x_c, 3] = box[k, 3] - cell_y
        else:
            boxes.append([])

        # Channels-last -> channels-first.
        for i in range(num_scale):
            images[i] = images[i].transpose((0, 3, 1, 2))
            depths[i] = depths[i].transpose((0, 3, 1, 2))
            flows[i] = flows[i].transpose((0, 3, 1, 2))
            label_maps[i] = label_maps[i].transpose((0, 3, 1, 2))
            offsets[i] = offsets[i].transpose((0, 3, 1, 2))
        orig_image = orig_image.transpose((0, 3, 1, 2))
        orig_depth = orig_depth.transpose((0, 3, 1, 2))
        orig_flow = orig_flow.transpose((0, 3, 1, 2))
        return images, orig_image, depths, orig_depth, flows, orig_flow, boxes, \
            label_maps, offsets
Example #4
0
    def get_next_batch(self, status='train', cnt=0, index=None):
        """Assemble one multi-scale detection batch from the annotation lists.

        Args:
            status: 'train' reads ``self.train_anno`` and applies
                ``data_augmentation`` followed by ``crop_image``; 'test'
                reads ``self.test_anno`` and resizes the full-resolution
                outputs to ``self.orig_im_size``.
            cnt: position in ``index`` of the first sample to load.
            index: order in which annotation entries are visited; defaults to
                ``np.arange(len(anno['img']))``.

        Returns:
            Tuple ``(images, orig_image, depths, orig_depth, flows, orig_flow,
            boxes, label_maps, offsets, cnt, restart)``.  Arrays are
            transposed to (batch, channel, height, width).  ``boxes`` holds
            one normalised box array per sample (``np.zeros((0, 4))`` when
            the sample has no box).  ``cnt`` is the position after the last
            sample read, and ``restart`` is True when ``cnt`` wrapped back to
            the start of ``index`` during this call.

        Raises:
            ValueError: if ``status`` is neither 'train' nor 'test'.
        """
        if status == 'train':
            anno = self.train_anno
        elif status == 'test':
            anno = self.test_anno
        else:
            logging.error('Error: wrong status')
            # Fail here instead of with an UnboundLocalError on ``anno`` below.
            raise ValueError(
                "status must be 'train' or 'test', got %r" % (status,))
        if index is None:
            index = np.arange(len(anno['img']))
        batch_size, orig_im_size = self.batch_size, self.orig_im_size
        im_heights, im_widths, num_scale = self.im_heights, self.im_widths, self.num_scale
        ou_heights, ou_widths = self.output_heights, self.output_widths
        images = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 3))
            for i in range(num_scale)
        ]
        orig_image = np.zeros(
            (batch_size, orig_im_size[0], orig_im_size[1], 3))
        depths = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 1))
            for i in range(num_scale)
        ]
        orig_depth = np.zeros(
            (batch_size, orig_im_size[0], orig_im_size[1], 1))
        flows = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 3))
            for i in range(num_scale)
        ]
        orig_flow = np.zeros((batch_size, orig_im_size[0], orig_im_size[1], 3))
        boxes = []
        label_maps = [
            np.zeros((batch_size, ou_heights[i], ou_widths[i], 1))
            for i in range(num_scale)
        ]
        offsets = [
            np.zeros((batch_size, ou_heights[i], ou_widths[i], 4))
            for i in range(num_scale)
        ]
        orig_size = (orig_im_size[1], orig_im_size[0])  # cv2 (width, height)
        # (x, y) rounding for the four output cells around a box centre,
        # visited in this order.
        corner_rounders = ((np.floor, np.floor), (np.floor, np.ceil),
                           (np.ceil, np.floor), (np.ceil, np.ceil))
        restart = False
        for n in range(batch_size):
            if cnt >= len(index):
                # Ran past the end of ``index``: wrap and tell the caller.
                cnt = 0
                restart = True
            data_idx = index[cnt]
            image = np.array(Image.open(anno['img'][data_idx]))
            image = image / 255.0
            depth = flowlib.read_disp_png(anno['depth'][data_idx])
            flow = flowlib.read_flow_png(anno['flow'][data_idx])
            # Force float so the in-place normalisation below cannot be
            # truncated when the annotation stores integer coordinates.
            box = np.array(anno['box'][data_idx], dtype=float)

            if status == 'train':
                image, depth, flow, box = self.data_augmentation(
                    image, depth, flow, box)
                image, depth, flow, box = self.crop_image(
                    image, depth, flow, box)
            im_height, im_width = image.shape[0], image.shape[1]

            # Flow channels 0:2 are area-averaged; channel 2 is resized with
            # nearest neighbour (presumably a validity mask -- TODO confirm).
            for i in range(num_scale):
                size = (im_widths[i], im_heights[i])
                images[i][n, :, :, :] = cv2.resize(
                    image, size, interpolation=cv2.INTER_AREA)
                depths[i][n, :, :, 0] = cv2.resize(
                    depth, size, interpolation=cv2.INTER_AREA)
                flows[i][n, :, :, 0:2] = cv2.resize(
                    flow[:, :, 0:2], size, interpolation=cv2.INTER_AREA)
                flows[i][n, :, :, 2] = cv2.resize(
                    flow[:, :, 2], size, interpolation=cv2.INTER_NEAREST)
            if status == 'train':
                # Stored without resizing, so the cropped sample must already
                # have the ``orig_im_size`` shape for the assignment to work.
                orig_image[n, :, :, :] = image
                orig_depth[n, :, :, 0] = depth
                orig_flow[n, :, :, :] = flow
            else:
                orig_image[n, :, :, :] = cv2.resize(
                    image, orig_size, interpolation=cv2.INTER_AREA)
                orig_depth[n, :, :, 0] = cv2.resize(
                    depth, orig_size, interpolation=cv2.INTER_AREA)
                orig_flow[n, :, :, 0:2] = cv2.resize(
                    flow[:, :, 0:2], orig_size, interpolation=cv2.INTER_AREA)
                orig_flow[n, :, :, 2] = cv2.resize(
                    flow[:, :, 2], orig_size, interpolation=cv2.INTER_NEAREST)
            if len(box) > 0:
                box[:, 0] = box[:, 0] * 1.0 / im_width
                box[:, 1] = box[:, 1] * 1.0 / im_height
                box[:, 2] = box[:, 2] * 1.0 / im_width
                box[:, 3] = box[:, 3] * 1.0 / im_height
                boxes.append(box)

                x = (box[:, 0] + box[:, 2]) * 1.0 / 2
                y = (box[:, 1] + box[:, 3]) * 1.0 / 2
                w = (box[:, 2] - box[:, 0]) * 1.0
                h = (box[:, 3] - box[:, 1]) * 1.0
                for k in range(box.shape[0]):
                    for i in range(num_scale):
                        # Skip boxes smaller than two output cells.
                        if w[k] < 2.0 / ou_widths[i] or h[
                                k] < 2.0 / ou_heights[i]:
                            continue
                        for round_x, round_y in corner_rounders:
                            # Builtin ``int``: ``np.int`` was removed in
                            # NumPy 1.24.
                            x_c = int(round_x(x[k] * ou_widths[i]))
                            y_c = int(round_y(y[k] * ou_heights[i]))
                            if x_c < 0 or x_c >= ou_widths[i] \
                                    or y_c < 0 or y_c >= ou_heights[i]:
                                # NOTE(review): an out-of-range cell also
                                # skips the remaining corners for this scale;
                                # kept as-is so targets do not change.
                                break
                            cell_x = x_c * 1.0 / ou_widths[i]
                            cell_y = y_c * 1.0 / ou_heights[i]
                            label_maps[i][n, y_c, x_c, 0] = 1  # Only works for car detection
                            offsets[i][n, y_c, x_c, 0] = box[k, 0] - cell_x
                            offsets[i][n, y_c, x_c, 1] = box[k, 1] - cell_y
                            offsets[i][n, y_c, x_c, 2] = box[k, 2] - cell_x
                            offsets[i][n, y_c, x_c, 3] = box[k, 3] - cell_y
            else:
                boxes.append(np.zeros((0, 4)))
            cnt = cnt + 1

        # Channels-last -> channels-first.
        for i in range(num_scale):
            images[i] = images[i].transpose((0, 3, 1, 2))
            depths[i] = depths[i].transpose((0, 3, 1, 2))
            flows[i] = flows[i].transpose((0, 3, 1, 2))
            label_maps[i] = label_maps[i].transpose((0, 3, 1, 2))
            offsets[i] = offsets[i].transpose((0, 3, 1, 2))
        orig_image = orig_image.transpose((0, 3, 1, 2))
        orig_depth = orig_depth.transpose((0, 3, 1, 2))
        orig_flow = orig_flow.transpose((0, 3, 1, 2))
        return images, orig_image, depths, orig_depth, flows, orig_flow, boxes, \
            label_maps, offsets, cnt, restart
Example #5
0
    def get_one_sample(self, image_name, depth_name, flow_name, box_name):
        """Load one sample and build multi-scale heatmap/offset targets.

        The image, depth, flow and boxes are read from the given paths.  When
        ``self.data_augment`` is set they go through ``flip_image`` and
        ``resize_crop_image``, and the full-resolution outputs are stored
        without resizing; otherwise the full-resolution outputs are resized
        to ``self.orig_im_size``.

        For every box and scale, the output cells around the box centre
        (floor/ceil in x and y) get ``heatmaps == 1`` and ``offsets`` set to
        the box coordinates minus the normalised cell position.  Boxes
        narrower or shorter than two output cells are skipped at that scale.

        Args:
            image_name: path of the image, opened with ``Image.open``.
            depth_name: path passed to ``flowlib.read_disp_png``.
            flow_name: path passed to ``flowlib.read_flow_png``.
            box_name: path passed to ``read_box``.

        Returns:
            dict with keys 'images', 'flows', 'depths', 'heatmaps', 'offsets'
            (lists with one channels-last array per scale), 'orig_image',
            'orig_depth', 'orig_flow' (channels-last arrays of batch size 1)
            and 'boxes' (one-element list with the output of
            ``rescale_box``).
        """
        batch_size, orig_im_size = 1, self.orig_im_size
        im_heights, im_widths, num_scale = self.im_heights, self.im_widths, self.num_scale
        ou_heights, ou_widths = self.output_heights, self.output_widths
        images = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 3))
            for i in range(num_scale)
        ]
        depths = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 1))
            for i in range(num_scale)
        ]
        flows = [
            np.zeros((batch_size, im_heights[i], im_widths[i], 3))
            for i in range(num_scale)
        ]
        heatmaps = [
            np.zeros((batch_size, ou_heights[i], ou_widths[i], 1))
            for i in range(num_scale)
        ]
        offsets = [
            np.zeros((batch_size, ou_heights[i], ou_widths[i], 4))
            for i in range(num_scale)
        ]

        orig_image = np.zeros(
            (batch_size, orig_im_size[0], orig_im_size[1], 3))
        orig_depth = np.zeros(
            (batch_size, orig_im_size[0], orig_im_size[1], 1))
        orig_flow = np.zeros((batch_size, orig_im_size[0], orig_im_size[1], 3))
        boxes = []

        orig_size = (orig_im_size[1], orig_im_size[0])  # cv2 (width, height)
        # (x, y) rounding for the four output cells around a box centre,
        # visited in this order.
        corner_rounders = ((np.floor, np.floor), (np.floor, np.ceil),
                           (np.ceil, np.floor), (np.ceil, np.ceil))

        for n in range(batch_size):
            image = np.array(Image.open(image_name))
            image = image / 255.0
            depth = flowlib.read_disp_png(depth_name)
            flow = flowlib.read_flow_png(flow_name)
            box = read_box(box_name)

            if self.data_augment:
                image, depth, flow, box = self.flip_image(
                    image, depth, flow, box)
                image, depth, flow, box = self.resize_crop_image(
                    image, depth, flow, box)

            # Flow channels 0:2 are area-averaged; channel 2 is resized with
            # nearest neighbour (presumably a validity mask -- TODO confirm).
            for i in range(num_scale):
                size = (im_widths[i], im_heights[i])
                images[i][n, :, :, :] = cv2.resize(
                    image, size, interpolation=cv2.INTER_AREA)
                depths[i][n, :, :, 0] = cv2.resize(
                    depth, size, interpolation=cv2.INTER_AREA)
                flows[i][n, :, :, 0:2] = cv2.resize(
                    flow[:, :, 0:2], size, interpolation=cv2.INTER_AREA)
                flows[i][n, :, :, 2] = cv2.resize(
                    flow[:, :, 2], size, interpolation=cv2.INTER_NEAREST)
            im_height, im_width = image.shape[0], image.shape[1]
            box = self.rescale_box(box, im_height, im_width)
            boxes.append(box)
            xb, yb, wb, hb = self.get_box_center_size(box)

            for k in range(box.shape[0]):
                for i in range(num_scale):
                    # Skip boxes smaller than two output cells.
                    if wb[k] < 2.0 / ou_widths[i] or hb[
                            k] < 2.0 / ou_heights[i]:
                        continue
                    for round_x, round_y in corner_rounders:
                        # Builtin ``int``: ``np.int`` was removed in NumPy 1.24.
                        x = int(round_x(xb[k] * ou_widths[i]))
                        y = int(round_y(yb[k] * ou_heights[i]))
                        if x < 0 or x >= ou_widths[i] \
                                or y < 0 or y >= ou_heights[i]:
                            # NOTE(review): an out-of-range cell also skips
                            # the remaining corners for this scale; kept
                            # as-is so the generated targets do not change.
                            break
                        cell_x = x * 1.0 / ou_widths[i]
                        cell_y = y * 1.0 / ou_heights[i]
                        heatmaps[i][n, y, x, 0] = 1  # Only works for car detection
                        offsets[i][n, y, x, 0] = box[k, 0] - cell_x
                        offsets[i][n, y, x, 1] = box[k, 1] - cell_y
                        offsets[i][n, y, x, 2] = box[k, 2] - cell_x
                        offsets[i][n, y, x, 3] = box[k, 3] - cell_y

            if self.data_augment:
                # Stored without resizing, so the augmented sample must
                # already have the ``orig_im_size`` shape.
                orig_image[n, :, :, :] = image
                orig_depth[n, :, :, 0] = depth
                orig_flow[n, :, :, :] = flow
            else:
                orig_image[n, :, :, :] = cv2.resize(
                    image, orig_size, interpolation=cv2.INTER_AREA)
                orig_depth[n, :, :, 0] = cv2.resize(
                    depth, orig_size, interpolation=cv2.INTER_AREA)
                orig_flow[n, :, :, 0:2] = cv2.resize(
                    flow[:, :, 0:2], orig_size, interpolation=cv2.INTER_AREA)
                orig_flow[n, :, :, 2] = cv2.resize(
                    flow[:, :, 2], orig_size, interpolation=cv2.INTER_NEAREST)
        sample = {
            'images': images,
            'flows': flows,
            'depths': depths,
            'heatmaps': heatmaps,
            'offsets': offsets,
            'orig_image': orig_image,
            'orig_depth': orig_depth,
            'orig_flow': orig_flow,
            'boxes': boxes
        }
        return sample