Exemple #1
0
def samples_MatrixNetAnchors(db, k_ind, data_aug, debug):
    """Assemble one batch of images and per-layer anchor (box-centre) targets.

    Args:
        db: dataset object. This function reads ``db.configs``,
            ``db.db_inds``, ``db.image_file``, ``db.detections``,
            ``db.shuffle_inds`` and ``db.eig_val`` / ``db.eig_vec``.
        k_ind: position in ``db.db_inds`` of the first sample to load.
        data_aug: not used by this function.
        debug: when truthy, index shuffling, random cropping, random
            flipping and the ``/255`` scaling plus colour augmentation are
            all skipped (the image is stored with its raw pixel values).

    Returns:
        A ``(batch, k_ind)`` tuple. ``batch["xs"]`` is
        ``[images, anchors_tags]`` and ``batch["ys"]`` is
        ``[anchors_heatmaps, tl_corners_regrs, br_corners_regrs, tag_masks]``.
        ``images`` is a one-element list holding the image tensor; every
        other entry is a list with one tensor per enabled output layer.
        ``k_ind`` is the index of the next sample to read, wrapped modulo
        the dataset size.

    Each detection row is read as ``[x_tl, y_tl, x_br, y_br, ..., category]``
    with the category shifted down by one to index the heatmap channel.
    Targets beyond ``max_tag_len`` on a given layer are reported and dropped
    instead of overflowing the fixed-size target buffers.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size
    categories = db.configs["categories"]
    input_size = db.configs["input_size"]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]
    base_layer_range = db.configs["base_layer_range"]
    cutout_flag = db.configs["cutout"]
    layers_range = db.configs["layers_range"]
    max_tag_len = 256

    # Flatten the 2-D layer grid: every entry that is not -1 becomes one
    # output layer. Row index i halves the output width, column index j
    # halves the output height, both starting from a stride of 8.
    layer_by_key = {}
    output_sizes = []
    for i, row in enumerate(layers_range):
        for j, entry in enumerate(row):
            if entry != -1:
                output_sizes.append([
                    input_size[0] // (8 * 2**j),
                    input_size[1] // (8 * 2**i)
                ])
                layer_by_key[(i + 1) * 10 + (j + 1)] = entry

    layers_range = [layer_by_key[key] for key in sorted(layer_by_key)]
    # Only the diagonal layers enabled: passed on to layer_map_using_ranges.
    fpn_flag = set(layer_by_key) == {11, 22, 33, 44, 55}
    print("FPN", fpn_flag)

    # Regression targets are shifted and scaled by the base layer range;
    # these values do not depend on the detection, so compute them once.
    min_y, max_y, min_x, max_x = [v / 8 / 2 for v in base_layer_range]
    x_mid, x_span = (max_x + min_x) / 2, max_x - min_x
    y_mid, y_span = (max_y + min_y) / 2, max_y - min_y

    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    anchors_heatmaps = [
        np.zeros((batch_size, categories, size[0], size[1]),
                 dtype=np.float32) for size in output_sizes
    ]
    tl_corners_regrs = [
        np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
        for _ in output_sizes
    ]
    br_corners_regrs = [
        np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
        for _ in output_sizes
    ]
    anchors_tags = [
        np.zeros((batch_size, max_tag_len), dtype=np.int64)
        for _ in output_sizes
    ]
    tag_masks = [
        np.zeros((batch_size, max_tag_len), dtype=bool)
        for _ in output_sizes
    ]
    tag_lens = [
        np.zeros((batch_size, ), dtype=np.int32) for _ in output_sizes
    ]

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)

        if cutout_flag:
            image = cutout(image, detections)

        if not debug and rand_crop:
            image, detections = random_crop(image,
                                            detections,
                                            rand_scales,
                                            input_size,
                                            border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            # mirror the x coordinates and swap left/right so x_tl <= x_br
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            category = int(detection[-1]) - 1
            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            box_w = xbr - xtl
            box_h = ybr - ytl

            for olayer_idx in layer_map_using_ranges(box_w, box_h,
                                                     layers_range, fpn_flag):
                tag_ind = tag_lens[olayer_idx][b_ind]
                if tag_ind >= max_tag_len:
                    # Target buffers of this layer are full; writing further
                    # would raise IndexError, so drop the extra target.
                    print("Too many targets, skip!")
                    print(image_file)
                    continue

                out_h, out_w = output_sizes[olayer_idx]
                width_ratio = out_w / input_size[1]
                height_ratio = out_h / input_size[0]

                fxtl = (xtl * width_ratio)
                fytl = (ytl * height_ratio)
                fxbr = (xbr * width_ratio)
                fybr = (ybr * height_ratio)

                mx = out_w - 1
                my = out_h - 1

                # box centre on the layer grid, clamped to the last cell
                xc = int(min(round((fxtl + fxbr) / 2), mx))
                yc = int(min(round((fytl + fybr) / 2), my))

                if gaussian_bump:
                    width = math.ceil(box_w * width_ratio)
                    height = math.ceil(box_h * height_ratio)

                    # -1 means "derive the radius from the box size"
                    if gaussian_rad == -1:
                        radius = gaussian_radius((height, width), gaussian_iou)
                        radius = max(0, int(radius))
                    else:
                        radius = gaussian_rad

                    draw_gaussian(
                        anchors_heatmaps[olayer_idx][b_ind, category],
                        [xc, yc], radius)
                else:
                    anchors_heatmaps[olayer_idx][b_ind, category, yc, xc] = 1

                # distances from the centre to each corner, normalised by
                # the base layer range
                tl_corners_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    ((xc - fxtl) - x_mid) / x_span,
                    ((yc - fytl) - y_mid) / y_span
                ]
                br_corners_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    ((fxbr - xc) - x_mid) / x_span,
                    ((fybr - yc) - y_mid) / y_span
                ]

                # flattened (row-major) index of the centre cell
                anchors_tags[olayer_idx][b_ind, tag_ind] = yc * out_w + xc

                tag_lens[olayer_idx][b_ind] += 1

    # mark the slots that actually hold a target
    for layer_mask, layer_lens in zip(tag_masks, tag_lens):
        for b_ind in range(batch_size):
            layer_mask[b_ind, :layer_lens[b_ind]] = 1

    images = [torch.from_numpy(images)]
    anchors_heatmaps = [
        torch.from_numpy(anchors) for anchors in anchors_heatmaps
    ]
    tl_corners_regrs = [torch.from_numpy(c) for c in tl_corners_regrs]
    br_corners_regrs = [torch.from_numpy(c) for c in br_corners_regrs]
    anchors_tags = [torch.from_numpy(t) for t in anchors_tags]
    tag_masks = [torch.from_numpy(tags) for tags in tag_masks]

    return {
        "xs": [images, anchors_tags],
        "ys":
        [anchors_heatmaps, tl_corners_regrs, br_corners_regrs, tag_masks]
    }, k_ind
Exemple #2
0
def kp_detection(db, k_ind, data_aug, debug):
    """Assemble one batch with top-left, bottom-right and centre targets.

    Args:
        db: dataset object. This function reads ``db.configs``,
            ``db.db_inds``, ``db.image_file``, ``db.detections``,
            ``db.shuffle_inds``, ``db.eig_val`` / ``db.eig_vec`` and
            ``db.mean`` / ``db.std``.
        k_ind: position in ``db.db_inds`` of the first sample to load.
        data_aug: not used by this function.
        debug: when truthy, index shuffling, random cropping, random
            flipping, ``/255`` scaling, colour augmentation and
            normalisation are all skipped.

    Returns:
        A ``(batch, k_ind)`` tuple. ``batch["xs"]`` is
        ``[images, tl_tags, br_tags, ct_tags]`` and ``batch["ys"]`` is
        ``[tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs,
        br_regrs, ct_regrs]``, all torch tensors. ``k_ind`` is the index of
        the next sample to read, wrapped modulo the dataset size.

    Each detection row is read as ``[x_tl, y_tl, x_br, y_br, ..., category]``
    with the category shifted down by one to index the heatmap channel.
    Detections beyond ``max_tag_len`` in one image are reported and dropped
    instead of overflowing the fixed-size target buffers.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    max_tag_len = 500

    # scale factors from input-image pixels to heatmap cells
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    # allocating memory
    heatmap_shape = (batch_size, categories, output_size[0], output_size[1])
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    tl_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    br_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    ct_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    ct_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)

        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image,
                                            detections,
                                            rand_scales,
                                            input_size,
                                            border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            # mirror the x coordinates and swap left/right so x_tl <= x_br
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            tag_ind = tag_lens[b_ind]
            if tag_ind >= max_tag_len:
                # Target buffers for this image are full; writing further
                # would raise IndexError, so drop the remaining detections.
                print("Too many targets, skip!")
                print(image_file)
                break

            category = int(detection[-1]) - 1

            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            xct = (detection[2] + detection[0]) / 2.
            yct = (detection[3] + detection[1]) / 2.

            # fractional positions on the heatmap grid
            fxtl = (xtl * width_ratio)
            fytl = (ytl * height_ratio)
            fxbr = (xbr * width_ratio)
            fybr = (ybr * height_ratio)
            fxct = (xct * width_ratio)
            fyct = (yct * height_ratio)

            # integer cell that holds each keypoint (truncation)
            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)
            xct = int(fxct)
            yct = int(fyct)

            if gaussian_bump:
                width = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                # -1 means "derive the radius from the box size"
                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
                # NOTE: `delte` is the keyword as spelled in this call to
                # draw_gaussian; it is kept unchanged.
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct],
                              radius,
                              delte=5)
            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1
                ct_heatmaps[b_ind, category, yct, xct] = 1

            # sub-cell offsets lost by the truncation above
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
            ct_regrs[b_ind, tag_ind, :] = [fxct - xct, fyct - yct]
            # flattened (row-major) index of each keypoint cell
            tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            tag_lens[b_ind] += 1

    # mark the slots that actually hold a target
    for b_ind in range(batch_size):
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1

    images = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    ct_regrs = torch.from_numpy(ct_regrs)
    tl_tags = torch.from_numpy(tl_tags)
    br_tags = torch.from_numpy(br_tags)
    ct_tags = torch.from_numpy(ct_tags)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images, tl_tags, br_tags, ct_tags],
        "ys": [
            tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs,
            br_regrs, ct_regrs
        ]
    }, k_ind
Exemple #3
0
def kp_detection(db, k_ind, data_aug, debug):
    """Assemble one batch with top-left / bottom-right corner targets.

    Samples whose image file is missing or cannot be decoded are skipped.

    Args:
        db: dataset object. This function reads ``db.configs``,
            ``db.db_inds``, ``db.image_file``, ``db.detections``,
            ``db.shuffle_inds`` and ``db.eig_val`` / ``db.eig_vec``.
        k_ind: position in ``db.db_inds`` of the first sample to try.
        data_aug: not used by this function.
        debug: when truthy, index shuffling, random cropping, ``/255``
            scaling and colour augmentation are all skipped.

    Returns:
        A ``(batch, k_ind)`` tuple. ``batch["xs"]`` is
        ``[images, tl_tags, br_tags]`` and ``batch["ys"]`` is
        ``[tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs]``, all
        torch tensors. ``k_ind`` is the index of the next sample to read,
        wrapped modulo the dataset size.

    Raises:
        FileNotFoundError: if a full pass over ``db.db_inds`` finds no
            image that exists on disk and can be decoded.

    Each detection row is read as ``[x_tl, y_tl, x_br, y_br, ..., category]``
    with the category shifted down by one to index the heatmap channel.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    max_tag_len = 128

    # scale factors from input-image pixels to heatmap cells
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    # allocating memory
    heatmap_shape = (batch_size, categories, output_size[0], output_size[1])
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    tl_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    br_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    tl_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    tl_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        # Advance until a usable image is found. The scan is limited to one
        # full pass over the index so a dataset without any readable image
        # raises instead of looping forever.
        for _ in range(db_size):
            db_ind = db.db_inds[k_ind]
            k_ind = (k_ind + 1) % db_size
            # reading image
            image_file = db.image_file(db_ind)
            if os.path.exists(image_file):
                image = cv2.imread(image_file)
                # imread yields None for files it cannot decode
                if image is not None:
                    break
        else:
            raise FileNotFoundError(
                "no readable image file found in the dataset index")

        # reading detections
        detections = db.detections(db_ind)

        # cropping an image randomly
        if not debug and rand_crop:
            image, detections = random_crop(image,
                                            detections,
                                            rand_scales,
                                            input_size,
                                            border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            category = int(detection[-1]) - 1

            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]

            # fractional positions on the heatmap grid
            fxtl = (xtl * width_ratio)
            fytl = (ytl * height_ratio)
            fxbr = (xbr * width_ratio)
            fybr = (ybr * height_ratio)

            # integer cell that holds each corner (truncation)
            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)

            if gaussian_bump:
                width = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                # -1 means "derive the radius from the box size"
                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
            else:
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1

            tag_ind = tag_lens[b_ind]
            # sub-cell offsets lost by the truncation above
            tl_regrs[b_ind, tag_ind, :] = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :] = [fxbr - xbr, fybr - ybr]
            # flattened (row-major) index of each corner cell
            tl_tags[b_ind, tag_ind] = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind] = ybr * output_size[1] + xbr
            tag_lens[b_ind] += 1
            # Stop once max_tag_len - 1 targets are stored, so the last
            # slot of the buffers is never written.
            if tag_lens[b_ind] >= max_tag_len - 1:
                print("Too many targets, skip!")
                print(tag_lens[b_ind])
                print(image_file)
                break

    # mark the slots that actually hold a target
    for b_ind in range(batch_size):
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1

    images = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    tl_regrs = torch.from_numpy(tl_regrs)
    br_regrs = torch.from_numpy(br_regrs)
    tl_tags = torch.from_numpy(tl_tags)
    br_tags = torch.from_numpy(br_tags)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images, tl_tags, br_tags],
        "ys": [tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs]
    }, k_ind
Exemple #4
0
def samples_MatrixNetCorners(db, k_ind, data_aug, debug):
    """Assemble one batch of images and per-layer corner targets.

    Args:
        db: dataset object. This function reads ``db.configs``,
            ``db.db_inds``, ``db.image_file``, ``db.detections``,
            ``db.shuffle_inds`` and ``db.eig_val`` / ``db.eig_vec``.
        k_ind: position in ``db.db_inds`` of the first sample to load.
        data_aug: not used by this function.
        debug: when truthy, index shuffling, random cropping, random
            flipping and the ``/255`` scaling plus colour augmentation are
            all skipped (the image is stored with its raw pixel values).

    Returns:
        A ``(batch, k_ind)`` tuple. ``batch["xs"]`` is
        ``[images, tl_tags, br_tags]`` and ``batch["ys"]`` is
        ``[tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs,
        center_regrs]``. ``images`` is a one-element list holding the image
        tensor; every other entry is a list with one tensor per enabled
        output layer. ``k_ind`` is the index of the next sample to read,
        wrapped modulo the dataset size.

    Each detection row is read as ``[x_tl, y_tl, x_br, y_br, ..., category]``
    with the category shifted down by one to index the heatmap channel.
    Targets beyond ``max_tag_len`` on a given layer are reported and dropped
    instead of overflowing the fixed-size target buffers.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size
    categories = db.configs["categories"]
    input_size = db.configs["input_size"]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]
    cutout_flag = db.configs["cutout"]
    layers_range = db.configs["layers_range"]

    max_tag_len = 128

    # Flatten the 2-D layer grid: every entry that is not -1 becomes one
    # output layer. Row index i halves the output width, column index j
    # halves the output height, both starting from a stride of 8.
    layer_by_key = {}
    output_sizes = []
    for i, row in enumerate(layers_range):
        for j, entry in enumerate(row):
            if entry != -1:
                output_sizes.append([
                    input_size[0] // (8 * 2**j),
                    input_size[1] // (8 * 2**i)
                ])
                layer_by_key[(i + 1) * 10 + (j + 1)] = entry

    layers_range = [layer_by_key[key] for key in sorted(layer_by_key)]
    # Only the diagonal layers enabled: passed on to layer_map_using_ranges.
    fpn_flag = set(layer_by_key) == {11, 22, 33, 44, 55}

    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    tl_heatmaps = [
        np.zeros((batch_size, categories, size[0], size[1]),
                 dtype=np.float32) for size in output_sizes
    ]
    br_heatmaps = [
        np.zeros((batch_size, categories, size[0], size[1]),
                 dtype=np.float32) for size in output_sizes
    ]
    tl_regrs = [
        np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
        for _ in output_sizes
    ]
    center_regrs = [
        np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
        for _ in output_sizes
    ]
    br_regrs = [
        np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
        for _ in output_sizes
    ]
    tl_tags = [
        np.zeros((batch_size, max_tag_len), dtype=np.int64)
        for _ in output_sizes
    ]
    br_tags = [
        np.zeros((batch_size, max_tag_len), dtype=np.int64)
        for _ in output_sizes
    ]
    tag_masks = [
        np.zeros((batch_size, max_tag_len), dtype=bool)
        for _ in output_sizes
    ]
    tag_lens = [
        np.zeros((batch_size, ), dtype=np.int32) for _ in output_sizes
    ]

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)

        if cutout_flag:
            image = cutout(image, detections)

        if not debug and rand_crop:
            image, detections = random_crop(image,
                                            detections,
                                            rand_scales,
                                            input_size,
                                            border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        # flipping an image randomly
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            # mirror the x coordinates and swap left/right so x_tl <= x_br
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)

        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        for detection in detections:
            category = int(detection[-1]) - 1
            box_xtl, box_ytl = detection[0], detection[1]
            box_xbr, box_ybr = detection[2], detection[3]
            box_w = box_xbr - box_xtl
            box_h = box_ybr - box_ytl

            for olayer_idx in layer_map_using_ranges(box_w, box_h,
                                                     layers_range, fpn_flag):
                tag_ind = tag_lens[olayer_idx][b_ind]
                if tag_ind >= max_tag_len:
                    # Target buffers of this layer are full; writing further
                    # would raise IndexError, so drop the extra target.
                    print("Too many targets, skip!")
                    print(image_file)
                    continue

                out_h, out_w = output_sizes[olayer_idx]
                width_ratio = out_w / input_size[1]
                height_ratio = out_h / input_size[0]

                # fractional corner positions on this layer's grid
                fxtl = (box_xtl * width_ratio)
                fytl = (box_ytl * height_ratio)
                fxbr = (box_xbr * width_ratio)
                fybr = (box_ybr * height_ratio)

                mx = out_w - 1
                my = out_h - 1

                # nearest cell, clamped to the last row / column
                xtl = int(min(round(fxtl), mx))
                ytl = int(min(round(fytl), my))
                xbr = int(min(round(fxbr), mx))
                ybr = int(min(round(fybr), my))

                if gaussian_bump:
                    width = math.ceil(box_w * width_ratio)
                    height = math.ceil(box_h * height_ratio)

                    # -1 means "derive the radius from the box size"
                    if gaussian_rad == -1:
                        radius = gaussian_radius((height, width), gaussian_iou)
                        radius = max(0, int(radius))
                    else:
                        radius = gaussian_rad

                    draw_gaussian(tl_heatmaps[olayer_idx][b_ind, category],
                                  [xtl, ytl], radius)
                    draw_gaussian(br_heatmaps[olayer_idx][b_ind, category],
                                  [xbr, ybr], radius)
                else:
                    tl_heatmaps[olayer_idx][b_ind, category, ytl, xtl] = 1
                    br_heatmaps[olayer_idx][b_ind, category, ybr, xbr] = 1

                # sub-cell offsets lost by the rounding above
                tl_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    fxtl - xtl, fytl - ytl
                ]
                br_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    fxbr - xbr, fybr - ybr
                ]

                # half box extent, divided by the size of the LAST output
                # layer regardless of which layer this target lands on
                center_regrs[olayer_idx][b_ind, tag_ind, :] = [
                    (fxbr - fxtl) / 2.0 / output_sizes[-1][1],
                    (fybr - fytl) / 2.0 / output_sizes[-1][0]
                ]

                # flattened (row-major) index of each corner cell
                tl_tags[olayer_idx][b_ind, tag_ind] = ytl * out_w + xtl
                br_tags[olayer_idx][b_ind, tag_ind] = ybr * out_w + xbr
                tag_lens[olayer_idx][b_ind] += 1

    # mark the slots that actually hold a target
    for layer_mask, layer_lens in zip(tag_masks, tag_lens):
        for b_ind in range(batch_size):
            layer_mask[b_ind, :layer_lens[b_ind]] = 1

    images = [torch.from_numpy(images)]
    tl_heatmaps = [torch.from_numpy(tl) for tl in tl_heatmaps]
    br_heatmaps = [torch.from_numpy(br) for br in br_heatmaps]

    tl_regrs = [torch.from_numpy(tl) for tl in tl_regrs]
    br_regrs = [torch.from_numpy(br) for br in br_regrs]
    center_regrs = [torch.from_numpy(c) for c in center_regrs]
    tl_tags = [torch.from_numpy(tl) for tl in tl_tags]
    br_tags = [torch.from_numpy(br) for br in br_tags]

    tag_masks = [torch.from_numpy(tags) for tags in tag_masks]
    return {
        "xs": [images, tl_tags, br_tags],
        "ys": [
            tl_heatmaps, br_heatmaps, tag_masks, tl_regrs, br_regrs,
            center_regrs
        ]
    }, k_ind
Exemple #5
0
def kp_detection(db, k_ind, data_aug, debug):
    """Assemble one batch of images and corner/center key-point targets.

    For each of ``system_configs.batch_size`` samples the function reads an
    image and its detections from ``db``, applies cropping, resizing,
    clipping, an optional horizontal flip and optional colour augmentation,
    then fills top-left, bottom-right and center heatmaps, sub-pixel offset
    regressions, flattened-index tags and a validity mask.

    Args:
        db: dataset object; the code reads ``db.configs``, ``db.db_inds``,
            ``db.image_file``, ``db.detections``, ``db.shuffle_inds``,
            ``db.mean``, ``db.std``, ``db.eig_val`` and ``db.eig_vec``.
        k_ind: position of the next sample in ``db.db_inds``; advanced
            modulo the dataset size for every sample taken.
        data_aug: not referenced anywhere in this function.
        debug: when true, shuffling, random cropping, flipping, colour
            augmentation and normalisation are all skipped.

    Returns:
        A tuple ``(batch, k_ind)`` where ``batch`` is a dict with
        ``"xs": [images, tl_tags, br_tags, ct_tags]`` and
        ``"ys": [tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks,
        tl_regrs, br_regrs, ct_regrs]`` (all torch tensors) and ``k_ind`` is
        the updated dataset position.
    """
    # NOTE(review): the concrete values quoted in the comments below (batch
    # size 48, 511x511 input, 128x128 output, ...) are the original author's
    # notes about a CenterNet-104 configuration. They are not visible in this
    # file and should be confirmed against the actual config.
    #
    # Call chain according to the original author:
    # train.py -> train() -> init_parallel_jobs -> prefetch_data
    # -> sample_data -> kp_detection.
    data_rng   = system_configs.data_rng
    # author's note: config.py sets data_rng = np.random.RandomState(123)
    batch_size = system_configs.batch_size
    # author's note: 48 in the CenterNet-104 config

    categories   = db.configs["categories"]  # author's note: 80
    input_size   = db.configs["input_size"]  # author's note: [511, 511]
    output_size  = db.configs["output_sizes"][0]  # author's note: [128, 128]

    border        = db.configs["border"]  # author's note: 128
    lighting      = db.configs["lighting"]  # author's note: True (default in detection.py)
    rand_crop     = db.configs["rand_crop"]  # author's note: True
    rand_color    = db.configs["rand_color"]  # author's note: True
    rand_scales   = db.configs["rand_scales"]
    # author's note: when the config leaves "rand_scales" null, detection.py
    # fills it with np.arange(rand_scale_min, rand_scale_max, rand_scale_step),
    # i.e. np.arange(0.6, 1.4, 0.1). np.arange excludes the stop value, so
    # nominally this is 0.6, 0.7, ..., 1.3 -- TODO confirm.

    gaussian_bump = db.configs["gaussian_bump"]  # author's note: True
    gaussian_iou  = db.configs["gaussian_iou"]  # author's note: 0.7
    gaussian_rad  = db.configs["gaussian_radius"]  # author's note: -1 (automatic radius)

    # Fixed number of per-image target slots, so every batch has the same shape.
    max_tag_len = 128

    # allocating memory
    # images: (batch, 3, H_in, W_in)
    images      = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32)
    # heatmaps: (batch, categories, H_out, W_out), one each for the top-left
    # corner, bottom-right corner and box center
    tl_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    br_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    ct_heatmaps = np.zeros((batch_size, categories, output_size[0], output_size[1]), dtype=np.float32)
    # offset regressions: (batch, max_tag_len, 2) holding (dx, dy) per detection
    tl_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    br_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    ct_regrs    = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    # tags: flattened (row * W_out + col) heatmap index per detection
    tl_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    br_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags     = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    # tag_masks marks which of the max_tag_len slots are filled
    tag_masks   = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    # tag_lens counts the detections written so far for each image
    tag_lens    = np.zeros((batch_size, ), dtype=np.int32)

    # Number of entries in the dataset index array.
    db_size = db.db_inds.size
    for b_ind in range(batch_size):  # one image per iteration
        if not debug and k_ind == 0:
            # Reshuffle whenever the cursor is at position 0: on the first
            # call and again each time k_ind wraps around below.
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        # advance the cursor, wrapping at the end of the dataset
        k_ind  = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image      = cv2.imread(image_file)

        # reading detections
        detections = db.detections(db_ind)
        # Rows are indexed below as [x_tl, y_tl, x_br, y_br, ..., category].

        # cropping an image randomly
        if not debug and rand_crop:
            # presumably crops the image and shifts the boxes accordingly --
            # verify against random_crop
            image, detections = random_crop(image, detections, rand_scales, input_size, border=border)
        else:
            image, detections = _full_image_crop(image, detections)

        # Resize image and boxes together to the network input size, then
        # clip boxes; presumably this keeps them inside the image bounds --
        # verify against _clip_detections.
        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)


        # Scale factors from input-image coordinates to heatmap coordinates.
        width_ratio  = output_size[1] / input_size[1]
        height_ratio = output_size[0] / input_size[0]

        # With the author's quoted sizes (input 511, output 128) each ratio
        # is 128 / 511, roughly 0.2505.

        # flipping an image randomly
        # NOTE: uses the global np.random state rather than data_rng.
        if not debug and np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width    = image.shape[1]
            # mirror the x coordinates and swap them so x_tl stays <= x_br
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            normalize_(image, db.mean, db.std)
        # HWC -> CHW (channel first)
        images[b_ind] = image.transpose((2, 0, 1))

        for ind, detection in enumerate(detections):
            # Everything in this loop concerns a single box of the current image.
            # Category ids are shifted by one to become zero-based channel indices.
            category = int(detection[-1]) - 1

            xtl, ytl = detection[0], detection[1]
            xbr, ybr = detection[2], detection[3]
            xct, yct = (detection[2] + detection[0])/2., (detection[3]+detection[1])/2.

            # floating-point key-point positions in heatmap coordinates
            fxtl = (xtl * width_ratio)
            fytl = (ytl * height_ratio)
            fxbr = (xbr * width_ratio)
            fybr = (ybr * height_ratio)
            fxct = (xct * width_ratio)
            fyct = (yct * height_ratio)

            # integer heatmap cell of each key point (truncation)
            xtl = int(fxtl)
            ytl = int(fytl)
            xbr = int(fxbr)
            ybr = int(fybr)
            xct = int(fxct)
            yct = int(fyct)

            if gaussian_bump:
                # box extent in input-image pixels ...
                width  = detection[2] - detection[0]
                height = detection[3] - detection[1]

                # ... converted to heatmap cells, rounded up
                width  = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                if gaussian_rad == -1:
                    # -1 requests a radius derived from the box size and gaussian_iou
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad

                draw_gaussian(tl_heatmaps[b_ind, category], [xtl, ytl], radius)
                draw_gaussian(br_heatmaps[b_ind, category], [xbr, ybr], radius)
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius, delte = 5)
                # Each call draws into the 2-D heatmap slice of this image
                # and category. The center call passes delte=5 while the
                # corner calls rely on draw_gaussian's default.
                # author's note: draw_gaussian uses sigma = diameter / delte
                # with a default delte of 6 -- TODO confirm against
                # draw_gaussian, and with it how delte=5 changes the center
                # bump relative to the corner bumps.

            else:
                # no Gaussian: mark only the single peak cell
                tl_heatmaps[b_ind, category, ytl, xtl] = 1
                br_heatmaps[b_ind, category, ybr, xbr] = 1
                ct_heatmaps[b_ind, category, yct, xct] = 1

            # next free target slot for this image
            tag_ind                      = tag_lens[b_ind]
            # Sub-cell offsets: the fractional part lost by the int()
            # truncation above, stored as (dx, dy).
            tl_regrs[b_ind, tag_ind, :]  = [fxtl - xtl, fytl - ytl]
            br_regrs[b_ind, tag_ind, :]  = [fxbr - xbr, fybr - ybr]
            ct_regrs[b_ind, tag_ind, :]  = [fxct - xct, fyct - yct]
            # Images have different numbers of detections, so the fixed
            # max_tag_len slot dimension keeps the batch shape constant.
            # Flattened heatmap index (row * W_out + col) of each key point;
            # this is the integer counterpart of the offsets above.
            tl_tags[b_ind, tag_ind]      = ytl * output_size[1] + xtl
            br_tags[b_ind, tag_ind]      = ybr * output_size[1] + xbr
            ct_tags[b_ind, tag_ind]      = yct * output_size[1] + xct
            # NOTE(review): there is no guard against more than max_tag_len
            # detections per image; that case would index out of range.
            tag_lens[b_ind]             += 1

    for b_ind in range(batch_size):
        # mark the slots that were actually filled for each image
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1


    # convert every numpy buffer to a torch tensor
    images      = torch.from_numpy(images)
    tl_heatmaps = torch.from_numpy(tl_heatmaps)
    br_heatmaps = torch.from_numpy(br_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    tl_regrs    = torch.from_numpy(tl_regrs)
    br_regrs    = torch.from_numpy(br_regrs)
    ct_regrs    = torch.from_numpy(ct_regrs)
    tl_tags     = torch.from_numpy(tl_tags)
    br_tags     = torch.from_numpy(br_tags)
    ct_tags     = torch.from_numpy(ct_tags)
    tag_masks   = torch.from_numpy(tag_masks)

    return {
        "xs": [images, tl_tags, br_tags, ct_tags],
        "ys": [tl_heatmaps, br_heatmaps, ct_heatmaps, tag_masks, tl_regrs, br_regrs, ct_regrs]
    }, k_ind
def kp_detection(db, k_ind, data_aug, debug):
    """Assemble one batch of images and extreme-point/center targets.

    For each of ``system_configs.batch_size`` samples the function reads an
    image with its boxes and four extreme points (indexed below as top,
    left, bottom, right), applies a random crop, resize, clipping, a random
    horizontal flip and optional colour augmentation, then fills one heatmap
    per extreme point plus a center heatmap, sub-pixel offset regressions,
    flattened-index tags and a validity mask.

    Args:
        db: dataset object; the code reads ``db.configs``, ``db.db_inds``,
            ``db.image_file``, ``db.detections``, ``db.shuffle_inds``,
            ``db.mean``, ``db.std``, ``db.eig_val`` and ``db.eig_vec``.
        k_ind: position of the next sample in ``db.db_inds``; advanced
            modulo the dataset size for every sample taken.
        data_aug: not referenced anywhere in this function.
        debug: when true, index shuffling and colour augmentation are
            skipped and the first sample's heatmaps are shown through
            ``Debugger``. Cropping and flipping still happen.

    Returns:
        A tuple ``(batch, k_ind)`` where ``batch`` is a dict with
        ``"xs": [images, t_tags, l_tags, b_tags, r_tags, ct_tags]`` and
        ``"ys": [t_heatmaps, l_heatmaps, b_heatmaps, r_heatmaps,
        ct_heatmaps, tag_masks, t_regrs, l_regrs, b_regrs, r_regrs]``
        (all torch tensors), and ``k_ind`` is the updated dataset position.

    Raises:
        AssertionError: if ``rand_crop`` is disabled in the configuration;
            the full-image path is not implemented for extreme points.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    # Fixed number of per-image target slots so batches have a stable shape.
    max_tag_len = 128

    # allocating memory
    heatmap_shape = (batch_size, categories, output_size[0], output_size[1])
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    t_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    l_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    b_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    r_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    ct_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    t_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    l_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    b_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    r_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    t_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    l_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    b_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    r_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    ct_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)

    # Scale factors from input-image coordinates to heatmap coordinates.
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        # Reshuffle whenever the cursor is at position 0 (first call and
        # every wrap-around).
        if not debug and k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind = (k_ind + 1) % db_size

        # reading image
        image_file = db.image_file(db_ind)
        image = cv2.imread(image_file)

        # reading detections
        detections, extreme_pts = db.detections(db_ind)

        # cropping an image randomly
        if not rand_crop:
            # The original guard was `assert 0`, which disappears under
            # `python -O`; raise explicitly, keeping the exception type.
            raise AssertionError(
                "kp_detection requires rand_crop; the full-image crop is "
                "not implemented for extreme points")
        image, detections, extreme_pts = random_crop_pts(image,
                                                         detections,
                                                         extreme_pts,
                                                         rand_scales,
                                                         input_size,
                                                         border=border)

        image, detections, extreme_pts = _resize_image_pts(
            image, detections, extreme_pts, input_size)
        detections, extreme_pts = _clip_detections_pts(image, detections,
                                                       extreme_pts)

        # flipping an image randomly
        # NOTE: uses the global np.random state rather than data_rng.
        if np.random.uniform() > 0.5:
            image[:] = image[:, ::-1, :]
            width = image.shape[1]
            detections[:, [0, 2]] = width - detections[:, [2, 0]] - 1
            extreme_pts[:, :, 0] = width - extreme_pts[:, :, 0] - 1
            # after mirroring, the left and right extreme points trade places
            extreme_pts[:, 1, :], extreme_pts[:, 3, :] = \
                extreme_pts[:, 3, :].copy(), extreme_pts[:, 1, :].copy()

        image = image.astype(np.float32) / 255.
        if not debug:
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        normalize_(image, db.mean, db.std)
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        for ind, detection in enumerate(detections):
            # Category ids are shifted by one to become channel indices.
            category = int(detection[-1]) - 1
            extreme_pt = extreme_pts[ind]

            xt, yt = extreme_pt[0, 0], extreme_pt[0, 1]
            xl, yl = extreme_pt[1, 0], extreme_pt[1, 1]
            xb, yb = extreme_pt[2, 0], extreme_pt[2, 1]
            xr, yr = extreme_pt[3, 0], extreme_pt[3, 1]
            # center = midpoint of left/right in x and of top/bottom in y
            xct = (xl + xr) / 2
            yct = (yt + yb) / 2

            # floating-point positions in heatmap coordinates
            fxt = (xt * width_ratio)
            fyt = (yt * height_ratio)
            fxl = (xl * width_ratio)
            fyl = (yl * height_ratio)
            fxb = (xb * width_ratio)
            fyb = (yb * height_ratio)
            fxr = (xr * width_ratio)
            fyr = (yr * height_ratio)
            fxct = (xct * width_ratio)
            fyct = (yct * height_ratio)

            # integer heatmap cells (truncation)
            xt = int(fxt)
            yt = int(fyt)
            xl = int(fxl)
            yl = int(fyl)
            xb = int(fxb)
            yb = int(fyb)
            xr = int(fxr)
            yr = int(fyr)
            xct = int(fxct)
            yct = int(fyct)

            if gaussian_bump:
                # box extent in heatmap cells, rounded up
                width = detection[2] - detection[0]
                height = detection[3] - detection[1]

                width = math.ceil(width * width_ratio)
                height = math.ceil(height * height_ratio)

                if gaussian_rad == -1:
                    # -1 requests a radius derived from box size and IoU
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad
                draw_gaussian(t_heatmaps[b_ind, category], [xt, yt], radius)
                draw_gaussian(l_heatmaps[b_ind, category], [xl, yl], radius)
                draw_gaussian(b_heatmaps[b_ind, category], [xb, yb], radius)
                draw_gaussian(r_heatmaps[b_ind, category], [xr, yr], radius)
                draw_gaussian(ct_heatmaps[b_ind, category], [xct, yct], radius)
            else:
                t_heatmaps[b_ind, category, yt, xt] = 1
                l_heatmaps[b_ind, category, yl, xl] = 1
                b_heatmaps[b_ind, category, yb, xb] = 1
                r_heatmaps[b_ind, category, yr, xr] = 1
                # The center heatmap is returned as a target as well, so it
                # needs its peak here too; previously it stayed all-zero
                # whenever gaussian_bump was disabled.
                ct_heatmaps[b_ind, category, yct, xct] = 1

            # next free target slot for this image
            tag_ind = tag_lens[b_ind]
            # sub-cell offsets lost by the int() truncation, as (dx, dy)
            t_regrs[b_ind, tag_ind, :] = [fxt - xt, fyt - yt]
            l_regrs[b_ind, tag_ind, :] = [fxl - xl, fyl - yl]
            b_regrs[b_ind, tag_ind, :] = [fxb - xb, fyb - yb]
            r_regrs[b_ind, tag_ind, :] = [fxr - xr, fyr - yr]
            # flattened heatmap index: row * W_out + col
            t_tags[b_ind, tag_ind] = yt * output_size[1] + xt
            l_tags[b_ind, tag_ind] = yl * output_size[1] + xl
            b_tags[b_ind, tag_ind] = yb * output_size[1] + xb
            r_tags[b_ind, tag_ind] = yr * output_size[1] + xr
            ct_tags[b_ind, tag_ind] = yct * output_size[1] + xct
            tag_lens[b_ind] += 1

    # mark the slots that were actually filled for each image
    for b_ind in range(batch_size):
        tag_len = tag_lens[b_ind]
        tag_masks[b_ind, :tag_len] = 1

    if debug:
        # Visualise the first sample: undo normalisation and blend each
        # heatmap over the image.
        debugger = Debugger(num_classes=80)
        t_hm = debugger.gen_colormap(t_heatmaps[0])
        l_hm = debugger.gen_colormap(l_heatmaps[0])
        b_hm = debugger.gen_colormap(b_heatmaps[0])
        r_hm = debugger.gen_colormap(r_heatmaps[0])
        ct_hm = debugger.gen_colormap(ct_heatmaps[0])
        img = images[0] * db.std.reshape(3, 1, 1) + db.mean.reshape(3, 1, 1)
        img = (img * 255).astype(np.uint8).transpose(1, 2, 0)
        debugger.add_blend_img(img, t_hm, 't_hm')
        debugger.add_blend_img(img, l_hm, 'l_hm')
        debugger.add_blend_img(img, b_hm, 'b_hm')
        debugger.add_blend_img(img, r_hm, 'r_hm')
        debugger.add_blend_img(
            img, np.maximum(np.maximum(t_hm, l_hm), np.maximum(b_hm, r_hm)),
            'extreme')
        debugger.add_blend_img(img, ct_hm, 'center')
        debugger.show_all_imgs(pause=True)

    images = torch.from_numpy(images)
    t_heatmaps = torch.from_numpy(t_heatmaps)
    l_heatmaps = torch.from_numpy(l_heatmaps)
    b_heatmaps = torch.from_numpy(b_heatmaps)
    r_heatmaps = torch.from_numpy(r_heatmaps)
    ct_heatmaps = torch.from_numpy(ct_heatmaps)
    t_regrs = torch.from_numpy(t_regrs)
    l_regrs = torch.from_numpy(l_regrs)
    b_regrs = torch.from_numpy(b_regrs)
    r_regrs = torch.from_numpy(r_regrs)
    t_tags = torch.from_numpy(t_tags)
    l_tags = torch.from_numpy(l_tags)
    b_tags = torch.from_numpy(b_tags)
    r_tags = torch.from_numpy(r_tags)
    ct_tags = torch.from_numpy(ct_tags)
    tag_masks = torch.from_numpy(tag_masks)

    return {
        "xs": [images, t_tags, l_tags, b_tags, r_tags, ct_tags],
        "ys": [
            t_heatmaps, l_heatmaps, b_heatmaps, r_heatmaps, ct_heatmaps,
            tag_masks, t_regrs, l_regrs, b_regrs, r_regrs
        ]
    }, k_ind
Exemple #7
0
def kp_detection(db, k_ind):
    """Assemble one batch of images, padding masks and lane labels.

    For each of ``system_configs.batch_size`` samples the function loads the
    image referenced by the annotation, runs ``db.transform`` on the image,
    its lane line-strings and an all-ones mask, rebuilds the label from the
    transformed lanes, applies optional colour augmentation and normalises
    the image.

    Args:
        db: dataset object; the code reads ``db.configs``, ``db.db_inds``,
            ``db.detections``, ``db.shuffle_inds``, ``db.transform``,
            ``db.lane_to_linestrings``, ``db.linestrings_to_lanes``,
            ``db._transform_annotation``, ``db.mean``, ``db.std``,
            ``db.eig_val`` and ``db.eig_vec``.
        k_ind: position of the next sample in ``db.db_inds``; advanced
            modulo the dataset size for every sample taken.

    Returns:
        A tuple ``(batch, k_ind)`` where ``batch`` is a dict with
        ``"xs": [images, masks]`` and ``"ys": [images, *gt_lanes]`` (torch
        tensors; one ``gt_lanes`` entry per sample), and ``k_ind`` is the
        updated dataset position.
    """
    data_rng     = system_configs.data_rng
    batch_size   = system_configs.batch_size
    input_size   = db.configs["input_size"]
    lighting     = db.configs["lighting"]
    rand_color   = db.configs["rand_color"]
    images   = np.zeros((batch_size, 3, input_size[0], input_size[1]), dtype=np.float32) # b, 3, H, W
    masks    = np.zeros((batch_size, 1, input_size[0], input_size[1]), dtype=np.float32)  # b, 1, H, W
    gt_lanes = []

    db_size = db.db_inds.size

    for b_ind in range(batch_size):

        # Reshuffle whenever the cursor is at position 0 (first call and
        # every wrap-around).
        if k_ind == 0:
            db.shuffle_inds()

        db_ind = db.db_inds[k_ind]
        k_ind  = (k_ind + 1) % db_size

        # reading ground truth
        item  = db.detections(db_ind) # all in the raw coordinate
        img   = cv2.imread(item['path'])
        # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
        # same dtype and works on every NumPy version.
        mask  = np.ones((1, img.shape[0], img.shape[1], 1), dtype=bool)
        label = item['label']
        # Hard-wired switch kept from the original: the transform path is
        # always taken, so the raw label above is always replaced.
        transform = True
        if transform:
            line_strings = db.lane_to_linestrings(item['old_anno']['lanes'])
            line_strings = LineStringsOnImage(line_strings, shape=img.shape)
            img, line_strings, mask = db.transform(image=img, line_strings=line_strings, segmentation_maps=mask)
            line_strings.clip_out_of_image_()
            new_anno = {'path': item['path'], 'lanes': db.linestrings_to_lanes(line_strings)}
            new_anno['categories'] = item['categories']
            label = db._transform_annotation(new_anno, img_wh=(input_size[1], input_size[0]))['label']

        # clip polys: keep only rows whose first column is positive
        tgt_ids   = label[:, 0]
        label = label[tgt_ids > 0]

        # make lower the same: negative entries in column 1 become 1, then
        # every row shares the column minimum
        label[:, 1][label[:, 1] < 0] = 1
        label[:, 1][...] = np.min(label[:, 1])

        # NOTE(review): each sample's label is replicated batch_size times
        # along a new leading axis; kept as in the original -- confirm this
        # is what the consumer expects.
        label = np.stack([label] * batch_size, axis=0)
        gt_lanes.append(torch.from_numpy(label.astype(np.float32)))

        img = (img / 255.).astype(np.float32)
        if rand_color:
            color_jittering_(data_rng, img)
            if lighting:
                lighting_(data_rng, img, 0.1, db.eig_val, db.eig_vec)
        normalize_(img, db.mean, db.std)
        # HWC -> CHW
        images[b_ind]   = img.transpose((2, 0, 1))
        # Store the logical inverse of the transformed all-ones mask.
        masks[b_ind]    = np.logical_not(mask[:, :, :, 0])

    images   = torch.from_numpy(images)
    masks    = torch.from_numpy(masks)

    return {
               "xs": [images, masks],
               "ys": [images, *gt_lanes]
           }, k_ind
Exemple #8
0
def kp_detection(db, k_ind, data_aug, debug):
    """Assemble one batch of images and grouped key-point targets.

    Each detection is a flat array of alternating x/y coordinates. Every
    in-bounds key point is drawn on ``key_heatmaps``; a point that lands on
    a cell already at 0.85 or above is also drawn on ``hybrid_heatmaps``.
    Offsets, flattened-index tags (flat and per group) and validity masks
    are recorded alongside.

    Args:
        db: dataset object; the code reads ``db.configs``, ``db.db_inds``,
            ``db.image_file``, ``db.detections``, ``db.shuffle_inds``,
            ``db.eig_val`` and ``db.eig_vec``.
        k_ind: position of the next sample in ``db.db_inds``; advanced
            modulo the dataset size for every sample tried.
        data_aug: not referenced anywhere in this function.
        debug: when true, shuffling, random cropping and colour
            augmentation are skipped.

    Returns:
        A tuple ``(batch, k_ind)`` where ``batch`` is a dict with
        ``"xs": [images, key_tags, key_tags_grouped, tag_group_lens]`` and
        ``"ys": [key_heatmaps, hybrid_heatmaps, tag_masks,
        tag_masks_grouped, key_regrs]`` (all torch tensors), and ``k_ind``
        is the updated dataset position.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    categories = db.configs["categories"]
    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"]
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"]
    rand_scales = db.configs["rand_scales"]
    gaussian_bump = db.configs["gaussian_bump"]
    gaussian_iou = db.configs["gaussian_iou"]
    gaussian_rad = db.configs["gaussian_radius"]

    max_tag_len = 256        # key-point slots per image
    max_tag_len_group = 128  # key-point slots per group (detection)
    max_group_len = 16       # groups (detections) per image
    # allocating memory
    heatmap_shape = (batch_size, categories, output_size[0], output_size[1])
    group_shape = (batch_size, max_group_len, max_tag_len_group)
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    key_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    hybrid_heatmaps = np.zeros(heatmap_shape, dtype=np.float32)
    key_regrs = np.zeros((batch_size, max_tag_len, 2), dtype=np.float32)
    key_tags = np.zeros((batch_size, max_tag_len), dtype=np.int64)
    key_tags_grouped = np.zeros(group_shape, dtype=np.int64)
    tag_masks = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_masks_grouped = np.zeros(group_shape, dtype=np.uint8)
    # Never written below; it only takes part in the final mask product.
    hybrid_masks_grouped = np.zeros(group_shape, dtype=np.uint8)
    tag_lens = np.zeros((batch_size, ), dtype=np.int32)
    tag_group_lens = np.zeros((batch_size, ), dtype=np.int32)

    # Scale factors from input-image coordinates to heatmap coordinates.
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        # Keep advancing through the dataset until a sample has both a
        # readable image and annotations.
        while True:
            db_ind = db.db_inds[k_ind]
            k_ind = (k_ind + 1) % db_size
            # reading image
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            temp = db.detections(db_ind)
            # cv2.imread signals failure by returning None. The previous
            # `image.any() != None` test was always true for a loaded
            # image and raised AttributeError for a missing one.
            if image is not None and temp is not None:
                break

        ori_size = image.shape
        # Named so it does not shadow the `categories` config value above.
        detections, det_categories = temp
        detections = detections[0:max_group_len]
        det_categories = det_categories[0:max_group_len]
        # cropping an image randomly
        if not debug and rand_crop:
            image, detections, scale = random_crop_line(image,
                                                        detections,
                                                        rand_scales,
                                                        input_size,
                                                        border=border)
        else:
            image, detections = _full_image_crop(image, detections)
            # `scale` used to be unbound on this path, which raised
            # NameError in the Gaussian-radius computation below.
            # NOTE(review): 1.0 assumes the full-image crop does not
            # rescale -- confirm against _full_image_crop.
            scale = 1.0

        image, detections = _resize_image(image, detections, input_size)
        detections = _clip_detections(image, detections)

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
            # normalisation was already disabled in the original
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        # The per-detection category is ignored: all points share channel 0.
        for ind, (detection, _) in enumerate(zip(detections, det_categories)):
            category = 0
            # Scale x (even slots) and y (odd slots) into heatmap
            # coordinates; keep the scaled copy for the offset targets.
            fdetection = detection.copy()
            fdetection[0::2] = detection[0::2] * width_ratio
            fdetection[1::2] = detection[1::2] * height_ratio
            detection = fdetection.astype(np.int32)
            num_points = len(detection) // 2

            if gaussian_bump:
                # Nominal blob extent derived from the original image size
                # and the crop scale.
                width = ori_size[1] / 50 / 4 / scale
                height = ori_size[0] / 50 / 4 / scale

                if gaussian_rad == -1:
                    radius = gaussian_radius((height, width), gaussian_iou)
                    radius = max(0, int(radius))
                else:
                    radius = gaussian_rad
                for k in range(num_points):
                    x, y = detection[2 * k], detection[2 * k + 1]
                    if not _keypoint_in_bounds(x, y, output_size):
                        continue
                    # Sample the cell before drawing: a point landing on an
                    # already-hot cell is also recorded in the hybrid map.
                    already_hot = not (
                        key_heatmaps[b_ind, category, y, x] < 0.85)
                    draw_gaussian(key_heatmaps[b_ind, category], [x, y],
                                  radius)
                    if already_hot:
                        draw_gaussian(hybrid_heatmaps[b_ind, category],
                                      [x, y], radius)
            else:
                # The original indexed with `k` outside any loop (unbound
                # or stale); mark every in-bounds key point instead.
                for k in range(num_points):
                    x, y = detection[2 * k], detection[2 * k + 1]
                    if _keypoint_in_bounds(x, y, output_size):
                        key_heatmaps[b_ind, category, y, x] = 1

            for k in range(num_points):
                x, y = detection[2 * k], detection[2 * k + 1]
                if not _keypoint_in_bounds(x, y, output_size):
                    continue
                if (tag_lens[b_ind] >= max_tag_len - 1
                        or k > max_tag_len_group - 1):
                    print("Too many targets, skip!")
                    print(tag_lens[b_ind])
                    print(image_file)
                    break
                tag_ind = tag_lens[b_ind]
                # sub-cell offset lost by the integer cast, as (dx, dy)
                key_regrs[b_ind, tag_ind, :] = [
                    fdetection[2 * k] - x, fdetection[2 * k + 1] - y
                ]
                # flattened heatmap index: row * W_out + col
                flat_index = y * output_size[1] + x
                key_tags[b_ind, tag_ind] = flat_index
                key_tags_grouped[b_ind, ind, k] = flat_index
                tag_lens[b_ind] += 1
                # only points off the hybrid (overlap) blobs get a group mask
                if hybrid_heatmaps[b_ind, category, y, x] < 0.85:
                    tag_masks_grouped[b_ind, ind, k] = 1
            tag_len = tag_lens[b_ind]
            tag_group_lens[b_ind] += 1
            tag_masks[b_ind, :tag_len] = 1

    tag_masks_grouped = tag_masks_grouped * (1 - hybrid_masks_grouped)
    images = torch.from_numpy(images)
    key_heatmaps = torch.from_numpy(key_heatmaps)
    key_regrs = torch.from_numpy(key_regrs)
    key_tags = torch.from_numpy(key_tags)
    tag_masks = torch.from_numpy(tag_masks)
    key_tags_grouped = torch.from_numpy(key_tags_grouped)
    tag_group_lens = torch.from_numpy(tag_group_lens)
    hybrid_heatmaps = torch.from_numpy(hybrid_heatmaps)
    tag_masks_grouped = torch.from_numpy(tag_masks_grouped)
    return {
        "xs": [images, key_tags, key_tags_grouped, tag_group_lens],
        "ys": [
            key_heatmaps, hybrid_heatmaps, tag_masks, tag_masks_grouped,
            key_regrs
        ]
    }, k_ind


def _keypoint_in_bounds(x, y, output_size):
    """Return True when (x, y) is a usable heatmap cell.

    A coordinate equal to 0 is rejected, as is anything at or beyond the
    heatmap extent (with the original 1e-2 tolerance).
    """
    return (x != 0 and y != 0
            and x < output_size[1] - 1e-2
            and y < output_size[0] - 1e-2)
Exemple #9
0
def _fill_sample_tags(tags, weights, masks, b_ind, fdetections, num_feature,
                      out_width):
    """Write sample-point indices and weights for one batch entry.

    For every detection in ``fdetections`` the two endpoints
    (``fdetections[k, 0]`` and ``fdetections[k, 1]``) are handed to
    ``_get_sample_point``.  Each returned point contributes four consecutive
    slots in ``tags[b_ind]`` / ``weights[b_ind]``; the tag stored is the
    flattened index ``y * out_width + x`` of each of the four entries.
    ``masks[b_ind, k]`` is set to 1 for every processed detection.

    The arrays are modified in place; nothing is returned.
    """
    tag_ind = 0
    for k, line in enumerate(fdetections):
        points, point_weights = _get_sample_point(line[0], line[1],
                                                  num_feature)
        for point, weight in zip(points, point_weights):
            # presumably the four entries are the neighbours used for
            # interpolation around the sample point -- TODO confirm against
            # _get_sample_point
            for sth in range(4):
                tags[b_ind, tag_ind + sth] = (
                    point[sth][1] * out_width + point[sth][0])
                weights[b_ind, tag_ind + sth] = weight[sth]
            tag_ind += 4
        masks[b_ind, k] = 1


def kp_detection(db, k_ind, data_aug, debug):
    """Assemble one training batch of images with positive/negative line tags.

    Args:
        db: dataset object; this function reads ``db.configs``,
            ``db.db_inds``, ``db.image_file``, ``db.detections``,
            ``db.shuffle_inds`` and, when lighting is enabled,
            ``db.eig_val`` / ``db.eig_vec``.
        k_ind: current position inside ``db.db_inds``.
        data_aug: gates the ``rand_color`` and ``lighting`` options.
        debug: when true, index shuffling, random cropping and image
            normalisation/colour augmentation are skipped.

    Returns:
        A tuple ``(batch, k_ind)`` where ``batch`` is a dict with
        ``"xs"`` = ``[images, ps_tags, ng_tags, ps_weights, ng_weights]`` and
        ``"ys"`` = ``[zeros, ones, tag_masks_ps, tag_masks_ng]`` (all torch
        tensors), and ``k_ind`` is the advanced dataset position.
    """
    data_rng = system_configs.data_rng
    batch_size = system_configs.batch_size

    input_size = db.configs["input_size"]
    output_size = db.configs["output_sizes"][0]

    border = db.configs["border"]
    lighting = db.configs["lighting"] and data_aug
    rand_crop = db.configs["rand_crop"]
    rand_color = db.configs["rand_color"] and data_aug
    rand_scales = db.configs["rand_scales"]

    max_tag_len = 16
    max_group_len = 16
    num_feature = 8
    # Flat tag/weight buffers hold max_tag_len * 8 * 4 slots per batch entry.
    tag_slots = max_tag_len * 8 * 4

    # allocating memory
    images = np.zeros((batch_size, 3, input_size[0], input_size[1]),
                      dtype=np.float32)
    ps_tags = np.zeros((batch_size, tag_slots), dtype=np.int64)
    ng_tags = np.zeros((batch_size, tag_slots), dtype=np.int64)
    ps_weights = np.zeros((batch_size, tag_slots), dtype=np.float32)
    ng_weights = np.zeros((batch_size, tag_slots), dtype=np.float32)
    tag_masks_ps = np.zeros((batch_size, max_tag_len), dtype=np.uint8)
    tag_masks_ng = np.zeros((batch_size, max_tag_len), dtype=np.uint8)

    # Input -> output scale factors do not depend on the sample.
    width_ratio = output_size[1] / input_size[1]
    height_ratio = output_size[0] / input_size[0]

    db_size = db.db_inds.size
    for b_ind in range(batch_size):
        if not debug and k_ind == 0:
            db.shuffle_inds()

        # Keep drawing samples until one has a readable image and at least
        # one positive detection left after cropping/resizing/clipping.
        while True:
            db_ind = db.db_inds[k_ind]
            k_ind = (k_ind + 1) % db_size

            # reading image
            image_file = db.image_file(db_ind)
            image = cv2.imread(image_file)
            if image is None:
                # cv2.imread signals failure by returning None; the old
                # `image.any() != None` test crashed here instead of skipping.
                print("Unreadable image, skip!")
                print(image_file)
                continue

            ps_detections, ng_detections = db.detections(db_ind)
            if ps_detections is None or len(ps_detections) < 1:
                continue

            ps_detections = np.array(ps_detections)
            ng_detections = np.array(ng_detections)

            # cropping an image randomly
            if not debug and rand_crop:
                image, ps_detections, ng_detections, _ = random_crop_line(
                    image,
                    ps_detections,
                    ng_detections,
                    rand_scales,
                    input_size,
                    border=border)
            else:
                # NOTE(review): `detections` is not defined in this function,
                # so this branch raises NameError whenever debug is set or
                # rand_crop is disabled.  Left as-is because the contract of
                # _full_image_crop for line detections is not visible here --
                # needs a line-aware full-image crop.
                image, detections = _full_image_crop(image, detections)

            image, ps_detections, ng_detections = _resize_image(
                image, ps_detections, ng_detections, input_size)
            ps_detections, ng_detections = _clip_detections(
                image, ps_detections, ng_detections)
            if len(ps_detections) >= 1:
                break

        # Randomly keep at most max_group_len detections of each kind.
        np.random.shuffle(ps_detections)
        np.random.shuffle(ng_detections)
        ps_detections = ps_detections[:max_group_len]
        ng_detections = ng_detections[:max_group_len]

        if not debug:
            image = image.astype(np.float32) / 255.
            if rand_color:
                color_jittering_(data_rng, image)
                if lighting:
                    lighting_(data_rng, image, 0.1, db.eig_val, db.eig_vec)
        # HWC -> CHW
        images[b_ind] = image.transpose((2, 0, 1))

        # Rescale detection coordinates from input to output resolution.
        ps_fdetections = ps_detections.copy()
        ps_fdetections[:, :, 0] = ps_detections[:, :, 0] * width_ratio
        ps_fdetections[:, :, 1] = ps_detections[:, :, 1] * height_ratio
        ng_fdetections = ng_detections.copy()
        if len(ng_detections) > 0:
            ng_fdetections[:, :, 0] = ng_detections[:, :, 0] * width_ratio
            ng_fdetections[:, :, 1] = ng_detections[:, :, 1] * height_ratio

        _fill_sample_tags(ps_tags, ps_weights, tag_masks_ps, b_ind,
                          ps_fdetections, num_feature, output_size[1])
        _fill_sample_tags(ng_tags, ng_weights, tag_masks_ng, b_ind,
                          ng_fdetections, num_feature, output_size[1])

    # NOTE(review): the upper bound 127 * 127 is hard-coded; presumably it is
    # meant to keep indices inside a 128x128 output map -- confirm against
    # db.configs["output_sizes"].
    ps_tags = np.clip(ps_tags, 0, 127 * 127)
    ng_tags = np.clip(ng_tags, 0, 127 * 127)

    images = torch.from_numpy(images)
    ps_tags = torch.from_numpy(ps_tags)
    ng_tags = torch.from_numpy(ng_tags)
    ps_weights = torch.from_numpy(ps_weights)
    ng_weights = torch.from_numpy(ng_weights)
    tag_masks_ps = torch.from_numpy(tag_masks_ps)
    tag_masks_ng = torch.from_numpy(tag_masks_ng)
    return {
        "xs": [images, ps_tags, ng_tags, ps_weights, ng_weights],
        "ys": [
            torch.zeros([batch_size, 16], dtype=torch.int64),
            torch.ones([batch_size, 16], dtype=torch.int64), tag_masks_ps,
            tag_masks_ng
        ]
    }, k_ind