Exemple #1
0
def main():
    """Run the contour-segmentation demo on every frame in ``cfg.img_dir``.

    Frames are processed in sorted filename order.  For each frame the model
    is evaluated at every scale in ``cfg.test_scales``; the decoded contour
    and box coordinates are mapped back to frame coordinates with
    ``transform_preds`` and at most ``max_per_image`` highest-scoring
    detections are kept.  Detections scoring above ``cfg.detect_thres`` are
    drawn on the frame, shown in a window, appended to the output video and
    logged to the output text file, one line per detection:
    ``frame number (1-based), per-frame counter, x, y, width, height, score``.
    Pressing ``q`` in the preview window stops the run early.

    Raises:
        NotImplementedError: if ``cfg.dataset`` or ``cfg.arch`` is not one of
            the values handled below.
    """
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    if cfg.dataset == 'coco':
        num_classes = 80
        colors = COCO_COLORS
        names = COCO_NAMES
    elif cfg.dataset == 'DETRAC':
        num_classes = 3
        colors = DETRAC_COLORS
        names = DETRAC_NAMES
    elif cfg.dataset == 'kins':
        num_classes = 7
        colors = KINS_COLORS
        names = KINS_NAMES
    else:
        print('Please specify correct dataset name.')
        raise NotImplementedError

    # Scale the colour table by 255.  NOTE: this rewrites the shared colour
    # list in place, exactly as the original code did.
    for j in range(len(names)):
        colors[j] = tuple(c * 255 for c in colors[j])

    output_folder = os.path.join(cfg.root_dir, 'demo')
    os.makedirs(output_folder, exist_ok=True)

    # Size the video writer is opened with; every written frame must match it.
    video_size = (cfg.video_width, cfg.video_height)

    # Loop-invariant tensors/arrays, built once instead of per frame and scale.
    dictionary = np.load(cfg.dictionary_file)
    dictionary_t = torch.from_numpy(dictionary.astype(np.float32)).to(
        cfg.device)
    # Any non-COCO dataset falls back to the DETRAC statistics.
    mean = np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                    dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                   dtype=np.float32)[None, None, :]

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    elif 'resdcn' in cfg.arch:
        model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]),
                                head_conv=64,
                                num_classes=num_classes,
                                num_codes=cfg.n_codes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    frame_list = sorted(os.listdir(cfg.img_dir))
    speed_list = []  # seconds spent in inference + decoding, one per frame
    padding = 127 if 'hourglass' in cfg.arch else 31

    # Column layout of a decoded row: 2 * num_vertices contour coordinates,
    # 4 box coordinates, then the score.
    coords_end = cfg.num_vertices * 2 + 4
    score_col = coords_end

    video_out = None
    text_out = open(os.path.join(output_folder, cfg.output_text_file), 'w')
    try:
        video_out = cv2.VideoWriter(
            os.path.join(output_folder, cfg.output_video_file),
            cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), cfg.video_fps,
            video_size)

        for frame_id, frame_name in enumerate(frame_list):
            image = cv2.imread(os.path.join(cfg.img_dir, frame_name))
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                print('Skipping unreadable frame:', frame_name)
                continue
            height, width = image.shape[0:2]

            # ---- pre-processing: one network input per test scale ----
            imgs = {}
            for scale in cfg.test_scales:
                new_height = int(height * scale)
                new_width = int(width * scale)

                if cfg.img_size[0] > 0 and cfg.img_size[1] > 0:
                    # Fixed network input size taken from the configuration.
                    img_height, img_width = cfg.img_size[0], cfg.img_size[1]
                    center = np.array([new_width / 2., new_height / 2.],
                                      dtype=np.float32)
                    scaled_size = max(height, width) * 1.0
                    scaled_size = np.array([scaled_size, scaled_size],
                                           dtype=np.float32)
                else:
                    # Round each side up to the next multiple of padding + 1.
                    img_height = (new_height | padding) + 1
                    img_width = (new_width | padding) + 1
                    center = np.array([new_width // 2, new_height // 2],
                                      dtype=np.float32)
                    scaled_size = np.array([img_width, img_height],
                                           dtype=np.float32)

                img = cv2.resize(image, (new_width, new_height))
                trans_img = get_affine_transform(center, scaled_size, 0,
                                                 [img_width, img_height])
                img = cv2.warpAffine(img, trans_img, (img_width, img_height))

                img = img.astype(np.float32) / 255.
                img -= mean
                img /= std
                # from [H, W, C] to [1, C, H, W]
                img = img.transpose(2, 0, 1)[None, :, :, :]

                imgs[scale] = {
                    'image': torch.from_numpy(img).float(),
                    'center': np.array(center),
                    'scale': np.array(scaled_size),
                    'fmap_h': np.array(img_height // 4),
                    'fmap_w': np.array(img_width // 4)
                }

            # ---- inference and decoding ----
            segmentations = []
            start_time = time.time()
            with torch.no_grad():
                for scale, meta in imgs.items():
                    meta['image'] = meta['image'].to(cfg.device)
                    hmap, regs, w_h_, offsets, _, _, codes = model(
                        meta['image'])[-1]

                    segms = ctsegm_scale_decode(hmap, regs, w_h_, codes,
                                                offsets, dictionary_t,
                                                K=cfg.test_topk)
                    segms = segms.detach().cpu().numpy().reshape(
                        1, -1, segms.shape[2])[0]

                    # Map every (x, y) column pair back to frame coordinates:
                    # num_vertices contour points followed by the two box
                    # corners.
                    fmap_size = (meta['fmap_w'], meta['fmap_h'])
                    for j in range(cfg.num_vertices + 2):
                        segms[:, 2 * j:2 * j + 2] = transform_preds(
                            segms[:, 2 * j:2 * j + 2], meta['center'],
                            meta['scale'], fmap_size)

                    # The last column holds the class index; output labels
                    # are 1-based.
                    clses = segms[:, -1]
                    top_preds = {}
                    for j in range(num_classes):
                        per_class = segms[clses == j, :score_col + 1].astype(
                            np.float32)
                        per_class[:, :coords_end] /= scale
                        top_preds[j + 1] = per_class

                    segmentations.append(top_preds)
            # Record the elapsed time so the frame rate printed at the end
            # is meaningful (.cpu() above has already synchronised the GPU).
            speed_list.append(time.time() - start_time)

            # ---- merge scales and keep the best max_per_image ----
            segms_and_scores = {
                j: np.concatenate([d[j] for d in segmentations], axis=0)
                for j in range(1, num_classes + 1)
            }  # a Dict label: segments
            scores = np.hstack([
                segms_and_scores[j][:, score_col]
                for j in range(1, num_classes + 1)
            ])

            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = segms_and_scores[j][:, score_col] >= thresh
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]

            # ---- drawing and logging ----
            output_image = image.copy()
            blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)

            counter = 1  # per-frame detection counter written to the log
            for lab, detections in segms_and_scores.items():
                if cfg.dataset == 'coco' and names[lab] not in display_cat:
                    continue
                for res in detections:
                    contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                    # Written as "not >" so a NaN score is skipped as well.
                    if not score > cfg.detect_thres:
                        continue

                    bbox[0::2] = np.clip(bbox[0::2], 0, width - 1)
                    bbox[1::2] = np.clip(bbox[1::2], 0, height - 1)

                    polygon = contour.reshape((-1, 2))
                    polygon[:, 0] = np.clip(polygon[:, 0], 0, width - 1)
                    polygon[:, 1] = np.clip(polygon[:, 1], 0, height - 1)
                    pts = polygon.astype(np.int32)

                    cv2.polylines(output_image, [pts],
                                  True,
                                  color=nice_colors[names[lab]],
                                  thickness=2)
                    cv2.drawContours(blend_mask, [pts],
                                     contourIdx=-1,
                                     color=nice_colors[names[lab]],
                                     thickness=-1)

                    # add to text file
                    new_line = '{0},{1},{2:.3f},{3:.3f},{4:.3f},{5:.3f},{6:.4f}\n'.format(
                        str(frame_id + 1), counter, int(bbox[0]),
                        int(bbox[1]),
                        int(bbox[2]) - int(bbox[0]),
                        int(bbox[3]) - int(bbox[1]), score)
                    counter += 1
                    text_out.write(new_line)

            # Blend the filled masks over the frame; entries where the mask
            # is zero keep their original value.
            dst_img = cv2.addWeighted(output_image, 0.4, blend_mask, 0.6, 0)
            untouched = blend_mask == 0
            dst_img[untouched] = output_image[untouched]

            cv2.imshow('Frames', dst_img)
            # VideoWriter.write expects frames of the size the writer was
            # opened with, so resize when the source frame differs.
            if (width, height) != video_size:
                video_out.write(cv2.resize(dst_img, video_size))
            else:
                video_out.write(dst_img)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Flush the detection log and finalise the video even on error.
        text_out.close()
        if video_out is not None:
            video_out.release()

    cv2.destroyAllWindows()

    if speed_list:
        print('Test frame rate:', 1. / np.mean(speed_list))
    else:
        print('No frames were processed.')
Exemple #2
0
def main():
    """Show ground truth, resampled, reconstructed and predicted contours.

    For every image listed in the COCO annotation file, four panels are
    built and displayed side by side:

    1. the raw annotation polygons,
    2. the fixed-length contour (``cfg.num_vertices`` points) obtained from
       each annotation,
    3. that contour reconstructed from its sparse code over the dictionary
       loaded from ``cfg.dictionary_file``,
    4. the model predictions scoring above ``cfg.detect_thres``.

    The window blocks until a key is pressed; ``q`` stops the loop.
    """
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)
    # Built once; it does not change between images or scales.
    dictionary_t = torch.from_numpy(dictionary.astype(np.float32)).to(
        cfg.device)

    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    # Scale the colour table by 255.  NOTE: this rewrites the shared colour
    # list in place, exactly as the original code did.
    for j in range(len(names)):
        colors[j] = tuple(c * 255 for c in colors[j])

    # Any non-COCO dataset falls back to the DETRAC statistics.
    mean = np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                    dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                   dtype=np.float32)[None, None, :]

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        # NOTE(review): this branch hard-codes 80 classes even though
        # num_classes is 4 for non-COCO datasets -- confirm it matches the
        # checkpoint being loaded.
        model = get_pose_net(num_layers=int(cfg.arch.split('_')[-1]),
                             num_classes=80)

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loading COCO validation images
    annotation_file = '{}/annotations/instances_{}.json'.format(
        cfg.data_dir, cfg.data_type)
    coco = COCO(annotation_file)
    imgIds = coco.getImgIds()

    padding = 127 if 'hourglass' in cfg.arch else 31
    # Column layout of a decoded row: 2 * num_vertices contour coordinates,
    # 4 box coordinates, then the score.
    coords_end = cfg.num_vertices * 2 + 4
    score_col = coords_end

    for img_id in imgIds:
        annotations_per_img = coco.loadAnns(
            ids=coco.getAnnIds(imgIds=[img_id]))
        img_info = coco.loadImgs(img_id)[0]
        image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type,
                                          img_info['file_name'])
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])
        if w_img < 1 or h_img < 1:
            continue

        # Decode the file once and copy it for each panel.
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print('Skipping unreadable image:', image_path)
            continue
        img_original = image.copy()
        img_connect = image.copy()
        img_recon = image.copy()
        print('Image id: ', img_id)

        # ---- ground-truth panels ----
        for annt in annotations_per_img:
            # Only polygon (list) segmentations of non-crowd objects are used.
            if annt['iscrowd'] == 1 or not isinstance(annt['segmentation'],
                                                      list):
                continue

            polygons = get_connected_polygon_using_mask(
                annt['segmentation'], (h_img, w_img),
                n_vertices=cfg.num_vertices,
                closing_max_kernel=60)
            gt_x1, gt_y1, gt_w, gt_h = annt['bbox']
            contour = np.array(polygons).reshape((-1, 2))

            # Bring the contour to exactly cfg.num_vertices points.
            if len(contour) > cfg.num_vertices:
                resampled_contour = resample(contour, num=cfg.num_vertices)
            else:
                resampled_contour = turning_angle_resample(
                    contour, cfg.num_vertices)

            # Keep the resampled points inside the annotated box.
            resampled_contour[:, 0] = np.clip(resampled_contour[:, 0], gt_x1,
                                              gt_x1 + gt_w)
            resampled_contour[:, 1] = np.clip(resampled_contour[:, 1], gt_y1,
                                              gt_y1 + gt_h)

            # Reverse the vertex order unless the contour is reported as
            # clockwise.
            if check_clockwise_polygon(resampled_contour):
                fixed_contour = resampled_contour.copy()
            else:
                fixed_contour = np.flip(resampled_contour, axis=0)

            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate(
                (fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            # The contour is centred on the mean of its own vertices.
            bbox_center = np.mean(indexed_shape, axis=0)
            centered_shape = indexed_shape - bbox_center

            # Panel 1: every part of the raw annotation polygon.
            for part in annt['segmentation']:
                polys = np.array(part).reshape((-1, 2))
                cv2.polylines(img_original, [polys.astype(np.int32)],
                              True, (10, 10, 255),
                              thickness=2)

            # Panel 2: the fixed-length contour.
            cv2.polylines(img_connect, [indexed_shape.astype(np.int32)],
                          True, (10, 10, 255),
                          thickness=2)

            # Panel 3: the contour rebuilt from its codes over the dictionary.
            learned_val_codes, _ = fast_ista(centered_shape.reshape((1, -1)),
                                             dictionary,
                                             lmbda=0.1,
                                             max_iter=60)
            recon_contour = np.matmul(learned_val_codes, dictionary).reshape(
                (-1, 2))
            recon_contour = recon_contour + bbox_center
            cv2.polylines(img_recon, [recon_contour.astype(np.int32)],
                          True, (10, 10, 255),
                          thickness=2)

        # ---- pre-processing: one network input per test scale ----
        height, width = image.shape[0:2]
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                # Fixed square network input taken from the configuration.
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.],
                                  dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size],
                                       dtype=np.float32)
            else:
                # Round each side up to the next multiple of padding + 1.
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2],
                                  dtype=np.float32)
                scaled_size = np.array([img_width, img_height],
                                       dtype=np.float32)

            net_in = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0,
                                             [img_width, img_height])
            net_in = cv2.warpAffine(net_in, trans_img,
                                    (img_width, img_height))

            net_in = net_in.astype(np.float32) / 255.
            net_in -= mean
            net_in /= std
            # from [H, W, C] to [1, C, H, W]
            net_in = net_in.transpose(2, 0, 1)[None, :, :, :]

            imgs[scale] = {
                'image': torch.from_numpy(net_in).float(),
                'center': np.array(center),
                'scale': np.array(scaled_size),
                'fmap_h': np.array(img_height // 4),
                'fmap_w': np.array(img_width // 4)
            }

        # ---- inference and decoding ----
        segmentations = []
        with torch.no_grad():
            for scale, meta in imgs.items():
                meta['image'] = meta['image'].to(cfg.device)

                output = model(meta['image'])[-1]
                segms = ctsegm_code_n_offset_decode(*output,
                                                    dictionary_t,
                                                    K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(
                    1, -1, segms.shape[2])[0]

                # Map every (x, y) column pair back to image coordinates:
                # num_vertices contour points followed by the two box corners.
                fmap_size = (meta['fmap_w'], meta['fmap_h'])
                for j in range(cfg.num_vertices + 2):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2], meta['center'],
                        meta['scale'], fmap_size)

                # The last column holds the class index; output labels are
                # 1-based.
                clses = segms[:, -1]
                top_preds = {}
                for j in range(num_classes):
                    per_class = segms[clses == j, :score_col + 1].astype(
                        np.float32)
                    per_class[:, :coords_end] /= scale
                    top_preds[j + 1] = per_class

                segmentations.append(top_preds)

        # ---- merge scales and keep the best max_per_image ----
        segms_and_scores = {
            j: np.concatenate([d[j] for d in segmentations], axis=0)
            for j in range(1, num_classes + 1)
        }  # a Dict label: segments
        scores = np.hstack([
            segms_and_scores[j][:, score_col]
            for j in range(1, num_classes + 1)
        ])

        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, num_classes + 1):
                keep_inds = segms_and_scores[j][:, score_col] >= thresh
                segms_and_scores[j] = segms_and_scores[j][keep_inds]

        # ---- panel 4: predictions ----
        output_image = image.copy()
        blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)

        for lab, detections in segms_and_scores.items():
            for res in detections:
                contour, bbox, score = res[:-5], res[-5:-1], res[-1]
                # Written as "not >" so a NaN score is skipped as well.
                if not score > cfg.detect_thres:
                    continue

                bbox[0::2] = np.clip(bbox[0::2], 0, w_img)
                bbox[1::2] = np.clip(bbox[1::2], 0, h_img)

                polygon = contour.reshape((-1, 2))
                polygon[:, 0] = np.clip(polygon[:, 0], 0, w_img - 1)
                polygon[:, 1] = np.clip(polygon[:, 1], 0, h_img - 1)
                pts = polygon.astype(np.int32)

                # use bb tools to draw predictions
                text = names[lab]
                color = random.choice(COLOR_WORLD)
                bb.add(output_image, bbox[0], bbox[1], bbox[2], bbox[3],
                       text, color)
                cv2.polylines(output_image, [pts],
                              True,
                              RGB_DICT[color],
                              thickness=1)
                cv2.drawContours(blend_mask, [pts],
                                 contourIdx=-1,
                                 color=RGB_DICT[color],
                                 thickness=-1)

        # Blend the filled masks over the image; entries where the mask is
        # zero keep their original value.
        dst_img = cv2.addWeighted(output_image, 0.5, blend_mask, 0.5, 0)
        untouched = blend_mask == 0
        dst_img[untouched] = output_image[untouched]

        # White 10-pixel borders separate the four panels.
        value = [255, 255, 255]
        img_original = cv2.copyMakeBorder(img_original, 0, 0, 0, 10,
                                          cv2.BORDER_CONSTANT, None, value)
        img_connect = cv2.copyMakeBorder(img_connect, 0, 0, 10, 10,
                                         cv2.BORDER_CONSTANT, None, value)
        img_recon = cv2.copyMakeBorder(img_recon, 0, 0, 10, 10,
                                       cv2.BORDER_CONSTANT, None, value)
        dst_img = cv2.copyMakeBorder(dst_img, 0, 0, 10, 0,
                                     cv2.BORDER_CONSTANT, None, value)
        im_cat = np.concatenate(
            (img_original, img_connect, img_recon, dst_img), axis=1)
        cv2.imshow('GT:Resample:Recons:Predict', im_cat)
        if cv2.waitKey() & 0xFF == ord('q'):
            break

    cv2.destroyAllWindows()
Exemple #3
0
def main():
    """Show predicted instance contours side by side with the ground truth.

    For every image listed in the selected COCO annotation file the model is
    run at each scale in ``cfg.test_scales``; the decoded contour points are
    mapped back to image coordinates with ``transform_preds``; results whose
    score falls below the ``max_per_image``-th best score are dropped; and the
    remaining results scoring above ``cfg.detect_thres`` are drawn.  The
    prediction overlay and the ground-truth overlay are concatenated
    horizontally and shown in an OpenCV window.  The window waits for a key
    press: ``q`` stops the loop, any other key moves on to the next image.

    Side effects: sets ``cfg.device``, rewrites ``cfg.data_type`` to one of
    ``train2017`` / ``test2017`` / ``val2017`` and rescales the selected
    module-level colour table in place.
    """
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    is_coco = cfg.dataset == 'coco'
    num_classes = 80 if is_coco else 4
    dictionary = np.load(cfg.dictionary_file)

    # The colour table is rescaled in place.  It is not read again inside this
    # function, but the mutation is kept because the table is shared state.
    colors = COCO_COLORS if is_coco else DETRAC_COLORS
    names = COCO_NAMES if is_coco else DETRAC_NAMES
    for j in range(len(names)):
        colors[j] = tuple(c * 255 for c in colors[j])

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2, dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4], num_classes=num_classes)
    elif 'resdcn' in cfg.arch:
        model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]), head_conv=64,
                                num_classes=num_classes, num_codes=cfg.n_codes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loop-invariant inputs: build them once instead of once per scale/image.
    dictionary_tensor = torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device)
    mean = np.array(COCO_MEAN if is_coco else DETRAC_MEAN, dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if is_coco else DETRAC_STD, dtype=np.float32)[None, None, :]
    padding = 127 if 'hourglass' in cfg.arch else 31
    # Column layout of a decoded row as used below: 2 * num_vertices contour
    # coordinates, 4 further coordinates, the score, and the class in the
    # last column.
    score_col = cfg.num_vertices * 2 + 4

    # Pick the annotation file that matches the requested split.
    if 'train' in cfg.data_type:
        annotation_file = '{}/annotations/instances_train2017.json'.format(cfg.data_dir)
        cfg.data_type = 'train2017'
    elif 'test' in cfg.data_type:
        annotation_file = '{}/annotations/image_info_test-dev2017.json'.format(cfg.data_dir)
        cfg.data_type = 'test2017'
    else:
        annotation_file = '{}/annotations/instances_val2017.json'.format(cfg.data_dir)
        cfg.data_type = 'val2017'
    coco = COCO(annotation_file)

    for img_id in coco.getImgIds():
        img_info = coco.loadImgs(img_id)[0]
        image_path = '%s/coco/%s/%s' % (cfg.data_dir, cfg.data_type, img_info['file_name'])
        if int(img_info['width']) < 1 or int(img_info['height']) < 1:
            continue

        gt_anns = coco.loadAnns(ids=coco.getAnnIds(imgIds=img_id))

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            continue
        print('Loading image of id:', img_id)

        # Draw the ground truth: outline on a copy of the image, filled
        # polygons on a separate mask that is blended in at the end.
        gt_image = image.copy()
        gt_blend_mask = np.zeros(shape=gt_image.shape, dtype=np.uint8)
        for ann_ in gt_anns:
            if ann_['iscrowd'] == 1:
                continue
            polygons_ = ann_['segmentation']
            if not isinstance(polygons_, list):
                # An RLE-style dict cannot be drawn as a list of polygons.
                continue
            use_color_key = COLOR_WORLD[random.randint(1, len(COLOR_WORLD)) - 1]
            gt_color = switch_tuple(RGB_DICT[use_color_key])
            for poly in polygons_:
                points = np.array(poly).reshape((-1, 2)).astype(np.int32)
                cv2.polylines(gt_image, [points], True, color=gt_color, thickness=2)
                cv2.drawContours(gt_blend_mask, [points], contourIdx=-1,
                                 color=gt_color, thickness=-1)

        output_image = image.copy()
        height, width = image.shape[0:2]

        # Build one normalised network input per test scale.
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
            else:
                # Round each side up to the next multiple of (padding + 1).
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                scaled_size = np.array([img_width, img_height], dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= mean
            img /= std
            img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            imgs[scale] = {'image': torch.from_numpy(img).float(),
                           'center': np.array(center),
                           'scale': np.array(scaled_size),
                           'fmap_h': np.array(img_height // 4),
                           'fmap_w': np.array(img_width // 4)}

        segmentations = []
        start_time = time.time()
        print('Start running model ......')
        with torch.no_grad():
            for scale, entry in imgs.items():
                entry['image'] = entry['image'].to(cfg.device)
                hmap, regs, w_h_, _, _, codes, offsets = model(entry['image'])[-1]

                segms = ctsegm_scale_decode(hmap, regs, w_h_, codes, offsets,
                                            dictionary_tensor,
                                            K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                # Map every (x, y) pair back to image coordinates: the
                # num_vertices contour points plus the two pairs after them.
                fmap_size = (entry['fmap_w'], entry['fmap_h'])
                for j in range(cfg.num_vertices + 2):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(segms[:, 2 * j:2 * j + 2],
                                                                entry['center'],
                                                                entry['scale'],
                                                                fmap_size)

                # Split by class (labels are 1-based) and undo the test scale.
                clses = segms[:, -1]
                top_preds = {}
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :score_col + 1].astype(np.float32)
                    top_preds[j + 1][:, :score_col] /= scale

                segmentations.append(top_preds)

        segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                            for j in range(1, num_classes + 1)}  # a Dict label: segments
        scores = np.hstack(
            [segms_and_scores[j][:, score_col] for j in range(1, num_classes + 1)])

        print('Image processing time {:.4f} sec, preparing output image ......'.format(time.time() - start_time))

        # Drop everything below the max_per_image-th best score.
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, num_classes + 1):
                keep_inds = (segms_and_scores[j][:, score_col] >= thresh)
                segms_and_scores[j] = segms_and_scores[j][keep_inds]

        # Draw the predictions with OpenCV.
        blend_mask = np.zeros(shape=output_image.shape, dtype=np.uint8)
        for lab in segms_and_scores:
            for res in segms_and_scores[lab]:
                score = res[-1]
                if score > cfg.detect_thres:
                    polygon = res[:-5].reshape((-1, 2))
                    polygon[:, 0] = np.clip(polygon[:, 0], 0, width - 1)
                    polygon[:, 1] = np.clip(polygon[:, 1], 0, height - 1)

                    use_color_key = COLOR_WORLD[random.randint(1, len(COLOR_WORLD)) - 1]
                    pred_color = switch_tuple(RGB_DICT[use_color_key])
                    points = polygon.astype(np.int32)
                    cv2.polylines(output_image, [points], True,
                                  color=pred_color, thickness=2)
                    cv2.drawContours(blend_mask, [points], contourIdx=-1,
                                     color=pred_color, thickness=-1)

        # Blend the filled masks in, keeping untouched pixels at full strength.
        dst_img = cv2.addWeighted(output_image, 0.4, blend_mask, 0.6, 0)
        dst_img[blend_mask == 0] = output_image[blend_mask == 0]

        gt_dst_img = cv2.addWeighted(gt_image, 0.4, gt_blend_mask, 0.6, 0)
        gt_dst_img[gt_blend_mask == 0] = gt_image[gt_blend_mask == 0]

        cat_image = np.concatenate([dst_img, gt_dst_img], axis=1)

        cv2.imshow('Frames', cat_image)

        if cv2.waitKey() & 0xFF == ord('q'):
            break
def main():
    """Run the contour model over a COCO split and evaluate the results.

    Every image in ``instances_<cfg.data_type>.json`` is run through the model
    at each scale in ``cfg.test_scales``.  Decoded contours are mapped back to
    image coordinates, results below the ``max_per_image``-th best score are
    dropped, and every remaining result scoring above ``cfg.detect_thres`` is
    stored twice: as a bounding box (the extent of its contour) and as an
    encoded mask.  Both lists are written as JSON under
    ``<cfg.root_dir>/coco_result`` and then evaluated with ``COCOeval``
    (``bbox`` and ``segm``).

    Raises:
        NotImplementedError: if ``cfg.arch`` does not contain ``hourglass``.
    """
    import os  # local import: only needed to create the result directory

    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    is_coco = cfg.dataset == 'coco'
    num_classes = 80 if is_coco else 4
    dictionary = np.load(cfg.dictionary_file)

    # The colour table is rescaled in place.  It is not read again inside this
    # function, but the mutation is kept because the table is shared state.
    colors = COCO_COLORS if is_coco else DETRAC_COLORS
    names = COCO_NAMES if is_coco else DETRAC_NAMES
    for j in range(len(names)):
        colors[j] = tuple(c * 255 for c in colors[j])

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # Loop-invariant inputs: build them once instead of once per scale/image.
    dictionary_tensor = torch.from_numpy(
        dictionary.astype(np.float32)).to(cfg.device)
    mean = np.array(COCO_MEAN if is_coco else DETRAC_MEAN,
                    dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if is_coco else DETRAC_STD,
                   dtype=np.float32)[None, None, :]
    padding = 127 if 'hourglass' in cfg.arch else 31
    # Column layout of a decoded row as used below: 2 * num_vertices contour
    # coordinates, then the score; the class sits in the last column.
    score_col = cfg.num_vertices * 2

    # Make sure the output directory exists before the long inference loop,
    # so the results are not lost when the files are written at the end.
    det_file = '{}/coco_result/{}_det_results_v{}.json'.format(
        cfg.root_dir, cfg.data_type, cfg.num_vertices)
    seg_file = '{}/coco_result/{}_seg_results_v{}.json'.format(
        cfg.root_dir, cfg.data_type, cfg.num_vertices)
    os.makedirs(os.path.dirname(det_file), exist_ok=True)

    # Loading COCO validation images
    annotation_file = '{}/annotations/instances_{}.json'.format(
        cfg.data_dir, cfg.data_type)
    coco = COCO(annotation_file)

    det_results = []
    seg_results = []

    for img_id in coco.getImgIds():
        img_info = coco.loadImgs(img_id)[0]
        image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type,
                                          img_info['file_name'])
        w_img = int(img_info['width'])
        h_img = int(img_info['height'])
        if w_img < 1 or h_img < 1:
            continue

        image = cv2.imread(image_path)
        if image is None:
            # Missing or unreadable file: skip instead of failing on .shape.
            print('Skipping unreadable image:', image_path)
            continue
        height, width = image.shape[0:2]

        # Build one normalised network input per test scale.
        imgs = {}
        for scale in cfg.test_scales:
            new_height = int(height * scale)
            new_width = int(width * scale)

            if cfg.img_size > 0:
                img_height, img_width = cfg.img_size, cfg.img_size
                center = np.array([new_width / 2., new_height / 2.],
                                  dtype=np.float32)
                scaled_size = max(height, width) * 1.0
                scaled_size = np.array([scaled_size, scaled_size],
                                       dtype=np.float32)
            else:
                # Round each side up to the next multiple of (padding + 1).
                img_height = (new_height | padding) + 1
                img_width = (new_width | padding) + 1
                center = np.array([new_width // 2, new_height // 2],
                                  dtype=np.float32)
                scaled_size = np.array([img_width, img_height],
                                       dtype=np.float32)

            img = cv2.resize(image, (new_width, new_height))
            trans_img = get_affine_transform(center, scaled_size, 0,
                                             [img_width, img_height])
            img = cv2.warpAffine(img, trans_img, (img_width, img_height))

            img = img.astype(np.float32) / 255.
            img -= mean
            img /= std
            img = img.transpose(
                2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

            imgs[scale] = {
                'image': torch.from_numpy(img).float(),
                'center': np.array(center),
                'scale': np.array(scaled_size),
                'fmap_h': np.array(img_height // 4),
                'fmap_w': np.array(img_width // 4)
            }

        segmentations = []
        with torch.no_grad():
            for scale, entry in imgs.items():
                entry['image'] = entry['image'].to(cfg.device)

                output = model(entry['image'])[-1]
                segms = ctsegm_decode(*output,
                                      dictionary_tensor,
                                      K=cfg.test_topk)
                segms = segms.detach().cpu().numpy().reshape(
                    1, -1, segms.shape[2])[0]

                # Map every contour point back to image coordinates.
                fmap_size = (entry['fmap_w'], entry['fmap_h'])
                for j in range(cfg.num_vertices):
                    segms[:, 2 * j:2 * j + 2] = transform_preds(
                        segms[:, 2 * j:2 * j + 2], entry['center'],
                        entry['scale'], fmap_size)

                # Split by class (labels are 1-based) and undo the test scale.
                clses = segms[:, -1]
                top_preds = {}
                for j in range(num_classes):
                    inds = (clses == j)
                    top_preds[j + 1] = segms[inds, :score_col + 1].astype(
                        np.float32)
                    top_preds[j + 1][:, :score_col] /= scale

                segmentations.append(top_preds)

        segms_and_scores = {
            j: np.concatenate([d[j] for d in segmentations], axis=0)
            for j in range(1, num_classes + 1)
        }  # a Dict label: segments
        scores = np.hstack([
            segms_and_scores[j][:, score_col]
            for j in range(1, num_classes + 1)
        ])

        # Drop everything below the max_per_image-th best score.
        if len(scores) > max_per_image:
            kth = len(scores) - max_per_image
            thresh = np.partition(scores, kth)[kth]
            for j in range(1, num_classes + 1):
                keep_inds = (segms_and_scores[j][:, score_col] >= thresh)
                segms_and_scores[j] = segms_and_scores[j][keep_inds]

        # generate coco results for server eval
        for lab in segms_and_scores:
            for res in segms_and_scores[lab]:
                score = res[-1]
                if not score > cfg.detect_thres:
                    continue

                # Contours are in original-image coordinates at this point, so
                # clip them to the image size recorded in the annotation file
                # rather than to the network input size.
                recon_contour = res[:-1].reshape((-1, 2))
                recon_contour[:, 0] = np.clip(recon_contour[:, 0], 0,
                                              w_img - 1)
                recon_contour[:, 1] = np.clip(recon_contour[:, 1], 0,
                                              h_img - 1)
                category_id = int(COCO_IDS[lab - 1])
                rounded_score = float("{:.2f}".format(score))

                x1 = int(min(recon_contour[:, 0]))
                y1 = int(min(recon_contour[:, 1]))
                x2 = int(max(recon_contour[:, 0]))
                y2 = int(max(recon_contour[:, 1]))
                det_results.append({
                    'image_id': int(img_id),
                    'category_id': category_id,
                    'score': rounded_score,
                    'bbox': [x1, y1, x2 - x1, y2 - y1]  # x, y, w, h
                })

                # convert polygons to rle masks; the mask must have the same
                # size as the image so it can be compared with ground truth
                poly = np.ndarray.flatten(
                    recon_contour, order='C').tolist()  # row major flatten
                rles = cocomask.frPyObjects([poly], h_img, w_img)
                rle = cocomask.merge(rles)
                m = cocomask.decode(rle)
                rle_new = encode_mask(m.astype(np.uint8))

                seg_results.append({
                    'image_id': int(img_id),
                    'category_id': category_id,
                    'score': rounded_score,
                    'segmentation': rle_new
                })

    with open(det_file, 'w') as f_det:
        json.dump(det_results, f_det)
    with open(seg_file, 'w') as f_seg:
        json.dump(seg_results, f_seg)

    if not det_results:
        # Loading an empty result list fails, so stop here with a clear note.
        print('No results above the detection threshold; skipping evaluation.')
        return

    # run COCO detection evaluation
    print('Running COCO detection val17 evaluation ...')
    coco_pred = coco.loadRes(det_file)
    imgIds = sorted(coco.getImgIds())
    coco_eval = COCOeval(coco, coco_pred, 'bbox')
    coco_eval.params.imgIds = imgIds
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    print(
        '---------------------------------------------------------------------------------'
    )
    print('Running COCO segmentation val17 evaluation ...')
    coco_pred = coco.loadRes(seg_file)
    coco_eval = COCOeval(coco, coco_pred, 'segm')
    coco_eval.params.imgIds = imgIds
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
Exemple #5
0
def main():
    """Run the box detector over a folder of frames and record the results.

    The files in ``cfg.img_dir`` are processed in sorted name order.  Each
    frame is run through the model at every scale in ``cfg.test_scales``; the
    decoded boxes are mapped back to image coordinates, merged per class (with
    ``soft_nms`` when more than one scale is used) and cut down to the
    ``max_per_image``-th best score.  Boxes scoring above ``cfg.detect_thres``
    are drawn on the frame and appended to the text file as
    ``frame,index,x,y,w,h,score``.  Every processed frame is shown in a window
    and written to the output video; pressing ``q`` stops early.

    The text file and the video writer are closed even when processing fails,
    and files that OpenCV cannot read as images are skipped.

    Raises:
        NotImplementedError: if ``cfg.arch`` does not contain ``hourglass``.
    """
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    is_coco = cfg.dataset == 'coco'
    num_classes = 80 if is_coco else 4

    # The colour table is rescaled in place.  It is not read again inside this
    # function, but the mutation is kept because the table is shared state.
    colors = COCO_COLORS if is_coco else DETRAC_COLORS
    names = COCO_NAMES if is_coco else DETRAC_NAMES
    for j in range(len(names)):
        colors[j] = tuple(c * 255 for c in colors[j])

    # Set up parameters for outputing video
    video_out = cv2.VideoWriter(cfg.output_video_dir,
                                cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'),
                                cfg.video_fps,
                                (cfg.video_width, cfg.video_height))
    try:
        text_out = open(cfg.output_text_dir, 'w')
    except OSError:
        video_out.release()
        raise

    speed_list = []
    try:
        print('Creating model and recover from checkpoint ...')
        if 'hourglass' in cfg.arch:
            model = exkp(n=5, nstack=2, dims=[256, 256, 384, 384, 384, 512],
                         modules=[2, 2, 2, 2, 2, 4], num_classes=num_classes)
        else:
            raise NotImplementedError

        model = load_demo_model(model, cfg.ckpt_dir)
        model = model.to(cfg.device)
        model.eval()

        # Loop-invariant inputs: build them once instead of once per frame.
        mean = np.array(COCO_MEAN if is_coco else DETRAC_MEAN, dtype=np.float32)[None, None, :]
        std = np.array(COCO_STD if is_coco else DETRAC_STD, dtype=np.float32)[None, None, :]
        padding = 127 if 'hourglass' in cfg.arch else 31

        # Loading images
        for frame_id, frame_name in enumerate(sorted(os.listdir(cfg.img_dir))):
            image_path = os.path.join(cfg.img_dir, frame_name)

            image = cv2.imread(image_path)
            if image is None:
                # Not an image (or unreadable): skip instead of failing below.
                print('Skipping unreadable image:', image_path)
                continue
            output_image = image.copy()
            height, width = image.shape[0:2]

            # Build one normalised network input per test scale.
            imgs = {}
            for scale in cfg.test_scales:
                new_height = int(height * scale)
                new_width = int(width * scale)

                if cfg.img_size > 0:
                    img_height, img_width = cfg.img_size, cfg.img_size
                    center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                    scaled_size = max(height, width) * 1.0
                    scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
                else:
                    # Round each side up to the next multiple of (padding + 1).
                    img_height = (new_height | padding) + 1
                    img_width = (new_width | padding) + 1
                    center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                    scaled_size = np.array([img_width, img_height], dtype=np.float32)

                img = cv2.resize(image, (new_width, new_height))
                trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
                img = cv2.warpAffine(img, trans_img, (img_width, img_height))

                img = img.astype(np.float32) / 255.
                img -= mean
                img /= std
                img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

                imgs[scale] = {'image': torch.from_numpy(img).float(),
                               'center': np.array(center),
                               'scale': np.array(scaled_size),
                               'fmap_h': np.array(img_height // 4),
                               'fmap_w': np.array(img_width // 4)}

            detections = []
            start_time = time.time()
            with torch.no_grad():
                for scale, entry in imgs.items():
                    entry['image'] = entry['image'].to(cfg.device)

                    output = model(entry['image'])[-1]
                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

                    # Map both box corners back to image coordinates.
                    fmap_size = (entry['fmap_w'], entry['fmap_h'])
                    dets[:, :2] = transform_preds(dets[:, 0:2],
                                                  entry['center'],
                                                  entry['scale'],
                                                  fmap_size)
                    dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                                   entry['center'],
                                                   entry['scale'],
                                                   fmap_size)

                    # Split by class (labels are 1-based) and undo the scale.
                    cls = dets[:, -1]
                    top_preds = {}
                    for j in range(num_classes):
                        inds = (cls == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

            bbox_and_scores = {}
            for j in range(1, num_classes + 1):
                bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
                if len(cfg.test_scales) > 1:
                    soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
            scores = np.hstack([bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

            # Drop everything below the max_per_image-th best score.
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in range(1, num_classes + 1):
                    keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                    bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

            # Timing covers inference and post-processing, not drawing.
            speed_list.append(time.time() - start_time)

            # Use opencv functions to output a video
            counter = 1
            for lab in bbox_and_scores:
                if is_coco and names[lab] not in DETRAC_compatible_names:
                    continue
                for x1, y1, x2, y2, score in bbox_and_scores[lab]:
                    if score > cfg.detect_thres:
                        cv2.rectangle(output_image, pt1=(int(x1), int(y1)),
                                      pt2=(int(x2), int(y2)),
                                      color=(0, 255, 0), thickness=2)

                        # add to text file
                        new_line = '{0},{1},{2:.3f},{3:.3f},{4:.3f},{5:.3f},{6:.4f}\n'.format(
                            str(frame_id + 1), counter,
                            x1, y1, x2 - x1, y2 - y1,
                            score)
                        counter += 1
                        text_out.write(new_line)

            cv2.imshow('Frames', output_image)
            video_out.write(output_image)
            if cv2.waitKey(5) & 0xFF == ord('q'):
                break
    finally:
        # Flush the detections and finalise the video on every exit path.
        text_out.close()
        video_out.release()

    if speed_list:
        print('Test frame rate:', 1. / np.mean(speed_list))
    else:
        print('No frames were processed.')
Exemple #6
0
def main():
    """Overlay ground-truth and predicted boxes on one DETRAC test sequence.

    Reads ``<cfg.label_dir>/DETRAC-Test-Annotations-XML/<cfg.video_name>.xml``,
    runs the hourglass detector on every frame found in
    ``<cfg.img_dir>/<cfg.video_name>`` and draws the annotated boxes with
    colour (0, 255, 0) and every detection scoring above ``cfg.detect_thres``
    with colour (0, 0, 255).  Each annotated frame is shown in an OpenCV
    window and appended to ``<cfg.root_dir>/<cfg.video_name>_compare.mkv``.
    Pressing ``q`` in the window ends the run early.  The mean frame rate of
    the detection step is printed at the end.

    Raises:
        FileNotFoundError: if the annotation folder does not exist.
        NotImplementedError: if ``cfg.arch`` does not contain 'hourglass'.
    """
    # ---- Ground-truth boxes of the selected sequence -----------------------
    detrac_root = cfg.label_dir
    dataType = 'Test'

    annotation_folder = 'DETRAC-{}-Annotations-XML'.format(dataType)
    annotation_path = os.path.join(detrac_root, annotation_folder)
    if not os.path.exists(annotation_path):
        print('annotation_path not exist')
        raise FileNotFoundError(annotation_path)

    label_file = os.path.join(annotation_path, cfg.video_name + '.xml')
    root = ET.parse(label_file).getroot()

    # Maps a frame's 'num' attribute to its boxes as [x1, y1, x2, y2].
    Box_dict = {}
    for obj in root.iter('frame'):
        boxes = []
        target_list = obj.find('target_list')
        # A <frame> without a <target_list> child contributes no boxes
        # (iterating the None returned by find() would raise TypeError).
        for target in (target_list if target_list is not None else ()):
            bbox = target.find('box').attrib
            left = float(bbox['left'])
            top = float(bbox['top'])
            boxes.append([left, top,
                          left + float(bbox['width']),
                          top + float(bbox['height'])])  # x1, y1, x2, y2
        Box_dict[int(obj.attrib['num'])] = boxes

    # ---- Model -------------------------------------------------------------
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 150
    num_classes = 80 if cfg.dataset == 'coco' else 4

    # Scale the per-class colour table by 255.  This updates the shared
    # table in place, exactly as before.
    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        colors[j] = tuple(c * 255 for c in colors[j])

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5,
                     nstack=2,
                     dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4],
                     num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # ---- Per-frame constants -----------------------------------------------
    padding = 127 if 'hourglass' in cfg.arch else 31
    mean = np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN,
                    dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD,
                   dtype=np.float32)[None, None, :]

    speed_list = []
    frame_list = sorted(os.listdir(os.path.join(cfg.img_dir, cfg.video_name)))

    # Set up parameters for outputting video
    video_out = cv2.VideoWriter(
        os.path.join(cfg.root_dir, cfg.video_name + '_compare.mkv'),
        cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), cfg.video_fps,
        (cfg.video_width, cfg.video_height))

    try:
        # Frames are matched to annotations by position: the i-th file (in
        # sorted order) is looked up as frame number i, counting from 1.
        for frame_n, frame_name in enumerate(frame_list, start=1):
            image_path = os.path.join(cfg.img_dir, cfg.video_name, frame_name)

            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returns None for a missing or undecodable file.
                print('Skipping unreadable frame:', image_path)
                continue
            original_image = image.copy()
            height, width = image.shape[0:2]

            imgs = {}
            for scale in cfg.test_scales:
                new_height = int(height * scale)
                new_width = int(width * scale)

                if cfg.img_size > 0:
                    img_height, img_width = cfg.img_size, cfg.img_size
                    center = np.array([new_width / 2., new_height / 2.],
                                      dtype=np.float32)
                    scaled_size = max(height, width) * 1.0
                    scaled_size = np.array([scaled_size, scaled_size],
                                           dtype=np.float32)
                else:
                    # Round each side up to the next multiple of padding + 1.
                    img_height = (new_height | padding) + 1
                    img_width = (new_width | padding) + 1
                    center = np.array([new_width // 2, new_height // 2],
                                      dtype=np.float32)
                    scaled_size = np.array([img_width, img_height],
                                           dtype=np.float32)

                img = cv2.resize(image, (new_width, new_height))
                trans_img = get_affine_transform(center, scaled_size, 0,
                                                 [img_width, img_height])
                img = cv2.warpAffine(img, trans_img, (img_width, img_height))

                img = img.astype(np.float32) / 255.
                img -= mean
                img /= std
                # from [H, W, C] to [1, C, H, W]
                img = img.transpose(2, 0, 1)[None, :, :, :]

                imgs[scale] = {
                    'image': torch.from_numpy(img).float(),
                    'center': np.array(center),
                    'scale': np.array(scaled_size),
                    'fmap_h': np.array(img_height // 4),
                    'fmap_w': np.array(img_width // 4)
                }

            with torch.no_grad():
                detections = []
                start_time = time.time()
                for scale in imgs:
                    entry = imgs[scale]
                    entry['image'] = entry['image'].to(cfg.device)

                    output = model(entry['image'])[-1]
                    dets = ctdet_decode(*output, K=cfg.test_topk)
                    dets = dets.detach().cpu().numpy().reshape(
                        1, -1, dets.shape[2])[0]

                    # Map both box corners from feature-map coordinates back
                    # to the (scaled) input image.
                    fmap_size = (entry['fmap_w'], entry['fmap_h'])
                    dets[:, :2] = transform_preds(
                        dets[:, 0:2], entry['center'], entry['scale'],
                        fmap_size)
                    dets[:, 2:4] = transform_preds(
                        dets[:, 2:4], entry['center'], entry['scale'],
                        fmap_size)

                    # Split by class; labels are stored 1-based.
                    cls = dets[:, -1]
                    top_preds = {}
                    for j in range(num_classes):
                        inds = (cls == j)
                        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
                        top_preds[j + 1][:, :4] /= scale

                    detections.append(top_preds)

                bbox_and_scores = {}
                for j in range(1, num_classes + 1):
                    bbox_and_scores[j] = np.concatenate(
                        [d[j] for d in detections], axis=0)
                    if len(cfg.test_scales) > 1:
                        soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
                scores = np.hstack(
                    [bbox_and_scores[j][:, 4]
                     for j in range(1, num_classes + 1)])

                # Keep only the max_per_image highest-scoring detections.
                if len(scores) > max_per_image:
                    kth = len(scores) - max_per_image
                    thresh = np.partition(scores, kth)[kth]
                    for j in range(1, num_classes + 1):
                        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
                        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

                speed_list.append(time.time() - start_time)

                # Use opencv functions to output a video
                output_image = original_image

                # Plot the GT boxes; a frame that is missing from the
                # annotation file simply has none.
                for rect in Box_dict.get(frame_n, []):
                    cv2.rectangle(output_image,
                                  pt1=(int(rect[0]), int(rect[1])),
                                  pt2=(int(rect[2]), int(rect[3])),
                                  color=(0, 255, 0),
                                  thickness=2)

                # Plot the detections above the score threshold.
                for lab in bbox_and_scores:
                    if (cfg.dataset == 'coco'
                            and names[lab] not in DETRAC_compatible_names):
                        continue
                    for x1, y1, x2, y2, score in bbox_and_scores[lab]:
                        if score > cfg.detect_thres:
                            cv2.rectangle(output_image,
                                          pt1=(int(x1), int(y1)),
                                          pt2=(int(x2), int(y2)),
                                          color=(0, 0, 255),
                                          thickness=2)

                cv2.imshow('Frames', output_image)
                video_out.write(output_image)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    finally:
        # Finalise the video file and close the preview window even when the
        # loop is left early or an error occurs.
        video_out.release()
        cv2.destroyAllWindows()

    if speed_list:
        print('Test frame rate:', 1. / np.mean(speed_list))
    else:
        print('No frames were processed.')
def main():
    """Compare ground-truth and predicted contour codes on COCO instances.

    For every non-crowd annotation of the hard-coded category ('giraffe')
    whose segmentation consists of exactly one polygon, the function

    1. resamples the polygon to ``cfg.num_vertices`` vertices, flips it when
       ``check_clockwise_polygon`` reports it is not clockwise, rotates it to
       start at the left-most vertex and clips it to the annotated box;
    2. sparse-codes the normalised shape against the dictionary loaded from
       ``cfg.dictionary_file`` (``fast_ista``) and reconstructs the contour;
    3. runs the network at every scale in ``cfg.test_scales`` and decodes
       contours, codes and mass centres;
    4. for each detection scoring above ``cfg.detect_thres`` shows the
       reconstructed ground truth (green) next to the prediction (red) and
       then a matplotlib figure comparing both coefficient vectors.

    Pressing ``q`` in the OpenCV window ends the demo; any other key
    continues with the coefficient plots.

    Raises:
        NotImplementedError: if ``cfg.arch`` names neither an hourglass nor a
            resdcn network.
    """
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    # Scale the per-class colour table by 255.  This updates the shared
    # table in place, exactly as before.
    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        colors[j] = tuple(c * 255 for c in colors[j])

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2, dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4], num_classes=num_classes)
    elif 'resdcn' in cfg.arch:
        model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]), head_conv=64,
                                num_classes=num_classes, num_codes=cfg.n_codes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # The dictionary is the same for every image and scale: upload it once
    # instead of converting and copying it inside the inference loop.
    dictionary_tensor = torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device)

    # Loading COCO images (train2017 when requested, val2017 otherwise)
    if 'train' in cfg.data_type:
        annotation_file = '{}/annotations/instances_train2017.json'.format(cfg.data_dir)
        cfg.data_type = 'train2017'
    else:
        annotation_file = '{}/annotations/instances_val2017.json'.format(cfg.data_dir)
        cfg.data_type = 'val2017'
    coco = COCO(annotation_file)

    # Load all annotations of the inspected category
    nms = ['giraffe']
    catIds = coco.getCatIds(catNms=nms)
    annIds = coco.getAnnIds(catIds=catIds)
    all_anns = coco.loadAnns(ids=annIds)

    padding = 127 if 'hourglass' in cfg.arch else 31
    mean = np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN, dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD, dtype=np.float32)[None, None, :]
    # Column layout of a decoded detection row:
    # [2 * num_vertices contour coords | 4 box coords | score | ... | class]
    n_coords = cfg.num_vertices * 2
    score_col = n_coords + 4

    try:
        for annotation in all_anns:
            segmentation = annotation['segmentation']
            # Keep non-crowd instances described by exactly one polygon.
            if (annotation['iscrowd'] == 1 or not isinstance(segmentation, list)
                    or len(segmentation) != 1):
                continue

            img_info = coco.loadImgs(annotation['image_id'])[0]
            image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type, img_info['file_name'])
            if int(img_info['width']) < 350 or int(img_info['height']) < 350:
                continue

            gt_x1, gt_y1, gt_w, gt_h = annotation['bbox']
            gt_contour = np.array(segmentation[0]).reshape((-1, 2))
            if cv2.contourArea(gt_contour.astype(np.int32)) < 200:
                continue

            # Read the image before the (comparatively expensive) sparse
            # coding so that unreadable files are skipped cheaply.
            image = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if image is None:
                continue

            # Downsample the contour to fix number of vertices
            fixed_contour = resample(gt_contour, num=cfg.num_vertices)
            if not check_clockwise_polygon(fixed_contour):
                fixed_contour = np.flip(fixed_contour, axis=0)

            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            indexed_shape[:, 0] = np.clip(indexed_shape[:, 0], gt_x1, gt_x1 + gt_w)
            indexed_shape[:, 1] = np.clip(indexed_shape[:, 1], gt_y1, gt_y1 + gt_h)

            w = np.max(indexed_shape[:, 0]) - np.min(indexed_shape[:, 0])
            h = np.max(indexed_shape[:, 1]) - np.min(indexed_shape[:, 1])
            if w <= 0 or h <= 0:
                # Degenerate contour: normalising would divide by zero.
                continue
            contour_mean = np.mean(indexed_shape, axis=0)
            half_extent = np.array([w / 2., h / 2.])

            norm_shape = (indexed_shape - contour_mean) / half_extent
            gt_codes, _ = fast_ista(norm_shape.reshape((1, -1)), dictionary, lmbda=0.005, max_iter=80)

            recon_contour = np.matmul(gt_codes, dictionary).reshape((-1, 2)) * half_extent
            recon_contour = recon_contour + contour_mean

            original_image = image.copy()
            height, width = image.shape[0:2]
            imgs = {}
            for scale in cfg.test_scales:
                new_height = int(height * scale)
                new_width = int(width * scale)

                if cfg.img_size > 0:
                    img_height, img_width = cfg.img_size, cfg.img_size
                    center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                    scaled_size = max(height, width) * 1.0
                    scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
                else:
                    # Round each side up to the next multiple of padding + 1.
                    img_height = (new_height | padding) + 1
                    img_width = (new_width | padding) + 1
                    center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                    scaled_size = np.array([img_width, img_height], dtype=np.float32)

                img = cv2.resize(image, (new_width, new_height))
                trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
                img = cv2.warpAffine(img, trans_img, (img_width, img_height))

                img = img.astype(np.float32) / 255.
                img -= mean
                img /= std
                img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

                imgs[scale] = {'image': torch.from_numpy(img).float(),
                               'center': np.array(center),
                               'scale': np.array(scaled_size),
                               'fmap_h': np.array(img_height // 4),
                               'fmap_w': np.array(img_width // 4)}

            with torch.no_grad():
                segmentations = []
                predicted_codes = []
                mass_centers = []
                start_time = time.time()
                print('Start running model ......')
                for scale in imgs:
                    entry = imgs[scale]
                    entry['image'] = entry['image'].to(cfg.device)
                    hmap, regs, w_h_, _, _, codes, offsets = model(entry['image'])[-1]

                    segms, pred_codes, pred_center = ctsegm_scale_decode_debug(
                        hmap, regs, w_h_, codes, offsets, dictionary_tensor, K=cfg.test_topk)
                    segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]
                    pred_codes = pred_codes.detach().cpu().numpy().reshape(-1, pred_codes.shape[-1])
                    pred_center = pred_center.detach().cpu().numpy().reshape(-1, 2)

                    # Map every (x, y) pair back to image coordinates: the
                    # num_vertices contour points followed by the two box
                    # corners.
                    fmap_size = (entry['fmap_w'], entry['fmap_h'])
                    for j in range(cfg.num_vertices + 2):
                        segms[:, 2 * j:2 * j + 2] = transform_preds(
                            segms[:, 2 * j:2 * j + 2], entry['center'], entry['scale'], fmap_size)
                    # For mass center
                    pred_center = transform_preds(pred_center, entry['center'], entry['scale'], fmap_size)

                    # Split by class; labels are stored 1-based.
                    clses = segms[:, -1]
                    top_preds = {}
                    code_preds = {}
                    center_preds = {}
                    for j in range(num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = segms[inds, :score_col + 1].astype(np.float32)
                        top_preds[j + 1][:, :score_col] /= scale
                        center_preds[j + 1] = pred_center[inds, :] / scale
                        code_preds[j + 1] = pred_codes[inds, :]

                    segmentations.append(top_preds)
                    predicted_codes.append(code_preds)
                    mass_centers.append(center_preds)

            labels = range(1, num_classes + 1)
            segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                for j in labels}  # a Dict label: segments
            segms_and_codes = {j: np.concatenate([d[j] for d in predicted_codes], axis=0)
                               for j in labels}
            segms_and_centers = {j: np.concatenate([d[j] for d in mass_centers], axis=0)
                                 for j in labels}
            scores = np.hstack([segms_and_scores[j][:, score_col] for j in labels])

            print('Image processing time {:.4f} sec, preparing output image ......'.format(time.time() - start_time))

            # Keep only the max_per_image highest-scoring detections.
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in labels:
                    keep_inds = (segms_and_scores[j][:, score_col] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]
                    segms_and_codes[j] = segms_and_codes[j][keep_inds]
                    segms_and_centers[j] = segms_and_centers[j][keep_inds]

            for lab in segms_and_scores:
                # Each class starts from a clean copy; detections of the same
                # class accumulate on it.
                output_image = original_image.copy()
                for res, p_code, p_center in zip(segms_and_scores[lab],
                                                 segms_and_codes[lab],
                                                 segms_and_centers[lab]):
                    contour, score = res[:-5], res[-1]

                    polygon = contour.reshape((-1, 2))
                    polygon[:, 0] = np.clip(polygon[:, 0], 0, width - 1)
                    polygon[:, 1] = np.clip(polygon[:, 1], 0, height - 1)
                    if score > cfg.detect_thres:
                        # plot the polygons/contours
                        cv2.polylines(output_image, [recon_contour.astype(np.int32)], True,
                                      color=switch_tuple(RGB_DICT['green']), thickness=2)
                        cv2.polylines(output_image, [polygon.astype(np.int32)], True,
                                      color=switch_tuple(RGB_DICT['red']), thickness=2)

                        # plot the mass center location
                        cv2.circle(output_image, tuple(contour_mean.astype(np.int32).tolist()),
                                   radius=9, color=switch_tuple(RGB_DICT['green']), thickness=-1)
                        cv2.circle(output_image, tuple(p_center.astype(np.int32).tolist()),
                                   radius=9, color=switch_tuple(RGB_DICT['red']), thickness=-1)

                        cv2.imshow('Frames', output_image)
                        if cv2.waitKey() & 0xFF == ord('q'):
                            # Quit the whole demo; a plain `break` would only
                            # leave this innermost loop.
                            return

                        # show histogram
                        fig, (ax1, ax2) = plt.subplots(1, 2)
                        # plot 1
                        bins = np.linspace(-2, 2, 30)
                        ax1.hist(gt_codes.reshape((-1,)).tolist(), bins=bins, color='g', density=False, alpha=0.5)
                        ax1.hist(p_code.reshape((-1,)).tolist(), bins=bins, color='r', density=False, alpha=0.5)
                        ax1.legend(['GT Coeffs', 'Pred Coeffs'])
                        ax1.set_xlabel('Sparse Coefficients')
                        ax1.set_ylabel('Counts')
                        ax1.set_title('Histogram of Coefficients')

                        # plot 2
                        ax2.plot(gt_codes.reshape((-1,)), 'g*-', linewidth=2, markersize=6)
                        ax2.plot(p_code.reshape((-1,)), 'ro--', linewidth=1, markersize=5)
                        ax2.legend(['GT Coeffs', 'Pred Coeffs'])
                        ax2.set_xlabel('Coefficients Index')
                        ax2.set_ylabel('Value')
                        ax2.set_title('Coefficients')

                        plt.show()
                        plt.close()
    finally:
        # Close the preview window however the loop is left.
        cv2.destroyAllWindows()
Exemple #8
0
def main():
    """Show predicted instance contours on COCO images.

    Iterates over every non-crowd, polygon-annotated instance in
    ``<cfg.data_dir>/annotations/instances_<cfg.data_type>.json``, runs the
    hourglass network on the image the instance belongs to (at every scale in
    ``cfg.test_scales``) and draws each predicted contour scoring above
    ``cfg.detect_thres``.  The result is shown in an OpenCV window; any key
    advances to the next annotation and ``q`` quits.

    Because the loop runs per annotation, an image that carries several
    annotations is processed and shown once for each of them.

    Raises:
        NotImplementedError: if ``cfg.arch`` does not contain 'hourglass'.
    """
    cfg.device = torch.device('cuda')
    torch.backends.cudnn.benchmark = False

    max_per_image = 100
    num_classes = 80 if cfg.dataset == 'coco' else 4
    dictionary = np.load(cfg.dictionary_file)

    # Scale the per-class colour table by 255.  This updates the shared
    # table in place, exactly as before.
    colors = COCO_COLORS if cfg.dataset == 'coco' else DETRAC_COLORS
    names = COCO_NAMES if cfg.dataset == 'coco' else DETRAC_NAMES
    for j in range(len(names)):
        colors[j] = tuple(c * 255 for c in colors[j])

    print('Creating model and recover from checkpoint ...')
    if 'hourglass' in cfg.arch:
        model = exkp(n=5, nstack=2, dims=[256, 256, 384, 384, 384, 512],
                     modules=[2, 2, 2, 2, 2, 4], num_classes=num_classes)
    else:
        raise NotImplementedError

    model = load_demo_model(model, cfg.ckpt_dir)
    model = model.to(cfg.device)
    model.eval()

    # The dictionary is the same for every image and scale: upload it once
    # instead of converting and copying it inside the inference loop.
    dictionary_tensor = torch.from_numpy(dictionary.astype(np.float32)).to(cfg.device)

    # Loading COCO validation images
    annotation_file = '{}/annotations/instances_{}.json'.format(cfg.data_dir, cfg.data_type)
    coco = COCO(annotation_file)

    # Load all annotations
    cats = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in cats]
    catIds = coco.getCatIds(catNms=nms)
    annIds = coco.getAnnIds(catIds=catIds)
    all_anns = coco.loadAnns(ids=annIds)

    padding = 127 if 'hourglass' in cfg.arch else 31
    mean = np.array(COCO_MEAN if cfg.dataset == 'coco' else DETRAC_MEAN, dtype=np.float32)[None, None, :]
    std = np.array(COCO_STD if cfg.dataset == 'coco' else DETRAC_STD, dtype=np.float32)[None, None, :]
    # Column layout of a decoded row:
    # [2 * num_vertices contour coords | score | ... | class]
    n_coords = cfg.num_vertices * 2

    try:
        for annotation in all_anns:
            segmentation = annotation['segmentation']
            # Skip crowd regions, non-polygon masks and empty polygon lists.
            if (annotation['iscrowd'] == 1 or not isinstance(segmentation, list)
                    or not segmentation):
                continue

            img_info = coco.loadImgs(annotation['image_id'])[0]
            image_path = '%s/images/%s/%s' % (cfg.data_dir, cfg.data_type, img_info['file_name'])
            if int(img_info['width']) < 1 or int(img_info['height']) < 1:
                continue

            # ---- Ground-truth contour preparation --------------------------
            # NOTE(review): the values computed in this section are not used
            # by the prediction overlay drawn further down.
            gt_x1, gt_y1, gt_w, gt_h = annotation['bbox']
            contour = np.array(segmentation[0]).reshape((-1, 2))

            # Downsample the contour to fix number of vertices
            fixed_contour = resample(contour, num=cfg.num_vertices)

            # Indexing from the left-most vertex, argmin x-axis
            idx = np.argmin(fixed_contour[:, 0])
            indexed_shape = np.concatenate((fixed_contour[idx:, :], fixed_contour[:idx, :]), axis=0)

            if check_clockwise_polygon(indexed_shape):
                fixed_contour = indexed_shape.copy()
            else:
                fixed_contour = np.flip(indexed_shape, axis=0)

            fixed_contour[:, 0] = np.clip(fixed_contour[:, 0], gt_x1, gt_x1 + gt_w)
            fixed_contour[:, 1] = np.clip(fixed_contour[:, 1], gt_y1, gt_y1 + gt_h)

            contour_mean = np.mean(fixed_contour, axis=0)
            contour_std = np.std(fixed_contour, axis=0)

            # ---- Network input ---------------------------------------------
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread returns None for a missing or undecodable file.
                continue
            original_image = image.copy()
            height, width = image.shape[0:2]
            imgs = {}
            for scale in cfg.test_scales:
                new_height = int(height * scale)
                new_width = int(width * scale)

                if cfg.img_size > 0:
                    img_height, img_width = cfg.img_size, cfg.img_size
                    center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
                    scaled_size = max(height, width) * 1.0
                    scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
                else:
                    # Round each side up to the next multiple of padding + 1.
                    img_height = (new_height | padding) + 1
                    img_width = (new_width | padding) + 1
                    center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
                    scaled_size = np.array([img_width, img_height], dtype=np.float32)

                img = cv2.resize(image, (new_width, new_height))
                trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
                img = cv2.warpAffine(img, trans_img, (img_width, img_height))

                img = img.astype(np.float32) / 255.
                img -= mean
                img /= std
                img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

                imgs[scale] = {'image': torch.from_numpy(img).float(),
                               'center': np.array(center),
                               'scale': np.array(scaled_size),
                               'fmap_h': np.array(img_height // 4),
                               'fmap_w': np.array(img_width // 4)}

            with torch.no_grad():
                segmentations = []
                for scale in imgs:
                    entry = imgs[scale]
                    entry['image'] = entry['image'].to(cfg.device)

                    output = model(entry['image'])[-1]
                    segms = ctsegm_decode(*output, dictionary_tensor, K=cfg.test_topk)
                    segms = segms.detach().cpu().numpy().reshape(1, -1, segms.shape[2])[0]

                    # Map every contour vertex back to image coordinates.
                    fmap_size = (entry['fmap_w'], entry['fmap_h'])
                    for j in range(cfg.num_vertices):
                        segms[:, 2 * j:2 * j + 2] = transform_preds(
                            segms[:, 2 * j:2 * j + 2], entry['center'], entry['scale'], fmap_size)

                    # Split by class; labels are stored 1-based.
                    clses = segms[:, -1]
                    top_preds = {}
                    for j in range(num_classes):
                        inds = (clses == j)
                        top_preds[j + 1] = segms[inds, :n_coords + 1].astype(np.float32)
                        top_preds[j + 1][:, :n_coords] /= scale

                    segmentations.append(top_preds)

            labels = range(1, num_classes + 1)
            segms_and_scores = {j: np.concatenate([d[j] for d in segmentations], axis=0)
                                for j in labels}  # a Dict label: segments
            scores = np.hstack([segms_and_scores[j][:, n_coords] for j in labels])

            # Keep only the max_per_image highest-scoring detections.
            if len(scores) > max_per_image:
                kth = len(scores) - max_per_image
                thresh = np.partition(scores, kth)[kth]
                for j in labels:
                    keep_inds = (segms_and_scores[j][:, n_coords] >= thresh)
                    segms_and_scores[j] = segms_and_scores[j][keep_inds]

            # Draw every confident contour on the original image.
            output_image = original_image
            for lab in segms_and_scores:
                for res in segms_and_scores[lab]:
                    contour, score = res[:-1], res[-1]
                    if score > cfg.detect_thres:
                        polygon = contour.reshape((-1, 2))
                        cv2.polylines(output_image, [polygon.astype(np.int32)], True, (255, 0, 0), thickness=2)

            cv2.imshow('Results', output_image)
            if cv2.waitKey() & 0xFF == ord('q'):
                break
    finally:
        # Close the preview window however the loop is left.
        cv2.destroyAllWindows()
Exemple #9
0
def main():
  """Detect objects in a single image and plot the resulting boxes.

  Reads the image at ``cfg.img_dir``, builds one network input per entry of
  ``cfg.test_scales`` (with a horizontally flipped copy appended when
  ``cfg.test_flip`` is set), runs the network selected by ``cfg.arch``,
  merges the per-scale detections, keeps at most ``max_per_image`` of them
  and draws every box scoring above ``score_thresh`` with matplotlib. The
  figure is written to ``demo_results.png`` and then shown.

  Raises:
    FileNotFoundError: if ``cfg.img_dir`` cannot be read as an image.
    NotImplementedError: if ``cfg.arch`` contains neither ``'hourglass'``
      nor ``'resdcn'``.
  """
  cfg.device = torch.device('cuda')
  torch.backends.cudnn.benchmark = False

  max_per_image = 100  # cap on detections kept over all classes
  score_thresh = 0.3  # boxes scoring at or below this are not drawn
  is_coco = cfg.dataset == 'coco'
  # Single source of truth for the class count; every non-COCO dataset is
  # handled with the 20-class (VOC) settings, as in the original branches.
  num_classes = 80 if is_coco else 20

  image = cv2.imread(cfg.img_dir)
  if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError('Cannot read image: %s' % cfg.img_dir)

  height, width = image.shape[0:2]
  padding = 127 if 'hourglass' in cfg.arch else 31

  # Normalisation constants do not depend on the scale; build them once.
  mean = np.array(COCO_MEAN if is_coco else VOC_MEAN, dtype=np.float32)[None, None, :]
  std = np.array(COCO_STD if is_coco else VOC_STD, dtype=np.float32)[None, None, :]

  imgs = {}
  for scale in cfg.test_scales:
    new_height = int(height * scale)
    new_width = int(width * scale)

    if cfg.img_size > 0:
      # Fixed square network input of side cfg.img_size.
      img_height, img_width = cfg.img_size, cfg.img_size
      center = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
      scaled_size = max(height, width) * 1.0
      scaled_size = np.array([scaled_size, scaled_size], dtype=np.float32)
    else:
      # Round each side up to the next multiple of (padding + 1).
      img_height = (new_height | padding) + 1
      img_width = (new_width | padding) + 1
      center = np.array([new_width // 2, new_height // 2], dtype=np.float32)
      scaled_size = np.array([img_width, img_height], dtype=np.float32)

    img = cv2.resize(image, (new_width, new_height))
    trans_img = get_affine_transform(center, scaled_size, 0, [img_width, img_height])
    img = cv2.warpAffine(img, trans_img, (img_width, img_height))

    img = img.astype(np.float32) / 255.
    img -= mean
    img /= std
    img = img.transpose(2, 0, 1)[None, :, :, :]  # from [H, W, C] to [1, C, H, W]

    if cfg.test_flip:
      # Append a horizontally mirrored copy along the batch axis.
      img = np.concatenate((img, img[:, :, :, ::-1].copy()), axis=0)

    imgs[scale] = {'image': torch.from_numpy(img).float(),
                   'center': np.array(center),
                   'scale': np.array(scaled_size),
                   'fmap_h': np.array(img_height // 4),
                   'fmap_w': np.array(img_width // 4)}

  print('Creating model...')
  if 'hourglass' in cfg.arch:
    model = get_hourglass[cfg.arch]
  elif 'resdcn' in cfg.arch:
    model = get_pose_resdcn(num_layers=int(cfg.arch.split('_')[-1]),
                            num_classes=num_classes)
  else:
    raise NotImplementedError

  model = load_demo_model(model, cfg.ckpt_dir)
  model = model.to(cfg.device)
  model.eval()

  with torch.no_grad():
    detections = []
    for scale, inputs in imgs.items():
      inputs['image'] = inputs['image'].to(cfg.device)

      output = model(inputs['image'])[-1]
      dets = ctdet_decode(*output, K=cfg.test_topk)
      dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2])[0]

      # Map both box corners from feature-map coordinates back to the
      # coordinates of the resized image.
      fmap_size = (inputs['fmap_w'], inputs['fmap_h'])
      dets[:, :2] = transform_preds(dets[:, 0:2],
                                    inputs['center'],
                                    inputs['scale'],
                                    fmap_size)
      dets[:, 2:4] = transform_preds(dets[:, 2:4],
                                     inputs['center'],
                                     inputs['scale'],
                                     fmap_size)

      # Group detections by class; keys are 1-based class labels.
      cls = dets[:, -1]
      top_preds = {}
      for j in range(num_classes):
        inds = (cls == j)
        top_preds[j + 1] = dets[inds, :5].astype(np.float32)
        top_preds[j + 1][:, :4] /= scale  # undo the test-scale resize

      detections.append(top_preds)

    # Merge the detections of all scales, class by class.
    bbox_and_scores = {}
    for j in range(1, num_classes + 1):
      bbox_and_scores[j] = np.concatenate([d[j] for d in detections], axis=0)
      # if len(cfg.test_scales) > 1:
      #   soft_nms(bbox_and_scores[j], Nt=0.5, method=2)
    scores = np.hstack([bbox_and_scores[j][:, 4] for j in range(1, num_classes + 1)])

    if len(scores) > max_per_image:
      # Use the max_per_image-th highest score as the keep threshold.
      kth = len(scores) - max_per_image
      thresh = np.partition(scores, kth)[kth]
      for j in range(1, num_classes + 1):
        keep_inds = (bbox_and_scores[j][:, 4] >= thresh)
        bbox_and_scores[j] = bbox_and_scores[j][keep_inds]

    fig = plt.figure(0)
    colors = COCO_COLORS if is_coco else VOC_COLORS
    names = COCO_NAMES if is_coco else VOC_NAMES
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    for lab in bbox_and_scores:
      for boxes in bbox_and_scores[lab]:
        x1, y1, x2, y2, score = boxes
        if score > score_thresh:
          plt.gca().add_patch(Rectangle((x1, y1), x2 - x1, y2 - y1,
                                        linewidth=2, edgecolor=colors[lab], facecolor='none'))
          # Space between class name and score keeps the label readable.
          plt.text(x1 + 3, y1 + 3, '%s %.2f' % (names[lab], score),
                   bbox=dict(facecolor=colors[lab], alpha=0.5), fontsize=7, color='k')

    fig.patch.set_visible(False)
    plt.axis('off')
    plt.savefig('demo_results.png', dpi=300, transparent=True)
    plt.show()