Example #1
0
File: test.py Project: zzzzlalala/gluon-cv
def test(dataset, tracker, opt, ctx):
    """SiamRPN test.

    Run ``tracker`` over every video in ``dataset`` (optionally restricted
    to the single video named by ``opt.video``), write the predicted
    bounding boxes of each video to a text file under ``opt.results_path``,
    and print per-video timing/FPS statistics.

    Parameters
    ----------
    dataset : iterable
        Sequence of video objects; each video yields ``(img, gt_bbox)``
        pairs and exposes ``name`` and ``len()``.
    tracker : object
        Tracker exposing ``init(img, bbox, ctx)`` and ``track(img, ctx)``;
        ``track`` returns a dict with 'bbox' and 'best_score'.
    opt : argparse.Namespace
        Options read here: ``video``, ``vis``, ``results_path``,
        ``dataset``, ``model_path``.
    ctx : mxnet.Context
        Context such as mx.cpu() or mx.gpu(0).
    """
    cv2 = try_import_cv2()
    for v_idx, video in enumerate(dataset):
        # If a specific video was requested, skip all the others.
        if opt.video != '':
            if video.name != opt.video:
                continue
        toc = 0
        pred_bboxes = []
        scores = []
        track_times = []
        for idx, (img, gt_bbox) in enumerate(video):
            tic = cv2.getTickCount()
            if idx == 0:
                # First frame: initialize the tracker from ground truth.
                x_max, y_max, gt_w, gt_t = get_axis_aligned_bbox(
                    np.array(gt_bbox))
                # Convert center-based (cx, cy, w, h) to corner-based
                # [x, y, w, h] expected by tracker.init.
                gt_bbox_ = [
                    x_max - (gt_w - 1) / 2, y_max - (gt_t - 1) / 2, gt_w, gt_t
                ]
                tracker.init(img, gt_bbox_, ctx)
                pred_bbox = gt_bbox_
                scores.append(None)
                pred_bboxes.append(pred_bbox)
            else:
                # Later frames: track and record the box plus confidence.
                outputs = tracker.track(img, ctx)
                pred_bbox = outputs['bbox']
                pred_bboxes.append(pred_bbox)
                scores.append(outputs['best_score'])
            toc += cv2.getTickCount() - tic
            track_times.append(
                (cv2.getTickCount() - tic) / cv2.getTickFrequency())
            if idx == 0:
                cv2.destroyAllWindows()
            if opt.vis and idx > 0:
                # Visualization: ground truth in green, prediction in
                # yellow, frame index in the top-left corner.
                gt_bbox = list(map(int, gt_bbox))
                pred_bbox = list(map(int, pred_bbox))
                cv2.rectangle(
                    img, (gt_bbox[0], gt_bbox[1]),
                    (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]),
                    (0, 255, 0), 3)
                cv2.rectangle(
                    img, (pred_bbox[0], pred_bbox[1]),
                    (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]),
                    (0, 255, 255), 3)
                cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX,
                            1, (0, 255, 255), 2)
                cv2.imshow(video.name, img)
                cv2.waitKey(1)
        toc /= cv2.getTickFrequency()
        # One result file per video: one "x,y,w,h" line per frame.
        model_path = os.path.join(opt.results_path, opt.dataset,
                                  opt.model_path.split('/')[-1].split('.')[0])
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        with open(os.path.join(model_path, '{}.txt'.format(video.name)),
                  'w') as f_w:
            for per_pbbox in pred_bboxes:
                f_w.write(','.join([str(i) for i in per_pbbox]) + '\n')
        print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
            v_idx + 1, video.name, toc,
            len(video) / toc))
Example #2
0
    def __init__(self,
                 name,
                 root,
                 video_dir,
                 init_rect,
                 img_names,
                 gt_rect,
                 attr,
                 load_img=False):
        """Store video metadata and, optionally, preload every frame.

        When ``load_img`` is False only the first frame is decoded, just to
        record the video's width and height.
        """
        self.name = name
        self.video_dir = video_dir
        self.init_rect = init_rect
        self.gt_traj = gt_rect
        self.attr = attr
        self.pred_trajs = {}
        # Resolve every frame path relative to the dataset root.
        self.img_names = [os.path.join(root, x) for x in img_names]
        self.imgs = None
        cv2 = try_import_cv2()

        if load_img:
            # Eager mode: decode all frames into memory up front.
            self.imgs = [cv2.imread(x) for x in self.img_names]
            first_frame = self.imgs[0]
        else:
            # Lazy mode: decode only the first frame to get the size.
            first_frame = cv2.imread(self.img_names[0])
            assert first_frame is not None, self.img_names[0]
        self.width = first_frame.shape[1]
        self.height = first_frame.shape[0]
Example #3
0
 def __iter__(self):
     """Yield ``(frame, ground-truth bbox)`` pairs for every frame.

     Frames come from the preloaded cache when available, otherwise each
     one is decoded from disk on demand.
     """
     if self.imgs is not None:
         for i in range(len(self.img_names)):
             yield self.imgs[i], self.gt_traj[i]
     else:
         # Import cv2 once, not once per frame as before.
         cv2 = try_import_cv2()
         for i in range(len(self.img_names)):
             yield cv2.imread(self.img_names[i]), self.gt_traj[i]
Example #4
0
def crop_xml(args, xml, sub_set_crop_path, instance_size=511):
    """
    Dataset curation

    Crop every annotated object of one ImageNet-VID/DET annotation file into
    SiamFC-style exemplar (z) and search (x) patches and save them as jpg.

    Parameters
    ----------
    args : argparse.Namespace
        Command-line options; ``args.download_dir`` is the output root.
    xml: str , xml
    sub_set_crop_path: str, xml crop path
    instance_size: int, instance_size
    """
    cv2 = try_import_cv2()
    xmltree = ET.parse(xml)
    objects = xmltree.findall('object')

    frame_crop_base_path = os.path.join(sub_set_crop_path, xml.split('/')[-1].split('.')[0])
    if not os.path.isdir(frame_crop_base_path):
        makedirs(frame_crop_base_path)
    # The frame lives next to its annotation: Annotations/*.xml -> Data/*.JPEG
    img_path = xml.replace('xml', 'JPEG').replace('Annotations', 'Data')
    im = cv2.imread(img_path)
    # Per-channel mean, used as the padding color for out-of-frame crops.
    avg_chans = np.mean(im, axis=(0, 1))

    # Renamed loop variable from `id` to `obj_id`: `id` shadows the builtin.
    for obj_id, object_iter in enumerate(objects):
        bndbox = object_iter.find('bndbox')
        bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
                int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
        z, x = crop_like_SiamFC(im, bbox, instance_size=instance_size, padding=avg_chans)
        cv2.imwrite(os.path.join(args.download_dir, frame_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(0, obj_id)), z)
        cv2.imwrite(os.path.join(args.download_dir, frame_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(0, obj_id)), x)
Example #5
0
def crop_video(args, sub_set, video, crop_path, ann_base_path):
    """
    Dataset curation

    Crop every annotated object of every frame of one video into
    SiamFC-style exemplar (z) and search (x) patches.

    Parameters
    ----------
    args : argparse.Namespace
        Command-line options; ``args.download_dir`` and
        ``args.instance_size`` are read here.
    sub_set: str , sub_set
    video: str, video number
    crop_path: str, crop_path
    ann_base_path: str, Annotations base path
    """
    cv2 = try_import_cv2()
    video_crop_base_path = os.path.join(crop_path, sub_set, video)
    if not os.path.isdir(video_crop_base_path):
        makedirs(video_crop_base_path)
    sub_set_base_path = os.path.join(ann_base_path, sub_set)
    xmls = sorted(glob.glob(os.path.join(sub_set_base_path, video, '*.xml')))
    for xml in xmls:
        xmltree = ET.parse(xml)
        objects = xmltree.findall('object')
        # (removed the unused `objs = []` local that was never read)
        filename = xmltree.findall('filename')[0].text
        # The frame lives next to its annotation:
        # Annotations/*.xml -> Data/*.JPEG
        im = cv2.imread(xml.replace('xml', 'JPEG').replace('Annotations', 'Data'))
        # Per-channel mean, used as padding color for out-of-frame crops.
        avg_chans = np.mean(im, axis=(0, 1))
        for object_iter in objects:
            trackid = int(object_iter.find('trackid').text)
            bndbox = object_iter.find('bndbox')
            bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
                    int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
            z, x = crop_like_SiamFC(im, bbox, instance_size=args.instance_size, padding=avg_chans)
            cv2.imwrite(os.path.join(args.download_dir, video_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(int(filename), trackid)), z)
            cv2.imwrite(os.path.join(args.download_dir, video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(filename), trackid)), x)
Example #6
0
 def __init__(self, shift, scale, blur, flip, color):
     """Store augmentation hyper-parameters and cache the cv2 module."""
     self.shift, self.scale = shift, scale
     self.blur, self.flip, self.color = blur, flip, color
     # Fixed matrix used for color jitter (values taken as-is from the
     # reference implementation).
     self.rgbVar = np.array(
         [[-0.55919361, 0.98062831, -0.41940627],
          [1.72091413, 0.19879334, -1.82968581],
          [4.64467907, 4.73710203, 4.88324118]],
         dtype=np.float32)
     self.cv2 = try_import_cv2()
Example #7
0
def crop_img(img, anns, set_crop_base_path, set_img_base_path, instance_size=511):
    """
    Dataset curation

    Crop every COCO annotation of one image into SiamFC-style exemplar (z)
    and search (x) patches and save them as jpg files.

    Parameters
    ----------
    img: dic, img
        COCO image record; ``img['file_name']`` is read here.
    anns: str, video number
        List of COCO annotation dicts; each must carry a 'bbox' [x, y, w, h].
    set_crop_base_path: str, crop result path
    set_img_base_path: str, ori image path
    instance_size: int
        Side length of the cropped search patch.

    NOTE(review): this function reads the module-level ``args`` for
    ``args.download_dir`` -- it must be defined by the surrounding script.
    """
    frame_crop_base_path = os.path.join(set_crop_base_path, img['file_name'].split('/')[-1].split('.')[0])
    if not os.path.isdir(frame_crop_base_path):
        makedirs(frame_crop_base_path)
    cv2 = try_import_cv2()
    im = cv2.imread('{}/{}'.format(set_img_base_path, img['file_name']))
    # Per-channel mean, used as padding color for out-of-frame crops.
    avg_chans = np.mean(im, axis=(0, 1))
    for trackid, ann in enumerate(anns):
        rect = ann['bbox']
        # Skip degenerate boxes before doing any work on them (the original
        # computed `bbox` first and discarded it afterwards).
        if rect[2] <= 0 or rect[3] <= 0:
            continue
        bbox = [rect[0], rect[1], rect[0] + rect[2], rect[1] + rect[3]]
        z, x = crop_like_SiamFC(im, bbox, instance_size=instance_size, padding=avg_chans)
        cv2.imwrite(os.path.join(args.download_dir, frame_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(0, trackid)), z)
        cv2.imwrite(os.path.join(args.download_dir, frame_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(0, trackid)), x)
Example #8
0
def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)):
    """
    crop image

    Crop the region ``bbox`` out of ``image`` and warp it to a square of
    side ``out_sz`` with an affine transform.

    Parameters
    ----------
    image: np.array, image
    bbox: np or list, bbox coordinate [xmin,ymin,xmax,ymax]
    out_sz: int , crop image size
    padding : tuple
        Border color used for pixels that fall outside the image.

    Return:
        crop result
    """
    # Affine map sending bbox corners to the [0, out_sz-1] square.
    a = (out_sz - 1) / (bbox[2] - bbox[0])
    b = (out_sz - 1) / (bbox[3] - bbox[1])
    c = -a * bbox[0]
    d = -b * bbox[1]
    # np.float was removed in NumPy 1.24; the builtin float (float64) is
    # what the old alias meant and is accepted by cv2.warpAffine.
    mapping = np.array([[a, 0, c], [0, b, d]]).astype(float)
    cv2 = try_import_cv2()
    crop = cv2.warpAffine(image,
                          mapping, (out_sz, out_sz),
                          borderMode=cv2.BORDER_CONSTANT,
                          borderValue=padding)
    return crop
Example #9
0
    def __init__(
        self,
        data_path=os.path.expanduser('~/.mxnet/datasets'),
        dataset_names=('vid', 'yt_bb', 'coco', 'det'),
        detaset_root=('vid/crop511', 'yt_bb/crop511', 'coco/crop511',
                      'det/crop511'),
        detaset_anno=('vid/train.json', 'yt_bb/train.json',
                      'coco/train2017.json', 'det/train.json'),
        dataset_frame_range=(100, 3, 1, 1),
        dataset_num_use=(100000, -1, -1, -1),
        train_search_size=255,
        train_exemplar_size=127,
        anchor_stride=8,
        anchor_ratios=(0.33, 0.5, 1, 2, 3),
        train_base_size=0,
        train_output_size=17,
        template_shift=4,
        template_scale=0.05,
        template_blur=0,
        template_flip=0,
        template_color=1.0,
        search_shift=64,
        search_scale=0.18,
        search_blur=0,
        search_flip=0,
        search_color=1.0,
        videos_per_epoch=600000,
        train_epoch=50,
        train_thr_high=0.6,
        train_thr_low=0.3,
        train_pos_num=16,
        train_neg_num=16,
        train_total_num=64,
        gray=0.0,
        neg=0.05,
    ):
        """Build the SiamRPN training dataset.

        Assembles one ``SubDataset`` per entry of ``dataset_names``, an
        ``AnchorTarget`` generator, and separate augmentation pipelines for
        the template and search branches.

        Notes
        -----
        * ``detaset_root`` / ``detaset_anno`` are positional lists parallel
          to ``dataset_names`` (the 'detaset' spelling is a typo kept as-is
          for keyword-argument compatibility with existing callers).
        * Raises ``Exception`` when the geometry
          ``(search - exemplar) / stride + 1 + base`` does not equal
          ``train_output_size``.
        """
        super(TrkDataset, self).__init__()
        self.train_search_size = train_search_size
        self.train_exemplar_size = train_exemplar_size
        self.anchor_stride = anchor_stride
        self.anchor_ratios = list(anchor_ratios)
        self.train_base_size = train_base_size
        self.train_output_size = train_output_size
        self.data_path = data_path
        self.dataset_names = list(dataset_names)
        self.detaset_root = list(detaset_root)
        self.detaset_anno = list(detaset_anno)
        self.dataset_frame_range = list(dataset_frame_range)
        self.dataset_num_use = list(dataset_num_use)
        self.template_shift = template_shift
        self.template_scale = template_scale
        self.template_blur = template_blur
        self.template_flip = template_flip
        self.template_color = template_color
        self.search_shift = search_shift
        self.search_scale = search_scale
        self.search_blur = search_blur
        self.search_flip = search_flip
        self.search_color = search_color
        self.videos_per_epoch = videos_per_epoch
        self.train_epoch = train_epoch
        self.train_thr_high = train_thr_high
        self.train_thr_low = train_thr_low
        self.train_pos_num = train_pos_num
        self.train_neg_num = train_neg_num
        self.train_total_num = train_total_num
        self.gray = gray
        self.neg = neg
        self.cv2 = try_import_cv2()
        self.logger = logging.getLogger()
        # Sanity-check the output map size implied by the network geometry.
        desired_size = (self.train_search_size - self.train_exemplar_size) / \
            self.anchor_stride + 1 + self.train_base_size
        if desired_size != self.train_output_size:
            raise Exception('size not match!')

        # create anchor target
        self.anchor_target = AnchorTarget(
            anchor_stride=self.anchor_stride,
            anchor_ratios=self.anchor_ratios,
            train_search_size=self.train_search_size,
            train_output_size=self.train_output_size,
            train_thr_high=self.train_thr_high,
            train_thr_low=self.train_thr_low,
            train_pos_num=self.train_pos_num,
            train_neg_num=self.train_neg_num,
            train_total_num=self.train_total_num)
        # create sub dataset
        self.all_dataset = []
        start = 0
        self.num = 0

        # `start` gives each sub dataset a disjoint global index range.
        for idx in range(len(self.dataset_names)):
            sub_dataset = SubDataset(
                self.dataset_names[idx],
                os.path.join(self.data_path, self.detaset_root[idx]),
                os.path.join(self.data_path, self.detaset_anno[idx]),
                self.dataset_frame_range[idx], self.dataset_num_use[idx],
                start)
            start += sub_dataset.num
            self.num += sub_dataset.num_use

            sub_dataset.log()
            self.all_dataset.append(sub_dataset)

        # data augmentation
        self.template_aug = SiamRPNaugmentation(self.template_shift,
                                                self.template_scale,
                                                self.template_blur,
                                                self.template_flip,
                                                self.template_color)
        self.search_aug = SiamRPNaugmentation(self.search_shift,
                                              self.search_scale,
                                              self.search_blur,
                                              self.search_flip,
                                              self.search_color)
        # Epoch length: fixed budget if positive, else the natural size;
        # multiplied by the number of epochs to form one long sample list.
        videos_per_epoch = self.videos_per_epoch
        self.num = videos_per_epoch if videos_per_epoch > 0 else self.num
        self.num *= self.train_epoch
        self.pick = self.shuffle()
Example #10
0
    def get_subwindow(self, img, pos, model_sz, original_sz, avg_chans, ctx):
        """
        Adjust the position of the frame to prevent the boundary from being exceeded.
        If the boundary is exceeded,
        the average value of each channel of the image is used to replace the exceeded value.

        Parameters
        ----------
        img : np.ndarray
            BGR based image
        pos : list
            center position
        model_sz : array
            exemplar size, x is 127, z is 287 in ours
        original_sz: array
            original size
        avg_chans : array
            channel average
        ctx : mxnet.Context
            Context such as mx.cpu(), mx.gpu(0).

        Returns
        -------
            rejust window though avg channel, as a (1, C, model_sz, model_sz)
            mx.nd.array on ``ctx``
        """
        cv2 = try_import_cv2()
        if isinstance(pos, float):
            # A scalar position means the window is centered at (pos, pos).
            pos = [pos, pos]
        im_sz = img.shape
        # Window extent around the center, in image coordinates.
        original_c = (original_sz + 1) / 2
        context_xmin = np.floor(pos[0] - original_c + 0.5)
        context_xmax = context_xmin + original_sz - 1
        context_ymin = np.floor(pos[1] - original_c + 0.5)
        context_ymax = context_ymin + original_sz - 1
        # How far the window sticks out of the image on each side.
        left_pad = int(max(0., -context_xmin))
        top_pad = int(max(0., -context_ymin))
        right_pad = int(max(0., context_xmax - im_sz[1] + 1))
        bottom_pad = int(max(0., context_ymax - im_sz[0] + 1))

        # Shift window coordinates into the padded-image frame.
        context_xmin = context_xmin + left_pad
        context_xmax = context_xmax + left_pad
        context_ymin = context_ymin + top_pad
        context_ymax = context_ymax + top_pad
        im_h, im_w, im_c = img.shape

        if any([top_pad, bottom_pad, left_pad, right_pad]):
            #If there is a pad, use the average channels to complete
            size = (im_h + top_pad + bottom_pad, im_w + left_pad + right_pad,
                    im_c)
            te_im = np.zeros(size, np.uint8)
            # Paste the original image, then fill each padded border strip
            # with the per-channel average color.
            te_im[top_pad:top_pad + im_h, left_pad:left_pad + im_w, :] = img
            if top_pad:
                te_im[0:top_pad, left_pad:left_pad + im_w, :] = avg_chans
            if bottom_pad:
                te_im[im_h + top_pad:, left_pad:left_pad + im_w, :] = avg_chans
            if left_pad:
                te_im[:, 0:left_pad, :] = avg_chans
            if right_pad:
                te_im[:, im_w + left_pad:, :] = avg_chans
            im_patch = te_im[int(context_ymin):int(context_ymax + 1),
                             int(context_xmin):int(context_xmax + 1), :]
        else:
            #If there is no pad, crop Directly
            im_patch = img[int(context_ymin):int(context_ymax + 1),
                           int(context_xmin):int(context_xmax + 1), :]
        if not np.array_equal(model_sz, original_sz):
            # Resize to the size the model expects.
            im_patch = cv2.resize(im_patch, (model_sz, model_sz))
        # HWC -> CHW, add batch axis, and hand off to MXNet on `ctx`.
        im_patch = im_patch.transpose(2, 0, 1)
        im_patch = im_patch[np.newaxis, :, :, :]
        im_patch = im_patch.astype(np.float32)
        im_patch = mx.nd.array(im_patch, ctx)
        return im_patch
Example #11
0
 def __getitem__(self, idx):
     """Return ``(frame, ground-truth bbox)`` for frame ``idx``."""
     if self.imgs is not None:
         return self.imgs[idx], self.gt_traj[idx]
     # Frames are not cached: decode the requested one on demand.
     cv2 = try_import_cv2()
     return cv2.imread(self.img_names[idx]), self.gt_traj[idx]
Example #12
0
 def load_img(self):
     """Decode every frame into memory; no-op when already loaded."""
     if self.imgs is not None:
         return
     cv2 = try_import_cv2()
     self.imgs = [cv2.imread(x) for x in self.img_names]
     first_frame = self.imgs[0]
     self.width = first_frame.shape[1]
     self.height = first_frame.shape[0]
Example #13
0
"""SiamRPN Demo script.
Code adapted from https://github.com/STVIR/pysot"""
import os
import argparse
import matplotlib.pyplot as plt
import numpy as np
import mxnet as mx
from gluoncv import model_zoo, utils
from gluoncv.model_zoo.siamrpn.siamrpn_tracker import SiamRPNTracker as build_tracker
from gluoncv.model_zoo.siamrpn.siamrpn_tracker import get_axis_aligned_bbox
from gluoncv.utils.filesystem import try_import_cv2
cv2 = try_import_cv2()


def parse_args():
    """ benchmark test."""
    parser = argparse.ArgumentParser(description='make ovject tracking.')
    parser.add_argument(
        '--data-dir',
        type=str,
        default='',
        help='if video-loader set to True, data-dir store videos frames.')
    parser.add_argument(
        '--video-loader',
        action='store_true',
        default=True,
        help='if set to True, read videos directly instead of reading frames.')
    parser.add_argument(
        '--video-path',
        default=
        'https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/tracking/Coke.mp4',
Example #14
0
    def __init__(
            self,
            root_bgs=os.path.expanduser(
                '/media/hp/data/BGSDecom/FrameDifference/bgs'),
            root_fgs=os.path.expanduser(
                '/media/hp/data/BGSDecom/FrameDifference/fgs'),
            setting=os.path.
        expanduser(
            '/home/hp/.mxnet/datasets/ucf101/ucfTrainTestlist/ucf101_train_split_2_rawframes.txt'
        ),
            train=True,
            test_mode=False,
            name_pattern='img_%05d.jpg',
            video_ext='mp4',
            is_color=True,
            modality='rgb',
            num_segments_bgs=1,
            num_segments_fgs=1,
            new_length_bgs=1,
            new_length_fgs=5,
            new_step_bgs=1,
            new_step_fgs=1,
            new_width=340,
            new_height=256,
            target_width=224,
            target_height=224,
            temporal_jitter=False,
            video_loader=False,
            use_decord=False,
            transform=None):
        """Two-stream UCF101 dataset over background (bgs) and foreground
        (fgs) frame directories.

        ``setting`` is a split file; ``_make_dataset`` parses it into
        ``self.clips``. Separate segment/length/step parameters control the
        bgs and fgs streams independently.

        NOTE(review): the default ``root_bgs``/``root_fgs``/``setting``
        paths are machine-specific absolute paths -- callers should always
        pass their own.
        """

        super(UCF101_2stream, self).__init__()

        from gluoncv.utils.filesystem import try_import_cv2, try_import_decord, try_import_mmcv
        self.cv2 = try_import_cv2()
        self.root_bgs = root_bgs
        self.root_fgs = root_fgs
        self.setting = setting
        self.train = train
        self.test_mode = test_mode
        self.is_color = is_color
        self.modality = modality
        self.num_segments_bgs = num_segments_bgs
        self.num_segments_fgs = num_segments_fgs
        self.new_height = new_height
        self.new_width = new_width
        self.new_length_fgs = new_length_fgs
        self.new_length_bgs = new_length_bgs
        self.new_step_bgs = new_step_bgs
        self.new_step_fgs = new_step_fgs
        # Total frame span sampled per clip for each stream.
        self.skip_length_bgs = self.new_length_bgs * self.new_step_bgs
        self.skip_length_fgs = self.new_length_fgs * self.new_step_fgs
        self.target_height = target_height
        self.target_width = target_width
        self.transform = transform
        self.temporal_jitter = temporal_jitter  # False
        self.video_loader = video_loader
        self.video_ext = video_ext
        self.use_decord = use_decord

        # When reading whole videos (rather than frames), pick the backend.
        if self.video_loader:
            if self.use_decord:
                self.decord = try_import_decord()
            else:
                self.mmcv = try_import_mmcv()

#        self.classes, self.class_to_idx = self._find_classes(root)
        self.clips = self._make_dataset(root_bgs, root_fgs, setting)
        if len(self.clips) == 0:
            raise (RuntimeError("Found 0 video clips in subfolders of: " +
                                root_bgs + "\n"
                                "Check your data directory (opt.data-dir)."))

        # Fall back to a modality-based frame-name pattern only when the
        # caller passed a falsy name_pattern (the default is truthy).
        if name_pattern:
            self.name_pattern = name_pattern
        else:
            if self.modality == "rgb":
                self.name_pattern = "img_%05d.jpg"
            elif self.modality == "flow":
                self.name_pattern = "flow_%s_%05d.jpg"
    def __init__(self,
                 root,
                 setting,
                 train=True,
                 test_mode=False,
                 name_pattern='img_%05d.jpg',
                 video_ext='mp4',
                 is_color=True,
                 modality='rgb',
                 num_segments=1,
                 num_crop=1,
                 new_length=1,
                 new_step=1,
                 new_width=340,
                 new_height=256,
                 target_width=224,
                 target_height=224,
                 temporal_jitter=False,
                 video_loader=False,
                 use_decord=False,
                 slowfast=False,
                 slow_temporal_stride=16,
                 fast_temporal_stride=2,
                 data_aug='v1',
                 lazy_init=False,
                 transform=None):
        """Generic video classification dataset.

        ``setting`` is a split file that ``_make_dataset`` parses into
        ``self.clips`` (unless ``lazy_init`` defers that). Supports frame
        folders or whole-video reading via decord/mmcv, and SlowFast-style
        two-rate sampling when ``slowfast`` is set.
        """

        super(VideoClsCustom, self).__init__()

        from gluoncv.utils.filesystem import try_import_cv2, try_import_decord, try_import_mmcv
        self.cv2 = try_import_cv2()
        self.root = root
        self.setting = setting
        self.train = train
        self.test_mode = test_mode
        self.is_color = is_color
        self.modality = modality
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.new_height = new_height
        self.new_width = new_width
        self.new_length = new_length
        self.new_step = new_step
        # Total frame span sampled per clip.
        self.skip_length = self.new_length * self.new_step
        self.target_height = target_height
        self.target_width = target_width
        self.transform = transform
        self.temporal_jitter = temporal_jitter
        self.name_pattern = name_pattern
        self.video_loader = video_loader
        self.video_ext = video_ext
        self.use_decord = use_decord
        self.slowfast = slowfast
        self.slow_temporal_stride = slow_temporal_stride
        self.fast_temporal_stride = fast_temporal_stride
        self.data_aug = data_aug
        self.lazy_init = lazy_init

        # SlowFast sampling requires commensurate strides and consecutive
        # frame reads; reject incompatible settings up front.
        if self.slowfast:
            assert slow_temporal_stride % fast_temporal_stride == 0, 'slow_temporal_stride needs to be multiples of slow_temporal_stride, please set it accordinly.'
            assert not temporal_jitter, 'Slowfast dataloader does not support temporal jitter. Please set temporal_jitter=False.'
            assert new_step == 1, 'Slowfast dataloader only support consecutive frames reading, please set new_step=1.'

        # When reading whole videos (rather than frames), pick the backend.
        if self.video_loader:
            if self.use_decord:
                self.decord = try_import_decord()
            else:
                self.mmcv = try_import_mmcv()

        if not self.lazy_init:
            self.clips = self._make_dataset(root, setting)
            if len(self.clips) == 0:
                raise (
                    RuntimeError("Found 0 video clips in subfolders of: " +
                                 root + "\n"
                                 "Check your data directory (opt.data-dir)."))