Example no. 1
    def _parse_prop_file(self, stats=None):
        print('loading prop_file ' + self.prop_file)
        prop_info = load_proposal_file(self.prop_file)
    
        self.video_list = [VideoRecord(p, self.num_classes) for p in prop_info]
        
        print('max number of proposals in one video is %d' % max([len(v.proposals) for v in self.video_list]))
        print('create video list')  # handling of empty-proposal videos starts here
        if self.exclude_empty and not self.test_mode:
            self.video_list = list(filter(lambda x: len(x.gt) > 0, self.video_list))

        self.video_dict = {v.id: v for v in self.video_list}
        
        if not self.test_mode:
            # construct three pools:
            # 1. Foreground
            # 2. Background
            # 3. Incomplete

            self.fg_pool = []
            self.bg_pool = []
            self.incomp_pool = []

            for v in self.video_list:
                self.fg_pool.extend([(v.id, prop) for prop in v.get_fg(self.fg_iou_thresh, self.gt_as_fg)])

                incomp, bg = v.get_negatives(self.incomplete_iou_thresh, self.bg_iou_thresh,
                                            self.bg_coverage_thresh, self.incomplete_overlap_thresh)

                self.incomp_pool.extend([(v.id, prop) for prop in incomp])
                self.bg_pool.extend([(v.id, prop) for prop in bg])
            if stats is None:
                self._compute_regresssion_stats()
            else:
                self.stats = stats
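For orientation, the split that `get_fg` and `get_negatives` perform can be sketched from the thresholds passed above and from the proposal fields listed in Example no. 6 (`best_iou`, `coverage`, `overlap_self`). The snippet below is a minimal, hedged sketch under those assumptions, not the actual `VideoRecord` implementation; the `Prop` container and the exact semantics of `coverage`/`overlap_self` are illustrative.

from dataclasses import dataclass

@dataclass
class Prop:
    # illustrative proposal container; field names follow the comments in Example no. 6
    best_iou: float       # IoU with the best-matching ground-truth instance
    coverage: float       # assumed: a relative-length measure tested against bg_coverage_thresh
    overlap_self: float   # assumed: fraction of the proposal itself covered by ground truth

def select_fg(props, fg_iou_thresh):
    # foreground: proposals with high IoU against some ground-truth instance
    return [p for p in props if p.best_iou > fg_iou_thresh]

def select_negatives(props, incomplete_iou_thresh, bg_iou_thresh,
                     bg_coverage_thresh, incomplete_overlap_thresh):
    incomp, bg = [], []
    for p in props:
        if p.best_iou < incomplete_iou_thresh and p.overlap_self > incomplete_overlap_thresh:
            # incomplete: low IoU, but mostly lying inside an action instance
            incomp.append(p)
        elif p.best_iou < bg_iou_thresh and p.coverage > bg_coverage_thresh:
            # background: essentially no overlap with any instance, yet not vanishingly short
            bg.append(p)
    return incomp, bg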
Example no. 2
    def _parse_prop_file(self, stats=None):
        prop_info = load_proposal_file(self.prop_file)

        self.video_list = [SSNVideoRecord(p) for p in prop_info]

        if self.exclude_empty:
            self.video_list = list(filter(lambda x: len(x.gt) > 0, self.video_list))

        self.video_dict = {v.id: v for v in self.video_list}

        # construct three pools:
        # 1. Foreground
        # 2. Background
        # 3. Incomplete

        self.fg_pool = []
        self.bg_pool = []
        self.incomp_pool = []

        for v in self.video_list:
            self.fg_pool.extend([(v.id, prop) for prop in v.get_fg(self.fg_iou_thresh, self.gt_as_fg)])

            incomp, bg = v.get_negatives(self.incomplete_iou_thresh, self.bg_iou_thresh,
                                         self.bg_coverage_thresh, self.incomplete_overlap_thresh)

            self.incomp_pool.extend([(v.id, prop) for prop in incomp])
            self.bg_pool.extend([(v.id, prop) for prop in bg])

        if stats is None:
            self._compute_regresssion_stats()
        else:
            self.stats = stats

        if self.verbose:
            print("""
            
            SSNDataset: Proposal file {prop_file} parsed.
            
            There are {pnum} usable proposals from {vnum} videos.
            {fnum} foreground proposals
            {inum} incomplete_proposals
            {bnum} background_proposals
            
            Sampling config:
            FG/BG/INC: {fr}/{br}/{ir}
            Video Centric: {vc}
            
            Epoch size multiplier: {em}
            
            Regression Stats:
            Location: mean {stats[0][0]:.05f} std {stats[1][0]:.05f}
            Duration: mean {stats[0][1]:.05f} std {stats[1][1]:.05f}
            """.format(prop_file=self.prop_file, pnum=len(self.fg_pool) + len(self.bg_pool) + len(self.incomp_pool),
                       fnum=len(self.fg_pool), inum=len(self.incomp_pool), bnum=len(self.bg_pool),
                       fr=self.fg_per_video, br=self.bg_per_video, ir=self.incomplete_per_video, vnum=len(self.video_dict),
                       vc=self.video_centric, stats=self.stats, em=self.epoch_multiplier))
        else:
            print("""
                        SSNDataset: Proposal file {prop_file} parsed.   
            """.format(prop_file=self.prop_file))
Example no. 3
    def _parse_prop_file(self, stats=None):
        prop_info = load_proposal_file(self.prop_file, self.mode)

        self.video_list = [VideoRecord(p, self.mode) for p in prop_info]

        if self.exclude_empty:
            self.video_list = list(filter(lambda x: len(x.gt) > 0, self.video_list))

        self.video_dict = {v.id: v for v in self.video_list}

        # construct three pools:
        # 1. Foreground
        # 2. Background
        # 3. Incomplete

        self.fg_pool = []
        self.bg_pool = []
        self.incomp_pool = []
        # Note: the props from all videos are put into the same pools
        for v in self.video_list:
            self.fg_pool.extend([(v.id, prop) for prop in v.get_fg(self.fg_iou_thresh, self.gt_as_fg)])  # gt_as_fg decides whether gt instances are added to the FG pool

            incomp, bg = v.get_negatives(self.incomplete_iou_thresh, self.bg_iou_thresh,
                                         self.bg_coverage_thresh, self.incomplete_overlap_thresh)

            self.incomp_pool.extend([(v.id, prop) for prop in incomp])
            self.bg_pool.extend([(v.id, prop) for prop in bg])

        if stats is None:
            self._compute_regresssion_stats()
        else:
            self.stats = stats
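Since the pools above mix proposals from all videos, a later sampling step has to choose between drawing globally and drawing video-centrically (the `Video Centric: {vc}` line in the verbose report). The sketch below illustrates one way such sampling could work; the helper and its behaviour are assumptions for illustration, with the per-video counts named after the `fg_per_video`, `bg_per_video`, and `incomplete_per_video` attributes used in the report.

import random

def sample_training_props(fg_pool, bg_pool, incomp_pool, video_dict,
                          fg_per_video, bg_per_video, incomplete_per_video,
                          video_centric=True):
    # Hedged sketch, not the repository's sampler.
    if video_centric:
        # restrict all three pools to proposals of one randomly chosen video
        vid = random.choice(list(video_dict))
        fg_pool = [x for x in fg_pool if x[0] == vid]
        bg_pool = [x for x in bg_pool if x[0] == vid]
        incomp_pool = [x for x in incomp_pool if x[0] == vid]
    out = []
    for pool, k in ((fg_pool, fg_per_video),
                    (incomp_pool, incomplete_per_video),
                    (bg_pool, bg_per_video)):
        if pool:
            # sample with replacement so that short videos still yield k proposals
            out.extend(random.choices(pool, k=k))
    return out  # list of (video_id, proposal) tuples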
Example no. 4
    def _parse_prop_file(self, stats=None):
        prop_info = load_proposal_file(self.prop_file)

        self.video_list = [SSNVideoRecord(p) for p in prop_info]

        if self.exclude_empty:
            self.video_list = list(filter(lambda x: len(x.gt) > 0, self.video_list))

        self.video_dict = {v.id: v for v in self.video_list}

        # construct three pools:
        # 1. Foreground
        # 2. Background
        # 3. Incomplete

        self.fg_pool = []
        self.bg_pool = []
        self.incomp_pool = []

        for v in self.video_list:
            self.fg_pool.extend([(v.id, prop) for prop in v.get_fg(self.fg_iou_thresh, self.gt_as_fg)])

            incomp, bg = v.get_negatives(self.incomplete_iou_thresh, self.bg_iou_thresh,
                                         self.bg_coverage_thresh, self.incomplete_overlap_thresh)

            self.incomp_pool.extend([(v.id, prop) for prop in incomp])
            self.bg_pool.extend([(v.id, prop) for prop in bg])

        if stats is None:
            self._compute_regresssion_stats()
        else:
            self.stats = stats

        if self.verbose:
            print("""
            
            SSNDataset: Proposal file {prop_file} parsed.
            
            There are {pnum} usable proposals from {vnum} videos.
            {fnum} foreground proposals
            {inum} incomplete_proposals
            {bnum} background_proposals
            
            Sampling config:
            FG/BG/INC: {fr}/{br}/{ir}
            Video Centric: {vc}
            
            Epoch size multiplier: {em}
            
            Regression Stats:
            Location: mean {stats[0][0]:.05f} std {stats[1][0]:.05f}
            Duration: mean {stats[0][1]:.05f} std {stats[1][1]:.05f}
            """.format(prop_file=self.prop_file, pnum=len(self.fg_pool) + len(self.bg_pool) + len(self.incomp_pool),
                       fnum=len(self.fg_pool), inum=len(self.incomp_pool), bnum=len(self.bg_pool),
                       fr=self.fg_per_video, br=self.bg_per_video, ir=self.incomplete_per_video, vnum=len(self.video_dict),
                       vc=self.video_centric, stats=self.stats, em=self.epoch_multiplier))
        else:
            print("""
                        SSNDataset: Proposal file {prop_file} parsed.   
            """.format(prop_file=self.prop_file))
Example no. 5
    def _parse_prop_file(self):
        prop_info = load_proposal_file(self.prop_file)

        self.video_list = [BinaryVideoRecord(p) for p in prop_info]

        if self.exclude_empty:
            self.video_list = list(
                [x for x in self.video_list if len(x.gt) > 0])

        self.video_dict = {v.id: v for v in self.video_list}

        # construct two pools:
        # 1. Foreground
        # 2. Background

        self.fg_pool = []
        self.bg_pool = []

        for v in self.video_list:
            self.fg_pool.extend([
                (v.id, prop)
                for prop in v.get_fg(self.fg_iou_thresh, self.gt_as_fg)
            ])
            self.bg_pool.extend([(v.id, prop)
                                 for prop in v.get_bg(self.bg_iou_thresh)])

        if self.verbose:
            print(("""
            
            BinaryDataSet: Proposal file {prop_file} parsed.

            There are {pnum} usable proposals from {vnum} videos.
            {fnum} foreground proposals
            {bnum} background proposals

            Sampling config:
            FG/BG: {fr}/{br}
            
            Epoch size multiplier: {em}
            """.format(
                prop_file=self.prop_file,
                pnum=len(self.fg_pool) + len(self.bg_pool),
                fnum=len(self.fg_pool),
                bnum=len(self.bg_pool),
                fr=self.fg_per_video,
                br=self.bg_per_video,
                vnum=len(self.video_dict),
                em=self.epoch_multiplier,
            )))
        else:
            print(("""
                       BinaryDataset: proposal file {prop_file} parsed.
            """.format(prop_file=self.prop_file)))
Example no. 6
    def _parse_prop_file(self, stats=None):
        # returns a list of 199 entries, one per video, each element a tuple
        # each tuple holds: (vid, n_frames, gt_boxes, pr_boxes)
        prop_info = load_proposal_file(
            self.prop_file)  # return vid, n_frame, gt_boxes, pr_boxes

        # build a PGCNVideoRecord for each video (each record in turn wraps instance objects);
        # the conversion is done so that the record's methods can later build the three pools
        self.video_list = [PGCNVideoRecord(p) for p in prop_info]

        if self.exclude_empty:
            self.video_list = list(
                filter(lambda x: len(x.gt) > 0, self.video_list))  # drop videos without ground truth

        # convert video_list into video_dict
        self.video_dict = {v.id: v for v in self.video_list}

        # construct three pools:
        # 1. Foreground
        # 2. Background
        # 3. Incomplete

        self.fg_pool = []
        self.bg_pool = []
        self.incomp_pool = []

        for v in self.video_list:
            # build fg_pool: keep the proposals that meet the threshold
            # fg_iou_thresh : 0.7
            # each proposal carries the following information:
            #    (label, best_iou, coverage, start_frame, end_frame, overlap_self, loc_reg, size_reg)
            self.fg_pool.extend([
                (v.id, prop)
                for prop in v.get_fg(self.fg_iou_thresh, self.gt_as_fg)
            ])

            # build bg_pool and incomp_pool
            # incomplete_iou_thresh : 0.3
            # bg_iou_thresh : 0.01
            # bg_coverage_thresh : 0.02
            # incomplete_overlap_thresh : 0.01
            incomp, bg = v.get_negatives(self.incomplete_iou_thresh,
                                         self.bg_iou_thresh,
                                         self.bg_coverage_thresh,
                                         self.incomplete_overlap_thresh)

            self.incomp_pool.extend([(v.id, prop) for prop in incomp])
            self.bg_pool.extend([(v.id, prop) for prop in bg])

        if stats is None:
            self._compute_regresssion_stats()  # compute the mean and std of all regression targets
        else:
            self.stats = stats
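The `loc_reg` / `size_reg` fields listed in the comments above are the regression targets that `_compute_regresssion_stats` summarizes. A common parameterization of such targets is shown below purely as a hedged illustration; the actual definition lives in the proposal class, which is not part of this snippet.

import math

def regression_targets(prop_start, prop_end, gt_start, gt_end):
    # Hedged sketch of a common (location, duration) target parameterization;
    # the repository's exact definition may differ.
    prop_center = (prop_start + prop_end) / 2.0
    prop_len = float(prop_end - prop_start)
    gt_center = (gt_start + gt_end) / 2.0
    gt_len = float(gt_end - gt_start)
    loc_reg = (gt_center - prop_center) / prop_len  # normalized center shift
    size_reg = math.log(gt_len / prop_len)          # log length ratio
    return loc_reg, size_reg

# Example: a proposal [110, 190] matched to a ground-truth instance [100, 200]
# yields loc_reg = 0.0 and size_reg = log(100/80) ≈ 0.223.
print(regression_targets(110, 190, 100, 200))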
Example no. 7
    def _parse_prop_file(self, stats=None):
        prop_info = load_proposal_file(
            self.prop_file)  # load the proposal file: video_name, number of frames in the video, gt, proposals

        self.video_list = [PGCNVideoRecord(p) for p in prop_info]
        # parse each video's information; every video holds many proposals

        if self.exclude_empty:
            self.video_list = list(
                filter(lambda x: len(x.gt) > 0, self.video_list))

        self.video_dict = {v.id: v for v in self.video_list}

        # construct three pools (each pool element is a (video_name, proposal) tuple):
        # 1. Foreground
        # 2. Background
        # 3. Incomplete

        self.fg_pool = []
        self.bg_pool = []
        self.incomp_pool = []

        for v in self.video_list:  # for each video
            self.fg_pool.extend([
                (v.id, prop)
                for prop in v.get_fg(self.fg_iou_thresh, self.gt_as_fg)
            ])  # collect the foreground proposals

            incomp, bg = v.get_negatives(
                self.incomplete_iou_thresh,
                self.bg_iou_thresh,  # select background and incomplete segments
                self.bg_coverage_thresh,
                self.incomplete_overlap_thresh)

            self.incomp_pool.extend([(v.id, prop) for prop in incomp])
            self.bg_pool.extend([(v.id, prop) for prop in bg])

        if stats is None:
            self._compute_regresssion_stats()  # compute mean and variance
        else:
            self.stats = stats
Example no. 8
    def __init__(self, feat_root, feat_model, prop_file=None,
                 subset_videos=None, body_seg=5, video_centric=True,
                 test_mode=False, feat_stride=16, input_dim=1024,
                 prop_per_video=12, fg_ratio=6, bg_ratio=6,
                 fg_iou_thresh=0.7, bg_iou_thresh=0.01,
                 bg_coverage_thresh=0.02, sample_duration=100*16,
                 gt_as_fg=True, test_interval=6, verbose=True,
                 exclude_empty=True, epoch_multiplier=1,
                 use_flow=True, only_flow=False, num_local=8,
                 frame_path='../../data/activitynet/activity_net_frames'):

        self.verbose = verbose
        self.num_local = num_local

        self.body_seg = body_seg
        self.video_centric = video_centric
        self.exclude_empty = exclude_empty
        self.epoch_multiplier = epoch_multiplier
        self.input_dim = input_dim
        self.feat_stride = feat_stride
        assert feat_stride % 8 == 0
        self.sample_duration = sample_duration // feat_stride

        self.test_mode = test_mode
        self.test_interval = test_interval

        self.fg_iou_thresh = fg_iou_thresh
        self.bg_iou_thresh = bg_iou_thresh

        self.bg_coverage_thresh = bg_coverage_thresh
        self.starting_ratio = 0.5
        self.ending_ratio = 0.5

        self.gt_as_fg = gt_as_fg
        denum = fg_ratio + bg_ratio

        self.fg_per_video = int(prop_per_video * (fg_ratio / denum))
        self.bg_per_video = int(prop_per_video * (bg_ratio / denum))

        # set the directory for the optical-flow features
        if feat_model.endswith('_trained'):
            feat_flow_rpath = os.path.join(feat_root, 'i3d_flow_trained')
        else:
            feat_flow_rpath = os.path.join(feat_root, 'i3d_flow')
        print("using flow feature from {}".format(feat_flow_rpath))

        # obtain the h5 feature file paths
        flow_h5_path = os.path.join(feat_flow_rpath, 'i3d_flow_feature.hdf5')
        flow_feat_key = 'i3d_flow_feature'
        feat_rgb_path = os.path.join(feat_root, feat_model)
        if feat_model == 'i3d_rgb' or feat_model == 'i3d_rgb_trained':
            rgb_h5_path = os.path.join(feat_rgb_path, 'i3d_rgb_feature.hdf5')
            rgb_feat_key = 'i3d_rgb_feature'
        elif feat_model == 'inception_resnet_v2':
            rgb_h5_path = os.path.join(
                feat_rgb_path, 'new_inception_resnet.hdf5')
            rgb_feat_key = 'inception_resnet_v2'
        elif feat_model == 'inception_resnet_v2_trained':
            rgb_h5_path = os.path.join(
                feat_rgb_path, 'inception_resnet_v2_trained.hdf5')
            rgb_feat_key = 'inception_resnet_v2'
        else:
            raise NotImplementedError('this feature has not been extracted!')
        print("using rgb feature from {}".format(rgb_h5_path))

        if prop_file:
            prop_info = load_proposal_file(prop_file)
            frame_counts = {}
            for i, vid_info in enumerate(prop_info):
                vid_name = os.path.split(vid_info[0])[1]
                frame_counts[vid_name] = int(vid_info[1])
        else:
            frame_counts = None
        self.video_list = [BinaryVideoRecord(x, frame_path, flow_h5_path, rgb_h5_path, flow_feat_key, rgb_feat_key,
                                             frame_counts, use_flow=use_flow, only_flow=only_flow, feat_stride=feat_stride, 
                                             sample_duration=self.sample_duration) for x in subset_videos]
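The constructor above only records the HDF5 paths and dataset keys; the features themselves are presumably read later, e.g. inside `BinaryVideoRecord`. The sketch below shows one way a single video's feature could be read with h5py, assuming the file stores one dataset per video under the given key group; the internal layout of these HDF5 files is not shown in this snippet, so treat both the layout and the helper name as assumptions.

import h5py
import numpy as np

def load_video_feature(h5_path, feat_key, vid_name):
    # Hedged sketch: read one video's feature matrix from an HDF5 file.
    # Assumed layout: f[feat_key][vid_name] -> array of shape (num_steps, input_dim),
    # with one feature vector per feat_stride frames; the real files may differ.
    with h5py.File(h5_path, 'r') as f:
        return np.asarray(f[feat_key][vid_name])

# hypothetical usage with the paths built in the constructor above:
# rgb_feat = load_video_feature(rgb_h5_path, rgb_feat_key, 'v_example_video_id')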