Code example #1
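# Tail of ReadAnnotations() plus driver code that flattens the per-video ground
# truth into rows of (end, start, class index, frame count, video name) before
# building a pandas DataFrame.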
            action_category = l.rstrip().split(" ")[-1]
            class_id = class_list.index(action_category)
            if movie_name in movie_instances.keys():
                movie_instances[movie_name].append(
                    (gt_start, gt_end, class_id))
            else:
                movie_instances[movie_name] = [(gt_start, gt_end, class_id)]

    return movie_instances


save_file = '/home/zwei/Dev/NetModules/ActionLocalizationDevs/PropEval/thumos14_val_groundtruth.csv'

data_frame = []

pathVars = path_vars.PathVars()

movie_instances = ReadAnnotations(class_list=pathVars.classnames)

for i, _key in enumerate(movie_instances):

    # fps = movie_fps[_key]

    frm_num = pathVars.video_frames[_key]
    for line in movie_instances[_key]:
        start = int(line[0])
        end = int(line[1])
        label_idx = int(line[2])
        data_frame.append([end, start, label_idx, frm_num, _key])

results = pd.DataFrame(
Code example #2
    def __init__(self, seq_length=360, overlap=0.9, feature_file_ext='npy'):
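        """Sliding-window loader over THUMOS14 validation ground truth.

        Reads frame-level annotations from a space-separated CSV,
        deduplicates instances per video, then slices each video into clips
        of `seq_length` frames advanced by `(1 - overlap) * seq_length`
        frames, keeping the actions that fall entirely inside each clip.
        """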
        self.PathVars = path_vars.PathVars()
        self.feature_directory = '/home/zwei/datasets/THUMOS14/features/BNInception'
        self.feature_file_ext = feature_file_ext
        self.annotation_file = '/home/zwei/Dev/NetModules/ActionLocalizationDevs/PropEval/thumos14_val_groundtruth.csv'
        print("Reading training data list from {:s}\t clip=len:{:d}".format(
            self.annotation_file, seq_length))
        self.movie_instances = {}
        ground_truth = pd.read_csv(self.annotation_file, sep=' ')
        n_ground_truth = len(ground_truth)
        for i_pos in range(n_ground_truth):
            s_ground_truth = ground_truth.loc[[i_pos]]
            movie_name = s_ground_truth['video-name'].values[0]
            n_frames = self.PathVars.video_frames[movie_name]
            if movie_name == miss_name:
                # print("DEB")
                continue
            else:
                gt_start = s_ground_truth['f-init'].values[0]
                gt_end = min(s_ground_truth['f-end'].values[0], n_frames)
                if movie_name in self.movie_instances.keys():
                    self.movie_instances[movie_name].append((gt_start, gt_end))
                else:
                    self.movie_instances[movie_name] = [(gt_start, gt_end)]
        n_positive_instances = 0
        total_reps = 0
        #TODO: during training we can remove the repeats; at test time they remain because we evaluate on different overlapping clips
        for s_name in self.movie_instances.keys():

            s_action_list = self.movie_instances[s_name]
            orig_len = len(s_action_list)
            s_action_list = list(set(s_action_list))
            s_action_list.sort()
            cur_len = len(s_action_list)
            # print("{:s}\t reps{:d}".format(s_name, orig_len-cur_len))
            total_reps += orig_len - cur_len
            n_positive_instances += len(s_action_list)
            self.movie_instances[s_name] = s_action_list
        print("{:d} reps found".format(total_reps))
        self.instances = []
        self.maximum_outputs = 0
        self.seq_len = seq_length
        for s_movie_name in self.movie_instances.keys():
            s_movie_instance = self.movie_instances[s_movie_name]
            # s_movie_instance = list(set(s_movie_instance))
            n_frames = self.PathVars.video_frames[s_movie_name]
            start_idx = 0
            # end_idx = start_idx+(self.seq_len)*self.unit_size
            # TODO: detail here, we do not cover the actions that extend beyond the clip boundary
            get_outbound = False
            while start_idx < n_frames:
                end_idx = start_idx + self.seq_len
                if end_idx >= n_frames:
                    #TODO: should we add 1 offset?
                    start_idx = start_idx - (end_idx - n_frames)
                    end_idx = n_frames
                    get_outbound = True

                s_instance = {}
                s_instance['name'] = s_movie_name
                s_instance['start'] = start_idx
                s_instance['end'] = end_idx
                s_instance['actions'] = []

                for s_action in s_movie_instance:
                    if s_action[0] >= start_idx and s_action[1] < end_idx:
                        s_instance['actions'].append(s_action)

                if len(s_instance['actions']) > self.maximum_outputs:
                    self.maximum_outputs = len(s_instance['actions'])
                self.instances.append(s_instance)
                if get_outbound:
                    break
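                # Advance the window by (1 - overlap) * seq_len frames
                # (36 frames for the defaults seq_length=360, overlap=0.9).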
                start_idx = int(start_idx + (1 - overlap) * self.seq_len)

        print(
            "{:d} video clips, {:d} training instances, {:d} positive examples, max instance per segment:{:d}"
            .format(len(self.movie_instances), len(self.instances),
                    n_positive_instances, self.maximum_outputs))
Code example #3
    def __init__(self,
                 seq_length=50,
                 unit_size=16,
                 overlap=0.5,
                 feature_file_ext='mat'):
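        """Sliding-window loader over the TURN training sample list (C3D units).

        Parses the space-separated sample list, snaps ground-truth boundaries
        to multiples of `unit_size` frames, deduplicates instances per video,
        then slices each video into windows of `seq_length * unit_size` frames
        with the given `overlap`, keeping the actions whose rounded boundaries
        fall entirely inside each window.
        """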
        self.PathVars = path_vars.PathVars()
        self.unit_size = unit_size
        self.feature_directory = '/home/zwei/datasets/THUMOS14/features/c3d'
        self.feature_file_ext = feature_file_ext
        self.annotation_file = '/home/zwei/Dev/TURN_TAP_ICCV17/turn_codes/val_training_samples.txt'
        print("Reading training data list from {:s}\t clip=len:{:d}".format(
            self.annotation_file, seq_length))
        self.movie_instances = {}

        with open(self.annotation_file) as f:
            for l in f:
                movie_name = l.rstrip().split(" ")[0]
                # clip_start = float(l.rstrip().split(" ")[1])
                # clip_end = float(l.rstrip().split(" ")[2])
                gt_start = float(l.rstrip().split(" ")[3])
                gt_end = float(l.rstrip().split(" ")[4])
                round_gt_start = np.round(
                    gt_start / self.unit_size) * self.unit_size + 1
                round_gt_end = np.round(
                    gt_end / self.unit_size) * self.unit_size + 1
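                # np.round snaps the boundaries to the nearest multiple of
                # unit_size (the C3D feature stride), with a 1-frame offset.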
                action_category = l.rstrip().split(" ")[-1]
                class_id = self.PathVars.classnames.index(action_category)
                #TODO: currently we want to remove overlaps between different classes
                if movie_name in self.movie_instances.keys():
                    # self.movie_instances[movie_name].append((gt_start, gt_end, round_gt_start, round_gt_end, class_id))
                    self.movie_instances[movie_name].append(
                        (gt_start, gt_end, round_gt_start, round_gt_end))

                else:
                    # self.movie_instances[movie_name] = [(gt_start, gt_end, round_gt_start, round_gt_end, class_id)]
                    self.movie_instances[movie_name] = [
                        (gt_start, gt_end, round_gt_start, round_gt_end)
                    ]
        #TODO: remove the repeats
        n_positive_instances = 0
        for s_name in self.movie_instances.keys():
            s_action_list = self.movie_instances[s_name]
            s_action_list = list(set(s_action_list))
            s_action_list.sort()
            n_positive_instances += len(s_action_list)
            self.movie_instances[s_name] = s_action_list
        self.seq_len = seq_length

        self.instances = []
        self.maximum_outputs = 0
        for s_movie_name in self.movie_instances.keys():
            s_movie_instance = self.movie_instances[s_movie_name]
            # s_movie_instance = list(set(s_movie_instance))
            n_frames = self.PathVars.video_frames[s_movie_name]
            start_idx = 0
            end_idx = (start_idx + self.seq_len) * self.unit_size
            while end_idx < n_frames:
                s_instance = {}
                s_instance['name'] = s_movie_name
                s_instance['start'] = start_idx
                s_instance['end'] = end_idx
                s_instance['actions'] = []

                for s_action in s_movie_instance:
                    if s_action[2] >= start_idx and s_action[3] < end_idx:
                        s_instance['actions'].append(s_action)

                if len(s_instance['actions']) > self.maximum_outputs:
                    self.maximum_outputs = len(s_instance['actions'])
                self.instances.append(s_instance)
                start_idx = int(start_idx + self.seq_len * self.unit_size -
                                self.seq_len * overlap * self.unit_size)
                end_idx = start_idx + self.seq_len * self.unit_size
        print(
            "{:d} video clips, {:d} training instances, {:d} positive examples, max instance per segment:{:d}"
            .format(len(self.movie_instances), len(self.instances),
                    n_positive_instances, self.maximum_outputs))
Code example #4
    def __init__(self,
                 seq_length=360,
                 overlap=0.9,
                 sample_rate=1,
                 dataset_split='val',
                 feature_file_ext='npy'):
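        """Sliding-window loader over the THUMOS14 TAG proposal lists.

        Reads the proposal CSV for `dataset_split`, deduplicates instances per
        video, then slices each video into clips of `seq_length` frames with
        the given `overlap`; any action that intersects a clip is kept,
        clipped to the clip boundaries.
        """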
        self.PathVars = path_vars.PathVars()
        self.feature_directory = '/home/zwei/datasets/THUMOS14/features/BNInception'
        self.feature_file_ext = feature_file_ext
        self.annotation_file = '/home/zwei/Dev/NetModules/ActionLocalizationDevs/action_det_prep/thumos14_tag_{:s}_proposal_list.csv'.format(
            dataset_split)
        print(
            "Reading training data list from {:s}\t clip len:{:d}, sample rate: {:d}"
            .format(self.annotation_file, seq_length, sample_rate))
        self.movie_instances = {}
        ground_truth = pd.read_csv(self.annotation_file, sep=' ')
        n_ground_truth = len(ground_truth)
        for i_pos in range(n_ground_truth):
            s_ground_truth = ground_truth.loc[[i_pos]]
            movie_name = s_ground_truth['video-name'].values[0]
            n_frames = self.PathVars.video_frames[movie_name]
            if movie_name in miss_name:
                continue
            else:
                gt_start = s_ground_truth['f-init'].values[0]
                gt_end = min(s_ground_truth['f-end'].values[0], n_frames)
                if movie_name in self.movie_instances.keys():
                    self.movie_instances[movie_name].append((gt_start, gt_end))
                else:
                    self.movie_instances[movie_name] = [(gt_start, gt_end)]
        n_positive_instances = 0
        total_reps = 0
        #Update: during training we can remove the repeats; at test time they remain because we evaluate on different overlapping clips
        for s_name in self.movie_instances.keys():

            s_action_list = self.movie_instances[s_name]
            orig_len = len(s_action_list)
            s_action_list = list(set(s_action_list))
            s_action_list.sort()  # sort from left to right
            cur_len = len(s_action_list)
            # print("{:s}\t reps{:d}".format(s_name, orig_len-cur_len))
            total_reps += orig_len - cur_len
            n_positive_instances += len(s_action_list)
            self.movie_instances[s_name] = s_action_list
        print("{:d} reps found".format(total_reps))

        self.instances = []
        self.maximum_outputs = 0
        self.seq_len = seq_length
        self.sample_rate = sample_rate

        for s_movie_name in self.movie_instances.keys():
            s_movie_instance = self.movie_instances[s_movie_name]
            n_frames = self.PathVars.video_frames[s_movie_name]
            start_idx = 0
            isInbound = True
            while start_idx < n_frames and isInbound:
                end_idx = start_idx + self.seq_len
                #UPDATE: cannot use a strict >, since isInbound must also be set to False when end_idx == n_frames
                if end_idx >= n_frames:
                    isInbound = False
                    start_idx = start_idx - (end_idx - n_frames)
                    end_idx = n_frames

                s_instance = {}
                s_instance['name'] = s_movie_name
                s_instance['start'] = start_idx
                s_instance['end'] = end_idx
                s_instance['actions'] = []
                #TODO: also consider keeping only the actions whose overlap with the current clip exceeds a threshold
                #TODO: in that case, how would we assign them scores?
                s_instance_window = [start_idx, end_idx]
                for s_action in s_movie_instance:
                    #Update: partially overlapping actions are also included here...
                    if compute_intersection(s_action, s_instance_window) == 1:
                        s_action_start = max(s_action[0], s_instance_window[0])
                        s_action_end = min(
                            s_action[1],
                            s_instance_window[1] - 1)  #TODO: check whether we should subtract 1 here
                        #TODO: add overlap rate here!
                        s_instance['actions'].append(
                            [s_action_start, s_action_end])

                if len(s_instance['actions']) > self.maximum_outputs:
                    self.maximum_outputs = len(s_instance['actions'])
                self.instances.append(s_instance)

                start_idx = int(start_idx + (1 - overlap) * self.seq_len)

        print(
            "{:d} video clips, {:d} training instances, {:d} positive examples, max instance per segment:{:d}"
            .format(len(self.movie_instances), len(self.instances),
                    n_positive_instances, self.maximum_outputs))
Code example #5
    def __init__(self,
                 seq_length=50,
                 unit_size=16.,
                 overlap=0.9,
                 feature_file_ext='mat'):
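        """Sliding-window loader over THUMOS14 test ground truth in C3D units.

        Reads the test ground-truth CSV, converts frame boundaries into
        `unit_size`-frame C3D feature units, deduplicates instances per video,
        then slices each video into windows of `seq_length` units with the
        given `overlap`, keeping actions that fall entirely inside each window;
        videos shorter than one window are skipped.
        """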
        self.PathVars = path_vars.PathVars()
        self.unit_size = unit_size
        self.feature_directory = '/home/zwei/datasets/THUMOS14/features/c3d'
        self.feature_file_ext = feature_file_ext
        self.annotation_file = '/home/zwei/Dev/NetModules/ActionLocalizationDevs/PropEval/thumos14_test_groundtruth.csv'
        print("Reading training data list from {:s}\t clip=len:{:d}".format(
            self.annotation_file, seq_length))
        self.movie_instances = {}
        ground_truth = pd.read_csv(self.annotation_file, sep=' ')
        n_ground_truth = len(ground_truth)
        for i_pos in range(n_ground_truth):
            s_ground_truth = ground_truth.loc[[i_pos]]
            movie_name = s_ground_truth['video-name'].values[0]
            if movie_name == miss_name:
                # print("DEB")
                continue
            else:
                gt_start = s_ground_truth['f-init'].values[0]
                gt_end = s_ground_truth['f-end'].values[0]

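                # Convert frame boundaries to C3D feature-unit indices and
                # make sure every instance spans at least one unit.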
                c3d_gt_start = np.floor(gt_start / self.unit_size)
                c3d_gt_end = np.floor(gt_end / self.unit_size)
                if c3d_gt_end == c3d_gt_start:
                    c3d_gt_end += 1

                if movie_name in self.movie_instances.keys():
                    # self.movie_instances[movie_name].append((gt_start, gt_end, round_gt_start, round_gt_end, class_id))
                    self.movie_instances[movie_name].append(
                        (c3d_gt_start, c3d_gt_end))

                else:
                    # self.movie_instances[movie_name] = [(gt_start, gt_end, round_gt_start, round_gt_end, class_id)]
                    self.movie_instances[movie_name] = [(c3d_gt_start,
                                                         c3d_gt_end)]
        #TODO: remove the repeats (disabled)
        n_positive_instances = 0
        total_reps = 0
        for s_name in self.movie_instances.keys():

            s_action_list = self.movie_instances[s_name]
            orig_len = len(s_action_list)
            s_action_list = list(set(s_action_list))
            s_action_list.sort()
            cur_len = len(s_action_list)
            # print("{:s}\t reps{:d}".format(s_name, orig_len-cur_len))
            total_reps += orig_len - cur_len
            n_positive_instances += len(s_action_list)
            self.movie_instances[s_name] = s_action_list
        print("{:d} reps found".format(total_reps))
        self.instances = []
        self.maximum_outputs = 0
        self.seq_len = seq_length
        for s_movie_name in self.movie_instances.keys():
            s_movie_instance = self.movie_instances[s_movie_name]
            # s_movie_instance = list(set(s_movie_instance))
            n_frames = int(self.PathVars.video_frames[s_movie_name] /
                           self.unit_size)
            if n_frames <= self.seq_len:
                continue
            start_idx = 0
            get_outbound = False
            while start_idx < n_frames:
                end_idx = start_idx + self.seq_len
                if end_idx >= n_frames:
                    #TODO: should we add 1 offset?
                    start_idx = start_idx - (end_idx - n_frames)
                    end_idx = n_frames
                    get_outbound = True

                s_instance = {}
                s_instance['name'] = s_movie_name
                s_instance['start'] = start_idx
                s_instance['end'] = end_idx
                s_instance['actions'] = []

                for s_action in s_movie_instance:
                    if s_action[0] >= start_idx and s_action[1] < end_idx:
                        s_instance['actions'].append(s_action)

                if len(s_instance['actions']) > self.maximum_outputs:
                    self.maximum_outputs = len(s_instance['actions'])
                self.instances.append(s_instance)
                if get_outbound:
                    break

                start_idx = int(start_idx + (1 - overlap) * self.seq_len)

        print(
            "{:d} video clips, {:d} training instances, {:d} positive examples, max instance per segment:{:d}"
            .format(len(self.movie_instances), len(self.instances),
                    n_positive_instances, self.maximum_outputs))