def _cache_entity_data(self, csv_file_path):
        """Load a CSV file into ``self.entity_data`` and ``self.speech_data``.

        The first row of the file is discarded as a header. For each remaining
        row, column 0 is read as the video id, column 1 as the timestamp, the
        third-from-last column as the entity id and the second-from-last
        column as the raw label, which is passed through both
        ``_postprocess_speech_label`` and ``_postprocess_entity_label``.

        Side effects:
            ``self.entity_data[video_id][entity_id]`` grows by one
            ``(entity_id, timestamp, entity_label)`` tuple per row.
            ``self.speech_data[video_id][timestamp]`` holds the largest
            speech label seen so far for that video/timestamp pair.

        Args:
            csv_file_path: Path handed to ``io.csv_to_list``.

        Returns:
            set: ``(video_id, entity_id)`` pairs for the entities that were
            not yet present in ``self.entity_data`` when this call first
            encountered them.
        """
        rows = io.csv_to_list(csv_file_path)
        rows.pop(0)  # drop the CSV header row

        new_entities = set()
        for row in rows:
            video_id, timestamp, entity_id = row[0], row[1], row[-3]

            # Both labels are derived from the same raw column.
            raw_label = row[-2]
            speech_label = self._postprocess_speech_label(raw_label)
            entity_label = self._postprocess_entity_label(raw_label)

            # Entity track: one list of minimal records per (video, entity).
            video_entities = self.entity_data.setdefault(video_id, {})
            if entity_id not in video_entities:
                video_entities[entity_id] = []
                new_entities.add((video_id, entity_id))
            video_entities[entity_id].append(
                (entity_id, timestamp, entity_label))

            # Speech track: keep the maximum label per timestamp; per the
            # original note, the max indicates whether someone is speaking.
            video_speech = self.speech_data.setdefault(video_id, {})
            previous = video_speech.get(timestamp, speech_label)
            video_speech[timestamp] = max(previous, speech_label)

        return new_entities
    def _cache_entity_data_forward(self, csv_file_path, target_video):
        """Load the rows of one video from a CSV file into ``self.entity_data``.

        The first row of the file is discarded as a header. Rows whose first
        column differs from ``target_video`` are skipped. For each remaining
        row, column 1 is read as the timestamp, the third-from-last column as
        the entity id and the second-from-last column as the raw label, which
        is passed through ``_postprocess_entity_label``.

        Side effects:
            ``self.entity_data[video_id][entity_id]`` grows by one
            ``(entity_id, timestamp, entity_label)`` tuple per matching row.

        Args:
            csv_file_path: Path handed to ``io.csv_to_list``.
            target_video: Video id to keep; compared against column 0.

        Returns:
            list: One ``(video_id, entity_id, timestamp)`` tuple per matching
            row, in file order.
        """
        rows = io.csv_to_list(csv_file_path)
        rows.pop(0)  # drop the CSV header row

        selected = []
        for row in rows:
            video_id = row[0]
            if video_id != target_video:
                continue

            entity_id, timestamp = row[-3], row[1]
            entity_label = self._postprocess_entity_label(row[-2])
            selected.append((video_id, entity_id, timestamp))

            # The label is stored with the record but is not part of the
            # returned index; the original author noted it is safe to
            # ignore here.
            video_entities = self.entity_data.setdefault(video_id, {})
            video_entities.setdefault(entity_id, []).append(
                (entity_id, timestamp, entity_label))

        return selected
    def _cache_feature_file(self, csv_file):
        """Parse a feature CSV file into three lookup structures.

        No header row is skipped: every row is parsed as data. Column 0 is
        read as the video id, column 1 as the timestamp, column 2 as the
        entity id, column 3 as the label (converted with ``int(float(...))``)
        and the last column as the encoded features, which are decoded by
        ``_decode_feature_data_from_csv``.

        Args:
            csv_file: Path handed to ``io.csv_to_list``.

        Returns:
            tuple: ``(entity_data, feature_list, ts_to_entity)`` where

            * ``entity_data[video_id][entity_id][ts]`` is the
              ``(features, label)`` pair of the last row seen for that key,
            * ``feature_list`` holds one ``(video_id, entity_id, ts)`` tuple
              per row, in file order,
            * ``ts_to_entity[video_id][ts]`` lists the entity ids of every
              row with that video/timestamp, in file order.
        """
        print('load feature data', csv_file)

        features_by_entity = {}
        entities_by_ts = {}
        index = []
        for row in io.csv_to_list(csv_file):
            video_id, ts, entity_id = row[0], row[1], row[2]
            features = self._decode_feature_data_from_csv(row[-1])
            label = int(float(row[3]))

            # video -> entity -> timestamp -> (features, label)
            per_entity = features_by_entity.setdefault(video_id, {})
            per_entity.setdefault(entity_id, {})[ts] = (features, label)
            index.append((video_id, entity_id, ts))

            # video -> timestamp -> [entity ids present at that timestamp]
            per_ts = entities_by_ts.setdefault(video_id, {})
            per_ts.setdefault(ts, []).append(entity_id)

        print('loaded ', len(index), ' features')
        return features_by_entity, index, entities_by_ts
Example #4
0
    ava_ground_truth_dir = '.../AVA/csv/val'  #AVA original ground truth files
    temporary_dir = '.../temp/activeSpeakers'  #Just an empty temporary dir

    # The script will generate these two, use them for the official AVA evaluation
    dataset_predictions_csv = '.../Forwards/ActiveSpeakers/publish/final/STE.csv'  #file with final predictions
    dataset_gt_csv = '...Forwards/ActiveSpeakers/publish/final/gt.csv'  # Utility file to use the official evaluation tool

    #cleanup temp dir
    del_files = glob.glob(temporary_dir + '/*')
    for f in del_files:
        os.remove(f)

    pred_files, gt_files = select_files(forward_dir, ava_ground_truth_dir)

    for idx, (pf, gtf) in enumerate(zip(pred_files, gt_files)):
        prediction_data = csv_to_list(pf)
        gt_data = csv_to_list(gtf)

        print('Match', os.path.basename(pf), len(prediction_data),
              len(gt_data))
        if len(prediction_data) != len(gt_data):
            raise Exception('Groundtruth and prediction dont match in lenght')

        post_processed_predictions = prediction_postprocessing(
            prediction_data, 1)

        #reformat into ava required style
        for idx in range(len(post_processed_predictions)):
            post_processed_predictions[idx] = [
                gt_data[idx][0], gt_data[idx][1], gt_data[idx][2],
                gt_data[idx][3], gt_data[idx][4], gt_data[idx][5],