def _create_bags(self, bagging_strategy):
    """Group the frames of every eye into bags of at most num_frames frames."""
    bags = []
    for eye, count in self.occurrences.items():
        # The eye-level label is taken from the first matching row of the label CSV.
        bag_label = self.labels_df[self.labels_df.image.str.contains(eye)].iloc[0].level
        prefix = 'pos' if bag_label == 1 else 'neg'

        # Collect all frames of this eye from its class subfolder, ordered by frame id.
        eye_frames = sorted(
            [f for f in os.listdir(join(self.root_dir, prefix))
             if get_video_desc(f)['eye_id'] == eye],
            key=lambda n: get_video_desc(n)['frame_id'])
        if bagging_strategy == 'random':
            # The 'random' strategy discards the temporal order before splitting.
            random.shuffle(eye_frames)

        if count <= self.num_frames:
            # Few enough frames: the whole eye becomes a single bag.
            bags.append({
                'frames': eye_frames,
                'label': bag_label,
                'name': f'{eye}_0',
                'shortname': eye
            })
        else:
            # Otherwise split the frame list into fixed-size chunks.
            for i, start_idx in enumerate(range(0, len(eye_frames), self.num_frames)):
                bags.append({
                    'frames': eye_frames[start_idx:start_idx + self.num_frames],
                    'label': bag_label,
                    'name': f'{eye}_{i}'
                })
    return bags
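For illustration, a minimal standalone sketch of the chunking step above, using made-up frame names and a bag size of 3 (plain Python, independent of the class):

eye = 'eye01'  # hypothetical eye id
eye_frames = [f'{eye}_frame{i:02d}.png' for i in range(7)]  # hypothetical frame names
num_frames = 3
bags = [{'frames': eye_frames[s:s + num_frames], 'label': 1, 'name': f'{eye}_{i}'}
        for i, s in enumerate(range(0, len(eye_frames), num_frames))]
# -> three bags holding 3, 3 and 1 frames respectively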
Example 2
def refine_dataframe(df, files):
    """Expand eye-level labels to one row per frame file of the same eye."""
    refined_rows = []
    for row in df.itertuples():
        video_desc = get_video_desc(row.image)
        # Binarise the DR grade: any level above 0 counts as positive.
        level = 1 if row.level > 0 else 0

        matching_files = [
            f for f in files
            if get_video_desc(f)['eye_id'] == video_desc['eye_id']
        ]
        refined_rows.extend({'image': match, 'level': level} for match in matching_files)

    return pd.DataFrame(refined_rows, columns=df.columns)
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        severity = self.labels_df.iloc[idx, 1]

        if self.use_prefix:
            # Frames are stored in class subfolders pos/ and neg/.
            prefix = 'pos' if severity == 1 else 'neg'
        else:
            prefix = ''
        img_name = os.path.join(self.root_dir, prefix,
                                self.labels_df.iloc[idx, 0] + self.file_type)
        img = cv2.imread(img_name)
        assert img is not None, f'Image {img_name} must exist'

        sample = {
            'image': img,
            'label': severity,
            'eye_id': get_video_desc(self.labels_df.iloc[idx, 0],
                                     only_eye=True)['eye_id'],
            'name': self.labels_df.iloc[idx, 0]
        }
        if self.transform:
            # torchvision transforms expect RGB while cv2 loads BGR, so swap channels first.
            sample['image'] = img[:, :, [2, 1, 0]]
            sample['image'] = self.transform(sample['image'])
        if self.augs:
            sample['image'] = self.augs(image=img)['image']
        return sample
    def _create_bags(self):
        """Group the frames of every eye into randomly ordered bags of at most max_bag_size."""
        bags = []
        for eye, count in self.occurrences.items():
            bag_label = self.labels_df[self.labels_df.image.str.contains(eye)].iloc[0].level
            prefix = 'pos' if bag_label == 1 else 'neg'
            eye_frames = [
                f for f in os.listdir(join(self.root_dir, prefix))
                if get_video_desc(f)['eye_id'] == eye
            ]
            # random.sample over the full length returns a shuffled copy of the frame list.
            eye_frames = random.sample(eye_frames, len(eye_frames))
            if count <= self.max_bag_size:
                # Few enough frames: the whole eye becomes a single bag.
                bags.append({
                    'frames': eye_frames,
                    'label': bag_label,
                    'name': f'{eye}_0',
                    'shortname': eye
                })
            else:
                # Otherwise split the shuffled frames into fixed-size chunks.
                for i, start_idx in enumerate(range(0, len(eye_frames), self.max_bag_size)):
                    bags.append({
                        'frames': eye_frames[start_idx:start_idx + self.max_bag_size],
                        'label': bag_label,
                        'name': f'{eye}_{i}'
                    })
        return bags
    def __getitem__(self, idx):
        assert not torch.is_tensor(idx)
        severity = self.labels_df.iloc[idx, 1]
        if self.use_prefix:
            prefix = 'pos' if severity == 1 else 'neg'
        else:
            prefix = ''
        # Paths of the raw frame and of its preprocessed counterpart (root_dir + suffix).
        img_name = os.path.join(self.root_dir, prefix,
                                self.labels_df.iloc[idx, 0] + self.file_type)
        processed_name = os.path.join(self.root_dir + self.suffix, prefix,
                                      self.labels_df.iloc[idx, 0] + self.file_type)
        img = cv2.imread(img_name)
        processed_img = cv2.imread(processed_name)
        sample = {
            'image': None,
            'label': severity,
            'eye_id': get_video_desc(self.labels_df.iloc[idx, 0],
                                     only_eye=True)['eye_id']
        }
        if self.augs:
            # Augment both versions and concatenate them along the channel dimension.
            img = self.augs(image=img)['image']
            processed_img = self.augs(image=processed_img)['image']
            sample['image'] = torch.cat([img, processed_img])
        return sample
Example 6
def blow_up_df(file_list, df, eye_id, eye_level):
    """Append one label row per unique snippet id found in file_list."""
    snippet_ids = {get_video_desc(f)['snippet_id'] for f in file_list}
    new_rows = [{'image': f'{eye_id}_{i:02d}', 'level': eye_level}
                for i in range(len(snippet_ids))]
    return pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)
Example 7
def expand_df_with_snippet_info(label_train, label_val, output_path):
    """Expand eye-level train/val labels to one row per video snippet."""
    for set_str in ['train', 'val']:
        label_file = label_train if set_str == 'train' else label_val
        df = pd.read_csv(join(output_path, label_file))
        df_refined = pd.DataFrame(columns=df.columns)
        files = {
            'pos': os.listdir(join(output_path, set_str, 'pos')),
            'neg': os.listdir(join(output_path, set_str, 'neg'))
        }
        splinters = []

        for _, row in tqdm(df.iterrows(),
                           total=len(df),
                           desc='Refining labels'):
            cur_eye_id = row['image']
            prefix = 'pos' if row['level'] == 1 else 'neg'

            # All snippet files of this eye, highest snippet id first.
            corres_files = sorted(
                [f for f in files[prefix] if cur_eye_id in f],
                reverse=True,
                key=lambda name: get_video_desc(name)['snippet_id'])
            if check_splinter(corres_files):
                # Splintered recordings carry a longer eye id; expand them under their own id.
                splinters.append(cur_eye_id)
                corres_files_splinter = [
                    f for f in corres_files
                    if len(get_video_desc(f)['eye_id']) > 5
                ]
                df_refined = blow_up_df(
                    corres_files_splinter, df_refined,
                    f'{cur_eye_id}{get_video_desc(corres_files_splinter[0])["eye_id"][-1]}',
                    row['level'])
            df_refined = blow_up_df(corres_files, df_refined, cur_eye_id,
                                    row['level'])
        df_refined.to_csv(join(output_path, f'labels_{set_str}_refined.csv'),
                          index=False)
        print('These splintered videos were detected and included in the expanded labels.csv:')
        for spi in splinters:
            print('Splinter: ', spi)
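A hypothetical invocation (file names and path are assumptions); it writes labels_train_refined.csv and labels_val_refined.csv into output_path:

expand_df_with_snippet_info(label_train='labels_train.csv',  # hypothetical file name
                            label_val='labels_val.csv',      # hypothetical file name
                            output_path='/data/paxos_out')   # hypothetical path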
    def get_weight(self, idx):
        """Sampling weight for index idx, used to balance classes (and optionally eyes)."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        severity = self.labels_df.iloc[idx, 1]
        # Negative samples get the configured balance ratio, positives a weight of 1.
        weight = self.ratio if severity == 0 else 1.0
        if self.occur_balance:
            # Normalise by the number of frames of the same eye.
            weight /= self.grade_count[get_video_desc(self.labels_df.iloc[idx, 0])['eye_id']]
        if self.boost > 1.0 and severity == 1 and self.labels_df.iloc[idx, 2] == 1:
            # Boost positive frames that the weak-prediction columns also mark as positive.
            weight *= (1. + self.labels_df.iloc[idx, 3])
        return weight
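These per-sample weights can drive a torch.utils.data.WeightedRandomSampler; a minimal sketch, assuming dataset is an instance of the single-frame dataset above and implements __len__:

from torch.utils.data import DataLoader, WeightedRandomSampler

# Sketch only: 'dataset' is assumed to be an instance of the single-frame dataset above.
weights = [dataset.get_weight(i) for i in range(len(dataset))]
sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)
loader = DataLoader(dataset, batch_size=16, sampler=sampler)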
def run(input_path, labels_path, dataset, mode):
    """
    Takes a CSV file that only holds eye ids. For every frame (or snippet) belonging to one of
    these eyes, a new entry with the same binarised DR grading is created.
    :param input_path: Absolute path to input folder
    :param labels_path: Absolute path to label file that will be refined
    :param dataset: Dataset name (train/val/...)
    :param mode: Determines operation mode (snippets / frames)
    :return:
    """
    df = pd.read_csv(labels_path)
    df[COL_CLASS] = df[COL_CLASS].astype(int)

    files = {
        'pos': os.listdir(join(input_path, dataset, 'pos')),
        'neg': os.listdir(join(input_path, dataset, 'neg'))
    }

    refined_rows = []
    for _, row in tqdm(df.iterrows(), total=len(df)):
        video_desc = get_video_desc(row['image'])
        level = row['level']
        prefix = 'pos' if level > 0 else 'neg'

        # In 'frames' mode every frame of the eye matches; otherwise only frames of the
        # same snippet do.
        matching_files = [
            f for f in files[prefix]
            if get_video_desc(f)['eye_id'] == video_desc['eye_id'] and (
                mode == 'frames'
                or get_video_desc(f)['snippet_id'] == video_desc['snippet_id'])
        ]
        refined_rows.extend({'image': file, 'level': 1 if level > 0 else 0}
                            for file in matching_files)

    df_refined = pd.DataFrame(refined_rows, columns=df.columns)
    df_refined.to_csv(join(input_path, f'labels_{dataset}_frames.csv'),
                      index=False)
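A hypothetical call (paths are assumptions) that expands an eye-level train CSV to per-frame labels, writing labels_train_frames.csv into the input folder:

run(input_path='/data/paxos_processed',                    # hypothetical path
    labels_path='/data/paxos_processed/labels_train.csv',  # hypothetical path
    dataset='train',
    mode='frames')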
Example 10
def run(labels_path, min_conf=80):
    """Drop all label rows whose confidence value (parsed from the video name) is below min_conf."""
    df = pd.read_csv(labels_path)

    drop_list = []
    for i, row in tqdm(df.iterrows(), total=len(df)):
        video_desc = get_video_desc(row['image'])

        if int(video_desc['confidence']) < min_conf:
            drop_list.append(i)
    print(f'Dropping {len(drop_list)} rows')
    df.drop(df.index[drop_list], inplace=True)
    df.to_csv(labels_path, index=False)
    def __getitem__(self, idx):
        assert not torch.is_tensor(idx)

        severity = self.labels_df.iloc[idx, 1]
        prefix = 'pos' if severity == 1 else 'neg'

        snip_name = self.labels_df.iloc[idx, 0]
        video_desc = get_video_desc(snip_name)

        # All frames that belong to the same eye as the indexed snippet.
        video_all_frames = [
            f for f in os.listdir(os.path.join(self.root_dir, prefix))
            if video_desc['eye_id'] == get_video_desc(f)['eye_id']
        ]
        # Draw num_frames random frame indices (with replacement) from this eye.
        selection = np.random.randint(0, len(video_all_frames), self.num_frames)
        selected_frames = [video_all_frames[i] for i in selection]

        sample = {
            'frames': [],
            'label': severity,
            'name': video_desc['eye_id'][:5]
        }
        for name in selected_frames:
            img = cv2.imread(os.path.join(self.root_dir, prefix, name))
            img = self.augs(image=img)['image'] if self.augs else img
            sample['frames'].append(img)

        # With augmentations the frames are already tensors, otherwise plain arrays.
        sample['frames'] = torch.stack(sample['frames']) if self.augs else np.stack(sample['frames'])
        return sample
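Because every sample already stacks num_frames frames into one tensor (when the augmentations include a tensor conversion), bags can be batched with a plain DataLoader; a minimal sketch, assuming snippet_dataset is an instance of this class:

from torch.utils.data import DataLoader

# Sketch only: 'snippet_dataset' is assumed to be an instance of the class above,
# constructed with augmentations that convert frames to tensors.
loader = DataLoader(snippet_dataset, batch_size=2, shuffle=True)
batch = next(iter(loader))
# batch['frames'] has shape (2, num_frames, C, H, W), batch['label'] has shape (2,)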
    def __init__(self,
                 csv_file,
                 root_dir,
                 augmentations=None,
                 balance_ratio=1.0,
                 max_bag_size=100):
        self.labels_df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.augs = augmentations
        self.ratio = balance_ratio
        self.max_bag_size = max_bag_size
        # Count how many frames each eye contributes; this drives the bag splitting.
        self.occurrences = {}
        for row in self.labels_df.itertuples():
            eye_id = nn_utils.get_video_desc(row.image)['eye_id']
            self.occurrences[eye_id] = self.occurrences.get(eye_id, 0) + 1
        self.bags = self._create_bags()
def run(input_path):
    """
    Create statistics for dataset
    :param input_path: Absolute path to the label file of a dataset
    :return:
    """
    df = pd.read_csv(input_path)
    # Number of frames per eye id.
    names = {}
    for row in df.itertuples():
        eye_id = nn_utils.get_video_desc(row.image)['eye_id']
        names[eye_id] = names.get(eye_id, 0) + 1

    name_arr = np.array([*names.values()])
    print(f'Dataset stats:\n'
          f' Mean> {name_arr.mean()},\n'
          f' Standard Deviation> {name_arr.std()},\n'
          f' Median> {np.median(name_arr)},\n'
          f' Histogram(5)> {np.histogram(name_arr, bins=5)}, \n'
          f' Histogram(10)> {np.histogram(name_arr, bins=10)}')
    def __init__(self,
                 csv_file,
                 root_dir,
                 num_frames,
                 file_type='.png',
                 balance_ratio=1.0,
                 augmentations=None,
                 bagging_strategy='snippet'):
        self.labels_df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.file_type = file_type
        self.augs = augmentations
        self.ratio = balance_ratio
        self.num_frames = num_frames
        self.bag_strategy = bagging_strategy
        # Count how many frames each eye contributes; this drives the bag splitting.
        self.occurrences = {}
        for row in self.labels_df.itertuples():
            eye_id = nn_utils.get_video_desc(row.image)['eye_id']
            self.occurrences[eye_id] = self.occurrences.get(eye_id, 0) + 1
        self.bags = self._create_bags(bagging_strategy)
    def __init__(self,
                 csv_file,
                 root_dir,
                 file_type='.png',
                 balance_ratio=1.0,
                 transform=None,
                 augmentations=None,
                 use_prefix=False,
                 boost_frames=1.0,
                 occur_balance=False):
        """
        Retina Dataset for normal single-frame data samples
        :param csv_file: path to csv file with labels
        :param root_dir: path to folder with sample images
        :param file_type: file ending of images (e.g. '.jpg')
        :param balance_ratio: adjust sample weight in case of unbalanced classes
        :param transform: pytorch data augmentation
        :param augmentations: albumentations data augmentation
        :param use_prefix: data folder contains subfolders for classes (pos / neg)
        :param boost_frames: boost frames if a third weak prediction column is available
        :param occur_balance: normalise sample weights by the number of frames per eye
        """
        self.labels_df = pd.read_csv(csv_file)
        # Frames per eye, used by get_weight when occur_balance is enabled.
        self.grade_count = Counter([
            get_video_desc(name)['eye_id']
            for name in self.labels_df['image'].tolist()
        ]) if occur_balance else None
        self.root_dir = root_dir
        self.file_type = file_type
        self.transform = transform
        self.augs = augmentations
        self.ratio = balance_ratio
        self.use_prefix = use_prefix
        self.boost = boost_frames
        self.occur_balance = occur_balance
        # transform and augmentations are mutually exclusive; boosting needs the extra columns.
        assert transform is None or augmentations is None
        assert (boost_frames > 1.0
                and len(self.labels_df.columns) > 2) or boost_frames == 1.0
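A hypothetical way to construct such a dataset with albumentations and a DataLoader; the class name RetinaDataset, the paths and the augmentation choices are assumptions, not taken from the original code:

import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader

# Sketch only: 'RetinaDataset' is assumed to be the class that owns the
# __init__ / __getitem__ / get_weight methods shown above; paths are made up.
augs = A.Compose([A.Resize(448, 448), A.Normalize(), ToTensorV2()])
dataset = RetinaDataset(csv_file='/data/labels_train_frames.csv',
                        root_dir='/data/train',
                        augmentations=augs,
                        use_prefix=True)
loader = DataLoader(dataset, batch_size=16, shuffle=True)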
Example 16
def check_splinter(file_list):
    """Return True if any file belongs to a splintered video (eye id longer than 5 characters)."""
    for file in file_list:
        if len(get_video_desc(file)['eye_id']) > 5:
            return True