def img_gen_from_anno_gen(
    anno_gen,
    config,
    group=None,
    target_col=None,
):
    """Yield ``(batch_x, batch_y)`` float32 arrays for each annotation batch.

    Args:
        anno_gen: Iterable of annotation batches. Each batch must provide
            ``iterrows()`` yielding ``(image id, row)`` pairs.
        config: Mapping read for ``'channels'`` (forwarded to ``load_img``)
            and ``'class_ids'`` (one label column per entry, in order).
        group: Forwarded to ``load_img``; when falsy, ``row['group']`` is
            used instead.
        target_col: Name of the row field holding the label string that is
            parsed with ``str_to_labels``.

    Yields:
        ``(batch_x, batch_y)``: the loaded images divided by 255 and the
        multi-hot label vectors, both converted to ``np.float32`` arrays.
        The generator ends when ``anno_gen`` is exhausted.
    """
    # Iterate with ``for`` instead of ``while True: next(anno_gen)``: inside a
    # generator a StopIteration escaping from ``next`` is converted into a
    # RuntimeError (PEP 479), so a finite ``anno_gen`` used to crash the
    # consumer instead of simply ending the stream.
    for anno_batch_df in anno_gen:
        batch_x_list = []
        batch_y_list = []
        for id_, row in anno_batch_df.iterrows():
            img = load_img(
                id_,
                resize=None,
                channels=config['channels'],
                group=group or row['group'],
            )

            labels = str_to_labels(row[target_col])
            # One entry per configured class id: 1 if present, else 0.
            y_vec = np.array(
                [1 if class_id in labels else 0
                 for class_id in config['class_ids']]
            )

            batch_y_list.append(y_vec)
            # Scale pixel values by 1/255.
            batch_x_list.append(img / 255.)

        batch_x = np.array(batch_x_list, dtype=np.float32)
        batch_y = np.array(batch_y_list, dtype=np.float32)

        yield batch_x, batch_y
 def determine_n_windows_fn(id_):
     """Return the number of windows to use for the sample ``id_``.

     When ``config['set_n_windows']` is not a ``str`` it is returned as-is.
     Otherwise the labels of ``id_`` are looked up in ``set_n_windows_anno``
     and the largest ``'n_windows'`` among those rows of ``cut_summary`` wins.
     """
     setting = config['set_n_windows']
     if type(setting) is not str:
         return setting

     label_ids = str_to_labels(set_n_windows_anno.loc[id_, 'Target'])
     per_class_windows = cut_summary.iloc[label_ids]['n_windows'].values
     return np.max(per_class_windows)
Beispiel #3
0
def show_status(target_col='Target', **kwargs):
    """Tabulate per-class sample counts for one or more annotation CSVs.

    Args:
        target_col: Column holding the label strings parsed by
            ``str_to_labels``.
        **kwargs: ``name=csv_path`` pairs; each becomes one count column.
            Defaults to ``train_with_hpa='data/train_with_hpa.csv'`` when
            nothing is passed.

    The table is written to ``tmp/class_distribution.csv`` and printed.
    """

    def multi_hot(target_str, width):
        # Vector of length ``width`` with ones at the parsed label positions.
        vec = np.zeros(width, dtype='int')
        vec[str_to_labels(target_str)] = 1
        return vec

    sources = kwargs if kwargs else {'train_with_hpa': 'data/train_with_hpa.csv'}
    n_samples_list = {}
    for name, csv_path in sources.items():
        anno = pd.read_csv(csv_path, index_col=0)
        n_classes = len(class_labels)
        encoded = [multi_hot(target_str, n_classes) for target_str in anno[target_col]]
        # Column-wise sum gives the number of samples carrying each class.
        n_samples_list[name] = np.sum(np.array(encoded), axis=0)

    columns = {'organelle': class_labels}
    columns.update(n_samples_list)
    class_index = pd.Index(range(n_classes), name='class_id')
    cut_summary = pd.DataFrame(columns, index=class_index)

    save_path = 'tmp/class_distribution.csv'
    cut_summary.to_csv(save_path)
    debug(f"saved to {save_path}")
    print(cut_summary)
Beispiel #4
0
def get_corrected_target_for_one_row(task):
    """Drop to-be-corrected classes that no marker rectangle confirms.

    Args:
        task: ``(id_, row)`` pair. ``row`` is read for ``'Target'``,
            ``'source_img_id'`` and the ``'left'``/``'top'``/``'right'``/
            ``'bottom'`` window bounds.

    Returns:
        ``{'id_': ..., 'corrected_target': ...}``. The original ``'Target'``
        value is passed through untouched when the row carries none of
        ``classes_to_correct``; otherwise the remaining labels are rendered
        with ``labels_to_str``.
    """
    id_, row = task
    labels = str_to_labels(row['Target'])

    # Fast path: nothing in this row is subject to correction.
    if not any([cls in labels for cls in classes_to_correct]):
        return {'id_': id_, 'corrected_target': row['Target']}

    for cls in classes_to_correct:
        if cls in labels:
            marker_key = f"markerListForClass{cls}"
            rects = correction_dict[marker_key].get(row['source_img_id'])
            # The class stays only if at least one marker rectangle is
            # accepted by window_contains_rect for this row's window.
            confirmed = rects is not None and any([
                window_contains_rect(
                    (row[['left', 'top', 'right', 'bottom']]), rect)
                for rect in rects
            ])
            if not confirmed:
                debug(f"removed {cls} from {id_} ({labels})")
                labels.remove(cls)

    return {'id_': id_, 'corrected_target': labels_to_str(labels)}
 def row_has_class_i(row):
     """Return True when the row's parsed ``'Target'`` contains ``int(class_id)``."""
     row_labels = str_to_labels(row['Target'])
     wanted = int(class_id)
     return wanted in row_labels
Beispiel #6
0
def row_has_rare_class(row):
    """Return True when the row's parsed ``'Target'`` has any of ``rare_classes``."""
    row_labels = str_to_labels(row['Target'])
    for rare_class in rare_classes:
        if rare_class in row_labels:
            return True
    return False
from os.path import join as pjoin
from lib.utils import multiprocessing, load_img
import numpy as np
from lib.utils import str_to_labels

# Class id whose samples this script selects.
CLASS_ID = 17

# Annotation table; the first CSV column becomes the index.
df = pd.read_csv('data/train_with_hpa.csv', index_col=0)

# Output folders, namespaced by CLASS_ID.
folder_to_original = f'tmp/selected_imgs_{CLASS_ID}/original'
folder_to_green = f'tmp/selected_imgs_{CLASS_ID}/green'

# Create both folders up front; exist_ok=True keeps re-runs from failing.
os.makedirs(folder_to_original, exist_ok=True)
os.makedirs(folder_to_green, exist_ok=True)

# Keep only the rows whose parsed 'Target' labels contain CLASS_ID.
df_filtered = df.loc[[CLASS_ID in str_to_labels(x) for x in df['Target']]]


def processing_one_img(task):
    """Load one image as red/green/blue and save it as ``<id_>.jpg``.

    Args:
        task: ``(id_, row)`` pair; ``row['group']`` is forwarded to
            ``load_img``.

    The file is written into ``folder_to_original``.
    """
    id_, row = task
    rgb_channels = ['red', 'green', 'blue']
    img = load_img(id_, resize=None, group=row['group'], channels=rgb_channels)

    target_path = pjoin(folder_to_original, f"{id_}.jpg")
    imsave(target_path, img)
 def filter_fixed(row):
     """Return True when ``n`` is among the row's parsed ``'Target'`` labels."""
     row_labels = str_to_labels(row['Target'])
     return n in row_labels
def crop(config):
    """Crop every annotated image into windows using a worker pool.

    Args:
        config: Mapping read for the following keys:
            ``'anno'`` (annotation DataFrame, copied before modification),
            ``'set_n_windows'`` (either a fixed window count, or a ``str``
            path to an annotation CSV from which per-class counts are
            derived), ``'n_threads'`` (pool size),
            ``'output_windowed_imgs_path'``, ``'output_windowed_anno_csv_path'``,
            ``'collage_output_path'`` and ``'run_tag'``.

    Side effects:
        Creates the windowed-image folder and writes the concatenated
        per-image DataFrames to CSV when ``'output_windowed_imgs_path'`` is
        not None; saves four collages when ``'collage_output_path'`` is not
        None. Progress is reported through ``info``.
    """
    if config['output_windowed_imgs_path'] is not None:
        makedirs(config['output_windowed_imgs_path'], exist_ok=True)

    # A string ``set_n_windows`` is a CSV path used to derive per-class window
    # counts; any other value is used directly as the window count.
    n_windows_from_csv = isinstance(config['set_n_windows'], str)

    if n_windows_from_csv:
        set_n_windows_anno = pd.read_csv(config['set_n_windows'], index_col=0)

        # NOTE(review): hard-coded class count; presumably equal to
        # len(class_labels) -- confirm.
        n_classes = 28

        # Multi-hot encode every sample, then sum per class.
        xs = []
        for target_str in set_n_windows_anno['Target']:
            targets = str_to_labels(target_str)
            x = np.zeros(n_classes, dtype='int')
            x[targets] = 1
            xs.append(x)
        xx = np.array(xs)
        n_samples_per_class = np.sum(xx, axis=0)
        cut_summary = pd.DataFrame(
            {
                'organelle': class_labels,
                'n_samples': n_samples_per_class,
                # Fewer samples in a class -> more windows per sample.
                'n_windows':
                np.round(1500 / n_samples_per_class).astype(int) + 1
            },
            index=range(n_classes),
        )
        print(cut_summary)
        estimated_n_windows = np.sum(cut_summary['n_samples'].values *
                                     cut_summary['n_windows'].values)
        print(f'estimated_n_windows = {estimated_n_windows}')

    def determine_n_windows_fn(id_):
        # Fixed count unless per-class counts were derived from a CSV above.
        if not n_windows_from_csv:
            return config['set_n_windows']
        targets = str_to_labels(set_n_windows_anno.loc[id_, 'Target'])
        # A multi-label sample gets the largest count among its classes.
        return np.max(cut_summary.iloc[targets]['n_windows'].values)

    anno = config['anno'].copy()
    anno['n_windows'] = [determine_n_windows_fn(id_) for id_ in anno.index]

    crop_task_list = [{
        'id_': id_,
        'row': row,
        'config': config,
    } for id_, row in anno.iterrows()]

    with Pool(config['n_threads']) as p:
        # Results arrive in completion order, not task order.
        result_iter = p.imap_unordered(crop_one_id, crop_task_list)

        result_list = []
        for i_result, result in enumerate(result_iter):
            info(
                f"({i_result}/{len(crop_task_list)}) {result['id_']}  ->  ({len(result['df'])})"
            )
            result_list.append(result)

    if config['output_windowed_imgs_path'] is not None:
        windowed_anno = pd.concat([x['df'] for x in result_list])
        print(windowed_anno)
        # ``'ERROR' in series`` tests the index labels, not the values, so
        # inspect the column contents explicitly.
        # NOTE(review): presumably crop_one_id flags failures by storing
        # 'ERROR' in the 'left' column -- confirm.
        if windowed_anno['left'].isin(['ERROR']).any():
            warn('There were errors!')
        windowed_anno.to_csv(config['output_windowed_anno_csv_path'])

    def save_collage(field):
        # One collage per result field, named after the run tag and field.
        display_imgs([x[field] for x in result_list],
                     save_as=pjoin(config['collage_output_path'],
                                   f"{config['run_tag']}-0-{field}.jpg"))

    if config['collage_output_path'] is not None:
        save_collage('blue_channel')
        save_collage('thresholded_img')
        save_collage('labeled_img')
        save_collage('minimap')