Example #1
File: train.py Project: hci-unihd/batchlib
def load_features_and_labels(path,
                             feature_table_name='',
                             label_table_name='',
                             outlier_table_name=None):
    with open_file(path, 'r') as f:
        cols, table = read_table(f, feature_table_name)

    label_ids = table[:, cols.index('label_ids')]

    marker_pattern = 'marker'
    feat_names = [name for name in cols if marker_pattern in name]
    feat_ids = [ii for ii, name in enumerate(cols) if marker_pattern in name]
    feats = table[:, feat_ids]

    with open_file(path, 'r') as f:
        cols, table = read_table(f, label_table_name)

    this_label_ids = table[:, cols.index('label_ids')]
    assert np.array_equal(label_ids, this_label_ids)
    labels = table[:, cols.index('infected_labels')]

    # filter out outliers from the training data if we have an outlier table
    if outlier_table_name is not None:
        with open_file(path, 'r') as f:
            cols, table = read_table(f, outlier_table_name)
        outlier_col_name = 'is_outlier'
        outlier = table[:, cols.index(outlier_col_name)]
        mask = outlier != 1
    else:
        mask = None

    if mask is not None:
        feats, labels = feats[mask], labels[mask]

    return feats, labels, feat_names
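
A minimal usage sketch for this loader; the file path and table names below are hypothetical and depend on how the plate was processed:

feats, labels, feat_names = load_features_and_labels(
    'plateK12/plateK12_table.hdf5',              # hypothetical plate table file
    feature_table_name='cells/marker_features',  # hypothetical table names
    label_table_name='cells/infected_labels',
    outlier_table_name='cells/outliers')
print(feats.shape, labels.shape)
print(feat_names[:5])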
Example #2
def export_default_table(table_file, table_name, output_path, output_format=None, skip_existing=True):
    if os.path.exists(output_path) and skip_existing:
        return

    # table file can be a file path or an opened file object
    if isinstance(table_file, str):
        with open_file(table_file, 'r') as f:
            columns, table = read_table(f, table_name)
    else:
        columns, table = read_table(table_file, table_name)
    export_table(columns, table, output_path, output_format)
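
A short usage sketch; the input table file, table name and output path are hypothetical:

# output_format=None presumably lets export_table choose the format from the extension
export_default_table('plateK12/plateK12_table.hdf5',
                     'wells/default',
                     'plateK12_wells.csv',
                     skip_existing=False)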
Example #3
File: train.py Project: hci-unihd/batchlib
def copy_labels(in_path, out_path):
    in_table_key = ''
    with open_file(in_path, 'r') as f:
        col_names, table = read_table(f, in_table_key)

    with open_file(out_path, 'a') as f:
        # the original snippet calls write_table(f) with no table data; the labels
        # read above presumably need to be written back -- the exact argument order
        # of batchlib's write_table is an assumption here
        write_table(f, col_names, table, in_table_key)
Example #4
    def _load_table(path, columns=None):

        return_cols = columns is None

        with open_file(path, 'r') as f:
            this_columns, this_table = read_table(f, table_name)

            has_props_table = has_table(f, props_table_name)
            if has_props_table:
                prop_cols, prop_table = read_table(f, props_table_name)
                anchors = np.concatenate([prop_table[:, prop_cols.index('anchor_x')][:, None],
                                          prop_table[:, prop_cols.index('anchor_y')][:, None]], axis=1)
            else:
                n_cells = len(this_table)
                anchors = np.array(2 * [n_cells * [None]]).T

        if columns is None:
            columns = initial_columns + this_columns

        if len(anchors) != len(this_table):
            assert has_props_table
            label_ids1 = this_table[:, this_columns.index('label_id')]
            label_ids2 = prop_table[:, prop_cols.index('label_id')]
            assert len(label_ids2) > len(label_ids1)
            # we assume it's sorted by label ids !
            keep_anchors = np.isin(label_ids2, label_ids1)
            anchors = anchors[keep_anchors]

        image_name = os.path.splitext(os.path.split(path)[1])[0]
        well_name = image_name_to_well_name(image_name)
        site_name = image_name_to_site_name(image_name)

        plate_col = np.array([plate_name] * len(this_table))
        well_col = np.array([well_name] * len(this_table))
        site_col = np.array([site_name] * len(this_table))
        res_table = np.concatenate([plate_col[:, None],
                                    well_col[:, None],
                                    site_col[:, None],
                                    anchors,
                                    this_table], axis=1)
        if return_cols:
            return res_table, columns
        else:
            return res_table
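
_load_table is a nested helper and relies on names from its enclosing scope (table_name, props_table_name, initial_columns, plate_name, plus the project's image_name_to_well_name / image_name_to_site_name helpers). A sketch of the assumed closure context, with hypothetical values:

# hypothetical values for the closure variables used by _load_table
table_name = 'cells/default'            # per-image cell feature table
props_table_name = 'cells/properties'   # optional table with anchor_x / anchor_y
plate_name = 'plateK12'
initial_columns = ['plate_name', 'well_name', 'site_name',
                   'anchor_x', 'anchor_y']  # prepended to the per-image columns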
Example #5
def load_labels(path):
    key = 'proofread_infected_labels'
    with h5py.File(path, 'r') as f:
        if has_table(f, key):
            print("Loading proofread labels for", path)
            _, infected_labels = read_table(f, key)
            infected_labels = infected_labels[:, 1]
        else:
            print("Did not find proofread labels for", path)
            infected_labels = None
    return infected_labels
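
A sketch of applying this over a folder of per-image HDF5 files; the folder layout is an assumption:

import os
from glob import glob

# hypothetical plate folder containing one h5 file per image
image_files = glob(os.path.join('plateK12', '*.h5'))
proofread = {path: load_labels(path) for path in image_files}
n_proofread = sum(labels is not None for labels in proofread.values())
print(f"{n_proofread} / {len(image_files)} images have proofread labels")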
Example #6
def cell_outliers_per_plate(plate):
    image_files = glob(os.path.join(plate, '*.h5'))
    outlier_tab_name = 'cell_segmentation_serum_IgG/serum_IgG_outliers'

    n, n_outliers = 0, 0
    for imf in image_files:
        with h5py.File(imf, 'r') as f:
            cols, tab = read_table(f, outlier_tab_name)
        n += len(tab)
        outliers = tab[:, cols.index('is_outlier')] == 1
        n_outliers += outliers.sum()
    return n, n_outliers
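
A sketch that aggregates the outlier fraction over several plate folders; the root layout is an assumption:

import os
from glob import glob

# hypothetical root folder containing one sub-folder per plate
for plate in glob(os.path.join('/path/to/plates', '*')):
    n, n_outliers = cell_outliers_per_plate(plate)
    frac = n_outliers / max(n, 1)
    print(os.path.split(plate)[1], f"{n_outliers} / {n} outlier cells ({frac:.1%})")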
Example #7
def get_well_bg_fractions(name, root):
    folder = os.path.join(root, name)
    table_path = os.path.join(folder, f'{name}_table.hdf5')
    bg_table_key = 'wells/backgrounds'
    with h5py.File(table_path, 'r') as f:
        col_names, table = read_table(f, bg_table_key)

    wells = table[:, col_names.index('well_name')]
    fractions = table[:, col_names.index('background_fraction')]
    values = table[:, col_names.index('serum_IgG_median')]

    return dict(zip(wells, values)), dict(zip(wells, fractions))
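
A usage sketch, assuming the folder layout the function expects (a sub-folder per plate containing <name>_table.hdf5); the names below are hypothetical:

medians, fractions = get_well_bg_fractions('plateK12', '/path/to/plates')
for well in sorted(fractions):
    print(well, 'background fraction:', fractions[well], 'IgG median:', medians[well])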
Example #8
def get_actual_bg_well(name,
                       root,
                       channel_dict,
                       bg_table_key='plate/backgrounds_from_min_well'):
    folder = os.path.join(root, name)
    table_path = os.path.join(folder, f'{name}_table.hdf5')
    with h5py.File(table_path, 'r') as f:
        col_names, table = read_table(f, bg_table_key)
    return {
        chan_name: table[0, col_names.index(well_key)]
        for chan_name, well_key in channel_dict.items()
    }
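
A sketch of a call with a hypothetical channel_dict; its values name the well columns of the background table and depend on the pipeline configuration:

channel_dict = {'serum_IgG': 'serum_IgG_min_well',
                'serum_IgA': 'serum_IgA_min_well'}
bg_wells = get_actual_bg_well('plateK12', '/path/to/plates', channel_dict)
print(bg_wells)  # maps each channel name to the well used for its background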
Example #9
def quantify_well_or_image(folder, name):
    plate_name = os.path.split(folder)[1]
    table_path = os.path.join(folder, f'{plate_name}_table.hdf5')
    with h5py.File(table_path, 'r') as f:
        cols, tab = read_table(f, f'{name}/default')
    n = len(tab)
    outliers = tab[:, cols.index('IgG_is_outlier')] == 1
    n_outliers = outliers.sum()

    outlier_type = tab[:, cols.index('IgG_outlier_type')]
    n_manual = len([otype for otype in outlier_type if 'manual: 1' in otype])

    return n, n_outliers, n_manual
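
A usage sketch for one plate; the folder path and the table group name are hypothetical:

n, n_outliers, n_manual = quantify_well_or_image('/path/to/plates/plateK12', 'wells')
print(f"{n_outliers} / {n} outliers, {n_manual} marked manually")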
Example #10
def determine_igm_mad(root):
    plates = glob(os.path.join(root, '*IgM*'))
    print(plates)
    mads = []
    mad_key = 'serum_IgM_mad'
    for plate in plates:
        plate_name = os.path.split(plate)[1]
        table_path = os.path.join(plate, f'{plate_name}_table.hdf5')
        with open_file(table_path, 'r') as f:
            cols, table = read_table(f, 'plate/backgrounds_min_well')
        mad = table[:, cols.index(mad_key)]
        print(plate_name, ":", mad)
        if 'plate2' in plate_name:
            continue
        mads.append(mad)

    print()
    print("Mean IgM MAD:")
    print(np.mean(mads))
Example #11
def get_image_edges_and_labels(path,
                               saturation_factor=1.5,
                               edge_width=2,
                               return_seg=False):
    with h5py.File(path, 'r') as f:
        serum = normalize(read_image(f, 'serum_IgG'))
        marker = quantile_normalize(read_image(f, 'marker'))
        nuclei = normalize(read_image(f, 'nuclei'))

        seg = read_image(f, 'cell_segmentation')
        _, infected_labels = read_table(f, 'infected_cell_labels')
        assert len(infected_labels) == seg.max() + 1
        assert infected_labels.shape[1] == 2

    bg_mask = seg == 0

    def subtract_bg(raw):
        bg = np.median(raw[bg_mask])
        raw -= bg
        return raw

    serum = subtract_bg(serum)
    marker = subtract_bg(marker)
    nuclei = subtract_bg(nuclei)

    infected_labels = infected_labels[:, 1]
    edges = get_edge_segmentation(seg, edge_width)

    raw = np.concatenate(
        [marker[..., None], serum[..., None], nuclei[..., None]], axis=-1)
    if saturation_factor > 1:
        raw = skc.rgb2hsv(raw)
        raw[..., 1] *= saturation_factor
        raw = skc.hsv2rgb(raw).clip(0, 1)

    if return_seg:
        return raw, edges, infected_labels, seg
    else:
        return raw, edges, infected_labels
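
A sketch of loading and displaying one image; the per-image h5 filename is hypothetical and matplotlib is only used for display:

import matplotlib.pyplot as plt

raw, edges, infected_labels = get_image_edges_and_labels('plateK12/WellC01_0000.h5')
plt.imshow(raw)
plt.show()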
Example #12
def bg_dict_for_plots(bg_params, table_path):
    bg_dict = {}
    with open_file(table_path, 'r') as f:
        for channel_name, bg_param in bg_params.items():
            if isinstance(bg_param, str):
                assert has_table(f, bg_param)
                cols, table = read_table(f, bg_param)
                bg_src = bg_param.split('/')[-1]
                if bg_param == 'plate/backgrounds':
                    bg_val = table[0, cols.index(f'{channel_name}_median')]
                    bg_msg = f' as {bg_val}'
                elif bg_param == 'plate/backgrounds_min_well':
                    bg_val = table[0, cols.index(f'{channel_name}_median')]
                    bg_wells = table[0, cols.index(f'{channel_name}_min_well')]
                    bg_src = f' the wells {bg_wells}'
                    bg_msg = f' as {bg_val}'
                else:
                    bg_msg = ''
                bg_info = f'background computed from {bg_src}{bg_msg}'
            else:
                bg_info = f'background fixed to {bg_param}'
            bg_dict[channel_name] = bg_info
    return bg_dict
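
A sketch with a hypothetical bg_params dict mixing table-based and fixed backgrounds (string values name background tables, numbers fix the value directly):

bg_params = {'serum_IgG': 'plate/backgrounds_min_well',
             'nuclei': 400}
bg_dict = bg_dict_for_plots(bg_params, 'plateK12/plateK12_table.hdf5')
for channel, info in bg_dict.items():
    print(channel, '->', info)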
Example #13
def export_scores(folder_list, output_path,
                  score_patterns=DEFAULT_SCORE_PATTERNS,
                  table_name='wells/default', metadata_repository=None,
                  filter_outliers=True):
    """
    """

    def name_matches_score(name):
        return any(pattern in name for pattern in score_patterns)

    # first pass: find all column names that match the pattern
    result_columns = []
    for folder in folder_list:
        plate_name = os.path.split(folder)[1]
        table_path = os.path.join(folder, plate_name + '_table.hdf5')
        if not os.path.exists(table_path):
            raise RuntimeError(f"Did not find a result table @ {table_path}")

        with open_file(table_path, 'r') as f:
            col_names, _ = read_table(f, table_name)
        assert 'well_name' in col_names
        col_names = [col_name for col_name in col_names if name_matches_score(col_name)]
        result_columns.extend(col_names)

    result_columns = ['well_name'] + list(set(result_columns))
    columns = ['plate_name'] + result_columns

    # append cohort_id, ELISA results and cohort_type (positive/control/unknown) if we have a metadata db
    if metadata_repository is not None:
        db_metadata = ['cohort_id', 'cohort', 'cohort_type'] + TEST_NAMES
        columns += db_metadata

    # second pass: load the tables
    table = []
    for folder in folder_list:
        plate_name = os.path.split(folder)[1]
        table_path = os.path.join(folder, plate_name + '_table.hdf5')
        with open_file(table_path, 'r') as f:
            this_result_columns, this_result_table = read_table(f, table_name)

        if filter_outliers:
            this_result_table = outliers_to_nan(this_result_table,
                                                this_result_columns,
                                                score_patterns)

        this_len = len(this_result_table)
        plate_col = np.array([plate_name] * this_len)

        col_ids = [this_result_columns.index(name) if name in this_result_columns else -1
                   for name in result_columns]
        this_table = [np.array([None] * this_len)[:, None] if col_id == -1 else
                      this_result_table[:, col_id:col_id + 1] for col_id in col_ids]

        this_table = np.concatenate([plate_col[:, None]] + this_table, axis=1)

        # extend table with the values from DB
        if metadata_repository is not None:
            metadata = _get_db_metadata(this_table[:, 1], metadata_repository, plate_name)
            assert len(metadata) == len(this_table)
            assert metadata.shape[1] == len(db_metadata), f"{metadata.shape[1], len(db_metadata)}"
            this_table = np.concatenate([this_table, metadata], axis=1)

        table.append(this_table)

    logger.info(f'Columns: {columns}')
    table = np.concatenate(table, axis=0)
    export_table(columns, table, output_path)
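
A usage sketch over a hypothetical list of plate folders, without the optional metadata repository:

import os
from glob import glob

# hypothetical root folder; each plate folder must contain <plate_name>_table.hdf5
folders = glob(os.path.join('/path/to/plates', '*'))
export_scores(folders, 'all_plate_scores.csv',
              table_name='wells/default',
              metadata_repository=None,
              filter_outliers=True)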