Ejemplo n.º 1
0
    def save(self, tile_indices, output_dir, data, compress=6):
        """Write the cytometry outputs for a single tile beneath ``output_dir``.

        Args:
            tile_indices: 4-item sequence unpacked as
                (region_index, tile_index, tile_x, tile_y)
            output_dir: Directory onto which the relative output paths are joined
            data: 2-item sequence unpacked as (label image, stats); either item may be
                None, in which case that output is skipped.  The stats object is
                presumably a pandas DataFrame (it only needs ``insert``) -- note that the
                metadata columns are inserted directly on the object that was passed in
            compress: Compression setting forwarded to ``cytokit_io.save_tile``
        Returns:
            Tuple ``(label_tile_path, stats_path)`` of paths relative to ``output_dir``;
            an entry is None when the corresponding output was not written
        """
        region, tile, col, row = tile_indices
        labels, table = data
        label_rel = stats_rel = None

        # Label volumes are saved with compression as they are often highly redundant
        if labels is not None:
            label_rel = cytokit_io.get_cytometry_image_path(region, col, row)
            label_file = osp.join(output_dir, label_rel)
            cytokit_io.save_tile(
                label_file, labels, config=self.config, compress=compress)

        if table is not None:
            # Metadata columns prepended to the stats before the csv export (names are
            # meant to align with those used in config.TileDims)
            leading_columns = (
                ('region_index', region),
                ('tile_index', tile),
                ('tile_x', col),
                ('tile_y', row),
            )
            # Every insert targets position 0, so walking the columns in reverse
            # leaves them in the order listed above
            for column, value in reversed(leading_columns):
                table.insert(0, column, value)

            stats_rel = cytokit_io.get_cytometry_stats_path(region, col, row)
            stats_file = osp.join(output_dir, stats_rel)
            cytokit_io.save_csv(stats_file, table, index=False)

        return label_rel, stats_rel
Ejemplo n.º 2
0
def aggregate(config, output_dir):
    """Aggregate cytometry data associated with an experiment into a single dataframe

    Args:
        config: Experiment configuration
        output_dir: Output directory for experiment
    Returns:
        DataFrame containing concatenation of all tile-based cytometry datasets with a global
            cell id as well as global x/y coordinates (where "global" means across region)
    Raises:
        ValueError: If none of the expected per-tile cytometry csv files exist (so there is
            nothing to concatenate), or if the loaded data has no 'id' column
    """

    # Load per-tile csv exports, skipping (with a warning) any tile whose file is missing
    frames = []
    for idx in config.get_tile_indices():
        path = cytokit_io.get_cytometry_stats_path(idx.region_index,
                                                   idx.tile_x, idx.tile_y)
        path = osp.join(output_dir, path)
        if not osp.exists(path):
            logger.warning(
                'Expected cytometry data file at "%s" does not exist.  '
                'It will be ignored but this may be worth investigating', path)
            continue
        frames.append(pd.read_csv(path))

    # pd.concat fails on an empty list with a message that says nothing about the
    # experiment; raise the same exception type with an actionable explanation instead
    if not frames:
        raise ValueError(
            'No cytometry data files were found for output directory "{}"; '
            'there is nothing to aggregate'.format(output_dir))
    df = pd.concat(frames)

    # Start inserting before 'id' to get order rid, rx, ry (so they have to be inserted in reverse order)
    id_idx = df.columns.tolist().index('id')

    # Determine region coords for tile coordinate / point coordinate pairs
    def get_region_point_coords(r):
        tile_coord = r['tile_x'], r['tile_y']
        tile_point = r['x'], r['y']
        return config.get_region_point_coordinates(tile_coord, tile_point)

    # Row-wise apply is skipped for an empty frame so that empty coordinate lists
    # are inserted below
    reg_coords = []
    if len(df) > 0:
        reg_coords = df[['tile_x', 'tile_y', 'x',
                         'y']].apply(get_region_point_coords, axis=1)

    # Add region / global coordinates as separate fields.  The concatenated frame keeps
    # each tile's own row labels (concat is not asked to renumber them), so the new
    # values are passed as plain lists / arrays, which are assigned by position
    df.insert(id_idx, 'ry', [c[1] for c in reg_coords])
    df.insert(id_idx, 'rx', [c[0] for c in reg_coords])

    # Insert global id for cells (i.e. across region)
    df.insert(id_idx, 'rid', np.arange(len(df)))

    return df