def save(self, tile_indices, output_dir, data, compress=6):
    """Write the cytometry results for a single tile to disk

    Args:
        tile_indices: Sequence unpacked as (region_index, tile_index, tile_x, tile_y)
        output_dir: Directory that the generated paths are joined onto
        data: Pair of (label image, statistics); either element may be None, in which
            case the corresponding output is skipped
        compress: Compression setting forwarded to ``cytokit_io.save_tile`` (default 6)
    Returns:
        Tuple of (label tile path, stats path) as produced by the ``cytokit_io`` path
        helpers (i.e. not joined with ``output_dir``); an entry is None when that
        output was not written
    """
    region_index, tile_index, tile_x, tile_y = tile_indices
    labels, cell_stats = data
    labels_rel_path = None
    stats_rel_path = None

    # Label volumes are written with compression since they are often highly redundant
    if labels is not None:
        labels_rel_path = cytokit_io.get_cytometry_image_path(region_index, tile_x, tile_y)
        labels_abs_path = osp.join(output_dir, labels_rel_path)
        cytokit_io.save_tile(labels_abs_path, labels, config=self.config, compress=compress)

    if cell_stats is not None:
        # Prepend tile metadata (names aligned to those used in config.TileDims); each
        # column is inserted at position 0, so this reversed listing produces the final
        # order region_index, tile_index, tile_x, tile_y.  Note that the statistics
        # object passed in by the caller is modified in place.
        leading_columns = (
            ('tile_y', tile_y),
            ('tile_x', tile_x),
            ('tile_index', tile_index),
            ('region_index', region_index),
        )
        for column_name, column_value in leading_columns:
            cell_stats.insert(0, column_name, column_value)

        # Export as csv
        stats_rel_path = cytokit_io.get_cytometry_stats_path(region_index, tile_x, tile_y)
        stats_abs_path = osp.join(output_dir, stats_rel_path)
        cytokit_io.save_csv(stats_abs_path, cell_stats, index=False)

    return labels_rel_path, stats_rel_path
def aggregate(config, output_dir):
    """Aggregate cytometry data associated with an experiment into a single dataframe

    Args:
        config: Experiment configuration
        output_dir: Output directory for experiment
    Returns:
        DataFrame containing concatenation of all tile-based cytometry datasets with a global
        cell id ('rid') as well as global x/y coordinates ('rx', 'ry'), where "global" means
        across region; these three columns are placed immediately before the 'id' column
    Raises:
        ValueError: If no per-tile cytometry csv file could be found for any tile in the
            configuration (there is nothing to aggregate)
    """
    # Load per-tile csv exports, skipping (with a warning) tiles whose file is missing
    tile_frames = []
    for idx in config.get_tile_indices():
        path = cytokit_io.get_cytometry_stats_path(idx.region_index, idx.tile_x, idx.tile_y)
        path = osp.join(output_dir, path)
        if not osp.exists(path):
            logger.warning(
                'Expected cytometry data file at "%s" does not exist.  '
                'It will be ignored but this may be worth investigating', path)
            continue
        tile_frames.append(pd.read_csv(path))

    # pd.concat rejects an empty list with an uninformative message, so fail explicitly
    # (with the same exception type) and point at the directory that was searched
    if not tile_frames:
        raise ValueError(
            'No cytometry data files were found in output directory "{}"; '
            'there is nothing to aggregate'.format(output_dir))
    df = pd.concat(tile_frames)

    # Start inserting before 'id' to get order rid, rx, ry (so they have to be inserted
    # in reverse order)
    id_idx = df.columns.tolist().index('id')

    # Determine region coords for tile coordinate / point coordinate pairs
    def get_region_point_coords(r):
        tile_coord = r['tile_x'], r['tile_y']
        tile_point = r['x'], r['y']
        return config.get_region_point_coordinates(tile_coord, tile_point)

    # Only apply row-wise when rows exist; for an empty frame the empty list below
    # results in empty 'rx'/'ry' columns being added
    reg_coords = []
    if len(df) > 0:
        reg_coords = df[['tile_x', 'tile_y', 'x', 'y']].apply(get_region_point_coords, axis=1)

    # Add region / global coordinates as separate fields
    df.insert(id_idx, 'ry', [c[1] for c in reg_coords])
    df.insert(id_idx, 'rx', [c[0] for c in reg_coords])

    # Insert global id for cells (i.e. across region)
    df.insert(id_idx, 'rid', np.arange(len(df)))

    return df