def aggregate_scores(out_dir):
    """Once all images have been scored, aggregate the per-image-set (i.e. per-day)
    score data into a single file for the whole experiment.
    """
    out_dir = pathlib.Path(out_dir)
    well_names = None
    all_scores = {}
    for scorefile in out_dir.glob('*/scores.pickle'): # find all per-day score files below out_dir
        scores = util.load(scorefile)
        data = util.load(scorefile.parent / 'metadata.pickle')
        assert data.well_names == scores.well_names
        if well_names is None:
            well_names = data.well_names
        else:
            assert well_names == data.well_names
        all_scores[data.date] = data.age, scores.well_scores
    assert len(all_scores) > 0 # make sure there were scored image sets to aggregate
    dates, ages_and_scores = zip(*sorted(all_scores.items()))
    ages, scores = zip(*ages_and_scores)
    ages = numpy.array(ages)
    scores = numpy.array(scores).T # transpose to shape (n_wells, n_dates)
    data_out = [[''] + [d.isoformat() for d in dates]]
    data_out += [[''] + [str(a) for a in ages]]
    for well_name, score in zip(well_names, scores):
        data_out += [[well_name] + [str(s) for s in score]]
    util.dump_csv(data_out, out_dir / 'scores.csv')
    util.dump(out_dir / 'scores.pickle', dates=dates, ages=ages,
        well_names=well_names, scores=scores)
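# The aggregated scores.csv written above has one column per scan date and one
# row per well; the first row holds the dates and the second the corresponding
# ages. A sketch of the layout (values are illustrative only, not real data):
#
#     ,2016-01-04,2016-01-05,2016-01-06
#     ,3,4,5
#     A1,0.91,0.87,0.02
#     A2,0.95,0.90,0.88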
def score_image_set(out_dir, score_params, ignore_previous=False):
    """Score wells for a single day's scanned images.

    Parameters:
        out_dir: directory in which well_images directory is found, and to which
            score data will be written.
        score_params: configuration information for scoring wells for movement.
            This must be a parameter dictionary suitable to pass to
            score_wells.score_wells().
        ignore_previous: if False, and stored results already exist, skip
            processing.
    """
    out_dir = pathlib.Path(out_dir)
    score_file = out_dir / 'scores.pickle'
    if score_file.exists() and not ignore_previous:
        return
    print('scoring images for {}'.format(out_dir))
    well_names = util.load(out_dir / 'metadata.pickle').well_names
    well_mask = freeimage.read(str(out_dir.parent / 'well_mask.png')) > 0
    well_dir = out_dir / 'well_images'
    well_images = []
    for well_name in well_names:
        images = [freeimage.read(str(image)) for image in sorted(well_dir.glob(well_name+'-*.png'))]
        well_images.append(images)
    well_scores = score_wells.score_wells(well_images, well_mask, **score_params)
    util.dump(score_file, well_names=well_names, well_scores=well_scores)
    scores_out = [[name, str(score)] for name, score in zip(well_names, well_scores)]
    util.dump_csv(scores_out, out_dir / 'scores.csv')
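# Example usage (a sketch): score each day's directory, then aggregate. The
# experiment path, the directory filter, and the contents of score_params are
# hypothetical -- valid keys are whatever score_wells.score_wells() accepts.
#
#     experiment_dir = pathlib.Path('/data/experiment01')
#     score_params = {}  # fill in with parameters for score_wells.score_wells()
#     for day_dir in sorted(p for p in experiment_dir.iterdir() if p.is_dir()):
#         score_image_set(day_dir, score_params)
#     aggregate_scores(experiment_dir)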
def load(self):
    data = util.load(self.out_dir / 'statuses.pickle')
    assert self.status_codes == data.status_codes
    assert self.well_names == data.well_names
    self.statuses = data.statuses
    self.well_index = data.well_index
    self.set_well(self.well_index)
def load_data(scored_dir):
    """Load score data, and if available, lifespan data, from a processed directory.

    Returns a Data object with attributes:
        dates
        ages
        well_names
        scores
    and if lifespan data are found, also:
        states
        lifespans
        last_alive_indices
    If manually-annotated well statuses from 'statuses.pickle' are found, also:
        eval_well_statuses
    If manually-annotated lifespans from 'evaluations.pickle' are found, also:
        eval_last_alive_indices
        eval_lifespans
    """
    scored_dir = pathlib.Path(scored_dir)
    data = util.Data()
    scores = util.load(scored_dir / 'scores.pickle')
    for name in ('dates', 'ages', 'well_names', 'scores'):
        setattr(data, name, getattr(scores, name))
    lifespan_data = scored_dir / 'lifespans.pickle'
    if lifespan_data.exists():
        lifespans = util.load(lifespan_data)
        assert numpy.all(scores.ages == lifespans.ages) and scores.well_names == lifespans.well_names
        for name in ('states', 'lifespans', 'last_alive_indices'):
            setattr(data, name, getattr(lifespans, name))
    eval_data = scored_dir / 'evaluations.pickle'
    if eval_data.exists():
        evaluated = util.load(eval_data)
        data.eval_last_alive_indices = evaluated.last_alive_indices
        data.eval_lifespans = estimate_lifespans.last_alive_indices_to_lifespans(evaluated.last_alive_indices, data.ages)
    status_data = scored_dir / 'statuses.pickle'
    if status_data.exists():
        statuses = util.load(status_data)
        status_codes = [statuses.status_codes[i] for i in statuses.statuses]
        status_dict = dict(zip(statuses.well_names, status_codes))
        all_statuses = []
        for well_name in data.well_names:
            if well_name in status_dict:
                all_statuses.append(status_dict[well_name])
            else:
                # wells with no annotation default to the first status code
                all_statuses.append(statuses.status_codes[0])
        data.eval_well_statuses = all_statuses
    return data
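# Example (a sketch; the path is hypothetical): load a processed directory and
# inspect whichever attributes are present -- lifespan and annotation attributes
# only exist if the corresponding pickle files were found (see docstring above).
#
#     data = load_data('/data/experiment01')
#     print(len(data.well_names), 'wells scored on', len(data.dates), 'dates')
#     if hasattr(data, 'lifespans'):
#         print('median estimated lifespan:', numpy.median(data.lifespans))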
def load_training_data(training_data_files):
    """Load data from one or more training data files.

    If multiple files are provided, merge their data."""
    states, scores, ages, lifespans = [], [], [], []
    if isinstance(training_data_files, (str, pathlib.Path)):
        training_data_files = [training_data_files]
    for training_data in training_data_files:
        training = util.load(training_data)
        # training.states and training.scores are shape-(n_worms, n_timepoints)
        # arrays; training.ages is n_timepoints long. Flatten everything into
        # per-observation lists, repeating the ages once per worm.
        states.extend(training.states.flat)
        scores.extend(training.scores.flat)
        ages.extend(list(training.ages) * training.states.shape[0])
        training_lifespans = estimate_lifespans.states_to_lifespans(training.states, training.ages)
        lifespans.extend(estimate_lifespans.cleanup_lifespans(training_lifespans, training.ages))
    training_out = util.Data(states=numpy.array(states), scores=numpy.array(scores),
        ages=numpy.array(ages), lifespans=numpy.array(lifespans))
    return training_out
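# Example (a sketch; the file paths are hypothetical):
#
#     training = load_training_data(['run1/training_data.pickle',
#                                    'run2/training_data.pickle'])
#
# The returned Data object holds flat, per-observation arrays (states, scores,
# ages) pooled across all files, plus lifespans derived from those states,
# ready for downstream model fitting.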
def load(self):
    data = util.load(self.out_dir / 'evaluations.pickle')
    self.last_alive_indices = data.last_alive_indices
    self.well_index = data.well_index
    self.set_well(self.well_index)