Example #1
def score_image_set(out_dir, score_params, ignore_previous=False):
    """Score wells for a single day's scanned images.

    Parameters:
    out_dir: directory in which the well_images directory is found, and to which
        score data will be written.
    score_params: configuration information for scoring wells for movement.
        This must be a parameter dictionary suitable to pass to score_wells.score_wells()
    ignore_previous: if False and stored results already exist, skip processing.
    """
    out_dir = pathlib.Path(out_dir)
    score_file = out_dir / 'scores.pickle'
    if score_file.exists() and not ignore_previous:
        return
    print('scoring images for {}'.format(out_dir))
    well_names = util.load(out_dir / 'metadata.pickle').well_names
    well_mask = freeimage.read(str(out_dir.parent / 'well_mask.png')) > 0
    well_dir = out_dir / 'well_images'
    well_images = []
    for well_name in well_names:
        images = [freeimage.read(str(image)) for image in sorted(well_dir.glob(well_name+'-*.png'))]
        well_images.append(images)
    well_scores = score_wells.score_wells(well_images, well_mask, **score_params)
    util.dump(score_file, well_names=well_names, well_scores=well_scores)
    scores_out = [[name, str(score)] for name, score in zip(well_names, well_scores)]
    util.dump_csv(scores_out, out_dir / 'scores.csv')
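A minimal usage sketch (the path is hypothetical, and score_params must hold whatever keyword arguments score_wells.score_wells() accepts):

# Hypothetical example: score one day's scan directory. out_dir must contain
# well_images/ and metadata.pickle, with well_mask.png in its parent, as
# score_image_set() expects.
score_params = {}  # fill with keyword arguments for score_wells.score_wells()
score_image_set('/data/experiment/2016-05-01', score_params, ignore_previous=True)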
Example #2
def aggregate_scores(out_dir):
    """Once all images have been scored, aggregate the per-image-set (i.e. per-day)
    score data to a single file for the whole experiment.
    """
    out_dir = pathlib.Path(out_dir)
    well_names = None
    all_scores = {}
    for scorefile in out_dir.glob('*/scores.pickle'): # find all per-day score files below out_dir
        scores = util.load(scorefile)
        data = util.load(scorefile.parent / 'metadata.pickle')
        assert data.well_names == scores.well_names
        if well_names is None:
            well_names = data.well_names
        else:
            assert well_names == data.well_names
        all_scores[data.date] = data.age, scores.well_scores
    assert len(all_scores) > 0 # make sure there were score files to aggregate
    dates, ages_and_scores = zip(*sorted(all_scores.items()))
    ages, scores = zip(*ages_and_scores)
    ages = numpy.array(ages)
    scores = numpy.array(scores).T # transpose from (dates, wells) to (wells, dates)
    data_out = [[''] + [d.isoformat() for d in dates]]
    data_out += [[''] + [str(a) for a in ages]]
    for well_name, score in zip(well_names, scores):
        data_out += [[well_name] + [str(s) for s in score]]
    util.dump_csv(data_out, out_dir/'scores.csv')
    util.dump(out_dir / 'scores.pickle', dates=dates, ages=ages, well_names=well_names, scores=scores)
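Once every per-day directory contains a scores.pickle, a single call rolls them up into experiment-wide files; a sketch with a hypothetical path:

# Aggregate all <out_dir>/<day>/scores.pickle files into experiment-wide
# scores.csv and scores.pickle files at the top level.
aggregate_scores('/data/experiment')
# The aggregated pickle can then be reloaded; scores is shaped (wells, dates).
data = util.load(pathlib.Path('/data/experiment') / 'scores.pickle')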
Example #3
def extract_image_set(image_files, out_dir, date, age, plate_params, ignore_previous=False):
    """Find wells in a set of scanner images and extract each well into a separate image
    for further processing.

    Parameters:
    image_files: list of paths to a set of images.
    out_dir: path to write out the extracted images and metadata.
    date: date object referring to image scan date
    age: age in days of the worms in these images
    plate_params: configuration information for extracting wells from the plates.
        This must be a parameter dictionary suitable to pass to extract_wells.extract_wells()
    ignore_previous: if False and stored results already exist, skip processing.
    """
    out_dir = pathlib.Path(out_dir)
    metadata = out_dir / 'metadata.pickle'
    if metadata.exists() and not ignore_previous:
        return
    images = []
    print('extracting images for {}'.format(out_dir))
    well_mask = freeimage.read(str(out_dir.parent / 'well_mask.png')) > 0
    for image_file in image_files:
        image = freeimage.read(image_file)
        if image.dtype == numpy.uint16:
            image = (image >> 8).astype(numpy.uint8) # convert 16-bit images to 8-bit
        images.append(image)
    well_names, well_images, well_centroids = extract_wells.extract_wells(images, well_mask, **plate_params)
    well_dir = util.get_dir(out_dir / 'well_images')
    for well_name, well_image_set in zip(well_names, well_images):
        for i, image in enumerate(well_image_set):
            freeimage.write(image, str(well_dir/well_name)+'-{}.png'.format(i))
    util.dump(metadata, date=date, age=age, well_names=well_names, well_centroids=well_centroids)
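A usage sketch for extract_image_set, assuming well_mask.png already exists in the parent of out_dir (the paths, glob pattern, and plate_params contents are hypothetical):

import datetime
import pathlib

# Hypothetical example: extract wells from one day's scanner images.
# String paths are passed, matching the str() usage in the other examples here.
image_files = sorted(str(p) for p in pathlib.Path('/data/scans/2016-05-01').glob('*.png'))
plate_params = {}  # fill with keyword arguments for extract_wells.extract_wells()
extract_image_set(image_files, '/data/experiment/2016-05-01',
    date=datetime.date(2016, 5, 1), age=3, plate_params=plate_params)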
Example #4
def save_lifespans(self):
    """Save evaluated lifespans to 'evaluations.pickle' and 'evaluated_lifespans.csv'."""
    util.dump(self.out_dir / 'evaluations.pickle',
        last_alive_indices=self.last_alive_indices,
        well_index=self.well_index)
    lifespans = estimate_lifespans.last_alive_indices_to_lifespans(self.last_alive_indices, self.ages)
    lifespans_out = [('well name', 'lifespan')] + [(wn, str(ls)) for wn, ls in zip(self.well_names, lifespans)]
    util.dump_csv(lifespans_out, self.out_dir/'evaluated_lifespans.csv')
Example #5
def save_status(self):
    """Save per-well status annotations to 'statuses.pickle' and 'evaluated_statuses.csv'."""
    util.dump(self.out_dir / 'statuses.pickle',
        statuses=self.statuses,
        well_names=self.well_names,
        status_codes=self.status_codes,
        well_index=self.well_index)
    status_out = [('well name', 'status')] + [(wn, self.status_codes[i]) for wn, i in zip(self.well_names, self.statuses)]
    util.dump_csv(status_out, self.out_dir/'evaluated_statuses.csv')
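Both methods belong to an evaluator class that is not shown in these examples; a sketch of how they might be invoked, assuming an instance whose attributes (out_dir, well_names, ages, statuses, and so on) are already populated:

# 'evaluator' is a hypothetical instance of the (unshown) class owning these methods.
evaluator.save_lifespans()  # writes evaluations.pickle and evaluated_lifespans.csv
evaluator.save_status()     # writes statuses.pickle and evaluated_statuses.csv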
Example #6
def make_training_data(scored_dir, training_data, annotation_file=None):
    """Given a scored directory and a pickle file of manual annotation data,
    create a file of training data for the Hidden Markov Model for lifespan
    estimation.

    If manually provided well-status information is present, this function will
    skip empty wells and those with multiple worms. If no status information is
    available, this function will skip all wells which were ignored in the
    manual annotation data (empty / DOA / multiple worms).

    Parameters:
    scored_dir: corresponds to out_dir parameter to process_image_dir() --
        the parent directory of all of the extracted and scored images.
    training_data: path to write training data file with calibration information.
    annotation_file: if None (default), use 'evaluated_lifespans.csv' in the
        scored_dir. Otherwise load a custom file path.
    """
    data = load_data(scored_dir)
    if annotation_file is None:
        annotation_file = pathlib.Path(scored_dir) / 'evaluated_lifespans.csv'
    csv_well_names, csv_lifespans = read_lifespan_annotation_csv(annotation_file)
    good_well_names, good_lifespans = [], []
    if hasattr(data, 'eval_well_statuses'):
        statuses = dict(zip(data.well_names, data.eval_well_statuses))
    else:
        statuses = {}
    for name, lifespan in zip(csv_well_names, csv_lifespans):
        # If statuses are present, include DOA worms and living worms in the
        # training data, and exclude empty and multi-worm wells. If statuses
        # are not present, exclude all wells with a lifespan of -1, which
        # marks any of the empty/DOA/multi-worm states.
        if (name in statuses and statuses[name] == 'DOA') or lifespan != -1:
            good_well_names.append(name)
            good_lifespans.append(lifespan)
    states = estimate_lifespans.lifespans_to_states(good_lifespans, data.ages)

    # it could be that the wells in the CSV are only a subset of the wells in the data,
    # so find the indices in the data for just these wells.
    indices_of_data_wells = {well:i for i, well in enumerate(data.well_names)}
    good_well_indices = [indices_of_data_wells[well] for well in good_well_names]
    scores = data.scores[good_well_indices]
    util.dump(training_data, states=states, ages=data.ages, scores=scores)
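A sketch of building HMM training data from a scored directory that was manually annotated (both paths are hypothetical; by default the function reads evaluated_lifespans.csv from the scored directory):

# Hypothetical paths: write training data for the scored experiment directory.
make_training_data('/data/experiment', '/data/training/plate1_training.pickle')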
Example #7
def calculate_lifespans(scored_dir, training_data):
    """Once well images have been scored, estimate worm lifespans.

    Parameters:
    scored_dir: corresponds to out_dir parameter to process_image_dir() --
        the parent directory of all of the extracted and scored images.
    training_data: paths to one or more training data files with calibration information.
    """
    scored_dir = pathlib.Path(scored_dir)
    data = load_data(scored_dir)
    training = load_training_data(training_data)
    states = estimate_lifespans.simple_hmm(data.scores, data.ages,
        training.lifespans, training.ages, training.scores, training.states,
        lifespan_sigma=6)[0]
    # states = estimate_lifespans.estimate_lifespans(data.scores, data.ages, training.states, training.scores, training.ages)
    lifespans = estimate_lifespans.states_to_lifespans(states, data.ages)
    last_alive_indices = estimate_lifespans.states_to_last_alive_indices(states)
    lifespans_out = [('well name', 'lifespan')]+[(well_name, str(lifespan)) for well_name, lifespan in zip(data.well_names, lifespans)]
    util.dump_csv(lifespans_out, scored_dir/'lifespans.csv')
    util.dump(scored_dir/'lifespans.pickle', well_names=data.well_names, ages=data.ages, states=states,
        lifespans=lifespans, last_alive_indices=last_alive_indices)
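Finally, a sketch of estimating lifespans with training data produced by make_training_data(); the paths are hypothetical, and per the docstring one or more training files may be passed:

# Writes lifespans.csv and lifespans.pickle into the scored directory.
calculate_lifespans('/data/experiment',
    ['/data/training/plate1_training.pickle'])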