def test_report_init():
    """ReportWriter() picks up report paths and antigen names from constants."""
    # Grid position -> antigen name; every other grid cell is left unset.
    layout = {
        (0, 0): 'antigen_0_0',
        (1, 2): 'antigen_1_2',
        (0, 1): 'suuuuuuuuper_loooooooong_antigen_name',
    }
    antigen_array = np.empty(shape=(2, 3), dtype='U100')
    for position, name in layout.items():
        antigen_array[position] = name
    constants.ANTIGEN_ARRAY = antigen_array
    constants.RUN_PATH = 'test_run_dir'

    reporter = report.ReportWriter()

    # Report workbooks are expected directly under the run path
    assert reporter.od_path == 'test_run_dir/median_ODs.xlsx'
    assert reporter.int_path == 'test_run_dir/median_intensities.xlsx'
    assert reporter.bg_path == 'test_run_dir/median_backgrounds.xlsx'

    antigen_df = reporter.antigen_df
    assert antigen_df.shape == (3, 3)

    def antigen_at(row, col):
        # First antigen label registered at the given grid position
        at_position = (antigen_df['grid_row'] == row) & \
            (antigen_df['grid_col'] == col)
        return antigen_df.loc[at_position, 'antigen'].values[0]

    assert antigen_at(0, 0) == '0_0_antigen_0_0'
    assert antigen_at(1, 2) == '1_2_antigen_1_2'
    # The long name is expected to come back truncated
    assert antigen_at(0, 1) == '0_1_suuuuuuuuper_loooooooong_an'
def test_load_existing_reports(report_test):
    """
    Write intensity, background and OD workbooks from the fixture frames,
    load them back through ReportWriter and check the loaded sheets match
    what was written.

    :param dict report_test: Fixture mapping antigen sheet names to
        dataframes (also expected to set up constants.RUN_PATH; confirm
        against the fixture definition)
    """
    antigen_names = ['0_0_antigen_0_0', '1_2_antigen_1_2']
    report_files = [
        'median_intensities.xlsx',
        'median_backgrounds.xlsx',
        'median_ODs.xlsx',
    ]
    # Write od, intensity and background reports with identical content
    for file_name in report_files:
        xlsx_path = os.path.join(constants.RUN_PATH, file_name)
        with pd.ExcelWriter(xlsx_path) as writer:
            for antigen_name in antigen_names:
                sheet_df = report_test[antigen_name]
                sheet_df.to_excel(writer, sheet_name=antigen_name)

    # Load existing reports and make sure they're the same
    reporter = report.ReportWriter()
    reporter.load_existing_reports()
    loaded_reports = [
        reporter.report_int,
        reporter.report_bg,
        reporter.report_od,
    ]
    for antigen_name in antigen_names:
        expected_df = report_test[antigen_name]
        for loaded in loaded_reports:
            # The comparison result used to be discarded, so this test could
            # never fail. dtype checking is relaxed because a round trip
            # through Excel may change column dtypes without changing values.
            pd.testing.assert_frame_equal(
                loaded[antigen_name],
                expected_df,
                check_dtype=False,
            )
def test_load_missing_int_reports(report_test):
    """
    Only the OD workbook is written by this test; loading existing reports
    is then expected to raise an AssertionError.
    """
    od_path = os.path.join(constants.RUN_PATH, 'median_ODs.xlsx')
    with pd.ExcelWriter(od_path) as od_writer:
        for sheet_name in ('0_0_antigen_0_0', '1_2_antigen_1_2'):
            report_test[sheet_name].to_excel(od_writer, sheet_name=sheet_name)

    # Make sure we get an assertion error
    report_writer = report.ReportWriter()
    with pytest.raises(AssertionError):
        report_writer.load_existing_reports()
def test_antigen_df():
    """get_antigen_df returns one row per named antigen with its grid index."""
    antigen_array = np.empty(shape=(2, 3), dtype='U100')
    antigen_array[0, 0], antigen_array[1, 2] = 'antigen_0_0', 'antigen_1_2'
    constants.ANTIGEN_ARRAY = antigen_array

    antigen_df = report.ReportWriter().get_antigen_df()

    expected_columns = ['antigen', 'grid_row', 'grid_col']
    expected_antigens = ['0_0_antigen_0_0', '1_2_antigen_1_2']
    assert antigen_df.shape == (2, 3)
    assert list(antigen_df) == expected_columns
    assert list(antigen_df['antigen'].values) == expected_antigens
def test_assign_well_to_plate():
    """
    Assign the spots of one well (D11) to the plate reports and check that
    intensity, background and OD land in the right cell of each antigen's
    plate dataframe.
    """
    well_name = 'D11'
    # Create some spots. The frame is built in one call from a list of row
    # dicts because DataFrame.append no longer exists in pandas >= 2.0.
    df_cols = [
        'grid_row',
        'grid_col',
        'intensity_median',
        'bg_median',
        'od_norm',
    ]
    spot_rows = [
        {'grid_row': 0, 'grid_col': 0,
         'intensity_median': 1., 'bg_median': .5, 'od_norm': .75},
        # Grid position (0, 1) has no named antigen in the array below
        {'grid_row': 0, 'grid_col': 1,
         'intensity_median': 0., 'bg_median': .5, 'od_norm': .2},
        {'grid_row': 1, 'grid_col': 2,
         'intensity_median': .1, 'bg_median': .2, 'od_norm': .3},
    ]
    spots_df = pd.DataFrame(spot_rows, columns=df_cols)

    # Create antigens with matching indices
    antigen_array = np.empty(shape=(2, 3), dtype='U100')
    antigen_array[0, 0] = 'antigen_0_0'
    antigen_array[1, 2] = 'antigen_1_2'
    constants.ANTIGEN_ARRAY = antigen_array

    # Assign wells to plate and check values
    reporter = report.ReportWriter()
    reporter.create_new_reports()
    reporter.assign_well_to_plate(well_name=well_name, spots_df=spots_df)

    # Plate layout: columns '1'..'12', rows 'A'..'H'
    assert list(reporter.report_int['0_0_antigen_0_0']) ==\
        ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']
    assert list(reporter.report_int['0_0_antigen_0_0'].index) ==\
        ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']

    assert reporter.report_int['0_0_antigen_0_0'].at['D', '11'] == 1.
    assert reporter.report_bg['0_0_antigen_0_0'].at['D', '11'] == .5
    assert reporter.report_od['0_0_antigen_0_0'].at['D', '11'] == .75
    assert reporter.report_int['1_2_antigen_1_2'].at['D', '11'] == .1
    assert reporter.report_bg['1_2_antigen_1_2'].at['D', '11'] == .2
    assert reporter.report_od['1_2_antigen_1_2'].at['D', '11'] == .3
def test_create_new_reports(report_test):
    """create_new_reports builds one 8 x 12 plate dataframe per antigen."""
    plate_columns = [str(col) for col in range(1, 13)]
    plate_rows = list('ABCDEFGH')

    reporter = report.ReportWriter()
    reporter.create_new_reports()

    # Keys of the intensity report are the antigen sheet names
    sheet_names = list(reporter.report_int)
    assert sheet_names == ['0_0_antigen_0_0', '1_2_antigen_1_2']

    # Check first dataframe
    first_plate = reporter.report_int['0_0_antigen_0_0']
    assert first_plate.shape == (8, 12)
    assert list(first_plate) == plate_columns
    assert list(first_plate.index) == plate_rows
def test_write_reports(tmpdir_factory):
    """
    Assign three wells to the plate reports, write them to disk and verify
    the OD workbook read back from the run directory.

    :param tmpdir_factory: pytest fixture used to create the run directory
    """
    output_dir = tmpdir_factory.mktemp("output_dir")
    constants.RUN_PATH = output_dir
    antigen_array = np.empty(shape=(2, 3), dtype='U100')
    antigen_array[0, 0] = 'antigen_0_0'
    antigen_array[1, 2] = 'antigen_1_2'
    constants.ANTIGEN_ARRAY = antigen_array

    # Create fake spots dataframe. Built from a list of row dicts because
    # DataFrame.append no longer exists in pandas >= 2.0.
    df_cols = [
        'grid_row',
        'grid_col',
        'intensity_median',
        'bg_median',
        'od_norm',
    ]
    spot_rows = [
        {'grid_row': 0, 'grid_col': 0,
         'intensity_median': 1., 'bg_median': .5, 'od_norm': .75},
        {'grid_row': 1, 'grid_col': 2,
         'intensity_median': .1, 'bg_median': .2, 'od_norm': .3},
    ]
    spots_df = pd.DataFrame(spot_rows, columns=df_cols)

    # Create report instance and write a few wells
    reporter = report.ReportWriter()
    reporter.create_new_reports()
    reporter.assign_well_to_plate('C11', spots_df)
    reporter.assign_well_to_plate('D4', spots_df)
    # Change the OD of the second spot before the last well so that well A7
    # can be told apart from the other two
    spots_df.loc[1, 'od_norm'] = 10
    reporter.assign_well_to_plate('A7', spots_df)
    reporter.write_reports()

    # Load reports and check values
    od_path = os.path.join(output_dir, 'median_ODs.xlsx')
    report_od = pd.read_excel(od_path, sheet_name=None, index_col=0)
    assert report_od['0_0_antigen_0_0'].at['C', '11'] == .75
    assert report_od['1_2_antigen_1_2'].at['C', '11'] == .3
    assert report_od['0_0_antigen_0_0'].at['D', '4'] == .75
    assert report_od['1_2_antigen_1_2'].at['D', '4'] == .3
    assert report_od['0_0_antigen_0_0'].at['A', '7'] == .75
    assert report_od['1_2_antigen_1_2'].at['A', '7'] == 10.
def test_load_existing_reports_wrong_antigens_bg(report_test):
    """
    Write all three report workbooks, one of them with sheet names that do
    not match the antigen names, and expect loading to raise AssertionError.
    """
    antigen_names = ['0_0_antigen_0_0', '1_2_antigen_1_2']
    # (workbook file name, suffix appended to every sheet name)
    # NOTE(review): despite the "_bg" in the test name, it is the intensities
    # workbook that gets the mismatching sheet names.
    workbooks = [
        ('median_intensities.xlsx', 'wrong'),
        ('median_backgrounds.xlsx', ''),
        ('median_ODs.xlsx', ''),
    ]
    for file_name, suffix in workbooks:
        target = os.path.join(constants.RUN_PATH, file_name)
        with pd.ExcelWriter(target) as xlsx_writer:
            for name in antigen_names:
                report_test[name].to_excel(
                    xlsx_writer,
                    sheet_name=name + suffix,
                )

    # Make sure we get an assertion error
    report_writer = report.ReportWriter()
    with pytest.raises(AssertionError):
        report_writer.load_existing_reports()
def test_load_missing_reports(report_test):
    """
    No report workbook is written by this test; load_existing_reports is
    expected to raise an AssertionError.
    """
    report_writer = report.ReportWriter()
    # The writer is constructed outside the raises block so that only the
    # load call can satisfy the expectation
    with pytest.raises(AssertionError):
        report_writer.load_existing_reports()
def interp(input_dir, output_dir):
    """
    For each well image in the input directory: find and crop the well,
    segment bright spots, fit a grid to the spot centroids, estimate the
    background and compute per-spot intensities. Per-well metrics go to
    'stats_per_well.xlsx' in the run directory and plate reports are
    written once all wells are processed.

    :param str input_dir: Input directory containing the well images
    :param str output_dir: Directory where output is written to
    """
    MetaData(input_dir, output_dir)

    # Initialize background estimator
    bg_estimator = background_estimator.BackgroundEstimator2D(
        block_size=128,
        order=2,
        normalize=False,
    )
    reporter = report.ReportWriter()
    # Initialise the plate reports before any well is assigned, matching the
    # create-then-assign sequence used elsewhere for ReportWriter.
    reporter.create_new_reports()

    well_xlsx_path = os.path.join(
        constants.RUN_PATH,
        'stats_per_well.xlsx',
    )
    nbr_grid_rows = constants.params['rows']
    nbr_grid_cols = constants.params['columns']

    # The context manager closes the workbook even if a well raises
    with pd.ExcelWriter(well_xlsx_path) as well_xlsx_writer:
        antigen_df = reporter.get_antigen_df()
        antigen_df.to_excel(well_xlsx_writer, sheet_name='antigens')

        # ================
        # loop over images => good place for multiproc?
        # careful with columns in report
        # ================
        well_images = io_utils.get_image_paths(input_dir)
        for well_name, im_path in well_images.items():
            start = time.time()
            image = io_utils.read_gray_im(im_path)

            # finding center of well and cropping
            well_center, well_radi, well_mask = image_parser.find_well_border(
                image,
                detmethod='region',
                segmethod='otsu',
            )
            im_crop, _ = img_processing.crop_image_at_center(
                image,
                well_center,
                2 * well_radi,
                2 * well_radi,
            )

            # find center of spots from crop
            spot_mask = img_processing.thresh_and_binarize(
                im_crop,
                method='bright_spots',
            )
            spot_props = image_parser.generate_props(
                spot_mask,
                intensity_image=im_crop,
            )
            crop_coords = image_parser.grid_from_centroids(
                spot_props,
                nbr_grid_rows,
                nbr_grid_cols,
            )

            # Scale by the maximum of the image's integer dtype; the true
            # division yields a float image
            im_crop = im_crop / np.iinfo(im_crop.dtype).max
            background = bg_estimator.get_background(im_crop)
            spots_df, spot_props = array_gen.get_spot_intensity(
                coords=crop_coords,
                im=im_crop,
                background=background,
                params=constants.params,
            )
            # Write metrics for each spot in grid in current well
            spots_df.to_excel(well_xlsx_writer, sheet_name=well_name)
            # Assign well OD, intensity, and background stats to plate
            reporter.assign_well_to_plate(well_name, spots_df)

            stop = time.time()
            print(f"\ttime to process={stop-start}")

            # SAVE FOR DEBUGGING
            if constants.DEBUG:
                # Save spot and background intensities.
                output_name = os.path.join(constants.RUN_PATH, well_name)

                # Save mask of the well, cropped grayscale image, cropped
                # spot segmentation.
                io.imsave(output_name + "_well_mask.png",
                          (255 * well_mask).astype('uint8'))
                io.imsave(output_name + "_crop.png",
                          (255 * im_crop).astype('uint8'))
                io.imsave(output_name + "_crop_binary.png",
                          (255 * spot_mask).astype('uint8'))

                # Evaluate accuracy of background estimation with green
                # (image), magenta (background) overlay.
                im_bg_overlay = np.stack(
                    [background, im_crop, background],
                    axis=2,
                )
                io.imsave(output_name + "_crop_bg_overlay.png",
                          (255 * im_bg_overlay).astype('uint8'))

                # This plot shows which spots have been assigned what index.
                debug_plots.plot_centroid_overlay(
                    im_crop,
                    constants.params,
                    spot_props,
                    output_name,
                )
                debug_plots.plot_od(
                    spots_df=spots_df,
                    nbr_grid_rows=nbr_grid_rows,
                    nbr_grid_cols=nbr_grid_cols,
                    output_name=output_name,
                )
                # save a composite of all spots, where spots are from source
                # or from region prop
                debug_plots.save_composite_spots(
                    spot_props,
                    output_name,
                    image=im_crop,
                )
                debug_plots.save_composite_spots(
                    spot_props,
                    output_name,
                    image=im_crop,
                    from_source=True,
                )
                stop2 = time.time()
                print(f"\ttime to save debug={stop2-stop}")

    # After running all wells, write plate reports
    reporter.write_reports()
def point_registration(input_dir, output_dir):
    """
    For each image in input directory, detect spots using particle filtering
    to register fiducial spots to blobs detected in the image.

    Per-well spot metrics are written to 'stats_per_well.xlsx' in the run
    directory, and plate reports are written after all wells are processed.
    Wells with too few detected spots, or whose final registration check
    fails, are skipped with a warning.

    :param str input_dir: Input directory containing images and an xml file
        with parameters
    :param str output_dir: Directory where output is written to
    """
    logger = logging.getLogger(constants.LOG_NAME)

    metadata.MetaData(input_dir, output_dir)
    nbr_outliers = constants.params['nbr_outliers']

    # Create reports instance for whole plate
    reporter = report.ReportWriter()

    # Create writer for stats per well. The context manager closes the
    # workbook even if processing a well raises.
    well_xlsx_path = os.path.join(
        constants.RUN_PATH,
        'stats_per_well.xlsx',
    )
    with pd.ExcelWriter(well_xlsx_path) as well_xlsx_writer:
        antigen_df = reporter.get_antigen_df()
        antigen_df.to_excel(well_xlsx_writer, sheet_name='antigens')

        # Initialize background estimator
        bg_estimator = background_estimator.BackgroundEstimator2D(
            block_size=128,
            order=2,
            normalize=False,
        )

        # Get grid rows and columns from params
        nbr_grid_rows = constants.params['rows']
        nbr_grid_cols = constants.params['columns']
        fiducials_idx = constants.FIDUCIALS_IDX
        # Create spot detector instance
        spot_detector = img_processing.SpotDetector(
            imaging_params=constants.params,
        )

        well_images = io_utils.get_image_paths(input_dir)
        well_names = list(well_images)
        # If rerunning only a subset of wells
        if constants.RERUN:
            logger.info("Rerunning wells: {}".format(constants.RERUN_WELLS))
            txt_parser.rerun_xl_od(
                well_names=well_names,
                well_xlsx_path=well_xlsx_path,
                rerun_names=constants.RERUN_WELLS,
                xlsx_writer=well_xlsx_writer,
            )
            reporter.load_existing_reports()
            well_names = constants.RERUN_WELLS
            # remove debug images from old runs: files whose name up to the
            # first underscore is one of the wells being rerun
            for f in os.listdir(constants.RUN_PATH):
                if f.split('_')[0] in well_names:
                    os.remove(os.path.join(constants.RUN_PATH, f))
        else:
            reporter.create_new_reports()

        # ================
        # loop over well images
        # ================
        for well_name in well_names:
            start_time = time.time()
            im_path = well_images[well_name]
            image = io_utils.read_gray_im(im_path)
            logger.info("Extracting well: {}".format(well_name))
            # Get max intensity
            max_intensity = io_utils.get_max_intensity(image)
            logger.debug("Image max intensity: {}".format(max_intensity))

            # The image is not cropped to the well; the full image is used
            im_well = image

            # Find spot center coordinates
            spot_coords = spot_detector.get_spot_coords(
                im=im_well,
                max_intensity=max_intensity,
            )
            if spot_coords.shape[0] < constants.MIN_NBR_SPOTS:
                # Use the named logger like every other message here
                logger.warning("Not enough spots detected in {}, "
                               "continuing.".format(well_name))
                continue

            # Create particle filter registration instance
            register_inst = registration.ParticleFilter(
                spot_coords=spot_coords,
                im_shape=im_well.shape,
                fiducials_idx=fiducials_idx,
            )
            register_inst.particle_filter()
            if not register_inst.registration_ok:
                logger.warning("Registration failed for {}, "
                               "repeat with outlier removal".format(well_name))
                register_inst.particle_filter(nbr_outliers=nbr_outliers)

            # Transform grid coordinates
            registered_coords = register_inst.compute_registered_coords()
            # Check that registered coordinates are inside well
            registration_ok = register_inst.check_reg_coords()
            if not registration_ok:
                logger.warning("Final registration failed, "
                               "will not write OD for {}".format(well_name))
                if constants.DEBUG:
                    debug_plots.plot_registration(
                        im_well,
                        spot_coords,
                        register_inst.fiducial_coords,
                        registered_coords,
                        os.path.join(constants.RUN_PATH, well_name + '_failed'),
                        max_intensity=max_intensity,
                    )
                continue

            # Crop image
            im_crop, crop_coords = img_processing.crop_image_from_coords(
                im=im_well,
                coords=registered_coords,
            )
            im_crop = im_crop / max_intensity
            # Estimate background
            background = bg_estimator.get_background(im_crop)
            # Find spots near grid locations and compute properties
            spots_df, spot_props = array_gen.get_spot_intensity(
                coords=crop_coords,
                im=im_crop,
                background=background,
            )
            # Write metrics for each spot in grid in current well
            spots_df.to_excel(well_xlsx_writer, sheet_name=well_name)
            # Assign well OD, intensity, and background stats to plate
            reporter.assign_well_to_plate(well_name, spots_df)

            time_msg = "Time to extract OD in {}: {:.3f} s".format(
                well_name,
                time.time() - start_time,
            )
            print(time_msg)
            logger.info(time_msg)

            # ==================================
            # SAVE FOR DEBUGGING
            if constants.DEBUG:
                start_time = time.time()
                # Save spot and background intensities
                output_name = os.path.join(constants.RUN_PATH, well_name)
                # Save OD plots, composite spots and registration
                debug_plots.plot_od(
                    spots_df=spots_df,
                    nbr_grid_rows=nbr_grid_rows,
                    nbr_grid_cols=nbr_grid_cols,
                    output_name=output_name,
                )
                debug_plots.save_composite_spots(
                    spot_props=spot_props,
                    output_name=output_name,
                    image=im_crop,
                )
                debug_plots.plot_background_overlay(
                    im_crop,
                    background,
                    output_name,
                )
                debug_plots.plot_registration(
                    image=im_well,
                    spot_coords=spot_coords,
                    grid_coords=register_inst.fiducial_coords,
                    reg_coords=registered_coords,
                    output_name=output_name,
                    max_intensity=max_intensity,
                )
                logger.debug(
                    "Time to save debug images: {:.3f} s".format(
                        time.time() - start_time),
                )

    # After running all wells, write plate reports
    reporter.write_reports()