def test_ioerror(self, tmp_path):
    """save_to_csv must raise IOError when the target directory is not writable."""
    # Store the original folder permissions so they can be restored afterwards
    test_dir_chmod = Path.stat(tmp_path).st_mode
    # Create a new subdir and make it read + execute only
    test_dir = create_subdirectory(tmp_path, SUB_DIR)
    # BUG FIX: the mode must be octal. Decimal 555 is 0o1053, which left the
    # owner with NO permissions at all; 0o555 is the intended r-xr-xr-x
    test_dir.chmod(0o555)
    try:
        with pytest.raises(IOError):
            save_to_csv("", "", test_dir.joinpath("test_csv"))
    finally:
        # Restore original folder permissions so tmp_path cleanup succeeds
        test_dir.chmod(test_dir_chmod)
def test_iterable_unpack(self, tmp_path, headers, data_list):
    """Objects that are iterable (but not lists) are unpacked into CSV rows."""
    import csv

    # A generic object that save_to_csv will have to unpack via iteration
    class TestIterator:
        def __init__(self, prop):
            self.prop = prop

        def __iter__(self):
            return iter([self.prop])

    data_iters = [TestIterator(row[0]) for row in data_list]

    result = save_to_csv(data_iters, headers, tmp_path.joinpath("csv_unpack"))

    # Prepend the header wrapper so indices line up with the file's rows
    data_iters.insert(0, TestIterator(headers))

    # Check all rows were written correctly
    with open(result, 'r') as csvfile:
        reader = csv.reader(csvfile)
        for i, row in enumerate(reader):
            if i == 0:
                # BUG FIX: this was a bare comparison with no `assert`,
                # so the header row was never actually verified
                assert [str(x) for x in data_iters[i].prop] == row
            else:
                assert [str(data_iters[i].prop)] == row
def test_dict_view(self, tmp_path, headers, data_dict, data_list):
    """A dict view (``dict.values()``) is written out one CSV row per entry."""
    import csv

    result = save_to_csv(data_dict.values(), headers, tmp_path.joinpath("csv_dict_view"))

    # Add headers to data so expected rows align with the written file
    data_list.insert(0, headers)

    # Check all rows were written correctly
    with open(result, 'r') as csvfile:
        for row_index, row in enumerate(csv.reader(csvfile)):
            expected = data_list[row_index]
            assert [str(value) for value in expected] == row
def test_dict(self, tmp_path, headers, data_dict):
    """A dict written to CSV round-trips back through csv.DictReader."""
    import csv

    result = save_to_csv(data_dict, headers, tmp_path.joinpath("csv_dict"))

    # Rebuild a dict from the written file and compare with the original
    result_dict = dict.fromkeys(headers)
    with open(result, 'r') as csvfile:
        for record in csv.DictReader(csvfile):
            for key in result_dict:
                result_dict[key] = int(record[key])

    assert data_dict == result_dict
def test_string_path(self, tmp_path):
    """A plain string path is accepted and a .csv suffix is appended."""
    save_path = tmp_path.joinpath("test_csv")
    expected = save_path.with_suffix(".csv")
    assert save_to_csv("", "", str(save_path)) == expected
def test_iterable(self, tmp_path):
    """Non-iterable data (here an int) is rejected with a ValueError."""
    target = tmp_path.joinpath("test_csv")
    with pytest.raises(ValueError):
        save_to_csv(0, "", target)
def main():
    """Command-line entry point: locate plate images in a folder, extract colony
    data from them (optionally reusing cached results), then write CSV data and
    plots back to the working directory.

    Raises:
        ValueError: if no working directory path is supplied.
        EnvironmentError: if the supplied path does not exist.
        IOError: if no images are found, or timestamps cannot be parsed from
            every image filename.
    """
    parser = argparse.ArgumentParser(
        description = "An image analysis tool for measuring microorganism colony growth",
        formatter_class = argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("path", type = str, help = "Image files location", default = None)
    parser.add_argument("-v", "--verbose", type = int, default = 1,
        help = "Information output level")
    parser.add_argument("-dpi", "--dots_per_inch", type = int, default = 2540,
        help = "The image DPI (dots per inch) setting")
    parser.add_argument("--plate_size", type = int, default = 100,
        help = "The plate diameter, in millimetres")
    parser.add_argument("--plate_lattice", type = int, nargs = 2, default = (3, 2), metavar = ("ROW", "COL"),
        help = "The row and column co-ordinate layout of plates. Example usage: --plate_lattice 3 3")
    parser.add_argument("--save_plots", type = int, default = 1,
        help = "The detail level of plot images to store on disk")
    parser.add_argument("--use_saved", type = strtobool, default = True,
        help = "Allow or prevent use of previously calculated data")
    parser.add_argument("-mp", "--multiprocessing", type = strtobool, default = True,
        help = "Enables use of more CPU cores, faster but more resource intensive")

    args = parser.parse_args()
    BASE_PATH = args.path
    VERBOSE = args.verbose
    PLATE_SIZE = imaging.mm_to_pixels(args.plate_size, dots_per_inch = args.dots_per_inch)
    PLATE_LATTICE = tuple(args.plate_lattice)
    SAVE_PLOTS = args.save_plots
    USE_SAVED = args.use_saved
    POOL_MAX = 1
    if args.multiprocessing:
        POOL_MAX = cpu_count()

    if VERBOSE >= 1:
        print("Starting ColonyScanalyser analysis")

    # Resolve working directory
    if BASE_PATH is None:
        raise ValueError("A path to a working directory must be supplied")
    else:
        BASE_PATH = Path(args.path).resolve()
    if not BASE_PATH.exists():
        raise EnvironmentError(f"The supplied folder path could not be found: {BASE_PATH}")
    if VERBOSE >= 1:
        print(f"Working directory: {BASE_PATH}")

    # Find images in working directory
    image_formats = ["tif", "tiff", "png"]
    image_files = file_access.get_files_by_type(BASE_PATH, image_formats)

    # Check if images have been loaded
    if len(image_files) > 0:
        if VERBOSE >= 1:
            print(f"{len(image_files)} images found")
    else:
        # BUG FIX: the second string segment was missing the `f` prefix, so
        # "{image_formats}" was printed literally instead of being interpolated
        raise IOError("No images could be found in the supplied folder path."
                      f" Images are expected in these formats: {image_formats}")

    # Get date and time information from filenames
    time_points = get_image_timestamps(image_files)
    time_points_elapsed = get_image_timestamps(image_files, elapsed_minutes = True)
    # BUG FIX: the second clause duplicated the first (time_points twice);
    # it must verify time_points_elapsed as well
    if len(time_points) != len(image_files) or len(time_points_elapsed) != len(image_files):
        raise IOError("Unable to load timestamps from all image filenames."
                      " Please check that images have a filename with YYYYMMDD_HHMM timestamps")

    # Check if processed image data is already stored and can be loaded
    segmented_image_data_filename = "processed_data"
    if USE_SAVED:
        if VERBOSE >= 1:
            print("Attempting to load cached data")
        plate_colonies = file_access.load_file(
            BASE_PATH.joinpath("data", segmented_image_data_filename),
            file_access.CompressionMethod.LZMA,
            pickle = True
        )
        # Check that segmented image data has been loaded for all plates.
        # BUG FIX: the validity check was ANDed with `VERBOSE >= 1`, so at
        # verbosity 0 a perfectly valid cache was discarded and recomputed.
        # Verbosity now only gates the messages, not the decision.
        if (plate_colonies is not None
                and len(plate_colonies) == utilities.coordinate_to_index_number(PLATE_LATTICE)):
            if VERBOSE >= 1:
                print("Successfully loaded cached data")
        else:
            if VERBOSE >= 1:
                print("Unable to load cached data, starting image processing")
            plate_colonies = None

    # Process images to Timepoint data objects
    if not USE_SAVED or plate_colonies is None:
        plate_coordinates = None
        plate_images_mask = None
        plate_timepoints = defaultdict(list)

        if VERBOSE >= 1:
            print("Preprocessing images to locate plates")

        # Load the first image to get plate coordinates and mask
        # NOTE(review): relies on Path's (deprecated) context-manager support —
        # confirm the minimum supported Python version still allows this
        with image_files[0] as image_file:
            # Load image
            img = imread(str(image_file), as_gray = True)

            # Only find centers using the first image. Assume plates do not move
            if plate_coordinates is None:
                if VERBOSE >= 2:
                    print(f"Locating plate centres in image: {image_file}")
                plate_coordinates = imaging.get_image_circles(
                    img,
                    int(PLATE_SIZE / 2),
                    circle_count = utilities.coordinate_to_index_number(PLATE_LATTICE),
                    search_radius = 50
                )
                if VERBOSE >= 3:
                    for k, center in enumerate(plate_coordinates, start = 1):
                        print(f"Plate {k} center: {center[0]}")

            # Split image into individual plates
            plate_images = get_plate_images(img, plate_coordinates, edge_cut = 60)

            # Use the first plate images as a noise mask
            if plate_images_mask is None:
                plate_images_mask = plate_images

        if VERBOSE >= 1:
            print("Processing colony data from all images")

        # Thin wrapper to display a progress bar
        def progress_update(result, progress):
            utilities.progress_bar(progress, message = "Processing images")

        processes = list()
        with Pool(processes = POOL_MAX) as pool:
            for i, image_file in enumerate(image_files):
                # Allow args to be passed to the callback function
                callback_function = partial(progress_update, progress = ((i + 1) / len(image_files)) * 100)
                # Create processes
                processes.append(pool.apply_async(
                    image_file_to_timepoints,
                    args = (image_file, plate_coordinates, plate_images_mask, time_points[i], time_points_elapsed[i]),
                    kwds = {"plot_path" : None},
                    callback = callback_function
                ))

            # Consolidate the results to a single dict
            for process in processes:
                result = process.get()
                for plate_id, timepoints in result.items():
                    plate_timepoints[plate_id].extend(timepoints)

        # Clear objects to free up memory before building colony objects
        processes = None
        plate_images = None
        plate_images_mask = None
        img = None

        if VERBOSE >= 1:
            print("Calculating colony properties")

        # Group Timepoints by centres and create Colony objects
        plate_colonies = dict()
        for plate_id, plate in plate_timepoints.items():
            plate_colonies[plate_id] = {colony.id : colony for colony in colonies_from_timepoints(plate)}

            # Filter colonies to remove noise, background objects and merged colonies
            plate_colonies[plate_id] = dict(filter(lambda item:
                # Remove objects that do not have sufficient data points, usually just noise
                len(item[1].timepoints) > len(time_points) * 0.2 and
                # Remove objects that do not show growth, these are not colonies
                item[1].growth_rate > 1 and
                # Colonies that appear with a large initial area are most likely merged colonies, not new colonies
                item[1].timepoint_first.area < 50,
                plate_colonies[plate_id].items()
            ))

            if VERBOSE >= 1:
                print(f"Colony data stored for {len(plate_colonies[plate_id])} colonies on plate {plate_id}")

    # Abort cleanly when no colonies were found on any plate
    if not any(len(plate) for plate in plate_colonies.values()):
        if VERBOSE >= 1:
            print("Unable to locate any colonies in the images provided")
            print(f"ColonyScanalyser analysis completed for: {BASE_PATH}")
        sys.exit()

    # Store pickled data to allow quick re-use
    save_path = file_access.create_subdirectory(BASE_PATH, "data")
    save_path = save_path.joinpath(segmented_image_data_filename)
    save_status = file_access.save_file(save_path, plate_colonies, file_access.CompressionMethod.LZMA)
    if VERBOSE >= 1:
        if save_status:
            print(f"Cached data saved to {save_path}")
        else:
            print(f"An error occurred and cached data could not be written to disk at {save_path}")

    # Store colony data in CSV format
    if VERBOSE >= 1:
        print("Saving data to CSV")

    save_path = BASE_PATH.joinpath("data")
    for plate_id, plate in plate_colonies.items():
        headers = [
            "Colony ID",
            "Time of appearance",
            "Time of appearance (elapsed minutes)",
            "Center point averaged (row, column)",
            "Growth rate average",
            "Growth rate",
            "Doubling time average (minutes)",
            "Doubling times (minutes)",
            "First detection (elapsed minutes)",
            "First center point (row, column)",
            "First area (pixels)",
            "First diameter (pixels)",
            "Final detection (elapsed minutes)",
            "Final center point (row, column)",
            "Final area (pixels)",
            "Final diameter (pixels)"
        ]
        # Save data for all colonies on one plate
        file_access.save_to_csv(
            plate.values(),
            headers,
            save_path.joinpath(f"plate{plate_id}_colonies")
        )

        # Save data for each colony on a plate
        headers = [
            "Colony ID",
            "Date/Time",
            "Elapsed time (minutes)",
            "Area (pixels)",
            "Center (row, column)",
            "Diameter (pixels)",
            "Perimeter (pixels)"
        ]
        colony_timepoints = list()
        for colony_id, colony in plate.items():
            for timepoint in colony.timepoints.values():
                # Unpack timepoint properties to a flat list
                colony_timepoints.append([colony_id, *timepoint])
        file_access.save_to_csv(
            colony_timepoints,
            headers,
            save_path.joinpath(f"plate{plate_id}_colony_timepoints")
        )

    if VERBOSE >= 1:
        print("Saving plots")

    # Plot colony growth curves and time of appearance for each plate
    if SAVE_PLOTS >= 2:
        for plate_id, plate in plate_colonies.items():
            row, col = utilities.index_number_to_coordinate(plate_id, PLATE_LATTICE)
            save_path = get_plate_directory(BASE_PATH.joinpath("plots"), row, col, create_dir = True)
            plate_item = {plate_id : plate}
            plots.plot_growth_curve(plate_item, time_points_elapsed, save_path)
            plots.plot_appearance_frequency(plate_item, time_points_elapsed, save_path)
            plots.plot_appearance_frequency(plate_item, time_points_elapsed, save_path, bar = True)

    # Plot colony growth curves for all plates combined
    if SAVE_PLOTS >= 1:
        save_path = file_access.create_subdirectory(BASE_PATH, "plots")
        plots.plot_growth_curve(plate_colonies, time_points_elapsed, save_path)
        plots.plot_appearance_frequency(plate_colonies, time_points_elapsed, save_path)
        plots.plot_appearance_frequency(plate_colonies, time_points_elapsed, save_path, bar = True)
        plots.plot_doubling_map(plate_colonies, time_points_elapsed, save_path)

    if VERBOSE >= 1:
        print(f"ColonyScanalyser analysis completed for: {BASE_PATH}")

    sys.exit()