def check_slurm_n_processes():
    # SLURM_NPROCS advertises the CPUs allocated by SLURM; the expected
    # result is that allocation minus the requested number of free cores.
    rand_n = random.randint(1, 100)
    rand_min = random.randint(1, 30)
    os.environ["SLURM_NPROCS"] = str(rand_n)
    correct_n_procs = rand_n - rand_min
    assert correct_n_procs == system.get_num_processes(
        min_free_cpu_cores=rand_min
    )
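# A minimal sketch of the behaviour the check above assumes (hypothetical
# helper, not the actual cellfinder implementation): when SLURM_NPROCS is
# set, the CPU count comes from that environment variable rather than from
# the scheduler affinity mask, and min_free_cpu_cores is subtracted from it.
def _assumed_num_processes_sketch(min_free_cpu_cores=0):
    import os

    if "SLURM_NPROCS" in os.environ:
        n_cpus = int(os.environ["SLURM_NPROCS"])
    else:
        n_cpus = len(os.sched_getaffinity(0))  # Linux-only, as in the checks
    return n_cpus - min_free_cpu_cores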
def main(args, max_workers=3):
    signal_paths = args.signal_planes_paths[args.signal_channel]
    background_paths = args.background_planes_path[0]
    signal_images = system.get_sorted_file_paths(
        signal_paths, file_extension="tif"
    )
    background_images = system.get_sorted_file_paths(
        background_paths, file_extension="tif"
    )

    # Too many workers doesn't increase speed, and uses huge amounts of RAM
    workers = system.get_num_processes(
        min_free_cpu_cores=args.n_free_cpus, n_max_processes=max_workers
    )

    logging.debug("Initialising cube generator")
    inference_generator = CubeGeneratorFromFile(
        args.paths.cells_file_path,
        signal_images,
        background_images,
        batch_size=args.batch_size,
        x_pixel_um=args.x_pixel_um,
        y_pixel_um=args.y_pixel_um,
        z_pixel_um=args.z_pixel_um,
        x_pixel_um_network=args.x_pixel_um_network,
        y_pixel_um_network=args.y_pixel_um_network,
        z_pixel_um_network=args.z_pixel_um_network,
        cube_width=args.cube_width,
        cube_height=args.cube_height,
        cube_depth=args.cube_depth,
    )

    model = get_model(
        existing_model=args.trained_model,
        model_weights=args.model_weights,
        network_depth=models[args.network_depth],
        inference=True,
    )

    logging.info("Running inference")
    predictions = model.predict(
        inference_generator,
        use_multiprocessing=True,
        workers=workers,
        verbose=True,
    )
    predictions = predictions.round()
    predictions = predictions.astype("uint16")
    predictions = np.argmax(predictions, axis=1)

    cells_list = []
    # only go through the "extractable" cells
    for idx, cell in enumerate(inference_generator.ordered_cells):
        cell.type = predictions[idx] + 1
        cells_list.append(cell)

    logging.info("Saving classified cells")
    save_cells(
        cells_list, args.paths.classification_out_file, save_csv=args.save_csv
    )
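# Illustration of the prediction post-processing above on made-up softmax
# outputs (example data only): argmax over axis=1 picks the most likely
# class per cube, and the +1 offset maps class indices onto cell type codes
# (the exact meaning of those codes is assumed here, not taken from the
# source).
def _prediction_postprocessing_example():
    import numpy as np

    example_predictions = np.array([[0.9, 0.1], [0.2, 0.8], [0.4, 0.6]])
    example_predictions = example_predictions.round().astype("uint16")
    class_indices = np.argmax(example_predictions, axis=1)  # [0, 1, 1]
    cell_types = class_indices + 1  # [1, 2, 2]
    return cell_types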
def main(args):
    n_processes = system.get_num_processes(
        min_free_cpu_cores=args.n_free_cpus
    )
    start_time = datetime.now()

    (
        soma_diameter,
        max_cluster_size,
        ball_xy_size,
        ball_z_size,
    ) = calculate_parameters_in_pixels(
        args.x_pixel_um,
        args.y_pixel_um,
        args.z_pixel_um,
        args.soma_diameter,
        args.max_cluster_size,
        args.ball_xy_size,
        args.ball_z_size,
    )

    # file extension only used if a directory is passed
    img_paths = system.get_sorted_file_paths(
        args.signal_planes_paths[0], file_extension="tif"
    )

    if args.end_plane == -1:
        args.end_plane = len(img_paths)
    planes_paths_range = img_paths[args.start_plane : args.end_plane]

    workers_queue = MultiprocessingQueue(maxsize=n_processes)
    # WARNING: needs to be AT LEAST ball_z_size
    mp_3d_filter_queue = MultiprocessingQueue(maxsize=ball_z_size)
    for plane_id in range(n_processes):
        # placeholder for the queue to have the right size on first run
        workers_queue.put(None)

    clipping_val, threshold_value, ball_filter, cell_detector = setup(
        img_paths[0],
        soma_diameter,
        ball_xy_size,
        ball_z_size,
        ball_overlap_fraction=args.ball_overlap_fraction,
        z_offset=args.start_plane,
    )

    progress_bar = tqdm(
        total=len(planes_paths_range), desc="Processing planes"
    )
    mp_3d_filter = Mp3DFilter(
        mp_3d_filter_queue,
        ball_filter,
        cell_detector,
        soma_diameter,
        args.output_dir,
        soma_size_spread_factor=args.soma_spread_factor,
        progress_bar=progress_bar,
        save_planes=args.save_planes,
        plane_directory=args.plane_directory,
        start_plane=args.start_plane,
        max_cluster_size=max_cluster_size,
        outlier_keep=args.outlier_keep,
        artifact_keep=args.artifact_keep,
        save_csv=args.save_csv,
    )

    # start 3D analysis (waits for planes in queue)
    bf_process = multiprocessing.Process(target=mp_3d_filter.process, args=())
    bf_process.start()  # needs to be started before the loop

    mp_tile_processor = MpTileProcessor(workers_queue, mp_3d_filter_queue)
    prev_lock = Lock()
    processes = []

    # start 2D tile filter (output goes into queue for 3D analysis)
    for plane_id, path in enumerate(planes_paths_range):
        workers_queue.get()
        lock = Lock()
        lock.acquire()
        p = multiprocessing.Process(
            target=mp_tile_processor.process,
            args=(
                plane_id,
                path,
                prev_lock,
                lock,
                clipping_val,
                threshold_value,
                soma_diameter,
                args.log_sigma_size,
                args.n_sds_above_mean_thresh,
            ),
        )
        prev_lock = lock
        processes.append(p)
        p.start()

    processes[-1].join()
    mp_3d_filter_queue.put((None, None, None))  # signal the end
    bf_process.join()

    logging.info(
        "Detection complete - all planes done in: {}".format(
            datetime.now() - start_time
        )
    )
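# The 2D tile workers above are ordered with a chain of multiprocessing
# locks: each worker receives the previous worker's (already acquired) lock
# and its own lock, so planes presumably enter the 3D filter queue in z
# order even though the per-plane work runs in parallel. A minimal,
# self-contained sketch of that hand-off pattern (illustrative only, all
# names are made up):
def _ordered_worker_sketch(index, prev_lock, self_lock, output_queue):
    # per-plane work would happen here, in parallel with the other workers
    prev_lock.acquire()  # wait until the previous plane has been queued
    output_queue.put(index)
    self_lock.release()  # allow the next worker to queue its plane


def _ordered_worker_demo(n_workers=4):
    import multiprocessing

    queue = multiprocessing.Queue()
    prev_lock = multiprocessing.Lock()  # first worker acquires immediately
    workers = []
    for i in range(n_workers):
        lock = multiprocessing.Lock()
        lock.acquire()
        p = multiprocessing.Process(
            target=_ordered_worker_sketch, args=(i, prev_lock, lock, queue)
        )
        prev_lock = lock
        workers.append(p)
        p.start()
    workers[-1].join()  # the last worker can only finish after all others
    return [queue.get() for _ in range(n_workers)]  # -> [0, 1, ..., n - 1]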
def main(max_workers=3):
    from cellfinder.main import suppress_tf_logging

    suppress_tf_logging(tf_suppress_log_messages)

    from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

    from cellfinder.tools import system
    from cellfinder.tools.prep import prep_training
    from cellfinder.classify.tools import make_lists, get_model
    from cellfinder.classify.cube_generator import CubeGeneratorFromDisk

    start_time = datetime.now()
    args = training_parse()
    output_dir = Path(args.output_dir)
    system.ensure_directory_exists(output_dir)
    args = prep_training(args)

    tiff_files = parse_yaml(args.yaml_file)

    # Too many workers doesn't increase speed, and uses huge amounts of RAM
    workers = system.get_num_processes(
        min_free_cpu_cores=args.n_free_cpus, n_max_processes=max_workers
    )

    model = get_model(
        existing_model=args.trained_model,
        model_weights=args.model_weights,
        network_depth=models[args.network_depth],
        learning_rate=args.learning_rate,
        continue_training=args.continue_training,
    )

    signal_train, background_train, labels_train = make_lists(tiff_files)

    if args.test_fraction > 0:
        (
            signal_train,
            signal_test,
            background_train,
            background_test,
            labels_train,
            labels_test,
        ) = train_test_split(
            signal_train,
            background_train,
            labels_train,
            test_size=args.test_fraction,
        )
        validation_generator = CubeGeneratorFromDisk(
            signal_test,
            background_test,
            labels=labels_test,
            batch_size=args.batch_size,
            train=True,
        )
    else:
        validation_generator = None

    training_generator = CubeGeneratorFromDisk(
        signal_train,
        background_train,
        labels=labels_train,
        batch_size=args.batch_size,
        shuffle=True,
        train=True,
        augment=not args.no_augment,
    )

    callbacks = []

    if args.tensorboard:
        logdir = output_dir / "tensorboard"
        system.ensure_directory_exists(logdir)
        tensorboard = TensorBoard(
            log_dir=logdir,
            histogram_freq=0,
            write_graph=True,
            update_freq="epoch",
        )
        callbacks.append(tensorboard)

    if args.save_checkpoints:
        if args.save_weights:
            filepath = str(
                output_dir / "weights.{epoch:02d}-{val_loss:.3f}.h5"
            )
        else:
            filepath = str(output_dir / "model.{epoch:02d}-{val_loss:.3f}.h5")

        checkpoints = ModelCheckpoint(
            filepath, save_weights_only=args.save_weights
        )
        callbacks.append(checkpoints)

    model.fit(
        training_generator,
        validation_data=validation_generator,
        use_multiprocessing=True,
        workers=workers,
        epochs=args.epochs,
        callbacks=callbacks,
    )

    if args.save_weights:
        print("Saving model weights")
        model.save_weights(str(output_dir / "model_weights.h5"))
    else:
        print("Saving model")
        model.save(output_dir / "model.h5")

    print(
        "Finished training, total time taken: {}".format(
            datetime.now() - start_time
        )
    )
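# Note on the splitting step above: scikit-learn's train_test_split accepts
# several parallel sequences and returns a train/test pair for each, in the
# same order, which is why the six-way unpacking works. A tiny illustration
# with made-up data:
def _parallel_split_example():
    from sklearn.model_selection import train_test_split

    signal = ["s0", "s1", "s2", "s3"]
    background = ["b0", "b1", "b2", "b3"]
    labels = [0, 1, 0, 1]
    (
        signal_train,
        signal_test,
        background_train,
        background_test,
        labels_train,
        labels_test,
    ) = train_test_split(signal, background, labels, test_size=0.25)
    # the same shuffled indices are applied to all three inputs, so the
    # i-th signal, background and label stay paired after the split
    return signal_test, background_test, labels_test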
def check_max_processes():
    max_proc = 5
    correct_n = min(len(os.sched_getaffinity(0)), max_proc)
    assert correct_n == system.get_num_processes(n_max_processes=max_proc)
def check_get_num_processes():
    assert len(os.sched_getaffinity(0)) == system.get_num_processes()
def main(args):
    start_time = datetime.now()

    cells = get_cells(args.paths.cells_file_path)
    if not cells:
        logging.error(
            "No cells found, exiting. Please check your "
            "cell xml file path: {}"
            " or verify your cell types "
            "(maybe use cells-only option to disable)".format(
                args.paths.cells_file_path
            )
        )
        raise ValueError(
            "No cells found, exiting. Please check your "
            "cell xml file path: {}"
            " or verify your cell types "
            "(maybe use cells-only option to disable)".format(
                args.paths.cells_file_path
            )
        )

    if args.z_pixel_um != args.z_pixel_um_network:
        plane_scaling_factor = args.z_pixel_um_network / args.z_pixel_um
        num_planes_needed_for_cube = round(
            args.cube_depth * plane_scaling_factor
        )
    else:
        num_planes_needed_for_cube = args.cube_depth

    planes_paths = {}
    # Use args.paths for this
    all_channel_ids = args.signal_ch_ids + [args.background_ch_id]
    for idx, planes_paths_file_path in enumerate(args.all_planes_paths):
        channel = all_channel_ids[idx]

        if args.cube_extract_cli:
            channel_list = all_channel_ids
            args.signal_channel = all_channel_ids[0]
        else:
            # only extract those channels that are necessary for
            # classification
            channel_list = [args.signal_channel, args.background_ch_id]
        if channel in channel_list:
            planes_paths[channel] = system.get_sorted_file_paths(
                planes_paths_file_path, file_extension="tif"
            )

    if num_planes_needed_for_cube > len(planes_paths[0]):
        raise StackSizeError(
            "The number of planes provided is not sufficient "
            "for any cubes to be extracted. Please check the "
            "input data"
        )

    first_plane = tifffile.imread(list(planes_paths.values())[0][0])

    planes_shape = first_plane.shape
    brain_depth = len(list(planes_paths.values())[0])

    # TODO: use to assert all centre planes processed
    center_planes = sorted(list(set([cell.z for cell in cells])))

    # REFACTOR: rename (clashes with different meaning of planes_to_read
    # below)
    planes_to_read = np.zeros(brain_depth, dtype=bool)

    if tools.is_even(num_planes_needed_for_cube):
        half_nz = num_planes_needed_for_cube // 2
        # WARNING: not centered because even
        for p in center_planes:
            planes_to_read[p - half_nz : p + half_nz] = 1
    else:
        half_nz = num_planes_needed_for_cube // 2
        # centered
        for p in center_planes:
            planes_to_read[p - half_nz : p + half_nz + 1] = 1

    planes_to_read = np.where(planes_to_read)[0]

    if not planes_to_read.size:
        logging.error(
            f"No planes found, you need at the very least "
            f"{num_planes_needed_for_cube} "
            f"planes to proceed (i.e. cube z size). "
            f"Brain z dimension is {brain_depth}.",
            stack_info=True,
        )
        raise ValueError(
            f"No planes found, you need at the very least "
            f"{num_planes_needed_for_cube} "
            f"planes to proceed (i.e. cube z size). "
            f"Brain z dimension is {brain_depth}."
        )
    # TODO: check if needs to flip args.cube_width and args.cube_height

    cells_groups = cell_tools.group_cells_by_z(cells)

    # copies=2 is set because at all times there is a plane queue (deque)
    # and an array passed to `Cube`
    ram_per_process = get_ram_requirement_per_process(
        planes_paths[args.signal_channel][0],
        num_planes_needed_for_cube,
        copies=2,
    )
    n_processes = system.get_num_processes(
        min_free_cpu_cores=args.n_free_cpus,
        ram_needed_per_process=ram_per_process,
        n_max_processes=len(planes_to_read),
        fraction_free_ram=0.2,
        max_ram_usage=system.memory_in_bytes(args.max_ram, "GB"),
    )
    # TODO: don't need to extract cubes from all channels if
    #  n_signal_channels > 1
    with ProcessPoolExecutor(max_workers=n_processes) as executor:
        n_planes_per_chunk = len(planes_to_read) // n_processes
        for i in range(n_processes):
            start_idx = i * n_planes_per_chunk
            end_idx = (
                start_idx + n_planes_per_chunk + num_planes_needed_for_cube - 1
            )
            if end_idx > planes_to_read[-1]:
                end_idx = None
            sub_planes_to_read = planes_to_read[start_idx:end_idx]

            executor.submit(
                save_cubes,
                cells_groups,
                planes_paths,
                sub_planes_to_read,
                planes_shape,
                args.x_pixel_um,
                args.y_pixel_um,
                args.x_pixel_um_network,
                args.y_pixel_um_network,
                num_planes_for_cube=num_planes_needed_for_cube,
                cube_width=args.cube_width,
                cube_height=args.cube_height,
                cube_depth=args.cube_depth,
                thread_id=i,
                output_dir=args.paths.tmp__cubes_output_dir,
                save_empty_cubes=args.save_empty_cubes,
            )

    total_cubes = system.get_number_of_files_in_dir(
        args.paths.tmp__cubes_output_dir
    )
    time_taken = datetime.now() - start_time
    logging.info(
        "All cubes ({}) extracted in: {}".format(total_cubes, time_taken)
    )
def prep_training(args):
    n_processes = system.get_num_processes(
        min_free_cpu_cores=args.n_free_cpus
    )
    prep_tensorflow(n_processes)
    args = prep_models(args)
    return args