Example #1
def check_slurm_n_processes():
    rand_n = random.randint(1, 100)
    rand_min = random.randint(1, 30)
    os.environ["SLURM_NPROCS"] = str(rand_n)
    correct_n_procs = rand_n - rand_min
    assert correct_n_procs == system.get_num_processes(
        min_free_cpu_cores=rand_min)
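
Note: the test above implies that when the SLURM_NPROCS environment variable is set, get_num_processes() treats it as the total CPU count and subtracts min_free_cpu_cores. A minimal sketch of that logic (an illustration only, not cellfinder's actual implementation; slurm_aware_cpu_count is a hypothetical name):

import os


def slurm_aware_cpu_count(min_free_cpu_cores=0):
    # Prefer the CPU count reported by SLURM, otherwise fall back to the
    # cores this process may run on (the default checked in Examples #5/#6).
    if "SLURM_NPROCS" in os.environ:
        total = int(os.environ["SLURM_NPROCS"])
    else:
        total = len(os.sched_getaffinity(0))
    # The real function also supports RAM and max-process caps
    # (see Examples #2, #5 and #7).
    return total - min_free_cpu_cores
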
Example #2
def main(args, max_workers=3):
    signal_paths = args.signal_planes_paths[args.signal_channel]
    background_paths = args.background_planes_path[0]
    signal_images = system.get_sorted_file_paths(signal_paths,
                                                 file_extension="tif")
    background_images = system.get_sorted_file_paths(background_paths,
                                                     file_extension="tif")

    # Too many workers don't increase speed and use huge amounts of RAM
    workers = system.get_num_processes(min_free_cpu_cores=args.n_free_cpus,
                                       n_max_processes=max_workers)

    logging.debug("Initialising cube generator")
    inference_generator = CubeGeneratorFromFile(
        args.paths.cells_file_path,
        signal_images,
        background_images,
        batch_size=args.batch_size,
        x_pixel_um=args.x_pixel_um,
        y_pixel_um=args.y_pixel_um,
        z_pixel_um=args.z_pixel_um,
        x_pixel_um_network=args.x_pixel_um_network,
        y_pixel_um_network=args.y_pixel_um_network,
        z_pixel_um_network=args.z_pixel_um_network,
        cube_width=args.cube_width,
        cube_height=args.cube_height,
        cube_depth=args.cube_depth,
    )

    model = get_model(
        existing_model=args.trained_model,
        model_weights=args.model_weights,
        network_depth=models[args.network_depth],
        inference=True,
    )

    logging.info("Running inference")
    predictions = model.predict(
        inference_generator,
        use_multiprocessing=True,
        workers=workers,
        verbose=True,
    )
    predictions = predictions.round()
    predictions = predictions.astype("uint16")

    predictions = np.argmax(predictions, axis=1)
    cells_list = []

    # only go through the "extractable" cells
    for idx, cell in enumerate(inference_generator.ordered_cells):
        cell.type = predictions[idx] + 1
        cells_list.append(cell)

    logging.info("Saving classified cells")
    save_cells(cells_list,
               args.paths.classification_out_file,
               save_csv=args.save_csv)
Example #3
File: detect.py  Project: rmd13/cellfinder
def main(args):
    n_processes = system.get_num_processes(min_free_cpu_cores=args.n_free_cpus)
    start_time = datetime.now()

    (
        soma_diameter,
        max_cluster_size,
        ball_xy_size,
        ball_z_size,
    ) = calculate_parameters_in_pixels(
        args.x_pixel_um,
        args.y_pixel_um,
        args.z_pixel_um,
        args.soma_diameter,
        args.max_cluster_size,
        args.ball_xy_size,
        args.ball_z_size,
    )

    # file extension only used if a directory is passed
    img_paths = system.get_sorted_file_paths(args.signal_planes_paths[0],
                                             file_extension="tif")

    if args.end_plane == -1:
        args.end_plane = len(img_paths)
    planes_paths_range = img_paths[args.start_plane:args.end_plane]

    workers_queue = MultiprocessingQueue(maxsize=n_processes)
    # WARNING: needs to be AT LEAST ball_z_size
    mp_3d_filter_queue = MultiprocessingQueue(maxsize=ball_z_size)
    for _ in range(n_processes):
        # placeholder tokens so the queue is full on the first run
        workers_queue.put(None)

    clipping_val, threshold_value, ball_filter, cell_detector = setup(
        img_paths[0],
        soma_diameter,
        ball_xy_size,
        ball_z_size,
        ball_overlap_fraction=args.ball_overlap_fraction,
        z_offset=args.start_plane,
    )

    progress_bar = tqdm(total=len(planes_paths_range),
                        desc="Processing planes")
    mp_3d_filter = Mp3DFilter(
        mp_3d_filter_queue,
        ball_filter,
        cell_detector,
        soma_diameter,
        args.output_dir,
        soma_size_spread_factor=args.soma_spread_factor,
        progress_bar=progress_bar,
        save_planes=args.save_planes,
        plane_directory=args.plane_directory,
        start_plane=args.start_plane,
        max_cluster_size=max_cluster_size,
        outlier_keep=args.outlier_keep,
        artifact_keep=args.artifact_keep,
        save_csv=args.save_csv,
    )

    # start 3D analysis (waits for planes in queue)
    bf_process = multiprocessing.Process(target=mp_3d_filter.process, args=())
    bf_process.start()  # needs to be started before the loop

    mp_tile_processor = MpTileProcessor(workers_queue, mp_3d_filter_queue)
    prev_lock = Lock()
    processes = []

    # start 2D tile filter (output goes into queue for 3D analysis)
    for plane_id, path in enumerate(planes_paths_range):
        workers_queue.get()
        lock = Lock()
        lock.acquire()
        p = multiprocessing.Process(
            target=mp_tile_processor.process,
            args=(
                plane_id,
                path,
                prev_lock,
                lock,
                clipping_val,
                threshold_value,
                soma_diameter,
                args.log_sigma_size,
                args.n_sds_above_mean_thresh,
            ),
        )
        prev_lock = lock
        processes.append(p)
        p.start()

    processes[-1].join()
    mp_3d_filter_queue.put((None, None, None))  # Signal the end
    bf_process.join()

    logging.info("Detection complete - all planes done in: {}".format(
        datetime.now() - start_time))
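
Note: the per-plane Lock objects above form a chain: each lock is acquired in the main process before its worker starts, each worker waits on the previous plane's lock before pushing its output to the 3D-filter queue, then releases its own lock, so results reach the queue in plane order even though the 2D filtering runs in parallel. A stripped-down sketch of that ordering pattern (illustrative only; the worker body is a placeholder, not cellfinder's tile filter):

import multiprocessing
from multiprocessing import Lock, Queue


def worker(idx, prev_lock, own_lock, out_queue):
    result = idx * idx  # placeholder for the real 2D tile filtering
    prev_lock.acquire()  # wait until the previous plane has been queued
    out_queue.put((idx, result))
    own_lock.release()  # allow the next plane to push its result


if __name__ == "__main__":
    out_queue = Queue()
    prev_lock = Lock()  # the first worker's "previous" lock starts released
    processes = []
    for idx in range(4):
        own_lock = Lock()
        own_lock.acquire()  # released by the worker once its result is queued
        p = multiprocessing.Process(
            target=worker, args=(idx, prev_lock, own_lock, out_queue))
        p.start()
        processes.append(p)
        prev_lock = own_lock

    ordered = [out_queue.get() for _ in range(4)]  # arrives in plane order
    for p in processes:
        p.join()
    print(ordered)
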
Example #4
def main(max_workers=3):
    from cellfinder.main import suppress_tf_logging

    suppress_tf_logging(tf_suppress_log_messages)

    from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint

    from cellfinder.tools import system
    from cellfinder.tools.prep import prep_training
    from cellfinder.classify.tools import make_lists, get_model
    from cellfinder.classify.cube_generator import CubeGeneratorFromDisk

    start_time = datetime.now()
    args = training_parse()
    output_dir = Path(args.output_dir)
    system.ensure_directory_exists(output_dir)
    args = prep_training(args)
    tiff_files = parse_yaml(args.yaml_file)

    # Too many workers don't increase speed and use huge amounts of RAM
    workers = system.get_num_processes(min_free_cpu_cores=args.n_free_cpus,
                                       n_max_processes=max_workers)

    model = get_model(
        existing_model=args.trained_model,
        model_weights=args.model_weights,
        network_depth=models[args.network_depth],
        learning_rate=args.learning_rate,
        continue_training=args.continue_training,
    )

    signal_train, background_train, labels_train = make_lists(tiff_files)

    if args.test_fraction > 0:
        (
            signal_train,
            signal_test,
            background_train,
            background_test,
            labels_train,
            labels_test,
        ) = train_test_split(
            signal_train,
            background_train,
            labels_train,
            test_size=args.test_fraction,
        )
        validation_generator = CubeGeneratorFromDisk(
            signal_test,
            background_test,
            labels=labels_test,
            batch_size=args.batch_size,
            train=True,
        )
    else:
        validation_generator = None

    training_generator = CubeGeneratorFromDisk(
        signal_train,
        background_train,
        labels=labels_train,
        batch_size=args.batch_size,
        shuffle=True,
        train=True,
        augment=not args.no_augment,
    )
    callbacks = []

    if args.tensorboard:
        logdir = output_dir / "tensorboard"
        system.ensure_directory_exists(logdir)
        tensorboard = TensorBoard(
            log_dir=logdir,
            histogram_freq=0,
            write_graph=True,
            update_freq="epoch",
        )
        callbacks.append(tensorboard)

    if args.save_checkpoints:
        if args.save_weights:
            filepath = str(output_dir /
                           "weights.{epoch:02d}-{val_loss:.3f}.h5")
        else:
            filepath = str(output_dir / "model.{epoch:02d}-{val_loss:.3f}.h5")

        checkpoints = ModelCheckpoint(filepath,
                                      save_weights_only=args.save_weights)
        callbacks.append(checkpoints)

    model.fit(
        training_generator,
        validation_data=validation_generator,
        use_multiprocessing=True,
        workers=workers,
        epochs=args.epochs,
        callbacks=callbacks,
    )

    if args.save_weights:
        print("Saving model weights")
        model.save_weights(str(output_dir / "model_weights.h5"))
    else:
        print("Saving model")
        model.save(output_dir / "model.h5")

    print("Finished training, total time taken: {}".format(
        datetime.now() - start_time))
Example #5
def check_max_processes():
    max_proc = 5
    correct_n = min(len(os.sched_getaffinity(0)), max_proc)
    assert correct_n == system.get_num_processes(n_max_processes=max_proc)
Example #6
def check_get_num_processes():
    assert len(os.sched_getaffinity(0)) == system.get_num_processes()
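
Note: Examples #5 and #6 show that, outside SLURM, get_num_processes() defaults to len(os.sched_getaffinity(0)) and that n_max_processes caps the result. A minimal sketch of that behaviour (illustrative only; bounded_cpu_count is a hypothetical name, not cellfinder's API):

import os


def bounded_cpu_count(min_free_cpu_cores=0, n_max_processes=None):
    # Default to the cores available to this process, optionally keep some
    # free, and cap the result at n_max_processes if one is given.
    n = len(os.sched_getaffinity(0)) - min_free_cpu_cores
    if n_max_processes is not None:
        n = min(n, n_max_processes)
    return n
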
Example #7
def main(args):

    start_time = datetime.now()

    cells = get_cells(args.paths.cells_file_path)
    if not cells:
        logging.error("No cells found, exiting. Please check your "
                      "cell xml file path: {}"
                      " or verify your cell types "
                      "(maybe use cells-only option to disable)".format(
                          args.paths.cells_file_path))
        raise ValueError("No cells found, exiting. Please check your "
                         "cell xml file path: {}"
                         " or verify your cell types "
                         "(maybe use cells-only option to disable)".format(
                             args.paths.cells_file_path))

    if args.z_pixel_um != args.z_pixel_um_network:
        plane_scaling_factor = args.z_pixel_um_network / args.z_pixel_um
        num_planes_needed_for_cube = round(args.cube_depth *
                                           plane_scaling_factor)
    else:
        num_planes_needed_for_cube = args.cube_depth

    planes_paths = {}
    # Use args.paths for this
    all_channel_ids = args.signal_ch_ids + [args.background_ch_id]
    for idx, planes_paths_file_path in enumerate(args.all_planes_paths):
        channel = all_channel_ids[idx]

        if args.cube_extract_cli:
            channel_list = all_channel_ids
            args.signal_channel = all_channel_ids[0]
        else:
            # only extract those channels that are necessary for classification
            channel_list = [args.signal_channel, args.background_ch_id]
        if channel in channel_list:
            planes_paths[channel] = system.get_sorted_file_paths(
                planes_paths_file_path, file_extension="tif")

    if num_planes_needed_for_cube > len(planes_paths[0]):
        raise StackSizeError("The number of planes provided is not sufficient "
                             "for any cubes to be extracted. Please check the "
                             "input data")

    first_plane = tifffile.imread(list(planes_paths.values())[0][0])

    planes_shape = first_plane.shape
    brain_depth = len(list(planes_paths.values())[0])

    # TODO: use to assert all centre planes processed
    center_planes = sorted(list(set([cell.z for cell in cells])))

    # REFACTOR: rename (clashes with different meaning of planes_to_read below)
    planes_to_read = np.zeros(brain_depth, dtype=bool)

    if tools.is_even(num_planes_needed_for_cube):
        half_nz = num_planes_needed_for_cube // 2
        # WARNING: not centered because even
        for p in center_planes:
            planes_to_read[p - half_nz:p + half_nz] = 1
    else:
        half_nz = num_planes_needed_for_cube // 2
        # centered
        for p in center_planes:
            planes_to_read[p - half_nz:p + half_nz + 1] = 1

    planes_to_read = np.where(planes_to_read)[0]

    if not planes_to_read.size:
        logging.error(
            f"No planes found, you need at the very least "
            f"{num_planes_needed_for_cube} "
            f"planes to proceed (i.e. cube z size)"
            f"Brain z dimension is {brain_depth}.",
            stack_info=True,
        )
        raise ValueError(f"No planes found, you need at the very least "
                         f"{num_planes_needed_for_cube} "
                         f"planes to proceed (i.e. cube z size)"
                         f"Brain z dimension is {brain_depth}.")
    # TODO: check if needs to flip args.cube_width and args.cube_height
    cells_groups = cell_tools.group_cells_by_z(cells)

    # copies=2 is set because at all times there is a plane queue (deque)
    # and an array passed to `Cube`
    ram_per_process = get_ram_requirement_per_process(
        planes_paths[args.signal_channel][0],
        num_planes_needed_for_cube,
        copies=2,
    )
    n_processes = system.get_num_processes(
        min_free_cpu_cores=args.n_free_cpus,
        ram_needed_per_process=ram_per_process,
        n_max_processes=len(planes_to_read),
        fraction_free_ram=0.2,
        max_ram_usage=system.memory_in_bytes(args.max_ram, "GB"),
    )
    # TODO: don't need to extract cubes from all channels if
    #  n_signal_channels>1
    with ProcessPoolExecutor(max_workers=n_processes) as executor:
        n_planes_per_chunk = len(planes_to_read) // n_processes
        for i in range(n_processes):
            start_idx = i * n_planes_per_chunk
            end_idx = (start_idx + n_planes_per_chunk +
                       num_planes_needed_for_cube - 1)
            if end_idx > planes_to_read[-1]:
                end_idx = None
            sub_planes_to_read = planes_to_read[start_idx:end_idx]

            executor.submit(
                save_cubes,
                cells_groups,
                planes_paths,
                sub_planes_to_read,
                planes_shape,
                args.x_pixel_um,
                args.y_pixel_um,
                args.x_pixel_um_network,
                args.y_pixel_um_network,
                num_planes_for_cube=num_planes_needed_for_cube,
                cube_width=args.cube_width,
                cube_height=args.cube_height,
                cube_depth=args.cube_depth,
                thread_id=i,
                output_dir=args.paths.tmp__cubes_output_dir,
                save_empty_cubes=args.save_empty_cubes,
            )

    total_cubes = system.get_number_of_files_in_dir(
        args.paths.tmp__cubes_output_dir)
    time_taken = datetime.now() - start_time
    logging.info("All cubes ({}) extracted in: {}".format(
        total_cubes, time_taken))
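
Note: in the example above the worker count is budgeted by RAM as well as by CPUs: get_num_processes() is given an estimated ram_needed_per_process, a fraction_free_ram to hold in reserve, and an absolute max_ram_usage cap. A rough sketch of how such a budget could translate into a process count (an assumption about the arithmetic, not cellfinder's implementation; workers_for_ram is a hypothetical helper):

def workers_for_ram(ram_needed_per_process, max_ram_usage,
                    fraction_free_ram=0.2):
    # Reserve a fraction of the RAM budget, then see how many workers of the
    # estimated size fit into what remains (never fewer than one).
    usable = max_ram_usage * (1 - fraction_free_ram)
    return max(int(usable // ram_needed_per_process), 1)
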
Example #8
File: prep.py  Project: rmd13/cellfinder
def prep_training(args):
    n_processes = system.get_num_processes(min_free_cpu_cores=args.n_free_cpus)
    prep_tensorflow(n_processes)
    args = prep_models(args)
    return args