Example #1
File: actions.py Project: irenedet/3d-unet
def partition_raw_and_labels_tomograms_dice_multiclass(
        path_to_raw: str, labels_dataset_list: list, segmentation_names: list,
        output_h5_file_path: str, subtomo_shape: tuple, overlap: int):
    raw_dataset = load_tomogram(path_to_raw)
    padded_raw_dataset = pad_dataset(raw_dataset, subtomo_shape, overlap)
    padded_particles_coordinates = get_particle_coordinates_grid_with_overlap(
        padded_raw_dataset.shape, subtomo_shape, overlap)
    padded_labels_dataset_list = []
    for path_to_labeled in labels_dataset_list:
        labels_dataset = load_tomogram(path_to_labeled)
        labels_dataset = np.array(labels_dataset)
        print(path_to_labeled, "shape", labels_dataset.shape)
        padded_labels_dataset = pad_dataset(labels_dataset, subtomo_shape,
                                            overlap)
        padded_labels_dataset_list += [padded_labels_dataset]
    datasets_shapes = [padded.shape for padded in padded_labels_dataset_list]
    datasets_shapes += [padded_raw_dataset.shape]
    print("padded_dataset.shapes = ", datasets_shapes)
    write_joint_raw_and_labels_subtomograms_dice_multiclass(
        output_path=output_h5_file_path,
        padded_raw_dataset=padded_raw_dataset,
        padded_labels_list=padded_labels_dataset_list,
        segmentation_names=segmentation_names,
        window_centers=padded_particles_coordinates,
        crop_shape=subtomo_shape)
    return
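A minimal usage sketch; the paths, segmentation names, and shapes below are hypothetical placeholders, not values from the repository:

partition_raw_and_labels_tomograms_dice_multiclass(
    path_to_raw="/data/tomo_001.mrc",
    labels_dataset_list=["/data/labels_ribosome.mrc",
                         "/data/labels_membrane.mrc"],
    segmentation_names=["ribosome", "membrane"],
    output_h5_file_path="/data/partitions/tomo_001.h5",
    subtomo_shape=(64, 64, 64),
    overlap=12)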
Example #2
def compute_list_best_cross_correlation_angles(
        list_of_peak_coordinates: list,
        catalogue_path: str,
        path_to_mask: str,
        path_to_dataset: str,
        reference_rotation_angles_file: str,
        in_tom_format=True) -> tuple:
    dataset = load_tomogram(path_to_dataset=path_to_dataset)
    mask = load_tomogram(path_to_dataset=path_to_mask)
    dataset_shape = dataset.shape
    with h5py.File(catalogue_path, 'r') as h5file:
        subtomo_shape = get_first_raw_subtomo_shape_from_h5file(h5file)
        subtomo_center = tuple([sh // 2 for sh in subtomo_shape])
        list_best_angle_indices = list()
        list_best_cross_correlations = list()
        if in_tom_format:
            list_of_peak_coordinates_in_python_system = list(
                map(invert_tom_coordinate_system, list_of_peak_coordinates))
        else:
            list_of_peak_coordinates_in_python_system = list_of_peak_coordinates
        for point in list_of_peak_coordinates_in_python_system:
            point = [int(entry) for entry in point]
            start_corners, end_corners, side_lengths = \
                get_subtomo_corners_within_dataset(dataset_shape=dataset_shape,
                                                   subtomo_shape=subtomo_shape,
                                                   center=point)
            if tuple(side_lengths) == subtomo_shape:
                ref_start_corners = (0, 0, 0)
            else:
                ref_start_corners, _, _ = get_subtomo_corners_within_dataset(
                    dataset_shape=subtomo_shape,
                    subtomo_shape=side_lengths,
                    center=subtomo_center)
            array = crop_window(input_array=dataset, shape_to_crop=side_lengths,
                                window_corner=start_corners)
            best_cross_correlation, best_angle_index = \
                compute_best_cross_correlation_angle(
                    array=array, mask=mask,
                    h5file=h5file,
                    ref_start_corners=ref_start_corners,
                    ref_side_lengths=side_lengths)

            list_best_cross_correlations.append(best_cross_correlation)
            list_best_angle_indices.append(best_angle_index)
    angles_reference = load_tomogram(
        path_to_dataset=reference_rotation_angles_file)

    list_best_angles = list()
    for best_angle_index in list_best_angle_indices:
        angle = angles_reference[best_angle_index]
        list_best_angles.append(angle)

    return list_best_cross_correlations, list_best_angles
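A hedged usage sketch; every path and coordinate here is a hypothetical placeholder, and the catalogue would be one produced by create_template_catalogue (Example #6):

best_ccs, best_angles = compute_list_best_cross_correlation_angles(
    list_of_peak_coordinates=[(120, 250, 300), (90, 180, 210)],
    catalogue_path="/data/template_catalogue.h5",
    path_to_mask="/data/mask.mrc",
    path_to_dataset="/data/tomo_001.mrc",
    reference_rotation_angles_file="/data/angles.em",
    in_tom_format=True)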
Example #3
File: h5.py Project: irenedet/3d-unet
def generate_classification_training_set(path_to_output_h5: str,
                                         path_to_dataset: str, motl_path: str,
                                         label: str,
                                         subtomo_size: Union[int, tuple, list]):
    assert isinstance(subtomo_size, (int, tuple, list))
    if isinstance(subtomo_size, int):
        crop_shape = (subtomo_size, subtomo_size, subtomo_size)
    else:
        crop_shape = subtomo_size

    _, coordinates = read_motl_coordinates_and_values(motl_path)
    dataset = load_tomogram(path_to_dataset)

    if os.path.isfile(path_to_output_h5):
        mode = 'a'
    else:
        mode = 'w'

    makedirs(os.path.dirname(path_to_output_h5), exist_ok=True)
    with h5py.File(path_to_output_h5, mode) as f:
        internal_path = h5_internal_paths.LABELED_SUBTOMOGRAMS
        internal_path = join(internal_path, label)
        for point in coordinates:
            x, y, z = [int(entry) for entry in point]
            subtomo_name = "subtomo_" + str(point)
            subtomo = crop_window_around_point(input_array=dataset,
                                               crop_shape=crop_shape,
                                               window_center=(z, y, x))
            subtomo_path = join(internal_path, subtomo_name)
            f[subtomo_path] = subtomo[:]
    return path_to_output_h5
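A minimal usage sketch, assuming a motl file in the format expected by read_motl_coordinates_and_values; all paths and the label are hypothetical:

output_h5 = generate_classification_training_set(
    path_to_output_h5="/data/training/classification.h5",
    path_to_dataset="/data/tomo_001.mrc",
    motl_path="/data/motl_ribosome.csv",
    label="ribosome",
    subtomo_size=64)  # a single int expands to a (64, 64, 64) crop shape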
Example #4
File: actions.py Project: irenedet/3d-unet
def generate_random_labeled_partition(path_to_raw: str,
                                      labels_dataset_paths_list: list,
                                      segmentation_names: list,
                                      output_h5_file_path: str,
                                      subtomo_shape: tuple,
                                      n_total: int,
                                      min_label_fraction: float = 0,
                                      max_label_fraction: float = 1) -> list:
    raw_dataset = load_tomogram(path_to_raw)
    min_shape = raw_dataset.shape
    print(path_to_raw, "shape", min_shape)
    labels_dataset_list = []
    for path_to_labeled in labels_dataset_paths_list:
        print("loading", path_to_labeled)
        labels_dataset = load_tomogram(path_to_labeled)
        dataset_shape = labels_dataset.shape
        labels_dataset_list.append(labels_dataset)
        min_shape = np.minimum(min_shape, dataset_shape)
        print(path_to_labeled, "shape", labels_dataset.shape)
    print("min_shape = ", min_shape)
    min_x, min_y, min_z = min_shape
    raw_dataset = raw_dataset[:min_x, :min_y, :min_z]
    particles_coordinates = get_random_particle_coordinates(
        dataset_shape=min_shape,
        shape_to_crop_zyx=subtomo_shape,
        n_total=n_total)

    label_datasets = []
    for labels_dataset in labels_dataset_list:
        labels_dataset = labels_dataset[:min_x, :min_y, :min_z]
        label_datasets.append(labels_dataset)

    label_fractions_list = write_strongly_labeled_subtomograms(
        output_path=output_h5_file_path,
        padded_raw_dataset=raw_dataset,
        padded_labels_list=label_datasets,
        segmentation_names=segmentation_names,
        window_centers=particles_coordinates,
        crop_shape=subtomo_shape,
        min_label_fraction=min_label_fraction,
        max_label_fraction=max_label_fraction,
        unpadded_dataset_shape=min_shape)
    return label_fractions_list
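A hedged usage sketch with hypothetical paths and parameters; the returned list holds the label fraction reported for each randomly sampled window:

label_fractions = generate_random_labeled_partition(
    path_to_raw="/data/tomo_001.mrc",
    labels_dataset_paths_list=["/data/labels_ribosome.mrc"],
    segmentation_names=["ribosome"],
    output_h5_file_path="/data/partitions/random_tomo_001.h5",
    subtomo_shape=(64, 64, 64),
    n_total=100,
    min_label_fraction=0.002)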
Example #5
File: actions.py Project: irenedet/3d-unet
def generate_strongly_labeled_partition(path_to_raw: str,
                                        labels_dataset_paths_list: list,
                                        segmentation_names: list,
                                        output_h5_file_path: str,
                                        subtomo_shape: tuple,
                                        overlap: int,
                                        min_label_fraction: float = 0,
                                        max_label_fraction: float = 1) -> list:
    raw_dataset = load_tomogram(path_to_dataset=path_to_raw, dtype=float)
    min_shape = raw_dataset.shape
    labels_dataset_list = []
    for path_to_labeled in labels_dataset_paths_list:
        print("loading", path_to_labeled)
        labels_dataset = load_tomogram(path_to_labeled)
        dataset_shape = labels_dataset.shape
        labels_dataset_list.append(labels_dataset)
        min_shape = np.minimum(min_shape, dataset_shape)
    min_x, min_y, min_z = min_shape
    raw_dataset = raw_dataset[:min_x, :min_y, :min_z]
    padded_raw_dataset = pad_dataset(raw_dataset, subtomo_shape, overlap)
    padded_particles_coordinates = get_particle_coordinates_grid_with_overlap(
        padded_raw_dataset.shape, subtomo_shape, overlap)

    padded_labels_dataset_list = []
    for labels_dataset in labels_dataset_list:
        labels_dataset = labels_dataset[:min_x, :min_y, :min_z]
        padded_labels_dataset = pad_dataset(labels_dataset, subtomo_shape,
                                            overlap)
        padded_labels_dataset_list.append(padded_labels_dataset)

    label_fractions_list = write_strongly_labeled_subtomograms(
        output_path=output_h5_file_path,
        padded_raw_dataset=padded_raw_dataset,
        padded_labels_list=padded_labels_dataset_list,
        segmentation_names=segmentation_names,
        window_centers=padded_particles_coordinates,
        crop_shape=subtomo_shape,
        min_label_fraction=min_label_fraction,
        max_label_fraction=max_label_fraction,
        unpadded_dataset_shape=min_shape)
    return label_fractions_list
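A hedged usage sketch with hypothetical inputs; unlike Example #4, the windows here come from a regular grid with the given overlap rather than from random sampling:

label_fractions = generate_strongly_labeled_partition(
    path_to_raw="/data/tomo_001.mrc",
    labels_dataset_paths_list=["/data/labels_ribosome.mrc"],
    segmentation_names=["ribosome"],
    output_h5_file_path="/data/partitions/grid_tomo_001.h5",
    subtomo_shape=(64, 64, 64),
    overlap=12,
    min_label_fraction=0.002)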
Example #6
File: tools.py Project: irenedet/3d-unet
def create_template_catalogue(output_path: str,
                              reference_file: str,
                              angles_file: str,
                              in_degrees=False):
    reference = load_tomogram(reference_file)
    angles = load_tomogram(angles_file)
    if in_degrees:
        zxz_angles_in_degrees = angles
    else:
        zxz_angles_in_degrees = radians2degrees(angles)
    with h5py.File(output_path, 'w') as f:
        for index, angle in enumerate(list(zxz_angles_in_degrees)):
            rotation_name = str(index)
            rotated_reference = \
                rotate_ref(ref=reference,
                           zxz_angles_in_degrees=angle,
                           mode="nearest")
            internal_path = join(h5_internal_paths.RAW_SUBTOMOGRAMS,
                                 rotation_name)
            f[internal_path] = rotated_reference[:]
    return
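A minimal usage sketch; the file names are hypothetical, and the resulting catalogue is the kind consumed in Example #2:

create_template_catalogue(
    output_path="/data/template_catalogue.h5",
    reference_file="/data/reference.mrc",
    angles_file="/data/zxz_angles.mrc",
    in_degrees=False)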
Example #7
        output_dir=config.work_dir, tomo_name=tomo_name, fold=fold)

    segmentation_label = model_name
    box_shape = [config.box_size, config.box_size, config.box_size]

    tomo_output_dir, output_path = get_probability_map_path(
        config.output_dir, model_name, tomo_name, config.pred_class)
    os.makedirs(tomo_output_dir, exist_ok=True)

    DTHeader = DatasetTableHeader(processing_tomo=config.processing_tomo)
    df = pd.read_csv(config.dataset_table, dtype={DTHeader.tomo_name: str})
    df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str)
    tomo_df = df[df[DTHeader.tomo_name] == tomo_name]
    print("config.processing_tomo", config.processing_tomo)
    tomo_path = tomo_df.iloc[0][config.processing_tomo]
    tomo = load_tomogram(path_to_dataset=tomo_path)
    output_shape = tomo.shape
    del tomo

    subtomos_internal_path = os.path.join(
        h5_internal_paths.PREDICTED_SEGMENTATION_SUBTOMOGRAMS,
        segmentation_label)

    assemble_tomo_from_subtomos(output_path=output_path,
                                partition_file_path=data_partition,
                                output_shape=output_shape,
                                subtomo_shape=box_shape,
                                subtomos_internal_path=subtomos_internal_path,
                                class_number=config.pred_class_number,
                                overlap=config.overlap,
                                reconstruction_type="prediction",
        box_shape = int(model_df.iloc[0][ModelsHeader.box_size])

        subtomogram_shape = (box_shape, box_shape, box_shape)

        DTHeader = DatasetTableHeader(processing_tomo=processing_tomo)

        df = pd.read_csv(dataset_table)
        df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str)

        print("Partitioning tomo", tomo_name)

        tomo_df = df[df[DTHeader.tomo_name] == tomo_name]
        path_to_raw = tomo_df.iloc[0][DTHeader.processing_tomo]
        path_to_lamella = tomo_df.iloc[0][DTHeader.filtering_mask]
        raw_dataset = load_tomogram(path_to_dataset=path_to_raw)
        if isinstance(path_to_lamella, float):
            print("No filtering mask file available.")
            partition_tomogram(dataset=raw_dataset,
                               output_h5_file_path=partition_path,
                               subtomo_shape=subtomogram_shape,
                               overlap=overlap)
        else:
            path_to_lamella = tomo_df.iloc[0][DTHeader.filtering_mask]
            lamella_mask = load_tomogram(path_to_dataset=path_to_lamella)

            lamella_shape = lamella_mask.shape
            dataset_shape = raw_dataset.shape

            minimum_shape = [
                np.min([data_dim, lamella_dim])
                for data_dim, lamella_dim in zip(dataset_shape, lamella_shape)]
Example #9
            print("Filtering mask file does not exist. "
                  "All points will be conserved for the analysis.")
            conserved_values = motl_values
            conserved_points = predicted_coordinates
            discarded_values = []
            discarded_points = []
            motl_writer(path_to_output_folder=conserved_points_dir,
                        list_of_peak_scores=conserved_values,
                        list_of_peak_coords=conserved_points,
                        in_tom_format=True)
            motl_writer(path_to_output_folder=discarded_points_dir,
                        list_of_peak_scores=discarded_values,
                        list_of_peak_coords=discarded_points,
                        in_tom_format=True)
        else:
            filtering_mask_indicator = load_tomogram(
                path_to_dataset=filtering_mask_path)
            mask_z, mask_y, mask_x = filtering_mask_indicator.shape

            conserved_points = []
            conserved_values = []
            discarded_points = []
            discarded_values = []
            for value, point in zip(motl_values, predicted_coordinates):
                point = [int(entry) for entry in point]
                x, y, z = point
                if np.min([mask_x - x, mask_y - y, mask_z - z]) > 0 and np.min(
                    [x, y, z]) >= 0:
                    if filtering_mask_indicator[z, y, x] == 1 and np.min([
                            x, y, x_dim - x, y_dim - y
                    ]) > ignore_border_thickness:
                        conserved_values += [value]
Example #10
if write_on_table:
    for tomo_name in tomo_list:
        print("Partitioning tomo", tomo_name)
        output_dir = config['pred_output_dir']
        output_dir_tomo = os.path.join(output_dir, tomo_name)
        os.makedirs(output_dir_tomo, exist_ok=True)
        partition_path = os.path.join(output_dir_tomo, test_partition + ".h5")
        print("output path:", partition_path)
        if os.path.isfile(partition_path):
            print("Partition exists already.")
        else:
            tomo_df = df[df[DTHeader.tomo_name] == tomo_name]
            path_to_raw = tomo_df.iloc[0][DTHeader.processing_tomo]
            path_to_lamella = tomo_df.iloc[0][DTHeader.filtering_mask]
            raw_dataset = load_tomogram(path_to_dataset=path_to_raw)
            if isinstance(path_to_lamella, float):
                print("No filtering mask file available.")
                partition_tomogram(dataset=raw_dataset,
                                   output_h5_file_path=partition_path,
                                   subtomo_shape=subtomogram_shape,
                                   overlap=overlap)
            else:
                path_to_lamella = tomo_df.iloc[0][DTHeader.filtering_mask]
                lamella_mask = load_tomogram(path_to_dataset=path_to_lamella)

                lamella_shape = lamella_mask.shape
                dataset_shape = raw_dataset.shape

                minimum_shape = [
                    np.min([data_dim, lamella_dim])
                    for data_dim, lamella_dim in zip(dataset_shape, lamella_shape)]
Example #11
else:
    run_job = True

if run_job:
    print("Processing tomo", tomo_name)
    tomo_output_dir, output_path = get_probability_map_path(
        config.output_dir, model_name, tomo_name, config.pred_class)

    for file in listdir(tomo_output_dir):
        if "motl" in file:
            print("A motive list already exists:", file)
            shutil.move(os.path.join(tomo_output_dir, file),
                        os.path.join(tomo_output_dir, "prev_" + file))

    assert os.path.isfile(output_path)
    prediction_dataset = load_tomogram(path_to_dataset=output_path)
    output_shape = prediction_dataset.shape
    prediction_dataset_thr = 1 * (prediction_dataset > config.threshold)
    # set to zero the edges of tomogram
    if isinstance(config.ignore_border_thickness, int):
        ix = config.ignore_border_thickness
        iy, iz = ix, ix
    else:
        ix, iy, iz = config.ignore_border_thickness

    if iz > 0:
        prediction_dataset_thr[:iz, :, :] = np.zeros_like(
            prediction_dataset_thr[:iz, :, :])
        prediction_dataset_thr[-iz:, :, :] = np.zeros_like(
            prediction_dataset_thr[-iz:, :, :])
    if iy > 0:
        prediction_dataset_thr[:, :iy, :] = np.zeros_like(
            prediction_dataset_thr[:, :iy, :])
        prediction_dataset_thr[:, -iy:, :] = np.zeros_like(
            prediction_dataset_thr[:, -iy:, :])
    if ix > 0:
        prediction_dataset_thr[:, :, :ix] = np.zeros_like(
            prediction_dataset_thr[:, :, :ix])
        prediction_dataset_thr[:, :, -ix:] = np.zeros_like(
            prediction_dataset_thr[:, :, -ix:])
Example #12
        model_name=model_name,
        tomo_name=tomo_name,
        semantic_class=config.pred_class)
    print(prediction_path)
    assert os.path.isfile(
        prediction_path), "The prediction file does not exist!"

    DTHeader = DatasetTableHeader(semantic_classes=config.semantic_classes,
                                  filtering_mask=config.region_mask)
    df = pd.read_csv(config.dataset_table)
    df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str)
    clean_mask_name = DTHeader.masks_names[config.pred_class_number]

    tomo_df = df[df[DTHeader.tomo_name] == tomo_name]
    target_path = tomo_df.iloc[0][clean_mask_name]
    prediction = load_tomogram(path_to_dataset=prediction_path)

    contact_mode = config.contact_mode
    if contact_mode == "intersection":
        lamella_file = tomo_df.iloc[0][DTHeader.filtering_mask]

        if str(lamella_file) == "nan":
            prediction = load_tomogram(prediction_path)
        else:
            lamella_indicator = load_tomogram(path_to_dataset=lamella_file)
            shx, shy, shz = [
                np.min([shl, shp])
                for shl, shp in zip(lamella_indicator.shape, prediction.shape)
            ]
            lamella_indicator = lamella_indicator[:shx, :shy, :shz]
            prediction = prediction[:shx, :shy, :shz]
Example #13
File: utils.py Project: irenedet/3d-unet
def read_motl_data(path_to_motl: str):
    motl = load_tomogram(path_to_dataset=path_to_motl)
    motl_values, motl_coords = read_motl_coordinates_and_values(path_to_motl)
    motl_coords = np.array(motl_coords)
    angles = motl[:, 16:19]
    return motl_values, motl_coords, angles
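A minimal usage sketch with a hypothetical motl path; the angles come from columns 16:19 of the loaded motl array:

motl_values, motl_coords, angles = read_motl_data(
    path_to_motl="/data/motl_ribosome.em")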
Example #14
if os.path.exists(partition_path):
    print("Exiting, path exists.")
else:
    overlap = config.overlap
    box_size = config.box_size
    box_shape = (box_size, box_size, box_size)

    DTHeader = DatasetTableHeader(processing_tomo=config.processing_tomo,
                                  filtering_mask=config.region_mask)
    df = pd.read_csv(config.dataset_table, dtype={"tomo_name": str})
    df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str)

    tomo_df = df[df[DTHeader.tomo_name] == tomo_name]
    print(tomo_name, config.processing_tomo, tomo_df)
    path_to_raw = tomo_df.iloc[0][config.processing_tomo]
    intersecting_mask_path = tomo_df.iloc[0][config.region_mask]
    raw_dataset = load_tomogram(path_to_dataset=path_to_raw, dtype=float)
    if isinstance(intersecting_mask_path, float):
        print("No region mask file available.")
        intersecting_mask = np.ones_like(raw_dataset)
    else:
        intersecting_mask_path = tomo_df.iloc[0][config.region_mask]
        intersecting_mask = load_tomogram(path_to_dataset=intersecting_mask_path)

        mask_shape = intersecting_mask.shape
        dataset_shape = raw_dataset.shape

        minimum_shape = [np.min([data_dim, mask_dim]) for
                         data_dim, mask_dim in zip(dataset_shape, mask_shape)]
        minz, miny, minx = minimum_shape

        intersecting_mask = intersecting_mask[:minz, :miny, :minx]
Example #15
    df = pd.read_csv(dataset_table)
    df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str)
    tomo_df = df[df[DTHeader.tomo_name] == tomo_name]
    x_dim = int(tomo_df.iloc[0][DTHeader.x_dim])
    y_dim = int(tomo_df.iloc[0][DTHeader.y_dim])
    z_dim = int(tomo_df.iloc[0][DTHeader.z_dim])
    output_shape = (z_dim, y_dim, x_dim)
    calculate_motl = config["clustering_parameters"]["calculate_motl"]
    for file in listdir(tomo_output_dir):
        if "motl" in file:
            print("Motive list already exists:", file)
            calculate_motl = False
    if calculate_motl:
        output_path = os.path.join(tomo_output_dir, "prediction.mrc")
        assert os.path.isfile(output_path)
        prediction_dataset = load_tomogram(path_to_dataset=output_path)

        sigmoid = nn.Sigmoid()
        prediction_dataset = sigmoid(
            torch.from_numpy(prediction_dataset).float())
        prediction_dataset = 1 * (prediction_dataset > threshold).float()
        prediction_dataset = prediction_dataset.numpy()
        prediction_dataset = prediction_dataset.astype(int)
        if np.max(prediction_dataset) > 0:
            clustering_labels, centroids_list, cluster_size_list = \
                get_cluster_centroids(dataset=prediction_dataset,
                                      min_cluster_size=min_cluster_size,
                                      max_cluster_size=max_cluster_size,
                                      connectivity=1)
        else:
            clustering_labels = prediction_dataset