def partition_raw_and_labels_tomograms_dice_multiclass(
        path_to_raw: str,
        labels_dataset_list: list,
        segmentation_names: list,
        output_h5_file_path: str,
        subtomo_shape: tuple,
        overlap: int):
    """Pad a raw tomogram and its label volumes and write joint subtomograms.

    The raw volume and every label volume are padded with ``pad_dataset``
    using the same ``subtomo_shape`` and ``overlap``. Window centers are
    computed on the padded raw volume's shape and everything is handed to
    ``write_joint_raw_and_labels_subtomograms_dice_multiclass``.

    :param path_to_raw: path of the raw tomogram.
    :param labels_dataset_list: paths of the label tomograms.
    :param segmentation_names: names forwarded to the writer, one per label
        volume (presumably in the same order as ``labels_dataset_list`` --
        TODO confirm against the writer).
    :param output_h5_file_path: path of the output file given to the writer.
    :param subtomo_shape: shape of each cropped window.
    :param overlap: overlap used both for padding and for the window grid.
    :return: None
    """
    padded_raw_dataset = pad_dataset(load_tomogram(path_to_raw),
                                     subtomo_shape, overlap)
    window_centers = get_particle_coordinates_grid_with_overlap(
        padded_raw_dataset.shape, subtomo_shape, overlap)

    padded_labels = []
    for labels_path in labels_dataset_list:
        labels_array = np.array(load_tomogram(labels_path))
        print(labels_path, "shape", labels_array.shape)
        padded_labels.append(
            pad_dataset(labels_array, subtomo_shape, overlap))

    # Label shapes first, raw shape last: same order as the original report.
    all_shapes = [volume.shape for volume in padded_labels]
    all_shapes.append(padded_raw_dataset.shape)
    print("padded_dataset.shapes = ", all_shapes)

    write_joint_raw_and_labels_subtomograms_dice_multiclass(
        output_path=output_h5_file_path,
        padded_raw_dataset=padded_raw_dataset,
        padded_labels_list=padded_labels,
        segmentation_names=segmentation_names,
        window_centers=window_centers,
        crop_shape=subtomo_shape)
def compute_list_best_cross_correlation_angles(
        list_of_peak_coordinates: list,
        catalogue_path: str,
        path_to_mask: str,
        path_to_dataset: str,
        reference_rotation_angles_file: str,
        in_tom_format=True) -> tuple:
    """Find, for every peak, the best cross-correlation and its angle.

    For each peak a window centered on it is cropped from the dataset
    (clipped to the dataset via ``get_subtomo_corners_within_dataset``) and
    passed to ``compute_best_cross_correlation_angle`` together with the
    mask and the open catalogue file. The resulting angle indices are then
    looked up in the array loaded from ``reference_rotation_angles_file``.

    :param list_of_peak_coordinates: peak coordinates, one triple per peak.
    :param catalogue_path: HDF5 catalogue, opened read-only.
    :param path_to_mask: path of the mask volume.
    :param path_to_dataset: path of the tomogram to crop windows from.
    :param reference_rotation_angles_file: file holding the angles that the
        best-angle indices refer to.
    :param in_tom_format: when True every peak is first converted with
        ``invert_tom_coordinate_system``.
    :return: ``(list_best_cross_correlations, list_best_angles)``.
    """
    dataset = load_tomogram(path_to_dataset=path_to_dataset)
    mask = load_tomogram(path_to_dataset=path_to_mask)
    tomogram_shape = dataset.shape

    best_scores = []
    best_indices = []
    # The catalogue stays open for the whole loop because the per-peak
    # computation reads from it.
    with h5py.File(catalogue_path, 'r') as h5file:
        subtomo_shape = get_first_raw_subtomo_shape_from_h5file(h5file)
        subtomo_center = tuple(extent // 2 for extent in subtomo_shape)

        if in_tom_format:
            peaks = [invert_tom_coordinate_system(peak)
                     for peak in list_of_peak_coordinates]
        else:
            peaks = list_of_peak_coordinates

        for peak in peaks:
            center = [int(entry) for entry in peak]
            start_corners, _, side_lengths = \
                get_subtomo_corners_within_dataset(
                    dataset_shape=tomogram_shape,
                    subtomo_shape=subtomo_shape,
                    center=center)

            if tuple(side_lengths) == subtomo_shape:
                # Full-size window: the reference is used from its origin.
                ref_start_corners = (0, 0, 0)
            else:
                # Clipped window: locate the matching region of the
                # reference around the reference center.
                ref_start_corners, _, _ = \
                    get_subtomo_corners_within_dataset(
                        dataset_shape=subtomo_shape,
                        subtomo_shape=side_lengths,
                        center=subtomo_center)

            window = crop_window(input_array=dataset,
                                 shape_to_crop=side_lengths,
                                 window_corner=start_corners)
            score, angle_index = compute_best_cross_correlation_angle(
                array=window,
                mask=mask,
                h5file=h5file,
                ref_start_corners=ref_start_corners,
                ref_side_lengths=side_lengths)
            best_scores.append(score)
            best_indices.append(angle_index)

    angles_reference = load_tomogram(
        path_to_dataset=reference_rotation_angles_file)
    best_angles = [angles_reference[angle_index]
                   for angle_index in best_indices]
    return best_scores, best_angles
def generate_classification_training_set(path_to_output_h5: str,
                                         path_to_dataset: str,
                                         motl_path: str, label: str,
                                         subtomo_size: int or tuple or list):
    """Crop a subtomogram around every motl point and store it under a label.

    Coordinates are read with ``read_motl_coordinates_and_values``; each one
    is unpacked as ``x, y, z`` and the crop is taken around ``(z, y, x)``.
    Every crop is written to
    ``<h5_internal_paths.LABELED_SUBTOMOGRAMS>/<label>/subtomo_<point>``.
    An existing output file is opened in append mode, otherwise a new file
    is created.

    :param path_to_output_h5: output HDF5 file; may be a bare file name.
    :param path_to_dataset: tomogram to crop from.
    :param motl_path: motive list providing the crop centers.
    :param label: group name the crops are stored under.
    :param subtomo_size: an int (cubic window) or a tuple/list used as the
        crop shape as given. Note that the annotation expression
        ``int or tuple or list`` evaluates to plain ``int``; the accepted
        types are the ones checked by the assertion below.
    :return: ``path_to_output_h5``.
    """
    assert isinstance(subtomo_size, (int, tuple, list))
    if isinstance(subtomo_size, int):
        crop_shape = (subtomo_size, subtomo_size, subtomo_size)
    else:
        crop_shape = subtomo_size

    _, coordinates = read_motl_coordinates_and_values(motl_path)
    dataset = load_tomogram(path_to_dataset)

    mode = 'a' if os.path.isfile(path_to_output_h5) else 'w'

    # os.path.dirname returns '' for a bare file name and makedirs('')
    # raises FileNotFoundError, so only create a directory when one is named.
    output_dir = os.path.dirname(path_to_output_h5)
    if output_dir:
        makedirs(output_dir, exist_ok=True)

    with h5py.File(path_to_output_h5, mode) as f:
        internal_path = join(h5_internal_paths.LABELED_SUBTOMOGRAMS, label)
        for point in coordinates:
            x, y, z = [int(entry) for entry in point]
            # The name uses the point exactly as read from the motl, not the
            # integer-cast values.
            subtomo_name = "subtomo_" + str(point)
            subtomo = crop_window_around_point(input_array=dataset,
                                               crop_shape=crop_shape,
                                               window_center=(z, y, x))
            f[join(internal_path, subtomo_name)] = subtomo[:]
    return path_to_output_h5
def generate_random_labeled_partition(path_to_raw: str,
                                      labels_dataset_paths_list: list,
                                      segmentation_names: list,
                                      output_h5_file_path: str,
                                      subtomo_shape: tuple,
                                      n_total: int,
                                      min_label_fraction: float = 0,
                                      max_label_fraction: float = 1) -> list:
    """Write randomly placed raw/label subtomograms cropped to a common shape.

    The raw tomogram and all label tomograms are cut down to their
    element-wise minimum shape, window centers are drawn with
    ``get_random_particle_coordinates`` and the volumes are written with
    ``write_strongly_labeled_subtomograms``.

    :param path_to_raw: path of the raw tomogram.
    :param labels_dataset_paths_list: paths of the label tomograms.
    :param segmentation_names: names forwarded to the writer.
    :param output_h5_file_path: output file path given to the writer.
    :param subtomo_shape: shape of each cropped window.
    :param n_total: number of coordinates requested from the sampler.
    :param min_label_fraction: lower bound forwarded to the writer.
    :param max_label_fraction: upper bound forwarded to the writer.
    :return: the value returned by ``write_strongly_labeled_subtomograms``.
    """
    raw_dataset = load_tomogram(path_to_raw)
    min_shape = raw_dataset.shape
    print(path_to_raw, "shape", min_shape)

    labels_dataset_list = []
    for path_to_labeled in labels_dataset_paths_list:
        print("loading", path_to_labeled)
        labels_dataset = load_tomogram(path_to_labeled)
        labels_dataset_list.append(labels_dataset)
        min_shape = np.minimum(min_shape, labels_dataset.shape)
        print(path_to_labeled, "shape", labels_dataset.shape)
    print("min_shape = ", min_shape)

    dim0, dim1, dim2 = min_shape
    raw_dataset = raw_dataset[:dim0, :dim1, :dim2]

    particles_coordinates = get_random_particle_coordinates(
        dataset_shape=min_shape,
        shape_to_crop_zyx=subtomo_shape,
        n_total=n_total)

    # Crop every label volume to the common shape. The cropped volumes are
    # the ones that must reach the writer; previously they were built and
    # then discarded in favour of the uncropped list.
    cropped_labels_list = [labels[:dim0, :dim1, :dim2]
                           for labels in labels_dataset_list]

    label_fractions_list = write_strongly_labeled_subtomograms(
        output_path=output_h5_file_path,
        padded_raw_dataset=raw_dataset,
        padded_labels_list=cropped_labels_list,
        segmentation_names=segmentation_names,
        window_centers=particles_coordinates,
        crop_shape=subtomo_shape,
        min_label_fraction=min_label_fraction,
        max_label_fraction=max_label_fraction,
        unpadded_dataset_shape=min_shape)
    return label_fractions_list
def generate_strongly_labeled_partition(path_to_raw: str,
                                        labels_dataset_paths_list: list,
                                        segmentation_names: list,
                                        output_h5_file_path: str,
                                        subtomo_shape: tuple,
                                        overlap: int,
                                        min_label_fraction: float = 0,
                                        max_label_fraction: float = 1) -> list:
    """Write a grid of raw/label subtomograms from volumes of a common shape.

    The raw tomogram (loaded with ``dtype=float``) and the label tomograms
    are cut down to their element-wise minimum shape, padded with
    ``pad_dataset``, and written with ``write_strongly_labeled_subtomograms``
    at the centers given by ``get_particle_coordinates_grid_with_overlap``.

    :param path_to_raw: path of the raw tomogram.
    :param labels_dataset_paths_list: paths of the label tomograms.
    :param segmentation_names: names forwarded to the writer.
    :param output_h5_file_path: output file path given to the writer.
    :param subtomo_shape: shape of each cropped window.
    :param overlap: overlap used for padding and for the window grid.
    :param min_label_fraction: lower bound forwarded to the writer.
    :param max_label_fraction: upper bound forwarded to the writer.
    :return: the value returned by ``write_strongly_labeled_subtomograms``.
    """
    raw_dataset = load_tomogram(path_to_dataset=path_to_raw, dtype=float)

    # Load all label volumes while tracking the smallest extent per axis.
    common_shape = raw_dataset.shape
    loaded_labels = []
    for labels_path in labels_dataset_paths_list:
        print("loading", labels_path)
        labels_volume = load_tomogram(labels_path)
        loaded_labels.append(labels_volume)
        common_shape = np.minimum(common_shape, labels_volume.shape)

    dim0, dim1, dim2 = common_shape
    padded_raw_dataset = pad_dataset(raw_dataset[:dim0, :dim1, :dim2],
                                     subtomo_shape, overlap)
    window_centers = get_particle_coordinates_grid_with_overlap(
        padded_raw_dataset.shape, subtomo_shape, overlap)

    padded_labels = [
        pad_dataset(labels_volume[:dim0, :dim1, :dim2],
                    subtomo_shape, overlap)
        for labels_volume in loaded_labels
    ]

    return write_strongly_labeled_subtomograms(
        output_path=output_h5_file_path,
        padded_raw_dataset=padded_raw_dataset,
        padded_labels_list=padded_labels,
        segmentation_names=segmentation_names,
        window_centers=window_centers,
        crop_shape=subtomo_shape,
        min_label_fraction=min_label_fraction,
        max_label_fraction=max_label_fraction,
        unpadded_dataset_shape=common_shape)
def create_template_catalogue(output_path: str, reference_file: str,
                              angles_file: str, in_degrees=False):
    """Store one rotated copy of the reference per angle in an HDF5 file.

    Each entry of the angles array is passed to ``rotate_ref`` (with
    ``mode="nearest"``) and the result is written under
    ``<h5_internal_paths.RAW_SUBTOMOGRAMS>/<index>``, where ``index`` is the
    position of the angle in the array. The output file is opened with mode
    ``'w'``.

    :param output_path: HDF5 file to create.
    :param reference_file: path of the reference volume.
    :param angles_file: path of the file holding the rotation angles.
    :param in_degrees: when False the loaded angles are first converted with
        ``radians2degrees``; when True they are used as loaded.
    :return: None
    """
    reference = load_tomogram(reference_file)
    angles = load_tomogram(angles_file)
    zxz_angles_in_degrees = angles if in_degrees else radians2degrees(angles)

    with h5py.File(output_path, 'w') as catalogue:
        for index, angle in enumerate(zxz_angles_in_degrees):
            rotated_reference = rotate_ref(ref=reference,
                                           zxz_angles_in_degrees=angle,
                                           mode="nearest")
            dataset_path = join(h5_internal_paths.RAW_SUBTOMOGRAMS,
                                str(index))
            catalogue[dataset_path] = rotated_reference[:]
output_dir=config.work_dir, tomo_name=tomo_name, fold=fold) segmentation_label = model_name box_shape = [config.box_size, config.box_size, config.box_size] tomo_output_dir, output_path = get_probability_map_path( config.output_dir, model_name, tomo_name, config.pred_class) os.makedirs(tomo_output_dir, exist_ok=True) DTHeader = DatasetTableHeader(processing_tomo=config.processing_tomo) df = pd.read_csv(config.dataset_table, dtype={DTHeader.tomo_name: str}) df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str) tomo_df = df[df[DTHeader.tomo_name] == tomo_name] print("config.processing_tomo", config.processing_tomo) tomo_path = tomo_df.iloc[0][config.processing_tomo] tomo = load_tomogram(path_to_dataset=tomo_path) output_shape = tomo.shape del tomo subtomos_internal_path = os.path.join( h5_internal_paths.PREDICTED_SEGMENTATION_SUBTOMOGRAMS, segmentation_label) assemble_tomo_from_subtomos(output_path=output_path, partition_file_path=data_partition, output_shape=output_shape, subtomo_shape=box_shape, subtomos_internal_path=subtomos_internal_path, class_number=config.pred_class_number, overlap=config.overlap, reconstruction_type="prediction",
box_shape = int(model_df.iloc[0][ModelsHeader.box_size]) subtomogram_shape = (box_shape, box_shape, box_shape) DTHeader = DatasetTableHeader(processing_tomo=processing_tomo) df = pd.read_csv(dataset_table) df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str) print("Partitioning tomo", tomo_name) tomo_df = df[df[DTHeader.tomo_name] == tomo_name] path_to_raw = tomo_df.iloc[0][DTHeader.processing_tomo] path_to_lamella = tomo_df.iloc[0][DTHeader.filtering_mask] raw_dataset = load_tomogram(path_to_dataset=path_to_raw) if isinstance(path_to_lamella, float): print("No filtering mask file available.") partition_tomogram(dataset=raw_dataset, output_h5_file_path=partition_path, subtomo_shape=subtomogram_shape, overlap=overlap) else: path_to_lamella = tomo_df.iloc[0][DTHeader.filtering_mask] lamella_mask = load_tomogram(path_to_dataset=path_to_lamella) lamella_shape = lamella_mask.shape dataset_shape = raw_dataset.shape minimum_shape = [ np.min([data_dim, lamella_dim])
print("Filtering mask file does not exist. " "All points will be conserved for the analysis.") conserved_values = motl_values conserved_points = predicted_coordinates discarded_values = [] discarded_points = [] motl_writer(path_to_output_folder=conserved_points_dir, list_of_peak_scores=conserved_values, list_of_peak_coords=conserved_points, in_tom_format=True) motl_writer(path_to_output_folder=discarded_points_dir, list_of_peak_scores=discarded_values, list_of_peak_coords=discarded_points, in_tom_format=True) else: filtering_mask_indicator = load_tomogram( path_to_dataset=filtering_mask_path) mask_z, mask_y, mask_x = filtering_mask_indicator.shape conserved_points = [] conserved_values = [] discarded_points = [] discarded_values = [] for value, point in zip(motl_values, predicted_coordinates): point = [int(entry) for entry in point] x, y, z = point if np.min([mask_x - x, mask_y - y, mask_z - z]) > 0 and np.min( [x, y, z]) >= 0: if filtering_mask_indicator[z, y, x] == 1 and np.min([ x, y, x_dim - x, y_dim - y ]) > ignore_border_thickness: conserved_values += [value]
if write_on_table: for tomo_name in tomo_list: print("Partitioning tomo", tomo_name) output_dir = config['pred_output_dir'] output_dir_tomo = os.path.join(output_dir, tomo_name) os.makedirs(output_dir_tomo, exist_ok=True) partition_path = os.path.join(output_dir_tomo, test_partition + ".h5") print("output path:", partition_path) if os.path.isfile(partition_path): print("Partition exists already.") else: tomo_df = df[df[DTHeader.tomo_name] == tomo_name] path_to_raw = tomo_df.iloc[0][DTHeader.processing_tomo] path_to_lamella = tomo_df.iloc[0][DTHeader.filtering_mask] raw_dataset = load_tomogram(path_to_dataset=path_to_raw) if isinstance(path_to_lamella, float): print("No filtering mask file available.") partition_tomogram(dataset=raw_dataset, output_h5_file_path=partition_path, subtomo_shape=subtomogram_shape, overlap=overlap) else: path_to_lamella = tomo_df.iloc[0][DTHeader.filtering_mask] lamella_mask = load_tomogram(path_to_dataset=path_to_lamella) lamella_shape = lamella_mask.shape dataset_shape = raw_dataset.shape minimum_shape = [ np.min([data_dim,
else: run_job = True if run_job: print("Processing tomo", tomo_name) tomo_output_dir, output_path = get_probability_map_path( config.output_dir, model_name, tomo_name, config.pred_class) for file in listdir(tomo_output_dir): if "motl" in file: print("A motive list already exists:", file) shutil.move(os.path.join(tomo_output_dir, file), os.path.join(tomo_output_dir, "prev_" + file)) assert os.path.isfile(output_path) prediction_dataset = load_tomogram(path_to_dataset=output_path) output_shape = prediction_dataset.shape prediction_dataset_thr = 1 * (prediction_dataset > config.threshold) # set to zero the edges of tomogram if isinstance(config.ignore_border_thickness, int): ix = config.ignore_border_thickness iy, iz = ix, ix else: ix, iy, iz = config.ignore_border_thickness if iz > 0: prediction_dataset_thr[:iz, :, :] = np.zeros_like( prediction_dataset_thr[:iz, :, :]) prediction_dataset_thr[-iz:, :, :] = np.zeros_like( prediction_dataset_thr[-iz:, :, :]) if iy > 0:
model_name=model_name, tomo_name=tomo_name, semantic_class=config.pred_class) print(prediction_path) assert os.path.isfile( prediction_path), "The prediction file does not exist!" DTHeader = DatasetTableHeader(semantic_classes=config.semantic_classes, filtering_mask=config.region_mask) df = pd.read_csv(config.dataset_table) df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str) clean_mask_name = DTHeader.masks_names[config.pred_class_number] tomo_df = df[df[DTHeader.tomo_name] == tomo_name] target_path = tomo_df.iloc[0][clean_mask_name] prediction = load_tomogram(path_to_dataset=prediction_path) contact_mode = config.contact_mode if contact_mode == "intersection": lamella_file = tomo_df.iloc[0][DTHeader.filtering_mask] if str(lamella_file) == "nan": prediction = load_tomogram(prediction_path) else: lamella_indicator = load_tomogram(path_to_dataset=lamella_file) shx, shy, shz = [ np.min([shl, shp]) for shl, shp in zip(lamella_indicator.shape, prediction.shape) ] lamella_indicator = lamella_indicator[:shx, :shy, :shz] prediction = prediction[:shx, :shy, :shz]
def read_motl_data(path_to_motl: str):
    """Read values, coordinates and angles from a motive list file.

    The file is read twice: once with ``load_tomogram`` to get the full
    table, and once with ``read_motl_coordinates_and_values``.

    :param path_to_motl: path of the motive list.
    :return: ``(motl_values, motl_coords, angles)`` where ``motl_coords`` is
        converted to a numpy array and ``angles`` is columns 16 to 18 of the
        loaded table (presumably the Euler angles -- TODO confirm against
        the motl format).
    """
    motl_table = load_tomogram(path_to_dataset=path_to_motl)
    motl_values, raw_coords = read_motl_coordinates_and_values(path_to_motl)
    return motl_values, np.array(raw_coords), motl_table[:, 16:19]
if os.path.exists(partition_path): print("Exiting, path exists.") else: overlap = config.overlap box_size = config.box_size box_shape = (box_size, box_size, box_size) DTHeader = DatasetTableHeader(processing_tomo=config.processing_tomo, filtering_mask=config.region_mask) df = pd.read_csv(config.dataset_table, dtype={"tomo_name": str}) df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str) tomo_df = df[df[DTHeader.tomo_name] == tomo_name] print(tomo_name, config.processing_tomo, tomo_df) path_to_raw = tomo_df.iloc[0][config.processing_tomo] intersecting_mask_path = tomo_df.iloc[0][config.region_mask] raw_dataset = load_tomogram(path_to_dataset=path_to_raw, dtype=float) if isinstance(intersecting_mask_path, float): print("No region mask file available.") intersecting_mask = np.ones_like(raw_dataset) else: intersecting_mask_path = tomo_df.iloc[0][config.region_mask] intersecting_mask = load_tomogram(path_to_dataset=intersecting_mask_path) mask_shape = intersecting_mask.shape dataset_shape = raw_dataset.shape minimum_shape = [np.min([data_dim, mask_dim]) for data_dim, mask_dim in zip(dataset_shape, mask_shape)] minz, miny, minx = minimum_shape intersecting_mask = intersecting_mask[:minz, :miny, :minx]
df = pd.read_csv(dataset_table) df[DTHeader.tomo_name] = df[DTHeader.tomo_name].astype(str) tomo_df = df[df[DTHeader.tomo_name] == tomo_name] x_dim = int(tomo_df.iloc[0][DTHeader.x_dim]) y_dim = int(tomo_df.iloc[0][DTHeader.y_dim]) z_dim = int(tomo_df.iloc[0][DTHeader.z_dim]) output_shape = (z_dim, y_dim, x_dim) calculate_motl = config["clustering_parameters"]["calculate_motl"] for file in listdir(tomo_output_dir): if "motl" in file: print("Motive list already exists:", file) calculate_motl = False if calculate_motl: output_path = os.path.join(tomo_output_dir, "prediction.mrc") assert os.path.isfile(output_path) prediction_dataset = load_tomogram(path_to_dataset=output_path) sigmoid = nn.Sigmoid() prediction_dataset = sigmoid( torch.from_numpy(prediction_dataset).float()) prediction_dataset = 1 * (prediction_dataset > threshold).float() prediction_dataset = prediction_dataset.numpy() prediction_dataset.astype(int) if np.max(prediction_dataset) > 0: clustering_labels, centroids_list, cluster_size_list = \ get_cluster_centroids(dataset=prediction_dataset, min_cluster_size=min_cluster_size, max_cluster_size=max_cluster_size, connectivity=1) else: clustering_labels = prediction_dataset