def load_motl(path_to_dataset: str) -> np.array:
    """Load a motive list from a ``.em`` or ``.csv`` file.

    The reader is chosen from the file extension: ``.em`` files go through
    ``read_em`` (only the second element of its result is kept) and ``.csv``
    files go through ``read_motl_from_csv``.

    :param path_to_dataset: path to the motive list file.
    :return: the motive list exactly as produced by the selected reader.
    :raises AssertionError: if the extension is neither ``.em`` nor ``.csv``.
    """
    _, data_file_extension = os.path.splitext(path_to_dataset)
    # Raised explicitly rather than through ``assert`` so the check is not
    # stripped by ``python -O`` (which would let an unsupported file fall
    # through to the CSV reader). The exception type and message are kept
    # for backward compatibility.
    if data_file_extension not in (".em", ".csv"):
        raise AssertionError("file in non valid format.")

    if data_file_extension == ".em":
        _, motl = read_em(path_to_emfile=path_to_dataset)
        return motl
    return read_motl_from_csv(path_to_csv_motl=path_to_dataset)
def load_tomogram(path_to_dataset: str, dtype=None) -> np.array:
    """Load a tomogram from a ``.em``, ``.hdf``, ``.mrc`` or ``.rec`` file.

    Original author note: verified that the supported formats open according
    to the same coordinate system.

    The detected extension is printed before it is validated.

    :param path_to_dataset: path to the tomogram file.
    :param dtype: forwarded to ``read_mrc`` for ``.mrc``/``.rec`` files only;
        it is not used for the other formats.
    :return: the dataset exactly as produced by the selected reader.
    :raises AssertionError: if the extension is not one of the supported ones.
    """
    _, data_file_extension = os.path.splitext(path_to_dataset)
    print("file in {} format".format(data_file_extension))
    # Raised explicitly rather than through ``assert`` so the check is not
    # stripped by ``python -O`` (which would end in an unbound ``dataset``).
    # The exception type and message are kept for backward compatibility.
    if data_file_extension not in (".em", ".hdf", ".mrc", ".rec"):
        raise AssertionError("file in non valid format.")

    if data_file_extension == ".em":
        _, dataset = read_em(path_to_emfile=path_to_dataset)
    elif data_file_extension == ".hdf":
        dataset = _load_hdf_dataset(hdf_file_path=path_to_dataset)
    else:  # ".mrc" or ".rec"
        dataset = read_mrc(path_to_mrc=path_to_dataset, dtype=dtype)
    return dataset
def load_motl_as_df(path_to_motl: str) -> pd.DataFrame:
    """Load a motive list into a DataFrame with the 20 named motl columns.

    ``.em`` files are read with ``read_em`` and wrapped in a DataFrame;
    ``.csv`` files are read with ``pandas.read_csv`` assuming there is no
    header row. In both cases the columns are labelled with the fixed names
    listed below.

    :param path_to_motl: path to the ``.em`` or ``.csv`` motive list.
    :return: DataFrame whose columns are the 20 motl column names.
    :raises AssertionError: if the extension is neither ``.em`` nor ``.csv``.
    """
    _, data_file_extension = os.path.splitext(path_to_motl)
    # Raised explicitly rather than through ``assert`` so the check is not
    # stripped by ``python -O``. The exception type and message are kept for
    # backward compatibility.
    if data_file_extension not in (".em", ".csv"):
        raise AssertionError("file in non valid format.")

    column_names = [
        'score', 'x_', 'y_', 'peak', 'tilt_x', 'tilt_y', 'tilt_z',
        'x', 'y', 'z', 'empty_1', 'empty_2', 'empty_3',
        'x-shift', 'y-shift', 'z-shift', 'phi', 'psi', 'theta', 'class',
    ]

    if data_file_extension == ".em":
        _, motl = read_em(path_to_emfile=path_to_motl)
        return pd.DataFrame(motl, columns=column_names)

    motl_df = pd.read_csv(path_to_motl, header=None)
    # Assigning (instead of passing ``names=``) keeps the length-mismatch
    # error when the file does not have exactly 20 columns.
    motl_df.columns = column_names
    return motl_df
def read_motl_coordinates_and_values(path_to_motl: str) -> tuple:
    """Read a motive list and return its values and coordinates.

    ``.em`` files are read with ``read_em`` and unpacked with
    ``extract_coordinates_and_values_from_em_motl``; ``.csv`` files are read
    with ``read_motl_from_csv`` and unpacked with
    ``extract_motl_coordinates_and_score_values``. A message naming the
    detected format is printed.

    :param path_to_motl: path to the ``.em`` or ``.csv`` motive list.
    :return: ``(motl_values, motl_coords)`` in the order yielded by the
        extraction helpers.
    :raises AssertionError: if the extension is neither ``.em`` nor ``.csv``.
    """
    _, motl_extension = os.path.splitext(path_to_motl)
    # Raised explicitly rather than through ``assert`` so the check is not
    # stripped by ``python -O``. The exception type and message are kept for
    # backward compatibility.
    if motl_extension not in (".em", ".csv"):
        raise AssertionError(
            "motl clean should be in a valid format .em or .csv")

    if motl_extension == ".em":
        print("motl in .em format")
        _, motl = read_em(path_to_emfile=path_to_motl)
        motl_values, motl_coords = \
            extract_coordinates_and_values_from_em_motl(motl)
    else:
        print("motl in .csv format")
        motl = read_motl_from_csv(path_to_motl)
        motl_values, motl_coords = \
            extract_motl_coordinates_and_score_values(motl)
        # NOTE(review): the integer cast is applied on the .csv path only;
        # .em coordinates are returned as the helper produced them. Confirm
        # this asymmetry is intended.
        motl_coords = np.array(motl_coords, dtype=int)
    return motl_values, motl_coords
def generate_particle_mask_from_motl(path_to_motl: str, output_shape: tuple,
                                     sphere_radius=8,
                                     values_in_motl: bool = True,
                                     number_of_particles=None, z_shift=0,
                                     particles_in_tom_format=True
                                     ) -> np.ndarray:
    """Build a volume with one sphere pasted per particle of a motive list.

    A zero-filled array of shape ``output_shape`` is created and
    ``paste_sphere_in_dataset`` is called once per particle with that array,
    the particle centre, ``sphere_radius`` and the particle value. The helper
    is presumably modifying the array in place (its return value is not
    used) -- confirm against its definition.

    :param path_to_motl: path to the ``.csv`` or ``.em`` motive list.
    :param output_shape: shape of the generated array.
    :param sphere_radius: radius forwarded to ``paste_sphere_in_dataset``.
    :param values_in_motl: ``.csv`` only. When true, column 0 of each row is
        used as the sphere value; otherwise every value is 1. For ``.em``
        files the values are always 1 regardless of this flag.
    :param number_of_particles: when an ``int``, only the first
        ``number_of_particles`` entries of the motive list are used;
        any other value (e.g. ``None``) keeps all of them.
    :param z_shift: offset added to the first component of every centre.
    :param particles_in_tom_format: ``.csv`` only in practice. When true the
        centre is built from columns (9, 8, 7), otherwise from (7, 8, 9).
        For ``.em`` files it only selects which message is printed.
    :return: the array of shape ``output_shape`` after all spheres have been
        pasted.
    :raises AssertionError: if the extension is neither ``.csv`` nor ``.em``.
    """
    _, motl_extension = os.path.splitext(path_to_motl)
    # Raised explicitly rather than through ``assert`` so the check is not
    # stripped by ``python -O``; the exception type is kept for backward
    # compatibility.
    if motl_extension not in (".csv", ".em"):
        raise AssertionError(
            "unsupported motl extension {!r}; expected .csv or .em".format(
                motl_extension))

    is_csv = motl_extension == ".csv"
    if is_csv:
        motive_list = read_motl_from_csv(path_to_motl)
    else:
        _, motive_list = read_em(path_to_emfile=path_to_motl)

    # Optional truncation, shared by both formats.
    if isinstance(number_of_particles, int):
        motive_list = motive_list[:number_of_particles]
        print("Only", str(number_of_particles),
              " particles in the motive list will be pasted.")
    else:
        print("All particles in the motive list will be pasted.")

    if is_csv:
        # Which columns form the centre depends on the coordinate convention.
        if particles_in_tom_format:
            first, second, third = 9, 8, 7
        else:
            first, second, third = 7, 8, 9
        coordinates = [
            np.array([int(row[first]) + z_shift,
                      int(row[second]),
                      int(row[third])])
            for row in motive_list
        ]
        if values_in_motl:
            score_values = [row[0] for row in motive_list]
        else:
            score_values = np.ones(len(motive_list))
            print("The map will be binary.")
    else:
        coordinates = extract_coordinates_from_em_motl(motive_list)
        if particles_in_tom_format:
            print("coordinates already in tom format")
        else:
            print("transforming coordinates to tom format")
        # NOTE(review): both settings of ``particles_in_tom_format`` apply
        # the same (2, 1, 0) reordering here, unlike the .csv path. This
        # mirrors the existing behaviour; confirm whether one of the two
        # cases was meant to keep the (0, 1, 2) order.
        coordinates = [
            [int(p[2]) + z_shift, int(p[1]), int(p[0])] for p in coordinates
        ]
        # NOTE(review): ``values_in_motl`` is not consulted for .em files.
        score_values = np.ones(len(coordinates))

    predicted_dataset = np.zeros(output_shape)
    for center, value in zip(coordinates, score_values):
        paste_sphere_in_dataset(dataset=predicted_dataset, center=center,
                                radius=sphere_radius, value=value)
    return predicted_dataset