def write_ccp4_map(realmap, mapfile, cell, spacegroup):
    """
    Save a real-space density array to disk as a CCP4 map file.

    Parameters
    ----------
    realmap : np.ndarray
        3D NumPy array of real-space, voxelized electron density
    mapfile : str
        Name of the map file to create
    cell : gemmi.UnitCell
        Unit cell assigned to the grid stored in the map file
    spacegroup : gemmi.SpaceGroup
        Spacegroup assigned to the grid stored in the map file

    Raises
    ------
    ValueError
        If `realmap` is not a NumPy array with exactly three dimensions
    """
    is_3d_array = isinstance(realmap, np.ndarray) and realmap.ndim == 3
    if not is_3d_array:
        raise ValueError("realmap must be a 3-dimension NumPy array")

    # Allocate a gemmi grid with the same dimensions as the input array
    n_u, n_v, n_w = realmap.shape
    density_grid = gemmi.FloatGrid(n_u, n_v, n_w)
    density_grid.set_unit_cell(cell)
    density_grid.spacegroup = spacegroup

    # copy=False asks NumPy not to copy the grid data, so that assigning
    # into this array is what fills the gemmi grid
    grid_values = np.array(density_grid, copy=False)
    grid_values[...] = realmap

    # Wrap the grid in a CCP4 map object, refresh its header and write it out
    map_file = gemmi.Ccp4Map()
    map_file.grid = density_grid
    map_file.update_ccp4_header(2, True)
    map_file.write_ccp4_map(mapfile)
def save_xmap(
        event_map,
        path,
):
    """Write `event_map` to `path` as a CCP4 map and return `path`.

    The grid held by the map object is labelled with the spacegroup found
    under the name "P 1" before the header is refreshed and the file written.

    :param event_map: grid object assigned to the CCP4 map's ``grid``
    :param path: output location; converted with ``str()`` before writing
    :return: the `path` argument, unchanged
    """
    map_file = gemmi.Ccp4Map()
    map_file.grid = event_map

    p1_spacegroup = gemmi.find_spacegroup_by_name("P 1")
    map_file.grid.spacegroup = p1_spacegroup

    map_file.update_ccp4_header(2, True)

    destination = str(path)
    map_file.write_ccp4_map(destination)

    return path
def gemmi_sf2map(self, sf_mmcif_in, map_out, f_column, phi_column):
    """
    converts input mmCIF file map coefficients to map

    The first reflection block of the mmCIF file is transformed to a map,
    which is then cut to the fractional bounding box (margin=5) of the
    structure read from ``self.coord_path`` and written to ``map_out``.

    The inexpensive input checks run before any structure or reflection
    data is parsed, so a missing input is reported as a failure without
    touching ``self.coord_path``.

    :param sf_mmcif_in: mmCIF structure factor input file
    :param map_out: map output file
    :param f_column: F column
    :param phi_column: PHI column
    :return: True if worked, False if failed
    """

    def _fail(reason=None):
        # Log the specific reason (if any) followed by the generic failure line.
        if reason is not None:
            logging.error(reason)
        logging.error("converting {} to {} failed".format(sf_mmcif_in, map_out))
        return False

    if not sf_mmcif_in:
        return _fail()
    if not os.path.exists(sf_mmcif_in):
        return _fail("cannot find input file {}".format(sf_mmcif_in))

    doc = gemmi.cif.read(sf_mmcif_in)
    rblocks = gemmi.as_refln_blocks(doc)
    if len(rblocks) == 0:
        # Without this guard rblocks[0] raises IndexError instead of
        # reporting a failed conversion.
        return _fail("no reflection blocks found in mmCIF {}".format(sf_mmcif_in))

    refln_block = rblocks[0]
    labels = refln_block.column_labels()
    if f_column not in labels or phi_column not in labels:
        return _fail(
            "{} {} columns not found in mmCIF {}".format(
                f_column, phi_column, sf_mmcif_in
            )
        )

    # The model is only needed to define the extent of the output map.
    st = gemmi.read_structure(self.coord_path)
    fbox = st.calculate_fractional_box(margin=5)

    ccp4 = gemmi.Ccp4Map()
    ccp4.grid = refln_block.transform_f_phi_to_map(f_column, phi_column)
    ccp4.update_ccp4_header(2, True)
    ccp4.set_extent(fbox)
    ccp4.write_ccp4_map(map_out)

    if os.path.exists(map_out):
        return True
    return _fail("output map file {} missing".format(map_out))
# Label lookup for one homologue of one structure; values are bound as
# parameters instead of being formatted into the SQL text.
_LABEL_QUERY = """
    SELECT refinement_success_lable, homologue_name_id
    FROM homologue_stats
    INNER JOIN homologue_name
    ON homologue_name.id = homologue_stats.homologue_name_id
    INNER JOIN pdb_id
    ON pdb_id.id = homologue_name.pdb_id_id
    WHERE homologue_name = ?
    AND pdb_id.pdb_id = ?
"""

# Grid-point index of the upper corner of the box every map is cut to.
_BOX_UPPER_GRID_POINT = (50, 50, 50)


def _require_existing_path(location, description):
    """Return *location* as a Path; log and raise if it does not exist."""
    try:
        resolved = Path(location)
        found = resolved.exists()
    except Exception:
        logging.error(f"Could not find {description} at {location}")
        raise
    if not found:
        logging.error(f"Could not find {description} at {location}")
        # AssertionError is kept as the exception type so existing callers
        # still catch it; raising explicitly also works under ``python -O``.
        raise AssertionError(f"{description} does not exist: {location}")
    return resolved


def _check_xyz_limits(xyz_limits):
    """Raise AssertionError unless xyz_limits is a list/tuple of three ints."""
    valid = (
        isinstance(xyz_limits, (list, tuple))
        and len(xyz_limits) == 3
        and all(type(value) is int for value in xyz_limits)
    )
    if not valid:
        logging.error(
            "xyz_limits muste be provided as a list or tupls of three integer values"
        )
        raise AssertionError("xyz_limits must be a list or tuple of three integers")


def _build_boxed_map(homologue_mtz, temp_out_file):
    """Turn the FWT/PHWT columns of an MTZ into a map cut to the fixed box.

    Returns the cut map object, or None when the MTZ file cannot be read.
    """
    try:
        data = gemmi.read_mtz_file(str(homologue_mtz))
    except Exception:
        # Best effort: an unreadable MTZ skips this homologue instead of
        # falling through with missing (or stale) reflection data.
        logging.error(f"Could not read {homologue_mtz}")
        return None

    try:
        data_to_map = gemmi.Ccp4Map()
        data_to_map.grid = data.transform_f_phi_to_map("FWT", "PHWT", sample_rate=4)
        logging.debug("Grid after converting MTZ to MAP %s", data_to_map.grid)
        data_to_map.update_ccp4_header(2, True)
        data_to_map.write_ccp4_map(temp_out_file)
    except Exception:
        logging.error(f"Could not create map from {homologue_mtz}")
        raise

    try:
        # The map is re-read from the temporary file before being cut.
        map_to_map = gemmi.read_ccp4_map(temp_out_file)
        map_to_map.setup()
        grid = map_to_map.grid
        logging.debug("Grid after loading temp file %s", grid)
        box = gemmi.FractionalBox()
        box.minimum = gemmi.Fractional(0, 0, 0)
        box.maximum = grid.point_to_fractional(grid.get_point(*_BOX_UPPER_GRID_POINT))
        box.add_margin(1e-5)
        map_to_map.set_extent(box)
        logging.debug("Grid after setting XYZ limits for MAP %s", map_to_map.grid)
    except Exception:
        logging.error(f"Could not expand map {temp_out_file}")
        raise
    return map_to_map


def _record_final_map(boxed_map, final, cur, struct, homologue, csv_path):
    """Write the final map, look up its label and append a row to the CSV.

    Failures are logged and swallowed so one bad homologue does not stop
    the whole run.
    """
    try:
        boxed_map.write_ccp4_map(final)
    except Exception:
        logging.exception(f"Could not write final map {final}")
        return

    try:
        cur.execute(_LABEL_QUERY, (homologue, struct))
        row = cur.fetchone()
    except sqlite3.Error:
        logging.exception(f"Could not look up label for {struct} {homologue}")
        return
    if row is None:
        logging.error(f"No label found in database for {struct} {homologue}")
        return

    # Only "1a" counts as a positive example; every other label is 0.
    new_lable = 1 if row[0] == "1a" else 0
    logging.debug("label %s -> %s", row[0], new_lable)

    try:
        with open(csv_path, "a", newline="") as out_csv:
            csv.writer(out_csv).writerow([final, new_lable])
    except OSError:
        logging.exception(f"Could not record {final} in {csv_path}")


def prepare_training_data(
    mtz_directory: str,
    mtz_file: str,
    xyz_limits: List[int],
    output_directory: str,
    db_file: str,
    delete_temp: bool = True,
):
    """Convert phased MTZ files into fixed-size CCP4 maps with training labels.

    ``mtz_directory`` is walked as ``<structure>/<homologue>/<mtz_file>``.
    For every homologue directory that contains ``mtz_file`` the FWT/PHWT
    columns are transformed to a map, the map is cut to a fixed box and
    written to ``output_directory`` as
    ``<structure>_<homologue>_<mtz name without .mtz>.ccp4``. The label read
    from the database is reduced to 1 ("1a") or 0 (anything else) and
    appended, with the map path, to ``conv_map_list.csv``.

    Parameters:
        mtz_directory: root directory holding one sub-directory per structure
        mtz_file: file name of the MTZ to convert inside each homologue directory
        xyz_limits: list or tuple of three ints. NOTE(review): the value is
            validated but not applied; the box is cut at grid point
            (50, 50, 50), as the code did before. Confirm whether the
            argument should drive the box size.
        output_directory: existing directory receiving maps and the CSV
        db_file: SQLite database holding the labels
        delete_temp: remove the intermediate ``temp_*.ccp4`` file of each
            homologue once it has been processed

    Returns:
        True once every structure has been visited.

    Raises:
        AssertionError: a directory is missing or xyz_limits is malformed.
        Exception: map creation or cutting failed for a readable MTZ file.
    """
    logging.info("Preparing training data")

    mtz_dir = _require_existing_path(mtz_directory, "mtz directory")
    output_dir = _require_existing_path(output_directory, "output directory")
    _check_xyz_limits(xyz_limits)

    try:
        conn = sqlite3.connect(db_file)
        cur = conn.cursor()
    except Exception:
        logging.error(f"Could not connect to database at {db_file}")
        raise

    try:
        csv_path = os.path.join(output_dir, "conv_map_list.csv")
        if not os.path.exists(csv_path):
            with open(csv_path, "w", newline="") as out_csv:
                csv.writer(out_csv).writerow(["filename", "ai_lable"])

        # Remove only a trailing ".mtz"; str.strip(".mtz") would also eat
        # leading/trailing '.', 'm', 't' and 'z' characters of the name.
        mtz_name = str(mtz_file)
        mtz_state = mtz_name[: -len(".mtz")] if mtz_name.endswith(".mtz") else mtz_name

        mtz_structs = sorted(entry.stem for entry in mtz_dir.iterdir())
        logging.debug(f"Following structures found to transform: {mtz_structs}")

        for struct_number, struct in enumerate(mtz_structs, start=1):
            struct_dir = mtz_dir / struct
            if not struct_dir.is_dir():
                # Stray files next to the structure directories are ignored.
                continue
            homologues = [entry.stem for entry in struct_dir.iterdir()]

            for homologue_number, homologue in enumerate(homologues, start=1):
                logging.info(
                    f"Converting results for structure {struct}, "
                    f"{struct_number}/{len(mtz_structs)}"
                )
                homologue_mtz = struct_dir / homologue / mtz_file
                if not homologue_mtz.exists():
                    continue
                logging.info(
                    f"Collecting info for {homologue}, "
                    f"{homologue_number}/{len(homologues)}"
                )

                temp_out_file = os.path.join(
                    output_dir, "temp_" + struct + "_" + homologue + ".ccp4"
                )
                final = os.path.join(
                    output_dir, struct + "_" + homologue + "_" + mtz_state + ".ccp4"
                )
                try:
                    boxed_map = _build_boxed_map(homologue_mtz, temp_out_file)
                    if boxed_map is not None:
                        _record_final_map(
                            boxed_map, final, cur, struct, homologue, csv_path
                        )
                finally:
                    if delete_temp and os.path.exists(temp_out_file):
                        os.remove(temp_out_file)
    finally:
        conn.close()

    return True
def mtz_to_ccp4(
        self,
        dtag,
        reference_pdb_path,
        dataset_path,
        output_path,
        min_res,
        structure_factors="FWT,PHWT",
):
    """Transform the dataset's MTZ into a map written as ``<dtag>.ccp4``.

    The reference structure is loaded and its atom-coordinate bounding box is
    measured, then the MTZ at ``dataset_path["mtz_path"]`` is filtered with a
    ``>= 3.0`` cut on the values of ``make_d_array()``, its "FWT"/"PHWT"
    columns are transformed to a grid (sample_rate=3) and the grid is written
    under ``output_path``.

    NOTE(review): ``min_res`` and ``structure_factors`` are accepted but not
    read; the 3.0 cutoff and the "FWT"/"PHWT" labels are hard-coded. The
    bounding-box values are computed but do not influence the written map
    (they fed an earlier interpolation step that is no longer present).
    Confirm whether this is intended.
    """
    # Load the reference structure
    reference_file = PDBFile(reference_pdb_path)
    reference_structure = structure_biopython_from_pdb(reference_file)

    def atom_coordinates():
        # One row per atom of the reference structure
        return np.vstack(
            [atom.coord for atom in reference_structure.structure.get_atoms()]
        )

    # Bounding box of the reference structure
    box_limits_max = np.max(atom_coordinates(), axis=0)
    box_limits_min = np.min(atom_coordinates(), axis=0)

    # Box edge lengths truncated to int and padded by 4 (currently unused)
    box_span = box_limits_max - box_limits_min
    grid_params = [int(edge) + 4 for edge in box_span]

    # Keep only the reflections passing the 3.0 cut on make_d_array()
    reflections = gemmi.read_mtz_file(str(dataset_path["mtz_path"]))
    reflection_table = np.array(reflections, copy=False)
    passes_cutoff = reflections.make_d_array() >= 3.0
    reflections.set_data(reflection_table[passes_cutoff])

    density = reflections.transform_f_phi_to_map("FWT", "PHWT", sample_rate=3)

    # Wrap the grid in a CCP4 map and write it next to the other outputs
    map_file = gemmi.Ccp4Map()
    map_file.grid = density
    map_file.update_ccp4_header(2, True)

    destination = output_path / "{}.ccp4".format(dtag)
    map_file.write_ccp4_map(str(destination))