def get_pdb( # pylint: disable=too-many-locals optimade_structure: OptimadeStructure, ) -> str: """ Write Protein Data Bank (PDB) structure in the old PDB format from OPTIMADE structure Inspired by `ase.io.proteindatabank.write_proteindatabank()` in the ASE package. :param optimade_structure: OPTIMADE structure :return: str """ if globals().get("np", None) is None: warn(NUMPY_NOT_FOUND) return None pdb = "" attributes = optimade_structure.attributes rotation = None if all(attributes.dimension_types): currentcell = np.asarray(attributes.lattice_vectors) cellpar = cell_to_cellpar(currentcell) exportedcell = cellpar_to_cell(cellpar) rotation = np.linalg.solve(currentcell, exportedcell) # Setting Z-value = 1 and using P1 since we have all atoms defined explicitly Z = 1 spacegroup = "P 1" pdb += ( f"CRYST1{cellpar[0]:9.3f}{cellpar[1]:9.3f}{cellpar[2]:8.3f}" f"{cellpar[3]:7.2f}{cellpar[4]:7.2f}{cellpar[5]:7.2f} {spacegroup:11s}{Z:4d}\n" ) for i, vector in enumerate(scaled_cell(currentcell)): pdb += f"SCALE{i + 1} {vector[0]:10.6f}{vector[1]:10.6f}{vector[2]:10.6f} {0:10.5f}\n" # There is a limit of 5 digit numbers in this field. pdb_maxnum = 100000 bfactor = 1.0 pdb += "MODEL 1\n" species: Dict[str, OptimadeStructureSpecies] = { species.name: species for species in attributes.species } cartesian_site_positions, _ = pad_positions( attributes.cartesian_site_positions) sites = np.asarray(cartesian_site_positions) if rotation is not None: sites = sites.dot(rotation) for site_number in range(attributes.nsites): species_name = attributes.species_at_sites[site_number] site = sites[site_number] current_species = species[species_name] for index, symbol in enumerate(current_species.chemical_symbols): if symbol == "vacancy": continue label = species_name if len(current_species.chemical_symbols) > 1: if ("vacancy" in current_species.chemical_symbols and len(current_species.chemical_symbols) == 2): pass else: label = f"{symbol}{index + 1}" pdb += ( f"ATOM {site_number % pdb_maxnum:5d} {label:4} MOL 1 " f"{site[0]:8.3f}{site[1]:8.3f}{site[2]:8.3f}" f"{current_species.concentration[index]:6.2f}" f"{bfactor:6.2f} {symbol.upper():2} \n") pdb += "ENDMDL\n" return pdb
def get_pdbx_mmcif( # pylint: disable=too-many-locals optimade_structure: OptimadeStructure, ) -> str: # pragma: no cover """ Write Protein Data Bank (PDB) structure in the PDBx/mmCIF format from OPTIMADE structure Inspired by `ase.io.proteindatabank:write_proteindatabank()` in the ASE package, as well as `ase.io.cif:write_cif()`. :param optimade_structure: OPTIMADE structure :return: str """ if globals().get("np", None) is None: warn(NUMPY_NOT_FOUND) return None cif = """# # Created from an OPTIMADE structure. # # See https://www.optimade.org and/or # https://github.com/Materials-Consortia/OPTIMADE for more information. # # CIF 2.0 format, specifically mmCIF (PDBx). # See http://mmcif.wwpdb.org for more information. # """ entry_id = f"{optimade_structure.type}{optimade_structure.id}" cif += f"data_{entry_id}\n_entry.id {entry_id}\n#\n" attributes = optimade_structure.attributes # Do this only if there's three non-zero lattice vectors if all(attributes.dimension_types): a_vector, b_vector, c_vector, alpha, beta, gamma = cell_to_cellpar( attributes.lattice_vectors) cif += (f"_cell.entry_id {entry_id}\n" f"_cell.length_a {a_vector:g}\n" f"_cell.length_b {b_vector:g}\n" f"_cell.length_c {c_vector:g}\n" f"_cell.angle_alpha {alpha:g}\n" f"_cell.angle_beta {beta:g}\n" f"_cell.angle_gamma {gamma:g}\n" "_cell.Z_PDB 1\n#\n") cif += (f"_symmetry.entry_id {entry_id}\n" "_symmetry.space_group_name_H-M 'P 1'\n" "_symmetry.Int_Tables_number 1\n#\n") # Since some structure viewers are having issues with cartesian coordinates, # we calculate the fractional coordinates if this is a 3D structure and we have all the necessary information. if not hasattr(attributes, "fractional_site_positions"): sites, _ = pad_positions(attributes.cartesian_site_positions) attributes.fractional_site_positions = fractional_coordinates( cell=attributes.lattice_vectors, cartesian_positions=sites) # TODO: The following lines are perhaps needed to create a "valid" PDBx/mmCIF file. # However, at the same time, the information here is "default" and will for all structures "at this moment in time" # be the same. I.e., no information is gained by adding this now. # If it is found that they indeed are needed to create a "valid" PDBx/mmCIF file, they should be included in the output. # cif += ( # "loop_\n" # "_struct_asym.id\n" # "_struct_asym.entity_id\n" # "A 1\n#\n" # At this point, not using this feature. # ) # cif += ( # "loop_\n" # "_chem_comp.id\n" # "X\n#\n" # At this point, not using this feature. # ) # cif += ( # "loop_\n" # "_entity.id\n" # "1\n#\n" # At this point, not using this feature. # ) # NOTE: This is otherwise a bit ahead of its time, since this OPTIMADE property is part of an open PR. # See https://github.com/Materials-Consortia/OPTIMADE/pull/206 coord_type = ("fract" if hasattr(attributes, "fractional_site_positions") else "Cartn") cif += ( "loop_\n" "_atom_site.group_PDB\n" # Always "ATOM" "_atom_site.id\n" # number (1-counting) "_atom_site.type_symbol\n" # species.chemical_symbols "_atom_site.label_atom_id\n" # species.checmical_symbols symbol + number # For these next keys, see the comment above. # "_atom_site.label_asym_id\n" # Will be set to "A" _struct_asym.id above # "_atom_site.label_comp_id\n" # Will be set to "X" _chem_comp.id above # "_atom_site.label_entity_id\n" # Will be set to "1" _entity.id above # "_atom_site.label_seq_id\n" "_atom_site.occupancy\n" # species.concentration f"_atom_site.{coord_type}_x\n" # cartesian_site_positions f"_atom_site.{coord_type}_y\n" # cartesian_site_positions f"_atom_site.{coord_type}_z\n" # cartesian_site_positions "_atom_site.thermal_displace_type\n" # Set to 'Biso' "_atom_site.B_iso_or_equiv\n" # Set to 1.0:f ) if coord_type == "fract": sites, _ = pad_positions(attributes.fractional_site_positions) else: sites, _ = pad_positions(attributes.cartesian_site_positions) species: Dict[str, OptimadeStructureSpecies] = { species.name: species for species in attributes.species } for site_number in range(attributes.nsites): species_name = attributes.species_at_sites[site_number] site = sites[site_number] current_species = species[species_name] for index, symbol in enumerate(current_species.chemical_symbols): if symbol == "vacancy": continue label = f"{species_name.upper()}{site_number + 1}" if len(current_species.chemical_symbols) > 1: if ("vacancy" in current_species.chemical_symbols and len(current_species.chemical_symbols) == 2): pass else: label = f"{symbol.upper()}{index + 1}" cif += ( f"ATOM {site_number + 1:5d} {symbol} {label:8} " f"{current_species.concentration[index]:6.4f} {site[0]:8.5f} " f"{site[1]:8.5f} {site[2]:8.5f} {'Biso':4} {'1.000':6}\n") return cif
def get_cif( # pylint: disable=too-many-locals,too-many-branches optimade_structure: OptimadeStructure, ) -> str: """ Get CIF file as string from OPTIMADE structure Based on `ase.io.cif:write_cif()`. :param optimade_structure: OPTIMADE structure :param formatting: What formatting to use for the CIF file data keys. Can be either "mp" or "default". :param encoding: Encoding used for the string. CIF files use "latin-1" as standard. If encoding is "str", a Python str object will be returned. :return: str """ # NumPy is needed for calculations if globals().get("np", None) is None: warn(NUMPY_NOT_FOUND) return None cif = """# # Created from an OPTIMADE structure. # # See https://www.optimade.org and/or # https://github.com/Materials-Consortia/OPTIMADE for more information. # """ cif += f"data_{optimade_structure.id}\n\n" attributes = optimade_structure.attributes # Do this only if there's three non-zero lattice vectors # NOTE: This also negates handling of lattice_vectors with null/None values if all(attributes.dimension_types): a_vector, b_vector, c_vector, alpha, beta, gamma = cell_to_cellpar( attributes.lattice_vectors ) cif += ( f"_cell_length_a {a_vector:g}\n" f"_cell_length_b {b_vector:g}\n" f"_cell_length_c {c_vector:g}\n" f"_cell_angle_alpha {alpha:g}\n" f"_cell_angle_beta {beta:g}\n" f"_cell_angle_gamma {gamma:g}\n\n" ) cif += ( "_symmetry_space_group_name_H-M 'P 1'\n" "_symmetry_int_tables_number 1\n\n" "loop_\n" " _symmetry_equiv_pos_as_xyz\n" " 'x, y, z'\n\n" ) # Since some structure viewers are having issues with cartesian coordinates, # we calculate the fractional coordinates if this is a 3D structure and we have all the necessary information. if not hasattr(attributes, "fractional_site_positions"): sites, _ = pad_positions(attributes.cartesian_site_positions) attributes.fractional_site_positions = fractional_coordinates( cell=attributes.lattice_vectors, cartesian_positions=sites ) # NOTE: This is otherwise a bit ahead of its time, since this OPTIMADE property is part of an open PR. # See https://github.com/Materials-Consortia/OPTIMADE/pull/206 coord_type = ( "fract" if hasattr(attributes, "fractional_site_positions") else "Cartn" ) cif += ( "loop_\n" " _atom_site_type_symbol\n" # species.chemical_symbols " _atom_site_label\n" # species.name + unique int " _atom_site_occupancy\n" # species.concentration f" _atom_site_{coord_type}_x\n" # cartesian_site_positions f" _atom_site_{coord_type}_y\n" # cartesian_site_positions f" _atom_site_{coord_type}_z\n" # cartesian_site_positions " _atom_site_thermal_displace_type\n" # Set to 'Biso' " _atom_site_B_iso_or_equiv\n" # Set to 1.0:f ) if coord_type == "fract": sites, _ = pad_positions(attributes.fractional_site_positions) else: sites, _ = pad_positions(attributes.cartesian_site_positions) species: Dict[str, OptimadeStructureSpecies] = { species.name: species for species in attributes.species } symbol_occurences = {} for site_number in range(attributes.nsites): species_name = attributes.species_at_sites[site_number] site = sites[site_number] current_species = species[species_name] for index, symbol in enumerate(current_species.chemical_symbols): if symbol == "vacancy": continue if symbol in symbol_occurences: symbol_occurences[symbol] += 1 else: symbol_occurences[symbol] = 1 label = f"{symbol}{symbol_occurences[symbol]}" cif += ( f" {symbol} {label} {current_species.concentration[index]:6.4f} {site[0]:8.5f} " f"{site[1]:8.5f} {site[2]:8.5f} {'Biso':4} {'1.000':6}\n" ) return cif