Beispiel #1
0
def test_pad_positions(null_position_structure):
    """Check that pad_positions replaces None coordinates and reports whether it padded anything"""
    raw_positions = null_position_structure.attributes.cartesian_site_positions

    # First pass: the input is expected to contain None values, so padding must be reported.
    positions, padded_position = pad_positions(raw_positions)
    remaining_nones = [
        value for vector in positions for value in vector if value is None
    ]
    assert not remaining_nones
    assert padded_position

    # Second pass: the already padded positions are fed back in, so nothing should be padded.
    positions, padded_position = pad_positions(positions)
    remaining_nones = [
        value for vector in positions for value in vector if value is None
    ]
    assert not remaining_nones
    assert not padded_position
Beispiel #2
0
def get_jarvis_atoms(optimade_structure: OptimadeStructure) -> Atoms:
    """ Get jarvis Atoms from OPTIMADE structure

    NOTE: Cannot handle partial occupancies

    :param optimade_structure: OPTIMADE structure
    :return: jarvis.core.Atoms, or None (after emitting a warning) when the
        module-level name ``Atoms`` is missing or None
    :raises ConversionError: if "disorder" is among the structure features
    """
    if globals().get("Atoms", None) is None:
        warn(JARVIS_NOT_FOUND)
        return None

    attributes = optimade_structure.attributes

    # Cannot handle partial occupancies
    if "disorder" in attributes.structure_features:
        raise ConversionError(
            "jarvis-tools cannot handle structures with partial occupancies."
        )

    cartesian_site_positions, _ = pad_positions(attributes.cartesian_site_positions)

    return Atoms(
        lattice_mat=attributes.lattice_vectors,
        # One species name per site, in site order, so that `elements` lines up
        # with `coords`. Iterating over `attributes.species` instead would yield
        # one entry per *unique* species, which differs in length (and possibly
        # order) from the per-site coordinates as soon as a species is used on
        # more than one site.
        elements=list(attributes.species_at_sites),
        coords=cartesian_site_positions,
        cartesian=True,
    )
Beispiel #3
0
def get_aiida_structure_data(
        optimade_structure: OptimadeStructure) -> StructureData:
    """ Build an AiiDA StructureData from an OPTIMADE structure

    :param optimade_structure: OPTIMADE structure
    :return: StructureData, or None (after emitting a warning) when the
        module-level name ``StructureData`` is missing or None
    """
    if globals().get("StructureData", None) is None:
        warn(AIIDA_NOT_FOUND)
        return None

    attrs = optimade_structure.attributes

    # Null/None lattice values are padded; the flag tells us whether the cell
    # has to be adjusted once all sites are in place.
    padded_cell, cell_needs_adjusting = pad_cell(attrs.lattice_vectors)
    structure = StructureData(cell=padded_cell)

    # One AiiDA Kind per OPTIMADE species
    for species in attrs.species:
        # NOTE: The non-chemical element identifier "X" is identical to how AiiDA handles this,
        # so it is kept like any other true chemical identifier.
        # "vacancy" entries are dropped. This is how AiiDA handles vacancies:
        # they are not included, leaving the summed concentration of the site below 1.
        kept = [
            position
            for position, element in enumerate(species.chemical_symbols)
            if element != "vacancy"
        ]

        structure.append_kind(
            Kind(
                symbols=[species.chemical_symbols[position] for position in kept],
                weights=[species.concentration[position] for position in kept],
                # AiiDA needs a mass > 0, while mass is OPTIONAL in OPTIMADE:
                # fall back to 1 when it is missing (or otherwise falsy).
                mass=species.mass or 1,
                name=species.name,
            ))

    # Null/None coordinates are padded as well
    site_positions, _ = pad_positions(attrs.cartesian_site_positions)

    # Index-based loop to ensure 1-to-1 between kind and site
    for site_index in range(attrs.nsites):
        new_site = Site(
            kind_name=attrs.species_at_sites[site_index],
            position=site_positions[site_index],
        )
        structure.append_site(new_site)

    if cell_needs_adjusting:
        periodic = [bool(dim.value) for dim in attrs.dimension_types]
        structure._adjust_default_cell(pbc=periodic)

    return structure
Beispiel #4
0
def _get_molecule(optimade_structure: OptimadeStructure) -> Molecule:
    """Build a pymatgen Molecule from the sites of an OPTIMADE structure"""
    attrs = optimade_structure.attributes

    # Null/None coordinates are padded before being handed to pymatgen
    coords, _ = pad_positions(attrs.cartesian_site_positions)

    site_species = _pymatgen_species(
        nsites=attrs.nsites,
        species=attrs.species,
        species_at_sites=attrs.species_at_sites,
    )

    return Molecule(species=site_species, coords=coords)
Beispiel #5
0
def _get_structure(optimade_structure: OptimadeStructure) -> Structure:
    """Build a pymatgen Structure (lattice plus sites) from an OPTIMADE structure"""
    attrs = optimade_structure.attributes

    # Null/None coordinates are padded before being handed to pymatgen
    coords, _ = pad_positions(attrs.cartesian_site_positions)

    site_species = _pymatgen_species(
        nsites=attrs.nsites,
        species=attrs.species,
        species_at_sites=attrs.species_at_sites,
    )

    # The OPTIMADE positions are cartesian, so pymatgen is told not to treat them as fractional
    return Structure(
        lattice=attrs.lattice_vectors,
        species=site_species,
        coords=coords,
        coords_are_cartesian=True,
    )
Beispiel #6
0
def get_pdbx_mmcif(  # pylint: disable=too-many-locals
    optimade_structure: OptimadeStructure, ) -> str:  # pragma: no cover
    """ Serialize an OPTIMADE structure as a PDBx/mmCIF formatted string

    Inspired by `ase.io.proteindatabank:write_proteindatabank()` in the ASE package,
    as well as `ase.io.cif:write_cif()`.

    NOTE: If all dimension types are truthy and the attributes have no
    `fractional_site_positions`, that attribute is calculated and set on
    `optimade_structure.attributes`.

    :param optimade_structure: OPTIMADE structure
    :return: str, or None (after emitting a warning) when the module-level
        name ``np`` is missing or None
    """
    if globals().get("np", None) is None:
        warn(NUMPY_NOT_FOUND)
        return None

    entry_id = f"{optimade_structure.type}{optimade_structure.id}"

    # The file is collected piece by piece and joined once at the very end.
    chunks = [
        "#\n"
        "# Created from an OPTIMADE structure.\n"
        "#\n"
        "# See https://www.optimade.org and/or\n"
        "# https://github.com/Materials-Consortia/OPTIMADE for more information.\n"
        "#\n"
        "# CIF 2.0 format, specifically mmCIF (PDBx).\n"
        "# See http://mmcif.wwpdb.org for more information.\n"
        "#\n",
        f"data_{entry_id}\n_entry.id                         {entry_id}\n#\n",
    ]

    attrs = optimade_structure.attributes

    # Cell and symmetry information is only written when every dimension type is truthy
    if all(attrs.dimension_types):
        length_a, length_b, length_c, alpha, beta, gamma = cell_to_cellpar(
            attrs.lattice_vectors)

        chunks.append(f"_cell.entry_id                    {entry_id}\n"
                      f"_cell.length_a                    {length_a:g}\n"
                      f"_cell.length_b                    {length_b:g}\n"
                      f"_cell.length_c                    {length_c:g}\n"
                      f"_cell.angle_alpha                 {alpha:g}\n"
                      f"_cell.angle_beta                  {beta:g}\n"
                      f"_cell.angle_gamma                 {gamma:g}\n"
                      "_cell.Z_PDB                       1\n#\n")
        chunks.append(f"_symmetry.entry_id                {entry_id}\n"
                      "_symmetry.space_group_name_H-M    'P 1'\n"
                      "_symmetry.Int_Tables_number       1\n#\n")

        # Some structure viewers have issues with cartesian coordinates, so the
        # fractional ones are derived here when the structure does not carry them.
        if not hasattr(attrs, "fractional_site_positions"):
            padded_cartesian, _ = pad_positions(attrs.cartesian_site_positions)
            attrs.fractional_site_positions = fractional_coordinates(
                cell=attrs.lattice_vectors,
                cartesian_positions=padded_cartesian)

    # TODO: The following lines are perhaps needed to create a "valid" PDBx/mmCIF file.
    # However, the information is "default" and would currently be the same for all
    # structures, i.e., nothing is gained by adding it now.
    # If it turns out they are needed for a "valid" PDBx/mmCIF file, include them in the output.
    # cif += (
    #     "loop_\n"
    #     "_struct_asym.id\n"
    #     "_struct_asym.entity_id\n"
    #     "A  1\n#\n"  # At this point, not using this feature.
    # )

    # cif += (
    #     "loop_\n"
    #     "_chem_comp.id\n"
    #     "X\n#\n"  # At this point, not using this feature.
    # )

    # cif += (
    #     "loop_\n"
    #     "_entity.id\n"
    #     "1\n#\n"  # At this point, not using this feature.
    # )

    # NOTE: This is otherwise a bit ahead of its time, since this OPTIMADE property is part of an open PR.
    # See https://github.com/Materials-Consortia/OPTIMADE/pull/206
    if hasattr(attrs, "fractional_site_positions"):
        coord_type = "fract"
    else:
        coord_type = "Cartn"

    atom_site_columns = [
        "group_PDB",  # Always "ATOM"
        "id",  # number (1-counting)
        "type_symbol",  # species.chemical_symbols
        "label_atom_id",  # species.chemical_symbols symbol + number
        # For these next keys, see the TODO above.
        # "label_asym_id",  # Would be set to "A" (_struct_asym.id)
        # "label_comp_id",  # Would be set to "X" (_chem_comp.id)
        # "label_entity_id",  # Would be set to "1" (_entity.id)
        # "label_seq_id",
        "occupancy",  # species.concentration
        f"{coord_type}_x",  # site positions
        f"{coord_type}_y",  # site positions
        f"{coord_type}_z",  # site positions
        "thermal_displace_type",  # Set to 'Biso'
        "B_iso_or_equiv",  # Set to 1.0:f
    ]
    chunks.append("loop_\n")
    chunks.extend(f"_atom_site.{column}\n" for column in atom_site_columns)

    if coord_type == "fract":
        sites, _ = pad_positions(attrs.fractional_site_positions)
    else:
        sites, _ = pad_positions(attrs.cartesian_site_positions)

    species_by_name: Dict[str, OptimadeStructureSpecies] = {
        entry.name: entry
        for entry in attrs.species
    }

    for site_index in range(attrs.nsites):
        species_name = attrs.species_at_sites[site_index]
        site = sites[site_index]

        site_species = species_by_name[species_name]
        symbols = site_species.chemical_symbols

        # A single symbol, or a single symbol paired with "vacancy", is labelled after
        # the species and the site; otherwise each symbol gets its own label.
        symbol_count = len(symbols)
        label_from_species = symbol_count <= 1 or (symbol_count == 2
                                                   and "vacancy" in symbols)

        for index, symbol in enumerate(symbols):
            if symbol == "vacancy":
                continue

            if label_from_species:
                label = f"{species_name.upper()}{site_index + 1}"
            else:
                label = f"{symbol.upper()}{index + 1}"

            chunks.append(
                f"ATOM  {site_index + 1:5d}  {symbol}  {label:8}  "
                f"{site_species.concentration[index]:6.4f}  {site[0]:8.5f}  "
                f"{site[1]:8.5f}  {site[2]:8.5f}  {'Biso':4}  {'1.000':6}\n")

    return "".join(chunks)
Beispiel #7
0
def get_pdb(  # pylint: disable=too-many-locals
    optimade_structure: OptimadeStructure, ) -> str:
    """ Write Protein Data Bank (PDB) structure in the old PDB format from OPTIMADE structure

    Inspired by `ase.io.proteindatabank.write_proteindatabank()` in the ASE package.

    The output consists of an optional CRYST1 record and three SCALE records
    (only when all dimension types are truthy), followed by a single MODEL
    holding one ATOM record per non-vacancy chemical symbol of every site.

    :param optimade_structure: OPTIMADE structure
    :return: str, or None (after emitting a warning) when the module-level
        name ``np`` is missing or None
    """
    if globals().get("np", None) is None:
        warn(NUMPY_NOT_FOUND)
        return None

    records = []

    attributes = optimade_structure.attributes

    rotation = None
    if all(attributes.dimension_types):
        currentcell = np.asarray(attributes.lattice_vectors)
        cellpar = cell_to_cellpar(currentcell)
        exportedcell = cellpar_to_cell(cellpar)
        # Solves currentcell @ rotation == exportedcell, i.e., the matrix that takes
        # row vectors given relative to the current cell over to the exported cell.
        rotation = np.linalg.solve(currentcell, exportedcell)
        # Setting Z-value = 1 and using P1 since we have all atoms defined explicitly
        z_value = 1
        spacegroup = "P 1"
        # CRYST1 is a fixed-column record: a, b and c are each 9 characters wide
        # (columns 7-15, 16-24 and 25-33). Using a narrower field for c would
        # shift the angles, space group and Z value out of their columns.
        records.append(
            f"CRYST1{cellpar[0]:9.3f}{cellpar[1]:9.3f}{cellpar[2]:9.3f}"
            f"{cellpar[3]:7.2f}{cellpar[4]:7.2f}{cellpar[5]:7.2f} {spacegroup:11s}{z_value:4d}\n"
        )

        for i, vector in enumerate(scaled_cell(currentcell)):
            records.append(
                f"SCALE{i + 1}    {vector[0]:10.6f}{vector[1]:10.6f}{vector[2]:10.6f}     {0:10.5f}\n"
            )

    # There is a limit of 5 digit numbers in the atom serial field.
    pdb_maxnum = 100000
    bfactor = 1.0

    records.append("MODEL     1\n")

    species: Dict[str, OptimadeStructureSpecies] = {
        species.name: species
        for species in attributes.species
    }

    # Null/None coordinates are padded before converting to an array
    cartesian_site_positions, _ = pad_positions(
        attributes.cartesian_site_positions)
    sites = np.asarray(cartesian_site_positions)
    if rotation is not None:
        # Express the positions relative to the cell orientation written in CRYST1
        sites = sites.dot(rotation)

    for site_number in range(attributes.nsites):
        species_name = attributes.species_at_sites[site_number]
        site = sites[site_number]

        current_species = species[species_name]
        symbols = current_species.chemical_symbols

        # A single symbol, or a single symbol paired with "vacancy", is labelled
        # with the species name; otherwise each symbol gets its own numbered label.
        label_from_species = len(symbols) <= 1 or (len(symbols) == 2
                                                   and "vacancy" in symbols)

        for index, symbol in enumerate(symbols):
            if symbol == "vacancy":
                continue

            label = species_name if label_from_species else f"{symbol}{index + 1}"

            # The element symbol (columns 77-78) is right-justified, as the
            # fixed-column ATOM record requires.
            records.append(
                f"ATOM  {site_number % pdb_maxnum:5d} {label:4} MOL     1    "
                f"{site[0]:8.3f}{site[1]:8.3f}{site[2]:8.3f}"
                f"{current_species.concentration[index]:6.2f}"
                f"{bfactor:6.2f}          {symbol.upper():>2}  \n")
    records.append("ENDMDL\n")

    return "".join(records)
Beispiel #8
0
def get_cif(  # pylint: disable=too-many-locals,too-many-branches
    optimade_structure: OptimadeStructure,
) -> str:
    """ Serialize an OPTIMADE structure as a CIF formatted string

    Based on `ase.io.cif:write_cif()`.

    NOTE: If all dimension types are truthy and the attributes have no
    `fractional_site_positions`, that attribute is calculated and set on
    `optimade_structure.attributes`.

    :param optimade_structure: OPTIMADE structure
    :return: str, or None (after emitting a warning) when the module-level
        name ``np`` is missing or None
    """
    # NumPy is needed for calculations
    if globals().get("np", None) is None:
        warn(NUMPY_NOT_FOUND)
        return None

    # The file is collected piece by piece and joined once at the very end.
    chunks = [
        "#\n"
        "# Created from an OPTIMADE structure.\n"
        "#\n"
        "# See https://www.optimade.org and/or\n"
        "# https://github.com/Materials-Consortia/OPTIMADE for more information.\n"
        "#\n",
        f"data_{optimade_structure.id}\n\n",
    ]

    attrs = optimade_structure.attributes

    # Cell and symmetry information is only written when every dimension type is truthy
    # NOTE: This also negates handling of lattice_vectors with null/None values
    if all(attrs.dimension_types):
        length_a, length_b, length_c, alpha, beta, gamma = cell_to_cellpar(
            attrs.lattice_vectors
        )

        chunks.append(
            f"_cell_length_a                    {length_a:g}\n"
            f"_cell_length_b                    {length_b:g}\n"
            f"_cell_length_c                    {length_c:g}\n"
            f"_cell_angle_alpha                 {alpha:g}\n"
            f"_cell_angle_beta                  {beta:g}\n"
            f"_cell_angle_gamma                 {gamma:g}\n\n"
        )
        chunks.append(
            "_symmetry_space_group_name_H-M    'P 1'\n"
            "_symmetry_int_tables_number       1\n\n"
            "loop_\n"
            "  _symmetry_equiv_pos_as_xyz\n"
            "  'x, y, z'\n\n"
        )

        # Some structure viewers have issues with cartesian coordinates, so the
        # fractional ones are derived here when the structure does not carry them.
        if not hasattr(attrs, "fractional_site_positions"):
            padded_cartesian, _ = pad_positions(attrs.cartesian_site_positions)
            attrs.fractional_site_positions = fractional_coordinates(
                cell=attrs.lattice_vectors, cartesian_positions=padded_cartesian
            )

    # NOTE: This is otherwise a bit ahead of its time, since this OPTIMADE property is part of an open PR.
    # See https://github.com/Materials-Consortia/OPTIMADE/pull/206
    if hasattr(attrs, "fractional_site_positions"):
        coord_type = "fract"
    else:
        coord_type = "Cartn"

    atom_site_columns = [
        "type_symbol",  # species.chemical_symbols
        "label",  # chemical symbol + running count of that symbol
        "occupancy",  # species.concentration
        f"{coord_type}_x",  # site positions
        f"{coord_type}_y",  # site positions
        f"{coord_type}_z",  # site positions
        "thermal_displace_type",  # Set to 'Biso'
        "B_iso_or_equiv",  # Set to 1.0:f
    ]
    chunks.append("loop_\n")
    chunks.extend(f"  _atom_site_{column}\n" for column in atom_site_columns)

    if coord_type == "fract":
        sites, _ = pad_positions(attrs.fractional_site_positions)
    else:
        sites, _ = pad_positions(attrs.cartesian_site_positions)

    species_by_name: Dict[str, OptimadeStructureSpecies] = {
        entry.name: entry for entry in attrs.species
    }

    # Running count per chemical symbol, used to give every atom a unique label
    symbol_counts = {}
    for site_index in range(attrs.nsites):
        species_name = attrs.species_at_sites[site_index]
        site = sites[site_index]

        site_species = species_by_name[species_name]

        for index, symbol in enumerate(site_species.chemical_symbols):
            if symbol == "vacancy":
                continue

            symbol_counts[symbol] = symbol_counts.get(symbol, 0) + 1
            label = f"{symbol}{symbol_counts[symbol]}"

            chunks.append(
                f"  {symbol} {label} {site_species.concentration[index]:6.4f} {site[0]:8.5f}  "
                f"{site[1]:8.5f}  {site[2]:8.5f}  {'Biso':4}  {'1.000':6}\n"
            )

    return "".join(chunks)