def get_mol_block(self, conformer):
    """Builds the atom and bond section of a V2000 MOL file.

    Only the first entry of `conformer.bond_topologies` is used. Coordinates
    come from `conformer.optimized_geometry` and are passed through
    `smu_utils_lib.bohr_to_angstroms` before being written. `self` is not
    read by this method.

    Args:
      conformer: dataset_pb2.Conformer

    Returns:
      list of strings, each ending in a newline: one blank line, the counts
      line, one line per atom and then one line per bond.
    """
    topology = conformer.bond_topologies[0]
    to_angstroms = smu_utils_lib.bohr_to_angstroms

    counts_template = '{:3d}{:3d}  0  0  0  0  0  0  0  0999 V2000\n'
    atom_template = (
        '{:10.4f}{:10.4f}{:10.4f} {:s}   0  0  0  0  0  0  0  0  0  0  0  0\n')
    bond_template = '{:3d}{:3d}{:3d}  0\n'

    lines = [
        '\n',
        counts_template.format(len(topology.atoms), len(topology.bonds)),
    ]

    # zip() pairs each atom type with its position and stops at the shorter
    # of the two sequences.
    positions = conformer.optimized_geometry.atom_positions
    for atom_type, position in zip(topology.atoms, positions):
        lines.append(
            atom_template.format(
                to_angstroms(position.x),
                to_angstroms(position.y),
                to_angstroms(position.z),
                # Element 0 of the ATOM_TYPE_TO_RDKIT entry is written as
                # the atom symbol.
                smu_utils_lib.ATOM_TYPE_TO_RDKIT[atom_type][0]))

    # Atom indices are shifted by one on output (topology indices are
    # 0-based, the MOL bond block is 1-based).
    lines.extend(
        bond_template.format(bond.atom_a + 1, bond.atom_b + 1, bond.bond_type)
        for bond in topology.bonds)

    return lines
def geom_to_angstroms(geometry):
    """Return a new Geometry with every coordinate converted to Angstroms.

    The input is left untouched. Only `atom_positions` is carried over to
    the result; each x/y/z value is passed through
    `smu_utils_lib.bohr_to_angstroms`.

    Args:
      geometry: starting dataset_pb2.Geometry

    Returns:
      New dataset_pb2.Geometry with adjusted coordinates.
    """
    to_angstroms = smu_utils_lib.bohr_to_angstroms
    converted = dataset_pb2.Geometry()
    for position in geometry.atom_positions:
        converted.atom_positions.append(
            dataset_pb2.Geometry.AtomPos(
                x=to_angstroms(position.x),
                y=to_angstroms(position.y),
                z=to_angstroms(position.z)))

    return converted
def extract_bond_lengths(conformer, dist_sig_digits, unbonded_max):
    """Yields quantized bond lengths.

    Every pair of atoms in the first bond topology is considered, except
    pairs that involve a hydrogen. Unbonded pairs (BOND_UNDEFINED) are only
    reported when their quantized distance does not exceed `unbonded_max`.

    Output atom types are single characters, sorted lexicographically.
    bond_type is a dataset_pb2.BondTopology.BondType. The quantized distance
    is a string (to avoid vagaries of floating point compares).

    Args:
      conformer: dataset_pb2.Conformer
      dist_sig_digits: number of digits after decimal point to keep
      unbonded_max: maximum distance to report for unbonded pairs

    Yields:
      (atom type 1, atom type 2, bond type, quantized dist)
    """
    bt = conformer.bond_topologies[0]
    geom = conformer.optimized_geometry
    format_str = '{:.%df}' % dist_sig_digits
    scale = 10**dist_sig_digits

    # Index the bonds once instead of scanning the whole bond list for every
    # atom pair. Keys are (low index, high index); setdefault keeps the
    # first bond listed for a pair, matching a first-match linear scan.
    bond_types = {}
    for bond in bt.bonds:
        if bond.atom_a <= bond.atom_b:
            key = (bond.atom_a, bond.atom_b)
        else:
            key = (bond.atom_b, bond.atom_a)
        bond_types.setdefault(key, bond.bond_type)

    # combinations() yields index pairs with atom_idx0 < atom_idx1, which is
    # the same orientation as the lookup keys above.
    for atom_idx0, atom_idx1 in itertools.combinations(range(len(bt.atoms)),
                                                       r=2):

        if (bt.atoms[atom_idx0] == dataset_pb2.BondTopology.ATOM_H
                or bt.atoms[atom_idx1] == dataset_pb2.BondTopology.ATOM_H):
            continue

        bond_type = bond_types.get((atom_idx0, atom_idx1),
                                   dataset_pb2.BondTopology.BOND_UNDEFINED)

        pos0 = geom.atom_positions[atom_idx0]
        pos1 = geom.atom_positions[atom_idx1]
        atom_pos0 = np.array([pos0.x, pos0.y, pos0.z], dtype=np.double)
        atom_pos1 = np.array([pos1.x, pos1.y, pos1.z], dtype=np.double)

        # The intention is the buckets are the left edge of an empirical CDF,
        # hence floor() rather than round().
        dist = (np.floor(
            smu_utils_lib.bohr_to_angstroms(
                np.linalg.norm(atom_pos0 - atom_pos1)) * scale) / scale)
        if (bond_type == dataset_pb2.BondTopology.BOND_UNDEFINED
                and dist > unbonded_max):
            continue

        atom_char0 = smu_utils_lib.ATOM_TYPE_TO_CHAR[bt.atoms[atom_idx0]]
        atom_char1 = smu_utils_lib.ATOM_TYPE_TO_CHAR[bt.atoms[atom_idx1]]
        if atom_char0 > atom_char1:
            atom_char0, atom_char1 = atom_char1, atom_char0

        yield atom_char0, atom_char1, bond_type, format_str.format(dist)
def distance_between_atoms(geom, a1, a2):
    """Return the distance between atoms `a1` and `a2` in `geom`.

    Args:
      geom: object whose `atom_positions` entries expose `x`, `y` and `z`
        (presumably a dataset_pb2.Geometry; confirm against callers)
      a1: index of the first atom in `geom.atom_positions`
      a2: index of the second atom in `geom.atom_positions`

    Returns:
      Distance in Angstroms: the Euclidean distance between the two
      positions, passed through `smu_utils_lib.bohr_to_angstroms`.
    """
    first = geom.atom_positions[a1]
    second = geom.atom_positions[a2]

    dx = first.x - second.x
    dy = first.y - second.y
    dz = first.z - second.z

    return smu_utils_lib.bohr_to_angstroms(
        math.sqrt(dx * dx + dy * dy + dz * dz))
Exemple #5
0
def extract_bond_lengths(conformer, dist_sig_digits, unbonded_max):
    """Yields quantized bond lengths.

    Nothing is yielded for conformers whose error status is 8 or higher or
    that are marked as duplicated by another conformer. Otherwise every pair
    of atoms in the first bond topology is considered, except pairs that
    involve a hydrogen and F-F pairs. Unbonded pairs (BOND_UNDEFINED) are
    only reported when their quantized distance does not exceed
    `unbonded_max`.

    Output atom types are single characters, sorted lexicographically.
    bond_type is a dataset_pb2.BondTopology.BondType. The quantized distance
    is a string (to avoid vagaries of floating point compares).

    Args:
      conformer: dataset_pb2.Conformer
      dist_sig_digits: number of digits after decimal point to keep
      unbonded_max: maximum distance to report for unbonded pairs

    Yields:
      (atom type 1, atom type 2, bond type, quantized dist)
    """
    # These are considered "major" or worse errors
    has_major_error = conformer.properties.errors.status >= 8
    if has_major_error or conformer.duplicated_by > 0:
        return

    topology = conformer.bond_topologies[0]
    positions = conformer.optimized_geometry.atom_positions
    format_str = '{:.%df}' % dist_sig_digits
    scale = 10**dist_sig_digits

    hydrogen = dataset_pb2.BondTopology.ATOM_H
    fluorine = dataset_pb2.BondTopology.ATOM_F
    unbonded = dataset_pb2.BondTopology.BOND_UNDEFINED

    def coords(atom_idx):
        # Position of one atom as a double-precision vector.
        pos = positions[atom_idx]
        return np.array([pos.x, pos.y, pos.z], dtype=np.double)

    atom_pairs = itertools.combinations(range(len(topology.atoms)), r=2)
    for idx_a, idx_b in atom_pairs:
        type_a = topology.atoms[idx_a]
        type_b = topology.atoms[idx_b]

        if type_a == hydrogen or type_b == hydrogen:
            continue

        # Hello huge hack. F-F creates problems for us because there is
        # exactly one conformer that has an F-F bond. We can't create an
        # empirical distribution out of 1 value. So we'll just drop that
        # one and let the FF conformer have no detected geometries.
        if type_a == fluorine and type_b == fluorine:
            continue

        bond_type = smu_utils_lib.get_bond_type(topology, idx_a, idx_b)

        # The intention is the buckets are the left edge of an empirical CDF,
        # hence floor() rather than round().
        separation = np.linalg.norm(coords(idx_a) - coords(idx_b))
        dist = np.floor(
            smu_utils_lib.bohr_to_angstroms(separation) * scale) / scale

        if bond_type == unbonded and dist > unbonded_max:
            continue

        # Report the two atom characters in lexicographic order.
        char_lo, char_hi = sorted((smu_utils_lib.ATOM_TYPE_TO_CHAR[type_a],
                                   smu_utils_lib.ATOM_TYPE_TO_CHAR[type_b]))

        yield char_lo, char_hi, bond_type, format_str.format(dist)