def test_single_fragment_3_atoms_1_bonds(self):
    """Three carbons with only one bond must not count as a single fragment.

    The only bond joins atoms 0 and 1, so atom 2 has no bond at all.
    """
    topology_text = """ atoms: ATOM_C
    atoms: ATOM_C
    atoms: ATOM_C
    bonds {
      atom_a: 0
      atom_b: 1
      bond_type: BOND_SINGLE
    }
"""
    topology = dataset_pb2.BondTopology()
    text_format.Parse(topology_text, topology)
    self.assertFalse(utilities.is_single_fragment(topology))
예제 #2
0
def hydrogen_to_nearest_atom(bond_topology, distances, bond_lengths):
  """Build a BondTopology with every hydrogen bonded to its nearest heavy atom.

  For each hydrogen, the non-hydrogen atom with the smallest distance below
  THRESHOLD is selected and joined to it with a single bond. When
  bond_lengths is truthy, the selected distance must additionally have a
  non-zero pdf under the (heavy atom, hydrogen) single bond distribution.

  Args:
    bond_topology: BondTopology supplying the atoms; it is only read.
    distances: matrix of interatomic distances, indexed as distances[i, j].
    bond_lengths: None or AllAtomPairLengthDistributions.

  Returns:
    A new dataset_pb2.BondTopology holding the same atoms plus one single
    bond per hydrogen, or None if some hydrogen has no heavy atom closer than
    THRESHOLD or its distance is rejected by bond_lengths.
  """
  hydrogen = dataset_pb2.BondTopology.AtomType.ATOM_H
  single = dataset_pb2.BondTopology.BondType.BOND_SINGLE
  atom_types = bond_topology.atoms
  atom_count = len(atom_types)

  with_hydrogens = dataset_pb2.BondTopology()
  with_hydrogens.atoms[:] = atom_types

  for h_index in range(atom_count):
    if atom_types[h_index] != hydrogen:
      continue

    # Scan the heavy atoms, remembering the first one at the smallest distance.
    nearest_heavy = -1
    nearest_distance = 1.0e+30
    for other in range(atom_count):
      if atom_types[other] == hydrogen:
        continue
      separation = distances[h_index, other]
      if separation < THRESHOLD and separation < nearest_distance:
        nearest_distance = separation
        nearest_heavy = other

    # A hydrogen without any heavy atom in range invalidates the whole result.
    if nearest_heavy < 0:
      return None

    if bond_lengths:
      heavy_h_lengths = bond_lengths[(atom_types[nearest_heavy], hydrogen)]
      if heavy_h_lengths[single].pdf(nearest_distance) == 0.0:
        return None

    with_hydrogens.bonds.append(
        dataset_pb2.BondTopology.Bond(
            atom_a=h_index, atom_b=nearest_heavy, bond_type=single))

  return with_hydrogens
예제 #3
0
  def test_canonical(self):
    bt = text_format.Parse(
        """
    atoms: ATOM_C
    atoms: ATOM_C
    atoms: ATOM_C
    bonds {
      atom_a: 2
      atom_b: 1
      bond_type: BOND_SINGLE
    },
    bonds {
      atom_a: 1
      atom_b: 0
      bond_type: BOND_SINGLE
    }
""", dataset_pb2.BondTopology())

    expected = text_format.Parse(
        """ atoms: ATOM_C
    atoms: ATOM_C
    atoms: ATOM_C
    bonds {
      atom_a: 0
      atom_b: 1
      bond_type: BOND_SINGLE
    },
    bonds {
      atom_a: 1
      atom_b: 2
      bond_type: BOND_SINGLE
    }
""", dataset_pb2.BondTopology())

    utilities.canonicalize_bond_topology(bt)
    self.assertEqual(
        text_format.MessageToString(bt), text_format.MessageToString(expected))
예제 #4
0
  def test_equality(self):
    bt1 = text_format.Parse(
        """
    atoms: ATOM_C
    atoms: ATOM_C
    atoms: ATOM_C
    bonds {
      atom_a: 2
      atom_b: 1
      bond_type: BOND_SINGLE
    },
    bonds {
      atom_a: 1
      atom_b: 0
      bond_type: BOND_SINGLE
    }
""", dataset_pb2.BondTopology())

    bt2 = text_format.Parse(
        """ atoms: ATOM_C
    atoms: ATOM_C
    atoms: ATOM_C
    bonds {
      atom_a: 0
      atom_b: 1
      bond_type: BOND_SINGLE
    },
    bonds {
      atom_a: 1
      atom_b: 2
      bond_type: BOND_SINGLE
    }
""", dataset_pb2.BondTopology())

    self.assertFalse(utilities.same_bond_topology(bt1, bt2))
    utilities.canonicalize_bond_topology(bt1)
    self.assertTrue(utilities.same_bond_topology(bt1, bt2))
예제 #5
0
def hydrogen_to_nearest_atom(
    bond_topology,
    distances):
  """Generate a BondTopology joining each hydrogen to its nearest heavy atom.

  Only heavy (non-hydrogen) atoms whose distance to the hydrogen is below
  THRESHOLD are candidates; the closest candidate receives a single bond.

  Args:
    bond_topology: BondTopology supplying the atoms; it is only read.
    distances: interatomic distances, indexed as distances[i, j].

  Returns:
    A new dataset_pb2.BondTopology with the same atoms and one single bond
    per hydrogen, or None if any hydrogen has no heavy atom within THRESHOLD.
  """
  hydrogen = dataset_pb2.BondTopology.AtomType.ATOM_H
  atom_types = bond_topology.atoms
  atom_count = len(atom_types)
  heavy_indices = [
      idx for idx in range(atom_count) if atom_types[idx] != hydrogen
  ]

  joined = dataset_pb2.BondTopology()
  joined.atoms[:] = atom_types

  for h_idx in range(atom_count):
    if atom_types[h_idx] != hydrogen:
      continue

    best_heavy = -1
    best_distance = 1.0e+30
    for heavy_idx in heavy_indices:
      gap = distances[h_idx, heavy_idx]
      if gap < THRESHOLD and gap < best_distance:
        best_distance = gap
        best_heavy = heavy_idx

    # Give up entirely as soon as one hydrogen cannot be attached.
    if best_heavy < 0:
      return None

    joined.bonds.append(
        dataset_pb2.BondTopology.Bond(
            atom_a=h_idx,
            atom_b=best_heavy,
            bond_type=dataset_pb2.BondTopology.BondType.BOND_SINGLE))

  return joined
    def test_ethane_all(self, btype, expected_bond):
        cc = text_format.Parse("""
      atoms: ATOM_C
      atoms: ATOM_C
""", dataset_pb2.BondTopology())
        bonds_to_scores = {(0, 1): np.zeros(4, dtype=np.float32)}
        bonds_to_scores[(0, 1)][btype] = 1.0
        matching_parameters = smu_molecule.MatchingParameters()
        matching_parameters.must_match_all_bonds = False
        mol = smu_molecule.SmuMolecule(cc, bonds_to_scores,
                                       matching_parameters)
        state = mol.generate_search_state()
        for s in itertools.product(*state):
            res = mol.place_bonds(s, matching_parameters)
            if btype == 0:
                self.assertIsNone(res)
            else:
                self.assertIsNotNone(res)
                self.assertLen(res.bonds, 1)
                self.assertEqual(res.bonds[0].bond_type, expected_bond)
예제 #7
0
    def place_bonds_inner(self, state):
        """Build the BondTopology that results from applying `state`.

        Starts from a copy of the starting topology and the initial score,
        then walks `state`, placing every bond whose type is not
        BOND_UNDEFINED and folding the score of every entry into the result.
        The calling function is responsible for validating `state` itself.

        Args:
          state: for each pair of atoms, the kind of bond to be placed.

        Returns:
          The resulting BondTopology, or None when a bond cannot be placed or
          when some tracked atom ends up without any bond.
        """
        undefined = dataset_pb2.BondTopology.BOND_UNDEFINED

        # Reset the per-atom bond bookkeeping to the hydrogens-only baseline.
        self._current_bonds_attached = np.copy(
            self._bonds_with_hydrogens_attached)

        topology = dataset_pb2.BondTopology()
        topology.CopyFrom(self._starting_bond_topology)
        topology.score = self._initial_score

        # One flag per atom, set once that atom takes part in a placed bond.
        bonded = np.zeros(self._heavy_atoms)

        for index, btype in enumerate(state):
            if btype != undefined:
                pair = self._bonds[index]
                first, second = pair[0], pair[1]
                if not self._place_bond(first, second, btype):
                    return None
                add_bond(first, second, btype, topology)
                bonded[first] = 1
                bonded[second] = 1

            # The score is accumulated for every entry, placed or not.
            topology.score = self._accumulate_score(
                topology.score, self._scores[index][btype])

        return topology if np.all(bonded) else None
예제 #8
0
def molecule_to_bond_topology(mol):
    """Convert a molecule object into a BondTopology.

    Args:
      mol: object exposing GetAtoms() and GetBonds(); presumably an RDKit
        molecule, judging by the helper names (confirm against callers).

    Returns:
      A dataset_pb2.BondTopology with one atom entry per atom of `mol` and
      one bond entry per bond of `mol`, in iteration order.
    """
    topology = dataset_pb2.BondTopology()
    topology.atoms.extend(
        [rdkit_atom_to_atom_type(atom) for atom in mol.GetAtoms()])

    for rd_bond in mol.GetBonds():
        bond_type = rdkit_bond_type_to_btype(rd_bond.GetBondType())
        topology.bonds.append(
            dataset_pb2.BondTopology.Bond(
                atom_a=rd_bond.GetBeginAtom().GetIdx(),
                atom_b=rd_bond.GetEndAtom().GetIdx(),
                bond_type=bond_type))

    return topology
예제 #9
0
  def get_molecule(self, oc_dist, cn_dist):
    """Build a four atom (O, C, N, H) Molecule laid out along the z axis.

    The atoms sit at z = 0, oc_dist, oc_dist + cn_dist and
    oc_dist + cn_dist + 1, each divided by smu_utils_lib.BOHR_TO_ANGSTROMS
    (the first one is stored as a literal 0).

    Args:
      oc_dist: distance placed between the O and C atoms.
      cn_dist: distance placed between the C and N atoms.

    Returns:
      A dataset_pb2.Molecule with molecule_id 12345, one bond topology
      (smiles 'N=C=O', bonds O=C, C=N, N-H) and four atom positions.
    """
    topology_cls = dataset_pb2.BondTopology
    scale = smu_utils_lib.BOHR_TO_ANGSTROMS

    topology = topology_cls(smiles='N=C=O')
    topology.atoms.extend([
        topology_cls.ATOM_O, topology_cls.ATOM_C, topology_cls.ATOM_N,
        topology_cls.ATOM_H
    ])
    bond_specs = (
        (0, 1, topology_cls.BondType.BOND_DOUBLE),
        (1, 2, topology_cls.BondType.BOND_DOUBLE),
        (2, 3, topology_cls.BondType.BOND_SINGLE),
    )
    for begin, end, kind in bond_specs:
      topology.bonds.append(
          topology_cls.Bond(atom_a=begin, atom_b=end, bond_type=kind))

    molecule = dataset_pb2.Molecule(molecule_id=12345)
    molecule.bond_topologies.append(topology)

    z_coordinates = (
        0,
        oc_dist / scale,
        (oc_dist + cn_dist) / scale,
        (oc_dist + cn_dist + 1) / scale,
    )
    for z_value in z_coordinates:
      molecule.optimized_geometry.atom_positions.append(
          dataset_pb2.Geometry.AtomPos(x=0, y=0, z=z_value))

    return molecule
예제 #10
0
  def test_single_fragment_4_atoms_3_bonds_no_ring(self):
    bt = text_format.Parse(
        """ atoms: ATOM_C
    atoms: ATOM_C
    atoms: ATOM_C
    atoms: ATOM_C
    bonds {
      atom_a: 0
      atom_b: 1
      bond_type: BOND_SINGLE
    }
    bonds {
      atom_a: 1
      atom_b: 2
      bond_type: BOND_SINGLE
    }
    bonds {
      atom_a: 2
      atom_b: 3
      bond_type: BOND_SINGLE
    }
""", dataset_pb2.BondTopology())
    self.assertTrue(utilities.is_single_fragment(bt))
    def test_operators(self):
        cc = text_format.Parse(
            """
      atoms: ATOM_C
      atoms: ATOM_C
      atoms: ATOM_C
""", dataset_pb2.BondTopology())
        #   print(f"Generating bonds {btype1} and {btype2}")
        bonds_to_scores = {
            (0, 1): np.zeros(4, dtype=np.float32),
            (1, 2): np.zeros(4, dtype=np.float32)
        }
        scores = np.array([1.0, 3.0], dtype=np.float32)
        bonds_to_scores[(0, 1)][1] = scores[0]
        bonds_to_scores[(1, 2)][1] = scores[1]
        matching_parameters = smu_molecule.MatchingParameters()
        matching_parameters.must_match_all_bonds = False
        mol = smu_molecule.SmuMolecule(cc, bonds_to_scores,
                                       matching_parameters)
        mol.set_initial_score_and_incrementer(1.0, operator.mul)
        state = mol.generate_search_state()
        for s in itertools.product(*state):
            res = mol.place_bonds(s, matching_parameters)
            self.assertAlmostEqual(res.score, np.product(scores))
예제 #12
0
def create_bond_topology(atoms, connectivity_matrix_string, hydrogens_string):
    """Creates a BondTopology from a compact string representation.

    Any hydrogens in the atoms string will be ignored. The appropriate number
    will be added based on what is in the hydrogens string.

    Args:
      atoms: a string like 'CCCCOON' (case insensitive) for the heavy atoms
      connectivity_matrix_string: a string for the upper triangular
        connectivity matrix with bond orders, like '010210'
      hydrogens_string: a string for the number of hydrogens connected to each
        heavy atom

    Returns:
      BondTopology with the heavy atoms first, followed by the hydrogens, each
      hydrogen joined to its heavy atom by a single bond.

    Raises:
      ValueError: on an unknown atom character, a bond order other than
        '0'-'3', or a hydrogen count that does not match the expected count
        (other than the N+ / O- adjustments described below).
    """
    atom_enum = dataset_pb2.BondTopology.AtomType
    bond_enum = dataset_pb2.BondTopology.BondType
    heavy_atom_types = {
        'c': atom_enum.ATOM_C,
        'n': atom_enum.ATOM_N,
        'o': atom_enum.ATOM_O,
        'f': atom_enum.ATOM_F,
    }
    bond_types = {
        '1': bond_enum.BOND_SINGLE,
        '2': bond_enum.BOND_DOUBLE,
        '3': bond_enum.BOND_TRIPLE,
    }

    bond_topology = dataset_pb2.BondTopology()

    # Add the heavy atoms; hydrogens in the atoms string are skipped.
    for atom_char in atoms.lower():
        if atom_char == 'h':
            continue
        if atom_char not in heavy_atom_types:
            raise ValueError('Unknown atom type: {}'.format(atom_char))
        bond_topology.atoms.append(heavy_atom_types[atom_char])

    num_heavy_atoms = len(bond_topology.atoms)

    # Now add the bonds between the heavy atoms. The string is consumed in
    # lockstep with the (i, j) index pairs of the strict upper triangle.
    if num_heavy_atoms > 1:
        for (i, j), bond_order in zip(
                np.nditer(np.triu_indices(num_heavy_atoms, k=1)),
                connectivity_matrix_string):
            if bond_order == '0':
                continue
            if bond_order not in bond_types:
                raise ValueError('Bad bond order {}'.format(bond_order))
            bond = bond_topology.bonds.add()
            bond.atom_a = int(i)
            bond.atom_b = int(j)
            bond.bond_type = bond_types[bond_order]

    # Now add the hydrogens, and adjust charged atoms if the total bond counts
    # indicate that.
    expected_hydrogens = compute_bonded_hydrogens(
        bond_topology, compute_adjacency_matrix(bond_topology))
    for atom_idx, (actual_h, expected_h) in enumerate(
            zip(hydrogens_string, expected_hydrogens)):
        actual_h = int(actual_h)
        diff = expected_h - actual_h
        atom_type = bond_topology.atoms[atom_idx]
        if diff == -1 and atom_type == atom_enum.ATOM_N:
            # One hydrogen more than expected on N: treat it as N+.
            bond_topology.atoms[atom_idx] = atom_enum.ATOM_NPOS
        elif diff == 1 and atom_type == atom_enum.ATOM_O:
            # One hydrogen fewer than expected on O: treat it as O-.
            bond_topology.atoms[atom_idx] = atom_enum.ATOM_ONEG
        elif diff:
            # Both halves need the f prefix so every field is interpolated.
            raise ValueError(
                f'Bad hydrogen count (actual={actual_h}, expected={expected_h}) '
                f'for {atom_type}, index {atom_idx}')
        for _ in range(actual_h):
            bond_topology.atoms.append(atom_enum.ATOM_H)
            h_idx = len(bond_topology.atoms) - 1
            bond = bond_topology.bonds.add()
            bond.atom_a = atom_idx
            bond.atom_b = h_idx
            bond.bond_type = bond_enum.BOND_SINGLE

    return bond_topology
예제 #13
0
def str_to_bond_topology(s):
  """Parse the text-format string `s` into a new BondTopology and return it."""
  return text_format.Parse(s, dataset_pb2.BondTopology())
예제 #14
0
  def test_single_fragment_two_disconnected_atoms(self):
    bt = text_format.Parse(""" atoms: ATOM_C
    atoms: ATOM_C
""", dataset_pb2.BondTopology())
    self.assertFalse(utilities.is_single_fragment(bt))
예제 #15
0
  def test_single_fragment_single_atom(self):
    bt = text_format.Parse(""" atoms: ATOM_C
""", dataset_pb2.BondTopology())
    self.assertTrue(utilities.is_single_fragment(bt))
예제 #16
0
    def test_scores(self):
        carbon = dataset_pb2.BondTopology.ATOM_C
        single_bond = dataset_pb2.BondTopology.BondType.BOND_SINGLE
        double_bond = dataset_pb2.BondTopology.BondType.BOND_DOUBLE

        # For testing, turn off the need for complete matching.
        smu_molecule.default_must_match_all_bonds = False

        all_distributions = bond_length_distribution.AllAtomPairLengthDistributions(
        )
        x, y = triangular_distribution(1.0, 1.4, 2.0)
        df = pd.DataFrame({"length": x, "count": y})
        bldc1c = bond_length_distribution.EmpiricalLengthDistribution(df, 0.0)
        all_distributions.add(carbon, carbon, single_bond, bldc1c)

        x, y = triangular_distribution(1.0, 1.5, 2.0)
        df = pd.DataFrame({"length": x, "count": y})
        bldc2c = bond_length_distribution.EmpiricalLengthDistribution(df, 0.0)
        all_distributions.add(carbon, carbon, double_bond, bldc2c)

        bond_topology = text_format.Parse(
            """
atoms: ATOM_C
atoms: ATOM_C
bonds: {
  atom_a: 0
  atom_b: 1
  bond_type: BOND_SINGLE
}
""", dataset_pb2.BondTopology())

        geometry = text_format.Parse(
            """
atom_positions {
  x: 0.0
  y: 0.0
  z: 0.0
},
atom_positions {
  x: 0.0
  y: 0.0
  z: 0.0
}
""", dataset_pb2.Geometry())
        geometry.atom_positions[1].x = 1.4 / smu_utils_lib.BOHR_TO_ANGSTROMS

        matching_parameters = smu_molecule.MatchingParameters()
        matching_parameters.must_match_all_bonds = False
        fate = dataset_pb2.Conformer.FATE_SUCCESS
        conformer_id = 1001
        result = topology_from_geom.bond_topologies_from_geom(
            all_distributions, conformer_id, fate, bond_topology, geometry,
            matching_parameters)
        self.assertIsNotNone(result)
        self.assertLen(result.bond_topology, 2)
        self.assertLen(result.bond_topology[0].bonds, 1)
        self.assertLen(result.bond_topology[1].bonds, 1)
        self.assertEqual(result.bond_topology[0].bonds[0].bond_type,
                         single_bond)
        self.assertEqual(result.bond_topology[1].bonds[0].bond_type,
                         double_bond)
        self.assertGreater(result.bond_topology[0].topology_score,
                           result.bond_topology[1].topology_score)
        self.assertAlmostEqual(
            np.sum(np.exp([bt.topology_score for bt in result.bond_topology])),
            1.0)
        self.assertAlmostEqual(result.bond_topology[0].geometry_score,
                               np.log(bldc1c.pdf(1.4)))
        self.assertAlmostEqual(result.bond_topology[1].geometry_score,
                               np.log(bldc2c.pdf(1.4)))
예제 #17
0
  def test_scores(self):
    carbon = dataset_pb2.BondTopology.ATOM_C
    single_bond = dataset_pb2.BondTopology.BondType.BOND_SINGLE
    double_bond = dataset_pb2.BondTopology.BondType.BOND_DOUBLE

    # For testing, turn off the need for complete matching.
    topology_molecule.default_must_match_all_bonds = False

    all_distributions = bond_length_distribution.AllAtomPairLengthDistributions(
    )
    bldc1c = triangular_distribution(1.0, 1.4, 2.0)
    all_distributions.add(carbon, carbon, single_bond, bldc1c)
    bldc2c = triangular_distribution(1.0, 1.5, 2.0)
    all_distributions.add(carbon, carbon, double_bond, bldc2c)

    molecule = dataset_pb2.Molecule()

    molecule.bond_topologies.append(
        text_format.Parse(
            """
atoms: ATOM_C
atoms: ATOM_C
bonds: {
  atom_a: 0
  atom_b: 1
  bond_type: BOND_SINGLE
}
""", dataset_pb2.BondTopology()))

    molecule.optimized_geometry.MergeFrom(
        text_format.Parse(
            """
atom_positions {
  x: 0.0
  y: 0.0
  z: 0.0
},
atom_positions {
  x: 0.0
  y: 0.0
  z: 0.0
}
""", dataset_pb2.Geometry()))
    molecule.optimized_geometry.atom_positions[1].x = (
        1.4 / smu_utils_lib.BOHR_TO_ANGSTROMS)

    matching_parameters = topology_molecule.MatchingParameters()
    matching_parameters.must_match_all_bonds = False
    molecule.properties.errors.fate = dataset_pb2.Properties.FATE_SUCCESS
    molecule.molecule_id = 1001
    result = topology_from_geom.bond_topologies_from_geom(
        molecule, all_distributions, matching_parameters)
    self.assertIsNotNone(result)
    self.assertLen(result.bond_topology, 2)
    self.assertLen(result.bond_topology[0].bonds, 1)
    self.assertLen(result.bond_topology[1].bonds, 1)
    self.assertEqual(result.bond_topology[0].bonds[0].bond_type, single_bond)
    self.assertEqual(result.bond_topology[1].bonds[0].bond_type, double_bond)
    self.assertGreater(result.bond_topology[0].topology_score,
                       result.bond_topology[1].topology_score)
    self.assertAlmostEqual(
        np.sum(np.exp([bt.topology_score for bt in result.bond_topology])), 1.0)
    self.assertAlmostEqual(result.bond_topology[0].geometry_score,
                           np.log(bldc1c.pdf(1.4)))
    self.assertAlmostEqual(result.bond_topology[1].geometry_score,
                           np.log(bldc2c.pdf(1.4)))