예제 #1
0
 def test_compute_smiles_from_molecule_no_hs(self):
   """Checks the SMILES produced for a molecule that carries no hydrogens."""
   molecule = Chem.MolFromSmiles('FOC', sanitize=False)

   smiles_without_hs = smu_utils_lib.compute_smiles_for_molecule(
       molecule, include_hs=False)
   self.assertEqual(smiles_without_hs, 'COF')

   # Asking for hydrogens cannot add any to the output when the molecule
   # itself has none, so the very same SMILES is expected here.
   smiles_with_hs = smu_utils_lib.compute_smiles_for_molecule(
       molecule, include_hs=True)
   self.assertEqual(smiles_with_hs, 'COF')
예제 #2
0
 def test_compute_smiles_from_molecule_with_hs(self):
   """Checks the SMILES produced after hydrogens are added to the molecule."""
   parsed = Chem.MolFromSmiles('FOC', sanitize=False)
   # Run only the ADJUSTHS sanitization step before adding the hydrogens.
   adjust_hs_flag = Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS
   Chem.SanitizeMol(parsed, adjust_hs_flag)
   molecule = Chem.AddHs(parsed)

   smiles_without_hs = smu_utils_lib.compute_smiles_for_molecule(
       molecule, include_hs=False)
   self.assertEqual(smiles_without_hs, 'COF')

   smiles_with_hs = smu_utils_lib.compute_smiles_for_molecule(
       molecule, include_hs=True)
   self.assertEqual(smiles_with_hs, '[H]C([H])([H])OF')
예제 #3
0
 def test_compute_smiles_from_molecule_labeled_no_h(self):
   """Checks atom-labeled SMILES when hydrogens are left out of the output."""
   source_smiles = '[O-][N+]([H])([H])N([H])OC([H])([H])F'
   molecule = Chem.MolFromSmiles(source_smiles, sanitize=False)
   self.assertIsNotNone(molecule)

   labeled_smiles = smu_utils_lib.compute_smiles_for_molecule(
       molecule, include_hs=False, labeled_atoms=True)
   self.assertEqual('[O-][NH2+:1][NH:2][O:3][CH2:4][F:5]', labeled_smiles)
예제 #4
0
 def test_compute_smiles_from_molecule_labeled_with_h(self):
   """Checks atom-labeled SMILES when hydrogens are kept in the output."""
   source_smiles = '[O-][N+]([H])([H])N([H])OC([H])([H])F'
   molecule = Chem.MolFromSmiles(source_smiles, sanitize=False)
   self.assertIsNotNone(molecule)

   labeled_smiles = smu_utils_lib.compute_smiles_for_molecule(
       molecule, include_hs=True, labeled_atoms=True)
   self.assertEqual(
       '[O-][N+:1]([H:2])([H:3])[N:4]([H:5])[O:6][C:7]([H:8])([H:9])[F:10]',
       labeled_smiles)
예제 #5
0
 def test_compute_smiles_from_molecule_special_case(self):
   """Checks the SMILES returned for the special-cased ring system."""
   molecule = Chem.MolFromSmiles('C12=C3C4=C1C4=C23', sanitize=False)

   # Guard that this input really is the special case: plain RDKit hands back
   # the SMILES that was put in, even though that is not the one we want.
   rdkit_smiles = Chem.MolToSmiles(molecule, kekuleSmiles=True)
   self.assertEqual('C12=C3C4=C1C4=C23', rdkit_smiles)

   computed_smiles = smu_utils_lib.compute_smiles_for_molecule(
       molecule, include_hs=False)
   self.assertEqual(computed_smiles, 'C12=C3C1=C1C2=C31')
예제 #6
0
    def find_by_smiles(self, smiles):
        """Looks up the conformers stored for a SMILES string.

        The input is parsed without sanitization and converted with
        smu_utils_lib.compute_smiles_for_molecule (hydrogens excluded); the
        resulting string is what gets matched against the SMILES table.

        Args:
          smiles: string

        Returns:
          An empty list when no row matches, otherwise the result of
          find_by_bond_topology_id for the matching btid (an iterable of
          dataset_pb2.Conformer).
        """
        parsed = Chem.MolFromSmiles(smiles, sanitize=False)
        lookup_key = smu_utils_lib.compute_smiles_for_molecule(
            parsed, include_hs=False)

        cursor = self._conn.cursor()
        query = f'SELECT btid FROM {_SMILES_TABLE_NAME} WHERE smiles = ?'
        cursor.execute(query, (lookup_key, ))
        rows = cursor.fetchall()

        if rows:
            # The smiles column has a unique index, so a hit is exactly one
            # row, and that row is a one-element tuple holding the btid.
            assert len(rows) == 1
            assert len(rows[0]) == 1
            return self.find_by_bond_topology_id(rows[0][0])

        return []
예제 #7
0
def bond_topologies_from_geom(bond_lengths, conformer_id, fate, bond_topology,
                              geometry, matching_parameters):
    """Enumerate the BondTopology candidates compatible with a geometry.

    Hydrogens are first attached via `hydrogen_to_nearest_atom`; every pair of
    heavy atoms closer than THRESHOLD is then scored per bond type, and each
    combination from the resulting search space that yields a topology with a
    not-yet-seen SMILES is collected. Each collected topology is passed
    through `utilities.canonical_bond_topology`.

    Args:
      bond_lengths: object providing
        `probability_of_bond_types(atom_type, atom_type, distance)`; also
        handed to `geometry_score`.
      conformer_id: copied into the result's `conformer_id`.
      fate: outcome of calculations; copied into the result's `fate`.
      bond_topology: starting topology; its `atoms`, `smiles` and
        `bond_topology_id` are read.
      geometry: coordinates for the bond_topology; must have one entry in
        `atom_positions` per atom.
      matching_parameters: options; `consider_not_bonded`,
        `ring_atom_count_cannot_decrease` and `smiles_with_h` are read here,
        and the object is forwarded to SmuMolecule and `place_bonds`.

    Returns:
      TopologyMatches. Its `bond_topology` list is empty when there is a
      single atom, when the geometry size disagrees with the atom count, when
      hydrogens cannot be attached, or when no heavy-atom pair gets a score.
      Otherwise entries are ordered by descending score, each carrying
      `topology_score = log(score / sum of scores)` with `score` cleared.
    """
    matches = dataset_pb2.TopologyMatches()
    matches.starting_smiles = bond_topology.smiles
    matches.conformer_id = conformer_id
    matches.fate = fate

    atom_count = len(bond_topology.atoms)
    # Nothing to do for a lone atom, or when the geometry does not describe
    # the same number of atoms as the topology.
    if atom_count == 1 or len(geometry.atom_positions) != atom_count:
        return matches

    distances = utilities.distances(geometry)

    # Attaching every hydrogen to its nearest heavy atom gives the seed
    # topology from which all candidates are grown.
    seed_topology = hydrogen_to_nearest_atom(bond_topology, distances)
    if seed_topology is None:
        return matches

    heavy_atoms = [
        index for index, atom_type in enumerate(bond_topology.atoms)
        if atom_type != dataset_pb2.BondTopology.AtomType.ATOM_H
    ]

    # Maps a heavy-atom pair (i, j) to an array holding one score per bond
    # type.
    pair_scores: Dict[Tuple[int, int], np.ndarray] = {}
    for first, second in itertools.combinations(heavy_atoms, 2):
        separation = distances[first, second]
        if separation > THRESHOLD:
            continue
        try:
            type_probabilities = bond_lengths.probability_of_bond_types(
                bond_topology.atoms[first], bond_topology.atoms[second],
                separation)
        except KeyError:
            # No data is available for this combination of atom types.
            continue
        if type_probabilities:
            # Bond-type keys double as array indices; this relies on
            # BOND_SINGLE == 1 etc.
            scores = np.zeros(4, np.float32)
            for bond_type, probability in type_probabilities.items():
                scores[bond_type] = probability
            pair_scores[(first, second)] = scores

    if not pair_scores:  # Seems unlikely.
        return matches

    # Reference values used to recognise the starting topology and to
    # compare ring atom counts.
    reference_mol = smu_utils_lib.bond_topology_to_molecule(bond_topology)
    reference_smiles = smu_utils_lib.compute_smiles_for_molecule(
        reference_mol, include_hs=True)
    reference_ring_atoms = utilities.ring_atom_count_mol(reference_mol)

    # SMILES already produced, so duplicates can be skipped.
    seen_smiles: Set[str] = set()

    searcher = smu_molecule.SmuMolecule(seed_topology, pair_scores,
                                        matching_parameters)

    for assignment in itertools.product(*searcher.generate_search_state()):
        candidate = searcher.place_bonds(list(assignment),
                                         matching_parameters)
        if not candidate:
            continue

        candidate_mol = smu_utils_lib.bond_topology_to_molecule(candidate)
        if (matching_parameters.consider_not_bonded
                and len(Chem.GetMolFrags(candidate_mol)) > 1):
            # More than one fragment: skip this candidate.
            continue

        smiles_with_hs = smu_utils_lib.compute_smiles_for_molecule(
            candidate_mol, include_hs=True)
        if smiles_with_hs in seen_smiles:
            continue
        seen_smiles.add(smiles_with_hs)

        if matching_parameters.ring_atom_count_cannot_decrease:
            ring_atoms = utilities.ring_atom_count_mol(candidate_mol)
            if ring_atoms < reference_ring_atoms:
                continue
            candidate.ring_atom_count = ring_atoms

        candidate.bond_topology_id = bond_topology.bond_topology_id
        utilities.canonical_bond_topology(candidate)

        if smiles_with_hs == reference_smiles:
            candidate.is_starting_topology = True

        # The reported SMILES drops hydrogens unless they were requested.
        if matching_parameters.smiles_with_h:
            reported_smiles = smiles_with_hs
        else:
            reported_smiles = smu_utils_lib.compute_smiles_for_molecule(
                candidate_mol, include_hs=False)

        candidate.geometry_score = geometry_score(candidate, distances,
                                                  bond_lengths)
        candidate.smiles = reported_smiles
        matches.bond_topology.append(candidate)

    if len(matches.bond_topology) > 1:
        matches.bond_topology.sort(key=lambda topology: topology.score,
                                   reverse=True)

    # Replace each raw score by the log of its share of the total score.
    total_score = np.sum(
        [topology.score for topology in matches.bond_topology])
    for topology in matches.bond_topology:
        topology.topology_score = np.log(topology.score / total_score)
        topology.ClearField("score")

    return matches