Exemple #1
0
    def test_atom_mapping_1(self):
        """Test that to_rdkit_mol returns correct indices and atom mappings.

        The conversion is exercised twice, once with ``remove_h=False`` and
        once with ``remove_h=True``. In both passes every atom that is expected
        to be present must appear in the returned mapping, and every bond
        between two mapped atoms must exist in the RDKit molecule with the
        same bond order. When hydrogens are removed, hydrogen atoms and bonds
        to hydrogen atoms are skipped.
        """
        bond_order_dict = {'SINGLE': 1, 'DOUBLE': 2, 'TRIPLE': 3, 'AROMATIC': 1.5}
        mol = Molecule().from_smiles('C1CCC=C1C=O')

        for remove_h in (False, True):
            rdkitmol, rd_atom_indices = to_rdkit_mol(mol, remove_h=remove_h, return_mapping=True)
            for atom in mol.atoms:
                if remove_h and atom.symbol == 'H':
                    # Hydrogens are not expected in the mapping for this pass
                    continue
                # Check that the atom is found in the mapping
                self.assertIn(atom, rd_atom_indices)
                # Check that all bonds are in rdkitmol with correct mapping and order
                for connected_atom, bond in atom.bonds.items():
                    if remove_h and connected_atom.symbol == 'H':
                        continue
                    rd_bond = rdkitmol.GetBondBetweenAtoms(rd_atom_indices[atom],
                                                           rd_atom_indices[connected_atom])
                    # A missing bond is reported as None, so fail with a clear
                    # message instead of an AttributeError on the next line.
                    self.assertIsNotNone(
                        rd_bond,
                        'Bond missing in RDKit molecule (remove_h={0})'.format(remove_h))
                    bond_type = str(rd_bond.GetBondType())
                    self.assertEqual(bond.order, bond_order_dict[bond_type])
Exemple #2
0
def _rdkit_translator(input_object, identifier_type, mol=None):
    """
    Converts between formats using RDKit. If input is a :class:`Molecule`,
    the identifier_type is used to determine the output type. If the input is
    a `str`, then the identifier_type is used to identify the input, and the
    desired output is assumed to be a :class:`Molecule` object.

    Args:
        input_object: either molecule or string identifier
        identifier_type: format of string identifier
            'inchi'    -> InChI
            'inchikey' -> InChI Key
            'sma'      -> SMARTS
            'smi'      -> SMILES
        mol: molecule object for output (optional)
    """
    if identifier_type == 'inchi' and not Chem.inchi.INCHI_AVAILABLE:
        raise DependencyError("RDKit installed without InChI. Please reinstall to read and write InChI strings.")

    if isinstance(input_object, str):
        # We are converting from a string identifier to a molecule
        if identifier_type == 'inchi':
            rdkitmol = Chem.inchi.MolFromInchi(input_object, removeHs=False)
        elif identifier_type == 'sma':
            rdkitmol = Chem.MolFromSmarts(input_object)
        elif identifier_type == 'smi':
            rdkitmol = Chem.MolFromSmiles(input_object)
        else:
            raise ValueError('Identifier type {0} is not supported for reading using RDKit.'.format(identifier_type))
        if rdkitmol is None:
            raise ValueError("Could not interpret the identifier {0!r}".format(input_object))
        if mol is None:
            mol = mm.Molecule()
        output = from_rdkit_mol(mol, rdkitmol)
    elif isinstance(input_object, mm.Molecule):
        # We are converting from a molecule to a string identifier
        if identifier_type == 'smi':
            rdkitmol = to_rdkit_mol(input_object, sanitize=False)
        else:
            rdkitmol = to_rdkit_mol(input_object, sanitize=True)
        if identifier_type == 'inchi':
            output = Chem.inchi.MolToInchi(rdkitmol, options='-SNon')
        elif identifier_type == 'inchikey':
            inchi = to_inchi(input_object)
            output = Chem.inchi.InchiToInchiKey(inchi)
        elif identifier_type == 'sma':
            output = Chem.MolToSmarts(rdkitmol)
        elif identifier_type == 'smi':
            if input_object.is_aromatic():
                output = Chem.MolToSmiles(rdkitmol)
            else:
                output = Chem.MolToSmiles(rdkitmol, kekuleSmiles=True)
        else:
            raise ValueError('Identifier type {0} is not supported for writing using RDKit.'.format(identifier_type))
    else:
        raise ValueError('Unexpected input format. Should be a Molecule or a string.')

    return output
Exemple #3
0
 def test_rdkit_round_trip(self):
     """Test conversion to and from RDKitMol"""
     for mol in self.test_mols:
         rdkit_mol = to_rdkit_mol(mol)
         new_mol = from_rdkit_mol(Molecule(), rdkit_mol)
         self.assertTrue(mol.is_isomorphic(new_mol) or self.test_Hbond_free_mol.is_isomorphic(new_mol))
         self.assertEqual(mol.get_element_count(), new_mol.get_element_count())
Exemple #4
0
    def test_atom_mapping_2(self):
        """Test that to_rdkit_mol returns correct indices and atom mappings when hydrogens are removed."""
        adjlist = """
1 H u0 p0 c0 {2,S}
2 C u0 p0 c0 {1,S} {3,S} {4,S} {5,S}
3 H u0 p0 c0 {2,S}
4 H u0 p0 c0 {2,S}
5 O u0 p2 c0 {2,S} {6,S}
6 H u0 p0 c0 {5,S}
        """

        mol = Molecule().from_adjacency_list(adjlist)
        rdkitmol, rd_atom_indices = to_rdkit_mol(mol,
                                                 remove_h=True,
                                                 return_mapping=True)

        heavy_atoms = [at for at in mol.atoms if at.number != 1]
        for at1 in heavy_atoms:
            for at2 in heavy_atoms:
                if mol.has_bond(at1, at2):
                    try:
                        rdkitmol.GetBondBetweenAtoms(rd_atom_indices[at1],
                                                     rd_atom_indices[at2])
                    except RuntimeError:
                        self.fail(
                            "RDKit failed in finding the bond in the original atom!"
                        )
Exemple #5
0
    def test_lone_pair_retention(self):
        """Test that we don't lose any lone pairs on round trip RDKit conversion."""
        mol = Molecule().from_adjacency_list("""
1 C u0 p0 c0 {2,D} {3,S} {4,S}
2 O u0 p2 c0 {1,D}
3 H u0 p0 c0 {1,S}
4 H u0 p0 c0 {1,S}
""")
        rdmol = to_rdkit_mol(mol)

        mol2 = from_rdkit_mol(Molecule(), rdmol)
        self.assertTrue(mol.is_isomorphic(mol2))
Exemple #6
0
def create_augmented_layers(mol):
    """
    Build the u-layer and p-layer strings for a molecule.

    The indices in the strings refer to the atom indices in the molecule, according to the
    atom order obtained from the N-layer of the InChI auxiliary information.

    Procedure:
        1. A deep copy of the molecule is made and its hydrogen atoms (atomic number 1)
           are removed.
        2. The copy is converted to an RDKit molecule and the InChI auxiliary
           information is generated.
        3. The N-layer of the auxiliary information is parsed and the atoms of the copy
           are reordered accordingly.
        4. The u-layer and p-layer are created from the reordered copy and the
           auxiliary information.

    Returns:
        A ``(ulayer, player)`` tuple. ``(None, None)`` is returned when the molecule has
        no radicals and no unexpected lone pairs; for a molecule whose formula is 'H' the
        u-layer is ``U_LAYER_PREFIX + '1'`` and the p-layer is ``None``.
    """
    # Nothing to augment: no radicals and no unexpected lone pairs
    if not (mol.get_radical_count() != 0 or _has_unexpected_lone_pairs(mol)):
        return None, None

    # A lone hydrogen atom is handled as a special case
    if mol.get_formula() == 'H':
        return U_LAYER_PREFIX + '1', None

    molcopy = mol.copy(deep=True)

    # Collect the hydrogens first so the atom list is not modified while iterating over it
    for hydrogen in [a for a in molcopy.atoms if a.number == 1]:
        molcopy.remove_atom(hydrogen)

    rdkitmol = to_rdkit_mol(molcopy)
    # '-SNon' suppresses stereo warnings
    inchi_and_aux = Chem.MolToInchiAndAuxInfo(rdkitmol, options='-SNon')
    auxinfo = inchi_and_aux[1]

    # extract the atom numbers from the N-layer of the auxiliary info, then look up
    # the position of each (1-based) atom number within that layer
    n_layer = _parse_n_layer(auxinfo)
    current_atoms = molcopy.atoms
    positions = [n_layer.index(number) for number in range(1, len(current_atoms) + 1)]

    # reorder the atoms by their position in the N-layer (stable sort)
    ordering = sorted(range(len(positions)), key=positions.__getitem__)
    molcopy.atoms = [current_atoms[k] for k in ordering]

    ulayer = _create_u_layer(molcopy, auxinfo)
    player = _create_p_layer(molcopy, auxinfo)
    return ulayer, player