Esempio n. 1
0
    def __init__(self,
                 mol: rdkit.Chem.rdchem.Mol,
                 ccd_cif_dict: Dict[str, Any] = None,
                 properties: CCDProperties = None,
                 descriptors: List[Descriptor] = None) -> None:
        """Initialise the instance around an RDKit molecule.

        Args:
            mol: Molecule stored as ``self.mol``; its conformer count
                decides the value mapped to ``ConformerType.Model``.
            ccd_cif_dict: Stored unchanged as ``self.ccd_cif_dict``.
            properties: When given, stored as ``self._cif_properties``.
                When omitted, that attribute is not created here.
            descriptors: When given, replaces the initially empty
                ``self._descriptors`` list.
        """
        # Value used for the model conformer: 1 when the molecule holds
        # exactly two conformers, 1000 otherwise.
        # NOTE(review): the mapped integers look like RDKit conformer
        # ids -- confirm against the code that consumes this mapping.
        model_value = 1000
        if len(mol.GetConformers()) == 2:
            model_value = 1

        self.conformers_mapping = {
            ConformerType.AllConformers: -1,
            ConformerType.Ideal: 0,
            ConformerType.Model: model_value,
            ConformerType.Computed: 2000,
        }

        # State that exists before sanitization runs.
        self.mol = mol
        self._mol_no_h = None
        self.mol2D = None
        self.ccd_cif_dict = ccd_cif_dict
        self._fragments: Dict[str, SubstructureMapping] = {}
        self._scaffolds: Dict[str, SubstructureMapping] = {}
        self._descriptors: List[Descriptor] = []
        self._inchi_from_rdkit = ''
        self._inchikey_from_rdkit = ''

        # Whatever _sanitize() returns is kept for later inspection.
        self._sanitization_issues = self._sanitize()

        self._physchem_properties: Dict[str, Any] = {}
        self._external_mapping: List[Tuple[str, str]] = []

        # Optional caller-supplied data overrides / extends the defaults.
        if descriptors is not None:
            self._descriptors = descriptors

        if properties is not None:
            self._cif_properties = properties
Esempio n. 2
0
def get_atom_count(mol: rdkit.Chem.rdchem.Mol,
                   radical_check: bool = False) -> collections.Counter:
    """Count the atoms of each element in a molecule.

    The counts are parsed out of the molecular formula string produced
    by ``AllChem.CalcMolFormula``.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule whose atoms are counted.
    radical_check : bool, optional
        When true, add one ``"*"`` entry if any atom reports radical
        electrons.

    Returns
    -------
    atoms : collections.Counter
        Element symbol -> number of atoms of that element.
    """
    formula = AllChem.CalcMolFormula(mol)
    element_counts = collections.Counter()

    # Each match is (symbol, digits), e.g. ("C", "6") or ("O", "").
    # An empty digit string means the implicit count of one.
    for symbol, digits in re.findall(r"([A-Z][a-z]*)(\d*)", formula):
        element_counts[symbol] += int(digits) if digits else 1

    # A single "*" marks the presence of radicals, however many there are.
    if radical_check and any(
            atom.GetNumRadicalElectrons() for atom in mol.GetAtoms()):
        element_counts["*"] += 1

    return element_counts
Esempio n. 3
0
def set_atomic_charges(
    mol: rdkit.Chem.rdchem.Mol,
    atomic_numbers: Iterable[int],
    atomic_valence_electrons,
    BO_valences,
    BO_matrix,
    mol_charge,
) -> rdkit.Chem.rdchem.Mol:
    """Assign formal charges to the atoms of ``mol``.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule whose atoms receive formal charges (modified in place
        before being passed through ``_clean_charges``).
    atomic_numbers : Iterable[int]
        Atomic number of each atom, in atom-index order.
    atomic_valence_electrons
        Lookup indexed by atomic number; the value is passed on to
        ``_get_atomic_charge``.
    BO_valences
        Per-atom values indexed by atom position.
    BO_matrix
        Two-dimensional bond-order table; row ``i`` is scanned for
        entries equal to 1.
    mol_charge
        Value the running charge total is compared against.

    Returns
    -------
    rdkit.Chem.rdchem.Mol
        The result of ``_clean_charges(mol)``.
    """
    running_total = 0
    for idx, atomic_number in enumerate(atomic_numbers):
        rd_atom = mol.GetAtomWithIdx(idx)
        formal_charge = _get_atomic_charge(
            atomic_number,
            atomic_valence_electrons[atomic_number],
            BO_valences[idx],
        )
        running_total += formal_charge

        # Carbon (atomic number 6) gets two special-case adjustments
        # based on how many bond-order-1 entries its matrix row holds.
        if atomic_number == 6:
            single_bond_count = list(BO_matrix[idx, :]).count(1)
            if single_bond_count == 2 and BO_valences[idx] == 2:
                running_total += 1
                formal_charge = 0
            if single_bond_count == 3 and running_total + 1 < mol_charge:
                running_total += 2
                formal_charge = 1

        # Only non-zero charges are written to the atom.
        if abs(formal_charge) > 0:
            rd_atom.SetFormalCharge(int(formal_charge))

    return _clean_charges(mol)
def get_subrdmol(rdmol: rdkit.Chem.rdchem.Mol,
                 indices: list = None,
                 sanitize: bool = False):
    """Create a new sub-molecule from selected atom indices.

    Parameters
    ----------
    rdmol: rdkit.Chem.rdchem.Mol
        Input molecule; it is copied, not modified.
    indices: iterable of ints, optional
        Atom indices to keep from the input molecule, indexed from 0.
        When omitted (or empty) no atom is kept.
    sanitize: bool
        Whether to sanitize the molecule (recommend: no).

    Returns
    -------
    rdkit.Chem.rdchem.Mol: subset of molecule
    """
    # A set gives O(1) membership tests; None stands in for the former
    # mutable ``[]`` default and means "keep nothing".
    keep = set() if indices is None else set(indices)

    submol = Chem.RWMol(rdmol)
    to_remove = [
        at.GetIdx() for at in rdmol.GetAtoms() if at.GetIdx() not in keep
    ]
    # Remove from the highest index downwards -- presumably so that the
    # indices still waiting to be removed are not shifted by earlier
    # removals.
    for atom_idx in sorted(to_remove, reverse=True):
        submol.RemoveAtom(int(atom_idx))

    if sanitize:
        Chem.SanitizeMol(submol)

    for atom in submol.GetAtoms():
        atom.SetNoImplicit(True)

    remove_charge_and_bond_order_from_imidazole(submol)
    remove_charge_and_bond_order_from_guanidinium(submol)
    return submol
Esempio n. 5
0
def neutralise_charges(mol: rdkit.Chem.rdchem.Mol,
                       reactions=None) -> rdkit.Chem.rdchem.Mol:
    """Replace charged groups in an rdkit mol with neutral counterparts.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to neutralize.
    reactions : list, optional
        ``(pattern, replacement)`` pairs of rdkit molecules. When None,
        a built-in set is compiled on first use and cached in the
        module-level ``_REACTIONS``.

    Returns
    -------
    mol : rdkit.Chem.rdchem.Mol
        Molecule obtained after every pattern has been replaced until
        it no longer matches.
    """
    def _initialise_neutralisation_reactions():
        # Each row: (SMARTS matching a charged group,
        #            SMILES of the replacement atom).
        smarts_smiles_table = (
            # Imidazoles
            ("[n+;H]", "n"),
            # Amines
            ("[N+;!H0]", "N"),
            # Carboxylic acids and alcohols
            ("[$([O-]);!$([O-][#7])]", "O"),
            # Thiols
            ("[S-;X1]", "S"),
            # Sulfonamides
            ("[$([N-;X2]S(=O)=O)]", "N"),
            # Enamines
            ("[$([N-;X2][C,N]=C)]", "N"),
            # Tetrazoles
            ("[n-]", "[nH]"),
            # Sulfoxides
            ("[$([S-]=O)]", "S"),
            # Amides
            ("[$([N-]C=O)]", "N"),
        )
        compiled = []
        for smarts, smiles in smarts_smiles_table:
            pattern = AllChem.MolFromSmarts(smarts)
            replacement = AllChem.MolFromSmiles(smiles, False)
            compiled.append((pattern, replacement))
        return compiled

    global _REACTIONS  # pylint: disable=global-statement
    if reactions is None:
        # Compile the default table once and reuse it on later calls.
        if _REACTIONS is None:
            _REACTIONS = _initialise_neutralisation_reactions()
        reactions = _REACTIONS

    for charged, neutral in reactions:
        # Only the first product of each call is kept, so loop until
        # the pattern stops matching.
        while mol.HasSubstructMatch(charged):
            mol = AllChem.ReplaceSubstructs(mol, charged, neutral)[0]
    return mol
Esempio n. 6
0
def set_atomic_radicals(
    mol: rdkit.Chem.rdchem.Mol,
    atomic_numbers: Iterable[int],
    atomic_valence_electrons,
    BO_valences,
) -> rdkit.Chem.rdchem.Mol:
    """Set radical electron counts on the atoms of ``mol``.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule whose atoms are updated in place.
    atomic_numbers : Iterable[int]
        Atomic number of each atom, in atom-index order.
    atomic_valence_electrons
        Lookup indexed by atomic number; the value is passed on to
        ``_get_atomic_charge``.
    BO_valences
        Per-atom values indexed by atom position.

    Returns
    -------
    rdkit.Chem.rdchem.Mol
        The same ``mol`` object that was passed in.
    """
    # The number of radical electrons = absolute atomic charge
    for idx, atomic_number in enumerate(atomic_numbers):
        rd_atom = mol.GetAtomWithIdx(idx)
        charge = _get_atomic_charge(
            atomic_number,
            atomic_valence_electrons[atomic_number],
            BO_valences[idx],
        )

        # Atoms with zero charge are left untouched.
        if not abs(charge) > 0:
            continue
        rd_atom.SetNumRadicalElectrons(abs(int(charge)))

    return mol
Esempio n. 7
0
    def _connectivity_COO_format(mol: rdkit.Chem.rdchem.Mol) -> np.array:
        """
        Returns the connectivity of the molecular graph in COO format.

        Every bond is emitted twice, once per direction, so the result
        describes an undirected graph as pairs of directed edges.

        Parameters:
            mol: rdkit molecule to extract bonds from
        Returns:
            array: integer graph connectivity in COO format with shape
                [2, num_edges]; shape [2, 0] for a molecule without bonds
        """

        sources, targets = [], []

        for bond in mol.GetBonds():
            begin, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
            # begin -> end followed by end -> begin
            sources.extend((begin, end))
            targets.extend((end, begin))

        # Explicit integer dtype: with no bonds numpy would otherwise
        # infer float64 from the empty lists, which is not a valid index
        # type. For non-empty input this matches the default inference.
        return np.array([sources, targets], dtype=int)