def __init__(self, mol: rdkit.Chem.rdchem.Mol, ccd_cif_dict: Dict[str, Any] = None,
             properties: CCDProperties = None, descriptors: List[Descriptor] = None) -> None:
    """Initialise the component state around an RDKit molecule.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule wrapped by this object; stored as ``self.mol``.
    ccd_cif_dict : Dict[str, Any], optional
        Stored unchanged as ``self.ccd_cif_dict``.
    properties : CCDProperties, optional
        When given, stored as ``self._cif_properties``. When omitted the
        attribute is not created by this constructor.
    descriptors : List[Descriptor], optional
        When given, replaces the initially empty descriptor list.
    """
    # The Model conformer id is 1 only when the molecule carries exactly
    # two conformers; otherwise the placeholder id 1000 is used.
    model_conformer_id = 1 if len(mol.GetConformers()) == 2 else 1000
    self.conformers_mapping = {
        ConformerType.AllConformers: -1,
        ConformerType.Ideal: 0,
        ConformerType.Model: model_conformer_id,
        ConformerType.Computed: 2000,
    }

    # Molecule and its derived representations (filled in elsewhere).
    self.mol = mol
    self._mol_no_h = None
    self.mol2D = None
    self.ccd_cif_dict = ccd_cif_dict

    # Empty containers and cached identifiers.
    self._fragments: Dict[str, SubstructureMapping] = {}
    self._scaffolds: Dict[str, SubstructureMapping] = {}
    self._descriptors: List[Descriptor] = []
    self._inchi_from_rdkit = ''
    self._inchikey_from_rdkit = ''

    # Sanitization runs at this point, after the attributes above exist
    # and before the caller-supplied descriptors/properties are applied.
    self._sanitization_issues = self._sanitize()

    self._physchem_properties: Dict[str, Any] = {}
    self._external_mapping: List[Tuple[str, str]] = []

    # Caller-supplied values override the defaults only when provided.
    if descriptors is not None:
        self._descriptors = descriptors

    if properties is not None:
        self._cif_properties = properties
def get_atom_count(mol: rdkit.Chem.rdchem.Mol, radical_check: bool = False) -> collections.Counter:
    """Count the atoms of each element in a molecule.

    The counts are parsed from the molecular formula string produced by
    ``AllChem.CalcMolFormula``.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule whose atoms are counted.
    radical_check : bool, optional
        When true, a single ``"*"`` entry is added to the counter if any
        atom reports radical electrons.

    Returns
    -------
    atoms : collections.Counter
        Mapping of element symbol to the number of atoms of that element.
    """
    formula = AllChem.CalcMolFormula(mol)
    atoms = collections.Counter()

    # Each match is (symbol, digits); an empty digit string means the
    # element occurs once, since ones are implicit in chemical formulas.
    for symbol, digits in re.findall(r"([A-Z][a-z]*)(\d*)", formula):
        atoms[symbol] += int(digits) if digits else 1

    if radical_check and any(
            atom.GetNumRadicalElectrons() for atom in mol.GetAtoms()):
        atoms["*"] += 1

    return atoms
def set_atomic_charges(
    mol: rdkit.Chem.rdchem.Mol,
    atomic_numbers: Iterable[int],
    atomic_valence_electrons,
    BO_valences,
    BO_matrix,
    mol_charge,
) -> rdkit.Chem.rdchem.Mol:
    """Assign formal charges to the atoms of ``mol`` and clean the result.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule whose atoms receive formal charges (modified in place
        before being passed to ``_clean_charges``).
    atomic_numbers : Iterable[int]
        Atomic number of each atom, in atom-index order.
    atomic_valence_electrons
        Lookup indexed by atomic number; passed to ``_get_atomic_charge``.
    BO_valences
        Per-atom valence values, indexed by atom index.
    BO_matrix
        Bond-order matrix, indexed as ``BO_matrix[i, :]`` per atom.
    mol_charge
        Total charge the running sum is compared against.

    Returns
    -------
    rdkit.Chem.rdchem.Mol
        The value returned by ``_clean_charges(mol)``.
    """
    running_charge = 0
    for idx, atomic_number in enumerate(atomic_numbers):
        rd_atom = mol.GetAtomWithIdx(idx)
        formal_charge = _get_atomic_charge(
            atomic_number,
            atomic_valence_electrons[atomic_number],
            BO_valences[idx],
        )
        running_charge += formal_charge

        # Carbon (atomic number 6) gets two special-case corrections based
        # on how many entries equal to 1 its bond-order row contains.
        # NOTE(review): the chemical rationale is not visible here — confirm
        # against the algorithm's reference before changing these rules.
        if atomic_number == 6:
            single_bond_count = list(BO_matrix[idx, :]).count(1)
            if single_bond_count == 2 and BO_valences[idx] == 2:
                running_charge += 1
                formal_charge = 0
            elif single_bond_count == 3 and running_charge + 1 < mol_charge:
                running_charge += 2
                formal_charge = 1

        # Only non-zero charges are written back to the atom.
        if abs(formal_charge) > 0:
            rd_atom.SetFormalCharge(int(formal_charge))

    return _clean_charges(mol)
def get_subrdmol(rdmol: rdkit.Chem.rdchem.Mol, indices: list = None, sanitize: bool = False):
    """Create new sub-molecule from selected atom indices

    Parameters
    ----------
    rdmol: rdkit.Chem.rdchem.Mol
        Input molecule
    indices: iterable of ints, optional
        atom indices to include from input molecule, indexed from 0.
        When omitted (or empty) every atom is removed.
    sanitize: bool
        whether to sanitize the molecule (recommend: no)

    Returns
    -------
    rdkit.Chem.rdchem.Mol: subset of molecule
    """
    # A set gives O(1) membership tests and avoids a shared mutable default.
    keep = set(indices) if indices is not None else set()

    submol = Chem.RWMol(rdmol)
    # Remove from the highest index downwards so that earlier removals do
    # not shift the indices of atoms that are still to be removed.
    to_remove = sorted(
        (at.GetIdx() for at in rdmol.GetAtoms() if at.GetIdx() not in keep),
        reverse=True,
    )
    for idx in to_remove:
        submol.RemoveAtom(int(idx))

    if sanitize:
        Chem.SanitizeMol(submol)

    for atom in submol.GetAtoms():
        atom.SetNoImplicit(True)

    # Both helpers operate on ``submol`` in place.
    remove_charge_and_bond_order_from_imidazole(submol)
    remove_charge_and_bond_order_from_guanidinium(submol)
    return submol
def neutralise_charges(mol: rdkit.Chem.rdchem.Mol, reactions=None) -> rdkit.Chem.rdchem.Mol:
    """Neutralize charged groups in an rdkit mol by substructure replacement.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule to neutralize.
    reactions : list, optional
        Sequence of ``(pattern, replacement)`` molecule pairs. When None,
        the module-level default set is built on first use and cached in
        ``_REACTIONS``.

    Returns
    -------
    mol : rdkit.Chem.rdchem.Mol
        Molecule after all replacements have been applied.
    """
    global _REACTIONS  # pylint: disable=global-statement

    def _initialise_neutralisation_reactions():
        """Compile the default (SMARTS pattern, SMILES replacement) pairs."""
        smarts_and_smiles = (
            # Imidazoles
            ("[n+;H]", "n"),
            # Amines
            ("[N+;!H0]", "N"),
            # Carboxylic acids and alcohols
            ("[$([O-]);!$([O-][#7])]", "O"),
            # Thiols
            ("[S-;X1]", "S"),
            # Sulfonamides
            ("[$([N-;X2]S(=O)=O)]", "N"),
            # Enamines
            ("[$([N-;X2][C,N]=C)]", "N"),
            # Tetrazoles
            ("[n-]", "[nH]"),
            # Sulfoxides
            ("[$([S-]=O)]", "S"),
            # Amides
            ("[$([N-]C=O)]", "N"),
        )
        compiled = []
        for smarts, smiles in smarts_and_smiles:
            pattern = AllChem.MolFromSmarts(smarts)
            replacement = AllChem.MolFromSmiles(smiles, False)
            compiled.append((pattern, replacement))
        return compiled

    if reactions is None:
        # Build the defaults lazily and reuse them on later calls.
        if _REACTIONS is None:
            _REACTIONS = _initialise_neutralisation_reactions()
        reactions = _REACTIONS

    for pattern, replacement in reactions:
        # Keep replacing until this pattern no longer matches; only the
        # first product of each replacement is carried forward.
        while mol.HasSubstructMatch(pattern):
            mol = AllChem.ReplaceSubstructs(mol, pattern, replacement)[0]

    return mol
def set_atomic_radicals(
    mol: rdkit.Chem.rdchem.Mol,
    atomic_numbers: Iterable[int],
    atomic_valence_electrons,
    BO_valences,
) -> rdkit.Chem.rdchem.Mol:
    """Set radical electron counts on the atoms of ``mol``.

    The number of radical electrons assigned to an atom is the absolute
    value of the charge computed for it by ``_get_atomic_charge``.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Molecule whose atoms are updated in place.
    atomic_numbers : Iterable[int]
        Atomic number of each atom, in atom-index order.
    atomic_valence_electrons
        Lookup indexed by atomic number; passed to ``_get_atomic_charge``.
    BO_valences
        Per-atom valence values, indexed by atom index.

    Returns
    -------
    rdkit.Chem.rdchem.Mol
        The same ``mol`` object that was passed in.
    """
    for idx, atomic_number in enumerate(atomic_numbers):
        rd_atom = mol.GetAtomWithIdx(idx)
        computed_charge = _get_atomic_charge(
            atomic_number,
            atomic_valence_electrons[atomic_number],
            BO_valences[idx],
        )

        # Atoms with zero computed charge are left untouched.
        if abs(computed_charge) > 0:
            rd_atom.SetNumRadicalElectrons(abs(int(computed_charge)))

    return mol
def _connectivity_COO_format(mol: rdkit.Chem.rdchem.Mol) -> np.ndarray:
    """
    Returns the connectivity of the molecular graph in COO format.

    Each bond contributes two directed edges (begin -> end and
    end -> begin), so the graph is stored as undirected.

    Parameters:
        mol: rdkit molecule to extract bonds from
    Returns:
        array: integer graph connectivity in COO format with shape
            [2, num_edges]; shape [2, 0] when the molecule has no bonds
    """
    row, col = [], []
    for bond in mol.GetBonds():
        start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        row.extend((start, end))
        col.extend((end, start))

    # Force an integer dtype: with no bonds NumPy would otherwise infer
    # float64 from the empty lists, which is unusable as an index array.
    return np.array([row, col], dtype=int)