def atom_mapper3D(reactant, products):
    """
    Written by Mads Koerstz

    Atom-label the reactant and every product, fix the reactant to the
    first stereoisomer produced by EnumerateStereoisomers, and build the
    product isomers with set_chirality against that reactant.

    Returns a tuple (reactant, new_products) where new_products holds, for
    each input product, whatever set_chirality returned for it.
    """
    stereo_kwargs = dict(cleanIt=True, flagPossibleStereoCenters=True,
                         force=True)

    # Prepare reactant
    labelled_reactant = label_atoms(reactant)
    enum_options = StereoEnumerationOptions(onlyUnassigned=False, unique=False)
    rdmolops.AssignStereochemistry(labelled_reactant, **stereo_kwargs)
    first_isomer = next(
        EnumerateStereoisomers(labelled_reactant, options=enum_options))

    # Makes Graph atom idx = SMILES atom mapped idx.
    reactant = reassign_atom_idx(first_isomer)
    # Assigns _CIPCode.
    rdmolops.AssignStereochemistry(reactant, **stereo_kwargs)

    # Prepare Product: label, re-index (Graph atom idx = SMILES atom mapped
    # idx), then enumerate the isomers for each product in turn.
    new_products = [
        set_chirality(reassign_atom_idx(label_atoms(product)), reactant)
        for product in products
    ]

    return reactant, new_products
def set_chirality(product, reactant):
    """
    Written by Mads Koerstz

    Produce all combinations of isomers (R/S and cis/trans). But force
    product atoms with unchanged neighbors to the same label chirality as
    the reactant.

    Both molecules are re-indexed with reassign_atom_idx and sanitized
    before use. Every atom that ends up in the "unchanged" set must carry a
    '_CIPCode' property on both the reactant and the product isomer;
    GetProp raises otherwise.

    Returns the list of product isomer mols, in enumeration order, keeping
    only the first isomer for each distinct Chem.MolToSmiles string.
    """
    # TODO move these somewhere it makes more sense.
    product = reassign_atom_idx(product)
    reactant = reassign_atom_idx(reactant)

    Chem.SanitizeMol(product)
    Chem.SanitizeMol(reactant)

    # Find chiral atoms - including label chirality
    chiral_atoms_product = Chem.FindMolChiralCenters(product,
                                                     includeUnassigned=True)

    # A center counts as unchanged when its set of neighbor indices is the
    # same in product and reactant.
    unchanged_atoms = []
    for atom, _ in chiral_atoms_product:
        product_neighbors = sorted(
            a.GetIdx() for a in product.GetAtomWithIdx(atom).GetNeighbors())
        reactant_neighbors = sorted(
            a.GetIdx() for a in reactant.GetAtomWithIdx(atom).GetNeighbors())
        if product_neighbors == reactant_neighbors:
            unchanged_atoms.append(atom)

    # make combinations of isomers.
    opts = StereoEnumerationOptions(onlyUnassigned=False, unique=False)
    rdmolops.AssignStereochemistry(product, cleanIt=True,
                                   flagPossibleStereoCenters=True, force=True)

    seen_smiles = set()
    product_isomers_mols = []
    for product_isomer in EnumerateStereoisomers(product, options=opts):
        rdmolops.AssignStereochemistry(product_isomer, force=True)
        for atom in unchanged_atoms:
            reactant_global_tag = reactant.GetAtomWithIdx(atom).GetProp(
                '_CIPCode')

            # TODO make sure that the _CIPRank is the same for atom in
            # reactant and product.
            isomer_atom = product_isomer.GetAtomWithIdx(atom)
            if reactant_global_tag != isomer_atom.GetProp('_CIPCode'):
                isomer_atom.InvertChirality()

        # Compute the SMILES once and deduplicate through a set instead of
        # re-generating it and scanning a list.
        isomer_smiles = Chem.MolToSmiles(product_isomer)
        if isomer_smiles not in seen_smiles:
            seen_smiles.add(isomer_smiles)
            product_isomers_mols.append(product_isomer)

    return product_isomers_mols
def write_xyz_file(mol, file_name):
    """
    Embeds a mol object to get 3D coordinates which are written to an .xyz
    file

    The file starts with the atom count and an empty comment line, followed
    by one "symbol x y z" line per atom. When the formal charge is non-zero
    a trailing "$set / chrg <charge> / $end" block is appended.

    The mol is modified in place (sanitized, stereochemistry assigned,
    embedded and MMFF-optimized).

    Raises ValueError if no 3D conformer could be generated; in that case
    the output file is not created.
    """
    n_atoms = mol.GetNumAtoms()
    charge = Chem.GetFormalCharge(mol)
    symbols = [a.GetSymbol() for a in mol.GetAtoms()]

    Chem.SanitizeMol(mol)
    rdmolops.AssignStereochemistry(mol)

    # EmbedMolecule reports failure through a negative conformer id; stop
    # here with a clear message instead of failing later on a missing
    # conformer.
    if AllChem.EmbedMolecule(mol, maxAttempts=10000) < 0:
        raise ValueError(
            "could not embed molecule: no 3D conformer was generated")
    AllChem.MMFFOptimizeMolecule(mol, ignoreInterfragInteractions=False)

    # Look the conformer up once rather than for every atom.
    conformer = mol.GetConformers()[0]

    with open(file_name, 'w') as _file:
        _file.write(str(n_atoms)+'\n\n')
        for atom, symbol in enumerate(symbols):
            coord = conformer.GetAtomPosition(atom)
            # The "\n" is part of the joined tuple, so each line keeps a
            # space before the newline exactly as before.
            line = " ".join((symbol, str(coord.x), str(coord.y),
                             str(coord.z), "\n"))
            _file.write(line)
        if charge != 0:
            _file.write("$set\n")
            _file.write("chrg "+str(charge)+"\n")
            _file.write("$end")
def extract_smiles(xyz_file, charge, allow_charge=True, check_ac=False):
    """
    uses xyz2mol to extract smiles with as much 3d structural information as
    possible

    The molecule is first built with use_huckel=True; if that attempt
    raises, it is rebuilt with use_huckel=False.

    When check_ac is true the adjacency matrix of the extracted molecule is
    compared with `AC`, and the global AC_SAME is set to False on a
    mismatch.
    NOTE(review): `AC` is not defined in this function - presumably a
    module-level reference adjacency matrix; confirm.

    Returns a tuple (smiles, formal charge of the chosen structure,
    res_status from choose_resonance_structure).
    """
    global AC_SAME

    atoms, _, xyz_coordinates = xyz2mol_local.read_xyz_file(xyz_file)

    shared_kwargs = dict(charge=charge, use_graph=True,
                         allow_charged_fragments=allow_charge,
                         use_atom_maps=True, embed_chiral=True)
    try:
        input_mol = xyz2mol_local.xyz2mol(atoms, xyz_coordinates,
                                          use_huckel=True, **shared_kwargs)
    except Exception:
        # Best-effort fallback when the Huckel-based attempt fails.
        # Catching Exception rather than everything lets KeyboardInterrupt
        # and SystemExit propagate.
        input_mol = xyz2mol_local.xyz2mol(atoms, xyz_coordinates,
                                          use_huckel=False, **shared_kwargs)

    input_mol = reorder_atoms_to_map(input_mol)
    structure_mol, res_status = choose_resonance_structure(input_mol)
    structure_mol = chiral_tags(structure_mol)
    rdmolops.AssignStereochemistry(structure_mol)
    structure_smiles = Chem.MolToSmiles(structure_mol)

    if check_ac:
        ac = Chem.GetAdjacencyMatrix(input_mol)
        if not np.all(AC == ac):
            AC_SAME = False
            print("change in AC: stopping")

    return structure_smiles, GetFormalCharge(structure_mol), res_status
def choose_resonance_structure(mol):
    """
    This function creates all resonance structures of the mol object,
    counts the number of rotatable bonds for each structure and chooses the
    one with fewest rotatable bonds (most 'locked' structure)

    On a tie the structure encountered first is kept. If the supplier is
    empty the input mol itself is used and the returned status is False.

    Returns a tuple (chosen mol, res_status).
    """
    supplier = rdchem.ResonanceMolSupplier(
        mol, rdchem.ResonanceFlags.ALLOW_CHARGE_SEPARATION)

    if supplier:
        res_status, new_mol = True, None
    else:
        print("using input mol")
        res_status, new_mol = False, mol

    for candidate in supplier:
        Chem.SanitizeMol(candidate)
        rot_bonds = Chem.rdMolDescriptors.CalcNumRotatableBonds(candidate)
        # The first candidate is always taken; later ones only replace it
        # when strictly better.
        if new_mol is None or rot_bonds < fewest_rot_bonds:
            fewest_rot_bonds = rot_bonds
            new_mol = candidate

    # Refresh the stereo information on the selected structure.
    Chem.DetectBondStereochemistry(new_mol, -1)
    rdmolops.AssignStereochemistry(new_mol, flagPossibleStereoCenters=True,
                                   force=True)
    Chem.AssignAtomChiralTagsFromStructure(new_mol, -1)

    return new_mol, res_status
def chiral_tags(mol):
    """
    Tag methylene and methyl groups with a chiral tag priority defined from
    the atom index of the hydrogens

    The selected hydrogens are temporarily given atomic number 9 while the
    chiral tags and stereochemistry are assigned, and are switched back to
    atomic number 1 before the (in-place modified) mol is returned.
    """
    relabelled = []

    def _relabel(atom_idx):
        # Remember the hydrogen so it can be restored afterwards.
        relabelled.append(atom_idx)
        mol.GetAtomWithIdx(atom_idx).SetAtomicNum(9)

    # Methylene: of the two matched hydrogens, relabel the higher index.
    ch2_query = Chem.MolFromSmarts('[!#1][*]([#1])([#1])([!#1])')
    for match in mol.GetSubstructMatches(ch2_query):
        _relabel(max(match[2:4]))

    # Methyl: relabel the two highest-index hydrogens. This query runs
    # after the methylene hydrogens above have already been relabelled.
    ch3_query = Chem.MolFromSmarts('[!#1][*]([#1])([#1])([#1])')
    for match in mol.GetSubstructMatches(ch3_query):
        ordered = sorted(match[2:])
        _relabel(ordered[-1])
        _relabel(ordered[-2])

    Chem.AssignAtomChiralTagsFromStructure(mol, -1)
    rdmolops.AssignStereochemistry(mol)

    for atom_idx in relabelled:
        mol.GetAtomWithIdx(atom_idx).SetAtomicNum(1)

    return mol
def extract_smiles(xyz_file, charge, allow_charge=True):
    """
    uses xyz2mol to extract smiles with as much 3d structural information as
    possible

    The molecule is first built with use_huckel=True; if that attempt
    raises, it is rebuilt with use_huckel=False.

    Returns a tuple (smiles, formal charge of the chosen structure,
    res_status from choose_resonance_structure).
    """
    atoms, _, xyz_coordinates = xyz2mol_local.read_xyz_file(xyz_file)

    shared_kwargs = dict(charge=charge, use_graph=True,
                         allow_charged_fragments=allow_charge,
                         use_atom_maps=True, embed_chiral=True)
    try:
        input_mol = xyz2mol_local.xyz2mol(atoms, xyz_coordinates,
                                          use_huckel=True, **shared_kwargs)
    except Exception:
        # Best-effort fallback when the Huckel-based attempt fails.
        # Catching Exception rather than everything lets KeyboardInterrupt
        # and SystemExit propagate.
        input_mol = xyz2mol_local.xyz2mol(atoms, xyz_coordinates,
                                          use_huckel=False, **shared_kwargs)

    input_mol = reorder_atoms_to_map(input_mol)
    structure_mol, res_status = choose_resonance_structure(input_mol)
    structure_mol = chiral_tags(structure_mol)
    rdmolops.AssignStereochemistry(structure_mol)
    structure_smiles = Chem.MolToSmiles(structure_mol)

    return structure_smiles, GetFormalCharge(structure_mol), res_status
def construct_mol_features(mol: rdchem.Mol,
                           out_size: Optional[int] = -1) -> np.ndarray:
    """Returns the atom features of all the atoms in the molecule.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest. Gasteiger charges and stereochemistry are
        computed on it, so its atom properties are updated in place.

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative (or
        None), it does not take any effect. Otherwise, it must be larger
        than or equal to the number of atoms in the input molecule. If so,
        the end of the array is padded with zeros.

    Returns:
    --------
    mol_feats: np.ndarray, shape=(n,m)
        Where `n` is the total number of atoms within the molecule (or
        `out_size` when padding is requested), and `m` is the number of
        feats.

    Raises:
    -------
    ValueError
        If `out_size` is non-negative but smaller than the number of atoms.
    """
    # Calculate charges and chirality of atoms within molecule
    rdPartialCharges.ComputeGasteigerCharges(
        mol)  # stored under _GasteigerCharge
    rdmolops.AssignStereochemistry(
        mol)  # stored under _CIPCode, see doc for more info

    # SMARTS queries for the pharmacophore-like atom flags
    hydrogen_donor = rdmolfiles.MolFromSmarts(
        "[$([N;!H0;v3,v4&+1]),$([O,S;H1;+0])" + ",n&H1&+0]")
    hydrogen_acceptor = rdmolfiles.MolFromSmarts(
        "[$([O,S;H1;v2;!$(*-*=[O,N,P,S])])" +
        ",$([O,S;H0;v2]),$([O,S;-]),$([N;v3;!$(N-*=[O,N,P,S])]),n&H0&+0," +
        "$([o,s;+0;!$([o,s]:n);!$([o,s]:c:n)])]")
    acidic = rdmolfiles.MolFromSmarts("[$([C,S](=[O,S,P])-[O;H1,-1])]")
    basic = rdmolfiles.MolFromSmarts(
        "[#7;+,$([N;H2&+0][$([C,a]);!$([C,a](=O))])" +
        ",$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))])," +
        "$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))])]")

    def _matched_atoms(pattern):
        # Flatten all matches into a set of atom indices for O(1) lookups.
        return {
            idx
            for match in mol.GetSubstructMatches(pattern)
            for idx in match
        }

    # Retrieve atom index locations of matches
    hydrogen_donor_match = _matched_atoms(hydrogen_donor)
    hydrogen_acceptor_match = _matched_atoms(hydrogen_acceptor)
    acidic_match = _matched_atoms(acidic)
    basic_match = _matched_atoms(basic)

    # Get ring info
    ring = mol.GetRingInfo()

    # Loop-invariant list of hybridization categories
    hybridizations = list(rdchem.HybridizationType.names.values())

    mol_feats = []
    n_atoms = mol.GetNumAtoms()
    for atom_idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)
        atom_feats = []
        atom_feats += one_hot(atom.GetSymbol(), [
            'C', 'O', 'N', 'S', 'Cl', 'F', 'Br', 'P', 'I', 'Si', 'B', 'Na',
            'Sn', 'Se', 'other'
        ])
        atom_feats += one_hot(atom.GetDegree(), [1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetHybridization(), hybridizations)
        atom_feats += one_hot(atom.GetImplicitValence(),
                              [0, 1, 2, 3, 4, 5, 6])
        atom_feats += one_hot(atom.GetFormalCharge(),
                              [-3, -2, -1, 0, 1, 2, 3])

        # A NaN Gasteiger charge is replaced by 0.
        g_charge = float(atom.GetProp("_GasteigerCharge"))
        atom_feats += [g_charge] if not np.isnan(g_charge) else [0.]

        atom_feats += [atom.GetIsAromatic()]
        atom_feats += [
            ring.IsAtomInRingOfSize(atom_idx, size) for size in range(3, 9)
        ]
        atom_feats += one_hot(atom.GetTotalNumHs(), [0, 1, 2, 3, 4])

        # Chirality
        chirality_possible = [atom.HasProp("_ChiralityPossible")]
        try:
            cip_feats = one_hot(atom.GetProp('_CIPCode'), ["R", "S"])
        except Exception:
            # No _CIPCode on this atom (or the code could not be encoded):
            # fall back to "neither R nor S". Exception instead of a bare
            # except so KeyboardInterrupt/SystemExit are not swallowed.
            cip_feats = [False, False]
        atom_feats += cip_feats + chirality_possible

        # Hydrogen bonding
        atom_feats += [atom_idx in hydrogen_donor_match]
        atom_feats += [atom_idx in hydrogen_acceptor_match]
        # Is Acidic/Basic
        atom_feats += [atom_idx in acidic_match]
        atom_feats += [atom_idx in basic_match]

        mol_feats.append(atom_feats)

    # `np.float` was removed from NumPy; the builtin `float` is the same
    # float64 dtype.
    if out_size is None or out_size < 0:
        return np.array(mol_feats, dtype=float)

    if out_size >= n_atoms:
        # 'empty' padding for `mol_feats`. Generate(s) feature matrix of
        # same size for all mols
        # NOTE: len(mol_feats[0]) is the number of feats; a molecule with
        # zero atoms therefore cannot be padded.
        padded_mol_feats = np.zeros((out_size, len(mol_feats[0])),
                                    dtype=float)
        padded_mol_feats[:n_atoms] = np.array(mol_feats, dtype=float)
        return padded_mol_feats

    raise ValueError(
        '`out_size` (N={}) must be negative or larger than or '
        'equal to the number of atoms in the input molecules (N={}).'.
        format(out_size, n_atoms))
# Build a de-duplicated list of RDKit molecules from `table`, annotating each
# kept molecule with computed descriptors and the original row values.
# NOTE(review): `table`, `headers`, `headers_dict`, `smiles_idx`,
# `chemblid_idx`, `pchembl_idx`, `p` and `filter_extract_mol` come from
# outside this fragment - confirm their definitions in the enclosing code.
smiles_set = set()
props = Properties()
n_vec = props.GetPropertyNames()
rdk_mols = []
for row in table:
    mol = filter_extract_mol(row, headers_dict)
    # Skip rows for which no molecule was produced.
    if (mol is None):
        continue
    # Overwrite the row's SMILES with the one RDKit writes for the mol and
    # skip rows whose SMILES has already been seen.
    row[smiles_idx] = Chem.MolToSmiles(mol)
    if (row[smiles_idx] in smiles_set):
        continue
    smiles_set.add(row[smiles_idx])
    mol.SetProp('_Name', row[chemblid_idx])
    rdmolops.AssignStereochemistry(mol)
    p_vec = props.ComputeProperties(mol)
    too_flexible = False
    # Store every computed property as an 'RDK<name>' string property; stop
    # as soon as NumRotatableBonds exceeds 5.
    for name, value in zip(n_vec, p_vec):
        if (name == 'NumRotatableBonds' and value > 5):
            too_flexible = True
            break
        mol.SetProp('RDK{0:s}'.format(name), '{0:.3f}'.format(value))
    # Molecules with more than 5 rotatable bonds are dropped.
    if (too_flexible):
        continue
    # Copy the row values for columns 0..pchembl_idx onto the mol, keyed by
    # the corresponding header.
    for i in range(pchembl_idx + 1):
        mol.SetProp(headers[i], row[i])
    rdDepictor.Compute2DCoords(mol)
    p.ligands.append(mol)
    rdk_mols.append(mol)