def GenerateRxnNet(initial_reactant, reaction_rules):
    """
    Generates reaction network following the algorithm from
    (Ind. Eng. Chem. Res. 2010, 49 (21), 10459-10470)

    Arguments:
        - initial_reactant: a SMILES string, an rdkit mol, or a list/tuple of
          either. The caller's container is never modified.
        - reaction_rules: a rule string, an
          rdkit.Chem.rdChemReactions.ChemicalReaction object, or a list/tuple
          of either. Strings are first handed to ``Read``; if that raises,
          they are parsed with ``ReactionFromSmarts``. The caller's container
          is never modified.
    Return:
        - list of reaction intermediates (rdkit mols with explicit hydrogens
          removed); the initial reactants are included. An empty reactant
          list gives an empty result.

    Example:
        Ethane C-H scission and C-C scission
        A = GenerateRxnNet('CC', ['[C:1][H:2]>>[C:1].[H:2]',
                                  '[C:1][C:2]>>[C:1].[C:2]'])
        for product in A:
            print(Chem.MolToSmiles(product))

    Tip:
        For dehydrogenation, you must number carbon and hydrogen
        i.e. [C][H]>>[C].[H] (x)
             [C:1][H:2]>>[C:1].[H:2] (o)
        Same goes for changing bond order
             [C][C]>>[C]=[C] (x)
             [C:1][C:2]>>[C:1]=[C:2] (o)
        Aromatization and Kekulize has some problem with several species.
        So, we don't set aromatization for sanitize, and try kekulize.
    TODO:
        - record reactions as well (make it an option as it's expensive)
    """
    # set-up reactants: always work on a private list so that the caller's
    # list is neither rewritten nor drained by the work-list below
    if isinstance(initial_reactant, (list, tuple)):
        seeds = list(initial_reactant)
    else:
        seeds = [initial_reactant]
    unprocessed = [_rxnnet_prepare_reactant(seed) for seed in seeds]

    # set up reactions (again on a private copy)
    if isinstance(reaction_rules, (list, tuple)):
        rules = list(reaction_rules)
    else:
        rules = [reaction_rules]
    rules = [_rxnnet_parse_rule(rule) for rule in rules]

    # generator main algorithm
    processed = []
    while unprocessed:
        # Pop a molecule and put it at the front of the processed list
        reactant0 = unprocessed.pop(0)
        processed.insert(0, reactant0)
        # go through all reactions
        for reaction_rule in rules:
            # Generate combinatorial list of co-reactant indexes if the
            # reaction requires several reactants. Index 0 of `processed` is
            # reactant0 itself, so co-reactants are drawn from the species
            # processed before it. product() must receive the index range
            # itself (not a list wrapping it), otherwise it yields lists
            # instead of integer indexes.
            # NOTE(review): reactant0 is never paired with itself and always
            # fills the first reactant template - confirm this is intended.
            n_partners = reaction_rule.GetNumReactantTemplates() - 1
            partner_sets = itpd(range(1, len(processed)), repeat=n_partners)
            # go through each set of reactants
            for partner_indexes in partner_sets:
                # RunReactants only accepts a tuple
                reactant_tuple = (reactant0,) + tuple(
                    processed[index] for index in partner_indexes)
                ele_reactions = reaction_rule.RunReactants(reactant_tuple)
                # Record reactions (TODO)

                # Flatten the per-reaction product tuples into a single list
                products = [mol for ele_reaction in ele_reactions
                            for mol in ele_reaction]

                # Treatment necessary for radicals
                # (https://github.com/rdkit/rdkit/issues/69)
                for mol in products:
                    for atom in mol.GetAtoms():
                        atom.SetNoImplicit(True)
                        atom.UpdatePropertyCache(strict=False)
                    Chem.AssignRadicals(mol)

                # Remove molecules containing an atom with over valence
                periodic_table = GetPeriodicTable()
                products = [
                    mol for mol in products
                    if not any(
                        PeriodicTable.GetDefaultValence(
                            periodic_table, atom.GetAtomicNum())
                        < atom.GetTotalValence()
                        for atom in mol.GetAtoms())]

                # remove duplicates within this batch (keep first occurrence)
                # TODO. This removes also species with different charges
                unique_products = [
                    mol for position, mol in enumerate(products)
                    if not any(_rxnnet_same_species(mol, earlier)
                               for earlier in products[:position])]

                # update unprocessed molecule list: a product is new only if
                # it is neither already processed nor already queued.
                # Checking the queue as well prevents the same species from
                # being processed (and returned) more than once.
                for mol in unique_products:
                    if any(_rxnnet_same_species(mol, known)
                           for known in processed):
                        continue
                    if any(_rxnnet_same_species(mol, queued)
                           for queued in unprocessed):
                        continue
                    unprocessed.insert(0, mol)

    # Prettify: drop explicit hydrogens and re-sanitize
    for position, mol in enumerate(processed):
        processed[position] = Chem.RemoveHs(mol, sanitize=False)
        _sanitize_except_aromatization(processed[position])
    return processed


def _rxnnet_prepare_reactant(reactant):
    """Turn a SMILES string or mol into an explicit-H mol with radicals set."""
    if isinstance(reactant, str):
        reactant = Chem.MolFromSmiles(reactant, sanitize=False)
        # sanitize everything except aromatization
        _sanitize_except_aromatization(reactant)
    # Treatment necessary for radicals
    # (https://github.com/rdkit/rdkit/issues/69)
    reactant = Chem.AddHs(reactant)
    _sanitize_except_aromatization(reactant)
    for atom in reactant.GetAtoms():
        atom.SetNoImplicit(True)
    Chem.AssignRadicals(reactant)
    return reactant


def _rxnnet_parse_rule(rule):
    """Convert a rule string to a reaction object; pass other objects through."""
    if not isinstance(rule, str):
        return rule
    try:
        return Read(rule)
    except Exception:
        # Deliberate fallback kept from the original: anything Read rejects
        # is retried as a reaction SMARTS string.
        return ReactionFromSmarts(rule)


def _rxnnet_same_species(mol_a, mol_b):
    """True if both mols have equal atom counts and mol_a fully matches mol_b."""
    n_atoms = mol_a.GetNumAtoms()
    return (n_atoms == mol_b.GetNumAtoms()
            and n_atoms == len(mol_a.GetSubstructMatch(mol_b)))
def LoadByCovalentRadius(cls, CoordinateFPath, SurfaceAtomSymbols,
                         rfacup=1.35, rfacdown=0.6, z_vector=2):
    """
    This function reads file using ASE read, and constructs molecular graph
    in rdkit object, Mol. See manuscript for overall algorithm.

    NOTE(review): the first parameter is ``cls`` and the result is built with
    ``cls(...)``, so this is presumably decorated as a classmethod outside
    the visible source - confirm.

    Input List
    CoordinateFPath:    path to ASE readable coordinate file.
    SurfaceAtomSymbols: atomic symbol (str) or list of atomic symbols of
                        surface atoms.
    rfacup:             Upper percentage limit for determining connectivity.
    rfacdown:           Lower percentage limit for determining connectivity.
    z_vector:           index of cell basis vector that is orthogonal to
                        surface.

    Output List
    adsorbate class instance, constructed from the ASE atoms, the rdkit mol,
    the surface atom symbols and the two ASE<->rdkit atom index maps.
    """
    # initialize
    ASEAtomIndex2RdKitAtomIndex = dict()
    RdKitAtomIndex2ASEAtomIndex = dict()
    if isinstance(SurfaceAtomSymbols, str):
        SurfaceAtomSymbols = [SurfaceAtomSymbols]
    else:
        assert isinstance(SurfaceAtomSymbols, list)

    # load POSCAR
    AseAtoms = read(CoordinateFPath)

    # Only the surface atom indexes (second return value) are used here
    _, SurfaceAtomIndex = DetermineSurfaceLayerZ(
        AseAtoms, SurfaceAtomSymbols, ZVecIndex=z_vector)

    # (p)eriodic (b)oundary (c)ondition(s): start from the home cell and, for
    # every periodic axis, append the +1 and -1 shifted copies of all images
    # collected so far.
    PBCs = [[0, 0, 0]]
    for axis, shift in enumerate(([1, 0, 0], [0, 1, 0], [0, 0, 1])):
        if AseAtoms.pbc[axis]:
            opposite = [-component for component in shift]
            temp = np.concatenate(
                (np.add(PBCs, shift), np.add(PBCs, opposite)))
            PBCs = np.concatenate((PBCs, temp))

    # Get organic atoms from the DFT calculations
    # (their index and atomic number)
    ans = AseAtoms.get_atomic_numbers()  # (a)tomic (n)umber(s)
    # organic atom index in the atoms object; cls.soan is presumably the
    # collection of organic atomic numbers - verify on the class
    oai = [index for index, number in enumerate(ans) if number in cls.soan]
    oan = [ans[index] for index in oai]  # organic atomic number

    # Determine connectivity of the organic atoms as (i, j) pairs, i < j,
    # indexed by position in oai
    n_organic = len(oai)
    organic_bonds = [
        (i, j)
        for i in range(n_organic)
        for j in range(i + 1, n_organic)
        if cls._DetermineConnectivity(AseAtoms, oai[i], oai[j], PBCs,
                                      rfacup, rfacdown)]

    # construct mol object
    RdkitMol = Chem.RWMol(Chem.Mol())

    ## add atom
    ### organic atoms (added first, so rdkit index == position in oai)
    for position, atomic_number in enumerate(oan):
        atom = Chem.Atom(atomic_number)
        # this allows molecule to have radical atoms
        atom.SetNoImplicit(True)
        atom.SetBoolProp('Adsorbed', False)
        RdkitMol.AddAtom(atom)
        ASEAtomIndex2RdKitAtomIndex[oai[position]] = position
        RdKitAtomIndex2ASEAtomIndex[position] = oai[position]
    ### surface atoms
    for index in SurfaceAtomIndex:
        atom = Chem.Atom(AseAtoms[index].symbol)
        atom.SetBoolProp('SurfaceAtom', True)
        atom.SetBoolProp('Occupied', False)
        rdkit_index = RdkitMol.AddAtom(atom)
        ASEAtomIndex2RdKitAtomIndex[index] = rdkit_index
        RdKitAtomIndex2ASEAtomIndex[rdkit_index] = index

    ## add bond
    ### between organic atoms
    for i, j in organic_bonds:
        RdkitMol.AddBond(i, j, order=Chem.rdchem.BondType.SINGLE)
    ### between surface atoms
    n_surface = len(SurfaceAtomIndex)
    for i in range(n_surface):
        for j in range(i + 1, n_surface):
            if cls._DetermineConnectivity(AseAtoms, SurfaceAtomIndex[i],
                                          SurfaceAtomIndex[j], PBCs,
                                          rfacup, rfacdown):
                RdkitMol.AddBond(
                    ASEAtomIndex2RdKitAtomIndex[SurfaceAtomIndex[i]],
                    ASEAtomIndex2RdKitAtomIndex[SurfaceAtomIndex[j]],
                    order=Chem.rdchem.BondType.ZERO)

    ## assign radicals
    Chem.AssignRadicals(RdkitMol)

    ## set smilesSymbol
    for atom in RdkitMol.GetAtoms():
        n_radical = atom.GetNumRadicalElectrons()
        if atom.GetSymbol() in ['C', 'O'] and n_radical == 0:
            atom.SetProp("smilesSymbol",
                         '[' + atom.GetSymbol() + str(n_radical) + ']')
        elif n_radical > 0:
            atom.SetProp("smilesSymbol", atom.GetSymbol() + str(n_radical))

    # Find surface binding atom. This is done by finding all the radical
    # atoms. The rdkit index is looked up in oai, which assumes that only
    # organic atoms carry radical electrons - TODO confirm.
    rai_rdkit = list()  # radical atom index for rdkit mol
    rai_ase = list()  # radical atom index for ase atoms object
    for atom in RdkitMol.GetAtoms():
        if atom.GetNumRadicalElectrons() > 0:
            rai_rdkit.append(atom.GetIdx())
            rai_ase.append(oai[atom.GetIdx()])

    # Surface connectivity
    for radical_rdkit, radical_ase in zip(rai_rdkit, rai_ase):
        for surface_index in SurfaceAtomIndex:
            if cls._DetermineConnectivity(AseAtoms, radical_ase,
                                          surface_index, PBCs,
                                          rfacup, rfacdown):
                surface_rdkit = ASEAtomIndex2RdKitAtomIndex[surface_index]
                RdkitMol.AddBond(radical_rdkit, surface_rdkit,
                                 order=Chem.rdchem.BondType.ZERO)
                RdkitMol.GetAtomWithIdx(surface_rdkit).SetBoolProp(
                    'Occupied', True)
                RdkitMol.GetAtomWithIdx(radical_rdkit).SetBoolProp(
                    'Adsorbed', True)

    # assign binding site: append the number of neighbouring surface atoms
    # to the radical atom's smilesSymbol
    for radical_rdkit in rai_rdkit:
        a = RdkitMol.GetAtomWithIdx(radical_rdkit)
        nsurf = 0
        for neighbor_atom in a.GetNeighbors():
            if neighbor_atom.GetSymbol() in SurfaceAtomSymbols:
                nsurf += 1
        a.SetProp("smilesSymbol",
                  a.GetProp("smilesSymbol") + '_' + str(nsurf) + 'fold')

    adsorbate = cls(AseAtoms, RdkitMol, SurfaceAtomSymbols,
                    ASEAtomIndex2RdKitAtomIndex, RdKitAtomIndex2ASEAtomIndex)
    return adsorbate