Example #1
0
def _is_same_species(mol, other):
    """Return True if ``mol`` and ``other`` are treated as the same species.

    Two molecules count as the same when they have the same number of atoms
    and ``other``, used as a substructure query on ``mol``, matches every atom
    of ``mol``. Charges are not compared explicitly.
    """
    natoms = mol.GetNumAtoms()
    return (natoms == other.GetNumAtoms() and
            natoms == len(mol.GetSubstructMatch(other)))


def GenerateRxnNet(initial_reactant, reaction_rules):
    """
    Generates reaction network following the algorithm from
    (Ind. Eng. Chem. Res. 2010, 49 (21), 10459-10470)

    Arguments:
    - initial_reactant:     smiles string or mol, or a list of them. The
                            list passed in is not modified.
    - reaction_rules:       smarts string or rdkit.Chem.rdChemReactions.
                            ChemicalReaction object, or a list of them. The
                            list passed in is not modified.

    Return:
    - list of reaction intermediates (explicit hydrogens removed), most
      recently processed species first. An empty list is returned when no
      initial reactant is given.

    Raises:
    - ValueError if a smiles string cannot be parsed.

    Example:
    Ethane C-H scission and C-C scission
    A = GenerateRxnNet('CC',['[C:1][H:2]>>[C:1].[H:2]',
                       '[C:1][C:2]>>[C:1].[C:2]'])
    for product in A:
        print(Chem.MolToSmiles(product))

    Tip:
    For dehydrogenation, you must number carbon and hydrogen i.e.
    [C][H]>>[C].[H]             (x)
    [C:1][H:2]>>[C:1].[H:2]     (o)
    Same goes for changing bond order
    [C][C]>>[C]=[C]             (x)
    [C:1][C:2]>>[C:1]=[C:2]     (o)
    Aromatization and Kekulize have problems with several species, so
    sanitization is done without the aromatization step.

    Note:
    For rules with several reactant templates, the molecule currently being
    processed always fills the first template; the remaining templates are
    filled with every combination of the previously processed molecules.

    TODO:
    - record reactions as well (make it an option as it's expensive)

    """

    # set-up reactants
    # Work on a copy: the queue below is consumed, and the caller's list must
    # neither be emptied nor have its entries replaced.
    if isinstance(initial_reactant, list):
        unprocessed = list(initial_reactant)
    else:
        unprocessed = [initial_reactant]
    if not unprocessed:
        return []

    for i, reactant in enumerate(unprocessed):
        if isinstance(reactant, str):
            smiles = reactant
            reactant = Chem.MolFromSmiles(smiles, sanitize=False)
            if reactant is None:
                raise ValueError('Could not parse SMILES: %r' % (smiles,))
            # sanitize everything except aromatization set
            _sanitize_except_aromatization(reactant)
        # Treatment necessary for radicals
        # (https://github.com/rdkit/rdkit/issues/69)
        reactant = Chem.AddHs(reactant)
        # sanitize everything except aromatization set
        _sanitize_except_aromatization(reactant)
        for atom in reactant.GetAtoms():
            atom.SetNoImplicit(True)
        Chem.AssignRadicals(reactant)
        unprocessed[i] = reactant

    # set up reactions (again on a copy of the caller's list)
    if isinstance(reaction_rules, list):
        rules = list(reaction_rules)
    else:
        rules = [reaction_rules]
    for i, rule in enumerate(rules):
        if isinstance(rule, str):
            # Read() is tried first; anything it rejects is parsed as a
            # reaction SMARTS string instead.
            try:
                rules[i] = Read(rule)
            except Exception:
                rules[i] = ReactionFromSmarts(rule)

    # the periodic table is only used for look-ups, so fetch it once
    periodic_table = GetPeriodicTable()

    # generator main algorithm
    processed = []
    while unprocessed:
        # Pop a molecule and put it in a processed list
        reactant0 = unprocessed.pop(0)
        processed.insert(0, reactant0)
        # go through all reactions
        for reaction_rule in rules:
            # Generate combinatorial list of co-reactant indexes if the
            # reaction requires several reactants. Index 0 is reactant0
            # itself, so co-reactants are drawn from index 1 onwards. For a
            # single-template rule this yields exactly one empty tuple.
            n_partners = reaction_rule.GetNumReactantTemplates() - 1
            partner_sets = itpd(range(1, len(processed)), repeat=n_partners)
            # go through each set of reactants
            for partner_indexes in partner_sets:
                # Make the reactant mol tuple (RunReactants only accepts
                # tuple)
                reactants = (reactant0,) + tuple(processed[k]
                                                 for k in partner_indexes)
                # React
                ele_reactions = reaction_rule.RunReactants(reactants)

                # Record reactions (TODO)

                # Flatten the per-reaction product tuples into a single list
                products = [mol for ele_reaction in ele_reactions
                            for mol in ele_reaction]
                # Treatment necessary for radicals
                # (https://github.com/rdkit/rdkit/issues/69)
                for mol in products:
                    for atom in mol.GetAtoms():
                        atom.SetNoImplicit(True)
                        atom.UpdatePropertyCache(strict=False)
                    Chem.AssignRadicals(mol)
                # Remove molecules with atoms with over valence
                products = [
                    mol for mol in products
                    if not any(
                        periodic_table.GetDefaultValence(atom.GetAtomicNum())
                        < atom.GetTotalValence()
                        for atom in mol.GetAtoms())
                ]
                # remove duplicates (keep the first occurrence)
                # TODO. This removes also species with different charges
                products = [
                    mol for idx, mol in enumerate(products)
                    if not any(_is_same_species(mol, products[j])
                               for j in range(idx))
                ]
                # update unprocessed molecule list
                # A product is new only if it is neither processed already
                # nor waiting in the queue; otherwise the same species would
                # be processed, and returned, more than once.
                for mol in products:
                    already_known = (
                        any(_is_same_species(mol, seen)
                            for seen in processed) or
                        any(_is_same_species(mol, queued)
                            for queued in unprocessed))
                    if not already_known:
                        unprocessed.insert(0, mol)
    # Prettify
    for i, mol in enumerate(processed):
        mol = Chem.RemoveHs(mol, sanitize=False)
        _sanitize_except_aromatization(mol)
        processed[i] = mol
    return processed
    def LoadByCovalentRadius(cls, CoordinateFPath, SurfaceAtomSymbols,
                             rfacup=1.35, rfacdown=0.6, z_vector=2):
        """
        Read a coordinate file with ASE read and construct the molecular
        graph as an rdkit Mol object. See manuscript for overall algorithm.

        Input List
        CoordinateFPath:    path to ASE readable coordinate file.
        SurfaceAtomSymbols: atomic symbol (str) or list of atomic symbols of
                            surface atoms.
        rfacup:             Upper percentage limit for determining
                            connectivity (forwarded to
                            cls._DetermineConnectivity).
        rfacdown:           Lower percentage limit for determining
                            connectivity (forwarded to
                            cls._DetermineConnectivity).
        z_vector:           index of cell basis vector that is orthogonal to
                            surface.

        Output List
        adsorbate class instance, built from the ASE atoms, the rdkit
        molecule, the surface atom symbols and the two ASE <-> rdkit atom
        index maps.
        """

        # initialize
        ASEAtomIndex2RdKitAtomIndex = dict()
        RdKitAtomIndex2ASEAtomIndex = dict()
        if isinstance(SurfaceAtomSymbols, str):
            SurfaceAtomSymbols = [SurfaceAtomSymbols]
        else:
            assert isinstance(SurfaceAtomSymbols, list)
        # load the coordinate file
        AseAtoms = read(CoordinateFPath)
        # only the second return value is used here; it is treated below as
        # the ASE indexes of the surface atoms
        _, SurfaceAtomIndex = DetermineSurfaceLayerZ(AseAtoms,
                                                     SurfaceAtomSymbols,
                                                     ZVecIndex=z_vector)

        # (p)eriodic (b)oundary (c)ondition(s): cell offsets of the images to
        # consider. For every periodic axis, each offset collected so far is
        # also shifted by +1 and by -1 along that axis.
        PBCs = [[0, 0, 0]]
        for axis in range(3):
            if AseAtoms.pbc[axis]:
                step = [0, 0, 0]
                step[axis] = 1
                shifted = np.concatenate((np.add(PBCs, step),
                                          np.subtract(PBCs, step)))
                PBCs = np.concatenate((PBCs, shifted))

        # Get organic atoms from the DFT calculations (their index and atomic
        # number). cls.soan is the collection of atomic numbers accepted as
        # organic (defined on the class, outside this method).
        ans = AseAtoms.get_atomic_numbers()  # (a)tomic (n)umber(s)
        # organic atom index in the atoms object
        oai = [i for i in range(len(AseAtoms)) if ans[i] in cls.soan]
        # organic atomic number, as plain Python ints for the rdkit API
        oan = [int(ans[i]) for i in oai]

        # Determine connectivity of the organic atoms (pairs i < j of
        # positions in oai)
        n_organic = len(oai)
        organic_bonds = [
            (i, j)
            for i in range(n_organic)
            for j in range(i + 1, n_organic)
            if cls._DetermineConnectivity(AseAtoms, oai[i], oai[j], PBCs,
                                          rfacup, rfacdown)
        ]

        # construct mol object
        RdkitMol = Chem.RWMol(Chem.Mol())

        ## add atom
        ### organic atoms
        # They are added first, so the rdkit index of an organic atom equals
        # its position in oai.
        for i, atomic_number in enumerate(oan):
            atom = Chem.Atom(atomic_number)
            # this allows molecule to have radical atoms
            atom.SetNoImplicit(True)
            atom.SetBoolProp('Adsorbed', False)
            RdkitMol.AddAtom(atom)
            ASEAtomIndex2RdKitAtomIndex[oai[i]] = i
            RdKitAtomIndex2ASEAtomIndex[i] = oai[i]
        ### surface atoms
        for index in SurfaceAtomIndex:
            atom = Chem.Atom(AseAtoms[index].symbol)
            atom.SetBoolProp('SurfaceAtom', True)
            atom.SetBoolProp('Occupied', False)
            i = RdkitMol.AddAtom(atom)
            ASEAtomIndex2RdKitAtomIndex[index] = i
            RdKitAtomIndex2ASEAtomIndex[i] = index

        ## add bond
        ### between organic atoms
        for i, j in organic_bonds:
            RdkitMol.AddBond(i, j, order=Chem.rdchem.BondType.SINGLE)

        ### between surface atoms
        n_surface = len(SurfaceAtomIndex)
        for i in range(n_surface):
            for j in range(i + 1, n_surface):
                if cls._DetermineConnectivity(AseAtoms, SurfaceAtomIndex[i],
                                              SurfaceAtomIndex[j], PBCs,
                                              rfacup, rfacdown):
                    RdkitMol.AddBond(
                        ASEAtomIndex2RdKitAtomIndex[SurfaceAtomIndex[i]],
                        ASEAtomIndex2RdKitAtomIndex[SurfaceAtomIndex[j]],
                        order=Chem.rdchem.BondType.ZERO)

        ## assign radicals
        Chem.AssignRadicals(RdkitMol)

        ## set smilesSymbol
        for atom in RdkitMol.GetAtoms():
            n_radical = atom.GetNumRadicalElectrons()
            if atom.GetSymbol() in ['C', 'O'] and n_radical == 0:
                atom.SetProp("smilesSymbol",
                             '[' + atom.GetSymbol() + str(n_radical) + ']')
            elif n_radical > 0:
                atom.SetProp("smilesSymbol",
                             atom.GetSymbol() + str(n_radical))

        # Find surface binding atom. This is done by finding all the radical
        # atoms
        rai_rdkit = list()  # radical atom index for rdkit mol
        rai_ase = list()  # radical atom index for ase atoms object
        for atom in RdkitMol.GetAtoms():
            if atom.GetNumRadicalElectrons() > 0:
                rai_rdkit.append(atom.GetIdx())
                rai_ase.append(oai[atom.GetIdx()])

        # Surface connectivity
        for i in range(len(rai_ase)):
            for j in range(n_surface):
                if cls._DetermineConnectivity(AseAtoms, rai_ase[i],
                                              SurfaceAtomIndex[j], PBCs,
                                              rfacup, rfacdown):
                    surface_idx = ASEAtomIndex2RdKitAtomIndex[
                        SurfaceAtomIndex[j]]
                    RdkitMol.AddBond(rai_rdkit[i], surface_idx,
                                     order=Chem.rdchem.BondType.ZERO)
                    RdkitMol.GetAtomWithIdx(surface_idx).SetBoolProp(
                        'Occupied', True)
                    RdkitMol.GetAtomWithIdx(rai_rdkit[i]).SetBoolProp(
                        'Adsorbed', True)

        # assign binding site: append the number of neighbouring surface
        # atoms to the smilesSymbol of each radical atom
        for radical_idx in rai_rdkit:
            a = RdkitMol.GetAtomWithIdx(radical_idx)
            nsurf = 0
            for neighbor_atom in a.GetNeighbors():
                if neighbor_atom.GetSymbol() in SurfaceAtomSymbols:
                    nsurf += 1
            a.SetProp("smilesSymbol",
                      a.GetProp("smilesSymbol") + '_' + str(nsurf) + 'fold')

        adsorbate = cls(AseAtoms, RdkitMol, SurfaceAtomSymbols,
                        ASEAtomIndex2RdKitAtomIndex,
                        RdKitAtomIndex2ASEAtomIndex)

        return adsorbate