Ejemplo n.º 1
0
def standardize_smiles(smiles,
                       min_heavy_atoms=2,
                       max_heavy_atoms=70,
                       element_list=None,
                       remove_long_side_chains=True,
                       neutralise_charges=True):
    """Standardise a SMILES string and return its non-isomeric SMILES.

    The molecule goes through, in order: parsing, largest-fragment
    selection, hydrogen removal, salt removal, optional charge
    neutralisation, cleanup/sanitisation with a second hydrogen removal,
    and a final ``_valid_size`` filter.

    :param smiles: input SMILES string.
    :param min_heavy_atoms: forwarded to ``_valid_size``.
    :param max_heavy_atoms: forwarded to ``_valid_size``.
    :param element_list: forwarded to ``_valid_size``; defaults to
        ``[6, 7, 8, 9, 16, 17, 35]`` (presumably atomic numbers -- confirm
        against ``_valid_size``).
    :param remove_long_side_chains: forwarded to ``_valid_size``.
    :param neutralise_charges: run ``_neutralise_charges`` when truthy.
    :return: the SMILES of the standardised molecule, or ``None`` as soon
        as any step yields a falsy molecule or the size filter rejects it.
    """
    allowed_elements = element_list
    if allowed_elements is None:
        allowed_elements = [6, 7, 8, 9, 16, 17, 35]

    def _strip_hydrogens(molecule):
        # Same RemoveHs options are needed before and after the clean-up.
        return rdmolops.RemoveHs(molecule,
                                 implicitOnly=False,
                                 updateExplicitCount=False,
                                 sanitize=True)

    mol = rkc.MolFromSmiles(smiles)
    if not mol:
        return None

    mol = _get_largest_fragment(mol)
    if not mol:
        return None

    mol = _strip_hydrogens(mol)
    if not mol:
        return None

    mol = _remove_salts(mol)
    if not mol:
        return None

    if neutralise_charges:
        mol, _ = _neutralise_charges(mol)
        if not mol:
            return None

    rdmolops.Cleanup(mol)
    rdmolops.SanitizeMol(mol)
    mol = _strip_hydrogens(mol)
    if not mol:
        return None

    size_ok = _valid_size(mol, min_heavy_atoms, max_heavy_atoms,
                          allowed_elements, remove_long_side_chains)
    if not size_ok:
        return None
    return rkc.MolToSmiles(mol, isomericSmiles=False)
Ejemplo n.º 2
0
def standardize_smiles(smiles, min_heavy_atoms=10, max_heavy_atoms=50, element_list=None,
                       remove_long_side_chains=True, neutralise_charges=True):
    """Standardise a SMILES string and return its non-isomeric SMILES.

    Steps, in order: parse, keep the largest fragment, remove hydrogens,
    strip salts, optionally neutralise charges, clean up / sanitise with a
    second hydrogen removal, then apply the ``valid_size`` filter.

    :param smiles: input SMILES string.
    :param min_heavy_atoms: forwarded to ``valid_size``.
    :param max_heavy_atoms: forwarded to ``valid_size``.
    :param element_list: forwarded to ``valid_size``; ``None`` (the default)
        means ``[6, 7, 8, 9, 16, 17, 35]`` (presumably atomic numbers --
        confirm against ``valid_size``).
    :param remove_long_side_chains: forwarded to ``valid_size``.
    :param neutralise_charges: run ``_neutraliseCharges`` when truthy.
    :return: SMILES string of the standardised molecule, or ``None`` if any
        step produced a falsy molecule or the size filter rejected it.
    """
    # A list literal as a default would be shared between calls; build a
    # fresh one per call instead.
    if element_list is None:
        element_list = [6, 7, 8, 9, 16, 17, 35]

    mol = Chem.MolFromSmiles(smiles)
    if mol:
        mol = _getlargestFragment(mol)
    if mol:
        mol = rdmolops.RemoveHs(mol, implicitOnly=False, updateExplicitCount=False, sanitize=True)
    if mol:
        mol = _saltremover.StripMol(mol, dontRemoveEverything=True)
    if mol and neutralise_charges:
        mol, _ = _neutraliseCharges(mol)
    if mol:
        rdmolops.Cleanup(mol)
        rdmolops.SanitizeMol(mol)
        mol = rdmolops.RemoveHs(mol, implicitOnly=False, updateExplicitCount=False, sanitize=True)
    if mol and valid_size(mol, min_heavy_atoms, max_heavy_atoms, element_list, remove_long_side_chains):
        return Chem.MolToSmiles(mol, isomericSmiles=False)
    return None
Ejemplo n.º 3
0
def test_mols():
    """Return a list of embedded molecules, each tagged with a random 'Fitness'.

    Every SMILES in the fixed list is parsed, hydrogenated, embedded with
    ETKDG, stripped of hydrogens again and given a ``'Fitness'`` string
    property drawn from ``np.random.rand``.
    """

    def _build(smiles):
        # Hydrogens are only present while the conformer is generated.
        with_h = rdmolops.AddHs(rdmolfiles.MolFromSmiles(smiles), addCoords=True)
        rdDistGeom.EmbedMolecule(with_h, rdDistGeom.ETKDG())
        heavy = rdmolops.RemoveHs(with_h)
        heavy.SetProp('Fitness', str(np.random.rand(1)[0]))
        return heavy

    all_smiles = ('CN=C=O', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2', 'CCCCCCCCCCCCCCCC')
    return [_build(smiles) for smiles in all_smiles]
Ejemplo n.º 4
0
    def apply_retrorules(self, smile, rxns, explicit_hydrogens=False):
        '''Apply every reaction in ``rxns`` to ``smile`` and collect the products.

        :param smile: SMILES string of the substrate.
        :param rxns: dict mapping reaction name -> reaction object exposing
            ``RunReactants``.
        :param explicit_hydrogens: when truthy, hydrogens are added to the
            substrate before the reactions run and removed from each product.
        :return: dict mapping reaction name -> list of product sets, where each
            product set is a list of SMILES accepted by
            ``self._check_valid_smile``. Duplicate and empty product sets are
            dropped, and reactions with no surviving product set are omitted.
            An unparsable ``smile`` yields ``{}``.
        '''
        try:
            substrate_molecule = AllChem.MolFromSmiles(smile)
        except Exception:
            return {}

        # MolFromSmiles reports an invalid SMILES by returning None rather
        # than raising, so the except clause above does not cover that case.
        if substrate_molecule is None:
            return {}

        if explicit_hydrogens:
            substrate_molecule = rdmolops.AddHs(substrate_molecule)

        rxn_product_dict = {}
        for rxn_name, rxn in rxns.items():
            try:
                products = rxn.RunReactants((substrate_molecule, ))
            except Exception:
                products = []
                print('Error running reactants for: ' + str(smile))

            smiles_products = []
            for product in products:
                sub_list = []
                for product_mol in product:
                    if explicit_hydrogens:
                        product_mol = rdmolops.RemoveHs(product_mol)

                    # Split disconnected products; fall back to the whole
                    # molecule if it cannot be fragmented.
                    try:
                        fragments = rdmolops.GetMolFrags(product_mol, asMols=True)
                    except Exception:
                        fragments = [product_mol]

                    for fragment in fragments:
                        # Best effort: a fragment that cannot be converted or
                        # validated is skipped, not fatal.
                        try:
                            p_smile = AllChem.MolToSmiles(fragment)
                            p_smile = rdkit_smile(p_smile)
                            # The helper's return type is not visible here, so
                            # the strict comparison is kept as-is.
                            if self._check_valid_smile(
                                    p_smile, rxn_name=rxn_name) == True:
                                sub_list.append(p_smile)
                        except Exception:
                            pass

                if sub_list and sub_list not in smiles_products:
                    smiles_products.append(sub_list)

            if smiles_products:
                rxn_product_dict[rxn_name] = smiles_products

        return rxn_product_dict
Ejemplo n.º 5
0
def prepare_mol(mol,
                do_geometry=True,
                do_charge=True,
                property_name='_GasteigerCharge',
                max_iter=1000,
                mmffvariant='MMFF94',
                seed=26,
                max_attempts=5):
    """Sanitize a molecule, then optionally optimise geometry and assign charges.

    :param mol: molecule to prepare; ``None`` is reported as an error.
    :param do_geometry: when ``True``, run ``opt_geometry``.
    :param do_charge: when ``True``, run ``get_charge``.
    :param property_name: charge property name passed to ``get_charge``. It is
        reset to ``'_GasteigerCharge'`` whenever ``do_charge`` is ``True``, so
        the argument currently has no effect.
    :param max_iter: forwarded to ``opt_geometry``.
    :param mmffvariant: forwarded to ``opt_geometry`` ("MMFF94" or "MMFF94s").
    :param seed: forwarded to ``opt_geometry``; seed for coordinate generation
        (-1 means no random seed is provided).
    :param max_attempts: forwarded to ``opt_geometry``; increase if issues are
        encountered during optimisation.
    :return: ``(mol, err)``. ``err`` is 1 when ``mol`` is ``None``; otherwise
        it is the code returned by the last helper that ran (``get_charge`` if
        ``do_charge``, else ``opt_geometry``), or 0 when neither ran.
    :raises ValueError: if sanitization reports a failure.
    """
    if do_charge is True:
        property_name = '_GasteigerCharge'

    # Sanitize everything except kekulization.
    san_opt = Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_KEKULIZE

    # Start from "no error" so err is always bound, even when both
    # do_geometry and do_charge are switched off.
    err = 0

    if mol is None:
        err = 1
    else:
        sanitize_fail = Chem.SanitizeMol(mol,
                                         catchErrors=True,
                                         sanitizeOps=san_opt)
        if sanitize_fail:
            raise ValueError(sanitize_fail)

        if do_geometry is True:
            mol, err = opt_geometry(mol, max_iter, mmffvariant, seed,
                                    max_attempts)

        mol = rdmolops.RemoveHs(mol)

        # NOTE(review): this overwrites any error code from opt_geometry --
        # confirm that masking a geometry failure is intended.
        if do_charge is True:
            mol, _, err = get_charge(mol, property_name, do_charge)

    if err == 1:
        print('Error in molecule pre-treatment')

    return mol, err
Ejemplo n.º 6
0
def prepare_mol(mol,
                do_geometry=True,
                do_charge=True,
                property_name='_GasteigerCharge',
                max_iter=1000,
                mmffvariant='MMFF94',
                seed=26,
                max_attempts=5):
    """Sanitize a molecule, then optionally optimise geometry and assign charges.

    :param mol: molecule to prepare; ``None`` is reported as an error.
    :param do_geometry: when ``True``, run ``opt_geometry``.
    :param do_charge: when ``True``, run ``get_charge``.
    :param property_name: charge property name passed to ``get_charge``. It is
        reset to ``'_GasteigerCharge'`` whenever ``do_charge`` is ``True``, so
        the argument currently has no effect.
    :param max_iter: forwarded to ``opt_geometry``.
    :param mmffvariant: forwarded to ``opt_geometry``.
    :param seed: forwarded to ``opt_geometry``.
    :param max_attempts: forwarded to ``opt_geometry``.
    :return: ``(mol, err)``. ``err`` is 1 when ``mol`` is ``None``; otherwise
        it is the code returned by the last helper that ran (``get_charge`` if
        ``do_charge``, else ``opt_geometry``), or 0 when neither ran.
    :raises ValueError: if sanitization reports a failure.
    """
    if do_charge is True:
        property_name = '_GasteigerCharge'

    # Sanitize everything except kekulization.
    san_opt = Chem.SanitizeFlags.SANITIZE_ALL ^ Chem.SanitizeFlags.SANITIZE_KEKULIZE

    # Start from "no error" so err is always bound, even when both
    # do_geometry and do_charge are switched off.
    err = 0

    if mol is None:
        err = 1
    else:
        sanitize_fail = Chem.SanitizeMol(mol,
                                         catchErrors=True,
                                         sanitizeOps=san_opt)
        if sanitize_fail:
            raise ValueError(sanitize_fail)

        if do_geometry is True:
            mol, err = opt_geometry(mol, max_iter, mmffvariant, seed,
                                    max_attempts)

        mol = rdmolops.RemoveHs(mol)

        # NOTE(review): this overwrites any error code from opt_geometry --
        # confirm that masking a geometry failure is intended.
        if do_charge is True:
            mol, _, err = get_charge(mol, property_name, do_charge)

    if err == 1:
        print('Error in molecule pre-treatment')

    return mol, err
Ejemplo n.º 7
0
def construct_pos_matrix(mol: rdchem.Mol,
                         out_size: Optional[int] = -1) -> np.ndarray:
    """Construct relative positions from each atom within the molecule.

    Params:
    -------
    mol: rdkit.Chem.rdchem.Mol
        Molecule of interest. If it has no conformer, one is generated
        with ETKDG (hydrogens are added for embedding, then removed).

    out_size: int, optional, default=-1
        The size of the returned array. If this option is negative or
        ``None``, it does not take any effect. Otherwise, it must be
        larger than or equal to the number of atoms in the input
        molecule. If so, the end of the array is padded with zeros.

    Returns:
    --------
    pos_matrix: np.ndarray, shape=(n,n,3)
        ``pos_matrix[i, j]`` is the XYZ position of atom ``i`` minus the
        XYZ position of atom ``j``.

    Raises:
    -------
    ValueError
        If ``out_size`` is non-negative and smaller than the number of
        atoms in ``mol``.

    Examples:
    ---------
    ```python
    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem
    >>> smiles = ('N[C@@]([H])([C@]([H])(O2)C)C(=O)N[C@@]([H])(CC(=O)N)C(=O)N[C@@]([H])([C@]([H])'
    ...           '(O)C)C(=O)N[C@@]([H])(Cc1ccc(O)cc1)C(=O)2')
    >>> mol = Chem.MolFromSmiles(smiles)
    >>> mol = Chem.AddHs(mol, addCoords=True)
    >>> AllChem.EmbedMolecule(mol, AllChem.ETKDG())
    >>> mol = Chem.RemoveHs(mol)
    >>> pos_matrix = construct_pos_matrix(mol, out_size=-1)
    >>> pos_matrix.shape
    (34, 34, 3)

    >>> pos_matrix = construct_pos_matrix(mol, out_size=49)
    >>> pos_matrix.shape
    (49, 49, 3)
    ```
    """
    # Obtain initial distance geometry between atoms, if unavailable
    if mol.GetNumConformers() == 0:
        mol = rdmolops.AddHs(mol, addCoords=True)
        rdDistGeom.EmbedMolecule(mol, rdDistGeom.ETKDG())
        mol = rdmolops.RemoveHs(mol)
    N = mol.GetNumAtoms()
    # reshape keeps the (N, 3) layout even for an empty molecule
    coords = np.asarray(mol.GetConformer().GetPositions(),
                        dtype=float).reshape(-1, 3)[:N]

    # Determine appropriate output size to generate feature matrix of same size for all mols.
    if out_size is None or out_size < 0:
        size = N
    elif out_size >= N:
        size = out_size
    else:
        raise ValueError(
            '`out_size` (N={}) is smaller than number of atoms in mol (N={})'.
            format(out_size, N))

    # `np.float` was removed in NumPy 1.24; the builtin float is the same dtype.
    pos_matrix = np.zeros(shape=(size, size, 3), dtype=float)
    # Broadcasting yields every pairwise difference coords[i] - coords[j] at once.
    pos_matrix[:N, :N] = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
    return pos_matrix
Ejemplo n.º 8
0
def sample_mol():
    """Return the molecule for SMILES 'CN=C=O' after ETKDG embedding.

    Hydrogens are added for the embedding step and removed again from the
    returned molecule.
    """
    parsed = rdmolfiles.MolFromSmiles('CN=C=O')
    hydrogenated = rdmolops.AddHs(parsed, addCoords=True)
    rdDistGeom.EmbedMolecule(hydrogenated, rdDistGeom.ETKDG())
    heavy_only = rdmolops.RemoveHs(hydrogenated)
    return heavy_only
Ejemplo n.º 9
0
def ChopWithRDKit(outputDir,inputPath):
    """Fragment a mol2 ligand on BRICS bonds and write annotated fragment files.

    Steps performed, in order:

    1. Read ``inputPath`` with ``Chem.MolFromMol2File(sanitize=False)`` and
       write the whole molecule to ``outputDir + 'output-sdf/<name>.sdf'``.
    2. Remove hydrogens, fragment on BRICS bonds, split into fragments and
       pass them through ``ReconnectDoubleBond``.
    3. Write each fragment to ``outputDir + 'output-chop/'``. Fragments with
       at least 4 atoms that are neither dummy ('*') nor hydrogen get a
       ``b-`` (brick) prefix, the others an ``l-`` (linker) prefix. One line
       per fragment is appended to ``output-log/ListAll``.
    4. Re-open every fragment file as text and rewrite it in place: dummy
       atoms ("R") and hydrogens are removed together with their bonds, the
       counts line is updated, 'M  ISO' / 'M  CHG' lines are dropped, and
       data sections are appended. Atom types are taken from the mol2 atom
       nearest to each fragment atom.
    5. Append a time-stamped line to ``output-log/Process.log`` and call
       ``combineLinkers`` with the input path followed by all fragment files.

    Paths are built by string concatenation, so ``outputDir`` must end with
    a path separator. The output sub-directories are not created here.

    :param outputDir: base output directory (with trailing separator).
    :param inputPath: path of the input mol2 file.
    :return: None.
    """
    #file name of the input, without its directory
    lig=os.path.basename(inputPath) #file name, no path

    #output folder

    output=outputDir+'output-chop/'

    outputFolderPath_log=outputDir+'output-log/'

    #NOTE(review): the next two paths are assigned but never used in this function
    outputFolderPath_sdf=outputDir+'output-sdf/'

    outputFolderPath_chop_comb=outputDir+'output-chop-comb/'

    #despite the name, this is a single molecule, not a supplier
    suppl=Chem.MolFromMol2File(inputPath,sanitize=False)

    #write the unfragmented molecule as SDF
    tempSDFPath=outputDir+'output-sdf/'+lig+'.sdf'
    w=Chem.SDWriter(tempSDFPath)
    w.SetKekulize(False)
    w.write(suppl)
    w.close()

    suppl2 = rdmolops.RemoveHs(suppl)
    newmol=Chem.FragmentOnBRICSBonds(suppl2)
    mfl=Chem.GetMolFrags(newmol,asMols=True,sanitizeFrags=False)

    # Reconnect some double bonds broken by BRICS - L7
    mfl2 = ReconnectDoubleBond(suppl2, mfl)

    #generate fragments with rdkit
    fileList=[]
    #f counts all written fragments (never read afterwards); l and r number
    #the linker and brick files respectively
    f=0
    l=0
    r=0
    for m in mfl2:
        carbonC=0
        nitrogC=0
        oxygenC=0
        rmAtomCount=0
        for i in range(m.GetNumAtoms()):
            #record dummy atom and hydrogen number
            if m.GetAtomWithIdx(i).GetSymbol() == '*':
                rmAtomCount=rmAtomCount+1
            if m.GetAtomWithIdx(i).GetSymbol() == 'H':
                rmAtomCount=rmAtomCount+1
            if m.GetAtomWithIdx(i).GetSymbol() == 'C':
                carbonC=carbonC+1
            if m.GetAtomWithIdx(i).GetSymbol() == 'N':
                nitrogC=nitrogC+1
            if m.GetAtomWithIdx(i).GetSymbol() == 'O':
                oxygenC=oxygenC+1

        #create file
        #atoms that remain once dummy atoms and hydrogens are discounted
        totalAtomNum=m.GetNumAtoms()-rmAtomCount
        #4 or more remaining atoms -> brick ('b-'), fewer -> linker ('l-')
        if m.GetNumAtoms()-rmAtomCount >=4 :
            tempFileName=output+'b-'+lig+'-'+str(r).zfill(3)+'.sdf'
            r=r+1
        if m.GetNumAtoms()-rmAtomCount <4 :
            tempFileName=output+'l-'+lig+'-'+str(l).zfill(3)+'.sdf'
            l=l+1

        w=Chem.SDWriter(tempFileName)
        w.SetKekulize(False)
        f=f+1
        w.write(m)
        w.close()
        fileList.append(tempFileName)

        #create file list with atom numbers
        #opened in append mode, so repeated runs keep adding lines
        with open(outputFolderPath_log+'ListAll','at') as outlist:
            outlist.write(tempFileName+' T '+str(totalAtomNum)+' C '+str(carbonC)+' N '+str(nitrogC)+' O '+str(oxygenC)+' \n')

    #with open(outputFolderPath_log+'Process.log','at') as outf:
    #    outf.write('Files are created.\n')

    #read atom coordinates and atom type from mol2 file
    mol2AllList=[]
    with open(inputPath,'r') as inf:
        mol2AllList=inf.readlines()
    mol2AtomInfo=[]
    #list.index raises ValueError if either section header line is missing
    molHead=mol2AllList.index('@<TRIPOS>ATOM\n')
    molEnd=mol2AllList.index('@<TRIPOS>BOND\n')
    mol2AtomInfo=mol2AllList[molHead+1:molEnd]
    mol2X=[]
    mol2Y=[]
    mol2Z=[]
    mol2A=[]
    for i in range(len(mol2AtomInfo)):
        #whitespace-separated fields: 2,3,4 are x,y,z and 5 is the atom type
        mol2Line=mol2AtomInfo[i].split()
        mol2X.append(float(mol2Line[2]))
        mol2Y.append(float(mol2Line[3]))
        mol2Z.append(float(mol2Line[4]))
        mol2A.append(mol2Line[5])

    #second pass: rewrite every fragment file in place as edited text
    for filePath in fileList:
        fileName=os.path.basename(filePath)
        if len(fileName) >0:
            #processing brick fragments
            if fileName[0] == 'b':
                brickInfoList=[]
                with open(filePath,'r') as inf:
                    brickInfoList=inf.readlines()
                #print(brickInfoList)
                brickMolEndList=[i for i, x in enumerate(brickInfoList) if x == '$$$$\n']
                #print(brickInfoList[:brickMolEndList[0]])
                #the counts line is located by its 'V2000' marker
                fileHead=list(filter(lambda x: 'V2000' in x, brickInfoList))

                fileHeadLineNum=brickInfoList.index(fileHead[0])
                #print(fileHeadLineNum)
                #NOTE(review): fileHeadList is never used
                fileHeadList=fileHead[0].split()
                #atom and bond counts are read as fixed-width 3-character fields
                atomNum=int(fileHead[0][0:3])
                bondNum=int(fileHead[0][3:6])
                atomList=brickInfoList[fileHeadLineNum+1:fileHeadLineNum+atomNum+1]
                bondList=brickInfoList[fileHeadLineNum+atomNum+1:fileHeadLineNum+atomNum+bondNum+1]

                #Search for atom type
                atomTypeList=[]

                #0-based atom indices and raw lines of dummy atoms
                dummyAtomList=[]
                dummyAtomLineList=[]

                #0-based atom indices and raw lines of hydrogen atoms
                hydrAtomList=[]
                hydrAtomLineList=[]

                for atomLine in atomList:
                    atomLineInfoList=atomLine.split()
                    #atom in brick.sdf, xyz coordinates of one line
                    atomX=float(atomLineInfoList[0])
                    atomY=float(atomLineInfoList[1])
                    atomZ=float(atomLineInfoList[2])
                    #squared distance to every mol2 atom; the nearest one supplies the atom type
                    normList=[]
                    for i in range(len(mol2AtomInfo)):
                        norm=(atomX-mol2X[i])*(atomX-mol2X[i])+(atomY-mol2Y[i])*(atomY-mol2Y[i])+(atomZ-mol2Z[i])*(atomZ-mol2Z[i])
                        normList.append(norm)
                    minInd=normList.index(min(normList))
                    atomTypeList.append(mol2A[minInd]+'\n')

                    #dummy atom List
                    #NOTE(review): atomList.index returns the first equal line, so two
                    #identical atom lines would map to the same index
                    if atomLineInfoList[3] == "R":
                        dummyAtomList.append(atomList.index(atomLine))
                        dummyAtomLineList.append(atomLine)

                    #hydrogen atom list
                    if atomLineInfoList[3] == "H":
                        hydrAtomList.append(atomList.index(atomLine))
                        hydrAtomLineList.append(atomLine)

                #keep only the lines before the first '$$$$' terminator
                newBrickInfoList=brickInfoList[:brickMolEndList[0]]

                #Branch, eligible to connect

                #[atom1, atom2] pairs (1-based), parallel to bondList
                bondInfoList=[]

                for bondLine in bondList:
                    #bondLineInfoList=bondLine.split()
                    #the two atom indices are sliced as fixed-width columns rather than split on whitespace
                    bondLineInfoList=[bondLine[0:3],bondLine[3:6]]+bondLine[6:].split()
                    bondInfoList.append([int(bondLineInfoList[0]),int(bondLineInfoList[1])])

                dummyConnection=[] #dummyConnection is a list of connections of the original file, eg. ['8 14 1 0\n',''], which will be used to remove not using connections in the last step
                allConnection=[] #all connection is a list of connection pairs, eg. [[8,14],[6,15]], which will be used to generate appendix II
                for dummyIdx in dummyAtomList:
                    #bonds that involve this dummy atom (bond indices are 1-based)
                    connectionList=list(filter(lambda x: dummyIdx+1 in x, bondInfoList))

                    for tempCon in connectionList:
                        conIndex=bondInfoList.index(tempCon)
                        dummyConnection.append(bondList[conIndex])

                    #remove the case both dummy atoms are in the bond
                    rmBond=[]

                    for connect in connectionList:
                        if connect[0]-1 in dummyAtomList:
                            if connect[1]-1 in dummyAtomList:
                                rmBond.append(connect)
                    for tempBond in rmBond:
                        connectionList.remove(tempBond)
                    allConnection=allConnection+connectionList

                #remove duplicate bond lines while keeping their order
                tempDummyCon=[]
                for dummyCon in dummyConnection:
                    if dummyCon not in tempDummyCon:
                        tempDummyCon.append(dummyCon)

                dummyConnection=tempDummyCon

                branchCon=[]

                #each entry: '<index of the real atom> <atom type found for the dummy atom>'
                for connect in allConnection:
                    if connect[0]-1 in dummyAtomList:
                        branchCon.append(str(connect[1])+' '+atomTypeList[connect[0]-1])

                    if connect[1]-1 in dummyAtomList:
                        branchCon.append(str(connect[0])+' '+atomTypeList[connect[1]-1])

                #sort branch by atom index
                #NOTE(review): the indices are compared as strings here, so '10'
                #sorts before '2' -- confirm whether numeric order was intended
                branchConAtomList=[]
                branchConAtomListBefore=[]
                branchConAtomIndexList=[]
                newBranchCon=[] #newBranchCon is the list of appendix II
                for branchLine in branchCon:
                    branchLineList=branchLine.split()
                    branchConAtomListBefore.append(branchLineList[0])

                branchConAtomList=sorted(branchConAtomListBefore)
                branchConAtomIndexList=sorted(range(len(branchConAtomListBefore)),key=lambda k:branchConAtomListBefore[k])
                for ind in range(len(branchConAtomList)):
                    newBranchCon.append(branchCon[branchConAtomIndexList[ind]])

                #hydrogen
                hydrConnection=[]
                for hydrIdx in hydrAtomList:
                    #a lazy filter is enough here: it is consumed once by the loop below
                    connectionList=filter(lambda x: hydrIdx+1 in x, bondInfoList)

                    for tempCon in connectionList:
                        conIndex=bondInfoList.index(tempCon)
                        hydrConnection.append(bondList[conIndex])

                #edit head line
                newAtomNum=atomNum-len(dummyAtomLineList)-len(hydrAtomLineList)
                newBondNum=bondNum-len(dummyConnection)-len(hydrConnection)
                newHead=str(newAtomNum).rjust(3)+str(newBondNum).rjust(3)+fileHead[0][6:]
                newBrickInfoList[fileHeadLineNum]=newHead
                #the first line of the record becomes the fragment file name
                newBrickInfoList[0]=fileName+'\n'

                #edit output list
                #edit appendix I - ATOM TYPES
                #NOTE(review): taking the first newAtomNum types assumes the removed
                #atoms come last in the atom block -- confirm
                newBrickInfoList.append('\n')
                newBrickInfoList.append('> <ATOMTYPES> \n')
                newBrickInfoList=newBrickInfoList+atomTypeList[:newAtomNum]
                #edit appendix II - BRANCH ATOM NUMBER AND ELIGIBLE ATMTYPE TO CONNECT
                newBrickInfoList.append('\n')
                newBrickInfoList.append('> <BRANCH @atom-number eligible-atmtype-to-connect> \n')
                newBrickInfoList=newBrickInfoList+newBranchCon
                newBrickInfoList.append('\n')
                newBrickInfoList.append('$$$$\n')

                #list.remove deletes the first line equal to the given text
                #remove dummy atoms
                for dummyLine in dummyAtomLineList:
                    newBrickInfoList.remove(dummyLine)
                #remove dummy bonds
                for dummyCon in dummyConnection:
                    newBrickInfoList.remove(dummyCon)
                #remove hydrogen atoms
                for hydrLine in hydrAtomLineList:
                    newBrickInfoList.remove(hydrLine)
                #remove hydrogen bonds
                for hydrCon in hydrConnection:
                    newBrickInfoList.remove(hydrCon)

                #remove M ISO line
                fileMISO=list(filter(lambda x: 'M  ISO' in x, newBrickInfoList))
                #print(fileMISO)
                if len(fileMISO)>0:
                    for ISO in fileMISO:
                        newBrickInfoList.remove(ISO)

                #remove M CHG line
                fileMCHG=list(filter(lambda x: 'M  CHG' in x, newBrickInfoList))
                if len(fileMCHG)>0:
                    for CHG in fileMCHG:
                        newBrickInfoList.remove(CHG)

                #write brick info to file
                with open(filePath,'w') as outf:
                    outf.writelines(newBrickInfoList)

            #Processing linker fragments
            #same procedure as for bricks, except that the appended section lists
            #a per-atom contact count instead of atom types plus branch atoms
            if fileName[0] == 'l':
                linkerInfoList=[]
                with open(filePath,'r+') as inf:
                    linkerInfoList=inf.readlines()

                #find the end of molecules
                linkerMolEndList=[i for i, x in enumerate(linkerInfoList) if x == '$$$$\n']
                #find the start of molecules
                fileHead=list(filter(lambda x: 'V2000' in x, linkerInfoList))
                #indicate the line num of the head line
                fileHeadLineNum=linkerInfoList.index(fileHead[0])
                #separate atom num and bond num, then separate atom and bond info
                #NOTE(review): fileHeadList is never used
                fileHeadList=fileHead[0].split()
                atomNum=int(fileHead[0][0:3])
                bondNum=int(fileHead[0][3:6])
                atomList=linkerInfoList[fileHeadLineNum+1:fileHeadLineNum+atomNum+1]
                bondList=linkerInfoList[fileHeadLineNum+atomNum+1:fileHeadLineNum+atomNum+bondNum+1]

                #Search for atom type
                atomTypeList=[]
                dummyAtomList=[]
                dummyAtomLineList=[]
                hydrAtomList=[]
                hydrAtomLineList=[]
                for atomLine in atomList:
                    atomLineInfoList=atomLine.split()
                    #atom in the linker file, xyz coordinates of one line
                    atomX=float(atomLineInfoList[0])
                    atomY=float(atomLineInfoList[1])
                    atomZ=float(atomLineInfoList[2])
                    #squared distance to every mol2 atom; the nearest one supplies the atom type
                    normList=[]
                    for i in range(len(mol2AtomInfo)):
                        norm=(atomX-mol2X[i])*(atomX-mol2X[i])+(atomY-mol2Y[i])*(atomY-mol2Y[i])+(atomZ-mol2Z[i])*(atomZ-mol2Z[i])
                        normList.append(norm)
                    minInd=normList.index(min(normList))
                    atomTypeList.append(mol2A[minInd]+'\n')

                    #dummy atom List

                    if atomLineInfoList[3] == "R":
                        dummyAtomList.append(atomList.index(atomLine))
                        dummyAtomLineList.append(atomLine)

                    #hydrogen atom list
                    if atomLineInfoList[3] == "H":
                        hydrAtomList.append(atomList.index(atomLine))
                        hydrAtomLineList.append(atomLine)

                #keep only the lines before the first '$$$$' terminator
                newLinkerInfoList=linkerInfoList[:linkerMolEndList[0]]

                #Branch, eligible to connect

                bondInfoList=[]
                for bondLine in bondList:
                    #bondLineInfoList=bondLine.split()
                    #the two atom indices are sliced as fixed-width columns rather than split on whitespace
                    bondLineInfoList=[bondLine[0:3],bondLine[3:6]]+bondLine[6:].split()
                    bondInfoList.append([int(bondLineInfoList[0]),int(bondLineInfoList[1])])

                dummyConnection=[]
                allConnection=[]
                for dummyIdx in dummyAtomList:
                    connectionList=list(filter(lambda x: dummyIdx+1 in x, bondInfoList))

                    for tempCon in connectionList:
                        conIndex=bondInfoList.index(tempCon)
                        dummyConnection.append(bondList[conIndex])

                    #remove the case both dummy atom are in the bond
                    rmBond=[]

                    for connect in connectionList:
                        if connect[0]-1 in dummyAtomList:
                            if connect[1]-1 in dummyAtomList:
                                rmBond.append(connect)
                    for tempBond in rmBond:
                        connectionList.remove(tempBond)
                    allConnection=allConnection+connectionList
                #remove connection duplicates
                tempDummyCon=[]
                for dummyCon in dummyConnection:
                    if dummyCon not in tempDummyCon:
                        tempDummyCon.append(dummyCon)

                dummyConnection=tempDummyCon

                #1-based indices of real atoms bonded to a dummy atom, one entry per such bond
                contactCount=[]

                for connect in allConnection:
                    if connect[0]-1 in dummyAtomList:
                        contactCount.append(connect[1])

                    if connect[1]-1 in dummyAtomList:
                        contactCount.append(connect[0])

                #hydrogen
                hydrConnection=[]
                for hydrIdx in hydrAtomList:
                    connectionList=list(filter(lambda x: hydrIdx+1 in x, bondInfoList))

                    for tempCon in connectionList:
                        conIndex=bondInfoList.index(tempCon)
                        hydrConnection.append(bondList[conIndex])

                #edit output list
                #edit head line
                newAtomNum=atomNum-len(dummyAtomLineList)-len(hydrAtomLineList)
                newBondNum=bondNum-len(dummyConnection)-len(hydrConnection)
                newHead=str(newAtomNum).rjust(3)+str(newBondNum).rjust(3)+fileHead[0][6:]
                newLinkerInfoList[fileHeadLineNum]=newHead
                newLinkerInfoList[0]=fileName+'\n'

                #edit appendix I - MAX NUMBER OF CONTACTS AND ATOMTYPES
                #one line per atom: '<number of dummy-atom bonds> <atom type>'
                contactAppend=[]
                for i in range(atomNum):
                    contactAppend.append(str(contactCount.count(i+1))+' '+atomTypeList[i])

                newLinkerInfoList.append('\n')
                newLinkerInfoList.append('> <MAX-NUMBER-Of-CONTACTS ATOMTYPES> \n')
                newLinkerInfoList=newLinkerInfoList+contactAppend[:newAtomNum]
                newLinkerInfoList.append('\n')
                newLinkerInfoList.append('$$$$\n')

                #remove dummy atoms
                for dummyLine in dummyAtomLineList:
                    newLinkerInfoList.remove(dummyLine)
                #remove dummy bonds
                for dummyCon in dummyConnection:
                    newLinkerInfoList.remove(dummyCon)
                #remove hydrogen atoms
                for hydrLine in hydrAtomLineList:
                    newLinkerInfoList.remove(hydrLine)
                #remove hydrogen bonds
                for hydrCon in hydrConnection:
                    newLinkerInfoList.remove(hydrCon)

                #remove M ISO line
                fileMISO=list(filter(lambda x: 'M  ISO' in x, newLinkerInfoList))
                if len(fileMISO)>0:
                    for ISO in fileMISO:
                        newLinkerInfoList.remove(ISO)

                #remove M CHG line
                fileMCHG=list(filter(lambda x: 'M  CHG' in x, newLinkerInfoList))
                if len(fileMCHG)>0:
                    for CHG in fileMCHG:
                        newLinkerInfoList.remove(CHG)

                #write linker info to file
                with open(filePath,'w') as outf:
                    outf.writelines(newLinkerInfoList)


    #append a time-stamped record of this run to the process log
    with open(outputFolderPath_log+'Process.log','at') as outLog:
        outLog.write(time.asctime( time.localtime(time.time()) ))
        outLog.write(' CHOP-MOL ')
        outLog.write(inputPath)
        outLog.write('\n')

    #hand the input path followed by every fragment file to combineLinkers
    tempCombineList=[]
    tempCombineList.append(inputPath)
    tempCombineList=tempCombineList+fileList

    combineLinkers(outputDir,tempCombineList)
Ejemplo n.º 10
0
        tempValue = FindDoubleBonds(inputFrags[i])
        if tempValue >= 0:
            # Find C.2 = C.2 bond
            dbFragList.append(fragmentMolblocks[i])
        else:
            newFragmentMolBlocks.append(fragmentMolblocks[i])
    
    reconnectedDBFrags = ProcessDoubleBonds(parentMolblock, dbFragList)
    
    newFragmentMolBlocks = newFragmentMolBlocks + reconnectedDBFrags
    
    newFragmentMol = []
    for i in range(len(newFragmentMolBlocks)):
        tempFragMol = Chem.MolFromMolBlock(newFragmentMolBlocks[i],sanitize=False)
        newFragmentMol.append(tempFragMol)

    # return tuple mol-objects
    return tuple(newFragmentMol)

# Strip hydrogens, break the molecule at its BRICS bonds and split the
# result into separate (unsanitized) fragment molecules.
suppl2 = rdmolops.RemoveHs(suppl)
new2 = BRICS.BreakBRICSBonds(suppl2)
mfl2 = Chem.GetMolFrags(new2, asMols=True, sanitizeFrags=False)

# Pass the fragments through ReconnectDoubleBond, then print each one as a
# mol block.
mfl3 = ReconnectDoubleBond(suppl2, mfl2)

for i, fragment in enumerate(mfl3):
    print(Chem.MolToMolBlock(fragment))