Beispiel #1
0
def FragmentUnsantize(suppl1):
    """Split a molecule on its BRICS bonds without sanitizing the fragments.

    Args:
        suppl1: the molecule handed to ``Chem.FragmentOnBRICSBonds``.

    Returns:
        The result of ``Chem.GetMolFrags(..., asMols=True,
        sanitizeFrags=False)`` applied to the fragmented molecule.

    Raises:
        RDKitError: with code 2 when either RDKit call fails. The original
            exception is chained as ``__cause__`` so the root cause is kept.
    """
    try:
        newmol = Chem.FragmentOnBRICSBonds(suppl1)
        mfl = Chem.GetMolFrags(newmol, asMols=True, sanitizeFrags=False)
    except Exception as err:
        # Catch ``Exception`` instead of using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not turned into RDKitError.
        print('Not good for false')
        raise RDKitError(2) from err
    else:
        print('Good False')
        return mfl
Beispiel #2
0
def FragmentSanitize(tempSDFPath):
    """Load the first molecule of an SDF file (sanitized) and BRICS-fragment it.

    Args:
        tempSDFPath: path of the SDF file opened with ``Chem.SDMolSupplier``.

    Returns:
        The result of ``Chem.GetMolFrags(..., asMols=True,
        sanitizeFrags=False)`` applied to the fragmented first molecule.

    Raises:
        RDKitError: with code 1 when reading or fragmenting fails. The
            original exception is chained as ``__cause__``.
    """
    try:
        suppl2 = Chem.SDMolSupplier(tempSDFPath, sanitize=True)
        newmol2 = Chem.FragmentOnBRICSBonds(suppl2[0])
        mfl = Chem.GetMolFrags(newmol2, asMols=True, sanitizeFrags=False)
    except Exception as err:
        # Catch ``Exception`` instead of using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not turned into RDKitError.
        raise RDKitError(1) from err
    return mfl
Beispiel #3
0
def BreakBRICSBonds(mol, bonds=None, sanitize=True, silent=True):
    """ Cut the BRICS bonds of a molecule and return the resulting molecule.

    When ``bonds`` is empty or None the work is delegated to
    ``Chem.FragmentOnBRICSBonds``.  Otherwise each entry of ``bonds`` must be
    ``((beginIdx, endIdx), (beginLabel, endLabel))``: the bond between the two
    atoms is removed and each end is capped with a dummy atom whose isotope is
    the corresponding label.  The result is sanitized when ``sanitize`` is
    true.  ``silent`` is accepted but not used by this function.

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[3*]O[3*].[4*]CC.[4*]CCC'

    a more complicated case:

    >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O'


    can also specify a limited set of bonds to work with:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))])
    >>> Chem.MolToSmiles(m2,True)
    '[3*]OCC.[4*]CCC'

    this can be used as an alternate approach for doing a BRICS decomposition by
    following BreakBRICSBonds with a call to Chem.GetMolFrags:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> frags = Chem.GetMolFrags(m2,asMols=True)
    >>> [Chem.MolToSmiles(x,True) for x in frags]
    ['[4*]CCC', '[3*]O[3*]', '[4*]CC']

    """
    # No explicit bond list: let RDKit locate and break the BRICS bonds.
    if not bonds:
        fragmented = Chem.FragmentOnBRICSBonds(mol)
        if sanitize:
            Chem.SanitizeMol(fragmented)
        return fragmented

    editable = Chem.EditableMol(mol)
    # Atoms appended to the editable copy are numbered from the current count.
    nextIdx = mol.GetNumAtoms()
    hasConformers = mol.GetNumConformers()

    # (new dummy atom index, index of the atom whose coordinates it receives)
    placements = []
    for atomPair, labelPair in bonds:
        beginIdx, endIdx = atomPair
        order = mol.GetBondBetweenAtoms(beginIdx, endIdx).GetBondType()
        editable.RemoveBond(beginIdx, endIdx)

        beginLabel, endLabel = labelPair
        # Cap each end of the removed bond; the dummy attached to one end
        # later takes the coordinates of the atom at the other end.
        for anchorIdx, label, farIdx in ((beginIdx, beginLabel, endIdx),
                                         (endIdx, endLabel, beginIdx)):
            cap = Chem.Atom(0)
            cap.SetIsotope(int(label))
            cap.SetNoImplicit(True)
            editable.AddAtom(cap)
            editable.AddBond(anchorIdx, nextIdx, order)
            if hasConformers:
                placements.append((nextIdx, farIdx))
            nextIdx += 1

    broken = editable.GetMol()
    if sanitize:
        Chem.SanitizeMol(broken)

    if hasConformers:
        for srcConf in mol.GetConformers():
            dstConf = broken.GetConformer(srcConf.GetId())
            for capIdx, srcIdx in placements:
                dstConf.SetAtomPosition(capIdx,
                                        srcConf.GetAtomPosition(srcIdx))
    return broken
Beispiel #4
0
def ChopWithRDKit(outputDir, inputPath):
    """Chop a mol2 ligand into BRICS fragments and post-process the SDF files.

    Steps, in order:

    1. Read ``inputPath`` with ``Chem.MolFromMol2File(sanitize=False)``,
       fragment it with ``Chem.FragmentOnBRICSBonds`` and write the unsplit
       molecule to ``outputDir + 'output-sdf/' + <basename> + '.sdf'``.
    2. Write every fragment to ``outputDir + 'output-chop/'``.  Fragments with
       at least 4 atoms that are neither dummy ('*') nor hydrogen get the
       prefix ``b-`` (brick), the others ``l-`` (linker).  One summary line
       per fragment is appended to ``output-log/ListAll``.
    3. Re-open each fragment file as text, assign every atom the mol2 atom
       type of the nearest atom in ``inputPath``, strip dummy atoms, hydrogen
       atoms, their bonds and the ``M  ISO`` / ``M  CHG`` lines, append an
       extra data section and overwrite the file.
    4. Append a line to ``output-log/Process.log`` and call
       ``combineLinkers(outputDir, [inputPath] + fragment files)``.

    Args:
        outputDir: output root.  Paths are built by plain string
            concatenation, so it presumably has to end with a path
            separator -- TODO confirm against callers.
        inputPath: path of the mol2 file to chop.

    Returns:
        None.  All results are written to disk.
    """
    #read input from terminal and get file name
    lig = os.path.basename(inputPath)  #file name, no path

    #output folder

    output = outputDir + 'output-chop/'

    outputFolderPath_log = outputDir + 'output-log/'

    # NOTE(review): the next two variables are not read anywhere in this function.
    outputFolderPath_sdf = outputDir + 'output-sdf/'

    outputFolderPath_chop_comb = outputDir + 'output-chop-comb/'

    suppl = Chem.MolFromMol2File(inputPath, sanitize=False)
    newmol = Chem.FragmentOnBRICSBonds(suppl)
    mfl = Chem.GetMolFrags(newmol, asMols=True, sanitizeFrags=False)

    # write the whole (unfragmented) molecule as SDF
    tempSDFPath = outputDir + 'output-sdf/' + lig + '.sdf'
    w = Chem.SDWriter(tempSDFPath)
    w.SetKekulize(False)
    w.write(suppl)
    w.close()

    #generate fragments with rdkit
    fileList = []
    # f counts all written fragments (incremented below but never read),
    # l numbers the linker files, r numbers the brick files
    f = 0
    l = 0
    r = 0
    for m in mfl:
        carbonC = 0
        nitrogC = 0
        oxygenC = 0
        rmAtomCount = 0
        for i in range(m.GetNumAtoms()):
            #record dummy atom and hydrogen number
            if m.GetAtomWithIdx(i).GetSymbol() == '*':
                rmAtomCount = rmAtomCount + 1
            if m.GetAtomWithIdx(i).GetSymbol() == 'H':
                rmAtomCount = rmAtomCount + 1
            if m.GetAtomWithIdx(i).GetSymbol() == 'C':
                carbonC = carbonC + 1
            if m.GetAtomWithIdx(i).GetSymbol() == 'N':
                nitrogC = nitrogC + 1
            if m.GetAtomWithIdx(i).GetSymbol() == 'O':
                oxygenC = oxygenC + 1

        #create file
        # atoms that remain once dummy atoms and hydrogens are discounted
        totalAtomNum = m.GetNumAtoms() - rmAtomCount
        # >= 4 remaining atoms -> brick file ('b-'), otherwise linker file ('l-')
        if m.GetNumAtoms() - rmAtomCount >= 4:
            tempFileName = output + 'b-' + lig + '-' + str(r).zfill(3) + '.sdf'
            r = r + 1
        if m.GetNumAtoms() - rmAtomCount < 4:
            tempFileName = output + 'l-' + lig + '-' + str(l).zfill(3) + '.sdf'
            l = l + 1

        w = Chem.SDWriter(tempFileName)
        w.SetKekulize(False)
        f = f + 1
        w.write(m)
        w.close()
        fileList.append(tempFileName)

        #create file list with atom numbers
        # opened in append mode, so repeated runs accumulate lines in ListAll
        with open(outputFolderPath_log + 'ListAll', 'at') as outlist:
            outlist.write(tempFileName + ' T ' + str(totalAtomNum) + ' C ' +
                          str(carbonC) + ' N ' + str(nitrogC) + ' O ' +
                          str(oxygenC) + ' \n')

    #with open(outputFolderPath_log+'Process.log','at') as outf:
    #    outf.write('Files are created.\n')

    #read atom coordinates and atom type from mol2 file
    mol2AllList = []
    with open(inputPath, 'r') as inf:
        mol2AllList = inf.readlines()
    mol2AtomInfo = []
    # the atom records are the lines between the ATOM and BOND section markers;
    # list.index raises ValueError when a marker line is not matched exactly
    molHead = mol2AllList.index('@<TRIPOS>ATOM\n')
    molEnd = mol2AllList.index('@<TRIPOS>BOND\n')
    mol2AtomInfo = mol2AllList[molHead + 1:molEnd]
    mol2X = []
    mol2Y = []
    mol2Z = []
    mol2A = []
    for i in range(len(mol2AtomInfo)):
        # whitespace-separated columns: 2,3,4 are read as x,y,z and 5 as atom type
        mol2Line = mol2AtomInfo[i].split()
        mol2X.append(float(mol2Line[2]))
        mol2Y.append(float(mol2Line[3]))
        mol2Z.append(float(mol2Line[4]))
        mol2A.append(mol2Line[5])

    # post-process every fragment file in place; the first character of the
    # file name ('b' or 'l') selects the brick or linker treatment
    for filePath in fileList:
        fileName = os.path.basename(filePath)
        if len(fileName) > 0:
            #processing brick fragments
            if fileName[0] == 'b':
                brickInfoList = []
                with open(filePath, 'r') as inf:
                    brickInfoList = inf.readlines()
                #print(brickInfoList)
                # line numbers of the '$$$$' record terminators
                brickMolEndList = [
                    i for i, x in enumerate(brickInfoList) if x == '$$$$\n'
                ]
                #print(brickInfoList[:brickMolEndList[0]])
                # the counts line is located by its 'V2000' tag
                fileHead = list(filter(lambda x: 'V2000' in x, brickInfoList))

                fileHeadLineNum = brickInfoList.index(fileHead[0])
                #print(fileHeadLineNum)
                # NOTE(review): fileHeadList is not used afterwards
                fileHeadList = fileHead[0].split()
                # atom and bond counts are read from fixed 3-character fields
                atomNum = int(fileHead[0][0:3])
                bondNum = int(fileHead[0][3:6])
                # atom lines follow the counts line, bond lines follow the atom lines
                atomList = brickInfoList[fileHeadLineNum + 1:fileHeadLineNum +
                                         atomNum + 1]
                bondList = brickInfoList[fileHeadLineNum + atomNum +
                                         1:fileHeadLineNum + atomNum +
                                         bondNum + 1]

                #Search for atom type
                atomTypeList = []

                # 0-based positions (within atomList) and raw lines of dummy atoms
                dummyAtomList = []
                dummyAtomLineList = []

                # 0-based positions (within atomList) and raw lines of hydrogens
                hydrAtomList = []
                hydrAtomLineList = []

                for atomLine in atomList:
                    atomLineInfoList = atomLine.split()
                    #atom in brick.sdf, xyz coordinates of one line
                    atomX = float(atomLineInfoList[0])
                    atomY = float(atomLineInfoList[1])
                    atomZ = float(atomLineInfoList[2])
                    #calculate norm
                    # squared Euclidean distance to every mol2 atom; the type
                    # of the closest mol2 atom is assigned to this atom
                    normList = []
                    for i in range(len(mol2AtomInfo)):
                        norm = (atomX - mol2X[i]) * (atomX - mol2X[i]) + (
                            atomY - mol2Y[i]) * (atomY - mol2Y[i]) + (
                                atomZ - mol2Z[i]) * (atomZ - mol2Z[i])
                        normList.append(norm)
                    minInd = normList.index(min(normList))
                    atomTypeList.append(mol2A[minInd] + '\n')

                    #dummy atom List
                    # atoms whose symbol column is "R" are treated as dummy atoms.
                    # NOTE(review): list.index returns the first equal line, so two
                    # textually identical atom lines would map to the same position.
                    if atomLineInfoList[3] == "R":
                        dummyAtomList.append(atomList.index(atomLine))
                        dummyAtomLineList.append(atomLine)

                    #hydrogen atom list
                    if atomLineInfoList[3] == "H":
                        hydrAtomList.append(atomList.index(atomLine))
                        hydrAtomLineList.append(atomLine)

                # everything before the first '$$$$' line is the base of the new file
                newBrickInfoList = brickInfoList[:brickMolEndList[0]]

                #Branch, eligible to connect

                bondInfoList = []

                for bondLine in bondList:
                    #bondLineInfoList=bondLine.split()
                    # the two atom indices are taken from fixed 3-character fields
                    bondLineInfoList = [bondLine[0:3], bondLine[3:6]
                                        ] + bondLine[6:].split()
                    bondInfoList.append(
                        [int(bondLineInfoList[0]),
                         int(bondLineInfoList[1])])

                dummyConnection = [
                ]  #dummyConnection is a list of connections of the original file, eg. ['8 14 1 0\n',''], which will be used to remove not using connections in the last step
                allConnection = [
                ]  #all connection is a list of connection pairs, eg. [[8,14],[6,15]], which will be used to generate appendix II
                for dummyIdx in dummyAtomList:
                    # bond indices are compared against the 0-based position + 1
                    connectionList = list(
                        filter(lambda x: dummyIdx + 1 in x, bondInfoList))

                    for tempCon in connectionList:
                        conIndex = bondInfoList.index(tempCon)
                        dummyConnection.append(bondList[conIndex])

                    #remove the case both dummy atoms are in the bond
                    rmBond = []

                    for connect in connectionList:
                        if connect[0] - 1 in dummyAtomList:
                            if connect[1] - 1 in dummyAtomList:
                                rmBond.append(connect)
                    for tempBond in rmBond:
                        connectionList.remove(tempBond)
                    allConnection = allConnection + connectionList

                # drop duplicate bond lines while keeping first-seen order
                tempDummyCon = []
                for dummyCon in dummyConnection:
                    if dummyCon not in tempDummyCon:
                        tempDummyCon.append(dummyCon)

                dummyConnection = tempDummyCon

                branchCon = []

                # each entry is "<index of the non-dummy atom> <atom type assigned
                # to the dummy atom>"; atomTypeList entries already end with '\n'
                for connect in allConnection:
                    if connect[0] - 1 in dummyAtomList:
                        branchCon.append(
                            str(connect[1]) + ' ' +
                            atomTypeList[connect[0] - 1])

                    if connect[1] - 1 in dummyAtomList:
                        branchCon.append(
                            str(connect[0]) + ' ' +
                            atomTypeList[connect[1] - 1])

                #sort branch by atom index
                # NOTE(review): the indices are compared as strings here, so the
                # order is lexicographic ('10' sorts before '2') -- confirm intended.
                branchConAtomList = []
                branchConAtomListBefore = []
                branchConAtomIndexList = []
                newBranchCon = []  #newBranchCon is the list of appendix II
                for branchLine in branchCon:
                    branchLineList = branchLine.split()
                    branchConAtomListBefore.append(branchLineList[0])

                branchConAtomList = sorted(branchConAtomListBefore)
                branchConAtomIndexList = sorted(
                    range(len(branchConAtomListBefore)),
                    key=lambda k: branchConAtomListBefore[k])
                for ind in range(len(branchConAtomList)):
                    newBranchCon.append(branchCon[branchConAtomIndexList[ind]])

                #hydrogen
                # collect the bond lines that involve a hydrogen atom
                hydrConnection = []
                for hydrIdx in hydrAtomList:
                    connectionList = filter(lambda x: hydrIdx + 1 in x,
                                            bondInfoList)

                    for tempCon in connectionList:
                        conIndex = bondInfoList.index(tempCon)
                        hydrConnection.append(bondList[conIndex])

                #edit head line
                # rewrite the counts line with the reduced atom/bond numbers and
                # put the file name on the first line of the record
                newAtomNum = atomNum - len(dummyAtomLineList) - len(
                    hydrAtomLineList)
                newBondNum = bondNum - len(dummyConnection) - len(
                    hydrConnection)
                newHead = str(newAtomNum).rjust(3) + str(newBondNum).rjust(
                    3) + fileHead[0][6:]
                newBrickInfoList[fileHeadLineNum] = newHead
                newBrickInfoList[0] = fileName + '\n'

                #edit output list
                #edit appendix I - ATOM TYPES
                # NOTE(review): slicing to newAtomNum keeps the first entries only,
                # which presumably relies on dummy/hydrogen atoms being listed
                # last in the atom block -- confirm.
                newBrickInfoList.append('\n')
                newBrickInfoList.append('> <ATOMTYPES> \n')
                newBrickInfoList = newBrickInfoList + atomTypeList[:newAtomNum]
                #edit appendix II - BRANCH ATOM NUMBER AND ELIGIBLE ATMTYPE TO CONNECT
                newBrickInfoList.append('\n')
                newBrickInfoList.append(
                    '> <BRANCH @atom-number eligible-atmtype-to-connect> \n')
                newBrickInfoList = newBrickInfoList + newBranchCon
                newBrickInfoList.append('\n')
                newBrickInfoList.append('$$$$\n')

                # list.remove deletes the first line equal to the given text.
                # NOTE(review): the atom indices inside the remaining bond lines
                # are not renumbered after atoms are removed.
                #remove dummy atoms
                for dummyLine in dummyAtomLineList:
                    newBrickInfoList.remove(dummyLine)
                #remove dummy bonds
                for dummyCon in dummyConnection:
                    newBrickInfoList.remove(dummyCon)
                #remove hydrogen atoms
                for hydrLine in hydrAtomLineList:
                    newBrickInfoList.remove(hydrLine)
                #remove hydrogen bonds
                for hydrCon in hydrConnection:
                    newBrickInfoList.remove(hydrCon)

                #remove M ISO line
                fileMISO = list(
                    filter(lambda x: 'M  ISO' in x, newBrickInfoList))
                #print(fileMISO)
                if len(fileMISO) > 0:
                    for ISO in fileMISO:
                        newBrickInfoList.remove(ISO)

                #remove M CHG line
                fileMCHG = list(
                    filter(lambda x: 'M  CHG' in x, newBrickInfoList))
                if len(fileMCHG) > 0:
                    for CHG in fileMCHG:
                        newBrickInfoList.remove(CHG)

                #write brick info to file
                # overwrites the fragment file written by SDWriter above
                with open(filePath, 'w') as outf:
                    outf.writelines(newBrickInfoList)

            #Processing linker fragments
            # same parsing and stripping as the brick branch; the difference is
            # the appended data section (contact count per atom instead of the
            # ATOMTYPES and BRANCH sections)
            if fileName[0] == 'l':
                linkerInfoList = []
                with open(filePath, 'r+') as inf:
                    linkerInfoList = inf.readlines()

                #find the end of molecules
                linkerMolEndList = [
                    i for i, x in enumerate(linkerInfoList) if x == '$$$$\n'
                ]
                #find the start of molecules
                fileHead = list(filter(lambda x: 'V2000' in x, linkerInfoList))
                #indicate the line num of the head line
                fileHeadLineNum = linkerInfoList.index(fileHead[0])
                #separate atom num and bond num, then separate atom and bond info
                # NOTE(review): fileHeadList is not used afterwards
                fileHeadList = fileHead[0].split()
                atomNum = int(fileHead[0][0:3])
                bondNum = int(fileHead[0][3:6])
                atomList = linkerInfoList[fileHeadLineNum + 1:fileHeadLineNum +
                                          atomNum + 1]
                bondList = linkerInfoList[fileHeadLineNum + atomNum +
                                          1:fileHeadLineNum + atomNum +
                                          bondNum + 1]

                #Search for atom type
                atomTypeList = []
                dummyAtomList = []
                dummyAtomLineList = []
                hydrAtomList = []
                hydrAtomLineList = []
                for atomLine in atomList:
                    atomLineInfoList = atomLine.split()
                    #atom in the linker sdf, xyz coordinates of one line
                    atomX = float(atomLineInfoList[0])
                    atomY = float(atomLineInfoList[1])
                    atomZ = float(atomLineInfoList[2])
                    #calculate norm
                    # squared Euclidean distance to every mol2 atom; the type
                    # of the closest mol2 atom is assigned to this atom
                    normList = []
                    for i in range(len(mol2AtomInfo)):
                        norm = (atomX - mol2X[i]) * (atomX - mol2X[i]) + (
                            atomY - mol2Y[i]) * (atomY - mol2Y[i]) + (
                                atomZ - mol2Z[i]) * (atomZ - mol2Z[i])
                        normList.append(norm)
                    minInd = normList.index(min(normList))
                    atomTypeList.append(mol2A[minInd] + '\n')

                    #dummy atom List
                    # NOTE(review): list.index returns the first equal line, so two
                    # textually identical atom lines would map to the same position.

                    if atomLineInfoList[3] == "R":
                        dummyAtomList.append(atomList.index(atomLine))
                        dummyAtomLineList.append(atomLine)

                    #hydrogen atom list
                    if atomLineInfoList[3] == "H":
                        hydrAtomList.append(atomList.index(atomLine))
                        hydrAtomLineList.append(atomLine)

                # everything before the first '$$$$' line is the base of the new file
                newLinkerInfoList = linkerInfoList[:linkerMolEndList[0]]

                #Branch, eligible to connect

                bondInfoList = []
                for bondLine in bondList:
                    #bondLineInfoList=bondLine.split()
                    # the two atom indices are taken from fixed 3-character fields
                    bondLineInfoList = [bondLine[0:3], bondLine[3:6]
                                        ] + bondLine[6:].split()
                    bondInfoList.append(
                        [int(bondLineInfoList[0]),
                         int(bondLineInfoList[1])])

                # dummyConnection: raw bond lines touching a dummy atom
                # allConnection: index pairs joining a dummy atom to a non-dummy atom
                dummyConnection = []
                allConnection = []
                for dummyIdx in dummyAtomList:
                    connectionList = list(
                        filter(lambda x: dummyIdx + 1 in x, bondInfoList))

                    for tempCon in connectionList:
                        conIndex = bondInfoList.index(tempCon)
                        dummyConnection.append(bondList[conIndex])

                    #remove the case both dummy atom are in the bond
                    rmBond = []

                    for connect in connectionList:
                        if connect[0] - 1 in dummyAtomList:
                            if connect[1] - 1 in dummyAtomList:
                                rmBond.append(connect)
                    for tempBond in rmBond:
                        connectionList.remove(tempBond)
                    allConnection = allConnection + connectionList
                #remove connection duplicates
                tempDummyCon = []
                for dummyCon in dummyConnection:
                    if dummyCon not in tempDummyCon:
                        tempDummyCon.append(dummyCon)

                dummyConnection = tempDummyCon

                # atom indices (as they appear in the bond lines) of the atoms
                # bonded to a dummy atom, one entry per such bond
                contactCount = []

                for connect in allConnection:
                    if connect[0] - 1 in dummyAtomList:
                        contactCount.append(connect[1])

                    if connect[1] - 1 in dummyAtomList:
                        contactCount.append(connect[0])

                #hydrogen
                # collect the bond lines that involve a hydrogen atom
                hydrConnection = []
                for hydrIdx in hydrAtomList:
                    connectionList = list(
                        filter(lambda x: hydrIdx + 1 in x, bondInfoList))

                    for tempCon in connectionList:
                        conIndex = bondInfoList.index(tempCon)
                        hydrConnection.append(bondList[conIndex])

                #edit output list
                #edit head line
                newAtomNum = atomNum - len(dummyAtomLineList) - len(
                    hydrAtomLineList)
                newBondNum = bondNum - len(dummyConnection) - len(
                    hydrConnection)
                newHead = str(newAtomNum).rjust(3) + str(newBondNum).rjust(
                    3) + fileHead[0][6:]
                newLinkerInfoList[fileHeadLineNum] = newHead
                newLinkerInfoList[0] = fileName + '\n'

                #edit appendix I - MAX NUMBER OF CONTACTS AND ATOMTYPES
                # one "<number of dummy-atom bonds> <atom type>" line per atom
                contactAppend = []
                for i in range(atomNum):
                    contactAppend.append(
                        str(contactCount.count(i + 1)) + ' ' + atomTypeList[i])

                # NOTE(review): slicing to newAtomNum keeps the first entries only,
                # which presumably relies on dummy/hydrogen atoms being listed
                # last in the atom block -- confirm.
                newLinkerInfoList.append('\n')
                newLinkerInfoList.append(
                    '> <MAX-NUMBER-Of-CONTACTS ATOMTYPES> \n')
                newLinkerInfoList = newLinkerInfoList + contactAppend[:
                                                                      newAtomNum]
                newLinkerInfoList.append('\n')
                newLinkerInfoList.append('$$$$\n')

                # list.remove deletes the first line equal to the given text.
                # NOTE(review): the atom indices inside the remaining bond lines
                # are not renumbered after atoms are removed.
                #remove dummy atoms
                for dummyLine in dummyAtomLineList:
                    newLinkerInfoList.remove(dummyLine)
                #remove dummy bonds
                for dummyCon in dummyConnection:
                    newLinkerInfoList.remove(dummyCon)
                #remove hydrogen atoms
                for hydrLine in hydrAtomLineList:
                    newLinkerInfoList.remove(hydrLine)
                #remove hydrogen bonds
                for hydrCon in hydrConnection:
                    newLinkerInfoList.remove(hydrCon)

                #remove M ISO line
                fileMISO = list(
                    filter(lambda x: 'M  ISO' in x, newLinkerInfoList))
                if len(fileMISO) > 0:
                    for ISO in fileMISO:
                        newLinkerInfoList.remove(ISO)

                #remove M CHG line
                fileMCHG = list(
                    filter(lambda x: 'M  CHG' in x, newLinkerInfoList))
                if len(fileMCHG) > 0:
                    for CHG in fileMCHG:
                        newLinkerInfoList.remove(CHG)

                #write linker info to file
                # overwrites the fragment file written by SDWriter above
                with open(filePath, 'w') as outf:
                    outf.writelines(newLinkerInfoList)

    # append a timestamped entry for this input to the process log
    with open(outputFolderPath_log + 'Process.log', 'at') as outLog:
        outLog.write(time.asctime(time.localtime(time.time())))
        outLog.write(' CHOP-MOL ')
        outLog.write(inputPath)
        outLog.write('\n')

    # hand the input path followed by all fragment paths to combineLinkers
    tempCombineList = []
    tempCombineList.append(inputPath)
    tempCombineList = tempCombineList + fileList

    combineLinkers(outputDir, tempCombineList)