예제 #1
0
    def checkProject(self,
                     ccpnProject=None,
                     ccpnDir=None,
                     structureEnsembleId=None,
                     shiftListSerial=None):
        """Resolve the CCPN project, then run the VASCO rereferencing steps.

        The project is taken from ccpnProject when given, otherwise it is
        loaded from ccpnDir, otherwise from self.ccpnDir.  When none of the
        three is set an OpenProjectPopup is created with self.initProject as
        its callback.

        structureEnsembleId and shiftListSerial are handed on unchanged to
        selectStructureEnsemble and selectShiftList.
        """

        if self.showMessages:
            print(drawBox(" VASCO: calculating rereferencing..."))

        # Get info from CCPN project: an explicit project object wins over
        # any directory.
        if ccpnProject:
            self.ccpnProject = ccpnProject
        else:
            # The argument takes precedence over the directory kept on self.
            projectDir = ccpnDir or self.ccpnDir
            if projectDir:
                self.ccpnProject = loadProject(projectDir)
            else:
                # Nothing to load from, so go through the project popup.
                from memops.editor.OpenProjectPopup import OpenProjectPopup
                _popup = OpenProjectPopup(self.guiParent,
                                          callback=self.initProject,
                                          modal=True)

        # Get the relevant structureEnsemble and the right shift list.
        self.selectStructureEnsemble(structureEnsembleId=structureEnsembleId)
        self.selectShiftList(shiftListSerial=shiftListSerial)

        # Prep the data, create the dictionary to run VASCO on, and get the
        # VASCO reref data.
        self.prepareData()
        self.createEntryDict()
        self.getVascoRerefInfo()
예제 #2
0
  def checkProject(self,ccpnProject=None,ccpnDir=None,structureEnsembleId=None,shiftListSerial=None):
    """Set self.ccpnProject and run the VASCO rereferencing steps in sequence.

    Project source, in order of preference: the ccpnProject argument, a
    project loaded from ccpnDir, a project loaded from self.ccpnDir.  If all
    three are empty an OpenProjectPopup is created with self.initProject as
    its callback.

    structureEnsembleId is passed to selectStructureEnsemble and
    shiftListSerial to selectShiftList.
    """

    if self.showMessages:
      print(drawBox(" VASCO: calculating rereferencing..."))

    #
    # Get info from CCPN project
    #

    if ccpnProject:
      self.ccpnProject = ccpnProject
    else:
      # A directory passed as argument is preferred over the one on self
      directoryToLoad = ccpnDir or self.ccpnDir
      if directoryToLoad:
        self.ccpnProject = loadProject(directoryToLoad)
      else:
        from memops.editor.OpenProjectPopup import OpenProjectPopup
        _popup = OpenProjectPopup(self.guiParent, callback = self.initProject, modal=True)

    #
    # Get the relevant structureEnsemble, then the right shift list
    #

    self.selectStructureEnsemble(structureEnsembleId=structureEnsembleId)
    self.selectShiftList(shiftListSerial=shiftListSerial)

    #
    # Prep the data, create a dictionary to run VASCO on, get VASCO reref data
    #

    self.prepareData()
    self.createEntryDict()
    self.getVascoRerefInfo()
예제 #3
0
def printAtomsBonds(chemComp):
    """Print the bonds of chemComp and the atom content of each of its variants.

    The first section lists every chemBond as its first two atoms, each
    shown as 'name (subType)', followed by the bond type.  The second
    section prints, for every ChemCompVar, its linking and descriptor, the
    atoms it contains, and the atoms of chemComp it does not contain.
    """

    def atomLabels(atoms):
        # 'name (subType)' text for every atom, in the order given.
        return ["%s (%d)" % (atom.name, atom.subType) for atom in atoms]

    print(drawBox("ChemBond information", indent="  "))

    for bond in chemComp.sortedChemBonds():
        labels = atomLabels(bond.chemAtoms)
        print("    %-12s-%-12s: %s" % (labels[0], labels[1], bond.bondType))

    # Print CCV info
    print("")
    print(drawBox("ChemCompVar atom information", indent="  "))

    for variant in chemComp.sortedChemCompVars():

        print("  %s, %s" % (variant.linking, variant.descriptor))

        includedAtoms = variant.sortedChemAtoms()
        print("    %s" % ', '.join(atomLabels(includedAtoms)))

        # Atoms of the chemComp that this variant leaves out.
        excludedAtoms = [
            atom for atom in chemComp.sortedChemAtoms()
            if atom not in includedAtoms
        ]
        print("    NOT INCLUDED: %s" % ', '.join(atomLabels(excludedAtoms)))
        print("")

    print("")
예제 #4
0
    def drawBoxDelimiter(self,boxText):
        """Print boxText framed by drawBox, with a blank line before and after.

        The prints use the single-argument parenthesised form so the method
        parses and prints the same three lines under both Python 2 and
        Python 3.  (A bare ``print`` is a silent no-op under Python 3, and
        ``print expr`` is a syntax error there.)
        """

        print("")
        print(drawBox(boxText))
        print("")
예제 #5
0
    import sys

    # Run settings and the (molType, ccpCodes) pairs to check all come from
    # the command-line arguments.
    (testMode, writeData, verbose, ccpCodeList,
     chemCompDataDir) = initialiseChemCompScript(sys.argv)

    for (molType, ccpCodes) in ccpCodeList:

        if ccpCodes:

            # Only the first ccpCode of each molType is handled.
            for ccpCode in ccpCodes[:1]:

                # A new MemopsRoot is created for every chemComp.
                project = Implementation.MemopsRoot(name='tempData')

                print(drawBox("ChemComp %s, %s" % (molType, ccpCode)))

                chemComp = getChemComp(project,
                                       molType,
                                       ccpCode,
                                       download=False,
                                       chemCompArchiveDir='lala',
                                       copyFile=False)

                if not chemComp:
                    print("  ERROR: not available!")
                    print("")
                else:
                    checkChemComp(chemComp, verbose=verbose)

        else:
            # Nothing listed for this molecule type.
            print("NO %s" % molType)
예제 #6
0
def writeMappingFile(project, outFile, originalFormats=None):
    """Write a report of original resonance names and the atoms they map to.

    The formats considered are originalFormats when given and non-empty,
    otherwise all keys of allFormatsDict.  For every format for which
    getApplResNames returns a non-empty dictionary, a section is written
    that lists each original resonance name next to the atom set name(s) it
    is linked to, or 'NOT LINKED'.  Names for which getNameInfo gives no
    sequence code are left out of the listing.

    Returns False, without opening outFile, when no format has any resonance
    names; returns True once the file has been written and closed.
    """

    if originalFormats:
        checkFormatList = originalFormats
    else:
        checkFormatList = allFormatsDict.keys()

    allResonances = project.currentNmrProject.sortedResonances()

    # First check if there's any mapping info at all...
    formatList = sorted(
        formatName for formatName in checkFormatList
        if getApplResNames(formatName, allResonances) != {})

    if not formatList:
        # No resonances linked
        return False

    fout = open(outFile, 'w')

    # try/finally guarantees the file is flushed and closed, also when one
    # of the data model lookups below raises.
    try:
        _writeMappingHeader(fout)

        for formatName in formatList:

            resonancesDict = getApplResNames(formatName, allResonances)
            resonanceNames = _sortedResonanceNames(resonancesDict)

            if resonancesDict != {}:

                fout.write(
                    drawBox("Origin file format: %s" % formatName, liner="*"))
                fout.write(newline)

                fout.write("  %-15s    %-15s  %s" %
                           ('Original name', 'Mapped to atoms',
                            'Molecules:chains:residues'))
                fout.write(newline * 2)

                for resonanceName in resonanceNames:

                    quotedName = "'" + resonanceName + "'"
                    (atomNames, moleculeNames, chainCodes,
                     resCodes) = _collectLinkedAtomInfo(
                         resonancesDict[resonanceName])

                    if atomNames:
                        addText = ':'.join((','.join(moleculeNames),
                                            ','.join(chainCodes),
                                            ','.join(resCodes)))
                        fout.write("  %-15s -> %-15s  %s" %
                                   (quotedName, ','.join(atomNames), addText))
                    else:
                        fout.write("  %-15s    %-15s" %
                                   (quotedName, 'NOT LINKED'))

                    fout.write(newline)
    finally:
        fout.close()

    return True


def _writeMappingHeader(fout):
    """Write the comment header, including the current time, to fout."""

    headerLines = (
        "#",
        "# CcpNmr atom name mapping file.",
        "#",
        "# Contains list of original names from the imported file(s) and the",
        "# atom names they were mapped to in the data model.",
        "#",
        "# File written on %s." % time.ctime(),
    )

    for headerLine in headerLines:
        fout.write(headerLine + newline)

    # The closing '#' line is followed by one empty line.
    fout.write("#" + newline * 2)


def _sortedResonanceNames(resonancesDict):
    """Return the resonance names ordered by chain, sequence code, insertion code and atom name.

    Names for which getNameInfo returns no sequence code are skipped.  When
    two names share the same four sort values, the one seen last is kept.
    """

    nameBySortKey = {}

    for resonanceName in resonancesDict.keys():

        (chainCode, seqCode, spinSystemId, seqInsertCode,
         atomName) = getNameInfo(resonanceName)

        if seqCode is None:
            continue

        sortKey = (chainCode, returnInt(seqCode), seqInsertCode, atomName)
        nameBySortKey[sortKey] = resonanceName

    # Tuples compare element by element, which gives the chain -> sequence
    # code -> insertion code -> atom name ordering in a single sort.
    return [nameBySortKey[sortKey] for sortKey in sorted(nameBySortKey)]


def _collectLinkedAtomInfo(resonances):
    """Collect atom set names and molecule/chain/residue labels for resonances.

    Returns (atomNames, moleculeNames, chainCodes, resCodes).  Each list
    holds every value once, in order of first appearance.  atomNames stays
    empty when none of the resonances has a resonanceSet with atom sets.
    """

    atomNames = []
    moleculeNames = []
    chainCodes = []
    resCodes = []

    for resonance in resonances:

        resonanceSet = resonance.resonanceSet
        if not resonanceSet:
            continue

        atomSetNames = []

        for atomSet in resonanceSet.atomSets:

            # Get some info out of data model for clarity...
            residue = atomSet.findFirstAtom().residue
            resCode = (residue.molResidue.ccpCode + str(residue.seqCode) +
                       residue.seqInsertCode.strip())
            chainCode = residue.chain.code
            moleculeName = residue.chain.molecule.name

            for (seenValues, value) in ((resCodes, resCode),
                                        (chainCodes, chainCode),
                                        (moleculeNames, moleculeName)):
                if value not in seenValues:
                    seenValues.append(value)

            # Name of the atom(s) this resonance was mapped to
            atomSetNames.append(atomSet.name)

        # The atom sets of one resonance are shown together, '/'-separated.
        if atomSetNames:
            atomName = "/".join(atomSetNames)
            if atomName not in atomNames:
                atomNames.append(atomName)

    return (atomNames, moleculeNames, chainCodes, resCodes)
예제 #7
0
def printMissingInfo(resResidueDict):
    """Print suggested residue codes for every original chain code and sequence code.

    resResidueDict is indexed as [chainCode][seqCode] and each entry is read
    at two positions: [0] is printed as-is with '(from file)' when it is
    set; otherwise the atom names in [1] are scored against comparisonList.
    In that case the matching ccpCodes are printed with their score,
    highest score first and by ascending ccpCode for equal scores.
    Residues without any suggestion are not printed.

    Always returns None.
    """

    print(drawBox(
        "Suggested mappings, based on atom names for original restraint data",
        indent="  "))

    for resChainCode in sorted(resResidueDict.keys()):

        print("    Original chain code '%s'. Higher scores below are better." %
              resChainCode)
        print("")

        chainResidues = resResidueDict[resChainCode]

        for resSeqCode in sorted(chainResidues.keys()):

            residueInfo = chainResidues[resSeqCode]

            if residueInfo[0]:

                scoreTexts = ["%s (from file)" % residueInfo[0]]

            else:

                # A ccpCode's first matching atom name sets its score; every
                # later match multiplies that score.
                scoreList = {}

                for atomName in residueInfo[1]:
                    for (ccpCodes, atomNameMatch, score,
                         multiplier) in comparisonList:
                        if atomName not in atomNameMatch:
                            continue
                        for ccpCode in ccpCodes:
                            if ccpCode in scoreList:
                                scoreList[ccpCode] *= multiplier
                            else:
                                scoreList[ccpCode] = score

                # Order by ccpCode first; the stable descending sort on the
                # score then keeps that order among equal scores.
                rankedScores = sorted(scoreList.items())
                rankedScores.sort(key=lambda item: item[1], reverse=True)

                scoreTexts = ["%s (%d)" % item for item in rankedScores]

            if scoreTexts:
                print("      %-4s%s: %s" % (str(resSeqCode[0]), resSeqCode[1],
                                            ', '.join(scoreTexts)))

        print("")

    print("")

    return None
예제 #8
0
def makeFullSugar(carboBaseName,
                  coordSystem,
                  baseGlycoCtCode,
                  testMode,
                  replace=False,
                  saveData=True):

    carboMolType = 'carbohydrate'
    namingSystemName = 'EuroCarbDb'

    project = Implementation.MemopsRoot(name='chemComp')
    project.currentUserId = 'ccpnRef'

    #
    # Set archive dir info
    #

    if testMode:
        chemCompDataDir = testChemCompDataDir
        chemCompCoordDataDir = testChemCompCoordDataDir
    else:
        chemCompDataDir = editChemCompDataDir
        chemCompCoordDataDir = editChemCompCoordDataDir

    #
    # First import all mol2 files, pick one form as 'base' unit, adapt this one,
    # then add coords from other chemComps
    #
    # TODO: This is currently very specific!
    #

    #
    # TODO SET A STEREOCHEMISTRY CLASS FOR THE A/B CHEMATOMS!!
    #

    importDir = os.path.join(origMol2DataDir, 'carbo')
    importFiles = os.listdir(os.path.join(importDir, carboBaseName))

    for importFile in importFiles[:]:
        if not importFile[-4:] == 'mol2':
            importFiles.pop(importFiles.index(importFile))

    importFiles.sort()

    mol2Format = Mol2Format(project, guiParent=None, allowPopups=False)
    molTypes = [carboMolType]

    chemComps = []
    rawChemComps = []

    for importFile in importFiles:

        # Should be 'a', 'b' or 'o'
        anomericCenter = importFile[0]

        if anomericCenter != 'a':
            ccpCode = '%s-%s' % (anomericCenter, carboBaseName)
        else:
            ccpCode = carboBaseName

        fileName = os.path.join(importDir, carboBaseName, importFile)

        print "Reading mol2 file %s..." % fileName

        ccs = mol2Format.readChemComps(fileName,
                                       molTypes=molTypes,
                                       ccpCodes=[ccpCode],
                                       saveChemComp=False,
                                       minimalPrompts=True,
                                       makeNamingSystem=namingSystemName)

        chemComps.append(ccs[0])
        rawChemComps.append(mol2Format.rawChemComp)

    #
    # Check whether only open form available (e.g. aldi ones)
    #

    if len(chemComps) == 1 and chemComps[0].ccpCode[0] == 'o':
        print "  Warning: only open form available, not creating a/b isoforms."
        hasOnlyOpenForm = True
    else:
        hasOnlyOpenForm = False

    #print chemComps
    #print rawChemComps
    """
  for cch in project.chemCompHeads:
    print cch.molType, cch.ccpCode
    for ccv in cch.chemComp.chemCompVars:
      print ccv.descriptor
      print ccv.chemAtoms
    print
  """

    refChemComp = chemComps[0]  # Should be the a form

    #
    # Reset save location, check if file already exists
    #

    chemCompXmlFile = findChemCompOrCoordFilePath(refChemComp,
                                                  testMode=testMode)

    # In this case, getting nothing back with replace - False means that it does exist!
    if chemCompXmlFile and not replace:
        print "  ChemComp %s, %s already exists - aborting creation." % (
            carboMolType, carboBaseName)

        try:
            refChemComp = getChemComp(project,
                                      carboMolType,
                                      carboBaseName,
                                      download=False,
                                      chemCompArchiveDir=editChemCompDataDir,
                                      copyFile=False)

        except:
            print "WARNING: chemcomp was already loaded!"
            refChemComp = project.findFirstChemComp(molType=carboMolType,
                                                    ccpCode=carboBaseName)

        return refChemComp

    #
    # Start creating/modifying...
    #

    print
    print drawBox("Creating sugar information")
    print

    #
    # Set the base Glyco CT code, this is always x-, except for -o only forms (aldehydes)
    #
    # TODO: might need to hack this for substituents so know which one is which... or just do it by order? Should be fine...
    #

    refGlycoCtCode = "RES\n1b:%s" % baseGlycoCtCode

    print "Setting GlycoCT code to:\n\n%s\n" % refGlycoCtCode
    print

    project.override = True
    try:
        refChemComp.baseGlycoCtCode = refGlycoCtCode
        refChemComp.findFirstChemCompVar().glycoCtCode = refGlycoCtCode
    finally:
        project.override = False

    #
    # Look for a C-O-C fragment. Should be O5 for 6 rings.
    #
    # Note: ONLY works on cyclic sugars!!!
    #

    centralOAtom = None

    for chemAtom in refChemComp.sortedChemAtoms():

        # TODO: should really do this from the Var level, but...
        if chemAtom.elementSymbol == 'O' and len(chemAtom.chemBonds) == 2:
            connectedToC = True
            for chemBond in chemAtom.sortedChemBonds():
                otherChemAtom = getOtherAtom(chemAtom, chemBond)
                if otherChemAtom.elementSymbol != 'C':
                    connectedToC = False
                    break

            if connectedToC:

                centralOAtom = chemAtom

                #
                # Try to generically determine the carbon atoms in the ring... trying not to depend on names
                # but if a C1 is connected to the O5, then start from there.
                #

                connectedAtoms = []

                for chemBond in centralOAtom.sortedChemBonds():
                    otherChemAtom = getOtherAtom(centralOAtom, chemBond)

                    if otherChemAtom.name == 'C1':
                        connectedAtoms.insert(0, otherChemAtom)
                    else:
                        connectedAtoms.append(otherChemAtom)

                #
                # Start to loop... use recursive function to find right order
                #

                ringCarbons = findRingCarbon(connectedAtoms[0], connectedAtoms)

                if not ringCarbons:
                    centralOAtom = None
                    continue

                otherCarbons = []
                for searchCarbon in refChemComp.findAllChemAtoms(
                        elementSymbol='C'):
                    if searchCarbon not in ringCarbons:
                        otherCarbons.append(searchCarbon)

                break

    if not centralOAtom and not hasOnlyOpenForm:
        raise "  Error: no central O atom found in ring!!"

    #
    # Now look for anomeric carbon and connection sites...
    #

    anomericCarbon = None
    anomericOxygen = None

    if not hasOnlyOpenForm:
        for searchCarbon in [ringCarbons[0], ringCarbons[-1]]:
            for chemBond in searchCarbon.chemBonds:
                otherChemAtom = getOtherAtom(searchCarbon, chemBond)
                if otherChemAtom and otherChemAtom.elementSymbol == 'O' and otherChemAtom != centralOAtom:
                    anomericCarbon = searchCarbon
                    anomericOxygen = otherChemAtom
                    break

            if anomericCarbon:
                break

    else:
        # Hardset these... not really anomeric but good enough
        anomericCarbon = refChemComp.findFirstChemAtom(name='C1')
        anomericOxygen = refChemComp.findFirstChemAtom(name='O1')

    if not anomericCarbon:
        raise "  Error: no anomeric carbon found."

    else:
        for chemBond in anomericOxygen.chemBonds:
            otherChemAtom = getOtherAtom(anomericOxygen, chemBond)
            if otherChemAtom and otherChemAtom.elementSymbol == 'H':
                anomericHydrogen = otherChemAtom
                break

        if not anomericHydrogen:
            print "  Warning: no anomeric hydrogen found."

    #
    # Set the stereo information for the anomeric carbon, then create subtypes for beta and open forms
    #

    #anomericCarbons = {'a': None, 'b': None, 'o': None}

    stereoAtom = anomericCarbon
    bondDict = {
        'single': 1,
        'double': 2,
        'triple': 3,
        'aromatic': 1.5,
        'dative': 1.0,
        'singleplanar': 1.5
    }  # TODO CHECK THIS!
    """
  # OK to do this because is neutral 'real' chemComp, but otherwise need to start from chemCompVar!!!
  
  #
  # TODO TODO also need to track chemBonds, but ONLY at beginning
  # then need to go back to coordinates to find out in which order the bonds appear based on the coords (when looking down
  # the main bond)!
  #
  # Might be easiest to use Martin's code (or someone else's), use the atom names to link back to here... 
  #
  totalChemBonds = chemComp.chemBonds
  chemBondsHandled = []
  
  priorityList = getStereoPriorities(stereoAtom,chemBondsHandled)
  
  priorityList.sort()
  priorityList.reverse()
  
  priorityKeys = []  
  for (priorityKey,otherAtom) in priorityList:
   priorityKeys.append(priorityKey)
  
  for priorityKey in priorityKeys:
    if priorityKeys.count(priorityKey) > 1:
      print priorityKey
  
  print priorityList

  sys.exit()
  """
    #
    # Look for binding oxygens... has to be hydroxy group connected to carbon
    #

    bindingOxygens = []
    bindingHydrogens = {}

    #
    # Set carbons to search for connected OH groups
    #

    if not hasOnlyOpenForm:
        searchCarbons = ringCarbons + otherCarbons
    else:
        searchCarbons = list(refChemComp.findAllChemAtoms(elementSymbol='C'))

        if 'aldi' in baseGlycoCtCode:
            # Don't do anything on 1 position for these - alditols
            searchCarbons.pop(searchCarbons.index(anomericCarbon))

    for searchCarbon in searchCarbons:

        validConnectedAtoms = {}
        validOHgroups = []

        for chemBond in searchCarbon.sortedChemBonds():

            otherChemAtom = getOtherAtom(searchCarbon, chemBond)

            if otherChemAtom:

                for elementSymbol in ['O', 'N']:

                    if otherChemAtom.elementSymbol == elementSymbol and otherChemAtom != centralOAtom:

                        if not validConnectedAtoms.has_key(elementSymbol):
                            validConnectedAtoms[elementSymbol] = []
                        validConnectedAtoms[elementSymbol].append(
                            otherChemAtom)

                        otherChemBonds = list(otherChemAtom.chemBonds)

                        if elementSymbol == 'O' and len(otherChemBonds) == 2:

                            otherChemBond = otherChemBonds[not otherChemBonds.
                                                           index(chemBond)]

                            connectedChemAtom = getOtherAtom(
                                otherChemAtom, otherChemBond)

                            if connectedChemAtom.elementSymbol == 'H':

                                validOHgroups.append(otherChemAtom)

        #
        # Check if single OH (no double O, or amide, ...)
        #

        if len(validOHgroups) == 1:

            if len(validConnectedAtoms) == 1:
                # Carboxylic acid (except for ring O-C-OH!)
                if len(validConnectedAtoms['O']
                       ) == 2 and not searchCarbon == anomericCarbon:
                    print "  Warning: ignoring oxygen %s - is carboxylic acid (or similar)" % validOHgroups[
                        0].name
                    validOHgroups = []
            else:
                # Amide or something similar
                print "  Warning: ignoring oxygen %s - is amide (or similar)" % validOHgroups[
                    0].name
                validOHgroups = []

            if validOHgroups:

                bindingOxygens.append(validOHgroups[0])
                bindingHydrogens[validOHgroups[0]] = connectedChemAtom

                if not hasOnlyOpenForm and searchCarbon in otherCarbons:
                    print "  Warning: setting oxygen %s as binding one (not directly connected to ring)." % (
                        validOHgroups[0].name)

    #
    # Now create variants...
    #
    # Need to have all combinations of:
    #
    #  - anomeric a/b and free/bound
    #  - binding oxygens free/bound
    #

    bindingOxygenCombs = makePowerSet(bindingOxygens)
    origAnomericCarbon = anomericCarbon

    linkAtomsMapForCoordinates = {}

    if hasOnlyOpenForm:
        stereoTypes = ('open_1', )
    else:
        stereoTypes = ('stereo_1', 'stereo_2')

    for stereoType in stereoTypes:

        subType = int(stereoType.split('_')[1])
        anomericCarbon = refChemComp.findFirstChemAtom(name='C1',
                                                       subType=subType)

        if not anomericCarbon:
            # Should only happen for stereo_2

            creationDict = {'name': 'C1', 'subType': subType}
            for attrName in ('elementSymbol', 'shortVegaType',
                             'waterExchangeable'):
                creationDict[attrName] = getattr(origAnomericCarbon, attrName)

            # TODO set chirality to OPPOSITE of whatever the first subtype is!
            anomericCarbon = refChemComp.newChemAtom(**creationDict)

            namingSystem = refChemComp.findFirstNamingSystem(
                name=namingSystemName)
            namingSystem.newAtomSysName(sysName=anomericCarbon.name,
                                        atomName=anomericCarbon.name,
                                        atomSubType=anomericCarbon.subType)

            # Bonds and other atoms are exactly the same!
            for chemBond in origAnomericCarbon.chemBonds:
                otherChemAtom = getOtherAtom(origAnomericCarbon, chemBond)
                refChemComp.newChemBond(chemAtoms=(anomericCarbon,
                                                   otherChemAtom),
                                        bondType=chemBond.bondType,
                                        stereochem=chemBond.stereochem)

        for anomericBound in range(0, 2):
            for i in range(0, len(bindingOxygenCombs)):
                bindingOxygens = bindingOxygenCombs[i]

                # Main list only contains real ChemAtoms!!
                currentChemAtoms = list(
                    refChemComp.findAllChemAtoms(className='ChemAtom'))
                anomericCarbons = refChemComp.findAllChemAtoms(name='C1')
                for tempAnomericCarbon in anomericCarbons:
                    if tempAnomericCarbon != anomericCarbon and tempAnomericCarbon in currentChemAtoms:
                        currentChemAtoms.pop(
                            currentChemAtoms.index(tempAnomericCarbon))

                # This is the neutral refChemComp - already exists!
                if not anomericBound and not bindingOxygens:
                    continue

                # Can't be both bound on the anomeric carbon and have an oxygen link there...
                if anomericBound and len(
                        bindingOxygens
                ) == 1 and bindingOxygens[0] == anomericOxygen:
                    continue

                # Can't have a link to 1 and something else...
                if len(bindingOxygens
                       ) > 1 and anomericOxygen in bindingOxygens:
                    continue

                linkedAtomKeys = []
                linkedAtoms = {}
                linkAtoms = {}

                if anomericBound and not hasOnlyOpenForm and 'aldi' not in baseGlycoCtCode:
                    # Only do C1 when relevant - for alditols it's not, always reducing end.
                    currentChemAtoms.pop(
                        currentChemAtoms.index(anomericOxygen))
                    if anomericHydrogen:
                        currentChemAtoms.pop(
                            currentChemAtoms.index(anomericHydrogen))

                    anomericCarbonKey = getChemAtomKey(anomericCarbon)
                    linkedAtomKeys.append(anomericCarbonKey)

                    linkedAtoms[anomericCarbonKey] = anomericCarbon

                    linkAtom = getLinkAtom(anomericCarbon, multi=True)

                    linkAtoms[anomericCarbonKey] = linkAtom
                    currentChemAtoms.append(linkAtom)

                    linkAtomsMapForCoordinates[(
                        linkAtom.name, linkAtom.subType)] = anomericOxygen.name

                for bindingOxygen in bindingOxygens:

                    bindingHydrogen = bindingHydrogens[bindingOxygen]

                    if bindingHydrogen and bindingHydrogen in currentChemAtoms:
                        currentChemAtoms.pop(
                            currentChemAtoms.index(bindingHydrogen))

                    bindingOxygenKey = getChemAtomKey(bindingOxygen)
                    linkedAtomKeys.append(bindingOxygenKey)
                    linkedAtoms[bindingOxygenKey] = bindingOxygen

                    linkAtom = getLinkAtom(bindingOxygen)

                    linkAtoms[bindingOxygenKey] = linkAtom
                    currentChemAtoms.append(linkAtom)

                    linkAtomsMapForCoordinates[(
                        linkAtom.name,
                        linkAtom.subType)] = bindingHydrogen.name

                linkedAtomKeys.sort()

                # Possible for C1 linkages if not handled (only reducing end)
                if not linkedAtomKeys:
                    continue

                #
                # Create linkEnds
                #

                linkInfo = []

                for linkedAtomKey in linkedAtomKeys:

                    if linkedAtomKey[0] == 'C1':
                        linkCode = "%s_%s" % (linkedAtomKey[0],
                                              linkedAtomKey[1])
                    else:
                        linkCode = linkedAtomKey[0]

                    linkInfo.append(linkCode)

                    if not refChemComp.findFirstLinkEnd(linkCode=linkCode):
                        boundChemAtom = linkedAtoms[linkedAtomKey]
                        boundLinkAtom = linkAtoms[linkedAtomKey]
                        linkEnd = refChemComp.newLinkEnd(
                            linkCode=linkCode,
                            boundChemAtom=boundChemAtom,
                            boundLinkAtom=boundLinkAtom)

                # TODO THIS IS NOT GREAT - no way of telling what's what if atom names are messed up
                #linking = 'none'
                #descriptor = 'link:%s' % string.join(linkedAtomKeys,',')
                linking = 'link:%s' % string.join(linkInfo, ',')

                if stereoType.count('stereo'):
                    descriptor = '%s:C1' % stereoType
                else:
                    descriptor = 'neutral'

                if not refChemComp.findFirstChemCompVar(linking=linking,
                                                        descriptor=descriptor):
                    print "  Trying %s,%s" % (linking, descriptor)
                    #for ca in currentChemAtoms:
                    #  if ca.className == 'LinkAtom':
                    #    print "   LA:",ca.name, ca.subType
                    #  else:
                    #    print "   CA:",ca.name, ca.subType
                    #print "   ",linkedAtomKeys

                    # Create the stereospecific GlycoCt code
                    if stereoType == "stereo_1":
                        stereoCode = 'a'
                    elif stereoType == "stereo_2":
                        stereoCode = 'b'
                    elif stereoType == 'open_1':
                        stereoCode = 'o'

                    varGlycoCtCode = "RES\n1b:%s" % (stereoCode +
                                                     baseGlycoCtCode[1:])

                    ccv = refChemComp.newChemCompVar(
                        chemAtoms=currentChemAtoms,
                        linking=linking,
                        descriptor=descriptor,
                        glycoCtCode=varGlycoCtCode,
                        formalCharge=0,
                        isParamagnetic=False,
                        isAromatic=False)

    #
    # Make sure the chemElements are accessible
    #

    project.currentChemElementStore = project.findFirstChemElementStore()

    #
    # Reset the name and molType...
    #

    chMolType = 'carbohydrate'

    project.override = True
    try:
        refChemComp.ccpCode = carboBaseName
        refChemComp.molType = carboMolType
    finally:
        project.override = False

    # TODO SET THESE CORRECTLY? Where do I get info from for this though? Can this come from MSD? Ask Dimitris!!
    # Set the PDB/MSD name - NOTE that have to do this for the correct a/b forms! Var specific!!
    #ChemComp.ChemCompSysName(refChemComp,namingSystem = 'PDB',sysName=ccpCode,specificChemCompVars = refChemComp.sortedChemCompVars())
    #ChemComp.ChemCompSysName(refChemComp,namingSystem = 'MSD',sysName=ccpCode,specificChemCompVars = refChemComp.sortedChemCompVars())

    #
    # Check chemComp validity and save
    # TODO make this all options in running script!
    #

    refChemComp.checkAllValid()

    if saveData:

        #
        # Get the original file GUID, if possible, when replacing existing file
        #

        if replace:

            (existingGuid,
             existingFile) = findExistingChemCompInfo(chemCompDataDir,
                                                      refChemComp.ccpCode,
                                                      refChemComp.molType)

            if existingGuid:
                project.override = True
                try:
                    refChemComp.guid = existingGuid
                finally:
                    project.override = False

        (tmpFilePath,
         existingFilePath) = saveTemporaryChemCompOrCoord(refChemComp,
                                                          testMode=testMode)

        # Do a check here? Or don't bother?
        consolidateTemporaryChemCompOrCoord(refChemComp,
                                            tmpFilePath,
                                            existingFilePath,
                                            testMode=testMode,
                                            replace=replace)

    #
    # Get the coordinates as well!
    #

    print "  Creating coordinates!!"

    chemCompCoord = project.newChemCompCoord(sourceName=coordSystem,
                                             molType=refChemComp.molType,
                                             ccpCode=refChemComp.ccpCode)

    for i in range(len(rawChemComps)):

        rawChemComp = rawChemComps[i]

        # Identify which one we're dealing with!!
        (dirName, baseName) = os.path.split(rawChemComp.parent.name)

        if baseName[0] == 'a':
            stereoDescriptor = "stereo_1:C1"

        elif baseName[0] == 'b':
            stereoDescriptor = "stereo_2:C1"

        elif baseName[0] == 'o':
            stereoDescriptor = "none"

        else:
            print "  Not handling type '%s' for coordinates - ignored." % baseName[
                0]
            continue

        #
        # Mark that generated by this script...
        #

        applData = Implementation.AppDataString(application='ccpNmr',
                                                keyword='origin',
                                                value='makeFullSugar.py')
        chemCompCoord.addApplicationData(applData)

        # Don't do any link atoms (yet)... could in principle use atoms that are 'missing'

        # TODO: should decompose descriptor here, then check...
        chemCompVars = refChemComp.findAllChemCompVars(
            descriptor=stereoDescriptor)

        chemAtomKeys = []

        for ccv in chemCompVars:
            for ca in ccv.sortedChemAtoms():
                caKey = (ca.name, ca.subType)
                if caKey not in chemAtomKeys:
                    chemAtomKeys.append(caKey)

        #
        # Create a dictionary for the coordinates, based on the 'raw' chemComp from the mol2 file
        #

        chemAtomCoordDict = {}

        for chemAtomKey in chemAtomKeys:

            coords = None

            if linkAtomsMapForCoordinates.has_key(chemAtomKey):
                useChemAtomName = linkAtomsMapForCoordinates[chemAtomKey]
            else:
                useChemAtomName = chemAtomKey[0]

            for rawAtom in rawChemComp.atoms:

                if rawAtom.name == useChemAtomName:
                    coords = (rawAtom.x, rawAtom.y, rawAtom.z)
                    break

            if not coords:
                print "  Warning: no coordinate for %s, atom key %s." % (
                    coordSystem, chemAtomKey)
            elif not chemAtomCoordDict.has_key(chemAtomKey):
                chemAtomCoordDict[chemAtomKey] = coords
            else:
                print "  Error: double atom key %s!" % chemAtomKey

        #
        # Set the coordinates
        #

        chemAtomCoords = {}

        #print [ra.name for ra in rawChemComp.atoms]

        for chemCompVar in chemCompVars:

            #print chemCompVar

            chemCompVarCoord = chemCompCoord.findFirstChemCompVarCoord(
                linking=chemCompVar.linking, descriptor=chemCompVar.descriptor)

            if not chemCompVarCoord:
                chemCompVarCoord = chemCompCoord.newChemCompVarCoord(
                    linking=chemCompVar.linking,
                    descriptor=chemCompVar.descriptor)

            #print chemCompVarCoord

            for ca in chemCompVar.sortedChemAtoms():

                caKey = (ca.name, ca.subType)

                #print "%-20s" % str(caKey),

                if chemAtomCoordDict.has_key(caKey):
                    coords = chemAtomCoordDict[caKey]

                    if coords:

                        if chemAtomCoords.has_key(caKey):
                            chemAtomCoord = chemAtomCoords[caKey]
                        else:
                            chemAtomCoord = chemCompCoord.newChemAtomCoord(
                                name=caKey[0],
                                subType=caKey[1],
                                x=coords[0],
                                y=coords[1],
                                z=coords[2])
                            chemAtomCoords[caKey] = chemAtomCoord

                        if chemAtomCoord not in chemCompVarCoord.chemAtomCoords:
                            chemCompVarCoord.addChemAtomCoord(chemAtomCoord)
                            #print chemAtomCoord.name, chemAtomCoord.subType,

                #print

    chemCompCoord.checkAllValid()

    if saveData:

        if replace:

            (existingGuid, existingFile) = findExistingChemCompCoordInfo(
                chemCompCoordDataDir, coordSystem, chemCompCoord.ccpCode,
                chemCompCoord.molType)

            if existingGuid:
                project.override = True
                try:
                    chemCompCoord.guid = existingGuid
                finally:
                    project.override = False

        (tmpFilePath,
         existingFilePath) = saveTemporaryChemCompOrCoord(chemCompCoord,
                                                          testMode=testMode)

        # Do a check here? Or don't bother? This is blank regeneration from reference data, so should be OK!
        consolidateTemporaryChemCompOrCoord(chemCompCoord,
                                            tmpFilePath,
                                            existingFilePath,
                                            testMode=testMode,
                                            replace=replace)

    return refChemComp
예제 #9
0
def addSubstituentToBaseUnit(baseUnitCcpCode,
                             baseUnitMolType,
                             testMode,
                             mergeInfoList,
                             coordSystem,
                             substRemoveAtomName='SUB',
                             saveData=True,
                             replace=False,
                             namingSystemName=None,
                             resetGlycoCtCode=False):

    #
    # Set directories to read/write data from/to
    #

    if testMode:
        chemCompCoordDataDir = testChemCompCoordDataDir
        chemCompDataDir = testChemCompDataDir
    else:
        chemCompCoordDataDir = editChemCompCoordDataDir
        chemCompDataDir = editChemCompDataDir

    #
    # Now start setting up project
    #

    substIndexPatt = re.compile("(\d+)")
    substituentDir = os.path.join(origMol2DataDir, 'subst')

    ccpCode = baseUnitCcpCode

    substituentList = []
    for mergeInfoDict in mergeInfoList:
        substituent = mergeInfoDict['substituent']
        if not substituent in substituentList:
            substituentList.append(substituent)

        ccpCode += ":%s_%s" % (mergeInfoDict['baseBindingAtomName'],
                               substituentInfo[substituent]['shortCode'])

    project = Implementation.MemopsRoot(name='chemComp')
    project.currentUserId = 'ccpnRef'
    project.currentChemElementStore = project.findFirstChemElementStore()

    #
    # First import all relevant subsituent mol2 files
    # TODO could in principle have these 'pre-imported' in CCPN, in special dir...
    #      note though that the SUBST atoms have to be removed in that case...
    #

    mol2Format = Mol2Format(project, guiParent=None, allowPopups=False)

    chemComps = {}
    rawChemComps = {}

    for substituent in substituentList:

        fileName = os.path.join(substituentDir, "%s.mol2" % substituent)

        ccs = mol2Format.readChemComps(fileName,
                                       ccpCodes=[substituent],
                                       saveChemComp=False,
                                       minimalPrompts=True)

        chemComps[substituent] = ccs[0]
        rawChemComps[substituent] = mol2Format.rawChemComp

    #
    # Get the original base unit information
    #

    origBaseUnit = getChemComp(project,
                               baseUnitMolType,
                               baseUnitCcpCode,
                               chemCompArchiveDir=chemCompDataDir,
                               copyFile=False)

    #
    # If replacing, keep GUID of original as default
    #

    creationKeywds = {}

    if replace:

        if testMode:
            dataDir = testChemCompDataDir
        else:
            dataDir = editChemCompDataDir

        (existingGuid,
         existingFile) = findExistingChemCompInfo(dataDir, ccpCode,
                                                  baseUnitMolType)

        if existingGuid:
            creationKeywds['guid'] = existingGuid

    #
    # Now create the new base unit, copy the original base unit file, change the guid in the file, then load it
    #

    baseUnit = project.newNonStdChemComp(molType=baseUnitMolType,
                                         ccpCode=ccpCode,
                                         **creationKeywds)
    repository = project.findFirstRepository(name='userData')

    isNewFile = copyBaseToModifiedFile(project,
                                       baseUnit,
                                       origBaseUnit,
                                       testMode,
                                       repository,
                                       replace=replace)

    if not isNewFile:
        return

    #print baseUnit.chemAtoms

    origBaseUnitCcc = getChemCompCoord(
        project,
        coordSystem,
        baseUnitMolType,
        baseUnitCcpCode,
        chemCompCoordArchiveDir=chemCompCoordDataDir,
        copyFile=False)
    if not origBaseUnitCcc:
        raise ("Error: no coordinates available for %s!" % baseUnitCcpCode)
    else:
        #
        # If replacing, keep GUID of original as default
        #

        creationKeywds = {}

        if replace:

            if testMode:
                dataDir = testChemCompCoordDataDir
            else:
                dataDir = editChemCompCoordDataDir

            (existingGuid, existingFile) = findExistingChemCompCoordInfo(
                dataDir, coordSystem, ccpCode, baseUnitMolType)

            if existingGuid:
                creationKeywds['guid'] = existingGuid

        baseUnitCcc = project.newChemCompCoord(sourceName=coordSystem,
                                               molType=baseUnitMolType,
                                               ccpCode=ccpCode,
                                               **creationKeywds)
        copyBaseToModifiedFile(project,
                               baseUnitCcc,
                               origBaseUnitCcc,
                               testMode,
                               repository,
                               replace=replace)
        #print baseUnitCcc.chemAtomCoords

    #
    # Reset the glycoCtCode for the new base unit (if relevant)
    #

    if resetGlycoCtCode:

        substGlycoCtText = ""
        substGlycoCtInfo = {'RES': [], 'LIN': []}
        resIndex = 2
        linIndex = 1

        # Note: this assumes the mergeInfoList is ordered!

        for mergeInfoDict in mergeInfoList:

            baseBindingAtomName = mergeInfoDict['baseBindingAtomName']
            removeBaseAtomNames = mergeInfoDict['removeBaseAtomNames']
            substituent = mergeInfoDict['substituent']

            substGlycoCtInfo['RES'].append("%ds:%s" % (resIndex, substituent))

            parentAtomIndex = int(baseBindingAtomName[-1])

            if baseBindingAtomName[0] == 'O':
                parentSubstitutionType = 'o'
            else:
                parentSubstitutionType = 'd'

            substGlycoCtInfo['LIN'].append(
                "%d:%d%s(%d+%d)%d%s" % (linIndex, 1, parentSubstitutionType,
                                        parentAtomIndex, 1, resIndex, 'n'))

            resIndex += 1
            linIndex += 1

        for tmpStr in substGlycoCtInfo['RES']:
            substGlycoCtText += "\n" + tmpStr
        substGlycoCtText += "\nLIN"
        for tmpStr in substGlycoCtInfo['LIN']:
            substGlycoCtText += "\n" + tmpStr

        newBaseGlycoCtCode = origBaseUnit.baseGlycoCtCode + substGlycoCtText

        project.override = True
        try:
            baseUnit.baseGlycoCtCode = newBaseGlycoCtCode

            for chemCompVar in baseUnit.chemCompVars:
                chemCompVar.glycoCtCode = chemCompVar.glycoCtCode + substGlycoCtText

        finally:
            project.override = False

        print "Setting base GlycoCT code to:\n\n%s\n" % newBaseGlycoCtCode
        print

    #
    # Set naming system
    #

    namingSystem = None
    if namingSystemName:
        namingSystem = baseUnit.findFirstNamingSystem(name=namingSystemName)
        if not namingSystem:
            namingSystem = baseUnit.newNamingSystem(name=namingSystemName)
            print "Created new naming system %s" % namingSystemName

    #
    # Add substituent info, remove atoms from base unit
    #

    addVariants = {}

    for mergeInfoDict in mergeInfoList:

        #
        # 0. Get the info from the mergeInfoDict, print a comment
        #

        baseBindingAtomName = mergeInfoDict['baseBindingAtomName']
        removeBaseAtomNames = mergeInfoDict['removeBaseAtomNames']
        substituent = mergeInfoDict['substituent']
        renameSubstituentAtomNames = mergeInfoDict[
            'renameSubstituentAtomNames']

        baseBindingAtoms = baseUnit.findAllChemAtoms(name=baseBindingAtomName)

        newBondType = substituentInfo[substituent]['bondType']
        newStereochem = substituentInfo[substituent]['stereochem']

        print
        print drawBox("Creating link between base atom %s to substituent %s" %
                      (baseBindingAtomName, substituent))
        print

        #
        # 1. Set the substUnitIndex - this is the identifier that is added to the substituents
        #    when part of the base chemComp. It is taken from whichever number is part of the baseBindingAtomName
        #
        #    TODO: should be molType specific - use A,B,G,... for amino acids!!
        #

        substUnitSearch = substIndexPatt.search(baseBindingAtomName)
        substUnitIndex = substUnitSearch.group(1)

        #
        # 2. Remove relevant atoms from the base unit, all subtypes
        #    Keep track of atom directly linked to the baseBindingAtom for recalculating coordinates!
        #

        baseAtomCoords = {}

        for removeBaseAtomName in removeBaseAtomNames:

            removeBaseAtoms = baseUnit.findAllChemAtoms(
                name=removeBaseAtomName)

            for removeBaseAtom in removeBaseAtoms:

                # Search if bound to the baseBindingAtom
                isBoundToBaseBindingAtom = False
                for chemBond in baseUnit.chemBonds:
                    bondChemAtoms = list(chemBond.chemAtoms)
                    if removeBaseAtom in bondChemAtoms:
                        otherBondChemAtom = bondChemAtoms[
                            not bondChemAtoms.index(removeBaseAtom)]
                        if otherBondChemAtom in baseBindingAtoms:
                            isBoundToBaseBindingAtom = True
                            break

                # Track the coordinates if bound to the baseBindingAtom
                if isBoundToBaseBindingAtom:
                    for baseBindingAtom in baseBindingAtoms:

                        baseAtomCoords[baseBindingAtom] = {}
                        baseBindingAtomCoords = baseUnitCcc.findAllChemAtomCoords(
                            chemAtom=baseBindingAtom)

                        for baseBindingAtomCoord in baseBindingAtomCoords:
                            baseCoord = (baseBindingAtomCoord.x,
                                         baseBindingAtomCoord.y,
                                         baseBindingAtomCoord.z)

                            for chemCompVarCoord in baseBindingAtomCoord.chemCompVarCoords:
                                # Get the coordinates, if any
                                removeBaseAtomCoord = chemCompVarCoord.findFirstChemAtomCoord(
                                    chemAtom=removeBaseAtom)
                                if removeBaseAtomCoord:
                                    baseBoundCoord = (removeBaseAtomCoord.x,
                                                      removeBaseAtomCoord.y,
                                                      removeBaseAtomCoord.z)
                                    jointCoords = (baseCoord, baseBoundCoord)

                                    if not baseAtomCoords[
                                            baseBindingAtom].has_key(
                                                jointCoords):
                                        baseAtomCoords[baseBindingAtom][
                                            jointCoords] = []
                                    baseAtomCoords[baseBindingAtom][
                                        jointCoords].append(chemCompVarCoord)

                # Now start deleting on chemComp and chemCompCoord levels
                removeBaseAtomCoords = baseUnitCcc.findAllChemAtomCoords(
                    chemAtom=removeBaseAtom)
                for removeBaseAtomCoord in removeBaseAtomCoords:
                    removeBaseAtomCoord.delete()

                removeBaseAtom.delete()
                print "  Removed atom %s, subType %d from base unit..." % (
                    removeBaseAtomName, removeBaseAtom.subType)

                for namingSystem in baseUnit.namingSystems:
                    for asn in namingSystem.atomSysNames:
                        if asn.atomName == removeBaseAtomName:
                            asn.delete()

                #
                # 2.1 Also rename all linkEnds and chemCompVars that have this atom in the descriptor - are now irrelevant
                #

                for chemCompVar in baseUnit.chemCompVars:
                    deleteVoidChemCompVar(chemCompVar, 'descriptor',
                                          removeBaseAtomName, baseUnit,
                                          baseUnitCcc)
                    deleteVoidChemCompVar(chemCompVar, 'linking',
                                          removeBaseAtomName, baseUnit,
                                          baseUnitCcc)
                    deleteLinkEnd(baseUnit, removeBaseAtomName)

        #
        # 3. Add the substituent info to the base unit
        #
        # Currently this works off the mol2 file. Could add, e.g. SUB_C, SUB_O, SUB_N, depending on substituted atom...
        # for better coordinates later on. TODO: try to implement this!!!
        #
        # TODO: look into avoiding mol2 step, just put into CCPN in temporary library, use linkAtoms as linking ones.
        # These can then also be identified by elementSymbol (O,C,N,...)
        #

        substUnit = chemComps[substituent]
        rawSubstChemComp = rawChemComps[substituent]

        #
        # 3.1 Initialise information
        #       - determine which atom to remove from substituent (will need this for coordinates further down though)
        #       - get single bond coming from this atom to identify the binding atom on the substituent side
        #       - create substToBaseDict dictionary that maps substituent objects to newly created chemComp unit objects
        #       - deal with the coordinates

        substRemoveAtom = substUnit.findFirstChemAtom(name=substRemoveAtomName)
        chemBond = substRemoveAtom.findFirstChemBond(
        )  # Should only have ONE single bond!
        substBindingAtom = getOtherAtom(substRemoveAtom, chemBond)

        substCoords = [
            getAtomOrigCoords(rawSubstChemComp, substRemoveAtomName),
            getAtomOrigCoords(rawSubstChemComp, substBindingAtom.name)
        ]
        substCoordsBaseAtoms = [None, None]

        substToBaseDict = {}

        #
        # 3.2 Create chemAtoms and chemAtomSets
        #

        redoChemAtomSets = []
        substChemAtoms = []

        for chemAtomOrSet in substUnit.sortedChemAtoms(
        ) + substUnit.sortedChemAtomSets():

            # Ignore atoms to be substituted
            if chemAtomOrSet.name[:len(substRemoveAtomName
                                       )] == substRemoveAtomName:
                continue

            createChemAtomOrSet = CreateChemAtomOrSet(chemAtomOrSet)

            if renameSubstituentAtomNames.has_key(chemAtomOrSet.name):
                createChemAtomOrSet.setForcedName(
                    renameSubstituentAtomNames[chemAtomOrSet.name])
            else:
                createChemAtomOrSet.setName(substUnitIndex)

            redoChemAtomSets.extend(
                createChemAtomOrSet.setChemAtomLinks(
                    chemAtomOrSet, substToBaseDict,
                    substRemoveAtomName))  # Only relevant for chemAtomSets!

            if createChemAtomOrSet.checkExistence(baseUnit):
                continue

            newChemAtomOrSet = createChemAtomOrSet.createNewObject(baseUnit)
            createChemAtomOrSet.setAtomSysName(namingSystem)

            substToBaseDict[chemAtomOrSet] = newChemAtomOrSet

            # Keep track of new chemAtoms, also track coordinates
            if newChemAtomOrSet.className in ('LinkAtom', 'ChemAtom'):
                substChemAtoms.append(newChemAtomOrSet)

                if chemAtomOrSet == substBindingAtom:
                    baseSubstBindingAtom = newChemAtomOrSet
                    substCoordsBaseAtoms[1] = newChemAtomOrSet
                else:
                    substCoordsBaseAtoms.append(newChemAtomOrSet)
                    substCoords.append(
                        getAtomOrigCoords(rawSubstChemComp,
                                          chemAtomOrSet.name))

        #
        # 3.2.1 Now set chemAtomSets that are linked to other chemAtomSets - have to do this later because
        #       otherwise could have not been created yet. Could in principle do this in above loop by
        #       organising list of chemAtomSets, but this is easier.
        #

        for chemAtomSet in redoChemAtomSets:
            createChemAtomOrSet = CreateChemAtomOrSet(chemAtomSet)

            if renameSubstituentAtomNames.has_key(chemAtomSet.name):
                createChemAtomOrSet.setForcedName(
                    renameSubstituentAtomNames[chemAtomSet.name])
            else:
                createChemAtomOrSet.setName(substUnitIndex)

            createChemAtomOrSet.setChemAtomSetLinks()
            if createChemAtomOrSet.checkExistence(baseUnit):
                continue
            newChemAtomSet = createChemAtomOrSet.createNewObject(baseUnit)
            createChemAtomOrSet.setAtomSysName(namingSystem)
            substToBaseDict[chemAtomSet] = newChemAtomSet

        #
        # 3.3 Now add all other objects connected to the chemComp (see global chemCompLinkList)
        #
        # TODO: this is not fully functional - have to add specific settings for some links (but probably never required, so will wait)
        #

        for ccLinkName in chemCompLinkList:
            for substObject in getattr(substUnit, ccLinkName):
                createCcpnObject = CreateCcpnObject(substObject)
                if not createCcpnObject.setLinks(substToBaseDict):
                    continue
                newCcpnObject = createCcpnObject.createNewObject(baseUnit)
                substToBaseDict[substObject] = newCcpnObject
        """            
      # TODO: LINKS THAT REQUIRE SPECIAL TREATMENT/MERGING WITH EXISTING INFO
      # 'chemCompVars', 'applicationData',
      
      # TODO: object that require special treatment (have to be renamed, ...): chemAtomSysNames, chemCompSysNames (?)
    """

        #
        # 4. Connect base binding atom(s) to the new subsituent atoms
        #

        for baseBindingAtom in baseBindingAtoms:

            #
            # 4.1 First remove all chemCompVars and linkEnds that have linking vars including this atom!
            #

            for chemCompVar in baseBindingAtom.chemCompVars:
                deleteVoidChemCompVar(chemCompVar,
                                      'linking',
                                      baseBindingAtomName,
                                      baseUnit,
                                      baseUnitCcc,
                                      actionType='binding')
                deleteLinkEnd(baseUnit,
                              baseBindingAtomName,
                              actionType='binding')

            #
            # 4.2 Calculate the new coordinates for the substituent atoms
            #

            coordsForChemCompVar = {}

            if baseAtomCoords.has_key(baseBindingAtom):
                for jointCoords in baseAtomCoords[baseBindingAtom]:
                    newSubstCoords = tuple([
                        tuple(coord) for coord in superposeNewVectorsOnOld(
                            jointCoords, substCoords)
                    ])
                    coordsForChemCompVar[newSubstCoords] = baseAtomCoords[
                        baseBindingAtom][jointCoords]

            #
            # 4.3 Now create the bond between the new substituent and the existing base unit
            #

            baseUnit.newChemBond(chemAtoms=(baseBindingAtom,
                                            baseSubstBindingAtom),
                                 bondType=newBondType,
                                 stereochem=newStereochem)

            #print "  Creating chemBond between base atom %s and subsituent atom %s" % (baseBindingAtom.name,baseSubstBindingAtom.name)

            for chemCompVar in baseBindingAtom.chemCompVars:
                #print "   ", baseSubstBindingAtom.name, baseSubstBindingAtom.subType,chemCompVar.linking,chemCompVar.descriptor
                for substChemAtom in substChemAtoms:
                    chemCompVar.addChemAtom(substChemAtom)
                # TODO: have to RENAME the chemCompVar in case there are variants for the substituent!!

            #
            # 4.4 Finally set the new coordinates... ignore first one (is the substituted atom from the substituent!)
            #

            for newSubstCoords in coordsForChemCompVar.keys():
                for i in range(1, len(substCoordsBaseAtoms)):
                    newSubstCoord = newSubstCoords[i]
                    substChemAtom = substCoordsBaseAtoms[i]

                    substChemAtomCoord = baseUnitCcc.findFirstChemAtomCoord(
                        chemAtom=substChemAtom,
                        x=newSubstCoord[0],
                        y=newSubstCoord[1],
                        z=newSubstCoord[2])
                    if not substChemAtomCoord:
                        substChemAtomCoord = baseUnitCcc.newChemAtomCoord(
                            name=substChemAtom.name,
                            subType=substChemAtom.subType,
                            x=newSubstCoord[0],
                            y=newSubstCoord[1],
                            z=newSubstCoord[2])

                    for cccv in coordsForChemCompVar[newSubstCoords]:
                        if not cccv.isDeleted:
                            cccv.addChemAtomCoord(substChemAtomCoord)

            print "  Connected new substituent chemAtom %s,%s to base unit atom %s,%s, and included in relevant chemCompVars" % (
                baseSubstBindingAtom.name, baseSubstBindingAtom.subType,
                baseBindingAtom.name, baseBindingAtom.subType)

        #
        # NEXT ON LIST: make sure the chemCompVars make sense if there are any for the substituent!!!
        #
        # ALSO try this on amino acids when working for carbs - need split code and rename some variables though
        #

    #
    # Check validity and save
    #

    baseUnit.checkAllValid(complete=True)

    if baseUnitCcc:
        baseUnitCcc.checkAllValid(complete=True)

    if saveData:

        (filePath,
         existingFilePath) = saveTemporaryChemCompOrCoord(baseUnit,
                                                          testMode=testMode)
        consolidateTemporaryChemCompOrCoord(baseUnit,
                                            filePath,
                                            existingFilePath,
                                            testMode=testMode,
                                            replace=replace)

        if baseUnitCcc:
            (filePath, existingFilePath) = saveTemporaryChemCompOrCoord(
                baseUnitCcc, testMode=testMode)
            consolidateTemporaryChemCompOrCoord(baseUnitCcc,
                                                filePath,
                                                existingFilePath,
                                                testMode=testMode,
                                                replace=replace)