Beispiel #1
0
    def getRefChemAtom(self, nmrAtoms):
        """Return the atom bonded to the first NMR atom's chemAtom, if unique.

        Only the first entry of ``nmrAtoms`` is inspected.  When its
        ``chemAtom`` has exactly one chemBond, the atom at the other end of
        that bond (as given by ``getOtherAtom``) is returned.

        Returns None when ``nmrAtoms`` is empty or when the chemAtom does not
        have exactly one bond.
        """

        if not nmrAtoms:
            return None

        chemAtom = nmrAtoms[0].chemAtom
        chemBonds = chemAtom.chemBonds

        if len(chemBonds) != 1:
            return None

        # Fetch the single bond by iteration instead of by index, so this
        # works whether chemBonds is a sequence or an unordered collection.
        return getOtherAtom(chemAtom, next(iter(chemBonds)))
Beispiel #2
0
def createResidueProtonToHeavyAtom(residue):
  """Map proton names of a residue to the name of their bonded heavy atom.

  Walks the non-hydrogen atoms of the residue's ChemComp (skipping 'OXT')
  and collects the hydrogens bonded to each one.  For the atom named 'N'
  only the proton named 'H' is kept.

  Returns a dict ``{ccpCode: {key: heavyAtomName}}`` where ``key`` is

    - a sorted tuple of proton names, when the chemAtomSet of the first
      bonded proton is equivalent or prochiral;
    - each individual proton name, unless that set is equivalent.
  """

  ccpCode = residue.ccpCode
  heavyAtomByProton = {}

  for chemAtom in residue.chemCompVar.chemComp.sortedChemAtoms():

    if chemAtom.elementSymbol == 'H':
      continue

    heavyName = chemAtom.name

    if heavyName == 'OXT':
      continue

    protons = []

    for chemBond in chemAtom.chemBonds:

      neighbour = getOtherAtom(chemAtom, chemBond)

      if neighbour.elementSymbol != 'H':
        continue

      # On the atom named 'N' only the proton called 'H' is accepted
      if heavyName == 'N' and neighbour.name != 'H':
        continue

      protons.append(neighbour)

    if not protons:
      continue

    atomSet = protons[0].chemAtomSet

    if atomSet:
      isEquivalent = atomSet.isEquivalent

      if isEquivalent or atomSet.isProchiral == True:
        groupKey = tuple(sorted([proton.name for proton in protons]))
        heavyAtomByProton[groupKey] = heavyName

        # Equivalent protons are only addressable as a group
        if isEquivalent:
          continue

    for proton in protons:
      heavyAtomByProton[proton.name] = heavyName

  return {ccpCode: heavyAtomByProton}
Beispiel #3
0
def findRingCarbon(chemAtom, connectedAtoms):
    """Recursively trace a chain of carbons from chemAtom to the path's end atom.

    ``connectedAtoms`` is the path found so far; its last element is the
    atom that has to be reached.  Starting at ``chemAtom``, bonded carbon
    atoms that are not yet on the path are followed depth-first, each one
    being inserted just before the last element.

    Returns a copy of the completed path as soon as a neighbour of
    ``chemAtom`` equals the last element, or None when no chain exists.
    """

    for bond in chemAtom.chemBonds:

        neighbour = getOtherAtom(chemAtom, bond)
        target = connectedAtoms[-1]

        # Reached the closing atom: the path collected so far is the answer
        if neighbour == target:
            return connectedAtoms[:]

        # Only carbons that are not on the path yet are worth following
        if (not neighbour
                or neighbour in connectedAtoms
                or neighbour.elementSymbol != 'C'):
            continue

        extendedPath = connectedAtoms[:-1] + [neighbour, target]
        ringPath = findRingCarbon(neighbour, extendedPath)

        if ringPath:
            return ringPath

    return None
Beispiel #4
0
def getStereoPriorities(stereoAtom, chemBondsHandled):
    """Collect a priority key for every bond of stereoAtom.

    For each bond not yet listed in ``chemBondsHandled`` the key is
    ``(atomic number of the bonded atom, bondDict[bondType])`` and the bond
    is appended to ``chemBondsHandled`` (the list is modified in place).
    Bonds that were already handled get the key ``(0, 0)`` and no atom.

    Relies on a ``bondDict`` lookup (bond type -> number) being available
    in the enclosing module scope.

    Returns a tuple ``(priorityList, followPriorityKeys)``:

      - priorityList: ``(priorityKey, otherAtom)`` per bond, in bond order;
      - followPriorityKeys: keys that occurred more than once among the
        newly handled bonds.
    """

    priorities = []
    seenKeys = []
    repeatedKeys = []

    for bond in stereoAtom.chemBonds:

        if bond not in chemBondsHandled:
            neighbour = getOtherAtom(stereoAtom, bond)
            key = (neighbour.chemElement.atomNumber, bondDict[bond.bondType])
            chemBondsHandled.append(bond)

            # First sighting is just recorded; a second one marks a tie
            if key not in seenKeys:
                seenKeys.append(key)
            elif key not in repeatedKeys:
                repeatedKeys.append(key)

        else:
            neighbour = None
            key = (0, 0)

        priorities.append((key, neighbour))

    return (priorities, repeatedKeys)
Beispiel #5
0
def makeFullSugar(carboBaseName,
                  coordSystem,
                  baseGlycoCtCode,
                  testMode,
                  replace=False,
                  saveData=True):
    """Create a carbohydrate ChemComp with link variants and coordinates.

    The function works in a fresh 'chemComp' MemopsRoot project and:

      1. imports every file ending in 'mol2' found in
         ``<origMol2DataDir>/carbo/<carboBaseName>``; the first character of
         each file name is taken as the form ('a', 'b' or 'o');
      2. uses the first imported ChemComp (sorted file order) as reference;
         if a file for it already exists and ``replace`` is false, the
         existing ChemComp is loaded and returned without further work;
      3. sets the GlycoCT code, locates the ring (C-O-C oxygen), the
         anomeric carbon/oxygen/hydrogen and the hydroxyl oxygens usable
         for linking;
      4. creates a ChemCompVar (plus LinkEnds) for every allowed
         combination of stereo subtype, anomeric link and oxygen links;
      5. validates and optionally saves the ChemComp, then builds, validates
         and optionally saves a ChemCompCoord from the raw mol2 atoms.

    Parameters:
      carboBaseName   -- name of the mol2 sub-directory; also the ccpCode
                         given to the resulting ChemComp.
      coordSystem     -- used as sourceName of the new ChemCompCoord.
      baseGlycoCtCode -- GlycoCT residue code; its first character is
                         replaced by 'a', 'b' or 'o' for the variants.
      testMode        -- selects the test instead of the edit data
                         directories and is passed on to the file helpers.
      replace         -- if true, re-create the ChemComp even when a file
                         exists, reusing the GUID of the existing file.
      saveData        -- if true, write ChemComp and ChemCompCoord to disk.

    Returns the reference ChemComp.

    Raises ValueError when no ring oxygen (for non-open forms) or no
    anomeric carbon can be found.
    """

    carboMolType = 'carbohydrate'
    namingSystemName = 'EuroCarbDb'

    project = Implementation.MemopsRoot(name='chemComp')
    project.currentUserId = 'ccpnRef'

    #
    # Set archive dir info
    #

    if testMode:
        chemCompDataDir = testChemCompDataDir
        chemCompCoordDataDir = testChemCompCoordDataDir
    else:
        chemCompDataDir = editChemCompDataDir
        chemCompCoordDataDir = editChemCompCoordDataDir

    #
    # First import all mol2 files, pick one form as 'base' unit, adapt this one,
    # then add coords from other chemComps
    #
    # TODO: This is currently very specific!
    #

    #
    # TODO SET A STEREOCHEMISTRY CLASS FOR THE A/B CHEMATOMS!!
    #

    importDir = os.path.join(origMol2DataDir, 'carbo')
    importFiles = sorted([
        importFile
        for importFile in os.listdir(os.path.join(importDir, carboBaseName))
        if importFile.endswith('mol2')
    ])

    mol2Format = Mol2Format(project, guiParent=None, allowPopups=False)
    molTypes = [carboMolType]

    chemComps = []
    rawChemComps = []

    for importFile in importFiles:

        # Should be 'a', 'b' or 'o'
        anomericCenter = importFile[0]

        if anomericCenter != 'a':
            ccpCode = '%s-%s' % (anomericCenter, carboBaseName)
        else:
            ccpCode = carboBaseName

        fileName = os.path.join(importDir, carboBaseName, importFile)

        print("Reading mol2 file %s..." % fileName)

        ccs = mol2Format.readChemComps(fileName,
                                       molTypes=molTypes,
                                       ccpCodes=[ccpCode],
                                       saveChemComp=False,
                                       minimalPrompts=True,
                                       makeNamingSystem=namingSystemName)

        chemComps.append(ccs[0])
        rawChemComps.append(mol2Format.rawChemComp)

    #
    # Check whether only open form available (e.g. aldi ones)
    #

    if len(chemComps) == 1 and chemComps[0].ccpCode[0] == 'o':
        print("  Warning: only open form available, not creating a/b isoforms.")
        hasOnlyOpenForm = True
    else:
        hasOnlyOpenForm = False

    refChemComp = chemComps[0]  # Should be the a form

    #
    # Reset save location, check if file already exists
    #

    chemCompXmlFile = findChemCompOrCoordFilePath(refChemComp,
                                                  testMode=testMode)

    # A file path coming back while not replacing means the ChemComp already
    # exists on disk: load that one and stop here.
    if chemCompXmlFile and not replace:
        print("  ChemComp %s, %s already exists - aborting creation." % (
            carboMolType, carboBaseName))

        # NOTE(review): this always reads from editChemCompDataDir, also in
        # testMode - confirm whether chemCompDataDir was intended.
        try:
            refChemComp = getChemComp(project,
                                      carboMolType,
                                      carboBaseName,
                                      download=False,
                                      chemCompArchiveDir=editChemCompDataDir,
                                      copyFile=False)

        except Exception:
            print("WARNING: chemcomp was already loaded!")
            refChemComp = project.findFirstChemComp(molType=carboMolType,
                                                    ccpCode=carboBaseName)

        return refChemComp

    #
    # Start creating/modifying...
    #

    print("")
    print(drawBox("Creating sugar information"))
    print("")

    #
    # Set the base Glyco CT code, this is always x-, except for -o only forms (aldehydes)
    #
    # TODO: might need to hack this for substituents so know which one is which... or just do it by order? Should be fine...
    #

    refGlycoCtCode = "RES\n1b:%s" % baseGlycoCtCode

    print("Setting GlycoCT code to:\n\n%s\n" % refGlycoCtCode)
    print("")

    project.override = True
    try:
        refChemComp.baseGlycoCtCode = refGlycoCtCode
        refChemComp.findFirstChemCompVar().glycoCtCode = refGlycoCtCode
    finally:
        project.override = False

    #
    # Look for a C-O-C fragment. Should be O5 for 6 rings.
    #
    # Note: ONLY works on cyclic sugars!!!
    #

    centralOAtom = None
    ringCarbons = None
    otherCarbons = []

    for chemAtom in refChemComp.sortedChemAtoms():

        # TODO: should really do this from the Var level, but...
        if chemAtom.elementSymbol != 'O' or len(chemAtom.chemBonds) != 2:
            continue

        neighbours = [getOtherAtom(chemAtom, chemBond)
                      for chemBond in chemAtom.sortedChemBonds()]

        if [atom for atom in neighbours if atom.elementSymbol != 'C']:
            continue

        #
        # Try to generically determine the carbon atoms in the ring... trying not to depend on names
        # but if a C1 is connected to the O5, then start from there.
        #

        connectedAtoms = []

        for neighbour in neighbours:
            if neighbour.name == 'C1':
                connectedAtoms.insert(0, neighbour)
            else:
                connectedAtoms.append(neighbour)

        #
        # Use recursive function to find right order
        #

        candidateRing = findRingCarbon(connectedAtoms[0], connectedAtoms)

        if not candidateRing:
            # C-O-C fragment that does not close a carbon ring: keep looking
            continue

        centralOAtom = chemAtom
        ringCarbons = candidateRing
        otherCarbons = [
            searchCarbon
            for searchCarbon in refChemComp.findAllChemAtoms(elementSymbol='C')
            if searchCarbon not in ringCarbons
        ]

        break

    if not centralOAtom and not hasOnlyOpenForm:
        raise ValueError("Error: no central O atom found in ring!!")

    #
    # Now look for anomeric carbon and connection sites...
    #

    anomericCarbon = None
    anomericOxygen = None
    # Must exist even when no hydrogen is found: it is tested further down
    anomericHydrogen = None

    if not hasOnlyOpenForm:
        # The anomeric carbon is one of the two ring carbons next to the
        # ring oxygen: the one that carries a second oxygen.
        for searchCarbon in [ringCarbons[0], ringCarbons[-1]]:
            for chemBond in searchCarbon.chemBonds:
                otherChemAtom = getOtherAtom(searchCarbon, chemBond)
                if otherChemAtom and otherChemAtom.elementSymbol == 'O' and otherChemAtom != centralOAtom:
                    anomericCarbon = searchCarbon
                    anomericOxygen = otherChemAtom
                    break

            if anomericCarbon:
                break

    else:
        # Hardset these... not really anomeric but good enough
        anomericCarbon = refChemComp.findFirstChemAtom(name='C1')
        anomericOxygen = refChemComp.findFirstChemAtom(name='O1')

    if not anomericCarbon:
        raise ValueError("Error: no anomeric carbon found.")

    for chemBond in anomericOxygen.chemBonds:
        otherChemAtom = getOtherAtom(anomericOxygen, chemBond)
        if otherChemAtom and otherChemAtom.elementSymbol == 'H':
            anomericHydrogen = otherChemAtom
            break

    if not anomericHydrogen:
        print("  Warning: no anomeric hydrogen found.")

    #
    # Set the stereo information for the anomeric carbon, then create subtypes for beta and open forms
    #
    # The two locals below are not used yet; they are kept as input for the
    # stereo priority assignment that is still to be written.
    #

    stereoAtom = anomericCarbon
    bondDict = {
        'single': 1,
        'double': 2,
        'triple': 3,
        'aromatic': 1.5,
        'dative': 1.0,
        'singleplanar': 1.5
    }  # TODO CHECK THIS!

    #
    # TODO TODO also need to track chemBonds, but ONLY at beginning
    # then need to go back to coordinates to find out in which order the bonds appear based on the coords (when looking down
    # the main bond)!
    #
    # Might be easiest to use Martin's code (or someone else's), use the atom names to link back to here...
    #
    # OK to start from the chemComp because this is a neutral 'real' chemComp, otherwise need to start from chemCompVar!!!
    #

    #
    # Look for binding oxygens... has to be hydroxy group connected to carbon
    #

    bindingOxygens = []
    bindingHydrogens = {}

    #
    # Set carbons to search for connected OH groups
    #

    if not hasOnlyOpenForm:
        searchCarbons = ringCarbons + otherCarbons
    else:
        searchCarbons = list(refChemComp.findAllChemAtoms(elementSymbol='C'))

        if 'aldi' in baseGlycoCtCode:
            # Don't do anything on 1 position for these - alditols
            searchCarbons.remove(anomericCarbon)

    for searchCarbon in searchCarbons:

        validConnectedAtoms = {}
        validOHgroups = []
        # Hydrogen of each hydroxyl oxygen found on this carbon
        hydroxylHydrogens = {}

        for chemBond in searchCarbon.sortedChemBonds():

            otherChemAtom = getOtherAtom(searchCarbon, chemBond)

            if not otherChemAtom:
                continue

            elementSymbol = otherChemAtom.elementSymbol

            if elementSymbol in ('O', 'N') and otherChemAtom != centralOAtom:

                validConnectedAtoms.setdefault(elementSymbol,
                                               []).append(otherChemAtom)

                otherChemBonds = list(otherChemAtom.chemBonds)

                if elementSymbol == 'O' and len(otherChemBonds) == 2:

                    # Of the two bonds, take the one that does not lead
                    # back to the carbon
                    otherChemBond = otherChemBonds[
                        1 - otherChemBonds.index(chemBond)]

                    connectedChemAtom = getOtherAtom(otherChemAtom,
                                                     otherChemBond)

                    if connectedChemAtom.elementSymbol == 'H':
                        validOHgroups.append(otherChemAtom)
                        hydroxylHydrogens[otherChemAtom] = connectedChemAtom

        #
        # Check if single OH (no double O, or amide, ...)
        #

        if len(validOHgroups) != 1:
            continue

        hydroxylOxygen = validOHgroups[0]

        if len(validConnectedAtoms) == 1:
            # Carboxylic acid (except for ring O-C-OH!)
            if len(validConnectedAtoms['O']
                   ) == 2 and not searchCarbon == anomericCarbon:
                print("  Warning: ignoring oxygen %s - is carboxylic acid (or similar)" %
                      hydroxylOxygen.name)
                continue
        else:
            # Amide or something similar
            print("  Warning: ignoring oxygen %s - is amide (or similar)" %
                  hydroxylOxygen.name)
            continue

        bindingOxygens.append(hydroxylOxygen)
        bindingHydrogens[hydroxylOxygen] = hydroxylHydrogens[hydroxylOxygen]

        if not hasOnlyOpenForm and searchCarbon in otherCarbons:
            print("  Warning: setting oxygen %s as binding one (not directly connected to ring)." % (
                hydroxylOxygen.name))

    #
    # Now create variants...
    #
    # Need to have all combinations of:
    #
    #  - anomeric a/b and free/bound
    #  - binding oxygens free/bound
    #

    bindingOxygenCombs = makePowerSet(bindingOxygens)
    origAnomericCarbon = anomericCarbon

    # (link atom name, subType) -> name of the real atom whose coordinates
    # the link atom takes over
    linkAtomsMapForCoordinates = {}

    if hasOnlyOpenForm:
        stereoTypes = ('open_1', )
    else:
        stereoTypes = ('stereo_1', 'stereo_2')

    stereoCodes = {'stereo_1': 'a', 'stereo_2': 'b', 'open_1': 'o'}

    for stereoType in stereoTypes:

        subType = int(stereoType.split('_')[1])
        anomericCarbon = refChemComp.findFirstChemAtom(name='C1',
                                                       subType=subType)

        if not anomericCarbon:
            # Should only happen for stereo_2

            creationDict = {'name': 'C1', 'subType': subType}
            for attrName in ('elementSymbol', 'shortVegaType',
                             'waterExchangeable'):
                creationDict[attrName] = getattr(origAnomericCarbon, attrName)

            # TODO set chirality to OPPOSITE of whatever the first subtype is!
            anomericCarbon = refChemComp.newChemAtom(**creationDict)

            namingSystem = refChemComp.findFirstNamingSystem(
                name=namingSystemName)
            namingSystem.newAtomSysName(sysName=anomericCarbon.name,
                                        atomName=anomericCarbon.name,
                                        atomSubType=anomericCarbon.subType)

            # Bonds and other atoms are exactly the same!
            for chemBond in origAnomericCarbon.chemBonds:
                otherChemAtom = getOtherAtom(origAnomericCarbon, chemBond)
                refChemComp.newChemBond(chemAtoms=(anomericCarbon,
                                                   otherChemAtom),
                                        bondType=chemBond.bondType,
                                        stereochem=chemBond.stereochem)

        for anomericBound in range(0, 2):
            for boundOxygens in bindingOxygenCombs:

                # This is the neutral refChemComp - already exists!
                if not anomericBound and not boundOxygens:
                    continue

                # Can't be both bound on the anomeric carbon and have an oxygen link there...
                if (anomericBound and len(boundOxygens) == 1
                        and boundOxygens[0] == anomericOxygen):
                    continue

                # Can't have a link to 1 and something else...
                if len(boundOxygens) > 1 and anomericOxygen in boundOxygens:
                    continue

                # Main list only contains real ChemAtoms!!
                currentChemAtoms = list(
                    refChemComp.findAllChemAtoms(className='ChemAtom'))

                # Leave out the C1 atoms that belong to the other subtype(s)
                for tempAnomericCarbon in refChemComp.findAllChemAtoms(
                        name='C1'):
                    if tempAnomericCarbon != anomericCarbon and tempAnomericCarbon in currentChemAtoms:
                        currentChemAtoms.remove(tempAnomericCarbon)

                linkedAtomKeys = []
                linkedAtoms = {}
                linkAtoms = {}

                if anomericBound and not hasOnlyOpenForm and 'aldi' not in baseGlycoCtCode:
                    # Only do C1 when relevant - for alditols it's not, always reducing end.
                    currentChemAtoms.remove(anomericOxygen)
                    if anomericHydrogen:
                        currentChemAtoms.remove(anomericHydrogen)

                    anomericCarbonKey = getChemAtomKey(anomericCarbon)
                    linkedAtomKeys.append(anomericCarbonKey)

                    linkedAtoms[anomericCarbonKey] = anomericCarbon

                    linkAtom = getLinkAtom(anomericCarbon, multi=True)

                    linkAtoms[anomericCarbonKey] = linkAtom
                    currentChemAtoms.append(linkAtom)

                    linkAtomsMapForCoordinates[(
                        linkAtom.name, linkAtom.subType)] = anomericOxygen.name

                for bindingOxygen in boundOxygens:

                    bindingHydrogen = bindingHydrogens[bindingOxygen]

                    if bindingHydrogen and bindingHydrogen in currentChemAtoms:
                        currentChemAtoms.remove(bindingHydrogen)

                    bindingOxygenKey = getChemAtomKey(bindingOxygen)
                    linkedAtomKeys.append(bindingOxygenKey)
                    linkedAtoms[bindingOxygenKey] = bindingOxygen

                    linkAtom = getLinkAtom(bindingOxygen)

                    linkAtoms[bindingOxygenKey] = linkAtom
                    currentChemAtoms.append(linkAtom)

                    linkAtomsMapForCoordinates[(
                        linkAtom.name,
                        linkAtom.subType)] = bindingHydrogen.name

                linkedAtomKeys.sort()

                # Possible for C1 linkages if not handled (only reducing end)
                if not linkedAtomKeys:
                    continue

                #
                # Create linkEnds
                #

                linkInfo = []

                for linkedAtomKey in linkedAtomKeys:

                    if linkedAtomKey[0] == 'C1':
                        linkCode = "%s_%s" % (linkedAtomKey[0],
                                              linkedAtomKey[1])
                    else:
                        linkCode = linkedAtomKey[0]

                    linkInfo.append(linkCode)

                    if not refChemComp.findFirstLinkEnd(linkCode=linkCode):
                        refChemComp.newLinkEnd(
                            linkCode=linkCode,
                            boundChemAtom=linkedAtoms[linkedAtomKey],
                            boundLinkAtom=linkAtoms[linkedAtomKey])

                # TODO THIS IS NOT GREAT - no way of telling what's what if atom names are messed up
                linking = 'link:%s' % ','.join(linkInfo)

                if 'stereo' in stereoType:
                    descriptor = '%s:C1' % stereoType
                else:
                    descriptor = 'neutral'

                if not refChemComp.findFirstChemCompVar(linking=linking,
                                                        descriptor=descriptor):
                    print("  Trying %s,%s" % (linking, descriptor))

                    # Create the stereospecific GlycoCt code
                    stereoCode = stereoCodes[stereoType]

                    varGlycoCtCode = "RES\n1b:%s" % (stereoCode +
                                                     baseGlycoCtCode[1:])

                    refChemComp.newChemCompVar(chemAtoms=currentChemAtoms,
                                               linking=linking,
                                               descriptor=descriptor,
                                               glycoCtCode=varGlycoCtCode,
                                               formalCharge=0,
                                               isParamagnetic=False,
                                               isAromatic=False)

    #
    # Make sure the chemElements are accessible
    #

    project.currentChemElementStore = project.findFirstChemElementStore()

    #
    # Reset the name and molType...
    #

    project.override = True
    try:
        refChemComp.ccpCode = carboBaseName
        refChemComp.molType = carboMolType
    finally:
        project.override = False

    # TODO SET THESE CORRECTLY? Where do I get info from for this though? Can this come from MSD? Ask Dimitris!!
    # Set the PDB/MSD name - NOTE that have to do this for the correct a/b forms! Var specific!!
    #ChemComp.ChemCompSysName(refChemComp,namingSystem = 'PDB',sysName=ccpCode,specificChemCompVars = refChemComp.sortedChemCompVars())
    #ChemComp.ChemCompSysName(refChemComp,namingSystem = 'MSD',sysName=ccpCode,specificChemCompVars = refChemComp.sortedChemCompVars())

    #
    # Check chemComp validity and save
    # TODO make this all options in running script!
    #

    refChemComp.checkAllValid()

    if saveData:

        #
        # Get the original file GUID, if possible, when replacing existing file
        #

        if replace:

            (existingGuid,
             existingFile) = findExistingChemCompInfo(chemCompDataDir,
                                                      refChemComp.ccpCode,
                                                      refChemComp.molType)

            if existingGuid:
                project.override = True
                try:
                    refChemComp.guid = existingGuid
                finally:
                    project.override = False

        (tmpFilePath,
         existingFilePath) = saveTemporaryChemCompOrCoord(refChemComp,
                                                          testMode=testMode)

        # Do a check here? Or don't bother?
        consolidateTemporaryChemCompOrCoord(refChemComp,
                                            tmpFilePath,
                                            existingFilePath,
                                            testMode=testMode,
                                            replace=replace)

    #
    # Get the coordinates as well!
    #

    print("  Creating coordinates!!")

    chemCompCoord = project.newChemCompCoord(sourceName=coordSystem,
                                             molType=refChemComp.molType,
                                             ccpCode=refChemComp.ccpCode)

    # First letter of the mol2 file name -> descriptor of the variants that
    # receive its coordinates.
    # NOTE(review): open-form variants are created above with descriptor
    # 'neutral', while 'o' files are looked up as "none" - verify.
    stereoDescriptors = {
        'a': "stereo_1:C1",
        'b': "stereo_2:C1",
        'o': "none",
    }

    for rawChemComp in rawChemComps:

        # Identify which one we're dealing with!!
        baseName = os.path.basename(rawChemComp.parent.name)
        stereoDescriptor = stereoDescriptors.get(baseName[0])

        if stereoDescriptor is None:
            print("  Not handling type '%s' for coordinates - ignored." %
                  baseName[0])
            continue

        #
        # Mark that generated by this script...
        #

        applData = Implementation.AppDataString(application='ccpNmr',
                                                keyword='origin',
                                                value='makeFullSugar.py')
        chemCompCoord.addApplicationData(applData)

        # Don't do any link atoms (yet)... could in principle use atoms that are 'missing'

        # TODO: should decompose descriptor here, then check...
        chemCompVars = refChemComp.findAllChemCompVars(
            descriptor=stereoDescriptor)

        chemAtomKeys = []

        for chemCompVar in chemCompVars:
            for chemAtom in chemCompVar.sortedChemAtoms():
                caKey = (chemAtom.name, chemAtom.subType)
                if caKey not in chemAtomKeys:
                    chemAtomKeys.append(caKey)

        #
        # Create a dictionary for the coordinates, based on the 'raw' chemComp from the mol2 file
        #

        chemAtomCoordDict = {}

        for chemAtomKey in chemAtomKeys:

            coords = None

            # Link atoms take the position of the atom they stand in for
            useChemAtomName = linkAtomsMapForCoordinates.get(
                chemAtomKey, chemAtomKey[0])

            for rawAtom in rawChemComp.atoms:

                if rawAtom.name == useChemAtomName:
                    coords = (rawAtom.x, rawAtom.y, rawAtom.z)
                    break

            if not coords:
                print("  Warning: no coordinate for %s, atom key %s." % (
                    coordSystem, chemAtomKey))
            elif chemAtomKey not in chemAtomCoordDict:
                chemAtomCoordDict[chemAtomKey] = coords
            else:
                print("  Error: double atom key %s!" % (chemAtomKey, ))

        #
        # Set the coordinates
        #

        chemAtomCoords = {}

        for chemCompVar in chemCompVars:

            chemCompVarCoord = chemCompCoord.findFirstChemCompVarCoord(
                linking=chemCompVar.linking, descriptor=chemCompVar.descriptor)

            if not chemCompVarCoord:
                chemCompVarCoord = chemCompCoord.newChemCompVarCoord(
                    linking=chemCompVar.linking,
                    descriptor=chemCompVar.descriptor)

            for chemAtom in chemCompVar.sortedChemAtoms():

                caKey = (chemAtom.name, chemAtom.subType)
                coords = chemAtomCoordDict.get(caKey)

                if not coords:
                    continue

                if caKey in chemAtomCoords:
                    chemAtomCoord = chemAtomCoords[caKey]
                else:
                    chemAtomCoord = chemCompCoord.newChemAtomCoord(
                        name=caKey[0],
                        subType=caKey[1],
                        x=coords[0],
                        y=coords[1],
                        z=coords[2])
                    chemAtomCoords[caKey] = chemAtomCoord

                if chemAtomCoord not in chemCompVarCoord.chemAtomCoords:
                    chemCompVarCoord.addChemAtomCoord(chemAtomCoord)

    chemCompCoord.checkAllValid()

    if saveData:

        if replace:

            (existingGuid, existingFile) = findExistingChemCompCoordInfo(
                chemCompCoordDataDir, coordSystem, chemCompCoord.ccpCode,
                chemCompCoord.molType)

            if existingGuid:
                project.override = True
                try:
                    chemCompCoord.guid = existingGuid
                finally:
                    project.override = False

        (tmpFilePath,
         existingFilePath) = saveTemporaryChemCompOrCoord(chemCompCoord,
                                                          testMode=testMode)

        # Do a check here? Or don't bother? This is blank regeneration from reference data, so should be OK!
        consolidateTemporaryChemCompOrCoord(chemCompCoord,
                                            tmpFilePath,
                                            existingFilePath,
                                            testMode=testMode,
                                            replace=replace)

    return refChemComp
Beispiel #6
0
def addSubstituentToBaseUnit(baseUnitCcpCode,
                             baseUnitMolType,
                             testMode,
                             mergeInfoList,
                             coordSystem,
                             substRemoveAtomName='SUB',
                             saveData=True,
                             replace=False,
                             namingSystemName=None,
                             resetGlycoCtCode=False):
    """
    Build a new non-standard chemComp (and matching chemCompCoord) by attaching
    one or more substituents, read from mol2 files, to an existing base unit.

    The ccpCode of the new unit is the base unit ccpCode followed, for every
    entry in mergeInfoList, by ":<baseBindingAtomName>_<shortCode>", where
    shortCode comes from substituentInfo[substituent].

    Arguments:

      baseUnitCcpCode,
      baseUnitMolType:     identify the original base unit; used to load both
                           its chemComp and its chemCompCoord.
      testMode:            if true the test data directories are used, otherwise
                           the edit data directories. Also passed on to the
                           copy/save helper functions.
      mergeInfoList:       list of dictionaries, one per substitution, with keys
                             'substituent'                - key into substituentInfo,
                                                            also the mol2 file name stem
                             'baseBindingAtomName'        - base unit atom the substituent
                                                            is bonded to; has to contain a
                                                            number (used as substituent index)
                             'removeBaseAtomNames'        - base unit atom names to delete
                             'renameSubstituentAtomNames' - dictionary of forced names for
                                                            substituent atoms/atom sets
                           The list order matters when resetGlycoCtCode is set.
      coordSystem:         sourceName of the chemCompCoord to load and create.
      substRemoveAtomName: name of the placeholder atom in the substituent that is
                           replaced by the bond to the base unit. Substituent atoms and
                           atom sets whose name starts with this string are not copied.
      saveData:            if true, the new chemComp and chemCompCoord are saved via
                           saveTemporaryChemCompOrCoord/consolidateTemporaryChemCompOrCoord.
      replace:             if true, the guid of an already existing chemComp/chemCompCoord
                           with the new ccpCode is reused; also passed to the copy and
                           consolidate helpers.
      namingSystemName:    if set, the naming system with this name is looked up on (or
                           added to) the new base unit and passed to setAtomSysName for
                           all newly created atoms and atom sets. Otherwise None is passed.
      resetGlycoCtCode:    if true, RES/LIN lines for the substituents are appended to the
                           GlycoCT codes of the new base unit and its chemCompVars.

    Returns None early (without modifying anything further) when
    copyBaseToModifiedFile reports that no new chemComp file was created.

    Raises ValueError if no coordinates are available for the base unit, or if a
    baseBindingAtomName does not contain a number.
    """

    #
    # Set directories to read/write data from/to
    #

    if testMode:
        chemCompCoordDataDir = testChemCompCoordDataDir
        chemCompDataDir = testChemCompDataDir
    else:
        chemCompCoordDataDir = editChemCompCoordDataDir
        chemCompDataDir = editChemCompDataDir

    #
    # Now start setting up project
    #

    substIndexPatt = re.compile(r"(\d+)")
    substituentDir = os.path.join(origMol2DataDir, 'subst')

    # Unique substituents (in order of first appearance) and the new ccpCode
    ccpCode = baseUnitCcpCode

    substituentList = []
    for mergeInfoDict in mergeInfoList:
        substituent = mergeInfoDict['substituent']
        if substituent not in substituentList:
            substituentList.append(substituent)

        ccpCode += ":%s_%s" % (mergeInfoDict['baseBindingAtomName'],
                               substituentInfo[substituent]['shortCode'])

    project = Implementation.MemopsRoot(name='chemComp')
    project.currentUserId = 'ccpnRef'
    project.currentChemElementStore = project.findFirstChemElementStore()

    #
    # First import all relevant substituent mol2 files
    # TODO could in principle have these 'pre-imported' in CCPN, in special dir...
    #      note though that the SUBST atoms have to be removed in that case...
    #

    mol2Format = Mol2Format(project, guiParent=None, allowPopups=False)

    chemComps = {}
    rawChemComps = {}

    for substituent in substituentList:

        fileName = os.path.join(substituentDir, "%s.mol2" % substituent)

        ccs = mol2Format.readChemComps(fileName,
                                       ccpCodes=[substituent],
                                       saveChemComp=False,
                                       minimalPrompts=True)

        chemComps[substituent] = ccs[0]
        # The raw (format-level) chemComp is kept to get at the original coordinates later on
        rawChemComps[substituent] = mol2Format.rawChemComp

    #
    # Get the original base unit information
    #

    origBaseUnit = getChemComp(project,
                               baseUnitMolType,
                               baseUnitCcpCode,
                               chemCompArchiveDir=chemCompDataDir,
                               copyFile=False)

    #
    # If replacing, keep GUID of original as default
    #

    chemCompKeywds = {}

    if replace:

        (existingGuid,
         _existingFile) = findExistingChemCompInfo(chemCompDataDir, ccpCode,
                                                   baseUnitMolType)

        if existingGuid:
            chemCompKeywds['guid'] = existingGuid

    #
    # Now create the new base unit, copy the original base unit file, change the guid in the file, then load it
    #

    baseUnit = project.newNonStdChemComp(molType=baseUnitMolType,
                                         ccpCode=ccpCode,
                                         **chemCompKeywds)
    repository = project.findFirstRepository(name='userData')

    isNewFile = copyBaseToModifiedFile(project,
                                       baseUnit,
                                       origBaseUnit,
                                       testMode,
                                       repository,
                                       replace=replace)

    if not isNewFile:
        return

    #
    # Same for the coordinates - these are required
    #

    origBaseUnitCcc = getChemCompCoord(
        project,
        coordSystem,
        baseUnitMolType,
        baseUnitCcpCode,
        chemCompCoordArchiveDir=chemCompCoordDataDir,
        copyFile=False)

    if not origBaseUnitCcc:
        # Has to be a real exception object: raising a plain string is itself a TypeError
        raise ValueError("Error: no coordinates available for %s!" %
                         baseUnitCcpCode)

    #
    # If replacing, keep GUID of original as default
    #

    coordKeywds = {}

    if replace:

        (existingGuid, _existingFile) = findExistingChemCompCoordInfo(
            chemCompCoordDataDir, coordSystem, ccpCode, baseUnitMolType)

        if existingGuid:
            coordKeywds['guid'] = existingGuid

    baseUnitCcc = project.newChemCompCoord(sourceName=coordSystem,
                                           molType=baseUnitMolType,
                                           ccpCode=ccpCode,
                                           **coordKeywds)
    copyBaseToModifiedFile(project,
                           baseUnitCcc,
                           origBaseUnitCcc,
                           testMode,
                           repository,
                           replace=replace)

    #
    # Reset the glycoCtCode for the new base unit (if relevant)
    #

    if resetGlycoCtCode:

        resLines = []
        linLines = []

        # Note: this assumes the mergeInfoList is ordered!
        # Residue numbering for the substituents starts at 2, linkage numbering at 1.

        for offset, mergeInfoDict in enumerate(mergeInfoList):

            resIndex = 2 + offset
            linIndex = 1 + offset

            baseBindingAtomName = mergeInfoDict['baseBindingAtomName']
            substituent = mergeInfoDict['substituent']

            resLines.append("%ds:%s" % (resIndex, substituent))

            # Position on the parent is taken from the last character of the atom name
            parentAtomIndex = int(baseBindingAtomName[-1])

            if baseBindingAtomName[0] == 'O':
                parentSubstitutionType = 'o'
            else:
                parentSubstitutionType = 'd'

            linLines.append(
                "%d:%d%s(%d+%d)%d%s" % (linIndex, 1, parentSubstitutionType,
                                        parentAtomIndex, 1, resIndex, 'n'))

        # Every line is preceded by a newline; the LIN header separates the two sections
        substGlycoCtText = "\n".join([""] + resLines + ["LIN"] + linLines)

        newBaseGlycoCtCode = origBaseUnit.baseGlycoCtCode + substGlycoCtText

        project.override = True
        try:
            baseUnit.baseGlycoCtCode = newBaseGlycoCtCode

            for chemCompVar in baseUnit.chemCompVars:
                chemCompVar.glycoCtCode = chemCompVar.glycoCtCode + substGlycoCtText

        finally:
            project.override = False

        print("Setting base GlycoCT code to:\n\n%s\n" % newBaseGlycoCtCode)
        print("")

    #
    # Set naming system
    #

    namingSystem = None
    if namingSystemName:
        namingSystem = baseUnit.findFirstNamingSystem(name=namingSystemName)
        if not namingSystem:
            namingSystem = baseUnit.newNamingSystem(name=namingSystemName)
            print("Created new naming system %s" % namingSystemName)

    #
    # Add substituent info, remove atoms from base unit
    #

    for mergeInfoDict in mergeInfoList:

        #
        # 0. Get the info from the mergeInfoDict, print a comment
        #

        baseBindingAtomName = mergeInfoDict['baseBindingAtomName']
        removeBaseAtomNames = mergeInfoDict['removeBaseAtomNames']
        substituent = mergeInfoDict['substituent']
        renameSubstituentAtomNames = mergeInfoDict[
            'renameSubstituentAtomNames']

        baseBindingAtoms = baseUnit.findAllChemAtoms(name=baseBindingAtomName)

        newBondType = substituentInfo[substituent]['bondType']
        newStereochem = substituentInfo[substituent]['stereochem']

        print("")
        print(drawBox("Creating link between base atom %s to substituent %s" %
                      (baseBindingAtomName, substituent)))
        print("")

        #
        # 1. Set the substUnitIndex - this is the identifier that is added to the substituents
        #    when part of the base chemComp. It is taken from whichever number is part of the baseBindingAtomName
        #
        #    TODO: should be molType specific - use A,B,G,... for amino acids!!
        #

        substUnitSearch = substIndexPatt.search(baseBindingAtomName)
        if not substUnitSearch:
            raise ValueError(
                "Error: no number in base binding atom name %s, cannot set substituent index!"
                % baseBindingAtomName)
        substUnitIndex = substUnitSearch.group(1)

        #
        # 2. Remove relevant atoms from the base unit, all subtypes
        #    Keep track of atom directly linked to the baseBindingAtom for recalculating coordinates!
        #

        baseAtomCoords = {}

        for removeBaseAtomName in removeBaseAtomNames:

            removeBaseAtoms = baseUnit.findAllChemAtoms(
                name=removeBaseAtomName)

            for removeBaseAtom in removeBaseAtoms:

                # Search if bound to the baseBindingAtom
                isBoundToBaseBindingAtom = False
                for chemBond in baseUnit.chemBonds:
                    bondChemAtoms = list(chemBond.chemAtoms)
                    if removeBaseAtom in bondChemAtoms:
                        # The other atom of the pair: index 0 -> 1, index 1 -> 0
                        otherBondChemAtom = bondChemAtoms[
                            1 - bondChemAtoms.index(removeBaseAtom)]
                        if otherBondChemAtom in baseBindingAtoms:
                            isBoundToBaseBindingAtom = True
                            break

                # Track the coordinates if bound to the baseBindingAtom
                if isBoundToBaseBindingAtom:
                    for baseBindingAtom in baseBindingAtoms:

                        # Maps (binding atom coord, removed atom coord) to the
                        # chemCompVarCoords in which that coordinate pair occurs
                        varCoordsByJointCoords = {}
                        baseAtomCoords[baseBindingAtom] = varCoordsByJointCoords

                        baseBindingAtomCoords = baseUnitCcc.findAllChemAtomCoords(
                            chemAtom=baseBindingAtom)

                        for baseBindingAtomCoord in baseBindingAtomCoords:
                            baseCoord = (baseBindingAtomCoord.x,
                                         baseBindingAtomCoord.y,
                                         baseBindingAtomCoord.z)

                            for chemCompVarCoord in baseBindingAtomCoord.chemCompVarCoords:
                                # Get the coordinates, if any
                                removeBaseAtomCoord = chemCompVarCoord.findFirstChemAtomCoord(
                                    chemAtom=removeBaseAtom)
                                if removeBaseAtomCoord:
                                    baseBoundCoord = (removeBaseAtomCoord.x,
                                                      removeBaseAtomCoord.y,
                                                      removeBaseAtomCoord.z)
                                    jointCoords = (baseCoord, baseBoundCoord)

                                    varCoordsByJointCoords.setdefault(
                                        jointCoords,
                                        []).append(chemCompVarCoord)

                # Now start deleting on chemComp and chemCompCoord levels
                for removeBaseAtomCoord in baseUnitCcc.findAllChemAtomCoords(
                        chemAtom=removeBaseAtom):
                    removeBaseAtomCoord.delete()

                # Read the subType before deleting, so no deleted object is accessed for the message
                removedSubType = removeBaseAtom.subType
                removeBaseAtom.delete()
                print("  Removed atom %s, subType %d from base unit..." %
                      (removeBaseAtomName, removedSubType))

                # Separate loop variable: 'namingSystem' is the naming system selected
                # above and is still needed when creating the substituent atoms.
                for baseNamingSystem in baseUnit.namingSystems:
                    for asn in baseNamingSystem.atomSysNames:
                        if asn.atomName == removeBaseAtomName:
                            asn.delete()

                #
                # 2.1 Also rename all linkEnds and chemCompVars that have this atom in the descriptor - are now irrelevant
                #

                for chemCompVar in baseUnit.chemCompVars:
                    deleteVoidChemCompVar(chemCompVar, 'descriptor',
                                          removeBaseAtomName, baseUnit,
                                          baseUnitCcc)
                    deleteVoidChemCompVar(chemCompVar, 'linking',
                                          removeBaseAtomName, baseUnit,
                                          baseUnitCcc)
                    deleteLinkEnd(baseUnit, removeBaseAtomName)

        #
        # 3. Add the substituent info to the base unit
        #
        # Currently this works off the mol2 file. Could add, e.g. SUB_C, SUB_O, SUB_N, depending on substituted atom...
        # for better coordinates later on. TODO: try to implement this!!!
        #
        # TODO: look into avoiding mol2 step, just put into CCPN in temporary library, use linkAtoms as linking ones.
        # These can then also be identified by elementSymbol (O,C,N,...)
        #

        substUnit = chemComps[substituent]
        rawSubstChemComp = rawChemComps[substituent]

        #
        # 3.1 Initialise information
        #       - determine which atom to remove from substituent (will need this for coordinates further down though)
        #       - get single bond coming from this atom to identify the binding atom on the substituent side
        #       - create substToBaseDict dictionary that maps substituent objects to newly created chemComp unit objects
        #       - deal with the coordinates

        substRemoveAtom = substUnit.findFirstChemAtom(name=substRemoveAtomName)
        # Should only have ONE single bond!
        chemBond = substRemoveAtom.findFirstChemBond()
        substBindingAtom = getOtherAtom(substRemoveAtom, chemBond)

        # substCoords and substCoordsBaseAtoms are parallel lists:
        #   index 0: placeholder atom of the substituent (no atom in the base unit),
        #   index 1: binding atom of the substituent,
        #   index 2 and up: all other substituent atoms, in order of creation.
        substCoords = [
            getAtomOrigCoords(rawSubstChemComp, substRemoveAtomName),
            getAtomOrigCoords(rawSubstChemComp, substBindingAtom.name)
        ]
        substCoordsBaseAtoms = [None, None]

        substToBaseDict = {}

        #
        # 3.2 Create chemAtoms and chemAtomSets
        #

        redoChemAtomSets = []
        substChemAtoms = []

        # NOTE(review): baseSubstBindingAtom (used in 4.3) is only set in this loop when the
        # binding atom is newly created. If checkExistence() skips it, the name is unbound
        # or still refers to the previous substituent - confirm this cannot happen.

        for chemAtomOrSet in (substUnit.sortedChemAtoms() +
                              substUnit.sortedChemAtomSets()):

            # Ignore atoms to be substituted
            if chemAtomOrSet.name.startswith(substRemoveAtomName):
                continue

            createChemAtomOrSet = CreateChemAtomOrSet(chemAtomOrSet)

            if chemAtomOrSet.name in renameSubstituentAtomNames:
                createChemAtomOrSet.setForcedName(
                    renameSubstituentAtomNames[chemAtomOrSet.name])
            else:
                createChemAtomOrSet.setName(substUnitIndex)

            # Only relevant for chemAtomSets!
            redoChemAtomSets.extend(
                createChemAtomOrSet.setChemAtomLinks(chemAtomOrSet,
                                                     substToBaseDict,
                                                     substRemoveAtomName))

            if createChemAtomOrSet.checkExistence(baseUnit):
                continue

            newChemAtomOrSet = createChemAtomOrSet.createNewObject(baseUnit)
            createChemAtomOrSet.setAtomSysName(namingSystem)

            substToBaseDict[chemAtomOrSet] = newChemAtomOrSet

            # Keep track of new chemAtoms, also track coordinates
            if newChemAtomOrSet.className in ('LinkAtom', 'ChemAtom'):
                substChemAtoms.append(newChemAtomOrSet)

                if chemAtomOrSet == substBindingAtom:
                    baseSubstBindingAtom = newChemAtomOrSet
                    substCoordsBaseAtoms[1] = newChemAtomOrSet
                else:
                    substCoordsBaseAtoms.append(newChemAtomOrSet)
                    substCoords.append(
                        getAtomOrigCoords(rawSubstChemComp,
                                          chemAtomOrSet.name))

        #
        # 3.2.1 Now set chemAtomSets that are linked to other chemAtomSets - have to do this later because
        #       otherwise could have not been created yet. Could in principle do this in above loop by
        #       organising list of chemAtomSets, but this is easier.
        #

        for chemAtomSet in redoChemAtomSets:
            createChemAtomOrSet = CreateChemAtomOrSet(chemAtomSet)

            if chemAtomSet.name in renameSubstituentAtomNames:
                createChemAtomOrSet.setForcedName(
                    renameSubstituentAtomNames[chemAtomSet.name])
            else:
                createChemAtomOrSet.setName(substUnitIndex)

            createChemAtomOrSet.setChemAtomSetLinks()
            if createChemAtomOrSet.checkExistence(baseUnit):
                continue
            newChemAtomSet = createChemAtomOrSet.createNewObject(baseUnit)
            createChemAtomOrSet.setAtomSysName(namingSystem)
            substToBaseDict[chemAtomSet] = newChemAtomSet

        #
        # 3.3 Now add all other objects connected to the chemComp (see global chemCompLinkList)
        #
        # TODO: this is not fully functional - have to add specific settings for some links (but probably never required, so will wait)
        #

        for ccLinkName in chemCompLinkList:
            for substObject in getattr(substUnit, ccLinkName):
                createCcpnObject = CreateCcpnObject(substObject)
                if not createCcpnObject.setLinks(substToBaseDict):
                    continue
                newCcpnObject = createCcpnObject.createNewObject(baseUnit)
                substToBaseDict[substObject] = newCcpnObject

        # TODO: LINKS THAT REQUIRE SPECIAL TREATMENT/MERGING WITH EXISTING INFO
        # 'chemCompVars', 'applicationData',
        #
        # TODO: object that require special treatment (have to be renamed, ...): chemAtomSysNames, chemCompSysNames (?)

        #
        # 4. Connect base binding atom(s) to the new substituent atoms
        #

        for baseBindingAtom in baseBindingAtoms:

            #
            # 4.1 First remove all chemCompVars and linkEnds that have linking vars including this atom!
            #

            for chemCompVar in baseBindingAtom.chemCompVars:
                deleteVoidChemCompVar(chemCompVar,
                                      'linking',
                                      baseBindingAtomName,
                                      baseUnit,
                                      baseUnitCcc,
                                      actionType='binding')
                deleteLinkEnd(baseUnit,
                              baseBindingAtomName,
                              actionType='binding')

            #
            # 4.2 Calculate the new coordinates for the substituent atoms
            #
            # The substituent coordinates are superposed on every pair of
            # (base binding atom, removed atom) coordinates tracked in step 2.
            #

            coordsForChemCompVar = {}

            if baseBindingAtom in baseAtomCoords:
                varCoordsByJointCoords = baseAtomCoords[baseBindingAtom]
                for jointCoords in varCoordsByJointCoords:
                    # Tuple of tuples so it can be used as dictionary key
                    newSubstCoords = tuple([
                        tuple(coord) for coord in superposeNewVectorsOnOld(
                            jointCoords, substCoords)
                    ])
                    coordsForChemCompVar[
                        newSubstCoords] = varCoordsByJointCoords[jointCoords]

            #
            # 4.3 Now create the bond between the new substituent and the existing base unit
            #

            baseUnit.newChemBond(chemAtoms=(baseBindingAtom,
                                            baseSubstBindingAtom),
                                 bondType=newBondType,
                                 stereochem=newStereochem)

            for chemCompVar in baseBindingAtom.chemCompVars:
                for substChemAtom in substChemAtoms:
                    chemCompVar.addChemAtom(substChemAtom)
                # TODO: have to RENAME the chemCompVar in case there are variants for the substituent!!

            #
            # 4.4 Finally set the new coordinates... ignore first one (is the substituted atom from the substituent!)
            #

            for newSubstCoords, chemCompVarCoords in coordsForChemCompVar.items():
                for i in range(1, len(substCoordsBaseAtoms)):
                    newSubstCoord = newSubstCoords[i]
                    substChemAtom = substCoordsBaseAtoms[i]

                    # Reuse an identical coordinate if it was already created
                    substChemAtomCoord = baseUnitCcc.findFirstChemAtomCoord(
                        chemAtom=substChemAtom,
                        x=newSubstCoord[0],
                        y=newSubstCoord[1],
                        z=newSubstCoord[2])
                    if not substChemAtomCoord:
                        substChemAtomCoord = baseUnitCcc.newChemAtomCoord(
                            name=substChemAtom.name,
                            subType=substChemAtom.subType,
                            x=newSubstCoord[0],
                            y=newSubstCoord[1],
                            z=newSubstCoord[2])

                    # chemCompVarCoords may have been deleted in step 4.1
                    for cccv in chemCompVarCoords:
                        if not cccv.isDeleted:
                            cccv.addChemAtomCoord(substChemAtomCoord)

            print("  Connected new substituent chemAtom %s,%s to base unit atom %s,%s, and included in relevant chemCompVars" % (
                baseSubstBindingAtom.name, baseSubstBindingAtom.subType,
                baseBindingAtom.name, baseBindingAtom.subType))

        #
        # NEXT ON LIST: make sure the chemCompVars make sense if there are any for the substituent!!!
        #
        # ALSO try this on amino acids when working for carbs - need split code and rename some variables though
        #

    #
    # Check validity and save
    #

    baseUnit.checkAllValid(complete=True)

    if baseUnitCcc:
        baseUnitCcc.checkAllValid(complete=True)

    if saveData:

        (filePath,
         existingFilePath) = saveTemporaryChemCompOrCoord(baseUnit,
                                                          testMode=testMode)
        consolidateTemporaryChemCompOrCoord(baseUnit,
                                            filePath,
                                            existingFilePath,
                                            testMode=testMode,
                                            replace=replace)

        if baseUnitCcc:
            (filePath, existingFilePath) = saveTemporaryChemCompOrCoord(
                baseUnitCcc, testMode=testMode)
            consolidateTemporaryChemCompOrCoord(baseUnitCcc,
                                                filePath,
                                                existingFilePath,
                                                testMode=testMode,
                                                replace=replace)