def predictBuiltIn(mol, atomNames, typeRCDist, structureNum=0):
    """
    Predict RNA chemical shifts with the built-in Predictor and collect the
    reference shifts of the requested atoms.

    # Parameters:

    mol (Molecule); the molecule whose polymers are predicted
    atomNames (list); atom names to report, either bare ("H8") or prefixed
        with a residue type ("A.H8"); everything up to the first "." is dropped
    typeRCDist (String); 'dist' selects predictRNAWithDistances, any other
        value selects predictRNAWithRingCurrent
    structureNum (int or list); accepted so the signature parallels the other
        predictors; it is not used by this function

    # Returns:

    shifts (list); [atomShortName, refPPM] pairs for every matched atom whose
        reference shift is not None
    """
    predictor = Predictor()
    useDistances = typeRCDist.lower() == 'dist'
    for polymer in mol.getPolymers():
        if useDistances:
            predictor.predictRNAWithDistances(polymer, 0, 0)
        else:
            predictor.predictRNAWithRingCurrent(polymer, 0, 0)

    # Collect the unique bare atom names in first-seen order.
    uniqueNames = []
    seen = set()
    for atomId in atomNames:
        dotIndex = atomId.find(".")
        if dotIndex != -1:
            # Strip the whole prefix, not just a fixed two characters, so
            # prefixes longer than one letter are handled as well.
            atomId = atomId[dotIndex + 1:]
        if atomId not in seen:
            seen.add(atomId)
            uniqueNames.append(atomId)
    # An empty name list yields an empty filter string.
    filterString = ("*." + ",".join(uniqueNames)) if uniqueNames else ""

    spatialSets = Molecule.matchAtoms(MolFilter(filterString))
    shifts = []
    for sp in spatialSets:
        ppm = sp.atom.getRefPPM()
        name = sp.atom.getShortName()
        if ppm is not None:
            shifts.append([str(name), ppm])
    return shifts
def readMMCIF(fileName):
    '''
    Reads an mmCIF file with MMcifReader and modifies the static Molecule object.
    The atom array of the active molecule is refreshed afterwards.
    Returns the molecule that was active right after the read.
    '''
    MMcifReader.read(fileName)
    mol = Molecule.getActive()
    updateAtomArray()
    return mol
def predictWithRCFromPDB(pdbFile, refShifts, ringRatio, typeRCDist, atomNames, builtin, avgOverStructures=False):
    """
    Reads an RNA molecule from PDB file and predict chemical shifts.

    # Parameters:

    pdbFile (str); the name of the PDB file
    refShifts (dict); the reference shifts for each atom to be predicted.
    ringRatio (float); A scale parameter to multiply the ring-current contributions by.
        In 'dist' mode the same value is handed on as the alpha coefficients.
    typeRCDist (String): Type of analysis to perform, 'rc' (ring current) or 'dist' (distances)
    atomNames (list); atom names passed to the built-in predictor (only used when builtin is true)
    builtin (boolean); if true, rnapred.predictBuiltIn is used instead of the rc/dist predictors
    avgOverStructures (boolean); if true, predict over all active structures;
        otherwise (default) only the representative structure is used

    # Returns:

    mol, shifts (Molecule, list); the molecule that was read and the shifts
        returned by the selected predictor

    # Raises:

    ValueError; if builtin is false and typeRCDist is neither 'rc' nor 'dist'
    """
    Molecule.removeAll()
    mol = molio.readPDB(pdbFile)
    activeStructures = mol.getActiveStructures()
    hasActive = len(activeStructures) > 0

    if avgOverStructures:
        structList = list(activeStructures) if hasActive else [0]
    elif hasActive:
        # `super` is not the builtin here (the builtin has no findRepresentative);
        # presumably a project module imported at file level -- confirm.
        repI = super.findRepresentative(mol)
        structList = [repI[0]]
    else:
        structList = [0]

    if builtin:
        shifts = rnapred.predictBuiltIn(mol, atomNames, typeRCDist, structList)
    else:
        mode = typeRCDist.lower()
        if mode == 'rc':
            # ringTypes is expected to be defined at module level.
            shifts = rnapred.predictRCShifts(mol, structList, refShifts, ringRatio, ringTypes)
        elif mode == 'dist':
            # rmax is expected to be defined at module level.
            alphas = ringRatio
            shifts = rnapred.predictDistShifts(mol, rmax, structList, refShifts, alphas)
        else:
            # Previously this fell through and failed on the unbound `shifts`.
            raise ValueError("typeRCDist must be 'rc' or 'dist', got %r" % (typeRCDist,))
    return mol, shifts
def predictFromSequence(molecule=None, vienna=None, ppmSet=-1):
    """
    Predict shifts from sequence and secondary structure and store them on
    the molecule.

    # Parameters:

    molecule (Molecule); molecule to predict; the active molecule when None
    vienna (str); dot-bracket string; taken from molecule.getDotBracket() when None
    ppmSet (int); forwarded unchanged to setPredictions

    # Returns:

    _ (None); predictions are stored through setPredictions
    """
    target = Molecule.getActive() if molecule is None else molecule
    dotBracket = target.getDotBracket() if vienna is None else vienna

    basePairs = getPairs(dotBracket)
    fullSequence = getFullSequence(target)
    attrLines = genRNAData(fullSequence, basePairs)
    setPredictions(target, predictFromAttr(fullSequence, attrLines), ppmSet)
def readPDB(fileName, isCompound=False):
    '''
    Reads a pdb file and modifies the static Molecule object.
    isCompound is used to specify if the file should be read in as a ligand or small molecule.
    Important note to take into consideration: if isCompound is false, HETATM fields will be
    ignored in file and the file will be read in as a sequence, ultimately creating polymer(s).
    Returns the active Molecule when isCompound is false. When isCompound is true the value
    returned by PDBFile.readResidue is returned (presumably the compound -- confirm against PDBFile).
    '''
    pdb = PDBFile()
    if not isCompound:
        pdb.readSequence(fileName, 0)
        mol = Molecule.getActive()
    else:
        mol = pdb.readResidue(fileName, None, Molecule.getActive(), None)
    updateAtomArray()
    return mol
def readPDBXCoords(fileName, structNum, noComplain, genCoords):
    '''
    Reads coordinates from a pdb file and modifies the static Molecule object.
    structNum is the structure number, noComplain is a boolean for printing out an error message,
    and genCoords is a boolean for generating coordinates.
    Returns the active Molecule after the atom array has been updated.
    '''
    pdb = PDBFile()
    # Forward the caller's flags; they were previously ignored in favour of
    # hard-coded False/True values.
    pdb.readCoordinates(fileName, structNum, noComplain, genCoords)
    updateAtomArray()
    mol = Molecule.getActive()
    return mol
def genRCTrainingMatrix(outFileName, pdbFiles, shiftSources, atomNames, ringMode, typeRCDist):
    """
    Generate the training data from a list of pdbFiles and shift sources.
    For each file the reference shifts are set either by sequence-based
    prediction (dot-bracket source) or from a BMRB entry, and the resulting
    rows are appended to the training matrix.

    # Parameters:

    outFileName (str); The output file name. File is deleted if present already.
    pdbFiles (list); list of PDB identifiers; files are read from 'pdbfiles/<id>.pdb'
    shiftSources (list); list of sources for shifts.  Either dot-bracket values (vienna string,
        recognised by a leading '.' or '(') or bmrb id to use for each pdb file
    atomNames (list): list of atom names
    ringMode; forwarded unchanged to genRCMat
    typeRCDist (String): Type of analysis to perform, 'rc' (ring current) or 'dist' (distances)

    # Returns:

    _ (None); Training data is written to specified file
    """
    try:
        os.remove(outFileName)
    except OSError:
        # Nothing to remove (or removal not possible); opening with 'w'
        # below still guarantees we start from an empty file.
        pass

    with open(outFileName, 'w') as f1:
        for pdbID, shiftSource in zip(pdbFiles, shiftSources):
            Molecule.removeAll()
            pdbFile = 'pdbfiles/' + pdbID + '.pdb'
            if not getPDBFile(pdbID):
                print('skip ' + pdbFile)
                continue
            print('train ' + pdbFile)
            mol = molio.readPDB(pdbFile)
            if shiftSource[0] == "." or shiftSource[0] == "(":
                rnapred.predictFromSequence(mol, shiftSource)
            else:
                setRefShiftsFromBMRB(shiftSource, {})
            genRCMat(mol, atomNames, f1, ringMode, typeRCDist)
def __init__(self, mol=None):
    """
    Set up the generator state for a molecule.

    mol is the molecule to work on; when None the active molecule is used.
    If a molecule is available its dot-bracket string is cached in
    self.vienna and its atoms are activated; otherwise self.vienna is None.
    """
    if mol is None:
        mol = Molecule.getActive()
    self.mol = mol
    # widthH is read from self; presumably a class attribute defined
    # outside this method -- confirm.
    self.widths = [self.widthH, self.widthH]
    self.intensity = 100.0
    self.refMode = False
    #self.labelScheme = "All: A.C2',C8,Hn,Hr G.C1',Cn,Hn,Hr U.C2',C6,Hn,Hr C.C1',C6,Hn,Hr"
    self.labelScheme = ""
    self.editSchemes = ["ef", "fe", "ee", "ff", "aa"]
    # Always define the attribute so later reads do not depend on whether a
    # molecule was available at construction time.
    self.vienna = None
    if mol is not None:
        # Everything that touches the molecule stays behind the None guard.
        self.vienna = mol.getDotBracket()
        mol.activateAtoms()
def readSequence(seqFile, convert=False, polymerName=None, seqReader=None):
    '''
    Reads a sequence file into a new polymer and returns the active Molecule.
    If convert is true the file is first passed through osfiles.convertSeqFile
    together with its directory, and the converted file is read instead.
    polymerName, when given (and non-empty), is passed to the reader along with the file.
    seqReader is the reader to use; a new Sequence is created when it is None.
    '''
    if convert:
        import os
        import osfiles
        seqFile = osfiles.convertSeqFile(seqFile, os.path.dirname(seqFile))

    reader = Sequence() if seqReader is None else seqReader
    reader.newPolymer()
    if polymerName:
        reader.read(seqFile, polymerName)
    else:
        reader.read(seqFile)

    updateAtomArray()
    return Molecule.getActive()
def setPredictions(molecule, predPPMs, ppmSet=-1):
    """
    Store predicted shifts and their errors on the atoms of the molecule.

    # Parameters:

    molecule (Molecule); molecule whose atom array is refreshed before the update
    predPPMs (dict); maps atom name to a (ppm, error) pair.  A trailing 'p' in
        the atom name stands for a prime ("H5p" -> "H5'", "H5pp" -> "H5''")
    ppmSet (int); when negative, values go to reference set (-ppmSet - 1) via
        setRefPPM/setRefError; otherwise to set ppmSet via setPPM/setPPMError

    # Returns:

    _ (None); atoms are updated in place
    """
    molecule.updateAtomArray()
    for atomName in predPPMs:
        ppm, errorVal = predPPMs[atomName]
        # Translate the 'p' encoding of primes back to real atom names.  A
        # double prime is handled in two steps: "H5pp" -> "H5p'" -> "H5''".
        if atomName[-1] == 'p':
            atomName = atomName[0:-1] + "'"
        if atomName[-2] == 'p':
            atomName = atomName[0:-2] + "''"
        atom = Molecule.getAtomByName(atomName)
        if ppmSet < 0:
            refSet = -ppmSet - 1
            atom.setRefPPM(refSet, ppm)
            atom.setRefError(refSet, errorVal)
        else:
            atom.setPPM(ppmSet, ppm)
            # Store the error value, not the shift itself.
            atom.setPPMError(ppmSet, errorVal)
def readSequenceString(polymerName, sequence, seqReader=None):
    '''
    Creates a polymer named polymerName from the sequence provided.
    The sequence input can either be a chain of characters but will only work if the desired
    polymer is RNA. If creating a polymer for a protein, sequence must be a list using the
    3 letter code.
    seqReader is the reader to use; a new Sequence is created when it is None.
    Returns the active Molecule after the atom array has been updated.
    '''
    residues = ArrayList()
    residues.addAll(sequence)

    reader = seqReader
    if reader is None:
        reader = Sequence()
    reader.newPolymer()
    reader.read(polymerName, residues, "")

    updateAtomArray()
    return Molecule.getActive()
def loadPDBModels(files, yaml, out):
    """
    Evaluate restraint energies for a list of PDB models and write a summary.

    For every file the coordinates are read, the refiner energy is evaluated,
    a per-model dump is written to 'output<i>.txt' in the output directory and
    a row (input name, output name, distance violation energy and, when the
    yaml data has a "shifts" entry, shift violation energy) is collected.
    The rows, sorted by input file name, are written to 'referenceFile.txt'.

    # Parameters:

    files (list); PDB files to evaluate; the first one is used to load the yaml setup
    yaml (dict); refinement description; yaml['anneal']['force'] supplies the forces
    out (str); output directory (created if missing); also stored, with a
        trailing '/', in the module-level global outDir

    # Returns:

    outFiles (list); paths of the per-model dump files that were written
    """
    global outDir
    outDir = out + '/'
    refiner = refine()
    refiner.loadFromYaml(yaml, 0, pdbFile=files[0])
    if not os.path.exists(outDir):
        os.mkdir(outDir)
    pdb = PDBFile()
    referenceFile = os.path.join(outDir, 'referenceFile.txt')
    hasShifts = "shifts" in yaml

    outFiles = []
    data = []
    for index, pdbPath in enumerate(files):
        outFile = os.path.join(outDir, 'output' + str(index + 1) + '.txt')
        pdb.readCoordinates(pdbPath, 0, False, False)
        refiner.setPars({'coarse':False,'useh':True,'dislim':4.6,'end':10000,'hardSphere':0.0,'shrinkValue':0.0, 'shrinkHValue':0.0})
        refiner.setForces(yaml['anneal']['force'])
        refiner.energy()

        datum = [getFileName(pdbPath), getFileName(outFile)]
        refiner.molecule.updateVecCoords()
        distanceEnergy = refiner.molecule.getEnergyCoords().calcNOE(False, 1.0)
        datum.append("%.1f" % (distanceEnergy))
        if hasShifts:
            shiftEnergy = refiner.energyLists.calcShift(False)
            datum.append("%.1f" % (shiftEnergy))
        data.append(datum)

        refiner.dump(0.1, .20, outFile)
        outFiles.append(outFile)

    data.sort(key=lambda x: x[0])
    header = ['PDB File Name', 'Output File', 'Dis Viol']
    if hasShifts:
        # Only label the shift column when the rows actually carry it.
        header.append('Shift Viol')
    writeLines(data, referenceFile, header)
    return outFiles
def setRefShiftsFromBMRB(bmrbID, offsets): bmrbFile = 'star/bmr'+bmrbID+'.str' shiftDict = seqalgs.readBMRBShifts(bmrbID, bmrbFile) chains = ['','A','B','C','D'] for bID in shiftDict.keys(): for chain in shiftDict[bID]: chainName = chains[chain] offset = 0 if bID in offsets: if chainName in offsets[bID]: offset = offsets[bID][chainName] for res in shiftDict[bID][chain]: for aname in shiftDict[bID][chain][res]: atomSpec = chainName+':'+str(res+offset)+'.'+aname atom = Molecule.getAtomByName(atomSpec) ppm = shiftDict[bID][chain][res][aname] if atom == None: print 'no atom',chain,atomSpec else: atom.setRefPPM(ppm)
def predictDistShifts(mol, rmax, structureNum=0, refShifts=None, alphaDict=None):
    """
    Predict RNA proton shifts with the distance-based linear model and store
    them as reference shifts on the atoms.

    For every matched atom the prediction is the reference shift for its
    "<residue>.<atom>" key plus the sum of alpha[i] * input[i], where the
    input row is the distance row from mol.calcDistanceInputMatrixRow
    extended by cos(chi) and sin(chi).  The contribution is averaged over the
    requested structures.

    # Parameters:

    mol (Molecule); the molecule to be analyzed
    rmax; passed through to mol.calcDistanceInputMatrixRow
    structureNum (int or list/tuple); structure index, or the indices to average over
    refShifts (dict); base shift per "<residue>.<atom>" key; built-in defaults when None
    alphaDict (dict or sequence); coefficients keyed by 'ribose' and 'base'; a
        non-dict value is used for both atom classes; built-in defaults when None

    # Returns:

    shifts (list); [atomShortName, ppm] pairs for every predicted atom
    """
    defaultRefShifts = {
        "U.H1'": 5.702, "U.H2'": 4.449, "U.H3'": 4.341, "U.H4'": 4.357,
        "U.H5'": 4.275, "U.H5''": 4.274, 'U.H5': 5.642, 'U.H6': 8.061,
        "A.H1'": 6.606, "A.H2'": 4.449, "A.H3'": 4.341, "A.H4'": 4.357,
        "A.H5'": 4.275, "A.H5''": 4.274, 'A.H2': 7.977, 'A.H8': 8.316,
        "G.H1'": 6.234, "G.H2'": 4.449, "G.H3'": 4.341, "G.H4'": 4.357,
        "G.H5'": 4.275, "G.H5''": 4.274, 'G.H8': 7.871,
        "C.H1'": 5.7, "C.H2'": 4.449, "C.H3'": 4.341, "C.H4'": 4.357,
        "C.H5'": 4.275, "C.H5''": 4.274, 'C.H5': 5.698, 'C.H6': 7.978,
    }
    defaultAlphas = {
        'ribose': [
            2.629, -1.978, -2.491, -0.551, 2.6, 2.402, -0.884, 0.028,
            0.39, 1.681, -0.218, -1.22, -2.413, 7.099, 5.023, -26.883,
            11.952, -0.527, -7.7, 28.734, -50.508, 19.122, -3.53, -4.062,
            0.709, 8.823, -36.481, 21.023, 6.634, 1.267, -2.01, 6.7,
            12.972, -65.587, 9.095, 8.952, -9.218, 4.321, 0.207, 14.587,
            10.079, -3.146, -3.358, 1.418, -3.314, -5.648, 6.943, -0.543,
        ],
        'base': [
            6.865, -3.892, -1.983, -0.507, 4.033, 1.264, -0.721, -0.055,
            0.83, 0.705, -0.346, -0.859, -17.689, 19.241, -4.373, -34.864,
            0.819, 0.957, 0.521, -1.355, 20.992, 2.978, -7.787, -1.922,
            1.409, 10.776, -9.739, -0.055, 5.104, -2.825, -14.755, 12.592,
            -2.459, -26.824, 2.379, 5.485, -8.897, 5.564, -2.356, 23.225,
            -5.205, -5.813, 17.198, -6.817, -20.967, 25.346, -11.519, -0.974,
        ],
    }
    if refShifts is None:
        refShifts = defaultRefShifts
    if alphaDict is None:
        alphaDict = defaultAlphas
    elif not isinstance(alphaDict, dict):
        # A bare coefficient sequence applies to both atom classes.
        alphaDict = {'ribose': alphaDict, 'base': alphaDict}

    # Build a "*.a,b,c" filter from the unique bare atom names.
    uniqueNames = []
    seen = set()
    for atomId in refShifts:
        dotIndex = atomId.find(".")
        if dotIndex != -1:
            # Strip the whole residue prefix rather than a fixed two characters.
            atomId = atomId[dotIndex + 1:]
        if atomId not in seen:
            seen.add(atomId)
            uniqueNames.append(atomId)
    filterString = ("*." + ",".join(uniqueNames)) if uniqueNames else ""
    spatialSets = Molecule.matchAtoms(MolFilter(filterString))

    # Experimental switches, fixed in code.
    plusRingMode = False
    chiMode = True
    if plusRingMode:
        ringShifts = RingCurrentShift()
        ringShifts.makeRingList(mol)

    # A single structure is treated as a one-element list so that it goes
    # through exactly the same model (including the chi terms) as the
    # multi-structure case.  The scalar path used to leave the chi terms out.
    if isinstance(structureNum, (list, tuple)):
        structList = structureNum
    else:
        structList = [structureNum]

    shifts = []
    for sp in spatialSets:
        name = sp.atom.getShortName()
        aName = sp.atom.getName()
        # Primed atom names are sugar protons; everything else is a base proton.
        aType = 'ribose' if aName[-1] == "'" else 'base'
        alphas = alphaDict[aType]
        nucName = sp.atom.getEntity().getName()
        refKey = nucName + "." + aName
        if refKey not in refShifts:
            continue
        basePPM = refShifts[refKey]

        if plusRingMode:
            # NOTE(review): this drops two trailing coefficients but takes the
            # ring ratio from the last one; looks inconsistent with the column
            # order of the training matrix -- confirm before enabling.
            alphasOnly = alphas[0:-2]
            ringRatio = alphas[-1]
        else:
            alphasOnly = alphas

        distPPM = 0.0
        ringPPM = 0.0
        for iStruct in structList:
            terms = list(mol.calcDistanceInputMatrixRow(iStruct, rmax, sp.atom))
            if chiMode:
                chi = sp.atom.getEntity().calcChi()
                terms.append(math.cos(chi))
                terms.append(math.sin(chi))
            # Indexing (rather than zip) keeps a too-short input row an error
            # instead of silently truncating the model.
            distPPM += sum([alphasOnly[i] * terms[i] for i in range(len(alphasOnly))])
            if plusRingMode:
                ringPPM += ringShifts.calcRingContributions(sp, iStruct, ringRatio)
        distPPM = (distPPM + ringPPM) / len(structList)

        ppm = basePPM + distPPM
        atom = Molecule.getAtomByName(name)
        atom.setRefPPM(ppm)
        shifts.append([str(name), ppm])
    return shifts
def readSDF(fileName, newMolecule=False):
    '''
    Reads an SD file with SDFile and returns whatever SDFile.read returns.
    When newMolecule is false the active molecule is handed to the reader;
    when it is true None is passed instead.
    The atom array of the active molecule is updated after the read.
    '''
    reader = SDFile()
    if newMolecule:
        target = None
    else:
        target = Molecule.getActive()
    result = reader.read(fileName, None, target, None)
    updateAtomArray()
    return result
def analyzeFiles(pdbs, bmrbs, typeRCDist, aType, offsets, refShifts=None, ringRatio=None, atomNames=None, builtin=False):
    """
    Analyze a whole set of pdb files and associated chemical shifts in bmrb files
    Chemical shifts are predicted through predictWithRCFromPDB and the result
    contains predicted and experimental shifts which can then be statistically analyzed.

    # Parameters:

    pdbs (list); The list of pdb identifiers.  The actual files must be in a pdbfiles subdirectory
    bmrbs (list); The list of bmrb identifiers.  The actual files are looked up in a star
        subdirectory first and then in a star2 subdirectory
    typeRCDist (String): Type of analysis to perform, 'rc' (ring current) or 'dist' (distances)
    aType (str); only atoms whose name starts with this character are compared
    offsets (dict): the offset values for certain bmrb files, indexed as offsets[bID][chainName]
    refShifts (dict); the reference shifts for each atom to be predicted.
    ringRatio (float); A scale parameter to multiply the ring-current contributions by
    atomNames (list); forwarded to predictWithRCFromPDB
    builtin (boolean); forwarded to predictWithRCFromPDB

    # Returns:

    ppmDatas,aNames (list, list) the list of PPMData values with predicted and experimental values
        and atom names that were used
    """
    ppmDatas = []
    aNames = {}
    # Chain index from the shift data -> chain name used in atom specifiers.
    chains = ['', 'A', 'B', 'C', 'D']
    for pdbID, bmrbID in zip(pdbs, bmrbs):
        print('%s %s' % (pdbID, bmrbID))
        bmrbFile = 'star/bmr' + bmrbID + '.str'
        if not os.path.exists(bmrbFile):
            bmrbFile = 'star2/bmr' + bmrbID + '.str'
            if not os.path.exists(bmrbFile):
                print('skip %s' % (bmrbFile,))
                continue
        pdb = 'pdbfiles/' + pdbID + '.pdb'
        if not getPDBFile(pdbID):
            print('skip %s' % (pdb,))
            continue

        shiftDict = seqalgs.readBMRBShifts(bmrbID, bmrbFile)
        # Called for its effect on the atoms' reference shifts, which are
        # read back below; the returned values are not needed here.
        predictWithRCFromPDB(pdb, refShifts, ringRatio, typeRCDist, atomNames, builtin)

        for bID in shiftDict:
            for chain in shiftDict[bID]:
                chainName = chains[chain]
                offset = 0
                if bID in offsets and chainName in offsets[bID]:
                    offset = offsets[bID][chainName]
                for res in shiftDict[bID][chain]:
                    for aname in shiftDict[bID][chain][res]:
                        if aname[0] != aType:
                            continue
                        atomSpec = chainName + ':' + str(res + offset) + '.' + aname
                        atom = Molecule.getAtomByName(atomSpec)
                        if atom is None:
                            print('no atom %s %s' % (chain, atomSpec))
                            continue
                        ppmV = atom.getRefPPM(0)
                        if ppmV is None:
                            continue
                        predPPM = ppmV.getValue()
                        expPPM = shiftDict[bID][chain][res][aname]
                        ppmDatas.append(PPMData(predPPM, expPPM, bID, pdbID, chain, res, aname))
                        aNames[aname] = 1
    return (ppmDatas, list(aNames))
def genRCMat(mol, atomNames, f1, ringMode, typeRCDist):
    """
    Generate training data from molecule for parameterizing ring-current
    (or distance-based) shifts.  Data is appended to the specified file.

    One tab-separated line is written per matched atom that has a reference
    shift: the "<residue>.<atom>" name, one '1'/'0' flag per entry of
    atomNames, the feature columns for the selected mode, and finally the
    reference shift.

    # Parameters:

    mol (Molecule); the molecule to be analyzed
    atomNames (list); a list of atom names to be used, either bare ("H8") or
        prefixed with a residue type ("A.H8")
    f1 (File); the output file (anything with a write method)
    ringMode (boolean): in 'rc' mode, write one geometric factor per ring type
        instead of the single summed ring-current contribution
    typeRCDist (String): Type of analysis to perform, 'rc' (ring current) or 'dist' (distances)

    # Returns:

    _ (None);

    See also: `loadRCTrainingMatrix(...)`
    """
    # Experimental switches, fixed in code.
    plusRingMode = False
    chiMode = True
    if (typeRCDist.lower()=='rc') or plusRingMode:
        ringShifts = RingCurrentShift()
        ringShifts.makeRingList(mol)
    # Build a "*.a,b,c" atom filter from the unique bare atom names.
    inFilter = {}
    filterString = ""
    for atomId in atomNames:
        dotIndex = atomId.find(".")
        if dotIndex != -1:
            # NOTE(review): the fixed slice assumes a one-character residue
            # prefix ("A.H8"); longer prefixes would be cut incorrectly.
            atomId = atomId[2:]
        if atomId in inFilter:
            continue
        inFilter[atomId] = True
        if filterString == "":
            filterString = "*."+atomId
        else:
            filterString += ","+atomId
    if (typeRCDist.lower()=='rc'):
        mol.calcLCMB(0, True)
    molFilter = MolFilter(filterString)
    spatialSets = Molecule.matchAtoms(molFilter)
    ringRatio = 1.0
    for sp in spatialSets:
        atom = sp.atom
        name = atom.getShortName()
        aName = atom.getName()
        nucName = atom.getEntity().getName()
        # The short name fetched above is replaced by "<residue>.<atom>".
        name = nucName+'.'+aName
        row = [name]
        # Indicator columns: '1' where the atom matches the atomNames entry
        # (by full or bare name), '0' elsewhere.
        found = False
        for atomName in atomNames:
            if name == atomName or aName == atomName:
                row.append('1')
                found = True
            else:
                row.append('0')
        # Atoms matched by the filter but not requested for this residue
        # type are skipped, as are atoms without a reference shift.
        if not found:
            continue
        ppm = atom.getRefPPM()
        if ppm == None:
            continue
        if (typeRCDist.lower()=='rc'):
            ringPPM = ringShifts.calcRingContributions(sp,0,ringRatio)
            ringFactors = ringShifts.calcRingGeometricFactors(sp, 0)
            if ringMode:
                # One column per ring type; ringTypes is expected to be
                # defined at module level.  Missing ring types contribute 0.
                for ringType in ringTypes:
                    if ringType in ringFactors:
                        factor = ringFactors[ringType]
                    else:
                        factor = 0.0
                    s = "%.6f" % (factor * 5.45)
                    row.append(s)
            else:
                s = "%.3f" % (ringPPM)
                row.append(s)
        elif (typeRCDist.lower()=='dist'):
            # rmax is expected to be defined at module level.
            distances = mol.calcDistanceInputMatrixRow(0, rmax, atom)#/ 5.45
            s = [ '%.6f' % elem for elem in distances ] # "%.3f" % (distances)
            row += s
            if plusRingMode:
                ringPPM = ringShifts.calcRingContributions(sp,0,ringRatio)
                s = "%.3f" % (ringPPM)
                row.append(s)
            # NOTE(review): the chi columns are written for 'dist' rows only,
            # in the order cos(chi), sin(chi), which is the order
            # predictDistShifts appends them -- confirm this nesting is intended.
            if chiMode:
                chi = atom.getEntity().calcChi()
                sinchi = math.sin(chi)
                coschi = math.cos(chi)
                s = "%.3f" % (coschi)
                row.append(s)
                s = "%.3f" % (sinchi)
                row.append(s)
        # The reference shift is always the last column.
        s = "%.3f" % (ppm)
        row.append(s)
        f1.write('\t'.join(row)+'\n')
def updateAtomArray():
    ''' Calls updateAtomArray on the currently active molecule. '''
    Molecule.getActive().updateAtomArray()
def predictRCShifts(mol, structureNum=0, refShifts=None, ringRatio=None, ringTypes=None):
    """
    Predict RNA proton shifts from ring-current contributions and store them
    as reference shifts on the atoms.

    For every matched atom the prediction is the reference shift for its
    "<residue>.<atom>" key plus the ring-current contribution, averaged over
    the requested structures.

    # Parameters:

    mol (Molecule); the molecule to be analyzed
    structureNum (int or list/tuple); structure index, or the indices to average over
    refShifts (dict); base shift per "<residue>.<atom>" key; built-in defaults when None
    ringRatio (float or sequence); scale for the ring-current contribution
        (0.475 when None).  A tuple/list/array is interpreted as one factor per
        entry of ringTypes, set with setRingFactor, and the overall scale then becomes 1.0
    ringTypes (list); ring types matching a sequence-valued ringRatio

    # Returns:

    shifts (list); [atomShortName, ppm] pairs for every predicted atom

    # Raises:

    ValueError; if ringRatio is a sequence but ringTypes is None
    """
    defaultRefShifts = {
        "U.H6": 8.00, "U.H3'": 4.56, "U.H5": 5.80, "U.H5'": 4.36,
        "A.H5'": 4.36, "G.H5''": 4.11, "U.H1'": 5.49, "A.H3'": 4.56,
        "G.H1'": 5.43, "G.H3'": 4.56, "G.H5'": 4.36, "A.H5''": 4.11,
        "C.H2'": 4.48, "C.H4'": 4.38, "G.H8": 7.77, "A.H1'": 5.51,
        "U.H4'": 4.38, "A.H8": 8.21, "C.H6": 7.94, "C.H5''": 4.11,
        "C.H5": 5.85, "U.H2'": 4.48, "A.H4'": 4.38, "G.H2'": 4.48,
        "A.H2": 7.79, "C.H5'": 4.36, "G.H4'": 4.38, "U.H5''": 4.11,
        "C.H1'": 5.46, "C.H3'": 4.56, "A.H2'": 4.4,
    }
    if refShifts is None:
        refShifts = defaultRefShifts
    if ringRatio is None:
        ringRatio = 0.475

    # Build a "*.a,b,c" filter from the unique bare atom names.
    uniqueNames = []
    seen = set()
    for atomId in refShifts:
        dotIndex = atomId.find(".")
        if dotIndex != -1:
            # Strip the whole residue prefix rather than a fixed two characters.
            atomId = atomId[dotIndex + 1:]
        if atomId not in seen:
            seen.add(atomId)
            uniqueNames.append(atomId)
    filterString = ("*." + ",".join(uniqueNames)) if uniqueNames else ""

    ringShifts = RingCurrentShift()
    ringShifts.makeRingList(mol)
    # `array` is expected to be in scope at module level.
    if isinstance(ringRatio, (tuple, list, array)):
        if ringTypes is None:
            raise ValueError("ringTypes is required when ringRatio is a sequence of factors")
        for ringType, factor in zip(ringTypes, ringRatio):
            ringShifts.setRingFactor(ringType, factor)
        # Per-ring factors already carry the scaling.
        ringRatio = 1.0

    spatialSets = Molecule.matchAtoms(MolFilter(filterString))

    shifts = []
    for sp in spatialSets:
        name = sp.atom.getShortName()
        aName = sp.atom.getName()
        nucName = sp.atom.getEntity().getName()
        refKey = nucName + "." + aName
        if refKey not in refShifts:
            continue
        basePPM = refShifts[refKey]

        if isinstance(structureNum, (list, tuple)):
            ringPPM = 0.0
            for iStruct in structureNum:
                ringPPM += ringShifts.calcRingContributions(sp, iStruct, ringRatio)
            ringPPM /= len(structureNum)
        else:
            ringPPM = ringShifts.calcRingContributions(sp, structureNum, ringRatio)

        ppm = basePPM + ringPPM
        atom = Molecule.getAtomByName(name)
        atom.setRefPPM(ppm)
        shifts.append([str(name), ppm])
    return shifts