class maps2DensMetrics(object):
    """Assign density-map voxel values to atoms and summarise them per atom.

    An atom-tagged map (map 1) labels voxels with atom numbers and a density
    map (map 2) supplies the value at each of those voxels.  The run method
    ``maps2atmdensity`` reads both maps, groups the density values by atom
    and stores summary statistics as attributes on each atom object in
    ``self.PDBarray``.

    Progress is printed to stdout and also written to the run log file
    ``<filesOut><pdbname>_log.txt``.  ``print`` is always called with a
    single argument so that the code runs unchanged under Python 2 and 3.
    """

    def __init__(self, filesIn, filesOut, pdbname, mapfilname1, maptype1,
                 mapfilname2, maptype2, plot):
        self.filesIn = filesIn
        self.filesOut = filesOut  # output directory
        self.pdbname = pdbname
        self.map1 = {'filename': mapfilname1, 'type': maptype1}
        self.map2 = {'filename': mapfilname2, 'type': maptype2}
        self.plot = plot

    def _logPath(self):
        # path of the run log file shared by every step
        return '{}{}_log.txt'.format(self.filesOut, self.pdbname)

    def _appendToLog(self, text):
        # append text to the run log, closing the file straight away so no
        # handle (or unflushed buffer) outlives the call
        with open(self._logPath(), 'a') as logfile:
            logfile.write(text)

    def maps2atmdensity(self):
        """Run the full pipeline, from reading inputs to pickling the atoms."""
        self.printTitle()

        # start a fresh log file for this eTrack run.  The file is closed
        # before any other step appends to it: a still-open, buffered 'w'
        # handle would later flush at offset 0 and overwrite those entries.
        with open(self._logPath(), 'w') as logfile:
            logfile.write('eTrack run log file\n')
            logfile.write('Date: ' + str(time.strftime("%d/%m/%Y")) + '\n')
            logfile.write('Time: ' + str(time.strftime("%H:%M:%S")) + '\n')

        self.readPDBfile()
        self.readAtomMap()
        self.readDensityMap()
        self.reportDensMapInfo()
        self.checkMapCompatibility()
        self.createVoxelList()
        self.plotDensHistPlots()
        self.calculateDensMetrics()

        # compared against True explicitly (as before) so that non-boolean
        # flag values keep their existing meaning
        if self.plot == True:  # noqa: E712
            self.plotDensScatterPlots()
            self.plotPerResidueBoxPlots()

        self.pickleAtomList()

    def readPDBfile(self):
        """Read the pdb file into ``self.PDBarray``, sorted by atom number."""
        self.startTimer()
        pdbPath = '{}{}.pdb'.format(self.filesIn, self.pdbname)
        print('Reading in pdb file...')
        print('pdb name: {}'.format(pdbPath))
        # log the path that is actually read (input directory)
        self._appendToLog('pdb name: {}\n'.format(pdbPath))

        # fill PDBarray with atom objects from the structure file
        self.PDBarray = PDBtoList(pdbPath, [])
        self.success()
        self.stopTimer()

        # make sure the atoms are ordered by atom number before they are
        # read through
        self.PDBarray.sort(key=lambda x: x.atomnum)

        # get the VDW radius for each atom
        for atom in self.PDBarray:
            atom.VDW_get()

    def readAtomMap(self):
        """Read the atom-tagged map and report how many atoms lack voxels."""
        self.startTimer()
        self.fillerLine()
        mapPath = '{}{}'.format(self.filesIn, self.map1['filename'])
        print('Reading in Atom map file...')
        print('Atom map name: {}'.format(mapPath))
        # log the path that is actually read (input directory)
        self._appendToLog('atom map name: {}\n'.format(mapPath))

        self.atmmap, self.atom_indices = readMap(
            self.filesIn, self.filesOut, self.pdbname,
            self.map1['filename'], self.map1['type'], [])
        self.success()
        self.stopTimer()

        # atom numbers 1..N that never appear as a voxel value in the map
        numAtoms = len(self.PDBarray)
        assignedAtoms = set(self.atmmap.vxls_val)
        atomsNotPresent = set(range(1, numAtoms + 1)) - assignedAtoms

        summary = 'Number of atoms not assigned to voxels: %s' % str(
            len(atomsNotPresent))
        print(summary)
        self._appendToLog(summary + '\n')

    def readDensityMap(self):
        """Read the density map at the voxels selected by the atom map."""
        self.startTimer()
        self.fillerLine()
        mapPath = '{}{}'.format(self.filesIn, self.map2['filename'])
        print('Reading in Density map file...')
        print('Density map name: {}'.format(mapPath))
        self._appendToLog('density map name: {}\n'.format(mapPath))

        self.densmap = readMap(
            self.filesIn, self.filesOut, self.pdbname,
            self.map2['filename'], self.map2['type'], self.atom_indices)
        self.success()
        self.stopTimer()

    def reportDensMapInfo(self):
        """Print summary statistics for structure and solvent voxels."""
        structureVals = self.densmap.vxls_val

        # list() so the product is taken over the values themselves rather
        # than over a dict view object
        totalNumVxls = np.prod(list(self.atmmap.nxyz.values()))
        structureNumVxls = len(structureVals)
        totalMean = self.densmap.density['mean']
        structureMean = np.mean(structureVals)

        # solvent = every voxel not assigned to the structure; its mean
        # follows from the whole-map mean and the structure mean
        solvNumVxls = totalNumVxls - structureNumVxls
        solvMean = (totalNumVxls*totalMean -
                    structureNumVxls*structureMean)/solvNumVxls

        print('For voxels assigned to structure:')
        print('mean structure density : {}'.format(structureMean))
        print('max structure density : {}'.format(max(structureVals)))
        print('min structure density : {}'.format(min(structureVals)))
        print('std structure density : {}'.format(np.std(structureVals)))
        print('# voxels included : {}'.format(structureNumVxls))
        print('For voxels assigned to solvent:')
        print('mean solvent-region density : {}'.format(solvMean))
        print('# voxels included : {}'.format(solvNumVxls))

    def checkMapCompatibility(self):
        """Check that the atom-tagged and density maps can be combined.

        The script exits (``sys.exit``) when the map headers differ, or when
        the cell dimensions do not even agree after rounding to 0 decimal
        places.  Cell dimensions that agree only after rounding are reported
        and processing continues.
        """
        atm, dens = self.atmmap, self.densmap

        with open(self._logPath(), 'a') as logfile:
            self.fillerLine()
            print('Checking that maps have same dimensions and '
                  'sampling properties...')
            self.startTimer()

            # Check that the maps have the same dimensions, grid sampling,..
            if (atm.axis != dens.axis or
                    atm.gridsamp != dens.gridsamp or
                    atm.start != dens.start or
                    atm.nxyz != dens.nxyz or
                    atm.type != dens.type):
                msg = 'Incompatible map properties --> terminating script'
                print(msg)
                logfile.write(msg + '\n')
                sys.exit()

            elif atm.celldims != dens.celldims:
                print('Not exact same map grid dimensions..')
                logfile.write('Not exactly same map grid dimensions..\n')

                # find the largest number of decimal places (6 down to 0)
                # at which all six cell dimensions agree
                stop = True
                for dp in range(6, -1, -1):
                    count = 0
                    for key in atm.celldims.keys():
                        if (np.round(atm.celldims[key], dp) ==
                                np.round(dens.celldims[key], dp)):
                            count += 1
                    if count == 6:
                        print('Map grid dimensions same to {}dp'.format(dp))
                        logfile.write(
                            'Map grid dimensions same to {}dp '.format(dp) +
                            '--> continuing with processing anyway\n')
                        stop = False
                        break

                if stop:
                    print('Map grid dimensions still not same to 0dp')
                    logfile.write('Map grid dimensions still not same to '
                                  '0dp --> terminating script\n')
                    sys.exit()

            else:
                self.success()
                msg = 'The atom and density map are of compatible format!'
                print(msg)
                logfile.write(msg + '\n')

            self.stopTimer()
            self.fillerLine()
            total = 'Total number of voxels assigned to atoms: %s' % str(
                len(atm.vxls_val))
            print(total)
            logfile.write(total + '\n')

    def createVoxelList(self):
        """Group density values by atom number into ``self.vxlsPerAtom``."""
        self.startTimer()
        self.fillerLine()
        print('Combining voxel density and atom values...')

        # a plain dict is used on purpose: atoms without voxels have no key
        vxlsPerAtom = {atm: [] for atm in self.atmmap.vxls_val}
        for atm, dens in zip(self.atmmap.vxls_val, self.densmap.vxls_val):
            vxlsPerAtom[atm].append(dens)
        self.vxlsPerAtom = vxlsPerAtom

        # drop the maps now to save memory
        self.densmap, self.atmmap = [], []
        self.stopTimer()

    def plotDensHistPlots(self):
        """Plot histogram and kde plots of the number of voxels per atom."""
        for plotType in ('histogram', 'kde'):
            plotVxlsPerAtm(self.pdbname, self.filesOut,
                           self.vxlsPerAtom, plotType)

    def calculateDensMetrics(self):
        """Store per-atom density summary statistics on each atom object.

        Sets mean, median, min, max, std, the 5/10/90/95th percentiles and
        the voxel count.  Atoms with no assigned voxels are left without
        these attributes instead of aborting the run with a ``KeyError``.
        ``self.vxlsPerAtom`` is deleted afterwards to save memory, and
        ``getAdditionalMetrics`` is then called on every atom.
        """
        self.fillerLine()
        self.startTimer()
        print('Calculating electron density statistics per atom...')

        for atom in self.PDBarray:
            # atoms never assigned to a voxel have no dictionary entry
            atomVxls = self.vxlsPerAtom.get(atom.atomnum, [])
            if len(atomVxls) == 0:
                continue
            atom.meandensity = np.mean(atomVxls)
            atom.mediandensity = np.median(atomVxls)
            atom.mindensity = min(atomVxls)
            atom.maxdensity = max(atomVxls)
            atom.stddensity = np.std(atomVxls)
            atom.min90tile = np.percentile(atomVxls, 10)
            atom.max90tile = np.percentile(atomVxls, 90)
            atom.min95tile = np.percentile(atomVxls, 5)
            atom.max95tile = np.percentile(atomVxls, 95)
            atom.numvoxels = len(atomVxls)

        self.success()
        self.stopTimer()

        # delete the vxlsPerAtom list now to save memory
        del self.vxlsPerAtom

        # get additional metrics per atom
        for atom in self.PDBarray:
            atom.getAdditionalMetrics()

    def plotDensScatterPlots(self):
        """Plot scatter plots for pairs of per-atom density metrics."""
        # NOTE: the timer started here is only stopped at the end of
        # plotPerResidueBoxPlots, which is run straight afterwards
        self.startTimer()
        self.fillerLine()
        print('Plotting scatter plots for electron density statistics...')

        plotVars = (['mean', 'max'], ['mean', 'median'], ['mean', 'min'],
                    ['min', 'max'], ['mean', 'std'], ['std', 'rsd'],
                    ['min', 'min90tile'], ['max', 'max90tile'],
                    ['min90tile', 'min95tile'], ['max90tile', 'max95tile'],
                    ['std', 'range'], ['mean', 'range'])
        for pVars in plotVars:
            edens_scatter(self.filesOut, pVars, self.PDBarray, self.pdbname)

    def plotPerResidueBoxPlots(self):
        """Output per-residue box plots for the mean, min and max metrics."""
        for densMet in ('mean', 'min', 'max'):
            residueArray = densper_resatom_NOresidueclass(
                self.filesOut, self.PDBarray, 'y', densMet, self.pdbname)

            minresnum = 0
            sideormain = ['sidechain', 'mainchain']
            # NOTE(review): 'min' is passed on every iteration rather than
            # densMet -- confirm this is intended
            densper_res(self.filesOut, residueArray, minresnum,
                        sideormain, 'min', self.pdbname)

            # remove residueArray now to save memory
            residueArray = []

        # reports the time elapsed since the most recent startTimer() call
        self.stopTimer()

    def pickleAtomList(self):
        """Save the atom list and remember the name of the file written."""
        self.pklFileName = save_objectlist(self.PDBarray, self.pdbname)

    def startTimer(self):
        """Record the start time of the current section."""
        self.timeStart = time.time()

    def stopTimer(self):
        """Print the time elapsed since the last ``startTimer`` call."""
        elapsedTime = time.time() - self.timeStart
        print('section time: {}'.format(elapsedTime))
        sys.stdout.flush()

    def success(self):
        print('---> success')

    def fillerLine(self):
        print('\n------------------------------------------------')

    def printTitle(self):
        print('\n================================================')
        print('------------------------------------------------')
        print('||| eTrack run |||')
        print('------------------------------------------------')
        print('================================================\n')
class maps2DensMetrics(object):
    """Assign density map values to atoms and derive per-atom metrics.

    An atom-tagged map determines which regions of space (voxels) are
    assigned to each atom.  The values of a density map (typically Fo-Fo)
    and, optionally, of an Fcalc map at those voxels are grouped per atom,
    and summary metrics are stored as attributes on the atom objects held
    in ``self.PDBarray``.

    ``logFile`` must provide ``writeToLog(str=..., strip=..., forcePrint=...)``
    because every message is routed through ``lgwrite``.
    NOTE(review): the default ``logFile`` value is a plain string, which
    does not provide that method -- callers appear to be expected to pass
    a log object.
    """

    # for each atom type on which per-atom xyz (cluster) analysis is run,
    # the two atom types of the same residue whose voxel mid points are
    # used as known reference points
    _REF_PARTNERS = {
        'GLU-CD': ('GLU-OE1', 'GLU-OE2'),
        'GLU-OE1': ('GLU-CD', 'GLU-OE2'),
        'GLU-OE2': ('GLU-CD', 'GLU-OE1'),
        'ASP-CG': ('ASP-OD1', 'ASP-OD2'),
        'ASP-OD1': ('ASP-CG', 'ASP-OD2'),
        'ASP-OD2': ('ASP-CG', 'ASP-OD1'),
        'CYS-SG': ('CYS-CB', 'CYS-CA'),
        'MET-SD': ('MET-CE', 'MET-CG'),
    }

    # atom types kept by calcDensMetrics when inclOnlyGluAsp is set
    _METRIC_ATOM_TYPES = ('GLU-CD', 'GLU-OE1', 'GLU-OE2',
                          'ASP-OD1', 'ASP-OD2', 'ASP-CG',
                          'CYS-SG', 'MET-SD')

    # atom types kept by createVoxelList when inclOnlyGluAsp is set (the
    # types above plus their reference partners)
    _XYZ_ATOM_TYPES = ('GLU-CD', 'GLU-OE1', 'GLU-OE2',
                       'ASP-OD1', 'ASP-OD2', 'ASP-CG',
                       'CYS-SG', 'CYS-CB', 'CYS-CA',
                       'MET-SD', 'MET-CE', 'MET-CG')

    # number of neighbouring list entries searched either side of an atom
    # when looking for its reference partners
    _REF_SEARCH_WINDOW = 10

    def __init__(self,
                 filesIn='', filesOut='', pdbName='', atomTagMap='',
                 densityMap='', FCmap='', plotScatter=False, plotHist=False,
                 logFile='./untitled.log', calcFCmap=True,
                 doXYZanalysis=False):

        # the input directory
        self.filesIn = filesIn

        # the output directory
        self.filesOut = filesOut

        # the pdb file name
        self.pdbName = pdbName

        # atom-tagged map name
        self.atomMapIn = atomTagMap

        # density map name (typically Fo-Fo)
        self.densMapIn = densityMap

        # FC map name
        self.FCmapIn = FCmap

        # (bool) plot scatter plots or not
        self.plotScatter = plotScatter

        # (bool) plot histogram plots or not
        self.plotHist = plotHist

        # log file object (see class docstring)
        self.log = logFile

        # whether FC map should be generated
        self.calcFCmap = calcFCmap

        # whether to do analysis based on xyz of each voxel
        self.doXYZanalysis = doXYZanalysis

    def maps2atmdensity(self,
                        mapsAlreadyRead=False):
        """Main run method for this class.

        Reads the pdb file, the atom-tagged map, the density map and
        (if ``self.calcFCmap``) the Fcalc map, assigns density values to
        each atom and calculates the per-atom summary metrics.  The
        reading, reporting and compatibility-check steps are skipped when
        ``mapsAlreadyRead`` is True.
        """
        if not mapsAlreadyRead:
            self.readPDBfile()
            self.readAtomMap()
            self.readDensityMap()
            self.reportDensMapInfo()
            self.checkMapCompatibility()
            if self.calcFCmap:
                self.readFCMap()
                self.reportDensMapInfo(mapType='calc')

        self.createVoxelList()
        if self.plotHist:
            self.plotDensHistPlots()
        self.calcDensMetrics(showProgress=False)
        if self.plotScatter:
            self.plotDensScatterPlots()

    def readPDBfile(self):
        """Read the pdb file into ``self.PDBarray``.

        A list of atom objects is created, to which density metric
        information is added as additional attributes by the methods
        below.  The list is sorted by atom number.
        """
        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading pdb file: {}'.format(self.pdbName))

        # read in the pdb file to fill list of atom objects
        self.PDBarray = PDBtoList('{}{}'.format(self.filesIn, self.pdbName))
        self.stopTimer()

        # make sure array of atoms ordered by atom number
        self.PDBarray.sort(key=lambda x: x.atomnum)

    def readAtomMap(self):
        """Read the atom-tagged map and log how many atoms lack voxels."""
        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading atom-tagged map file...\n' +
                        'Atom map name: {}'.format(self.atomMapIn))

        self.atmmap, self.atomIndices = readMap(
            dirIn=self.filesIn, dirOut=self.filesOut, mapName=self.atomMapIn,
            mapType='atom_map', log=self.log)

        self.stopTimer()

        # atom numbers 1..N that never appear as a voxel value in the map
        # (i.e. atoms not assigned to voxels)
        numAtms = len(self.PDBarray)
        atmsNotPres = (set(range(1, numAtms + 1)) -
                       set(self.atmmap.vxls_val))

        self.lgwrite(
            ln='Number of atoms not assigned to voxels: ' +
               '{}'.format(len(atmsNotPres)))

    def readDensityMap(self):
        """Read the density map at the voxels selected by the atom map."""
        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading density map file...\n' +
                        'Density map name: {}'.format(self.densMapIn))

        self.densmap = readMap(dirIn=self.filesIn, dirOut=self.filesOut,
                               mapName=self.densMapIn, mapType='density_map',
                               atomInds=self.atomIndices, log=self.log)
        self.stopTimer()

    def readFCMap(self):
        """Read the FC (calculated structure factor) density map.

        This method should not be called if no FC density map has been
        provided in the current run.
        """
        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading Fcalc density map file...\n' +
                        'Density map name: {}'.format(self.FCmapIn))

        self.FCmap = readMap(dirIn=self.filesIn, dirOut=self.filesOut,
                             mapName=self.FCmapIn, mapType='density_map',
                             atomInds=self.atomIndices, log=self.log)
        self.stopTimer()

    def reportDensMapInfo(self,
                          numSfs=4, mapType='density'):
        """Write summary information for a density map to the log.

        numSfs:  number of decimal places the reported values are rounded to
        mapType: 'density' for the density map, 'calc' for the Fcalc map

        Raises ValueError for any other ``mapType``.
        """
        if mapType == 'density':
            mp = self.densmap
        elif mapType == 'calc':
            mp = self.FCmap
        else:
            raise ValueError(
                'Unknown mapType "{}": expected "density" or "calc"'.format(
                    mapType))

        structureVals = mp.vxls_val

        totalNumVxls = np.prod(list(self.atmmap.nxyz.values()))
        structureNumVxls = len(structureVals)
        totalMean = mp.density['mean']
        structureMean = np.mean(structureVals)

        # solvent = every voxel not assigned to the structure; its mean
        # follows from the whole-map mean and the structure mean
        solvNumVxls = totalNumVxls - structureNumVxls
        solvMean = (totalNumVxls*totalMean -
                    structureNumVxls*structureMean)/solvNumVxls

        self.lgwrite(
            ln='\nFor voxels assigned to structure:\n' +
               '\tmean structure density : {}\n'.format(
                round(structureMean, numSfs)) +
               '\tmax structure density : {}\n'.format(
                round(max(structureVals), numSfs)) +
               '\tmin structure density : {}\n'.format(
                round(min(structureVals), numSfs)) +
               '\tstd structure density : {}\n'.format(
                round(np.std(structureVals), numSfs)) +
               '\t# voxels included : {}\n'.format(structureNumVxls) +
               '\nFor voxels assigned to solvent:\n' +
               '\tmean solvent-region density : {}\n'.format(
                round(solvMean, numSfs)) +
               '\t# voxels included : {}'.format(solvNumVxls))

    def checkMapCompatibility(self):
        """Check that the atom-tagged and density maps can be combined.

        This requirement is met if the maps have the same map header
        information.  Grid dimensions are permitted to deviate between
        the two maps, however this is flagged at run time.  Problems are
        reported through ``error``.
        """
        atm, dens = self.atmmap, self.densmap

        self.printStepNumber()
        self.lgwrite(
            ln='Checking that maps have same dimensions and sampling...')
        self.startTimer()

        # Check that the maps have the same dimensions, grid sampling,..
        if (atm.axis != dens.axis or
                atm.gridsamp != dens.gridsamp or
                atm.start != dens.start or
                atm.nxyz != dens.nxyz or
                atm.type != dens.type):

            error(text='Incompatible map properties',
                  log=self.log, type='error')

        elif atm.celldims != dens.celldims:
            self.lgwrite(ln='Not exact same map grid dimensions..')

            # find the largest number of decimal places (6 down to 0) at
            # which all six cell dimensions agree
            stop = True
            for dp in range(6, -1, -1):
                count = 0
                for key in atm.celldims:
                    roundedAtmmapDim = np.round(atm.celldims[key], dp)
                    roundedDensmapDim = np.round(dens.celldims[key], dp)
                    if roundedAtmmapDim == roundedDensmapDim:
                        count += 1
                if count == 6:
                    self.lgwrite(
                        ln='Map grid dimensions same to {}dp\n'.format(dp) +
                           '--> continuing with processing anyway')
                    stop = False
                    break

            if stop:
                error(text='Map grid dimensions still not same to 0dp',
                      log=self.log, type='error')

        else:
            self.success()
            self.lgwrite(
                ln='The atom and density map are of compatible format!')

        self.stopTimer()

        self.lgwrite(
            ln='Total number of voxels assigned to atoms: {}'.format(
                len(atm.vxls_val)))

    @staticmethod
    def _atomTag(atom):
        # '<residue type>-<atom type>' part of an atom ID (everything from
        # the third '-'-separated field onwards)
        return '-'.join(atom.getAtomID().split('-')[2:])

    def createVoxelList(self,
                        inclOnlyGluAsp=False):
        """Group map values by atom number.

        Fills ``self.vxlsPerAtom`` (density map values per atom) and, if
        ``self.calcFCmap``, ``self.FCperAtom`` (Fcalc map values per atom).
        If ``self.doXYZanalysis`` is set, the fractional xyz positions of
        the voxels are also analysed per atom: ``atom.vxlMidPt`` is set and
        ``self.xyzsPerAtom`` filled.  ``inclOnlyGluAsp`` restricts that
        extra analysis to a fixed set of atom types (testing use).
        """
        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Combining voxel density and atom values...')
        self.success()

        atmVals = self.atmmap.vxls_val

        # plain dicts are used on purpose: atoms without any voxel have no
        # key, which calcDensMetricsForAtom relies on to detect them
        vxlDic = {atm: [] for atm in atmVals}

        self.densmap.reshape1dTo3d()
        self.densmap.abs2xyz_params()

        for atm, dens in zip(atmVals, self.densmap.vxls_val):
            vxlDic[atm].append(dens)
        self.vxlsPerAtom = vxlDic

        # The following is not essential for run
        # and should not be called by default
        if self.doXYZanalysis:

            # call this extra module that is required for XYZ analysis
            from perAtomClusterAnalysis import perAtomXYZAnalysis

            xyz_list = self.densmap.getVoxXYZ(
                self.atomIndices, coordType='fractional')

            xyzDic = {atm: [] for atm in atmVals}
            for atm, xyz in zip(atmVals, xyz_list):
                xyzDic[atm].append(xyz)

            # the reference point is the same for every atom, so it is
            # computed once rather than inside the loop
            vxlRefPoint = np.mean(xyz_list, 0)

            # get the mid points for each atom from the set of voxels
            # per atom, whilst accounting for symmetry (the asym unit
            # may not contain 1 single whole molecule, but split up)
            xyzDic2 = {}
            for atom in self.PDBarray:

                # this is more for testing reasons than any clear use
                if (inclOnlyGluAsp and
                        self._atomTag(atom) not in self._XYZ_ATOM_TYPES):
                    continue

                xyzAnalysis = perAtomXYZAnalysis(
                    atomObj=atom, vxlRefPoint=vxlRefPoint,
                    densPerVxl=np.round(np.array(vxlDic[atom.atomnum]), 3),
                    xyzsPerAtom=xyzDic[atom.atomnum],
                    densMapObj=self.densmap)

                xyzAnalysis.getxyzPerAtom()
                atom.vxlMidPt = xyzAnalysis.findVoxelMidPt()
                xyzDic2[atom.getAtomID()] = xyzAnalysis.keptPts

            self.xyzsPerAtom = xyzDic2

        if self.calcFCmap:
            vxlDic2 = {atm: [] for atm in atmVals}
            for atm, dens in zip(atmVals, self.FCmap.vxls_val):
                vxlDic2[atm].append(dens)
            self.FCperAtom = vxlDic2

        self.deleteMapsAttributes()
        self.stopTimer()

    def deleteMapsAttributes(self):
        """Delete map data that is no longer needed, to save memory.

        Currently only the density map voxel values are removed; deletion
        of the atom map and FC map is left disabled below.
        """
        # del self.atmmap
        # if self.calcFCmap:
        #     del self.FCmap
        del self.densmap.vxls_val

    def plotDensHistPlots(self,
                          getVoxelStats=False, perAtmDensHist=False):
        """Create histogram and kde plots of the number of voxels per atom."""
        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Plotting histogram plots of voxels per atom...\n' +
                        'Plots written to "{}plots"'.format(self.filesOut))

        stats = plotVxlsPerAtm(pdbName=self.pdbName, where=self.filesOut,
                               vxlsPerAtom=self.vxlsPerAtom, plotType='both',
                               returnStats=getVoxelStats)

        if stats != '':
            print('mean: {}\nstd: {}\nmax: {}\nmin: {}'.format(*stats))

        if perAtmDensHist:
            plotDensForAtm(pdbName=self.pdbName, where=self.filesOut,
                           vxlsPerAtom=self.vxlsPerAtom, plotType='both',
                           PDBarray=self.PDBarray)

        self.stopTimer()

    def calcDensMetricsForAtom(self,
                               atom=None, plotDistn=False):
        """Calculate the density metrics for one atom.

        The metrics are stored as attributes on ``atom``.  If the atom has
        no voxels assigned a warning is raised through ``error`` and the
        metrics are derived from a single NaN value.  When
        ``self.calcFCmap`` is set, Fcalc-weighted metrics are added too.
        When ``self.doXYZanalysis`` is set, cluster analysis is run on the
        voxel positions assigned to the atom; this is slow and should not
        be selected for a standard run of the code.
        """
        try:
            atomVxls = self.vxlsPerAtom[atom.atomnum]
        except KeyError:
            error(
                text='No voxels assigned to an atom. Consider ' +
                     'increasing per-atom search radius parameter in RIDL ' +
                     'input .txt file.',
                log=self.log, type='warning')
            atomVxls = [np.nan]

        if len(atomVxls) == 0:
            return

        atom.meandensity = np.mean(atomVxls)
        atom.mediandensity = np.median(atomVxls)
        atom.mindensity = min(atomVxls)
        atom.maxdensity = max(atomVxls)
        atom.stddensity = np.std(atomVxls)
        atom.min90tile = np.percentile(atomVxls, 10)
        atom.max90tile = np.percentile(atomVxls, 90)
        atom.min95tile = np.percentile(atomVxls, 5)
        atom.max95tile = np.percentile(atomVxls, 95)
        atom.numvoxels = len(atomVxls)

        posVals = [w for w in atomVxls if w > 0]
        atom.meanPosOnly = np.mean(posVals) if posVals else 0

        negVals = [w for w in atomVxls if w < 0]
        atom.meanNegOnly = np.mean(negVals) if negVals else 0

        if self.calcFCmap:
            # if the user has opted to calculate an Fcalc map in addition
            # to the difference map, then additional metrics can be
            # derived using this map. These metrics typically use the Fcalc
            # map density at each voxel to weight the contribution that
            # each voxel's difference map value should play when
            # calculating damage metrics. Effectively, a voxel far from an
            # atom (but still included in the search radius around that
            # atom) should not contribute to a damage indicator as much as
            # a voxel close to the atomic centre
            atomFCvals = self.FCperAtom.get(atom.atomnum)

            if atomFCvals is None:
                # no Fcalc voxels for this atom either (already warned
                # about above): mark the weighted metrics as undefined,
                # mirroring the NaN / 0 values of the unweighted ones
                atom.densityWeightedMean = np.nan
                atom.densityWeightedMin = np.nan
                atom.densityWeightedMax = np.nan
                atom.fracOfMaxAtomDensAtMin = np.nan
                atom.densityWeightedMeanPosOnly = 0
                atom.densityWeightedMeanNegOnly = 0
            else:
                self._calcFCweightedMetrics(
                    atom, atomVxls, atomFCvals, plotDistn)

        if self.doXYZanalysis:
            # provides the user with the option to also run
            # per-atom cluster analysis on the spatial
            # distribution of voxels assigned to a single atom.
            # This would be useful to distinguish 'clumps' of
            # positive or negative difference density, in order
            # to decide whether an atom may have shifted
            # position upon irradiation.
            # It should be noted that this option takes a
            # significant time to run, and should be deselected
            # in a standard run of the code

            # same optional module that createVoxelList imports
            from perAtomClusterAnalysis import perAtomXYZAnalysis

            self.clustDoneOnAtm.append(atom.getAtomID())

            clustAnalysis = perAtomXYZAnalysis(
                atomObj=atom, vxlMidPt=atom.vxlMidPt,
                knownRefPoint=self.knownRefPt1,
                knownRefPoint2=self.knownRefPt2)

            clustAnalysis.keptPts = self.xyzsPerAtom[atom.getAtomID()]
            clustAnalysis.partitionPtsByVec()
            # atom.negClusterVal = clustAnalysis.topNegClustMean
            # atom.totDensShift = clustAnalysis.netDensShift
            self.densByRegion.append(clustAnalysis.densByRegion)

    def _calcFCweightedMetrics(self, atom, atomVxls, atomFCvals, plotDistn):
        # derive the Fcalc-weighted metrics for one atom from its density
        # map values (atomVxls) and Fcalc map values (atomFCvals)

        # NOTE: currently set all negative values to zero. This has
        # effect of ignoring Fcalc density that is less than the map
        # mean. This is implemented such that all per-voxel weights
        # (see below) are positive and so therefore sensible
        # weighted-means can be calculated. This may need to be
        # reconsidered for future use!
        atomFCvals = [v if v > 0 else 0 for v in atomFCvals]
        # NOTE(review): if no Fcalc value is positive the maximum is 0 and
        # the normalised weights below are undefined (NaN)
        atomFCvalsMaxNormed = np.array(atomFCvals)/max(atomFCvals)

        minIndex = np.array(atomVxls).argmin()
        weightedVxls = np.multiply(atomVxls, atomFCvalsMaxNormed)

        atom.densityWeightedMean = np.mean(weightedVxls)
        atom.densityWeightedMin = np.min(weightedVxls)
        atom.densityWeightedMax = np.max(weightedVxls)

        # the following attribute provides an indication of the
        # fraction of the local maximum Fcalc map density around
        # the current atom at the point where the minimum difference
        # map value has been located to be. A higher value (closer to
        # 1) indicates that the min density value is found at an
        # electron density-rich region of space, whereas a lower
        # value (closer to 0) indicates that the min density value is
        # located away from where the majority of the electron density
        # assigned to the atom is predicted to be.
        atom.fracOfMaxAtomDensAtMin = atomFCvalsMaxNormed[minIndex]

        posVals = [w for w in weightedVxls if w > 0]
        negVals = [w for w in weightedVxls if w < 0]
        posValsSum = np.sum(posVals)
        negValsSum = np.sum(negVals)

        posWeights = [v for v, w in zip(
            atomFCvalsMaxNormed, weightedVxls) if w > 0]
        negWeights = [v for v, w in zip(
            atomFCvalsMaxNormed, weightedVxls) if w < 0]
        posWeightsSum = np.sum(posWeights)
        negWeightsSum = np.sum(negWeights)

        if posVals != []:
            atom.densityWeightedMeanPosOnly = posValsSum/posWeightsSum
        else:
            atom.densityWeightedMeanPosOnly = 0

        if negVals != []:
            atom.densityWeightedMeanNegOnly = negValsSum/negWeightsSum
        else:
            atom.densityWeightedMeanNegOnly = 0

        if plotDistn:
            # typically only to be used for testing purposes
            self.plotFCdistnPlot(
                atomsToPlot=['GLU-CD', 'CYS-SG'], atomOfInterest=atom,
                atomFCvals=atomFCvals, FCatMin=atomFCvals[minIndex],
                atomFCvalsMaxNorm=atomFCvalsMaxNormed)

    def _findKnownRefPoints(self, index, atom, tag):
        # set self.knownRefPt1/2 to the voxel mid points of the two
        # reference partner atoms of `atom`, searching the atoms close to
        # position `index` in self.PDBarray
        partners = self._REF_PARTNERS.get(tag)
        if partners is None:
            # no reference partners are defined for this atom type, so do
            # not carry over the points found for a previous atom.
            # NOTE(review): confirm perAtomXYZAnalysis accepts None here
            self.knownRefPt1 = None
            self.knownRefPt2 = None
            return

        num = '-'.join(atom.getAtomID().split('-')[:2])
        lookFor1 = '{}-{}'.format(num, partners[0])
        lookFor2 = '{}-{}'.format(num, partners[1])

        srt = max(index - self._REF_SEARCH_WINDOW, 0)
        stp = min(index + self._REF_SEARCH_WINDOW, len(self.PDBarray))

        # a reference point is only updated when its partner is found
        for atm2 in self.PDBarray[srt:stp]:
            atm2ID = atm2.getAtomID()
            if atm2ID == lookFor1:
                self.knownRefPt1 = atm2.vxlMidPt
            elif atm2ID == lookFor2:
                self.knownRefPt2 = atm2.vxlMidPt

    def calcDensMetrics(self,
                        plotDistn=False, showProgress=True, parallel=False,
                        makeTrainSet=False, inclOnlyGluAsp=False,
                        doRandomSubset=False):
        """Determine density summary metrics for every atom.

        'inclOnlyGluAsp' allows calculations to be performed only for a
        fixed set of Glu/Asp/Cys/Met atoms (this is not typically suitable
        and will cause later analysis to break), however allows quicker
        generation of per-atom training sets over a structure.  Training
        sets for supervised learning classification can be created by
        setting the 'makeTrainSet' input to True; this switches on both
        ``self.doXYZanalysis`` and ``inclOnlyGluAsp``.  'doRandomSubset'
        processes only a random ~1% of atoms (testing use).

        ``self.vxlsPerAtom`` is deleted at the end of the method.
        """
        if makeTrainSet:
            self.doXYZanalysis = True
            inclOnlyGluAsp = True

        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Calculating electron density statistics per atom...')

        total = len(self.PDBarray)

        self.densByRegion = []
        self.clustDoneOnAtm = []

        if parallel:
            # TODO: this would be great to implement at some point
            print('Parallel processing not currently implemented!')

        else:
            if doRandomSubset:
                import random

            # defined up front so that the cluster analysis never reads a
            # reference point attribute that was not set
            self.knownRefPt1 = None
            self.knownRefPt2 = None

            for i, atom in enumerate(self.PDBarray):

                tag = None
                if inclOnlyGluAsp or self.doXYZanalysis:
                    tag = self._atomTag(atom)

                # this is more for testing reasons than any clear use
                if inclOnlyGluAsp and tag not in self._METRIC_ATOM_TYPES:
                    continue

                if self.doXYZanalysis:
                    self._findKnownRefPoints(i, atom, tag)

                # only calculate metrics for a random subset of atoms
                # - for testing purposes
                if doRandomSubset:
                    if random.uniform(0, 1) > 0.01:
                        continue
                    print('Random atom used: ' + atom.getAtomID())

                if showProgress:
                    sys.stdout.write('\r')
                    sys.stdout.write(
                        '{}%'.format(round(100*float(i)/total, 3)))
                    sys.stdout.flush()

                self.calcDensMetricsForAtom(atom=atom, plotDistn=plotDistn)
                atom.getAdditionalMetrics()

        if makeTrainSet:
            self.makeTrainingSet()

        self.success()
        self.stopTimer()

        # delete vxlsPerAtom since no longer needed
        del self.vxlsPerAtom

        # NOTE: a commented-out, test-only experiment used to follow here.
        # It reduced self.densByRegion with PCA, clustered it with k-means
        # (scikit-learn) and plotted the clusters labelled with
        # self.clustDoneOnAtm.  It was never part of a standard run.

    def makeTrainingSet(self,
                        killNow=True, standardise=False):
        """Write a training set of per-atom density values to file.

        One line is written per atom on which cluster analysis was done:
        the atom ID, the (optionally standardised) ``densByRegion`` values
        rounded to 3 d.p. and the atom B-factor.  The output file is
        ``<filesOut>clusterTrainingSet-<n>.trset`` with the first unused
        ``n``.  The script exits afterwards when ``killNow`` is True.

        NOTE: This should NOT be included in a standard run
        """
        print('Preparing classifier training dataset')

        if standardise:
            from sklearn.preprocessing import StandardScaler
            X = StandardScaler().fit_transform(self.densByRegion)
        else:
            X = self.densByRegion

        # B-factor of the first atom carrying each atom ID
        bfactorByID = {}
        for atm in self.PDBarray:
            bfactorByID.setdefault(atm.getAtomID(), atm.Bfactor)

        # Write classification features to the first unused output file
        def trainingSetPath(index):
            return '{}clusterTrainingSet-{}.trset'.format(
                self.filesOut, index)

        fileIndex = 1
        while os.path.isfile(trainingSetPath(fileIndex)):
            fileIndex += 1
        outPath = trainingSetPath(fileIndex)

        print('Writing calculated features to file: "{}"'.format(outPath))
        with open(outPath, 'w') as csvOut:
            for atmID, dens in zip(self.clustDoneOnAtm, X):
                # an unknown atom ID gets NaN rather than shifting the
                # B-factors of all later rows
                bfactor = bfactorByID.get(atmID, np.nan)
                csvOut.write(
                    atmID + ',' +
                    ','.join([str(np.round(d, 3)) for d in dens]) +
                    ',{}\n'.format(bfactor))

        if killNow:
            import sys
            sys.exit()

    def plotFCdistnPlot(self,
                        plot=True, atomOfInterest='',
                        atomsToPlot=['GLU-CD', 'CYS-SG'], atomFCvals=[],
                        atomFCvalsMaxNorm=[], FCatMin=[], plotType='.png',
                        axesFont=18):
        """Plot the distribution of Fcalc values for an atom.

        Plots a kde & histogram distribution plot for the FCalc values for
        an atom, both raw, and after being divided by the maximum FCalc
        value attained for that atom (normalised-FCalc).  A vertical line
        marks ``FCatMin``, the FCalc value at the voxel where the most
        negative density map (not FC map) value around the atom is found
        (the voxel corresponding to the DLoss metric value).  A plot is
        only made if the atom ID contains one of ``atomsToPlot``; it is
        saved to ``<filesOut>testDistnPlot-<atom ID><plotType>``.
        """
        atomID = atomOfInterest.getAtomID()

        for tag in atomsToPlot:
            if tag not in atomID:
                continue

            sns.set_style("dark")
            sns.set_context(rc={"figure.figsize": (10, 6)})
            fig = plt.figure()
            ax = plt.subplot(111)

            sns.distplot(np.array(atomFCvals), label='Fcalc')
            sns.distplot(np.array(atomFCvalsMaxNorm),
                         label='Fcalc/max(Fcalc)')

            ylims = ax.get_ylim()

            plt.plot((FCatMin, FCatMin), (ylims[0], ylims[1]),
                     label='Fcalc, at position of min diff density')
            leg = plt.legend(frameon=1)
            frame = leg.get_frame()
            frame.set_color('white')

            plt.xlabel('Per-voxel density map values', fontsize=axesFont)
            plt.ylabel('Normed-frequency', fontsize=axesFont)
            plt.title('Distribution of Fcalc density values: {}'.format(
                atomID))

            fig.savefig('{}testDistnPlot-{}{}'.format(
                self.filesOut, atomID, plotType))

            # release the figure: this method can be called once per atom
            plt.close(fig)

    def plotDensScatterPlots(self,
                             printText=False, clustAnalys=False):
        """Plot scatter plots for pairs of per-atom density metrics.

        Intended for quick assessment of whether the per-atom metrics are
        behaving as expected.
        """
        self.startTimer()
        self.fillerLine(style='line')
        self.lgwrite(
            ln='Plotting scatter plots for electron density statistics...',
            forcePrint=printText)

        plotVars = [['meandensity', 'maxdensity'],
                    ['meandensity', 'mediandensity'],
                    ['meandensity', 'mindensity'],
                    ['mindensity', 'maxdensity'],
                    ['meandensity', 'stddensity'],
                    ['mindensity', 'min90tile'],
                    ['maxdensity', 'max90tile'],
                    ['min90tile', 'min95tile'],
                    ['max90tile', 'max95tile'],
                    ['meandensity', 'meanPosOnly'],
                    ['meandensity', 'meanNegOnly'],
                    ['mindensity', 'meanNegOnly'],
                    ['maxdensity', 'meanPosOnly']]

        # only include below if per-atom clusters are
        # calculated - currently very slow
        if clustAnalys:
            plotVars += [['negClusterVal', 'meandensity'],
                         ['negClusterVal', 'mindensity'],
                         ['totDensShift', 'meandensity'],
                         ['totDensShift', 'mindensity']]

        if self.calcFCmap:
            plotVars += [
                ['meandensity', 'densityWeightedMean'],
                ['mindensity', 'densityWeightedMin'],
                ['maxdensity', 'densityWeightedMax'],
                ['maxdensity', 'densityWeightedMeanPosOnly'],
                ['mindensity', 'densityWeightedMeanNegOnly'],
                ['meanNegOnly', 'densityWeightedMeanNegOnly'],
                ['meanPosOnly', 'densityWeightedMeanPosOnly'],
                ['densityWeightedMean', 'densityWeightedMeanPosOnly'],
                ['densityWeightedMean', 'densityWeightedMeanNegOnly']]

        for pVars in plotVars:
            logStr = edens_scatter(outputDir=self.filesOut, metrics=pVars,
                                   PDBarray=self.PDBarray,
                                   pdbName=self.pdbName)
            self.lgwrite(ln=logStr)

    def startTimer(self):
        """Start a timer."""
        self.timeStart = time.time()

    def stopTimer(self,
                  includeInLog=False):
        """Stop a timer (``startTimer`` must have been run before)."""
        elapsedTime = time.time() - self.timeStart
        if includeInLog:
            self.lgwrite(
                ln='section time: {}s\n'.format(round(elapsedTime, 3)))
        sys.stdout.flush()

    def success(self):
        """Report success to the log file."""
        self.lgwrite(ln='---> success')

    def fillerLine(self,
                   style='blank'):
        """Write a filler line to the log.

        ``style`` is one of 'stars', 'line' or 'blank'; any other value
        raises ValueError.
        """
        fillers = {'stars': '\n***',
                   'line': '\n' + '-'*30,
                   'blank': '\n'}
        if style not in fillers:
            raise ValueError('Unknown filler line style "{}"'.format(style))
        self.lgwrite(ln=fillers[style])

    def lgwrite(self,
                ln='', strip=True, forcePrint=False):
        """Write a line to the log file."""
        self.log.writeToLog(str=ln, strip=strip, forcePrint=forcePrint)

    def printStepNumber(self):
        """Log the current pipeline step number, then increment it."""
        # the counter is created on first use
        stepNumber = getattr(self, 'stepNumber', 1)
        self.lgwrite(ln='\n_______' +
                        '\nSTEP {})'.format(stepNumber))
        self.stepNumber = stepNumber + 1
class maps2DensMetrics():
    """Assign values within a density map to specific atoms.

    An atom-tagged map determines which regions of space (voxels) are
    assigned to each atom. The voxel values of a density map (and
    optionally an Fcalc map) are then grouped per atom, and summary
    metrics are stored as attributes on each atom object.
    """

    def __init__(self,
                 filesIn='',
                 filesOut='',
                 pdbName='',
                 atomTagMap='',
                 densityMap='',
                 FCmap='',
                 plotScatter=False,
                 plotHist=False,
                 logFile=logFile,
                 calcFCmap=True):
        """Store the run configuration.

        NOTE(review): the default for ``logFile`` is whatever the
        module-level name ``logFile`` is bound to when the class is
        defined; callers presumably pass a log object exposing
        ``writeToLog`` -- confirm against the callers.
        """
        self.filesIn = filesIn          # the input directory
        self.filesOut = filesOut        # the output directory
        self.pdbName = pdbName          # the pdb file name
        self.map1 = atomTagMap          # atom-tagged map
        self.map2 = densityMap          # density map (typically Fo-Fo)
        self.map3 = FCmap               # FC map
        self.plotScatter = plotScatter  # (bool) plot scatter plots or not
        self.plotHist = plotHist        # (bool) plot histogram plots or not
        self.log = logFile              # object used by lgwrite()
        self.calcFCmap = calcFCmap      # whether FC map should be read/used

    def maps2atmdensity(self):
        """Main run method for this class.

        Reads the pdb file, the atom-tagged map and the density map,
        assigns density values to each individual atom (as specified
        within the atom-tagged map) and calculates per-atom summary
        metrics. Optional steps are controlled by ``calcFCmap``,
        ``plotHist`` and ``plotScatter``.
        """
        self.readPDBfile()
        self.readAtomMap()
        self.readDensityMap()
        self.reportDensMapInfo()
        self.checkMapCompatibility()
        if self.calcFCmap:
            self.readFCMap()
        self.createVoxelList()
        if self.plotHist:
            self.plotDensHistPlots()
        self.calcDensMetrics()
        if self.plotScatter:
            self.plotDensScatterPlots()
        self.pickleAtomList()

    def readPDBfile(self):
        """Read the pdb file into ``self.PDBarray`` (sorted by atomnum)."""
        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading pdb file: {}'.format(self.pdbName))

        # read in the pdb file to fill list of atom objects
        self.PDBarray = PDBtoList(
            '{}{}.pdb'.format(self.filesIn, self.pdbName))
        self.stopTimer()

        # make sure array of atoms ordered by atom number
        self.PDBarray.sort(key=lambda x: x.atomnum)

        # need to get VDW radius for each atom
        for atom in self.PDBarray:
            atom.VDW_get()

    def readAtomMap(self):
        """Read the atom-tagged map and report how many atoms lack voxels."""
        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading atom-tagged map file...')
        self.lgwrite(ln='Atom map name: {}'.format(self.map1))
        self.atmmap, self.atomIndices = readMap(dirIn=self.filesIn,
                                                dirOut=self.filesOut,
                                                mapName=self.map1,
                                                mapType='atom_map',
                                                log=self.log)
        self.stopTimer()

        # find number of atoms in structure
        num_atoms = len(self.PDBarray)

        # atom numbers that never appear as a voxel tag; this assumes
        # atoms are numbered 1..num_atoms, as the original code did
        atmsNotPresent = (set(range(1, num_atoms + 1)) -
                          set(self.atmmap.vxls_val))
        self.lgwrite(
            ln='Number of atoms not assigned to voxels: {}'.format(
                len(atmsNotPresent)))

    def readDensityMap(self):
        """Read the density map into ``self.densmap``."""
        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading density map file...')
        self.lgwrite(ln='Density map name: {}'.format(self.map2))
        self.densmap = readMap(dirIn=self.filesIn,
                               dirOut=self.filesOut,
                               mapName=self.map2,
                               mapType='density_map',
                               atomInds=self.atomIndices,
                               log=self.log)
        self.stopTimer()

    def readFCMap(self):
        """Read the FC (calculated structure factor) map into ``self.FCmap``."""
        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading Fcalc density map file...')
        self.lgwrite(ln='Density map name: {}'.format(self.map3))
        self.FCmap = readMap(dirIn=self.filesIn,
                             dirOut=self.filesOut,
                             mapName=self.map3,
                             mapType='density_map',
                             atomInds=self.atomIndices,
                             log=self.log)
        self.stopTimer()

    def reportDensMapInfo(self):
        """Write density map summary information to the log."""
        structVals = self.densmap.vxls_val

        # list() keeps this working when .values() returns a view
        totalNumVxls = np.prod(list(self.atmmap.nxyz.values()))
        structureNumVxls = len(structVals)
        totalMean = self.densmap.density['mean']
        structureMean = np.mean(structVals)
        solvNumVxls = totalNumVxls - structureNumVxls

        # the solvent mean is whatever remains of the whole-map sum once
        # the structure voxels are removed; undefined with no solvent voxels
        if solvNumVxls == 0:
            solvMean = np.nan
        else:
            solvMean = (totalNumVxls * totalMean -
                        structureNumVxls * structureMean) / solvNumVxls

        template = ('\nFor voxels assigned to structure:\n'
                    '\tmean structure density : {}\n'
                    '\tmax structure density : {}\n'
                    '\tmin structure density : {}\n'
                    '\tstd structure density : {}\n'
                    '\t# voxels included : {}\n'
                    '\nFor voxels assigned to solvent:\n'
                    '\tmean solvent-region density : {}\n'
                    '\t# voxels included : {}')
        txt = template.format(round(structureMean, 4),
                              round(max(structVals), 4),
                              round(min(structVals), 4),
                              round(np.std(structVals), 4),
                              structureNumVxls,
                              # fixed: the 4 was previously passed to
                              # format() rather than round()
                              round(solvMean, 4),
                              solvNumVxls)
        self.lgwrite(ln=txt)

    def checkMapCompatibility(self):
        """Check that the atom-tagged and density maps can be combined.

        Terminates the script (``sys.exit``) if the map properties
        differ, or if the cell dimensions do not agree even when
        rounded to 0 decimal places.
        """
        self.printStepNumber()
        self.lgwrite(
            ln='Checking that maps have same dimensions and sampling properties...')
        self.startTimer()

        # check that the maps have the same dimensions, grid sampling,..
        if (self.atmmap.axis != self.densmap.axis or
                self.atmmap.gridsamp != self.densmap.gridsamp or
                self.atmmap.start != self.densmap.start or
                self.atmmap.nxyz != self.densmap.nxyz or
                self.atmmap.type != self.densmap.type):
            self.lgwrite(
                ln='Incompatible map properties --> terminating script')
            sys.exit()

        elif self.atmmap.celldims != self.densmap.celldims:
            self.lgwrite(ln='Not exact same map grid dimensions..')

            # now check if grid dims same to a specific dp (6 down to 0)
            # and consider continuing
            stop = True
            for i in reversed(range(7)):
                count = 0
                for key in self.atmmap.celldims.keys():
                    if (np.round(self.atmmap.celldims[key], i) ==
                            np.round(self.densmap.celldims[key], i)):
                        count += 1
                # presumably celldims holds 6 entries -- TODO confirm
                if count == 6:
                    msg = 'Map grid dimensions same to {}dp\n'.format(i) +\
                          '--> continuing with processing anyway'
                    self.lgwrite(ln=msg)
                    stop = False
                    break
            if stop:
                err = 'Map grid dimensions still not same to 0dp\n' +\
                      ' --> terminating script'
                self.lgwrite(ln=err)
                sys.exit()

        else:
            self.success()
            self.lgwrite(
                ln='The atom and density map are of compatible format!')

        self.stopTimer()
        msg = 'Total number of voxels assigned to atoms: {}'.format(
            len(self.atmmap.vxls_val))
        self.lgwrite(ln=msg)

    def createVoxelList(self):
        """Create dictionaries of voxel values with atom numbers as keys.

        Sets ``self.vxlsPerAtom`` and ``self.xyzsPerAtom`` and, when
        ``calcFCmap`` is set, ``self.FCperAtom``. The map attributes
        that are no longer needed are deleted afterwards.
        """
        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Combining voxel density and atom values...')

        atomTags = self.atmmap.vxls_val
        vxlDic = {atm: [] for atm in atomTags}
        xyzDic = {atm: [] for atm in atomTags}

        self.densmap.reshape1dTo3d()
        self.densmap.abs2xyz_params()
        for atm, dens in zip(atomTags, self.densmap.vxls_val):
            vxlDic[atm].append(dens)

        xyz_list = self.densmap.getVoxXYZ(self.atomIndices,
                                          coordType='fractional')
        for atm, xyz in zip(atomTags, xyz_list):
            xyzDic[atm].append(xyz)

        self.vxlsPerAtom = vxlDic
        self.xyzsPerAtom = xyzDic  # not essential for run

        if self.calcFCmap:
            vxlDic2 = {atm: [] for atm in atomTags}
            for atm, dens in zip(atomTags, self.FCmap.vxls_val):
                vxlDic2[atm].append(dens)
            self.FCperAtom = vxlDic2

        self.deleteMapsAttributes()

        # report success only once the work has actually been done
        self.success()
        self.stopTimer()

    def deleteMapsAttributes(self):
        """Delete map attributes that are no longer needed, to save memory."""
        del self.atmmap
        # FCmap only exists if readFCMap() was run (calcFCmap = True)
        if hasattr(self, 'FCmap'):
            del self.FCmap
        del self.densmap.vxls_val

    def plotDensHistPlots(self,
                          getVoxelStats=False,
                          perAtmDensHist=False):
        """Plot histogram & kde plots of number of voxels per atom."""
        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Plotting histogram plots of voxels per atom...')
        self.lgwrite(ln='Plots written to "{}plots"'.format(self.filesOut))

        stats = plotVxlsPerAtm(pdbName=self.pdbName,
                               where=self.filesOut,
                               vxlsPerAtom=self.vxlsPerAtom,
                               plotType='both',
                               returnStats=getVoxelStats)

        if stats != '':
            # single-argument form: valid on both Python 2 and 3
            print('mean: {}\nstd: {}\nmax: {}\nmin: {}'.format(*stats))

        if perAtmDensHist:
            plotDensForAtm(pdbName=self.pdbName,
                           where=self.filesOut,
                           vxlsPerAtom=self.vxlsPerAtom,
                           plotType='both',
                           PDBarray=self.PDBarray)
        self.stopTimer()

    def calcDensMetricsForAtom(self,
                               atom=[],
                               plotDistn=False,
                               clustAnalys=False):
        """Calculate density metrics for a particular atom.

        The metrics are stored as attributes on ``atom``. An atom with
        no assigned voxels is given a single NaN voxel, so all of its
        metrics evaluate to NaN rather than the run terminating.
        """
        try:
            atomVxls = self.vxlsPerAtom[atom.atomnum]
        except KeyError:
            err = 'Warning!: No voxels assigned to an atom. Consider increasing ' +\
                  'per-atom search radius parameter in RIDL input .txt file.'
            self.lgwrite(ln=err, forcePrint=True)
            atomVxls = [np.nan]

        if self.calcFCmap:
            # calculate reliability measures based on electron
            # density probability at position of min density.
            # FCperAtom shares its keys with vxlsPerAtom, so an atom with
            # no voxels is missing here too: use the same NaN placeholder
            # rather than raising a second, uncaught, KeyError
            atomFCvals = self.FCperAtom.get(atom.atomnum, [np.nan])
            atomFCvalsMaxNormalised = np.array(atomFCvals) / max(atomFCvals)
            minIndex = np.array(atomVxls).argmin()
            reliability = atomFCvalsMaxNormalised[minIndex]
            FCatMin = atomFCvals[minIndex]
            weightedVxls = np.multiply(atomVxls, atomFCvalsMaxNormalised)

        if len(atomVxls) != 0:
            atom.meandensity = np.mean(atomVxls)
            atom.mediandensity = np.median(atomVxls)
            atom.mindensity = min(atomVxls)
            atom.maxdensity = max(atomVxls)
            atom.stddensity = np.std(atomVxls)
            atom.min90tile = np.percentile(atomVxls, 10)
            atom.max90tile = np.percentile(atomVxls, 90)
            atom.min95tile = np.percentile(atomVxls, 5)
            atom.max95tile = np.percentile(atomVxls, 95)
            atom.numvoxels = len(atomVxls)

            if self.calcFCmap:
                atom.reliability = reliability
                atom.wMean = np.mean(weightedVxls)

                if plotDistn:
                    self.plotFCdistnPlot(
                        atomOfInterest=atom,
                        atomFCvals=atomFCvals,
                        atomFCvalsMaxNorm=atomFCvalsMaxNormalised,
                        FCatMin=FCatMin,
                        reliability=reliability)

            if clustAnalys:
                # cluster analysis is only run for a random ~5% of atoms
                rnd = np.random.random()
                if rnd < 0.05:
                    print(atom.getAtomID())
                    clustAnalysis = perAtomClusterAnalysis(
                        atmNum=atom.atomnum,
                        atmId=atom.getAtomID(),
                        densMapObj=self.densmap,
                        xyzsPerAtom=self.xyzsPerAtom,
                        vxlsPerAtom=self.vxlsPerAtom)
                    atom.negClusterVal = clustAnalysis.output[0]
                    atom.totDensShift = clustAnalysis.output[-1]

    def calcDensMetrics(self,
                        plotDistn=False,
                        clustAnalys=False,
                        showProgress=True,
                        parallel=False):
        """Determine density summary metrics for every atom."""
        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Calculating electron density statistics per atom...')

        total = len(self.PDBarray)

        if parallel:
            # NOTE(review): delegates to a local ``test`` module; looks
            # like experimental code -- confirm before enabling
            from test import testRun
            testRun()
        else:
            for i, atom in enumerate(self.PDBarray):
                if showProgress:
                    # '\r' rewrites the same terminal line each time
                    sys.stdout.write('\r')
                    sys.stdout.write(
                        '{}%'.format(round(100 * float(i) / total, 3)))
                    sys.stdout.flush()
                self.calcDensMetricsForAtom(atom=atom,
                                            plotDistn=plotDistn,
                                            clustAnalys=clustAnalys)

        self.success()
        self.stopTimer()

        # delete the vxlsPerAtom list now to save memory
        del self.vxlsPerAtom

        # get additional metrics per atom
        for atom in self.PDBarray:
            atom.getAdditionalMetrics()

    def plotFCdistnPlot(self,
                        plot=True,
                        atomOfInterest='',
                        atomsToPlot=['GLU-CD', 'CYS-SG'],
                        atomFCvals=[],
                        atomFCvalsMaxNorm=[],
                        FCatMin=[],
                        reliability=[],
                        plotType='.png',
                        axesFont=18):
        """Plot the distribution of Fcalc values for an atom.

        A kde & histogram distribution plot is written for the Fcalc
        values of an atom, both raw and after being divided by the
        maximum Fcalc value attained for that atom (normalised Fcalc).
        Vertical lines mark the Fcalc and normalised-Fcalc values at the
        voxel where the density map (not FC map) is most negative within
        the local region around the atom. A plot is only written when
        one of the tags in ``atomsToPlot`` is contained in the atom ID.
        """
        for tag in atomsToPlot:
            atomID = atomOfInterest.getAtomID()
            if tag not in atomID:
                continue

            sns.set_style("dark")
            sns.set_context(rc={"figure.figsize": (10, 6)})
            fig = plt.figure()
            ax = plt.subplot(111)

            sns.distplot(np.array(atomFCvals), label='Fcalc')
            sns.distplot(np.array(atomFCvalsMaxNorm),
                         label='Fcalc/max(Fcalc)')

            ylims = ax.get_ylim()

            plt.plot((FCatMin, FCatMin),
                     (ylims[0], ylims[1]),
                     label='Fcalc, at position of min diff density')
            plt.plot((reliability, reliability),
                     (ylims[0], ylims[1]),
                     label='Fcalc/max(Fcalc), at position of min diff density')

            leg = plt.legend(frameon=1)
            frame = leg.get_frame()
            frame.set_color('white')

            plt.xlabel('Per-voxel density map values', fontsize=axesFont)
            plt.ylabel('Normed-frequency', fontsize=axesFont)
            plt.title(
                'Distribution of Fcalc density values: {}'.format(atomID))

            fig.savefig('{}testDistnPlot-{}{}'.format(
                self.filesOut, atomID, plotType))

            # release the figure; otherwise every plotted atom leaves an
            # open figure behind for the rest of the run
            plt.close(fig)

    def plotDensScatterPlots(self,
                             printText=False,
                             clustAnalys=False):
        """Plot scatter plots for pairs of per-atom density metrics."""
        self.startTimer()
        self.fillerLine(style='line')
        msg = 'Plotting scatter plots for electron density statistics...'
        self.lgwrite(ln=msg, forcePrint=printText)

        plotVars = [['meandensity', 'maxdensity'],
                    ['meandensity', 'mediandensity'],
                    ['meandensity', 'mindensity'],
                    ['mindensity', 'maxdensity'],
                    ['meandensity', 'stddensity'],
                    ['mindensity', 'min90tile'],
                    ['maxdensity', 'max90tile'],
                    ['min90tile', 'min95tile'],
                    ['max90tile', 'max95tile']]

        # only include below if per-atom clusters are
        # calculated - currently very slow
        if clustAnalys:
            plotVars += [['negClusterVal', 'meandensity'],
                         ['negClusterVal', 'mindensity'],
                         ['totDensShift', 'meandensity'],
                         ['totDensShift', 'mindensity']]

        if self.calcFCmap:
            plotVars.append(['meandensity', 'wMean'])
            plotVars.append(['mindensity', 'wMean'])

        for pVars in plotVars:
            logStr = edens_scatter(outputDir=self.filesOut,
                                   metrics=pVars,
                                   PDBarray=self.PDBarray,
                                   pdbName=self.pdbName)
            self.lgwrite(ln=logStr)

    def pickleAtomList(self):
        """Save the list of atom objects to a .pkl file."""
        self.pklFileName = save_objectlist(self.PDBarray, self.pdbName)

    def startTimer(self):
        """Start a timer."""
        self.timeStart = time.time()

    def stopTimer(self,
                  includeInLog=False):
        """Stop a timer (startTimer must have been run before)."""
        elapsedTime = time.time() - self.timeStart
        if includeInLog:
            ln = 'section time: {}s\n'.format(round(elapsedTime, 3))
            self.lgwrite(ln=ln)
        sys.stdout.flush()

    def success(self):
        """Report success to the log file."""
        self.lgwrite(ln='---> success')

    def fillerLine(self,
                   style='blank'):
        """Write a filler line to the log.

        ``style`` must be one of 'stars', 'line' or 'blank'; any other
        value raises ValueError (previously an unbound-local error).
        """
        fillers = {'stars': '\n***',
                   'line': '\n' + '-' * 30,
                   'blank': '\n'}
        try:
            ln = fillers[style]
        except KeyError:
            raise ValueError(
                'Unknown filler line style "{}": expected one of {}'.format(
                    style, sorted(fillers)))
        self.lgwrite(ln=ln)

    def lgwrite(self,
                ln='',
                strip=True,
                forcePrint=False):
        """Write a line to the log file via ``self.log.writeToLog``."""
        self.log.writeToLog(str=ln,
                            strip=strip,
                            forcePrint=forcePrint)

    def printStepNumber(self):
        """Write the current pipeline step number to the log, then
        increment it. The counter starts at 1 on first use."""
        if not hasattr(self, 'stepNumber'):
            self.stepNumber = 1
        ln = '\n_______' +\
             '\nSTEP {})'.format(self.stepNumber)
        self.lgwrite(ln=ln)
        self.stepNumber += 1