Exemple #1
0
class maps2DensMetrics():

    # Combine an atom-tagged map (map1) with a density map (map2) so that
    # every atom of a pdb structure is given the density values of the
    # voxels tagged with its atom number. Per-atom summary statistics are
    # then stored as attributes on the atom objects held in self.PDBarray.
    # Progress is printed to the command line and appended to the run log
    # '<filesOut><pdbname>_log.txt'.

    def __init__(self,filesIn,filesOut,pdbname,mapfilname1,maptype1,mapfilname2,maptype2,plot):
        self.filesIn = filesIn   # prefix (directory) prepended to input file names
        self.filesOut = filesOut # output directory
        self.pdbname = pdbname   # pdb file name without the '.pdb' extension
        self.map1 = {'filename':mapfilname1,'type':maptype1} # atom-tagged map
        self.map2 = {'filename':mapfilname2,'type':maptype2} # density map
        self.plot = plot         # scatter/box plots are made only if plot == True

    def _logFileName(self):
        # path of the log file shared by every step of the run
        return '{}{}_log.txt'.format(self.filesOut,self.pdbname)

    def _writeLog(self,text,mode='a'):
        # write text to the run log and close the file immediately, so that
        # no handle is left open and no buffered text is flushed out of order
        with open(self._logFileName(),mode) as logfile:
            logfile.write(text)

    def maps2atmdensity(self):
        # run method for this class: reads the structure and both maps,
        # checks the maps are compatible, groups density values per atom,
        # calculates the per-atom metrics, optionally plots them and
        # finally saves the atom list
        self.printTitle()

        # start a fresh log file for this eTrack run. The file is closed
        # before any other step appends to it (an open 'w' handle flushed
        # at the end of the run would overwrite the appended text)
        self._writeLog('eTrack run log file\n'
                       'Date: {}\n'
                       'Time: {}\n'.format(time.strftime("%d/%m/%Y"),
                                           time.strftime("%H:%M:%S")),
                       mode='w')

        self.readPDBfile()
        self.readAtomMap()
        self.readDensityMap()
        self.reportDensMapInfo()
        self.checkMapCompatibility()
        self.createVoxelList()
        self.plotDensHistPlots()
        self.calculateDensMetrics()
        # explicit comparison kept on purpose: only True (or 1) enables plots
        if self.plot == True:
            self.plotDensScatterPlots()
            self.plotPerResidueBoxPlots()
        self.pickleAtomList()

    def readPDBfile(self):
        # read in pdb file info here: fills self.PDBarray with atom objects
        # sorted by atom number
        pdbFile = '{}{}.pdb'.format(self.filesIn,self.pdbname)

        self.startTimer()
        print('Reading in pdb file...')
        print('pdb name: {}'.format(pdbFile))
        # log the file that is actually read (taken from filesIn)
        self._writeLog('pdb name: {}\n'.format(pdbFile))

        # run function to fill PDBarray list with atom objects from structure
        self.PDBarray = PDBtoList(pdbFile,[])
        self.success()
        self.stopTimer()

        # want to make sure array of structure atoms ordered by atomnumber
        # before reading through them
        self.PDBarray.sort(key=lambda x: x.atomnum)

        # need to get VDW radius for each atom:
        for atom in self.PDBarray:
            atom.VDW_get()

    def readAtomMap(self):
        # read in the atom map and report how many atoms were not assigned
        # to any voxel. Requires self.PDBarray (see readPDBfile)
        mapFile = '{}{}'.format(self.filesIn,self.map1['filename'])

        self.startTimer()
        self.fillerLine()
        print('Reading in Atom map file...')
        print('Atom map name: {}'.format(mapFile))
        # log the file that is actually read (taken from filesIn)
        self._writeLog('atom map name: {}\n'.format(mapFile))

        self.atmmap,self.atom_indices = readMap(self.filesIn,self.filesOut,self.pdbname,
                                                self.map1['filename'],self.map1['type'],[])

        self.success()
        self.stopTimer()

        # find number of atoms in structure
        numAtoms = len(self.PDBarray)

        # find set of atom numbers not present in the map (i.e atoms not
        # assigned to voxels). NOTE(review): this assumes atoms are
        # numbered 1..numAtoms -- confirm against the pdb files used
        atmsNotPres = set(range(1,numAtoms+1)) - set(self.atmmap.vxls_val)
        summary = 'Number of atoms not assigned to voxels: {}'.format(len(atmsNotPres))
        print(summary)

        # append to log file for this eTrack run
        self._writeLog(summary+'\n')

    def readDensityMap(self):
        # read in the density map, restricted to the voxel indices found
        # when reading the atom map (self.atom_indices)
        mapFile = '{}{}'.format(self.filesIn,self.map2['filename'])

        self.startTimer()
        self.fillerLine()
        print('Reading in Density map file...')
        print('Density map name: {}'.format(mapFile))
        self._writeLog('density map name: {}\n'.format(mapFile))

        self.densmap = readMap(self.filesIn,self.filesOut,self.pdbname,
                               self.map2['filename'],self.map2['type'],
                               self.atom_indices)
        self.success()
        self.stopTimer()

    def reportDensMapInfo(self):
        # print density map summary information to command line. The
        # solvent mean is recovered from the whole-map mean and the mean
        # over the voxels assigned to the structure
        vxlVals          = self.densmap.vxls_val
        # list(...) so that a python 3 dict view is multiplied correctly
        totalNumVxls     = np.prod(list(self.atmmap.nxyz.values()))
        structureNumVxls = len(vxlVals)
        totalMean        = self.densmap.density['mean']
        structureMean    = np.mean(vxlVals)
        solvNumVxls      = totalNumVxls - structureNumVxls
        solvMean         = (totalNumVxls*totalMean - structureNumVxls*structureMean)/solvNumVxls
        print('For voxels assigned to structure:')
        print('mean structure density : {}'.format(structureMean))
        print('max structure density : {}'.format(max(vxlVals)))
        print('min structure density : {}'.format(min(vxlVals)))
        print('std structure density : {}'.format(np.std(vxlVals)))
        print('# voxels included : {}'.format(structureNumVxls))
        print('For voxels assigned to solvent:')
        print('mean solvent-region density : {}'.format(solvMean))
        print('# voxels included : {}'.format(solvNumVxls))

    def checkMapCompatibility(self):
        # check that atom-tagged and density map can be combined
        # successfully. The script is terminated (sys.exit) if the map
        # headers differ, or if the cell dimensions differ even after
        # rounding to 0 decimal places
        self.fillerLine()
        print('Checking that maps have same dimensions and sampling properties...')
        self.startTimer()
        # Check that the maps have the same dimensions, grid sampling,..
        if (self.atmmap.axis != self.densmap.axis or
            self.atmmap.gridsamp != self.densmap.gridsamp or
            self.atmmap.start != self.densmap.start or
            self.atmmap.nxyz != self.densmap.nxyz or
            self.atmmap.type != self.densmap.type):

            print('Incompatible map properties --> terminating script')
            self._writeLog('Incompatible map properties --> terminating script\n')
            sys.exit()

        elif self.atmmap.celldims != self.densmap.celldims:
            print('Not exact same map grid dimensions..')
            self._writeLog('Not exactly same map grid dimensions..\n')
            # now check if grid dims same to a specific dp (6dp down to
            # 0dp) and consider continuing
            sameToDp = None
            for dp in range(6,-1,-1):
                count = 0
                for key in self.atmmap.celldims.keys():
                    if np.round(self.atmmap.celldims[key],dp) == np.round(self.densmap.celldims[key],dp):
                        count += 1
                # all 6 entries of celldims must agree
                if count == 6:
                    sameToDp = dp
                    break
            if sameToDp is None:
                print('Map grid dimensions still not same to 0dp')
                self._writeLog('Map grid dimensions still not same to 0dp --> terminating script\n')
                sys.exit()
            print('Map grid dimensions same to {}dp'.format(sameToDp))
            self._writeLog('Map grid dimensions same to {}dp --> continuing with processing anyway\n'.format(sameToDp))

        else:
            self.success()
            print('The atom and density map are of compatible format!')
            self._writeLog('The atom and density map are of compatible format!\n')
        self.stopTimer()

        self.fillerLine()
        total = 'Total number of voxels assigned to atoms: {}'.format(len(self.atmmap.vxls_val))
        print(total)
        self._writeLog(total+'\n')

    def createVoxelList(self):
        # create dictionary of voxels with atom numbers as keys and the
        # list of density values assigned to that atom as values
        self.startTimer()
        self.fillerLine()
        print('Combining voxel density and atom values...')
        vxl_list = {atm:[] for atm in self.atmmap.vxls_val}
        for atm,dens in zip(self.atmmap.vxls_val,self.densmap.vxls_val):
            vxl_list[atm].append(dens)
        self.vxlsPerAtom = vxl_list

        # delete atmmap and densmap now to save memory
        self.densmap,self.atmmap =[],[]
        self.stopTimer()

    def plotDensHistPlots(self):
        # histogram & kde plots of number of voxels per atom
        for plotType in ('histogram','kde'):
            plotVxlsPerAtm(self.pdbname,self.filesOut,self.vxlsPerAtom,plotType)

    def calculateDensMetrics(self):
        # determine density summary metrics per atom: max, min, mean,
        # median, standard deviation, 90-tile min, 90-tile max,
        # 95-tile min, 95-tile max and number of voxels. Further metrics
        # are added afterwards by atom.getAdditionalMetrics()
        self.fillerLine()
        self.startTimer()
        print('Calculating electron density statistics per atom...')
        for atom in self.PDBarray:
            # atoms that were never assigned a voxel have no dictionary
            # entry at all (see readAtomMap), so do not index directly
            atomVxls = self.vxlsPerAtom.get(atom.atomnum,[])
            if len(atomVxls) == 0:
                continue
            atom.meandensity    = np.mean(atomVxls)
            atom.mediandensity  = np.median(atomVxls)
            atom.mindensity     = min(atomVxls)
            atom.maxdensity     = max(atomVxls)
            atom.stddensity     = np.std(atomVxls)
            atom.min90tile      = np.percentile(atomVxls,10)
            atom.max90tile      = np.percentile(atomVxls,90)
            atom.min95tile      = np.percentile(atomVxls,5)
            atom.max95tile      = np.percentile(atomVxls,95)
            atom.numvoxels      = len(atomVxls)

        self.success()
        self.stopTimer()

        # delete the vxlsPerAtom list now to save memory
        del self.vxlsPerAtom

        # get additional metrics per atom
        for atom in self.PDBarray:
            atom.getAdditionalMetrics()

    def plotDensScatterPlots(self):
        # plot scatter plots for pairs of density metrics
        self.startTimer()
        self.fillerLine()
        print('Plotting scatter plots for electron density statistics...')
        plotVars = (['mean','max'],['mean','median'],['mean','min'],['min','max'],
                    ['mean','std'],['std','rsd'],['min','min90tile'],['max','max90tile'],
                    ['min90tile','min95tile'],['max90tile','max95tile'],
                    ['std','range'],['mean','range'])
        for pVars in plotVars:
            edens_scatter(self.filesOut,pVars,self.PDBarray,self.pdbname)
        # time this section on its own rather than leaving the timer
        # running into the next plotting method
        self.stopTimer()

    def plotPerResidueBoxPlots(self):
        # perform residue analysis for dataset, outputting boxplots for each atom specific
        # to each residue, and also a combined boxplot across all residues in structures.
        self.startTimer()
        for densMet in ('mean','min','max'):
            residueArray = densper_resatom_NOresidueclass(self.filesOut,self.PDBarray,'y',densMet,self.pdbname)

        minresnum = 0
        sideormain = ['sidechain','mainchain']
        densper_res(self.filesOut,residueArray,minresnum,sideormain,'min',self.pdbname)

        # remove residueArray now to save memory
        residueArray = []
        self.stopTimer()

    def pickleAtomList(self):
        # save the list of atom objects; the name returned by
        # save_objectlist is kept in self.pklFileName
        self.pklFileName = save_objectlist(self.PDBarray,self.pdbname)

    def startTimer(self):
        # record the start time of the current section
        self.timeStart = time.time()

    def stopTimer(self):
        # print the time elapsed since the last startTimer() call
        elapsedTime = time.time() - self.timeStart
        print('section time: {}'.format(elapsedTime))
        sys.stdout.flush()

    def success(self):
        print('---> success')

    def fillerLine(self):
        print('\n------------------------------------------------')

    def printTitle(self):
        print('\n================================================')
        print('------------------------------------------------')
        print('|||              eTrack run                  |||')
        print('------------------------------------------------')
        print('================================================\n')
class maps2DensMetrics(object):

    # assign values within a density map to specific atoms, using
    # an atom-tagged map to determine which regions of space
    # are to be assigned to each atom

    def __init__(self,
                 filesIn='', filesOut='', pdbName='', atomTagMap='',
                 densityMap='', FCmap='',  plotScatter=False, plotHist=False,
                 logFile='./untitled.log', calcFCmap=True,
                 doXYZanalysis=False):

        # the input directory
        self.filesIn = filesIn

        # the output directory
        self.filesOut = filesOut

        # the pdb file name
        self.pdbName = pdbName

        # atom-tagged map name
        self.atomMapIn = atomTagMap

        # density map name (typically Fo-Fo)
        self.densMapIn = densityMap

        # FC map name
        self.FCmapIn = FCmap

        # (bool) plot scatter plots or not
        self.plotScatter = plotScatter

        # (bool) plot histogram plots or not
        self.plotHist = plotHist

        # log file name
        self.log = logFile

        # whether FC map should be generated
        self.calcFCmap = calcFCmap

        # whether to do analysis based on xyz of each voxel
        self.doXYZanalysis = doXYZanalysis

    def maps2atmdensity(self,
                        mapsAlreadyRead=False):

        # the map run method for this class. Will read in an atom-tagged map
        # and density map and assign density values for each individual atom
        # (as specified within the atom-tagged map). From these summary metrics
        # describing the density map behaviour in the vicinity of each refined
        # atom can be calculated

        if not mapsAlreadyRead:
            self.readPDBfile()
            self.readAtomMap()
        self.readDensityMap()
        self.reportDensMapInfo()
        self.checkMapCompatibility()

        if self.calcFCmap and not mapsAlreadyRead:
            self.readFCMap()
            self.reportDensMapInfo(mapType='calc')

        self.createVoxelList()

        if self.plotHist:
            self.plotDensHistPlots()

        self.calcDensMetrics(showProgress=False)

        if self.plotScatter:
            self.plotDensScatterPlots()

    def readPDBfile(self):

        # read the pdb file into self.PDBarray, a list of atom objects
        # ordered by atom number. Density metric information is added
        # to these objects as extra attributes by the methods below

        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading pdb file: {}'.format(self.pdbName))

        # the pdb file is located by joining the input directory and name
        pdbPath = '{}{}'.format(self.filesIn, self.pdbName)
        atomList = PDBtoList(pdbPath)
        self.PDBarray = atomList
        self.stopTimer()

        # sort in place so the atoms are ordered by atom number
        atomList.sort(key=lambda atm: atm.atomnum)

    def readAtomMap(self):

        # read in the atom-tagged map (stored in self.atmmap, with the
        # accompanying indices in self.atomIndices) and report how many
        # atoms have not been assigned to any voxel. self.PDBarray must
        # already exist (see readPDBfile)

        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading atom-tagged map file...\n' +
                        'Atom map name: {}'.format(self.atomMapIn))

        self.atmmap, self.atomIndices = readMap(
            dirIn=self.filesIn, dirOut=self.filesOut, mapName=self.atomMapIn,
            mapType='atom_map', log=self.log)
        self.stopTimer()

        # find number of atoms in structure
        numAtms = len(self.PDBarray)

        # atom numbers that tag at least one voxel; a set removes the
        # repeated atom numbers directly
        taggedAtms = set(self.atmmap.vxls_val)

        # find set of atom numbers not present (i.e atoms not assigned
        # to voxels). NOTE(review): assumes atoms are numbered
        # 1..numAtms -- confirm against the pdb files used
        atmsNotPres = set(range(1, numAtms+1)) - taggedAtms
        self.lgwrite(
            ln='Number of atoms not assigned to voxels: ' +
               '{}'.format(len(atmsNotPres)))

    def readDensityMap(self):

        # read in the density map (stored in self.densmap), passing on
        # the atom indices obtained when the atom-tagged map was read

        self.printStepNumber()
        self.startTimer()

        announcement = '\n'.join([
            'Reading density map file...',
            'Density map name: {}'.format(self.densMapIn)])
        self.lgwrite(ln=announcement)

        mapArgs = dict(dirIn=self.filesIn, dirOut=self.filesOut,
                       mapName=self.densMapIn, mapType='density_map',
                       atomInds=self.atomIndices, log=self.log)
        self.densmap = readMap(**mapArgs)
        self.stopTimer()

    def readFCMap(self):

        # read in the FC (calculated structure factor) density map and
        # store it in self.FCmap. Do not call this method if no FC
        # density map has been provided in the current run

        self.printStepNumber()
        self.startTimer()

        announcement = '\n'.join([
            'Reading Fcalc density map file...',
            'Density map name: {}'.format(self.FCmapIn)])
        self.lgwrite(ln=announcement)

        mapArgs = dict(dirIn=self.filesIn, dirOut=self.filesOut,
                       mapName=self.FCmapIn, mapType='density_map',
                       atomInds=self.atomIndices, log=self.log)
        self.FCmap = readMap(**mapArgs)

        self.stopTimer()

    def reportDensMapInfo(self,
                          numSfs=4, mapType='density'):

        # report the density map summary information to a log file

        if mapType == 'density':
            mp = self.densmap
        elif mapType == 'calc':
            mp = self.FCmap

        totalNumVxls = np.product(list(self.atmmap.nxyz.values()))
        structureNumVxls = len(mp.vxls_val)
        totalMean = mp.density['mean']
        structureMean = np.mean(mp.vxls_val)
        solvNumVxls = totalNumVxls - structureNumVxls
        solvMean = (totalNumVxls*totalMean -
                    structureNumVxls*structureMean)/solvNumVxls

        self.lgwrite(
            ln='\nFor voxels assigned to structure:\n' +
               '\tmean structure density : {}\n'.format(
                round(structureMean, numSfs)) +
               '\tmax structure density : {}\n'.format(
                round(max(mp.vxls_val), numSfs)) +
               '\tmin structure density : {}\n'.format(
                round(min(mp.vxls_val), numSfs)) +
               '\tstd structure density : {}\n'.format(
                round(np.std(mp.vxls_val), numSfs)) +
               '\t# voxels included : {}\n'.format(structureNumVxls) +
               '\nFor voxels assigned to solvent:\n' +
               '\tmean solvent-region density : {}\n'.format(
                round(solvMean), numSfs) +
               '\t# voxels included : {}'.format(solvNumVxls))

    def checkMapCompatibility(self):

        # check that atom-tagged and density map
        # can be combined successfully. This
        # requirement is met if the maps have the
        # the same map header information. Grid
        # dimensions are permitted to deviate
        # between the two maps, however this is
        # flagged at run time

        self.printStepNumber()
        self.lgwrite(
            ln='Checking that maps have same dimensions and sampling...')

        self.startTimer()
        # Check that the maps have the same dimensions, grid sampling,..
        if (self.atmmap.axis != self.densmap.axis or
            self.atmmap.gridsamp != self.densmap.gridsamp or
            self.atmmap.start != self.densmap.start or
            self.atmmap.nxyz != self.densmap.nxyz or
                self.atmmap.type != self.densmap.type):

            error(text='Incompatible map properties',
                  log=self.log, type='error')

        elif self.atmmap.celldims != self.densmap.celldims:
            self.lgwrite(ln='Not exact same map grid dimensions..')
            # now check if grid dims same to a
            # specific dp and consider continuing
            stop = True
            for i in list(reversed(list(range(7)))):
                count = 0
                for key in list(self.atmmap.celldims.keys()):
                    roundedAtmmapDim = np.round(self.atmmap.celldims[key], i)
                    roundedDensmapDim = np.round(self.densmap.celldims[key], i)
                    if roundedAtmmapDim == roundedDensmapDim:
                        count += 1
                if count == 6:
                    self.lgwrite(
                        ln='Map grid dimensions same to {}dp\n'.format(i) +
                           '--> continuing with processing anyway')
                    stop = False
                    break
            if stop:
                    error(text='Map grid dimensions still not same to 0dp',
                          log=self.log, type='error')

        else:
            self.success()
            self.lgwrite(
                ln='The atom and density map are of compatible format!')
        self.stopTimer()

        self.lgwrite(
            ln='Total number of voxels assigned to atoms: {}'.format(
                len(self.atmmap.vxls_val)))

    def createVoxelList(self,
                        inclOnlyGluAsp=False):

        # create dictionary of voxels with atom numbers as keys

        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Combining voxel density and atom values...')
        self.success()
        vxlDic = {atm: [] for atm in self.atmmap.vxls_val}
        xyzDic = {atm: [] for atm in self.atmmap.vxls_val}

        self.densmap.reshape1dTo3d()
        self.densmap.abs2xyz_params()
        for atm, dens in zip(self.atmmap.vxls_val, self.densmap.vxls_val):
            vxlDic[atm].append(dens)

        self.vxlsPerAtom = vxlDic

        # The following is not essential for run and should not be called by default
        if self.doXYZanalysis:

            # call this extra module that is requried for XYZ analysis
            from perAtomClusterAnalysis import perAtomXYZAnalysis


            xyz_list = self.densmap.getVoxXYZ(
                self.atomIndices, coordType='fractional')

            for atm, xyz in zip(self.atmmap.vxls_val, xyz_list):
                xyzDic[atm].append(xyz)

            # get the mid points for each atom from the set of voxels
            # per atom, whilst accounting for symmetry (the asym unit
            # may not contain 1 single whole molecule, but split up)
            xyzDic2 = {}
            for atom in self.PDBarray:

                # this is more of testing reasons that any clear use
                if inclOnlyGluAsp:
                    atmTypes = ['GLU-CD', 'GLU-OE1', 'GLU-OE2',
                                'ASP-OD1', 'ASP-OD2', 'ASP-CG',
                                'CYS-SG', 'CYS-CB', 'CYS-CA',
                                'MET-SD', 'MET-CE', 'MET-CG']
                    tag = '-'.join(atom.getAtomID().split('-')[2:])
                    if tag not in atmTypes:
                        continue

                xyzAnalysis = perAtomXYZAnalysis(
                    atomObj=atom, vxlRefPoint=np.mean(xyz_list, 0),
                    densPerVxl=np.round(np.array(vxlDic[atom.atomnum]), 3),
                    xyzsPerAtom=xyzDic[atom.atomnum], densMapObj=self.densmap)
                xyzAnalysis.getxyzPerAtom()
                atom.vxlMidPt = xyzAnalysis.findVoxelMidPt()
                xyzDic2[atom.getAtomID()] = xyzAnalysis.keptPts
                self.xyzsPerAtom = xyzDic2

        if self.calcFCmap:
            vxlDic2 = {atm: [] for atm in self.atmmap.vxls_val}
            for atm, dens in zip(self.atmmap.vxls_val, self.FCmap.vxls_val):
                vxlDic2[atm].append(dens)
            self.FCperAtom = vxlDic2

        self.deleteMapsAttributes()
        self.stopTimer()

    def deleteMapsAttributes(self):

        # Provide the option to delete atmmap and
        # densmap attributes to save memory, if
        # they are no longer needed during a run

        # del self.atmmap
        # if self.calcFCmap:
        #     del self.FCmap
        del self.densmap.vxls_val

    def plotDensHistPlots(self,
                          getVoxelStats=False, perAtmDensHist=False):

        # Create plots of the number of voxels per atom
        # (plotVxlsPerAtm with plotType 'both'). The statistics
        # returned by plotVxlsPerAtm are printed unless an empty
        # string is returned. With perAtmDensHist set,
        # plotDensForAtm is called as well

        self.startTimer()
        self.printStepNumber()

        announcement = '\n'.join([
            'Plotting histogram plots of voxels per atom...',
            'Plots written to "{}plots"'.format(self.filesOut)])
        self.lgwrite(ln=announcement)

        # arguments shared by both plotting functions
        sharedArgs = dict(pdbName=self.pdbName, where=self.filesOut,
                          vxlsPerAtom=self.vxlsPerAtom, plotType='both')

        stats = plotVxlsPerAtm(returnStats=getVoxelStats, **sharedArgs)

        if stats != '':
            print('mean: {}\nstd: {}\nmax: {}\nmin: {}'.format(*stats))

        if perAtmDensHist:
            plotDensForAtm(PDBarray=self.PDBarray, **sharedArgs)

        self.stopTimer()

    def calcDensMetricsForAtom(self,
                               atom=[], plotDistn=False):

        # calculate density metrics for a particular atom.
        # This method includes the option to perform
        # cluster analysis on the voxel values assigned
        # to this atom, howeverm this should not be selected
        # for a standard run of the code

        try:
            atomVxls = self.vxlsPerAtom[atom.atomnum]
        except KeyError:
            error(
                text='No voxels assigned to an atom. Consider ' +
                     'increasing per-atom search radius parameter in RIDL ' +
                     'input .txt file.',
                log=self.log, type='warning')
            atomVxls = [np.nan]

        if len(atomVxls) != 0:
            atom.meandensity = np.mean(atomVxls)
            atom.mediandensity = np.median(atomVxls)
            atom.mindensity = min(atomVxls)
            atom.maxdensity = max(atomVxls)
            atom.stddensity = np.std(atomVxls)
            atom.min90tile = np.percentile(atomVxls, 10)
            atom.max90tile = np.percentile(atomVxls, 90)
            atom.min95tile = np.percentile(atomVxls, 5)
            atom.max95tile = np.percentile(atomVxls, 95)
            atom.numvoxels = len(atomVxls)

            posVals = [w for w in atomVxls if w > 0]
            if posVals != []:
                atom.meanPosOnly = np.mean(posVals)
            else:
                atom.meanPosOnly = 0

            negVals = [w for w in atomVxls if w < 0]
            if negVals != []:
                atom.meanNegOnly = np.mean(negVals)
            else:
                atom.meanNegOnly = 0

            if self.calcFCmap:
                # if the user has opted to calculate an Fcalc map in addition
                # to the difference map, then additional metrics can be
                # derived using this map. These metrics typically use the Fcalc
                # map density at each voxel to weight the contribution that
                # each voxel's difference map value should play when
                # calculating damage metrics. Effectively, a voxel far from an
                # atom (but still included in the search radius around that
                # atom) should not contribute to a damage indicator as much as
                # a voxel close to the atomic centre

                atomFCvals = self.FCperAtom[atom.atomnum]
                # NOTE: currently set all negative values to zero. This has
                # effect of ignoring Fcalc density that is less than the map
                # mean. This is implemented such that all per-voxel weights
                # (see below) are positive and so therefore sensible
                # weighted-means can be calculated. This may need to be
                # reconsidered for future use!
                atomFCvals = [v if v > 0 else 0 for v in atomFCvals]

                atomFCvalsMaxNormed = np.array(atomFCvals)/max(atomFCvals)

                minIndex = np.array(atomVxls).argmin()
                weightedVxls = np.multiply(atomVxls, atomFCvalsMaxNormed)

                atom.densityWeightedMean = np.mean(weightedVxls)
                atom.densityWeightedMin = np.min(weightedVxls)
                atom.densityWeightedMax = np.max(weightedVxls)

                # the following attribute provides an indication of the
                # fraction of the local maximum Fcalc map density around
                # the current atom at the point where the minimum difference
                # map value has been located to be. A higher value (closer to
                # 1) indicates that the min density value is found at an
                # electron density-rich region of space, whereas a lower
                # value (closer to 0) indicates that the min density value is
                # located away from where the majority of the electron density
                # assigned to the atom is predicted to be.
                atom.fracOfMaxAtomDensAtMin = atomFCvalsMaxNormed[minIndex]

                posVals = [w for w in weightedVxls if w > 0]
                negVals = [w for w in weightedVxls if w < 0]
                posValsSum = np.sum(posVals)
                negValsSum = np.sum(negVals)

                posWeights = [v for v, w in zip(
                    atomFCvalsMaxNormed, weightedVxls) if w > 0]
                negWeights = [v for v, w in zip(
                    atomFCvalsMaxNormed, weightedVxls) if w < 0]
                posWeightsSum = np.sum(posWeights)
                negWeightsSum = np.sum(negWeights)

                if posVals != []:
                    atom.densityWeightedMeanPosOnly = posValsSum/posWeightsSum
                else:
                    atom.densityWeightedMeanPosOnly = 0

                if negVals != []:
                    atom.densityWeightedMeanNegOnly = negValsSum/negWeightsSum
                else:
                    atom.densityWeightedMeanNegOnly = 0

                if plotDistn:
                    # typically only to be used for testing purposes
                    self.plotFCdistnPlot(
                        atomsToPlot=['GLU-CD', 'CYS-SG'], atomOfInterest=atom,
                        atomFCvals=atomFCvals, FCatMin=atomFCvals[minIndex],
                        atomFCvalsMaxNorm=atomFCvalsMaxNormed)

            if self.doXYZanalysis:
                # provides the user with the option to also run
                # per-atom cluster analysis on the spatial
                # distribution of voxels assigned to a single atom.
                # This would be useful to distinguish 'clumps' of
                # positive or negative difference density, in order
                # to decide whether an atom may have shifted
                # position upon irradiation.
                # It should be noted that this option takes a
                # significant time to run, and should be deselected
                # in a standard run of the code

                self.clustDoneOnAtm.append(atom.getAtomID())

                clustAnalysis = perAtomXYZAnalysis(
                    atomObj=atom, vxlMidPt=atom.vxlMidPt,
                    knownRefPoint=self.knownRefPt1,
                    knownRefPoint2=self.knownRefPt2)

                clustAnalysis.keptPts = self.xyzsPerAtom[atom.getAtomID()]
                clustAnalysis.partitionPtsByVec()

                # atom.negClusterVal = clustAnalysis.topNegClustMean
                # atom.totDensShift = clustAnalysis.netDensShift

                self.densByRegion.append(clustAnalysis.densByRegion)

    def calcDensMetrics(self,
                        plotDistn=False, showProgress=True, parallel=False,
                        makeTrainSet=False, inclOnlyGluAsp=False,
                        doRandomSubset=False):
        """Calculate density summary metrics for each atom in self.PDBarray.

        plotDistn is forwarded to calcDensMetricsForAtom. showProgress
        writes a running percentage to stdout. parallel is not implemented
        (a message is printed and no atom is processed). inclOnlyGluAsp
        restricts the run to the Glu/Asp carboxylate, Cys-SG and Met-SD
        atoms listed below; the original author notes this is for testing
        and breaks later analysis. makeTrainSet forces both the xyz
        analysis and that restriction, then calls makeTrainingSet after the
        loop. doRandomSubset processes each atom only when a uniform random
        draw is <= 0.01 (testing aid).

        self.vxlsPerAtom is deleted before returning.
        """

        if makeTrainSet:
            self.doXYZanalysis = True
            inclOnlyGluAsp = True

        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Calculating electron density statistics per atom...')

        numAtoms = len(self.PDBarray)

        if parallel:
            # TODO: this would be great to implement at some point
            print('Parallel processing not currently implemented!')
        else:
            # selectable atom type -> the two partner atom names whose voxel
            # mid-points serve as reference points for the xyz analysis
            partnerNames = {
                'GLU-CD': ('GLU-OE1', 'GLU-OE2'),
                'GLU-OE1': ('GLU-CD', 'GLU-OE2'),
                'GLU-OE2': ('GLU-CD', 'GLU-OE1'),
                'ASP-CG': ('ASP-OD1', 'ASP-OD2'),
                'ASP-OD1': ('ASP-CG', 'ASP-OD2'),
                'ASP-OD2': ('ASP-CG', 'ASP-OD1'),
                'CYS-SG': ('CYS-CB', 'CYS-CA'),
                'MET-SD': ('MET-CE', 'MET-CG'),
            }

            if doRandomSubset:
                import random

            self.densByRegion = []
            self.clustDoneOnAtm = []

            for idx, atom in enumerate(self.PDBarray):

                # this is more for testing reasons than any clear use
                if inclOnlyGluAsp:
                    idParts = atom.getAtomID().split('-')
                    tag = '-'.join(idParts[2:])
                    if tag not in partnerNames:
                        continue

                    if self.doXYZanalysis:
                        prefix = '-'.join(idParts[:2])
                        firstName, secondName = partnerNames[tag]
                        lookFor1 = prefix + '-' + firstName
                        lookFor2 = prefix + '-' + secondName

                        # only search the 10 atoms either side of this one
                        windowStart = max(idx - 10, 0)
                        windowStop = min(idx + 10, len(self.PDBarray))

                        # NOTE(review): if a partner is not found in the
                        # window the reference point keeps its previous value
                        for neighbour in self.PDBarray[windowStart:windowStop]:
                            neighbourID = neighbour.getAtomID()
                            if neighbourID == lookFor1:
                                self.knownRefPt1 = neighbour.vxlMidPt
                            elif neighbourID == lookFor2:
                                self.knownRefPt2 = neighbour.vxlMidPt

                # only calculate metrics for a random subset of atoms
                # - for testing purposes
                if doRandomSubset:
                    if random.uniform(0, 1) > 0.01:
                        continue
                    print('Random atom used: ' + atom.getAtomID())

                if showProgress:
                    percentDone = round(100*float(idx)/numAtoms, 3)
                    sys.stdout.write('\r')
                    sys.stdout.write('{}%'.format(percentDone))
                    sys.stdout.flush()

                self.calcDensMetricsForAtom(atom=atom, plotDistn=plotDistn)
                atom.getAdditionalMetrics()

            if makeTrainSet:
                self.makeTrainingSet()

        self.success()
        self.stopTimer()

        # vxlsPerAtom is no longer needed once metrics are calculated
        del self.vxlsPerAtom

        # ############################################################################
        # # TEST: cluster the density values per atom based off xyz.
        # # KEEP THIS COMMENTED WHEN USING THE CODE
        # from sklearn.cluster import KMeans
        # from sklearn.decomposition import PCA

        # d = self.densByRegion

        # numClusts = 5
        # reduced_data = PCA(n_components=2).fit_transform(d)
        # kmeans = KMeans(init='k-means++', n_clusters=numClusts)
        # kmeans.fit(reduced_data)

        # # Step size of the mesh. Decrease to increase the quality of the VQ.
        # h = .02     # point in the mesh [x_min, x_max]x[y_min, y_max].

        # # Plot decision boundary. For that, we will assign a color to each
        # x_min = reduced_data[:, 0].min()-0.5*np.abs(reduced_data[:, 0].min())
        # x_max = reduced_data[:, 0].max()+0.5*np.abs(reduced_data[:, 0].max())
        # y_min = reduced_data[:, 1].min()-0.5*np.abs(reduced_data[:, 1].min())
        # y_max = reduced_data[:, 1].max()+0.5*np.abs(reduced_data[:, 1].max())

        # xx, yy = np.meshgrid(
        #     np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

        # # Obtain labels for each point in mesh. Use last trained model.
        # Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])

        # Zatoms = kmeans.predict(reduced_data)
        # atmNames = [x for _, x in sorted(zip(Zatoms, self.clustDoneOnAtm))]
        # Zatoms.sort()
        # for Za, atom in zip(Zatoms, atmNames):
        #     print('{} --> {}'.format(atom, Za))

        # # Put the result into a color plot
        # Z = Z.reshape(xx.shape)
        # plt.figure(1)
        # plt.clf()
        # plt.imshow(Z, interpolation='nearest',
        #            extent=(xx.min(), xx.max(), yy.min(), yy.max()),
        #            cmap=plt.cm.Paired,
        #            aspect='auto', origin='lower')

        # plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
        # # Plot the centroids as a white X
        # centroids = kmeans.cluster_centers_
        # plt.scatter(centroids[:, 0], centroids[:, 1],
        #             marker='x', s=169, linewidths=3,
        #             color='w', zorder=10)

        # import pylab as pl
        # for i in range(numClusts):
        #     pl.text(centroids[i, 0], centroids[i, 1],
        #             str(i), color="white", fontsize=20)

        # plt.title('K-means clustering on per-atom density (PCA-reduced data)\n'
        #           'Centroids are marked with white cross')
        # plt.xlim(x_min, x_max)
        # plt.ylim(y_min, y_max)
        # plt.xticks(())
        # plt.yticks(())
        # plt.show()
        # import sys
        # sys.exit()
        # ############################################################################

    def makeTrainingSet(self,
                        killNow=True, standardise=False):
        """Write per-atom density features to a new '.trset' file.

        One comma-separated row is written per entry of
        self.clustDoneOnAtm: the atom ID, the matching row of
        self.densByRegion (each value rounded to 3 dp) and the atom's
        B-factor. The file is '<filesOut>clusterTrainingSet-<n>.trset',
        using the first n >= 1 for which no file exists yet.

        killNow: if True, exit the interpreter once the file is written.
        standardise: if True, pass the features through sklearn's
            StandardScaler before writing.

        NOTE: This should NOT be included in a standard run.
        """

        print('Preparing classifier training dataset')

        if standardise:
            from sklearn.preprocessing import StandardScaler
            X = StandardScaler().fit_transform(self.densByRegion)
        else:
            X = self.densByRegion

        # Build the ID -> B-factor lookup once. setdefault keeps the first
        # atom seen for an ID, as the original first-match scan did.
        bfactorByID = {}
        for atm in self.PDBarray:
            bfactorByID.setdefault(atm.getAtomID(), atm.Bfactor)

        def trainSetPath(index):
            return '{}clusterTrainingSet-{}.trset'.format(
                self.filesOut, index)

        # never overwrite an existing training set
        fileNum = 1
        while os.path.isfile(trainSetPath(fileNum)):
            fileNum += 1
        outPath = trainSetPath(fileNum)
        print('Writing calculated features to file: "{}"'.format(outPath))

        # the context manager closes the file even if a write fails
        with open(outPath, 'w') as csvOut:
            for atmID, dens in zip(self.clustDoneOnAtm, X):
                features = ','.join([str(np.round(d, 3)) for d in dens])
                # an ID missing from PDBarray is written as nan so that the
                # remaining rows keep their own B-factors
                bfactor = bfactorByID.get(atmID, float('nan'))
                csvOut.write('{},{},{}\n'.format(atmID, features, bfactor))

        if killNow:
            import sys
            sys.exit()

    def plotFCdistnPlot(self,
                        plot=True, atomOfInterest='',
                        atomsToPlot=['GLU-CD', 'CYS-SG'], atomFCvals=[],
                        atomFCvalsMaxNorm=[], FCatMin=[],
                        plotType='.png', axesFont=18):
        """Plot the distribution of FCalc values around one atom.

        A figure is produced for each tag in atomsToPlot that occurs in the
        atom's ID. It overlays seaborn distribution plots of the raw FCalc
        values and of the max-normalised FCalc values, plus a vertical line
        at FCatMin (labelled as the FCalc value at the position of minimum
        difference density). The figure is saved to
        '<filesOut>testDistnPlot-<atomID><plotType>'.

        atomOfInterest: atom object providing getAtomID().
        atomFCvals / atomFCvalsMaxNorm: sequences of per-voxel values.
        axesFont: font size for the axis labels.
        plot: accepted but not used by this method.
        """

        atomID = atomOfInterest.getAtomID()

        for tag in atomsToPlot:
            if tag not in atomID:
                continue

            sns.set_style("dark")
            sns.set_context(rc={"figure.figsize": (10, 6)})
            fig = plt.figure()
            ax = plt.subplot(111)
            sns.distplot(np.array(atomFCvals), label='Fcalc')
            sns.distplot(np.array(atomFCvalsMaxNorm),
                         label='Fcalc/max(Fcalc)')
            ylims = ax.get_ylim()

            plt.plot((FCatMin, FCatMin),
                     (ylims[0], ylims[1]),
                     label='Fcalc, at position of min diff density')
            leg = plt.legend(frameon=1)
            frame = leg.get_frame()
            frame.set_color('white')
            plt.xlabel('Per-voxel density map values', fontsize=axesFont)
            plt.ylabel('Normed-frequency', fontsize=axesFont)
            plt.title('Distribution of Fcalc density values: {}'.format(
                atomID))
            fig.savefig('{}testDistnPlot-{}{}'.format(
                self.filesOut, atomID, plotType))

            # this can be called once per atom, so release the figure rather
            # than letting pyplot keep every one of them alive
            plt.close(fig)

    def plotDensScatterPlots(self,
                             printText=False, clustAnalys=False):
        """Scatter-plot pairs of per-atom density metrics against each other.

        Intended as a quick check that the per-atom metrics behave as
        expected. Each metric pair is handed to edens_scatter and the
        string it returns is written to the log.

        printText: forwarded as forcePrint for the opening log line.
        clustAnalys: also plot the cluster-analysis metrics (only
            meaningful if per-atom clusters were calculated, which is
            currently very slow).
        """

        self.startTimer()
        self.fillerLine(style='line')
        self.lgwrite(
            ln='Plotting scatter plots for electron density statistics...',
            forcePrint=printText)

        metricPairs = [
            ['meandensity', 'maxdensity'],
            ['meandensity', 'mediandensity'],
            ['meandensity', 'mindensity'],
            ['mindensity', 'maxdensity'],
            ['meandensity', 'stddensity'],
            ['mindensity', 'min90tile'],
            ['maxdensity', 'max90tile'],
            ['min90tile', 'min95tile'],
            ['max90tile', 'max95tile'],
            ['meandensity', 'meanPosOnly'],
            ['meandensity', 'meanNegOnly'],
            ['mindensity', 'meanNegOnly'],
            ['maxdensity', 'meanPosOnly'],
        ]

        if clustAnalys:
            metricPairs.extend([
                ['negClusterVal', 'meandensity'],
                ['negClusterVal', 'mindensity'],
                ['totDensShift', 'meandensity'],
                ['totDensShift', 'mindensity'],
            ])

        # density-weighted metrics only exist when an FC map was used
        if self.calcFCmap:
            metricPairs.extend([
                ['meandensity', 'densityWeightedMean'],
                ['mindensity', 'densityWeightedMin'],
                ['maxdensity', 'densityWeightedMax'],
                ['maxdensity', 'densityWeightedMeanPosOnly'],
                ['mindensity', 'densityWeightedMeanNegOnly'],
                ['meanNegOnly', 'densityWeightedMeanNegOnly'],
                ['meanPosOnly', 'densityWeightedMeanPosOnly'],
                ['densityWeightedMean', 'densityWeightedMeanPosOnly'],
                ['densityWeightedMean', 'densityWeightedMeanNegOnly'],
            ])

        for pair in metricPairs:
            plotLog = edens_scatter(outputDir=self.filesOut, metrics=pair,
                                    PDBarray=self.PDBarray,
                                    pdbName=self.pdbName)
            self.lgwrite(ln=plotLog)

    def startTimer(self):
        """Store the current time in self.timeStart for a later stopTimer."""

        now = time.time()
        self.timeStart = now

    def stopTimer(self,
                  includeInLog=False):
        """Measure the time since startTimer and optionally log it.

        startTimer must have been called first, since self.timeStart is
        read here. When includeInLog is True the elapsed time, rounded to
        3 dp, is written to the log. stdout is flushed in either case.
        """

        secondsTaken = time.time() - self.timeStart
        if includeInLog:
            report = 'section time: {}s\n'.format(round(secondsTaken, 3))
            self.lgwrite(ln=report)
        sys.stdout.flush()

    def success(self):
        """Write the standard '---> success' marker to the log."""

        marker = '---> success'
        self.lgwrite(ln=marker)

    def fillerLine(self,
                   style='blank'):
        """Write a filler line to the log.

        style: 'stars' (a line of three asterisks), 'line' (30 dashes) or
            'blank' (an empty line). Each is preceded by a newline.

        Raises ValueError for any other style. Previously an unknown style
        failed with an UnboundLocalError that did not name the bad value.
        """

        fillers = {'stars': '\n***',
                   'line': '\n'+'-'*30,
                   'blank': '\n'}
        try:
            ln = fillers[style]
        except KeyError:
            raise ValueError(
                'Unknown filler line style {!r}; expected one of {}'.format(
                    style, sorted(fillers)))
        self.lgwrite(ln=ln)

    def lgwrite(self,
                ln='', strip=True, forcePrint=False):
        """Forward a line to the log object's writeToLog method.

        ln is passed as writeToLog's 'str' keyword; strip and forcePrint
        are passed through unchanged.
        """

        logger = self.log
        logger.writeToLog(str=ln, strip=strip, forcePrint=forcePrint)

    def printStepNumber(self):
        """Log a 'STEP n)' header for the current step, then advance n.

        The counter self.stepNumber is created with value 1 the first time
        this is called.
        """

        if not hasattr(self, 'stepNumber'):
            self.stepNumber = 1

        header = '\n_______' + '\nSTEP {})'.format(self.stepNumber)
        self.lgwrite(ln=header)

        self.stepNumber += 1
Exemple #3
0
class maps2DensMetrics():

    # assign values within a density map to specific atoms, using
    # an atom-tagged map to determine which regions of space
    # are to be assigned to each atom

    def __init__(self,
                 filesIn='',
                 filesOut='',
                 pdbName='',
                 atomTagMap='',
                 densityMap='',
                 FCmap='',
                 plotScatter=False,
                 plotHist=False,
                 logFile=logFile,
                 calcFCmap=True):
        """Store the run configuration on the instance.

        filesIn / filesOut: input and output directories.
        pdbName: the pdb file name.
        atomTagMap: atom-tagged map (stored as self.map1).
        densityMap: density map, typically Fo-Fo (stored as self.map2).
        FCmap: FC map (stored as self.map3).
        plotScatter / plotHist: whether scatter / histogram plots are made.
        logFile: log object (stored as self.log).
        calcFCmap: whether the FC map should be used.
        """

        # where to read from and write to
        self.filesIn = filesIn
        self.filesOut = filesOut
        self.pdbName = pdbName

        # the three maps
        self.map1 = atomTagMap
        self.map2 = densityMap
        self.map3 = FCmap

        # run options
        self.plotScatter = plotScatter
        self.plotHist = plotHist
        self.log = logFile
        self.calcFCmap = calcFCmap

    def maps2atmdensity(self):
        """Main run method for this class.

        Reads the pdb file, the atom-tagged map and the density map,
        reports and checks the maps, optionally reads the FC map, assigns
        voxels to atoms, calculates the per-atom density metrics (with
        optional histogram and scatter plots) and finally pickles the atom
        list.
        """

        readAndCheckSteps = (self.readPDBfile,
                             self.readAtomMap,
                             self.readDensityMap,
                             self.reportDensMapInfo,
                             self.checkMapCompatibility)
        for step in readAndCheckSteps:
            step()

        if self.calcFCmap:
            self.readFCMap()

        self.createVoxelList()

        if self.plotHist:
            self.plotDensHistPlots()

        self.calcDensMetrics()

        if self.plotScatter:
            self.plotDensScatterPlots()

        self.pickleAtomList()

    def readPDBfile(self):
        """Read '<filesIn><pdbName>.pdb' into self.PDBarray.

        The atom list returned by PDBtoList is sorted by atom number and
        VDW_get() is then called on every atom.
        """

        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading pdb file: {}'.format(self.pdbName))

        pdbPath = '{}{}.pdb'.format(self.filesIn, self.pdbName)
        self.PDBarray = PDBtoList(pdbPath)
        self.stopTimer()

        # later steps expect the atoms ordered by atom number
        self.PDBarray.sort(key=lambda atm: atm.atomnum)

        for atm in self.PDBarray:
            atm.VDW_get()
        
    def readAtomMap(self):
        """Read the atom-tagged map and log how many atoms have no voxels.

        Sets self.atmmap and self.atomIndices from readMap's return value.
        """

        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading atom-tagged map file...')
        self.lgwrite(ln='Atom map name: {}'.format(self.map1))

        mapAndIndices = readMap(dirIn=self.filesIn,
                                dirOut=self.filesOut,
                                mapName=self.map1,
                                mapType='atom_map',
                                log=self.log)
        self.atmmap, self.atomIndices = mapAndIndices
        self.stopTimer()

        # atom numbers that appear at least once in the map
        taggedAtoms = set(self.atmmap.vxls_val)

        # NOTE(review): this assumes atom numbers run 1..len(PDBarray)
        # without gaps - confirm against the pdb numbering
        expectedAtoms = set(range(1, len(self.PDBarray) + 1))
        unassigned = expectedAtoms - taggedAtoms
        self.lgwrite(ln='Number of atoms not assigned to voxels: {}'.format(
            len(unassigned)))

    def readDensityMap(self):
        """Read the density map (self.map2) into self.densmap.

        The atom indices found by readAtomMap are passed to readMap.
        """

        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading density map file...')
        self.lgwrite(ln='Density map name: {}'.format(self.map2))

        readArgs = dict(dirIn=self.filesIn,
                        dirOut=self.filesOut,
                        mapName=self.map2,
                        mapType='density_map',
                        atomInds=self.atomIndices,
                        log=self.log)
        self.densmap = readMap(**readArgs)
        self.stopTimer()

    def readFCMap(self):
        """Read the FC (calculated structure factor) map into self.FCmap.

        The map named by self.map3 is read as a 'density_map', using the
        atom indices found by readAtomMap.
        """

        self.printStepNumber()
        self.startTimer()
        self.lgwrite(ln='Reading Fcalc density map file...')
        self.lgwrite(ln='Density map name: {}'.format(self.map3))

        readArgs = dict(dirIn=self.filesIn,
                        dirOut=self.filesOut,
                        mapName=self.map3,
                        mapType='density_map',
                        atomInds=self.atomIndices,
                        log=self.log)
        self.FCmap = readMap(**readArgs)

        self.stopTimer()

    def reportDensMapInfo(self):
        """Log summary statistics for the structure and solvent voxels.

        The structure statistics come from self.densmap.vxls_val. The
        solvent mean is derived from the whole-map mean
        (self.densmap.density['mean']) and the structure mean, weighted by
        voxel counts. It is reported as nan if no voxels are left over for
        solvent.
        """

        # np.prod over a list works on both Python 2 and 3; np.product was
        # removed in NumPy 2.0 and cannot consume a dict view
        totalNumVxls = np.prod(list(self.atmmap.nxyz.values()))
        structVxls = self.densmap.vxls_val
        structureNumVxls = len(structVxls)
        totalMean = self.densmap.density['mean']
        structureMean = np.mean(structVxls)
        solvNumVxls = totalNumVxls - structureNumVxls

        if solvNumVxls != 0:
            solvMean = (totalNumVxls*totalMean -
                        structureNumVxls*structureMean)/solvNumVxls
        else:
            solvMean = float('nan')

        # the solvent mean was previously rounded to 0 dp because the
        # precision sat outside the round() call
        lines = ['\nFor voxels assigned to structure:\n',
                 '\tmean structure density : {}\n'.format(round(structureMean, 4)),
                 '\tmax structure density : {}\n'.format(round(max(structVxls), 4)),
                 '\tmin structure density : {}\n'.format(round(min(structVxls), 4)),
                 '\tstd structure density : {}\n'.format(round(np.std(structVxls), 4)),
                 '\t# voxels included : {}\n'.format(structureNumVxls),
                 '\nFor voxels assigned to solvent:\n',
                 '\tmean solvent-region density : {}\n'.format(round(solvMean, 4)),
                 '\t# voxels included : {}'.format(solvNumVxls)]
        self.lgwrite(ln=''.join(lines))

    def checkMapCompatibility(self):
        """Check that the atom-tagged and density maps can be combined.

        The run is terminated via sys.exit() if axis, gridsamp, start, nxyz
        or type differ between the maps. If only the cell dimensions
        differ, they are compared after rounding to 6, 5, ... 0 decimal
        places. Processing continues at the first precision where all six
        agree, and otherwise the run is terminated. The number of voxels
        assigned to atoms is logged at the end.
        """

        self.printStepNumber()
        self.lgwrite(ln='Checking that maps have same dimensions and sampling properties...')

        self.startTimer()
        atomMap = self.atmmap
        densMap = self.densmap

        headerMismatch = (atomMap.axis != densMap.axis or
                          atomMap.gridsamp != densMap.gridsamp or
                          atomMap.start != densMap.start or
                          atomMap.nxyz != densMap.nxyz or
                          atomMap.type != densMap.type)

        if headerMismatch:
            self.lgwrite(ln='Incompatible map properties --> terminating script')
            sys.exit()

        elif atomMap.celldims != densMap.celldims:
            self.lgwrite(ln='Not exact same map grid dimensions..')

            # look for the highest precision at which all six cell
            # dimensions agree
            for decimals in range(6, -1, -1):
                numSame = sum(
                    1 for key in atomMap.celldims.keys()
                    if (np.round(atomMap.celldims[key], decimals) ==
                        np.round(densMap.celldims[key], decimals)))
                if numSame == 6:
                    msg = ('Map grid dimensions same to {}dp\n'.format(decimals) +
                           '--> continuing with processing anyway')
                    self.lgwrite(ln=msg)
                    break
            else:
                # no precision down to 0dp gave a full match
                msg = ('Map grid dimensions still not same to 0dp\n' +
                       ' --> terminating script')
                self.lgwrite(ln=msg)
                sys.exit()

        else:
            self.success()
            self.lgwrite(ln='The atom and density map are of compatible format!')
        self.stopTimer()

        numAssigned = len(self.atmmap.vxls_val)
        self.lgwrite(
            ln='Total number of voxels assigned to atoms: {}'.format(numAssigned))

    def createVoxelList(self):
        """Group per-voxel values by the atom number tagged on each voxel.

        Builds dictionaries keyed by atom number:
        self.vxlsPerAtom (density values), self.xyzsPerAtom (voxel
        coordinates, requested as 'fractional') and, when self.calcFCmap is
        set, self.FCperAtom (FC map values). The map attributes are then
        deleted via deleteMapsAttributes.
        """

        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Combining voxel density and atom values...')
        self.success()

        atomTags = self.atmmap.vxls_val

        def groupByAtom(perVoxelValues):
            # every tagged atom gets a list, filled in voxel order
            grouped = {tag: [] for tag in atomTags}
            for tag, value in zip(atomTags, perVoxelValues):
                grouped[tag].append(value)
            return grouped

        # prepare the density map before its voxel values are read
        self.densmap.reshape1dTo3d()
        self.densmap.abs2xyz_params()
        densByAtom = groupByAtom(self.densmap.vxls_val)

        voxelCoords = self.densmap.getVoxXYZ(self.atomIndices,
                                             coordType='fractional')
        coordsByAtom = groupByAtom(voxelCoords)

        self.vxlsPerAtom = densByAtom
        self.xyzsPerAtom = coordsByAtom  # not essential for run

        if self.calcFCmap:
            self.FCperAtom = groupByAtom(self.FCmap.vxls_val)

        self.deleteMapsAttributes()

        self.stopTimer()

    def deleteMapsAttributes(self):
        """Delete map data that is no longer needed, to save memory.

        Removes self.atmmap, self.FCmap (if present) and the vxls_val
        attribute of self.densmap. The densmap object itself is kept.
        """

        del self.atmmap

        # FCmap is only created by readFCMap, which runs only when
        # calcFCmap is True; an unconditional del raised AttributeError
        # otherwise
        if hasattr(self, 'FCmap'):
            del self.FCmap

        del self.densmap.vxls_val

    def plotDensHistPlots(self,
                          getVoxelStats  = False,
                          perAtmDensHist = False):
        """Plot histogram & kde plots of the number of voxels per atom.

        getVoxelStats: forwarded to plotVxlsPerAtm as returnStats. Any
            non-empty result is printed as mean/std/max/min.
        perAtmDensHist: if True, plotDensForAtm is also called with the
            per-atom voxel values.
        """

        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln = 'Plotting histogram plots of voxels per atom...')
        self.lgwrite(ln = 'Plots written to "{}plots"'.format(self.filesOut))

        stats = plotVxlsPerAtm(pdbName     = self.pdbName,
                               where       = self.filesOut,
                               vxlsPerAtom = self.vxlsPerAtom,
                               plotType    = 'both',
                               returnStats = getVoxelStats)

        if stats != '':
            # single-argument print() behaves the same on Python 2 and 3,
            # whereas the print statement is a SyntaxError on Python 3
            print('mean: {}\nstd: {}\nmax: {}\nmin: {}'.format(*stats))

        if perAtmDensHist:
            plotDensForAtm(pdbName     = self.pdbName,
                           where       = self.filesOut,
                           vxlsPerAtom = self.vxlsPerAtom,
                           plotType    = 'both',
                           PDBarray    = self.PDBarray)

        self.stopTimer()

    def calcDensMetricsForAtom(self,
                               atom        = [],
                               plotDistn   = False,
                               clustAnalys = False):
        """Calculate density metrics for one atom and store them on it.

        The atom's voxel values are looked up in self.vxlsPerAtom by
        atom.atomnum. Mean, median, min, max, std, the 5/10/90/95th
        percentiles and the voxel count are set as attributes on the atom.
        When self.calcFCmap is set, atom.reliability (max-normalised FC
        value at the voxel of minimum density) and atom.wMean (mean of the
        density values weighted by normalised FC) are also set.

        An atom with no voxels triggers a logged warning and gets nan
        metrics.

        plotDistn: also plot the FC value distribution for this atom.
        clustAnalys: run perAtomClusterAnalysis on a random ~5% of atoms.
        """

        try:
            atomVxls = self.vxlsPerAtom[atom.atomnum]
        except KeyError:
            err = 'Warning!: No voxels assigned to an atom. Consider increasing '+\
                  'per-atom search radius parameter in RIDL input .txt file.'
            self.lgwrite(ln = err,forcePrint = True)
            atomVxls = [np.nan]

        # nothing to summarise (and argmin would fail on an empty array)
        if len(atomVxls) == 0:
            return

        if self.calcFCmap:
            # An atom without density voxels has no FC voxels either, so
            # fall back to nan here too rather than raising KeyError
            # straight after the warning above.
            atomFCvals = self.FCperAtom.get(atom.atomnum, [np.nan])
            atomFCvalsMaxNormalised = np.array(atomFCvals)/max(atomFCvals)

            # reliability measures are based on the FC value at the
            # position of minimum density
            minIndex     = np.array(atomVxls).argmin()
            reliability  = atomFCvalsMaxNormalised[minIndex]
            FCatMin      = atomFCvals[minIndex]
            weightedVxls = np.multiply(atomVxls,atomFCvalsMaxNormalised)

        atom.meandensity   = np.mean(atomVxls)
        atom.mediandensity = np.median(atomVxls)
        atom.mindensity    = min(atomVxls)
        atom.maxdensity    = max(atomVxls)
        atom.stddensity    = np.std(atomVxls)
        atom.min90tile     = np.percentile(atomVxls,10)
        atom.max90tile     = np.percentile(atomVxls,90)
        atom.min95tile     = np.percentile(atomVxls,5)
        atom.max95tile     = np.percentile(atomVxls,95)
        atom.numvoxels     = len(atomVxls)

        if self.calcFCmap:
            atom.reliability  = reliability
            atom.wMean        = np.mean(weightedVxls)

            if plotDistn:
                self.plotFCdistnPlot(atomOfInterest    = atom,
                                     atomFCvals        = atomFCvals,
                                     atomFCvalsMaxNorm = atomFCvalsMaxNormalised,
                                     FCatMin           = FCatMin,
                                     reliability       = reliability)

        if clustAnalys:
            # cluster analysis is slow, so only a random subset is analysed
            rnd = np.random.random()
            if rnd < 0.05:
                # single-argument print() works on both Python 2 and 3
                print(atom.getAtomID())
                clustAnalysis = perAtomClusterAnalysis(atmNum      = atom.atomnum,
                                                       atmId       = atom.getAtomID(),
                                                       densMapObj  = self.densmap,
                                                       xyzsPerAtom = self.xyzsPerAtom,
                                                       vxlsPerAtom = self.vxlsPerAtom)

                atom.negClusterVal = clustAnalysis.output[0]
                atom.totDensShift  = clustAnalysis.output[-1]

    def calcDensMetrics(self,
                        plotDistn    = False,
                        clustAnalys  = False,
                        showProgress = True,
                        parallel     = False):
        """Determine the density summary metrics for every atom.

        calcDensMetricsForAtom is called for each atom in self.PDBarray,
        with plotDistn and clustAnalys forwarded. showProgress writes a
        running percentage to stdout. Afterwards self.vxlsPerAtom is
        deleted and getAdditionalMetrics() is called on each atom.

        NOTE(review): the parallel branch imports testRun from a module
        named 'test' and runs that instead of the loop. This looks like a
        development leftover - confirm before using.
        """

        self.startTimer()
        self.printStepNumber()
        self.lgwrite(ln='Calculating electron density statistics per atom...')

        numAtoms = len(self.PDBarray)

        if not parallel:
            for idx, atm in enumerate(self.PDBarray):

                if showProgress:
                    percentDone = round(100*float(idx)/numAtoms, 3)
                    sys.stdout.write('\r')
                    sys.stdout.write('{}%'.format(percentDone))
                    sys.stdout.flush()

                self.calcDensMetricsForAtom(atom=atm,
                                            plotDistn=plotDistn,
                                            clustAnalys=clustAnalys)
        else:
            from test import testRun
            testRun()

        self.success()
        self.stopTimer()

        # the per-atom voxel lists are not needed again; free the memory
        del self.vxlsPerAtom

        # get additional metrics per atom
        for atm in self.PDBarray:
            atm.getAdditionalMetrics()

    def plotFCdistnPlot(self,
                        plot              = True,
                        atomOfInterest    = '',
                        atomsToPlot       = ['GLU-CD','CYS-SG'],
                        atomFCvals        = [],
                        atomFCvalsMaxNorm = [],
                        FCatMin           = [],
                        reliability       = [],
                        plotType          = '.png',
                        axesFont          = 18):
        """Plot the distribution of FCalc values around one atom.

        A figure is produced for each tag in atomsToPlot that occurs in the
        atom's ID. It overlays seaborn distribution plots of the raw FCalc
        values and of the values divided by the atom's maximum FCalc
        (normalised-FCalc). Two vertical lines mark FCatMin and
        reliability: the FCalc and normalised-FCalc values at the voxel of
        most negative density-map value around the atom (the voxel
        corresponding to the DLoss metric). The figure is saved to
        '<filesOut>testDistnPlot-<atomID><plotType>'.

        plot: accepted but not used by this method.
        """

        atomID = atomOfInterest.getAtomID()

        for tag in atomsToPlot:
            if tag not in atomID:
                continue

            sns.set_style("dark")
            sns.set_context(rc = {"figure.figsize": (10, 6)})
            fig = plt.figure()
            ax = plt.subplot(111)
            sns.distplot(np.array(atomFCvals), label = 'Fcalc')
            sns.distplot(np.array(atomFCvalsMaxNorm), label = 'Fcalc/max(Fcalc)')
            ylims = ax.get_ylim()

            plt.plot((FCatMin,FCatMin),
                     (ylims[0],ylims[1]),
                     label='Fcalc, at position of min diff density')
            plt.plot((reliability,reliability),
                     (ylims[0],ylims[1]),
                     label = 'Fcalc/max(Fcalc), at position of min diff density')
            leg = plt.legend(frameon = 1)
            frame = leg.get_frame()
            frame.set_color('white')
            plt.xlabel('Per-voxel density map values', fontsize = axesFont)
            plt.ylabel('Normed-frequency', fontsize = axesFont)
            plt.title('Distribution of Fcalc density values: {}'.format(atomID))
            fig.savefig('{}testDistnPlot-{}{}'.format(self.filesOut,atomID,plotType))

            # this can be called once per atom, so release the figure rather
            # than letting pyplot keep every one of them alive
            plt.close(fig)

    def plotDensScatterPlots(self,
                             printText   = False,
                             clustAnalys = False):
        """Scatter-plot pairs of per-atom density metrics against each other.

        Each metric pair is handed to edens_scatter and the string it
        returns is written to the log.

        printText: forwarded as forcePrint for the opening log line.
        clustAnalys: also plot the cluster-analysis metrics (only
            meaningful if per-atom clusters were calculated, which is
            currently very slow).
        """

        self.startTimer()
        self.fillerLine(style='line')
        self.lgwrite(
            ln='Plotting scatter plots for electron density statistics...',
            forcePrint=printText)

        metricPairs = [
            ['meandensity', 'maxdensity'],
            ['meandensity', 'mediandensity'],
            ['meandensity', 'mindensity'],
            ['mindensity', 'maxdensity'],
            ['meandensity', 'stddensity'],
            ['mindensity', 'min90tile'],
            ['maxdensity', 'max90tile'],
            ['min90tile', 'min95tile'],
            ['max90tile', 'max95tile'],
        ]

        if clustAnalys:
            metricPairs.extend([
                ['negClusterVal', 'meandensity'],
                ['negClusterVal', 'mindensity'],
                ['totDensShift', 'meandensity'],
                ['totDensShift', 'mindensity'],
            ])

        # the weighted mean only exists when an FC map was used
        if self.calcFCmap:
            metricPairs.extend([
                ['meandensity', 'wMean'],
                ['mindensity', 'wMean'],
            ])

        for pair in metricPairs:
            plotLog = edens_scatter(outputDir=self.filesOut,
                                    metrics=pair,
                                    PDBarray=self.PDBarray,
                                    pdbName=self.pdbName)
            self.lgwrite(ln=plotLog)

    def pickleAtomList(self):

        # hand the list of atom objects to save_objectlist (which
        # writes the .pkl file) and remember the value it returns
        # as the pickle file name

        pklName = save_objectlist(self.PDBarray,
                                  self.pdbName)
        self.pklFileName = pklName

    def startTimer(self):

        # record the current wall-clock time (seconds since the
        # epoch) as the reference point used later by stopTimer

        timeNow = time.time()
        self.timeStart = timeNow

    def stopTimer(self,
                  includeInLog = False):

        # work out how long has passed since startTimer was last
        # called (startTimer must have been run beforehand, since
        # self.timeStart is read here), optionally report it to the
        # log rounded to 3 decimal places, then flush stdout
        #
        # includeInLog : if True, write the elapsed time to the log

        secondsTaken = time.time() - self.timeStart
        if includeInLog:
            self.lgwrite(ln = 'section time: {}s\n'.format(round(secondsTaken,3)))
        sys.stdout.flush()

    def success(self):

        # write the standard success marker to the log

        successMsg = '---> success'
        self.lgwrite(ln = successMsg)

    def fillerLine(self,
                   style = 'blank'):

        # write a filler line to the log (via lgwrite)
        #
        # style : 'stars' -> newline followed by three stars
        #         'line'  -> newline followed by 30 dashes
        #         'blank' -> a bare newline (default)
        #
        # Raises ValueError for any other style. Previously an
        # unrecognised style matched no branch, so 'ln' was never
        # assigned and the call failed with an UnboundLocalError.

        fillers = {'stars' : '\n***',
                   'line'  : '\n'+'-'*30,
                   'blank' : '\n'}
        try:
            ln = fillers[style]
        except (KeyError, TypeError):
            # TypeError covers unhashable values passed as style
            raise ValueError('unknown filler line style: {!r}'.format(style))
        self.lgwrite(ln = ln)

    def lgwrite(self,
                ln         = '',
                strip      = True,
                forcePrint = False):

        # pass a line on to the log object's writeToLog method
        #
        # ln         : the text to write (sent as writeToLog's 'str')
        # strip      : forwarded unchanged to writeToLog
        # forcePrint : forwarded unchanged to writeToLog

        logArgs = {'str'        : ln,
                   'strip'      : strip,
                   'forcePrint' : forcePrint}
        self.log.writeToLog(**logArgs)

    def printStepNumber(self):

        # write a banner announcing the current pipeline step
        # number to the log, then advance the step counter; the
        # counter is initialised to 1 if the object does not
        # have a stepNumber attribute yet

        self.stepNumber = getattr(self, 'stepNumber', 1)

        banner = ''.join(['\n_______',
                          '\nSTEP {})'.format(self.stepNumber)])
        self.lgwrite(ln = banner)

        self.stepNumber += 1