def runPipeline(self, map_process, post_process, retrieve_PDBmulti, inputfilename):
    """Run the ETRACK pipeline stages in sequence.

    Each boolean flag selects whether the corresponding stage runs:

    Args:
        map_process: if True, run the map-processing stage.
        post_process: if True, run post-processing and save the combined
            atoms object (``self.combinedAtoms``) to a pkl file.
        retrieve_PDBmulti: if True, retrieve the PDBmulti data from its
            pkl file.
        inputfilename: name of the input file listing where input files
            live and where output files should be written.

    Returns:
        None. Returns early if the inputs are invalid or the input file
        cannot be read.
    """
    # local import keeps this block self-contained; shutil.move replaces
    # the previous `os.system('mv ...')` shell-out, which was non-portable
    # and failed silently on error
    from shutil import move

    self.titleCaption('ETRACK pipeline')

    # check whether valid inputs to function
    valid = self.checkValidInputs(
        map_process, post_process, retrieve_PDBmulti, inputfilename)
    if not valid:
        return

    # first need to read the input file containing info on where input
    # files are and where output files should be written
    print('Reading input file: {}'.format(str(inputfilename)))
    success = self.readInputFile(inputfilename)
    if not success:
        return

    if map_process:
        self.map_processing()
    else:
        print('Map processing task not chosen...')
    self.fillerLine()

    if post_process:
        self.post_processing()

        # save PDBmulti as pkl file and move it into the output directory
        pklSeries = saveGenericObject(self.combinedAtoms, self.seriesName)
        move(pklSeries, '{}{}'.format(self.outputDir, pklSeries))
        self.pklSeries = pklSeries
    else:
        print('Post processing job not chosen...')
    self.fillerLine()

    if retrieve_PDBmulti:
        self.PDBmulti_retrieve()
    else:
        print('PDBmulti retrieval from pkl file not chosen...')
    self.fillerLine()
def runPipeline(self, map_process=True, post_process=True, retrieve=True, inputFileName=''):
    """Run the RIDL pipeline stages in a scripted sequence.

    Each boolean flag selects whether the corresponding stage runs.

    Args:
        map_process: if True, run the map-processing stage.
        post_process: if True, run post-processing, save the combined
            metric data to a pkl file, record that pkl file name in the
            input file, and produce feedback output.
        retrieve: if True, retrieve the multi-dataset PDB information
            from its pkl file and produce feedback output.
        inputFileName: name of the input file listing where input files
            live and where output files should be written; stored on
            ``self.inputFileName``.

    Returns:
        None. Returns early if the inputs are invalid, the input file
        cannot be read, or the in/out directories do not exist.
    """
    self.inputFileName = inputFileName

    # check whether valid inputs to function
    valid = self.checkValidInputs(map_process, post_process, retrieve)
    if not valid:
        return

    # read the input file containing info on where input files are and
    # where output files should be written
    ln = 'Reading input file: {}'.format(self.inputFileName)
    self.logFile.writeToLog(str=ln)
    success = self.readInputFile()
    if not success:
        return

    success = self.checkInOutDirExist()
    if not success:
        return
    self.setOutputDirs()

    if map_process:
        self.map_processing()
    else:
        ln = 'Map processing task not chosen...'
        self.logFile.writeToLog(str=ln)
        self.fillerLine()

    if post_process:
        self.post_processing()

        # save metric data to pkl file and move it into the output dir
        pklSeries = saveGenericObject(
            obj=self.combinedAtoms, fileName=self.seriesName)
        move(pklSeries, '{}{}'.format(self.outputDir, pklSeries))
        self.pklSeries = pklSeries

        # record the pkl file name in the input file; only append when no
        # 'pklDataFile' entry exists yet, so repeated runs do not stack up
        # duplicate lines (matches the guard used in post_processing)
        with open(self.inputFileName, 'r') as inputfile:
            pklFound = any(
                l.startswith('pklDataFile') for l in inputfile)
        if not pklFound:
            with open(self.inputFileName, 'a') as inputfile:
                inputfile.write('\npklDataFile ' + pklSeries)

        self.feedback(csvOnly=not self.sumFiles)
    else:
        ln = 'Post processing job not chosen...'
        self.logFile.writeToLog(str=ln)

    if retrieve:
        self.PDBmulti_retrieve()
        self.feedback()

    self.fillerLine(blank=True)
def post_processing(self):
    """Combine per-atom density metrics for every dataset in the series.

    Reads the per-dataset pkl files listed in ``self.pklFiles`` (deleting
    each pkl once loaded), builds a combined atom list spanning the full
    dose range, derives 'average' and (optionally) normalised metric
    variants, saves the combined object to a pkl file in
    ``self.pklFileDir`` (recorded on ``self.pklDataFile``), and appends a
    'pklDataFile' entry to the RIDL input file if one is not already
    present.

    Returns:
        None.
    """
    self.logFile.writeToLog(
        str='Combining density metric information for each dataset ' +
            'together within the damage series')

    txt = 'Input pkl files for post processing chosen from input file:'
    # avoid shadowing the builtin name 'file' for the loop variable
    for pklFile in self.pklFiles:
        txt += '\n\t{}'.format(pklFile.replace(self.outDir, ""))
    self.logFile.writeToLog(str=txt)

    # next read in the pdb structure file as list of atom objects
    initialPDBlist = PDBtoList(pdbFileName=self.get1stDsetPDB())

    # retrieve object lists of atoms for each damage set
    ln = '\nReading in pkl files for higher dataset structures...'
    self.logFile.writeToLog(str=ln)

    dList = []
    for pkl_filename in self.pklFiles:
        ln = 'Damage file number: {}'.format(len(dList) + 1)
        self.logFile.writeToLog(str=ln)

        PDB_ret = retrieve_objectlist(
            fileName=pkl_filename, logFile=self.logFile)

        # remove pkl file since no longer needed
        remove(pkl_filename)

        # add new retrieved damage set list to dList
        dList.append(PDB_ret)

    # create a list of atom objects with attributes as lists varying over
    # dose range, only including atoms present in ALL damage datasets
    self.logFile.writeToLog(
        str='New list of atoms over full dose range calculated...')
    combinedAtoms = combinedAtomList(
        datasetList=dList, numLigRegDsets=len(dList), doseList=self.doses,
        initialPDBList=initialPDBlist, outputDir=self.outputDataDir,
        seriesName=self.seriesName, inclFCmetrics=self.inclFCmets)
    combinedAtoms.getMultiDoseAtomList(logFile=self.logFile)

    # calculate 'average' variant Dloss metrics
    combinedAtoms.calcAdditionalMetrics(newMetric='average')

    # calculate normalised metrics, if suitable atoms exist
    if self.normSet != [[]]:
        if combinedAtoms.checkSpecificAtomsExist(self.normSet):
            metricsOfInterest = ['loss', 'mean', 'gain', 'Bfactor']
            if self.inclFCmets:
                metricsOfInterest += ['density_weighted_mean_negOnly',
                                      'density_weighted_loss',
                                      'density_weighted_mean']
            for m in metricsOfInterest:
                combinedAtoms.calcAdditionalMetrics(
                    metric=m, newMetric='X-normalised',
                    normalisationSet=self.normSet)
        else:
            # if there is a problem finding the set of atoms
            error(text='Failed to find the specified set of ' +
                       'atoms to normalise metrics',
                  log=self.logFile)

    # save metric data to pkl file and move it into the pkl directory
    pklDataFile = saveGenericObject(
        obj=combinedAtoms, fileName=self.seriesName)
    move(pklDataFile, self.pklFileDir + pklDataFile)
    self.pklDataFile = self.pklFileDir + pklDataFile

    # append the pkl file name to the input RIDL file, unless an entry is
    # already present; 'with' guarantees the handle is closed even if the
    # read raises (the previous open/readlines/close could leak it)
    with open(self.RIDLinputFile, 'r') as inputfile:
        pklFound = any(
            line.startswith('pklDataFile') for line in inputfile)
    if not pklFound:
        with open(self.RIDLinputFile, 'a') as inputfile:
            inputfile.write('\npklDataFile ' + self.pklDataFile)