Example #1
	def runPipeline(self,map_process,post_process,retrieve_PDBmulti,inputfilename):
		# Run the ETRACK pipeline end to end. Each boolean argument selects
		# whether the corresponding stage runs (True to perform it, False to
		# skip it); inputfilename names the file describing where input files
		# live and where output files should be written.
		#
		# Fix: the original used Python-2-only `print` statements; these are
		# now print() calls, valid under both Python 2 and Python 3.
		self.titleCaption('ETRACK pipeline')

		# check whether valid inputs to function
		valid = self.checkValidInputs(map_process,post_process,retrieve_PDBmulti,inputfilename)
		if valid is False:
			return

		# first need to read the input file containing info on where input
		# files are and where output files should be written
		print('Reading input file: {}'.format(str(inputfilename)))
		success = self.readInputFile(inputfilename)
		if success is False:
			return

		if map_process is True:
			self.map_processing()
		else:
			print('Map processing task not chosen...')
		self.fillerLine()

		if post_process is True:
			self.post_processing()
			# save PDBmulti as pkl file, then move it to the output directory
			pklSeries = saveGenericObject(self.combinedAtoms,self.seriesName)
			# NOTE(review): shelling out to 'mv' is platform-dependent and
			# breaks on paths with spaces; shutil.move would be safer —
			# kept as-is pending confirmation of the file's import layout
			os.system('mv {} {}{}'.format(pklSeries,self.outputDir,pklSeries))
			self.pklSeries = pklSeries
		else:
			print('Post processing job not chosen...')
		self.fillerLine()

		if retrieve_PDBmulti is True:
			self.PDBmulti_retrieve()
		else:
			print('PDBmulti retrieval from pkl file not chosen...')
		self.fillerLine()
Example #2
	def runPipeline(self,
					map_process   = True,
					post_process  = True,
					retrieve      = True,
					inputFileName = ''):
		# Run the pipeline as a scripted sequence of stages. Each boolean
		# argument selects whether that stage (map processing, post
		# processing, retrieval) is performed; inputFileName is the input
		# file describing where inputs live and where outputs are written.
		#
		# Fix: the append to the input file now uses a context manager, so
		# the handle is closed even if the write raises (the original
		# open/write/close leaked the handle on failure).

		self.inputFileName = inputFileName

		# check whether valid inputs to function
		valid = self.checkValidInputs(map_process,post_process,retrieve)
		if not valid:
			return

		# first read in the input file containing info on where input files
		# are and where output files should be written
		ln = 'Reading input file: {}'.format(self.inputFileName)
		self.logFile.writeToLog(str = ln)

		success = self.readInputFile()
		if not success:
			return

		success = self.checkInOutDirExist()
		if not success:
			return

		self.setOutputDirs()

		if map_process:
			self.map_processing()
		else:
			ln = 'Map processing task not chosen...'
			self.logFile.writeToLog(str = ln)
		self.fillerLine()

		if post_process:
			self.post_processing()

			# save metric data to pkl file
			pklSeries = saveGenericObject(obj      = self.combinedAtoms,
										  fileName = self.seriesName)

			move(pklSeries,
				 '{}{}'.format(self.outputDir,pklSeries))
			self.pklSeries = pklSeries

			# record the pkl file name in the input file for later retrieval
			with open(self.inputFileName,'a') as inputfile:
				inputfile.write('\npklDataFile ' + pklSeries)

			self.feedback(csvOnly = not self.sumFiles)

		else:
			ln = 'Post processing job not chosen...'
			self.logFile.writeToLog(str = ln)

		if retrieve:
			self.PDBmulti_retrieve()
			self.feedback()

		self.fillerLine(blank = True)
Example #3
    def post_processing(self):
        # Group the per-atom density metrics for each dataset in the damage
        # series into a single combined atom list, compute derived metrics
        # ('average' variants and, where a normalisation set is given,
        # 'X-normalised' variants), pickle the result, and record the pkl
        # file name in the RIDL input file.
        #
        # Fixes: both file handles on the RIDL input file are now managed
        # with `with` (the originals leaked on an exception between open and
        # close), and the loop variable `file` no longer shadows that name.

        self.logFile.writeToLog(
            str='Combining density metric information for each dataset ' +
                'together within the damage series')

        txt = 'Input pkl files for post processing chosen from input file:'
        for pklName in self.pklFiles:
            txt += '\n\t{}'.format(pklName.replace(self.outDir, ""))
        self.logFile.writeToLog(str=txt)

        # next read in the pdb structure file as list of atom objects
        initialPDBlist = PDBtoList(pdbFileName=self.get1stDsetPDB())

        # retrieve object lists of atoms for each damage set
        ln = '\nReading in pkl files for higher dataset structures...'
        self.logFile.writeToLog(str=ln)

        dList = []
        for pkl_filename in self.pklFiles:
            ln = 'Damage file number: {}'.format(len(dList)+1)
            self.logFile.writeToLog(str=ln)
            PDB_ret = retrieve_objectlist(
                fileName=pkl_filename, logFile=self.logFile)

            # remove pkl file since no longer needed
            remove(pkl_filename)

            # add new retrieved damage set list to dList
            dList.append(PDB_ret)

        # create a list of atom objects with attributes as lists varying over
        # dose range, only including atoms present in ALL damage datasets
        self.logFile.writeToLog(
            str='New list of atoms over full dose range calculated...')
        combinedAtoms = combinedAtomList(
            datasetList=dList, numLigRegDsets=len(dList), doseList=self.doses,
            initialPDBList=initialPDBlist, outputDir=self.outputDataDir,
            seriesName=self.seriesName, inclFCmetrics=self.inclFCmets)

        combinedAtoms.getMultiDoseAtomList(logFile=self.logFile)

        # calculate 'average' variant Dloss metrics
        combinedAtoms.calcAdditionalMetrics(newMetric='average')

        # calculate normalised metrics, if suitable atoms exist
        if self.normSet != [[]]:
            if combinedAtoms.checkSpecificAtomsExist(self.normSet):

                metricsOfInterest = ['loss', 'mean', 'gain', 'Bfactor']

                if self.inclFCmets:
                    metricsOfInterest += ['density_weighted_mean_negOnly',
                                          'density_weighted_loss',
                                          'density_weighted_mean']

                for m in metricsOfInterest:
                    combinedAtoms.calcAdditionalMetrics(
                        metric=m, newMetric='X-normalised',
                        normalisationSet=self.normSet)
            else:
                # if there is a problem finding the set of atoms
                error(text='Failed to find the specified set of ' +
                      'atoms to normalise metrics', log=self.logFile)

        # save metric data to pkl file
        pklDataFile = saveGenericObject(
            obj=combinedAtoms, fileName=self.seriesName)

        move(pklDataFile, self.pklFileDir + pklDataFile)
        self.pklDataFile = self.pklFileDir + pklDataFile

        # append the pkl file name to the input RIDL file, unless an entry
        # is already present
        with open(self.RIDLinputFile, 'r') as inputfile:
            pklFound = any(
                line.startswith('pklDataFile') for line in inputfile)
        if not pklFound:
            with open(self.RIDLinputFile, 'a') as inputfile:
                inputfile.write('\npklDataFile ' + self.pklDataFile)