def __init__(self, **kwargs):
     """ Initialise the OrderedObject base with the given keyword arguments
     and create the fields of this object.

     The name field starts as 'default'; the remaining fields are created
     without an explicit initial value.
     """
     OrderedObject.__init__(self, **kwargs)
     # NOTE(review): fields are declared in a fixed order; presumably the
     # OrderedObject base relies on that order - confirm before reordering.
     self.name = String('default')
     self.maxCores = Integer()
     self.allowMPI = Boolean()
     self.allowThreads = Boolean()
     self.maxHours = Integer()
Beispiel #2
0
    def _insertAllSteps(self):
        """ Resolve the symmetry settings and register the protocol steps.

        The symmetry name is looked up in CHIMERA_SYM_NAME from the selected
        symmetry group and the order is read from symmetryOrder. When
        symmetry is not applied, both are forced to the trivial case
        ("Cn", order 1). In that case, and also for Cn/Dn of order 1,
        SYMMETRY is set to a False Boolean.
        """
        self.sym = CHIMERA_SYM_NAME[self.symmetryGroup.get()]
        self.symOrder = self.symmetryOrder.get()

        if not self.applySymmetry:
            # No symmetry requested: fall back to the trivial group
            self.sym, self.symOrder = "Cn", 1
            self.SYMMETRY = Boolean(False)
        elif self.sym in ("Cn", "Dn") and self.symOrder == 1:
            # Order-1 Cn/Dn is flagged as "no symmetry" as well
            self.SYMMETRY = Boolean(False)

        # connect to database, delete table and recreate it
        # execute chimera findclash
        for stepName in ('chimeraClashesStep', 'postProcessStep'):
            self._insertFunctionStep(stepName)

        self._store()
Beispiel #3
0
    def createOutputStep(self):
        """ Build a single output set containing the items of every
        input set and register it as 'outputSet'.

        The output set is created through the self._create<ClassName>
        method matching the class name of the first input set, and its
        general info is copied from that first set. Item ids are cleaned
        when the renumber option is set or when duplicatedIds() reports
        duplicates. A source relation is defined from each input to the
        output.
        """
        # The first set is the reference for the type and the general info
        firstSet = self.inputSets[0].get()

        # Look up the factory method by the class name of the first set
        createFunc = getattr(self, "_create%s" % firstSet.getClassName())
        outputSet = createFunc()

        # All sets must carry the same info (sampling rate, etc) as the first
        outputSet.copyInfo(firstSet)

        # Renumber from the beginning if either the renumber option is
        # selected or we find duplicated ids in the sets
        cleanIds = self.renumber.get() or self.duplicatedIds()

        #TODO ROB remove ignoreExtraAttributes condition
        #or implement it. But this will be for Scipion 1.2
        self.ignoreExtraAttributes = Boolean(True)
        if self.ignoreExtraAttributes:
            commonAttrs = list(self.commonAttributes())

        for setPointer in self.inputSets:
            for item in setPointer.get():
                if not self.ignoreExtraAttributes:
                    newItem = item
                else:
                    # Fresh item holding only the common attributes
                    newItem = setPointer.get().ITEM_TYPE()
                    newItem.copyAttributes(item, *commonAttrs)

                if cleanIds:
                    newItem.cleanObjId()
                outputSet.append(newItem)

        self._defineOutputs(outputSet=outputSet)
        for setPointer in self.inputSets:
            self._defineSourceRelation(setPointer, outputSet)
class QueueConfig(OrderedObject):
    """ Object holding a name together with limits and flags: maximum
    cores, maximum hours, and whether MPI and threads are allowed
    (presumably the settings of one execution queue).

    Every field is stored in a wrapper object (String, Integer, Boolean);
    the accessors below read and write the wrapped plain value.
    """
    def __init__(self, **kwargs):
        OrderedObject.__init__(self, **kwargs)
        # Keep this declaration order (the base class is an OrderedObject)
        self.name = String('default')
        self.maxCores = Integer()
        self.allowMPI = Boolean()
        self.allowThreads = Boolean()
        self.maxHours = Integer()

    # ------------------------------ name -----------------------------------
    def getName(self):
        """ Return the value stored in the name field. """
        return self.name.get()

    def setName(self, name):
        """ Store a new value in the name field. """
        self.name.set(name)

    # ---------------------------- maxCores ---------------------------------
    def getMaxCores(self):
        """ Return the value stored in the maxCores field. """
        return self.maxCores.get()

    def setMaxCores(self, maxCores):
        """ Store a new value in the maxCores field. """
        self.maxCores.set(maxCores)

    # ---------------------------- allowMPI ---------------------------------
    def getAllowMPI(self):
        """ Return the value stored in the allowMPI field. """
        return self.allowMPI.get()

    def setAllowMPI(self, allowMPI):
        """ Store a new value in the allowMPI field. """
        self.allowMPI.set(allowMPI)

    # -------------------------- allowThreads -------------------------------
    def getAllowThreads(self):
        """ Return the value stored in the allowThreads field. """
        return self.allowThreads.get()

    def setAllowThreads(self, allowThreads):
        """ Store a new value in the allowThreads field. """
        self.allowThreads.set(allowThreads)

    # ---------------------------- maxHours ---------------------------------
    def getMaxHours(self):
        """ Return the value stored in the maxHours field. """
        return self.maxHours.get()

    def setMaxHours(self, maxHours):
        """ Store a new value in the maxHours field. """
        self.maxHours.set(maxHours)
Beispiel #5
0
def getBoolListFromValues(valuesStr, length=None):
    """ Convert a string to a list of booleans.

    The string is first expanded with getListFromValues(valuesStr, length);
    each resulting item is then converted through a pyworkflow Boolean.
    """
    # Local import, as in the rest of this helper's usage
    from pyworkflow.object import Boolean

    boolValues = []
    for rawValue in getListFromValues(valuesStr, length):
        boolValues.append(Boolean(value=rawValue).get())
    return boolValues
Beispiel #6
0
# Load the project named by the first command line argument
projectsDir = os.path.join(pw.Config.SCIPION_USER_DATA, 'projects')
projName = sys.argv[1]
manager = Manager()
project = manager.loadProject(projName)

if project is None:
    usage("Project '%s' does not exist in: \n  %s" % (projName, projectsDir))

# Flags telling which settings were given on the command line
setReadOnly = False
setLifeTime = False

# Scan all arguments for 'readOnly=<value>' and 'lifeTime=<value>'.
# If an option is repeated, the last occurrence wins.
for arg in sys.argv:
    if arg.startswith('readOnly='):
        setReadOnly = True
        # Boolean does the string -> bool conversion
        readOnlyValue = Boolean(value=arg.split('readOnly=')[1]).get()
    elif arg.startswith('lifeTime='):
        setLifeTime = True
        lifeTimeText = arg.split('lifeTime=')[1]
        # The literal text 'None' clears the life time
        if lifeTimeText == 'None':
            lifeTimeValue = None
        else:
            lifeTimeValue = int(lifeTimeText)

if setReadOnly:
    project.setReadOnly(readOnlyValue)

if setLifeTime:
    project.settings.setLifeTime(lifeTimeValue)

# Only write the settings when something was requested
if any((setReadOnly, setLifeTime)):
    # Truly write settings
    project.settings.write()
Beispiel #7
0
 def __init__(self, **kwargs):
     """ Initialise through EMProtocol, select the STEPS_PARALLEL
     execution mode and create the isFirstTime flag as a False Boolean.
     """
     EMProtocol.__init__(self, **kwargs)
     self.stepsExecutionMode = STEPS_PARALLEL
     self.isFirstTime = Boolean(False)
Beispiel #8
0
class ProtCTFMicrographs(ProtMicrographs):
    """ Base class for all protocols that estimate the CTF.

    The class inserts one estimation step per input micrograph (or one
    re-estimation step per selected CTF when 'recalculate' is set) and,
    through _stepsCheck, keeps adding steps and updating the 'outputCTF'
    set while the input set is still open.

    Program-specific behaviour is delegated to methods that this class
    calls but only stubs (or does not define at all): _estimateCTF,
    _restimateCTF, _createCtfModel, _createOutputStep,
    _defineProcessParams, _prepareCommand, _insertInitialSteps and
    _insertFinalSteps.
    """
    def __init__(self, **kwargs):
        EMProtocol.__init__(self, **kwargs)
        self.stepsExecutionMode = STEPS_PARALLEL
        self.isFirstTime = Boolean(False)

    # -------------------------- DEFINE param functions -----------------------
    def _defineParams(self, form):
        """ Define the form parameters shared by the CTF estimation
        protocols. Program-specific parameters are added by
        _defineProcessParams, called after 'ctfDownFactor'.
        """
        form.addSection(label=Message.LABEL_CTF_ESTI)
        # The 'recalculate' family of params is only shown when
        # 'recalculate' itself is already true.
        form.addParam('recalculate',
                      BooleanParam,
                      default=False,
                      condition='recalculate',
                      label="Do recalculate ctf?")

        form.addParam('continueRun',
                      PointerParam,
                      allowsNull=True,
                      condition='recalculate',
                      label="Input previous run",
                      pointerClass=self.getClassName())
        form.addHidden('sqliteFile',
                       FileParam,
                       condition='recalculate',
                       allowsNull=True)

        form.addParam('inputMicrographs',
                      PointerParam,
                      important=True,
                      condition='not recalculate',
                      label=Message.LABEL_INPUT_MIC,
                      pointerClass='SetOfMicrographs')
        form.addParam(
            'ctfDownFactor',
            FloatParam,
            default=1.,
            label='CTF Downsampling factor',
            condition='not recalculate',
            help='Set to 1 for no downsampling. Non-integer downsample '
            'factors are possible. This downsampling is only used '
            'for estimating the CTF and it does not affect any '
            'further calculation. Ideally the estimation of the '
            'CTF is optimal when the Thon rings are not too '
            'concentrated at the origin (too small to be seen) '
            'and not occupying the whole power spectrum (since '
            'this downsampling might entail aliasing).')

        self._defineProcessParams(form)

        line = form.addLine('Resolution',
                            condition='not recalculate',
                            help='Give a value in digital frequency '
                            '(i.e. between 0.0 and 0.5). These cut-offs '
                            'prevent the typical peak at the center of the'
                            ' PSD and high-resolution terms where only '
                            'noise exists, to interfere with CTF '
                            'estimation. The default lowest value is 0.05 '
                            'but for micrographs with a very fine sampling '
                            'this may be lowered towards 0. The default '
                            'highest value is 0.35, but it should be '
                            'increased for micrographs with signals '
                            'extending beyond this value. However, if '
                            'your micrographs extend further than 0.35, '
                            'you should consider sampling them at a finer '
                            'rate.')
        line.addParam('lowRes', FloatParam, default=0.05, label='Lowest')
        line.addParam('highRes', FloatParam, default=0.35, label='Highest')
        line = form.addLine('Defocus search range (microns)',
                            condition='not recalculate',
                            expertLevel=LEVEL_ADVANCED,
                            help='Select _minimum_ and _maximum_ values for '
                            'defocus search range (in microns). Underfocus'
                            ' is represented by a positive number.')
        line.addParam('minDefocus', FloatParam, default=0.25, label='Min')
        line.addParam('maxDefocus', FloatParam, default=4., label='Max')

        form.addParam('windowSize',
                      IntParam,
                      default=256,
                      expertLevel=LEVEL_ADVANCED,
                      label='Window size',
                      condition='not recalculate',
                      help='The PSD is estimated from small patches of this '
                      'size. Bigger patches allow identifying more '
                      'details. However, since there are fewer windows, '
                      'estimations are noisier.')

        form.addParallelSection(threads=2, mpi=1)

    def _defineProcessParams(self, form):
        """ This method should be implemented by subclasses
        to add other parameters related to the specific operation."""
        pass

    # -------------------------- INSERT steps functions -----------------------
    def _insertAllSteps(self):
        """ Insert the steps to perform CTF estimation, or re-estimation,
        on a set of micrographs.
        """
        if not self.recalculate:
            self.initialIds = self._insertInitialSteps()
            # micDict maps micName -> micrograph for every mic that already
            # has an estimation step inserted
            self.micDict = OrderedDict()
            micDict, _ = self._loadInputList()
            ctfIds = self._insertNewMicsSteps(micDict.values())
            self._insertFinalSteps(ctfIds)
            # For the streaming mode, the steps function have a 'wait' flag
            # that can be turned on/off. For example, here we insert the
            # createOutputStep but it wait=True, which means that can not be
            # executed until it is set to False
            # (when the input micrographs stream is closed)
            waitCondition = self._getFirstJoinStepName() == 'createOutputStep'
        else:
            if self.isFirstTime:
                # Insert previous estimation or re-estimation an so on...
                self._insertPreviousSteps()
                self.isFirstTime.set(False)
            ctfIds = self._insertRecalculateSteps()
            # For now the streaming is not allowed for recalculate CTF
            waitCondition = False

        self._insertFunctionStep('createOutputStep',
                                 prerequisites=ctfIds,
                                 wait=waitCondition)

    def _insertInitialSteps(self):
        """ Override this function to insert some steps before the
        estimate ctfs steps.
        Should return a list of ids of the initial steps. """
        return []

    def _insertNewMicsSteps(self, inputMics):
        """ Insert steps to process new mics (from streaming).
        Params:
            inputMics: iterable of micrographs to be checked; only those
                whose name is not yet in self.micDict get a new step.
        Returns the list of ids of the inserted steps.
        """
        deps = []
        # For each mic insert the step to process it
        for mic in inputMics:
            micKey = mic.getMicName()
            if micKey not in self.micDict:
                args = [mic.getFileName(), self._getMicrographDir(mic), micKey]
                stepId = self._insertEstimationSteps(self.initialIds, *args)
                deps.append(stepId)
                self.micDict[micKey] = mic
        return deps

    def _insertEstimationSteps(self, prerequisites, *args):
        """ Basic method to insert an estimateCTF step for a given
        micrograph. Returns the id of the inserted step. """
        self._defineValues()
        self._prepareCommand()
        micStepId = self._insertFunctionStep('_estimateCTF',
                                             *args,
                                             prerequisites=prerequisites)
        return micStepId

    def _insertRecalculateSteps(self):
        """ Insert, for every enabled CTF with a non-empty comment in the
        set read from 'sqliteFile', a copy step followed by a
        re-estimation step. Returns the ids of the re-estimation steps.
        """
        recalDeps = []
        # For each psd insert the steps to process it
        self.recalculateSet = SetOfCTF(filename=self.sqliteFile.get(),
                                       objDoStore=False)
        for ctf in self.recalculateSet:
            comment = ctf.getObjComment()
            if ctf.isEnabled() and comment:
                # CTF Re-estimation
                copyId = self._insertFunctionStep('copyMicDirectoryStep',
                                                  ctf.getObjId())
                # Make estimation steps independent between them
                stepId = self._insertFunctionStep('_restimateCTF',
                                                  ctf.getObjId(),
                                                  prerequisites=[copyId])
                recalDeps.append(stepId)
        return recalDeps

    def _insertFinalSteps(self, deps):
        """ This should be implemented in subclasses.
        The base implementation returns the given dependencies. """
        return deps

    def _getFirstJoinStepName(self):
        # This function will be used for streaming, to check which is
        # the first function that need to wait for all micrographs
        # to have completed, this can be overridden in subclasses
        # (e.g., in Xmipp 'sortPSDStep')
        return 'createOutputStep'

    def _getFirstJoinStep(self):
        """ Return the first step whose function name matches
        _getFirstJoinStepName(), or None if there is no such step. """
        for s in self._steps:
            if s.funcName == self._getFirstJoinStepName():
                return s
        return None

    #--------------------------- STEPS functions -------------------------------
    def _estimateCTF(self, micFn, micDir, micName):
        """ Do the CTF estimation with the specific program
        and the parameters required.
        Params:
         micFn: micrograph filename
         micDir: micrograph directory
         micName: micrograph name (the key used in self.micDict)
        The base implementation always raises; subclasses must override.
        """
        raise Exception(Message.ERROR_NO_EST_CTF)

    def _restimateCTF(self, micId):
        """ Do the CTF re-estimation with the specific program
        and the parameters required.
        Params:
         micId: object id of the CTF in self.recalculateSet, as passed by
            _insertRecalculateSteps
        The base implementation always raises; subclasses must override.
        """
        raise Exception(Message.ERROR_NO_EST_CTF)

    def copyMicDirectoryStep(self, micId):
        """ Copy micrograph's directory tree for recalculation.
        Nothing is copied when the previous directory is already the
        micrograph directory of this run. """
        ctfModel = self.recalculateSet[micId]
        mic = ctfModel.getMicrograph()

        prevDir = self._getPrevMicDir(ctfModel)
        micDir = self._getMicrographDir(mic)
        if prevDir != micDir:
            # Create micrograph dir under extra directory
            makePath(micDir)
            if not exists(micDir):
                raise Exception("No created dir: %s " % micDir)
            copyTree(prevDir, micDir)

    def _createCtfModel(self, mic, updateSampling=True):
        """ This should be implemented in subclasses
        in order to create a CTF model from program results.
        Params:
         mic: the micrograph whose CTF model is requested
         updateSampling: accepted so that the base signature matches the
            call made by createOutputStep, which passes
            updateSampling=False on recalculation; unused here.
        """
        pass

    def createOutputStep(self):
        """ This function is shared by Xmipp and CTFfind
        estimation, or recalculate, protocols.
        If it is a recalculation, it iterates over each CTF model, checks
        whether it was recalculated and updates it with the new values.
        Otherwise it delegates to _createOutputStep, which is not defined
        in this class and is expected from each subclass.
        """
        if self.recalculate:
            ctfSet = self._createSetOfCTF("_recalculated")
            prot = self.continueRun.get() or self
            micSet = prot.outputCTF.getMicrographs()
            # We suppose this is reading the ctf selection
            # (with enabled/disabled) to only consider the enabled ones
            # in the final SetOfCTF
            #TODO: maybe we can remove the need of the extra text file
            # with the recalculate parameters
            newCount = 0
            for ctfModel in self.recalculateSet:
                if ctfModel.isEnabled() and ctfModel.getObjComment():
                    mic = ctfModel.getMicrograph()
                    # Update the CTF models that where recalculated and append
                    # later to the set, we don't want to copy the id here since
                    # it is already correct
                    newCtf = self._createCtfModel(mic, updateSampling=False)
                    ctfModel.copy(newCtf, copyId=False)
                    ctfModel.setEnabled(True)
                    newCount += 1
                # Every model is appended, recalculated or not
                ctfSet.append(ctfModel)
            ctfSet.setMicrographs(micSet)
            self._defineOutputs(outputCTF=ctfSet)
            self._defineCtfRelation(micSet, ctfSet)
            self._computeDefocusRange(ctfSet)
            self.summaryVar.set("CTF Re-estimation of %d micrographs" %
                                newCount)
        else:
            self._createOutputStep()

    #--------------------------- INFO functions --------------------------------
    def _summary(self):
        """ Return the list of summary messages for this run. """
        summary = []

        if self.recalculate:
            if self.isFinished():
                if self.summaryVar.hasValue():
                    summary.append(self.summaryVar.get())
            else:
                summary.append(Message.TEXT_NO_CTF_READY)
        else:
            if not hasattr(self, 'outputCTF'):
                summary.append(Message.TEXT_NO_CTF_READY)
            else:
                summary.append("CTF estimation of %d micrographs." %
                               self.inputMicrographs.get().getSize())

        return summary

    def _methods(self):
        """ Return the list of methods messages for this run. """
        methods = []

        if hasattr(self, 'outputCTF') and self.isFinished():
            methods.append(self.methodsVar.get())
        else:
            methods.append(Message.TEXT_NO_CTF_READY)

        return methods

    #--------------------------- UTILS functions -------------------------------
    def _defineValues(self):
        """ Collect in self._params the acquisition values of the input
        micrographs and the estimation parameters of the form. """
        # Get pointer to input micrographs
        self.inputMics = self.getInputMicrographs()
        acq = self.inputMics.getAcquisition()

        self._params = {
            'voltage': acq.getVoltage(),
            'sphericalAberration': acq.getSphericalAberration(),
            'magnification': acq.getMagnification(),
            'ampContrast': acq.getAmplitudeContrast(),
            'samplingRate': self.inputMics.getSamplingRate(),
            'scannedPixelSize': self.inputMics.getScannedPixelSize(),
            'windowSize': self.windowSize.get(),
            'lowRes': self.lowRes.get(),
            'highRes': self.highRes.get(),
            # Convert from microns to Angstroms
            'minDefocus': self.minDefocus.get() * 1e+4,
            'maxDefocus': self.maxDefocus.get() * 1e+4
        }

    def _defineRecalValues(self, ctfModel):
        """ Collect in self._params the acquisition info of the micrograph
        associated with the given CTF model. """
        mic = ctfModel.getMicrograph()

        acq = mic.getAcquisition()
        mag = acq.getMagnification()
        # Derived here from the sampling rate and the magnification
        scannedPixelSize = mic.getSamplingRate() * mag / 10000
        self._params = {
            'voltage': acq.getVoltage(),
            'sphericalAberration': acq.getSphericalAberration(),
            'magnification': mag,
            'ampContrast': acq.getAmplitudeContrast(),
            'scannedPixelSize': scannedPixelSize,
            'samplingRate': mic.getSamplingRate()
        }

    def _getPrevMicDir(self, ctfModel):
        """ Return the directory that contains the PSD file of the model. """
        return dirname(ctfModel.getPsdFile())

    def _ctfCounter(self, values):
        """ Store in summaryVar the number of CTFs that were recalculated,
        computed as half the number of given values.
        """
        # Floor division keeps an integer count on Python 2 and Python 3
        numberOfCTF = len(values) // 2
        msg = "CTF Re-estimation of %d micrographs" % numberOfCTF
        self.summaryVar.set(msg)

    def _getInputCtf(self):
        # NOTE(review): 'continueRecal' and 'inputCtf' are not defined in
        # this class and the first branch returns None; this looks like
        # legacy code - confirm whether it is still used before relying
        # on it.
        if self.continueRecal:
            sqliteFile = self._getPath()
        #             return self.outputCTF.get()
        else:
            return self.inputCtf.get()

    def _getMicrographDir(self, mic):
        """ Return an unique dir name for results of the micrograph. """
        return self._getExtraPath(removeBaseExt(mic.getFileName()))

    def _getMicrographDone(self, micDir):
        """ Return the file that is used as a flag of termination. """
        return join(micDir, 'done.txt')

    def _writeMicrographDone(self, micDir):
        """ Create (or truncate) the termination flag file in micDir. """
        open(self._getMicrographDone(micDir), 'w').close()

    def _iterMicrographs(self, inputMics=None):
        """ Iterate over micrographs and yield tuples of
        (micrograph filename, directory to process, micrograph).
        When inputMics is None, self.inputMics is used.
        """
        if inputMics is None:
            inputMics = self.inputMics

        for mic in inputMics:
            micFn = mic.getFileName()
            micDir = self._getMicrographDir(mic)
            yield (micFn, micDir, mic)

    def _prepareCommand(self):
        """ This function should be implemented to prepare the
        arguments template if doesn't change for each micrograph
        After this method self._program and self._args should be set.
        """
        pass

    def _computeDefocusRange(self, ctfSet):
        """ Compute the minimum and maximum defocus in a set of CTFs.
        The protocol methodsVar will be updated with new values.

        Params:
            ctfSet: the set of CTFs to compute min and max
        """
        defocusList = []

        for ctf in ctfSet:
            defocusList.append(ctf.getDefocusU())
            defocusList.append(ctf.getDefocusV())

        # Values are divided by 1e4 to report them in microns
        # NOTE(review): min()/max() raise ValueError if ctfSet is empty
        minD = min(defocusList) / 10000.
        maxD = max(defocusList) / 10000.

        self.methodsVar.set("Estimated  defocus range defocus was"
                            " %0.3f - %0.3f microns. " % (minD, maxD))

        self._store(self.methodsVar)

    def _defocusMaxMin(self, defocusList):
        """ Deprecated: always raises. Formerly returned the minimum and
        maximum of the defocus of a SetOfMicrographs.
        """
        raise Exception("DEPRECATED")

    def getInputMicrographsPointer(self):
        """ Return the 'inputMicrographs' pointer param itself. """
        return self.inputMicrographs

    def getInputMicrographs(self):
        """ Return the object the 'inputMicrographs' pointer refers to. """
        return self.getInputMicrographsPointer().get()

    # ------ Methods for Streaming picking --------------
    def _stepsCheck(self):
        # To allow streaming ctf estimation we need to detect:
        #   1) new micrographs ready to be picked
        #   2) new output ctfs that have been produced and add then
        #      to the output set.

        # For now the streaming is not allowed for recalculate CTF
        if self.recalculate:
            return
        self._checkNewInput()
        self._checkNewOutput()

    def _checkNewInput(self):
        """ Insert estimation steps for micrographs that appeared in the
        input set since the last check. """
        # Check if there are new micrographs to process from the input set
        localFile = self.getInputMicrographs().getFileName()
        now = datetime.now()
        self.lastCheck = getattr(self, 'lastCheck', now)
        mTime = datetime.fromtimestamp(getmtime(localFile))
        self.debug('Last check: %s, modification: %s' %
                   (prettyTime(self.lastCheck), prettyTime(mTime)))
        # If the input micrographs.sqlite have not changed since our last check,
        # it does not make sense to check for new input data
        # NOTE(review): 'listOfMics' is not assigned anywhere in this class,
        # so unless it is set elsewhere this early return never fires.
        if self.lastCheck > mTime and hasattr(self, 'listOfMics'):
            return None

        self.lastCheck = now
        # Open input micrographs.sqlite and close it as soon as possible
        micDict, self.streamClosed = self._loadInputList()
        newMics = micDict.values()
        outputStep = self._getFirstJoinStep()

        if newMics:
            fDeps = self._insertNewMicsSteps(newMics)
            # The join step must also wait for the newly inserted steps
            if outputStep is not None:
                outputStep.addPrerequisites(*fDeps)
            self.updateSteps()

    def _checkNewOutput(self):
        """ Add the CTFs of newly finished micrographs to the output set
        and release the join step once everything is done. """
        if getattr(self, 'finished', False):
            return
        # Load previously done items (from text file)
        doneList = self._readDoneList()
        # Check for newly done items
        listOfMics = self.micDict.values()
        nMics = len(listOfMics)
        newDone = [
            m for m in listOfMics
            if m.getObjId() not in doneList and self._isMicDone(m)
        ]

        # Update the file with the newly done mics
        # or exit from the function if no new done mics
        self.debug('_checkNewOutput: ')
        self.debug('   listOfMics: %s, doneList: %s, newDone: %s' %
                   (nMics, len(doneList), len(newDone)))

        allDone = len(doneList) + len(newDone)
        # We have finished when there is not more input mics (stream closed)
        # and the number of processed mics is equal to the number of inputs
        self.finished = self.streamClosed and allDone == nMics
        streamMode = Set.STREAM_CLOSED if self.finished else Set.STREAM_OPEN
        self.debug('   streamMode: %s newDone: %s' %
                   (streamMode, not (newDone == [])))

        if newDone:
            newDoneUpdated = self._updateOutputCTFSet(newDone, streamMode)
            self._writeDoneList(newDoneUpdated)
        elif not self.finished:
            # If we are not finished and no new output have been produced
            # it does not make sense to proceed and updated the outputs
            # so we exit from the function here

            # Maybe it would be good idea to take a snap to avoid
            # so much IO if this protocol does not have much to do now
            if allDone == nMics:
                self._streamingSleepOnWait()

            return

        self.debug('   finished: %s ' % self.finished)
        self.debug('        self.streamClosed (%s) AND' % self.streamClosed)
        self.debug('        allDone (%s) == len(self.listOfMics (%s)' %
                   (allDone, nMics))

        if self.finished:  # Unlock createOutputStep if finished all jobs
            self._updateStreamState(streamMode)
            outputStep = self._getFirstJoinStep()
            if outputStep and outputStep.isWaiting():
                outputStep.setStatus(STATUS_NEW)

    def _loadInputList(self):
        """ Load the input micrographs that are not yet in self.micDict.
        Returns the same (dict, streamClosed) tuple as _loadSet. """
        return self._loadSet(self.getInputMicrographs(), SetOfMicrographs,
                             lambda mic: mic.getMicName())

    def _loadSet(self, inputSet, SetClass, getKeyFunc):
        """ Load a given input set if their items are not already present
        in the self.micDict.
        This can be used to load new micrographs for picking as well as
        new CTF (if used) in streaming.
        Params:
            inputSet: the set whose file will be re-opened and read
            SetClass: class used to open the set file
            getKeyFunc: function returning the dictionary key of an item
        Returns a tuple (OrderedDict of new items by key, streamClosed).
        """
        setFn = inputSet.getFileName()
        self.debug("Loading input db: %s" % setFn)
        updatedSet = SetClass(filename=setFn)
        updatedSet.loadAllProperties()
        newItemDict = OrderedDict()
        for item in updatedSet:
            micKey = getKeyFunc(item)
            if micKey not in self.micDict:
                # Items are cloned before the set is closed below
                newItemDict[micKey] = item.clone()
        streamClosed = updatedSet.isStreamClosed()
        updatedSet.close()
        self.debug("Closed db.")

        return newItemDict, streamClosed

    def _updateOutputCTFSet(self, micList, streamMode):
        """ Append the CTF models of the given micrographs to 'outputCTF',
        creating the set the first time. Returns the list of processed
        micrographs (empty if micList is empty). """
        micDoneList = list(micList)
        # Do no proceed if there is not micrograph ready
        if not micDoneList:
            return []

        outputName = 'outputCTF'
        outputCtf = getattr(self, outputName, None)

        # If there is not outputCTF yet, it means that is the first
        # time we are updating output CTFs, so we need to first create
        # the output set
        firstTime = outputCtf is None

        if firstTime:
            outputCtf = self._createSetOfCTF()
            outputCtf.setMicrographs(self.getInputMicrographsPointer())
        else:
            outputCtf.enableAppend()

        for _, _, mic in self._iterMicrographs(micList):
            ctf = self._createCtfModel(mic)
            outputCtf.append(ctf)

        self.debug(" _updateOutputCTFSet Stream Mode: %s " % streamMode)
        self._updateOutputSet(outputName, outputCtf, streamMode)

        if firstTime:  # define relation just once
            # Using a pointer to define the relations is more robust to
            # scheduling and id changes between the protocol run.db and
            # the main project database.
            self._defineCtfRelation(self.getInputMicrographsPointer(),
                                    outputCtf)

        return micDoneList

    def _updateStreamState(self, streamMode):
        """ Update the stream state of 'outputCTF' without adding items,
        creating the output set if it does not exist yet. """
        outputName = 'outputCTF'
        outputCtf = getattr(self, outputName, None)

        # If there is not outputCTF yet, it means that is the first
        # time we are updating the output, so we need to first create
        # the output set
        firstTime = outputCtf is None

        if firstTime:
            # Create a set of CTFs, exactly as _updateOutputCTFSet does;
            # the output registered as 'outputCTF' must not be a set of
            # coordinates.
            outputCtf = self._createSetOfCTF()
            outputCtf.setMicrographs(self.getInputMicrographsPointer())
        else:
            outputCtf.enableAppend()

        self.debug(" _updateStreamState Stream Mode: %s " % streamMode)
        self._updateOutputSet(outputName, outputCtf, streamMode)

    def _readDoneList(self):
        """ Read from a text file the id's of the items that have been done. """
        doneFile = self._getAllDone()
        doneList = []
        # Check what items have been previously done
        if exists(doneFile):
            with open(doneFile) as f:
                doneList += [int(line.strip()) for line in f]
        return doneList

    def _writeDoneList(self, micList):
        """ Write to a text file the items that have been done. """
        doneFile = self._getAllDone()

        if not exists(doneFile):
            makeFilePath(doneFile)

        # Append mode: ids written by previous calls are kept
        with open(doneFile, 'a') as f:
            for mic in micList:
                f.write('%d\n' % mic.getObjId())

    def _isMicDone(self, mic):
        """ A mic is done if the marker file exists. """
        micDir = self._getMicrographDir(mic)
        return exists(self._getMicrographDone(micDir))

    def _getAllDone(self):
        """ Return the path of the text file listing the done items. """
        return self._getExtraPath('DONE', 'all.TXT')
Beispiel #9
0
class ProtCTFMicrographs(ProtMicrographs):
    """ Base class for all protocols that estimates the CTF"""
    def __init__(self, **args):
        """ Initialize the protocol, forwarding all keyword arguments
        to EMProtocol.__init__. """
        # NOTE(review): EMProtocol.__init__ is invoked directly although the
        # declared base class is ProtMicrographs -- presumably an ancestor;
        # confirm.
        EMProtocol.__init__(self, **args)
        # Select the STEPS_PARALLEL execution mode for this protocol
        self.stepsExecutionMode = STEPS_PARALLEL
        # Flag checked (and reset to False) by _insertAllSteps when
        # recalculating
        self.isFirstTime = Boolean(False)

    #--------------------------- DEFINE param functions --------------------------------------------
    def _defineParams(self, form):
        """ Define the parameters shown in the protocol form.

        Adds, in this order: the recalculate options, the input
        micrographs, the CTF downsampling factor, the parameters added by
        _defineProcessParams, the resolution and defocus ranges, the
        window size and the parallel section.

        Params:
            form: form object on which sections, params and lines are
                added.
        """
        form.addSection(label=Message.LABEL_CTF_ESTI)
        form.addParam('recalculate',
                      BooleanParam,
                      default=False,
                      condition='recalculate',
                      label="Do recalculate ctf?")

        form.addParam('continueRun',
                      PointerParam,
                      allowsNull=True,
                      condition='recalculate',
                      label="Input previous run",
                      pointerClass=self.getClassName())
        form.addHidden('sqliteFile',
                       FileParam,
                       condition='recalculate',
                       allowsNull=True)

        form.addParam('inputMicrographs',
                      PointerParam,
                      important=True,
                      condition='not recalculate',
                      label=Message.LABEL_INPUT_MIC,
                      pointerClass='SetOfMicrographs')
        form.addParam(
            'ctfDownFactor',
            FloatParam,
            default=1.,
            label='CTF Downsampling factor',
            condition='not recalculate',
            help='Set to 1 for no downsampling. Non-integer downsample '
            'factors are possible. This downsampling is only used for '
            'estimating the CTF and it does not affect any further '
            'calculation. Ideally the estimation of the CTF is optimal '
            'when the Thon rings are not too concentrated at the origin '
            '(too small to be seen) and not occupying the whole power '
            'spectrum (since this downsampling might entail aliasing).')

        self._defineProcessParams(form)

        # Adjacent string literals are concatenated as they are, so every
        # fragment has to carry its own separating space.
        line = form.addLine(
            'Resolution',
            condition='not recalculate',
            help='Give a value in digital frequency (i.e. between 0.0 and '
            '0.5). These cut-offs prevent the typical peak at the center '
            'of the PSD and high-resolution terms where only noise '
            'exists, to interfere with CTF estimation. The default lowest '
            'value is 0.05 but for micrographs with a very fine sampling '
            'this may be lowered towards 0. The default highest value is '
            '0.35, but it should be increased for micrographs with '
            'signals extending beyond this value. However, if your '
            'micrographs extend further than 0.35, you should consider '
            'sampling them at a finer rate.')
        line.addParam('lowRes', FloatParam, default=0.05, label='Lowest')
        line.addParam('highRes', FloatParam, default=0.35, label='Highest')
        # Switched (microns) by 'in microns' by fail in the identifier with jquery
        line = form.addLine(
            'Defocus search range (microns)',
            expertLevel=LEVEL_ADVANCED,
            condition='not recalculate',
            help='Select _minimum_ and _maximum_ values for defocus search '
            'range (in microns). Underfocus is represented by a positive '
            'number.')
        line.addParam('minDefocus', FloatParam, default=0.25, label='Min')
        line.addParam('maxDefocus', FloatParam, default=4., label='Max')

        form.addParam(
            'windowSize',
            IntParam,
            default=256,
            expertLevel=LEVEL_ADVANCED,
            label='Window size',
            condition='not recalculate',
            help='The PSD is estimated from small patches of this size. '
            'Bigger patches allow identifying more details. However, '
            'since there are fewer windows, estimations are noisier.')

        form.addParallelSection(threads=2, mpi=1)

    def _defineProcessParams(self, form):
        """ This method should be implemented by subclasses
        to add other parameter relatives to the specific operation."""
        pass

    #--------------------------- INSERT steps functions --------------------------------------------
    def _insertAllSteps(self):
        """ Insert the steps to perform CTF estimation, or re-estimation, on a set of micrographs.
        """
        deps = [
        ]  # Store all steps ids, final step createOutput depends on all of them
        fDeps = []

        if not self.recalculate:
            deps = self._insertEstimationSteps()
            # Insert step to create output objects
            fDeps = self._insertFinalSteps(deps)
        else:
            if self.isFirstTime:
                self._insertPreviousSteps(
                )  # Insert previous estimation or re-estimation an so on...
                self.isFirstTime.set(False)
            fDeps = self._insertRecalculateSteps()

        self._insertFunctionStep('createOutputStep', prerequisites=fDeps)

    def _insertFinalSteps(self, deps):
        """ This should be implemented in subclasses"""
        return deps

    def _insertEstimationSteps(self):
        estimDeps = []
        self._defineValues()
        self._prepareCommand()
        # For each micrograph insert the steps to process it
        for micFn, micDir, mic in self._iterMicrographs():
            # CTF estimation
            # Make estimation steps independent between them
            stepId = self._insertFunctionStep(
                '_estimateCTF',
                micFn,
                micDir,
                mic.getMicName(),
                prerequisites=[]
            )  # Make estimation steps independent between them
            estimDeps.append(stepId)
        return estimDeps

    def _insertRecalculateSteps(self):
        """ Insert the copy and re-estimation steps of every enabled CTF
        that has a comment in the set read from the sqliteFile param.

        Returns:
            list with the ids of the inserted '_restimateCTF' steps.
        """
        self.recalculateSet = SetOfCTF(filename=self.sqliteFile.get(),
                                       objDoStore=False)
        stepIds = []
        for ctf in self.recalculateSet:
            comment = ctf.getObjComment()
            if not ctf.isEnabled() or not comment:
                continue
            ctfId = ctf.getObjId()
            # The directory of the micrograph is copied first...
            copyId = self._insertFunctionStep('copyMicDirectoryStep', ctfId)
            # ...and the re-estimation only depends on its own copy step
            stepIds.append(self._insertFunctionStep('_restimateCTF',
                                                    ctf.getObjId(),
                                                    prerequisites=[copyId]))
        return stepIds

    #--------------------------- STEPS functions ---------------------------------------------------
    def _estimateCTF(self, micFn, micDir, micName):
        """ Estimate the CTF of one micrograph with the specific program.
        This implementation always raises an exception.

        Params:
         micFn: micrograph filename
         micDir: micrograph directory
         micName: micrograph name
        """
        errorMsg = Message.ERROR_NO_EST_CTF
        raise Exception(errorMsg)

    def _restimateCTF(self, id):
        """ Re-estimate one CTF with the specific program.
        This implementation always raises an exception.

        Params:
         id: object id of the CTF to re-estimate (the value passed when
             the step is inserted by _insertRecalculateSteps)
        """
        errorMsg = Message.ERROR_NO_EST_CTF
        raise Exception(errorMsg)

    def copyMicDirectoryStep(self, id):
        """ Copy the directory tree of a micrograph for recalculation.

        Params:
            id: key of the CTF model in self.recalculateSet.
        Raises:
            Exception: if the micrograph directory is missing after
                trying to create it.
        """
        ctfModel = self.recalculateSet[id]
        mic = ctfModel.getMicrograph()

        prevDir = self._getPrevMicDir(ctfModel)
        micDir = self._getMicrographDir(mic)
        # Nothing to copy when both directories are the same
        if prevDir == micDir:
            return

        # Create micrograph dir under extra directory
        makePath(micDir)
        if not exists(micDir):
            raise Exception("No created dir: %s " % micDir)
        copyTree(prevDir, micDir)

    def _createNewCtfModel(self, mic):
        """ This should be implemented in subclasses
        in order to create a CTF model 
        """
        pass

    def createOutputStep(self):
        """ Create the output of the estimation or recalculate protocols.

        When recalculating, every CTF model of self.recalculateSet is
        appended to a new set; the enabled models that have a comment are
        first updated with the model returned by _createNewCtfModel. The
        defocus range is computed over the whole set, while the summary
        counter only receives the re-estimated models.
        Otherwise the work is delegated to self._createOutputStep().
        """
        if not self.recalculate:
            self._createOutputStep()
            return

        ctfSet = self._createSetOfCTF("_recalculated")
        prevRun = self.continueRun.get()
        oldCtfSet = getattr(prevRun if prevRun is not None else self,
                            'outputCTF')
        micSet = oldCtfSet.getMicrographs()
        # README: We suppose this is reading the ctf selection
        # (with enabled/disabled) to only consider the enabled ones
        # in the final SetOfCTF

        #TODO: maybe we can remove the need of the extra text file
        # with the recalculate parameters
        defocusList = []   # defocus values of every CTF in the new set
        recalDefocus = []  # defocus values of the re-estimated CTFs only
        for ctfModel in self.recalculateSet:
            recalculated = bool(ctfModel.isEnabled()
                                and ctfModel.getObjComment())
            if recalculated:
                mic = ctfModel.getMicrograph()
                # Update the CTF models that were recalculated; the id is
                # not copied since it is already correct
                ctfModel.copy(self._createNewCtfModel(mic), copyId=False)
                ctfModel.setEnabled(True)
            ctfSet.append(ctfModel)
            defocusPair = [ctfModel.getDefocusU(), ctfModel.getDefocusV()]
            defocusList.extend(defocusPair)
            if recalculated:
                recalDefocus.extend(defocusPair)
        ctfSet.setMicrographs(micSet)
        self._defineOutputs(outputCTF=ctfSet)
        self._defineCtfRelation(micSet, ctfSet)

        self._defocusMaxMin(defocusList)
        # _ctfCounter expects two values per CTF; passing only the
        # re-estimated ones makes the summary report how many CTFs were
        # actually re-estimated instead of the size of the whole set
        self._ctfCounter(recalDefocus)

    #--------------------------- INFO functions ----------------------------------------------------
    def _summary(self):
        summary = []

        if self.recalculate:
            if self.isFinished():
                if self.summaryVar.hasValue():
                    summary.append(self.summaryVar.get())
            else:
                summary.append(Message.TEXT_NO_CTF_READY)
        else:
            if not hasattr(self, 'outputCTF'):
                summary.append(Message.TEXT_NO_CTF_READY)
            else:
                summary.append("CTF estimation of %d micrographs." %
                               self.inputMicrographs.get().getSize())

        return summary

    def _methods(self):
        methods = []

        if hasattr(self, 'outputCTF') and self.isFinished():
            methods.append(self.methodsVar.get())
        else:
            methods.append(Message.TEXT_NO_CTF_READY)

        return methods

    #--------------------------- UTILS functions ---------------------------------------------------
    def _defineValues(self):
        """ This function get some parameters of the micrographs"""
        # Get pointer to input micrographs
        self.inputMics = self.inputMicrographs.get()
        acquisition = self.inputMics.getAcquisition()

        self._params = {
            'voltage': acquisition.getVoltage(),
            'sphericalAberration': acquisition.getSphericalAberration(),
            'magnification': acquisition.getMagnification(),
            'ampContrast': acquisition.getAmplitudeContrast(),
            'samplingRate': self.inputMics.getSamplingRate(),
            'scannedPixelSize': self.inputMics.getScannedPixelSize(),
            'windowSize': self.windowSize.get(),
            'lowRes': self.lowRes.get(),
            'highRes': self.highRes.get(),
            # Convert from microns to Amstrongs
            'minDefocus': self.minDefocus.get() * 1e+4,
            'maxDefocus': self.maxDefocus.get() * 1e+4
        }

    def _defineRecalValues(self, ctfModel):
        """ This function get the acquisition info of the micrographs"""
        mic = ctfModel.getMicrograph()

        acquisition = mic.getAcquisition()
        scannedPixelSize = mic.getSamplingRate(
        ) * acquisition.getMagnification() / 10000
        self._params = {
            'voltage': acquisition.getVoltage(),
            'sphericalAberration': acquisition.getSphericalAberration(),
            'magnification': acquisition.getMagnification(),
            'ampContrast': acquisition.getAmplitudeContrast(),
            'scannedPixelSize': scannedPixelSize,
            'samplingRate': mic.getSamplingRate()
        }

    def _getPrevMicDir(self, ctfModel):
        return dirname(ctfModel.getPsdFile())

    def _ctfCounter(self, values):
        """ This function return the number of CTFs that was recalculated.
        """
        numberOfCTF = len(values) / 2
        msg = "CTF Re-estimation of %d micrographs" % numberOfCTF
        self.summaryVar.set(msg)

    def _getInputCtf(self):
        if self.continueRecal:
            sqliteFile = self._getPath()


#             return self.outputCTF.get()
        else:
            return self.inputCtf.get()

    def _getMicrographDir(self, mic):
        """ Return the extra path used for the results of the micrograph,
        named after the result of removeBaseExt on its file name. """
        micBase = removeBaseExt(mic.getFileName())
        return self._getExtraPath(micBase)

    def _iterMicrographs(self):
        """ Iterate over micrographs and yield
        micrograph name and a directory to process.
        """
        for mic in self.inputMics:
            micFn = mic.getFileName()
            micDir = self._getMicrographDir(mic)
            yield (micFn, micDir, mic)

    def _prepareCommand(self):
        """ This function should be implemented to prepare the
        arguments template if doesn't change for each micrograph
        After this method self._program and self._args should be set. 
        """
        pass

    def _defocusMaxMin(self, defocusList):
        """ This function return the minimum and maximum of the defocus
        of a SetOfMicrographs.
        """
        minimum = float(min(defocusList)) / 10000
        maximum = float(max(defocusList)) / 10000
        msg = "The range of micrograph's experimental defocus was %(minimum)0.3f - %(maximum)0.3f microns. " % locals(
        )

        self.methodsVar.set(msg)
class ProtCTFMicrographs(ProtMicrographs):
    """ Base class for all protocols that estimates the CTF"""
    def __init__(self, **kwargs):
        """ Initialize the protocol, forwarding all keyword arguments
        to EMProtocol.__init__. """
        # NOTE(review): EMProtocol.__init__ is invoked directly although the
        # declared base class is ProtMicrographs -- presumably an ancestor;
        # confirm.
        EMProtocol.__init__(self, **kwargs)
        # Select the STEPS_PARALLEL execution mode for this protocol
        self.stepsExecutionMode = STEPS_PARALLEL
        # Flag checked (and reset to False) by _insertAllSteps when
        # recalculating
        self.isFirstTime = Boolean(False)

    #--------------------------- DEFINE param functions ------------------------
    def _defineParams(self, form):
        """ Declare the parameters shown in the protocol form.

        The form holds, in this order: the recalculate options, the input
        micrographs, the CTF downsampling factor, the parameters added by
        _defineProcessParams, the resolution and defocus ranges, the
        window size and the parallel section.

        Params:
            form: form object receiving the sections, params and lines.
        """
        # Conditions shared by the parameters below
        whenRecalc = 'recalculate'
        whenEstimate = 'not recalculate'

        downFactorHelp = ('Set to 1 for no downsampling. Non-integer downsample '
                          'factors are possible. This downsampling is only used '
                          'for estimating the CTF and it does not affect any '
                          'further calculation. Ideally the estimation of the '
                          'CTF is optimal when the Thon rings are not too '
                          'concentrated at the origin (too small to be seen) '
                          'and not occupying the whole power spectrum (since '
                          'this downsampling might entail aliasing).')
        resolutionHelp = ('Give a value in digital frequency '
                          '(i.e. between 0.0 and 0.5). These cut-offs '
                          'prevent the typical peak at the center of the'
                          ' PSD and high-resolution terms where only '
                          'noise exists, to interfere with CTF '
                          'estimation. The default lowest value is 0.05 '
                          'but for micrographs with a very fine sampling '
                          'this may be lowered towards 0. The default '
                          'highest value is 0.35, but it should be '
                          'increased for micrographs with signals '
                          'extending beyond this value. However, if '
                          'your micrographs extend further than 0.35, '
                          'you should consider sampling them at a finer '
                          'rate.')
        defocusHelp = ('Select _minimum_ and _maximum_ values for '
                       'defocus search range (in microns). Underfocus'
                       ' is represented by a positive number.')
        windowHelp = ('The PSD is estimated from small patches of this '
                      'size. Bigger patches allow identifying more '
                      'details. However, since there are fewer windows, '
                      'estimations are noisier.')

        form.addSection(label=Message.LABEL_CTF_ESTI)
        form.addParam('recalculate', BooleanParam, default=False,
                      condition=whenRecalc, label="Do recalculate ctf?")
        form.addParam('continueRun', PointerParam, allowsNull=True,
                      condition=whenRecalc, label="Input previous run",
                      pointerClass=self.getClassName())
        form.addHidden('sqliteFile', FileParam, condition=whenRecalc,
                       allowsNull=True)

        form.addParam('inputMicrographs', PointerParam, important=True,
                      condition=whenEstimate,
                      label=Message.LABEL_INPUT_MIC,
                      pointerClass='SetOfMicrographs')
        form.addParam('ctfDownFactor', FloatParam, default=1.,
                      label='CTF Downsampling factor',
                      condition=whenEstimate, help=downFactorHelp)

        # Parameters specific to the concrete estimation program
        self._defineProcessParams(form)

        resolutionLine = form.addLine('Resolution', condition=whenEstimate,
                                      help=resolutionHelp)
        resolutionLine.addParam('lowRes', FloatParam, default=0.05,
                                label='Lowest')
        resolutionLine.addParam('highRes', FloatParam, default=0.35,
                                label='Highest')

        defocusLine = form.addLine('Defocus search range (microns)',
                                   condition=whenEstimate,
                                   expertLevel=LEVEL_ADVANCED,
                                   help=defocusHelp)
        defocusLine.addParam('minDefocus', FloatParam, default=0.25,
                             label='Min')
        defocusLine.addParam('maxDefocus', FloatParam, default=4.,
                             label='Max')

        form.addParam('windowSize', IntParam, default=256,
                      expertLevel=LEVEL_ADVANCED, label='Window size',
                      condition=whenEstimate, help=windowHelp)

        form.addParallelSection(threads=2, mpi=1)

    def _defineProcessParams(self, form):
        """ This method should be implemented by subclasses
        to add other parameter relatives to the specific operation."""
        pass

    #--------------------------- INSERT steps functions ------------------------
    def _insertAllSteps(self):
        """ Insert the steps to perform CTF estimation, or re-estimation,
        on a set of micrographs.
        """
        # Store all steps ids, final step createOutput depends on all of them
        deps = []
        fDeps = []
        self.insertedDict = {}

        if not self.recalculate:
            deps = self._insertEstimationSteps(self.insertedDict,
                                               self.inputMicrographs.get())
            # Insert step to create output objects
            fDeps = self._insertFinalSteps(deps)
            # For the streaming mode, the steps function have a 'wait' flag
            # that can be turned on/off. For example, here we insert the
            # createOutputStep but it wait=True, which means that can not be
            # executed until it is set to False
            # (when the input micrographs stream is closed)
            waitCondition = self._getFirstJoinStepName() == 'createOutputStep'
        else:
            if self.isFirstTime:
                # Insert previous estimation or re-estimation an so on...
                self._insertPreviousSteps()
                self.isFirstTime.set(False)
            fDeps = self._insertRecalculateSteps()
            # For now the streaming is not allowed for recalculate CTF
            waitCondition = False

        self._insertFunctionStep('createOutputStep',
                                 prerequisites=fDeps,
                                 wait=waitCondition)

    def _insertFinalSteps(self, deps):
        """ This should be implemented in subclasses"""
        return deps

    def _getFirstJoinStepName(self):
        # This function will be used for streamming, to check which is
        # the first function that need to wait for all micrographs
        # to have completed, this can be overriden in subclasses
        # (e.g., in Xmipp 'sortPSDStep')
        return 'createOutputStep'

    def _getFirstJoinStep(self):
        for s in self._steps:
            if s.funcName == self._getFirstJoinStepName():
                return s
        return None

    def _checkNewMicrographs(self, micSet, outputStep):
        """ Check if there are new micrographs to be processed
        and add the necessary steps.
        """
        newMics = []
        for micFn, _, mic in self._iterMicrographs(micSet):
            if mic.getMicName() not in self.insertedDict:
                newMics.append(micFn)

        if newMics:
            fDeps = self._insertEstimationSteps(self.insertedDict, micSet)
            self._storeSteps()
            self._numberOfSteps.set(len(self._steps))
            self._store(self._numberOfSteps)
            if outputStep:
                outputStep.addPrerequisites(*fDeps)

        return newMics

    def _checkNewCTFs(self, micSet):
        """ Check for already computed CTFs and update the output set.

        Params:
            micSet: micrographs whose done marker files are checked.
        Returns:
            tuple (ctfSet, newCTFs): the set read from 'ctfs.sqlite' with
            the new CTF models appended, and the list with the ids of the
            micrographs those models were created for.
        """
        ctfSet = SetOfCTF(filename=self._getPath('ctfs.sqlite'))
        ctfSet.setMicrographs(self.inputMicrographs.get())

        # Ids of the CTFs that are already stored in the set
        storedIds = set()
        for ctf in ctfSet:
            storedIds.add(ctf.getObjId())

        if not storedIds:
            ctfSet.setStreamState(ctfSet.STREAM_OPEN)
        else:
            # It means there are previous ctfs computed
            ctfSet.loadAllProperties()
            if ctfSet.getSize():
                ctfSet.enableAppend()

        newCTFs = []
        for micFn, micDir, mic in self._iterMicrographs(micSet):
            isDone = exists(self._getMicrographDone(micDir))
            if isDone and mic.getObjId() not in storedIds:
                ctfSet.append(self._createCtfModel(mic))
                newCTFs.append(mic.getObjId())

        return ctfSet, newCTFs

    def _stepsCheck(self):
        # For now the streaming is not allowed for recalculate CTF
        if self.recalculate:
            return

        # Check if there are new micrographs to process
        micFn = self.inputMicrographs.get().getFileName()
        micSet = SetOfMicrographs(filename=micFn)
        micSet.loadAllProperties()
        streamClosed = micSet.isStreamClosed()

        outputStep = self._getFirstJoinStep()
        self._checkNewMicrographs(micSet, outputStep)
        ctfSet, newCTFs = self._checkNewCTFs(micSet)

        if ctfSet is None:
            return

        endCTFs = streamClosed and micSet.getSize() == ctfSet.getSize()
        if newCTFs:
            # Check if it is the first time we are registering CTF to
            # create the CTF_RELATION only once
            firstTime = not self.hasAttribute('outputCTF')
            ctfSet.setMicrographs(self.inputMics)
            self._computeDefocusRange(ctfSet)
            streamMode = ctfSet.STREAM_CLOSED if endCTFs else ctfSet.STREAM_OPEN
            self._updateOutputSet('outputCTF', ctfSet, streamMode)
            if firstTime:  # define relation just once
                self._defineCtfRelation(self.inputMics, ctfSet)
        else:
            ctfSet.close()

        if outputStep and outputStep.isWaiting() and endCTFs:
            outputStep.setStatus(STATUS_NEW)

        micSet.close()

    def _insertEstimationSteps(self, insertedDict, inputMics):
        estimDeps = []
        self._defineValues()
        self._prepareCommand()
        # For each micrograph insert the steps to process it
        for micFn, micDir, mic in self._iterMicrographs(inputMics):
            if mic.getMicName() not in insertedDict:
                # CTF estimation
                # Make estimation steps independent between them
                stepId = self._insertFunctionStep('_estimateCTF',
                                                  micFn,
                                                  micDir,
                                                  mic.getMicName(),
                                                  prerequisites=[])
                estimDeps.append(stepId)
                insertedDict[mic.getMicName()] = stepId
        return estimDeps

    def _insertRecalculateSteps(self):
        """ Insert the copy and re-estimation steps of every enabled CTF
        that has a comment in the set read from the sqliteFile param.

        Returns:
            list with the ids of the inserted '_restimateCTF' steps.
        """
        self.recalculateSet = SetOfCTF(filename=self.sqliteFile.get(),
                                       objDoStore=False)
        stepIds = []
        for ctf in self.recalculateSet:
            comment = ctf.getObjComment()
            if not ctf.isEnabled() or not comment:
                continue
            ctfId = ctf.getObjId()
            # The directory of the micrograph is copied first...
            copyId = self._insertFunctionStep('copyMicDirectoryStep', ctfId)
            # ...and the re-estimation only depends on its own copy step
            stepIds.append(self._insertFunctionStep('_restimateCTF',
                                                    ctf.getObjId(),
                                                    prerequisites=[copyId]))
        return stepIds

    #--------------------------- STEPS functions -------------------------------
    def _estimateCTF(self, micFn, micDir, micName):
        """ Estimate the CTF of one micrograph with the specific program.
        This implementation always raises an exception.

        Params:
         micFn: micrograph filename
         micDir: micrograph directory
         micName: micrograph name
        """
        errorMsg = Message.ERROR_NO_EST_CTF
        raise Exception(errorMsg)

    def _restimateCTF(self, micId):
        """ Re-estimate one CTF with the specific program.
        This implementation always raises an exception.

        Params:
         micId: object id of the CTF to re-estimate (the value passed
                when the step is inserted by _insertRecalculateSteps)
        """
        errorMsg = Message.ERROR_NO_EST_CTF
        raise Exception(errorMsg)

    def copyMicDirectoryStep(self, micId):
        """ Copy the directory tree of a micrograph for recalculation.

        Params:
            micId: key of the CTF model in self.recalculateSet.
        Raises:
            Exception: if the micrograph directory is missing after
                trying to create it.
        """
        ctfModel = self.recalculateSet[micId]
        mic = ctfModel.getMicrograph()

        prevDir = self._getPrevMicDir(ctfModel)
        micDir = self._getMicrographDir(mic)
        # Nothing to copy when both directories are the same
        if prevDir == micDir:
            return

        # Create micrograph dir under extra directory
        makePath(micDir)
        if not exists(micDir):
            raise Exception("No created dir: %s " % micDir)
        copyTree(prevDir, micDir)

    def _createCtfModel(self, mic):
        """ This should be implemented in subclasses
        in order to create a CTF model from program results.
        """
        pass

    def createOutputStep(self):
        """ Create the output of the estimation or recalculate protocols.

        When recalculating, every CTF model of self.recalculateSet is
        appended to a new set; the enabled models that have a comment are
        first updated with the model returned by _createCtfModel, and
        their number is stored in the summary.
        Otherwise the work is delegated to self._createOutputStep().
        """
        if not self.recalculate:
            self._createOutputStep()
            return

        recalculatedSet = self._createSetOfCTF("_recalculated")
        # The micrographs come from the previous run when there is one
        sourceProt = self.continueRun.get() or self
        micSet = sourceProt.outputCTF.getMicrographs()
        # We suppose this is reading the ctf selection
        # (with enabled/disabled) to only consider the enabled ones
        # in the final SetOfCTF
        #TODO: maybe we can remove the need of the extra text file
        # with the recalculate parameters
        recalculatedCount = 0
        for ctfModel in self.recalculateSet:
            if ctfModel.isEnabled() and ctfModel.getObjComment():
                # The id is not copied since it is already correct
                updatedCtf = self._createCtfModel(ctfModel.getMicrograph(),
                                                  updateSampling=False)
                ctfModel.copy(updatedCtf, copyId=False)
                ctfModel.setEnabled(True)
                recalculatedCount += 1
            recalculatedSet.append(ctfModel)

        recalculatedSet.setMicrographs(micSet)
        self._defineOutputs(outputCTF=recalculatedSet)
        self._defineCtfRelation(micSet, recalculatedSet)
        self._computeDefocusRange(recalculatedSet)
        self.summaryVar.set("CTF Re-estimation of %d micrographs" %
                            recalculatedCount)

    #--------------------------- INFO functions --------------------------------
    def _summary(self):
        summary = []

        if self.recalculate:
            if self.isFinished():
                if self.summaryVar.hasValue():
                    summary.append(self.summaryVar.get())
            else:
                summary.append(Message.TEXT_NO_CTF_READY)
        else:
            if not hasattr(self, 'outputCTF'):
                summary.append(Message.TEXT_NO_CTF_READY)
            else:
                summary.append("CTF estimation of %d micrographs." %
                               self.inputMicrographs.get().getSize())

        return summary

    def _methods(self):
        methods = []

        if hasattr(self, 'outputCTF') and self.isFinished():
            methods.append(self.methodsVar.get())
        else:
            methods.append(Message.TEXT_NO_CTF_READY)

        return methods

    #--------------------------- UTILS functions -------------------------------
    def _defineValues(self):
        """ This function get some parameters of the micrographs"""
        # Get pointer to input micrographs
        self.inputMics = self.inputMicrographs.get()
        acq = self.inputMics.getAcquisition()

        self._params = {
            'voltage': acq.getVoltage(),
            'sphericalAberration': acq.getSphericalAberration(),
            'magnification': acq.getMagnification(),
            'ampContrast': acq.getAmplitudeContrast(),
            'samplingRate': self.inputMics.getSamplingRate(),
            'scannedPixelSize': self.inputMics.getScannedPixelSize(),
            'windowSize': self.windowSize.get(),
            'lowRes': self.lowRes.get(),
            'highRes': self.highRes.get(),
            # Convert from microns to Amstrongs
            'minDefocus': self.minDefocus.get() * 1e+4,
            'maxDefocus': self.maxDefocus.get() * 1e+4
        }

    def _defineRecalValues(self, ctfModel):
        """ This function get the acquisition info of the micrographs"""
        mic = ctfModel.getMicrograph()

        acq = mic.getAcquisition()
        mag = acq.getMagnification()
        scannedPixelSize = mic.getSamplingRate() * mag / 10000
        self._params = {
            'voltage': acq.getVoltage(),
            'sphericalAberration': acq.getSphericalAberration(),
            'magnification': mag,
            'ampContrast': acq.getAmplitudeContrast(),
            'scannedPixelSize': scannedPixelSize,
            'samplingRate': mic.getSamplingRate()
        }

    def _getPrevMicDir(self, ctfModel):
        return dirname(ctfModel.getPsdFile())

    def _ctfCounter(self, values):
        """ This function return the number of CTFs that was recalculated.
        """
        numberOfCTF = len(values) / 2
        msg = "CTF Re-estimation of %d micrographs" % numberOfCTF
        self.summaryVar.set(msg)

    def _getInputCtf(self):
        if self.continueRecal:
            sqliteFile = self._getPath()
        #             return self.outputCTF.get()
        else:
            return self.inputCtf.get()

    def _getMicrographDir(self, mic):
        """ Return the extra path used for the results of the micrograph,
        named after the result of removeBaseExt on its file name. """
        micBase = removeBaseExt(mic.getFileName())
        return self._getExtraPath(micBase)

    def _getMicrographDone(self, micDir):
        """ Return the file that is used as a flag of termination. """
        return join(micDir, 'done.txt')

    def _writeMicrographDone(self, micDir):
        open(self._getMicrographDone(micDir), 'w').close()

    def _iterMicrographs(self, inputMics=None):
        """ Iterate over micrographs and yield
        micrograph name and a directory to process.
        """
        if inputMics is None:
            inputMics = self.inputMics

        for mic in inputMics:
            micFn = mic.getFileName()
            micDir = self._getMicrographDir(mic)
            yield (micFn, micDir, mic)

    def _prepareCommand(self):
        """ This function should be implemented to prepare the
        arguments template if doesn't change for each micrograph
        After this method self._program and self._args should be set. 
        """
        pass

    def _computeDefocusRange(self, ctfSet):
        """ Compute the minimum and maximu defoucs in a set of CTFs.
        The protocol methodsVar will be updated with new values.

        Params:
            ctfSet: the set of CTFs to compute min and max
        """
        defocusList = []

        for ctf in ctfSet:
            defocusList.append(ctf.getDefocusU())
            defocusList.append(ctf.getDefocusV())

        minD = min(defocusList) / 10000.
        maxD = max(defocusList) / 10000.

        self.methodsVar.set(
            "The range of micrograph's experimental defocus was"
            " %0.3f - %0.3f microns. " % (minD, maxD))

        self._store(self.methodsVar)

    def _defocusMaxMin(self, defocusList):
        """ This function return the minimum and maximum of the defocus
        of a SetOfMicrographs.
        """
        raise Exception("DEPRECATED")
 def __init__(self, **args):
     """ Initialize the protocol, forwarding all keyword arguments
     to EMProtocol.__init__. """
     EMProtocol.__init__(self, **args)
     # Select the STEPS_PARALLEL execution mode for this protocol
     self.stepsExecutionMode = STEPS_PARALLEL
     # Boolean flag initialized to False
     self.isFirstTime = Boolean(False)
class ProtCTFMicrographs(ProtMicrographs):
    """ Base class for all protocols that estimate the CTF. """
    
    def __init__(self, **args):
        """ Forward args to EMProtocol.__init__ and set the class defaults. """
        EMProtocol.__init__(self, **args)
        self.stepsExecutionMode = STEPS_PARALLEL
        # Checked (and reset) by _insertAllSteps when recalculating
        self.isFirstTime = Boolean(False)
    
    #--------------------------- DEFINE param functions --------------------------------------------
    def _defineParams(self, form):
        """ Define the form parameters common to the CTF estimation
        protocols. Subclasses add their own through _defineProcessParams.
        """
        form.addSection(label=Message.LABEL_CTF_ESTI)
        form.addParam('recalculate', BooleanParam, default=False, condition='recalculate',
                      label="Do recalculate ctf?")
        
        form.addParam('continueRun', PointerParam, allowsNull=True,
                      condition='recalculate', label="Input previous run",
                      pointerClass=self.getClassName())
        form.addHidden('sqliteFile', FileParam, condition='recalculate',
                       allowsNull=True)
        
        form.addParam('inputMicrographs', PointerParam, important=True,
                       condition='not recalculate', label=Message.LABEL_INPUT_MIC,
                       pointerClass='SetOfMicrographs')
        form.addParam('ctfDownFactor', FloatParam, default=1.,
                      label='CTF Downsampling factor',
                      condition='not recalculate',
                      help='Set to 1 for no downsampling. Non-integer downsample factors are possible. '
                      'This downsampling is only used for estimating the CTF and it does not affect '
                      'any further calculation. Ideally the estimation of the CTF is optimal when '
                      'the Thon rings are not too concentrated at the origin (too small to be seen) '
                      'and not occupying the whole power spectrum (since this downsampling might '
                      'entail aliasing).')
        
        self._defineProcessParams(form)

        # Adjacent literals are concatenated verbatim, so every piece must
        # carry its own separating space.
        line = form.addLine('Resolution', condition='not recalculate',
                            help='Give a value in digital frequency (i.e. between 0.0 and 0.5). '
                                 'These cut-offs prevent the typical peak at the center of the PSD and high-resolution '
                                 'terms where only noise exists, to interfere with CTF estimation. The default lowest '
                                 'value is 0.05 but for micrographs with a very fine sampling this may be lowered towards 0. '
                                 'The default highest value is 0.35, but it should be increased for micrographs with '
                                 'signals extending beyond this value. However, if your micrographs extend further than '
                                 '0.35, you should consider sampling them at a finer rate.')
        line.addParam('lowRes', FloatParam, default=0.05,
                      label='Lowest')
        line.addParam('highRes', FloatParam, default=0.35,
                      label='Highest')
        # Switched (microns) by 'in microns' by fail in the identifier with jquery
        line = form.addLine('Defocus search range (microns)', expertLevel=LEVEL_ADVANCED,
                            condition='not recalculate',
                            help='Select _minimum_ and _maximum_ values for defocus search range (in microns). '
                                 'Underfocus is represented by a positive number.')
        line.addParam('minDefocus', FloatParam, default=0.25, 
                      label='Min')
        line.addParam('maxDefocus', FloatParam, default=4.,
                      label='Max')
        
        form.addParam('windowSize', IntParam, default=256, expertLevel=LEVEL_ADVANCED,
                      label='Window size', condition='not recalculate',
                      help='The PSD is estimated from small patches of this size. Bigger patches '
                           'allow identifying more details. However, since there are fewer windows, '
                           'estimations are noisier.')
        
        form.addParallelSection(threads=2, mpi=1)       
    
    def _defineProcessParams(self, form):
        """ This method should be implemented by subclasses
        to add other parameters related to the specific operation."""
        pass
    
    #--------------------------- INSERT steps functions --------------------------------------------
    def _insertAllSteps(self):
        """ Insert the steps to perform CTF estimation, or re-estimation, on a set of micrographs.
        The final createOutputStep depends on the ids collected here.
        """
        if not self.recalculate:
            deps = self._insertEstimationSteps()
            # Give subclasses the chance to insert steps before the output
            fDeps = self._insertFinalSteps(deps)
        else:
            if self.isFirstTime:
                self._insertPreviousSteps() # Insert previous estimation or re-estimation an so on...
                self.isFirstTime.set(False)
            fDeps = self._insertRecalculateSteps()
        
        self._insertFunctionStep('createOutputStep', prerequisites=fDeps)
    
    def _insertFinalSteps(self, deps):
        """ This should be implemented in subclasses. The default
        returns the received deps unchanged.
        """
        return deps
    
    def _insertEstimationSteps(self):
        """ Insert one '_estimateCTF' step per input micrograph and
        return the list with the ids of the inserted steps.
        """
        estimDeps = []
        self._defineValues()
        self._prepareCommand()
        # For each micrograph insert the steps to process it
        for micFn, micDir, _ in self._iterMicrographs():
            # CTF estimation
            # Make estimation steps independent between them
            stepId = self._insertFunctionStep('_estimateCTF', micFn, micDir,
                                              prerequisites=[])
            estimDeps.append(stepId)
        return estimDeps
    
    def _insertRecalculateSteps(self):
        """ Insert a copy step plus a '_restimateCTF' step for every
        enabled CTF of the sqliteFile set that has a comment, and
        return the ids of the re-estimation steps.
        """
        recalDeps = []
        # For each psd insert the steps to process it
        self.recalculateSet = SetOfCTF(filename=self.sqliteFile.get(), objDoStore=False)
        for ctf in self.recalculateSet:
            line = ctf.getObjComment()
            if ctf.isEnabled() and line:
                # CTF Re-estimation
                copyId = self._insertFunctionStep('copyMicDirectoryStep', ctf.getObjId())
                # Make estimation steps independent between them
                stepId = self._insertFunctionStep('_restimateCTF', ctf.getObjId(), prerequisites=[copyId])
                recalDeps.append(stepId)
        return recalDeps
    
    #--------------------------- STEPS functions ---------------------------------------------------
    def _estimateCTF(self, micFn, micDir):
        """ Do the CTF estimation with the specific program
        and the parameters required.
        Params:
         micFn: micrograph filename
         micDir: micrograph directory
        """
        raise Exception(Message.ERROR_NO_EST_CTF)
    
    def _restimateCTF(self, id):
        """ Do the CTF re-estimation with the specific program
        and the parameters required.
        Params:
         id: object id of the CTF to re-estimate, as returned by
             getObjId() of an item of self.recalculateSet
        """
        raise Exception(Message.ERROR_NO_EST_CTF)
    
    def copyMicDirectoryStep(self, id):
        """ Copy micrograph's directory tree for recalculation.
        Params:
         id: object id of the CTF inside self.recalculateSet
        """
        ctfModel = self.recalculateSet[id]
        mic = ctfModel.getMicrograph()
        
        prevDir = self._getPrevMicDir(ctfModel)
        micDir = self._getMicrographDir(mic)
        if not prevDir == micDir:
            # Create micrograph dir under extra directory
            makePath(micDir)
            if not exists(micDir):
                raise Exception("No created dir: %s " % micDir)
            copyTree(prevDir, micDir)
    
    def _createNewCtfModel(self, mic):
        """ This should be implemented in subclasses
        in order to create a CTF model 
        """
        pass
    
    def createOutputStep(self):
        """ This function is shared by Xmipp and CTFfind
        estimation, or recalculate, protocols.
        If it is a recalculation, it iterates over each CTF model,
        updates the recalculated ones with the new values and registers
        the resulting set as output.
        Else, the function _createOutputStep of each subclass is called.
        """
        if self.recalculate:
            ctfSet = self._createSetOfCTF("_recalculated")
            defocusList = []
            # Defocus values of the re-estimated CTFs only (two per CTF),
            # so that the summary does not count the untouched models
            recalDefocusList = []
            if self.continueRun.get() is not None:
                oldCtfSet = getattr(self.continueRun.get(), 'outputCTF')
            else:
                oldCtfSet = getattr(self, 'outputCTF')
            micSet = oldCtfSet.getMicrographs()
            # README: We suppose this is reading the ctf selection (with enabled/disabled)
            # to only consider the enabled ones in the final SetOfCTF
            
            #TODO: maybe we can remove the need of the extra text file
            # with the recalculate parameters
            for ctfModel in self.recalculateSet:
                if ctfModel.isEnabled() and ctfModel.getObjComment():
                    mic = ctfModel.getMicrograph()
                    # Update the CTF models that where recalculated
                    # and append later to the set
                    # we dont want to copy the id here since it is already correct
                    ctfModel.copy(self._createNewCtfModel(mic), copyId=False)
                    ctfModel.setEnabled(True)
                    recalDefocusList.append(ctfModel.getDefocusU())
                    recalDefocusList.append(ctfModel.getDefocusV())
                ctfSet.append(ctfModel)
                # save the values of defocus for each micrograph in a list
                defocusList.append(ctfModel.getDefocusU())
                defocusList.append(ctfModel.getDefocusV())
            ctfSet.setMicrographs(micSet)
            self._defineOutputs(outputCTF=ctfSet)
            self._defineCtfRelation(micSet, ctfSet)
    
            # The defocus range covers the whole output set ...
            self._defocusMaxMin(defocusList)
            # ... while the counter reports the re-estimated CTFs only
            self._ctfCounter(recalDefocusList)
        else:
            self._createOutputStep()
        
    #--------------------------- INFO functions ----------------------------------------------------
    def _summary(self):
        """ Return the list of summary lines of the protocol. """
        summary = []
        
        if self.recalculate:
            if self.isFinished():
                if self.summaryVar.hasValue():
                    summary.append(self.summaryVar.get())
            else:
                summary.append(Message.TEXT_NO_CTF_READY)
        else:
            if not hasattr(self, 'outputCTF'):
                summary.append(Message.TEXT_NO_CTF_READY)
            else:
                summary.append("CTF estimation of %d micrographs." % self.inputMicrographs.get().getSize())
        
        return summary
    
    def _methods(self):
        """ Return the list of methods lines of the protocol. """
        methods = []
        
        if hasattr(self, 'outputCTF') and self.isFinished():
            methods.append(self.methodsVar.get())
        else:
            methods.append(Message.TEXT_NO_CTF_READY)
            
        return methods
    
    #--------------------------- UTILS functions ---------------------------------------------------
    def _defineValues(self):
        """ This function get some parameters of the micrographs
        and stores them in the self._params dictionary.
        """
        # Get pointer to input micrographs 
        self.inputMics = self.inputMicrographs.get() 
        acquisition = self.inputMics.getAcquisition()
        
        self._params = {'voltage': acquisition.getVoltage(),
                        'sphericalAberration': acquisition.getSphericalAberration(),
                        'magnification': acquisition.getMagnification(),
                        'ampContrast': acquisition.getAmplitudeContrast(),
                        'samplingRate': self.inputMics.getSamplingRate(),
                        'scannedPixelSize': self.inputMics.getScannedPixelSize(),
                        'windowSize': self.windowSize.get(),
                        'lowRes': self.lowRes.get(),
                        'highRes': self.highRes.get(),
                        # Convert from microns to Amstrongs
                        'minDefocus': self.minDefocus.get() * 1e+4, 
                        'maxDefocus': self.maxDefocus.get() * 1e+4
                       }
    
    def _defineRecalValues(self, ctfModel):
        """ This function get the acquisition info of the micrograph
        of ctfModel and stores it in the self._params dictionary.
        """
        mic = ctfModel.getMicrograph()
        
        acquisition = mic.getAcquisition()
        scannedPixelSize = mic.getSamplingRate() * acquisition.getMagnification() / 10000
        self._params = {'voltage': acquisition.getVoltage(),
                        'sphericalAberration': acquisition.getSphericalAberration(),
                        'magnification': acquisition.getMagnification(),
                        'ampContrast': acquisition.getAmplitudeContrast(),
                        'scannedPixelSize': scannedPixelSize,
                        'samplingRate': mic.getSamplingRate()
                       }
    
    def _getPrevMicDir(self, ctfModel):
        """ Return the directory that contains the PSD file of ctfModel. """
        return dirname(ctfModel.getPsdFile())
    
    def _ctfCounter(self, values):
        """ Set in summaryVar the number of CTFs that were recalculated.
        Params:
         values: list with two defocus values (U and V) per CTF
        """
        # Floor division: the count must stay an integer on Python 3 too
        numberOfCTF = len(values) // 2
        msg = "CTF Re-estimation of %d micrographs" % numberOfCTF
        self.summaryVar.set(msg)
    
    def _getInputCtf(self):
        # NOTE(review): neither continueRecal nor inputCtf is defined in
        # this class, and the first branch returns nothing (None) -- this
        # looks like unfinished code; confirm before relying on it.
        if self.continueRecal:
            sqliteFile = self._getPath()
#             return self.outputCTF.get()
        else:
            return self.inputCtf.get()
        
    def _getMicrographDir(self, mic):
        """ Return an unique dir name for results of the micrograph. """
        return self._getExtraPath(removeBaseExt(mic.getFileName()))        
    
    def _iterMicrographs(self):
        """ Iterate over micrographs and yield
        micrograph name and a directory to process.
        """
        for mic in self.inputMics:
            micFn = mic.getFileName()
            micDir = self._getMicrographDir(mic)
            yield (micFn, micDir, mic)  
    
    def _prepareCommand(self):
        """ This function should be implemented to prepare the
        arguments template if doesn't change for each micrograph
        After this method self._program and self._args should be set. 
        """
        pass
    
    def _defocusMaxMin(self, defocusList):
        """ Set in methodsVar a message with the minimum and maximum
        of the received defocus values, each divided by 10000 and
        reported as microns.
        """
        minimum = float(min(defocusList))/10000
        maximum = float(max(defocusList))/10000
        msg = "The range of micrograph's experimental defocus was %(minimum)0.3f - %(maximum)0.3f microns. " % locals()

        self.methodsVar.set(msg)
Beispiel #13
0
class ProtCTFMicrographs(ProtMicrographs):
    """ Base class for all protocols that estimates the CTF"""

    def __init__(self, **kwargs):
        """ Initialize the protocol, forwarding kwargs to EMProtocol. """
        EMProtocol.__init__(self, **kwargs)
        # Steps of this protocol use the parallel execution mode
        self.stepsExecutionMode = STEPS_PARALLEL
        # Checked (and reset) by _insertAllSteps when recalculating
        self.isFirstTime = Boolean(False)

    # -------------------------- DEFINE param functions -----------------------
    def _defineParams(self, form):
        """ Declare the form parameters shared by the CTF estimation
        protocols. Each parameter carries either the 'recalculate' or
        the 'not recalculate' condition. Subclasses add their own
        parameters through _defineProcessParams, which is called right
        after 'ctfDownFactor' is defined.
        """
        onRecalc = 'recalculate'
        onEstimate = 'not recalculate'

        # Help texts, kept apart from the calls for readability
        downFactorHelp = ('Set to 1 for no downsampling. Non-integer downsample '
                          'factors are possible. This downsampling is only used '
                          'for estimating the CTF and it does not affect any '
                          'further calculation. Ideally the estimation of the '
                          'CTF is optimal when the Thon rings are not too '
                          'concentrated at the origin (too small to be seen) '
                          'and not occupying the whole power spectrum (since '
                          'this downsampling might entail aliasing).')
        resolutionHelp = ('Give a value in digital frequency '
                          '(i.e. between 0.0 and 0.5). These cut-offs '
                          'prevent the typical peak at the center of the'
                          ' PSD and high-resolution terms where only '
                          'noise exists, to interfere with CTF '
                          'estimation. The default lowest value is 0.05 '
                          'but for micrographs with a very fine sampling '
                          'this may be lowered towards 0. The default '
                          'highest value is 0.35, but it should be '
                          'increased for micrographs with signals '
                          'extending beyond this value. However, if '
                          'your micrographs extend further than 0.35, '
                          'you should consider sampling them at a finer '
                          'rate.')
        defocusHelp = ('Select _minimum_ and _maximum_ values for '
                       'defocus search range (in microns). Underfocus'
                       ' is represented by a positive number.')
        windowHelp = ('The PSD is estimated from small patches of this '
                      'size. Bigger patches allow identifying more '
                      'details. However, since there are fewer windows, '
                      'estimations are noisier.')

        form.addSection(label=Message.LABEL_CTF_ESTI)

        # ---- Parameters used when recalculating
        form.addParam('recalculate', BooleanParam, default=False,
                      condition=onRecalc, label="Do recalculate ctf?")
        form.addParam('continueRun', PointerParam, allowsNull=True,
                      condition=onRecalc, label="Input previous run",
                      pointerClass=self.getClassName())
        form.addHidden('sqliteFile', FileParam, condition=onRecalc,
                       allowsNull=True)

        # ---- Parameters used for a new estimation
        form.addParam('inputMicrographs', PointerParam, important=True,
                      condition=onEstimate, label=Message.LABEL_INPUT_MIC,
                      pointerClass='SetOfMicrographs')
        form.addParam('ctfDownFactor', FloatParam, default=1.,
                      label='CTF Downsampling factor', condition=onEstimate,
                      help=downFactorHelp)

        self._defineProcessParams(form)

        resLine = form.addLine('Resolution', condition=onEstimate,
                               help=resolutionHelp)
        resLine.addParam('lowRes', FloatParam, default=0.05, label='Lowest')
        resLine.addParam('highRes', FloatParam, default=0.35, label='Highest')

        defocusLine = form.addLine('Defocus search range (microns)',
                                   condition=onEstimate,
                                   expertLevel=LEVEL_ADVANCED,
                                   help=defocusHelp)
        defocusLine.addParam('minDefocus', FloatParam, default=0.25,
                             label='Min')
        defocusLine.addParam('maxDefocus', FloatParam, default=4.,
                             label='Max')

        form.addParam('windowSize', IntParam, default=256,
                      expertLevel=LEVEL_ADVANCED, label='Window size',
                      condition=onEstimate, help=windowHelp)

        form.addParallelSection(threads=2, mpi=1)

    def _defineProcessParams(self, form):
        """ This method should be implemented by subclasses
        to add other parameter relatives to the specific operation."""
        pass

    # -------------------------- INSERT steps functions -----------------------
    def _insertAllSteps(self):
        """ Insert the steps to perform CTF estimation, or re-estimation,
        on a set of micrographs.
        """
        if not self.recalculate:
            self.initialIds = self._insertInitialSteps()
            self.micDict = OrderedDict()
            micDict, _ = self._loadInputList()
            ctfIds = self._insertNewMicsSteps(micDict.values())
            self._insertFinalSteps(ctfIds)
            # For the streaming mode, the steps function have a 'wait' flag
            # that can be turned on/off. For example, here we insert the
            # createOutputStep but it wait=True, which means that can not be
            # executed until it is set to False
            # (when the input micrographs stream is closed)
            waitCondition = self._getFirstJoinStepName() == 'createOutputStep'
        else:
            if self.isFirstTime:
                # Insert previous estimation or re-estimation an so on...
                self._insertPreviousSteps()
                self.isFirstTime.set(False)
            ctfIds = self._insertRecalculateSteps()
            # For now the streaming is not allowed for recalculate CTF
            waitCondition = False

        self._insertFunctionStep('createOutputStep', prerequisites=ctfIds,
                                 wait=waitCondition)

    def _insertInitialSteps(self):
        """ Override this function to insert some steps before the
        estimate ctfs steps.
        Should return a list of ids of the initial steps. """
        return []

    def _insertNewMicsSteps(self, inputMics):
        """ Insert steps to process new mics (from streaming)
        Params:
            inputMics: input mics set to be check
        """
        deps = []
        # For each mic insert the step to process it
        for mic in inputMics:
            micKey = mic.getMicName()
            if micKey not in self.micDict:
                args = [mic.getFileName(), self._getMicrographDir(mic), micKey]
                stepId = self._insertEstimationSteps(self.initialIds, *args)
                deps.append(stepId)
                self.micDict[micKey] = mic
        return deps

    def _insertEstimationSteps(self, prerequisites, *args):
        """ Basic method to insert a estimateCTF step for a given micrograph."""
        self._defineValues()
        self._prepareCommand()
        micStepId = self._insertFunctionStep('_estimateCTF', *args,
                                             prerequisites=prerequisites)
        return micStepId

    def _insertRecalculateSteps(self):
        """ Load the SetOfCTF stored in sqliteFile into
        self.recalculateSet and, for every enabled CTF that has a
        comment, insert a 'copyMicDirectoryStep' followed by a
        '_restimateCTF' step that depends on it.
        Returns:
            list with the ids of the '_restimateCTF' steps
        """
        self.recalculateSet = SetOfCTF(filename=self.sqliteFile.get(),
                                       objDoStore=False)
        stepIds = []

        for ctf in self.recalculateSet:
            comment = ctf.getObjComment()
            if not (ctf.isEnabled() and comment):
                continue
            # First copy the micrograph directory ...
            copyId = self._insertFunctionStep('copyMicDirectoryStep',
                                              ctf.getObjId())
            # ... then re-estimate. Each re-estimation only depends on its
            # own copy, so they are independent between them
            restimateId = self._insertFunctionStep('_restimateCTF',
                                                   ctf.getObjId(),
                                                   prerequisites=[copyId])
            stepIds.append(restimateId)

        return stepIds

    def _insertFinalSteps(self, deps):
        """ This should be implemented in subclasses"""
        return deps

    def _getFirstJoinStepName(self):
        # This function will be used for streamming, to check which is
        # the first function that need to wait for all micrographs
        # to have completed, this can be overriden in subclasses
        # (e.g., in Xmipp 'sortPSDStep')
        return 'createOutputStep'

    def _getFirstJoinStep(self):
        for s in self._steps:
            if s.funcName == self._getFirstJoinStepName():
                return s
        return None

    #--------------------------- STEPS functions -------------------------------
    def _estimateCTF(self, micFn, micDir, micName):
        """ Run the CTF estimation of one micrograph with the specific
        program and its parameters. Subclasses must override it: the
        base implementation always raises an Exception.
        Params:
         micFn: micrograph filename
         micDir: micrograph directory
         micName: micrograph name (getMicName() of the micrograph, as
                  passed by _insertNewMicsSteps)
        """
        notImplemented = Exception(Message.ERROR_NO_EST_CTF)
        raise notImplemented

    def _restimateCTF(self, micId):
        """ Run the CTF re-estimation with the specific program and its
        parameters. Subclasses must override it: the base implementation
        always raises an Exception.
        Params:
         micId: object id of the CTF to re-estimate (getObjId() of an
                item of self.recalculateSet, see _insertRecalculateSteps)
        """
        notImplemented = Exception(Message.ERROR_NO_EST_CTF)
        raise notImplemented

    def copyMicDirectoryStep(self, micId):
        """ Copy the directory tree of a previous estimation into the
        micrograph directory of this run, for recalculation.
        Params:
         micId: object id of the CTF inside self.recalculateSet
        Raises:
         Exception: if the destination directory does not exist after
                    trying to create it
        """
        ctf = self.recalculateSet[micId]
        micrograph = ctf.getMicrograph()

        srcDir = self._getPrevMicDir(ctf)
        dstDir = self._getMicrographDir(micrograph)
        if srcDir == dstDir:
            # Nothing to copy, results are already in place
            return

        # Create micrograph dir under extra directory
        makePath(dstDir)
        if not exists(dstDir):
            raise Exception("No created dir: %s " % dstDir)
        copyTree(srcDir, dstDir)

    def _createCtfModel(self, mic):
        """ This should be implemented in subclasses
        in order to create a CTF model from program results.
        """
        pass

    def createOutputStep(self):
        """ Output step shared by the Xmipp and CTFfind estimation, or
        recalculate, protocols.
        When not recalculating, the work is delegated to the
        _createOutputStep of each subclass. When recalculating, every
        CTF model of self.recalculateSet is visited: the enabled ones
        with a comment are updated with the newly created CTF model,
        and all of them are appended to the new output set.
        """
        if not self.recalculate:
            self._createOutputStep()
            return

        outputSet = self._createSetOfCTF("_recalculated")
        # Take the micrographs from the previous run if there is one
        sourceProt = self.continueRun.get() or self
        micSet = sourceProt.outputCTF.getMicrographs()

        # We suppose this is reading the ctf selection
        # (with enabled/disabled) to only consider the enabled ones
        # in the final SetOfCTF
        #TODO: maybe we can remove the need of the extra text file
        # with the recalculate parameters
        recalculated = 0
        for ctf in self.recalculateSet:
            if ctf.isEnabled() and ctf.getObjComment():
                # Overwrite the values with the re-estimated ones; the id
                # is not copied since it is already correct
                updatedCtf = self._createCtfModel(ctf.getMicrograph(),
                                                  updateSampling=False)
                ctf.copy(updatedCtf, copyId=False)
                ctf.setEnabled(True)
                recalculated += 1
            outputSet.append(ctf)

        outputSet.setMicrographs(micSet)
        self._defineOutputs(outputCTF=outputSet)
        self._defineCtfRelation(micSet, outputSet)
        self._computeDefocusRange(outputSet)
        self.summaryVar.set("CTF Re-estimation of %d micrographs"
                            % recalculated)

    #--------------------------- INFO functions --------------------------------
    def _summary(self):
        summary = []

        if self.recalculate:
            if self.isFinished():
                if self.summaryVar.hasValue():
                    summary.append(self.summaryVar.get())
            else:
                summary.append(Message.TEXT_NO_CTF_READY)
        else:
            if not hasattr(self, 'outputCTF'):
                summary.append(Message.TEXT_NO_CTF_READY)
            else:
                summary.append("CTF estimation of %d micrographs."
                               % self.inputMicrographs.get().getSize())

        return summary

    def _methods(self):
        methods = []

        if hasattr(self, 'outputCTF') and self.isFinished():
            methods.append(self.methodsVar.get())
        else:
            methods.append(Message.TEXT_NO_CTF_READY)

        return methods

    #--------------------------- UTILS functions -------------------------------
    def _defineValues(self):
        """ This function get some parameters of the micrographs"""
        # Get pointer to input micrographs
        self.inputMics = self.getInputMicrographs()
        acq = self.inputMics.getAcquisition()

        self._params = {'voltage': acq.getVoltage(),
                        'sphericalAberration': acq.getSphericalAberration(),
                        'magnification': acq.getMagnification(),
                        'ampContrast': acq.getAmplitudeContrast(),
                        'samplingRate': self.inputMics.getSamplingRate(),
                        'scannedPixelSize': self.inputMics.getScannedPixelSize(),
                        'windowSize': self.windowSize.get(),
                        'lowRes': self.lowRes.get(),
                        'highRes': self.highRes.get(),
                        # Convert from microns to Amstrongs
                        'minDefocus': self.minDefocus.get() * 1e+4,
                        'maxDefocus': self.maxDefocus.get() * 1e+4
                        }

    def _defineRecalValues(self, ctfModel):
        """ This function get the acquisition info of the micrographs"""
        mic = ctfModel.getMicrograph()

        acq = mic.getAcquisition()
        mag = acq.getMagnification()
        scannedPixelSize = mic.getSamplingRate() * mag / 10000
        self._params = {'voltage': acq.getVoltage(),
                        'sphericalAberration': acq.getSphericalAberration(),
                        'magnification': mag,
                        'ampContrast': acq.getAmplitudeContrast(),
                        'scannedPixelSize': scannedPixelSize,
                        'samplingRate': mic.getSamplingRate()
                        }

    def _getPrevMicDir(self, ctfModel):
        return dirname(ctfModel.getPsdFile())

    def _ctfCounter(self, values):
        """ This function return the number of CTFs that was recalculated.
        """
        numberOfCTF = len(values)/2
        msg = "CTF Re-estimation of %d micrographs" % numberOfCTF
        self.summaryVar.set(msg)

    def _getInputCtf(self):
        if self.continueRecal:
            sqliteFile = self._getPath()
        #             return self.outputCTF.get()
        else:
            return self.inputCtf.get()

    def _getMicrographDir(self, mic):
        """ Return the directory for the results of the micrograph: the
        extra path built from removeBaseExt() of its file name.
        """
        baseName = removeBaseExt(mic.getFileName())
        return self._getExtraPath(baseName)

    def _getMicrographDone(self, micDir):
        """ Return the file that is used as a flag of termination. """
        return join(micDir, 'done.txt')

    def _writeMicrographDone(self, micDir):
        open(self._getMicrographDone(micDir), 'w').close()

    def _iterMicrographs(self, inputMics=None):
        """ Iterate over micrographs and yield
        micrograph name and a directory to process.
        """
        if inputMics is None:
            inputMics = self.inputMics

        for mic in inputMics:
            micFn = mic.getFileName()
            micDir = self._getMicrographDir(mic)
            yield (micFn, micDir, mic)

    def _prepareCommand(self):
        """ This function should be implemented to prepare the
        arguments template if doesn't change for each micrograph
        After this method self._program and self._args should be set. 
        """
        pass

    def _computeDefocusRange(self, ctfSet):
        """ Compute the minimum and maximu defoucs in a set of CTFs.
        The protocol methodsVar will be updated with new values.

        Params:
            ctfSet: the set of CTFs to compute min and max
        """
        defocusList = []

        for ctf in ctfSet:
            defocusList.append(ctf.getDefocusU())
            defocusList.append(ctf.getDefocusV())

        minD = min(defocusList) / 10000.
        maxD = max(defocusList) / 10000.

        self.methodsVar.set("Estimated  defocus range defocus was"
                            " %0.3f - %0.3f microns. " % (minD, maxD))

        self._store(self.methodsVar)

    def _defocusMaxMin(self, defocusList):
        """ This function return the minimum and maximum of the defocus
        of a SetOfMicrographs.
        """
        raise Exception("DEPRECATED")

    def getInputMicrographsPointer(self):
        """ Return the 'inputMicrographs' attribute itself, without
        calling get() on it.
        """
        pointer = self.inputMicrographs
        return pointer

    def getInputMicrographs(self):
        """ Return the object obtained by calling get() on the pointer
        given by getInputMicrographsPointer().
        """
        pointer = self.getInputMicrographsPointer()
        return pointer.get()

    # ------ Methods for Streaming picking --------------
    def _stepsCheck(self):
        # To allow streaming ctf estimation we need to detect:
        #   1) new micrographs ready to be picked
        #   2) new output ctfs that have been produced and add then
        #      to the output set.
    
        # For now the streaming is not allowed for recalculate CTF
        if self.recalculate:
            return
        self._checkNewInput()
        self._checkNewOutput()

    def _checkNewInput(self):
        """ Check whether the input set file changed since the last check
        and, if it holds new micrographs, insert the steps to process them.
        """
        inputDbFn = self.getInputMicrographs().getFileName()
        now = datetime.now()
        # The first time there is no previous check, so 'now' is used
        self.lastCheck = getattr(self, 'lastCheck', now)
        mTime = datetime.fromtimestamp(getmtime(inputDbFn))
        self.debug('Last check: %s, modification: %s'
                  % (prettyTime(self.lastCheck),
                     prettyTime(mTime)))

        # If the input micrographs.sqlite has not changed since our last
        # check, it does not make sense to look for new input data.
        # NOTE(review): 'listOfMics' is not set anywhere in the visible
        # code (which uses self.micDict) - confirm this attribute exists.
        if self.lastCheck > mTime and hasattr(self, 'listOfMics'):
            return None

        self.lastCheck = now
        # Open input micrographs.sqlite and close it as soon as possible
        micDict, self.streamClosed = self._loadInputList()
        newMics = micDict.values()
        outputStep = self._getFirstJoinStep()

        if not newMics:
            return

        # The new steps become prerequisites of the first join step, if any
        fDeps = self._insertNewMicsSteps(newMics)
        if outputStep is not None:
            outputStep.addPrerequisites(*fDeps)
        self.updateSteps()

    def _checkNewOutput(self):
        if getattr(self, 'finished', False):
            return
        # Load previously done items (from text file)
        doneList = self._readDoneList()
        # Check for newly done items
        listOfMics = self.micDict.values()
        nMics = len(listOfMics)
        newDone = [m for m in listOfMics
                   if m.getObjId() not in doneList and self._isMicDone(m)]

        # Update the file with the newly done mics
        # or exit from the function if no new done mics
        self.debug('_checkNewOutput: ')
        self.debug('   listOfMics: %s, doneList: %s, newDone: %s'
                   % (nMics, len(doneList), len(newDone)))

        allDone = len(doneList) + len(newDone)
        # We have finished when there is not more input mics (stream closed)
        # and the number of processed mics is equal to the number of inputs
        self.finished = self.streamClosed and allDone == nMics
        streamMode = Set.STREAM_CLOSED if self.finished else Set.STREAM_OPEN
        self.debug('   streamMode: %s newDone: %s' % (streamMode,
                                                      not(newDone == [])))

        if newDone:
            newDoneUpdated = self._updateOutputCTFSet(newDone, streamMode)
            self._writeDoneList(newDoneUpdated)
        elif not self.finished:
            # If we are not finished and no new output have been produced
            # it does not make sense to proceed and updated the outputs
            # so we exit from the function here

            # Maybe it would be good idea to take a snap to avoid
            # so much IO if this protocol does not have much to do now
            if allDone == nMics:
                self._streamingSleepOnWait()

            return

        self.debug('   finished: %s ' % self.finished)
        self.debug('        self.streamClosed (%s) AND' % self.streamClosed)
        self.debug('        allDone (%s) == len(self.listOfMics (%s)'
                   % (allDone, nMics))

        if self.finished:  # Unlock createOutputStep if finished all jobs
            self._updateStreamState(streamMode)
            outputStep = self._getFirstJoinStep()
            if outputStep and outputStep.isWaiting():
                outputStep.setStatus(STATUS_NEW)

    def _loadInputList(self):
        """ Load the input set of micrographs through _loadSet, using the
        micName of each micrograph as key.
        """
        def _micNameKey(mic):
            return mic.getMicName()

        return self._loadSet(self.getInputMicrographs(), SetOfMicrographs,
                             _micNameKey)
        
    def _loadSet(self, inputSet, SetClass, getKeyFunc):
        """ Load a given input set if their items are not already present
        in the self.micDict.
        This can be used to load new micrographs for picking as well as
        new CTF (if used) in streaming.
        """
        setFn = inputSet.getFileName()
        self.debug("Loading input db: %s" % setFn)
        updatedSet = SetClass(filename=setFn)
        updatedSet.loadAllProperties()
        newItemDict = OrderedDict()
        for item in updatedSet:
            micKey = getKeyFunc(item)
            if micKey not in self.micDict:
                newItemDict[micKey] = item.clone()
        streamClosed = updatedSet.isStreamClosed()
        updatedSet.close()
        self.debug("Closed db.")

        return newItemDict, streamClosed

    def _updateOutputCTFSet(self, micList, streamMode):
        micDoneList = [mic for mic in micList]
        # Do no proceed if there is not micrograph ready
        if not micDoneList:
            return []

        outputName = 'outputCTF'
        outputCtf = getattr(self, outputName, None)

        # If there is not outputCTF yet, it means that is the first
        # time we are updating output CTFs, so we need to first create
        # the output set
        firstTime = outputCtf is None

        if firstTime:
            outputCtf = self._createSetOfCTF()
            outputCtf.setMicrographs(self.getInputMicrographsPointer())
        else:
            outputCtf.enableAppend()



        for micFn, micDir, mic in self._iterMicrographs(micList):
            ctf = self._createCtfModel(mic)
            outputCtf.append(ctf)

        self.debug(" _updateOutputCTFSet Stream Mode: %s " % streamMode)
        self._updateOutputSet(outputName, outputCtf, streamMode)

        if firstTime:  # define relation just once
            # Using a pointer to define the relations is more robust to
            # scheduling and id changes between the protocol run.db and
            # the main project database.
            self._defineCtfRelation(self.getInputMicrographsPointer(),
                                    outputCtf)

        return micDoneList

    def _updateStreamState(self, streamMode):
        outputName = 'outputCTF'
        outputCtf = getattr(self, outputName, None)

        # If there are not outputCoordinates yet, it means that is the first
        # time we are updating output coordinates, so we need to first create
        # the output set
        firstTime = outputCtf is None

        if firstTime:
            micSetPtr = self.getInputMicrographsPointer()
            outputCtf = self._createSetOfCoordinates(micSetPtr)
        else:
            outputCtf.enableAppend()

        self.debug(" _updateStreamState Stream Mode: %s " % streamMode)
        self._updateOutputSet(outputName, outputCtf, streamMode)

    def _readDoneList(self):
        """ Read from a text file the id's of the items that have been done. """
        doneFile = self._getAllDone()
        doneList = []
        # Check what items have been previously done
        if exists(doneFile):
            with open(doneFile) as f:
                doneList += [int(line.strip()) for line in f]
        return doneList

    def _writeDoneList(self, micList):
        """ Write to a text file the items that have been done. """
        doneFile = self._getAllDone()

        if not exists(doneFile):
            makeFilePath(doneFile)

        with open(doneFile, 'a') as f:
            for mic in micList:
                f.write('%d\n' % mic.getObjId())

    def _isMicDone(self, mic):
        """ A mic is done if the marker file exists. """
        micDir = self._getMicrographDir(mic)
        return exists(self._getMicrographDone(micDir))

    def _getAllDone(self):
        return self._getExtraPath('DONE', 'all.TXT')
Beispiel #14
0
 def __init__(self, **args):
     """ Initialize the EMProtocol base with the given keyword arguments
     and set the SYMMETRY attribute to a Boolean holding True.
     """
     EMProtocol.__init__(self, **args)
     # NOTE(review): the enclosing class is not visible in this fragment;
     # presumably SYMMETRY tells whether symmetry is applied - confirm.
     self.SYMMETRY = Boolean(True)
Beispiel #15
0
class ChimeraProtContacts(EMProtocol):
    """Identifies interatomic clashes and contacts based on van der Waals radii
    """
    # Workflow, as implemented below:
    #   1) chimeraClashesStep writes a ChimeraX script that opens the
    #      atomic structure, optionally applies symmetry, and runs one
    #      'contacts' command per chain label; the files it produces are
    #      parsed into the 'contacts' table of a sqlite database.
    #   2) postProcessStep creates two views on that table that drop
    #      repeated contacts.
    _label = 'contacts'
    _program = ""
    commandDropView = """DROP view IF EXISTS {viewName}"""
    TetrahedralOrientation = ['222', 'z3']

    # Number of leading lines skipped in every contacts file
    CONTACT_FILE_HEADER_LINES = 8
    # Symmetry names written to the script as "sym #1 i,<name without I>"
    ICOSAHEDRAL_SYMS = ("I222", "I222r", "In25", "In25r",
                        "I2n3", "I2n3r", "I2n5", "I2n5r")

    @classmethod
    def getClassPackageName(cls):
        """ Return the name of the package this protocol belongs to. """
        return "chimerax"

    def __init__(self, **args):
        EMProtocol.__init__(self, **args)
        # Starts as True; _insertAllSteps and the steps may turn it off
        self.SYMMETRY = Boolean(True)

    def _defineParams(self, form):
        """ Define the form: input structure, chain labeling, symmetry
        options and the cutoff/allowance used by the contacts command.
        """
        form.addSection(label='Input')
        # pdbFileToBeRefined name is needed by the wizard. Do not change it
        form.addParam('pdbFileToBeRefined',
                      PointerParam,
                      pointerClass="AtomStruct",
                      label='Atomic Structure:',
                      allowsNull=True,
                      important=True,
                      help="Input atomic structure.")
        form.addParam(
            'chainStructure',
            StringParam,
            default="",
            label='Chain Labeling',
            help="Dictionary that maps chains to labels.\n"
            "Example: {'A':'h1', 'B':'h1', 'E':'h2'}\n"
            "Contacts are calculated between two chains with distinct "
            "labels. Two chains with the same label are considered as "
            "a group. Contacts will be computed between any chain included "
            "in this group and any other group/chain. However, no contacts "
            "among members of the group will be calculated.")
        form.addParam(
            'applySymmetry',
            BooleanParam,
            label="Apply symmetry:",
            default=True,
            help=
            "'Symmetry = Yes' indicates that symmetry will be applied, and then"
            " contacts will be computed between any two chains of the "
            "atomic structure (the unit cell) "
            "and between a chain of the unit cell and another chain of a "
            "neigbour unit cell. Output results will show only non "
            "redundant contatcs, i.e., contacts than you can infer by"
            " symmetry will not be shown.\n'Symmetry = No' indicates that "
            "symmetry will not be applied, and then  "
            "contacts will only be calculated between chains within the "
            "atomic structure. Output results will show all contacts between"
            " any couple of interacting chains.\n")
        # The URL used to be glued to the following word, which broke the
        # link; it is now separated from the surrounding text.
        form.addParam(
            'symmetryGroup',
            EnumParam,
            choices=CHIMERA_LIST,
            default=CHIMERA_I222,
            label="Symmetry",
            condition='applySymmetry',
            help=
            "See https://scipion-em.github.io/docs/release-2.0.0/docs/developer/symmetries.html?highlight=symmetry"
            " for a description of the symmetry groups "
            "format in CHIMERA.\n"
            "If no symmetry is present, use _c1_.\n"
            'More information: \n'
            'https://www.cgl.ucsf.edu/chimera/current/docs/UsersGuide/midas/sym.html'
        )
        form.addParam('symmetryOrder',
                      IntParam,
                      default=1,
                      condition='applySymmetry and symmetryGroup<=%d' %
                      SYM_DIHEDRAL_X,
                      label='Symmetry Order',
                      help='Select the order of cyclic or dihedral symmetry.')

        group = form.addGroup('Fit params for clashes and contacts')
        group.addParam(
            'cutoff',
            FloatParam,
            label="cutoff (Angstroms): ",
            default=-0.4,
            expertLevel=LEVEL_ADVANCED,
            help="Large positive cutoff identifies the more severe clashes, "
            "whereas negative cutoff indicates favorable contacts:\n"
            "default contact rule: -0.4 (from 0.0 to -1.0)\n"
            "default clash rule: 0.6 (from 0.4 to 1.0)\n"
            'More information: \n'
            'https://www.cgl.ucsf.edu/chimera/current/docs/UsersGuide/midas/findclash.html'
        )
        group.addParam(
            'allowance',
            FloatParam,
            label="allowance (Angstroms): ",
            default=0.0,
            expertLevel=LEVEL_ADVANCED,
            help="default contact rule: 0.0\n"
            "default clash rule: 0.4\n"
            'More information: \n'
            'https://www.cgl.ucsf.edu/chimera/current/docs/UsersGuide/midas/findclash.html'
        )
        form.addLine('')

    # --------------------------- INSERT steps functions --------------------
    def _insertAllSteps(self):
        """ Resolve the symmetry to apply, insert the two function steps
        and store the protocol.
        """
        self.sym = CHIMERA_SYM_NAME[self.symmetryGroup.get()]
        self.symOrder = self.symmetryOrder.get()
        if not self.applySymmetry:
            self.sym = "Cn"
            self.symOrder = 1
            self.SYMMETRY = Boolean(False)
        elif self.sym in ("Cn", "Dn") and self.symOrder == 1:
            # Cyclic or dihedral symmetry of order 1 is handled as
            # no symmetry at all
            self.SYMMETRY = Boolean(False)
        # connect to database, delete table and recreate it
        # execute chimera findclash
        self._insertFunctionStep('chimeraClashesStep')
        self._insertFunctionStep('postProcessStep')

        self._store()

    def postProcessStep(self):
        """ Create the views without duplicated contacts, then commit and
        close the database connection (also closed if the SQL fails).
        """
        c, conn = connectDB(self.getDataBaseName(), None)
        try:
            self.removeDuplicates(c)
            conn.commit()
        finally:
            conn.close()

    def chimeraClashesStep(self):
        """ Write and run the ChimeraX script that computes the contacts
        and load the files it produces into the database.

        If symmetry was requested but the symmetrized model file has not
        been created, a second script without symmetry is written and run.
        """
        labelDict = self._getSortedLabelDict()
        pdbFileName = os.path.abspath(
            self.pdbFileToBeRefined.get().getFileName())
        # first element of dictionary
        firstValue = labelDict[list(labelDict)[0]]
        outFiles = []
        scriptFn = self.getChimeraScriptFileName1()
        with open(scriptFn, "w") as f:
            f.write("from chimerax.core.commands import run\n")
            f.write("run(session, 'open {}')\n".format(pdbFileName))
            if self.sym == "Cn" and self.symOrder != 1:
                f.write("run(session,'sym #1 C%d copies t')\n"
                        % self.symOrder)
            elif self.sym == "Dn" and self.symOrder != 1:
                f.write("run(session,'sym #1 d%d copies t')\n"
                        % self.symOrder)
            elif self.sym in ("T222", "TZ3"):
                f.write("run(session,'sym #1 t,%s copies t')\n"
                        % self.sym[1:])
            elif self.sym == "O":
                f.write("run(session,'sym #1 O copies t')\n")
            elif self.sym in self.ICOSAHEDRAL_SYMS:
                f.write("run(session,'sym #1 i,%s copies t')\n"
                        % self.sym[1:])
            # From here on SYMMETRY holds a plain Python bool
            self.SYMMETRY = self.SYMMETRY.get()
            if self.SYMMETRY:
                f.write("run(session,'delete #2 & #1 #>3')\n")
                f.write(
                    "run(session,'save {symmetrizedModelName} #2')\n".format(
                        symmetrizedModelName=self.getSymmetrizedModelName()))
                f.write("run(session, 'close #1')\n")
                f.write("run(session, 'rename #2 id #1')\n")
            self.endChimeraScript(firstValue, labelDict, outFiles, f)
            f.write("run(session, 'exit')\n")
        self._runChimeraScript(scriptFn)

        if self.SYMMETRY and not os.path.exists(
                self.getSymmetrizedModelName()):
            # When self.SYMMETRY = TRUE and no neighbor unit cell has been
            # generated at less than 3 Angstroms, probably because the
            # symmetry center is not equal to the origin of coordinates,
            # at least we have the contacts that are within the unit cell.
            print(
                red("Error: No neighbor unit cells are available. "
                    "Is the symmetry center equal to the origin of "
                    "coordinates?"))
            self.SYMMETRY = False
            # Start a fresh list: endChimeraScript appends the same file
            # names again and each file must be parsed only once.
            outFiles = []
            scriptFn = self.getChimeraScriptFileName2()
            with open(scriptFn, "w") as f:
                f.write("from chimerax.core.commands import run\n")
                # 'session' is the first argument of run(), as in the
                # first script
                f.write("run(session, 'open {}')\n".format(pdbFileName))
                self.endChimeraScript(firstValue, labelDict, outFiles, f)
                f.write("run(session, 'exit')\n")
            self._runChimeraScript(scriptFn)

        # parse all files created by chimera
        c, conn = self.prepareDataBase()
        try:
            self.parseFiles(outFiles, c)
            conn.commit()
        finally:
            conn.close()

    def _runChimeraScript(self, scriptFileName):
        """ Log and launch the Chimera program on the given script. """
        args = " --nogui --script " + scriptFileName
        self._log.info('Launching: ' + Plugin.getProgram() + ' ' + args)
        Chimera.runProgram(Plugin.getProgram(), args)

    def _getSortedLabelDict(self):
        """ Parse chainStructure as JSON and return an OrderedDict
        chain -> label whose entries are sorted by label.
        """
        labelDictAux = json.loads(self.chainStructure.get(),
                                  object_pairs_hook=collections.OrderedDict)
        return collections.OrderedDict(
            sorted(labelDictAux.items(), key=itemgetter(1)))

    def prepareDataBase(self, drop=True):
        """ Connect to the database and return what connectDB returns
        (used here as a cursor and a connection). With drop=True the
        table name is passed to connectDB as well.
        """
        if drop:
            return connectDB(self.getDataBaseName(), self.getTableName())
        return connectDB(self.getDataBaseName())

    @staticmethod
    def _atomRecord(atomSpec, resName, resNumber, atomName, labelDict,
                    modelId=None):
        """ Build the column values that describe one atom of a contact.

        atomSpec is split by "/": the second part is the chain id and,
        unless modelId is given, the first part is the model id. Text
        values are wrapped in single quotes because they are pasted
        verbatim into the INSERT statement.
        """
        specParts = atomSpec.split("/")
        chainId = specParts[1]
        if modelId is None:
            modelId = specParts[0]
        return collections.OrderedDict([
            ('modelId', "'" + modelId + "'"),
            # e.g. HEM -> 'Hem'
            ('aaName', "'" + resName[0] + resName[1:].lower() + "'"),
            ('aaNumber', resNumber),
            ('chainId', "'" + chainId + "'"),
            ('atomId', "'" + atomName + "'"),
            ('protId', "'" + labelDict[chainId] + "'"),
        ])

    def parseFiles(self, outFiles, c):
        """ Parse the contact files and insert one row per contact.

        Params:
            outFiles: paths of the files to parse; missing ones are skipped.
            c: database cursor used to execute the INSERT statements.

        Returns True if at least one of the files exists.
        """
        labelDict = self._getSortedLabelDict()
        tableName = self.getTableName()
        anyResult = False
        for inFile in outFiles:
            print("processing file", inFile)
            if not os.path.exists(inFile):
                continue
            anyResult = True
            # parse contact files. Note that C1 symmetry file is different
            # from the rest
            with open(inFile) as contactFile:
                for lineNumber, line in enumerate(contactFile):
                    if lineNumber < self.CONTACT_FILE_HEADER_LINES:
                        continue
                    info = line.split()
                    if not info:
                        # a blank line carries no contact
                        continue
                    if not self.SYMMETRY or info[0].startswith("/"):
                        # The second option stands for cases in which the
                        # result of applying symmetry is identical to the
                        # starting structure (see test testContactsSymC2_b
                        # where after deleting the #2 submodel far more
                        # than 3 A from the input model, the resulting
                        # model is the same as the initial one).
                        # ['/A002', 'HEM', '1', 'ND',
                        #  '/A', 'HIS', '87', 'NE2', '0.620', '2.660']
                        d1 = self._atomRecord(info[0], info[1], info[2],
                                              info[3], labelDict,
                                              modelId="#1")
                        d2 = self._atomRecord(info[4], info[5], info[6],
                                              info[7], labelDict,
                                              modelId="#1")
                        overlap, distance = info[8], info[9]
                    else:
                        # 5ni1_unit_cell_HEM.cif #1.2/A002 HEM 1 ND
                        # 5ni1_unit_cell_HEM.cif #1.2/A HIS 87 NE2
                        # 0.620 2.660
                        d1 = self._atomRecord(info[1], info[2], info[3],
                                              info[4], labelDict)
                        d2 = self._atomRecord(info[6], info[7], info[8],
                                              info[9], labelDict)
                        overlap, distance = info[10], info[11]

                    # Store the pair in a canonical order: by label when
                    # both atoms are in the same model, by model otherwise
                    if d1['modelId'] == d2['modelId']:
                        swap = d1['protId'] > d2['protId']
                    else:
                        swap = d1['modelId'] > d2['modelId']
                    first, second = (d2, d1) if swap else (d1, d2)

                    row = collections.OrderedDict()
                    row['overlap'] = overlap
                    row['distance'] = distance
                    for k in first:
                        row[k + '_1'] = first[k]
                        row[k + '_2'] = second[k]

                    # NOTE(review): the statement is assembled by string
                    # concatenation with values read from the contacts
                    # file; consider a parameterized query if those files
                    # can ever hold untrusted text.
                    command = "INSERT INTO %s (%s) VALUES (%s)" % (
                        tableName,
                        ", ".join(row),
                        ", ".join(str(value) for value in row.values()))
                    c.execute(command)

        return anyResult

    #    --------- util functions -----

    def getDataBaseName(self):
        """ Return the extra path of the sqlite database. """
        return self._getExtraPath("overlaps.sqlite")

    def getSymmetrizedModelName(self):
        """ Return the absolute path of the symmetrized model file. """
        return os.path.abspath(self._getExtraPath("symModel.cif"))

    def getTableName(self):
        """ Return the name of the table that stores the contacts. """
        return "contacts"

    def getView2Name(self):
        """ Return the name of the second view. """
        return "view_ND_2"

    def getView1Name(self):
        """ Return the name of the first view. """
        return "view_ND_1"

    def getChimeraScriptFileName1(self):
        """ Return the absolute tmp path of the first script. """
        return os.path.abspath(self._getTmpPath("chimera1.cxc"))

    def getChimeraScriptFileName2(self):
        """ Return the absolute tmp path of the second script. """
        return os.path.abspath(self._getTmpPath("chimera2.cxc"))

    def endChimeraScript(self, firstValue, labelDict, outFiles, f):
        """ Write one 'contacts' command per label and register the file
        each command saves.

        Params:
            firstValue: label of the first entry of labelDict.
            labelDict: OrderedDict chain -> label, sorted by label, so
                that chains sharing a label are consecutive.
            outFiles: list to which the output file names are appended.
            f: open script file to write to.
        """
        protId = firstValue
        chains = ""
        comma = ''
        for k, v in labelDict.items():
            if protId == v:
                # same label as the previous chain: extend the group
                chains += "{}{}".format(comma, k)
                comma = ','
                outFileBase = v

            else:
                # the label changed: write the command of the group just
                # completed and start a new group with this chain
                outFile = os.path.abspath(
                    self._getExtraPath("{}.over".format(outFileBase)))
                outFiles.append(outFile)
                f.write(
                    "run(session,'echo {}')\nrun(session, 'contacts  #1{} "
                    "intersubmodel true "
                    "intramol False "
                    "restrict any "
                    "saveFile {} overlapCutoff {} hbondAllowance {} namingStyle simple')\n"
                    .format(chains, chains, outFile, self.cutoff,
                            self.allowance))
                protId = v
                chains = "{}".format(k)
                outFileBase = v

            # keep exactly one leading "/" in the chain specification
            chains = "/" + chains.split("/")[-1]
        outFile = os.path.abspath(
            self._getExtraPath("{}.over".format(outFileBase)))
        outFiles.append(outFile)

        # NOTE(review): this last command spells its options 'savefile',
        # 'overlap' and 'hbond' while the one above uses 'saveFile',
        # 'overlapCutoff' and 'hbondAllowance'; presumably ChimeraX
        # accepts both spellings - confirm before unifying them.
        f.write(
            "run(session,'echo {}')\nrun(session, 'contacts  #1{} "
            "intersubmodel true "
            "intramol False "
            "restrict any "
            "savefile {} overlap {} hbond {} namingStyle simple')\n".format(
                chains, chains, outFile, self.cutoff, self.allowance))

    def removeDuplicates(self, c):
        """ Create the two views that remove duplicated contacts.

        The first view holds the distinct rows of the contacts table.
        The second one is built from the first and drops symmetric pairs
        and contacts within the same label and model; when SYMMETRY is on
        and some row has modelId_1 '#1.1', it also drops the rows in
        which neither atom belongs to model '#1.1'.

        Params:
            c: database cursor used to execute the statements.
        """
        tableName = self.getTableName()
        view1 = self.getView1Name()
        view2 = self.getView2Name()

        # Remove duplicate contacts
        # that is, given chains A,B
        # we have contact A-B and B-A
        commandEliminateDuplicates = """CREATE VIEW {} AS
        SELECT DISTINCT modelId_1,
             protId_1,
             chainId_1,
             aaName_1,
             aaNumber_1,
             atomId_1,
             modelId_2,
             protId_2,
             chainId_2,
             aaName_2,
             aaNumber_2,
             atomId_2,
             overlap,
             distance
        FROM {}

        """
        commandEliminateDuplicates2 = """
        CREATE VIEW {} AS
        SELECT *
        FROM {}

        EXCEPT -- Each bound appears two times, delete one of them

        SELECT ca.*
        FROM {} ca, {} cb
        WHERE
                ca.protId_1    = cb.protId_2
            AND cb.protId_1    = ca.protId_2
            AND cb.chainId_1   = ca.chainId_2
            AND ca.aaNumber_1  = cb.aaNumber_2
            AND cb.aaNumber_1  = ca.aaNumber_2
            AND ca.atomId_1  = cb.atomId_2
            AND cb.atomId_1  = ca.atomId_2
            AND ca.modelId_2   > cb.modelId_2
        
        EXCEPT -- Interprotein bounds in the same model are not allowed

        SELECT ca.*
        FROM {} ca
        WHERE  ca.modelId_1 = ca.modelId_2 
           AND ca.protId_1 = ca.protId_2 
     
        """
        if self.SYMMETRY:
            sqlCommand = """
            SELECT count(*) FROM {} ca
            WHERE ca.modelId_1 = '#1.1'
            """.format(tableName)
            c.execute(sqlCommand)
            row = c.fetchone()
            if int(row[0]) == 0:
                # no contact starts in model #1.1
                self.SYMMETRY = False
            else:
                # Only the placeholder of this fragment is filled here;
                # the ones of the base template are filled further down.
                commandEliminateDuplicates2 += """
                EXCEPT -- One of the atoms must belong to the input unit cell
            
                SELECT ca.*
                FROM {} ca
                WHERE ca.modelId_1 != '#1.1'  AND 
                      ca.modelId_2 != '#1.1'
        """.format(view1)
        # Remove duplicate contacts
        # that is, given chains A,B
        # we have contact A.a-B.b and B.b-A.a
        c.execute(self.commandDropView.format(viewName=view1))
        # TODO: remove second contacts
        c.execute(commandEliminateDuplicates.format(view1, tableName))

        # remove duplicate contacts due to symmetry
        # h1-h1p, h1-h2p
        c.execute(self.commandDropView.format(viewName=view2))
        c.execute(
            commandEliminateDuplicates2.format(view2, view1, view1, view1,
                                               view1))

    def _validate(self):
        """ Return the list of validation errors: the symmetry order must
        be positive and the chain labeling must be a non-empty JSON
        dictionary, since the steps parse it with json.loads and read
        its first entry.
        """
        errors = []
        if self.symmetryOrder.get() <= 0:
            errors.append("Error: Symmetry Order should be a positive integer")

        try:
            labelDict = json.loads(self.chainStructure.get())
        except (TypeError, ValueError):
            # TypeError: no value at all; ValueError: not valid JSON
            labelDict = None
        if not isinstance(labelDict, dict) or not labelDict:
            errors.append('Error: Chain Labeling should be a non-empty JSON '
                          'dictionary written with double quotes, e.g. '
                          '{"A": "h1", "B": "h1", "E": "h2"}')

        return errors
Beispiel #16
0
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# * GNU General Public License for more details.
# *
# * You should have received a copy of the GNU General Public License
# * along with this program; if not, write to the Free Software
# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
# * 02111-1307  USA
# *
# *  All comments concerning this program package may be sent to the
# *  e-mail address '*****@*****.**'
# *
# **************************************************************************

import sys

from pyworkflow.object import Boolean
from convert import runGempicker


if __name__ == '__main__':
    # Command line: <micName> <workDir> <useGPU> [extra arguments ...]
    micName, workDir = sys.argv[1], sys.argv[2]
    # The third argument is wrapped in a pyworkflow Boolean
    useGPU = Boolean(sys.argv[3])
    # Whatever follows is joined with single spaces into one string
    args = " ".join(sys.argv[4:])

    runGempicker(micName, workDir, useGPU, args)