def ClCla(alignedstack, numpart=None, numclasses=40,
                factorlist=range(1,5), corandata="coran/corandata", dataext=".spi"):
        """
        Send a SPIDER "CL CLA" clustering request for a set of coran data.

        NOTE: the original author flagged this routine as not working. The
        SPIDER command sequence is unchanged; only computations whose results
        were never used (suffix stripping of alignedstack, class average and
        class variance stack names) were dropped.

        inputs:
                alignedstack, numpart: accepted for signature compatibility,
                        not used here
                numclasses: how many stale "classdocNNNN" files to remove
                        from the output directory before running
                factorlist: factor numbers, turned into the SPIDER factor
                        string by operations.intListToString
                corandata: coran data path handed to SPIDER
                dataext: SPIDER data file extension
        output:
                nothing is returned; the cluster doc and dendrogram file
                names passed to SPIDER all live in the "cluster" directory
        """
        rundir = "cluster"
        apParam.createDirectory(rundir)

        ### clear out doc files left by an earlier run
        for classnum in range(1, numclasses+1):
                apFile.removeFile(rundir+("/classdoc%04d" % classnum)+dataext)
        apFile.removeFile(rundir+"/clusterdoc"+dataext)

        ### only the factor string is needed, the key is ignored
        factorstr, _factorkey = operations.intListToString(factorlist)

        ### do hierarchical clustering
        mySpider = spyder.SpiderSession(dataext=dataext, logo=True)
        mySpider.toSpider(
                "CL CLA",
                corandata, # path to coran data
                rundir+"/clusterdoc", # clusterdoc file
                factorstr, # factor numbers
                "5,8", # NOTE(review): meaning of this answer is not visible here
                "4", # NOTE(review): meaning of this answer is not visible here
                "2", # minimum number of particles per class
                "Y", rundir+"/dendrogram.ps",
                "Y", rundir+"/dendrogramdoc",
        )
        mySpider.close()
def ClCla(alignedstack, numpart=None, numclasses=40,
		factorlist=range(1,5), corandata="coran/corandata", dataext=".spi"):
	"""
	Send a SPIDER "CL CLA" clustering request for a set of coran data.

	NOTE: the original author flagged this routine as not working. The
	SPIDER command sequence is unchanged; only computations whose results
	were never used (suffix stripping of alignedstack, class average and
	class variance stack names) were dropped.

	inputs:
		alignedstack, numpart: accepted for signature compatibility,
			not used here
		numclasses: how many stale "classdocNNNN" files to remove
			from the output directory before running
		factorlist: factor numbers, turned into the SPIDER factor
			string by operations.intListToString
		corandata: coran data path handed to SPIDER
		dataext: SPIDER data file extension
	output:
		nothing is returned; the cluster doc and dendrogram file
		names passed to SPIDER all live in the "cluster" directory
	"""
	rundir = "cluster"
	apParam.createDirectory(rundir)

	### clear out doc files left by an earlier run
	for classnum in range(1, numclasses+1):
		apFile.removeFile(rundir+("/classdoc%04d" % classnum)+dataext)
	apFile.removeFile(rundir+"/clusterdoc"+dataext)

	### only the factor string is needed, the key is ignored
	factorstr, _factorkey = operations.intListToString(factorlist)

	### do hierarchical clustering
	mySpider = spyder.SpiderSession(dataext=dataext, logo=True)
	mySpider.toSpider(
		"CL CLA",
		corandata, # path to coran data
		rundir+"/clusterdoc", # clusterdoc file
		factorstr, # factor numbers
		"5,8", # NOTE(review): meaning of this answer is not visible here
		"4", # NOTE(review): meaning of this answer is not visible here
		"2", # minimum number of particles per class
		"Y", rundir+"/dendrogram.ps",
		"Y", rundir+"/dendrogramdoc",
	)
	mySpider.close()
Ejemplo n.º 3
0
	def setRunDir(self):
		"""
		Store the run directory in self.params['rundir'].

		The directory is "rctvolume/<runname>" placed two levels above
		the absolute path recorded for the tilt stack.
		"""
		tiltstack = apStack.getOnlyStackData(self.params['tiltstackid'], msg=False)
		stackpath = os.path.abspath(tiltstack['path']['path'])
		parentdir = os.path.dirname(stackpath)
		basedir = os.path.dirname(parentdir)
		# the converted class list is not used for the directory name;
		# the call is kept so the class list still goes through the converter
		operations.intListToString(self.classlist)

		runname = self.params['runname']
		self.params['rundir'] = os.path.join(basedir, "rctvolume", runname)
Ejemplo n.º 4
0
    def setRunDir(self):
        """
        Store the run directory in self.params['rundir'].

        The directory is "rctvolume/<runname>" placed two levels above
        the absolute path recorded for the tilt stack.
        """
        tiltstack = apStack.getOnlyStackData(self.params['tiltstackid'],
                                             msg=False)
        stackpath = os.path.abspath(tiltstack['path']['path'])
        parentdir = os.path.dirname(stackpath)
        basedir = os.path.dirname(parentdir)
        # the converted class list is not used for the directory name;
        # the call is kept so the class list still goes through the converter
        operations.intListToString(self.classlist)

        runname = self.params['runname']
        self.params['rundir'] = os.path.join(basedir, "rctvolume", runname)
Ejemplo n.º 5
0
	def insertRctRun(self, volfile):
		"""
		Build the database records describing this RCT run and its output
		volume; they are inserted only when self.params['commit'] is True.

		volfile: path of the volume file; its directory, base name and
			md5 sum are stored on the density record
		"""
		params = self.params

		### resolution records: FSC (with data file name) and Rmeasure (no file)
		fscres = appiondata.ApResolutionData()
		fscres['type'] = "fsc"
		fscres['half'] = self.fscresolution
		fscres['fscfile'] = "fscdata"+self.timestamp+".fsc"
		rmeasres = appiondata.ApResolutionData()
		rmeasres['type'] = "rmeasure"
		rmeasres['half'] = self.rmeasureresolution
		rmeasres['fscfile'] = None

		### rct run record
		runrec = appiondata.ApRctRunData()
		runrec['runname'] = params['runname']
		# NOTE(review): other call sites in this file unpack two values from
		# intListToString; here the whole return value is stored -- confirm
		runrec['classnums'] = operations.intListToString(self.classlist)
		# fields copied straight from the run parameters: (record key, parameter key)
		for reckey, paramkey in (
			('numiter', 'numiters'),
			('maskrad', 'radius'),
			('lowpassvol', 'lowpassvol'),
			('highpasspart', 'highpasspart'),
			('lowpasspart', 'lowpasspart'),
			('median', 'median'),
			('description', 'description'),
		):
			runrec[reckey] = params[paramkey]
		rundir = os.path.abspath(params['rundir'])
		runrec['path'] = appiondata.ApPathData(path=rundir)
		runrec['alignstack'] = self.alignstackdata
		runrec['tiltstack'] = apStack.getOnlyStackData(params['tiltstackid'])
		runrec['numpart'] = self.numpart
		runrec['fsc_resolution'] = fscres
		runrec['rmeasure_resolution'] = rmeasres
		if params['commit'] is True:
			runrec.insert()

		### 3d volume density record
		volrec = appiondata.Ap3dDensityData()
		volrec['rctrun'] = runrec
		voldir = os.path.dirname(os.path.abspath(volfile))
		volrec['path'] = appiondata.ApPathData(path=voldir)
		volrec['name'] = os.path.basename(volfile)
		volrec['hidden'] = False
		volrec['norm'] = True
		# NOTE(review): confirm which symmetry the record with id 25 denotes
		volrec['symmetry'] = appiondata.ApSymmetryData.direct_query(25)
		# stack pixel size scaled by the tilt binning factor
		stackapix = apStack.getStackPixelSizeFromStackId(params['tiltstackid'])
		volrec['pixelsize'] = stackapix*params['tiltbin']
		volrec['boxsize'] = self.getBoxSize()
		for reckey, paramkey in (
			('lowpass', 'lowpassvol'),
			('highpass', 'highpasspart'),
			('mask', 'radius'),
			('description', 'description'),
		):
			volrec[reckey] = params[paramkey]
		volrec['resolution'] = self.fscresolution
		volrec['rmeasure'] = self.rmeasureresolution
		volrec['session'] = apStack.getSessionDataFromStackId(params['tiltstackid'])
		volrec['md5sum'] = apFile.md5sumfile(volfile)
		if params['commit'] is True:
			volrec.insert()
def hierarchClusterProcess(numpart=None,
                           factorlist=range(1, 5),
                           corandata="coran/corandata",
                           rundir=".",
                           dataext=".spi"):
    """
    Create the hierarchical clustering dendrogram doc file with SPIDER "CL HC".

    When the dendrogram doc file for this factor list already exists it is
    reused and SPIDER is not started.

    inputs:
        numpart: number of particles; the minimum class size is derived
            from it, so it must be a number whenever the dendrogram has to
            be computed (the None default only works if the file exists)
        factorlist: factor numbers; each one is given the weight 1.0
        corandata: path to the coran data ("_IMC" is appended)
        rundir: output directory for the dendrogram files
        dataext: SPIDER data file extension
    output:
        path of the dendrogram doc file (the factor key is part of its
        name, it is not returned separately)
    """
    factorstr, factorkey = operations.intListToString(factorlist)

    dendrogramfile = rundir + "/dendrogramdoc" + factorkey + dataext
    if os.path.isfile(dendrogramfile):
        apDisplay.printMsg(
            "Dendrogram file already exists, skipping processing " +
            dendrogramfile)
        return dendrogramfile

    # computed before SPIDER is started so that an unusable numpart fails
    # here instead of leaving an open SPIDER session behind
    minclasssize = "%.4f" % (numpart * 0.0001 + 2.0)
    psfile = rundir + "/dendrogram.ps"

    apDisplay.printMsg("Creating dendrogram file: " + dendrogramfile)
    ### do hierarchical clustering
    mySpider = spyder.SpiderSession(dataext=dataext, logo=False, log=True)
    mySpider.toSpider(
        "CL HC",
        spyder.fileFilter(corandata) + "_IMC",  # path to coran data
        factorstr,  # factor string
    )
    ## weight for each factor
    for _ in factorlist:
        mySpider.toSpiderQuiet("1.0")
    mySpider.toSpider(
        "5",  #use Ward's method
        "T",
        minclasssize,
        psfile,  #dendrogram image file
        "Y",
        spyder.fileFilter(dendrogramfile),  #dendrogram doc file
    )
    mySpider.close()

    if not os.path.isfile(dendrogramfile):
        apDisplay.printError(
            "SPIDER dendrogram creation (CL HC) failed, too many particles??")
    # convert the postscript file SPIDER was asked to write; this used to be
    # hard-coded to "cluster/dendrogram.ps" regardless of rundir
    apImage.convertPostscriptToPng(psfile, "dendrogram.png")

    return dendrogramfile
def hierarchClusterProcess(numpart=None, factorlist=range(1,5),
                corandata="coran/corandata", rundir=".", dataext=".spi"):
        """
        Create the hierarchical clustering dendrogram doc file with SPIDER "CL HC".

        When the dendrogram doc file for this factor list already exists it is
        reused and SPIDER is not started.

        inputs:
                numpart: number of particles; the minimum class size is
                        derived from it, so it must be a number whenever the
                        dendrogram has to be computed (the None default only
                        works if the file exists)
                factorlist: factor numbers; each one is given the weight 1.0
                corandata: path to the coran data ("_IMC" is appended)
                rundir: output directory for the dendrogram files
                dataext: SPIDER data file extension
        output:
                path of the dendrogram doc file (the factor key is part of
                its name, it is not returned separately)
        """
        factorstr, factorkey = operations.intListToString(factorlist)

        dendrogramfile = rundir+"/dendrogramdoc"+factorkey+dataext
        if os.path.isfile(dendrogramfile):
                apDisplay.printMsg("Dendrogram file already exists, skipping processing "+dendrogramfile)
                return dendrogramfile

        # computed before SPIDER is started so that an unusable numpart fails
        # here instead of leaving an open SPIDER session behind
        minclasssize = "%.4f" % (numpart*0.0001+2.0)
        psfile = rundir+"/dendrogram.ps"

        apDisplay.printMsg("Creating dendrogram file: "+dendrogramfile)
        ### do hierarchical clustering
        mySpider = spyder.SpiderSession(dataext=dataext, logo=False, log=True)
        mySpider.toSpider(
                "CL HC",
                spyder.fileFilter(corandata)+"_IMC", # path to coran data
                factorstr, # factor string
        )
        ## weight for each factor
        for _ in factorlist:
                mySpider.toSpiderQuiet("1.0")
        mySpider.toSpider(
                "5",         #use Ward's method
                "T", minclasssize, psfile, #dendrogram image file
                "Y", spyder.fileFilter(dendrogramfile), #dendrogram doc file
        )
        mySpider.close()

        if not os.path.isfile(dendrogramfile):
                apDisplay.printError("SPIDER dendrogram creation (CL HC) failed, too many particles??")
        # convert the postscript file SPIDER was asked to write; this used to be
        # hard-coded to "cluster/dendrogram.ps" regardless of rundir
        apImage.convertPostscriptToPng(psfile, "dendrogram.png")

        return dendrogramfile
Ejemplo n.º 8
0
    def insertRctRun(self, volfile):
        """
        Build the database records describing this RCT run and its output
        volume; they are inserted only when self.params['commit'] is True.

        volfile: path of the volume file; its directory, base name and
            md5 sum are stored on the density record
        """
        params = self.params

        ### resolution records: FSC (with data file name) and Rmeasure (no file)
        fscres = appiondata.ApResolutionData()
        fscres['type'] = "fsc"
        fscres['half'] = self.fscresolution
        fscres['fscfile'] = "fscdata" + self.timestamp + ".fsc"
        rmeasres = appiondata.ApResolutionData()
        rmeasres['type'] = "rmeasure"
        rmeasres['half'] = self.rmeasureresolution
        rmeasres['fscfile'] = None

        ### rct run record
        runrec = appiondata.ApRctRunData()
        runrec['runname'] = params['runname']
        # NOTE(review): other call sites in this file unpack two values from
        # intListToString; here the whole return value is stored -- confirm
        runrec['classnums'] = operations.intListToString(self.classlist)
        # fields copied straight from the run parameters: (record key, parameter key)
        for reckey, paramkey in (
            ('numiter', 'numiters'),
            ('maskrad', 'radius'),
            ('lowpassvol', 'lowpassvol'),
            ('highpasspart', 'highpasspart'),
            ('lowpasspart', 'lowpasspart'),
            ('median', 'median'),
            ('description', 'description'),
        ):
            runrec[reckey] = params[paramkey]
        rundir = os.path.abspath(params['rundir'])
        runrec['path'] = appiondata.ApPathData(path=rundir)
        runrec['alignstack'] = self.alignstackdata
        runrec['tiltstack'] = apStack.getOnlyStackData(params['tiltstackid'])
        runrec['numpart'] = self.numpart
        runrec['fsc_resolution'] = fscres
        runrec['rmeasure_resolution'] = rmeasres
        if params['commit'] is True:
            runrec.insert()

        ### 3d volume density record
        volrec = appiondata.Ap3dDensityData()
        volrec['rctrun'] = runrec
        voldir = os.path.dirname(os.path.abspath(volfile))
        volrec['path'] = appiondata.ApPathData(path=voldir)
        volrec['name'] = os.path.basename(volfile)
        volrec['hidden'] = False
        volrec['norm'] = True
        # NOTE(review): confirm which symmetry the record with id 25 denotes
        volrec['symmetry'] = appiondata.ApSymmetryData.direct_query(25)
        # stack pixel size scaled by the tilt binning factor
        stackapix = apStack.getStackPixelSizeFromStackId(params['tiltstackid'])
        volrec['pixelsize'] = stackapix * params['tiltbin']
        volrec['boxsize'] = self.getBoxSize()
        for reckey, paramkey in (
            ('lowpass', 'lowpassvol'),
            ('highpass', 'highpasspart'),
            ('mask', 'radius'),
            ('description', 'description'),
        ):
            volrec[reckey] = params[paramkey]
        volrec['resolution'] = self.fscresolution
        volrec['rmeasure'] = self.rmeasureresolution
        volrec['session'] = apStack.getSessionDataFromStackId(
            params['tiltstackid'])
        volrec['md5sum'] = apFile.md5sumfile(volfile)
        if params['commit'] is True:
            volrec.insert()
def kmeansCluster(alignedstack, numpart=None, numclasses=40, timestamp=None,
                factorlist=range(1,5), corandata="coran/corandata", dataext=".spi"):
        """
        Cluster particles with SPIDER k-means ("CL KM") and average every class.

        inputs:
                alignedstack: aligned particle stack, with or without the
                        dataext suffix
                numpart: not used by this routine
                numclasses: number of classes requested from CL KM
                timestamp: tag embedded in the output file names; a new one
                        is made with apParam.makeTimestamp() when None
                factorlist: factor numbers; each one is given the weight 1.0
                corandata: path to the coran data ("_IMC" is appended)
                dataext: SPIDER data file extension
        outputs:
                (classavg, classvar): paths, without extension, of the class
                average and class variance stacks in the "cluster" directory;
                both are also converted to IMAGIC (.hed) with proc2d
        """
        if timestamp is None:
                timestamp = apParam.makeTimestamp()

        ### strip the data extension whatever its length (was fixed to 4 characters)
        if dataext and alignedstack.endswith(dataext):
                alignedstack = alignedstack[:-len(dataext)]

        rundir = "cluster"
        classavg = rundir+"/"+("classavgstack_%s_%03d" %  (timestamp, numclasses))
        classvar = rundir+"/"+("classvarstack_%s_%03d" %  (timestamp, numclasses))
        classdocbase = rundir+"/classdoc_"+timestamp+"_"
        allclassesdoc = rundir+("/allclassesdoc%04d" % (numclasses))
        apParam.createDirectory(rundir)
        for classnum in range(1, numclasses+1):
                ### un-stamped name, the only one that used to be cleaned up
                apFile.removeFile(rundir+("/classdoc%04d" % classnum)+dataext)
                ### stamped name that CL KM writes and AS R reads below; a
                ### leftover from a run with the same timestamp must not be reused
                apFile.removeFile(classdocbase+("%04d" % classnum)+dataext)
        apFile.removeFile(allclassesdoc+dataext)

        ### make list of factors
        factorstr, _factorkey = operations.intListToString(factorlist)

        ### do k-means clustering
        mySpider = spyder.SpiderSession(dataext=dataext, logo=True, log=False)
        mySpider.toSpider(
                "CL KM",
                corandata+"_IMC", # path to coran data
                str(numclasses), # num classes
                factorstr, # factor string
        )
        ## weight for each factor
        for _ in factorlist:
                mySpider.toSpiderQuiet("1.0")
        ## integer in 1..1000, presumably the CL KM random seed
        randnum = int(random.random()*1000) + 1
        mySpider.toSpider(
                str(randnum),
                classdocbase+"****", # class doc file
                allclassesdoc,   #clusterdoc file
        )
        mySpider.close()

        ### delete existing files
        sys.stderr.write("delete existing files")
        for dext in (".hed", ".img", dataext):
                apFile.removeFile(classavg+dext)
                apFile.removeFile(classvar+dext)
        ### same output as the former Python 2 only `print ""`
        sys.stdout.write("\n")

        mySpider = spyder.SpiderSession(dataext=dataext, logo=True, log=False)
        ### create class averages
        apDisplay.printMsg("Averaging particles into classes")
        for classnum in range(1, numclasses+1):
                mySpider.toSpiderQuiet(
                        "AS R",
                        spyder.fileFilter(alignedstack)+"@******",
                        classdocbase+("%04d" % classnum),
                        "A",
                        (classavg+"@%04d" % (classnum)),
                        (classvar+"@%04d" % (classnum)),
                )
                ### one progress dot per ten classes
                if classnum % 10 == 0:
                        sys.stderr.write(".")
                ### pause kept from the original; presumably lets SPIDER catch up
                time.sleep(1)
        mySpider.close()

        ### convert to IMAGIC; the stacks carry dataext, which used to be
        ### hard-coded as ".spi" here
        emancmd = "proc2d "+classavg+dataext+" "+classavg+".hed"
        apEMAN.executeEmanCmd(emancmd, verbose=False, showcmd=True)
        emancmd = "proc2d "+classvar+dataext+" "+classvar+".hed"
        apEMAN.executeEmanCmd(emancmd, verbose=False, showcmd=True)

        return classavg,classvar
        def start(self):
                """
                Cluster the aligned particles of this analysis run and build
                class averages for every requested number of classes.

                The IMAGIC aligned stack is first converted to a SPIDER stack
                with proc2d. Depending on self.params['method'] the particles
                are grouped by k-means, or by hierarchical clustering using a
                single dendrogram that is shared by all class counts. Run and
                stack records are inserted only when self.params['commit'] is
                True.

                self.params['numclasslist'] is a comma separated list of class
                counts; blanks around the numbers are ignored, empty entries
                are skipped and other invalid entries are skipped with a
                warning.
                """
                alignstackdata = self.analysisdata['alignstack']
                ### get original aligned stack name
                astack = alignstackdata['imagicfile']
                ### spider has problems with file name if it includes an "x#"
                ### NOTE(review): the rewritten name is also used as the proc2d
                ### input below -- confirm the IMAGIC file on disk carries it
                astack = re.sub(r'x(\d)', r'x-\1', astack)
                ### get original align stack
                imagicalignedstack = os.path.join(alignstackdata['path']['path'], astack)
                ### SPIDER copy: every "." of the path becomes "_", then ".spi" is appended
                alignedstack = re.sub(r"\.", "_", imagicalignedstack)+".spi"
                ### keep removing until a stale copy is really gone
                while os.path.isfile(alignedstack):
                        apFile.removeFile(alignedstack)
                emancmd = "proc2d %s %s spiderswap"%(imagicalignedstack, alignedstack)
                apEMAN.executeEmanCmd(emancmd, showcmd=True, verbose=True)

                ### get database information
                numpart = alignstackdata['num_particles']
                corandata = os.path.join(self.analysisdata['path']['path'],"coran/corandata")

                ### parse factor list
                factorlist = self.params['factorstr'].split(",")
                factorstr, _factorkey = operations.intListToString(factorlist)
                apDisplay.printMsg("using factorlist "+factorstr.replace(",", ", "))
                if len(factorlist) > self.analysisdata['coranrun']['num_factors']:
                        apDisplay.printError("Requested factor list is longer than available factors")

                if self.params['commit'] is True:
                        self.insertClusterRun(insert=True)
                else:
                        apDisplay.printWarning("not committing results to DB")

                numclasslist = self.params['numclasslist'].split(",")
                usekmeans = (self.params['method'] == "kmeans")
                if not usekmeans:
                        rundir = "cluster"
                        apParam.createDirectory(rundir)
                        ### step 1: use coran data to create hierarchy
                        dendrogramfile = classification.hierarchClusterProcess(numpart, factorlist, corandata, rundir, dataext=".spi")
                        ### step 2: assign particles to groups based on hierarchy (done per class count below)

                for item in numclasslist:
                        ### tolerate blanks around the numbers, e.g. "4, 8"
                        item = item.strip()
                        if not item:
                                continue
                        if not re.match(r"^[0-9]+$", item):
                                apDisplay.printWarning("skipping invalid number of classes: '"+item+"'")
                                continue
                        t0 = time.time()
                        numclass = int(item)
                        apDisplay.printColor("\n============================\nprocessing class averages for "
                                +str(numclass)+" classes\n============================\n", "green")

                        #run the classification
                        if usekmeans:
                                apDisplay.printMsg("Using the k-means clustering method")
                                classavg,classvar = classification.kmeansCluster(alignedstack, numpart, numclasses=numclass,
                                        timestamp=self.timestamp, factorlist=factorlist, corandata=corandata, dataext=".spi")
                        else:
                                apDisplay.printMsg("Using the hierarch clustering method")
                                classavg,classvar = classification.hierarchClusterClassify(alignedstack, dendrogramfile, numclass,
                                        self.timestamp, rundir, dataext=".spi")
                        if self.params['commit'] is True:
                                self.insertClusterStack(classavg, classvar, numclass, insert=True)
                        else:
                                apDisplay.printWarning("not committing results to DB")

                        apDisplay.printMsg("Completed "+str(numclass)+" classes in "+apDisplay.timeString(time.time()-t0))
def kmeansCluster(alignedstack,
                  numpart=None,
                  numclasses=40,
                  timestamp=None,
                  factorlist=range(1, 5),
                  corandata="coran/corandata",
                  dataext=".spi"):
    """
    Cluster particles with SPIDER k-means ("CL KM") and average every class.

    inputs:
        alignedstack: aligned particle stack, with or without the dataext
            suffix
        numpart: not used by this routine
        numclasses: number of classes requested from CL KM
        timestamp: tag embedded in the output file names; a new one is made
            with apParam.makeTimestamp() when None
        factorlist: factor numbers; each one is given the weight 1.0
        corandata: path to the coran data ("_IMC" is appended)
        dataext: SPIDER data file extension
    outputs:
        (classavg, classvar): paths, without extension, of the class average
        and class variance stacks in the "cluster" directory; both are also
        converted to IMAGIC (.hed) with proc2d
    """
    if timestamp is None:
        timestamp = apParam.makeTimestamp()

    ### strip the data extension whatever its length (was fixed to 4 characters)
    if dataext and alignedstack.endswith(dataext):
        alignedstack = alignedstack[:-len(dataext)]

    rundir = "cluster"
    classavg = rundir + "/" + ("classavgstack_%s_%03d" %
                               (timestamp, numclasses))
    classvar = rundir + "/" + ("classvarstack_%s_%03d" %
                               (timestamp, numclasses))
    classdocbase = rundir + "/classdoc_" + timestamp + "_"
    allclassesdoc = rundir + ("/allclassesdoc%04d" % (numclasses))
    apParam.createDirectory(rundir)
    for classnum in range(1, numclasses + 1):
        ### un-stamped name, the only one that used to be cleaned up
        apFile.removeFile(rundir + ("/classdoc%04d" % classnum) + dataext)
        ### stamped name that CL KM writes and AS R reads below; a leftover
        ### from a run with the same timestamp must not be reused
        apFile.removeFile(classdocbase + ("%04d" % classnum) + dataext)
    apFile.removeFile(allclassesdoc + dataext)

    ### make list of factors
    factorstr, _factorkey = operations.intListToString(factorlist)

    ### do k-means clustering
    mySpider = spyder.SpiderSession(dataext=dataext, logo=True, log=False)
    mySpider.toSpider(
        "CL KM",
        corandata + "_IMC",  # path to coran data
        str(numclasses),  # num classes
        factorstr,  # factor string
    )
    ## weight for each factor
    for _ in factorlist:
        mySpider.toSpiderQuiet("1.0")
    ## integer in 1..1000, presumably the CL KM random seed
    randnum = int(random.random() * 1000) + 1
    mySpider.toSpider(
        str(randnum),
        classdocbase + "****",  # class doc file
        allclassesdoc,  #clusterdoc file
    )
    mySpider.close()

    ### delete existing files
    sys.stderr.write("delete existing files")
    for dext in (".hed", ".img", dataext):
        apFile.removeFile(classavg + dext)
        apFile.removeFile(classvar + dext)
    ### same output as the former Python 2 only `print ""`
    sys.stdout.write("\n")

    mySpider = spyder.SpiderSession(dataext=dataext, logo=True, log=False)
    ### create class averages
    apDisplay.printMsg("Averaging particles into classes")
    for classnum in range(1, numclasses + 1):
        mySpider.toSpiderQuiet(
            "AS R",
            spyder.fileFilter(alignedstack) + "@******",
            classdocbase + ("%04d" % classnum),
            "A",
            (classavg + "@%04d" % (classnum)),
            (classvar + "@%04d" % (classnum)),
        )
        ### one progress dot per ten classes
        if classnum % 10 == 0:
            sys.stderr.write(".")
        ### pause kept from the original; presumably lets SPIDER catch up
        time.sleep(1)
    mySpider.close()

    ### convert to IMAGIC; the stacks carry dataext, which used to be
    ### hard-coded as ".spi" here
    emancmd = "proc2d " + classavg + dataext + " " + classavg + ".hed"
    apEMAN.executeEmanCmd(emancmd, verbose=False, showcmd=True)
    emancmd = "proc2d " + classvar + dataext + " " + classvar + ".hed"
    apEMAN.executeEmanCmd(emancmd, verbose=False, showcmd=True)

    return classavg, classvar
Ejemplo n.º 12
0
    def start(self):
        """
        Cluster the aligned particles of this analysis run and build class
        averages for every requested number of classes.

        The IMAGIC aligned stack is first converted to a SPIDER stack with
        proc2d. Depending on self.params['method'] the particles are grouped
        by k-means, or by hierarchical clustering using a single dendrogram
        that is shared by all class counts. Run and stack records are
        inserted only when self.params['commit'] is True.

        self.params['numclasslist'] is a comma separated list of class
        counts; blanks around the numbers are ignored, empty entries are
        skipped and other invalid entries are skipped with a warning.
        """
        alignstackdata = self.analysisdata['alignstack']
        ### get original aligned stack name
        astack = alignstackdata['imagicfile']
        ### spider has problems with file name if it includes an "x#"
        ### NOTE(review): the rewritten name is also used as the proc2d input
        ### below -- confirm the IMAGIC file on disk carries it
        astack = re.sub(r'x(\d)', r'x-\1', astack)
        ### get original align stack
        imagicalignedstack = os.path.join(alignstackdata['path']['path'],
                                          astack)
        ### SPIDER copy: every "." of the path becomes "_", then ".spi" is appended
        alignedstack = re.sub(r"\.", "_", imagicalignedstack) + ".spi"
        ### keep removing until a stale copy is really gone
        while os.path.isfile(alignedstack):
            apFile.removeFile(alignedstack)
        emancmd = "proc2d %s %s spiderswap" % (imagicalignedstack,
                                               alignedstack)
        apEMAN.executeEmanCmd(emancmd, showcmd=True, verbose=True)

        ### get database information
        numpart = alignstackdata['num_particles']
        corandata = os.path.join(self.analysisdata['path']['path'],
                                 "coran/corandata")

        ### parse factor list
        factorlist = self.params['factorstr'].split(",")
        factorstr, _factorkey = operations.intListToString(factorlist)
        apDisplay.printMsg("using factorlist " + factorstr.replace(",", ", "))
        if len(factorlist) > self.analysisdata['coranrun']['num_factors']:
            apDisplay.printError(
                "Requested factor list is longer than available factors")

        if self.params['commit'] is True:
            self.insertClusterRun(insert=True)
        else:
            apDisplay.printWarning("not committing results to DB")

        numclasslist = self.params['numclasslist'].split(",")
        usekmeans = (self.params['method'] == "kmeans")
        if not usekmeans:
            rundir = "cluster"
            apParam.createDirectory(rundir)
            ### step 1: use coran data to create hierarchy
            dendrogramfile = classification.hierarchClusterProcess(
                numpart, factorlist, corandata, rundir, dataext=".spi")
            ### step 2: assign particles to groups based on hierarchy
            ### (done per class count below)

        for item in numclasslist:
            ### tolerate blanks around the numbers, e.g. "4, 8"
            item = item.strip()
            if not item:
                continue
            if not re.match(r"^[0-9]+$", item):
                apDisplay.printWarning(
                    "skipping invalid number of classes: '" + item + "'")
                continue
            t0 = time.time()
            numclass = int(item)
            apDisplay.printColor(
                "\n============================\nprocessing class averages for "
                + str(numclass) + " classes\n============================\n",
                "green")

            #run the classification
            if usekmeans:
                apDisplay.printMsg("Using the k-means clustering method")
                classavg, classvar = classification.kmeansCluster(
                    alignedstack,
                    numpart,
                    numclasses=numclass,
                    timestamp=self.timestamp,
                    factorlist=factorlist,
                    corandata=corandata,
                    dataext=".spi")
            else:
                apDisplay.printMsg("Using the hierarch clustering method")
                classavg, classvar = classification.hierarchClusterClassify(
                    alignedstack,
                    dendrogramfile,
                    numclass,
                    self.timestamp,
                    rundir,
                    dataext=".spi")
            if self.params['commit'] is True:
                self.insertClusterStack(classavg,
                                        classvar,
                                        numclass,
                                        insert=True)
            else:
                apDisplay.printWarning("not committing results to DB")

            apDisplay.printMsg("Completed " + str(numclass) + " classes in " +
                               apDisplay.timeString(time.time() - t0))