def centerParticles(stack, mask=None, maxshift=None):
    """
    Center the particles of an IMAGIC stack using EMAN's cenalignint.

    The stack is processed in fractions sized so each fits in free
    memory (EMAN FAQ: need at least 3x the file size in RAM).

    stack    -- path to the stack file
    mask     -- optional mask radius forwarded to cenalignint
    maxshift -- optional maximum allowed shift forwarded to cenalignint
    """
    apDisplay.printMsg("Centering stack: "+stack)
    stacksize = apFile.stackSize(stack)
    ### mem.free() reports kilobytes; convert memory to bytes
    freemem = mem.free()*1024
    apDisplay.printMsg("file is %s, mem is %s"
        %(apDisplay.bytes(stacksize), apDisplay.bytes(freemem)))
    ### from EMAN FAQ: need to have at least 3x as much ram as the size of the file
    memsize = freemem/3.0
    ### clamp to at least one fraction: ceil() yields 0 for an empty
    ### stack, which would silently skip the centering loop entirely
    numfrac = max(1, int(math.ceil(stacksize/memsize)))
    apDisplay.printMsg("file is %s, will be split into %d fractions"
        %(apDisplay.bytes(stacksize), numfrac))
    for i in range(numfrac):
        emancmd = "cenalignint "+stack
        if numfrac > 1:
            emancmd += " frac="+str(i)+"/"+str(numfrac)
        if mask is not None:
            emancmd += " mask="+str(mask)
        if maxshift is not None:
            emancmd += " maxshift="+str(maxshift)
        apEMAN.executeEmanCmd(emancmd, verbose=False, showcmd=True)
    return
def initValues(self, stackfile, numrequest=None):
    """
    Measure free memory and stack geometry, then decide how many
    particles can be handled per chunk.

    Sets self.freememory, self.boxsize, self.memperpart,
    self.maxpartinmem, self.partallowed, self.numpart and
    self.stepsize as side effects.
    """
    ### check for stack
    if not os.path.isfile(stackfile):
        apDisplay.printError("stackfile does not exist: "+stackfile)
    ### amount of free memory on machine (converted to bytes)
    self.freememory = mem.free()*1024
    self.message("Free memory: %s"%(apDisplay.bytes(self.freememory)))
    ### box size of particle
    self.boxsize = apFile.getBoxSize(stackfile)[0]
    self.message("Box size: %d"%(self.boxsize))
    ### amount of memory used per particle (4 bytes per pixel)
    self.memperpart = self.boxsize**2 * 4.0
    self.message("Memory used per part: %s"%(apDisplay.bytes(self.memperpart)))
    ### maximum number of particles that fit into memory
    self.maxpartinmem = self.freememory/self.memperpart
    self.message("Max particles in memory: %d"%(self.maxpartinmem))
    ### conservative: only allow a twentieth of that to be resident
    self.partallowed = int(self.maxpartinmem/20.0)
    self.message("Particles allowed in memory: %d"%(self.partallowed))
    ### clamp the particle count to what the stack actually contains
    stackcount = apFile.numImagesInStack(stackfile)
    if self.numpart is None:
        self.numpart = stackcount
    else:
        self.numpart = min(self.numpart, stackcount)
    if numrequest is not None:
        self.numpart = min(self.numpart, numrequest)
    self.message("Number of particles in stack: %d"%(self.numpart))
    ### split into chunks only when the memory allowance is exceeded
    if self.numpart <= self.partallowed:
        chunkcount = 1
        self.stepsize = self.numpart
    else:
        chunkcount = math.ceil(self.numpart/float(self.partallowed))
        self.stepsize = int(self.numpart/chunkcount)
    self.message("Particle loop num chunks: %d"%(chunkcount))
    self.message("Particle loop step size: %d"%(self.stepsize))
def getParticlesPerCycle(self, stackfile):
    """
    it more efficient to process X particles and write them to disk
    rather than write each particle to disk each time.
    particles are read using a memory map (numpy.memmap), so we
    can pretend to continuously read all into memory

    Returns the number of particles to process per cycle; also sets
    self.boxsize as a side effect.
    """
    ### amount of free memory on machine (converted to bytes)
    freebytes = mem.free()*1024
    self.message("Free memory: %s"%(apDisplay.bytes(freebytes)))
    ### box size of particle
    self.boxsize = apFile.getBoxSize(stackfile)[0]
    self.message("Box size: %d"%(self.boxsize))
    ### amount of memory used per particle (4 bytes per pixel)
    perpart = self.boxsize**2 * 4.0
    self.message("Memory used per part: %s"%(apDisplay.bytes(perpart)))
    ### maximum number of particles that fit into memory
    fitinmem = freebytes/perpart
    self.message("Max particles in memory: %d"%(fitinmem))
    ### conservative: only allow a twentieth of that to be resident
    allowed = int(fitinmem/20.0)
    self.message("Particles allowed in memory: %d"%(allowed))
    ### number of particles requested for processing
    total = self.params['last']
    if total <= allowed:
        cycles = 1
        stepsize = total
    else:
        cycles = math.ceil(total/float(allowed))
        stepsize = int(total/cycles)
    self.message("Particle loop num cycles: %d"%(cycles))
    self.message("Particle loop step size: %d"%(stepsize))
    return stepsize
def initValues(self, stackfile, numrequest=None):
    """
    Compute memory-aware chunking parameters for *stackfile*.

    Stores free memory, per-particle memory cost, the allowed number
    of in-memory particles, the effective particle count and the loop
    step size on self.
    """
    ### check for stack
    if not os.path.isfile(stackfile):
        apDisplay.printError("stackfile does not exist: " + stackfile)

    ### amount of free memory on machine (converted to bytes)
    self.freememory = mem.free() * 1024
    self.message("Free memory: %s" % (apDisplay.bytes(self.freememory)))

    ### box size of particle
    self.boxsize = apFile.getBoxSize(stackfile)[0]
    self.message("Box size: %d" % (self.boxsize))

    ### amount of memory used per particle (4 bytes per pixel)
    self.memperpart = self.boxsize**2 * 4.0
    self.message("Memory used per part: %s" % (apDisplay.bytes(self.memperpart)))

    ### maximum number of particles that fit into memory
    self.maxpartinmem = self.freememory / self.memperpart
    self.message("Max particles in memory: %d" % (self.maxpartinmem))

    ### only use a twentieth of the theoretical maximum
    self.partallowed = int(self.maxpartinmem / 20.0)
    self.message("Particles allowed in memory: %d" % (self.partallowed))

    ### never exceed the stack size nor the caller's request
    available = apFile.numImagesInStack(stackfile)
    if self.numpart is None:
        self.numpart = available
    else:
        self.numpart = min(self.numpart, available)
    if numrequest is not None:
        self.numpart = min(self.numpart, numrequest)
    self.message("Number of particles in stack: %d" % (self.numpart))

    ### derive the chunking plan
    if self.numpart <= self.partallowed:
        nchunks = 1
        self.stepsize = self.numpart
    else:
        nchunks = math.ceil(self.numpart / float(self.partallowed))
        self.stepsize = int(self.numpart / nchunks)
    self.message("Particle loop num chunks: %d" % (nchunks))
    self.message("Particle loop step size: %d" % (self.stepsize))
def convertStackToXmippData(instack, outdata, maskpixrad, boxsize, numpart=None):
    """
    From http://xmipp.cnb.csic.es/twiki/bin/view/Xmipp/Img2Data

    This program applies a mask to a set of images. This set is given
    by a selfile. After applying the mask the result is storaged as a
    vector in the following format:
    The first line indicates the dimension of the vectors and the
    number of vectors.
    The rest of the lines are the feature vectors.
    Each line is a vector and each column is a vectors' component
    (pixels values inside the mask).

    Returns the path to the written data file (outdata).
    """
    apDisplay.printMsg("Convert stack file to Xmipp data file")
    maskfile = "circlemask.spi"
    operations.createMask(maskfile, maskpixrad, boxsize)
    partlistdocfile = breakupStackIntoSingleFiles(instack, numpart=numpart)
    convertcmd = "xmipp_convert_img2data -i %s -mask %s -o %s"%(partlistdocfile, maskfile, outdata)
    proc = subprocess.Popen(convertcmd, shell=True,
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    ### use communicate(), not wait(): wait() with PIPEd streams can
    ### deadlock once the child fills an OS pipe buffer
    proc.communicate()
    outfilesize = apFile.fileSize(outdata)
    partfilesize = apFile.fileSize(partlistdocfile)
    ### sanity check: the unmasked data file must be larger than the docfile
    if outfilesize < 2*partfilesize:
        apDisplay.printError("Outdata conversion did not work, data file smaller than docfile, %s < %s"
            %(apDisplay.bytes(outfilesize), apDisplay.bytes(partfilesize)))
    apFile.removeFilePattern("partfiles/*")
    return outdata
def readImagic(filename, first=1, last=None, msg=True):
    """
    Rudimentary Imagic stack reader
    Could be improved with more sophisticated error testing and header parsing
    Currently only reads image data as floats
    Currently reads header information for only first image in stack
    """
    t0 = time.time()
    ### particle window is 1-based and inclusive
    if first < 1:
        apDisplay.printError("particle numbering starts at 1")
    if last is not None and first > last:
        apDisplay.printError("requested first particle %d is greater than last particle %d"%(first,last))
    if msg is True:
        apDisplay.printMsg("reading stack from disk into memory: "+os.path.basename(filename))
        if last is not None:
            apDisplay.printMsg("particles %d through %d"%(first, last))
    ### IMAGIC stacks are a .hed/.img file pair sharing one root name
    root=os.path.splitext(filename)[0]
    headerfilename=root + ".hed"
    datafilename=root + ".img"
    ### check file size, no more than 2 GB is possible
    ### it takes double memory on machine to read stack
    filesize = apFile.fileSize(datafilename)
    # NOTE(review): first defaults to 1, so `first is None` only holds when a
    # caller passes None explicitly; verify this whole-stack guard is reachable
    if first is None and last is None and filesize > bytelimit:
        apDisplay.printError("Stack is too large to read %s"%(apDisplay.bytes(filesize)))
    ### read stack header
    headerdict = readImagicHeader(headerfilename)
    ### determine amount of memory needed (4 bytes per pixel per image)
    partbytes = 4*headerdict['rows']*headerdict['lines']
    ### clamp `last` to the number of images recorded in the header
    if last is None:
        last = headerdict['nimg']
    elif last > headerdict['nimg']:
        apDisplay.printWarning("requested particle %d from stack of length %d"%(last, headerdict['nimg']))
        last = headerdict['nimg']
    numpart = last - first + 1
    ### the data file itself may hold fewer images than requested
    if partbytes*numpart > filesize:
        apDisplay.printError("requested particle %d from stack of length %d"%(last, filesize/partbytes))
    if partbytes*numpart > bytelimit:
        apDisplay.printError("Stack is too large to read %d particles, requesting %s"
            %(numpart, apDisplay.bytes(partbytes*numpart)))
    ### read stack images
    images = readImagicData(datafilename, headerdict, first, numpart)
    stack = {'header': headerdict, 'images': images}
    if msg is True:
        apDisplay.printMsg("read %d particles equaling %s in size"%(numpart, apDisplay.bytes(partbytes*numpart)))
        apDisplay.printMsg("finished in "+apDisplay.timeString(time.time()-t0))
    return stack
def appendParticleListToStackFile(partlist, mergestackfile, msg=True):
    """
    takes a list of 2D numpy arrays and add to stack file
    due to hack, we must re-number the stack later

    partlist       -- list of 2D numpy arrays (one per particle)
    mergestackfile -- target IMAGIC stack (.hed/.img pair)
    msg            -- print size-check messages when True

    Returns True on success; raises via apDisplay.printError on a
    size mismatch.
    """
    ### initialization
    root = os.path.splitext(mergestackfile)[0]
    mergeheaderfile = root + ".hed"
    mergedatafile = root + ".img"

    ### merge data files
    premergesize = apFile.fileSize(mergedatafile)
    addsize = 0
    mergedata = open(mergedatafile, 'ab')
    for partarray in partlist:
        part32bit = numpy.asarray(partarray, dtype=numpy.float32)
        partstr = part32bit.tostring()
        ### accumulate exactly what is written; the old code measured a
        ### len(partlist)-times repeated copy of only the LAST particle
        ### string and raised a NameError when partlist was empty
        addsize += len(partstr)
        mergedata.write(partstr)
    mergedata.close()
    finalsize = apFile.fileSize(mergedatafile)
    if finalsize != addsize + premergesize:
        apDisplay.printError(
            "size mismatch %s vs. %s + %s = %s"
            % (apDisplay.bytes(finalsize), apDisplay.bytes(addsize),
            apDisplay.bytes(premergesize),
            apDisplay.bytes(premergesize + addsize)))
    elif msg is True:
        apDisplay.printMsg(
            "size match %s vs. %s + %s = %s"
            % (apDisplay.bytes(finalsize), apDisplay.bytes(addsize),
            apDisplay.bytes(premergesize),
            apDisplay.bytes(premergesize + addsize)))

    ### merge header files
    premergenumpart = apFile.numImagesInStack(mergeheaderfile)
    mergehead = open(mergeheaderfile, 'ab')
    count = 0
    for partarray in partlist:
        count += 1
        headerstr = makeHeaderStrFromArray(premergenumpart + count, partarray)
        mergehead.write(headerstr)
    mergehead.close()
    ### re-number the whole stack (see docstring hack note)
    numberStackFile(mergeheaderfile, msg=msg)
    finalnumpart = apFile.numImagesInStack(mergeheaderfile)
    addpart = len(partlist)
    if finalnumpart != addpart + premergenumpart:
        apDisplay.printError("size mismatch %d vs. %d + %d = %d"
            % (finalnumpart, addpart, premergenumpart,
            addpart + premergenumpart))
    elif msg is True:
        apDisplay.printMsg("size match %d vs. %d + %d = %d"
            % (finalnumpart, addpart, premergenumpart,
            addpart + premergenumpart))
    return True
def appendStackFileToStackFile(stackfile, mergestackfile, msg=True):
    """
    takes two stack files and merges them into second file

    stackfile      -- source IMAGIC stack (.hed/.img pair) to append
    mergestackfile -- destination IMAGIC stack, modified in place
    msg            -- print size-check messages when True

    Returns True on success (consistent with
    appendParticleListToStackFile); raises via apDisplay.printError
    on a size mismatch.
    """
    ### initialization
    root = os.path.splitext(mergestackfile)[0]
    mergeheaderfile = root + ".hed"
    mergedatafile = root + ".img"
    root = os.path.splitext(stackfile)[0]
    stackheaderfile = root + ".hed"
    stackdatafile = root + ".img"

    ### merge data files
    addnumpart = apFile.numImagesInStack(stackheaderfile)
    addsize = apFile.fileSize(stackdatafile)
    premergenumpart = apFile.numImagesInStack(mergeheaderfile)
    premergesize = apFile.fileSize(mergedatafile)
    fout = open(mergedatafile, 'ab')
    fin = open(stackdatafile, 'rb')
    shutil.copyfileobj(fin, fout, 65536)
    fin.close()
    fout.close()
    finalsize = apFile.fileSize(mergedatafile)
    if finalsize != addsize + premergesize:
        apDisplay.printError(
            "size mismatch %s vs. %s + %s = %s"
            % (apDisplay.bytes(finalsize), apDisplay.bytes(addsize),
            apDisplay.bytes(premergesize),
            apDisplay.bytes(premergesize + addsize)))
    elif msg is True:
        apDisplay.printMsg(
            "size match %s vs. %s + %s = %s"
            % (apDisplay.bytes(finalsize), apDisplay.bytes(addsize),
            apDisplay.bytes(premergesize),
            apDisplay.bytes(premergesize + addsize)))

    ### merge header files
    fout = open(mergeheaderfile, 'ab')
    fin = open(stackheaderfile, 'rb')
    shutil.copyfileobj(fin, fout, 65536)
    fin.close()
    fout.close()
    ### re-number the merged stack headers
    numberStackFile(mergeheaderfile, msg=msg)
    finalnumpart = apFile.numImagesInStack(mergeheaderfile)
    if finalnumpart != addnumpart + premergenumpart:
        apDisplay.printError("size mismatch %d vs. %d + %d = %d"
            % (finalnumpart, addnumpart, premergenumpart,
            addnumpart + premergenumpart))
    elif msg is True:
        apDisplay.printMsg("size match %d vs. %d + %d = %d"
            % (finalnumpart, addnumpart, premergenumpart,
            addnumpart + premergenumpart))
    return True
def setIBLOW(self):
    """
    IBLOW expands the volume in memory, larger is faster,
    but needs more mem; can be 1, 2 or 4
    """
    ### step down 4 -> 2 -> 1 until the estimate fits under 4GB
    ### (1 is kept even if it still exceeds the limit)
    for candidate in (4, 2, 1):
        self.iblow = candidate
        if self.calcMemNeeded() <= 4e9:
            break
    ### more than 40GB need then die
    if self.calcMemNeeded() > 40e9:
        apDisplay.printError("%s of memory is required which is too much, reduce box size or recons per processor"
            %(apDisplay.bytes(self.calcMemNeeded())))
    apDisplay.printMsg("IBLOW set to %d, requiring %s memory"
        %(self.iblow, apDisplay.bytes(self.calcMemNeeded())))
def convertStackToXmippData(instack, outdata, maskpixrad, boxsize, numpart=None):
    """
    From http://xmipp.cnb.csic.es/twiki/bin/view/Xmipp/Img2Data

    This program applies a mask to a set of images. This set is given
    by a selfile. After applying the mask the result is storaged as a
    vector in the following format:
    The first line indicates the dimension of the vectors and the
    number of vectors.
    The rest of the lines are the feature vectors.
    Each line is a vector and each column is a vectors' component
    (pixels values inside the mask).

    Returns the path to the written data file (outdata).
    """
    apDisplay.printMsg("Convert stack file to Xmipp data file")
    maskfile = "circlemask.spi"
    operations.createMask(maskfile, maskpixrad, boxsize)
    partlistdocfile = breakupStackIntoSingleFiles(instack, numpart=numpart)
    convertcmd = "xmipp_convert_img2data -i %s -mask %s -o %s" % (
        partlistdocfile, maskfile, outdata)
    proc = subprocess.Popen(
        convertcmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    ### use communicate(), not wait(): wait() with PIPEd streams can
    ### deadlock once the child fills an OS pipe buffer
    proc.communicate()
    outfilesize = apFile.fileSize(outdata)
    partfilesize = apFile.fileSize(partlistdocfile)
    ### sanity check: the unmasked data file must be larger than the docfile
    if outfilesize < 2 * partfilesize:
        apDisplay.printError(
            "Outdata conversion did not work, data file smaller than docfile, %s < %s"
            % (apDisplay.bytes(outfilesize), apDisplay.bytes(partfilesize)))
    apFile.removeFilePattern("partfiles/*")
    return outdata
def setupMultiNode(self):
    """
    Write the main multi-node cluster job file (frealign.run.job).

    The commented-out PBS directives are kept for reference; only the
    remote-cluster tar extraction line is currently emitted.
    """
    self.mainjobfile = "frealign.run.job"
    mainf = open(self.mainjobfile, "w")
    # mainf.write("#PBS -l nodes=%d:ppn=%d\n"%(self.params['nodes'], self.params['ppn']))
    memrequest = self.calcMemNeeded()
    apDisplay.printMsg("requesting %s of memory" % (apDisplay.bytes(memrequest)))
    # mainf.write("#PBS -l mem=%s\n"%(apDisplay.clusterBytes(memrequest)))
    # mainf.write("#PBS -m e\n")
    # mainf.write("#PBS -r n\n")
    # mainf.write("#PBS -j oe\n")
    # mainf.write("\n")
    ### if local cluster
    # mainf.write("cd %s\n"%(self.params['rundir']))
    ### elseif remote cluster
    mainf.write("tar -xkf %s.tar\n" % (self.params["runname"]))
    mainf.write("\n")
    ### close so the job file is flushed to disk before submission;
    ### the original left the handle open and relied on GC to flush
    mainf.close()
def setupMultiNode(self):
    """
    Write the main multi-node cluster job file (frealign.run.job).

    The commented-out PBS directives are kept for reference; only the
    remote-cluster tar extraction line is currently emitted.
    """
    self.mainjobfile = 'frealign.run.job'
    mainf = open(self.mainjobfile, 'w')
    #mainf.write("#PBS -l nodes=%d:ppn=%d\n"%(self.params['nodes'], self.params['ppn']))
    memrequest = self.calcMemNeeded()
    apDisplay.printMsg("requesting %s of memory"%(apDisplay.bytes(memrequest)))
    #mainf.write("#PBS -l mem=%s\n"%(apDisplay.clusterBytes(memrequest)))
    #mainf.write("#PBS -m e\n")
    #mainf.write("#PBS -r n\n")
    #mainf.write("#PBS -j oe\n")
    #mainf.write("\n")
    ### if local cluster
    #mainf.write("cd %s\n"%(self.params['rundir']))
    ### elseif remote cluster
    mainf.write("tar -xkf %s.tar\n"%(self.params['runname']))
    mainf.write("\n")
    ### close so the job file is flushed to disk before submission;
    ### the original left the handle open and relied on GC to flush
    mainf.close()
def readImagic(filename, first=1, last=None, msg=True):
    """
    Rudimentary Imagic stack reader
    Could be improved with more sophisticated error testing and header parsing
    Currently only reads image data as floats
    Currently reads header information for only first image in stack
    """
    starttime = time.time()
    ### validate the requested 1-based, inclusive particle window
    if first < 1:
        apDisplay.printError("particle numbering starts at 1")
    if last is not None and first > last:
        apDisplay.printError(
            "requested first particle %d is greater than last particle %d"
            % (first, last))
    if msg is True:
        apDisplay.printMsg("reading stack from disk into memory: "
            + os.path.basename(filename))
        if last is not None:
            apDisplay.printMsg("particles %d through %d" % (first, last))
    ### an IMAGIC stack is a .hed/.img file pair sharing one root
    stackroot = os.path.splitext(filename)[0]
    headerfilename = stackroot + ".hed"
    datafilename = stackroot + ".img"
    ### check file size, no more than 2 GB is possible
    ### it takes double memory on machine to read stack
    filesize = apFile.fileSize(datafilename)
    if first is None and last is None and filesize > bytelimit:
        apDisplay.printError("Stack is too large to read %s"
            % (apDisplay.bytes(filesize)))
    ### read stack header
    headerdict = readImagicHeader(headerfilename)
    ### 4 bytes per pixel per image
    partbytes = 4 * headerdict['rows'] * headerdict['lines']
    ### clamp `last` to the image count recorded in the header
    if last is None:
        last = headerdict['nimg']
    elif last > headerdict['nimg']:
        apDisplay.printWarning(
            "requested particle %d from stack of length %d"
            % (last, headerdict['nimg']))
        last = headerdict['nimg']
    numpart = last - first + 1
    if partbytes * numpart > filesize:
        apDisplay.printError("requested particle %d from stack of length %d"
            % (last, filesize / partbytes))
    if partbytes * numpart > bytelimit:
        apDisplay.printError(
            "Stack is too large to read %d particles, requesting %s"
            % (numpart, apDisplay.bytes(partbytes * numpart)))
    ### read stack images
    images = readImagicData(datafilename, headerdict, first, numpart)
    stack = {'header': headerdict, 'images': images}
    if msg is True:
        apDisplay.printMsg("read %d particles equaling %s in size"
            % (numpart, apDisplay.bytes(partbytes * numpart)))
        apDisplay.printMsg("finished in "
            + apDisplay.timeString(time.time() - starttime))
    return stack
def mergeStacks(stacklist, mergestack):
    """
    Merge a list of IMAGIC stacks into a single stack.

    stacklist  -- list of input stack file names (.hed/.img pairs)
    mergestack -- output stack file name, overwritten if present

    Concatenates all .img data files, then rewrites every 1024-byte
    header record so image numbering is continuous across the merge.
    Raises via apDisplay.printError on any size mismatch.
    """
    ### initialization
    t0 = time.time()
    apFile.removeStack(mergestack)
    root = os.path.splitext(mergestack)[0]
    mergeheader = root+".hed"
    mergedata = root+".img"

    ### merge data files
    fout = open(mergedata, 'wb')
    numpart = 0
    totalsize = 0
    for stackfile in stacklist:
        stackdatafile = os.path.splitext(stackfile)[0]+ ".img"
        ### size checks
        npart = apFile.numImagesInStack(stackdatafile)
        size = apFile.fileSize(stackdatafile)
        apDisplay.printMsg("%d particles in %s (%s)"%(npart, stackdatafile, apDisplay.bytes(size)))
        totalsize += size
        numpart += npart
        fin = open(stackdatafile, 'rb')
        shutil.copyfileobj(fin, fout, 65536)
        fin.close()
    fout.close()
    if numpart < 1:
        apDisplay.printError("found %d particles"%(numpart))
    apDisplay.printMsg("found %d particles"%(numpart))
    finalsize = apFile.fileSize(mergedata)
    if finalsize != totalsize:
        apDisplay.printError("size mismatch %s vs. %s"%(apDisplay.bytes(finalsize), apDisplay.bytes(totalsize)))
    apDisplay.printMsg("size match %s vs. %s"%(apDisplay.bytes(finalsize), apDisplay.bytes(totalsize)))

    ### merge header files
    mergehead = open(mergeheader, 'wb')
    partnum = 1
    totalsize = 0
    ### snapshot the clock once so all date fields agree even if a
    ### second ticks over mid-merge (old code re-read it per field)
    now = time.localtime()
    for stackfile in stacklist:
        headerfilename = os.path.splitext(stackfile)[0]+ ".hed"
        headfile = open(headerfilename, 'rb')
        ### size checks
        size = apFile.fileSize(headerfilename)
        apDisplay.printMsg("%s (%d kB)"%(headerfilename, size/1024))
        totalsize += size
        npart = apFile.numImagesInStack(stackfile)
        for i in range(npart):
            ### read old 1024-byte header record
            data = headfile.read(1024)
            headerstr = ""
            ### first image number
            headerstr += intToFourByte(partnum)
            ### number of images, less one
            headerstr += intToFourByte(numpart-partnum)
            ### always 0,1 ???
            headerstr += intToFourByte(0)
            headerstr += intToFourByte(1)
            ### creation date: day, month, year, hour, min, sec
            headerstr += intToFourByte(now[2])
            headerstr += intToFourByte(now[1]) #eman always uses month-1?
            headerstr += intToFourByte(now[0])
            headerstr += intToFourByte(now[3])
            headerstr += intToFourByte(now[4])
            headerstr += intToFourByte(now[5])
            ### append other header info, 4 character per item
            headerstr += data[10*4:60*4]
            ### number of z slices
            headerstr += intToFourByte(1)
            ### first image number, EMAN does this
            headerstr += intToFourByte(partnum)
            ### append other header info, 4 character per item
            headerstr += data[62*4:68*4]
            headerstr += intToFourByte(33686018)
            headerstr += data[69*4:]
            mergehead.write(headerstr)
            partnum += 1
        ### close each input header (the old code leaked these handles)
        headfile.close()
    mergehead.close()
    apDisplay.printMsg("wrote %d particles"%(numpart))
    finalsize = apFile.fileSize(mergeheader)
    if finalsize != totalsize:
        apDisplay.printError("size mismatch %s vs. %s"%(apDisplay.bytes(finalsize), apDisplay.bytes(totalsize)))
    apDisplay.printMsg("size match %s vs. %s"%(apDisplay.bytes(finalsize), apDisplay.bytes(totalsize)))
    apDisplay.printMsg("finished stack merge in "+apDisplay.timeString(time.time()-t0))
def mergeStacks(stacklist, mergestack, msg=True):
    """
    Merge a list of IMAGIC stacks into a single stack.

    stacklist  -- list of input stack file names (.hed/.img pairs)
    mergestack -- output stack file name, overwritten if present
    msg        -- print progress/size-check messages when True

    Concatenates all .img data files, then rewrites every 1024-byte
    header record so image numbering is continuous across the merge.
    Raises via apDisplay.printError on any size mismatch.
    """
    ### initialization
    t0 = time.time()
    apFile.removeStack(mergestack, warn=msg)
    root = os.path.splitext(mergestack)[0]
    mergeheader = root + ".hed"
    mergedata = root + ".img"

    ### merge data files
    fout = open(mergedata, 'wb')
    numpart = 0
    totalsize = 0
    for stackfile in stacklist:
        stackdatafile = os.path.splitext(stackfile)[0] + ".img"
        ### size checks
        npart = apFile.numImagesInStack(stackdatafile)
        size = apFile.fileSize(stackdatafile)
        if msg is True:
            apDisplay.printMsg("%d particles in %s (%s)" %
                (npart, stackdatafile, apDisplay.bytes(size)))
        totalsize += size
        numpart += npart
        fin = open(stackdatafile, 'rb')
        shutil.copyfileobj(fin, fout, 65536)
        fin.close()
    fout.close()
    if numpart < 1:
        apDisplay.printError("found %d particles" % (numpart))
    if msg is True:
        apDisplay.printMsg("found %d particles" % (numpart))
    finalsize = apFile.fileSize(mergedata)
    if finalsize != totalsize:
        apDisplay.printError(
            "size mismatch %s vs. %s" %
            (apDisplay.bytes(finalsize), apDisplay.bytes(totalsize)))
    if msg is True:
        apDisplay.printMsg(
            "size match %s vs. %s" %
            (apDisplay.bytes(finalsize), apDisplay.bytes(totalsize)))

    ### merge header files
    mergehead = open(mergeheader, 'wb')
    partnum = 1
    totalsize = 0
    ### snapshot the clock once so all date fields agree even if a
    ### second ticks over mid-merge (old code re-read it per field)
    now = time.localtime()
    for stackfile in stacklist:
        headerfilename = os.path.splitext(stackfile)[0] + ".hed"
        headfile = open(headerfilename, 'rb')
        ### size checks
        size = apFile.fileSize(headerfilename)
        if msg is True:
            apDisplay.printMsg("%s (%d kB)" % (headerfilename, size / 1024))
        totalsize += size
        npart = apFile.numImagesInStack(stackfile)
        for i in range(npart):
            ### read old 1024-byte header record
            data = headfile.read(1024)
            headerstr = ""
            ### first image number
            headerstr += intToFourByte(partnum)
            ### number of images, less one
            headerstr += intToFourByte(numpart - partnum)
            ### always 0,1 ???
            headerstr += intToFourByte(0)
            headerstr += intToFourByte(1)
            ### creation date: day, month, year, hour, min, sec
            headerstr += intToFourByte(now[2])
            headerstr += intToFourByte(now[1])  #eman always uses month-1?
            headerstr += intToFourByte(now[0])
            headerstr += intToFourByte(now[3])
            headerstr += intToFourByte(now[4])
            headerstr += intToFourByte(now[5])
            ### append other header info, 4 character per item
            headerstr += data[10 * 4:60 * 4]
            ### number of z slices
            headerstr += intToFourByte(1)
            ### first image number, EMAN does this
            headerstr += intToFourByte(partnum)
            ### append other header info, 4 character per item
            headerstr += data[62 * 4:68 * 4]
            headerstr += intToFourByte(33686018)
            headerstr += data[69 * 4:]
            mergehead.write(headerstr)
            partnum += 1
        ### close each input header (the old code leaked these handles)
        headfile.close()
    mergehead.close()
    if msg is True:
        apDisplay.printMsg("wrote %d particles to file %s" %
            (numpart, mergestack))
    finalsize = apFile.fileSize(mergeheader)
    if finalsize != totalsize:
        apDisplay.printError(
            "size mismatch %s vs. %s" %
            (apDisplay.bytes(finalsize), apDisplay.bytes(totalsize)))
    if msg is True:
        apDisplay.printMsg(
            "size match %s vs. %s" %
            (apDisplay.bytes(finalsize), apDisplay.bytes(totalsize)))
    apDisplay.printMsg(
        "finished stack merge of %s in %s" %
        (mergestack, apDisplay.timeString(time.time() - t0)))