def compReadWrite(self, testMage, casttype=None, compressor=None, clevel=1):
    """Write ``testMage`` to disk, read it back, and compare data and header.

    This is the main helper which reads and writes from disk.

    Parameters
    ----------
    testMage
        Image data passed to ``mrcz.writeMRC``.
    casttype
        Forwarded to ``mrcz.writeMRC`` as ``dtype``.
    compressor
        Forwarded to ``mrcz.writeMRC`` as ``compressor``.
    clevel
        Forwarded to ``mrcz.writeMRC`` as ``clevel``.

    Raises
    ------
    AssertionError
        If the re-read data or any checked header field differs from what
        was written.
    """
    mrcName = os.path.join(tmpDir, 'testMage.mrc')
    pixelsize = np.array([1.2, 2.6, 3.4])
    # Escaped backslash: the same three characters the un-escaped literal
    # produced, without relying on an invalid escape sequence.
    pixelunits = u'\\AA'

    try:
        mrcz.writeMRC(testMage, mrcName, dtype=casttype,
                      pixelsize=pixelsize, pixelunits=pixelunits,
                      voltage=300.0, C3=2.7, gain=1.05,
                      compressor=compressor, clevel=clevel)
        rereadMage, rereadHeader = mrcz.readMRC(mrcName, pixelunits=pixelunits)
    finally:
        # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it.
        # Clean up even when the write/read above raised; `os.remove` raises
        # OSError (IOError is only an alias of it on Python 3).
        try:
            os.remove(mrcName)
        except OSError:
            log.info('Warning: file {} left on disk'.format(mrcName))

    npt.assert_array_almost_equal(testMage, rereadMage)
    npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
    assert (rereadHeader['pixelunits'] == pixelunits)
    npt.assert_almost_equal(rereadHeader['voltage'], 300.0)
    npt.assert_almost_equal(rereadHeader['C3'], 2.7)
    npt.assert_almost_equal(rereadHeader['gain'], 1.05)
def compress(path):
    """Re-write an ``.mrc``/``.mrcs`` file as a zstd-compressed ``.mrcz``/``.mrczs``.

    Behaviour is controlled by the module-level ``args`` namespace:
    ``args.verbose`` prints sizes and the compression ratio, ``args.dry_run``
    skips all reading/writing/deleting, and ``args.delete`` removes the
    original once a non-empty compressed file exists.

    Parameters
    ----------
    path : str
        Path of the file to compress; must end in ``.mrc`` or ``.mrcs``.

    Raises
    ------
    ValueError
        If ``path`` has neither supported extension (previously this
        surfaced later as an ``UnboundLocalError``).
    """
    if path.endswith('.mrc'):
        mrcz_path = path[:-4] + '.mrcz'
    elif path.endswith('.mrcs'):
        mrcz_path = path[:-5] + '.mrczs'
    else:
        raise ValueError(
            'compress expects a .mrc or .mrcs file, got: %s' % (path,))

    if args.verbose:
        print('compress %s -> %s' % (path, mrcz_path))
        original_size = os.path.getsize(path)
        print('%20s : %d bytes' % ('original size', original_size))

    if not args.dry_run:
        imageData, imageMeta = mrcz.readMRC(path)
        mrcz.writeMRC(imageData, mrcz_path, compressor='zstd', clevel=9,
                      pixelsize=imageMeta['pixelsize'],
                      pixelunits=imageMeta['pixelunits'],
                      voltage=imageMeta['voltage'],
                      C3=imageMeta['C3'],
                      gain=imageMeta['gain'])
        if args.verbose:
            compressed_size = os.path.getsize(mrcz_path)
            ratio = float(original_size) / compressed_size
            print('%20s : %d bytes' % ('compressed size', compressed_size))
            print('%20s : %.2f' % ('compression ratio', ratio))

    if args.delete:
        if args.verbose:
            print('delete %s' % (path))
        # Only drop the original once a non-empty compressed copy exists.
        if not args.dry_run and os.path.exists(mrcz_path) and os.path.getsize(mrcz_path) > 0:
            os.remove(path)
def extract(path):
    """Re-write an ``.mrcz``/``.mrczs`` file as an uncompressed ``.mrc``/``.mrcs``.

    The file is only processed when it was last modified more than
    ``args.time`` minutes ago. ``args.verbose``, ``args.dry_run`` and
    ``args.delete`` (module-level ``args`` namespace) control printing,
    skipping of I/O, and removal of the compressed source respectively.

    Parameters
    ----------
    path : str
        Path of the file to extract; must end in ``.mrcz`` or ``.mrczs``.

    Raises
    ------
    ValueError
        If ``path`` has neither supported extension (previously this
        surfaced later as an ``UnboundLocalError``).
    """
    if path.endswith('.mrcz'):
        mrc_path = path[:-5] + '.mrc'
    elif path.endswith('.mrczs'):
        mrc_path = path[:-6] + '.mrcs'
    else:
        raise ValueError(
            'extract expects a .mrcz or .mrczs file, got: %s' % (path,))

    time_last_modified = os.path.getmtime(path)
    time_since = (time.time() - time_last_modified) / 60  # in minutes
    if time_since <= args.time:
        # Too recently modified; leave the file alone.
        return

    if args.verbose:
        print('extract %s -> %s' % (path, mrc_path))
        compressed_size = os.path.getsize(path)
        print('%20s : %d bytes' % ('compressed size', compressed_size))

    if not args.dry_run:
        imageData, imageMeta = mrcz.readMRC(path)
        mrcz.writeMRC(imageData, mrc_path, compressor=None,
                      pixelsize=imageMeta['pixelsize'],
                      pixelunits=imageMeta['pixelunits'],
                      voltage=imageMeta['voltage'],
                      C3=imageMeta['C3'],
                      gain=imageMeta['gain'])
        if args.verbose:
            uncompressed_size = os.path.getsize(mrc_path)
            ratio = float(uncompressed_size) / compressed_size
            print('%20s : %d bytes' % ('uncompressed size', uncompressed_size))
            print('%20s : %.2f' % ('compression ratio', ratio))

    if args.delete:
        if args.verbose:
            print('delete %s' % (path))
        # Only drop the compressed file once a non-empty extracted copy exists.
        if not args.dry_run and os.path.exists(mrc_path) and os.path.getsize(mrc_path) > 0:
            os.remove(path)
def compReadWrite(self, testMage, casttype=None, compressor=None, clevel=1):
    """Round-trip ``testMage`` through an MRC file and verify data and header.

    This is the main helper which reads and writes from disk.

    Parameters
    ----------
    testMage
        Image data passed to ``mrcz.writeMRC``.
    casttype
        Forwarded to ``mrcz.writeMRC`` as ``dtype``.
    compressor
        Forwarded to ``mrcz.writeMRC`` as ``compressor``.
    clevel
        Forwarded to ``mrcz.writeMRC`` as ``clevel``.

    Raises
    ------
    AssertionError
        If the re-read data or any checked header field differs from what
        was written.
    """
    mrcName = os.path.join(tmpDir, "testMage.mrc")
    pixelsize = np.array([1.2, 2.6, 3.4])
    # Escaped backslash: identical characters to the un-escaped literal,
    # without relying on an invalid escape sequence.
    pixelunits = u"\\AA"

    try:
        mrcz.writeMRC(testMage, mrcName, dtype=casttype,
                      pixelsize=pixelsize, pixelunits=pixelunits,
                      voltage=300.0, C3=2.7, gain=1.05,
                      compressor=compressor, clevel=clevel)
        rereadMage, rereadHeader = mrcz.readMRC(mrcName, pixelunits=pixelunits)
    finally:
        # Remove the scratch file even when the write/read above raised;
        # `os.remove` raises OSError (IOError is only its alias on Python 3).
        try:
            os.remove(mrcName)
        except OSError:
            log.info("Warning: file {} left on disk".format(mrcName))

    npt.assert_array_almost_equal(testMage, rereadMage)
    npt.assert_array_equal(rereadHeader['voltage'], 300.0)
    npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
    npt.assert_array_equal(rereadHeader['pixelunits'], pixelunits)
    npt.assert_array_equal(rereadHeader['C3'], 2.7)
    npt.assert_array_equal(rereadHeader['gain'], 1.05)
def test_numpy_metadata(self):
    """Check that NumPy scalars and arrays nested in ``meta`` survive a round trip."""
    log.info('Testing NumPy types in meta-data')

    meta = {
        'zoo': np.float64(1.0),
        'foo': [np.ones(16), np.ones(16), np.ones(16)],
        'bar': {
            # Note: no support in JSON for complex numbers.
            'moo': np.uint64(42),
            'boo': np.full(8, 3, dtype=np.int32)
        }
    }
    mage = np.zeros([32, 32], dtype=np.float32)
    mrcName = os.path.join(tmpDir, 'testMage.mrc')

    try:
        mrcz.writeMRC(mage, mrcName, meta=meta)
        _, re_meta = mrcz.readMRC(mrcName)
    finally:
        # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it.
        # Clean up even if the write/read failed; `os.remove` raises OSError.
        try:
            os.remove(mrcName)
        except OSError:
            log.info('Warning: file {} left on disk'.format(mrcName))

    npt.assert_array_equal(re_meta['foo'][0], meta['foo'][0])
    npt.assert_array_equal(re_meta['bar']['boo'], meta['bar']['boo'])
def test_MRC_append(self):
    """Write two float-32 frames one at a time via ``idx`` and re-read the stack."""
    log.info('Testing appending to existing MRC stack, float-32')

    f32_stack = [
        np.random.normal(size=[128, 96]).astype(np.float32)
        for _ in range(2)
    ]
    mrcName = os.path.join(tmpDir, 'testStack.mrcs')
    pixelsize = [5.6, 3.4]

    try:
        # Each frame is written to the same file at its own index.
        for j, frame in enumerate(f32_stack):
            mrcz.writeMRC(frame, mrcName, pixelsize=pixelsize,
                          compressor=None, idx=j)
        rereadMage, _ = mrcz.readMRC(mrcName, pixelunits=u'\\AA')
    finally:
        # Clean up even if the write/read failed; `os.remove` raises OSError.
        try:
            os.remove(mrcName)
        except OSError:
            log.info('Warning: file {} left on disk'.format(mrcName))

    assert (rereadMage.shape[0] == len(f32_stack))
    for testFrame, rereadFrame in zip(f32_stack, rereadMage):
        assert (testFrame.dtype == rereadFrame.dtype)
        npt.assert_array_almost_equal(testFrame, rereadFrame)
def test_list_change_output_shape_compressed(self):
    """Re-read a compressed 6-frame volume with ``slices`` of 1 and 2.

    For each value the result must be a list whose length is the number of
    frames integer-divided by ``slices``.
    """
    testMage = np.random.uniform(high=10, size=[6, 32, 32]).astype('int8')
    pixelsize = [5.6, 3.4]
    mrcName = os.path.join(tmpDir, 'testMage.mrcz')

    for slices in (1, 2):
        try:
            mrcz.writeMRC(testMage, mrcName, pixelsize=pixelsize,
                          compressor='zstd', clevel=1, n_threads=1)
            rereadMage, _ = mrcz.readMRC(mrcName, pixelunits=u'\\AA',
                                         slices=slices)
        finally:
            # Clean up even if the write/read failed; `os.remove` raises OSError.
            try:
                os.remove(mrcName)
            except OSError:
                log.info('Warning: file {} left on disk'.format(mrcName))

        assert (isinstance(rereadMage, list))
        assert (len(rereadMage) == testMage.shape[0] // slices)
def test_list_3d_compressed(self):
    """Round-trip a list of three 3-D int8 arrays through a zstd-compressed file."""
    # Note: the list holds the same array object three times.
    testMage = [
        np.random.uniform(high=10, size=[3, 32, 32]).astype('int8')
    ] * 3
    mrcName = os.path.join(tmpDir, 'testMage.mrcz')
    pixelsize = [5.6, 3.4]

    try:
        mrcz.writeMRC(testMage, mrcName, pixelsize=pixelsize,
                      compressor='zstd', clevel=1, n_threads=1)
        rereadMage, _ = mrcz.readMRC(mrcName, pixelunits=u'\\AA')
    finally:
        # Clean up even if the write/read failed; `os.remove` raises OSError.
        try:
            os.remove(mrcName)
        except OSError:
            log.info('Warning: file {} left on disk'.format(mrcName))

    assert (isinstance(rereadMage, list))
    assert (len(rereadMage) == len(testMage))
    for testFrame, rereadFrame in zip(testMage, rereadMage):
        assert (testFrame.dtype == rereadFrame.dtype)
        npt.assert_array_almost_equal(testFrame, rereadFrame)
def test_cast_array_from_f64(self):
    """Write a float-64 array and check the re-read data matches its float-32 cast."""
    log.info('Testing float-64 casting')

    f64_mage = np.random.normal(size=[2, 128, 96]).astype(np.float64)
    f32_mage = f64_mage.astype(np.float32)
    mrcName = os.path.join(tmpDir, 'testMage.mrc')

    try:
        mrcz.writeMRC(f64_mage, mrcName, compressor='zstd', clevel=1)
        rereadMage, _ = mrcz.readMRC(mrcName)
    finally:
        # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it.
        # Clean up even if the write/read failed; `os.remove` raises OSError.
        try:
            os.remove(mrcName)
        except OSError:
            log.info('Warning: file {} left on disk'.format(mrcName))

    npt.assert_array_almost_equal(f32_mage, rereadMage)
def file_reader(filename, endianess='<', lazy=False, mmap_mode='c', **kwds):
    """Read an MRCZ file and package it as a one-element list of dictionaries.

    Parameters
    ----------
    filename
        Path handed to ``_mrcz.readMRC``.
    endianess
        ``'<'`` selects ``'le'``; any other value selects ``'be'``.
    lazy
        Forwarded to ``_mrcz.readMRC`` as ``useMemmap``.
    mmap_mode
        Only ``'c'`` is accepted.
    **kwds
        Extra keyword arguments forwarded to ``_mrcz.readMRC``.

    Returns
    -------
    list
        A single dictionary with the keys ``'data'``, ``'axes'``,
        ``'metadata'``, ``'original_metadata'`` and ``'mapping'``.

    Raises
    ------
    ValueError
        If ``mmap_mode`` is not ``'c'``.
    """
    _logger.debug("Reading MRCZ file: %s" % filename)

    if mmap_mode != 'c':
        # Note also that MRCZ does not support memory-mapping of compressed data.
        # Perhaps we could use the zarr package for that
        raise ValueError('MRCZ supports only C-ordering memory-maps')

    if endianess == '<':
        mrcz_endian = 'le'
    else:
        mrcz_endian = 'be'

    data, mrcz_header = _mrcz.readMRC(filename, endian=mrcz_endian,
                                      useMemmap=lazy, pixelunits='nm',
                                      **kwds)

    # Build one axis description per entry of _READ_ORDER.
    axis_names = ['y', 'x', 'z']
    navigate_flags = [False, False, True]
    axes = []
    for position, (array_index, is_navigation) in enumerate(
            zip(_READ_ORDER, navigate_flags)):
        axis = {
            'size': data.shape[array_index],
            'index_in_array': array_index,
            'name': axis_names[position],
            'scale': mrcz_header['pixelsize'][array_index],
            'offset': 0.0,
            'units': mrcz_header['pixelunits'],
            'navigate': is_navigation,
        }
        axes.append(axis)

    # Re-order the axes: the third entry moves to the front.
    last_axis = axes.pop(2)
    axes.insert(0, last_axis)

    # Metadata is the header minus the non-standard fields.
    metadata = mrcz_header.copy()
    for field in _POP_FROM_HEADER:
        metadata.pop(field)

    result = {
        'data': data,
        'axes': axes,
        'metadata': metadata,
        'original_metadata': {'mrcz_header': mrcz_header},
        'mapping': mapping,
    }
    return [result]
def test_cast_list_from_c128(self):
    """Write a list of complex-128 frames and compare against their complex-64 casts."""
    log.info('Testing complex-128 casting')

    c128_mage = [
        np.random.normal(size=[128, 96]).astype(np.float64)
        + 1j * np.random.normal(size=[128, 96]).astype(np.float64)
        for _ in range(2)
    ]
    c64_mage = [frame.astype(np.complex64) for frame in c128_mage]
    mrcName = os.path.join(tmpDir, 'testMage.mrc')

    try:
        mrcz.writeMRC(c128_mage, mrcName, compressor='zstd', clevel=1)
        rereadMage, _ = mrcz.readMRC(mrcName)
    finally:
        # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it.
        # Clean up even if the write/read failed; `os.remove` raises OSError.
        try:
            os.remove(mrcName)
        except OSError:
            log.info('Warning: file {} left on disk'.format(mrcName))

    npt.assert_array_almost_equal(c64_mage[0], rereadMage[0])
    npt.assert_array_almost_equal(c64_mage[1], rereadMage[1])
def batchProcPNG(mrcNames):
    """Convert each MRC file in ``mrcNames`` to a PNG written next to it.

    Four-dimensional data is skipped, three-dimensional data is averaged
    over its first axis, and the resulting image is passed through
    ``util.squarekernel``, a Gaussian filter, histogram clipping and
    normalization before being saved.
    """
    for mrcName in mrcNames:
        print('Converting %s to PNG format' % mrcName)
        stem = os.path.splitext(mrcName)[0]
        pngName = stem + '.png'

        image, header = mrcz.readMRC(mrcName)
        image = np.array(image)

        if image.ndim == 4:
            # Pass on multispectral data
            continue
        if image.ndim == 3:
            image = image.mean(axis=0)

        print(f'Image {pngName} has shape {image.shape}')

        image = ni.gaussian_filter(util.squarekernel(image), 1.0)
        clim = util.histClim(image, cutoff=1E-4)
        clipped = np.clip(image, clim[0], clim[1])
        image = util.normalize(clipped)
        skimage.io.imsave(pngName, image, plugin='pil')
def crossReadWrite(self, testMage, casttype=None, compressor=None, clevel=1):
    """Write with Python, convert with the external ``cmrczProg``, and re-read.

    ``testMage`` is written to ``testIn.mrcz``, the command in ``cmrczProg``
    is run to produce ``testOut.mrcz``, and the output is read back and
    compared (shape, dtype, data and header fields) against the input.

    Raises
    ------
    AssertionError
        If the re-read data or any checked header field differs.
    """
    mrcInput = os.path.join(tmpDir, 'testIn.mrcz')
    mrcOutput = os.path.join(tmpDir, 'testOut.mrcz')
    # NOTE(review): the `compressor` and `clevel` arguments are overridden
    # here, so the values supplied by the caller are ignored (and `casttype`
    # is never used). Kept as-is to preserve the existing test behaviour.
    compressor = None
    blocksize = 64
    clevel = 1
    pixelsize = [1.2, 2.6, 3.4]
    # Escaped backslash: identical characters to the un-escaped literal.
    pixelunits = u'\\AA'

    try:
        mrcz.writeMRC(testMage, mrcInput,
                      pixelsize=pixelsize, pixelunits=pixelunits,
                      voltage=300.0, C3=2.7, gain=1.05,
                      compressor=compressor)

        sub.call(cmrczProg + ' -i %s -o %s -c %s -B %d -l %d'
                 % (mrcInput, mrcOutput, compressor, blocksize, clevel),
                 shell=True)

        rereadMage, rereadHeader = mrcz.readMRC(mrcOutput, pixelunits=pixelunits)
    finally:
        # Remove whichever scratch files exist, even when a step above
        # raised, so a failure does not leave files behind in tmpDir.
        for scratch in (mrcOutput, mrcInput):
            if os.path.exists(scratch):
                os.remove(scratch)

    assert (np.all(testMage.shape == rereadMage.shape))
    assert (testMage.dtype == rereadMage.dtype)
    npt.assert_array_almost_equal(testMage, rereadMage)
    npt.assert_array_equal(rereadHeader['voltage'], 300.0)
    npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
    npt.assert_array_equal(rereadHeader['pixelunits'], pixelunits)
    npt.assert_array_equal(rereadHeader['C3'], 2.7)
    npt.assert_array_equal(rereadHeader['gain'], 1.05)
def test_enum_metadata(self):
    """Check that ``Enum`` members in ``meta`` are re-read as ``'Enum.Axis.<NAME>'`` strings."""
    log.info('Testing Enum types in meta-data')

    class Axis(Enum):
        X = 0
        Y = 1

    meta = {'axes': [Axis.Y, Axis.X]}
    mage = np.zeros([4, 4], dtype=np.float32)
    mrcName = os.path.join(tmpDir, 'testMage.mrc')

    try:
        mrcz.writeMRC(mage, mrcName, meta=meta)
        _, re_meta = mrcz.readMRC(mrcName)
    finally:
        # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it.
        # Clean up even if the write/read failed; `os.remove` raises OSError.
        try:
            os.remove(mrcName)
        except OSError:
            log.info('Warning: file {} left on disk'.format(mrcName))

    assert ('Enum.Axis.X' in re_meta['axes'])
    assert ('Enum.Axis.Y' in re_meta['axes'])
def test_JSON(self):
    """Round-trip user ``meta`` keys alongside the standard header fields."""
    testMage = np.random.uniform(high=10, size=[3, 128, 64]).astype('int8')
    meta = {'foo': 5, 'bar': 42}
    mrcName = os.path.join(tmpDir, "testMage.mrcz")
    pixelsize = [1.2, 5.6, 3.4]
    # Escaped backslash: identical characters to the un-escaped literal.
    pixelunits = u"\\AA"

    try:
        mrcz.writeMRC(testMage, mrcName, meta=meta,
                      pixelsize=pixelsize, pixelunits=pixelunits,
                      voltage=300.0, C3=2.7, gain=1.05,
                      compressor='zstd', clevel=1, n_threads=4)
        rereadMage, rereadHeader = mrcz.readMRC(mrcName, pixelunits=pixelunits)
    finally:
        # Clean up even if the write/read failed; `os.remove` raises OSError.
        try:
            os.remove(mrcName)
        except OSError:
            log.info("Warning: file {} left on disk".format(mrcName))

    assert (np.all(testMage.shape == rereadMage.shape))
    assert (testMage.dtype == rereadMage.dtype)
    # Every user-supplied key must come back unchanged in the header.
    for key in meta:
        assert (meta[key] == rereadHeader[key])

    npt.assert_array_almost_equal(testMage, rereadMage)
    npt.assert_array_equal(rereadHeader['voltage'], 300.0)
    npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
    npt.assert_array_equal(rereadHeader['pixelunits'], pixelunits)
    npt.assert_array_equal(rereadHeader['C3'], 2.7)
    npt.assert_array_equal(rereadHeader['gain'], 1.05)
def partExtract(globPath, boxShape, boxExt=".star",
                binShape=None, binKernel='lanczos2', rootName="part",
                sigmaFilt=-1.0, invertContrast=True, normalize=True,
                fitBackground=True, movieMode=False, startFrame=None,
                endFrame=None, doseFilter=False):
    r"""
    Extracts particles from aligned and summed micrographs (generally <micrograph>.mrc).

    globPath = "align\*.mrc" for example, will process all such mrc files.
        Can also just pass a list (or tuple) of files.

        TODO: CHANGE TO LOAD FROM A ZORRO LOG FILE?  OR JUST MAKE IT THE PREFERRED OPTION?

    Expects to find <micrograph><boxExt> (default <micrograph>.star) next to each
    micrograph, holding all the box centers in Relion .star format. Micrographs
    without such a file are skipped and leave a None entry in the returned list.

    boxShape = [y,x] is the size of the box cut around each coordinate.

    binShape = [y,x] is the particle box size to resample to.  If == None no
        resampling is done.  For binning, binKernel = 'lanczos2' or 'gauss'
        reflects the anti-aliasing filter used.

    rootName affects the suffix appended to each extracted particle stack
        (<micrograph>_<rootName>.mrcs)

    sigmaFilt is the standard deviation applied for removal of x-rays and hot pixels,
        2.5 - 3.0 is the recommended range.  It uses the sigmaFilt value to compute a
        confidence interval to filter the intensity value of only outlier pixels
        (typically ~ 1 %).  Values <= 0 disable the filter.

    invertContrast = True, inverts the contrast, as required for Relion/Frealign.

    normalize = True changes the particles to have 0.0 mean and 1.0 standard deviation,
        as required for Relion/Frealign.

    fitBackground = True removes a 2D Gaussian from the image. In general it's better
        to perform this prior to particle picking using the Zorro dose
        filtering+background subtraction mechanism.

    movieMode, startFrame, endFrame and doseFilter are accepted but currently unused.

    Returns the list of written .star file names, one slot per input micrograph.

    Raises ValueError if binShape is given and binKernel is not 'lanczos2' or 'gauss'.

    TODO: GRAB THE CTF INFORMATION AS WELL AND MAKE A MERGED .STAR FILE
    TODO: add a movie mode that outputs a substack (dose-filtered) average.
    """
    t0 = time.time()
    if isinstance(globPath, (list, tuple)):
        mrcFiles = globPath
    else:
        mrcFiles = glob.glob(globPath)

    try:
        os.mkdir("Particles")
    except OSError:
        # Best effort: the directory usually already exists. Any real problem
        # surfaces when the particle stack is written below.
        pass

    # Integer half-widths so they can be used as slice offsets on Python 3.
    halfY = int(boxShape[0]) // 2
    halfX = int(boxShape[1]) // 2

    particleStarList = [None] * len(mrcFiles)
    for K, mrcFileName in enumerate(mrcFiles):

        boxFileName = os.path.splitext(mrcFileName)[0] + boxExt
        if not os.path.isfile(boxFileName):
            print("Could not find .box/.star file: " + boxFileName)
            continue

        rlnBox = ReliablePy.ReliablePy()
        rlnBox.load(boxFileName)

        xCoord = rlnBox.star['data_']['CoordinateX']
        yCoord = rlnBox.star['data_']['CoordinateY']

        mrcMage = mrcz.readMRC(mrcFileName)[0]

        ###### Remove background from whole image #####
        if bool(fitBackground):
            mrcMage -= zorro_util.backgroundEstimate(mrcMage)

        ###### Check for particles too close to the edge and remove those coordinates. #####
        keepElements = ~((xCoord < halfX) |
                         (yCoord < halfY) |
                         (xCoord > mrcMage.shape[1] - halfX) |
                         (yCoord > mrcMage.shape[0] - halfY))
        xCoord = xCoord[keepElements]
        yCoord = yCoord[keepElements]

        ##### Extract particles #####
        particles = np.zeros([len(xCoord), boxShape[0], boxShape[1]], dtype='float32')
        for J in range(len(xCoord)):
            # Slice bounds must be integers; coordinates may be stored as floats.
            yCenter = int(yCoord[J])
            xCenter = int(xCoord[J])
            partMat = mrcMage[yCenter - halfY:yCenter + halfY,
                              xCenter - halfX:xCenter + halfX]

            ###### Apply confidence-interval gaussian filter #####
            if sigmaFilt > 0.0:
                partMean = np.mean(partMat)
                partStd = np.std(partMat)
                partHotpix = np.abs(partMat - partMean) > sigmaFilt * partStd
                # Clip before applying the filter to better limit multiple pixel hot spots
                partMat = np.clip(partMat,
                                  partMean - sigmaFilt * partStd,
                                  partMean + sigmaFilt * partStd)

                # A gaussian_filter is much faster than a median filter and
                # seems equivalent if we pre-clip.
                partFilt = scipy.ndimage.gaussian_filter(partMat, 4.0)
                # Only outlier pixels take the filtered value.
                particles[J, :, :] = partHotpix * partFilt + (~partHotpix) * partMat
            else:
                particles[J, :, :] = partMat

        ##### Re-scale particles #####
        if np.any(binShape):
            binFact = (np.array(boxShape, dtype=float) /
                       np.array(binShape, dtype=float))
            # Force binFact down to a power of 2 for now, i.e. 2**floor(log2(x)).
            binFact = 2.0 ** np.floor(np.log2(binFact))

            if binKernel == 'lanczos2':
                # 2nd order Lanczos kernel
                lOrder = 2
                xWin = np.arange(-lOrder, lOrder + 1.0 / binFact[1], 1.0 / binFact[1])
                yWin = np.arange(-lOrder, lOrder + 1.0 / binFact[0], 1.0 / binFact[0])
                xWinMesh, yWinMesh = np.meshgrid(xWin, yWin)
                rmesh = np.sqrt(xWinMesh * xWinMesh + yWinMesh * yWinMesh)

                import warnings
                # The expression divides by zero at the kernel centre; that
                # sample is overwritten with 1.0 right after.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    windowKernel = ((lOrder / (np.pi * np.pi * rmesh * rmesh)) *
                                    np.sin(np.pi / lOrder * rmesh) *
                                    np.sin(np.pi * rmesh))
                windowKernel[yWin == 0, xWin == 0] = 1.0

            elif binKernel == 'gauss':
                xWin = np.arange(-2.0 * binFact[1], 2.0 * binFact[1] + 1.0)
                yWin = np.arange(-2.0 * binFact[0], 2.0 * binFact[0] + 1.0)
                print("binFact = " + str(binFact))
                xWinMesh, yWinMesh = np.meshgrid(xWin, yWin)
                windowKernel = np.exp(-xWinMesh**2 / (0.36788 * binFact[1]) -
                                      yWinMesh**2 / (0.36788 * binFact[0]))
            else:
                raise ValueError(
                    "binKernel must be 'lanczos2' or 'gauss', got: " + str(binKernel))

            partRescaled = np.zeros([len(xCoord), binShape[0], binShape[1]], dtype='float32')
            for J in range(len(xCoord)):
                # TODO: switch from squarekernel to an interpolator so we can use non-powers of 2
                partRescaled[J, :, :] = zorro_util.squarekernel(
                    scipy.ndimage.convolve(particles[J, :, :], windowKernel),
                    k=int(binFact[0]))

            particles = partRescaled

        print(" particles.dtype = " + str(particles.dtype))

        ###### Normalize particles after binning and background subtraction #####
        if bool(normalize):
            # NOTE(review): axis=0 takes statistics across the particle index
            # (per pixel), not per particle -- confirm this is intended.
            particles -= np.mean(particles, axis=0)
            particles *= 1.0 / np.std(particles, axis=0)

        ##### Invert contrast #####
        if bool(invertContrast):
            particles = -particles

        ###### Save particles to disk ######
        # Relion always saves particles within ./Particles/<fileext> but we could use
        # anything if we want to make changes and save them in the star file.
        particleFileName = os.path.join(
            "Particles", os.path.splitext(mrcFileName)[0] + "_" + rootName + ".mrcs")
        # TODO: add pixel size to particles file
        mrcz.writeMRC(particles, particleFileName)  # TODO: pixelsize

        ##### Output a star file with CTF and particle info. #####
        print("Particle file: " + particleFileName)
        particleStarList[K] = os.path.splitext(particleFileName)[0] + ".star"
        print("star file: " + particleStarList[K])

        headerDict = {"ImageName": 1, "CoordinateX": 2, "CoordinateY": 3,
                      "MicrographName": 4}
        lookupDict = dict(zip(headerDict.values(), headerDict.keys()))
        mrcsPartName = os.path.splitext(particleStarList[K])[0] + ".mrcs"

        # Text mode: everything written below is str, not bytes.
        with open(particleStarList[K], 'w') as fh:
            fh.write("\ndata_images\n\nloop_\n")

            for J in sorted(lookupDict):
                fh.write("_rln" + lookupDict[J] + " #" + str(J) + "\n")

            for I in range(len(xCoord)):
                fh.write("%06d@%s  %.1f  %.1f  %s\n"
                         % (I + 1, mrcsPartName, xCoord[I], yCoord[I], mrcFileName))

    # TODO: join all star files, for multiprocessing this should just return the list of star files
    # TODO: add CTF Info
    t1 = time.time()
    print("Particle extraction finished in (s): %.2f" % (t1 - t0))
    return particleStarList
if (rank==0): print ("Parsing %s" %(inputFile)) ############################################################ # READING THE INPUT FILE AND GETTING SOME KEY PARAMETERS images = pims.open(inputFile) [row,col,numFrames] = images.header_dict['height'],images.header_dict['width'],images.header_dict['allocated_frames'] if (numFrames<lastFrame): frameList = range(int(firstFrame),numFrames+1) else: frameList = range(int(firstFrame),int(lastFrame)+1) procFrameList = numpy.array_split(frameList,size) ############################################################ # SETTING UP THE DARK AND GAIN REFERENCEING [darkRef,_] = mrcz.readMRC(darkRefFile) [gainRef,_] = mrcz.readMRC(gainRefFile) if (gainRef.mean()<=10): correction = gainRef.copy() else: correction = gainRef-darkRef correction[numpy.abs(correction)<1e-5] = 1 median = numpy.median(correction) ############################################################ # FIRST PASS THROUGH ALL FILES TO GET THE CONTRAST VALUE outFile = open('convertSeqFileLog_'+str(rank)+'.dat','w') outFile.write("InputFile\tFrame\tCount\tPixInAngstrom (TODO)\tDoseRate\tYear\tMonth\tDay\tHour\tMinute\tSecond\tMicrosecond\n") contrastLimit = open(str(rank)+'.dat','w') minCount,maxCount=1e10,-1e10 comm.Barrier()
def partExtract(globPath, boxShape, boxExt=".star",
                binShape=None, binKernel='lanczos2', rootName="part",
                sigmaFilt=-1.0, invertContrast=True, normalize=True,
                fitBackground=True, movieMode=False, startFrame=None,
                endFrame=None, doseFilter=False):
    r"""
    Extracts particles from aligned and summed micrographs (generally <micrograph>.mrc).

    globPath = "align\*.mrc" for example, will process all such mrc files.
        Can also just pass a list (or tuple) of files.

        TODO: CHANGE TO LOAD FROM A ZORRO LOG FILE?  OR JUST MAKE IT THE PREFERRED OPTION?

    Expects to find <micrograph><boxExt> (default <micrograph>.star) next to each
    micrograph, holding all the box centers in Relion .star format. Micrographs
    without such a file are skipped and leave a None entry in the returned list.

    boxShape = [y,x] is the size of the box cut around each coordinate.

    binShape = [y,x] is the particle box size to resample to.  If == None no
        resampling is done.  For binning, binKernel = 'lanczos2' or 'gauss'
        reflects the anti-aliasing filter used.

    rootName affects the suffix appended to each extracted particle stack
        (<micrograph>_<rootName>.mrcs)

    sigmaFilt is the standard deviation applied for removal of x-rays and hot pixels,
        2.5 - 3.0 is the recommended range.  It uses the sigmaFilt value to compute a
        confidence interval to filter the intensity value of only outlier pixels
        (typically ~ 1 %).  Values <= 0 disable the filter.

    invertContrast = True, inverts the contrast, as required for Relion/Frealign.

    normalize = True changes the particles to have 0.0 mean and 1.0 standard deviation,
        as required for Relion/Frealign.

    fitBackground = True removes a 2D Gaussian from the image. In general it's better
        to perform this prior to particle picking using the Zorro dose
        filtering+background subtraction mechanism.

    movieMode, startFrame, endFrame and doseFilter are accepted but currently unused.

    Returns the list of written .star file names, one slot per input micrograph.

    Raises ValueError if binShape is given and binKernel is not 'lanczos2' or 'gauss'.

    TODO: GRAB THE CTF INFORMATION AS WELL AND MAKE A MERGED .STAR FILE
    TODO: add a movie mode that outputs a substack (dose-filtered) average.
    """
    t0 = time.time()
    if isinstance(globPath, (list, tuple)):
        mrcFiles = globPath
    else:
        mrcFiles = glob.glob(globPath)

    try:
        os.mkdir("Particles")
    except OSError:
        # Best effort: the directory usually already exists. Any real problem
        # surfaces when the particle stack is written below.
        pass

    # Integer half-widths so they can be used as slice offsets on Python 3.
    halfY = int(boxShape[0]) // 2
    halfX = int(boxShape[1]) // 2

    particleStarList = [None] * len(mrcFiles)
    for K, mrcFileName in enumerate(mrcFiles):

        boxFileName = os.path.splitext(mrcFileName)[0] + boxExt
        if not os.path.isfile(boxFileName):
            print("Could not find .box/.star file: " + boxFileName)
            continue

        rlnBox = ReliablePy.ReliablePy()
        rlnBox.load(boxFileName)

        xCoord = rlnBox.star['data_']['CoordinateX']
        yCoord = rlnBox.star['data_']['CoordinateY']

        mrcMage = mrcz.readMRC(mrcFileName)[0]

        ###### Remove background from whole image #####
        if bool(fitBackground):
            mrcMage -= zorro_util.backgroundEstimate(mrcMage)

        ###### Check for particles too close to the edge and remove those coordinates. #####
        keepElements = ~((xCoord < halfX) |
                         (yCoord < halfY) |
                         (xCoord > mrcMage.shape[1] - halfX) |
                         (yCoord > mrcMage.shape[0] - halfY))
        xCoord = xCoord[keepElements]
        yCoord = yCoord[keepElements]

        ##### Extract particles #####
        particles = np.zeros([len(xCoord), boxShape[0], boxShape[1]], dtype='float32')
        for J in range(len(xCoord)):
            # Slice bounds must be integers; coordinates may be stored as floats.
            yCenter = int(yCoord[J])
            xCenter = int(xCoord[J])
            partMat = mrcMage[yCenter - halfY:yCenter + halfY,
                              xCenter - halfX:xCenter + halfX]

            ###### Apply confidence-interval gaussian filter #####
            if sigmaFilt > 0.0:
                partMean = np.mean(partMat)
                partStd = np.std(partMat)
                partHotpix = np.abs(partMat - partMean) > sigmaFilt * partStd
                # Clip before applying the filter to better limit multiple pixel hot spots
                partMat = np.clip(partMat,
                                  partMean - sigmaFilt * partStd,
                                  partMean + sigmaFilt * partStd)

                # A gaussian_filter is much faster than a median filter and
                # seems equivalent if we pre-clip.
                partFilt = scipy.ndimage.gaussian_filter(partMat, 4.0)
                # Only outlier pixels take the filtered value.
                particles[J, :, :] = partHotpix * partFilt + (~partHotpix) * partMat
            else:
                particles[J, :, :] = partMat

        ##### Re-scale particles #####
        if np.any(binShape):
            binFact = (np.array(boxShape, dtype=float) /
                       np.array(binShape, dtype=float))
            # Force binFact down to a power of 2 for now, i.e. 2**floor(log2(x)).
            binFact = 2.0 ** np.floor(np.log2(binFact))

            if binKernel == 'lanczos2':
                # 2nd order Lanczos kernel
                lOrder = 2
                xWin = np.arange(-lOrder, lOrder + 1.0 / binFact[1], 1.0 / binFact[1])
                yWin = np.arange(-lOrder, lOrder + 1.0 / binFact[0], 1.0 / binFact[0])
                xWinMesh, yWinMesh = np.meshgrid(xWin, yWin)
                rmesh = np.sqrt(xWinMesh * xWinMesh + yWinMesh * yWinMesh)

                import warnings
                # The expression divides by zero at the kernel centre; that
                # sample is overwritten with 1.0 right after.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    windowKernel = ((lOrder / (np.pi * np.pi * rmesh * rmesh)) *
                                    np.sin(np.pi / lOrder * rmesh) *
                                    np.sin(np.pi * rmesh))
                windowKernel[yWin == 0, xWin == 0] = 1.0

            elif binKernel == 'gauss':
                xWin = np.arange(-2.0 * binFact[1], 2.0 * binFact[1] + 1.0)
                yWin = np.arange(-2.0 * binFact[0], 2.0 * binFact[0] + 1.0)
                print("binFact = " + str(binFact))
                xWinMesh, yWinMesh = np.meshgrid(xWin, yWin)
                windowKernel = np.exp(-xWinMesh**2 / (0.36788 * binFact[1]) -
                                      yWinMesh**2 / (0.36788 * binFact[0]))
            else:
                raise ValueError(
                    "binKernel must be 'lanczos2' or 'gauss', got: " + str(binKernel))

            partRescaled = np.zeros([len(xCoord), binShape[0], binShape[1]], dtype='float32')
            for J in range(len(xCoord)):
                # TODO: switch from squarekernel to an interpolator so we can use non-powers of 2
                partRescaled[J, :, :] = zorro_util.squarekernel(
                    scipy.ndimage.convolve(particles[J, :, :], windowKernel),
                    k=int(binFact[0]))

            particles = partRescaled

        print(" particles.dtype = " + str(particles.dtype))

        ###### Normalize particles after binning and background subtraction #####
        if bool(normalize):
            # NOTE(review): axis=0 takes statistics across the particle index
            # (per pixel), not per particle -- confirm this is intended.
            particles -= np.mean(particles, axis=0)
            particles *= 1.0 / np.std(particles, axis=0)

        ##### Invert contrast #####
        if bool(invertContrast):
            particles = -particles

        ###### Save particles to disk ######
        # Relion always saves particles within ./Particles/<fileext> but we could use
        # anything if we want to make changes and save them in the star file.
        particleFileName = os.path.join(
            "Particles", os.path.splitext(mrcFileName)[0] + "_" + rootName + ".mrcs")
        # TODO: add pixel size to particles file
        mrcz.writeMRC(particles, particleFileName)  # TODO: pixelsize

        ##### Output a star file with CTF and particle info. #####
        print("Particle file: " + particleFileName)
        particleStarList[K] = os.path.splitext(particleFileName)[0] + ".star"
        print("star file: " + particleStarList[K])

        headerDict = {"ImageName": 1, "CoordinateX": 2, "CoordinateY": 3,
                      "MicrographName": 4}
        lookupDict = dict(zip(headerDict.values(), headerDict.keys()))
        mrcsPartName = os.path.splitext(particleStarList[K])[0] + ".mrcs"

        # Text mode: everything written below is str, not bytes.
        with open(particleStarList[K], 'w') as fh:
            fh.write("\ndata_images\n\nloop_\n")

            for J in sorted(lookupDict):
                fh.write("_rln" + lookupDict[J] + " #" + str(J) + "\n")

            for I in range(len(xCoord)):
                fh.write("%06d@%s  %.1f  %.1f  %s\n"
                         % (I + 1, mrcsPartName, xCoord[I], yCoord[I], mrcFileName))

    # TODO: join all star files, for multiprocessing this should just return the list of star files
    # TODO: add CTF Info
    t1 = time.time()
    print("Particle extraction finished in (s): %.2f" % (t1 - t0))
    return particleStarList
<= 0.4.1 to the 0.5.0 standard, so that the volume sampling is set to the default (Mx, My, Mz) = [0, 0, 1]. """ if len(sys.argv) > 1: directory = sys.argv[1] mrcz_files = glob.glob(path.join(directory, '*.mrc'), recursive=False) mrcz_files.extend(glob.glob(path.join(directory, '*.mrcz'), recursive=False)) backup_dir = path.join(directory, 'backup') os.makedirs(backup_dir) for filename in mrcz_files: shutil.copy(filename, path.join(backup_dir, path.basename(filename))) data, meta = mrcz.readMRC(filename) compressor = meta['compressor'] if 'compressor' in meta else None clevel = meta['clevel'] if 'clevel' in meta else 1 voltage = meta['voltage'] if 'voltage' in meta else 0.0 C3 = meta['C3'] if 'C3' in meta else 0.0 gain = meta['gain'] if 'gain' in meta else 1.0 pixelsize = meta['pixelsize'] if 'pixelsize' in meta else [0.1, 0.1, 0.1] pixelunits = meta['pixelunits'] if 'pixelunits' in meta else u'\\AA' if 'packedBytes' in meta: meta.pop('packedBytes') if 'minImage' in meta: meta.pop('minImage') if 'maxImage' in meta: meta.pop('maxImage')