def test_list_3d_compressed(self):
    """Round-trip a list of 3D int8 stacks through a zstd-compressed file.

    The list is written with ``mrcz.writeMRC`` and read back with
    ``mrcz.readMRC``; the result must be a list of the same length whose
    frames match the originals in dtype and in value.
    """
    # ``[frame] * 3`` repeats one array object, so all three entries of the
    # list hold the same data.
    testMage = [
        np.random.uniform(high=10, size=[3, 32, 32]).astype('int8')
    ] * 3
    mrcName = os.path.join(tmpDir, 'testMage.mrcz')
    pixelsize = [5.6, 3.4]
    mrcz.writeMRC(testMage, mrcName, pixelsize=pixelsize,
                  compressor='zstd', clevel=1, n_threads=1)
    # The backslash is escaped explicitly: '\A' is not a valid escape
    # sequence and triggers a warning on recent Python versions.
    rereadMage, _ = mrcz.readMRC(mrcName, pixelunits=u'\\AA')

    # Best-effort cleanup. os.remove raises OSError (IOError is only an
    # alias of it on Python 3), so catch OSError to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info('Warning: file {} left on disk'.format(mrcName))

    assert isinstance(rereadMage, list)
    assert len(rereadMage) == len(testMage)
    for testFrame, rereadFrame in zip(testMage, rereadMage):
        assert testFrame.dtype == rereadFrame.dtype
        npt.assert_array_almost_equal(testFrame, rereadFrame)
def compReadWrite(self, testMage, casttype=None, compressor=None, clevel=1):
    """Write ``testMage`` to disk, read it back, and compare data and header.

    This is the shared helper that actually touches the disk.

    Parameters
    ----------
    testMage
        Image data handed to ``mrcz.writeMRC``.
    casttype
        Forwarded to ``mrcz.writeMRC`` as ``dtype``.
    compressor, clevel
        Forwarded unchanged to ``mrcz.writeMRC``.
    """
    mrcName = os.path.join(tmpDir, 'testMage.mrc')
    pixelsize = np.array([1.2, 2.6, 3.4])
    # The backslash is escaped explicitly: '\A' is not a valid escape
    # sequence and triggers a warning on recent Python versions.
    pixelunits = u'\\AA'

    mrcz.writeMRC(testMage, mrcName, dtype=casttype,
                  pixelsize=pixelsize, pixelunits=pixelunits,
                  voltage=300.0, C3=2.7, gain=1.05,
                  compressor=compressor, clevel=clevel)
    rereadMage, rereadHeader = mrcz.readMRC(mrcName, pixelunits=pixelunits)

    # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't
    # support it.  os.remove raises OSError (IOError is only an alias of it
    # on Python 3), so catch OSError to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info('Warning: file {} left on disk'.format(mrcName))

    npt.assert_array_almost_equal(testMage, rereadMage)
    npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
    assert rereadHeader['pixelunits'] == pixelunits
    npt.assert_almost_equal(rereadHeader['voltage'], 300.0)
    npt.assert_almost_equal(rereadHeader['C3'], 2.7)
    npt.assert_almost_equal(rereadHeader['gain'], 1.05)
def compress(path):
    """Compress one ``.mrc``/``.mrcs`` file into ``.mrcz``/``.mrczs``.

    The file is read with ``mrcz.readMRC`` and rewritten with
    ``mrcz.writeMRC`` using zstd at compression level 9.  The header fields
    pixelsize, pixelunits, voltage, C3 and gain are carried over.

    Behaviour is controlled by the module-level ``args`` object:

    * ``args.verbose`` -- print the paths, file sizes and compression ratio.
    * ``args.dry_run`` -- report only; nothing is read, written or deleted.
    * ``args.delete``  -- remove ``path`` once a non-empty compressed file
      exists.

    Parameters
    ----------
    path : str
        File to compress; must end in ``.mrc`` or ``.mrcs``.

    Raises
    ------
    ValueError
        If ``path`` has neither of the supported extensions.
    """
    global args

    if path.endswith('.mrc'):
        mrcz_path = path[:-4] + '.mrcz'
    elif path.endswith('.mrcs'):
        mrcz_path = path[:-5] + '.mrczs'
    else:
        # Fail early and clearly instead of hitting an unbound local later.
        raise ValueError(
            'compress expects a .mrc or .mrcs file, got %r' % (path,))

    if args.verbose:
        print('compress %s -> %s' % (path, mrcz_path))
        original_size = os.path.getsize(path)
        print('%20s : %d bytes' % ('original size', original_size))

    if not args.dry_run:
        imageData, imageMeta = mrcz.readMRC(path)
        mrcz.writeMRC(imageData, mrcz_path, compressor='zstd', clevel=9,
                      pixelsize=imageMeta['pixelsize'],
                      pixelunits=imageMeta['pixelunits'],
                      voltage=imageMeta['voltage'],
                      C3=imageMeta['C3'],
                      gain=imageMeta['gain'])
        # The output only exists when something was actually written, so
        # the size report belongs inside the non-dry-run branch.
        if args.verbose:
            compressed_size = os.path.getsize(mrcz_path)
            if compressed_size:
                ratio = float(original_size) / compressed_size
            else:
                # Avoid ZeroDivisionError on an empty output file.
                ratio = float('inf')
            print('%20s : %d bytes' % ('compressed size', compressed_size))
            print('%20s : %.2f' % ('compression ratio', ratio))

    if args.delete:
        if args.verbose:
            print('delete %s' % (path))
        # Never delete the original unless a non-empty result is on disk.
        if (not args.dry_run and os.path.exists(mrcz_path)
                and os.path.getsize(mrcz_path) > 0):
            os.remove(path)
def test_MRC_append(self):
    """Append two float32 frames to one MRC stack and read the stack back.

    Each frame is written with ``idx`` set to its position in the stack.
    The re-read array must have one entry per frame, and every frame must
    match the original in dtype and value.
    """
    log.info('Testing appending to existing MRC stack, float-32')
    f32_stack = [
        np.random.normal(size=[128, 96]).astype(np.float32)
        for _ in range(2)
    ]
    mrcName = os.path.join(tmpDir, 'testStack.mrcs')
    pixelsize = [5.6, 3.4]

    for j, frame in enumerate(f32_stack):
        mrcz.writeMRC(frame, mrcName, pixelsize=pixelsize,
                      compressor=None, idx=j)

    # The backslash is escaped explicitly: '\A' is not a valid escape
    # sequence and triggers a warning on recent Python versions.
    rereadMage, _ = mrcz.readMRC(mrcName, pixelunits=u'\\AA')

    # Best-effort cleanup. os.remove raises OSError (IOError is only an
    # alias of it on Python 3), so catch OSError to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info('Warning: file {} left on disk'.format(mrcName))

    assert rereadMage.shape[0] == len(f32_stack)
    for testFrame, rereadFrame in zip(f32_stack, rereadMage):
        assert testFrame.dtype == rereadFrame.dtype
        npt.assert_array_almost_equal(testFrame, rereadFrame)
def test_numpy_metadata(self):
    """Check that NumPy scalars and arrays in ``meta`` survive a round trip.

    The metadata dictionary mixes NumPy scalars, a list of arrays and a
    nested dictionary.  After writing and re-reading, the first array in
    ``'foo'`` and the nested ``'boo'`` array are compared to the originals.
    """
    log.info('Testing NumPy types in meta-data')
    meta = {
        'zoo': np.float64(1.0),
        'foo': [np.ones(16), np.ones(16), np.ones(16)],
        'bar': {
            # Note: no support in JSON for complex numbers.
            'moo': np.uint64(42),
            'boo': np.full(8, 3, dtype=np.int32)
        }
    }
    mage = np.zeros([32, 32], dtype=np.float32)
    mrcName = os.path.join(tmpDir, 'testMage.mrc')

    mrcz.writeMRC(mage, mrcName, meta=meta)
    # Only the metadata is inspected; the image data is discarded.
    _, re_meta = mrcz.readMRC(mrcName)

    # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't
    # support it.  os.remove raises OSError (IOError is only an alias of it
    # on Python 3), so catch OSError to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info('Warning: file {} left on disk'.format(mrcName))

    npt.assert_array_equal(re_meta['foo'][0], meta['foo'][0])
    npt.assert_array_equal(re_meta['bar']['boo'], meta['bar']['boo'])
def extract(path):
    """Decompress one ``.mrcz``/``.mrczs`` file into ``.mrc``/``.mrcs``.

    Nothing happens unless the file was last modified more than
    ``args.time`` minutes ago.  When it is old enough, the file is read with
    ``mrcz.readMRC`` and rewritten uncompressed with ``mrcz.writeMRC``.  The
    header fields pixelsize, pixelunits, voltage, C3 and gain are carried
    over.

    Behaviour is controlled by the module-level ``args`` object:

    * ``args.time``    -- minimum age of the file, in minutes.
    * ``args.verbose`` -- print the paths, file sizes and compression ratio.
    * ``args.dry_run`` -- report only; nothing is read, written or deleted.
    * ``args.delete``  -- remove ``path`` once a non-empty extracted file
      exists.

    Parameters
    ----------
    path : str
        File to extract; must end in ``.mrcz`` or ``.mrczs``.

    Raises
    ------
    ValueError
        If ``path`` has neither of the supported extensions.
    """
    global args

    if path.endswith('.mrcz'):
        mrc_path = path[:-5] + '.mrc'
    elif path.endswith('.mrczs'):
        mrc_path = path[:-6] + '.mrcs'
    else:
        # Fail early and clearly instead of hitting an unbound local later.
        raise ValueError(
            'extract expects a .mrcz or .mrczs file, got %r' % (path,))

    time_last_modified = os.path.getmtime(path)
    time_since = (time.time() - time_last_modified) / 60  # in minutes
    if time_since <= args.time:
        # File is too recent; leave it alone.
        return

    if args.verbose:
        print('extract %s -> %s' % (path, mrc_path))
        compressed_size = os.path.getsize(path)
        print('%20s : %d bytes' % ('compressed size', compressed_size))

    if not args.dry_run:
        imageData, imageMeta = mrcz.readMRC(path)
        mrcz.writeMRC(imageData, mrc_path, compressor=None,
                      pixelsize=imageMeta['pixelsize'],
                      pixelunits=imageMeta['pixelunits'],
                      voltage=imageMeta['voltage'],
                      C3=imageMeta['C3'],
                      gain=imageMeta['gain'])
        # The output only exists when something was actually written, so
        # the size report belongs inside the non-dry-run branch.
        if args.verbose:
            uncompressed_size = os.path.getsize(mrc_path)
            if compressed_size:
                ratio = float(uncompressed_size) / compressed_size
            else:
                # Avoid ZeroDivisionError on an empty input file.
                ratio = float('inf')
            print('%20s : %d bytes' % ('uncompressed size', uncompressed_size))
            print('%20s : %.2f' % ('compression ratio', ratio))

    if args.delete:
        if args.verbose:
            print('delete %s' % (path))
        # Never delete the original unless a non-empty result is on disk.
        if (not args.dry_run and os.path.exists(mrc_path)
                and os.path.getsize(mrc_path) > 0):
            os.remove(path)
def test_list_change_output_shape_compressed(self):
    """Read a compressed 3D array back as a list with varying ``slices``.

    A ``[6, 32, 32]`` int8 array is written with zstd and re-read with
    ``slices`` set to 1 and then 2.  Each time the result must be a list
    whose length is the first dimension divided by ``slices``.
    """
    testMage = np.random.uniform(high=10, size=[6, 32, 32]).astype('int8')
    pixelsize = [5.6, 3.4]
    mrcName = os.path.join(tmpDir, 'testMage.mrcz')

    for slices in (1, 2):
        mrcz.writeMRC(testMage, mrcName, pixelsize=pixelsize,
                      compressor='zstd', clevel=1, n_threads=1)
        # The backslash is escaped explicitly: '\A' is not a valid escape
        # sequence and triggers a warning on recent Python versions.
        rereadMage, _ = mrcz.readMRC(mrcName, pixelunits=u'\\AA',
                                     slices=slices)

        # Best-effort cleanup. os.remove raises OSError (IOError is only an
        # alias of it on Python 3), so catch OSError to cover Python 2 too.
        try:
            os.remove(mrcName)
        except OSError:
            log.info('Warning: file {} left on disk'.format(mrcName))

        assert isinstance(rereadMage, list)
        assert len(rereadMage) == testMage.shape[0] // slices
def compReadWrite(self, testMage, casttype=None, compressor=None, clevel=1):
    """Write ``testMage`` to disk, read it back, and compare data and header.

    This is the shared helper that actually touches the disk.

    Parameters
    ----------
    testMage
        Image data handed to ``mrcz.writeMRC``.
    casttype
        Forwarded to ``mrcz.writeMRC`` as ``dtype``.
    compressor, clevel
        Forwarded unchanged to ``mrcz.writeMRC``.
    """
    mrcName = os.path.join(tmpDir, "testMage.mrc")
    pixelsize = np.array([1.2, 2.6, 3.4])
    # The backslash is escaped explicitly: "\A" is not a valid escape
    # sequence and triggers a warning on recent Python versions.
    pixelunits = u"\\AA"

    mrcz.writeMRC(testMage, mrcName, dtype=casttype,
                  pixelsize=pixelsize, pixelunits=pixelunits,
                  voltage=300.0, C3=2.7, gain=1.05,
                  compressor=compressor, clevel=clevel)
    rereadMage, rereadHeader = mrcz.readMRC(mrcName, pixelunits=pixelunits)

    # Best-effort cleanup. os.remove raises OSError (IOError is only an
    # alias of it on Python 3), so catch OSError to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info("Warning: file {} left on disk".format(mrcName))

    npt.assert_array_almost_equal(testMage, rereadMage)
    npt.assert_array_equal(rereadHeader['voltage'], 300.0)
    npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
    npt.assert_array_equal(rereadHeader['pixelunits'], pixelunits)
    npt.assert_array_equal(rereadHeader['C3'], 2.7)
    npt.assert_array_equal(rereadHeader['gain'], 1.05)
def file_writer(filename, signal, do_async=False, compressor=None, clevel=1,
                n_threads=None, **kwds):
    """Write a HyperSpy 2D signal to ``filename`` through the MRCZ writer.

    Parameters
    ----------
    filename
        Destination path, passed straight to the MRCZ writer.
    signal
        Must be a ``Signal2D`` or ``ComplexSignal2D`` instance.
    do_async : bool
        When true ``_mrcz.asyncWriteMRC`` is used, otherwise
        ``_mrcz.writeMRC``.
    compressor, clevel, n_threads
        Forwarded unchanged to the MRCZ writer.
    **kwds
        Only ``endianess`` is read (default ``'<'``): ``'<'`` selects
        ``'le'``, anything else selects ``'be'``.

    Raises
    ------
    TypeError
        If ``signal`` is not one of the supported signal classes.
    """
    import hyperspy.signals

    accepted_types = (hyperspy.signals.Signal2D,
                      hyperspy.signals.ComplexSignal2D)
    if not isinstance(signal, accepted_types):
        raise TypeError("MRCZ supports 2D and 3D data only. type(signal) is "
                        "{}".format(type(signal)))

    byte_order = kwds.pop('endianess', '<')
    if byte_order == '<':
        mrcz_endian = 'le'
    else:
        mrcz_endian = 'be'

    header = dict(
        meta=signal.metadata.as_dictionary(),
        endian=mrcz_endian,
        # Pixel size comes from the axes listed in _WRITE_ORDER; the units
        # are taken from the last axis.
        pixelsize=[signal.axes_manager[I].scale for I in _WRITE_ORDER],
        pixelunits=signal.axes_manager[-1].units,
        voltage=signal.metadata.get_item(
            'Acquisition_instrument.TEM.beam_energy'),
        # There aren't hyperspy fields for spherical aberration or
        # detector gain, so C3 is fixed and gain falls back to 1.0.
        C3=0.0,
        gain=signal.metadata.get_item(
            "Signal.Noise_properties."
            "Variance_linear_model.gain_factor", 1.0),
    )

    # Both writers take the same arguments; only the callee differs.
    writer = _mrcz.asyncWriteMRC if do_async else _mrcz.writeMRC
    writer(signal.data, filename,
           compressor=compressor, clevel=clevel, n_threads=n_threads,
           **header)
def test_cast_array_from_f64(self):
    """Write a float64 array and expect float32-equivalent data back.

    The re-read array is compared against the original cast to float32.
    """
    log.info('Testing float-64 casting')
    f64_mage = np.random.normal(size=[2, 128, 96]).astype(np.float64)
    f32_mage = f64_mage.astype(np.float32)
    mrcName = os.path.join(tmpDir, 'testMage.mrc')

    mrcz.writeMRC(f64_mage, mrcName, compressor='zstd', clevel=1)
    # Only the image data is inspected; the header is discarded.
    rereadMage, _ = mrcz.readMRC(mrcName)

    # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't
    # support it.  os.remove raises OSError (IOError is only an alias of it
    # on Python 3), so catch OSError to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info('Warning: file {} left on disk'.format(mrcName))

    npt.assert_array_almost_equal(f32_mage, rereadMage)
def test_cast_list_from_c128(self):
    """Write a list of complex128 frames and expect complex64 data back.

    Each re-read frame is compared against the matching original frame
    cast to complex64.
    """
    log.info('Testing complex-128 casting')
    c128_mage = [
        np.random.normal(size=[128, 96]).astype(np.float64)
        + 1j * np.random.normal(size=[128, 96]).astype(np.float64)
        for _ in range(2)
    ]
    c64_mage = [frame.astype(np.complex64) for frame in c128_mage]
    mrcName = os.path.join(tmpDir, 'testMage.mrc')

    mrcz.writeMRC(c128_mage, mrcName, compressor='zstd', clevel=1)
    # Only the image data is inspected; the header is discarded.
    rereadMage, _ = mrcz.readMRC(mrcName)

    # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't
    # support it.  os.remove raises OSError (IOError is only an alias of it
    # on Python 3), so catch OSError to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info('Warning: file {} left on disk'.format(mrcName))

    npt.assert_array_almost_equal(c64_mage[0], rereadMage[0])
    npt.assert_array_almost_equal(c64_mage[1], rereadMage[1])
def crossReadWrite(self, testMage, casttype=None, compressor=None, clevel=1):
    """Round-trip ``testMage`` through the external program ``cmrczProg``.

    The array is written with ``mrcz.writeMRC``, passed through the command
    line program, and the program's output is re-read with ``mrcz.readMRC``
    and compared with the input (shape, dtype, data and header fields).

    Parameters
    ----------
    testMage
        Array to round-trip.
    casttype
        Accepted for signature compatibility; not used here.
    compressor, clevel
        Accepted for signature compatibility; both are overridden below.
    """
    mrcInput = os.path.join(tmpDir, 'testIn.mrcz')
    mrcOutput = os.path.join(tmpDir, 'testOut.mrcz')

    # NOTE(review): the caller's compressor/clevel are overridden here, so
    # every call runs uncompressed at level 1 -- confirm this is intended.
    compressor = None
    blocksize = 64
    clevel = 1

    pixelsize = [1.2, 2.6, 3.4]
    # The backslash is escaped explicitly: '\A' is not a valid escape
    # sequence and triggers a warning on recent Python versions.
    pixelunits = u'\\AA'

    mrcz.writeMRC(testMage, mrcInput,
                  pixelsize=pixelsize, pixelunits=pixelunits,
                  voltage=300.0, C3=2.7, gain=1.05,
                  compressor=compressor)

    sub.call(cmrczProg + ' -i %s -o %s -c %s -B %d -l %d'
             % (mrcInput, mrcOutput, compressor, blocksize, clevel),
             shell=True)

    rereadMage, rereadHeader = mrcz.readMRC(mrcOutput, pixelunits=pixelunits)

    os.remove(mrcOutput)
    os.remove(mrcInput)

    assert np.all(testMage.shape == rereadMage.shape)
    assert testMage.dtype == rereadMage.dtype
    npt.assert_array_almost_equal(testMage, rereadMage)
    npt.assert_array_equal(rereadHeader['voltage'], 300.0)
    npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
    npt.assert_array_equal(rereadHeader['pixelunits'], pixelunits)
    npt.assert_array_equal(rereadHeader['C3'], 2.7)
    npt.assert_array_equal(rereadHeader['gain'], 1.05)
def test_enum_metadata(self):
    """Check how ``Enum`` members stored in ``meta`` come back after a round trip.

    The re-read ``'axes'`` entry is expected to contain the strings
    ``'Enum.Axis.X'`` and ``'Enum.Axis.Y'``.
    """
    log.info('Testing Enum types in meta-data')

    class Axis(Enum):
        X = 0
        Y = 1

    meta = {'axes': [Axis.Y, Axis.X]}
    mage = np.zeros([4, 4], dtype=np.float32)
    mrcName = os.path.join(tmpDir, 'testMage.mrc')

    mrcz.writeMRC(mage, mrcName, meta=meta)
    # Only the metadata is inspected; the image data is discarded.
    _, re_meta = mrcz.readMRC(mrcName)

    # `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't
    # support it.  os.remove raises OSError (IOError is only an alias of it
    # on Python 3), so catch OSError to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info('Warning: file {} left on disk'.format(mrcName))

    assert 'Enum.Axis.X' in re_meta['axes']
    assert 'Enum.Axis.Y' in re_meta['axes']
def test_mrcfile_consistency(self):
    """Check that a file written by MRCZ is read consistently by ``mrcfile``.

    A float32 volume of shape ``[NZ, NY, NX]`` is written uncompressed with
    ``mrcz.writeMRC`` and opened with ``mrcfile``.  The header fields
    mx/my/mz must equal NX/NY/NZ, and the voxel size x/y/z must match
    ``pixelsize`` in reverse order.
    """
    log.info(
        'Testing consistency between MRCZ and mrcfile regarding dimensions and sampling, float-32'
    )
    NX = 32
    NY = 64
    NZ = 128
    # It's a particular case when MX == NX, etc, but we just want to test if
    # MX/MY/MZ and the pixel size are being written correctly.
    MX = NX
    MY = NY
    MZ = NZ

    f32_vol = np.random.normal(size=[NZ, NY, NX]).astype(np.float32)
    mrcName = os.path.join(tmpDir, 'testVol.mrc')
    pixelsize = [5.6, 3.4, 1.3]
    mrcz.writeMRC(f32_vol, mrcName, pixelsize=pixelsize, compressor=None)

    # Now read the file with mrcfile and check the header.
    with mrcfile.open(mrcName, mode='r+') as mrc:
        # Assert dimensions
        print(mrc.header.mx, MX)
        assert mrc.header.mx == MX
        assert mrc.header.my == MY
        assert mrc.header.mz == MZ

        # Assert pixelsize; pixelsize is compared in reverse order against
        # the voxel size x, y, z.
        assert np.isclose(mrc.voxel_size.x, pixelsize[2])
        assert np.isclose(mrc.voxel_size.y, pixelsize[1])
        assert np.isclose(mrc.voxel_size.z, pixelsize[0])

    # Best-effort cleanup once the file handle is closed. os.remove raises
    # OSError (IOError is only an alias of it on Python 3), so catch OSError
    # to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info('Warning: file {} left on disk'.format(mrcName))
def test_JSON(self):
    """Round-trip user metadata plus header fields through a compressed file.

    An int8 stack is written with a small ``meta`` dictionary using zstd
    and four threads.  After re-reading, shape, dtype, data, every ``meta``
    key and the pixelsize/pixelunits/voltage/C3/gain header fields must
    match what was written.
    """
    testMage = np.random.uniform(high=10, size=[3, 128, 64]).astype('int8')
    meta = {'foo': 5, 'bar': 42}
    mrcName = os.path.join(tmpDir, "testMage.mrcz")
    pixelsize = [1.2, 5.6, 3.4]
    # The backslash is escaped explicitly: "\A" is not a valid escape
    # sequence and triggers a warning on recent Python versions.
    pixelunits = u"\\AA"

    mrcz.writeMRC(testMage, mrcName, meta=meta,
                  pixelsize=pixelsize, pixelunits=pixelunits,
                  voltage=300.0, C3=2.7, gain=1.05,
                  compressor='zstd', clevel=1, n_threads=4)
    rereadMage, rereadHeader = mrcz.readMRC(mrcName, pixelunits=pixelunits)

    # Best-effort cleanup. os.remove raises OSError (IOError is only an
    # alias of it on Python 3), so catch OSError to cover Python 2 as well.
    try:
        os.remove(mrcName)
    except OSError:
        log.info("Warning: file {} left on disk".format(mrcName))

    assert np.all(testMage.shape == rereadMage.shape)
    assert testMage.dtype == rereadMage.dtype
    for key in meta:
        assert meta[key] == rereadHeader[key]

    npt.assert_array_almost_equal(testMage, rereadMage)
    npt.assert_array_equal(rereadHeader['voltage'], 300.0)
    npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
    npt.assert_array_equal(rereadHeader['pixelunits'], pixelunits)
    npt.assert_array_equal(rereadHeader['C3'], 2.7)
    npt.assert_array_equal(rereadHeader['gain'], 1.05)
if 'packedBytes' in meta: meta.pop('packedBytes') if 'minImage' in meta: meta.pop('minImage') if 'maxImage' in meta: meta.pop('maxImage') if 'meanImage' in meta: meta.pop('meanImage') if 'extendedBytes' in meta: meta.pop('extendedBytes') mrcz.writeMRC(data, filename, meta=meta, pixelsize=pixelsize, pixelunits=pixelunits, voltage=voltage, C3=C3, gain=gain, compressor=compressor, clevel=clevel) # Try and read every file back in. try: mrcz.readMRC(filename) except: raise ValueError('{} could not be re-read'.format(filename)) print('__All__ originals were backed up to %s, remember to delete them' % path.join(directory, 'backup'))
def partExtract( globPath, boxShape, boxExt=".star",
                binShape = None, binKernel = 'lanczos2', rootName="part", sigmaFilt=-1.0,
                invertContrast=True, normalize=True, fitBackground=True,
                movieMode=False, startFrame=None, endFrame=None, doseFilter=False ):
    r"""
    Extracts particles from aligned and summed micrographs (generally <micrograph>.mrc).
    cd .
    globPath = "align\*.mrc" for example, will process all such mrc files.
        *.log will use Zorro logs
        Can also just pass a list (or tuple) of files

    TODO: CHANGE TO LOAD FROM A ZORRO LOG FILE?  OR JUST MAKE IT THE PREFERRED OPTION?

    Expects to find <micrograph>.star in the directory which has all the box centers in Relion .star format.
    Micrographs without such a file are skipped and their entry in the returned list stays None.

    boxShape = [y,x] is the size of the box cut around every coordinate.  Half the box (integer
    division) is taken on each side of the centre, so the entries should be even.

    binShape = [y,x] is the particle box size to resample to.  If == None no resampling is done.
        For binning, binKernel = 'lanczos2' or 'gauss' reflects the anti-aliasing filter used.
        The binning factor is rounded down to a power of two.  Any other binKernel raises ValueError.

    rootName affects the suffix appended to each extracted particle stack (<micrograph>_<rootName>.mrcs)

    sigmaFilt is the standard deviation applied for removal of x-rays and hot pixels, 2.5 - 3.0 is the
        recommended range.  It uses the sigmaFilt value to compute a confidence interval to filter the
        intensity value of only outlier pixels (typically ~ 1 %).  Values <= 0 disable the filter.

    invertContrast = True, inverts the contrast, as required for Relion/Frealign.

    normalize = True changes the particles to have 0.0 mean and 1.0 standard deviation, as required for
        Relion/Frealign.

    fitBackground = True removes a 2D Gaussian from the image.  In general it's better to perform this prior to
        particle picking using the Zorro dose filtering+background subtraction mechanism.

    movieMode, startFrame, endFrame and doseFilter are accepted but currently unused.

    Returns a list with one entry per micrograph: the path of the written .star file, or None when the
    micrograph was skipped.

    TODO: GRAB THE CTF INFORMATION AS WELL AND MAKE A MERGED .STAR FILE
    TODO: add a movie mode that outputs a substack (dose-filtered) average.
    """
    t0 = time.time()
    if isinstance( globPath, (list, tuple) ):
        mrcFiles = globPath
    else:
        mrcFiles = glob.glob( globPath )

    # Only tolerate "directory already exists"; any other failure (e.g. a
    # permission problem) is reported here instead of surfacing later.
    try:
        os.mkdir( "Particles" )
    except OSError:
        if not os.path.isdir( "Particles" ):
            raise

    # Integer half-sizes: slice bounds must be integers.
    halfBoxY = boxShape[0] // 2
    halfBoxX = boxShape[1] // 2

    particleStarList = [None]*len(mrcFiles)
    for K, mrcFileName in enumerate(mrcFiles):
        boxFileName = os.path.splitext( mrcFileName )[0] + boxExt
        if not os.path.isfile( boxFileName ):
            print( "Could not find .box/.star file: " + boxFileName )
            continue

        rlnBox = ReliablePy.ReliablePy()
        rlnBox.load( boxFileName )

        xCoord = rlnBox.star['data_']['CoordinateX']
        yCoord = rlnBox.star['data_']['CoordinateY']

        mrcMage = mrcz.readMRC( mrcFileName )[0]

        ###### Remove background from whole image #####
        if bool( fitBackground ):
            mrcMage -= zorro_util.backgroundEstimate( mrcMage )

        ###### Check for particles too close to the edge and remove those coordinates. #####
        keepElements = ~( (xCoord < halfBoxX) |
            ( yCoord < halfBoxY ) |
            ( xCoord > mrcMage.shape[1] - halfBoxX ) |
            ( yCoord > mrcMage.shape[0] - halfBoxY ) )
        xCoord = xCoord[keepElements]
        yCoord = yCoord[keepElements]

        ##### Extract particles #####
        particles = np.zeros( [len(xCoord), boxShape[0], boxShape[1]], dtype='float32' )
        for J in range( len(xCoord) ):
            # Coordinates may be stored as floats; truncate to pixel indices.
            yCenter = int( yCoord[J] )
            xCenter = int( xCoord[J] )
            partMat = mrcMage[ yCenter - halfBoxY:yCenter + halfBoxY,
                               xCenter - halfBoxX:xCenter + halfBoxX ]

            ###### Apply confidence-interval gaussian filter #####
            if sigmaFilt > 0.0:
                partMean = np.mean( partMat )
                partStd = np.std( partMat )
                partHotpix = np.abs(partMat - partMean) > sigmaFilt*partStd
                # Clip before applying the filter to better limit multiple pixel hot spots
                partMat = np.clip( partMat, partMean - sigmaFilt*partStd, partMean + sigmaFilt*partStd )

                # Let's stick to a gaussian_filter, it's much faster than a median filter and seems
                # equivalent if we pre-clip.  Only the outlier pixels take the filtered value.
                partFilt = scipy.ndimage.gaussian_filter( partMat, 4.0 )
                particles[J,:,:] = partHotpix * partFilt + (~ partHotpix) * partMat
            else:
                particles[J,:,:] = partMat

        ##### Re-scale particles #####
        if np.any( binShape ):
            binFact = np.asarray( boxShape, dtype=float ) / np.asarray( binShape, dtype=float )
            # Force binFact to power of 2 for now: round down to 2**floor(log2(x)).
            binFact = 2.0 ** np.floor( np.log2( binFact ) )

            if binKernel == 'lanczos2':
                # 2nd order Lanczos kernel
                lOrder = 2
                xWin = np.arange( -lOrder, lOrder + 1.0/binFact[1], 1.0/binFact[1] )
                yWin = np.arange( -lOrder, lOrder + 1.0/binFact[0], 1.0/binFact[0] )
                xWinMesh, yWinMesh = np.meshgrid( xWin, yWin )
                rmesh = np.sqrt( xWinMesh*xWinMesh + yWinMesh*yWinMesh )

                import warnings
                # The centre sample divides by zero; it is overwritten right after.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    windowKernel = (lOrder/(np.pi*np.pi*rmesh*rmesh)) * np.sin( np.pi / lOrder * rmesh ) * np.sin( np.pi * rmesh )
                windowKernel[ yWin==0, xWin==0 ] = 1.0

            elif binKernel == 'gauss':
                xWin = np.arange( -2.0*binFact[1],2.0*binFact[1]+1.0 )
                yWin = np.arange( -2.0*binFact[0],2.0*binFact[0]+1.0 )
                print( "binFact = " + str(binFact) )
                xWinMesh, yWinMesh = np.meshgrid( xWin, yWin )
                windowKernel = np.exp( -xWinMesh**2/(0.36788*binFact[1]) - yWinMesh**2/(0.36788*binFact[0]) )

            else:
                raise ValueError( "Unknown binKernel: %r (expected 'lanczos2' or 'gauss')" % (binKernel,) )

            partRescaled = np.zeros( [len(xCoord), binShape[0], binShape[1]], dtype='float32' )
            for J in range( len(xCoord) ):
                # TODO: switch from squarekernel to an interpolator so we can use non-powers of 2
                partRescaled[J,:,:] = zorro_util.squarekernel( scipy.ndimage.convolve( particles[J,:,:], windowKernel ),
                    k=int( binFact[0] ) )

            particles = partRescaled

        print( " particles.dtype = " + str(particles.dtype) )

        ###### Normalize particles after binning and background subtraction #####
        # NOTE(review): the statistics are taken along axis 0, i.e. per pixel
        # across the particle stack -- confirm this is the intended normalization.
        if bool(normalize):
            particles -= np.mean( particles, axis=0 )
            particles *= 1.0 / np.std( particles, axis=0 )

        ##### Invert contrast #####
        if bool(invertContrast):
            particles = -particles

        ###### Save particles to disk ######
        # Relion always saves particles within ./Particles/<fileext> but we could use anything if we want to
        # make changes and save them in the star file.
        particleFileName = os.path.join( "Particles", os.path.splitext( mrcFileName )[0] +"_" + rootName + ".mrcs" )
        # TODO: add pixel size to particles file
        mrcz.writeMRC( particles, particleFileName ) # TODO: pixelsize

        ##### Output a star file with CTF and particle info. #####
        print( "Particle file: " + particleFileName )
        particleStarList[K] = os.path.splitext( particleFileName )[0] + ".star"
        print( "star file: " + particleStarList[K] )

        headerDict = { "ImageName":1, "CoordinateX":2, "CoordinateY":3, "MicrographName": 4 }
        lookupDict = dict( zip( headerDict.values(), headerDict.keys() ) )
        mrcsPartName = os.path.splitext( particleStarList[K] )[0] + ".mrcs"

        # Text mode: str objects are written below.
        with open( particleStarList[K], 'w' ) as fh:
            fh.write( "\ndata_images\n\nloop_\n")

            for J in sorted( lookupDict ):
                fh.write( "_rln" + lookupDict[J] + " #" + str(J) + "\n")

            for I in range( len(xCoord) ):
                fh.write( "%06d@%s  %.1f  %.1f  %s\n" % ( I+1, mrcsPartName, xCoord[I], yCoord[I], mrcFileName ) )

    # TODO: join all star files, for multiprocessing this should just return the list of star files
    # TODO: add CTF Info
    t1 = time.time()
    print( "Particle extraction finished in (s): %.2f" % (t1-t0) )
    return particleStarList
def partExtract(globPath,
                boxShape,
                boxExt=".star",
                binShape=None,
                binKernel='lanczos2',
                rootName="part",
                sigmaFilt=-1.0,
                invertContrast=True,
                normalize=True,
                fitBackground=True,
                movieMode=False,
                startFrame=None,
                endFrame=None,
                doseFilter=False):
    r"""
    Extracts particles from aligned and summed micrographs (generally <micrograph>.mrc).
    cd .
    globPath = "align\*.mrc" for example, will process all such mrc files.
        *.log will use Zorro logs
        Can also just pass a list (or tuple) of files

    TODO: CHANGE TO LOAD FROM A ZORRO LOG FILE?  OR JUST MAKE IT THE PREFERRED OPTION?

    Expects to find <micrograph>.star in the directory which has all the box centers in Relion .star format.
    Micrographs without such a file are skipped and their entry in the returned list stays None.

    boxShape = [y,x] is the size of the box cut around every coordinate.  Half the box (integer
    division) is taken on each side of the centre, so the entries should be even.

    binShape = [y,x] is the particle box size to resample to.  If == None no resampling is done.
        For binning, binKernel = 'lanczos2' or 'gauss' reflects the anti-aliasing filter used.
        The binning factor is rounded down to a power of two.  Any other binKernel raises ValueError.

    rootName affects the suffix appended to each extracted particle stack (<micrograph>_<rootName>.mrcs)

    sigmaFilt is the standard deviation applied for removal of x-rays and hot pixels, 2.5 - 3.0 is the
        recommended range.  It uses the sigmaFilt value to compute a confidence interval to filter the
        intensity value of only outlier pixels (typically ~ 1 %).  Values <= 0 disable the filter.

    invertContrast = True, inverts the contrast, as required for Relion/Frealign.

    normalize = True changes the particles to have 0.0 mean and 1.0 standard deviation, as required for
        Relion/Frealign.

    fitBackground = True removes a 2D Gaussian from the image.  In general it's better to perform this prior to
        particle picking using the Zorro dose filtering+background subtraction mechanism.

    movieMode, startFrame, endFrame and doseFilter are accepted but currently unused.

    Returns a list with one entry per micrograph: the path of the written .star file, or None when the
    micrograph was skipped.

    TODO: GRAB THE CTF INFORMATION AS WELL AND MAKE A MERGED .STAR FILE
    TODO: add a movie mode that outputs a substack (dose-filtered) average.
    """
    t0 = time.time()
    if isinstance(globPath, (list, tuple)):
        mrcFiles = globPath
    else:
        mrcFiles = glob.glob(globPath)

    # Only tolerate "directory already exists"; any other failure (e.g. a
    # permission problem) is reported here instead of surfacing later.
    try:
        os.mkdir("Particles")
    except OSError:
        if not os.path.isdir("Particles"):
            raise

    # Integer half-sizes: slice bounds must be integers.
    halfBoxY = boxShape[0] // 2
    halfBoxX = boxShape[1] // 2

    particleStarList = [None] * len(mrcFiles)
    for K, mrcFileName in enumerate(mrcFiles):
        boxFileName = os.path.splitext(mrcFileName)[0] + boxExt
        if not os.path.isfile(boxFileName):
            print("Could not find .box/.star file: " + boxFileName)
            continue

        rlnBox = ReliablePy.ReliablePy()
        rlnBox.load(boxFileName)

        xCoord = rlnBox.star['data_']['CoordinateX']
        yCoord = rlnBox.star['data_']['CoordinateY']

        mrcMage = mrcz.readMRC(mrcFileName)[0]

        ###### Remove background from whole image #####
        if bool(fitBackground):
            mrcMage -= zorro_util.backgroundEstimate(mrcMage)

        ###### Check for particles too close to the edge and remove those coordinates. #####
        keepElements = ~((xCoord < halfBoxX) |
                         (yCoord < halfBoxY) |
                         (xCoord > mrcMage.shape[1] - halfBoxX) |
                         (yCoord > mrcMage.shape[0] - halfBoxY))
        xCoord = xCoord[keepElements]
        yCoord = yCoord[keepElements]

        ##### Extract particles #####
        particles = np.zeros([len(xCoord), boxShape[0], boxShape[1]],
                             dtype='float32')
        for J in range(len(xCoord)):
            # Coordinates may be stored as floats; truncate to pixel indices.
            yCenter = int(yCoord[J])
            xCenter = int(xCoord[J])
            partMat = mrcMage[yCenter - halfBoxY:yCenter + halfBoxY,
                              xCenter - halfBoxX:xCenter + halfBoxX]

            ###### Apply confidence-interval gaussian filter #####
            if sigmaFilt > 0.0:
                partMean = np.mean(partMat)
                partStd = np.std(partMat)
                partHotpix = np.abs(partMat - partMean) > sigmaFilt * partStd
                # Clip before applying the filter to better limit multiple pixel hot spots
                partMat = np.clip(partMat, partMean - sigmaFilt * partStd,
                                  partMean + sigmaFilt * partStd)

                # Let's stick to a gaussian_filter, it's much faster than a median filter and seems
                # equivalent if we pre-clip.  Only the outlier pixels take the filtered value.
                partFilt = scipy.ndimage.gaussian_filter(partMat, 4.0)
                particles[J, :, :] = (partHotpix * partFilt +
                                      (~partHotpix) * partMat)
            else:
                particles[J, :, :] = partMat

        ##### Re-scale particles #####
        if np.any(binShape):
            binFact = (np.asarray(boxShape, dtype=float) /
                       np.asarray(binShape, dtype=float))
            # Force binFact to power of 2 for now: round down to 2**floor(log2(x)).
            binFact = 2.0**np.floor(np.log2(binFact))

            if binKernel == 'lanczos2':
                # 2nd order Lanczos kernel
                lOrder = 2
                xWin = np.arange(-lOrder, lOrder + 1.0 / binFact[1],
                                 1.0 / binFact[1])
                yWin = np.arange(-lOrder, lOrder + 1.0 / binFact[0],
                                 1.0 / binFact[0])
                xWinMesh, yWinMesh = np.meshgrid(xWin, yWin)
                rmesh = np.sqrt(xWinMesh * xWinMesh + yWinMesh * yWinMesh)

                import warnings
                # The centre sample divides by zero; it is overwritten right after.
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    windowKernel = (
                        lOrder /
                        (np.pi * np.pi * rmesh * rmesh)) * np.sin(
                            np.pi / lOrder * rmesh) * np.sin(np.pi * rmesh)
                windowKernel[yWin == 0, xWin == 0] = 1.0

            elif binKernel == 'gauss':
                xWin = np.arange(-2.0 * binFact[1], 2.0 * binFact[1] + 1.0)
                yWin = np.arange(-2.0 * binFact[0], 2.0 * binFact[0] + 1.0)
                print("binFact = " + str(binFact))
                xWinMesh, yWinMesh = np.meshgrid(xWin, yWin)
                windowKernel = np.exp(-xWinMesh**2 / (0.36788 * binFact[1]) -
                                      yWinMesh**2 / (0.36788 * binFact[0]))

            else:
                raise ValueError(
                    "Unknown binKernel: %r (expected 'lanczos2' or 'gauss')" %
                    (binKernel,))

            partRescaled = np.zeros([len(xCoord), binShape[0], binShape[1]],
                                    dtype='float32')
            for J in range(len(xCoord)):
                # TODO: switch from squarekernel to an interpolator so we can use non-powers of 2
                partRescaled[J, :, :] = zorro_util.squarekernel(
                    scipy.ndimage.convolve(particles[J, :, :], windowKernel),
                    k=int(binFact[0]))

            particles = partRescaled

        print(" particles.dtype = " + str(particles.dtype))

        ###### Normalize particles after binning and background subtraction #####
        # NOTE(review): the statistics are taken along axis 0, i.e. per pixel
        # across the particle stack -- confirm this is the intended normalization.
        if bool(normalize):
            particles -= np.mean(particles, axis=0)
            particles *= 1.0 / np.std(particles, axis=0)

        ##### Invert contrast #####
        if bool(invertContrast):
            particles = -particles

        ###### Save particles to disk ######
        # Relion always saves particles within ./Particles/<fileext> but we could use anything if we want to
        # make changes and save them in the star file.
        particleFileName = os.path.join(
            "Particles",
            os.path.splitext(mrcFileName)[0] + "_" + rootName + ".mrcs")
        # TODO: add pixel size to particles file
        mrcz.writeMRC(particles, particleFileName)  # TODO: pixelsize

        ##### Output a star file with CTF and particle info. #####
        print("Particle file: " + particleFileName)
        particleStarList[K] = os.path.splitext(particleFileName)[0] + ".star"
        print("star file: " + particleStarList[K])

        headerDict = {
            "ImageName": 1,
            "CoordinateX": 2,
            "CoordinateY": 3,
            "MicrographName": 4
        }
        lookupDict = dict(zip(headerDict.values(), headerDict.keys()))
        mrcsPartName = os.path.splitext(particleStarList[K])[0] + ".mrcs"

        # Text mode: str objects are written below.
        with open(particleStarList[K], 'w') as fh:
            fh.write("\ndata_images\n\nloop_\n")

            for J in sorted(lookupDict):
                fh.write("_rln" + lookupDict[J] + " #" + str(J) + "\n")

            for I in range(len(xCoord)):
                fh.write("%06d@%s  %.1f  %.1f  %s\n" %
                         (I + 1, mrcsPartName, xCoord[I], yCoord[I],
                          mrcFileName))

    # TODO: join all star files, for multiprocessing this should just return the list of star files
    # TODO: add CTF Info
    t1 = time.time()
    print("Particle extraction finished in (s): %.2f" % (t1 - t0))
    return particleStarList