Example #1
    def compReadWrite(self,
                      testMage,
                      casttype=None,
                      compressor=None,
                      clevel=1):
        # This is the main helper, which writes the test image to disk and reads it back.
        mrcName = os.path.join(tmpDir, 'testMage.mrc')
        pixelsize = np.array([1.2, 2.6, 3.4])

        mrcz.writeMRC(testMage,
                      mrcName,
                      dtype=casttype,
                      pixelsize=pixelsize,
                      pixelunits=u'\AA',
                      voltage=300.0,
                      C3=2.7,
                      gain=1.05,
                      compressor=compressor,
                      clevel=clevel)

        rereadMage, rereadHeader = mrcz.readMRC(mrcName, pixelunits=u'\AA')
        #  `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it
        try:
            os.remove(mrcName)
        except IOError:
            log.info('Warning: file {} left on disk'.format(mrcName))

        npt.assert_array_almost_equal(testMage, rereadMage)
        npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
        assert (rereadHeader['pixelunits'] == u'\AA')
        npt.assert_almost_equal(rereadHeader['voltage'], 300.0)
        npt.assert_almost_equal(rereadHeader['C3'], 2.7)
        npt.assert_almost_equal(rereadHeader['gain'], 1.05)
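
The cleanup comments in these tests note that `tempfile.TemporaryDirectory` would be preferable were Python 2.7 support not required. A minimal sketch of that approach (Python 3 only; `round_trip` is an illustrative name, not part of the mrcz test suite):

import os
import tempfile

import mrcz


def round_trip(testMage, compressor=None, clevel=1):
    # The context manager removes the directory and its contents on exit,
    # so no explicit os.remove()/IOError handling is needed.
    with tempfile.TemporaryDirectory() as tmp:
        mrcName = os.path.join(tmp, 'testMage.mrc')
        mrcz.writeMRC(testMage, mrcName, compressor=compressor, clevel=clevel)
        rereadMage, rereadHeader = mrcz.readMRC(mrcName)
    return rereadMage, rereadHeader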
Example #2
def compress(path):
    global args

    if path.endswith('.mrc'):
        mrcz_path = path[:-4] + '.mrcz'
    elif path.endswith('.mrcs'):
        mrcz_path = path[:-5] + '.mrczs'
    else:
        # Not an MRC/MRCS file; nothing to do.
        return

    if args.verbose:
        print('compress %s -> %s' % (path, mrcz_path))

        original_size = os.path.getsize(path)
        print('%20s : %d bytes' % ('original size', original_size))

    if not args.dry_run:
        imageData, imageMeta = mrcz.readMRC(path)
        mrcz.writeMRC(imageData, mrcz_path, compressor='zstd', clevel=9, pixelsize=imageMeta['pixelsize'], pixelunits=imageMeta['pixelunits'], voltage=imageMeta['voltage'], C3=imageMeta['C3'], gain=imageMeta['gain'])

        if args.verbose:
            compressed_size = os.path.getsize(mrcz_path)
            ratio = float(original_size) / compressed_size
            print('%20s : %d bytes' % ('compressed size', compressed_size))
            print('%20s : %.2f' % ('compression ratio', ratio))

    if args.delete:
        if args.verbose:
            print('delete %s' % (path))
        if not args.dry_run and os.path.exists(mrcz_path) and os.path.getsize(mrcz_path) > 0:
            os.remove(path)
Example #3
def extract(path):
    global args

    if path.endswith('.mrcz'):
        mrc_path = path[:-5] + '.mrc'
    elif path.endswith('.mrczs'):
        mrc_path = path[:-6] + '.mrcs'
    else:
        # Not an MRCZ/MRCZS file; nothing to do.
        return

    time_last_modified = os.path.getmtime(path)
    time_since = (time.time() - time_last_modified) / 60    # in minutes

    if time_since > args.time:
        if args.verbose:
            print('extract %s -> %s' % (path, mrc_path))
            compressed_size = os.path.getsize(path)
            print('%20s : %d bytes' % ('compressed size', compressed_size))

        if not args.dry_run:
            imageData, imageMeta = mrcz.readMRC(path)
            mrcz.writeMRC(imageData, mrc_path, compressor=None, pixelsize=imageMeta['pixelsize'], pixelunits=imageMeta['pixelunits'], voltage=imageMeta['voltage'], C3=imageMeta['C3'], gain=imageMeta['gain'])

            if args.verbose:
                uncompressed_size = os.path.getsize(mrc_path)
                ratio = float(uncompressed_size) / compressed_size
                print('%20s : %d bytes' % ('uncompressed size', uncompressed_size))
                print('%20s : %.2f' % ('compression ratio', ratio))

        if args.delete:
            if args.verbose:
                print('delete %s' % (path))
            if not args.dry_run and os.path.exists(mrc_path) and os.path.getsize(mrc_path) > 0:
                os.remove(path)
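
The two helpers above read several attributes from a module-level `args` object. A hypothetical driver reconstructing those flags (the option names below are inferred from the attribute accesses in the examples, not taken from the original script) might look like:

import argparse
import glob
import os

parser = argparse.ArgumentParser(description='Convert between MRC and MRCZ in a directory')
parser.add_argument('directory')
parser.add_argument('--verbose', action='store_true')
parser.add_argument('--dry-run', action='store_true')      # argparse exposes this as args.dry_run
parser.add_argument('--delete', action='store_true',
                    help='remove the source file after a successful conversion')
parser.add_argument('--time', type=float, default=0.0,
                    help='only extract files last modified more than this many minutes ago')
args = parser.parse_args()

# Dispatch to the compress()/extract() helpers defined above.
for path in sorted(glob.glob(os.path.join(args.directory, '*'))):
    if path.endswith(('.mrc', '.mrcs')):
        compress(path)
    elif path.endswith(('.mrcz', '.mrczs')):
        extract(path)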
Example #4
    def compReadWrite(self,
                      testMage,
                      casttype=None,
                      compressor=None,
                      clevel=1):
        # This is the main helper, which writes the test image to disk and reads it back.
        mrcName = os.path.join(tmpDir, "testMage.mrc")
        pixelsize = np.array([1.2, 2.6, 3.4])

        mrcz.writeMRC(testMage,
                      mrcName,
                      dtype=casttype,
                      pixelsize=pixelsize,
                      pixelunits=u"\AA",
                      voltage=300.0,
                      C3=2.7,
                      gain=1.05,
                      compressor=compressor,
                      clevel=clevel)

        rereadMage, rereadHeader = mrcz.readMRC(mrcName, pixelunits=u"\AA")
        try:
            os.remove(mrcName)
        except IOError:
            log.info("Warning: file {} left on disk".format(mrcName))

        npt.assert_array_almost_equal(testMage, rereadMage)
        npt.assert_array_equal(rereadHeader['voltage'], 300.0)
        npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
        npt.assert_array_equal(rereadHeader['pixelunits'], u"\AA")
        npt.assert_array_equal(rereadHeader['C3'], 2.7)
        npt.assert_array_equal(rereadHeader['gain'], 1.05)
Example #5
    def test_numpy_metadata(self):
        log.info('Testing NumPy types in meta-data')
        meta = {
            'zoo': np.float64(1.0),
            'foo': [np.ones(16), np.ones(16),
                    np.ones(16)],
            'bar': {
                # Note: no support in JSON for complex numbers.
                'moo': np.uint64(42),
                'boo': np.full(8, 3, dtype=np.int32)
            }
        }
        mage = np.zeros([32, 32], dtype=np.float32)
        mrcName = os.path.join(tmpDir, 'testMage.mrc')
        mrcz.writeMRC(mage, mrcName, meta=meta)

        re_mage, re_meta = mrcz.readMRC(mrcName)
        #  `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it
        try:
            os.remove(mrcName)
        except IOError:
            log.info('Warning: file {} left on disk'.format(mrcName))

        npt.assert_array_equal(re_meta['foo'][0], meta['foo'][0])
        npt.assert_array_equal(re_meta['bar']['boo'], meta['bar']['boo'])
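
As the comment in this example notes, JSON (which backs the `meta` dictionary) has no complex type. A simple workaround, not part of the mrcz API, is to split complex values into their parts before writing:

# Hypothetical workaround for complex-valued metadata: store the parts separately.
z = 1.5 - 0.25j
meta['baz'] = {'real': z.real, 'imag': z.imag}
# ... after readMRC() it can be reassembled as:
# z = complex(re_meta['baz']['real'], re_meta['baz']['imag'])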
Example #6
    def test_MRC_append(self):
        log.info('Testing appending to existing MRC stack, float-32')
        f32_stack = [
            np.random.normal(size=[128, 96]).astype(np.float32)
            for I in range(2)
        ]

        mrcName = os.path.join(tmpDir, 'testStack.mrcs')

        pixelsize = [5.6, 3.4]
        for j, I in enumerate(f32_stack):
            mrcz.writeMRC(I,
                          mrcName,
                          pixelsize=pixelsize,
                          compressor=None,
                          idx=j)

        rereadMage, _ = mrcz.readMRC(mrcName, pixelunits=u'\AA')

        try:
            os.remove(mrcName)
        except IOError:
            log.info('Warning: file {} left on disk'.format(mrcName))

        assert (rereadMage.shape[0] == len(f32_stack))

        for testFrame, rereadFrame in zip(f32_stack, rereadMage):
            assert (testFrame.dtype == rereadFrame.dtype)
            npt.assert_array_almost_equal(testFrame, rereadFrame)
Example #7
    def test_list_change_output_shape_compressed(self):
        testMage = np.random.uniform(high=10, size=[6, 32, 32]).astype('int8')

        pixelsize = [5.6, 3.4]
        mrcName = os.path.join(tmpDir, 'testMage.mrcz')

        for slices in (1, 2):
            mrcz.writeMRC(testMage,
                          mrcName,
                          pixelsize=pixelsize,
                          compressor='zstd',
                          clevel=1,
                          n_threads=1)

            rereadMage, _ = mrcz.readMRC(mrcName,
                                         pixelunits=u'\AA',
                                         slices=slices)

            try:
                os.remove(mrcName)
            except IOError:
                log.info('Warning: file {} left on disk'.format(mrcName))

            assert (isinstance(rereadMage, list))
            assert (len(rereadMage) == testMage.shape[0] // slices)
Example #8
    def test_list_3d_compressed(self):
        testMage = [
            np.random.uniform(high=10, size=[3, 32, 32]).astype('int8')
        ] * 3
        mrcName = os.path.join(tmpDir, 'testMage.mrcz')
        pixelsize = [5.6, 3.4]

        mrcz.writeMRC(testMage,
                      mrcName,
                      pixelsize=pixelsize,
                      compressor='zstd',
                      clevel=1,
                      n_threads=1)

        rereadMage, _ = mrcz.readMRC(mrcName, pixelunits=u'\AA')

        try:
            os.remove(mrcName)
        except IOError:
            log.info('Warning: file {} left on disk'.format(mrcName))

        assert (isinstance(rereadMage, list))
        assert (len(rereadMage) == len(testMage))

        for testFrame, rereadFrame in zip(testMage, rereadMage):
            assert (testFrame.dtype == rereadFrame.dtype)
            npt.assert_array_almost_equal(testFrame, rereadFrame)
Example #9
    def test_cast_array_from_f64(self):
        log.info('Testing float-64 casting')
        f64_mage = np.random.normal(size=[2, 128, 96]).astype(np.float64)
        f32_mage = f64_mage.astype(np.float32)

        mrcName = os.path.join(tmpDir, 'testMage.mrc')

        mrcz.writeMRC(f64_mage, mrcName, compressor='zstd', clevel=1)
        rereadMage, rereadHeader = mrcz.readMRC(mrcName)
        #  `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it
        try:
            os.remove(mrcName)
        except IOError:
            log.info('Warning: file {} left on disk'.format(mrcName))

        npt.assert_array_almost_equal(f32_mage, rereadMage)
Example #10
def file_reader(filename, endianess='<', lazy=False, mmap_mode='c', **kwds):
    _logger.debug("Reading MRCZ file: %s" % filename)

    if mmap_mode != 'c':
        # Note also that MRCZ does not support memory-mapping of compressed data.
        # Perhaps we could use the zarr package for that
        raise ValueError('MRCZ supports only C-ordering memory-maps')

    mrcz_endian = 'le' if endianess == '<' else 'be'
    data, mrcz_header = _mrcz.readMRC(filename,
                                      endian=mrcz_endian,
                                      useMemmap=lazy,
                                      pixelunits='nm',
                                      **kwds)

    # Create the axis objects for each axis
    names = ['y', 'x', 'z']
    navigate = [False, False, True]
    axes = [{
        'size': data.shape[hsIndex],
        'index_in_array': hsIndex,
        'name': names[index],
        'scale': mrcz_header['pixelsize'][hsIndex],
        'offset': 0.0,
        'units': mrcz_header['pixelunits'],
        'navigate': nav
    } for index, (hsIndex, nav) in enumerate(zip(_READ_ORDER, navigate))]
    axes.insert(0, axes.pop(2))  # re-order the axes

    metadata = mrcz_header.copy()
    # Remove non-standard fields
    for popTarget in _POP_FROM_HEADER:
        metadata.pop(popTarget)

    dictionary = {
        'data': data,
        'axes': axes,
        'metadata': metadata,
        'original_metadata': {
            'mrcz_header': mrcz_header
        },
        'mapping': mapping,
    }

    return [
        dictionary,
    ]
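
This reader follows the HyperSpy IO-plugin convention of returning a list of signal dictionaries. A hedged usage sketch (assuming the module-level names `_mrcz`, `_READ_ORDER`, `_POP_FROM_HEADER` and `mapping` referenced above are defined elsewhere in the plugin) could be:

# Hypothetical direct call; in practice HyperSpy dispatches to file_reader()
# through hs.load() once the mrcz plugin is registered.
file_dicts = file_reader('example.mrcz', endianess='<', lazy=False)
signal_dict = file_dicts[0]
data = signal_dict['data']                                # image / volume array
scales = [axis['scale'] for axis in signal_dict['axes']]  # per-axis pixel size
header = signal_dict['original_metadata']['mrcz_header']  # raw MRCZ header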
Example #11
    def test_cast_list_from_c128(self):
        log.info('Testing complex-128 casting')
        c128_mage = [np.random.normal(size=[128,96]).astype(np.float64) + \
                        1j * np.random.normal(size=[128,96]).astype(np.float64) for I in range(2)]
        c64_mage = [frame.astype(np.complex64) for frame in c128_mage]

        mrcName = os.path.join(tmpDir, 'testMage.mrc')

        mrcz.writeMRC(c128_mage, mrcName, compressor='zstd', clevel=1)
        rereadMage, rereadHeader = mrcz.readMRC(mrcName)
        #  `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it
        try:
            os.remove(mrcName)
        except IOError:
            log.info('Warning: file {} left on disk'.format(mrcName))

        npt.assert_array_almost_equal(c64_mage[0], rereadMage[0])
        npt.assert_array_almost_equal(c64_mage[1], rereadMage[1])
Example #12
def batchProcPNG(mrcNames):
    for mrcName in mrcNames:
        print('Converting %s to PNG format' % mrcName)
        pngName = os.path.splitext(mrcName)[0] + '.png'
        image, header = mrcz.readMRC(mrcName)
        image = np.array(image)
        if image.ndim == 4:
            # Pass on multispectral data
            continue
        elif image.ndim == 3:
            image = image.mean(axis=0)

        print(f'Image {pngName} has shape {image.shape}')

        image = util.squarekernel(image)
        image = ni.gaussian_filter(image, 1.0)

        clim = util.histClim(image, cutoff=1E-4)

        image = util.normalize(np.clip(image, clim[0], clim[1]))

        skimage.io.imsave(pngName, image, plugin='pil')
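
`util.squarekernel`, `util.histClim` and `util.normalize` come from the Zorro utility module. Rough stand-ins for the latter two (assumptions about their behaviour, not the original implementations) could look like:

import numpy as np


def histClim(image, cutoff=1E-4):
    # Approximate stand-in: intensity limits that exclude the `cutoff`
    # fraction of pixels at each end of the histogram.
    return np.percentile(image, [100.0 * cutoff, 100.0 * (1.0 - cutoff)])


def normalize(image):
    # Approximate stand-in: rescale to the [0, 1] range for 8-bit PNG export.
    image = image.astype(np.float32)
    return (image - image.min()) / max(float(np.ptp(image)), 1e-12)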
Example #13
        def crossReadWrite(self,
                           testMage,
                           casttype=None,
                           compressor=None,
                           clevel=1):
            mrcInput = os.path.join(tmpDir, 'testIn.mrcz')
            mrcOutput = os.path.join(tmpDir, 'testOut.mrcz')
            compressor = None
            blocksize = 64
            clevel = 1
            pixelsize = [1.2, 2.6, 3.4]

            mrcz.writeMRC(testMage,
                          mrcInput,
                          pixelsize=pixelsize,
                          pixelunits=u'\AA',
                          voltage=300.0,
                          C3=2.7,
                          gain=1.05,
                          compressor=compressor)

            sub.call(cmrczProg + ' -i %s -o %s -c %s -B %d -l %d' %
                     (mrcInput, mrcOutput, compressor, blocksize, clevel),
                     shell=True)

            rereadMage, rereadHeader = mrcz.readMRC(mrcOutput,
                                                    pixelunits=u'\AA')

            os.remove(mrcOutput)
            os.remove(mrcInput)

            assert (np.all(testMage.shape == rereadMage.shape))
            assert (testMage.dtype == rereadMage.dtype)
            npt.assert_array_almost_equal(testMage, rereadMage)
            npt.assert_array_equal(rereadHeader['voltage'], 300.0)
            npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
            npt.assert_array_equal(rereadHeader['pixelunits'], u'\AA')
            npt.assert_array_equal(rereadHeader['C3'], 2.7)
            npt.assert_array_equal(rereadHeader['gain'], 1.05)
Example #14
    def test_enum_metadata(self):
        log.info('Testing Enum types in meta-data')

        class Axis(Enum):
            X = 0
            Y = 1

        meta = {'axes': [Axis.Y, Axis.X]}

        mage = np.zeros([4, 4], dtype=np.float32)
        mrcName = os.path.join(tmpDir, 'testMage.mrc')
        mrcz.writeMRC(mage, mrcName, meta=meta)

        re_mage, re_meta = mrcz.readMRC(mrcName)
        #  `tempfile.TemporaryDirectory` would be better but Python 2.7 doesn't support it
        try:
            os.remove(mrcName)
        except IOError:
            log.info('Warning: file {} left on disk'.format(mrcName))

        assert ('Enum.Axis.X' in re_meta['axes'])
        assert ('Enum.Axis.Y' in re_meta['axes'])
Example #15
    def test_JSON(self):
        testMage = np.random.uniform(high=10, size=[3, 128, 64]).astype('int8')
        meta = {'foo': 5, 'bar': 42}
        mrcName = os.path.join(tmpDir, "testMage.mrcz")

        pixelsize = [1.2, 5.6, 3.4]

        mrcz.writeMRC(testMage,
                      mrcName,
                      meta=meta,
                      pixelsize=pixelsize,
                      pixelunits=u"\AA",
                      voltage=300.0,
                      C3=2.7,
                      gain=1.05,
                      compressor='zstd',
                      clevel=1,
                      n_threads=4)

        rereadMage, rereadHeader = mrcz.readMRC(mrcName, pixelunits=u"\AA")

        try:
            os.remove(mrcName)
        except IOError:
            log.info("Warning: file {} left on disk".format(mrcName))

        assert (np.all(testMage.shape == rereadMage.shape))
        assert (testMage.dtype == rereadMage.dtype)
        for key in meta:
            assert (meta[key] == rereadHeader[key])

        npt.assert_array_almost_equal(testMage, rereadMage)
        npt.assert_array_equal(rereadHeader['voltage'], 300.0)
        npt.assert_array_almost_equal(rereadHeader['pixelsize'], pixelsize)
        npt.assert_array_equal(rereadHeader['pixelunits'], u"\AA")
        npt.assert_array_equal(rereadHeader['C3'], 2.7)
        npt.assert_array_equal(rereadHeader['gain'], 1.05)
        pass
Example #16
def partExtract(globPath,
                boxShape,
                boxExt=".star",
                binShape=None,
                binKernel='lanczos2',
                rootName="part",
                sigmaFilt=-1.0,
                invertContrast=True,
                normalize=True,
                fitBackground=True,
                movieMode=False,
                startFrame=None,
                endFrame=None,
                doseFilter=False):
    """
    Extracts particles from aligned and summed micrographs (generally <micrograph>.mrc).  
    cd .
    globPath = "align\*.mrc" for example, will process all such mrc files.  
        *.log will use Zorro logs
        Can also just pass a list of files
        
    TODO: CHANGE TO LOAD FROM A ZORRO LOG FILE?  OR JUST MAKE IT THE PREFERRED OPTION?
    
    Expects to find <micrograph>.star in the directory which has all the box centers in Relion .star format
    
    binShape = [y,x] is the particle box size to resample to.  If == None no resampling is done.  
        For binning, binKernel = 'lanczos2' or 'gauss'  reflects the anti-aliasing filter used.
          
    rootName affects the suffix appended to each extracted particle stack (<micrograph>_<rootName>.mrcs)
    
    sigmaFilt is the standard deviation applied for removal of x-rays and hot pixels, 2.5 - 3.0 is the recommended range.
    It uses the sigmaFilt value to compute a confidence interval to filter the intensity value of only outlier 
    pixels (typically ~ 1 %)
    
    invertContrast = True, inverts the contrast, as required for Relion/Frealign.
    
    normalize = True changes the particles to have 0.0 mean and 1.0 standard deviation, as required for Relion/Frealign.
    
    fitBackground = True removes a 2D Gaussian from the image. In general it's better to perform this prior to 
    particle picking using the Zorro dose filtering+background subtraction mechanism.  
    
    TODO: GRAB THE CTF INFORMATION AS WELL AND MAKE A MERGED .STAR FILE
    
    TODO: add a movie mode that outputs a substack (dose-filtered) average.
    """

    t0 = time.time()
    if isinstance(globPath, list) or isinstance(globPath, tuple):
        mrcFiles = globPath
    else:
        mrcFiles = glob.glob(globPath)

    try:
        os.mkdir("Particles")
    except:
        pass

    particleStarList = [None] * len(mrcFiles)

    for K, mrcFileName in enumerate(mrcFiles):

        boxFileName = os.path.splitext(mrcFileName)[0] + boxExt
        if not os.path.isfile(boxFileName):
            print("Could not find .box/.star file: " + boxFileName)
            continue

        rlnBox = ReliablePy.ReliablePy()
        rlnBox.load(boxFileName)

        xCoord = rlnBox.star['data_']['CoordinateX']
        yCoord = rlnBox.star['data_']['CoordinateY']

        mrcMage = mrcz.readMRC(mrcFileName)[0]

        ###### Remove background from whole image #####
        if bool(fitBackground):
            mrcMage -= zorro_util.backgroundEstimate(mrcMage)

        ###### Check for particles too close to the edge and remove those coordinates. #####
        keepElements = ~((xCoord < boxShape[1] / 2) |
                         (yCoord < boxShape[0] / 2) |
                         (xCoord > mrcMage.shape[1] - boxShape[1] / 2) |
                         (yCoord > mrcMage.shape[0] - boxShape[0] / 2))
        xCoord = xCoord[keepElements]
        yCoord = yCoord[keepElements]

        ##### Extract particles #####
        particles = np.zeros([len(xCoord), boxShape[0], boxShape[1]],
                             dtype='float32')
        for J in np.arange(len(xCoord)):
            # Use integer pixel coordinates so the slicing also works on Python 3.
            y0 = int(yCoord[J]) - boxShape[0] // 2
            x0 = int(xCoord[J]) - boxShape[1] // 2
            partMat = mrcMage[y0:y0 + boxShape[0], x0:x0 + boxShape[1]]

            ###### Apply confidence-interval gaussian filter #####
            if sigmaFilt > 0.0:
                partMean = np.mean(partMat)
                partStd = np.std(partMat)
                partHotpix = np.abs(partMat - partMean) > sigmaFilt * partStd
                # Clip before applying the median filter to better limit multiple pixel hot spots
                partMat = np.clip(partMat, partMean - sigmaFilt * partStd,
                                  partMean + sigmaFilt * partStd)

                # Let's stick to a gaussian_filter, it's much faster than a median filter and seems equivalent if we pre-clip
                # boxFilt[J,:,:] = scipy.ndimage.median_filter( boxMat[J,:,:], [5,5] )
                partFilt = scipy.ndimage.gaussian_filter(partMat, 4.0)
                particles[
                    J, :, :] = partHotpix * partFilt + (~partHotpix) * partMat
            else:
                particles[J, :, :] = partMat

        #ims( -particles )

        ##### Re-scale particles #####
        if np.any(binShape):
            binFact = np.array(boxShape) / np.array(binShape)
            # Force binFact to power of 2 for now.

            import math
            binFact[0] = 2.0**np.floor(math.log(binFact[0], 2))
            binFact[1] = 2.0**np.floor(math.log(binFact[1], 2))

            [xSample, ySample] = np.meshgrid(
                np.arange(0, binShape[1]) / binFact[1],
                np.arange(0, binShape[0]) / binFact[0])

            if binKernel == 'lanczos2':
                # 2nd order Lanczos kernel
                lOrder = 2
                xWin = np.arange(-lOrder, lOrder + 1.0 / binFact[1],
                                 1.0 / binFact[1])
                yWin = np.arange(-lOrder, lOrder + 1.0 / binFact[0],
                                 1.0 / binFact[0])
                xWinMesh, yWinMesh = np.meshgrid(xWin, yWin)
                rmesh = np.sqrt(xWinMesh * xWinMesh + yWinMesh * yWinMesh)

                import warnings
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    windowKernel = (lOrder /
                                    (np.pi * np.pi * rmesh * rmesh)) * np.sin(
                                        np.pi / lOrder * rmesh) * np.sin(
                                            np.pi * rmesh)
                windowKernel[yWin == 0, xWin == 0] = 1.0

            elif binKernel == 'gauss':
                xWin = np.arange(-2.0 * binFact[1], 2.0 * binFact[1] + 1.0)
                yWin = np.arange(-2.0 * binFact[0], 2.0 * binFact[0] + 1.0)
                print("binFact = " + str(binFact))
                xWinMesh, yWinMesh = np.meshgrid(xWin, yWin)
                # rmesh = np.sqrt( xWinMesh*xWinMesh + yWinMesh*yWinMesh )
                windowKernel = np.exp(-xWinMesh**2 / (0.36788 * binFact[1]) -
                                      yWinMesh**2 / (0.36788 * binFact[0]))
                pass

            partRescaled = np.zeros([len(xCoord), binShape[0], binShape[1]],
                                    dtype='float32')
            for J in np.arange(len(xCoord)):
                # TODO: switch from squarekernel to an interpolator so we can use non-powers of 2
                partRescaled[J, :, :] = zorro_util.squarekernel(
                    scipy.ndimage.convolve(particles[J, :, :], windowKernel),
                    k=binFact[0])

            particles = partRescaled
        pass
        # ims( windowKernel ) # DEBUG

        print(" particles.dtype = " + str(particles.dtype))
        ###### Normalize particles after binning and background subtraction #####
        if bool(normalize):
            particles -= np.mean(particles, axis=0)
            particles *= 1.0 / np.std(particles, axis=0)

        ##### Invert contrast #####
        if bool(invertContrast):
            particles = -particles

        ###### Save particles to disk ######
        # Relion always saves particles within ./Particles/<fileext> but we could use anything if we want to
        # make changes and save them in the star file.
        particleFileName = os.path.join(
            "Particles",
            os.path.splitext(mrcFileName)[0] + "_" + rootName + ".mrcs")
        # TODO: add pixel size to particles file
        mrcz.writeMRC(particles, particleFileName)  # TODO: pixelsize

        ##### Output a star file with CTF and particle info. #####
        print("Particle file: " + particleFileName)
        particleStarList[K] = os.path.splitext(particleFileName)[0] + ".star"
        print("star file: " + particleStarList[K])

        headerDict = {
            "ImageName": 1,
            "CoordinateX": 2,
            "CoordinateY": 3,
            "MicrographName": 4
        }
        lookupDict = dict(zip(headerDict.values(), headerDict.keys()))
        #
        with open(particleStarList[K], 'w') as fh:
            fh.write("\ndata_images\n\nloop_\n")

            for J in sorted(lookupDict.keys()):
                fh.write("_rln" + lookupDict[J] + " #" + str(J) + "\n")

            for I in np.arange(0, len(xCoord)):
                mrcsPartName = os.path.splitext(
                    particleStarList[K])[0] + ".mrcs"
                fh.write(
                    "%06d@%s  %.1f  %.1f  %s\n" %
                    (I + 1, mrcsPartName, xCoord[I], yCoord[I], mrcFileName))

    # TODO: join all star files, for multiprocessing this should just return the list of star files
    # TODO: add CTF Info
    t1 = time.time()
    print("Particle extraction finished in (s): %.2f" % (t1 - t0))
    return particleStarList
Example #17
if rank == 0:
    print("Parsing %s" % (inputFile))

############################################################
# READING THE INPUT FILE AND GETTING SOME KEY PARAMETERS
images = pims.open(inputFile)
[row, col, numFrames] = images.header_dict['height'], images.header_dict['width'], images.header_dict['allocated_frames']
if numFrames < lastFrame:
    frameList = range(int(firstFrame), numFrames + 1)
else:
    frameList = range(int(firstFrame), int(lastFrame) + 1)
procFrameList = numpy.array_split(frameList, size)

############################################################
# SETTING UP THE DARK AND GAIN REFERENCING
[darkRef, _] = mrcz.readMRC(darkRefFile)
[gainRef, _] = mrcz.readMRC(gainRefFile)
if gainRef.mean() <= 10:
    correction = gainRef.copy()
else:
    correction = gainRef - darkRef
correction[numpy.abs(correction) < 1e-5] = 1
median = numpy.median(correction)

############################################################
# FIRST PASS THROUGH ALL FILES TO GET THE CONTRAST VALUE
outFile = open('convertSeqFileLog_' + str(rank) + '.dat', 'w')
outFile.write("InputFile\tFrame\tCount\tPixInAngstrom (TODO)\tDoseRate\tYear\tMonth\tDay\tHour\tMinute\tSecond\tMicrosecond\n")
contrastLimit = open(str(rank) + '.dat', 'w')
minCount, maxCount = 1e10, -1e10
comm.Barrier()
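
The remainder of this script is not shown. For context, a conventional way the `correction` and `median` values computed above would be applied to a raw frame (an assumption about the intent, not the original code) is:

# Hypothetical dark/gain flat-field application (the original loop over
# procFrameList is not included in this snippet):
raw = numpy.asarray(images[0], dtype=numpy.float32)   # first frame, illustrative
corrected = (raw - darkRef) * (median / correction)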
Example #19
<= 0.4.1 to the 0.5.0 standard, so that the volume sampling is set to the 
default (Mx, My, Mz) = [0, 0, 1].
"""

if len(sys.argv) > 1:
    directory = sys.argv[1]

mrcz_files = glob.glob(path.join(directory, '*.mrc'), recursive=False)
mrcz_files.extend(glob.glob(path.join(directory, '*.mrcz'), recursive=False))

backup_dir = path.join(directory, 'backup')
os.makedirs(backup_dir)
for filename in mrcz_files:
    shutil.copy(filename, path.join(backup_dir, path.basename(filename)))

    data, meta = mrcz.readMRC(filename)

    compressor = meta['compressor'] if 'compressor' in meta else None
    clevel = meta['clevel'] if 'clevel' in meta else 1
    voltage = meta['voltage'] if 'voltage' in meta else 0.0
    C3 = meta['C3'] if 'C3' in meta else 0.0
    gain = meta['gain'] if 'gain' in meta else 1.0
    pixelsize = meta['pixelsize'] if 'pixelsize' in meta else [0.1, 0.1, 0.1]
    pixelunits = meta['pixelunits'] if 'pixelunits' in meta else u'\\AA'

    if 'packedBytes' in meta:
        meta.pop('packedBytes')
    if 'minImage' in meta:
        meta.pop('minImage')
    if 'maxImage' in meta:
        meta.pop('maxImage')