def test_slicing(temp_file):
    dd = np.ones((10, 11, 12), dtype=np.uint16)

    with emd.fileEMD(temp_file, readonly=False) as emd0:
        dims = emd.defaultDims(dd)
        emd0.put_emdgroup('test', dd, dims)

    with emd.fileEMD(temp_file) as emd_obj:
        assert _get_slice(emd_obj, 0).shape == (11, 12)

    docs = list(ingest_NCEM_EMD([str(temp_file)]))
    event_doc = docs[2][1]
    data = event_doc['data']['raw']
    assert data.shape == (10, 11, 12)
    assert data[0].compute().shape == (11, 12)
def _metadata_from_dset(path, dset_num=0):
    # parameterized by path rather than emd_obj so that lru_cache hashing resolves easily
    metaData = {}
    metaData['veloxFlag'] = False

    # EMD Berkeley
    emd_obj = emd.fileEMD(path, readonly=True)

    dataGroup = emd_obj.list_emds[dset_num]
    dataset0 = dataGroup['data']  # get the dataset in the first group found

    try:
        name = dataGroup.name.split('/')[-1]
        metaData[name] = {}
        metaData[name].update(dataGroup.attrs)
    except Exception:
        pass

    # Get the dim vectors
    dims = emd_obj.get_emddims(dataGroup)

    if dataset0.ndim == 2:
        dimZ = None
        dimY = dims[0]  # dataGroup['dim1']
        dimX = dims[1]  # dataGroup['dim2']
    elif dataset0.ndim == 3:
        dimZ = dims[0]
        dimY = dims[1]  # dataGroup['dim2']
        dimX = dims[2]  # dataGroup['dim3']
    elif dataset0.ndim == 4:
        dimZ = dims[1]
        dimY = dims[2]  # dataGroup['dim3']
        dimX = dims[3]  # dataGroup['dim4']
    else:
        dimZ = None
        dimY = None
        dimX = None

    # Store the X and Y pixel size, offset and unit
    try:
        metaData['PhysicalSizeX'] = dimX[0][1] - dimX[0][0]
        metaData['PhysicalSizeXOrigin'] = dimX[0][0]
        metaData['PhysicalSizeXUnit'] = dimX[2].replace('_', '')
        metaData['PhysicalSizeY'] = dimY[0][1] - dimY[0][0]
        metaData['PhysicalSizeYOrigin'] = dimY[0][0]
        metaData['PhysicalSizeYUnit'] = dimY[2].replace('_', '')
        # metaData['PhysicalSizeZ'] = dimZ[0][1] - dimZ[0][0]
        # metaData['PhysicalSizeZOrigin'] = dimZ[0][0]
        # metaData['PhysicalSizeZUnit'] = dimZ[2]
    except Exception:
        metaData['PhysicalSizeX'] = 1
        metaData['PhysicalSizeXOrigin'] = 0
        metaData['PhysicalSizeXUnit'] = ''
        metaData['PhysicalSizeY'] = 1
        metaData['PhysicalSizeYOrigin'] = 0
        metaData['PhysicalSizeYUnit'] = ''

    metaData['shape'] = dataset0.shape

    _cleandict(metaData)

    return metaData
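# A minimal usage sketch for _metadata_from_dset. The path is hypothetical;
# the PhysicalSize* values assume uniformly spaced dim vectors (the function
# takes the difference of the first two entries and falls back to 1/0/''
# when the dim vectors are missing or malformed).
#
#     meta = _metadata_from_dset('/data/sample.emd', dset_num=0)
#     meta['PhysicalSizeX']        # spacing of the X dim vector
#     meta['PhysicalSizeXOrigin']  # first entry of the X dim vector
#     meta['PhysicalSizeXUnit']    # e.g. 'nm' (stored as 'n_m', '_' stripped)
#     meta['shape']                # shape of the underlying HDF5 dataset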
def test_ingest_emd_berkeley(temp_file):
    dd = np.ones((10, 11, 12), dtype=np.uint16)

    with emd.fileEMD(temp_file, readonly=False) as emd0:
        dims = emd.defaultDims(dd)
        emd0.put_emdgroup('test', dd, dims)

    # Test slicing
    with emd.fileEMD(temp_file) as emd_obj:
        dd0 = emd_obj.list_emds[0]['data']
        assert dd0[0, :, :].shape == (11, 12)

    # Test ingest
    docs = list(ingest_NCEM_EMD([str(temp_file)]))
    event_doc = docs[2][1]
    data = event_doc['data']['raw']
    assert data.shape == (10, 11, 12)
    assert data[0].compute().shape == (11, 12)
def _metadata(path):
    # parameterized by path rather than emd_obj so that lru_cache hashing resolves easily
    metaData = {}
    metaData['veloxFlag'] = False
    metaData['FileName'] = path

    # EMD Berkeley
    emd_obj = emd.fileEMD(path, readonly=True)

    try:
        metaData['user'] = {}
        metaData['user'].update(emd_obj.file_hdl['/user'].attrs)
    except Exception:
        pass
    try:
        metaData['microscope'] = {}
        metaData['microscope'].update(emd_obj.file_hdl['/microscope'].attrs)
    except Exception:
        pass
    try:
        metaData['sample'] = {}
        metaData['sample'].update(emd_obj.file_hdl['/sample'].attrs)
    except Exception:
        pass
    try:
        metaData['comments'] = {}
        metaData['comments'].update(emd_obj.file_hdl['/comments'].attrs)
    except Exception:
        pass
    try:
        metaData['stage'] = {}
        # Check for legacy keys in the stage group. Skip the rest.
        good_keys = ('position', 'type', 'Type')
        for k in good_keys:
            if k in emd_obj.file_hdl['/stage'].attrs:
                metaData['stage'][k] = emd_obj.file_hdl['/stage'].attrs[k]
    except Exception:
        pass

    _cleandict(metaData)

    return metaData
def EMD_multi_path():
    """Write a small Berkeley EMD file with 2 data sets to a tempfile."""
    dd, _, _ = np.mgrid[0:30, 0:40, 0:50]
    dd = dd.astype('<u2')
    dd2, _, _ = np.mgrid[0:60, 0:80, 0:100]
    dd2 = dd2.astype('<u2')

    tmp = tempfile.NamedTemporaryFile(mode='wb')
    tmp.close()  # need to close the file to use it later
    fPath = str(Path(tmp.name))

    with emd.fileEMD(fPath, readonly=False) as f0:
        dims = emd.defaultDims(dd)
        f0.put_emdgroup('test1', dd, dims)
        dims2 = emd.defaultDims(dd2)
        f0.put_emdgroup('test2', dd2, dims2)

    return fPath
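# Usage sketch: EMD_multi_path pairs naturally with a pytest fixture so the
# tempfile is cleaned up after each test. The fixture wiring below is an
# assumption (pytest is not imported in the original):
#
#     @pytest.fixture
#     def emd_multi_file():
#         path = EMD_multi_path()
#         yield path
#         Path(path).unlink()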
def test_multi_device(temp_file):
    dd = np.ones((10, 11, 12), dtype=np.uint16)

    with emd.fileEMD(temp_file, readonly=False) as emd0:
        dims = emd.defaultDims(dd)
        emd0.put_emdgroup('test', dd, dims)
        # Change shape and write again to simulate a second data set
        dd2 = dd.reshape(5, 22, 12)
        dims2 = emd.defaultDims(dd2)
        emd0.put_emdgroup('test2', dd2, dims2)
    del dd, dd2, dims, dims2

    # Ingest and get the first data set.
    docs = list(ingest_NCEM_EMD([str(temp_file)]))
    event_doc = docs[2][1]
    data = event_doc['data']['raw']
    assert data.shape == (10, 11, 12)
    assert data[0].compute().shape == (11, 12)

    # Get the second data set.
    event_doc = docs[4][1]
    data = event_doc['data']['raw']
    assert data.shape == (5, 22, 12)
    assert data[0].compute().shape == (22, 12)

    catalog = BlueskyInMemoryCatalog()
    start = docs[0][1]
    stop = docs[-1][1]
    others = docs[1:-2]

    def doc_gen():
        yield from docs

    catalog.upsert(start, stop, doc_gen, [], {})
    run_catalog = catalog[-1]
    stream_names = list(run_catalog)
    print(stream_names)
    run_catalog[stream_names[0]].to_dask()['raw'].compute()
    run_catalog[stream_names[1]].to_dask()['raw'].compute()
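# For a file with two data sets, ingest_NCEM_EMD emits one descriptor/event
# pair per data set, which is why the events above sit at indices 2 and 4:
#
#     index 0: ('start', ...)
#     index 1: ('descriptor', ...)  # primary_<first data set>
#     index 2: ('event', ...)
#     index 3: ('descriptor', ...)  # primary_<second data set>
#     index 4: ('event', ...)
#     index 5: ('stop', ...)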
def emd_sniffer(path, first_bytes):
    if not Path(path).suffix.lower() == '.emd':
        return

    test_velox = False
    try:
        # Test for Berkeley EMD
        with emd.fileEMD(path, readonly=True) as emd1:
            if len(emd1.list_emds) > 0:
                return 'application/x-EMD'
            else:
                test_velox = True
    except OSError:
        # Not an HDF5 file
        return

    if test_velox:
        # Test for Velox
        with emdVelox.fileEMDVelox(path) as emd2:
            ver = emd2._file_hdl['Version'][0].decode('ASCII')
            if ver.find('Velox') > -1:
                return 'application/x-EMD-VELOX'
            else:
                return
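# Usage sketch for emd_sniffer (paths are hypothetical). The sniffer keys off
# the '.emd' suffix first, then the HDF5 structure:
#
#     emd_sniffer('/data/berkeley.emd', b'')   # -> 'application/x-EMD'
#     emd_sniffer('/data/velox.emd', b'')      # -> 'application/x-EMD-VELOX'
#     emd_sniffer('/data/image.tif', b'')      # -> None (wrong suffix)
#     emd_sniffer('/data/not_hdf5.emd', b'')   # -> None (OSError from h5py)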
def ExtractSignalsFromEMD(InputEMD=None, SignalNames=['HAADF', 'Mg_K', 'Fe_Ka'], Binning=4):
    '''Read in an EMD file containing EDS tilt-stack acquisitions and extract
    the named signals.

    Parameters:
        InputEMD (str): Name of the EMD file containing the tilt stacks.
        SignalNames (list of str): A list of signals that should be extracted
            from the file. Valid values include:
                HAADF: the HAADF signal.
                Element_Line: e.g. Fe_Ka for a weighted sum of Fe-Ka1 and
                    Fe-Ka2, Fe_Ka1 for just that line, or Mg_K for a weighted
                    sum of all Mg-K lines. There is no bremsstrahlung removal.
                eV1-eV2: Start and stop energies in eV. The signal will be the
                    sum of all energies over the range [eV1, eV2).
        Binning (int): How much binning to apply to EDS signals to reduce
            noise. 1 means no binning; 2 means each output voxel is 2x2x2
            input voxels. HAADF signals are not rebinned as they are usually
            not noisy.

    Returns:
        SignalDict (OrderedDict of np.ndarray): Dictionary with names matching
            SignalNames and dimensions of (tilt, x, y).
        Tilts (list of floats): A list of tilt angles.
    '''
    # Make an ordered dictionary with one entry for each signal.
    SignalDict = OrderedDict()
    for n in SignalNames:
        SignalDict[n] = []

    # Open the emd file that has our data.
    EMD = fileEMD(InputEMD, readonly=True)

    # We will compute the FWHM of peaks using the Mn-Ka resolution reported in the EMD.
    EnergyResolutionMnKa = EMD.microscope.attrs['MnKaResolution[eV]']
    Mn_Ka_Energy = GetFluorescenceLineEnergy('Mn', Series='K', Line='Ka')
    K = EnergyResolutionMnKa / np.sqrt(Mn_Ka_Energy)
    print('Energy resolution of Mn-Ka is: ' + str(EnergyResolutionMnKa) + ' eV.')
    print('Assumed FWHM of peaks will be (%g*sqrt(E))/2, hence at Mn-Ka: %g eV.'
          % (K, K * np.sqrt(Mn_Ka_Energy) / 2))

    # Get links to the data we'll need from the EMD.
    HAADF, HAADF_dims = EMD.get_emdgroup(EMD.data['HAADF_TiltStack'])
    EDS, EDS_dims = EMD.get_emdgroup(EMD.data['EDS_TiltStack'])
    Tilts = HAADF_dims[0][0]
    print('HAADF dimensions are (%d, %d).'
          % (len(HAADF_dims[1][0]), len(HAADF_dims[2][0])))

    # Calculate the rebinning size for the EDS data.
    rebinsize_m = int(len(EDS_dims[1][0]) / Binning)
    rebinsize_n = int(len(EDS_dims[2][0]) / Binning)
    print('Binning is %d so rebinned EDS cubes will have spatial dimension (%d, %d).'
          % (Binning, rebinsize_m, rebinsize_n))

    for sig in SignalDict.keys():
        print(sig, end='')
        if sig == 'HAADF':
            # The HAADF doesn't get rebinned.
            SignalDict['HAADF'] = HAADF[:].astype('float32')
            print('')
        if '_' in sig:
            # This is a fluorescence line.
            El, Line = sig.split('_')
            CenterEnergy = GetFluorescenceLineEnergy(El, Series=Line[0], Line=Line)
            if CenterEnergy is None:
                print('Unrecognized fluorescence line: %s-%s, ignoring this signal.'
                      % (El, Line))
                continue
            LowEnergy = CenterEnergy - K * np.sqrt(CenterEnergy) / 2
            HighEnergy = CenterEnergy + K * np.sqrt(CenterEnergy) / 2
            LowEnergyIndex = np.argmin((EDS_dims[3][0] - LowEnergy) ** 2)
            HighEnergyIndex = np.argmin((EDS_dims[3][0] - HighEnergy) ** 2)
            print(', %g-%g eV window, energy bins: %d-%d.'
                  % (LowEnergy, HighEnergy, LowEnergyIndex, HighEnergyIndex))
            Cube = np.sum(EDS[:, :, :, LowEnergyIndex:HighEnergyIndex + 1], axis=-1)
            SignalDict[sig] = BinEDSSpatialDimensions(Cube, Binning)
        if '-' in sig:
            print('Energy range signals not implemented yet.')

    print('Signals Extracted.')

    # Turn those signal readouts into 3D numpy arrays (tilt, x, y).
    for k, v in SignalDict.items():
        SignalDict[k] = np.array(v)

    return SignalDict, Tilts
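# Usage sketch (hypothetical file name): pull the HAADF channel and one
# fluorescence-line map out of a tilt-series EMD. Per the docstring, HAADF
# keeps its full spatial size while EDS-derived maps are rebinned:
#
#     signals, tilts = ExtractSignalsFromEMD('TiltSeries.emd',
#                                            SignalNames=['HAADF', 'Fe_Ka'],
#                                            Binning=2)
#     signals['HAADF'].shape  # (n_tilts, x, y), not rebinned
#     signals['Fe_Ka'].shape  # spatial dimensions reduced by the binning factor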
def ExtractRawSignalsFromBrukerSequence(InputDirectory=None, OutputEMD=None):
    '''Read in a set of Bruker bcf files containing EDS acquisitions and write
    an EMD file with the data.

    Parameters:
        InputDirectory (str): Name of the directory containing the bcf files.
        OutputEMD (str): Name of the output file (may include a path).

    Returns:
        None
    '''
    # Find all the bcf files first.
    Tilts = GetTiltsFromBrukerSequence(Directory=InputDirectory)
    Tilts = np.array(Tilts)

    # We need some filename if none was given to us.
    if OutputEMD is None:
        OutputEMD = 'test.emd'

    print('Extracting Signals:')
    for i, t in enumerate(Tilts):
        # Load the Bruker file for this tilt.
        fname = os.path.join(InputDirectory, str(int(t)) + '.bcf')
        x = hs.load(fname)

        # Only the first time, calculate the sizes of all the arrays we are going to make.
        if 'HAADFsize_m' not in locals():
            # HAADFs have x, y dimensions.
            HAADFsize_m = GetSpatialDimension(x[0].axes_manager['width'])
            HAADFsize_n = GetSpatialDimension(x[0].axes_manager['height'])
            HAADFDim = (len(Tilts), len(HAADFsize_m), len(HAADFsize_n))
            print('HAADF has dimensions ' + str(HAADFDim))

            # EDS cubes have x, y, energy dimensions.
            EDSsize_m = GetSpatialDimension(x[1].axes_manager['width'])
            EDSsize_n = GetSpatialDimension(x[1].axes_manager['height'])
            EDSsize_e = GetEnergyDimension(x[1].axes_manager['Energy'])
            EDSDim = (len(Tilts), len(EDSsize_m), len(EDSsize_n), len(EDSsize_e))
            print('EDS has dimensions ' + str(EDSDim))

            HAADF = np.zeros(HAADFDim)
            EDS = np.zeros(EDSDim, dtype='float32')

            BeamEnergy = x[0].metadata['Acquisition_instrument']['TEM']['beam_energy'] * 1000  # eV
            EnergyResolutionMnKa = x[1].metadata['Acquisition_instrument']['TEM']['Detector']['EDS']['energy_resolution_MnKa']  # eV
            DetectorTiltAngle = x[1].metadata['Acquisition_instrument']['TEM']['Detector']['EDS']['elevation_angle']  # degrees
            RealTime = x[1].metadata['Acquisition_instrument']['TEM']['Detector']['EDS']['real_time']  # seconds

        print(str(fname))
        HAADF[i, :, :] = x[0].data.astype('float32')
        EDS[i, :, :, :] = x[1].data.astype('float32')

    # Open a nonexisting file for writing.
    if os.path.isfile(OutputEMD):
        os.remove(OutputEMD)
    EMD = fileEMD(OutputEMD)

    data = HAADF
    dims = ((Tilts, 'angle', '[deg]'),
            (HAADFsize_m, 'x', '[m]'),
            (HAADFsize_n, 'y', '[m]'))
    print('Writing HAADF tilt stack.')
    EMD.put_emdgroup('HAADF_TiltStack', data, dims)

    data = EDS
    dims = ((Tilts, 'angle', '[deg]'),
            (EDSsize_m, 'x', '[m]'),
            (EDSsize_n, 'y', '[m]'),
            (EDSsize_e, 'E', '[eV]'))
    print('Writing EDS tilt stack.')
    EMD.put_emdgroup('EDS_TiltStack', data, dims)

    EMD.microscope.attrs['BeamVoltage[eV]'] = BeamEnergy
    EMD.microscope.attrs['MnKaResolution[eV]'] = EnergyResolutionMnKa
    EMD.microscope.attrs['DetectorTiltAngle[deg]'] = DetectorTiltAngle
    EMD.microscope.attrs['RealTime[s]'] = RealTime * len(Tilts)
    EMD.put_comment('File created.')
    del EMD
    print('Created file ' + OutputEMD)
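# Usage sketch tying the two tomography steps together (directory and file
# names are hypothetical): convert a Bruker bcf tilt sequence to EMD, then
# extract signals from the result.
#
#     ExtractRawSignalsFromBrukerSequence(InputDirectory='acquisition/',
#                                         OutputEMD='tilt_stacks.emd')
#     signals, tilts = ExtractSignalsFromEMD('tilt_stacks.emd',
#                                            SignalNames=['HAADF', 'Mg_K'])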
def writeEMD(self, filename):
    """Write SER data to an EMD file.

    Parameters
    ----------
    filename : str or pathlib.Path
        Name of the EMD file.

    """
    # Todo: Update this to be much simpler. It could be written in a couple
    # of lines now using the fileEMD class.
    from ncempy.io import emd

    # create the EMD file and set version attributes
    try:
        f = emd.fileEMD(filename)
    except Exception:
        raise IOError('Cannot write to file "{}"!'.format(filename))

    # create EMD group
    grp = f.file_hdl['data'].create_group(
        os.path.basename(self._file_hdl.name))
    grp.attrs['emd_group_type'] = 1

    # use first dataset to layout memory
    data, first_meta = self.getDataset(0)
    first_tag = self._getTag(0)

    if self.head['DataTypeID'] == 0x4122:
        # 2D datasets
        self.head['ExperimentType'] = 'image'  # text indicator of the experiment type

        if first_tag['TagTypeID'] == 0x4142:
            # 2D mapping
            dset = grp.create_dataset(
                'data',
                (self.head['Dimensions'][1]['DimensionSize'],
                 self.head['Dimensions'][0]['DimensionSize'],
                 first_meta['ArrayShape'][1],
                 first_meta['ArrayShape'][0]),
                dtype=self._dictDataType[first_meta['DataType']])

            # collect time
            time = np.zeros((self.head['Dimensions'][0]['DimensionSize'],
                             self.head['Dimensions'][1]['DimensionSize']),
                            dtype='i4')

            # create mapping dims for checking
            map_xdim = self._createDim(
                self.head['Dimensions'][0]['DimensionSize'],
                self.head['Dimensions'][0]['CalibrationOffset'],
                self.head['Dimensions'][0]['CalibrationDelta'],
                self.head['Dimensions'][0]['CalibrationElement'])
            map_ydim = self._createDim(
                self.head['Dimensions'][1]['DimensionSize'],
                self.head['Dimensions'][1]['CalibrationOffset'],
                self.head['Dimensions'][1]['CalibrationDelta'],
                self.head['Dimensions'][1]['CalibrationElement'])
            # weird direction dependent half pixel shifting
            map_xdim += 0.5 * self.head['Dimensions'][0]['CalibrationDelta']
            map_ydim -= 0.5 * self.head['Dimensions'][1]['CalibrationDelta']

            for y in range(self.head['Dimensions'][0]['DimensionSize']):
                for x in range(self.head['Dimensions'][1]['DimensionSize']):
                    index = int(x + y * self.head['Dimensions'][0]['DimensionSize'])
                    print('converting dataset {} of {}, items ({}, {})'.format(
                        index + 1, self.head['ValidNumberElements'], x, y))

                    # retrieve dataset and put into buffer
                    data, meta = self.getDataset(index)
                    dset[y, x, :, :] = data[:, :]

                    # get tag data per image
                    tag = self._getTag(index)
                    time[y, x] = tag['Time']

                    assert (np.abs(tag['PositionX'] - map_xdim[x])
                            < np.abs(tag['PositionX'] * 1e-8))
                    assert (np.abs(tag['PositionY'] - map_ydim[y])
                            < np.abs(tag['PositionY'] * 1e-8))

                    del data, meta, tag

            # create dimension datasets
            dims = []
            dims_time = []

            # Position Y
            assert self.head['Dimensions'][1]['Description'] == 'Position'
            dims.append((map_ydim, self.head['Dimensions'][1]['Description'],
                         '[{}]'.format(self.head['Dimensions'][1]['Units'])))
            dims_time.append((map_ydim, self.head['Dimensions'][1]['Description'],
                              '[{}]'.format(self.head['Dimensions'][1]['Units'])))

            # Position X
            assert self.head['Dimensions'][0]['Description'] == 'Position'
            dims.append((map_xdim, self.head['Dimensions'][0]['Description'],
                         '[{}]'.format(self.head['Dimensions'][0]['Units'])))
            dims_time.append((map_xdim, self.head['Dimensions'][0]['Description'],
                              '[{}]'.format(self.head['Dimensions'][0]['Units'])))

            dim = self._createDim(
                first_meta['ArrayShape'][1],
                first_meta['Calibration'][1]['CalibrationOffset'],
                first_meta['Calibration'][1]['CalibrationDelta'],
                first_meta['Calibration'][1]['CalibrationElement'])
            dims.append((dim, 'y', '[m]'))

            dim = self._createDim(
                first_meta['ArrayShape'][0],
                first_meta['Calibration'][0]['CalibrationOffset'],
                first_meta['Calibration'][0]['CalibrationDelta'],
                first_meta['Calibration'][0]['CalibrationElement'])
            dims.append((dim, 'x', '[m]'))

            # write dimensions
            for ii in range(len(dims)):
                f.write_dim('dim{:d}'.format(ii + 1), dims[ii], grp)

            # write out time as additional dataset
            _ = f.put_emdgroup('timestamp', time, dims_time, parent=grp)

        else:
            # 1 entry series to single image
            if self.head['ValidNumberElements'] == 1:
                # get image
                data, meta = self.getDataset(0)
                tag = self._getTag(0)

                # create dimensions
                dims = []

                dim = self._createDim(
                    first_meta['ArrayShape'][1],
                    first_meta['Calibration'][1]['CalibrationOffset'],
                    first_meta['Calibration'][1]['CalibrationDelta'],
                    first_meta['Calibration'][1]['CalibrationElement'])
                dims.append((dim, 'y', '[m]'))

                dim = self._createDim(
                    first_meta['ArrayShape'][0],
                    first_meta['Calibration'][0]['CalibrationOffset'],
                    first_meta['Calibration'][0]['CalibrationDelta'],
                    first_meta['Calibration'][0]['CalibrationElement'])
                dims.append((dim, 'x', '[m]'))

                dset = grp.create_dataset(
                    'data',
                    (first_meta['ArrayShape'][1], first_meta['ArrayShape'][0]),
                    dtype=self._dictDataType[first_meta['DataType']])
                dset[:, :] = data[:, :]

                for i in range(len(dims)):
                    f.write_dim('dim{:d}'.format(i + 1), dims[i], grp)

                dset.attrs['timestamp'] = tag['Time']
            else:
                # simple series
                dset = grp.create_dataset(
                    'data',
                    (self.head['ValidNumberElements'],
                     first_meta['ArrayShape'][1],
                     first_meta['ArrayShape'][0]),
                    dtype=self._dictDataType[first_meta['DataType']])

                # collect time
                time = np.zeros(self.head['ValidNumberElements'], dtype='i4')

                for i in range(self.head['ValidNumberElements']):
                    print('converting dataset {} of {}'.format(
                        i + 1, self.head['ValidNumberElements']))

                    # retrieve dataset and put into buffer
                    data, meta = self.getDataset(i)
                    dset[i, :, :] = data[:, :]

                    # get tag data per image
                    tag = self._getTag(i)
                    time[i] = tag['Time']

                # create dimension data sets
                dims = []

                # first SER dimension is number
                assert self.head['Dimensions'][0]['Description'] == 'Number'
                dim = self._createDim(
                    self.head['Dimensions'][0]['DimensionSize'],
                    self.head['Dimensions'][0]['CalibrationOffset'],
                    self.head['Dimensions'][0]['CalibrationDelta'],
                    self.head['Dimensions'][0]['CalibrationElement'])
                dims.append((dim[0:self.head['ValidNumberElements']],
                             self.head['Dimensions'][0]['Description'],
                             '[{}]'.format(self.head['Dimensions'][0]['Units'])))

                dim = self._createDim(
                    first_meta['ArrayShape'][1],
                    first_meta['Calibration'][1]['CalibrationOffset'],
                    first_meta['Calibration'][1]['CalibrationDelta'],
                    first_meta['Calibration'][1]['CalibrationElement'])
                dims.append((dim, 'y', '[m]'))

                dim = self._createDim(
                    first_meta['ArrayShape'][0],
                    first_meta['Calibration'][0]['CalibrationOffset'],
                    first_meta['Calibration'][0]['CalibrationDelta'],
                    first_meta['Calibration'][0]['CalibrationElement'])
                dims.append((dim, 'x', '[m]'))

                # write dimensions
                for i in range(len(dims)):
                    f.write_dim('dim{:d}'.format(i + 1), dims[i], grp)

                # write out time as additional dim vector
                f.write_dim('dim1_time', (time, 'timestamp', '[s]'), grp)

    elif self.head['DataTypeID'] == 0x4120:
        # 1D datasets; spectra
        self.head['ExperimentType'] = 'spectrum'  # text indicator of the experiment type

        if first_tag['TagTypeID'] == 0x4142:
            # 2D mapping
            dset = grp.create_dataset(
                'data',
                (self.head['Dimensions'][1]['DimensionSize'],
                 self.head['Dimensions'][0]['DimensionSize'],
                 first_meta['ArrayShape'][0]),
                dtype=self._dictDataType[first_meta['DataType']])

            time = np.zeros((self.head['Dimensions'][0]['DimensionSize'],
                             self.head['Dimensions'][1]['DimensionSize']),
                            dtype='i4')

            # create mapping dims for checking
            map_xdim = self._createDim(
                self.head['Dimensions'][0]['DimensionSize'],
                self.head['Dimensions'][0]['CalibrationOffset'],
                self.head['Dimensions'][0]['CalibrationDelta'],
                self.head['Dimensions'][0]['CalibrationElement'])
            map_ydim = self._createDim(
                self.head['Dimensions'][1]['DimensionSize'],
                self.head['Dimensions'][1]['CalibrationOffset'],
                self.head['Dimensions'][1]['CalibrationDelta'],
                self.head['Dimensions'][1]['CalibrationElement'])
            # weird direction dependent half pixel shifting
            map_xdim += 0.5 * self.head['Dimensions'][0]['CalibrationDelta']
            map_ydim -= 0.5 * self.head['Dimensions'][1]['CalibrationDelta']

            for y in range(self.head['Dimensions'][0]['DimensionSize']):
                for x in range(self.head['Dimensions'][1]['DimensionSize']):
                    index = int(x + y * self.head['Dimensions'][0]['DimensionSize'])
                    print('converting dataset {} of {}, items ({}, {})'.format(
                        index + 1, self.head['ValidNumberElements'], x, y))

                    # retrieve dataset and put into buffer
                    data, meta = self.getDataset(index)
                    dset[y, x, :] = np.copy(data[:])

                    # get tag data per image
                    tag = self._getTag(index)
                    time[y, x] = tag['Time']

                    assert (np.abs(tag['PositionX'] - map_xdim[x])
                            < np.abs(tag['PositionX'] * 1e-8))
                    assert (np.abs(tag['PositionY'] - map_ydim[y])
                            < np.abs(tag['PositionY'] * 1e-8))

                    del data, meta, tag

            # create dimension datasets
            dims = []
            dims_time = []

            # Position Y
            assert self.head['Dimensions'][1]['Description'] == 'Position'
            dims.append((map_ydim, self.head['Dimensions'][1]['Description'],
                         '[{}]'.format(self.head['Dimensions'][1]['Units'])))
            dims_time.append((map_ydim, self.head['Dimensions'][1]['Description'],
                              '[{}]'.format(self.head['Dimensions'][1]['Units'])))

            # Position X
            assert self.head['Dimensions'][0]['Description'] == 'Position'
            dims.append((map_xdim, self.head['Dimensions'][0]['Description'],
                         '[{}]'.format(self.head['Dimensions'][0]['Units'])))
            dims_time.append((map_xdim, self.head['Dimensions'][0]['Description'],
                              '[{}]'.format(self.head['Dimensions'][0]['Units'])))

            dim = self._createDim(
                first_meta['ArrayShape'][0],
                first_meta['Calibration'][0]['CalibrationOffset'],
                first_meta['Calibration'][0]['CalibrationDelta'],
                first_meta['Calibration'][0]['CalibrationElement'])
            dims.append((dim, 'E', '[m_eV]'))

            # write dimensions
            for i in range(len(dims)):
                f.write_dim('dim{:d}'.format(i + 1), dims[i], grp)

            # write out time as additional dataset
            _ = f.put_emdgroup('timestamp', time, dims_time, parent=grp)

        else:
            # simple series
            dset = grp.create_dataset(
                'data',
                (self.head['ValidNumberElements'], first_meta['ArrayShape'][0]),
                dtype=self._dictDataType[first_meta['DataType']])

            # collect time
            time = np.zeros(self.head['ValidNumberElements'], dtype='i4')

            for i in range(self.head['ValidNumberElements']):
                print('converting dataset {} of {}'.format(
                    i + 1, self.head['ValidNumberElements']))

                # retrieve dataset and put into buffer
                data, meta = self.getDataset(i)
                dset[i, :] = data[:]

                # get tag data per image
                tag = self._getTag(i)
                time[i] = tag['Time']

            # create dimension datasets
            dims = []

            # first SER dimension is number
            assert self.head['Dimensions'][0]['Description'] == 'Number'
            dim = self._createDim(
                self.head['Dimensions'][0]['DimensionSize'],
                self.head['Dimensions'][0]['CalibrationOffset'],
                self.head['Dimensions'][0]['CalibrationDelta'],
                self.head['Dimensions'][0]['CalibrationElement'])
            dims.append((dim[0:self.head['ValidNumberElements']],
                         self.head['Dimensions'][0]['Description'],
                         '[{}]'.format(self.head['Dimensions'][0]['Units'])))

            dim = self._createDim(
                first_meta['ArrayShape'][0],
                first_meta['Calibration'][0]['CalibrationOffset'],
                first_meta['Calibration'][0]['CalibrationDelta'],
                first_meta['Calibration'][0]['CalibrationElement'])
            dims.append((dim, 'E', '[m_eV]'))

            # write dimensions
            for i in range(len(dims)):
                f.write_dim('dim{:d}'.format(i + 1), dims[i], grp)

            # write out time as additional dim vector
            f.write_dim('dim1_time', (time, 'timestamp', '[s]'), grp)

    else:
        raise RuntimeError('Unknown DataTypeID')

    # put meta information from _emi to Microscope group, if available
    if self._emi:
        for key in self._emi:
            if self._emi[key] is not None:
                f.microscope.attrs[key] = self._emi[key]

    # write comment into Comment group
    f.put_comment('Converted SER file "{}" to EMD using the openNCEM tools.'.format(
        self._file_hdl.name))
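# Usage sketch (hypothetical file names): writeEMD is a method on the SER
# reader, so conversion is a read-then-write:
#
#     from ncempy.io import ser
#     f = ser.fileSER('image_series_1.ser')
#     f.writeEMD('image_series_1.emd')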
def ingest_NCEM_EMD(paths):
    assert len(paths) == 1
    path = paths[0]

    emd_handle = emd.fileEMD(path, readonly=True)

    # Compose run start
    run_bundle = event_model.compose_run()  # type: event_model.ComposeRunBundle
    start_doc = run_bundle.start_doc
    start_doc["sample_name"] = Path(paths[0]).resolve().stem
    metadata = _metadata(path)
    metadata.update(start_doc)
    start_doc = metadata
    yield 'start', start_doc

    for device_index, device_name in enumerate(_dset_names(emd_handle)):
        num_t = _num_t(emd_handle, dset_num=device_index)
        first_frame = _get_slice(emd_handle, 0, dset_num=device_index)
        shape = first_frame.shape
        dtype = first_frame.dtype

        delayed_get_slice = dask.delayed(_get_slice)
        dask_data = da.stack([da.from_delayed(delayed_get_slice(emd_handle, t, dset_num=device_index),
                                              shape=shape,
                                              dtype=dtype)
                              for t in range(num_t)])

        # Compose descriptor
        source = 'NCEM'
        frame_data_keys = {'raw': {'source': source,
                                   'dtype': 'number',
                                   'shape': (num_t, *shape)}}
        frame_stream_name = f'primary_{device_name}'
        stream_metadata = _metadata_from_dset(path, dset_num=device_index)
        configuration = {key: {"data": {key: value},
                               "timestamps": {key: time.time()},
                               "data_keys": {key: {"source": path,
                                                   "dtype": _guess_type(value),
                                                   "shape": [],
                                                   "units": "",
                                                   # "related_value": 0, ...  # i.e. soft limits, precision
                                                   }}}
                         for key, value in stream_metadata.items() if _guess_type(value)}
        frame_stream_bundle = run_bundle.compose_descriptor(data_keys=frame_data_keys,
                                                            name=frame_stream_name,
                                                            configuration=configuration)
        yield 'descriptor', frame_stream_bundle.descriptor_doc

        # NOTE: A resource document may be meaningful in the future. For transient access it is not useful.
        # # Compose resource
        # resource = run_bundle.compose_resource(root=Path(path).root, resource_path=path, spec='NCEM_DM', resource_kwargs={})
        # yield 'resource', resource.resource_doc

        # # Compose datum_page
        # z_indices, t_indices = zip(*itertools.product(z_indices, t_indices))
        # datum_page_doc = resource.compose_datum_page(datum_kwargs={'index_z': list(z_indices), 'index_t': list(t_indices)})
        # datum_ids = datum_page_doc['datum_id']
        # yield 'datum_page', datum_page_doc

        yield 'event', frame_stream_bundle.compose_event(data={'raw': dask_data},
                                                         timestamps={'raw': time.time()})

    yield 'stop', run_bundle.compose_stop()
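# Usage sketch: the ingestor yields (name, doc) pairs in event-model order.
# A minimal consumer (hypothetical path) that materializes one frame from the
# lazy dask array carried by the first event:
#
#     docs = list(ingest_NCEM_EMD(['/data/sample.emd']))
#     first_event = next(doc for name, doc in docs if name == 'event')
#     raw = first_event['data']['raw']  # dask array, shape (num_t, *frame_shape)
#     frame0 = raw[0].compute()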