def load_laue(path: typing.Union[Path, str]):
    if isinstance(path, str):
        path = Path(path)

    binary_data = path.read_bytes()
    # the first 256 * 256 * 2 = 131072 bytes are the uint16 detector image, the rest is the Northstar header
    table, header = binary_data[:131072], binary_data[131072:]
    table = np.frombuffer(table, dtype=np.uint16).reshape(256, 256)
    header = np.frombuffer(header, dtype=northstar_62_69_dtype).item()

    arr = xarray.DataArray(
        table,
        coords={'x': np.array(range(256)), 'y': np.array(range(256))},
        dims=['x', 'y'],
        attrs={
            'sample': header[1].split(b'\0')[0].decode('ascii'),
            'user': header[2].split(b'\0')[0].decode('ascii'),
            'comment': header[3].split(b'\0')[0].decode('ascii'),
        },
    )

    provenance_from_file(arr, str(path), {
        'what': 'Loaded Laue dataset from Northstar.',
        'by': 'load_laue',
    })

    return arr
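# A minimal usage sketch for load_laue; 'example.nsd' is a hypothetical Northstar
# file name used only for illustration:
#
#     laue = load_laue('example.nsd')
#     laue.attrs['sample'], laue.attrs['user']   # metadata decoded from the header
#     laue.plot()                                # 256 x 256 detector image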
def load(self, scan_desc: dict = None, **kwargs):
    if scan_desc is None:
        warnings.warn('Attempting to make do without a user associated scan_desc for the file')
        raise TypeError('Expected a dictionary of scan_desc with the location of the file')

    scan_desc = dict(copy.deepcopy(scan_desc))
    data_loc = scan_desc.get('path', scan_desc.get('file'))
    data_loc = data_loc if data_loc.startswith('/') else os.path.join(arpes.config.DATA_PATH, data_loc)

    hdulist = fits.open(data_loc)
    hdulist[0].verify('fix+warn')
    header_hdu, hdu = hdulist[0], hdulist[1]

    coords, dimensions, spectrum_shape = find_clean_coords(hdu, scan_desc)
    columns = hdu.columns  # pylint: disable=no-member

    column_renamings = {}
    take_columns = columns.names  # take every column by default

    spectra_names = [name for name in take_columns if name in columns.names]

    skip_frags = set()
    skip_predicates = {lambda k: any(s in k for s in skip_frags)}
    scan_desc = {k: v for k, v in scan_desc.items()
                 if not any(pred(k) for pred in skip_predicates)}

    data_vars = {k: (dimensions[k], hdu.data[k].reshape(spectrum_shape[k]), scan_desc)  # pylint: disable=no-member
                 for k in spectra_names}
    data_vars = rename_keys(data_vars, column_renamings)

    hdulist.close()

    relevant_dimensions = {k for k in coords.keys()
                           if k in set(itertools.chain(*[l[0] for l in data_vars.values()]))}
    relevant_coords = {k: v for k, v in coords.items() if k in relevant_dimensions}

    # angular coordinates are recorded in degrees; convert them to radians
    deg_to_rad_coords = {'beta', 'psi', 'chi', 'theta'}
    relevant_coords = {k: c * (np.pi / 180) if k in deg_to_rad_coords else c
                       for k, c in relevant_coords.items()}

    dataset = xr.Dataset(data_vars, relevant_coords, scan_desc)

    provenance_from_file(dataset, data_loc, {
        'what': 'Loaded BL10 dataset',
        'by': 'load_DLD',
    })

    return dataset
def load(self, scan_desc: dict = None, **kwargs):
    """
    Imports an HDF5 file that contains all of the information from a run of Ping
    and Anton's delay line detector ARToF.

    :param scan_desc: Dictionary with extra information to attach to the xarray.Dataset,
                      must contain the location of the file
    :return: xarray.Dataset
    """
    if scan_desc is None:
        warnings.warn('Attempting to make do without user associated metadata for the file')
        raise TypeError('Expected a dictionary of metadata with the location of the file')

    metadata = copy.deepcopy(scan_desc)
    data_loc = metadata['file']
    data_loc = data_loc if data_loc.startswith('/') else os.path.join(arpes.config.DATA_PATH, data_loc)

    f = h5py.File(data_loc, 'r')

    dataset_contents = dict()
    raw_data = f['/PRIMARY/DATA'][:]
    raw_data = raw_data[:, ::-1]  # Reverse the timing axis

    dataset_contents['raw'] = xr.DataArray(
        raw_data,
        coords={'x_pixels': np.linspace(0, 511, 512),
                't_pixels': np.linspace(0, 511, 512)},
        dims=('x_pixels', 't_pixels'),
        attrs=dict(f['/PRIMARY'].attrs.items()),
    )

    provenance_from_file(dataset_contents['raw'], data_loc, {
        'what': 'Loaded Anton and Ping DLD dataset from HDF5.',
        'by': 'load_DLD',
    })

    return xr.Dataset(dataset_contents, attrs=metadata)
def prep_spectrum(data: xr.DataArray):
    # don't do center pixel inference because the main chamber
    # at least consistently records the offset from the edge
    # of the recorded window
    if 'pixel' in data.coords:
        phi_axis = data.coords['pixel'].values * \
                   arpes.constants.SPECTROMETER_MC['rad_per_pixel']
        data = replace_coords(data, {'phi': phi_axis}, [('pixel', 'phi',)])

    # Always attach provenance; frame_path is expected to be defined in the
    # enclosing load_MC scope
    provenance_from_file(data, frame_path, {
        'what': 'Loaded MC dataset from FITS.',
        'by': 'load_MC',
    })

    return data
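# Roughly what the replace_coords call above does, written with plain xarray calls
# for illustration only; `rad_per_pixel` here stands in for the spectrometer
# constant and `data` for a DataArray with a 'pixel' dimension.
def _pixel_to_phi_sketch(data: xr.DataArray, rad_per_pixel: float) -> xr.DataArray:
    phi_axis = data.coords['pixel'].values * rad_per_pixel
    # attach the angular axis along the pixel dimension, then promote it to the dimension coordinate
    return data.assign_coords(phi=('pixel', phi_axis)).swap_dims({'pixel': 'phi'})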
def load_SToF_hdf5(self, scan_desc: dict = None, **kwargs):
    """
    Imports an HDF5 file that contains ToF spectra.

    :param scan_desc: Dictionary with extra information to attach to the xr.Dataset,
                      must contain the location of the file
    :return: xr.Dataset
    """
    scan_desc = copy.deepcopy(scan_desc)
    data_loc = scan_desc.get('path', scan_desc.get('file'))
    data_loc = data_loc if data_loc.startswith('/') else os.path.join(arpes.config.DATA_PATH, data_loc)

    f = h5py.File(data_loc, 'r')

    dataset_contents = dict()
    raw_data = f['/PRIMARY/DATA'][:]
    raw_data = raw_data[:, ::-1]  # Reverse the timing axis

    dataset_contents['raw'] = xr.DataArray(
        raw_data,
        coords={'x_pixels': np.linspace(0, 511, 512),
                't_pixels': np.linspace(0, 511, 512)},
        dims=('x_pixels', 't_pixels'),
        attrs=dict(f['/PRIMARY'].attrs.items()),
    )

    provenance_from_file(dataset_contents['raw'], data_loc, {
        'what': 'Loaded Anton and Ping DLD dataset from HDF5.',
        'by': 'load_DLD',
    })

    return xr.Dataset(dataset_contents, attrs=scan_desc)
def load_SToF_fits(self, scan_desc: dict = None, **kwargs):
    scan_desc = dict(copy.deepcopy(scan_desc))

    data_loc = scan_desc.get('path', scan_desc.get('file'))
    data_loc = data_loc if data_loc.startswith('/') else os.path.join(arpes.config.DATA_PATH, data_loc)

    hdulist = fits.open(data_loc)
    hdulist[0].verify('fix+warn')
    header_hdu, hdu = hdulist[0], hdulist[1]

    scan_desc.update(dict(hdu.header))
    scan_desc.update(dict(header_hdu.header))

    drop_attrs = ['COMMENT', 'HISTORY', 'EXTEND', 'SIMPLE', 'SCANPAR', 'SFKE_0']
    for dropped_attr in drop_attrs:
        if dropped_attr in scan_desc:
            del scan_desc[dropped_attr]

    coords, dimensions, spectrum_shape = find_clean_coords(hdu, scan_desc)
    dimensions = {k: [SpinToFEndstation.RENAME_KEYS.get(n, n) for n in v]
                  for k, v in dimensions.items()}
    coords = rename_keys(coords, SpinToFEndstation.RENAME_KEYS)

    columns = hdu.columns

    spin_column_names = {
        'targetMinus', 'targetPlus', 'Time_Target_Up', 'Time_Target_Down',
        'Energy_Target_Up', 'Energy_Target_Down', 'Photocurrent_Up', 'Photocurrent_Down'
    }
    is_spin_resolved = any(cname in columns.names for cname in spin_column_names)

    spin_columns = ['Current', 'TempA', 'TempB', 'ALS_Beam_mA'] + list(spin_column_names)
    straight_columns = ['Current', 'TempA', 'TempB', 'ALS_Beam_mA', 'Energy_Spectra', 'wave']
    take_columns = spin_columns if is_spin_resolved else straight_columns

    # We could do our own spectrum conversion too, but that would be more annoying.
    # It would slightly improve accuracy though.
    spectra_names = [name for name in take_columns if name in columns.names]

    skip_predicates = {lambda k: any(s in k for s in self.SKIP_ATTR_FRAGMENTS)}
    scan_desc = {k: v for k, v in scan_desc.items()
                 if not any(pred(k) for pred in skip_predicates)}
    scan_desc = rename_keys(scan_desc, SpinToFEndstation.RENAME_KEYS)

    # TODO: we should try to unify this with the FITS file loader, but there are a few current inconsistencies
    data_vars = {}
    for spectrum_name in spectra_names:
        column_shape = spectrum_shape[spectrum_name]
        data_for_resize = hdu.data.columns[spectrum_name].array

        try:
            # best possible case is that we have identically all of the data
            resized_data = data_for_resize.reshape(column_shape)
        except ValueError:
            # if we stop scans early, the header is already written and so the size of the data
            # will be small along the experimental axes
            rest_column_shape = column_shape[1:]
            n_per_slice = int(np.prod(rest_column_shape))
            total_shape = data_for_resize.shape
            total_n = np.prod(total_shape)

            n_slices = total_n // n_per_slice
            if total_n // n_per_slice != total_n / n_per_slice:
                # the last slice was in the middle of writing when something hit the fan,
                # so we need to infer how much of the data to read and cut the rest.
                # This can happen when the LabVIEW DAQ crashes during data collection;
                # we use column_shape[1] because of the row order that is used in the FITS file
                data_for_resize = data_for_resize[0:(total_n // n_per_slice) * column_shape[1]]
                warnings.warn(
                    'Column {} was in the middle of a slice when DAQ stopped. '
                    'Throwing out incomplete slice...'.format(spectrum_name))

            column_shape = list(column_shape)
            column_shape[0] = n_slices

            try:
                resized_data = data_for_resize.reshape(column_shape)
            except Exception:
                # we should probably zero pad in the case where the slices are not the right size
                continue

            altered_dimension = dimensions[spectrum_name][0]
            coords[altered_dimension] = coords[altered_dimension][:n_slices]

        data_vars[spectrum_name] = (dimensions[spectrum_name], resized_data, scan_desc)

    data_vars = rename_keys(data_vars, SpinToFEndstation.COLUMN_RENAMINGS)
    if 'beam_current' in data_vars and np.all(data_vars['beam_current'][1] == 0):
        # Wasn't taken at a beamline
        del data_vars['beam_current']

    hdulist.close()

    relevant_dimensions = {k for k in coords.keys()
                           if k in set(itertools.chain(*[l[0] for l in data_vars.values()]))}
    relevant_coords = {k: v for k, v in coords.items() if k in relevant_dimensions}

    dataset = xr.Dataset(data_vars, relevant_coords, scan_desc)

    # reverse the time axis on any time-resolved variables
    for var_name, data_arr in dataset.data_vars.items():
        if 'time' in data_arr.dims:
            data_arr.data = data_arr.sel(time=slice(None, None, -1)).data

    provenance_from_file(dataset, data_loc, {
        'what': 'Loaded Spin-ToF dataset',
        'by': 'load_DLD',
    })

    return dataset
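# A simplified sketch of the "DAQ stopped mid-slice" repair above: keep only whole
# slices of a flat FITS column and drop any trailing partial slice. Illustrative
# only (the loader above additionally accounts for the FITS row order via
# column_shape[1]); `flat` and `column_shape` are assumed inputs mirroring the
# column data and its intended shape.
def _truncate_to_whole_slices(flat, column_shape):
    n_per_slice = int(np.prod(column_shape[1:]))
    n_slices = flat.size // n_per_slice
    whole = flat[:n_slices * n_per_slice]
    return whole.reshape((n_slices,) + tuple(column_shape[1:]))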
def load_SES_h5(self, scan_desc: dict = None, robust_dimension_labels=False, **kwargs):
    """
    Imports an hdf5 dataset exported from Igor that was originally generated by a Scienta
    spectrometer in the SESb format. In order to understand the structure of these files,
    have a look at Conrad's saveSESDataset in Igor Pro.

    :param scan_desc: Dictionary with extra information to attach to the xr.Dataset,
                      must contain the location of the file
    :return: xr.Dataset
    """
    scan_desc = copy.deepcopy(scan_desc)

    data_loc = scan_desc.get('path', scan_desc.get('file'))
    p = Path(data_loc)
    if not p.exists():
        import arpes.config
        data_loc = os.path.join(arpes.config.DATA_PATH, data_loc)

    # wave_note = shim_wave_note(data_loc)
    wave_note = ""

    f = h5py.File(data_loc, 'r')

    primary_dataset_name = list(f)[0]
    # This is bugged for the moment in h5py due to an inability to read fixed length unicode strings
    # wave_note = f['/' + primary_dataset_name].attrs['IGORWaveNote']

    dimension_labels = list(f['/' + primary_dataset_name].attrs['IGORWaveDimensionLabels'][0])

    if any(x == '' for x in dimension_labels):
        if not robust_dimension_labels:
            raise ValueError('Missing dimension labels. Use robust_dimension_labels=True to override')

        used_blanks = 0
        for i in range(len(dimension_labels)):
            if dimension_labels[i] == '':
                dimension_labels[i] = 'missing{}'.format(used_blanks)
                used_blanks += 1

    scaling = f['/' + primary_dataset_name].attrs['IGORWaveScaling'][-len(dimension_labels):]
    raw_data = f['/' + primary_dataset_name][:]

    scaling = [np.linspace(scale[1], scale[1] + scale[0] * raw_data.shape[i], raw_data.shape[i])
               for i, scale in enumerate(scaling)]

    dataset_contents = {}
    attrs = scan_desc.pop('note', {})
    attrs.update(wave_note)

    built_coords = dict(zip(dimension_labels, scaling))

    deg_to_rad_coords = {'theta', 'beta', 'phi'}

    # the hemisphere axis is handled below
    built_coords = {k: c * (np.pi / 180) if k in deg_to_rad_coords else c
                    for k, c in built_coords.items()}

    deg_to_rad_attrs = {'theta', 'beta', 'alpha', 'chi'}
    for angle_attr in deg_to_rad_attrs:
        if angle_attr in attrs:
            attrs[angle_attr] = float(attrs[angle_attr]) * np.pi / 180

    dataset_contents['spectrum'] = xr.DataArray(
        raw_data,
        coords=built_coords,
        dims=dimension_labels,
        attrs=attrs,
    )

    provenance_from_file(dataset_contents['spectrum'], data_loc, {
        'what': 'Loaded SES dataset from HDF5.',
        'by': 'load_SES',
    })

    return xr.Dataset(
        dataset_contents,
        attrs={**scan_desc, 'name': primary_dataset_name},
    )
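# A small sketch of the IGORWaveScaling handling above: each row of the scaling
# attribute is treated as a (delta, offset) pair and expanded into a coordinate
# array with np.linspace, one per data dimension. `scaling` and `shape` are
# assumed to mirror the HDF5 attribute and the exported wave's shape.
def _igor_scaling_to_coords(scaling, shape):
    return [np.linspace(offset, offset + delta * n, n)
            for (delta, offset), n in zip(scaling, shape)]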