Example #1
def load_laue(path: typing.Union[Path, str]):
    if isinstance(path, str):
        path = Path(path)

    binary_data = path.read_bytes()
    table, header = binary_data[:131072], binary_data[131072:]

    table = np.frombuffer(table, dtype=np.uint16).reshape(256, 256)
    header = np.frombuffer(header, dtype=northstar_62_69_dtype).item()

    arr = xarray.DataArray(table,
                           coords={
                               'x': np.array(range(256)),
                               'y': np.array(range(256))
                           },
                           dims=[
                               'x',
                               'y',
                           ],
                           attrs={
                               'sample':
                               header[1].split(b'\0')[0].decode('ascii'),
                               'user':
                               header[2].split(b'\0')[0].decode('ascii'),
                               'comment':
                               header[3].split(b'\0')[0].decode('ascii'),
                           })

    provenance_from_file(arr, str(path), {
        'what': 'Loaded Laue dataset from Northstar.',
        'by': 'load_laue',
    })

    return arr
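
A minimal usage sketch of the loader above; the file path is hypothetical, and numpy, xarray, pathlib, northstar_62_69_dtype, and provenance_from_file are assumed to be imported at module level as in the rest of these examples.

laue = load_laue('data/northstar/sample_0001.nsr')  # hypothetical path
print(laue.attrs['sample'], laue.attrs['user'])     # header strings decoded above
print(laue.sel(x=128, y=128).item())                # one detector pixel from the 256x256 table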
Example #2
    def load(self, scan_desc: dict = None, **kwargs):
        if scan_desc is None:
            warnings.warn('Attempting to make do without a user-associated scan_desc for the file')
            raise TypeError('Expected a dictionary of scan_desc with the location of the file')

        scan_desc = dict(copy.deepcopy(scan_desc))

        data_loc = scan_desc.get('path', scan_desc.get('file'))
        data_loc = data_loc if data_loc.startswith('/') else os.path.join(arpes.config.DATA_PATH, data_loc)

        hdulist = fits.open(data_loc)

        hdulist[0].verify('fix+warn')
        header_hdu, hdu = hdulist[0], hdulist[1]

        coords, dimensions, spectrum_shape = find_clean_coords(hdu, scan_desc)
        columns = hdu.columns # pylint: disable=no-member

        column_renamings = {}
        take_columns = columns.names

        spectra_names = [name for name in take_columns if name in columns.names]

        skip_frags = set()
        skip_predicates = {lambda k: any(s in k for s in skip_frags)}
        scan_desc = {k: v for k, v in scan_desc.items()
                    if not any(pred(k) for pred in skip_predicates)}

        data_vars = {k: (dimensions[k], hdu.data[k].reshape(spectrum_shape[k]), scan_desc)  # pylint: disable=no-member
                     for k in spectra_names}
        data_vars = rename_keys(data_vars, column_renamings)

        hdulist.close()

        relevant_dimensions = {k for k in coords.keys() if k in
                               set(itertools.chain(*[l[0] for l in data_vars.values()]))}
        relevant_coords = {k: v for k, v in coords.items() if k in relevant_dimensions}

        deg_to_rad_coords = {'beta', 'psi', 'chi', 'theta'}
        relevant_coords = {k: c * (np.pi / 180) if k in deg_to_rad_coords else c
                           for k, c in relevant_coords.items()}

        dataset = xr.Dataset(
            data_vars,
            relevant_coords,
            scan_desc,
        )

        provenance_from_file(dataset, data_loc, {
            'what': 'Loaded BL10 dataset',
            'by': 'load_DLD',
        })

        return dataset
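
The degrees-to-radians step near the end of this loader is just a dict comprehension over the coordinate arrays; a self-contained sketch with synthetic coordinates:

import numpy as np

coords = {'beta': np.array([0.0, 15.0, 30.0]), 'eV': np.array([-0.2, -0.1, 0.0])}
deg_to_rad_coords = {'beta', 'psi', 'chi', 'theta'}

# Angular axes are converted to radians; everything else (here 'eV') passes through unchanged.
coords = {k: c * (np.pi / 180) if k in deg_to_rad_coords else c
          for k, c in coords.items()}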
Example #3
    def load(self, scan_desc: dict = None, **kwargs):
        """
        Imports a FITS file that contains all of the information from a run of Ping
        and Anton's delay line detector ARToF

        :param scan_desc: Dictionary with extra information to attach to the xarray.Dataset, must contain the location
        of the file
        :return: xarray.Dataset
        """

        if scan_desc is None:
            warnings.warn(
                'Attempting to make do without user-associated metadata for the file'
            )
            raise TypeError(
                'Expected a dictionary of metadata with the location of the file'
            )

        metadata = copy.deepcopy(scan_desc)

        data_loc = metadata['file']
        data_loc = data_loc if data_loc.startswith('/') else os.path.join(
            arpes.config.DATA_PATH, data_loc)

        f = h5py.File(data_loc, 'r')

        dataset_contents = dict()
        raw_data = f['/PRIMARY/DATA'][:]
        raw_data = raw_data[:, ::-1]  # Reverse the timing axis
        dataset_contents['raw'] = xr.DataArray(
            raw_data,
            coords={
                'x_pixels': np.linspace(0, 511, 512),
                't_pixels': np.linspace(0, 511, 512)
            },
            dims=('x_pixels', 't_pixels'),
            attrs=f['/PRIMARY'].attrs.items(),
        )

        provenance_from_file(
            dataset_contents['raw'], data_loc, {
                'what': 'Loaded Anton and Ping DLD dataset from HDF5.',
                'by': 'load_DLD',
            })

        return xr.Dataset(dataset_contents, attrs=metadata)
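
A self-contained sketch of the array handling above, with synthetic counts standing in for the HDF5 read (f['/PRIMARY/DATA'][:]); the axis names mirror the loader:

import numpy as np
import xarray as xr

raw = np.random.poisson(1.0, size=(512, 512))  # stand-in for the detector image
raw = raw[:, ::-1]                             # reverse the timing axis, as above

arr = xr.DataArray(
    raw,
    coords={'x_pixels': np.linspace(0, 511, 512),
            't_pixels': np.linspace(0, 511, 512)},
    dims=('x_pixels', 't_pixels'),
)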
Example #4
        def prep_spectrum(data: xr.DataArray):
            # don't do center pixel inference because the main chamber
            # at least consistently records the offset from the edge
            # of the recorded window
            if 'pixel' in data.coords:
                phi_axis = data.coords['pixel'].values * \
                           arpes.constants.SPECTROMETER_MC['rad_per_pixel']
                data = replace_coords(data, {'phi': phi_axis}, [(
                    'pixel',
                    'phi',
                )])

            # Always attach provenance
            provenance_from_file(data, frame_path, {
                'what': 'Loaded MC dataset from FITS.',
                'by': 'load_MC',
            })

            return data
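
replace_coords is not shown in this example; assuming it swaps the 'pixel' axis for the computed 'phi' axis, the same effect can be sketched with plain xarray (rad_per_pixel here is a made-up stand-in for arpes.constants.SPECTROMETER_MC['rad_per_pixel']):

import numpy as np
import xarray as xr

rad_per_pixel = 0.001
data = xr.DataArray(np.zeros((5, 3)), dims=('pixel', 'eV'),
                    coords={'pixel': np.arange(5)})

phi_axis = data.coords['pixel'].values * rad_per_pixel
data = data.assign_coords(phi=('pixel', phi_axis)).swap_dims({'pixel': 'phi'})
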
    def load_SToF_hdf5(self, scan_desc: dict = None, **kwargs):
        """
        Imports a FITS file that contains ToF spectra.

        :param scan_desc: Dictionary with extra information to attach to the xr.Dataset, must contain the location
        of the file
        :return: xr.Dataset
        """

        scan_desc = copy.deepcopy(scan_desc)

        data_loc = scan_desc.get('path', scan_desc.get('file'))
        data_loc = data_loc if data_loc.startswith('/') else os.path.join(
            arpes.config.DATA_PATH, data_loc)

        f = h5py.File(data_loc, 'r')

        dataset_contents = dict()
        raw_data = f['/PRIMARY/DATA'][:]
        raw_data = raw_data[:, ::-1]  # Reverse the timing axis
        dataset_contents['raw'] = xr.DataArray(
            raw_data,
            coords={
                'x_pixels': np.linspace(0, 511, 512),
                't_pixels': np.linspace(0, 511, 512)
            },
            dims=('x_pixels', 't_pixels'),
            attrs=f['/PRIMARY'].attrs.items(),
        )

        provenance_from_file(
            dataset_contents['raw'], data_loc, {
                'what': 'Loaded Anton and Ping DLD dataset from HDF5.',
                'by': 'load_DLD',
            })

        return xr.Dataset(dataset_contents, attrs=scan_desc)
    def load_SToF_fits(self, scan_desc: dict = None, **kwargs):
        scan_desc = dict(copy.deepcopy(scan_desc))

        data_loc = scan_desc.get('path', scan_desc.get('file'))
        data_loc = data_loc if data_loc.startswith('/') else os.path.join(
            arpes.config.DATA_PATH, data_loc)

        hdulist = fits.open(data_loc)

        hdulist[0].verify('fix+warn')
        header_hdu, hdu = hdulist[0], hdulist[1]

        scan_desc.update(dict(hdu.header))
        scan_desc.update(dict(header_hdu.header))

        drop_attrs = [
            'COMMENT', 'HISTORY', 'EXTEND', 'SIMPLE', 'SCANPAR', 'SFKE_0'
        ]
        for dropped_attr in drop_attrs:
            if dropped_attr in scan_desc:
                del scan_desc[dropped_attr]

        coords, dimensions, spectrum_shape = find_clean_coords(hdu, scan_desc)
        dimensions = {
            k: [SpinToFEndstation.RENAME_KEYS.get(n, n) for n in v]
            for k, v in dimensions.items()
        }
        coords = rename_keys(coords, SpinToFEndstation.RENAME_KEYS)

        columns = hdu.columns

        spin_column_names = {
            'targetMinus', 'targetPlus', 'Time_Target_Up', 'Time_Target_Down',
            'Energy_Target_Up', 'Energy_Target_Down', 'Photocurrent_Up',
            'Photocurrent_Down'
        }

        is_spin_resolved = any(cname in columns.names
                               for cname in spin_column_names)
        spin_columns = [
            'Current', 'TempA', 'TempB', 'ALS_Beam_mA'
        ] + list(spin_column_names)
        straight_columns = [
            'Current', 'TempA', 'TempB', 'ALS_Beam_mA', 'Energy_Spectra',
            'wave'
        ]
        take_columns = spin_columns if is_spin_resolved else straight_columns

        # We could do our own spectrum conversion too, but that would be more annoying
        # it would slightly improve accuracy though
        spectra_names = [
            name for name in take_columns if name in columns.names
        ]

        skip_predicates = {
            lambda k: any(s in k for s in self.SKIP_ATTR_FRAGMENTS),
        }

        scan_desc = {
            k: v
            for k, v in scan_desc.items()
            if not any(pred(k) for pred in skip_predicates)
        }
        scan_desc = rename_keys(scan_desc, SpinToFEndstation.RENAME_KEYS)

        # TODO, we should try to unify this with the FITS file loader, but there are a few current inconsistencies
        data_vars = {}

        for spectrum_name in spectra_names:
            column_shape = spectrum_shape[spectrum_name]
            data_for_resize = hdu.data.columns[spectrum_name].array

            try:
                # best possible case is that we have identically all of the data
                resized_data = data_for_resize.reshape(column_shape)
            except ValueError:
                # if we stop scans early, the header is already written and so the size of the data will be small along
                # the experimental axes
                rest_column_shape = column_shape[1:]
                n_per_slice = int(np.prod(rest_column_shape))
                total_shape = data_for_resize.shape
                total_n = np.prod(total_shape)

                n_slices = total_n // n_per_slice

                if (total_n // n_per_slice != total_n / n_per_slice):
                    # the last slice was in the middle of writing when something hit the fan
                    # we need to infer how much of the data to read, and then repeat the above
                    # we need to cut the data

                    # This can happen when the labview crashes during data collection,
                    # we use column_shape[1] because of the row order that is used in the FITS file
                    data_for_resize = data_for_resize[0:(total_n //
                                                         n_per_slice) *
                                                      column_shape[1]]
                    warnings.warn(
                        'Column {} was in the middle of slice when DAQ stopped. Throwing out incomplete slice...'
                        .format(spectrum_name))

                column_shape = list(column_shape)
                column_shape[0] = n_slices

                try:
                    resized_data = data_for_resize.reshape(column_shape)
                except Exception:
                    # we should probably zero pad in the case where the slices are not the right size
                    continue

                altered_dimension = dimensions[spectrum_name][0]
                coords[altered_dimension] = coords[
                    altered_dimension][:n_slices]

            data_vars[spectrum_name] = (
                dimensions[spectrum_name],
                resized_data,
                scan_desc,
            )

        data_vars = rename_keys(data_vars, SpinToFEndstation.COLUMN_RENAMINGS)
        if 'beam_current' in data_vars and np.all(
                data_vars['beam_current'][1] == 0):
            # Wasn't taken at a beamline
            del data_vars['beam_current']

        hdulist.close()

        relevant_dimensions = {
            k
            for k in coords.keys()
            if k in set(itertools.chain(*[l[0] for l in data_vars.values()]))
        }
        relevant_coords = {
            k: v
            for k, v in coords.items() if k in relevant_dimensions
        }

        dataset = xr.Dataset(
            data_vars,
            relevant_coords,
            scan_desc,
        )

        for var_name, data_arr in dataset.data_vars.items():
            if 'time' in data_arr.dims:
                data_arr.data = data_arr.sel(time=slice(None, None, -1)).data

        provenance_from_file(dataset, data_loc, {
            'what': 'Loaded Spin-ToF dataset',
            'by': 'load_DLD',
        })

        return dataset
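
The fallback branch above handles scans stopped mid-slice by truncating the flat column to a whole number of slices before reshaping; a simplified standalone sketch (truncating by whole slices rather than by the FITS row convention used in the loader):

import numpy as np

column_shape = (10, 64, 64)                 # (scan axis, ...rest of the spectrum)
n_per_slice = int(np.prod(column_shape[1:]))

flat = np.arange(7 * n_per_slice + 100)     # a scan interrupted partway through slice 8
n_slices = flat.size // n_per_slice

flat = flat[:n_slices * n_per_slice]        # drop the incomplete slice
resized = flat.reshape((n_slices,) + column_shape[1:])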
Example #7
    def load_SES_h5(self,
                    scan_desc: dict = None,
                    robust_dimension_labels=False,
                    **kwargs):
        """
        Imports an hdf5 dataset exported from Igor that was originally generated by a Scienta spectrometer
        in the SESb format. In order to understand the structure of these files, have a look at Conrad's
        saveSESDataset in Igor Pro.

        :param scan_desc: Dictionary with extra information to attach to the xr.Dataset, must contain the location
        of the file
        :return: xr.Dataset
        """

        scan_desc = copy.deepcopy(scan_desc)

        data_loc = scan_desc.get('path', scan_desc.get('file'))
        p = Path(data_loc)
        if not p.exists():
            import arpes.config
            data_loc = os.path.join(arpes.config.DATA_PATH, data_loc)

        # wave_note = shim_wave_note(data_loc)
        wave_note = ""
        f = h5py.File(data_loc, 'r')

        primary_dataset_name = list(f)[0]
        # This is bugged for the moment in h5py due to an inability to read fixed length unicode strings
        # wave_note = f['/' + primary_dataset_name].attrs['IGORWaveNote']

        dimension_labels = list(
            f['/' + primary_dataset_name].attrs['IGORWaveDimensionLabels'][0])
        # print(list(f['/' + primary_dataset_name].attrs.keys()))

        if any(x == '' for x in dimension_labels):
            # print(dimension_labels)

            if not robust_dimension_labels:
                raise ValueError(
                    'Missing dimension labels. Use robust_dimension_labels=True to override'
                )
            else:
                used_blanks = 0
                for i in range(len(dimension_labels)):
                    if dimension_labels[i] == '':
                        dimension_labels[i] = 'missing{}'.format(used_blanks)
                        used_blanks += 1

                        # print(dimension_labels)

        scaling = f['/' + primary_dataset_name].attrs['IGORWaveScaling'][
            -len(dimension_labels):]
        raw_data = f['/' + primary_dataset_name][:]

        scaling = [
            np.linspace(scale[1], scale[1] + scale[0] * raw_data.shape[i],
                        raw_data.shape[i]) for i, scale in enumerate(scaling)
        ]

        dataset_contents = {}
        attrs = scan_desc.pop('note', {})
        attrs.update(wave_note)

        built_coords = dict(zip(dimension_labels, scaling))

        deg_to_rad_coords = {'theta', 'beta', 'phi'}

        # the hemisphere axis is handled below
        built_coords = {
            k: c * (np.pi / 180) if k in deg_to_rad_coords else c
            for k, c in built_coords.items()
        }

        deg_to_rad_attrs = {'theta', 'beta', 'alpha', 'chi'}
        for angle_attr in deg_to_rad_attrs:
            if angle_attr in attrs:
                attrs[angle_attr] = float(attrs[angle_attr]) * np.pi / 180

        dataset_contents['spectrum'] = xr.DataArray(
            raw_data,
            coords=built_coords,
            dims=dimension_labels,
            attrs=attrs,
        )

        provenance_from_file(dataset_contents['spectrum'], data_loc, {
            'what': 'Loaded SES dataset from HDF5.',
            'by': 'load_SES'
        })

        return xr.Dataset(
            dataset_contents,
            attrs={
                **scan_desc, 'name': primary_dataset_name
            },
        )
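
The IGORWaveScaling handling above turns per-axis (delta, offset) pairs into evenly spaced coordinate arrays; a self-contained sketch with made-up scaling values, mirroring the np.linspace call in the loader:

import numpy as np

raw_shape = (200, 100)                      # e.g. (eV, phi) bin counts
scaling = [[0.002, 15.0], [0.1, -15.0]]     # [delta, offset] per axis (made up)

axes = [np.linspace(offset, offset + delta * n, n)
        for (delta, offset), n in zip(scaling, raw_shape)]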