Python RunDirectory.get_dataframe Examples

Programming Language: Python

Namespace/Package Name: karabo_data

Class/Type: RunDirectory

Method/Function: get_dataframe

Examples at hotexamples.com: 3

Python RunDirectory.get_dataframe - 3 examples found. These are the top-rated real-world Python examples of karabo_data.RunDirectory.get_dataframe, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

RunDirectory(30)

train_from_id(12)

trains(7)

info(5)

select_trains(5)

get_array(4)

get_dataframe(3)

select(3)

deselect(2)

get_virtual_dataset(2)

train_from_index(2)

get_dask_array(1)

get_series(1)

Example #1

Show file

File: test_reader_mockdata.py Project: Landau1908/karabo_data

def test_run_get_dataframe(mock_fxe_run):
    """Check the columns produced by ``RunDirectory.get_dataframe``.

    The same source/key glob is requested twice: once with default
    arguments and once with ``timestamps=True``. The test expects 4 columns
    in the first case and 8 in the second, including a ``.timestamp``
    companion column for the ``ixPos`` key.

    :param mock_fxe_run: pytest fixture; passed straight to RunDirectory.
    """
    xgm_position = ("*_XGM/*", "*.i[xy]Pos*")
    ix_pos_column = "SA1_XTD2_XGM/DOOCS/MAIN/beamPosition.ixPos"

    run = RunDirectory(mock_fxe_run)

    # Values only.
    values_only = run.get_dataframe(fields=[xgm_position])
    assert len(values_only.columns) == 4
    assert ix_pos_column in values_only.columns

    # Values plus timestamps: the column count doubles.
    with_timestamps = run.get_dataframe(fields=[xgm_position],
                                        timestamps=True)
    assert len(with_timestamps.columns) == 8
    assert ix_pos_column in with_timestamps.columns
    assert ("SA1_XTD2_XGM/DOOCS/MAIN/beamPosition.ixPos.timestamp"
            in with_timestamps.columns)

Example #2

Show file

from time import monotonic
from karabo_data import RunDirectory

# --- Step 1: time how long it takes to open the run directory. ---
print("Opening raw run...")
open_started = monotonic()
run = RunDirectory('/gpfs/exfel/exp/SA1/201830/p900025/raw/r0150/')
open_elapsed = monotonic() - open_started
print(len(run.files), "files")
print(open_elapsed, "seconds")

# --- Step 2: time building a data frame from the XGM sources. ---
# Besides the ixPos/iyPos keys named in the message, the photonFlux key is
# requested as well.
print()
print("Retrieving data frame for XGM ixPos & iyPos...")
query_started = monotonic()
df = run.get_dataframe(fields=[
    ("*_XGM/*", "*.i[xy]Pos"),
    ("*_XGM/*", "*.photonFlux"),
])
query_elapsed = monotonic() - query_started
print(query_elapsed, "seconds")
print(df.head())

Example #3

Show file

class XasAnalyzer(abc.ABC):
    """Abstract class for X-ray Absorption Spectroscopy analysis."""
    def __init__(self, run_folder):
        """Open the run and load the control-data frames.

        Three data frames are read from the run (SCS XGM, SA3 XGM and the
        mono source), each restricted to keys matching ``'*value'``. Their
        column labels are shortened to the text after the last ``'/'``.

        :param str run_folder: full path of the run folder.
        """
        def load_control_frame(source_label):
            # Fetch the '*value' keys of one source and strip everything up
            # to the last '/' from the column labels.
            frame = self._run.get_dataframe(
                fields=[(self._sources[source_label], '*value')])
            frame.rename(columns=lambda x: x.split('/')[-1], inplace=True)
            return frame

        self._run = RunDirectory(run_folder)

        # Short labels for the source names used by this class.
        self._sources = {
            'MONO': 'SA3_XTD10_MONO/MDL/PHOTON_ENERGY',
            'XGM': 'SCS_BLU_XGM/XGM/DOOCS',
            'XGM_OUTPUT': 'SCS_BLU_XGM/XGM/DOOCS:output',
            'SA3_XGM': 'SA3_XTD10_XGM/XGM/DOOCS',
            'SA3_XGM_OUTPUT': 'SA3_XTD10_XGM/XGM/DOOCS:output'
        }

        # DataFrames for the XGM control data
        self._xgm_df = load_control_frame('XGM')
        self._sa3_xgm_df = load_control_frame('SA3_XGM')

        # DataFrame for the SoftMono control data
        self._mono_df = load_control_frame('MONO')

        # Placeholders; nothing in this method fills them.
        self._photon_energies = None  # photon energies for each pulse
        self._I0 = None
        self._I1 = OrderedDict()

        self._data = None  # pulse-resolved data in DataFrame

    def info(self):
        """Print out information of the run(s)."""
        first_train = self._run.train_ids[0]
        last_train = self._run.train_ids[-1]
        train_count = len(self._run.train_ids)
        span_sec = (last_train - first_train) / 10
        span_txt = str(datetime.timedelta(seconds=span_sec))
        photon_energies = self._mono_df['actualEnergy']

        print('# of trains:          ', train_count)
        print('Duration:             ', span_txt)
        print('First train ID:       ', first_train)
        print('Last train ID:        ', last_train)
        print('Min photon energy:    ', round(photon_energies.min(), 4), 'eV')
        print('Max photon energy:    ', round(photon_energies.max(), 4), 'eV')

        print('MCP channels:')
        for ch, value in self._channels.items():
            print('    - {}: {}'.format(ch, value['raw']))

    def _check_sources(self):
        """Check all the required sources are in the data."""
        sources = self._run.all_sources
        for src in self._sources.values():
            if src not in sources:
                raise ValueError("Source not found: {}!".format(src))

    def plot_xgm_run(self, *, figsize=(8, 5.6)):
        """Plot the train-resolved XGM data of the run.

        The upper panel shows the ``pulseEnergy.photonFlux`` column and, on
        a twin y-axis, the number of pulses. The lower panel shows the x and
        y beam positions multiplied by 1000.

        :param tuple figsize: figure size.

        :return: the figure and a tuple of the three axes (upper panel,
            its twin axis, lower panel).
        """
        import matplotlib.pyplot as plt
        plt.rcParams['font.size'] = 12

        xgm = self._xgm_df

        fig, (energy_ax, position_ax) = plt.subplots(2, 1, figsize=figsize)
        pulses_ax = energy_ax.twinx()

        energy_lines = energy_ax.plot(xgm['pulseEnergy.photonFlux'],
                                      label=r"Pulse energy ($\mu$J)")
        # "nummberOfBrunches" is indeed the name in the Karabo Device
        # implementation. For more details, please check
        # https://git.xfel.eu/gitlab/karaboDevices/xgmDoocs
        pulse_counts = xgm['pulseEnergy.nummberOfBrunches']
        pulse_lines = pulses_ax.plot(pulse_counts, label="Number of pulses",
                                     c='g')

        # One legend covering the lines of both y-axes.
        handles = energy_lines + pulse_lines
        energy_ax.legend(handles, [line.get_label() for line in handles])
        energy_ax.set_ylabel(r"Pulse energy ($\mu$J)")
        pulses_ax.set_ylabel("Number of pulses")

        # When the pulse count varies by less than 5, use a fixed window of
        # +/- 4.5 around the (truncated) mean for the twin axis.
        if pulse_counts.max() - pulse_counts.min() < 5:
            centre = int(pulse_counts.mean())
            pulses_ax.set_ylim((centre - 4.5, centre + 4.5))

        position_columns = (('beamPosition.ixPos', "x"),
                            ('beamPosition.iyPos', "y"))
        for column, axis_label in position_columns:
            position_ax.plot(1000 * xgm[column], label=axis_label)

        position_ax.set_xlabel("Train ID")
        position_ax.set_ylabel(r"Beam position ($\mu$m)")
        position_ax.legend()
        fig.tight_layout()

        return fig, (energy_ax, pulses_ax, position_ax)

    def plot_xgm_train(self, *, index=0, train_id=None, figsize=(8, 5.6)):
        """Plot the XGM measurement of a single train.

        The ``data.intensityTD`` key of the SA3 XGM output (upper panel) and
        of the SCS XGM output (lower panel) is plotted for one train.

        :param int index: train index. Ignored if train_id is given.
        :param int train_id: train ID.
        :param tuple figsize: figure size.

        :return: the figure and a tuple of the two axes (upper, lower).
        """
        import matplotlib.pyplot as plt
        plt.rcParams['font.size'] = 12

        key = "data.intensityTD"
        selection = self._run.select("*XGM/*", key)
        # An explicit train ID takes precedence over the positional index.
        if train_id is not None:
            tid, train_data = selection.train_from_id(train_id)
        else:
            tid, train_data = selection.train_from_index(index)

        fig, (sa3_ax, scs_ax) = plt.subplots(2, 1, figsize=figsize)

        panels = ((sa3_ax, 'SA3_XGM_OUTPUT', "SA3 XGM"),
                  (scs_ax, 'XGM_OUTPUT', "SCS XGM"))

        for axis, source_label, _ in panels:
            axis.plot(train_data[self._sources[source_label]][key],
                      marker='.')

        for axis, _, title in panels:
            axis.set_ylabel(r"Pulse energy ($\mu$J)")
            axis.set_xlim((-0.5, 100.5))
            axis.set_title(title)

        scs_ax.set_xlabel("Pulse ID")
        fig.suptitle("Train ID: {}".format(tid))
        # Leave room at the top for the figure-level title.
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])

        return fig, (sa3_ax, scs_ax)

    @abc.abstractmethod
    def process(self, *args, **kwargs):
        """Process the run data.

        :return: the current instance.
        """
        pass

    def select(self, keys, lower=-np.inf, upper=np.inf):
        """Select data within the given boundaries.

        It modifies the internal data inplace.

        :param str/list/tuple/numpy.ndarray: key(s) for applying the filter.
        :param float lower: lower boundary (included).
        :param float upper: higher boundary (included).

        :return: the current instance.
        """
        n0 = len(self._data)
        if isinstance(keys, (list, tuple, np.ndarray)):
            # TODO: remove this for loop
            for key in keys:
                self._data.query("{} <= {} <= {}".format(lower, key, upper),
                                 inplace=True)
        else:
            self._data.query("{} <= {} <= {}".format(lower, keys, upper),
                             inplace=True)

        print("{} out of {} data are selected!".format(len(self._data), n0))
        return self

    @property
    @abc.abstractmethod
    def data(self):
        """Get the pulse-resolved data in pandas.DataFrame."""
        pass

    @abc.abstractmethod
    def compute_total_absorption(self):
        """Compute absorption for all data."""
        pass

    @abc.abstractmethod
    def compute_spectrum(self, n_bins=20, point_wise=False):
        """Compute spectrum.

        :param int n_bins: number of energy bins.
        :param bool point_wise: if True, calculate the absorption point wise
            and then average. Otherwise, average over I0 and I1 first and
            then calculate the absorption. Default = False
        """
        pass

    @abc.abstractmethod
    def plot_correlation(self, *args, **kwargs):
        """Generate correlation plots."""
        pass

    @abc.abstractmethod
    def plot_spectrum(self, *args, **kwargs):
        """Generate spectrum plots.

        Declared abstract; it accepts arbitrary positional and keyword
        arguments.
        """