Example no. 1
def test_upcasing():
    meta = MetaDict({'wibble': 1, 'WOBBLE': 2})
    # __getitem__
    assert meta['wibble'] == meta['WIBBLE']
    # get
    assert meta.get('wibble') == meta.get('WIBBLE')
    # has_key
    assert ('wibble' in meta) == ('WIBBLE' in meta)
    # Copy
    meta2 = meta.copy()
    assert meta2 == meta
    # pop
    assert meta.pop('wibble') == meta2.pop('WIBBLE')
    # update
    meta.update({'spam': 'eggs'})
    meta2.update({'SPAM': 'eggs'})
    assert meta == meta2
    # setdefault
    meta.setdefault('dave', 3)
    meta2.setdefault('DAVE', 3)
    assert meta.get('DAVE') == meta2.get('dave')
    # __setitem__
    meta['wibble'] = 10
    assert meta['wibble'] == 10
    meta['WIBBLE'] = 20
    assert meta['wibble'] == 20
    # __contains__
    assert 'wibble' in meta
    assert 'WIBBLE' in meta
Example no. 2
    def _parse_hdus(cls, hdulist):
        """Parses LYRA HDU list from a FITS file"""
        # Open file with PyFITS
        fits_record = hdulist[1].data
        # secondary_header = hdulist[1].header

        # Start and end dates.  Different LYRA FITS files have
        # different tags for the date obs.
        """
        print(hdulist[0].header)
        if 'date-obs' in hdulist[0].header:
            start_str = hdulist[0].header['date-obs']
        elif 'date_obs' in hdulist[0].header:
            start_str = hdulist[0].header['date_obs']
        # end_str = hdulist[0].header['date-end']
        """
        metadata = MetaDict(OrderedDict(hdulist[0].header))
        start_str = metadata.get('date-obs', metadata.get('date_obs', ''))

        # start = datetime.datetime.strptime(start_str, '%Y-%m-%dT%H:%M:%S.%f')
        start = parse_time(start_str)
        # end = datetime.datetime.strptime(end_str, '%Y-%m-%dT%H:%M:%S.%f')

        # First column are times.  For level 2 data, the units are [s].
        # For level 3 data, the units are [min]
        if hdulist[1].header['TUNIT1'] == 's':
            times = start + TimeDelta(fits_record.field(0)*u.second)
        elif hdulist[1].header['TUNIT1'] == 'MIN':
            td = [int(n) for n in fits_record.field(0)]
            times = start + TimeDelta(td*u.minute)
        else:
            raise ValueError("Time unit in LYRA fits file not recognised.  "
                             "Value = {0}".format(hdulist[1].header['TUNIT1']))

        # Rest of columns are the data
        table = {}

        for i, col in enumerate(fits_record.columns[1:-1]):
            # temporary patch for big-endian data bug on pandas 0.13
            if fits_record.field(i + 1).dtype.byteorder == '>' and sys.byteorder == 'little':
                table[col.name] = fits_record.field(i + 1).byteswap().newbyteorder()
            else:
                table[col.name] = fits_record.field(i + 1)

        # Return the header and the data
        times.precision = 9
        data = pandas.DataFrame(table, index=times.isot.astype('datetime64'))
        data.sort_index(inplace=True)

        # Add the units data
        units = OrderedDict([('CHANNEL1', u.W/u.m**2),
                             ('CHANNEL2', u.W/u.m**2),
                             ('CHANNEL3', u.W/u.m**2),
                             ('CHANNEL4', u.W/u.m**2)])
        # ToDo: check: http://www.wmo-sat.info/oscar/instruments/view/733
        return data, metadata, units
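In practice this parser is reached through the TimeSeries factory rather than called directly; a minimal, hedged sketch of that path (the file name below is hypothetical and must point at a real LYRA level-2 or level-3 FITS file):

import sunpy.timeseries

ts_lyra = sunpy.timeseries.TimeSeries('lyra_20200101-000000_lev2_std.fits', source='lyra')
print(ts_lyra.columns)  # expected: ['CHANNEL1', 'CHANNEL2', 'CHANNEL3', 'CHANNEL4']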
Example no. 3
    def get(self, keys, time=None, colname=None, **kwargs):
        """
        Return a TimeSeriesMetaData object of all entries matching the time and
        colname filters, with each dictionary reduced to the key/value pairs
        whose key matches one of the given input keys.

        Parameters
        ----------
        keys : `str`
            The key(s) to be searched for in the dictionary.

        time : `str` or `~datetime.datetime`, optional
            The string (parsed using `~sunpy.time.parse_time`) or datetime
            that you need metadata for.

        colname : `str`, optional
            A string that can be used to narrow results to specific columns.

        itemised : `bool`, optional
            Option to allow the return of the time ranges and column names
            (as a list) that match each given value.

        Returns
        -------
        metadata : `~sunpy.timeseries.metadata.TimeSeriesMetaData`
            A TimeSeriesMetaData that contains all matching metadata entries but
            with only the requested key/value pairs in the MetaDict objects.
        """
        # Make a list of keys if only one is given
        if isinstance(keys, str):
            keys = [keys]

        # Find all metadata entries for the given time/colname filters
        full_metadata = self.find(time=time, colname=colname)
        metadata = []

        # Append to metadata only key:value pairs with requested keys
        for i, entry in enumerate(full_metadata.metadata):
            metadict = MetaDict()
            for curkey, value in entry[2].items():
                for key in keys:
                    if curkey.lower() == key.lower():
                        metadict.update({key: value})
            metadata.append((entry[0], entry[1], metadict))

        # Return a TimeSeriesMetaData object
        return TimeSeriesMetaData(meta=metadata)
Example no. 4
File: eve.py Project: ehsteve/sunpy
    def _parse_hdus(cls, hdulist):
        header = MetaDict(OrderedDict(hdulist[0].header))
        # Adding telescope to MetaData
        header.update({'TELESCOP': hdulist[1].header['TELESCOP'].split()[0]})

        start_time = parse_time(hdulist[1].header['T_OBS'])
        times = start_time + TimeDelta(hdulist[1].data['SOD']*u.second)

        colnames = ['QD', 'CH_18', 'CH_26', 'CH_30', 'CH_36']

        all_data = [hdulist[1].data[x] for x in colnames]
        data = DataFrame(np.array(all_data).T, index=times.isot.astype('datetime64'), columns=colnames)
        data.sort_index(inplace=True)

        units = OrderedDict([('QD', u.W/u.m**2),
                             ('CH_18', u.W/u.m**2),
                             ('CH_26', u.W/u.m**2),
                             ('CH_30', u.W/u.m**2),
                             ('CH_36', u.W/u.m**2)])

        return data, header, units
Example no. 5
    def get(self, keys, time=None, colname=None):
        """
        Return a `~sunpy.timeseries.metadata.TimeSeriesMetaData` with all
        entries matching the filters which also contain the given input key.

        Parameters
        ----------
        keys : `str`
            The key(s) to be searched for in the dictionary.
        time : {parse_time_types}, optional
            A `~sunpy.time.parse_time` parsable string that you need metadata for.
            Defaults to `None`.
        colname : `str`, optional
            A string that can be used to narrow results to specific columns.

        Returns
        -------
        metadata : `~sunpy.timeseries.metadata.TimeSeriesMetaData`
            A TimeSeriesMetaData that contains all matching metadata entries but
            with only the requested key/value pairs in the MetaDict objects.
        """
        # Make a list of keys if only one is given
        if isinstance(keys, str):
            keys = [keys]

        # Find all metadata entries for the given time/colname filters
        full_metadata = self.find(time=time, colname=colname)
        metadata = []

        # Append to metadata only key:value pairs with requested keys
        for i, entry in enumerate(full_metadata.metadata):
            metadict = MetaDict()
            for curkey, value in entry[2].items():
                for key in keys:
                    if curkey.lower() == key.lower():
                        metadict.update({key: value})
            metadata.append((entry[0], entry[1], metadict))

        # Return a TimeSeriesMetaData object
        return TimeSeriesMetaData(meta=metadata)
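A short, self-contained usage sketch of this `get` method. The data values and the 'instrument'/'observer' keys below are hypothetical; the construction pattern mirrors the TimeSeries examples elsewhere on this page.

import datetime
from collections import OrderedDict

import astropy.units as u
import numpy as np
from pandas import DataFrame

import sunpy.timeseries
from sunpy.util.metadata import MetaDict

times = [datetime.datetime(2020, 1, 1) + datetime.timedelta(minutes=m) for m in range(10)]
data = DataFrame(np.arange(10.0), index=times, columns=['intensity'])
meta = MetaDict({'instrument': 'demo', 'observer': 'someone'})
units = OrderedDict([('intensity', u.W / u.m**2)])

ts = sunpy.timeseries.TimeSeries(data, meta, units)
# Keep only the 'instrument' key from every matching MetaDict entry
filtered = ts.meta.get('instrument', colname='intensity')
print(filtered.metadata[0][2])  # roughly: MetaDict([('instrument', 'demo')])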
Example no. 6
def test_generic_construction_basic():
    # Generate the data and the corresponding dates
    base = parse_time(datetime.datetime.today())
    times = base - TimeDelta(np.arange(24 * 60) * u.minute)
    intensity = np.sin(np.arange(0, 12 * np.pi, ((12 * np.pi) / (24 * 60))))

    # Create the data DataFrame, header MetaDict and units OrderedDict
    data = DataFrame(intensity, index=times, columns=['intensity'])
    units = OrderedDict([('intensity', u.W / u.m**2)])
    meta = MetaDict({'key': 'value'})

    # Create normal TS from dataframe and check
    ts_generic = sunpy.timeseries.TimeSeries(data, meta, units)
    assert isinstance(ts_generic,
                      sunpy.timeseries.timeseriesbase.GenericTimeSeries)
    assert ts_generic.columns == ['intensity']
    assert ts_generic.units == units
    assert ts_generic.meta.metadata[0][2] == meta

    # Create TS using a tuple of values
    ts_tuple = sunpy.timeseries.TimeSeries(((data, meta, units), ))
    assert isinstance(ts_tuple,
                      sunpy.timeseries.timeseriesbase.GenericTimeSeries)
    assert ts_generic == ts_tuple
Example no. 7
    def __init__(self, data, meta=None, units=None, **kwargs):
        self._data = data
        tr = self.time_range
        # Check metadata input
        if meta is None:
            # No meta given, so default
            self.meta = TimeSeriesMetaData(MetaDict(), tr,
                                           list(self._data.columns.values))
        elif isinstance(meta, (dict, OrderedDict, MetaDict)):
            # Given a dict of metadata values; infer the timerange and colnames from the data
            self.meta = TimeSeriesMetaData(meta, tr,
                                           list(self._data.columns.values))
        elif isinstance(meta, tuple):
            # Given the values all in a tuple
            self.meta = TimeSeriesMetaData(meta, tr,
                                           list(self._data.columns.values))
        else:
            # Should have a list of 3-tuples giving a complex metadata list.
            self.meta = meta

        if units is None:
            self.units = {}
        else:
            self.units = units
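A minimal sketch of the three metadata forms this constructor accepts. The DataFrame contents are hypothetical, and the sketch assumes `GenericTimeSeries` is importable from `sunpy.timeseries` as in the public API.

import datetime

import numpy as np
from pandas import DataFrame

from sunpy.timeseries import GenericTimeSeries

index = [datetime.datetime(2020, 1, 1) + datetime.timedelta(minutes=m) for m in range(5)]
df = DataFrame(np.ones(5), index=index, columns=['intensity'])

ts_a = GenericTimeSeries(df)                               # no meta: an empty MetaDict is wrapped
ts_b = GenericTimeSeries(df, meta={'instrument': 'demo'})  # plain dict: timerange/colnames inferred
ts_c = GenericTimeSeries(df, meta=ts_b.meta)               # existing TimeSeriesMetaData stored as given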
Example no. 8
    def _parse_hdus(cls, hdulist):
        """
        Parses a GBM CSPEC `astropy.io.fits.HDUList`.

        Parameters
        ----------
        hdulist : `~astropy.io.fits.HDUList`
            The GBM CSPEC HDU list to parse.
        """
        header = MetaDict(OrderedDict(hdulist[0].header))
        # these GBM files have three FITS extensions.
        # extn1 - this gives the energy range for each of the 128 energy bins
        # extn2 - this contains the data, e.g. counts, exposure time, time of observation
        # extn3 - eclipse times?
        energy_bins = hdulist[1].data
        count_data = hdulist[2].data

        # rebin the 128 energy channels into some summary ranges
        # 4-15 keV, 15 - 25 keV, 25-50 keV, 50-100 keV, 100-300 keV, 300-800 keV, 800 - 2000 keV
        # put the data in the units of counts/s/keV
        summary_counts = _bin_data_for_summary(energy_bins, count_data)

        # get the time information in datetime format with the correct MET adjustment
        gbm_times = Time([fermi.met_to_utc(t) for t in count_data['time']])
        gbm_times.precision = 9
        gbm_times = gbm_times.isot.astype('datetime64')

        column_labels = ['4-15 keV', '15-25 keV', '25-50 keV', '50-100 keV',
                         '100-300 keV', '300-800 keV', '800-2000 keV']

        # Add the units data
        units = OrderedDict([('4-15 keV', u.ct / u.s / u.keV), ('15-25 keV', u.ct / u.s / u.keV),
                             ('25-50 keV', u.ct / u.s / u.keV), ('50-100 keV', u.ct / u.s / u.keV),
                             ('100-300 keV', u.ct / u.s / u.keV), ('300-800 keV', u.ct / u.s / u.keV),
                             ('800-2000 keV', u.ct / u.s / u.keV)])
        return pd.DataFrame(summary_counts, columns=column_labels, index=gbm_times), header, units
Example no. 9
def test_init_with_illegal_arg():
    """
    Ensure attempt to initialise with a nonsensical data structure is rejected.
    """
    with pytest.raises(TypeError):
        MetaDict({'a', 'b', 'c', 'd'})
Example no. 10
ts_goes.to_array()
# Note: the array doesn't include the datetime index column.

##############################################################################
# Creating a TimeSeries from scratch can be done in a lot of ways, much like a
# Map.
# Input data can be in the form of a Pandas DataFrame (preferred), an astropy
# Table or a Numpy Array.
# To generate some data and the corresponding dates
base = datetime.datetime.today()
dates = Time(base) - TimeDelta(np.arange(24 * 60) * u.minute)
intensity = np.sin(np.arange(0, 12 * np.pi, ((12 * np.pi) / (24 * 60))))
# Create the data DataFrame, header MetaDict and units OrderedDict
data = DataFrame(intensity, index=dates, columns=['intensity'])
units = OrderedDict([('intensity', u.W / u.m**2)])
meta = MetaDict({'key': 'value'})
# Create the time series
ts_custom = sunpy.timeseries.TimeSeries(data, meta, units)

# A more manual dataset would be a numpy array, which we can create using:
tm = Time(['2000:002', '2001:345', '2002:345'])
a = [1, 4, 5]
b = [2.0, 5.0, 8.2]
c = ['x', 'y', 'z']
arr = np.stack([tm, a, b, c], axis=1)
# Note: this array needs to have the times in the first column; these can be in
# any form that can be converted using astropy.time.Time().

# We can use the array directly:
ts_from_arr = sunpy.timeseries.TimeSeries(arr, {})
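The comments above also list an astropy Table as an accepted input form. A minimal sketch of that path, reusing `dates`, `intensity`, `meta` and `units` from the snippet above and assuming (as the factory code later on this page does) that the first Table column holds the times:

from astropy.table import Table

table = Table([dates, intensity], names=['time', 'intensity'])
ts_from_table = sunpy.timeseries.TimeSeries(table, meta, units)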
Example no. 11
def synthetic_magnetogram(bottom_left_coord,
                          top_right_coord,
                          shape: u.pixel,
                          centers,
                          sigmas: u.arcsec,
                          amplitudes: u.Gauss,
                          observer=None):
    """
    Compute synthetic magnetogram using 2D guassian "sunspots"
    
    Parameters
    ----------
    bottom_left_coord : `~astropy.coordinates.SkyCoord`
        Bottom left corner
    top_right_coord : `~astropy.coordinates.SkyCoord`
        Top right corner
    shape : `~astropy.units.Quantity`
        Dimensionality of the magnetogram
    centers : `~astropy.coordinates.SkyCoord`
        Center coordinates of flux concentration
    sigmas : `~astropy.units.Quantity`
        Standard deviation of flux concentration with shape `(N,2)`, with `N` the
        number of flux concentrations
    amplitudes : `~astropy.units.Quantity`
        Amplitude of flux concentration with shape `(N,)`
    observer : `~astropy.coordinates.SkyCoord`, optional
        Defaults to Earth observer at current time
    """
    time_now = astropy.time.Time.now()
    if observer is None:
        observer = sunpy.coordinates.ephemeris.get_earth(time=time_now)
    # Transform to HPC frame
    bottom_left_coord = bottom_left_coord.transform_to(
        sunpy.coordinates.Helioprojective(observer=observer))
    top_right_coord = top_right_coord.transform_to(
        sunpy.coordinates.Helioprojective(observer=observer))
    # Setup array
    delta_x = (top_right_coord.Tx - bottom_left_coord.Tx).to(u.arcsec)
    delta_y = (top_right_coord.Ty - bottom_left_coord.Ty).to(u.arcsec)
    dx = delta_x / shape[0]
    dy = delta_y / shape[1]
    data = np.zeros((int(shape[1].value), int(shape[0].value)))
    xphysical, yphysical = np.meshgrid(
        np.arange(shape[0].value) * shape.unit * dx,
        np.arange(shape[1].value) * shape.unit * dy)
    # Add sunspots
    centers = centers.transform_to(
        sunpy.coordinates.Helioprojective(observer=observer))
    for c, s, a in zip(centers, sigmas, amplitudes):
        xc_2 = (xphysical - (c.Tx - bottom_left_coord.Tx)).to(
            u.arcsec).value**2.0
        yc_2 = (yphysical - (c.Ty - bottom_left_coord.Ty)).to(
            u.arcsec).value**2.0
        data += a.to(
            u.Gauss).value * np.exp(-xc_2 /
                                    (2 * s[0].to(u.arcsec).value**2) - yc_2 /
                                    (2 * s[1].to(u.arcsec).value**2))
    # Build metadata
    meta = MetaDict({
        'telescop': 'synthetic_magnetic_imager',
        'instrume': 'synthetic_magnetic_imager',
        'detector': 'synthetic_magnetic_imager',
        'bunit': 'Gauss',
        'ctype1': 'HPLN-TAN',
        'ctype2': 'HPLT-TAN',
        'hgln_obs': observer.transform_to('heliographic_stonyhurst').lon.to(u.deg).value,
        'hglt_obs': observer.transform_to('heliographic_stonyhurst').lat.to(u.deg).value,
        'cunit1': 'arcsec',
        'cunit2': 'arcsec',
        'crpix1': (shape[0].value + 1) / 2.,
        'crpix2': (shape[1].value + 1) / 2.,
        'cdelt1': dx.value,
        'cdelt2': dy.value,
        'crval1': ((bottom_left_coord.Tx + top_right_coord.Tx) / 2.).to(u.arcsec).value,
        'crval2': ((bottom_left_coord.Ty + top_right_coord.Ty) / 2.).to(u.arcsec).value,
        'dsun_obs': observer.transform_to('heliographic_stonyhurst').radius.to(u.m).value,
        'dsun_ref': observer.transform_to('heliographic_stonyhurst').radius.to(u.m).value,
        'rsun_ref': const.R_sun.to(u.m).value,
        'rsun_obs': ((const.R_sun / observer.transform_to('heliographic_stonyhurst').radius
                      ).decompose() * u.radian).to(u.arcsec).value,
        't_obs': time_now.iso,
        'date-obs': time_now.iso,
    })
    return GenericMap(data, meta)
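A hedged usage sketch of this function. The field of view, grid shape, and sunspot parameters below are made-up values, and the sketch assumes a sunpy version whose `Helioprojective` frame accepts `observer='earth'` style observers via `get_earth`.

import astropy.units as u
from astropy.coordinates import SkyCoord
from sunpy.coordinates import Helioprojective, get_earth

obstime = '2020-01-01'
observer = get_earth(obstime)
hpc_frame = Helioprojective(observer=observer, obstime=obstime)

blc = SkyCoord(Tx=-150 * u.arcsec, Ty=-150 * u.arcsec, frame=hpc_frame)
trc = SkyCoord(Tx=150 * u.arcsec, Ty=150 * u.arcsec, frame=hpc_frame)
centers = SkyCoord(Tx=[-50, 50] * u.arcsec, Ty=[0, 0] * u.arcsec, frame=hpc_frame)

# Two Gaussian flux concentrations of opposite polarity on a 128x128 pixel grid
synthetic_map = synthetic_magnetogram(
    blc, trc, (128, 128) * u.pixel, centers,
    sigmas=[[10, 10], [10, 10]] * u.arcsec,
    amplitudes=[1e3, -1e3] * u.G,
    observer=observer)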
Example no. 12
class InstrumentBase(object):
    """
    Base class for instruments. This object is not meant to be instantiated directly. Instead,
    specific instruments should subclass this base object and implement a `calculate_intensity_kernel`
    method for that specific instrument.

    Parameters
    ----------
    observing_time : `~astropy.units.Quantity`
        Tuple of start and end observing times
    observer_coordinate : `~astropy.coordinates.SkyCoord`
        Coordinate of the observing instrument
    assumed_cross_section : `~astropy.units.Quantity`, optional
        Approximation of the loop cross-section. This defines the filling factor.
    pad_fov : `~astropy.units.Quantity`, optional
        Two-dimensional array specifying the padding to apply to the field of view of the synthetic
        image in both directions. If None, no padding is applied and the field of view is defined
        by the maximal extent of the loop coordinates in each direction.
    """
    fits_template = MetaDict()

    @u.quantity_input
    def __init__(self,
                 observing_time: u.s,
                 observer,
                 assumed_cross_section=1e14 * u.cm**2,
                 pad_fov=None):
        self.observing_time = np.arange(*observing_time.to('s').value,
                                        self.cadence.to('s').value) * u.s
        self.observer = observer.transform_to(HeliographicStonyhurst)
        self.assumed_cross_section = assumed_cross_section
        self.pad_fov = (0, 0) * u.arcsec if pad_fov is None else pad_fov

    def calculate_intensity_kernel(self, *args, **kwargs):
        """
        Converts emissivity for a particular transition to counts per detector channel. When writing
        a new instrument class, this method should be overridden.
        """
        raise NotImplementedError('No calculate_intensity_kernel method implemented.')

    def los_velocity(self, v_x, v_y, v_z):
        """
        Compute the LOS velocity for the instrument observer
        """
        # NOTE: transform from HEEQ to HCC with respect to the instrument observer
        Phi_0 = self.observer.lon.to(u.radian)
        B_0 = self.observer.lat.to(u.radian)
        v_los = v_z * np.sin(B_0) + v_x * np.cos(B_0) * np.cos(
            Phi_0) + v_y * np.cos(B_0) * np.sin(Phi_0)
        # NOTE: Negative sign to be consistent with convention v_los > 0 away from observer
        return -v_los

    @property
    def projected_frame(self):
        return Helioprojective(observer=self.observer,
                               obstime=self.observer.obstime)

    @property
    @u.quantity_input
    def pixel_area(self) -> u.cm**2:
        """
        Pixel area
        """
        w_x, w_y = (1 * u.pix * self.resolution).to(
            u.radian).value * self.observer.radius
        return w_x * w_y

    def convolve_with_psf(self, data):
        # TODO: do the convolution here!
        return data

    def observe(self, skeleton, save_directory, channels=None, **kwargs):
        """
        Calculate the time dependent intensity for all loops and project them along
        the line-of-sight as defined by the instrument observer.

        Parameters
        ----------
        skeleton : object
            The loop collection to observe; must expose ``loops``,
            ``all_coordinates`` and ``all_coordinates_centers`` as used below.
        save_directory : `str`
            Directory the synthesized FITS maps are written to.
        channels : iterable, optional
            Subset of ``self.channels`` to observe. Defaults to all channels.
        """
        if channels is None:
            channels = self.channels
        client = distributed.get_client()
        coordinates = skeleton.all_coordinates
        coordinates_centers = skeleton.all_coordinates_centers
        for channel in channels:
            kernels = client.map(self.calculate_intensity_kernel,
                                 skeleton.loops,
                                 channel=channel,
                                 **kwargs)
            kernels_interp = client.map(self.interpolate_to_instrument_time,
                                        kernels,
                                        skeleton.loops,
                                        observing_time=self.observing_time)
            files = client.map(self.write_kernel_to_file,
                               kernels_interp,
                               skeleton.loops,
                               channel=channel,
                               name=self.name)
            # NOTE: block here to avoid pileup of tasks that can overwhelm the scheduler
            distributed.wait(files)
            for i, t in enumerate(self.observing_time):
                m = self.integrate_los(t, channel, skeleton, coordinates,
                                       coordinates_centers)
                m = self.convolve_with_psf(m)
                m.save(os.path.join(save_directory,
                                    f'm_{channel.name}_t{i}.fits'),
                       overwrite=True)

    @staticmethod
    def write_kernel_to_file(kernel, loop, channel, name):
        root = zarr.open(loop.model_results_filename, 'a')
        if name not in root[loop.name]:
            root[loop.name].create_group(name)
        ds = root[f'{loop.name}/{name}'].create_dataset(
            channel.name,
            data=kernel.value,
            chunks=(None, ) + kernel.shape[:1],
            overwrite=True,
        )
        ds.attrs['unit'] = kernel.unit.to_string()

    @staticmethod
    def interpolate_to_instrument_time(kernel, loop, observing_time):
        """
        Interpolate the intensity kernel from the simulation time to the cadence
        of the instrument for the desired observing window.
        """
        time = loop.time
        if time.shape == (1, ):
            if time != observing_time:
                raise ValueError(
                    'Model and observing times are not equal for a single model time step.'
                )
            return kernel
        f_t = interp1d(time.to(observing_time.unit).value,
                       kernel.value,
                       axis=0,
                       fill_value='extrapolate')
        return f_t(observing_time.value) * kernel.unit

    def integrate_los(self, time, channel, skeleton, coordinates,
                      coordinates_centers):
        client = distributed.get_client()
        # Get Coordinates
        coords = coordinates_centers.transform_to(self.projected_frame)
        # Compute weights
        i_time = np.where(time == self.observing_time)[0][0]
        widths = np.concatenate(
            [l.field_aligned_coordinate_width for l in skeleton.loops])
        loop_area = np.concatenate(
            [l.cross_sectional_area for l in skeleton.loops])
        root = skeleton.loops[0].zarr_root
        # NOTE: do this outside of the client.map call to make Dask happy
        path = f'{{}}/{self.name}/{channel.name}'
        kernels = np.concatenate(
            client.gather(
                client.map(
                    lambda l: root[path.format(l.name)][i_time, :],
                    skeleton.loops,
                )))
        unit_kernel = u.Unit(
            root[f'{skeleton.loops[0].name}/{self.name}/{channel.name}'].
            attrs['unit'])
        area_ratio = (loop_area / self.pixel_area).decompose()
        weights = area_ratio * widths * (kernels * unit_kernel)
        visible = is_visible(coords, self.observer)
        # Bin
        bins, (blc, trc) = self.get_detector_array(coordinates)
        hist, _, _ = np.histogram2d(
            coords.Tx.value,
            coords.Ty.value,
            bins=bins,
            range=((blc.Tx.value, trc.Tx.value), (blc.Ty.value, trc.Ty.value)),
            weights=weights.value * visible,
        )
        header = self.get_header(channel, coordinates)
        header['bunit'] = weights.unit.decompose().to_string()
        header['date-obs'] = (self.observer.obstime + time).isot

        return Map(hist.T, header)

    def get_header(self, channel, coordinates):
        """
        Create the FITS header for a given channel and set of loop coordinates
        that define the needed FOV.
        """
        bins, bin_range = self.get_detector_array(coordinates)
        header = make_fitswcs_header(
            (bins[1], bins[0]),  # swap order because it expects (row, column)
            bin_range[0],  # align with the lower left corner of the lower left pixel
            reference_pixel=(-0.5, -0.5) * u.pixel,  # center of the lower left pixel is (0, 0)
            scale=self.resolution,
            instrument=f'{self.detector}_{channel.telescope_number}',
            telescope=self.telescope,
            wavelength=channel.channel,
        )
        return header

    def get_detector_array(self, coordinates):
        """
        Calculate the number of pixels in the detector FOV and the physical coordinates of the
        bottom left and top right corners.
        """
        coordinates = coordinates.transform_to(self.projected_frame)
        # NOTE: this is the coordinate of the bottom left corner of the bottom left corner pixel,
        # NOT the coordinate at the center of the pixel!
        bottom_left_corner = SkyCoord(
            Tx=coordinates.Tx.min() - self.pad_fov[0],
            Ty=coordinates.Ty.min() - self.pad_fov[1],
            frame=coordinates.frame)
        bins_x = int(
            np.ceil((coordinates.Tx.max() + self.pad_fov[0] -
                     bottom_left_corner.Tx) / self.resolution[0]).value)
        bins_y = int(
            np.ceil((coordinates.Ty.max() + self.pad_fov[1] -
                     bottom_left_corner.Ty) / self.resolution[1]).value)
        # Compute right corner after the fact to account for rounding in bin numbers
        # NOTE: this is the coordinate of the top right corner of the top right corner pixel, NOT
        # the coordinate at the center of the pixel!
        top_right_corner = SkyCoord(
            Tx=bottom_left_corner.Tx + self.resolution[0] * bins_x * u.pixel,
            Ty=bottom_left_corner.Ty + self.resolution[1] * bins_y * u.pixel,
            frame=coordinates.frame)
        return (bins_x, bins_y), (bottom_left_corner, top_right_corner)
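A minimal sketch of how a concrete instrument is meant to subclass this base class, per the docstring above. Every attribute value and the kernel formula below are hypothetical placeholders, not values from any real instrument module.

import astropy.units as u

class DemoInstrument(InstrumentBase):
    name = 'demo'
    telescope = 'DEMO'
    detector = 'DEMO'
    cadence = 10 * u.s                            # consumed by InstrumentBase.__init__
    resolution = (0.6, 0.6) * u.arcsec / u.pixel  # consumed by get_header/get_detector_array

    def calculate_intensity_kernel(self, loop, channel=None, **kwargs):
        # A real instrument converts each loop's emissivity into detector counts here;
        # this placeholder just returns a constant response for every loop.
        return 1 * u.ct / u.s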
Example no. 13
    def _parse_args(self, *args, **kwargs):
        """
        Parses an `args` list for data-header pairs. `args` can contain any mixture of the following
        entries:

        * tuples of (data, header, unit) (1)
        * data, header not in a tuple (1)
        * filename, which will be read
        * directory, from which all files will be read
        * glob, from which all files will be read
        * url, which will be downloaded and read
        * lists containing any of the above.

        (1) header/unit are optional and in either order, but data should be the first entry in each group.

        Examples
        --------
        self._parse_args(data, header,
                         (data, header),
                         ['file1', 'file2', 'file3'],
                         'file4',
                         'directory1',
                         '*.fits')
        """
        data_header_unit_tuples = list()
        data_header_pairs = list()
        already_timeseries = list()
        filepaths = list()

        # Account for nested lists of items. Simply outputs a single list of
        # items, nested lists are expanded to element level.
        args = expand_list(args)

        # For each of the arguments, handle each of the cases
        i = 0
        while i < len(args):
            arg = args[i]

            # Data-header pair in a tuple
            if (isinstance(arg, (np.ndarray, Table, pd.DataFrame))):
                # and self._validate_meta(args[i+1])):
                # Assume a Pandas Dataframe is given
                data = arg
                units = OrderedDict()
                meta = MetaDict()

                # Convert the data argument into a Pandas DataFrame if needed.
                if isinstance(data, Table):
                    # We have an Astropy Table:
                    data, meta, units = self._from_table(data)
                elif isinstance(data, np.ndarray):
                    # We have a numpy ndarray. We assume the first column is a dt index
                    data = pd.DataFrame(data=data[:, 1:], index=Time(data[:, 0]))

                # If there are 1 or 2 more arguments:
                for _ in range(2):
                    if (len(args) > i+1):
                        # If that next argument isn't data but is metadata or units:
                        if not isinstance(args[i+1], (np.ndarray, Table, pd.DataFrame)):
                            if self._validate_units(args[i+1]):
                                units.update(args[i+1])
                                i += 1  # an extra increment to account for the units
                            elif self._validate_meta(args[i+1]):
                                # if we have an astropy.io FITS header then convert
                                # to preserve multi-line comments
                                if isinstance(args[i+1], astropy.io.fits.header.Header):
                                    args[i+1] = MetaDict(sunpy.io.header.FileHeader(args[i+1]))
                                meta.update(args[i+1])
                                i += 1  # an extra increment to account for the meta

                # Add a 3-tuple for this TimeSeries.
                data_header_unit_tuples.append((data, meta, units))

            # Filepath
            elif (isinstance(arg, str) and
                  os.path.isfile(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)
                result = self._read_file(path, **kwargs)
                data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)

            # Directory
            elif (isinstance(arg, str) and
                  os.path.isdir(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)
                files = [os.path.join(path, elem) for elem in os.listdir(path)]
                for afile in files:
                    # returns a boolean telling us if it were read and either a
                    # tuple or the original filepath for reading by a source
                    result = self._read_file(afile, **kwargs)
                    data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths,
                                                                 result)

            # Glob
            elif isinstance(arg, str) and '*' in arg:

                files = glob.glob(os.path.expanduser(arg))
                for afile in files:
                    # returns a boolean telling us if it were read and either a
                    # tuple or the original filepath for reading by a source
                    result = self._read_file(afile, **kwargs)
                    data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths,
                                                                 result)

            # Already a TimeSeries
            elif isinstance(arg, GenericTimeSeries):
                already_timeseries.append(arg)

            # A URL
            elif (isinstance(arg, str) and
                  _is_url(arg)):
                url = arg
                path = download_file(url, get_and_create_download_dir())
                result = self._read_file(path, **kwargs)
                data_header_pairs, filepaths = _apply_result(data_header_pairs, filepaths, result)
            else:
                raise NoMatchError("File not found or invalid input")
            i += 1

        # TODO:
        # In the end, if there are already TimeSeries it should be put in the
        # same order as the input, currently they are not.
        return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
Example no. 14
    def __call__(self,
                 *args,
                 composite=False,
                 sequence=False,
                 silence_errors=False,
                 **kwargs):
        """ Method for running the factory. Takes arbitrary arguments and
        keyword arguments and passes them to a sequence of pre-registered types
        to determine which is the correct Map-type to build.

        Arguments args and kwargs are passed through to the validation
        function and to the constructor for the final type. For Map types,
        the validation function must take a data-header pair as an argument.

        Parameters
        ----------
        composite : `bool`, optional
            Indicates if collection of maps should be returned as a `~sunpy.map.CompositeMap`.
            Default is `False`.
        sequence : `bool`, optional
            Indicates if collection of maps should be returned as a `sunpy.map.MapSequence`.
            Default is `False`.
        silence_errors : `bool`, optional
            If set, ignore data-header pairs which cause an exception.
            Default is ``False``.

        Notes
        -----
        Extra keyword arguments are passed through to `sunpy.io.read_file` such
        as `memmap` for FITS files.
        """
        data_header_pairs = self._parse_args(*args,
                                             silence_errors=silence_errors,
                                             **kwargs)
        new_maps = list()

        # Loop over each registered type and check to see if WidgetType
        # matches the arguments.  If it does, use that type.
        for pair in data_header_pairs:
            if isinstance(pair, GenericMap):
                new_maps.append(pair)
                continue
            data, header = pair
            meta = MetaDict(header)

            try:
                new_map = self._check_registered_widgets(data, meta, **kwargs)
                new_maps.append(new_map)
            except (NoMatchError, MultipleMatchError, ValidationFunctionError,
                    MapMetaValidationError) as e:
                if not silence_errors:
                    raise
                warnings.warn(
                    f"One of the data, header pairs failed to validate with: {e}",
                    SunpyUserWarning)

        if not len(new_maps):
            raise RuntimeError('No maps loaded')

        # If the list is meant to be a sequence, instantiate a map sequence
        if sequence:
            return MapSequence(new_maps, **kwargs)

        # If the list is meant to be a composite map, instantiate one
        if composite:
            return CompositeMap(new_maps, **kwargs)

        if len(new_maps) == 1:
            return new_maps[0]

        return new_maps
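A minimal sketch of calling this factory. The data and header values are hypothetical; the header is built with sunpy's public `make_fitswcs_header` helper so it passes validation, and a plain header dict is converted to a MetaDict internally as shown above.

import numpy as np
import astropy.units as u
from astropy.coordinates import SkyCoord

import sunpy.map
from sunpy.coordinates import Helioprojective

data = np.random.rand(64, 64)
ref_coord = SkyCoord(0 * u.arcsec, 0 * u.arcsec,
                     frame=Helioprojective(observer='earth', obstime='2020-01-01'))
header = sunpy.map.make_fitswcs_header(data, ref_coord,
                                       scale=[2.4, 2.4] * u.arcsec / u.pixel)

m = sunpy.map.Map(data, header)                            # a single GenericMap
seq = sunpy.map.Map([(data, header)] * 3, sequence=True)   # a MapSequence of three maps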
Example no. 15
    def __call__(self, *args, **kwargs):
        """ Method for running the factory. Takes arbitrary arguments and
        keyword arguments and passes them to a sequence of pre-registered types
        to determine which is the correct TimeSeries source type to build.

        Arguments args and kwargs are passed through to the validation
        function and to the constructor for the final type.  For TimeSeries
        types, the validation function must take a data-header pair as an argument.

        Parameters
        ----------

        silence_errors : `bool`, optional
            If set, ignore data-header pairs which cause an exception.

        Notes
        -----
        Extra keyword arguments are passed through to `sunpy.io.read_file` such
        as `memmap` for FITS files.
        """

        # Hack to get around Python 2.x not backporting PEP 3102.
        silence_errors = kwargs.pop('silence_errors', False)

        (data_header_unit_tuples, data_header_pairs,
         already_timeseries, filepaths) = self._parse_args(*args, **kwargs)

        new_timeseries = list()

        # The filepaths for unreadable files
        for filepath in filepaths:
            try:
                new_ts = self._check_registered_widgets(filepath=filepath, **kwargs)
            except (NoMatchError, MultipleMatchError, ValidationFunctionError):
                if not silence_errors:
                    raise
                # Skip this file when errors are silenced; otherwise new_ts is undefined
                continue

            new_timeseries.append(new_ts)

        # data_header_pairs is a list of HDUs as read by sunpy.io
        # For each set of HDUs find the matching class and read the
        # data_header_unit_tuples by calling the _parse_hdus method
        # of the class.
        for pairs in data_header_pairs:
            # Pairs may be x long where x is the number of HDUs in the file.
            headers = [pair.header for pair in pairs]

            types = []
            for header in headers:
                try:
                    match = self._get_matching_widget(meta=header, **kwargs)
                    if not match == GenericTimeSeries:
                        types.append(match)
                except (MultipleMatchError, NoMatchError):
                    continue

            if not types:
                # If no specific classes have been found we can read the data
                # if we only have one data header pair:
                if len(pairs) == 1:
                    already_timeseries.append(GenericTimeSeries(pairs[0].data,
                                                                pairs[0].header))
                    # Nothing more to parse for this file
                    continue
                else:
                    raise NoMatchError("Input read by sunpy.io can not find a "
                                       "matching class for reading multiple HDUs")
            if len(set(types)) > 1:
                raise MultipleMatchError("Multiple HDUs return multiple matching classes.")

            cls = types[0]

            data_header_unit_tuples.append(cls._parse_hdus(pairs))

        # Loop over each registered type and check to see if WidgetType
        # matches the arguments.  If it does, use that type
        for triple in data_header_unit_tuples:
            data, header, units = triple
            # Make a MetaDict from various input types
            meta = header
            if isinstance(meta, astropy.io.fits.header.Header):
                meta = sunpy.io.header.FileHeader(meta)
            meta = MetaDict(meta)

            try:
                new_ts = self._check_registered_widgets(data=data, meta=meta,
                                                        units=units, **kwargs)
            except (NoMatchError, MultipleMatchError, ValidationFunctionError):
                if not silence_errors:
                    raise
                # Skip this triple when errors are silenced; otherwise new_ts is undefined
                continue

            new_timeseries.append(new_ts)

        new_timeseries += already_timeseries

        # Concatenate the timeseries into one if specified.
        concatenate = kwargs.get('concatenate', False)
        if concatenate:
            # Merge all these timeseries into one.
            full_timeseries = new_timeseries.pop(0)
            for timeseries in new_timeseries:
                full_timeseries = full_timeseries.concatenate(timeseries)

            new_timeseries = [full_timeseries]

        # Sanitize any units OrderedDict details
        for timeseries in new_timeseries:
            timeseries._sanitize_units()

        # Only return single time series, not in a list if we only have one.
        if len(new_timeseries) == 1:
            return new_timeseries[0]
        return new_timeseries
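A minimal sketch of calling this factory on files. The file names below are hypothetical; `concatenate=True` triggers the merging branch shown above, and a single file returns a single TimeSeries rather than a list.

import sunpy.timeseries

ts = sunpy.timeseries.TimeSeries('go1520200101.fits')
ts_all = sunpy.timeseries.TimeSeries('go1520200101.fits', 'go1520200102.fits',
                                     concatenate=True)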
Example no. 16
    def _parse_hdus(cls, hdulist):
        """Parses LYRA HDU list from a FITS file"""
        # Open file with PyFITS
        fits_record = hdulist[1].data
        # secondary_header = hdulist[1].header

        # Start and end dates.  Different LYRA FITS files have
        # different tags for the date obs.
        """
        print(hdulist[0].header)
        if 'date-obs' in hdulist[0].header:
            start_str = hdulist[0].header['date-obs']
        elif 'date_obs' in hdulist[0].header:
            start_str = hdulist[0].header['date_obs']
        # end_str = hdulist[0].header['date-end']
        """
        metadata = MetaDict(OrderedDict(hdulist[0].header))
        start_str = metadata.get('date-obs', metadata.get('date_obs', ''))

        # start = datetime.datetime.strptime(start_str, '%Y-%m-%dT%H:%M:%S.%f')
        start = parse_time(start_str)
        # end = datetime.datetime.strptime(end_str, '%Y-%m-%dT%H:%M:%S.%f')

        # First column are times.  For level 2 data, the units are [s].
        # For level 3 data, the units are [min]
        if hdulist[1].header['TUNIT1'] == 's':
            times = [
                start + datetime.timedelta(seconds=n)
                for n in fits_record.field(0)
            ]
        elif hdulist[1].header['TUNIT1'] == 'MIN':
            times = [
                start + datetime.timedelta(minutes=int(n))
                for n in fits_record.field(0)
            ]
        else:
            raise ValueError("Time unit in LYRA fits file not recognised.  "
                             "Value = {0}".format(hdulist[1].header['TUNIT1']))

        # Rest of columns are the data
        table = {}

        for i, col in enumerate(fits_record.columns[1:-1]):
            # temporary patch for big-endian data bug on pandas 0.13
            if (fits_record.field(i + 1).dtype.byteorder == '>'
                    and sys.byteorder == 'little'):
                table[col.name] = fits_record.field(i + 1).byteswap().newbyteorder()
            else:
                table[col.name] = fits_record.field(i + 1)

        # Return the header and the data
        data = pandas.DataFrame(table, index=times)
        data.sort_index(inplace=True)

        # Add the units data
        units = OrderedDict([('CHANNEL1', u.W / u.m**2),
                             ('CHANNEL2', u.W / u.m**2),
                             ('CHANNEL3', u.W / u.m**2),
                             ('CHANNEL4', u.W / u.m**2)])
        # ToDo: check: http://www.wmo-sat.info/oscar/instruments/view/733
        return data, metadata, units
Example no. 17
def seas_metadict(sea_locations):
    return MetaDict(sea_locations)
Example no. 18
def test_original_copy():
    md = MetaDict({'foo': 'bar'})
    assert md.original_meta == md

    # Add a key, make sure original contents doesn't change
    md['a'] = 'b'
    assert md.original_meta != md
    assert list(md.keys()) == ['foo', 'a']
    assert list(md.original_meta.keys()) == ['foo']
    assert md.added_items == {'a': 'b'}

    # Check that creating a new MetaDict preserves the original copy
    md = MetaDict(md)
    assert list(md.keys()) == ['foo', 'a']
    assert list(md.original_meta.keys()) == ['foo']

    # Check that creating a copy preserves the original copy
    md = copy.copy(md)
    assert list(md.keys()) == ['foo', 'a']
    assert list(md.original_meta.keys()) == ['foo']

    # Check that creating a deepcopy preserves the original copy
    md = copy.deepcopy(md)
    assert list(md.keys()) == ['foo', 'a']
    assert list(md.original_meta.keys()) == ['foo']

    # Check that creating using .copy() preserves the original copy
    md = md.copy()
    assert list(md.keys()) == ['foo', 'a']
    assert list(md.original_meta.keys()) == ['foo']

    # Check modification of items
    md['foo'] = 'bar1'
    assert md.modified_items == {'foo': ModifiedItem('bar', 'bar1')}

    # Check removal of items
    md.pop('foo')
    assert md.removed_items == {'foo': 'bar'}
Example no. 19
def test_init_with_invalid_keycomments_type():
    """
    Ensure attempt to initialise with an invalid keycomments type is rejected.
    """
    with pytest.raises(TypeError):
        MetaDict({'a': 1, 'b': 2, 'keycomments': 3})
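For contrast, a small sketch of the form that is accepted: 'keycomments' is assumed (from this rejection test and MetaDict's keycomments handling) to be a dict mapping keys to comment strings.

md = MetaDict({'a': 1, 'b': 2, 'keycomments': {'a': 'comment on a', 'b': 'comment on b'}})
print(md['A'])         # 1, since lookups stay case-insensitive
print(md.keycomments)  # {'a': 'comment on a', 'b': 'comment on b'}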
Example no. 20
def get_header(fn, hdu=0):
    with fn as fi:
        return MetaDict(sunpy.io.fits.get_header(fi)[hdu])
Example no. 21
    def _parse_level_0cs(filepath):
        """
        Parses an EVE Level 0CS file.
        """
        is_missing_data = False  # boolean to check for missing data
        missing_data_val = np.nan
        header = []
        fields = []
        with codecs.open(filepath, mode='rb', encoding='ascii') as fp:
            line = fp.readline()
            # Read header at top of file
            while line.startswith(";"):
                header.append(line)
                if '; Missing data:' in line:
                    is_missing_data = True
                    missing_data_val = line.split(':')[1].strip()

                line = fp.readline()

        meta = MetaDict()
        for hline in header:
            if hline == '; Format:\n' or hline == '; Column descriptions:\n':
                continue
            elif ('Created' in hline) or ('Source' in hline):
                key = hline.split(':', 1)[0].replace(';', ' ').strip()
                meta[key] = hline.split(':', 1)[1].strip()
            elif ':' in hline:
                key = hline.split(':')[0].replace(';', ' ').strip()
                meta[key] = hline.split(':')[1].strip()

        fieldnames_start = False
        for hline in header:
            if hline.startswith("; Format:"):
                fieldnames_start = False
            if fieldnames_start:
                fields.append(hline.split(":")[0].replace(';', ' ').strip())
            if hline.startswith("; Column descriptions:"):
                fieldnames_start = True

        # Next line is YYYY DOY MM DD
        date_parts = line.split(" ")
        year = int(date_parts[0])
        month = int(date_parts[2])
        day = int(date_parts[3])

        data = read_csv(filepath,
                        delim_whitespace=True,
                        names=fields,
                        comment=';',
                        dtype={'HHMM': int})
        # First line is YYYY DOY MM DD
        data = data.iloc[1:, :]
        data['Hour'] = data['HHMM'] // 100
        data['Minute'] = data['HHMM'] % 100
        data = data.drop(['HHMM'], axis=1)

        data['Year'] = year
        data['Month'] = month
        data['Day'] = day

        datecols = ['Year', 'Month', 'Day', 'Hour', 'Minute']
        data['Time'] = to_datetime(data[datecols])
        data = data.set_index('Time')
        data = data.drop(datecols, axis=1)

        if is_missing_data:  # If missing data specified in header
            data[data == float(missing_data_val)] = np.nan

        # Add the units data
        units = OrderedDict([('XRS-B proxy', u.W / u.m**2),
                             ('XRS-A proxy', u.W / u.m**2),
                             ('SEM proxy', u.W / u.m**2),
                             ('0.1-7ESPquad', u.W / u.m**2),
                             ('17.1ESP', u.W / u.m**2),
                             ('25.7ESP', u.W / u.m**2),
                             ('30.4ESP', u.W / u.m**2),
                             ('36.6ESP', u.W / u.m**2), ('darkESP', u.ct),
                             ('121.6MEGS-P', u.W / u.m**2),
                             ('darkMEGS-P', u.ct),
                             ('q0ESP', u.dimensionless_unscaled),
                             ('q1ESP', u.dimensionless_unscaled),
                             ('q2ESP', u.dimensionless_unscaled),
                             ('q3ESP', u.dimensionless_unscaled),
                             ('CMLat', u.deg), ('CMLon', u.deg)])
        # Todo: check units used.
        return data, meta, units
Example no. 22
    def _parse_file(cls, filepath):
        """Parses an NOAA indices csv file"""
        """
        header = []
        with open(filepath, 'r') as fp:
            line = fp.readline()
            # Read header at top of file
            while line.startswith((":", "#")):
                header += line
                line = fp.readline()
            fields = ('yyyy', 'mm', 'sunspot SWO', 'sunspot RI', 'sunspot ratio', 'sunspot SWO smooth', 'sunspot RI smooth', 'radio flux', 'radio flux smooth', 'geomagnetic ap', 'geomagnetic smooth')
            data = read_csv(fp, delim_whitespace=True, names = fields, comment='#', dtype={'yyyy':np.str, 'mm':np.str})
            data = data.dropna(how='any')
            timeindex = [datetime.datetime.strptime(x + y, '%Y%m') for x,y in zip(data['yyyy'], data['mm'])]
            data['time']=timeindex
            data = data.set_index('time')
            data = data.drop('mm',1)
            data = data.drop('yyyy',1)
            return data, {'comments': header}
        """
        header = []
        with open(filepath, 'r') as fp:
            line = fp.readline()
            # Read header at top of file
            while line.startswith((":", "#")):
                header += line
                line = fp.readline()
            fields = ('yyyy', 'mm', 'sunspot SWO', 'sunspot RI',
                      'sunspot ratio', 'sunspot SWO smooth',
                      'sunspot RI smooth', 'radio flux', 'radio flux smooth',
                      'geomagnetic ap', 'geomagnetic smooth')
            data = read_csv(fp,
                            delim_whitespace=True,
                            names=fields,
                            comment='#',
                            dtype={
                                'yyyy': str,  # np.str was removed from NumPy; the builtin is equivalent
                                'mm': str
                            })
            data = data.dropna(how='any')
            timeindex = [
                datetime.datetime.strptime(x + y, '%Y%m')
                for x, y in zip(data['yyyy'], data['mm'])
            ]
            data['time'] = timeindex
            data = data.set_index('time')
            data = data.drop(columns=['mm', 'yyyy'])  # the positional axis argument was removed in pandas 2.0

            # Add the units data
            units = OrderedDict([
                ('sunspot SWO', u.dimensionless_unscaled),
                ('sunspot RI', u.dimensionless_unscaled),
                ('sunspot ratio', u.dimensionless_unscaled),
                ('sunspot SWO smooth', u.dimensionless_unscaled),
                ('sunspot RI smooth', u.dimensionless_unscaled),
                ('radio flux', u.W / u.m**2),
                ('radio flux smooth', u.W / u.m**2),
                ('geomagnetic ap', u.dimensionless_unscaled),
                ('geomagnetic smooth', u.dimensionless_unscaled)
            ])
            # Todo: check units
            # Todo: fix header/meta, it's returning rubbish.
            return data, MetaDict({'comments': header}), units
Example no. 23
    def _parse_args(self, *args, **kwargs):
        """
        Parses an args list for data-header pairs.  args can contain any
        mixture of the following entries:
        * tuples of (data, header, unit) (1)
        * data, header not in a tuple (1)
        * filename, which will be read
        * directory, from which all files will be read
        * glob, from which all files will be read
        * url, which will be downloaded and read
        * lists containing any of the above.

        (1) Note that header/unit are optional and in either order, but data
        must be the first entry in each group.

        Examples
        --------
        self._parse_args(data, header,
                         (data, header),
                         ['file1', 'file2', 'file3'],
                         'file4',
                         'directory1',
                         '*.fits')

        """

        data_header_unit_tuples = list()
        data_header_pairs = list()
        already_timeseries = list()
        filepaths = list()

        # Take source kwarg if defined
        source = kwargs.get('source', None)

        # Account for nested lists of items. Simply outputs a single list of
        # items, nested lists are expanded to element level.
        args = expand_list(args)

        # For each of the arguments, handle each of the cases
        i = 0
        while i < len(args):
            arg = args[i]

            # Data-header pair in a tuple
            if (isinstance(arg, (np.ndarray, Table, pd.DataFrame))):
                # and self._validate_meta(args[i+1])):
                # Assume a Pandas Dataframe is given
                data = arg
                units = OrderedDict()
                meta = MetaDict()

                # Convert the data argument into a Pandas DataFrame if needed.
                if isinstance(data, Table):
                    # We have an AstroPy Table:
                    data, meta, units = self._from_table(data)
                elif isinstance(data, np.ndarray):
                    # We have a numpy ndarray. We assume the first column is a dt index
                    data = pd.DataFrame(data=data[:,1:], index=Time(data[:,0]))

                # If there are 1 or 2 more arguments:
                for _ in range(2):
                    if (len(args) > i+1):
                        # If that next argument isn't data but is metadata or units:
                        if not isinstance(args[i+1], (np.ndarray, Table, pd.DataFrame)):
                            if self._validate_units(args[i+1]):
                                units.update(args[i+1])
                                i += 1  # an extra increment to account for the units
                            elif self._validate_meta(args[i+1]):
                                # if we have an astropy.io FITS header then convert
                                # to preserve multi-line comments
                                if isinstance(args[i+1], astropy.io.fits.header.Header):
                                    args[i+1] = MetaDict(sunpy.io.header.FileHeader(args[i+1]))
                                meta.update(args[i+1])
                                i += 1  # an extra increment to account for the meta

                # Add a 3-tuple for this TimeSeries.
                data_header_unit_tuples.append((data, meta, units))

            # Filepath
            elif (isinstance(arg, six.string_types) and
                  os.path.isfile(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)

                read, result = self._read_file(path, **kwargs)

                if read:
                    data_header_pairs.append(result)
                else:
                    filepaths.append(result)

            # Directory
            elif (isinstance(arg, six.string_types) and
                  os.path.isdir(os.path.expanduser(arg))):

                path = os.path.expanduser(arg)
                files = [os.path.join(path, elem) for elem in os.listdir(path)]
                for afile in files:
                    # returns a boolean telling us if it were read and either a
                    # tuple or the original filepath for reading by a source
                    read, result = self._read_file(afile, **kwargs)
                    if read:
                        data_header_pairs.append(result)
                    else:
                        filepaths.append(result)

            # Glob
            elif (isinstance(arg, six.string_types) and '*' in arg):

                files = glob.glob(os.path.expanduser(arg))

                for afile in files:
                    # data_header_unit_tuples += self._read_file(afile, **kwargs)
                    # returns a boolean telling us if it were read and either a
                    # tuple or the original filepath for reading by a source
                    read, result = self._read_file(afile, **kwargs)
                    if read:
                        data_header_pairs.append(result)
                    else:
                        filepaths.append(result)

            # Already a TimeSeries
            elif isinstance(arg, GenericTimeSeries):
                already_timeseries.append(arg)

            # A URL
            elif (isinstance(arg,six.string_types) and
                  _is_url(arg)):
                default_dir = sunpy.config.get("downloads", "download_dir")
                url = arg
                path = download_file(url, default_dir)
                read, result = self._read_file(path, **kwargs)
                filepaths.append(result)

            else:
                raise NoMatchError("File not found or invalid input")
            i += 1

        # TODO: any TimeSeries objects passed in should be returned in the
        # same order as the input; currently they are not.
        return data_header_unit_tuples, data_header_pairs, already_timeseries, filepaths
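
The fragment above is the type-dispatch loop of the TimeSeries factory: each positional argument is classified as an array, Table, file path, directory, glob pattern, URL, or existing TimeSeries. A minimal sketch of driving the factory through its public entry point, using made-up data, metadata, and units (the column name and values are illustrative only):

from collections import OrderedDict

import astropy.units as u
import numpy as np
import pandas as pd

import sunpy.timeseries
from sunpy.util.metadata import MetaDict

# Ten minutes of made-up irradiance values indexed by time.
index = pd.date_range('2017-01-01', periods=10, freq='min')
data = pd.DataFrame({'irradiance': np.random.rand(10)}, index=index)
meta = MetaDict({'instrume': 'example'})
units = OrderedDict([('irradiance', u.W / u.m**2)])

# The factory routes the (data, meta, units) triple through the argument parser above
# and returns a GenericTimeSeries.
ts = sunpy.timeseries.TimeSeries(data, meta, units)
print(ts.columns)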
Example n. 24
0
def test_upcasing():
    meta = MetaDict({'wibble': 1, 'WOBBLE': 2})
    # __getitem__
    assert meta['wibble'] == meta['WIBBLE']
    # get
    assert meta.get('wibble') == meta.get('WIBBLE')
    # has_key
    assert ('wibble' in meta) == ('WIBBLE' in meta)
    # Copy
    meta2 = meta.copy()
    assert meta2 == meta
    # pop
    assert meta.pop('wibble') == meta2.pop('WIBBLE')
    # update
    meta.update({'spam': 'eggs'})
    meta2.update({'SPAM': 'eggs'})
    assert meta == meta2
    # setdefault
    meta.setdefault('dave', 3)
    meta2.setdefault('DAVE', 3)
    assert meta.get('DAVE') == meta2.get('dave')
    # __setitem__
    meta['wibble'] = 10
    assert meta['wibble'] == 10
    meta['WIBBLE'] = 20
    assert meta['wibble'] == 20
    # __contains__
    assert 'wibble' in meta
    assert 'WIBBLE' in meta
Example n. 25
0
def test_invalid_manual_data():
    meta = MetaDict({'key': 'value'})
    data = []
    with pytest.raises(NoMatchError):
        sunpy.timeseries.TimeSeries(data, meta)
Example n. 26
0
def generate_jedi_catalog(
        threshold_time_prior_flare_minutes=240.0,
        dimming_window_relative_to_flare_minutes_left=0.0,
        dimming_window_relative_to_flare_minutes_right=240.0,
        threshold_minimum_dimming_window_minutes=120.0,
        flare_index_range=range(0, 5052),
        output_path='/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/',
        verbose=True):
    """Wrapper code for creating James's Extreme Ultraviolet Variability Experiment (EVE) Dimming Index (JEDI) catalog.

    Inputs:
        None.

    Optional Inputs:
        threshold_time_prior_flare_minutes [float]:             How long before a particular flare the previous one must have
                                                                occurred for this event to be considered independent. If the previous
                                                                flare was too recent, its pre-flare irradiance will be reused.
                                                                Default is 240 (4 hours).
        dimming_window_relative_to_flare_minutes_left [float]:  Defines the left side of the time window to search for dimming
                                                                relative to the GOES/XRS flare peak. Negative numbers mean
                                                                minutes prior to the flare peak. Default is 0.0.
        dimming_window_relative_to_flare_minutes_right [float]: Defines the right side of the time window to search for dimming
                                                                relative to the GOES/XRS flare peak. If another flare
                                                                occurs before this, that time will define the end of the
                                                                window instead. Default is 240 (4 hours).
        threshold_minimum_dimming_window_minutes [float]:       The smallest allowed time window in which to search for dimming.
                                                                Default is 120.
        flare_index_range [range]:                              The range of GOES flare indices to process. Default is range(0, 5052).
        output_path [str]:                                      Set to a path for saving the JEDI catalog table and processing
                                                                summary plots. Default is '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/'.
        verbose [bool]:                                         Set to log the processing messages to disk. Default is True.

    Outputs:
        No direct return, but writes a csv file to disk with the dimming parameterization results.
        Subroutines also optionally save processing plots to disk in output_path.

    Optional Outputs:
        None

    Example:
        generate_jedi_catalog(output_path='/Users/jmason86/Dropbox/Research/Postdoc_NASA/Analysis/Coronal Dimming Analysis/JEDI Catalog/',
                              verbose=True)
    """

    # Prepare the logger for verbose
    if verbose:
        logger = JpmLogger(filename='generate_jedi_catalog',
                           path=output_path,
                           console=False)
        logger.info("Starting JEDI processing pipeline.")
        logger.info("Processing events {0} - {1}".format(
            flare_index_range[0], flare_index_range[-1]))
    else:
        logger = None

    # Get EVE level 2 extracted emission lines data
    # TODO: Replace this shortcut method with the method I'm building into sunpy
    from scipy.io.idl import readsav
    eve_readsav = readsav(
        '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/eve_lines_2010121-2014146 MEGS-A Mission Bare Bones.sav'
    )
    if verbose:
        logger.info('Loaded EVE data')

    # Create metadata dictionary
    # TODO: Replace this shortcut method with the method I'm building into sunpy
    from sunpy.util.metadata import MetaDict
    metadata = MetaDict()
    metadata['ion'] = eve_readsav['name']
    metadata['temperature_ion_peak_formation'] = np.power(
        10.0, eve_readsav['logt']) * u.Kelvin
    metadata['extracted_wavelength_center'] = eve_readsav['wavelength'] * u.nm
    metadata['extracted_wavelength_min'] = metadata[
        'extracted_wavelength_center']
    metadata['extracted_wavelength_max'] = metadata[
        'extracted_wavelength_center']
    metadata['emission_line_blends'] = ['none', 'yay', 'poop', 'Fe vi']  # etc
    metadata[
        'exposure_time'] = 60.0 * u.second  # These example EVE data are already binned down to 1 minute
    metadata['precision'] = ['Not implemented in prototype']
    metadata['accuracy'] = ['Not implemented in prototype']
    metadata['flags'] = ['Not implemented in prototype']
    metadata['flags_description'] = '1 = MEGS-A data is missing, ' \
                                    '2 = MEGS-B data is missing, ' \
                                    '4 = ESP data is missing, ' \
                                    '8 = MEGS-P data is missing, ' \
                                    '16 = Possible clock adjust in MEGS-A, ' \
                                    '32 = Possible clock adjust in MEGS-B, ' \
                                    '64 = Possible clock adjust in ESP, ' \
                                    '128 = Possible clock adjust in MEGS-P'
    metadata['flags_spacecraft'] = ['Not implemented in prototype']
    metadata['flags_spacecraft_description'] = '0 = No obstruction, ' \
                                               '1 = Warm up from Earth eclipse, ' \
                                               '2 = Obstruction atmosphere penumbra, ' \
                                               '3 = Obstruction atmosphere umbra, ' \
                                               '4 = Obstruction penumbra of Mercury, ' \
                                               '5 = Obstruction penumbra of Mercury, ' \
                                               '6 = Obstruction penumbra of Venus, ' \
                                               '7 = Obstruction umbra of Venus, ' \
                                               '8 = Obstruction penumbra of Moon, ' \
                                               '9 = Obstruction umbra of Moon, ' \
                                               '10 = Obstruction penumbra of solid Earth, ' \
                                               '11 = Obstruction umbra of solid Earth, ' \
                                               '16 = Observatory is off-pointed by more than 1 arcmin'
    metadata['data_version'] = ['Not implemented in prototype']
    metadata['data_reprocessed_revision'] = ['Not implemented in prototype']
    metadata['filename'] = ['Not implemented in prototype']

    # Load up the actual irradiance data into a pandas DataFrame
    # TODO: Replace this shortcut method with the method I'm building into sunpy
    irradiance = eve_readsav['irradiance'].byteswap().newbyteorder(
    )  # pandas doesn't like big endian
    irradiance[irradiance == -1] = np.nan
    wavelengths = eve_readsav['wavelength']
    wavelengths_str = ['{0:1.1f}'.format(wavelength) for wavelength in wavelengths]
    eve_lines = pd.DataFrame(irradiance, columns=wavelengths_str)
    eve_lines.index = pd.to_datetime(eve_readsav.iso.astype(str))
    eve_lines = eve_lines.drop_duplicates()

    # slice out only columns needed by Shawn
    # eve_selected_lines = eve_lines.drop(columns=['9.4', '13.1', '13.3', '25.6', '28.4', '30.4', '33.5', '36.1', '36.8', '44.6', '46.5', '49.9', '52.1', '52.6', '53.7', '55.4', '56.8', '58.4', '59.2', '60.0', '61.0', '62.5', '63.0', '71.9', '72.2', '77.0', '79.0', '83.6', '95.0', '97.3', '97.7', '102.6', '103.2'])
    # eve_selected_lines.info()
    # eve_selected_lines.to_csv('/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/PyCharm/JEDI Catalog/eve_selected_lines_forreal.csv')

    # Get GOES flare events above C1 within date range corresponding to EVE data
    # flares = get_goes_flare_events(eve_lines.index[0], eve_lines.index[-1], verbose=verbose)  # TODO: The method in sunpy needs fixing, issue 2434

    # Load GOES events from IDL saveset instead of directly through sunpy
    goes_flare_events = readsav(
        '/Users/shawnpolson/Documents/School/Spring 2018/Data Mining/StealthCMEs/savesets/GoesEventsMegsAEra.sav'
    )
    goes_flare_events['class'] = goes_flare_events['class'].astype(str)
    goes_flare_events['event_peak_time_human'] = goes_flare_events[
        'event_peak_time_human'].astype(str)
    goes_flare_events['event_start_time_human'] = goes_flare_events[
        'event_start_time_human'].astype(str)
    goes_flare_events['peak_time'] = Time(
        goes_flare_events['event_peak_time_jd'], format='jd', scale='utc')
    goes_flare_events['start_time'] = Time(
        goes_flare_events['event_start_time_jd'], format='jd', scale='utc')
    if verbose:
        logger.info('Loaded GOES flare events.')

    # Define the columns of the JEDI catalog
    jedi_row = pd.DataFrame([
        OrderedDict([('Event #', np.nan), ('GOES Flare Start Time', np.nan),
                     ('GOES Flare Peak Time', np.nan),
                     ('GOES Flare Class', np.nan),
                     ('Pre-Flare Start Time', np.nan),
                     ('Pre-Flare End Time', np.nan),
                     ('Flare Interrupt', np.nan)])
    ])
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns +
                     ' Pre-Flare Irradiance [W/m2]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Start Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope End Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Min [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Max [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Mean [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Slope Uncertainty [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Depth Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Depth [%]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Depth Uncertainty [%]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Duration Start Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Duration End Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Duration [s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Fitting Gamma'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=eve_lines.columns + ' Fitting Score'))

    ion_tuples = list(itertools.permutations(eve_lines.columns.values, 2))
    ion_permutations = pd.Index(
        [' by '.join(ion_tuples[i]) for i in range(len(ion_tuples))])

    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Start Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope End Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Min [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Max [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Mean [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Slope Uncertainty [%/s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Depth Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Depth [%]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Depth Uncertainty [%]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Duration Start Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Duration End Time'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Duration [s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Correction Time Shift [s]'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Correction Scale Factor'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Fitting Gamma'))
    jedi_row = jedi_row.join(
        pd.DataFrame(columns=ion_permutations + ' Fitting Score'))

    csv_filename = output_path + 'jedi_{0}.csv'.format(Time.now().iso)
    jedi_row.to_csv(csv_filename, header=True, index=False, mode='w')

    if verbose:
        logger.info('Created JEDI row definition.')

    # Start a progress bar
    widgets = [
        progressbar.Percentage(),
        progressbar.Bar(),
        progressbar.Timer(), ' ',
        progressbar.AdaptiveETA()
    ]
    progress_bar = progressbar.ProgressBar(
        widgets=[progressbar.FormatLabel('Flare Event Loop: ')] + widgets,
        min_value=flare_index_range[0],
        max_value=flare_index_range[-1]).start()

    # Prepare a hold-over pre-flare irradiance value,
    # which will normally have one element for each of the 39 emission lines
    preflare_irradiance = np.nan

    # Start loop through all flares
    for flare_index in flare_index_range:

        # Skip event 0 to avoid problems with referring to earlier indices
        if flare_index == 0:
            continue

        # Reset jedi_row
        jedi_row[:] = np.nan

        # Reset the flare interrupt flag
        flare_interrupt = False

        # Fill the GOES flare information into the JEDI row
        jedi_row['Event #'] = flare_index
        jedi_row['GOES Flare Start Time'] = goes_flare_events['start_time'][
            flare_index].iso
        jedi_row['GOES Flare Peak Time'] = goes_flare_events['peak_time'][
            flare_index].iso
        jedi_row['GOES Flare Class'] = goes_flare_events['class'][flare_index]
        if verbose:
            logger.info(
                "Event {0} GOES flare details stored to JEDI row.".format(
                    flare_index))

        # If haven't already done all pre-parameterization processing
        processed_jedi_non_params_filename = output_path + 'Processed Pre-Parameterization Data/Event {0} Pre-Parameterization.h5'.format(
            flare_index)
        processed_lines_filename = output_path + 'Processed Lines Data/Event {0} Lines.h5'.format(
            flare_index)
        if not os.path.isfile(processed_lines_filename) or not os.path.isfile(
                processed_jedi_non_params_filename):
            # Determine pre-flare irradiance
            minutes_since_last_flare = (
                goes_flare_events['peak_time'][flare_index] -
                goes_flare_events['peak_time'][flare_index - 1]).sec / 60.0
            if minutes_since_last_flare > threshold_time_prior_flare_minutes:
                # Clip EVE data from threshold_time_prior_flare_minutes prior to flare up to peak flare time
                preflare_window_start = (
                    goes_flare_events['peak_time'][flare_index] -
                    (threshold_time_prior_flare_minutes * u.minute)).iso
                preflare_window_end = (
                    goes_flare_events['peak_time'][flare_index]).iso
                eve_lines_preflare_time = eve_lines[
                    preflare_window_start:preflare_window_end]

                # Loop through the emission lines and get pre-flare irradiance for each
                preflare_irradiance = []
                for column in eve_lines_preflare_time:
                    eve_line_preflare_time = pd.DataFrame(
                        eve_lines_preflare_time[column])
                    eve_line_preflare_time.columns = ['irradiance']
                    preflare_irradiance.append(
                        determine_preflare_irradiance(
                            eve_line_preflare_time,
                            pd.Timestamp(goes_flare_events['start_time']
                                         [flare_index].iso),
                            plot_path_filename=output_path +
                            'Preflare Determination/Event {0} {1}.png'.format(
                                flare_index, column),
                            verbose=verbose,
                            logger=logger))
                    plt.close('all')
            else:
                if verbose:
                    logger.info(
                        "This flare at {0} will use the pre-flare irradiance from flare at {1}."
                        .format(
                            goes_flare_events['peak_time'][flare_index].iso,
                            goes_flare_events['peak_time'][flare_index - 1].iso))

            jedi_row["Pre-Flare Start Time"] = preflare_window_start
            jedi_row["Pre-Flare End Time"] = preflare_window_end
            preflare_irradiance_cols = [
                col for col in jedi_row.columns
                if 'Pre-Flare Irradiance' in col
            ]
            jedi_row[preflare_irradiance_cols] = preflare_irradiance

            if verbose:
                logger.info(
                    "Event {0} pre-flare determination complete.".format(
                        flare_index))

            # Clip EVE data to dimming window
            bracket_time_left = (
                goes_flare_events['peak_time'][flare_index] -
                (dimming_window_relative_to_flare_minutes_left * u.minute))
            next_flare_time = Time(
                (goes_flare_events['peak_time'][flare_index + 1]).iso)
            user_choice_time = (
                goes_flare_events['peak_time'][flare_index] +
                (dimming_window_relative_to_flare_minutes_right * u.minute))
            bracket_time_right = min(next_flare_time, user_choice_time)

            # If flare is shortening the window, set the flare_interrupt flag
            if bracket_time_right == next_flare_time:
                flare_interrupt = True
                if verbose:
                    logger.info(
                        'Flare interrupt for event at {0} by flare at {1}'.
                        format(goes_flare_events['peak_time'][flare_index].iso,
                               next_flare_time))

            # Write flare_interrupt to JEDI row
            jedi_row['Flare Interrupt'] = flare_interrupt

            # Skip event if the dimming window is too short
            if ((bracket_time_right - bracket_time_left).sec /
                    60.0) < threshold_minimum_dimming_window_minutes:
                # Leave all dimming parameters as NaN and write this null result to the CSV on disk
                jedi_row.to_csv(csv_filename,
                                header=False,
                                index=False,
                                mode='a')

                # Log message
                if verbose:
                    logger.info(
                        'The dimming window duration of {0} minutes is shorter than the minimum threshold of {1} minutes. Skipping this event ({2})'
                        .format(((bracket_time_right - bracket_time_left).sec /
                                 60.0),
                                threshold_minimum_dimming_window_minutes,
                                goes_flare_events['peak_time'][flare_index]))

                # Skip the rest of the processing in the flare_index loop
                continue
            else:
                eve_lines_event = eve_lines[
                    bracket_time_left.iso:bracket_time_right.iso]

            if verbose:
                logger.info(
                    "Event {0} EVE data clipped to dimming window.".format(
                        flare_index))

            # Convert irradiance units to percent
            # (in place, don't care about absolute units from this point forward)
            eve_lines_event = (eve_lines_event - preflare_irradiance
                               ) / preflare_irradiance * 100.0

            if verbose:
                logger.info(
                    "Event {0} irradiance converted from absolute to percent units."
                    .format(flare_index))

            # Do flare removal in the light curves and add the results to the DataFrame
            progress_bar_correction = progressbar.ProgressBar(
                widgets=[progressbar.FormatLabel('Peak Match Subtract: ')] +
                widgets,
                max_value=len(ion_tuples)).start()
            for i in range(len(ion_tuples)):
                light_curve_to_subtract_from_df = pd.DataFrame(
                    eve_lines_event[ion_tuples[i][0]])
                light_curve_to_subtract_from_df.columns = ['irradiance']
                light_curve_to_subtract_with_df = pd.DataFrame(
                    eve_lines_event[ion_tuples[i][1]])
                light_curve_to_subtract_with_df.columns = ['irradiance']

                if (light_curve_to_subtract_from_df.isnull().all().all()) or (
                        light_curve_to_subtract_with_df.isnull().all().all()):
                    if verbose:
                        logger.info(
                            'Event {0} {1} correction skipped because all irradiances are NaN.'
                            .format(flare_index, ion_permutations[i]))
                else:
                    light_curve_corrected, seconds_shift, scale_factor = light_curve_peak_match_subtract(
                        light_curve_to_subtract_from_df,
                        light_curve_to_subtract_with_df,
                        pd.Timestamp(
                            (goes_flare_events['peak_time'][flare_index]).iso),
                        plot_path_filename=output_path +
                        'Peak Subtractions/Event {0} {1}.png'.format(
                            flare_index, ion_permutations[i]),
                        verbose=verbose,
                        logger=logger)

                    eve_lines_event[
                        ion_permutations[i]] = light_curve_corrected
                    jedi_row[ion_permutations[i] +
                             ' Correction Time Shift [s]'] = seconds_shift
                    jedi_row[ion_permutations[i] +
                             ' Correction Scale Factor'] = scale_factor

                    plt.close('all')

                    if verbose:
                        logger.info(
                            'Event {0} flare removal correction complete'.
                            format(flare_index))
                    progress_bar_correction.update(i)

            progress_bar_correction.finish()

            # TODO: Update calculate_eve_fe_line_precision to compute for all emission lines, not just selected
            uncertainty = np.ones(len(eve_lines_event)) * 0.002545

            # TODO: Propagate uncertainty through light_curve_peak_match_subtract and store in eve_lines_event

            # Fit the light curves to reduce influence of noise on the parameterizations to come later
            progress_bar_fitting = progressbar.ProgressBar(
                widgets=[progressbar.FormatLabel('Light curve fitting: ')] +
                widgets,
                max_value=len(eve_lines_event.columns)).start()
            for i, column in enumerate(eve_lines_event):
                if eve_lines_event[column].isnull().all().all():
                    if verbose:
                        logger.info(
                            'Event {0} {1} fitting skipped because all irradiances are NaN.'
                            .format(flare_index, column))
                else:
                    eve_line_event = pd.DataFrame(eve_lines_event[column])
                    eve_line_event.columns = ['irradiance']
                    eve_line_event['uncertainty'] = uncertainty

                    fitting_path = output_path + 'Fitting/'
                    if not os.path.exists(fitting_path):
                        os.makedirs(fitting_path)

                    plt.close('all')
                    light_curve_fit, best_fit_gamma, best_fit_score = automatic_fit_light_curve(
                        eve_line_event,
                        plots_save_path='{0} Event {1} {2} '.format(
                            fitting_path, flare_index, column),
                        verbose=verbose,
                        logger=logger)
                    eve_lines_event[column] = light_curve_fit
                    jedi_row[column + ' Fitting Gamma'] = best_fit_gamma
                    jedi_row[column + ' Fitting Score'] = best_fit_score

                    if verbose:
                        logger.info(
                            'Event {0} {1} light curves fitted.'.format(
                                flare_index, column))
                    progress_bar_fitting.update(i)

            progress_bar_fitting.finish()

        #     # Save the dimming event data to disk for quicker restore
        #     jedi_row.to_hdf(processed_jedi_non_params_filename, 'jedi_row')
        #     eve_lines_event.to_hdf(processed_lines_filename, 'eve_lines_event')
        # else:
        #     jedi_row = pd.read_hdf(processed_jedi_non_params_filename, 'jedi_row')
        #     eve_lines_event = pd.read_hdf(processed_lines_filename, 'eve_lines_event')
        #     if verbose:
        #         logger.info('Loading files {0} and {1} rather than processing again.'.format(processed_jedi_non_params_filename, processed_lines_filename))
        #
        # # Parameterize the light curves for dimming
        # for column in eve_lines_event:
        #
        #     # Null out all parameters
        #     depth_percent, depth_time = np.nan, np.nan
        #     slope_start_time, slope_end_time = np.nan, np.nan
        #     slope_min, slope_max, slope_mean = np.nan, np.nan, np.nan
        #     duration_seconds, duration_start_time, duration_end_time = np.nan, np.nan, np.nan
        #
        #     # Determine whether to do the parameterizations or not
        #     if eve_lines_event[column].isnull().all().all():
        #         if verbose:
        #             logger.info('Event {0} {1} parameterization skipped because all irradiances are NaN.'.format(flare_index, column))
        #     else:
        #         eve_line_event = pd.DataFrame(eve_lines_event[column])
        #         eve_line_event.columns = ['irradiance']
        #
        #         # Determine dimming depth (if any)
        #         depth_path = output_path + 'Depth/'
        #         if not os.path.exists(depth_path):
        #             os.makedirs(depth_path)
        #
        #         plt.close('all')
        #         depth_percent, depth_time = determine_dimming_depth(eve_line_event,
        #                                                             plot_path_filename='{0} Event {1} {2} Depth.png'.format(depth_path, flare_index, column),
        #                                                             verbose=verbose, logger=logger)
        #
        #         jedi_row[column + ' Depth [%]'] = depth_percent
        #         # jedi_row[column + ' Depth Uncertainty [%]'] = depth_uncertainty  # TODO: make determine_dimming_depth return the propagated uncertainty
        #         jedi_row[column + ' Depth Time'] = depth_time
        #
        #         # Determine dimming slope (if any)
        #         slope_path = output_path + 'Slope/'
        #         if not os.path.exists(slope_path):
        #             os.makedirs(slope_path)
        #
        #         slope_start_time = pd.Timestamp((goes_flare_events['peak_time'][flare_index]).iso)
        #         slope_end_time = depth_time
        #
        #         if (pd.isnull(slope_start_time)) or (pd.isnull(slope_end_time)):
        #             if verbose:
        #                 logger.warning('Cannot compute slope or duration because slope bounding times NaN.')
        #         else:
        #             plt.close('all')
        #             slope_min, slope_max, slope_mean = determine_dimming_slope(eve_line_event,
        #                                                                        earliest_allowed_time=slope_start_time,
        #                                                                        latest_allowed_time=slope_end_time,
        #                                                                        plot_path_filename='{0} Event {1} {2} Slope.png'.format(slope_path, flare_index, column),
        #                                                                        verbose=verbose, logger=logger)
        #
        #             jedi_row[column + ' Slope Min [%/s]'] = slope_min
        #             jedi_row[column + ' Slope Max [%/s]'] = slope_max
        #             jedi_row[column + ' Slope Mean [%/s]'] = slope_mean
        #             # jedi_row[column + ' Slope Uncertainty [%]'] = slope_uncertainty  # TODO: make determine_dimming_depth return the propagated uncertainty
        #             jedi_row[column + ' Slope Start Time'] = slope_start_time
        #             jedi_row[column + ' Slope End Time'] = slope_end_time
        #
        #             # Determine dimming duration (if any)
        #             duration_path = output_path + 'Duration/'
        #             if not os.path.exists(duration_path):
        #                 os.makedirs(duration_path)
        #
        #             plt.close('all')
        #             duration_seconds, duration_start_time, duration_end_time = determine_dimming_duration(eve_line_event,
        #                                                                                                   earliest_allowed_time=slope_start_time,
        #                                                                                                   plot_path_filename='{0} Event {1} {2} Duration.png'.format(duration_path, flare_index, column),
        #                                                                                                   verbose=verbose, logger=logger)
        #
        #             jedi_row[column + ' Duration [s]'] = duration_seconds
        #             jedi_row[column + ' Duration Start Time'] = duration_start_time
        #             jedi_row[column + ' Duration End Time'] = duration_end_time
        #
        #         if verbose:
        #             logger.info("Event {0} {1} parameterizations complete.".format(flare_index, column))
        #
        #         # Produce a summary plot for each light curve
        #         plt.style.use('jpm-transparent-light')
        #
        #         ax = eve_line_event['irradiance'].plot(color='black')
        #         plt.axhline(linestyle='dashed', color='grey')
        #         start_date = eve_line_event.index.values[0]
        #         start_date_string = pd.to_datetime(str(start_date))
        #         plt.xlabel(start_date_string.strftime('%Y-%m-%d %H:%M:%S'))
        #         plt.ylabel('Irradiance [%]')
        #         fmtr = dates.DateFormatter("%H:%M:%S")
        #         ax.xaxis.set_major_formatter(fmtr)
        #         ax.xaxis.set_major_locator(dates.HourLocator())
        #         plt.title('Event {0} {1} nm Parameters'.format(flare_index, column))
        #
        #         if not np.isnan(depth_percent):
        #             plt.annotate('', xy=(depth_time, -depth_percent), xycoords='data',
        #                          xytext=(depth_time, 0), textcoords='data',
        #                          arrowprops=dict(facecolor='limegreen', edgecolor='limegreen', linewidth=2))
        #             mid_depth = -depth_percent / 2.0
        #             plt.annotate('{0:.2f} %'.format(depth_percent), xy=(depth_time, mid_depth), xycoords='data',
        #                          ha='right', va='center', rotation=90, size=18, color='limegreen')
        #
        #         if not np.isnan(slope_mean):
        #             if pd.isnull(slope_start_time) or pd.isnull(slope_end_time):
        #                 import pdb
        #                 pdb.set_trace()
        #             p = plt.plot(eve_line_event[slope_start_time:slope_end_time]['irradiance'], c='tomato')
        #
        #             inverse_str = '$^{-1}$'
        #             plt.annotate('slope_min={0} % s{1}'.format(latex_float(slope_min), inverse_str),
        #                          xy=(0.98, 0.12), xycoords='axes fraction', ha='right',
        #                          size=12, color=p[0].get_color())
        #             plt.annotate('slope_max={0} % s{1}'.format(latex_float(slope_max), inverse_str),
        #                          xy=(0.98, 0.08), xycoords='axes fraction', ha='right',
        #                          size=12, color=p[0].get_color())
        #             plt.annotate('slope_mean={0} % s{1}'.format(latex_float(slope_mean), inverse_str),
        #                          xy=(0.98, 0.04), xycoords='axes fraction', ha='right',
        #                          size=12, color=p[0].get_color())
        #
        #         if not np.isnan(duration_seconds):
        #             plt.annotate('', xy=(duration_start_time, 0), xycoords='data',
        #                          xytext=(duration_end_time, 0), textcoords='data',
        #                          arrowprops=dict(facecolor='dodgerblue', edgecolor='dodgerblue', linewidth=5, arrowstyle='<->'))
        #             mid_time = duration_start_time + (duration_end_time - duration_start_time) / 2
        #             plt.annotate(str(duration_seconds) + ' s', xy=(mid_time, 0), xycoords='data', ha='center', va='bottom', size=18, color='dodgerblue')
        #
        #         summary_path = output_path + 'Summary Plots/'
        #         if not os.path.exists(summary_path):
        #             os.makedirs(summary_path)
        #         summary_filename = '{0} Event {1} {2} Parameter Summary.png'.format(summary_path, flare_index, column)
        #         plt.savefig(summary_filename)
        #         if verbose:
        #             logger.info("Summary plot saved to %s" % summary_filename)
        #
        # # Write to the JEDI catalog on disk
        # jedi_row.to_csv(csv_filename, header=False, index=False, mode='a')
        # if verbose:
        #     logger.info('Event {0} JEDI row written to {1}.'.format(flare_index, csv_filename))

        # Update progress bar
        progress_bar.update(flare_index)

    progress_bar.finish()
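
Most of generate_jedi_catalog is bookkeeping around a single normalisation step: each light curve is converted to percent change relative to its pre-flare irradiance before any dimming parameters are measured. A toy sketch of just that step, with made-up column names and values:

import numpy as np
import pandas as pd

# Stand-in for eve_lines_event: two emission-line light curves (arbitrary absolute units).
eve_lines_event = pd.DataFrame({'17.1': [1.00, 0.98, 0.95, 0.97],
                                '19.5': [2.00, 1.99, 1.94, 1.96]})

# Hypothetical pre-flare irradiance, one value per column, standing in for the values
# returned by determine_preflare_irradiance in the pipeline above.
preflare_irradiance = np.array([1.00, 2.00])

# Same normalisation as in generate_jedi_catalog: percent change relative to the pre-flare level.
eve_lines_event = (eve_lines_event - preflare_irradiance) / preflare_irradiance * 100.0
print(eve_lines_event.round(1))  # 0.0, -2.0, -5.0, -3.0 and 0.0, -0.5, -3.0, -2.0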
Example n. 27
0
    def _parse_level_0cs(fp):
        """Parses and EVE Level 0CS file."""
        is_missing_data = False  # boolean to check for missing data
        missing_data_val = numpy.nan
        header = []
        fields = []
        line = fp.readline()
        # Read header at top of file
        while line.startswith(";"):
            header.append(line)
            if '; Missing data:' in line:
                is_missing_data = True
                missing_data_val = line.split(':')[1].strip()

            line = fp.readline()

        meta = MetaDict()
        for hline in header:
            if hline == '; Format:\n' or hline == '; Column descriptions:\n':
                continue
            elif ('Created' in hline) or ('Source' in hline):
                meta[hline.split(':', 1)[0].replace(
                    ';', ' ').strip()] = hline.split(':', 1)[1].strip()
            elif ':' in hline:
                meta[hline.split(':')[0].replace(
                    ';', ' ').strip()] = hline.split(':')[1].strip()

        fieldnames_start = False
        for hline in header:
            if hline.startswith("; Format:"):
                fieldnames_start = False
            if fieldnames_start:
                fields.append(hline.split(":")[0].replace(';', ' ').strip())
            if hline.startswith("; Column descriptions:"):
                fieldnames_start = True

        # Next line is YYYY DOY MM DD
        date_parts = line.split(" ")

        year = int(date_parts[0])
        month = int(date_parts[2])
        day = int(date_parts[3])

        def parser(x):
            # Parse date column (HHMM)
            return datetime(year, month, day, int(x[0:2]), int(x[2:4]))

        data = read_csv(fp,
                        sep=r"\s+",
                        names=fields,
                        index_col=0,
                        date_parser=parser,
                        header=None,
                        engine='python')
        if is_missing_data:  # If missing data specified in header
            data[data == float(missing_data_val)] = numpy.nan

        # Add the units data
        units = OrderedDict([('XRS-B proxy', u.W / u.m**2),
                             ('XRS-A proxy', u.W / u.m**2),
                             ('SEM proxy', u.W / u.m**2),
                             ('0.1-7ESPquad', u.W / u.m**2),
                             ('17.1ESP', u.W / u.m**2),
                             ('25.7ESP', u.W / u.m**2),
                             ('30.4ESP', u.W / u.m**2),
                             ('36.6ESP', u.W / u.m**2), ('darkESP', u.ct),
                             ('121.6MEGS-P', u.W / u.m**2),
                             ('darkMEGS-P', u.ct),
                             ('q0ESP', u.dimensionless_unscaled),
                             ('q1ESP', u.dimensionless_unscaled),
                             ('q2ESP', u.dimensionless_unscaled),
                             ('q3ESP', u.dimensionless_unscaled),
                             ('CMLat', u.deg), ('CMLon', u.deg)])
        # Todo: check units used.
        return data, meta, units
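
The parser above reads the ';'-prefixed header into a MetaDict and hands the remaining rows to pandas.read_csv. A minimal driver sketch, assuming _parse_level_0cs is reachable in the current scope (in SunPy it lives on the EVE time-series source class) and that 'latest_EVE_L0CS_DIODES_1m.txt' is a local copy of a Level 0CS file; both names are assumptions for illustration:

# Hypothetical local EVE Level 0CS file: plain text, ';'-prefixed header, then data rows.
with open('latest_EVE_L0CS_DIODES_1m.txt') as fp:
    data, meta, units = _parse_level_0cs(fp)

print(meta)                        # header lines parsed into a MetaDict
print(data['XRS-B proxy'].head())  # columns named from the '; Column descriptions:' block
print(units['XRS-B proxy'])        # W / m2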
Example n. 28
0
class InstrumentBase(object):
    """
    Base class for instruments. Subclasses must implement at least a detect() method, which is
    used by the `Observer` class to get the detector counts.

    Parameters
    ----------
    observing_time : `~astropy.units.Quantity`
        Tuple of start and end observing times
    observer_coordinate : `~astropy.coordinates.SkyCoord`
        Coordinate of the observing instrument
    """
    fits_template = MetaDict()

    @u.quantity_input
    def __init__(self, observing_time: u.s, observer_coordinate):
        self.observing_time = np.arange(
            observing_time[0].to(u.s).value,
            observing_time[1].to(u.s).value,
            self.cadence.value) * u.s
        self.observer_coordinate = observer_coordinate

    def detect(self, *args, **kwargs):
        """
        Converts emissivity for a particular transition to counts per detector channel. When writing
        a new instrument class, this method should be overridden.
        """
        raise NotImplementedError('No detect method implemented.')

    def build_detector_file(self, file_template, dset_shape, chunks, *args,
                            **kwargs):
        """
        Allocate space for counts data.
        """
        dset_names = [
            'density', 'electron_temperature', 'ion_temperature', 'velocity_x',
            'velocity_y', 'velocity_z'
        ]
        dset_names += kwargs.get('additional_fields', [])
        self.counts_file = file_template.format(self.name)

        with h5py.File(self.counts_file, 'a') as hf:
            if 'time' not in hf:
                dset = hf.create_dataset('time',
                                         data=self.observing_time.value)
                dset.attrs['units'] = self.observing_time.unit.to_string()
            for dn in dset_names:
                if dn not in hf:
                    hf.create_dataset(dn, dset_shape, chunks=chunks)

    @property
    def total_coordinates(self):
        """
        Helioprojective coordinates for all loops for the instrument observer
        """
        if not hasattr(self, 'counts_file'):
            raise AttributeError(
                f'No counts file found for {self.name}. Build it first '
                'using Observer.build_detector_files.')
        with h5py.File(self.counts_file, 'r') as hf:
            total_coordinates = u.Quantity(hf['coordinates'],
                                           hf['coordinates'].attrs['units'])

        coords = SkyCoord(x=total_coordinates[:, 0],
                          y=total_coordinates[:, 1],
                          z=total_coordinates[:, 2],
                          frame=HeliographicStonyhurst,
                          representation='cartesian')
        # This extra transform-to is due to a bug where to convert out of an HEEQ frame
        # one must first transform to a polar HGS frame
        # FIXME:  once this is fixed upstream in SunPy, this can be removed
        return coords.transform_to(HeliographicStonyhurst).transform_to(
            Helioprojective(observer=self.observer_coordinate))

    def los_velocity(self, v_x, v_y, v_z):
        """
        Compute the LOS velocity for the instrument observer
        """
        # NOTE: transform from HEEQ to HCC with respect to the instrument observer
        obs = self.observer_coordinate.transform_to(HeliographicStonyhurst)
        Phi_0, B_0 = obs.lon.to(u.radian), obs.lat.to(u.radian)
        v_los = v_z * np.sin(B_0) + v_x * np.cos(B_0) * np.cos(
            Phi_0) + v_y * np.cos(B_0) * np.sin(Phi_0)
        # NOTE: Negative sign to be consistent with convention v_los > 0 away from observer
        return -v_los

    def interpolate_and_store(self,
                              y,
                              loop,
                              interp_s,
                              start_index=None,
                              save_dir=False,
                              dset_name=None):
        """
        Interpolate in time and space and write to HDF5 file.
        """
        if type(y) is str:
            y = getattr(loop, y)
        f_s = interp1d(loop.field_aligned_coordinate.value,
                       y.value,
                       axis=1,
                       kind='linear')
        y_s = f_s(interp_s)
        if loop.time.shape == (1, ):
            # If static case, no need to interpolate in time
            # But require that the observing and loop times are the same
            assert np.all(loop.time == self.observing_time)
            interpolated_y = y_s
        else:
            f_t = interp1d(loop.time.value,
                           y_s,
                           axis=0,
                           kind='linear',
                           fill_value='extrapolate')
            interpolated_y = f_t(self.observing_time.value)
        if save_dir:
            save_path = os.path.join(
                save_dir, f'{loop.name}_{self.name}_{dset_name}.pkl')
            with open(save_path, 'wb') as f:
                pickle.dump((interpolated_y, y.unit.to_string(), start_index,
                             dset_name), f)
            return save_path
        else:
            return interpolated_y * y.unit

    @staticmethod
    def assemble_arrays(interp_files, savefile):
        """
        Assemble interpolated results into single file
        """
        with h5py.File(savefile, 'a', driver=None) as hf:
            for filename in interp_files:
                with open(filename, 'rb') as f:
                    y, units, start_index, dset_name = pickle.load(f)
                tmp = u.Quantity(y, units)
                InstrumentBase.commit(tmp, hf[dset_name], start_index)
        return interp_files

    @staticmethod
    def commit(y, dset, start_index):
        if 'units' not in dset.attrs:
            dset.attrs['units'] = y.unit.to_string()
        dset[:, start_index:(start_index + y.shape[1])] = y.value

    @staticmethod
    def generic_2d_histogram(counts_filename, dset_name, i_time, bins,
                             bin_range):
        """
        Turn flattened quantity into 2D weighted histogram
        """
        with h5py.File(counts_filename, 'r') as hf:
            weights = np.array(hf[dset_name][i_time, :])
            units = u.Unit(hf[dset_name].attrs['units'])
            coordinates = np.array(hf['coordinates'][:, :2])
        hc, _ = np.histogramdd(coordinates, bins=bins[:2], range=bin_range[:2])
        h, _ = np.histogramdd(coordinates,
                              bins=bins[:2],
                              range=bin_range[:2],
                              weights=weights)
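        # Dividing the weighted histogram by the per-bin sample counts gives the mean of
        # the weights in each bin; np.where replaces zero counts with 1 to avoid dividing
        # by zero in empty bins (which stay at 0).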
        h /= np.where(hc == 0, 1, hc)
        return h.T * units

    def make_fits_header(self, field, channel):
        """
        Build up FITS header with relevant instrument information.
        """
        min_x, max_x, min_y, max_y = self._get_fov(field.magnetogram)
        bins, _ = self.make_detector_array(field)
        fits_header = MetaDict()
        fits_header['crval1'] = (min_x + (max_x - min_x) / 2).value
        fits_header['crval2'] = (min_y + (max_y - min_y) / 2).value
        fits_header['cunit1'] = self.total_coordinates.Tx.unit.to_string()
        fits_header['cunit2'] = self.total_coordinates.Ty.unit.to_string()
        fits_header['hglt_obs'] = self.observer_coordinate.lat.to(u.deg).value
        fits_header['hgln_obs'] = self.observer_coordinate.lon.to(u.deg).value
        fits_header['ctype1'] = 'HPLN-TAN'
        fits_header['ctype2'] = 'HPLT-TAN'
        fits_header['date-obs'] = field.magnetogram.meta['date-obs']
        fits_header['dsun_obs'] = self.observer_coordinate.radius.to(u.m).value
        fits_header['rsun_obs'] = (
            (constants.radius /
             (self.observer_coordinate.radius - constants.radius)).decompose()
            * u.radian).to(u.arcsec).value
        fits_header['cdelt1'] = self.resolution.x.value
        fits_header['cdelt2'] = self.resolution.y.value
        fits_header['crpix1'] = (bins.x.value + 1.0) / 2.0
        fits_header['crpix2'] = (bins.y.value + 1.0) / 2.0
        if 'instrument_label' in channel:
            fits_header['instrume'] = channel['instrument_label']
        if 'wavelength' in channel:
            fits_header['wavelnth'] = channel['wavelength'].value
        # Anything that needs to be overridden in a subclass can be put in the fits template
        fits_header.update(self.fits_template)

        return fits_header

    def _get_fov(self, ar_map):
        """
        Find the field of view, taking into consideration the corners of the
        original AR map and the loop coordinates in HPC.
        """
        # Check magnetogram FOV
        left_corner = (ar_map.bottom_left_coord.transform_to(
            HeliographicStonyhurst).transform_to(
                Helioprojective(observer=self.observer_coordinate)))
        right_corner = (ar_map.top_right_coord.transform_to(
            HeliographicStonyhurst).transform_to(
                Helioprojective(observer=self.observer_coordinate)))
        # Set bounds to include all loops and original magnetogram FOV (with some padding)
        loop_coords = self.total_coordinates
        if 'gaussian_width' in self.channels[0]:
            width_max = u.Quantity(
                [c['gaussian_width']['x'] for c in self.channels]).max()
            pad_x = self.resolution.x * width_max
            width_max = u.Quantity(
                [c['gaussian_width']['y'] for c in self.channels]).max()
            pad_y = self.resolution.y * width_max
        else:
            pad_x = self.resolution.x * 1 * u.pixel
            pad_y = self.resolution.y * 1 * u.pixel
        min_x = min(loop_coords.Tx.min(), left_corner.Tx) - pad_x
        max_x = max(loop_coords.Tx.max(), right_corner.Tx) + pad_x
        min_y = min(loop_coords.Ty.min(), left_corner.Ty) - pad_y
        max_y = max(loop_coords.Ty.max(), right_corner.Ty) + pad_y

        return min_x, max_x, min_y, max_y

    def make_detector_array(self, field):
        """
        Construct bins based on desired observing area.
        """
        # Get field of view
        min_x, max_x, min_y, max_y = self._get_fov(field.magnetogram)
        min_z = self.total_coordinates.distance.min()
        max_z = self.total_coordinates.distance.max()
        delta_x = max_x - min_x
        delta_y = max_y - min_y
        bins_x = np.ceil(delta_x / self.resolution.x)
        bins_y = np.ceil(delta_y / self.resolution.y)
        bins_z = max(bins_x, bins_y)

        # NOTE: the z-quantities are used to determine the integration step along the LOS
        bins = SpatialPair(x=bins_x, y=bins_y, z=bins_z)
        bin_range = SpatialPair(x=u.Quantity([min_x, max_x]),
                                y=u.Quantity([min_y, max_y]),
                                z=u.Quantity([min_z, max_z]))

        return bins, bin_range
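
InstrumentBase is abstract in spirit: a working instrument needs at least a name, a cadence (consumed by __init__), and a detect() override. A minimal sketch of such a subclass, assuming the class definition above is in scope; the class name, cadence, observer coordinate, and detect() body are placeholders, not any real instrument:

import numpy as np
import astropy.units as u
from astropy.coordinates import SkyCoord
from sunpy.coordinates import HeliographicStonyhurst

class DummyInstrument(InstrumentBase):
    """Toy subclass; the attribute values and detect() behaviour are illustrative only."""
    name = 'dummy'
    cadence = 10 * u.s  # InstrumentBase.__init__ uses self.cadence to build observing_time

    def detect(self, *args, **kwargs):
        # A real instrument would fold emissivities through its wavelength and temperature
        # response here; returning zero counts keeps the sketch self-contained.
        return np.zeros(self.observing_time.shape) * u.ct

observer = SkyCoord(lon=0 * u.deg, lat=0 * u.deg, radius=1 * u.AU,
                    frame=HeliographicStonyhurst, obstime='2018-01-01')
instrument = DummyInstrument((0, 100) * u.s, observer)
print(instrument.observing_time)  # 0, 10, ..., 90 s
print(instrument.detect().sum())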
Example n. 29
0
    def __call__(self, *args, **kwargs):
        """ Method for running the factory. Takes arbitrary arguments and
        keyword arguments and passes them to a sequence of pre-registered types
        to determine which is the correct Map-type to build.

        Arguments args and kwargs are passed through to the validation
        function and to the constructor for the final type.  For Map types,
        the validation function must take a data-header pair as an argument.

        Parameters
        ----------

        composite : boolean, optional
            Indicates if collection of maps should be returned as a CompositeMap

        cube : boolean, optional
            Indicates if collection of maps should be returned as a MapCube

        sequence : boolean, optional
            Indicates if collection of maps should be returned as a MapSequence

        silence_errors : boolean, optional
            If set, ignore data-header pairs which cause an exception.

        Notes
        -----
        Extra keyword arguments are passed through to `sunpy.io.read_file` such
        as `memmap` for FITS files.
        """

        # Hack to get around Python 2.x not backporting PEP 3102.
        composite = kwargs.pop('composite', False)

        # MapCube Deprecation
        cube = kwargs.pop('cube', False)
        if cube:
            warnings.warn('MapCube is now deprecated and renamed MapSequence. ' +
                          'Please use the syntax Map(sequence=True) instead of Map(cube=True).',
                          SunpyDeprecationWarning, stacklevel=2)

        sequence = kwargs.pop('sequence', False)
        silence_errors = kwargs.pop('silence_errors', False)

        data_header_pairs, already_maps = self._parse_args(*args, **kwargs)

        new_maps = list()

        # Loop over each registered type and check to see if WidgetType
        # matches the arguments.  If it does, use that type.
        for pair in data_header_pairs:
            data, header = pair
            meta = MetaDict(header)

            try:
                new_map = self._check_registered_widgets(data, meta, **kwargs)
                new_maps.append(new_map)
            except (NoMatchError, MultipleMatchError, ValidationFunctionError):
                if not silence_errors:
                    raise

        new_maps += already_maps

        # If the list is meant to be a cube, instantiate a map cube
        if cube:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=SunpyDeprecationWarning)
                amapcube = MapCube(new_maps, **kwargs)
            return amapcube

        # If the list is meant to be a sequence, instantiate a map sequence
        if sequence:
            return MapSequence(new_maps, **kwargs)

        # If the list is meant to be a composite map, instantiate one
        if composite:
            return CompositeMap(new_maps, **kwargs)

        if len(new_maps) == 1:
            return new_maps[0]

        return new_maps
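
A short usage sketch of the factory described above; the FITS file names are hypothetical, and the (data, header) pair follows the minimal custom-map pattern from the SunPy documentation:

import numpy as np
import sunpy.map

# A (data, header) pair: the header dict is normalised to a MetaDict by the factory.
data = np.arange(100).reshape(10, 10)
header = {'cunit1': 'arcsec', 'cunit2': 'arcsec'}
custom_map = sunpy.map.Map(data, header)

# Multiple inputs return a list of Maps by default, or a MapSequence / CompositeMap
# via the keywords handled above (file names are placeholders).
seq = sunpy.map.Map('aia_171_a.fits', 'aia_171_b.fits', sequence=True)
comp = sunpy.map.Map('aia_171_image.fits', 'hmi_magnetogram.fits', composite=True)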
Example n. 30
0
    def _parse_netcdf(filepath):
        """
        Parses the netCDF GOES files to return the data, header and associated units.

        Parameters
        ----------
        filepath : `str`
            The path of the file to parse
        """
        with h5netcdf.File(filepath, mode="r",
                           **XRSTimeSeries._netcdf_read_kw) as h5nc:
            header = MetaDict(OrderedDict(h5nc.attrs))
            flux_name = h5nc.variables.get("a_flux") or h5nc.variables.get(
                "xrsa_flux")
            if flux_name is None:
                raise ValueError(
                    f"No flux data (either a_flux or xrsa_flux) found in file: {filepath}"
                )
            flux_name_a = flux_name.name
            flux_name_b = flux_name_a.replace("a", "b")
            xrsa = np.array(h5nc[flux_name_a])
            xrsb = np.array(h5nc[flux_name_b])
            xrsa_quality = np.array(h5nc[flux_name_a.replace("flux", "flags")])
            xrsb_quality = np.array(h5nc[flux_name_b.replace("flux", "flags")])
            start_time_str = h5nc["time"].attrs["units"]
            # h5netcdf < 0.14 return bytes instead of a str
            if isinstance(start_time_str, bytes):
                start_time_str = start_time_str.decode("utf-8")
            start_time_str = start_time_str.lstrip("seconds since").rstrip(
                "UTC").strip()
            times = Time(parse_time(start_time_str).unix + h5nc["time"],
                         format="unix")

        try:
            times = times.datetime
        except ValueError:
            # We do not assume that the leap second occurs at the end of the file.
            # Therefore, we need to find it: we convert the times to isot strings,
            # use numpy to find the leap-second string, then use that to work out
            # the index of the leap timestamp.
            idx = np.argwhere(np.char.find(times.isot, ":60.") != -1)
            # We only handle the case where there is exactly one leap second in the file;
            # more than one should never occur in a single file.
            if len(idx) != 1:
                raise ValueError(
                    f"More than one leap second was found in: {Path(filepath).name}"
                )
            warn_user(
                f"There is one leap second timestamp present in: {Path(filepath).name}, "
                "This timestamp has been rounded to `:59.999` to allow its conversion into a Python datetime. "
                f"The leap second timestamp was: {times.isot[idx]}")
            times[idx] = Time(times[idx].isot.tolist()[0][0][:17] +
                              "59.999").unix
            times = times.datetime
        data = DataFrame(
            {
                "xrsa": xrsa,
                "xrsb": xrsb,
                "xrsa_quality": xrsa_quality,
                "xrsb_quality": xrsb_quality
            },
            index=times)
        data = data.replace(-9999, np.nan)
        units = OrderedDict([
            ("xrsa", u.W / u.m**2),
            ("xrsb", u.W / u.m**2),
            ("xrsa_quality", int),
            ("xrsb_quality", int),
        ])
        return data, header, units
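
The returned (data, header, units) triple is what the GOES XRS time-series class is built from. In normal use the private parser is not called directly; a sketch of the usual route, with a hypothetical GOES-16 netCDF file name:

import sunpy.timeseries

# Hypothetical local GOES-16 XRS level-2 flux file.
filepath = 'sci_xrsf-l2-flx1s_g16_d20200901_v2-1-0.nc'

# The factory hands recognised GOES XRS netCDF files to _parse_netcdf above.
ts = sunpy.timeseries.TimeSeries(filepath, source='xrs')
print(ts.columns)        # ['xrsa', 'xrsb', 'xrsa_quality', 'xrsb_quality']
print(ts.units['xrsa'])  # W / m2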