Example #1
0
def test_create_mithout_metadata_or_timerange():
    """
    Check that `TimeSeriesMetaData` raises `ValueError` when no time range is
    supplied, both with and without column names.
    """
    column_names = ['column1', 'column2']
    # Neither call supplies a timerange, so each one is expected to fail.
    for init_kwargs in ({'colnames': column_names}, {}):
        with pytest.raises(ValueError):
            TimeSeriesMetaData(**init_kwargs)
Example #2
0
def test_to_string_few_metadict_entries(basic_1_md):
    """
    Check that ``to_string(depth=3)`` returns a `str` when the metadata entry
    holds a single key, i.e. fewer keys than the requested depth.
    """
    # Reuse the time range and column names of the fixture's first entry.
    first_entry = basic_1_md.metadata[0]
    time_range, column_names = first_entry[0], first_entry[1]
    single_key_meta = MetaDict(OrderedDict([('md1_key1', 'value1')]))

    sparse_md = TimeSeriesMetaData([(time_range, column_names, single_key_meta)])
    rendered = sparse_md.to_string(depth=3)
    assert isinstance(rendered, str)
Example #3
0
def test_to_string_few_metadict_entries(basic_1_md):
    """
    Render metadata whose only entry has one key with ``depth=3`` and check
    that the result is a `str`.
    """
    # Borrow the time range and column names from the fixture's first entry.
    source_entry = basic_1_md.metadata[0]
    entry = (
        source_entry[0],
        source_entry[1],
        MetaDict(OrderedDict([('md1_key1', 'value1')])),
    )
    reduced_md = TimeSeriesMetaData([entry])
    assert isinstance(reduced_md.to_string(depth=3), str)
Example #4
0
def test_create_mithout_metadata():
    """
    Check that `TimeSeriesMetaData` can be created from a time range alone,
    with and without column names, and that both results hold one entry with
    the given time range and an empty `MetaDict`.
    """
    tr = TimeRange('2010-01-01 13:59:57.468999', '2010-01-02 13:59:56.091999')
    colnames = ['column1', 'column2']

    # With explicit column names.
    tsmd_1 = TimeSeriesMetaData(timerange=tr, colnames=colnames)
    assert isinstance(tsmd_1, TimeSeriesMetaData)
    assert tsmd_1.metadata[0][1] == colnames

    # Without column names the entry's column list is expected to be empty.
    tsmd_2 = TimeSeriesMetaData(timerange=tr)
    # This assertion used to re-check tsmd_1, leaving tsmd_2's type unverified.
    assert isinstance(tsmd_2, TimeSeriesMetaData)
    assert tsmd_2.metadata[0][1] == []

    # Both objects share the time range, an empty metadict and a single entry.
    assert tsmd_1.metadata[0][0] == tsmd_2.metadata[0][0] == tr
    assert tsmd_1.metadata[0][2] == tsmd_2.metadata[0][2] == MetaDict()
    assert len(tsmd_1.metadata) == len(tsmd_2.metadata) == 1
Example #5
0
    def truncate(self, a, b=None, int=None):
        """Return a new TimeSeries restricted to the selected slice of the data.

        Parameters
        ----------
        a : `sunpy.time.TimeRange`, `str` or `int`
            A time range to truncate to, or the start of the slice (a time in a
            format recognised by pandas, or an index integer).

        b : `str` or `int`
            The end of the slice, as a time in a format recognised by pandas or
            an index integer. Ignored when ``a`` is a `sunpy.time.TimeRange`.

        int : `int`
            The step used when slicing the sorted data.

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new time series with only the selected times.
        """
        # Two strings are combined into a TimeRange so that the SunPy date
        # parser interprets them.
        if isinstance(a, str) and isinstance(b, str):
            a = TimeRange(a, b)

        # A TimeRange supplies both bounds; otherwise use the arguments as given.
        if isinstance(a, TimeRange):
            lower, upper = a.start.datetime, a.end.datetime
        else:
            lower, upper = a, b

        # Slice the chronologically sorted data, applying the step if given.
        sliced = self.data.sort_index()[lower:upper:int]

        # Start from empty metadata and only carry entries over when data remains.
        new_meta = TimeSeriesMetaData([])
        if len(sliced) > 0:
            span = TimeRange(sliced.index.min(), sliced.index.max())
            new_meta = TimeSeriesMetaData(copy.deepcopy(self.meta.metadata))
            new_meta._truncate(span)

        # Build an object of the same class, then sanitise its metadata and units.
        truncated_ts = self.__class__(sliced.sort_index(), new_meta, copy.copy(self.units))
        truncated_ts._sanitize_metadata()
        truncated_ts._sanitize_units()
        return truncated_ts
def test_create_mithout_metadata():
    """
    Check that `TimeSeriesMetaData` can be created from a time range alone,
    with and without column names, and that both results hold one entry with
    the given time range and an empty `MetaDict`.
    """
    tr = TimeRange('2010-01-01 13:59:57.468999', '2010-01-02 13:59:56.091999')
    colnames = ['column1', 'column2']

    # With explicit column names.
    tsmd_1 = TimeSeriesMetaData(timerange=tr, colnames=colnames)
    assert isinstance(tsmd_1, TimeSeriesMetaData)
    assert tsmd_1.metadata[0][1] == colnames

    # NOTE(review): a time range IS passed here, yet the expected warning text
    # says none was given - confirm the message matches the constructor.
    with pytest.warns(SunpyUserWarning,
                      match='No time range given for metadata'):
        tsmd_2 = TimeSeriesMetaData(timerange=tr)
    # This assertion used to re-check tsmd_1, leaving tsmd_2's type unverified.
    assert isinstance(tsmd_2, TimeSeriesMetaData)
    assert tsmd_2.metadata[0][1] == []

    # Both objects share the time range, an empty metadict and a single entry.
    assert tsmd_1.metadata[0][0] == tsmd_2.metadata[0][0] == tr
    assert tsmd_1.metadata[0][2] == tsmd_2.metadata[0][2] == MetaDict()
    assert len(tsmd_1.metadata) == len(tsmd_2.metadata) == 1
Example #7
0
    def extract(self, column_name):
        """
        Build a new single-column time series from one column of this one.

        Parameters
        ----------
        column_name : `str`
            Name of the column to keep; it is looked up in both the data and
            the units.

        Returns
        -------
        `~sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries` with only the selected column,
            with rows that have no value dropped.
        """
        # TODO: allow the extract function to pick more than one column

        # Select with a list so the result stays a frame, then drop empty rows.
        column_frame = self._data[[column_name]].dropna()
        column_units = {column_name: self.units[column_name]}

        # Always a generic series, built on a shallow copy of the metadata list.
        extracted = GenericTimeSeries(
            column_frame.sort_index(),
            TimeSeriesMetaData(copy.copy(self.meta.metadata)),
            column_units)
        extracted._sanitize_metadata()
        return extracted
Example #8
0
def overlap_and_interleave_with_basic_1_md():
    """
    Build a single-entry `TimeSeriesMetaData` covering 2010-01-01 01:01 to
    2010-01-02 01:01 for ``column1`` and ``column2``.
    """
    time_range = TimeRange('2010-01-01 01:01:00.0', '2010-01-02 01:01:00.0')

    # Insertion order of the keys is preserved by the OrderedDict.
    entries = OrderedDict()
    entries['other_key1'] = 'value1'
    entries['other_key2'] = 'value2'
    entries['all_same'] = 'value3'
    entries['all_different'] = 'diff_5'

    return TimeSeriesMetaData(
        [(time_range, ['column1', 'column2'], MetaDict(entries))])
Example #9
0
def basic_4_md():
    """
    Build a `TimeSeriesMetaData` from a single ``(timerange, colnames, meta)``
    tuple covering 2010-01-01 20:59 to 2010-01-03 20:59.
    """
    time_range = TimeRange('2010-01-01 20:59:57.468999',
                           '2010-01-03 20:59:56.091999')

    # Insertion order of the keys is preserved by the OrderedDict.
    entries = OrderedDict()
    entries['md4_key1'] = 'value1'
    entries['md4_key2'] = 'value2'
    entries['all_same'] = 'value3'
    entries['all_different'] = 'diff_4'

    # The entry is passed as a bare tuple, not wrapped in a list.
    single_entry = (time_range, ['md4_column1', 'md4_column2'], MetaDict(entries))
    return TimeSeriesMetaData(single_entry)
Example #10
0
def basic_3_md():
    """
    Build a single-entry `TimeSeriesMetaData` for ``column1`` and ``column2``
    dated 2010-01-03.
    """
    # NOTE(review): the end timestamp is about 1.4 s earlier than the start on
    # the same day - confirm whether this reversed range is intentional.
    time_range = TimeRange('2010-01-03 13:59:57.468999',
                           '2010-01-03 13:59:56.091999')

    # Insertion order of the keys is preserved by the OrderedDict.
    entries = OrderedDict()
    entries['md3_key1'] = 'value1'
    entries['md3_key2'] = 'value2'
    entries['all_same'] = 'value3'
    entries['all_different'] = 'diff_3'

    return TimeSeriesMetaData(
        [(time_range, ['column1', 'column2'], MetaDict(entries))])
Example #11
0
    def sort_index(self, **kwargs):
        """Return a copy of this TimeSeries with its data sorted by index.

        Keyword arguments are forwarded to the ``sort_index`` method of the
        underlying data. The metadata list and the units are shallow-copied.

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new `GenericTimeSeries` holding the sorted data.
        """
        ordered_frame = self.data.sort_index(**kwargs)
        meta_copy = TimeSeriesMetaData(copy.copy(self.meta.metadata))
        units_copy = copy.copy(self.units)
        return GenericTimeSeries(ordered_frame, meta_copy, units_copy)
Example #12
0
    def __init__(self, data, meta=None, units=None, **kwargs):
        """
        Store the data and attach metadata and units to it.

        Parameters
        ----------
        data
            The underlying data, stored unchanged on ``self.data``.
        meta : optional
            `None`, a dict-like (`dict`, `OrderedDict`, `MetaDict`) or a `tuple`
            is wrapped in a `TimeSeriesMetaData` together with the time range
            and column names of the data. Any other value is stored unchanged.
        units : `dict`, optional
            Stored unchanged; an empty `dict` is used when omitted.
        **kwargs
            Accepted but not used here.
        """
        self.data = data
        tr = self.time_range

        # No metadata given: fall back to an empty MetaDict.
        if meta is None:
            meta = MetaDict()

        if isinstance(meta, (dict, OrderedDict, MetaDict, tuple)):
            # A single set of values: infer timerange and colnames from the data.
            self.meta = TimeSeriesMetaData(meta, tr, list(self.data.columns.values))
        else:
            # Should have a list of 3-tuples giving a complex metadata list.
            self.meta = meta

        self.units = {} if units is None else units
Example #13
0
    def add_column(self,
                   colname,
                   quantity,
                   unit=False,
                   overwrite=True,
                   **kwargs):
        """
        Return a new TimeSeries with the given column added or updated.

        Parameters
        ----------
        colname : `str`
            The heading of the column you want output.

        quantity : `~astropy.units.quantity.Quantity` or `~numpy.ndarray`
            The values to be placed within the column.
            If updating values only then a numpy array is permitted.

        unit : `~astropy.units.Unit`, optional, default:False
            The unit recorded for a newly added column. When not given, the
            unit of ``quantity`` is used if it is a Quantity, otherwise the
            column is dimensionless. An existing column keeps its unit.

        overwrite : `bool`, optional, default:True
            Set to true to allow the method to overwrite a column already present
            in the TimeSeries.

        Returns
        -------
        newts : TimeSeries

        """
        # Get the expected units from the quantity if required
        if not unit and isinstance(quantity, astropy.units.quantity.Quantity):
            unit = quantity.unit
        elif not unit:
            unit = u.dimensionless_unscaled

        # Make a copy of all the TimeSeries components.
        data = copy.copy(self.data)
        meta = TimeSeriesMetaData(copy.copy(self.meta.metadata))
        units = copy.copy(self.units)

        # Record the unit only when the column is not already present.
        is_new_column = colname not in self.data.columns
        if is_new_column:
            units[colname] = unit

        # Update or add the data. A Quantity is converted to the column's unit
        # whenever it is actually written; previously a new column added with
        # overwrite=False was stored unconverted.
        if is_new_column or overwrite:
            values = quantity
            if isinstance(values, astropy.units.quantity.Quantity):
                values = values.to(units[colname]).value
            data[colname] = values

        # Return a new TimeSeries with the given updated/added column.
        return self.__class__(data, meta, units)
Example #14
0
    def __init__(self, data, meta=None, units=None, **kwargs):
        """
        Store the data, attach metadata and make sure every column has a unit.

        Parameters
        ----------
        data
            The underlying data, stored unchanged on ``self._data``.
        meta : optional
            `None`, a dict-like (`dict`, `OrderedDict`, `MetaDict`) or a `tuple`
            is wrapped in a `TimeSeriesMetaData` together with the time range
            and columns of the data. Any other value is stored unchanged.
        units : `dict`, optional
            Stored unchanged (not copied); an empty `dict` is used when omitted.
            Columns without an entry are filled in as dimensionless, with a
            user warning for each.
        **kwargs
            Accepted but not used here.
        """
        self._data = data
        tr = self.time_range

        # No metadata given: fall back to an empty MetaDict.
        if meta is None:
            meta = MetaDict()

        if isinstance(meta, (dict, OrderedDict, MetaDict, tuple)):
            # A single set of values: infer timerange and colnames from the data.
            self.meta = TimeSeriesMetaData(meta, tr, self.columns)
        else:
            # Should have a list of 3-tuples giving a complex metadata list.
            self.meta = meta

        self.units = {} if units is None else units

        # Any column lacking a unit is reported and treated as dimensionless.
        for col in self.columns:
            if col in self.units:
                continue
            warn_user(f'Unknown units for {col}')
            self.units[col] = u.dimensionless_unscaled
Example #15
0
    def truncate(self, a, b=None, int=None):
        """Return a new TimeSeries restricted to the selected slice of the data.

        Parameters
        ----------
        a : `sunpy.time.TimeRange`, `str` or `int`
            A time range to truncate to, or the start of the slice (a time in a
            format recognised by pandas, or an index integer).

        b : `str` or `int`
            The end of the slice, as a time in a format recognised by pandas or
            an index integer. Ignored when ``a`` is a `sunpy.time.TimeRange`.

        int : `int`
            The step used when slicing the sorted data.

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new time series with only the selected times.
        """
        # Two strings are combined into a TimeRange so that the SunPy date
        # parser interprets them.
        if isinstance(a, str) and isinstance(b, str):
            a = TimeRange(a, b)

        # A TimeRange supplies both bounds; otherwise use the arguments as given.
        if isinstance(a, TimeRange):
            lower, upper = a.start, a.end
        else:
            lower, upper = a, b

        # Slice the chronologically sorted data, applying the step if given.
        sliced = self.data.sort_index()[lower:upper:int]

        # Start from empty metadata and only carry entries over when data remains.
        new_meta = TimeSeriesMetaData([])
        if len(sliced) > 0:
            span = TimeRange(sliced.index.min(), sliced.index.max())
            new_meta = TimeSeriesMetaData(copy.deepcopy(self.meta.metadata))
            new_meta._truncate(span)

        # Build an object of the same class, then sanitise its metadata and units.
        truncated_ts = self.__class__(sliced.sort_index(), new_meta,
                                      copy.copy(self.units))
        truncated_ts._sanitize_metadata()
        truncated_ts._sanitize_units()
        return truncated_ts
Example #16
0
    def __init__(self, data, meta=None, units=None, **kwargs):
        """
        Store the data and attach metadata and units to it.

        Parameters
        ----------
        data
            The underlying data, stored unchanged on ``self.data``. Its index
            minimum and maximum are used to build the time range.
        meta : optional
            `None`, a dict-like (`dict`, `OrderedDict`, `MetaDict`) or a `tuple`
            is wrapped in a `TimeSeriesMetaData` together with the time range
            and column names of the data. Any other value is stored unchanged.
        units : `dict`, optional
            Stored unchanged; an empty `dict` is used when omitted.
        **kwargs
            Accepted but not used here.
        """
        self.data = data
        index = self.data.index
        tr = TimeRange(index.min(), index.max())

        # No metadata given: fall back to an empty MetaDict.
        if meta is None:
            meta = MetaDict()

        if isinstance(meta, (dict, OrderedDict, MetaDict, tuple)):
            # A single set of values: infer timerange and colnames from the data.
            self.meta = TimeSeriesMetaData(meta, tr, list(self.data.columns.values))
        else:
            # Should have a list of 3-tuples giving a complex metadata list.
            self.meta = meta

        self.units = {} if units is None else units
Example #17
0
class GenericTimeSeries:
    """
    A generic time series object.

    Parameters
    ----------
    data : `~pandas.DataFrame`
        A `pandas.DataFrame` representing one or more fields as a function of time.
    meta : `~sunpy.timeseries.metadata.TimeSeriesMetaData`, optional
        The metadata giving details about the time series data/instrument.
        Defaults to `None`.
    units : `dict`, optional
        A mapping from column names in ``data`` to the physical units of that column.
        Defaults to `None`.

    Attributes
    ----------
    data : `~pandas.DataFrame`
        A `pandas.DataFrame` representing one or more fields as a function of time.
    meta : `~sunpy.timeseries.metadata.TimeSeriesMetaData`
        The metadata giving details about the time series data/instrument.
    units : `dict`
        A mapping from column names in ``data`` to the physical units of that column.

    Examples
    --------
    >>> from sunpy.timeseries import TimeSeries
    >>> from sunpy.time import parse_time
    >>> from astropy.time import TimeDelta
    >>> import astropy.units as u
    >>> import numpy as np
    >>> import pandas as pd
    >>> times = parse_time("now") - TimeDelta(np.arange(24 * 60)*u.minute)
    >>> intensity = np.sin(np.arange(0, 12 * np.pi, step=(12 * np.pi) / (24 * 60)))
    >>> df = pd.DataFrame(intensity, index=times, columns=['intensity'])
    >>> ts = TimeSeries(df)
    >>> ts.peek()  # doctest: +SKIP

    References
    ----------
    * `Pandas Documentation <https://pandas.pydata.org/pandas-docs/stable/>`_
    """
    # Class attribute used to specify the source class of the TimeSeries.
    _source = None
    _registry = dict()

    def __init_subclass__(cls, **kwargs):
        """
        An __init_subclass__ hook initializes all of the subclasses of a given
        class.

        So for each subclass, it will call this block of code on import.
        This replicates some metaclass magic without the need to be
        aware of metaclasses. Here we use this to register each subclass
        in a dict that has the `is_datasource_for` attribute. This is
        then passed into the TimeSeries Factory so we can register them.
        """
        super().__init_subclass__(**kwargs)
        if hasattr(cls, 'is_datasource_for'):
            cls._registry[cls] = cls.is_datasource_for

    # kwargs are not used here but are passed in for sources.
    def __init__(self, data, meta=None, units=None, **kwargs):
        self.data = data
        tr = self.time_range
        # Check metadata input
        if meta is None:
            # No meta given, so default
            self.meta = TimeSeriesMetaData(MetaDict(), tr,
                                           list(self.data.columns.values))
        elif isinstance(meta, (dict, OrderedDict, MetaDict)):
            # Given the values for metadata (dict) and infer timerange and colnames from the data
            self.meta = TimeSeriesMetaData(meta, tr,
                                           list(self.data.columns.values))
        elif isinstance(meta, tuple):
            # Given the values all in a tuple
            self.meta = TimeSeriesMetaData(meta, tr,
                                           list(self.data.columns.values))
        else:
            # Should have a list of 3-tuples giving a complex metadata list.
            self.meta = meta

        if units is None:
            self.units = {}
        else:
            self.units = units

        # TODO: Fix this?
        # Validate input data
        # self._validate_meta()
        # self._validate_units()

# #### Attribute definitions #### #

    @property
    def source(self):
        """
        A string/object used to specify the source class of the TimeSeries.
        """
        return self._source

    @property
    def columns(self):
        """
        A list of all the names of the columns in the data.
        """
        return list(self.data.columns.values)

    @property
    def index(self):
        """
        The time index of the data.
        """
        return self.data.index

    @property
    def time_range(self):
        """
        The start and end times of the TimeSeries as a `~sunpy.time.TimeRange`.

        `None` is returned when the data is empty.
        """
        if len(self.data) > 0:
            return TimeRange(self.data.index.min(), self.data.index.max())
        else:
            return None

# #### Data Access, Selection and Organisation Methods #### #

    def quantity(self, colname, **kwargs):
        """
        Return a `~astropy.units.quantity.Quantity` for the given column.

        Parameters
        ----------
        colname : `str`
            The heading of the column you want to output.

        Returns
        -------
        `~astropy.units.quantity.Quantity`
        """
        values = self.data[colname].values
        unit = self.units[colname]
        return u.Quantity(values, unit)

    def add_column(self,
                   colname,
                   quantity,
                   unit=False,
                   overwrite=True,
                   **kwargs):
        """
        Return a new `~sunpy.timeseries.TimeSeries` with the given column added
        or updated.

        Parameters
        ----------
        colname : `str`
            The heading of the column you want output.
        quantity : `~astropy.units.quantity.Quantity` or `~numpy.ndarray`
            The values to be placed within the column.
            If updating values only then a numpy array is permitted.
        unit : `~astropy.units.Unit`, optional
            The unit recorded for a newly added column. Defaults to `False`, in
            which case the unit of ``quantity`` is used if it is a Quantity and
            the column is dimensionless otherwise. An existing column keeps its unit.
        overwrite : `bool`, optional
            Defaults to `True`, allowing the method to overwrite a column already present in the `~sunpy.timeseries.TimeSeries`.

        Returns
        -------
        `sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries`.
        """
        # Get the expected units from the quantity if required
        if not unit and isinstance(quantity, astropy.units.quantity.Quantity):
            unit = quantity.unit
        elif not unit:
            unit = u.dimensionless_unscaled

        # Make a copy of all the TimeSeries components.
        data = copy.copy(self.data)
        meta = TimeSeriesMetaData(copy.copy(self.meta.metadata))
        units = copy.copy(self.units)

        # Record the unit only when the column is not already present.
        is_new_column = colname not in self.data.columns
        if is_new_column:
            units[colname] = unit

        # Update or add the data. A Quantity is converted to the column's unit
        # whenever it is actually written; previously a new column added with
        # overwrite=False was stored unconverted.
        if is_new_column or overwrite:
            values = quantity
            if isinstance(values, astropy.units.quantity.Quantity):
                values = values.to(units[colname]).value
            data[colname] = values

        # Return a new TimeSeries with the given updated/added column.
        return self.__class__(data, meta, units)

    def sort_index(self, **kwargs):
        """
        Returns a sorted version of a `~sunpy.timeseries.TimeSeries`. Generally
        this shouldn't be necessary as most `~sunpy.timeseries.TimeSeries`
        operations sort the data anyway to ensure consistent behavior when
        truncating.

        Keyword arguments are forwarded to the ``sort_index`` method of the data.

        Returns
        -------
        `~sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries` in ascending chronological order.
        """
        return GenericTimeSeries(
            self.data.sort_index(**kwargs),
            TimeSeriesMetaData(copy.copy(self.meta.metadata)),
            copy.copy(self.units))

    def truncate(self, a, b=None, int=None):
        """
        Returns a truncated version of the TimeSeries object.

        Parameters
        ----------
        a : `sunpy.time.TimeRange`, `str`, `int`
            Either a time range to truncate to, or a start time in some format recognized by pandas, or a index integer.
        b : `str` or `int`, optional
            If specified, the end time of the time range in some format recognized by pandas, or a index integer.
            Defaults to `None`.
        int : `int`, optional
            If specified, the integer indicating the slicing intervals.
            Defaults to `None`.

        Returns
        -------
        `~sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries` with only the selected times.
        """
        # Evaluate inputs
        # If given strings, then use to create a sunpy.time.timerange.TimeRange
        # for the SunPy text date parser.
        if isinstance(a, str) and isinstance(b, str):
            a = TimeRange(a, b)
        if isinstance(a, TimeRange):
            # If we have a TimeRange, extract the values
            start = a.start.datetime
            end = a.end.datetime
        else:
            # Otherwise we already have the values
            start = a
            end = b

        # If an interval integer was given then use in truncation.
        truncated_data = self.data.sort_index()[start:end:int]

        # Truncate the metadata
        # Check there is data still
        truncated_meta = TimeSeriesMetaData([])
        if len(truncated_data) > 0:
            tr = TimeRange(truncated_data.index.min(),
                           truncated_data.index.max())
            truncated_meta = TimeSeriesMetaData(
                copy.deepcopy(self.meta.metadata))
            truncated_meta._truncate(tr)

        # Build similar TimeSeries object and sanitize metadata and units.
        # The data is sorted again because a negative step reverses the slice.
        truncated_ts = self.__class__(truncated_data.sort_index(),
                                      truncated_meta, copy.copy(self.units))
        truncated_ts._sanitize_metadata()
        truncated_ts._sanitize_units()
        return truncated_ts

    def extract(self, column_name):
        """
        Returns a new time series with the chosen column.

        Parameters
        ----------
        column_name : `str`
            A valid column name.

        Returns
        -------
        `~sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries` with only the selected column.
        """
        # TODO: allow the extract function to pick more than one column

        # Extract column and remove empty rows
        data = self.data[[column_name]].dropna()

        # Build generic TimeSeries object and sanitize metadata and units.
        extracted = GenericTimeSeries(
            data.sort_index(),
            TimeSeriesMetaData(copy.copy(self.meta.metadata)),
            copy.copy(self.units))
        extracted._sanitize_metadata()
        extracted._sanitize_units()
        return extracted

    def concatenate(self, otherts, same_source=False, **kwargs):
        """
        Concatenate with another `~sunpy.timeseries.TimeSeries`. This function
        will check and remove any duplicate times. It will keep the column
        values from the original timeseries to which the new time series is
        being added.

        Parameters
        ----------
        otherts : `~sunpy.timeseries.TimeSeries`
            Another `~sunpy.timeseries.TimeSeries`.
        same_source : `bool`, optional
            Set to `True` to check if the sources of the time series match. Defaults to `False`.

        Returns
        -------
        `~sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries`.

        Raises
        ------
        TypeError
            If ``same_source`` is set and ``otherts`` is not an instance of
            this object's class.

        Notes
        -----
        Extra keywords are passed to `pandas.concat`.
        """
        # TODO: decide if we want to be able to concatenate multiple time series at once.
        # check to see if nothing needs to be done
        if self == otherts:
            return self

        # Check the sources match if specified.
        if same_source and not (isinstance(otherts, self.__class__)):
            raise TypeError("TimeSeries classes must match if specified.")

        # Concatenate the metadata and data
        kwargs['sort'] = kwargs.pop('sort', False)
        meta = self.meta.concatenate(otherts.meta)
        data = pd.concat([self.data.copy(), otherts.data], **kwargs)

        # Add all the new units to the dictionary.
        units = OrderedDict()
        units.update(self.units)
        units.update(otherts.units)

        # If sources match then build similar TimeSeries.
        if self.__class__ == otherts.__class__:
            combined = self.__class__(data.sort_index(), meta, units)
        else:
            # Build generic time series if the sources don't match.
            combined = GenericTimeSeries(data.sort_index(), meta, units)

        # Sanitize metadata and units
        combined._sanitize_metadata()
        combined._sanitize_units()
        return combined

# #### Plotting Methods #### #

    def plot(self, axes=None, **plot_args):
        """
        Plot a plot of the `~sunpy.timeseries.TimeSeries`.

        Parameters
        ----------
        axes : `~matplotlib.axes.Axes`, optional
            If provided the image will be plotted on the given axes.
            Defaults to `None`, so the current axes will be used.
        **plot_args : `dict`, optional
            Any additional plot arguments that should be used when plotting.

        Returns
        -------
        axes : `~matplotlib.axes.Axes`
            The plot axes.
        """
        # Get current axes
        if axes is None:
            axes = plt.gca()

        axes = self.data.plot(ax=axes, **plot_args)

        return axes

    @peek_show
    def peek(self, **kwargs):
        """
        Displays a graphical overview of the data in this object for user evaluation.
        For the creation of plots, users should instead use the
        `~sunpy.timeseries.GenericTimeSeries.plot` method and Matplotlib's pyplot framework.

        Parameters
        ----------
        **kwargs : `dict`
            Any additional plot arguments that should be used when plotting.
        """
        # Check we have a timeseries valid for plotting
        self._validate_data_for_ploting()

        # Now make the plot
        figure = plt.figure()
        self.plot(**kwargs)

        return figure

    def _validate_data_for_ploting(self):
        """
        Raises an exception if the `~sunpy.timeseries.TimeSeries` is invalid
        for plotting.

        This should be added into all `~sunpy.timeseries.TimeSeries`
        peek methods.

        Raises
        ------
        ValueError
            If the data is empty.
        """
        # Check we have a valid TS
        if len(self.data) == 0:
            raise ValueError(
                "The timeseries can't be plotted as it has no data present. "
                "(len(self.data) == 0)")

# #### Miscellaneous #### #

    def _validate_meta(self):
        """
        Validates the meta-information associated with a
        `~sunpy.timeseries.TimeSeries`.

        This method includes very basic validation checks which apply to
        all of the kinds of files that SunPy can read. Datasource-
        specific validation should be handled in the relevant file in
        the "sunpy.timeseries.sources".
        """
        # NOTE: this changes the process-wide warning filter, not just this call.
        warnings.simplefilter('always', Warning)

        for meta_property in ('cunit1', 'cunit2', 'waveunit'):
            if (self.meta.get(meta_property) and
                    u.Unit(self.meta.get(meta_property),
                           parse_strict='silent').physical_type == 'unknown'):

                warnings.warn(f"Unknown value for {meta_property.upper()}.",
                              SunpyUserWarning)

    def _validate_units(self, units, **kwargs):
        """
        Validates the astropy unit-information associated with a
        `~sunpy.timeseries.TimeSeries`.

        This method includes very basic validation checks which apply to
        all of the kinds of files that SunPy can read. Datasource-
        specific validation should be handled in the relevant file in
        the "sunpy.timeseries.sources".

        Returns
        -------
        `bool`
            `False` if any value in ``units`` is not an astropy unit, else `True`.
        """
        # NOTE: this changes the process-wide warning filter, not just this call.
        warnings.simplefilter('always', Warning)

        result = True
        for key in units:
            if not isinstance(units[key], astropy.units.UnitBase):
                # If this is not a unit then this can't be a valid units dict.
                result = False
                warnings.warn(f"Invalid unit given for {key}.",
                              SunpyUserWarning)

        return result

    def _sanitize_units(self, **kwargs):
        """
        Sanitizes the `collections.OrderedDict` used to store the units.

        Primarily this method will:

        * Remove entries that don't match up to a column.
        * Add unitless entries for columns with no units defined.
        * Re-arrange the order of the dictionary to match the columns.
        """
        # NOTE: this changes the process-wide warning filter, not just this call.
        warnings.simplefilter('always', Warning)

        # Populate unspecified units:
        for column in set(self.data.columns.tolist()) - set(self.units.keys()):
            # For all columns not present in the units dictionary.
            self.units[column] = u.dimensionless_unscaled
            warnings.warn(f"Unknown units for {column}.", SunpyUserWarning)

        # Re-arrange so it's in the same order as the columns and removed unused.
        units = OrderedDict()
        for column in self.data.columns.tolist():
            units.update({column: self.units[column]})

        # Now use the amended units Ordered Dictionary
        self.units = units

    def _sanitize_metadata(self, **kwargs):
        """
        Sanitizes the `~sunpy.timeseries.TimeSeriesMetaData`  used to store the
        metadata.

        Primarily this method will:

        * Remove entries outside of the dates or truncate if the metadata overflows past the data.
        * Remove column references in the metadata that don't match to a column in the data.
        * Remove metadata entries that have no columns matching the data.
        """
        # NOTE: this changes the process-wide warning filter, not just this call.
        warnings.simplefilter('always', Warning)

        # Truncate the metadata
        self.meta._truncate(self.time_range)

        # Remove non-existent columns
        redundant_cols = list(set(self.meta.columns) - set(self.columns))
        self.meta._remove_columns(redundant_cols)

# #### Export/Output Methods #### #

    def to_table(self, **kwargs):
        """
        Return an `astropy.table.Table` of the given
        `~sunpy.timeseries.TimeSeries`.

        Returns
        -------
        `~astropy.table.Table`
            A new `astropy.table.Table` containing the data from the `~sunpy.timeseries.TimeSeries`.
            The table will include units where relevant.
        """
        # TODO: Table.from_pandas(df) doesn't include the index column. Add request?
        # Get data columns
        table = Table.from_pandas(self.data)

        # Get index column and add to table.
        index_col = Column(self.data.index.values, name='date')
        table.add_column(index_col, index=0)

        # Add in units.
        for key in self.units:
            table[key].unit = self.units[key]

        # Output the table
        return table

    def to_dataframe(self, **kwargs):
        """
        Return a `~pandas.core.frame.DataFrame` of the given
        `~sunpy.timeseries.TimeSeries`.

        The stored frame itself is returned, not a copy.

        Returns
        -------
        `~pandas.core.frame.DataFrame`
            A `~pandas.core.frame.DataFrame` containing the data.
        """
        return self.data

    def to_array(self, **kwargs):
        """
        Return a `numpy.array` of the given `~sunpy.timeseries.TimeSeries`.

        Parameters
        ----------
        kwargs : `dict`
            All keyword arguments are passed to `pandas.DataFrame.to_numpy`.

        Returns
        -------
        `~numpy.ndarray`
            If the data is heterogeneous and contains booleans or objects, the result will be of ``dtype=object``.
        """
        # Fall back to ``.values`` for data objects without ``to_numpy``.
        if hasattr(self.data, "to_numpy"):
            return self.data.to_numpy(**kwargs)
        else:
            return self.data.values

    def __eq__(self, other):
        """
        Check two `~sunpy.timeseries.TimeSeries` are the same, they have
        matching type, data, metadata and units entries.

        Parameters
        ----------
        other : `~sunpy.timeseries.TimeSeries`
            The second `~sunpy.timeseries.TimeSeries` to compare with.

        Returns
        -------
        `bool`
        """
        match = True
        if isinstance(other, type(self)):
            if ((not self.data.equals(other.data)) or (self.meta != other.meta)
                    or (self.units != other.units)):
                match = False
        else:
            match = False
        return match

    def __ne__(self, other):
        """
        Check two `~sunpy.timeseries.TimeSeries` are not the same, they don't
        have matching type, data, metadata and/or units entries.

        Parameters
        ----------
        other : `~sunpy.timeseries.TimeSeries`
            The second `~sunpy.timeseries.TimeSeries` to compare with.

        Returns
        -------
        `bool`
        """
        return not self == other

    @classmethod
    def _parse_file(cls, filepath):
        """
        Parses a file - to be implemented in any subclass that may use files.

        Parameters
        ----------
        filepath : `str`
            The path to the file you want to parse.
        """
        return NotImplemented
Example #18
0
class GenericTimeSeries:
    """
    A generic time series object.

    Parameters
    ----------
    data : `~pandas.DataFrame` or `numpy.array`
        A `pandas.DataFrame` or `numpy.array` representing one or more fields as a function of time.
    meta : `~sunpy.timeseries.metadata.TimeSeriesMetaData`, optional
        The metadata giving details about the time series data/instrument.
        Defaults to `None`.
    units : `dict`, optional
        A mapping from column names in ``data`` to the physical units of that column.
        Defaults to `None`.

    Attributes
    ----------
    meta : `~sunpy.timeseries.metadata.TimeSeriesMetaData`
        The metadata giving details about the time series data/instrument.
    units : `dict`
        A mapping from column names in ``data`` to the physical units of that column.

    Examples
    --------
    >>> from sunpy.timeseries import TimeSeries
    >>> from sunpy.time import parse_time
    >>> from astropy.time import TimeDelta
    >>> import astropy.units as u
    >>> import numpy as np
    >>> import pandas as pd
    >>> times = parse_time("now") - TimeDelta(np.arange(24 * 60)*u.minute)
    >>> intensity = np.sin(np.arange(0, 12 * np.pi, step=(12 * np.pi) / (24 * 60)))
    >>> df = pd.DataFrame(intensity, index=times, columns=['intensity'])
    >>> header = {}
    >>> units = {'intensity': u.W/u.m**2}
    >>> ts = TimeSeries(df, header, units)
    >>> ts.peek()  # doctest: +SKIP

    References
    ----------
    * `Pandas Documentation <https://pandas.pydata.org/pandas-docs/stable/>`_
    """
    # Class attribute used to specify the source class of the TimeSeries.
    _source = None
    _registry = dict()

    def __init_subclass__(cls, **kwargs):
        """
        An __init_subclass__ hook initializes all of the subclasses of a given
        class.

        So for each subclass, it will call this block of code on import.
        This replicates some metaclass magic without the need to be
        aware of metaclasses. Here we use this to register each subclass
        in a dict that has the `is_datasource_for` attribute. This is
        then passed into the TimeSeries Factory so we can register them.
        """
        super().__init_subclass__(**kwargs)
        if hasattr(cls, 'is_datasource_for'):
            cls._registry[cls] = cls.is_datasource_for

    # kwargs are not used here but are passed in for sources.
    def __init__(self, data, meta=None, units=None, **kwargs):
        self._data = data
        tr = self.time_range

        # Normalise the metadata input.
        if meta is None:
            # No metadata supplied: start from an empty MetaDict.
            meta = MetaDict()
        if isinstance(meta, (dict, OrderedDict, MetaDict, tuple)):
            # Bare metadata values (a dict-like or a tuple): the time range
            # and the column names are inferred from the data itself.
            self.meta = TimeSeriesMetaData(meta, tr, self.columns)
        else:
            # Anything else is stored as given; it should already be a
            # complete metadata object (a list of 3-tuples).
            self.meta = meta

        # Work on a shallow copy of the caller's mapping: the loop below adds
        # entries for columns without a unit, and that must not leak back
        # into a dict the caller still owns (or shares with another series).
        self.units = {} if units is None else copy.copy(units)

        for col in self.columns:
            if col not in self.units:
                warn_user(f'Unknown units for {col}')
                self.units[col] = u.dimensionless_unscaled

        # TODO: Fix this?
        # Validate input data
        # self._validate_meta()
        # self._validate_units()

# #### Attribute definitions #### #

    @property
    def data(self):
        """
        The `pandas.DataFrame` holding one or more fields as a function of time.

        Reading this attribute emits a user warning that points to
        ``to_dataframe()`` as the preferred accessor.
        """
        message = ("Using .data to access the dataframe is discouraged; "
                   "use .to_dataframe() instead.")
        warn_user(message)
        return self._data

    @data.setter
    def data(self, d):
        self._data = d

    @property
    def source(self):
        """
        A string/object used to specify the source class of the TimeSeries.
        """
        return self._source

    @property
    def observatory(self):
        """
        A string/object used to specify the observatory for the TimeSeries.
        """
        return

    @property
    def columns(self):
        """
        A list of all the names of the columns in the data.
        """
        return list(self._data.columns.values)

    @property
    def index(self):
        """
        The time index of the data.
        """
        return self._data.index

    @property
    def shape(self):
        """
        The shape of the data, a tuple (nrows, ncols).
        """
        return self._data.shape

    @property
    def time_range(self):
        """
        The start and end times of the TimeSeries as a `~sunpy.time.TimeRange`.
        """
        if len(self._data) > 0:
            return TimeRange(self._data.index.min(), self._data.index.max())
        else:
            return None

    @property
    def url(self):
        """
        URL to the mission website.
        """
        return self._url

# #### Data Access, Selection and Organisation Methods #### #

    def _text_summary(self):
        """
        Produces a table summary of the timeseries data to be passed to
        the _repr_html_ function.

        Returns
        -------
        `str`
            A two-line header followed by one ``Label:<tabs> value`` line per
            summary entry. ``_repr_html_`` strips the header by its fixed
            length and splits entries on ``":\\t"``, so the layout of the
            returned text must not change.
        """
        # An empty metadata list (IndexError) is treated like a missing key.
        obs = self.observatory
        if obs is None:
            try:
                obs = self.meta.metadata[0][2]["telescop"]
            except (KeyError, IndexError):
                obs = "Unknown"
        try:
            inst = self.meta.metadata[0][2]["instrume"]
        except (KeyError, IndexError):
            inst = "Unknown"
        # ``url`` raises AttributeError when ``_url`` is not set; in that
        # case the instrument name is shown without a link.
        try:
            link = f"""<a href={self.url} target="_blank">{inst}</a>"""
        except AttributeError:
            link = inst

        dat = self.to_dataframe()
        drange = (dat.max() - dat.min()).to_string(float_format="{:.2E}".format)
        drange = drange.replace("\n", "<br>")

        time_range = self.time_range
        center = time_range.center.value.astype('datetime64[s]')
        center = str(center).replace("T", " ")
        resolution = round(time_range.seconds.value / self.shape[0], 3)
        resolution = str(resolution) + " s"

        channels = "<br>".join(self.columns)

        # Build the unit labels in the order the units were stored and drop
        # repeated labels while keeping that order, so the summary is stable
        # from run to run (a set has no guaranteed iteration order).
        labels = []
        for x in self.units.values():
            if isinstance(x, u.quantity.Quantity):
                x = x.unit
            labels.append(
                "dimensionless" if x == u.dimensionless_unscaled else str(x))
        uni = "<br>".join(dict.fromkeys(labels))

        return textwrap.dedent(f"""\
                   SunPy TimeSeries
                   ----------------
                   Observatory:\t\t {obs}
                   Instrument:\t\t {link}
                   Channel(s):\t\t {channels}
                   Start Date:\t\t {dat.index.min().round('s')}
                   End Date:\t\t {dat.index.max().round('s')}
                   Center Date:\t\t {center}
                   Resolution:\t\t {resolution}
                   Samples per Channel:\t\t {self.shape[0]}
                   Data Range(s):\t\t {drange}
                   Units:\t\t {uni}""")

    def __str__(self):
        return f"{self._text_summary()}\n{self._data.__repr__()}"

    def __repr__(self):
        return f"{object.__repr__(self)}\n{self}"

    def _repr_html_(self):
        """
        Produces an HTML summary of the timeseries data with plots for use in
        Jupyter notebooks.

        Returns
        -------
        `str`
            An HTML fragment holding the text summary as a table, one figure
            with a line plot per column, and one histogram per column that
            has at least one non-NaN value. Clicking the histogram image
            cycles through the histograms.
        """
        # Call _text_summary and reformat as an HTML table. The first 34
        # characters are the two header lines of the summary.
        partial_html = (self._text_summary()[34:].replace(
            "\n", "</td></tr><tr><th>").replace(":\t", "</th><td>"))
        text_to_table = (f"""\
            <table style='text-align:left'>
                <tr><th>{partial_html}</td></tr>
            </table>""").replace("\n", "")

        # The same color list is used for the line plots and the histograms
        # so that a channel keeps its color. It is indexed modulo its length
        # so that series with more columns than colors still render.
        cols = [
            'b', 'g', 'r', 'c', 'm', 'y', 'tab:blue', 'tab:orange', 'tab:red',
            'tab:purple', 'tab:brown', 'tab:pink', 'tab:gray', 'tab:green',
            'tab:olive', 'tab:cyan', 'palegreen', 'pink'
        ]
        dat = self.to_dataframe()
        columns = self.columns

        # One panel per channel in a single figure. ``squeeze=False`` always
        # returns a 2-D array of axes, so one column needs no special case.
        fig, axs = plt.subplots(
            nrows=len(columns),
            ncols=1,
            sharex=True,
            constrained_layout=True,
            figsize=(6, 10),
            squeeze=False,
        )
        # Every panel gets its own y-axis label, taken from the column unit.
        for i, (name, ax) in enumerate(zip(columns, axs[:, 0])):
            values = dat[name].values
            ax.plot(dat.index, values, color=cols[i % len(cols)], label=name)
            # A log scale is only used when the channel has no negative values.
            if not (values < 0).any():
                ax.set_yscale("log")
            ax.legend(frameon=False, handlelength=0)
            ax.set_ylabel(self.units[name])
        plt.xticks(rotation=30)
        spc = _figure_to_base64(fig)
        plt.close(fig)

        # Make histograms for each column of data. The histograms are
        # created using the Astropy hist method that uses Scott's rule
        # for bin sizes. Columns without any non-NaN value are skipped.
        hlist = []
        for i, name in enumerate(columns):
            values = dat[name].values
            finite = values[~np.isnan(values)]
            if finite.size == 0:
                continue
            fig = plt.figure(figsize=(5, 3), constrained_layout=True)
            hist(
                finite,
                log=True,
                bins="scott",
                color=cols[i % len(cols)],
            )
            plt.title(name + " [click for other channels]")
            plt.xlabel(self.units[name])
            plt.ylabel("# of occurences")
            hlist.append(_figure_to_base64(fig))
            plt.close(fig)

        # The base64 images are turned into data URIs and written directly
        # into the JS below, so all images are embedded in the html page when
        # it is created (allows for an arbitrary number of histograms to be
        # rotated through onclick).
        hlist2 = [f"data:image/png;base64,{image}" for image in hlist]
        # With no histogram at all the image simply gets an empty source.
        first_hist = hlist2[0] if hlist2 else ""

        # A unique JS variable name per call; otherwise multiple timeseries
        # summaries would conflict in a single notebook.
        source = str(self.source) + str(time.perf_counter_ns())

        return textwrap.dedent(f"""\
            <pre>{html.escape(object.__repr__(self))}</pre>
            <script type="text/javascript">
            function ImageChange(images) {{
                this.images = images;
                this.i = 0;
                this.next = function(img) {{
                    this.i++;
                    if (this.i == images.length)
                    this.i = 0;
                    img.src = images[this.i];
                }}
            }}
            var {source} = new ImageChange({hlist2});
            </script>
            <table>
                <tr>
                    <td style='width:40%'>{text_to_table}</td>
                    <td rowspan=3>
                        <img src='data:image/png;base64,{spc}'/>
                    </td>
                </tr>
                <tr>
                </tr>
                <tr>
                    <td>
                    <img src="{first_hist}" alt="Click here for histograms"
                         onclick="{source}.next(this)"/>
                    </td>
                </tr>
            </table>""")

    def quicklook(self):
        """
        Display a quicklook summary of the Timeseries instance in the default
        webbrowser.

        The HTML summary is written to a temporary ``.html`` file, which is
        not deleted afterwards, and that file is opened in a new browser tab.

        Example
        -------
        >>> from sunpy.timeseries import TimeSeries
        >>> import sunpy.data.sample  # doctest: +REMOTE_DATA
        >>> goes_lc = TimeSeries(sunpy.data.sample.GOES_XRS_TIMESERIES)  # doctest: +REMOTE_DATA
        >>> goes_lc.quicklook()  # doctest: +SKIP
        """
        from pathlib import Path

        # Render the page before creating the file, so that a failure while
        # building the summary does not leave an empty temporary file behind.
        page = textwrap.dedent(f"""\
                <html>
                    <title>Quicklook summary for {html.escape(object.__repr__(self))}</title>
                    <body>{self._repr_html_()}</body>
                </html>""")
        with NamedTemporaryFile("w",
                                delete=False,
                                prefix="sunpy.timeseries.",
                                suffix=".html") as f:
            # ``as_uri`` produces a well-formed ``file://`` URL on every
            # platform, including Windows drive-letter paths.
            url = Path(f.name).as_uri()
            f.write(page)
        webbrowser.open_new_tab(url)

    def quantity(self, colname, **kwargs):
        """
        Build a `~astropy.units.quantity.Quantity` from one column, combining
        the column values with the unit stored for that column.

        Parameters
        ----------
        colname : `str`
            The heading of the column you want to output.

        Returns
        -------
        `~astropy.units.quantity.Quantity`
        """
        return u.Quantity(self._data[colname].values, self.units[colname])

    def add_column(self,
                   colname,
                   quantity,
                   unit=False,
                   overwrite=True,
                   **kwargs):
        """
        Build a new `~sunpy.timeseries.TimeSeries` in which the given column
        has been added or updated; this object is left untouched.

        Parameters
        ----------
        colname : `str`
            The heading of the column you want output.
        quantity : `~astropy.units.quantity.Quantity` or `~numpy.ndarray`
            The values to be placed within the column.
            If updating values only then a numpy array is permitted.
        unit : optional
            The unit recorded for a newly added column. When left falsy it is
            taken from ``quantity`` if that is a Quantity, and is
            dimensionless otherwise. The unit of an existing column is kept.
        overwrite : `bool`, optional
            Defaults to `True`, allowing the method to overwrite a column already present in the `~sunpy.timeseries.TimeSeries`.

        Returns
        -------
        `sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries`.
        """
        is_quantity = isinstance(quantity, astropy.units.quantity.Quantity)

        # Settle the unit to record when none was given explicitly.
        if not unit:
            unit = quantity.unit if is_quantity else u.dimensionless_unscaled

        # Copies of every component, so the original series is not modified.
        new_data = copy.copy(self._data)
        new_meta = TimeSeriesMetaData(copy.copy(self.meta.metadata))
        new_units = copy.copy(self.units)

        # Only a column that does not exist yet gets a units entry.
        column_exists = colname in self._data.columns
        if not column_exists:
            new_units[colname] = unit

        # A Quantity is converted to the column's unit and reduced to plain
        # values, but only when overwriting is allowed.
        values = quantity
        if is_quantity and overwrite:
            values = quantity.to(new_units[colname]).value

        # Write the values unless an existing column must be preserved.
        if overwrite or not column_exists:
            new_data[colname] = values

        return self.__class__(new_data, new_meta, new_units)

    def remove_column(self, colname):
        """
        Remove a column.

        Parameters
        ----------
        colname : str
            The heading of the column to remove.

        Returns
        -------
        `sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries`.
        """
        if colname not in self.columns:
            raise ValueError(
                f'Given column name ({colname}) not in list of columns {self.columns}'
            )
        data = self._data.drop(colname, axis='columns')
        units = self.units.copy()
        units.pop(colname)
        return self.__class__(data, self.meta, units)

    def sort_index(self, **kwargs):
        """
        Build a copy of this `~sunpy.timeseries.TimeSeries` with its data
        sorted by index. Generally this shouldn't be necessary as most
        `~sunpy.timeseries.TimeSeries` operations sort the data anyway to
        ensure consistent behavior when truncating.

        All keyword arguments are forwarded to the ``sort_index`` method of
        the underlying data.

        Returns
        -------
        `~sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries` in ascending chronological order.
            The result is always a ``GenericTimeSeries``.
        """
        sorted_data = self._data.sort_index(**kwargs)
        meta_copy = TimeSeriesMetaData(copy.copy(self.meta.metadata))
        units_copy = copy.copy(self.units)
        return GenericTimeSeries(sorted_data, meta_copy, units_copy)

    def truncate(self, a, b=None, int=None):
        """
        Build a new TimeSeries restricted to a slice of this one.

        Parameters
        ----------
        a : `sunpy.time.TimeRange`, `str`, `int`
            Either a time range to truncate to, or a start time in some format recognized by pandas, or a index integer.
        b : `str` or `int`, optional
            If specified, the end time of the time range in some format recognized by pandas, or a index integer.
            Defaults to `None`.
        int : `int`, optional
            If specified, the integer indicating the slicing intervals.
            Defaults to `None`.

        Returns
        -------
        `~sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries` with only the selected times.
        """
        # A pair of strings is turned into a TimeRange.
        if isinstance(a, str) and isinstance(b, str):
            a = TimeRange(a, b)

        # A TimeRange supplies both slice bounds; otherwise ``a`` and ``b``
        # are used as the bounds directly.
        if isinstance(a, TimeRange):
            start, end = a.start.datetime, a.end.datetime
        else:
            start, end = a, b

        # ``int`` is the slice step; `None` keeps every row in the range.
        truncated_data = self._data.sort_index()[start:end:int]

        # The metadata stays empty when nothing is left; otherwise it is a
        # deep copy cut down to the span of the remaining rows.
        truncated_meta = TimeSeriesMetaData([])
        if len(truncated_data) > 0:
            remaining_span = TimeRange(truncated_data.index.min(),
                                       truncated_data.index.max())
            truncated_meta = TimeSeriesMetaData(
                copy.deepcopy(self.meta.metadata))
            truncated_meta._truncate(remaining_span)

        # Same class as this series; the metadata is sanitised before return.
        truncated = self.__class__(truncated_data.sort_index(), truncated_meta,
                                   copy.copy(self.units))
        truncated._sanitize_metadata()
        return truncated

    def extract(self, column_name):
        """
        Build a new time series that holds only the chosen column.

        Rows in which that column has no value are dropped.

        Parameters
        ----------
        column_name : `str`
            A valid column name.

        Returns
        -------
        `~sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries` with only the selected column.
            The result is always a ``GenericTimeSeries``.
        """
        # TODO: allow the extract function to pick more than one column

        # The double brackets keep the selection a frame with one column.
        column_data = self._data[[column_name]].dropna()
        column_units = {column_name: self.units[column_name]}

        # Build the generic series, then sanitise its metadata.
        extracted = GenericTimeSeries(
            column_data.sort_index(),
            TimeSeriesMetaData(copy.copy(self.meta.metadata)), column_units)
        extracted._sanitize_metadata()
        return extracted

    def concatenate(self, others, same_source=False, **kwargs):
        """
        Concatenate with another `~sunpy.timeseries.TimeSeries` or an iterable containing multiple
        `~sunpy.timeseries.TimeSeries`. This function will check and remove any duplicate times.
        It will keep the column values from the original timeseries to which the new time
        series is being added.

        Parameters
        ----------
        others : `~sunpy.timeseries.TimeSeries` or `collections.abc.Iterable`
            Another `~sunpy.timeseries.TimeSeries` or an iterable containing multiple
            `~sunpy.timeseries.TimeSeries`.
        same_source : `bool`, optional
            Set to `True` to check if the sources of the time series match. Defaults to `False`.

        Returns
        -------
        `~sunpy.timeseries.TimeSeries`
            A new `~sunpy.timeseries.TimeSeries`.

        Notes
        -----
        Extra keywords are passed to `pandas.concat`.

        Examples
        --------
        A single `~sunpy.timeseries.TimeSeries` or an `collections.abc.Iterable` containing multiple
        `~sunpy.timeseries.TimeSeries` can be passed to concatenate.

        >>> timeseries_1.concatenate(timeseries_2) # doctest: +SKIP
        >>> timeseries_1.concatenate([timeseries_2, timeseries_3]) # doctest: +SKIP

        Set ``same_source`` to `True` if the sources of the time series are the same.

        >>> timeseries_1.concatenate([timeseries_2, timeseries_3], same_source=True) # doctest: +SKIP
        """
        # Check to see if nothing needs to be done in case the same TimeSeries is provided.
        if self == others:
            return self
        elif isinstance(others, Iterable):
            if len(others) == 1 and self == next(iter(others)):
                return self

        # Check the sources match if specified.
        if (same_source and isinstance(others, Iterable) and not all(
                isinstance(series, self.__class__) for series in others)):
            raise TypeError(
                "TimeSeries classes must match if 'same_source' is specified.")
        elif (same_source and not isinstance(others, Iterable)
              and not isinstance(others, self.__class__)):
            raise TypeError(
                "TimeSeries classes must match if 'same_source' is specified.")

        # If an iterable is not provided, it must be a TimeSeries object, so wrap it in a list.
        if not isinstance(others, Iterable):
            others = [others]

        # Concatenate the metadata and data.
        kwargs["sort"] = kwargs.pop("sort", False)
        meta = self.meta.concatenate([series.meta for series in others])
        data = pd.concat([
            self._data.copy(), *list(series.to_dataframe()
                                     for series in others)
        ], **kwargs)

        # Add all the new units to the dictionary.
        units = OrderedDict()
        units.update(self.units)
        units.update({
            k: v
            for unit in list(series.units for series in others)
            for k, v in unit.items()
        })
        units = {k: v for k, v in units.items() if k in data.columns}

        # If sources match then build similar TimeSeries.
        if all(self.__class__ == series.__class__ for series in others):
            object = self.__class__(data.sort_index(), meta, units)
        else:
            # Build generic time series if the sources don't match.
            object = GenericTimeSeries(data.sort_index(), meta, units)

        # Sanatise metadata and units
        object._sanitize_metadata()
        return object

# #### Plotting Methods #### #

    def plot(self, axes=None, columns=None, **plot_args):
        """
        Draw the `~sunpy.timeseries.TimeSeries` as a line plot.

        Parameters
        ----------
        axes : `~matplotlib.axes.Axes`, optional
            If provided the image will be plotted on the given axes.
            Defaults to `None`, so the current axes will be used.
        columns : list[str], optional
            If provided, only plot the specified columns.
        **plot_args : `dict`, optional
            Additional plot keyword arguments that are handed to
            :meth:`pandas.DataFrame.plot`.

        Returns
        -------
        `~matplotlib.axes.Axes`
            The plot axes.
        """
        import matplotlib.pyplot as plt

        # Fall back to the current axes and to every column of the data.
        axes = plt.gca() if axes is None else axes
        columns = self._data.columns if columns is None else columns

        axes = self._data[columns].plot(ax=axes, **plot_args)

        # The y-axis is labelled only when all plotted columns share one unit.
        distinct_units = {self.units[col] for col in columns}
        if len(distinct_units) == 1:
            shared_unit = u.Unit(next(iter(distinct_units)))
            axes.set_ylabel(shared_unit.to_string())
        return axes

    @peek_show
    def peek(self, columns=None, **kwargs):
        """
        Show a quick graphical overview of the data for user evaluation.
        To create plots, use the `~sunpy.timeseries.GenericTimeSeries.plot`
        method together with Matplotlib's pyplot framework instead.

        Parameters
        ----------
        columns : list[str], optional
            If provided, only plot the specified columns.
        **kwargs : `dict`
            Any additional plot arguments that should be used when plotting.
        """
        import matplotlib.pyplot as plt

        # Refuse to plot a series without data.
        self._validate_data_for_plotting()

        # A fresh figure becomes the current one, so ``plot`` draws into it.
        new_figure = plt.figure()
        self.plot(columns=columns, **kwargs)
        return new_figure

    def _validate_data_for_plotting(self):
        """
        Raises an exception if the `~sunpy.timeseries.TimeSeries` is invalid
        for plotting.

        This should be added into all `~sunpy.timeseries.TimeSeries`
        peek methods.
        """
        # Check we have a valid TS
        if len(self._data) == 0:
            raise ValueError(
                "The timeseries can't be plotted as it has no data present. "
                "(len(self._data) == 0)")

# #### Miscellaneous #### #

    def _validate_meta(self):
        """
        Validates the meta-information associated with a
        `~sunpy.timeseries.TimeSeries`.

        This method includes very basic validation checks which apply to
        all of the kinds of files that SunPy can read. Datasource-
        specific validation should be handled in the relevant file in
        the "sunpy.timeseries.sources".
        """
        for meta_property in ('cunit1', 'cunit2', 'waveunit'):
            if (self.meta.get(meta_property) and
                    u.Unit(self.meta.get(meta_property),
                           parse_strict='silent').physical_type == 'unknown'):

                warn_user(f"Unknown value for {meta_property.upper()}.")

    def _validate_units(self, units, **kwargs):
        """
        Validates the astropy unit-information associated with a
        `~sunpy.timeseries.TimeSeries`.

        This method includes very basic validation checks which apply to
        all of the kinds of files that SunPy can read. Datasource-
        specific validation should be handled in the relevant file in
        the "sunpy.timeseries.sources".
        """
        result = True
        for key in units:
            if not isinstance(units[key], astropy.units.UnitBase):
                # If this is not a unit then this can't be a valid units dict.
                result = False
                warn_user(f"Invalid unit given for {key}.")

        return result

    def _sanitize_metadata(self, **kwargs):
        """
        Sanitizes the `~sunpy.timeseries.TimeSeriesMetaData`  used to store the
        metadata.

        Primarily this method will:

        * Remove entries outside of the dates or truncate if the metadata overflows past the data.
        * Remove column references in the metadata that don't match to a column in the data.
        * Remove metadata entries that have no columns matching the data.
        """
        # Truncate the metadata
        self.meta._truncate(self.time_range)

        # Remove non-existant columns
        redundant_cols = list(set(self.meta.columns) - set(self.columns))
        self.meta._remove_columns(redundant_cols)

# #### Export/Output Methods #### #

    def to_table(self, **kwargs):
        """
        Export the `~sunpy.timeseries.TimeSeries` as an
        `astropy.table.Table`.

        Returns
        -------
        `~astropy.table.Table`
            A new `astropy.table.Table` containing the data from the `~sunpy.timeseries.TimeSeries`.
            The index values form the first column, named ``date``, and the
            stored units are attached to their columns.
        """
        # TODO: Table.from_pandas(df) doesn't include the index column. Add request?
        table = Table.from_pandas(self._data)

        # The index is added by hand as the leading column.
        table.add_column(Column(self._data.index.values, name='date'), index=0)

        # Attach the stored unit to each column.
        for name, unit in self.units.items():
            table[name].unit = unit

        return table

    def to_dataframe(self, **kwargs):
        """
        Return a `~pandas.DataFrame` of the given
        `~sunpy.timeseries.TimeSeries`.

        Returns
        -------
        `~pandas.DataFrame`
        """
        return self._data

    def to_array(self, **kwargs):
        """
        Return a `numpy.array` of the given `~sunpy.timeseries.TimeSeries`.

        Parameters
        ----------
        **kwargs : `dict`
            All keyword arguments are passed to `pandas.DataFrame.to_numpy`.

        Returns
        -------
        `~numpy.ndarray`
            If the data is heterogeneous and contains booleans or objects, the result will be of ``dtype=object``.
        """
        if hasattr(self._data, "to_numpy"):
            return self._data.to_numpy(**kwargs)
        else:
            return self._data.values

    def __eq__(self, other):
        """
        Check two `~sunpy.timeseries.TimeSeries` are the same, they have
        matching type, data, metadata and units entries.

        Parameters
        ----------
        other : `~sunpy.timeseries.TimeSeries`
            The second `~sunpy.timeseries.TimeSeries` to compare with.

        Returns
        -------
        `bool`
        """
        match = True
        if isinstance(other, type(self)):
            if ((not self._data.equals(other.to_dataframe()))
                    or (self.meta != other.meta)
                    or (self.units != other.units)):
                match = False
        else:
            match = False
        return match

    def __ne__(self, other):
        """
        Check two `~sunpy.timeseries.TimeSeries` are not the same, they don't
        have matching type, data, metadata and/or units entries.

        Parameters
        ----------
        other : `~sunpy.timeseries.TimeSeries`
            The second `~sunpy.timeseries.TimeSeries` to compare with.

        Returns
        -------
        `bool`
        """
        return not self == other

    @classmethod
    def _parse_file(cls, filepath):
        """
        Hook for parsing a file; subclasses that read files override this.

        The generic class has no parser of its own, so it always raises
        ``NoMatchError``.

        Parameters
        ----------
        filepath : `str`
            The path to the file you want to parse.
        """
        message = f'Could not find any timeseries sources to parse {filepath}'
        raise NoMatchError(message)
Example #19
0
class GenericTimeSeries:
    """
    A generic time series object.

    Parameters
    ----------
    data : `~pandas.DataFrame`
        A pandas DataFrame representing one or more fields as a function
        of time.
    meta : `~sunpy.timeseries.metadata.TimeSeriesMetaData`, optional
        The metadata giving details about the time series data/instrument.
    units : dict, optional
        A mapping from column names in *data* to the physical units of
        that column.

    Attributes
    ----------
    data : `~pandas.DataFrame`
        A pandas DataFrame representing one or more fields as a function
        of time.
    meta : `~sunpy.timeseries.metadata.TimeSeriesMetaData`
        The metadata giving details about the time series data/instrument.
    units : dict
        A mapping from column names in *data* to the physical units of
        that column.

    Examples
    --------
    >>> from sunpy.timeseries import TimeSeries
    >>> from sunpy.time import parse_time
    >>> import datetime
    >>> from astropy.time import TimeDelta
    >>> import astropy.units as u
    >>> import numpy as np
    >>> import pandas as pd
    >>> base = parse_time(datetime.datetime.today())
    >>> times = base - TimeDelta(np.arange(24 * 60)*u.minute)
    >>> intensity = np.sin(np.arange(0, 12 * np.pi, step=(12 * np.pi) / (24 * 60)))
    >>> df = pd.DataFrame(intensity, index=times, columns=['intensity'])
    >>> ts = TimeSeries(df)
    >>> ts.peek()   # doctest: +SKIP

    References
    ----------
    * `Pandas Documentation <https://pandas.pydata.org/pandas-docs/stable/>`_

    """

    # Class attribute used to specify the source class of the TimeSeries.
    _source = None
    _registry = dict()

    def __init_subclass__(cls, **kwargs):
        """
        An __init_subclass__ hook initializes all of the subclasses of a given class.
        So for each subclass, it will call this block of code on import.
        This replicates some metaclass magic without the need to be aware of metaclasses.
        Here we use this to register each subclass in a dict that has the `is_datasource_for`
        attribute. This is then passed into the TimeSeries Factory so we can register them.
        """
        super().__init_subclass__(**kwargs)
        if hasattr(cls, 'is_datasource_for'):
            cls._registry[cls] = cls.is_datasource_for

    def __init__(self, data, meta=None, units=None, **kwargs):
        """
        Store the data and normalise the metadata and units inputs.

        Parameters
        ----------
        data : `~pandas.DataFrame`
            The time-indexed data.
        meta : optional
            ``None``, a dict-like or a tuple is wrapped in a new
            `TimeSeriesMetaData` together with the data's time range and
            column names. Any other object is stored as given.
        units : `dict`, optional
            Mapping of column name to unit. Defaults to an empty dict.
        """
        self.data = data
        tr = self.time_range

        # Decide whether the metadata still has to be wrapped.
        if meta is None:
            meta = MetaDict()
            needs_wrapping = True
        else:
            needs_wrapping = isinstance(meta, (dict, OrderedDict, MetaDict, tuple))

        if needs_wrapping:
            # Time range and column names are inferred from the data.
            self.meta = TimeSeriesMetaData(meta, tr, list(self.data.columns.values))
        else:
            # Should be a ready-made list of 3-tuples (complex metadata).
            self.meta = meta

        self.units = {} if units is None else units

        # NOTE: _validate_meta() / _validate_units() are not run here.

# #### Attribute definitions #### #

    @property
    def source(self):
        """
        A string/object used to specify the source class of the TimeSeries.
        """
        return self._source

    @property
    def columns(self):
        """A list of all the names of the columns in the data."""
        return list(self.data.columns.values)

    @property
    def index(self):
        """The time index of the data."""
        return self.data.index

    @property
    def time_range(self):
        """
        The start and end times of the TimeSeries as a `~sunpy.time.TimeRange`
        object
        """
        if len(self.data)>0:
            return TimeRange(self.data.index.min(), self.data.index.max())
        else:
            return None

# #### Data Access, Selection and Organisation Methods #### #

    def quantity(self, colname, **kwargs):
        """
        Return a `~astropy.units.quantity.Quantity` for the given column.

        Parameters
        ----------
        colname : `str`
            The heading of the column you want output.

        Returns
        -------
        quantity : `~astropy.units.quantity.Quantity`
        """
        values = self.data[colname].values
        unit = self.units[colname]
        return u.Quantity(values, unit)

    def add_column(self, colname, quantity, unit=False, overwrite=True, **kwargs):
        """
        Return a new TimeSeries with the given column added or updated.

        Parameters
        ----------
        colname : `str`
            The heading of the column you want output.

        quantity : `~astropy.units.quantity.Quantity` or `numpy.ndarray`
            The values to be placed within the column.
            If updating values only then a numpy array is permitted.

        unit : `~astropy.units.Unit`, optional
            The unit recorded for a newly added column. When left falsy it
            is taken from ``quantity`` if that is a Quantity, otherwise the
            column is recorded as dimensionless. For a column that already
            exists the unit stored in ``units`` is left untouched.

        overwrite : `bool`, optional, default:True
            Set to true to allow the method to overwrite a column already present
            in the TimeSeries.

        Returns
        -------
        newts : TimeSeries
            A new object of the same class; ``self`` is not modified.
        """
        is_quantity = isinstance(quantity, astropy.units.quantity.Quantity)

        # Get the expected units from the quantity if required.
        if not unit:
            unit = quantity.unit if is_quantity else u.dimensionless_unscaled

        # Make a copy of all the TimeSeries components.
        data = copy.copy(self.data)
        meta = TimeSeriesMetaData(copy.copy(self.meta.metadata))
        units = copy.copy(self.units)

        # Record the unit only when the column is not already present.
        is_new_column = colname not in self.data.columns
        if is_new_column:
            units[colname] = unit

        # Update or add the data.
        if is_new_column or overwrite:
            values = quantity
            if is_quantity:
                # Always store plain values expressed in the recorded unit.
                # Previously this conversion was skipped for a new column
                # added with overwrite=False.
                values = values.to(units[colname]).value
            data[colname] = values

        # Return a new TimeSeries with the given updated/added column.
        return self.__class__(data, meta, units)

    def sort_index(self, **kwargs):
        """Returns a copy of the TimeSeries with its index sorted.

        Generally this shouldn't be necessary as most TimeSeries operations sort
        the data anyway to ensure consistent behaviour when truncating.

        Parameters
        ----------
        **kwargs : `dict`
            Passed on to the ``sort_index`` method of the data.

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new `GenericTimeSeries` holding the sorted data together with
            copies of the metadata and units.
        """
        sorted_data = self.data.sort_index(**kwargs)
        meta_copy = TimeSeriesMetaData(copy.copy(self.meta.metadata))
        units_copy = copy.copy(self.units)
        return GenericTimeSeries(sorted_data, meta_copy, units_copy)

    def truncate(self, a, b=None, int=None):
        """Returns a truncated version of the TimeSeries object.

        Parameters
        ----------
        a : `sunpy.time.TimeRange`, `str` or `int`
            Either a time range to truncate to, or a start time in some format
            recognised by pandas, or a index integer.

        b : `str` or `int`
            If specified, the end time of the time range in some format
            recognised by pandas, or a index integer.

        int : `int`
            If specified, the integer indicating the slicing intervals
            (used as the slice step).

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new time series with only the selected times.
        """
        # Two strings are interpreted as the bounds of a TimeRange.
        if isinstance(a, str) and isinstance(b, str):
            a = TimeRange(a, b)

        # Work out the slice bounds.
        if isinstance(a, TimeRange):
            start, end = a.start.datetime, a.end.datetime
        else:
            start, end = a, b

        # Slice the sorted data; ``int`` is the (optional) step.
        sliced = self.data.sort_index()[start:end:int]

        # Metadata stays empty unless some data survived the truncation.
        new_meta = TimeSeriesMetaData([])
        if len(sliced) > 0:
            tr = TimeRange(sliced.index.min(), sliced.index.max())
            new_meta = TimeSeriesMetaData(copy.deepcopy(self.meta.metadata))
            new_meta._truncate(tr)

        # Build a TimeSeries of the same class, then tidy metadata and units.
        truncated_ts = self.__class__(sliced.sort_index(), new_meta, copy.copy(self.units))
        truncated_ts._sanitize_metadata()
        truncated_ts._sanitize_units()
        return truncated_ts

    def extract(self, column_name):
        """Returns a new time series with the chosen column.

        Rows where the chosen column is empty (NaN) are dropped.

        Parameters
        ----------
        column_name : `str`
            A valid column name.

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new `GenericTimeSeries` with only the selected column.
        """
        # TODO: allow more than one column to be extracted.
        # Extract the column (kept as a DataFrame) and remove empty rows.
        data = self.data[[column_name]].dropna()

        # Build a generic TimeSeries, then sanitise its metadata and units.
        extracted_ts = GenericTimeSeries(data.sort_index(),
                                         TimeSeriesMetaData(copy.copy(self.meta.metadata)),
                                         copy.copy(self.units))
        extracted_ts._sanitize_metadata()
        extracted_ts._sanitize_units()
        return extracted_ts

    def concatenate(self, otherts, **kwargs):
        """Concatenate with another TimeSeries. This function will check and
        remove any duplicate times. It will keep the column values from the
        original time series to which the new time series is being added.

        Parameters
        ----------
        otherts : `~sunpy.timeseries.TimeSeries`
            Another time series.

        same_source : `bool` Optional
            Set to true to check if the sources of the time series match.

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new time series.

        Debate: decide if we want to be able to concatenate multiple time series
        at once.
        """

        # check to see if nothing needs to be done
        if self == otherts:
            return self

        # Check the sources match if specified.
        same_source = kwargs.get('same_source', False)
        if same_source and not (isinstance(otherts, self.__class__)):
            raise TypeError("TimeSeries classes must match if specified.")

        # Concatenate the metadata and data
        kwargs['sort'] = kwargs.pop('sort', False)
        meta = self.meta.concatenate(otherts.meta)
        data = pd.concat([self.data.copy(), otherts.data], **kwargs)

        # Add all the new units to the dictionary.
        units = OrderedDict()
        units.update(self.units)
        units.update(otherts.units)

        # If sources match then build similar TimeSeries.
        if self.__class__ == otherts.__class__:
            object = self.__class__(data.sort_index(), meta, units)
        else:
            # Build generic time series if the sources don't match.
            object = GenericTimeSeries(data.sort_index(), meta, units)

        # Sanatise metadata and units
        object._sanitize_metadata()
        object._sanitize_units()
        return object

# #### Plotting Methods #### #

    def plot(self, axes=None, **plot_args):
        """Plot a plot of the time series

        Parameters
        ----------
        axes : `~matplotlib.axes.Axes` or None
            If provided the image will be plotted on the given axes. Otherwise
            the current axes will be used.

        **plot_args : `dict`
            Any additional plot arguments that should be used
            when plotting.

        Returns
        -------
        axes : `~matplotlib.axes.Axes`
            The plot axes.
        """

        # Get current axes
        if axes is None:
            axes = plt.gca()

        axes = self.data.plot(ax=axes, **plot_args)

        return axes

    def peek(self, **kwargs):
        """Displays the time series in a new figure.

        Parameters
        ----------
        **kwargs : `dict`
            Any additional plot arguments that should be used when plotting.
        """
        # Check we have a timeseries valid for plotting
        self._validate_data_for_ploting()

        # Now make the plot
        figure = plt.figure()
        self.plot(**kwargs)
        figure.show()

    def _validate_data_for_ploting(self):
        """Raises an exception if the timeseries is invalid for plotting.
        To be added into all the peek methods in all source sup-classes.
        Currently only checks if we have an empty timeseries, where:
        len(self.data) == 0

        """
        # Check we have a valid TS
        if len(self.data) == 0:
            raise ValueError("The timeseries can't be plotted as it has no data present. "
                             "(len(self.data) == 0)")

# #### Miscellaneous #### #

    def _validate_meta(self):
        """
        Validates the meta-information associated with a TimeSeries.

        This method includes very basic validation checks which apply to
        all of the kinds of files that SunPy can read. Datasource-specific
        validation should be handled in the relevant file in the
        sunpy.timeseries.sources package.

        Allows for default unit assignment for:
            COL_UNITS

        """

        warnings.simplefilter('always', Warning)

        for meta_property in ('cunit1', 'cunit2', 'waveunit'):
            if (self.meta.get(meta_property) and
                u.Unit(self.meta.get(meta_property),
                       parse_strict='silent').physical_type == 'unknown'):

                warnings.warn(f"Unknown value for {meta_property.upper()}.", SunpyUserWarning)

    def _validate_units(self, units, **kwargs):
        """
        Validates the astropy unit-information associated with a TimeSeries.

        This method includes very basic validation checks which apply to
        all of the kinds of files that SunPy can read. Datasource-specific
        validation should be handled in the relevant file in the
        sunpy.timeseries.sources package.

        Allows for default unit assignment for:
            COL_UNITS

        """

        warnings.simplefilter('always', Warning)

        result = True
        for key in units:
            if not isinstance(units[key], astropy.units.UnitBase):
                # If this is not a unit then this can't be a valid units dict.
                result = False
                warnings.warn(f"Invalid unit given for {key}.", SunpyUserWarning)

        return result

    def _sanitize_units(self, **kwargs):
        """
        Sanitises the collections.OrderedDict used to store the units.
        Primarily this method will:

        Remove entries that don't match up to a column,
        Add unitless entries for columns with no units defined.
        Re-arrange the order of the dictionary to match the columns.
        """
        warnings.simplefilter('always', Warning)

        # Populate unspecified units:
        for column in set(self.data.columns.tolist()) - set(self.units.keys()):
            # For all columns not present in the units dictionary.
            self.units[column] = u.dimensionless_unscaled
            warnings.warn(f"Unknown units for {column}.", SunpyUserWarning)

        # Re-arrange so it's in the same order as the columns and removed unused.
        units = OrderedDict()
        for column in self.data.columns.tolist():
            units.update({column:self.units[column]})

        # Now use the amended units Ordered Dictionary
        self.units = units

    def _sanitize_metadata(self, **kwargs):
        """
        Sanitises the TimeSeriesMetaData object used to store the metadata.
        Primarily this method will:

        Remove entries outside of the datas TimeRange or truncate TimeRanges
        if the metadata overflows past the data,
        Remove column references in the metadata that don't match to a column
        in the data.
        Remove metadata entries that have no columns matching the data.
        """
        warnings.simplefilter('always', Warning)

        # Truncate the metadata
        self.meta._truncate(self.time_range)

        # Remove non-existant columns
        redundant_cols = list(set(self.meta.columns) - set(self.columns))
        self.meta._remove_columns(redundant_cols)

# #### Export/Output Methods #### #

    def to_table(self, **kwargs):
        """
        Return an Astropy Table of the given TimeSeries object.

        Returns
        -------
        newtable : `~astropy.table.Table`
            A new astropy table containing the data from the time series.
            The index is inserted as a first column named ``'date'`` and
            the units stored on the time series are attached to their
            columns.
        """
        # ToDo: Table.from_pandas(df) doesn't include the index column. Add request?
        table = Table.from_pandas(self.data)

        # Put the index in front of the data columns.
        date_column = Column(self.data.index.values, name='date')
        table.add_column(date_column, index=0)

        # Attach the stored units to the matching columns.
        for key, unit in self.units.items():
            table[key].unit = unit

        return table

    def to_dataframe(self, **kwargs):
        """
        Return a Pandas DataFrame of the give TimeSeries object.

        Returns
        -------
        newdf : `~pandas.core.frame.DataFrame`
            A Pandas Dataframe containing the data.
        """
        return self.data

    def to_array(self, columns=None):
        """
        Return a numpy array of the give TimeSeries object.

        Parameters
        ----------
        columns: `list`, optional, default:None
            If None, return all columns minus the index, otherwise, returns
            specified columns.

        Returns
        -------
        values : `numpy.ndarray`
            If the caller is heterogeneous and contains booleans or objects,
            the result will be of dtype=object. See Notes.
        """
        if columns:
            return self.data.values[columns]
        else:
            return self.data.values

    def __eq__(self, other):
        """
        Check two TimeSeries objects are the same, they have matching type, data,
        metadata and units entries.

        Parameters
        ----------
        other : `~sunpy.timeseries.GenericTimeSeries`
            The second TimeSeries object to compare with.

        Returns
        -------
        result : `bool`
        """
        match = True
        if isinstance(other, type(self)):
            if ((not self.data.equals(other.data)) or
                    (self.meta != other.meta) or
                    (self.units != other.units)):
                match = False
        else:
            match = False
        return match

    def __ne__(self, other):
        """
        Check two TimeSeries objects are not the same, they don't have matching
        type, data, metadata and/or units entries.

        Parameters
        ----------
        other : `~sunpy.timeseries.GenericTimeSeries`
            The second TimeSeries object to compare with.

        Returns
        -------
        result : `bool`
        """
        return not self == other

    @classmethod
    def _parse_file(cls, filepath):
        """Parses a file - to be implemented in any subclass that may use files"""
        return NotImplemented
Example #20
0
class GenericTimeSeries:
    """
    A generic time series object.

    Attributes
    ----------
    meta : `~sunpy.timeseries.metadata.TimeSeriesMetaData`
        The metadata giving details about the time series data/instrument.
    data : `~pandas.DataFrame`
        A pandas DataFrame representing one or more fields as a function of time.

    Parameters
    ----------------
    filename: `str` or File
        A file to read.
    source: `str`
        A string identifier for a registered subclass, matched by that
         subclasses `_is_source_for` method.
    concatenate :  `bool`
        Concatenate all files into one TimeSeries object if True, or return
        one TimeSeries for each file if False.

    All other keywords are passed to _is_source_for and then __init__.

    Examples
    --------
    >>> from sunpy.timeseries import TimeSeries
    >>> import datetime
    >>> import numpy as np
    >>> import pandas as pd
    >>> base = datetime.datetime.today()
    >>> times = [base - datetime.timedelta(minutes=x) for x in range(0, 24 * 60)]
    >>> intensity = np.sin(np.arange(0, 12 * np.pi, step=(12 * np.pi) / (24 * 60)))
    >>> df = pd.DataFrame(intensity, index=times, columns=['intensity'])
    >>> ts = TimeSeries(df)
    >>> ts.peek()   # doctest: +SKIP

    References
    ----------
    * `Pandas Documentation <https://pandas.pydata.org/pandas-docs/stable/>`_

    """

    # Class attribute used to specify the source class of the TimeSeries.
    _source = None

    def __init__(self, data, meta=None, units=None, **kwargs):
        """
        Store the data and normalise the metadata and units inputs.

        Parameters
        ----------
        data : `~pandas.DataFrame`
            The time-indexed data.
        meta : optional
            ``None``, a dict-like or a tuple is wrapped in a new
            `TimeSeriesMetaData` together with the data's time range and
            column names. Any other object is stored as given.
        units : `dict`, optional
            Mapping of column name to unit. Defaults to an empty dict.
        """
        self.data = data

        # Decide whether the metadata still has to be wrapped.
        if meta is None:
            meta = MetaDict()
            needs_wrapping = True
        else:
            needs_wrapping = isinstance(meta, (dict, OrderedDict, MetaDict, tuple))

        if needs_wrapping:
            # The time range is only built when it is actually used, so a
            # ready-made metadata object never triggers a TimeRange
            # construction from the index (e.g. for data without rows).
            tr = TimeRange(self.data.index.min(), self.data.index.max())
            self.meta = TimeSeriesMetaData(meta, tr, list(self.data.columns.values))
        else:
            # Should be a ready-made list of 3-tuples (complex metadata).
            self.meta = meta

        self.units = {} if units is None else units

        # NOTE: _validate_meta() / _validate_units() are not run here.

# #### Attribute definitions #### #

    @property
    def source(self):
        """Returns a string/object used to specify the source class of the TimeSeries."""
        return self._source

    @property
    def columns(self):
        """Returns a list of all the names of the columns in the data."""
        return list(self.data.columns.values)

    @property
    def index(self):
        """Return the time index of the data."""
        return self.data.index

    @property
    def time_range(self):
        """Returns the start and end times of the TimeSeries as a `~sunpy.time.TimeRange`
        object"""
        return TimeRange(self.data.index.min(), self.data.index.max())

# #### Data Access, Selection and Organisation Methods #### #

    def quantity(self, colname, **kwargs):
        """
        Return a `~astropy.units.quantity.Quantity` for the given column.

        Parameters
        ----------
        colname : `str`
            The heading of the column you want output.

        Returns
        -------
        quantity : `~astropy.units.quantity.Quantity`
        """
        values = self.data[colname].values
        unit   = self.units[colname]
        return u.Quantity(values, unit)

    def add_column(self, colname, quantity, unit=False, overwrite=True, **kwargs):
        """
        Return a new TimeSeries with the given column added or updated.

        Parameters
        ----------
        colname : `str`
            The heading of the column you want output.

        quantity : `~astropy.units.quantity.Quantity` or `~numpy.ndarray`
            The values to be placed within the column.
            If updating values only then a numpy array is permitted.

        unit : `~astropy.units.Unit`, optional
            The unit recorded for a newly added column. When left falsy it
            is taken from ``quantity`` if that is a Quantity, otherwise the
            column is recorded as dimensionless. For a column that already
            exists the unit stored in ``units`` is left untouched.

        overwrite : `bool`, optional, default:True
            Set to true to allow the method to overwrite a column already present
            in the TimeSeries.

        Returns
        -------
        newts : TimeSeries
            A new object of the same class; ``self`` is not modified.
        """
        is_quantity = isinstance(quantity, astropy.units.quantity.Quantity)

        # Get the expected units from the quantity if required.
        if not unit:
            unit = quantity.unit if is_quantity else u.dimensionless_unscaled

        # Make a copy of all the TimeSeries components.
        data = copy.copy(self.data)
        meta = TimeSeriesMetaData(copy.copy(self.meta.metadata))
        units = copy.copy(self.units)

        # Record the unit only when the column is not already present.
        is_new_column = colname not in self.data.columns
        if is_new_column:
            units[colname] = unit

        # Update or add the data.
        if is_new_column or overwrite:
            values = quantity
            if is_quantity:
                # Always store plain values expressed in the recorded unit.
                # Previously this conversion was skipped for a new column
                # added with overwrite=False.
                values = values.to(units[colname]).value
            data[colname] = values

        # Return a new TimeSeries with the given updated/added column.
        return self.__class__(data, meta, units)

    def sort_index(self, **kwargs):
        """Returns a copy of the TimeSeries with its index sorted.

        Generally this shouldn't be necessary as most TimeSeries operations sort
        the data anyway to ensure consistent behaviour when truncating.

        Parameters
        ----------
        **kwargs : `dict`
            Passed on to the ``sort_index`` method of the data.

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new `GenericTimeSeries` holding the sorted data together with
            copies of the metadata and units.
        """
        sorted_data = self.data.sort_index(**kwargs)
        meta_copy = TimeSeriesMetaData(copy.copy(self.meta.metadata))
        units_copy = copy.copy(self.units)
        return GenericTimeSeries(sorted_data, meta_copy, units_copy)

    def truncate(self, a, b=None, int=None):
        """Returns a truncated version of the TimeSeries object.

        Parameters
        ----------
        a : `sunpy.time.TimeRange`, `str` or `int`
            Either a time range to truncate to, or a start time in some format
            recognised by pandas, or a index integer.

        b : `str` or `int`
            If specified, the end time of the time range in some format
            recognised by pandas, or a index integer.

        int : `int`
            If specified, the integer indicating the slicing intervals
            (used as the slice step).

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new time series with only the selected times.
        """
        # Two strings are interpreted as the bounds of a TimeRange.
        if isinstance(a, str) and isinstance(b, str):
            a = TimeRange(a, b)

        # Work out the slice bounds.
        if isinstance(a, TimeRange):
            start, end = a.start, a.end
        else:
            start, end = a, b

        # Slice the sorted data; ``int`` is the (optional) step.
        sliced = self.data.sort_index()[start:end:int]

        # Metadata stays empty unless some data survived the truncation.
        new_meta = TimeSeriesMetaData([])
        if len(sliced) > 0:
            tr = TimeRange(sliced.index.min(), sliced.index.max())
            new_meta = TimeSeriesMetaData(copy.deepcopy(self.meta.metadata))
            new_meta._truncate(tr)

        # Build a TimeSeries of the same class, then tidy metadata and units.
        truncated_ts = self.__class__(sliced.sort_index(), new_meta, copy.copy(self.units))
        truncated_ts._sanitize_metadata()
        truncated_ts._sanitize_units()
        return truncated_ts

    def extract(self, column_name):
        """Returns a new time series with the chosen column.

        Rows where the chosen column is empty (NaN) are dropped.

        Parameters
        ----------
        column_name : `str`
            A valid column name.

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new `GenericTimeSeries` with only the selected column.
        """
        # TODO: allow more than one column to be extracted.
        # Extract the column (kept as a DataFrame) and remove empty rows.
        data = self.data[[column_name]].dropna()

        # Build a generic TimeSeries, then sanitise its metadata and units.
        extracted_ts = GenericTimeSeries(data.sort_index(),
                                         TimeSeriesMetaData(copy.copy(self.meta.metadata)),
                                         copy.copy(self.units))
        extracted_ts._sanitize_metadata()
        extracted_ts._sanitize_units()
        return extracted_ts

    def concatenate(self, otherts, **kwargs):
        """Concatenate with another TimeSeries. This function will check and
        remove any duplicate times. It will keep the column values from the
        original time series to which the new time series is being added.

        Parameters
        ----------
        otherts : `~sunpy.timeseries.TimeSeries`
            Another time series.

        same_source : `bool` Optional
            Set to true to check if the sources of the time series match.

        Returns
        -------
        newts : `~sunpy.timeseries.TimeSeries`
            A new time series.

        Debate: decide if we want to be able to concatenate multiple time series
        at once.
        """

        # check to see if nothing needs to be done
        if self == otherts:
            return self

        # Check the sources match if specified.
        same_source = kwargs.get('same_source', False)
        if same_source and not (isinstance(otherts, self.__class__)):
            raise TypeError("TimeSeries classes must match if specified.")

        # Concatenate the metadata and data
        meta = self.meta.concatenate(otherts.meta)
        data = pd.concat([self.data.copy(), otherts.data], **kwargs)

        # Add all the new units to the dictionary.
        units = OrderedDict()
        units.update(self.units)
        units.update(otherts.units)

        # If sources match then build similar TimeSeries.
        if self.__class__ == otherts.__class__:
            object = self.__class__(data.sort_index(), meta, units)
        else:
            # Build generic time series if the sources don't match.
            object = GenericTimeSeries(data.sort_index(), meta, units)

        # Sanatise metadata and units
        object._sanitize_metadata()
        object._sanitize_units()
        return object

# #### Plotting Methods #### #

    def plot(self, axes=None, **plot_args):
        """Draw the time series data onto a set of matplotlib axes.

        Parameters
        ----------
        axes : `~matplotlib.axes.Axes` or None
            Axes to draw on. When None, the current pyplot axes
            (``plt.gca()``) are used instead.

        **plot_args : `dict`
            Extra keyword arguments, forwarded unchanged to the ``plot``
            method of ``self.data``.

        Returns
        -------
        axes : `~matplotlib.axes.Axes`
            Whatever ``self.data.plot`` returns for the chosen axes.
        """
        # Fall back to the current pyplot axes only when none were supplied.
        target_axes = plt.gca() if axes is None else axes

        return self.data.plot(ax=target_axes, **plot_args)

    def peek(self, **kwargs):
        """Show the time series in a newly created figure.

        Parameters
        ----------
        **kwargs : `dict`
            Extra keyword arguments, forwarded unchanged to ``self.plot``.

        Raises
        ------
        ValueError
            Propagated from ``_validate_data_for_ploting`` when the time
            series holds no data.
        """
        # Refuse to open a figure for a time series that cannot be plotted.
        self._validate_data_for_ploting()

        # Create the figure first so that plot() has a fresh one to work with,
        # then display it.
        fig = plt.figure()
        self.plot(**kwargs)
        fig.show()

    def _validate_data_for_ploting(self):
        """Raises an exception if the timeseries is invalid for plotting.
        To be added into all the peek methods in all source sup-classes.
        Currently only checks if we have an empty timeseries, where:
        len(self.data) == 0

        """
        # Check we have a valid TS
        if len(self.data) == 0:
            raise ValueError('The timeseries can\'t be plotted as it has no data present. (len(self.data) == 0)')

# #### Miscellaneous #### #

    def _validate_meta(self):
        """
        Validates the meta-information associated with a TimeSeries.

        This method includes very basic validation checks which apply to
        all of the kinds of files that SunPy can read. Datasource-specific
        validation should be handled in the relevant file in the
        sunpy.timeseries.sources package.

        Allows for default unit assignment for:
            COL_UNITS

        """

        warnings.simplefilter('always', Warning)

        #
        for meta_property in ('cunit1', 'cunit2', 'waveunit'):
            if (self.meta.get(meta_property) and
                u.Unit(self.meta.get(meta_property),
                       parse_strict='silent').physical_type == 'unknown'):

                warnings.warn("Unknown value for "+meta_property.upper(), Warning)

    def _validate_units(self, units, **kwargs):
        """
        Validates the astropy unit-information associated with a TimeSeries.

        This method includes very basic validation checks which apply to
        all of the kinds of files that SunPy can read. Datasource-specific
        validation should be handled in the relevant file in the
        sunpy.timeseries.sources package.

        Allows for default unit assignment for:
            COL_UNITS

        """

        warnings.simplefilter('always', Warning)

        result = True
        for key in units:
            if not isinstance(units[key], astropy.units.UnitBase):
                # If this is not a unit then this can't be a valid units dict.
                result = False
                warnings.warn("Invalid unit given for \""+str(key)+"\"", Warning)

        return result

    def _sanitize_units(self, **kwargs):
        """
        Sanitises the collections.OrderedDict used to store the units.
        Primarily this method will:

        Remove entries that don't match up to a column,
        Add unitless entries for columns with no units defined.
        Re-arrange the order of the dictionary to match the columns.
        """
        warnings.simplefilter('always', Warning)

        # Populate unspecified units:
        for column in set(self.data.columns.tolist()) - set(self.units.keys()):
            # For all columns not present in the units dictionary.
            self.units[column] = u.dimensionless_unscaled
            warnings.warn("Unknown units for \""+str(column)+"\"", Warning)

        # Re-arrange so it's in the same order as the columns and removed unused.
        units = OrderedDict()
        for column in self.data.columns.tolist():
            units.update({column:self.units[column]})

        # Now use the amended units Ordered Dictionary
        self.units = units

    def _sanitize_metadata(self, **kwargs):
        """
        Sanitises the TimeSeriesMetaData object used to store the metadata.
        Primarily this method will:

        Remove entries outside of the datas TimeRange or truncate TimeRanges
        if the metadata overflows past the data,
        Remove column references in the metadata that don't match to a column
        in the data.
        Remove metadata entries that have no columns matching the data.
        """
        warnings.simplefilter('always', Warning)

        # Truncate the metadata
        self.meta._truncate(self.time_range)

        # Remove non-existant columns
        redundant_cols = list(set(self.meta.columns) - set(self.columns))
        self.meta._remove_columns(redundant_cols)

# #### Export/Output Methods #### #

    def to_table(self, **kwargs):
        """
        Build an Astropy Table from the TimeSeries data.

        The first column of the table, named ``'date'``, holds the values of
        the data index; the data columns follow. Each column named in
        ``self.units`` has that unit attached.

        Parameters
        ----------
        **kwargs : `dict`
            Accepted but not used.

        Returns
        -------
        newtable : `~astropy.table.Table`
            A new astropy table containing the data from the time series.
        """
        # ToDo: Table.from_pandas(df) doesn't include the index column. Add request?
        astropy_table = Table.from_pandas(self.data)

        # Put the index back in by hand, as the leading column.
        date_column = Column(self.data.index.values, name='date')
        astropy_table.add_column(date_column, index=0)

        # Attach the known unit to each data column.
        for column_name, unit in self.units.items():
            astropy_table[column_name].unit = unit

        return astropy_table

    def to_dataframe(self, **kwargs):
        """
        Give access to the data of the TimeSeries as a Pandas DataFrame.

        Parameters
        ----------
        **kwargs : `dict`
            Accepted but not used.

        Returns
        -------
        newdf : `~pandas.core.frame.DataFrame`
            The object stored in ``self.data``. It is returned as-is, not
            copied.
        """
        frame = self.data
        return frame

    def to_array(self, **kwargs):
        """
        Return a numpy array of the given TimeSeries object.

        Parameters
        ----------
        columns : `list`, optional, default:None
            If None, return all columns minus the index, otherwise, returns
            only the specified columns, in the order given.

        **kwargs : `dict`
            Any remaining keyword arguments are forwarded to
            `pandas.DataFrame.to_numpy` (for example ``dtype`` or ``copy``).

        Returns
        -------
        values : `~numpy.ndarray`
            If the caller is heterogeneous and contains booleans or objects,
            the result will be of dtype=object.
        """
        # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
        # pandas 1.0; it is only used when to_numpy() (added in pandas 0.24)
        # is not available.
        if not hasattr(self.data, 'to_numpy'):
            return self.data.as_matrix(**kwargs)

        # to_numpy() has no ``columns`` argument, so select the columns first
        # to keep the keyword documented above working.
        columns = kwargs.pop('columns', None)
        frame = self.data if columns is None else self.data[columns]
        return frame.to_numpy(**kwargs)

    def __eq__(self, other):
        """
        Check two TimeSeries objects are the same, they have matching type, data,
        metadata and units entries.

        Parameters
        ----------
        other : `~sunpy.timeseries.GenericTimeSeries`
            The second TimeSeries object to compare with.

        Returns
        -------
        result : `bool`
        """
        match = True
        if isinstance(other, type(self)):
            if (not self.data.equals(other.data)) or (self.meta != other.meta) or (self.units != other.units):
                match = False
        else:
            match = False
        return match

    def __ne__(self, other):
        """
        Check two TimeSeries objects are not the same, they don't have matching
        type, data, metadata and/or units entries.

        Parameters
        ----------
        other : `~sunpy.timeseries.GenericTimeSeries`
            The second TimeSeries object to compare with.

        Returns
        -------
        result : `bool`
        """
        return not self == other

    @classmethod
    def _parse_file(cls, filepath):
        """Parses a file - to be implemented in any subclass that may use files.

        This implementation is a placeholder: it ignores ``filepath`` and
        returns the ``NotImplemented`` singleton (it does not raise).

        Parameters
        ----------
        filepath : `str`
            Path of the file to parse. Not used here.
            NOTE(review): the expected type is assumed - confirm with callers.
        """
        return NotImplemented