class StationStatistics(object):
    """
    Class representing objects that include statistical information about
    diurnal features of selected variables. A StationStatistics object is
    generally associated to a Station object for which this infomration
    is valid.
    """
    def __init__(self, data=None, lon=None, lat=None, timezone=None):
        self._data = None
        self._lon = lon
        self._lat = lat
        self._timezone = timezone

        if data is not None:
            self.data = data

        self.wind = Bunch(a=None, b=None, t_shift=None)
        self.precip = Bunch(months=None, stats=None)
        self.hum = Bunch(a0=None, a1=None, kr=None)
        self.temp = Bunch(max_delta=None)
        self.glob = Bunch(angstroem_a=0.25,
                          angstroem_b=0.75,
                          bristcamp_a=0.75,
                          bristcamp_c=2.4)

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, df):
        assert isinstance(df, pd.DataFrame)
        assert df.index.is_all_dates
        assert df.index.resolution == 'hour'

        self._data = df.copy()

    def calc_precipitation_stats(self,
                                 months=None,
                                 avg_stats=True,
                                 percentile=50):
        """
        Calculates precipitation statistics for the cascade model while aggregating hourly observations

        Parameters
        ----------
        months :        Months for each seasons to be used for statistics (array of numpy array, default=1-12, e.g., [np.arange(12) + 1])
        avg_stats :     average statistics for all levels True/False (default=True)
        percentile :    percentil for splitting the dataset in small and high intensities (default=50)
        
        """
        if months is None:
            months = [np.arange(12) + 1]

        self.precip.months = months
        self.precip.stats = melodist.build_casc(self.data,
                                                months=months,
                                                avg_stats=avg_stats,
                                                percentile=percentile)

    def calc_wind_stats(self):
        """
        Calculates statistics in order to derive diurnal patterns of wind speed
        """
        a, b, t_shift = melodist.fit_cosine_function(self.data.wind)
        self.wind.update(a=a, b=b, t_shift=t_shift)

    def calc_humidity_stats(self):
        """
        Calculates statistics in order to derive diurnal patterns of relative humidity (requires temperature data as well)        
        """
        a1, a0 = melodist.calculate_dewpoint_regression(self.data,
                                                        return_stats=False)
        self.hum.update(a0=a0, a1=a1)
        self.hum.kr = 12

    def calc_temperature_stats(self):
        """
        Calculates statistics in order to derive diurnal patterns of temperature
        """
        self.temp.max_delta = melodist.get_shift_by_data(
            self.data.temp, self._lon, self._lat, self._timezone)

    def calc_radiation_stats(self, data_daily, day_length=None):
        """
        Calculates statistics in order to derive solar radiation from sunshine duration or
        minimum/maximum temperature.

        Parameters
        ----------
        data_daily : DataFrame
            Daily data from the associated ``Station`` object.

        day_length : Series, optional
            Day lengths as calculated by ``calc_sun_times``.
        """
        if 'ssd' in data_daily and day_length is not None:
            df = pd.DataFrame(
                data=dict(ssd=data_daily.ssd, day_length=day_length)).dropna(
                    how='any')
            pot_rad = melodist.potential_radiation(
                melodist.util.hourly_index(df.index), self._lon, self._lat,
                self._timezone)
            pot_rad_daily = pot_rad.resample('D', how='mean')
            obs_rad_daily = self.data.glob.resample('D', how='mean')
            a, b = melodist.fit_angstroem_params(data_daily.ssd, day_length,
                                                 pot_rad_daily, obs_rad_daily)
            self.glob.angstroem_a = a
            self.glob.angstroem_b = b

        if 'tmin' in data_daily and 'tmax' in data_daily:
            pot_rad = melodist.potential_radiation(
                melodist.util.hourly_index(df.index), self._lon, self._lat,
                self._timezone)
            pot_rad_daily = pot_rad.resample('D', how='mean')
            obs_rad_daily = self.data.glob.resample('D', how='mean')
            df = pd.DataFrame(data=dict(
                tmin=data_daily.tmin,
                tmax=data_daily.tmax,
                pot_rad=pot_rad_daily,
                obs_rad=obs_rad_daily,
            )).dropna(how='any')
            a, c = melodist.fit_bristow_campbell_params(
                df.tmin, df.tmax, df.pot_rad, df.obs_rad)
            self.glob.bristcamp_a = a
            self.glob.bristcamp_c = c

    def to_json(self, filename=None):
        """
        Exports statistical data to a JSON formatted file

        Parameters
        ----------
        filename:    output file that holds statistics data
        """
        def json_encoder(obj):
            if isinstance(obj, pd.DataFrame) or isinstance(obj, pd.Series):
                return json.loads(obj.to_json(date_format='iso'))
            elif isinstance(obj, melodist.cascade.CascadeStatistics):
                return obj.__dict__
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            else:
                raise TypeError('%s not supported' % type(obj))

        d = dict(
            temp=self.temp,
            wind=self.wind,
            precip=self.precip,
            hum=self.hum,
        )

        j = json.dumps(d, default=json_encoder, indent=4)

        if filename is None:
            return j
        else:
            with open(filename, 'w') as f:
                f.write(j)

    @classmethod
    def from_json(cls, filename):
        """
        Imports statistical data from a JSON formatted file

        Parameters
        ----------
        filename:    input file that holds statistics data
        """
        def json_decoder(d):
            if 'p01' in d and 'pxx' in d:  # we assume this is a CascadeStatistics object
                return melodist.cascade.CascadeStatistics.from_dict(d)

            return d

        with open(filename) as f:
            d = json.load(f, object_hook=json_decoder)

        stats = cls()

        stats.temp.update(d['temp'])
        stats.hum.update(d['hum'])
        stats.precip.update(d['precip'])
        stats.wind.update(d['wind'])

        if stats.temp.max_delta is not None:
            stats.temp.max_delta = pd.read_json(json.dumps(
                stats.temp.max_delta),
                                                typ='series')

        return stats
# Example no. 2
# 0
class StationStatistics(object):
    """
    Class representing objects that include statistical information about
    diurnal features of selected variables. A StationStatistics object is
    generally associated to a Station object for which this infomration
    is valid.
    """
    def __init__(self, data=None, lon=None, lat=None, timezone=None):
        self._data = None
        self._lon = lon
        self._lat = lat
        self._timezone = timezone

        if data is not None:
            self.data = data

        self.wind = Bunch(a=None, b=None, t_shift=None)
        self.precip = Bunch(months=None, stats=None)
        self.hum = Bunch(a0=None,
                         a1=None,
                         kr=None,
                         month_hour_precip_mean=None)
        self.temp = Bunch(max_delta=None, mean_course=None)

        angstroem_df = pd.DataFrame(index=np.arange(12) + 1,
                                    columns=['a', 'b'])
        angstroem_df.a = 0.25
        angstroem_df.b = 0.75

        bristcamp_df = pd.DataFrame(index=np.arange(12) + 1,
                                    columns=['a', 'c'])
        bristcamp_df.a = 0.75
        bristcamp_df.c = 2.4

        self.glob = Bunch(angstroem=angstroem_df,
                          bristcamp=bristcamp_df,
                          mean_course=None)

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, df):
        assert isinstance(df, pd.DataFrame)
        assert df.index.is_all_dates
        assert df.index.resolution == 'hour'

        self._data = df.copy()

    def calc_precipitation_stats(self,
                                 months=None,
                                 avg_stats=True,
                                 percentile=50):
        """
        Calculates precipitation statistics for the cascade model while aggregating hourly observations

        Parameters
        ----------
        months :        Months for each seasons to be used for statistics (array of numpy array, default=1-12, e.g., [np.arange(12) + 1])
        avg_stats :     average statistics for all levels True/False (default=True)
        percentile :    percentil for splitting the dataset in small and high intensities (default=50)
        
        """
        if months is None:
            months = [np.arange(12) + 1]

        self.precip.months = months
        self.precip.stats = melodist.build_casc(self.data,
                                                months=months,
                                                avg_stats=avg_stats,
                                                percentile=percentile)

    def calc_wind_stats(self):
        """
        Calculates statistics in order to derive diurnal patterns of wind speed
        """
        a, b, t_shift = melodist.fit_cosine_function(self.data.wind)
        self.wind.update(a=a, b=b, t_shift=t_shift)

    def calc_humidity_stats(self):
        """
        Calculates statistics in order to derive diurnal patterns of relative humidity.
        """
        a1, a0 = melodist.calculate_dewpoint_regression(self.data,
                                                        return_stats=False)
        self.hum.update(a0=a0, a1=a1)
        self.hum.kr = 12

        self.hum.month_hour_precip_mean = melodist.calculate_month_hour_precip_mean(
            self.data)

    def calc_temperature_stats(self):
        """
        Calculates statistics in order to derive diurnal patterns of temperature
        """
        self.temp.max_delta = melodist.get_shift_by_data(
            self.data.temp, self._lon, self._lat, self._timezone)
        self.temp.mean_course = melodist.util.calculate_mean_daily_course_by_month(
            self.data.temp, normalize=True)

    def calc_radiation_stats(self,
                             data_daily=None,
                             day_length=None,
                             how='all'):
        """
        Calculates statistics in order to derive solar radiation from sunshine duration or
        minimum/maximum temperature.

        Parameters
        ----------
        data_daily : DataFrame, optional
            Daily data from the associated ``Station`` object.

        day_length : Series, optional
            Day lengths as calculated by ``calc_sun_times``.
        """
        assert how in ('all', 'seasonal', 'monthly')

        self.glob.mean_course = melodist.util.calculate_mean_daily_course_by_month(
            self.data.glob)

        if data_daily is not None:
            pot_rad = melodist.potential_radiation(
                melodist.util.hourly_index(data_daily.index), self._lon,
                self._lat, self._timezone)
            pot_rad_daily = pot_rad.resample('D').mean()
            obs_rad_daily = self.data.glob.resample('D').mean()

            if how == 'all':
                month_ranges = [np.arange(12) + 1]
            elif how == 'seasonal':
                month_ranges = [[3, 4, 5], [6, 7, 8], [9, 10, 11], [12, 1, 2]]
            elif how == 'monthly':
                month_ranges = zip(np.arange(12) + 1)

            def myisin(s, v):
                return pd.Series(s).isin(v).values

            def extract_months(s, months):
                return s[myisin(s.index.month, months)]

            if 'ssd' in data_daily and day_length is not None:
                for months in month_ranges:
                    a, b = melodist.fit_angstroem_params(
                        extract_months(data_daily.ssd, months),
                        extract_months(day_length, months),
                        extract_months(pot_rad_daily, months),
                        extract_months(obs_rad_daily, months),
                    )

                    for month in months:
                        self.glob.angstroem.loc[month] = a, b

            if 'tmin' in data_daily and 'tmax' in data_daily:
                df = pd.DataFrame(data=dict(
                    tmin=data_daily.tmin,
                    tmax=data_daily.tmax,
                    pot_rad=pot_rad_daily,
                    obs_rad=obs_rad_daily,
                )).dropna(how='any')

                for months in month_ranges:
                    a, c = melodist.fit_bristow_campbell_params(
                        extract_months(df.tmin, months),
                        extract_months(df.tmax, months),
                        extract_months(df.pot_rad, months),
                        extract_months(df.obs_rad, months),
                    )

                    for month in months:
                        self.glob.bristcamp.loc[month] = a, c

    def to_json(self, filename=None):
        """
        Exports statistical data to a JSON formatted file

        Parameters
        ----------
        filename:    output file that holds statistics data
        """
        def json_encoder(obj):
            if isinstance(obj, pd.DataFrame) or isinstance(obj, pd.Series):
                if isinstance(obj.index, pd.core.index.MultiIndex):
                    obj = obj.reset_index()  # convert MultiIndex to columns

                return json.loads(obj.to_json(date_format='iso'))
            elif isinstance(obj, melodist.cascade.CascadeStatistics):
                return obj.__dict__
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            else:
                raise TypeError('%s not supported' % type(obj))

        d = dict(temp=self.temp,
                 wind=self.wind,
                 precip=self.precip,
                 hum=self.hum,
                 glob=self.glob)

        j = json.dumps(d, default=json_encoder, indent=4)

        if filename is None:
            return j
        else:
            with open(filename, 'w') as f:
                f.write(j)

    @classmethod
    def from_json(cls, filename):
        """
        Imports statistical data from a JSON formatted file

        Parameters
        ----------
        filename:    input file that holds statistics data
        """
        def json_decoder(d):
            if 'p01' in d and 'pxx' in d:  # we assume this is a CascadeStatistics object
                return melodist.cascade.CascadeStatistics.from_dict(d)

            return d

        with open(filename) as f:
            d = json.load(f, object_hook=json_decoder)

        stats = cls()

        stats.temp.update(d['temp'])
        stats.hum.update(d['hum'])
        stats.precip.update(d['precip'])
        stats.wind.update(d['wind'])
        stats.glob.update(d['glob'])

        if stats.temp.max_delta is not None:
            stats.temp.max_delta = pd.read_json(json.dumps(
                stats.temp.max_delta),
                                                typ='series').sort_index()

        if stats.temp.mean_course is not None:
            mc = pd.read_json(json.dumps(stats.temp.mean_course),
                              typ='frame').sort_index()[np.arange(1, 12 + 1)]
            stats.temp.mean_course = mc.sort_index()[np.arange(1, 12 + 1)]

        if stats.hum.month_hour_precip_mean is not None:
            mhpm = pd.read_json(json.dumps(stats.hum.month_hour_precip_mean),
                                typ='frame').sort_index()
            mhpm = mhpm.set_index(['level_0', 'level_1',
                                   'level_2'])  # convert to MultiIndex
            mhpm = mhpm.squeeze()  # convert to Series
            mhpm = mhpm.rename_axis([None, None, None])  # remove index labels
            stats.hum.month_hour_precip_mean = mhpm

        for var in ('angstroem', 'bristcamp', 'mean_course'):
            if stats.glob[var] is not None:
                stats.glob[var] = pd.read_json(json.dumps(
                    stats.glob[var])).sort_index()

        if stats.glob.mean_course is not None:
            stats.glob.mean_course = stats.glob.mean_course[np.arange(
                1, 12 + 1)]

        return stats
class StationStatistics(object):
    """
    Class representing objects that include statistical information about
    diurnal features of selected variables. A StationStatistics object is
    generally associated to a Station object for which this infomration
    is valid.
    """
    def __init__(self, data=None, lon=None, lat=None, timezone=None):
        self._data = None
        self._lon = lon
        self._lat = lat
        self._timezone = timezone

        if data is not None:
            self.data = data

        self.wind = Bunch(a=None, b=None, t_shift=None)
        self.precip = Bunch(months=None, stats=None)
        self.hum = Bunch(a0=None, a1=None, kr=None, month_hour_precip_mean=None)
        self.temp = Bunch(max_delta=None, mean_course=None)

        angstroem_df = pd.DataFrame(index=np.arange(12) + 1, columns=['a', 'b'])
        angstroem_df.a = 0.25
        angstroem_df.b = 0.75

        bristcamp_df = pd.DataFrame(index=np.arange(12) + 1, columns=['a', 'c'])
        bristcamp_df.a = 0.75
        bristcamp_df.c = 2.4

        self.glob = Bunch(angstroem=angstroem_df, bristcamp=bristcamp_df, mean_course=None)

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, df):
        assert isinstance(df, pd.DataFrame)
        assert df.index.is_all_dates
        assert df.index.resolution == 'hour'

        self._data = df.copy()

    def calc_precipitation_stats(self, months=None, avg_stats=True, percentile=50):
        """
        Calculates precipitation statistics for the cascade model while aggregating hourly observations

        Parameters
        ----------
        months :        Months for each seasons to be used for statistics (array of numpy array, default=1-12, e.g., [np.arange(12) + 1])
        avg_stats :     average statistics for all levels True/False (default=True)
        percentile :    percentil for splitting the dataset in small and high intensities (default=50)
        
        """
        if months is None:
            months = [np.arange(12) + 1]

        self.precip.months = months
        self.precip.stats = melodist.build_casc(self.data, months=months, avg_stats=avg_stats, percentile=percentile)

    def calc_wind_stats(self):
        """
        Calculates statistics in order to derive diurnal patterns of wind speed
        """
        a, b, t_shift = melodist.fit_cosine_function(self.data.wind)
        self.wind.update(a=a, b=b, t_shift=t_shift)

    def calc_humidity_stats(self):
        """
        Calculates statistics in order to derive diurnal patterns of relative humidity.
        """
        a1, a0 = melodist.calculate_dewpoint_regression(self.data, return_stats=False)
        self.hum.update(a0=a0, a1=a1)
        self.hum.kr = 12

        self.hum.month_hour_precip_mean = melodist.calculate_month_hour_precip_mean(self.data)

    def calc_temperature_stats(self):
        """
        Calculates statistics in order to derive diurnal patterns of temperature
        """
        self.temp.max_delta = melodist.get_shift_by_data(self.data.temp, self._lon, self._lat, self._timezone)
        self.temp.mean_course = melodist.util.calculate_mean_daily_course_by_month(self.data.temp, normalize=True)

    def calc_radiation_stats(self, data_daily=None, day_length=None, how='all'):
        """
        Calculates statistics in order to derive solar radiation from sunshine duration or
        minimum/maximum temperature.

        Parameters
        ----------
        data_daily : DataFrame, optional
            Daily data from the associated ``Station`` object.

        day_length : Series, optional
            Day lengths as calculated by ``calc_sun_times``.
        """
        assert how in ('all', 'seasonal', 'monthly')

        self.glob.mean_course = melodist.util.calculate_mean_daily_course_by_month(self.data.glob)

        if data_daily is not None:
            pot_rad = melodist.potential_radiation(
                melodist.util.hourly_index(data_daily.index),
                self._lon, self._lat, self._timezone)
            pot_rad_daily = pot_rad.resample('D').mean()
            obs_rad_daily = self.data.glob.resample('D').mean()

            if how == 'all':
                month_ranges = [np.arange(12) + 1]
            elif how == 'seasonal':
                month_ranges = [[3, 4, 5], [6, 7, 8], [9, 10, 11], [12, 1, 2]]
            elif how == 'monthly':
                month_ranges = zip(np.arange(12) + 1)

            def myisin(s, v):
                return pd.Series(s).isin(v).values

            def extract_months(s, months):
                return s[myisin(s.index.month, months)]

            if 'ssd' in data_daily and day_length is not None:
                for months in month_ranges:
                    a, b = melodist.fit_angstroem_params(
                        extract_months(data_daily.ssd, months),
                        extract_months(day_length, months),
                        extract_months(pot_rad_daily, months),
                        extract_months(obs_rad_daily, months),
                    )

                    for month in months:
                        self.glob.angstroem.loc[month] = a, b

            if 'tmin' in data_daily and 'tmax' in data_daily:
                df = pd.DataFrame(
                    data=dict(
                        tmin=data_daily.tmin,
                        tmax=data_daily.tmax,
                        pot_rad=pot_rad_daily,
                        obs_rad=obs_rad_daily,
                    )
                ).dropna(how='any')

                for months in month_ranges:
                    a, c = melodist.fit_bristow_campbell_params(
                        extract_months(df.tmin, months),
                        extract_months(df.tmax, months),
                        extract_months(df.pot_rad, months),
                        extract_months(df.obs_rad, months),
                    )

                    for month in months:
                        self.glob.bristcamp.loc[month] = a, c

    def to_json(self, filename=None):
        """
        Exports statistical data to a JSON formatted file

        Parameters
        ----------
        filename:    output file that holds statistics data
        """
        def json_encoder(obj):
            if isinstance(obj, pd.DataFrame) or isinstance(obj, pd.Series):
                if isinstance(obj.index, pd.core.index.MultiIndex):
                    obj = obj.reset_index()  # convert MultiIndex to columns

                return json.loads(obj.to_json(date_format='iso'))
            elif isinstance(obj, melodist.cascade.CascadeStatistics):
                return obj.__dict__
            elif isinstance(obj, np.ndarray):
                return obj.tolist()
            else:
                raise TypeError('%s not supported' % type(obj))

        d = dict(
            temp=self.temp,
            wind=self.wind,
            precip=self.precip,
            hum=self.hum,
            glob=self.glob
        )

        j = json.dumps(d, default=json_encoder, indent=4)

        if filename is None:
            return j
        else:
            with open(filename, 'w') as f:
                f.write(j)

    @classmethod
    def from_json(cls, filename):
        """
        Imports statistical data from a JSON formatted file

        Parameters
        ----------
        filename:    input file that holds statistics data
        """
        def json_decoder(d):
            if 'p01' in d and 'pxx' in d:  # we assume this is a CascadeStatistics object
                return melodist.cascade.CascadeStatistics.from_dict(d)

            return d

        with open(filename) as f:
            d = json.load(f, object_hook=json_decoder)

        stats = cls()

        stats.temp.update(d['temp'])
        stats.hum.update(d['hum'])
        stats.precip.update(d['precip'])
        stats.wind.update(d['wind'])
        stats.glob.update(d['glob'])

        if stats.temp.max_delta is not None:
            stats.temp.max_delta = pd.read_json(json.dumps(stats.temp.max_delta), typ='series').sort_index()

        if stats.temp.mean_course is not None:
            mc = pd.read_json(json.dumps(stats.temp.mean_course), typ='frame').sort_index()[np.arange(1, 12 + 1)]
            stats.temp.mean_course = mc.sort_index()[np.arange(1, 12 + 1)]

        if stats.hum.month_hour_precip_mean is not None:
            mhpm = pd.read_json(json.dumps(stats.hum.month_hour_precip_mean), typ='frame').sort_index()
            mhpm = mhpm.set_index(['level_0', 'level_1', 'level_2'])  # convert to MultiIndex
            mhpm = mhpm.squeeze()  # convert to Series
            mhpm = mhpm.rename_axis([None, None, None])  # remove index labels
            stats.hum.month_hour_precip_mean = mhpm

        for var in ('angstroem', 'bristcamp', 'mean_course'):
            if stats.glob[var] is not None:
                stats.glob[var] = pd.read_json(json.dumps(stats.glob[var])).sort_index()

        if stats.glob.mean_course is not None:
            stats.glob.mean_course = stats.glob.mean_course[np.arange(1, 12 + 1)]

        return stats