Example #1
0
    def test_get_dekad_period(self):
        """Check the dekad periods returned for the three fixture dates."""
        expected = [4, 5, 6]

        result = dk.get_dekad_period([self.date1, self.date2, self.date3])

        assert result == expected
Example #2
0
def _longest_deficit_run(window):
    """
    Return the length of the longest run of consecutive set flags.

    Parameters
    ----------
    window : iterable of float
        Excess flags (0. or 1.) of one rolling window. Must not contain
        NaN, because every element is converted with ``int``.

    Returns
    -------
    longest : int
        Number of elements in the longest uninterrupted run of non-zero
        flags, 0 if there is none.
    """
    longest = current = 0
    for flag in window:
        if int(flag):
            current += 1
            if current > longest:
                longest = current
        else:
            current = 0
    return longest


def calc_DI(data, inverse=False, interest_period=(6, 12, 24), scaled=False,
            scale_zero=False, modf_all=False):
    """
    Calculates a Drought Index based on an algorithm developed by
    FAO SWALIM.

    Parameters
    ----------
    data : pandas.DataFrame
        Input data as Pandas DataFrame, must come with column names.
        The frame is modified in place: every input column is removed and
        replaced by one ``<column>_DI_<ip>`` column per interest period.
    inverse : bool
        Inverts the input time series; set True if time series is indirect
        proportional to the expected  output, e.g. Temperature with output
        Temperature Drought Index.
    interest_period : sequence of int, optional
        interest periods used to calculate drought index,
        defaults to (6, 12, 24)
    scaled : boolean, optional
        If True values will be scaled between 0 and 1.
    scale_zero : boolean, optional
        If True values will be shifted around zero, defaults to False.
    modf_all : boolean, optional
        If True values will be modified, independent of their min.

    Returns
    -------
    data : pandas.DataFrame
        The input frame (same object) holding only the drought index
        columns.
    """

    # Snapshot the column labels first; columns are removed and added while
    # iterating.
    variables = list(data.keys())

    # Dekad period of every time step, kept outside of the frame so that it
    # cannot clash with an input column.
    period = pd.Series(get_dekad_period(data.index), index=data.index)

    for var in variables:

        modf = data.pop(var)

        if inverse:
            modf = (modf.max() + 1) - modf

        # Shift by one on request or if the series touches zero.
        if modf_all or modf.min() == 0:
            modf = modf + 1

        modf_avg = modf.groupby(period).transform('mean')

        # Excess
        # Dekads below long term average are flagged with 1, all others
        # with 0; time steps without a value stay NaN.
        exc = np.where((modf_avg / modf).values >= 1, 1., 0.)
        exc[np.isnan(modf.values)] = np.nan
        exc = pd.Series(exc, index=data.index)

        for ip in interest_period:
            # Run length
            # Maximum number of successive dekads below long term average.
            # min_periods == window, so windows containing NaN yield NaN
            # without calling the helper.
            rlen = (exc.rolling(ip, min_periods=ip)
                    .apply(_longest_deficit_run, raw=True))

            # get modified run length
            rlen = (rlen.max() + 1) - rlen

            # average run length of each dekad period over all years
            form = rlen / rlen.groupby(period).transform('mean')

            # sum of the values for each interest period
            sumip = modf.rolling(ip, min_periods=ip).sum()

            # relative to the average sum of the dekad period over all years
            nrl = sumip / sumip.groupby(period).transform('mean')

            # calculating PDI/TDI
            val = nrl * np.sqrt(form)

            # scaled index
            if scaled:
                val = (val - val.min()) / (val.max() - val.min())

            if scale_zero:
                val = val - val.mean()

            data[var + '_DI_' + str(ip)] = val

    return data