def calc_dry_recurance_monthly_smd():
    """
    Tabulate how often months in the VCSN record meet a set of "dry" criteria
    and write the intermediate and final tables to ``event_def_dir``.

    Workflow:

    1. load the record for ``vcsn_version`` and add ``smd`` (the output of
       ``calc_smd_monthly``) and ``sma`` (``smd`` minus the mean ``smd`` of the
       same ``doy``);
    2. flag every day that satisfies each (smd threshold, sma threshold) pair;
    3. sum the flags per (month, year), i.e. count qualifying days;
    4. mark a month as an event when that count reaches each ``ndays`` value;
    5. aggregate the event markers by month with ``'sum'`` and ``prob`` and
       pass the result through ``add_pga``;
    6. scale every column that is not a sum/count by 100 and write the CSVs.

    :return: None, all results are written to disk
    """
    record = get_vcsn_record(vcsn_version)
    record.loc[:, 'smd'] = calc_smd_monthly(rain=record.rain,
                                            pet=record.pet,
                                            dates=record.index)

    # anomaly = smd minus the mean smd for the matching day of year
    doy_means = record.loc[:, ['doy', 'smd']].groupby('doy').mean().to_dict()
    doy_mean_smd = record.loc[:, 'doy'].replace(doy_means['smd'])
    record.loc[:, 'sma'] = record.loc[:, 'smd'] - doy_mean_smd
    record.reset_index(inplace=True)

    record.to_csv(os.path.join(event_def_dir, 'monthly_smd_dry_raw.csv'))

    smd_thresholds = [0]
    sma_thresholds = [-5, -10, -12, -15, -17, -20]
    ndays = [5, 7, 10, 14]

    # one boolean column per threshold pair: is the day at/below both limits?
    day_flag_cols = []
    for smd_lim in smd_thresholds:
        for sma_lim in sma_thresholds:
            col = 'd_smd{:03d}_sma{:02d}'.format(smd_lim, sma_lim)
            below_smd = record.loc[:, 'smd'] <= smd_lim
            below_sma = record.loc[:, 'sma'] <= sma_lim
            record.loc[:, col] = below_smd & below_sma
            day_flag_cols.append(col)

    # summing the boolean flags gives the number of qualifying days per month
    keep_cols = ['month', 'year', 'smd', 'sma'] + day_flag_cols
    monthly = record.loc[:, keep_cols].groupby(['month', 'year']).sum()
    monthly = monthly.reset_index()

    monthly.to_csv(
        os.path.join(event_def_dir, 'monthly_smd_dry_monthly_data.csv'))
    description = monthly.drop(columns=['year']).groupby('month').describe()
    description.to_csv(
        os.path.join(event_def_dir, 'monthly_smd_dry_monthly_data_desc.csv'))

    # a month counts as an event when it has at least `min_days` flagged days
    event_cols = []
    for min_days, flag_col in itertools.product(ndays, day_flag_cols):
        event_col = '{:02d}d_{}'.format(min_days, flag_col)
        event_cols.append(event_col)
        monthly.loc[:, event_col] = monthly.loc[:, flag_col] >= min_days

    out = monthly.loc[:, ['month'] + event_cols].groupby(['month']).aggregate(
        ['sum', prob])

    # discard definitions whose monthly total is 48 everywhere or 0 everywhere
    # NOTE(review): 48 is presumably the number of years in the record, i.e.
    # "an event every year" -- confirm against the record length.
    monthly_totals = {c: out.loc[:, c].loc[:, 'sum'] for c in event_cols}
    drop_keys = [
        c for c, totals in monthly_totals.items()
        if (totals == 48).all() or (totals == 0).all()
    ]

    out = out.drop(columns=drop_keys)
    out, out_years = add_pga(monthly, set(event_cols) - set(drop_keys), out)

    # everything except the sum/count columns is written as a percentage
    flat_names = pd.Series([' '.join(e) for e in out.columns])
    idx = ~flat_names.str.contains('sum') & ~flat_names.str.contains('count')
    out.loc[:, out.columns[idx]] *= 100

    out.to_csv(os.path.join(event_def_dir, 'monthly_smd_dry_prob.csv'),
               float_format='%.1f%%')
    prob_only_path = os.path.join(event_def_dir,
                                  'monthly_smd_dry_prob_only_prob.csv')
    out.loc[:, out.columns[idx]].to_csv(prob_only_path, float_format='%.1f%%')

    out_years.to_csv(os.path.join(event_def_dir, 'monthly_smd_dry_years.csv'))
def get_monthly_smd_mean_detrended(leap=False, recalc=False):
    """
    Return the mean soil moisture deficit (``smd``) for each day of year,
    computed from the 'detrended2' VCSN record over 1981-2010 (inclusive).

    The table is cached as 'mean_montly_smd_detrend.csv' in
    ``climate_shocks_env.supporting_data_dir``; it is read back from there
    unless ``recalc`` is True or the file does not exist.

    :param leap: must be False; a leap-year variant is not implemented
    :param recalc: if True recompute the table and overwrite the cached file
    :return: dict mapping day of year -> mean smd
    :raises NotImplementedError: if ``leap`` is True (raised after the table
        has been loaded or recomputed, as in the original implementation)
    """

    outpath = os.path.join(climate_shocks_env.supporting_data_dir,
                           'mean_montly_smd_detrend.csv')

    if not recalc and os.path.exists(outpath):
        average_smd = pd.read_csv(outpath, index_col=0)

    else:
        data = get_vcsn_record('detrended2').reset_index()

        data = data.loc[~((data.date.dt.month == 2) &
                          (data.date.dt.day == 29))]  # get rid of leap days
        average_start_year = 1981
        average_stop_year = 2010

        dates = data.loc[:, 'date']

        # reset doy to a 365 day calendar (e.g. no leap effect)
        mapper = get_month_day_to_nonleap_doy(False)
        doy = [mapper[(m, d)] for m, d in zip(dates.dt.month, dates.dt.day)]
        pet = np.atleast_1d(data['pet'])
        rain = np.atleast_1d(data['rain'])

        # internal sanity check: all three come from the same filtered frame
        assert dates.shape == pet.shape == rain.shape, 'date, pet, rain must be same shape'

        smd = calc_smd_monthly(rain,
                               pet,
                               dates,
                               month_start=detrended_start_month,
                               h2o_cap=150,
                               a=0.0073,
                               p=1,
                               return_drn_aet=False)

        outdata = pd.DataFrame(data={
            'date': dates,
            'doy': doy,
            'pet': pet,
            'rain': rain,
            'smd': smd
        }, )

        # calculate mean smd for each doy within the averaging period
        idx = (outdata.date.dt.year >= average_start_year) & (
            outdata.date.dt.year <= average_stop_year)
        temp = outdata.loc[idx, ['doy', 'smd']]
        # back-fill any doy without a mean; the previous fillna('bfill')
        # inserted the literal string 'bfill' instead of filling the gap
        average_smd = temp.groupby('doy').mean().bfill()
        average_smd.to_csv(outpath)

    out = average_smd.loc[:, 'smd'].to_dict()
    if leap:
        raise NotImplementedError
    return out
def make_storylines():  # todo make detrended storylines!
    """
    Write one storyline CSV ('sl-<year>.csv' in ``story_dir``) for every start
    year in 1972-2018, classifying each month of the July-June year from the
    'detrended2' VCSN and restriction records.

    Each storyline has 12 rows (July to June), is labelled with the fixed
    years 2024/2025, and holds a temperature class (A/H/C), a precipitation
    class (A/D/W), the rounded restriction value and the value returned by
    ``map_irr_quantile_from_rest``.

    :return: None, the storylines are written to disk
    """

    def _with_date_parts(frame):
        # add day/month/year columns from the index, then drop 29 February
        frame.loc[:, 'day'] = frame.index.day
        frame.loc[:, 'month'] = frame.index.month
        frame.loc[:, 'year'] = frame.index.year
        is_leap_day = (frame.month == 2) & (frame.day == 29)
        return frame.loc[~is_leap_day]

    data = _with_date_parts(get_vcsn_record('detrended2'))

    rest = _with_date_parts(get_restriction_record('detrended2'))
    # divide by the month length so the monthly sum below is a monthly mean
    rest.loc[:, 'f_rest'] = [
        frac / month_len[mon]
        for frac, mon in zip(rest.loc[:, 'f_rest'], rest.loc[:, 'month'])
    ]
    rest = rest.groupby(['year', 'month']).sum()

    # calc SMA
    doy_mean_smd = data.loc[:, 'doy'].replace(
        get_monthly_smd_mean_detrended(leap=False))
    data.loc[:, 'sma'] = calc_smd_monthly(data.rain, data.pet,
                                          data.index) - doy_mean_smd

    # daily event flags; the monthly sum below turns them into day counts
    tmean = (data.loc[:, 'tmin'] + data.loc[:, 'tmax']) / 2
    data.loc[:, 'wet'] = data.loc[:, 'rain'] >= 0.1
    data.loc[:, 'dry'] = data.loc[:, 'sma'] <= -15
    data.loc[:, 'hot'] = data.loc[:, 'tmax'] >= 25
    data.loc[:, 'cold'] = tmean.rolling(3).mean().fillna(method='bfill') <= 7
    data = data.groupby(['year', 'month']).sum()

    # todo below not right
    # todo definition hard coded in
    # wet-day limits per month; 99 effectively disables the wet class
    wet_limits = {5: 14, 6: 11, 7: 11, 8: 13, 9: 13}  # this is the best value!
    wet_limits.update({mon: 99 for mon in (1, 2, 3, 4, 10, 11, 12)})

    # loop invariants: July-June month order and the matching year layout
    story_months = [7, 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, 6]
    year_offsets = np.array([0] * 6 + [1] * 6)
    story_years = [2024] * 6 + [2025] * 6
    story_columns = [
        'year', 'month', 'temp_class', 'precip_class', 'rest', 'rest_per'
    ]

    for y in range(1972, 2019):
        # (year, month) keys of the historical July-June year starting in y
        idx = list(zip(year_offsets + y, story_months))

        temp = pd.DataFrame(index=np.arange(12), columns=story_columns)
        temp.loc[:, 'year'] = story_years
        temp.loc[:, 'month'] = story_months
        # todo events hard coded in
        temp.loc[:, 'rest'] = rest.loc[idx, 'f_rest'].round(2).values

        # temperature class: cold is applied last so it overrides hot
        temp.loc[:, 'temp_class'] = 'A'
        is_hot = data.loc[idx, 'hot'] >= 7
        temp.loc[is_hot.values, 'temp_class'] = 'H'
        is_cold = data.loc[idx, 'cold'] >= 10
        temp.loc[is_cold.values, 'temp_class'] = 'C'

        # precipitation class: dry is cleared for June-August, wet applied last
        temp.loc[:, 'precip_class'] = 'A'
        is_dry = data.loc[idx, 'dry'] >= 10
        temp.loc[is_dry.values, 'precip_class'] = 'D'
        temp.loc[np.in1d(temp.month, [6, 7, 8]), 'precip_class'] = 'A'
        is_wet = data.loc[idx, 'wet'] >= [
            wet_limits[mon] for mon in temp.loc[:, 'month']
        ]
        temp.loc[is_wet.values, 'precip_class'] = 'W'

        prev_precip = temp.loc[:, 'precip_class'].shift(1).fillna(
            'A')  # todo check
        temp.loc[:, 'precip_class_prev'] = prev_precip
        temp.loc[:, 'rest_per'] = [
            map_irr_quantile_from_rest(m=mon,
                                       rest_val=rq,
                                       precip=p,
                                       prev_precip=pp)
            for mon, rq, p, pp in zip(temp.loc[:, 'month'],
                                      temp.loc[:, 'rest'],
                                      temp.loc[:, 'precip_class'],
                                      temp.loc[:, 'precip_class_prev'])
        ]
        temp.to_csv(os.path.join(story_dir, f'sl-{y}.csv'))
def _check_data_v1(swg_path,
                   storyline,
                   m,
                   cold_months,
                   wet_months,
                   hot_months,
                   dry_months,
                   return_full_results=False):
    """
    compare the temperature/precipitation classes of one SWG realisation with
    the classes requested in the storyline
    :param swg_path: path to the SWG realisation, passed to read_swg_data
    :param storyline: storyline dataframe with year, month, temp_class and
                      precip_class columns
    :param m: month number(s) to check; rows of other months are ignored
    :param cold_months: months in which a cold ('C') class may be assigned
    :param wet_months: months in which a wet ('W') class may be assigned
    :param hot_months: months in which a hot ('H') class may be assigned
    :param dry_months: months in which a dry ('D') class may be assigned
    :param return_full_results: if False return only a boolean, see below
    :return: if not return_full_results: True when at least one month differs
             from the storyline, otherwise False;
             if return_full_results: (number of differing months,
             list of 'month:precip-swg_precip_temp-swg_temp' keys for the
             differing months, max hot, max cold, max wet, max dry day counts)
    """
    count_cols = ['wet', 'dry', 'hot', 'cold']

    storyline = storyline.loc[np.in1d(storyline.month, m)].set_index(
        ['year', 'month'])

    data = read_swg_data(swg_path)[0]
    data = data.loc[np.in1d(data.month, m)]

    # calc SMA
    doy_mean_smd = data.loc[:, 'doy'].replace(
        get_monthly_smd_mean_detrended(leap=False))
    data.loc[:, 'sma'] = calc_smd_monthly(data.rain, data.pet,
                                          data.index) - doy_mean_smd

    # daily event flags
    tmean = (data.loc[:, 'tmin'] + data.loc[:, 'tmax']) / 2
    data.loc[:, 'wet'] = data.loc[:, 'rain'] >= 0.1
    data.loc[:, 'dry'] = data.loc[:, 'sma'] <= -15
    data.loc[:, 'hot'] = data.loc[:, 'tmax'] >= 25
    data.loc[:, 'cold'] = tmean.rolling(3).mean().fillna(method='bfill') <= 7

    # number of flagged days per (year, month), copied onto the storyline
    day_counts = data.loc[:, ['year', 'month'] + count_cols].groupby(
        ['year', 'month']).sum()
    storyline.loc[day_counts.index, count_cols] = day_counts.loc[:, count_cols]
    storyline.reset_index(inplace=True)

    # precipitation class of the realisation
    wet_enough = storyline.wet >= storyline.month.replace(rain_limits_wet)
    storyline.loc[:, 'swg_precip_class'] = 'A'
    storyline.loc[wet_enough & np.in1d(storyline.month, wet_months),
                  'swg_precip_class'] = 'W'
    # dry outweighs wet if both happen
    storyline.loc[(storyline.dry >= 10)
                  & np.in1d(storyline.month, dry_months),
                  'swg_precip_class'] = 'D'

    # temperature class of the realisation; cold is applied after hot
    storyline.loc[:, 'swg_temp_class'] = 'A'
    storyline.loc[(storyline.hot >= 7) & np.in1d(storyline.month, hot_months),
                  'swg_temp_class'] = 'H'
    storyline.loc[(storyline.cold >= 10)
                  & np.in1d(storyline.month, cold_months),
                  'swg_temp_class'] = 'C'

    # a month agrees only when both classes match the storyline
    where_same = ((storyline.temp_class == storyline.swg_temp_class) &
                  (storyline.precip_class == storyline.swg_precip_class))
    num_dif = (~where_same).sum()
    if not return_full_results:
        return num_dif > 0

    hot = storyline.hot.max()
    cold = storyline.cold.max()
    wet = storyline.wet.max()
    dry = storyline.dry.max()

    # one descriptive key per month that does not match
    mismatches = storyline.loc[~where_same, [
        'month', 'precip_class', 'swg_precip_class', 'temp_class',
        'swg_temp_class'
    ]]
    out_keys = []
    for mon, precip, swg_precip, temp_cls, swg_temp in mismatches.itertuples(
            False, None):
        out_keys.append('{}:{}-{}_{}-{}'.format(mon, precip, swg_precip,
                                                temp_cls, swg_temp))
    return num_dif, out_keys, hot, cold, wet, dry
def _save_event_class_boxplots(vcsn, events, variables, event_col, labels,
                               title, outdir, dropna):
    """
    draw one boxplot panel per variable, grouping the monthly values by the
    event class codes -1, 0 and 1, and save the figure as a png in outdir
    :param vcsn: monthly climate data, row-aligned with events
    :param events: event classification with the column event_col
    :param variables: columns of vcsn to plot, one axis each
    :param event_col: column of events holding the class codes
    :param labels: tick labels, applied in the code order -1, 0, 1
    :param title: figure title; also the file name once ':' is replaced by '_'
    :param outdir: directory the figure is saved to
    :param dropna: if True remove NaN values from each group before plotting
    :return: None
    """
    fig, axs = plt.subplots(len(variables), figsize=figsize)
    fig.suptitle(title)
    class_codes = events.loc[:, event_col]
    for ax, var in zip(axs, variables):
        groups = []
        for code in (-1, 0, 1):
            values = vcsn.loc[np.isclose(class_codes, code), var]
            groups.append(values.dropna() if dropna else values)

        ax.boxplot(groups, labels=labels)
        ax.set_ylabel(var)
    # build the file name from the title itself rather than reading it back
    # from matplotlib's private fig._suptitle attribute
    fig.savefig(os.path.join(outdir, title.replace(':', '_') + '.png'))


def compair_means(outdir, detrended=False):
    """
    plot how monthly mean climate variables differ between the event classes
    defined in the event definition file; figures are saved to outdir and
    then shown
    :param outdir: directory to save the figures in, created if missing
    :param detrended: must be False, the detrended case is not implemented
    :return: None
    :raises NotImplementedError: if detrended is True
    """
    os.makedirs(outdir, exist_ok=True)

    if detrended:
        raise NotImplementedError

    vcsn = get_vcsn_record('trended')
    events_path = climate_shocks_env.event_def_path

    vcsn.loc[:, 'tmean'] = (vcsn.loc[:, 'tmax'] + vcsn.loc[:, 'tmin']) / 2
    vcsn.loc[:, 'rain-pet'] = vcsn.loc[:, 'rain'] - vcsn.loc[:, 'pet']
    temp = calc_sma_smd_historical(vcsn.rain, vcsn.pet, vcsn.index, 150, 1)
    vcsn.loc[:, 'smd'] = temp.loc[:, 'smd'].values
    vcsn.loc[:, 'sma'] = temp.loc[:, 'sma'].values
    vcsn.loc[:, 'monthly_smd'] = calc_smd_monthly(vcsn.rain, vcsn.pet,
                                                  vcsn.index)

    # make monthly basis smd/sma
    vcsn = vcsn.groupby(['year', 'month']).mean()

    # the first line of the events file is skipped before the header row
    events = pd.read_csv(events_path, skiprows=1)
    events = events.set_index(['year', 'month'])
    # the boolean masks below are positional, so both tables must line up
    assert (events.index.values == vcsn.index.values).all()

    precip_keys = [
        'monthly_smd',
        'rain',
        'rain-pet',
        'smd',
        'sma',
    ]
    precip_labels = ['W', 'A', 'D']
    temp_keys = ['tmin', 'tmax', 'tmean']
    temp_labels = ['C', 'A', 'H']

    # precip (NaNs are dropped, as in the original precipitation plots)
    _save_event_class_boxplots(vcsn, events, precip_keys, 'precip',
                               precip_labels, 'precip all months', outdir,
                               dropna=True)
    for m in range(1, 13):
        _save_event_class_boxplots(vcsn.loc[:, m, :], events.loc[:, m, :],
                                   precip_keys, 'precip', precip_labels,
                                   'precip m:{}'.format(m), outdir,
                                   dropna=True)

    # temp (values are plotted as they are, without dropping NaNs)
    for m in range(1, 13):
        _save_event_class_boxplots(vcsn.loc[:, m, :], events.loc[:, m, :],
                                   temp_keys, 'temp', temp_labels,
                                   'temp m:{}'.format(m), outdir,
                                   dropna=False)
    _save_event_class_boxplots(vcsn, events, temp_keys, 'temp', temp_labels,
                               'temp all months', outdir, dropna=False)

    plt.show()
예제 #6
0
def calc_doy_per_from_historical(version='detrended2'):
    """
    For every day of a 365 day year, evaluate ``inverse_percentile`` of each
    event threshold (hot, cold, dry, wet) against the historical VCSN values
    around that day.

    Hot, cold and wet use an 11 day window (day +/- 5, wrapping around the
    year end); dry uses the single day. Only months listed for the class by
    ``get_months_with_events`` contribute data, and results outside those
    months are set to NaN.

    :param version: record version passed to ``get_vcsn_record``
    :return: dataframe indexed by (dayofyear, date) with '<key>_per' and
             '<key>_err' columns for each class plus a 'month' column
    """
    data = get_vcsn_record(version).reset_index()
    data.loc[:, 'month'] = data.date.dt.month
    data.loc[:, 'day'] = data.date.dt.day

    # drop 29 February and number the remaining days on a non-leap calendar
    is_leap_day = (data.month == 2) & (data.day == 29)
    data = data.loc[~is_leap_day]
    reference_dates = [
        f'2001 - {m:02d} - {d:02d}'
        for m, d in zip(data.loc[:, 'month'], data.loc[:, 'day'])
    ]
    data.loc[:, 'doy'] = pd.to_datetime(reference_dates).dayofyear

    # the value each class threshold is compared against
    tmean = (data.loc[:, 'tmin'] + data.loc[:, 'tmax']) / 2
    data.loc[:, 'cold'] = tmean.rolling(3).mean()
    data.loc[:, 'hot'] = data.loc[:, 'tmax']
    data.loc[:, 'wet'] = data.loc[:, 'rain']

    data.loc[:, 'smd'] = calc_smd_monthly(rain=data.rain,
                                          pet=data.pet,
                                          dates=data.loc[:, 'date'])
    # anomaly = smd minus the mean smd of the same day of year
    doy_means = data.loc[:, ['doy', 'smd']].groupby('doy').mean().to_dict()
    data.loc[:, 'sma'] = data.loc[:, 'smd'] - data.loc[:, 'doy'].replace(
        doy_means['smd'])

    data.loc[:, 'dry'] = data.loc[:, 'sma']

    # NOTE(review): 'wet' uses 0.01 here while other functions in this file
    # flag wet days with rain >= 0.1 -- confirm which value is intended.
    thresholds = {
        'hot': 25,
        'cold': 7,
        'dry': -15,
        'wet': 0.01,
    }
    # data column -> event class code used by get_months_with_events
    class_codes = {'hot': 'H', 'cold': 'C', 'dry': 'D', 'wet': 'W'}
    events = get_months_with_events()
    outdata = pd.DataFrame(index=pd.Index(range(1, 366), name='dayofyear'))
    for key, code in class_codes.items():
        print(key)
        in_season = data.loc[np.in1d(data.month, events[code])]
        for d in range(1, 366):
            days = np.array([d]) if key == 'dry' else np.arange(d - 5, d + 6)
            # wrap the window around the start/end of the year
            days[days <= 0] += 365
            days[days > 365] -= 365

            sample = in_season.loc[np.in1d(in_season.doy, days), key]
            if not sample.empty:
                per, err = inverse_percentile(sample,
                                              thresholds[key],
                                              bootstrap=False)
                outdata.loc[d, '{}_per'.format(key)] = per
                outdata.loc[d, '{}_err'.format(key)] = err
    outdata.loc[:, 'date'] = pd.to_datetime(
        ['2001-{:03d}'.format(e) for e in outdata.index], format='%Y-%j')
    outdata.loc[:, 'month'] = outdata.date.dt.month

    # blank results for days whose month has no event of that class (the
    # windows above can reach into a neighbouring month)
    for key, code in class_codes.items():
        off_season = ~np.in1d(outdata.month, events[code])
        outdata.loc[off_season, '{}_per'.format(key)] = np.nan
        outdata.loc[off_season, '{}_err'.format(key)] = np.nan

    outdata.set_index('date', inplace=True, append=True)

    return outdata