コード例 #1
0
ファイル: bootstrap.py プロジェクト: guziy/GevFit
def apply_bootstrap_to_data(streamflow = None, times = None,
                    period_start_month = 1, period_end_month = 12,
                    start_date = datetime(1970,1,1,0,0),
                    end_date = datetime(1999,12,31,0,0),
                    event_duration_days = timedelta(days = 1),
                    n_samples = 1, high_flow = True,
                    return_periods = [], process_pool = None,
                    out_file = ''):
    """
    Select the period extremes at every position and bootstrap them.

    streamflow is indexed as (time, position). For each position the
    period maxima (high_flow is True) or period minima (otherwise) are
    selected through data_select, gathered into an array with one column
    per position and handed over to apply_bootstrap_to_extremes together
    with n_samples, out_file, return_periods, process_pool and the list
    of position indices.
    """
    cell_indices = list(range(streamflow.shape[1]))

    # maxima are used for high flow, minima for low flow
    if high_flow:
        pick_extremes = data_select.get_period_maxima
    else:
        pick_extremes = data_select.get_period_minima

    selection_options = dict(
        start_date = start_date,
        end_date = end_date,
        start_month = period_start_month,
        end_month = period_end_month,
        event_duration = event_duration_days,
    )

    # one list of selected extreme values per position
    extremes_by_cell = [
        list(pick_extremes(streamflow[:, cell], times,
                           **selection_options).values())
        for cell in cell_indices
    ]

    # transposed so that the positions run along the second axis
    extremes_table = np.array(extremes_by_cell).transpose()
    print('all_extremes.shape = ', extremes_table.shape)
    print('n_samples = ', n_samples)

    apply_bootstrap_to_extremes(extremes_table,
                                n_samples = n_samples,
                                out_file = out_file,
                                return_periods = return_periods,
                                process_pool = process_pool,
                                positions = cell_indices)
コード例 #2
0
ファイル: gevfit.py プロジェクト: guziy/GevFit
def optimize_stationary_for_period_and_all_cells(
                data_file = 'data/streamflows/hydrosheds_euler9/aex_discharge_1970_01_01_00_00.nc',
                paramfile = 'gev_params_stationary',
                high_flow = True,
                start_month = 1, end_month = 12,
                start_date = datetime(1970,1,1,0,0),
                end_date = datetime(1999,12, 31,0,0),
                event_duration = timedelta(days = 1)):
    """
    Return the stationary parameter set for all positions of data_file,
    using paramfile as a pickle cache.

    If paramfile already exists it is unpickled and returned without any
    optimization (delete the file to force a new optimization).
    Otherwise the period maxima (high_flow is True) or minima are selected
    for every position of the streamflow read from data_file, arranged
    with one column per position, passed to
    optimize_stationary_for_period_and_all_cells_using_data, and the
    result is pickled to paramfile before being returned.
    """
    print(paramfile)

    # check whether optimization is required
    if os.path.isfile(paramfile):
        print('already optimized, if you want to reoptimize delete %s' % paramfile)
        # pickle data is binary, so the cache has to be opened in 'rb' mode.
        # NOTE: unpickling executes code from the file; paramfile must be trusted.
        with open(paramfile, 'rb') as cache:
            return pickle.load(cache)

    # get streamflow data (the two coordinate outputs are not needed here)
    streamflow, times, _xs, _ys = data_select.get_data_from_file(path = data_file)

    if high_flow:
        select_extremes = data_select.get_period_maxima
    else:
        select_extremes = data_select.get_period_minima

    data = [
        list(select_extremes(streamflow[:, pos], times,
                             start_date = start_date,
                             end_date = end_date,
                             start_month = start_month,
                             end_month = end_month,
                             event_duration = event_duration).values())
        for pos in range(streamflow.shape[1])
    ]

    # one column per position
    data = np.array(data).transpose()
    pars_set = optimize_stationary_for_period_and_all_cells_using_data(data = data,
                                                    high_flow = high_flow)

    # binary mode is required by pickle; 'with' closes the file even on error
    with open(paramfile, 'wb') as cache:
        pickle.dump(pars_set, cache)
    return pars_set
コード例 #3
0
ファイル: kruskal_wallis_test.py プロジェクト: guziy/GevFit
def get_extremes_list(data_path = "", member_ids = None, high_flow = True,
                        start_date = None, end_date = None,
                        event_duration = timedelta(days = 1),
                        period_start_month = 1, period_end_month = 12
                        ):
    """
    Collect the extremes of every member file found in data_path.

    A file belongs to a member when the part of its name before the first
    '_' is contained in member_ids. Returns a tuple
    (extremes, i_indices, j_indices): extremes is a list with one 2d array
    per file, shape = (time, cell_index); the indices are the ones read
    from the last processed file (None when no file matched).
    """
    member_files = [
        os.path.join(data_path, file_name)
        for file_name in os.listdir(data_path)
        if file_name.split('_')[0] in member_ids
    ]

    # merge extreme data
    extremes_per_member = []
    i_indices = None
    j_indices = None
    for member_file in member_files:
        streamflow, times, i_indices, j_indices = data_select.get_data_from_file(member_file)

        # maxima are used for high flow, minima for low flow
        if high_flow:
            pick_extremes = data_select.get_period_maxima
        else:
            pick_extremes = data_select.get_period_minima

        per_cell = []
        for cell in range(len(i_indices)):
            selected = pick_extremes(streamflows=streamflow[:, cell], times = times,
                                     start_date = start_date, end_date = end_date,
                                     event_duration = event_duration,
                                     start_month = period_start_month,
                                     end_month = period_end_month)
            per_cell.append(list(selected.values()))

        # rows of per_cell are cells; transpose to get (time, cell_index)
        extremes_per_member.append(np.transpose(np.array(per_cell)))

    return extremes_per_member, i_indices, j_indices
コード例 #4
0
def apply_bootstrap_to_all_members_merged(file_paths = None,
                                high_flow = True,
                                n_samples = 10, out_file = '',
                                process_pool = None,
                                start_date = None,
                                end_date = None,
                                start_month = None,
                                end_month = None,
                                duration_days = None,
                                return_periods = None
                                ):
    """
    Merge the extremes of all files in file_paths and bootstrap them.

    Nothing is done (and None is returned) when out_file already exists.
    Otherwise, for every file the period maxima (high_flow is True) or
    minima are selected at each position and appended to that position's
    list, so the merged array has axes (time, position) with the members
    stacked along the first axis. The array is then passed to
    bootstrap.apply_bootstrap_to_extremes with resampling restricted to
    members.

    duration_days - timedelta object

    Raises ValueError when file_paths is empty or None.
    """

    if os.path.isfile(out_file):
        print("{0} already exists, skipping ".format(out_file))
        return

    if not file_paths:
        # without at least one file there is neither data nor a member count
        raise ValueError("file_paths must contain at least one path")

    if high_flow:
        select_extremes = data_select.get_period_maxima
    else:
        select_extremes = data_select.get_period_minima

    # select data
    all_extremes = []
    streamflow = None
    for the_path in file_paths:
        print(the_path)
        streamflow, times, i_indices, j_indices = data_select.get_data_from_file(the_path)

        if not all_extremes:
            all_extremes = [[] for _ in range(streamflow.shape[1])]

        for pos in range(streamflow.shape[1]):
            data1 = select_extremes(streamflow[:, pos], times,
                                    start_date = start_date,
                                    end_date = end_date,
                                    start_month = start_month,
                                    end_month = end_month,
                                    event_duration = duration_days)
            all_extremes[pos].extend(list(data1.values()))

    # axes order: (time, position)
    all_extremes = np.array(all_extremes).transpose()

    # floor division keeps the per-member count an integer
    # ('/' would produce a float under Python 3)
    n_values_per_member = all_extremes.shape[0] // len(file_paths)

    bootstrap.apply_bootstrap_to_extremes(all_extremes,
                                        n_samples = n_samples,
                                        out_file = out_file,
                                        process_pool = process_pool,
                                        return_periods = return_periods,
                                        positions = list(range(streamflow.shape[1])),
                                        high_flow = high_flow,
                                        restrict_indices_to_member=True,
                                        n_values_per_member= n_values_per_member
                                        )
    print("n_indices per member = ", n_values_per_member)
コード例 #5
0
ファイル: gevfit_for_all.py プロジェクト: guziy/GevFit
def gev_fit_all_members(
    high_flow=True,
    member_ids=[],
    data_folder="",
    file_name_pattern="",
    start_date=None,
    end_date=None,
    start_month=1,
    end_month=12,
    duration_days=timedelta(days=1),
    expected_n_cells=547,
):
    """
    gev fit using data from all members

    The result is cached in a pickle file in the current directory, named
    "high" or "low" followed by "_<id>" for every member id. When that
    file exists it is unpickled and returned without refitting.

    member_ids - ids of the members, each one is substituted into
        file_name_pattern with str.format to get the input file name
    data_folder - path to the folder with input data (streamflow)
    start_date, end_date, start_month, end_month, duration_days -
        forwarded to data_select.get_period_maxima / get_period_minima
    expected_n_cells - number of positions the merged data must have
        (sanity check, 547 by default); pass None to skip the check

    Returns the parameter set produced by
    gevfit.optimize_stationary_for_period_and_all_cells_using_data.
    Raises ValueError when a selected extreme value is None.
    """

    param_file = "high" if high_flow else "low"
    for member_id in member_ids:
        param_file += "_" + member_id
    if os.path.isfile(param_file):
        print("delete {0}, to reoptimize".format(param_file))
        # pickle data is binary, so the cache has to be opened in 'rb' mode.
        # NOTE: unpickling executes code from the file; it must be trusted.
        with open(param_file, "rb") as cache:
            return pickle.load(cache)

    # select data
    path_pattern = os.path.join(data_folder, file_name_pattern)
    all_extremes = []
    for member_id in member_ids:
        print(member_id)
        the_path = path_pattern.format(member_id)
        streamflow, times, i_indices, j_indices = data_select.get_data_from_file(the_path)

        if high_flow:
            select_extremes = data_select.get_period_maxima
        else:
            select_extremes = data_select.get_period_minima

        if not all_extremes:
            all_extremes = [[] for _ in range(streamflow.shape[1])]

        for pos in range(streamflow.shape[1]):
            data1 = select_extremes(
                streamflow[:, pos],
                times,
                start_date=start_date,
                end_date=end_date,
                start_month=start_month,
                end_month=end_month,
                event_duration=duration_days,
            )
            all_extremes[pos].extend(list(data1.values()))

    # axes order: (time, position)
    all_extremes = np.array(all_extremes).transpose()

    # Only an object array can hold None; check the elements themselves
    # ("all_extremes is None" is always False for an array).
    if all_extremes.dtype == object and any(value is None for value in all_extremes.flat):
        raise ValueError("all_extremes = " + str(all_extremes))

    # optimize
    print(all_extremes.shape)
    if expected_n_cells is not None:
        assert all_extremes.shape[1] == expected_n_cells, \
            "all_extremes.shape[1] != {0}".format(expected_n_cells)
    param_set = gevfit.optimize_stationary_for_period_and_all_cells_using_data(data=all_extremes, high_flow=high_flow)

    # 'with' closes the cache file even if pickling fails
    with open(param_file, "wb") as cache:
        pickle.dump(param_set, cache)
    return param_set