def test_plot_windwave_fit(self):
    """
    Plots goodness-of-fit graphs for the marginal distribution of X1 and
    for the dependence functions of X2|X1. Uses wind and wave data.
    """
    sample_v, sample_hs, label_v, label_hs = \
        read_ecbenchmark_dataset('datasets/1year_dataset_D.txt')
    # Use a label with LaTeX-formatted units instead of the label from the file.
    label_v = 'v (m s$^{-1}$)'

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_v = {'name': 'Weibull_Exp',
                          'dependency': (None, None, None, None),
                          'width_of_intervals': 2}
    dist_description_hs = {'name': 'Weibull_Exp',
                           'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                           'dependency': (0, None, 0, None),  # shape, location, scale, shape2
                           'functions': ('logistics4', None, 'alpha3', None),  # shape, location, scale, shape2
                           'min_datapoints_for_fit': 50,
                           'do_use_weights_for_dependence_function': True}

    # Fit the model to the data.
    fit = Fit((sample_v, sample_hs), (dist_description_v, dist_description_hs))
    dist0 = fit.mul_var_dist.distributions[0]

    fig = plt.figure(figsize=(12.5, 3.5), dpi=150)
    ax1 = fig.add_subplot(131)
    ax2 = fig.add_subplot(132)
    ax3 = fig.add_subplot(133)
    plot_marginal_fit(sample_v, dist0, fig=fig, ax=ax1, label=label_v,
                      dataset_char='D')
    plot_dependence_functions(fit=fit, fig=fig, ax1=ax2, ax2=ax3,
                              unconditonal_variable_label=label_v)
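    # Reading the model structure above (a restatement of the dicts, not
    # additional API): distribution 0 is the wind speed v, so the dependency
    # tuple (0, None, 0, None) makes the shape and scale of the Hs
    # distribution functions of v, using the 4-parameter logistic function
    # 'logistics4' and the 3-parameter function 'alpha3', while shape2 is
    # held fixed at 5 via 'fixed_parameters'.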
def test_plot_seastate_fit(self):
    """
    Plots goodness-of-fit graphs for the marginal distribution of X1 and
    for the dependence functions of X2|X1. Uses sea state data.
    """
    sample_hs, sample_tz, label_hs, label_tz = read_ecbenchmark_dataset()

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (None, None, None, None),
                           'width_of_intervals': 0.5}
    dist_description_tz = {'name': 'Lognormal_SigmaMu',
                           'dependency': (0, None, 0),  # Shape, Location, Scale
                           'functions': ('asymdecrease3', None, 'lnsquare2'),  # Shape, Location, Scale
                           'min_datapoints_for_fit': 50}

    # Fit the model to the data.
    fit = Fit((sample_hs, sample_tz), (dist_description_hs, dist_description_tz))
    dist0 = fit.mul_var_dist.distributions[0]

    fig = plt.figure(figsize=(12.5, 3.5), dpi=150)
    ax1 = fig.add_subplot(131)
    ax2 = fig.add_subplot(132)
    ax3 = fig.add_subplot(133)
    plot_marginal_fit(sample_hs, dist0, fig=fig, ax=ax1, label='$h_s$ (m)',
                      dataset_char='A')
    plot_dependence_functions(fit=fit, fig=fig, ax1=ax2, ax2=ax3,
                              unconditonal_variable_label=label_hs)
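    # Note on 'lnsquare2' (consistent with test_plot_contour_and_sample below,
    # which evaluates the fitted function): it models the lognormal's scale as
    # scale(hs) = a + b * sqrt(hs / 9.81). Since the scale of a lognormal
    # equals exp(mu) and thereby its median, this is also the median of
    # Tz | Hs.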
def test_plot_confidence_interval(self):
    """
    Plots a contour's confidence interval.
    """
    dataset_d_v, dataset_d_hs, label_v, label_hs = \
        read_ecbenchmark_dataset('datasets/1year_dataset_D.txt')

    # Read the contours that have been computed previously from csv files.
    folder_name = 'contour-coordinates/'
    file_name_median = 'doe_john_years_25_median.txt'
    file_name_bottom = 'doe_john_years_25_bottom.txt'
    file_name_upper = 'doe_john_years_25_upper.txt'
    (contour_v_median, contour_hs_median) = read_contour(
        folder_name + file_name_median)
    (contour_v_bottom, contour_hs_bottom) = read_contour(
        folder_name + file_name_bottom)
    (contour_v_upper, contour_hs_upper) = read_contour(
        folder_name + file_name_upper)

    # Plot the sample, the median contour and the confidence interval.
    fig = plt.figure(figsize=(5, 5), dpi=150)
    ax = fig.add_subplot(111)
    plotted_sample = SamplePlotData(x=np.asarray(dataset_d_v),
                                    y=np.asarray(dataset_d_hs),
                                    ax=ax,
                                    label='dataset D')
    contour_labels = ['50th percentile contour',
                      '2.5th percentile contour',
                      '97.5th percentile contour']
    plot_confidence_interval(
        x_median=contour_v_median, y_median=contour_hs_median,
        x_bottom=contour_v_bottom, y_bottom=contour_hs_bottom,
        x_upper=contour_v_upper, y_upper=contour_hs_upper, ax=ax,
        x_label=label_v, y_label=label_hs, contour_labels=contour_labels,
        plotted_sample=plotted_sample)
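    # The plotted band is a central 95 % confidence interval: the 2.5th and
    # 97.5th percentile contours enclose 97.5 - 2.5 = 95 % of the estimated
    # contour distribution around the median (50th percentile) contour.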
latitudes = [43.525, 28.508, 25.897, 54.0, 55.0, 59.5]
longitudes = [-70.141, -80.185, -89.668, 6.575, 1.175, 4.325]

fig, axs = plt.subplots(2, 3, sharex='row', sharey='row', figsize=(10, 8))
max_hs_of_sample = 0
for i, ax0 in enumerate(axs):
    if i == 0:
        datasets = datasets_hstz
    else:
        datasets = datasets_vhs
    for j, (dataset_char, ax1) in enumerate(zip(datasets, ax0)):
        # Load the environmental data.
        file_name_provided = 'datasets/' + dataset_char + '.txt'
        file_name_retained = 'datasets-retained/' + dataset_char + 'r.txt'
        x1_p, x2_p, x1_label, x2_label = read_ecbenchmark_dataset(file_name_provided)
        x1_r, x2_r, x1_label, x2_label = read_ecbenchmark_dataset(file_name_retained)
        if i == 1:
            x1_p, x2_p = x2_p, x1_p
            x1_r, x2_r = x2_r, x1_r
            x1_label, x2_label = x2_label, x1_label
        max_hs_of_sample = max([max_hs_of_sample, max(x1_p), max(x1_r)])

        # Scatter plot of provided (dots) and retained (triangles) data.
        ax1.scatter(x2_p, x1_p, c='black', alpha=0.5, zorder=-2)
        ax1.scatter(x2_r, x1_r, marker='v', facecolor='None',
                    edgecolor='black', alpha=0.5, zorder=-2)
        # Rasterize everything below zorder -1 (here: the scatter points at
        # zorder -2) to keep the vector graphic small.
        ax1.set_rasterization_zorder(-1)
        ax1.set_xlabel(x2_label.capitalize())
dataset_chars = ['A', 'B', 'C']
return_periods = [1, 20]
n_contours_to_analyze = len(legends_for_contribution)

fig, axs = plt.subplots(len(return_periods), len(dataset_chars),
                        sharex='row', sharey='row', figsize=(10, 8))
max_hs_of_sample = 0
for (return_period, ax0) in zip(return_periods, axs):
    for (dataset_char, ax1) in zip(dataset_chars, ax0):
        # Load the environmental data.
        file_name_provided = 'datasets/' + dataset_char + '.txt'
        file_name_retained = 'datasets-retained/' + dataset_char + 'r.txt'
        hs_p, tz_p, label_hs, label_tz = read_ecbenchmark_dataset(
            file_name_provided)
        hs_r, tz_r, label_hs, label_tz = read_ecbenchmark_dataset(
            file_name_retained)
        max_hs_of_sample = max([max_hs_of_sample, max(hs_p), max(hs_r)])

        contours_hs = []
        contours_tz = []
        max_hs_on_contour = np.empty(n_contours_to_analyze)
        for i in range(n_contours_to_analyze):
            contribution_nr = i + 1
            # Contribution 9 holds 3 sets of contours, so contributions 9-11
            # map to its folder and higher numbers are shifted down by 2.
            if 11 >= contribution_nr >= 9:
                contribution_nr = 9
            elif contribution_nr > 11:
                contribution_nr = contribution_nr - 2
            folder_name = 'results/exercise-1/contribution-' + str(contribution_nr)
        if t < 0:
            theta[i] = t + 2 * np.pi
    return theta


colors_for_contribution = mycorder.mpl_colors
# Repeat the color of contribution 9 because it provides three sets of
# contours (contributions 9-11 in the plots).
for idx in range(2):
    colors_for_contribution.append(colors_for_contribution[8])
colors_for_contribution.append('blue')

fig, axs = plt.subplots(1, 2, sharey=True, figsize=(8, 4))
max_hs_of_sample = 0

# Load the environmental data.
file_name_provided = 'datasets/' + dataset_char + '.txt'
v_p, hs_p, label_v, label_hs = read_ecbenchmark_dataset(file_name_provided)
max_hs_of_sample = max([max_hs_of_sample, max(hs_p)])

contours = []
contours_v = []
contours_hs = []
max_hs_on_contours = np.empty(n_contours_to_analyze)
for i in range(n_contours_to_analyze):
    contribution_nr = i + 1
    # Contribution 9 holds 3 sets of contours, so contributions 9-11 map to
    # its folder and higher numbers are shifted down by 2.
    if 11 >= contribution_nr >= 9:
        contribution_nr = 9
    elif contribution_nr > 11:
        contribution_nr = contribution_nr - 2
    folder_name = 'results/exercise-1/contribution-' + str(contribution_nr)
    file_name = folder_name + '/' + lastname_firstname[i] + '_dataset_' + \
def test_read_dataset(self):
    """
    Reads the provided dataset "1year_dataset_A.txt".
    """
    sample_hs, sample_tz, label_hs, label_tz = read_ecbenchmark_dataset()
    self.assertAlmostEqual(sample_hs[0], 0.2845, delta=0.00001)
from viroconcom.fitting import Fit
from viroconcom.contours import HighestDensityContour, \
    sort_points_to_form_continous_line
from viroconcom.plot import plot_contour, SamplePlotData

np.random.seed(9001)  # For reproducibility.

# Define the number of years of data that one bootstrap sample should contain.
# In the benchmark, 1, 5 and 25 years are used.
NR_OF_YEARS_TO_DRAW = [1, 2, 5]
NR_OF_BOOTSTRAP_SAMPLES = [25, 12, 5]
GRID_CELL_SIZE = 0.05

# Read dataset D.
file_path = 'datasets/D.txt'
dataset_d_v, dataset_d_hs, label_v, label_hs = read_ecbenchmark_dataset(file_path)

# Define the origin (will be used to compute confidence intervals).
v0 = np.mean(dataset_d_v)
hs0 = np.mean(dataset_d_hs)

# Define the structure of the probabilistic model that will be fitted to the
# dataset.
dist_description_v = {'name': 'Weibull_Exp',
                      'dependency': (None, None, None, None),
                      'width_of_intervals': 2}
dist_description_hs = {'name': 'Weibull_Exp',
                       'fixed_parameters': (None, None, None, 5),  # shape, location, scale, shape2
                       'dependency': (0, None, 0, None),  # shape, location, scale, shape2
def test_plot_contour_and_sample(self):
    """
    Plots a contour together with the dataset that has been used to fit a
    distribution for the contour.
    """
    sample_hs, sample_tz, label_hs, label_tz = read_ecbenchmark_dataset()

    # Define the structure of the probabilistic model that will be fitted to
    # the dataset.
    dist_description_hs = {'name': 'Weibull_Exp',
                           'dependency': (None, None, None, None),
                           'width_of_intervals': 0.5}
    dist_description_tz = {'name': 'Lognormal_SigmaMu',
                           'dependency': (0, None, 0),  # Shape, Location, Scale
                           'functions': ('asymdecrease3', None, 'lnsquare2'),  # Shape, Location, Scale
                           'min_datapoints_for_fit': 50}

    # Fit the model to the data.
    fit = Fit((sample_hs, sample_tz), (dist_description_hs, dist_description_tz))
    contour = IFormContour(fit.mul_var_dist, 20, 1, 50)
    contour_hs_20 = contour.coordinates[0][0]
    contour_tz_20 = contour.coordinates[0][1]

    # Find datapoints that exceed the 20-yr contour.
    hs_outside, tz_outside, hs_inside, tz_inside = \
        points_outside(contour_hs_20, contour_tz_20,
                       np.asarray(sample_hs), np.asarray(sample_tz))

    # Compute the median tz conditional on hs.
    hs = np.linspace(0, 14, 100)
    d1 = fit.mul_var_dist.distributions[1]
    c1 = d1.scale.a
    c2 = d1.scale.b
    tz = c1 + c2 * np.sqrt(np.divide(hs, 9.81))

    fig = plt.figure(figsize=(5, 5), dpi=150)
    ax = fig.add_subplot(111)

    # Plot the 20-year contour and the sample.
    plotted_sample = SamplePlotData(x=np.asarray(sample_tz),
                                    y=np.asarray(sample_hs),
                                    ax=ax,
                                    x_inside=tz_inside,
                                    y_inside=hs_inside,
                                    x_outside=tz_outside,
                                    y_outside=hs_outside,
                                    return_period=20)
    plot_contour(x=contour_tz_20,
                 y=contour_hs_20,
                 ax=ax,
                 contour_label='20-yr IFORM contour',
                 x_label=label_tz,
                 y_label=label_hs,
                 line_style='b-',
                 plotted_sample=plotted_sample,
                 x_lim=(0, 19),
                 upper_ylim=15,
                 median_x=tz,
                 median_y=hs,
                 median_label='median of $T_z | H_s$')
    plot_wave_breaking_limit(ax)
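    # Worked exceedance probability behind the 20-yr contour (assuming the
    # positional arguments of IFormContour are return period in years, state
    # duration in hours and number of contour points): with 1-hour sea states,
    # alpha = 1 / (20 * 365.25 * 24) = 1 / 175320, roughly 5.7e-06, i.e. on
    # average one sea state per 20 years lies outside the contour.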
        e_max_v_c50[i] = max(v)
        e_max_hs_c50[i] = max(hs)
    elif dataset_char == 'F':
        f_max_v_c50[i] = max(v)
        f_max_hs_c50[i] = max(hs)

# Load the environmental data and compute their minima and maxima.
empirical_max_hs_abc = np.empty([3, 1])
empirical_min_tz_abc = np.empty([3, 1])
empirical_max_tz_abc = np.empty([3, 1])
empirical_hs1_abc = np.empty([3, 1])
empirical_tz1_abc = np.empty([3, 1])
for i, dataset_char in np.ndenumerate(['A', 'B', 'C']):
    file_name_provided = 'datasets/' + dataset_char + '.txt'
    file_name_retained = 'datasets-retained/' + dataset_char + 'r.txt'
    hs_p, tz_p, lhs, ltz = read_ecbenchmark_dataset(file_name_provided)
    hs_r, tz_r, lhs, ltz = read_ecbenchmark_dataset(file_name_retained)
    hs = np.append(hs_p, hs_r)
    tz = np.append(tz_p, tz_r)
    empirical_max_hs_abc[i] = max(hs)
    empirical_min_tz_abc[i] = min(tz)
    empirical_max_tz_abc[i] = max(tz)
    # With hourly data, the 1-yr return value has an exceedance probability
    # of 1 / (365.25 * 24) per observation.
    pe_1yr = 1.0 / (365.25 * 24)
    empirical_hs1_abc[i] = np.quantile(hs, 1 - pe_1yr)
    empirical_tz1_abc[i] = np.quantile(tz, 1 - pe_1yr)

empirical_max_v_def = np.empty([3, 1])
empirical_max_hs_def = np.empty([3, 1])
empirical_v1_def = np.empty([3, 1])
empirical_hs1_def = np.empty([3, 1])
for i, dataset_char in np.ndenumerate(['D', 'E', 'F']):
    file_name_provided = 'datasets/' + dataset_char + '.txt'
hs_shape2 = ConstantParam(5)
Hs = ExponentiatedWeibullDistribution(shape=hs_shape, scale=hs_scale,
                                      shape2=hs_shape2)
distributions = [U10, Hs]
dependencies = [(None, None, None, None), (0, None, 0, None)]
joint_model_4 = MultivariateDistribution(distributions, dependencies)

joint_models = [joint_model_4]
model_names = ['Contribution 4']
u_dim = [0]  # Indices of wind speed in the different hierarchical joint models.
hs_dim = [1]  # Indices of wave height in the different hierarchical joint models.

file_name_provided = 'datasets/D.txt'
file_name_retained = 'datasets-retained/Dr.txt'
u_p, hs_p, lu, lhs = read_ecbenchmark_dataset(file_name_provided)
u_r, hs_r, lu, lhs = read_ecbenchmark_dataset(file_name_retained)
u = np.append(u_p, u_r)
hs = np.append(hs_p, hs_r)

fig1, axs1 = plt.subplots(1, 4, figsize=(12, 3))


def ecdf(data):
    """ Compute the empirical cumulative distribution function (ECDF). """
    x = np.sort(data)
    n = x.size
    F = np.arange(1, n + 1) / n
    return (x, F)
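# Quick illustrative check of ecdf on a tiny sample (not part of the original
# analysis): for the data [3, 1, 2], the ECDF steps through 1/3, 2/3 and 1 at
# the sorted values 1, 2 and 3.
x_demo, f_demo = ecdf(np.array([3.0, 1.0, 2.0]))
assert np.allclose(x_demo, [1.0, 2.0, 3.0])
assert np.allclose(f_demo, [1 / 3, 2 / 3, 1.0])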
import matplotlib.pyplot as plt
import numpy as np

from viroconcom.read_write import read_ecbenchmark_dataset, read_contour
from viroconcom.plot import plot_confidence_interval, SamplePlotData

fs = 12

file_name = 'datasets/D.txt'
sample_v, sample_hs, label_v, label_hs = read_ecbenchmark_dataset(file_name)

names = ['GC_CGS', 'hannesdottir_asta', 'haselsteiner_andreas',
         'vanem_DirectSampling']
styles = ['-r', '-g', '-k', '-c']
leg_strs = ['Contribution 2', 'Contribution 3', 'Contribution 4',
            'Contribution 9']
nums = [2, 3, 4, 9]
prcntl_strs = ['50th percentile contour', '2.5th percentile contour',
               '97.5th percentile contour']
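# As in the confidence-interval plots above, the 2.5th and 97.5th percentile
# contours together delimit a central 95 % confidence interval around the
# 50th percentile (median) contour of each listed contribution.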