Example #1
    def test_plot_windwave_fit(self):
        """
        Plots goodness of fit graphs, for the marginal distribution of X1 and
        for the dependence function of X2|X1. Uses wind and wave data.
        """

        sample_v, sample_hs, label_v, label_hs = \
            read_ecbenchmark_dataset('datasets/1year_dataset_D.txt')
        label_v = 'v (m s$^{-1}$)'

        # Define the structure of the probabilistic model that will be fitted to the
        # dataset.
        dist_description_v = {
            'name': 'Weibull_Exp',
            'dependency': (None, None, None, None),
            'width_of_intervals': 2
        }
        dist_description_hs = {
            'name': 'Weibull_Exp',
            'fixed_parameters': (None, None, None, 5),
            # shape, location, scale, shape2
            'dependency': (0, None, 0, None),
            # shape, location, scale, shape2
            'functions': ('logistics4', None, 'alpha3', None),
            # shape, location, scale, shape2
            'min_datapoints_for_fit': 50,
            'do_use_weights_for_dependence_function': True
        }

        # Fit the model to the data.
        fit = Fit((sample_v, sample_hs),
                  (dist_description_v, dist_description_hs))
        dist0 = fit.mul_var_dist.distributions[0]

        fig = plt.figure(figsize=(12.5, 3.5), dpi=150)
        ax1 = fig.add_subplot(131)
        ax2 = fig.add_subplot(132)
        ax3 = fig.add_subplot(133)
        plot_marginal_fit(sample_v,
                          dist0,
                          fig=fig,
                          ax=ax1,
                          label=label_v,
                          dataset_char='D')
        plot_dependence_functions(fit=fit,
                                  fig=fig,
                                  ax1=ax2,
                                  ax2=ax3,
                                  unconditonal_variable_label=label_v)
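
        # Outside the test suite one could render or save the finished figure,
        # for example like this (sketch only; the file name is hypothetical):
        fig.savefig('gof_wind_wave_dataset_D.pdf', bbox_inches='tight')
        plt.show()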
Example #2
    def test_plot_seastate_fit(self):
        """
        Plots goodness of fit graphs, for the marginal distribution of X1 and
        for the dependence function of X2|X1. Uses sea state data.

        """

        sample_hs, sample_tz, label_hs, label_tz = read_ecbenchmark_dataset()

        # Define the structure of the probabilistic model that will be fitted to the
        # dataset.
        dist_description_hs = {
            'name': 'Weibull_Exp',
            'dependency': (None, None, None, None),
            'width_of_intervals': 0.5
        }
        dist_description_tz = {
            'name': 'Lognormal_SigmaMu',
            'dependency': (0, None, 0),
            # Shape, Location, Scale
            'functions': ('asymdecrease3', None, 'lnsquare2'),
            # Shape, Location, Scale
            'min_datapoints_for_fit': 50
        }

        # Fit the model to the data.
        fit = Fit((sample_hs, sample_tz),
                  (dist_description_hs, dist_description_tz))
        dist0 = fit.mul_var_dist.distributions[0]

        fig = plt.figure(figsize=(12.5, 3.5), dpi=150)
        ax1 = fig.add_subplot(131)
        ax2 = fig.add_subplot(132)
        ax3 = fig.add_subplot(133)
        plot_marginal_fit(sample_hs,
                          dist0,
                          fig=fig,
                          ax=ax1,
                          label='$h_s$ (m)',
                          dataset_char='A')
        plot_dependence_functions(fit=fit,
                                  fig=fig,
                                  ax1=ax2,
                                  ax2=ax3,
                                  unconditonal_variable_label=label_hs)
Example #3
    def test_plot_confidence_interval(self):
        """
        Plots a contour's confidence interval.
        """
        dataset_d_v, dataset_d_hs, label_v, label_hs = \
            read_ecbenchmark_dataset('datasets/1year_dataset_D.txt')

        # Read the contours that have been computed previously from csv files.
        folder_name = 'contour-coordinates/'
        file_name_median = 'doe_john_years_25_median.txt'
        file_name_bottom = 'doe_john_years_25_bottom.txt'
        file_name_upper = 'doe_john_years_25_upper.txt'
        (contour_v_median,
         contour_hs_median) = read_contour(folder_name + file_name_median)
        (contour_v_bottom,
         contour_hs_bottom) = read_contour(folder_name + file_name_bottom)
        (contour_v_upper,
         contour_hs_upper) = read_contour(folder_name + file_name_upper)

        # Plot the sample, the median contour and the confidence interval.
        fig = plt.figure(figsize=(5, 5), dpi=150)
        ax = fig.add_subplot(111)
        plotted_sample = SamplePlotData(x=np.asarray(dataset_d_v),
                                        y=np.asarray(dataset_d_hs),
                                        ax=ax,
                                        label='dataset D')
        contour_labels = [
            '50th percentile contour', '2.5th percentile contour',
            '97.5th percentile contour'
        ]
        plot_confidence_interval(x_median=contour_v_median,
                                 y_median=contour_hs_median,
                                 x_bottom=contour_v_bottom,
                                 y_bottom=contour_hs_bottom,
                                 x_upper=contour_v_upper,
                                 y_upper=contour_hs_upper,
                                 ax=ax,
                                 x_label=label_v,
                                 y_label=label_hs,
                                 contour_labels=contour_labels,
                                 plotted_sample=plotted_sample)
Example #4
latitudes =  [43.525,   28.508,  25.897, 54.0,  55.0,  59.5 ]
longitudes = [-70.141, -80.185, -89.668, 6.575, 1.175, 4.325]


fig, axs = plt.subplots(2, 3, sharex='row', sharey='row', figsize=(10, 8))
max_hs_of_sample = 0
for i, ax0 in enumerate(axs):
    if i == 0:
        datasets = datasets_hstz
    else:
        datasets = datasets_vhs
    for j, (dataset_char, ax1) in enumerate(zip(datasets, ax0)):
        # Load the environmental data.
        file_name_provided = 'datasets/' + dataset_char + '.txt'
        file_name_retained = 'datasets-retained/' + dataset_char + 'r.txt'
        x1_p, x2_p, x1_label, x2_label = read_ecbenchmark_dataset(file_name_provided)
        x1_r, x2_r, x1_label, x2_label = read_ecbenchmark_dataset(file_name_retained)
        if i == 1:
            x1_p, x2_p = x2_p, x1_p
            x1_r, x2_r = x2_r, x1_r
            x1_label, x2_label = x2_label, x1_label

        max_hs_of_sample = max([max_hs_of_sample, max(x1_p), max(x1_r)])

        # Scatter plot
        ax1.scatter(x2_p, x1_p, c='black', alpha=0.5, zorder=-2)
        ax1.scatter(x2_r, x1_r, marker='v', facecolor='None',
                    edgecolor='black', alpha=0.5, zorder=-2)

        ax1.set_rasterization_zorder(-1)
        ax1.set_xlabel(x2_label.capitalize())
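
        # Possible continuation (sketch, not from the original script): label
        # the y-axis of the left-most panels; a common y-limit based on
        # max_hs_of_sample would typically be set after the loops finish.
        if j == 0:
            ax1.set_ylabel(x1_label.capitalize())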
Example #5
dataset_chars = ['A', 'B', 'C']
return_periods = [1, 20]
n_contours_to_analyze = len(legends_for_contribution)

fig, axs = plt.subplots(len(return_periods),
                        len(dataset_chars),
                        sharex='row',
                        sharey='row',
                        figsize=(10, 8))
max_hs_of_sample = 0
for (return_period, ax0) in zip(return_periods, axs):
    for (dataset_char, ax1) in zip(dataset_chars, ax0):
        # Load the environmental data.
        file_name_provided = 'datasets/' + dataset_char + '.txt'
        file_name_retained = 'datasets-retained/' + dataset_char + 'r.txt'
        hs_p, tz_p, label_hs, label_tz = read_ecbenchmark_dataset(
            file_name_provided)
        hs_r, tz_r, label_hs, label_tz = read_ecbenchmark_dataset(
            file_name_retained)
        max_hs_of_sample = max([max_hs_of_sample, max(hs_p), max(hs_r)])

        contours_hs = []
        contours_tz = []
        max_hs_on_contour = np.empty(n_contours_to_analyze)
        for i in range(n_contours_to_analyze):
            contribution_nr = i + 1
            if 11 >= contribution_nr >= 9:
                contribution_nr = 9
            elif contribution_nr > 11:
                # Because contribution 9 holds 3 sets of contours.
                contribution_nr = contribution_nr - 2
            folder_name = 'results/exercise-1/contribution-' + str(contribution_nr)
Example #6
        if t < 0:
            theta[i] = t + 2 * np.pi
    return theta
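
# The lines above are the tail of a helper that wraps angles into [0, 2*pi).
# A self-contained sketch of such a helper (name and signature assumed):
def wrap_to_two_pi(angles):
    """Map angles given in radians to the interval [0, 2*pi)."""
    theta = np.asarray(angles, dtype=float).copy()
    theta[theta < 0] += 2 * np.pi
    return theta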

colors_for_contribution = mycorder.mpl_colors
for idx in range(2):
    colors_for_contribution.append(colors_for_contribution[8])
colors_for_contribution.append('blue')


fig, axs = plt.subplots(1, 2, sharey=True, figsize=(8, 4))
max_hs_of_sample = 0

# Load the environmental data.
file_name_provided = 'datasets/' + dataset_char + '.txt'
v_p, hs_p, label_v, label_hs = read_ecbenchmark_dataset(file_name_provided)
max_hs_of_sample = max([max_hs_of_sample, max(hs_p)])

contours = []
contours_v = []
contours_hs = []
max_hs_on_contours = np.empty(n_contours_to_analyze)
for i in range(n_contours_to_analyze):
    contribution_nr = i + 1
    if 11 >= contribution_nr >= 9:
        contribution_nr = 9
    elif contribution_nr > 11:
        # Because contribution 9 holds 3 sets of contours.
        contribution_nr = contribution_nr - 2
    folder_name = 'results/exercise-1/contribution-' + str(contribution_nr)
    file_name = folder_name + '/' + lastname_firstname[i] + '_dataset_' + \
Example #7
    def test_read_dataset(self):
        """
        Reads the provided dataset "1year_dataset_A.txt".
        """
        sample_hs, sample_tz, label_hs, label_tz = read_ecbenchmark_dataset()
        self.assertAlmostEqual(sample_hs[0], 0.2845, delta=0.00001)
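
        # The same file can also be read by passing its path explicitly
        # (sketch; the default call above already reads dataset A):
        sample_hs, sample_tz, label_hs, label_tz = \
            read_ecbenchmark_dataset('datasets/1year_dataset_A.txt')
        self.assertAlmostEqual(sample_hs[0], 0.2845, delta=0.00001)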
Example #8
from viroconcom.fitting import Fit
from viroconcom.contours import HighestDensityContour, \
    sort_points_to_form_continous_line
from viroconcom.plot import plot_contour, SamplePlotData

np.random.seed(9001)  # For reproducibility.

# Define the number of years of data that one bootstrap sample should contain.
# In the benchmark, 1, 5 and 25 years are used.
NR_OF_YEARS_TO_DRAW = [1, 2, 5]
NR_OF_BOOTSTRAP_SAMPLES = [25, 12, 5]
GRID_CELL_SIZE = 0.05

# Read dataset D.
file_path = 'datasets/D.txt'
dataset_d_v, dataset_d_hs, label_v, label_hs = read_ecbenchmark_dataset(file_path)

# Define the origin (will be used to compute confidence intervals).
v0 = np.mean(dataset_d_v)
hs0 = np.mean(dataset_d_hs)

# Define the structure of the probabilistic model that will be fitted to the
# dataset.
dist_description_v = {'name': 'Weibull_Exp',
                      'dependency': (None, None, None, None),
                      'width_of_intervals': 2}
dist_description_hs = {'name': 'Weibull_Exp',
                       'fixed_parameters': (None, None, None, 5),
                       # shape, location, scale, shape2
                       'dependency': (0, None, 0, None),
                       # shape, location, scale, shape2
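                       # Likely continuation, mirroring the equivalent model
                       # definition in Example #1 (shown here as a sketch):
                       'functions': ('logistics4', None, 'alpha3', None),
                       # shape, location, scale, shape2
                       'min_datapoints_for_fit': 50,
                       'do_use_weights_for_dependence_function': True}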
Example #9
    def test_plot_contour_and_sample(self):
        """
        Plots a contour together with the dataset that has been used to
        fit a distribution for the contour.
        """

        sample_hs, sample_tz, label_hs, label_tz = read_ecbenchmark_dataset()

        # Define the structure of the probabilistic model that will be fitted to the
        # dataset.
        dist_description_hs = {
            'name': 'Weibull_Exp',
            'dependency': (None, None, None, None),
            'width_of_intervals': 0.5
        }
        dist_description_tz = {
            'name': 'Lognormal_SigmaMu',
            'dependency': (0, None, 0),
            # Shape, Location, Scale
            'functions': ('asymdecrease3', None, 'lnsquare2'),
            # Shape, Location, Scale
            'min_datapoints_for_fit': 50
        }

        # Fit the model to the data.
        fit = Fit((sample_hs, sample_tz),
                  (dist_description_hs, dist_description_tz))

        contour = IFormContour(fit.mul_var_dist, 20, 1, 50)
        contour_hs_20 = contour.coordinates[0][0]
        contour_tz_20 = contour.coordinates[0][1]

        # Find datapoints that exceed the 20-yr contour.
        hs_outside, tz_outside, hs_inside, tz_inside = \
            points_outside(contour_hs_20,
                           contour_tz_20,
                           np.asarray(sample_hs),
                           np.asarray(sample_tz))

        # Compute the median tz conditional on hs.
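        # With the 'lnsquare2' dependence function the scale parameter is
        # mu(hs) = ln(c1 + c2 * sqrt(hs / 9.81)); since the median of a
        # lognormal variable is exp(mu), the conditional median simplifies to
        # c1 + c2 * sqrt(hs / 9.81), which is evaluated below.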
        hs = np.linspace(0, 14, 100)
        d1 = fit.mul_var_dist.distributions[1]
        c1 = d1.scale.a
        c2 = d1.scale.b
        tz = c1 + c2 * np.sqrt(np.divide(hs, 9.81))

        fig = plt.figure(figsize=(5, 5), dpi=150)
        ax = fig.add_subplot(111)

        # Plot the 20-year contour and the sample.
        plotted_sample = SamplePlotData(x=np.asarray(sample_tz),
                                        y=np.asarray(sample_hs),
                                        ax=ax,
                                        x_inside=tz_inside,
                                        y_inside=hs_inside,
                                        x_outside=tz_outside,
                                        y_outside=hs_outside,
                                        return_period=20)

        plot_contour(x=contour_tz_20,
                     y=contour_hs_20,
                     ax=ax,
                     contour_label='20-yr IFORM contour',
                     x_label=label_tz,
                     y_label=label_hs,
                     line_style='b-',
                     plotted_sample=plotted_sample,
                     x_lim=(0, 19),
                     upper_ylim=15,
                     median_x=tz,
                     median_y=hs,
                     median_label='median of $T_z | H_s$')
        plot_wave_breaking_limit(ax)
Example #10
                e_max_v_c50[i] = max(v)
                e_max_hs_c50[i] = max(hs)
            elif dataset_char == 'F':
                f_max_v_c50[i] = max(v)
                f_max_hs_c50[i] = max(hs)

# Load the environmental data and compute their minima and maxima.
empirical_max_hs_abc = np.empty([3, 1])
empirical_min_tz_abc = np.empty([3, 1])
empirical_max_tz_abc = np.empty([3, 1])
empirical_hs1_abc = np.empty([3, 1])
empirical_tz1_abc = np.empty([3, 1])
for i, dataset_char in np.ndenumerate(['A', 'B', 'C']):
    file_name_provided = 'datasets/' + dataset_char + '.txt'
    file_name_retained = 'datasets-retained/' + dataset_char + 'r.txt'
    hs_p, tz_p, lhs, ltz = read_ecbenchmark_dataset(file_name_provided)
    hs_r, tz_r, lhs, ltz = read_ecbenchmark_dataset(file_name_retained)
    hs = np.append(hs_p, hs_r)
    tz = np.append(tz_p, tz_r)
    empirical_max_hs_abc[i] = max(hs)
    empirical_min_tz_abc[i] = min(tz)
    empirical_max_tz_abc[i] = max(tz)
    pe_1yr = 1.0 / (365.25 * 24)
    empirical_hs1_abc[i] = np.quantile(hs, 1 - pe_1yr)
    empirical_tz1_abc[i] = np.quantile(tz, 1 - pe_1yr)
empirical_max_v_def = np.empty([3, 1])
empirical_max_hs_def = np.empty([3, 1])
empirical_v1_def = np.empty([3, 1])
empirical_hs1_def = np.empty([3, 1])
for i, dataset_char in np.ndenumerate(['D', 'E', 'F']):
    file_name_provided = 'datasets/' + dataset_char + '.txt'
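    # Likely continuation, mirroring the A/B/C loop above (shown as a sketch;
    # the variable names follow the arrays defined before this loop):
    file_name_retained = 'datasets-retained/' + dataset_char + 'r.txt'
    v_p, hs_p, lv, lhs = read_ecbenchmark_dataset(file_name_provided)
    v_r, hs_r, lv, lhs = read_ecbenchmark_dataset(file_name_retained)
    v = np.append(v_p, v_r)
    hs = np.append(hs_p, hs_r)
    empirical_max_v_def[i] = max(v)
    empirical_max_hs_def[i] = max(hs)
    pe_1yr = 1.0 / (365.25 * 24)
    empirical_v1_def[i] = np.quantile(v, 1 - pe_1yr)
    empirical_hs1_def[i] = np.quantile(hs, 1 - pe_1yr)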
Example #11
hs_shape2 = ConstantParam(5)
Hs = ExponentiatedWeibullDistribution(shape=hs_shape,
                                      scale=hs_scale,
                                      shape2=hs_shape2)
distributions = [U10, Hs]
dependencies = [(None, None, None, None), (0, None, 0, None)]
joint_model_4 = MultivariateDistribution(distributions, dependencies)

joint_models = [joint_model_4]
model_names = ['Contribution 4']
u_dim = [0]  # Indices of wind speed in the different hierarchical joint models.
hs_dim = [1]  # Indices of wave height in the different hierarchical joint models.

file_name_provided = 'datasets/D.txt'
file_name_retained = 'datasets-retained/Dr.txt'
u_p, hs_p, lu, lhs = read_ecbenchmark_dataset(file_name_provided)
u_r, hs_r, lu, lhs = read_ecbenchmark_dataset(file_name_retained)
u = np.append(u_p, u_r)
hs = np.append(hs_p, hs_r)

fig1, axs1 = plt.subplots(1, 4, figsize=(12, 3))


def ecdf(data):
    """ Compute ECDF """
    x = np.sort(data)
    n = x.size
    F = np.arange(1, n + 1) / n
    return (x, F)
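
# Minimal usage sketch (assumed, not from the original script): plot the
# empirical CDF of the combined wind speed sample on the first of the four
# axes created above.
x_u, F_u = ecdf(u)
axs1[0].plot(x_u, F_u, color='black')
axs1[0].set_xlabel(lu)
axs1[0].set_ylabel('Empirical CDF')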

Example #12
import matplotlib.pyplot as plt
import numpy as np

from viroconcom.read_write import read_ecbenchmark_dataset, read_contour
from viroconcom.plot import plot_confidence_interval, SamplePlotData

fs = 12

file_name = 'datasets/D.txt'
sample_v, sample_hs, label_v, label_hs = read_ecbenchmark_dataset(file_name)

names = [
    'GC_CGS', 'hannesdottir_asta', 'haselsteiner_andreas',
    'vanem_DirectSampling'
]
styles = [
    '-r',
    '-g',
    '-k',
    '-c',
]
leg_strs = [
    'Contribution 2', 'Contribution 3', 'Contribution 4', 'Contribution 9'
]
nums = [2, 3, 4, 9]

prcntl_strs = [
    '50th percentile contour', '2.5th percentile contour',
    '97.5th percentile contour'
]
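
# Possible continuation (sketch; the contour file locations and names are
# assumed here, following the naming pattern used in Example #3): for one of
# the listed contributions, read the bootstrapped median, 2.5th and 97.5th
# percentile contours and plot the confidence interval together with dataset D.
fig, ax = plt.subplots(figsize=(5, 5), dpi=150)
plotted_sample = SamplePlotData(x=np.asarray(sample_v),
                                y=np.asarray(sample_hs),
                                ax=ax,
                                label='dataset D')
i = 0  # Index into names, styles and leg_strs.
folder_name = 'contour-coordinates/'
v_median, hs_median = read_contour(folder_name + names[i] + '_years_25_median.txt')
v_bottom, hs_bottom = read_contour(folder_name + names[i] + '_years_25_bottom.txt')
v_upper, hs_upper = read_contour(folder_name + names[i] + '_years_25_upper.txt')
plot_confidence_interval(x_median=v_median, y_median=hs_median,
                         x_bottom=v_bottom, y_bottom=hs_bottom,
                         x_upper=v_upper, y_upper=hs_upper,
                         ax=ax, x_label=label_v, y_label=label_hs,
                         contour_labels=prcntl_strs,
                         plotted_sample=plotted_sample)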