Beispiel #1
0
def interearthquake_time_estimates_for_parkfield(time_gap):
    """Compare the ECDF of ``time_gap`` with Exponential and Normal models.

    Both models are parameterized from the data: the Exponential uses the
    sample mean, the Normal uses the sample mean and standard deviation.
    10,000 samples are drawn from each model and their ECDFs are plotted,
    followed by the ECDF of ``time_gap`` itself (requested from
    ``dcst.ecdf`` with ``formal=True, min_x=-10, max_x=50``).

    Parameters
    ----------
    time_gap : array-like
        Observed gaps between events; the axis label states years.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    n_samples = 10000
    mu = np.mean(time_gap)
    sigma = np.std(time_gap)

    # Keep this draw order (exponential, then normal) so that results under a
    # fixed NumPy seed do not change.
    exp_samples = np.random.exponential(mu, n_samples)
    norm_samples = np.random.normal(mu, sigma, n_samples)

    # Theoretical curves first, so the two legend entries below map to them.
    for model_samples in (exp_samples, norm_samples):
        plt.plot(*dcst.ecdf(model_samples))

    # Observed data, drawn last; it receives no legend entry.
    plt.plot(*dcst.ecdf(time_gap, formal=True, min_x=-10, max_x=50))

    plt.legend(('Exp.', 'Norm.'), loc='upper left')
    plt.xlabel('time gap (years)')
    plt.ylabel('ECDF')
    plt.xlim(-10, 50)
    plt.show()
Beispiel #2
0
def test_pandas_conversion(seed):
    """Check that ``dcst`` functions accept pandas Series like NumPy arrays.

    Covers ``ecdf``, ``ks_stat``, ``draw_bs_reps`` and ``draw_perm_reps``,
    including Series that contain NaN entries.

    Parameters
    ----------
    seed
        Value handed to ``dcst_private._seed_numba`` before every random
        draw, so that the ndarray call and the Series call being compared
        start from the same seed.

    Notes
    -----
    ``atol`` is read from the enclosing module; it is not defined in this
    function.
    """
    df = pd.DataFrame({
        'a': [3, 2, 1, 4],
        'b': [8, 6, 7, 5],
        'c': [9.1, 10.1, 11.1, np.nan]
    })

    # ECDF of an integer column: x comes back sorted, y steps by 1/4.
    x, y = dcst.ecdf(df.loc[:, 'a'])
    assert (x == np.array([1, 2, 3, 4])).all()
    assert (y == np.array([0.25, 0.5, 0.75, 1.0])).all()

    # Column 'c' holds one NaN; the expected ECDF has only the 3 finite points.
    x, y = dcst.ecdf(df.loc[:, 'c'])
    assert np.allclose(x, np.array([9.1, 10.1, 11.1]))
    assert np.allclose(y, np.array([1 / 3, 2 / 3, 1.0]))

    # K-S statistic: column 'a' is mostly NaN (10 values, 990 NaN). The
    # reference value comes from SciPy on the NaN-free column.
    df = pd.DataFrame({
        'a':
        np.concatenate((np.random.normal(0, 1, size=10), [np.nan] * 990)),
        'b':
        np.random.normal(0, 1, size=1000)
    })
    correct, _ = st.ks_2samp(df['a'].dropna(), df['b'])
    assert np.isclose(dcst.ks_stat(df['a'], df['b']), correct)

    # Bootstrap / permutation replicates: Series input must reproduce the
    # result obtained from the underlying ``.values`` array under the same seed.
    df = pd.DataFrame({
        'a':
        np.concatenate((np.random.normal(0, 1, size=80), [np.nan] * 20)),
        'b':
        np.random.normal(0, 1, size=100)
    })
    # Column with NaNs.
    dcst_private._seed_numba(seed)
    correct = dcst.draw_bs_reps(df['a'].values, np.mean, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_bs_reps(df['a'], np.mean, size=100),
                       correct,
                       atol=atol)

    # Column without NaNs.
    dcst_private._seed_numba(seed)
    correct = dcst.draw_bs_reps(df['b'].values, np.mean, size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_bs_reps(df['b'], np.mean, size=100),
                       correct,
                       atol=atol)

    # Permutation replicates of the difference of means across both columns.
    dcst_private._seed_numba(seed)
    correct = dcst.draw_perm_reps(df['a'].values,
                                  df['b'].values,
                                  dcst.diff_of_means,
                                  size=100)
    dcst_private._seed_numba(seed)
    assert np.allclose(dcst.draw_perm_reps(df['a'],
                                           df['b'],
                                           dcst.diff_of_means,
                                           size=100),
                       correct,
                       atol=atol)
def test_ecdf_formal_for_plotting():
    """Verify the staircase coordinates from ``dcst.ecdf(..., formal=True)``.

    Each case lists the input data and the expected x / y arrays when the
    ECDF is requested with ``min_x=0`` and ``max_x=4``. The second case
    contains a repeated value (2). ``atol`` is read from the enclosing module.
    """
    cases = (
        (
            np.array([2, 1, 3]),
            np.array([0, 1, 1, 2, 2, 3, 3, 4]),
            np.array([0, 0, 1, 1, 2, 2, 3, 3]) / 3,
        ),
        (
            np.array([1, 2, 2, 3]),
            np.array([0, 1, 1, 2, 2, 2, 2, 3, 3, 4]),
            np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) / 4,
        ),
    )

    for sample, x_expected, y_expected in cases:
        x_got, y_got = dcst.ecdf(sample, formal=True, min_x=0, max_x=4)
        assert np.allclose(x_got, x_expected, atol=atol, equal_nan=True)
        assert np.allclose(y_got, y_expected, atol=atol, equal_nan=True)
Beispiel #4
0
def eda(time, mags):
    """Plot ECDFs of magnitudes recorded before 2010 and from 2010 onward.

    Parameters
    ----------
    time : array-like
        One time value per entry of ``mags``; compared elementwise against
        2010 (presumably fractional calendar years -- confirm with caller).
    mags : array-like
        Magnitudes; must support Boolean-mask indexing.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Get magnitudes before and after 2010
    mags_pre = mags[time < 2010]
    mags_post = mags[time >= 2010]

    # Generate ECDFs (dots only, both on the same axes)
    _ = plt.plot(*dcst.ecdf(mags_pre), marker='.', linestyle='none')
    _ = plt.plot(*dcst.ecdf(mags_post), marker='.', linestyle='none')

    # Label axes and show plot
    _ = plt.xlabel('magnitude')
    _ = plt.ylabel('ECDF')
    _ = plt.legend(('1980 through 2009', '2010 through mid-2017'),
                   loc='upper left')
    plt.show()
Beispiel #5
0
def plot_ecdf_for_active_bout_length(bout_lengths_wt, bout_lengths_mut):
    """Plot the ECDFs of wild-type and mutant active bout lengths as dots.

    Parameters
    ----------
    bout_lengths_wt : array-like
        Bout lengths for the group labelled 'wt' in the legend.
    bout_lengths_mut : array-like
        Bout lengths for the group labelled 'mut' in the legend.
    """
    dot_style = {'marker': '.', 'linestyle': 'none'}

    # Compute both ECDFs before drawing anything.
    curves = [
        dcst.ecdf(lengths) for lengths in (bout_lengths_wt, bout_lengths_mut)
    ]

    # Draw in wt, mut order so the legend entries line up.
    for x_vals, y_vals in curves:
        plt.plot(x_vals, y_vals, **dot_style)

    plt.legend(('wt', 'mut'))
    plt.xlabel('active bout length (min)')
    plt.ylabel('ECDF')
    plt.show()
Beispiel #6
0
def eda(mags):
    """Show the ECDF of ``mags`` as unconnected dots.

    Parameters
    ----------
    mags : array-like
        Values passed straight to ``dcst.ecdf``.
    """
    x_vals, y_vals = dcst.ecdf(mags)
    plt.plot(x_vals, y_vals, marker='.', linestyle='none')

    plt.xlabel('magnitude')
    plt.ylabel('ECDF')
    plt.show()
Beispiel #7
0
def how_should_we_test_the_hypothesis(swimtime_low_lanes, swimtime_high_lanes):
    """Plot the ECDF of fractional improvement and of its mean-shifted copy.

    The fractional improvement is ``(low - high) / low``. Its ECDF is drawn
    in the default color; the ECDF of the same values with their mean
    subtracted is drawn in red on the same axes.

    Parameters
    ----------
    swimtime_low_lanes, swimtime_high_lanes : array-like
        Swim times; combined elementwise, so they must broadcast together.
    """

    def _plot_dots(values, **extra_style):
        # One ECDF as dots, followed by the (repeated) axis labels.
        xs, ys = dcst.ecdf(values)
        plt.plot(xs, ys, marker='.', linestyle='none', **extra_style)
        plt.xlabel('f')
        plt.ylabel('ECDF')

    frac_improvement = (swimtime_low_lanes -
                        swimtime_high_lanes) / swimtime_low_lanes

    _plot_dots(frac_improvement)
    _plot_dots(frac_improvement - np.mean(frac_improvement), color="red")

    plt.show()
def ks_stat(data1, data2):
    """Return the largest vertical gap between two empirical distributions.

    The ECDF of ``data1`` is compared against
    ``dcst.ecdf_formal(x, data2)`` at the ECDF's own x-values (presumably
    the ECDF of ``data2`` evaluated at those points -- confirm against the
    ``dc_stat_think`` documentation).

    Parameters
    ----------
    data1 : array-like
        Sample whose ECDF supplies the evaluation points.
    data2 : array-like
        Sample supplying the reference distribution.

    Returns
    -------
    The maximum over both gap arrays computed below.
    """
    x_pts, ecdf_vals = dcst.ecdf(data1)
    reference_cdf = dcst.ecdf_formal(x_pts, data2)

    # Gap measured at the top of each ECDF step.
    gap_above = ecdf_vals - reference_cdf
    # Gap measured at the bottom of each step, i.e. one step height
    # (1 / len(data1)) lower.
    gap_below = reference_cdf - ecdf_vals + 1 / len(data1)

    gaps = (gap_above, gap_below)
    return np.max(gaps)
Beispiel #9
0
def graphical_eda_of_means_200_free_heats(mens_200_free_heats):
    """Plot the ECDF of the given heat times as dots.

    Parameters
    ----------
    mens_200_free_heats : array-like
        Swim times; the x-axis label states seconds.
    """
    plt.plot(*dcst.ecdf(mens_200_free_heats), marker='.', linestyle='none')

    plt.xlabel('time (s)')
    plt.ylabel('ECDF')
    plt.show()
Beispiel #10
0
def ecdf_of_improvement_from_low_to_high_lanes(swimtime_low_lanes,
                                               swimtime_high_lanes):
    """Plot the ECDF of the fractional improvement ``(low - high) / low``.

    Parameters
    ----------
    swimtime_low_lanes, swimtime_high_lanes : array-like
        Swim times; combined elementwise.
    """
    time_saved = swimtime_low_lanes - swimtime_high_lanes
    frac_improvement = time_saved / swimtime_low_lanes

    plt.plot(*dcst.ecdf(frac_improvement), marker='.', linestyle='none')
    plt.xlabel('f')
    plt.ylabel('ECDF')

    plt.show()
def eda_final_vs_semifinals(semi_times, final_times):
    """Plot the ECDF of the fractional time difference ``(semi - final) / semi``.

    Parameters
    ----------
    semi_times, final_times : array-like
        Swim times; combined elementwise.
    """
    time_delta = semi_times - final_times
    frac_diff = time_delta / semi_times

    # ECDF drawn as unconnected dots.
    plt.plot(*dcst.ecdf(frac_diff), marker='.', linestyle='none')

    plt.xlabel('f')
    plt.ylabel('ECDF')
    plt.show()
Beispiel #12
0
def the_b_value_for_parkfield(mags, mt=3):
    """Estimate the b-value of ``mags`` and compare data with the model ECDF.

    Calls ``b_value`` (defined elsewhere) for the b-value and its 95%
    confidence interval, draws 100,000 samples from an Exponential
    distribution with mean ``b / ln(10)`` shifted by ``mt``, and plots their
    ECDF as a line together with the ECDF of all magnitudes ``>= mt`` as
    dots. The b-value and interval are then printed.

    Parameters
    ----------
    mags : numpy.ndarray
        Magnitudes; must support Boolean-mask indexing.
    mt : number, optional
        Threshold passed to ``b_value`` and used as the location shift and
        as the cut-off for the plotted data. Defaults to 3, the value that
        was previously hard-coded.

    Notes
    -----
    The x-axis limits stay fixed at (2.8, 6.2) regardless of ``mt``.
    """
    # Compute b-value and 95% confidence interval
    b, conf_int = b_value(mags, mt, perc=[2.5, 97.5], n_reps=10000)

    # Generate samples for the theoretical ECDF
    m_theor = np.random.exponential(b / np.log(10), size=100000) + mt

    # Plot the theoretical CDF
    _ = plt.plot(*dcst.ecdf(m_theor))

    # Plot the ECDF (slicing mags >= mt)
    _ = plt.plot(*dcst.ecdf(mags[mags >= mt]), marker='.', linestyle='none')

    # Pretty up and show the plot
    _ = plt.xlabel('magnitude')
    _ = plt.ylabel('ECDF')
    _ = plt.xlim(2.8, 6.2)
    plt.show()

    # Report the results
    print("""
    b-value: {0:.2f}
    95% conf int: [{1:.2f}, {2:.2f}]""".format(b, *conf_int))
Beispiel #13
0
def did_the_2015_event_have_this_problem():
    """Repeat the lane-bias analysis on the embedded 2015 swim times.

    Steps, in order:

    1. Compute the fractional improvement ``(low - high) / low`` and show
       its ECDF as dots.
    2. Draw 10,000 bootstrap replicates of the mean and take the 2.5th and
       97.5th percentiles as a confidence interval.
    3. Shift the data to zero mean, draw 100,000 bootstrap replicates of
       the mean, and compute the fraction of replicates that are at least
       as large as the observed mean (the p-value).
    4. Print the observed mean, the interval, and the p-value.
    """
    low_lane_times = np.array([
        27.66, 24.69, 23.29, 23.05, 26.87, 31.03, 22.04, 24.51, 21.86,
        25.64, 25.91, 24.77, 30.14, 27.23, 24.31, 30.2, 26.86,
    ])
    high_lane_times = np.array([
        27.7, 24.64, 23.21, 23.09, 26.87, 30.74, 21.88, 24.5, 21.86,
        25.9, 26.2, 24.73, 30.13, 26.92, 24.31, 30.25, 26.76,
    ])

    frac_improvement = (low_lane_times - high_lane_times) / low_lane_times

    # Step 1: ECDF of the raw fractional improvement.
    plt.plot(*dcst.ecdf(frac_improvement), marker='.', linestyle='none')
    plt.xlabel('f')
    plt.ylabel('ECDF')
    plt.show()

    observed_mean = np.mean(frac_improvement)

    # Step 2: 95% confidence interval of the mean.
    ci_reps = dcst.draw_bs_reps(frac_improvement, np.mean, size=10000)
    interval = np.percentile(ci_reps, [2.5, 97.5])

    # Step 3: bootstrap replicates from the zero-mean data.
    n_null_reps = 100000
    centered = frac_improvement - observed_mean
    null_reps = dcst.draw_bs_reps(centered, np.mean, size=n_null_reps)
    p_value = np.sum(null_reps >= observed_mean) / n_null_reps

    # Step 4: report.
    print("""
    mean frac. diff.: {0:.5f}
    95% conf int of mean frac. diff.: [{1:.5f}, {2:.5f}]
    p-value: {3:.5f}""".format(observed_mean, interval[0], interval[1],
                               p_value))
Beispiel #14
0
# Split the bout lengths by genotype ('wt' vs. 'mut') and keep only the raw
# values. NOTE(review): `df` is not defined in the visible part of this
# script; it is assumed to be a pandas DataFrame with `genotype` and
# `bout_length` columns.
bout_lengths_wt = df[df.genotype == 'wt'].bout_length.values
bout_lengths_mut = df[df.genotype == 'mut'].bout_length.values
'''
INSTRUCTIONS

*   Import the module dc_stat_think as dcst so you have its functions available.
*   Generate the x and y values for plotting the ECDF of the wild type fish (bout_lengths_wt) using dcst.ecdf(). Store the result in numpy arrays named x_wt and y_wt.
*   Do the same for the mutant fish (bout_lengths_mut), storing the result in numpy arrays named x_mut and y_mut.
*   Use plt.plot() to plot the two ECDFs as dots on the same plot. Be sure to specify the keyword arguments marker='.' and linestyle='none'.
*   Show your plot using plt.show().
'''

# Import the dc_stat_think module as dcst
import dc_stat_think as dcst

# Generate x and y values for plotting ECDFs (one x/y pair per genotype)
x_wt, y_wt = dcst.ecdf(bout_lengths_wt)
x_mut, y_mut = dcst.ecdf(bout_lengths_mut)

# Plot the ECDFs as unconnected dots on the same axes.
# NOTE(review): `plt` is not imported in the visible part of this script.
_ = plt.plot(x_wt, y_wt, marker='.', linestyle='none')
_ = plt.plot(x_mut, y_mut, marker='.', linestyle='none')

# Make a legend (entries follow plotting order: wt, then mut), label axes,
# and show plot
_ = plt.legend(('wt', 'mut'))
_ = plt.xlabel('active bout length (min)')
_ = plt.ylabel('ECDF')

plt.show()
Beispiel #15
0
    mean_pagerank = np.mean(pageranks_list)
    stdev_pagerank = np.std(pageranks_list)
    median_pagerank = np.median(pageranks_list)

    print("Max pageranks:", max_pagerank)
    print("Min pageranks:", min_pagerank)
    print("Mean pageranks:", mean_pagerank)
    print("Standard Deviation of pageranks:", stdev_pagerank)
    print("Median pageranks:", median_pagerank)

    print("Pagerank:", pagerank)
    print("Degrees:", G.degree)
    print("Diameter:", diameter)
    print("Number of connected components:", n_connected_components)

    x, y = dcst.ecdf(np.array(degrees_list))
    x_ecdf_degrees.append(x)
    y_ecdf_degrees.append(y)

    x, y = dcst.ecdf(np.array(weight))
    x_ecdf_weights.append(x)
    y_ecdf_weights.append(y)

    x, y = dcst.ecdf(np.array(pageranks_list))
    x_ecdf_pageranks.append(x)
    y_ecdf_pageranks.append(y)

    graphs.append(G)
    pageranks.append(pagerank)
    distances.append(D)
    diameters.append(diameter)
Beispiel #16
0
"""ECDF of improvement from low to high lanes

Now that you have a metric for improvement going from low- to high-numbered lanes, plot an ECDF of this metric.
I have put together the swim times of all swimmers who swam a 50 m semifinal in a high numbered lane and the final
in a low numbered lane, and vice versa. The swim times are stored in the Numpy arrays swimtime_high_lanes and
swimtime_low_lanes. Entry i in each array is for the same swimmer in the same event."""
import matplotlib.pyplot as plt
import numpy as np
import dc_stat_think as dcst

# Compute the fractional improvement of being in high lane: f
# (positive where the high-lane time is smaller than the low-lane time).
# NOTE(review): swimtime_low_lanes / swimtime_high_lanes are not defined in
# the visible part of this script.
f = (swimtime_low_lanes - swimtime_high_lanes) / swimtime_low_lanes

# Make x and y values for ECDF: x, y
x, y = dcst.ecdf(f)

# Plot the ECDF as unconnected dots
_ = plt.plot(x, y, marker='.', linestyle='none')

# Label the axes and show the plot
plt.xlabel('f')
plt.ylabel('ECDF')

plt.show()
"""Estimation of mean improvement

You will now estimate how big this current effect is. Compute the mean fractional improvement for being in a 
high-numbered lane versus a low-numbered lane, along with a 95% confidence interval of the mean."""

# Compute the mean fractional improvement: f_mean
f_mean = np.mean(f)
*   Compute the b-value and the 95% confidence interval using your b_value() function. Use 10,000 bootstrap replicates.
*   Use np.random.exponential() to draw 100,000 samples from the theoretical distribution. Hint: The mean for the distribution is b/np.log(10), and you need to add mt to your samples to appropriately handle the location parameter. Store the result in m_theor.
*   Plot the ECDF of m_theor as a line.
*   Plot the ECDF of all magnitudes above mt as dots. Hint: You need to use Boolean indexing to slice out magnitudes at or above mt from the mags array.
*   Hit 'Submit Answer' to display the plot and print the b-value and confidence interval to the screen.
'''

# Compute b-value and its 95% confidence interval (2.5th / 97.5th percentiles)
# NOTE(review): `b_value`, `mags` and `mt` are not defined in the visible
# part of this script.
b, conf_int = b_value(mags, mt, perc=[2.5, 97.5], n_reps=10000)

# Generate samples for the theoretical ECDF: Exponential with mean
# b / ln(10), shifted by mt
m_theor = np.random.exponential(b/np.log(10), size=100000) + mt

# Plot the theoretical CDF (as a line)
_ = plt.plot(*dcst.ecdf(m_theor))

# Plot the ECDF of the data as dots (slicing mags >= mt)
_ = plt.plot(*dcst.ecdf(mags[mags >= mt]), marker='.', linestyle='none')

# Pretty up and show the plot
_ = plt.xlabel('magnitude')
_ = plt.ylabel('ECDF')
_ = plt.xlim(2.8, 6.2)

plt.show()

# Report the results
print("""
b-value: {0:.2f}
95% conf int: [{1:.2f}, {2:.2f}]""".format(b, *conf_int))
Beispiel #18
0
    result = win_payout + place_payout + did_not_place_payout - total_money_wagered

    # append the result
    results.append(result)

    # print to track number of nationals run
    #print('Number of nationals run: ',n+1)

# Summary statistics of the simulated results: mean, median and the
# 2.5th / 97.5th percentiles.
mean_result = np.mean(results)
median_result = np.median(results)
ci_95 = np.percentile(results, [2.5, 97.5])

# Tick formatter that renders x values as whole pounds with thousands
# separators.
fmt = '£{x:,.0f}'
tick = mtick.StrMethodFormatter(fmt)
fig, ax = plt.subplots(1, 1)
# ECDF of the results as unconnected dots
_ = plt.plot(*dcst.ecdf(results), marker='.', linestyle='none')
ax.xaxis.set_major_formatter(tick)
_ = plt.xticks(rotation=25)
# Disabled: vertical reference lines at the mean and the percentile bounds.
#_ = plt.axvline(x=mean_result,color='k')
#_ = plt.axvline(x=ci_95[0],color='g')
#_ = plt.axvline(x=ci_95[1],color='g')
_ = plt.xlabel('Net Return (£)')
_ = plt.ylabel('ECDF')
_ = plt.title('Grand National Simulated Returns')

plt.tight_layout()
plt.show()

print('''
      Mean expected return: £{0:,.2f}
      Median expected return: £{1:,.2f}
Beispiel #19
0
As usual, you will start with EDA and plot the ECDF of the magnitudes of earthquakes detected in the Parkfield region from 1950 to 2016. The magnitudes of all earthquakes in the region from the ANSS ComCat are stored in the Numpy array mags.

When you do it this time, though, take a shortcut in generating the ECDF. You may recall that putting an asterisk before an argument in a function splits what follows into separate arguments. Since dcst.ecdf() returns two values, we can pass them as the x, y positional arguments to plt.plot() as plt.plot(*dcst.ecdf(data_you_want_to_plot)).

You will use this shortcut in this exercise and going forward.
'''

import numpy as np
import dc_stat_think as dcst
import matplotlib.pyplot as plt

# Load one column (index 4) from the comma-separated catalog file, skipping
# the first 3 rows and treating lines starting with '#' as comments.
mags = np.loadtxt('../datasets/parkfield_earthquakes_1950-2017.csv',
                  delimiter=',',
                  comments='#',
                  skiprows=3,
                  usecols=4)
'''
INSTRUCTIONS

*   Generate a plot of the ECDF in one line, using the *dcst.ecdf() approach described above. Call plt.plot() with the marker='.' and linestyle='none' keyword arguments as usual.
*   Label the x-axis 'magnitude', y-axis 'ECDF', and show the plot.
'''

# Make the plot: the two values returned by dcst.ecdf() are unpacked
# directly into plt.plot() as its x and y arguments
plt.plot(*dcst.ecdf(mags), marker='.', linestyle='none')

# Label axes and show plot
_ = plt.xlabel('magnitude')
_ = plt.ylabel('ECDF')
plt.show()
Beispiel #20
0
# EDA: Plot ECDFs of active bout length
# -------------------------------------------------------

# Import the dc_stat_think module as dcst
import dc_stat_think as dcst

# Generate x and y values for plotting ECDFs (one x/y pair per genotype)
# NOTE(review): bout_lengths_wt, bout_lengths_mut, plt and np are not
# defined in the visible part of this script.
x_wt, y_wt = dcst.ecdf(bout_lengths_wt)
x_mut, y_mut = dcst.ecdf(bout_lengths_mut)

# Plot the ECDFs as unconnected dots on the same axes
_ = plt.plot(x_wt, y_wt, marker='.', linestyle='none')
_ = plt.plot(x_mut, y_mut, marker='.', linestyle='none')

# Make a legend (entries follow plotting order), label axes, and show plot
_ = plt.legend(('wt', 'mut'))
_ = plt.xlabel('active bout length (min)')
_ = plt.ylabel('ECDF')
plt.show()

# -------------------------------------------------------
# Parameter estimation: active bout length
# -------------------------------------------------------

# Compute mean active bout length for each genotype
mean_wt = np.mean(bout_lengths_wt)
mean_mut = np.mean(bout_lengths_mut)

# Draw 10,000 bootstrap replicates of the mean for each genotype
bs_reps_wt = dcst.draw_bs_reps(bout_lengths_wt, np.mean, size=10000)
bs_reps_mut = dcst.draw_bs_reps(bout_lengths_mut, np.mean, size=10000)
'''

# Compute the mean time gap: mean_time_gap
# NOTE(review): `time_gap` is not defined in the visible part of this script.
mean_time_gap = np.mean(time_gap)

# Standard deviation of the time gap: std_time_gap
std_time_gap = np.std(time_gap)

# Generate theoretical Exponential distribution of timings: time_gap_exp
# (10,000 samples, scale set to the sample mean)
time_gap_exp = np.random.exponential(mean_time_gap, size=10000)

# Generate theoretical Normal distribution of timings: time_gap_norm
# (10,000 samples, using the sample mean and standard deviation)
time_gap_norm = np.random.normal(mean_time_gap, std_time_gap, size=10000)

# Plot theoretical CDFs (as lines)
_ = plt.plot(*dcst.ecdf(time_gap_exp))
_ = plt.plot(*dcst.ecdf(time_gap_norm))

# Plot Parkfield ECDF, requested with formal=True over x from -10 to 50
_ = plt.plot(*dcst.ecdf(time_gap, formal=True, min_x=-10, max_x=50))

# Add legend (labels are supplied for the two theoretical curves only)
_ = plt.legend(('Exp.', 'Norm.'), loc='upper left')

# Label axes, set limits and show plot
_ = plt.xlabel('time gap (years)')
_ = plt.ylabel('ECDF')
_ = plt.xlim(-10, 50)
plt.show()
'''
By eye, the Gaussian model seems to describe the observed data best. We will investigate the consequences of this in the next exercise, and see if we can reject the Exponential model in coming exercises.
Beispiel #22
0
    114.36, 121.77, 108.23, 107.47, 118.41, 108.29, 106.00, 109.32,
    111.49, 112.92, 117.38, 110.95, 108.27, 111.78, 107.87, 110.77,
    109.05, 111.00, 108.77, 106.10, 106.61, 113.68, 108.20, 106.20,
    111.01, 109.25, 112.00, 118.55, 109.56, 108.18, 111.67, 108.09,
    110.04, 113.97, 109.91, 112.12, 111.65, 110.18, 116.36, 124.59,
    115.59, 121.01, 106.88, 108.96, 109.09, 108.67, 109.60, 111.85,
    118.54, 108.12, 124.38, 107.17, 107.48, 106.65, 106.91, 140.68,
    117.93, 120.66, 111.29, 107.10, 108.49, 112.43, 110.61, 110.38,
    109.87, 106.73, 107.18, 110.98, 108.55, 114.31, 112.05
])
    
'''
INSTRUCTIONS

*   Generate x and y values for the ECDF using dcst.ecdf(). The swim times of the heats are stored in the numpy array mens_200_free_heats.
*   Plot the ECDF as dots. Remember to specify the appropriate marker and linestyle.
*   Label the axes and show the plot. Use 'time (s)' as the x-axis label and 'ECDF' as the y-axis label.
'''

# Generate x and y values for ECDF: x, y
x, y = dcst.ecdf(mens_200_free_heats)

# Plot the ECDF as unconnected dots
_ = plt.plot(x, y, marker='.', linestyle='none')

# Label axes and show plot
_ = plt.xlabel('time (s)')
_ = plt.ylabel('ECDF')

plt.show()
def test_ecdf(data):
    """Check ``dcst.ecdf`` against the reference ``original.ecdf``.

    Both coordinate arrays must agree within ``atol`` (read from the
    enclosing module), with NaNs in matching positions treated as equal.
    """
    x_got, y_got = dcst.ecdf(data)
    x_ref, y_ref = original.ecdf(data)

    for got, ref in ((x_got, x_ref), (y_got, y_ref)):
        assert np.allclose(got, ref, atol=atol, equal_nan=True)
                 usecols=['time', 'mag'])

# Convert each index entry in the '1980-01':'2017-06' slice to a fractional
# calendar year: POSIX seconds divided by 31556925.9747 (approximately the
# number of seconds in one year), offset by the 1970 epoch.
# NOTE(review): `df` is built above this view; it is assumed to have a
# datetime index and a `mag` column.
time = np.array([
    d.timestamp() / 31556925.9747 + 1970
    for d in df['1980-01':'2017-06'].index.to_pydatetime()
])
# Magnitudes for the same slice, as raw values
mags = df['1980-01':'2017-06'].mag.values
'''
INSTRUCTIONS

*   Use Boolean indexing to slice out the magnitudes of all earthquakes before 2010 and store the result in mags_pre. Similarly, generate a numpy array mags_post that has all magnitudes of earthquakes in and after 2010.
*   Use plt.plot() with a *dcst.ecdf(____) argument to make ECDFs for pre- and post- 2010 earthquake magnitudes. Remember to specify arguments for the marker and linestyle parameters.
*   Hit 'Submit Answer' to view the plot.
'''

# Get magnitudes before and after 2010 (Boolean masks built from `time`)
mags_pre = mags[time < 2010]
mags_post = mags[time >= 2010]

# Generate ECDFs (unconnected dots, both on the same axes)
_ = plt.plot(*dcst.ecdf(mags_pre), marker='.', linestyle='none')
_ = plt.plot(*dcst.ecdf(mags_post), marker='.', linestyle='none')

# Label axes and show plot; legend entries follow plotting order
_ = plt.xlabel('magnitude')
_ = plt.ylabel('ECDF')
plt.legend(('1980 through 2009', '2010 through mid-2017'), loc='upper left')
plt.show()
'''
Both curves seem to follow the Gutenberg-Richter Law, but with different completeness thresholds, probably due to improvements in sensing capabilities in more recent years.
'''
Beispiel #25
0
# - The aftershock_threshold can be adjusted.
# %% [markdown]
# Third part: Underlying distribution of time between earthquakes<br>
# We will use the K-S test to test whether the data at hand is different from theoretical distributions (normal or exponential)
#

# %%
# 1. Calculate parameters from our data to use for the hypothesized distribution
mean_days_between_earthquakes = np.mean(days_between_eqs)
# The spread parameter must be the standard deviation: it is reported as
# "Std" below and is named as such (np.median would give a location
# measure instead).
std_days_between_earthquakes = np.std(days_between_eqs)
print('Mean = {}, Std = {}'.format(mean_days_between_earthquakes,
                                   std_days_between_earthquakes))

# %%
# 2. Formal ecdf vs dot ecdf
# First figure: ECDF requested with formal=False, drawn as unconnected dots.
_ = plt.plot(*dcst.ecdf(days_between_eqs, formal=False),
             linestyle='none',
             marker='.')
plt.show()
# Second figure: ECDF requested with formal=True, drawn as a line.
_ = plt.plot(*dcst.ecdf(days_between_eqs, formal=True))
plt.show()


# %%
# 3. Define function to help calculate K-S stat
def ks_stat(data1, data2):
    '''
    Calculates the ks_stat between two datasets
    '''
    # Compute ECDF from data: x, y
    x, y = dcst.ecdf(data1)
Beispiel #26
0
def plot_ecdf(data, xlabel):
    """Plot the ECDF of ``data`` as a line with the y-axis scaled to percent.

    Parameters
    ----------
    data : array-like
        Values passed to ``dcst.ecdf``.
    xlabel : str
        Label for the x-axis.
    """
    values, fractions = dcst.ecdf(data)
    percent = fractions * 100

    plt.plot(values, percent)
    plt.xlabel(xlabel, size=14)
    plt.ylabel('Percent', size=14)
    plt.show()