def test_initialise_from_not_run_detector(example_detector):

    det = example_detector
    evt = EvaluationTool()

    # Todo: throw a more informative error rather than print statement + AttributeError
    with pytest.raises(AttributeError):
        evt.build_EvaluationTool_via_run_detector(det)


def test_constructor():

    # Initialise an EvaluationTool
    evt = EvaluationTool()

    # Check that general properties have been set correctly
    assert evt.type is None
    assert evt.has_true_CPs is False
    assert evt.true_CP_location is None
    assert evt.true_CP_model_index is None
    assert evt.true_CP_model_label is None

    # Check that properties related to plotting functionality are of the correct type
    # (but we aren't concerned about precisely which colours/linestyles are used).
    possible_colours = ('b', 'g', 'r', 'c', 'm', 'y', 'k', 'w')
    possible_linestyles = ('-', '--', '-.', ':')
    assert evt.cushion > 0
    assert evt.CP_color in possible_colours
    assert evt.median_color in possible_colours
    assert evt.max_color in possible_colours
    assert all([c in possible_colours for c in evt.colors])
    assert all([l in possible_linestyles for l in evt.linestyle])

    # Check that the correct list of names has been prepared - results corresponding to this list should be added later
    assert evt.names == [
        "names", "execution time", "S1", "S2", "T", "trimmer threshold",
        "MAP CPs", "model labels", "run length log distribution",
        "model and run length log distribution",
        "one-step-ahead predicted mean", "one-step-ahead predicted variance",
        "all run length log distributions",
        "all model and run length log distributions",
        "all retained run lengths", "has true CPs", "true CP locations",
        "true CP model index", "true CP model labels"
    ]


def test_initialise_from_run_detector(example_detector):

    det = example_detector
    det.run()
    evt = EvaluationTool()
    evt.build_EvaluationTool_via_run_detector(det)

    # If initialised with a detector that has already been run, the evt should be type 4 (of 4)
    assert evt.type == 4

    # Check that the evt created using this detector has stored the expected values
    assert 1.6232197650790918 == pytest.approx(np.mean(evt.MSE), 1e-8)
    assert 1.6059882853108407 == pytest.approx(
        np.mean(evt.negative_log_likelihood), 1e-8)

    # Check the structure of the list of results
    assert evt.names == evt.results[
        0]  # Names of quantities are duplicated; should be the same
    assert len(evt.results[0]) == len(
        evt.results
    )  # Check that the number of names corresponds to the no. results
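

# The checks above rely on the convention that results[0] holds the list of
# names and results[i] holds the quantity named results[0][i]. A small helper
# (illustrative only, not part of EvaluationTool) makes that lookup pattern
# explicit:
def get_result(evt, name):
    """Return the stored quantity whose name matches `name`."""
    return evt.results[evt.results[0].index(name)]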


def test_save_and_initialise_from_results(example_detector, tmpdir):

    det = example_detector
    det.run()
    evt = EvaluationTool()
    evt.build_EvaluationTool_via_run_detector(det)

    # Save to HD
    evt.store_results_to_HD(tmpdir.join("test_save_evt.txt"))
    assert os.path.isfile(tmpdir.join("test_save_evt.txt"))

    # Load into a new evt
    evt_load = EvaluationTool()
    evt_load.build_EvaluationTool_via_results(tmpdir.join("test_save_evt.txt"))

    # Check that key quantities have been restored
    assert evt_load.type == 4  # Detector has already been run
    assert evt_load.results[1] == evt.results[1]  # execution time
    for rlld_l, rlld in zip(evt_load.results[8],
                            evt.results[8]):  # run length log distribution
        assert rlld_l == rlld


def test_add_true_cps(example_detector):

    det = example_detector
    det.run()

    true_cp_index = 50
    true_cp_model_index = 2

    # Add CPs to the EvT before setting it up with the detector:

    # Set up the EvT
    evt = EvaluationTool()
    assert evt.has_true_CPs is False
    evt.add_true_CPs(true_cp_index, true_cp_model_index)
    assert evt.has_true_CPs is True

    # Build the evt using the detector, then check that true CP is stored in results
    evt.build_EvaluationTool_via_run_detector(det)
    assert evt.results[evt.results[0].index("has true CPs")] is True
    assert evt.results[evt.results[0].index(
        "true CP locations")] == true_cp_index
    assert evt.results[evt.results[0].index(
        "true CP model index")] == true_cp_model_index
    assert evt.results[evt.results[0].index("true CP model labels")] is None

    # Add CPs to the EvT after setting it up with the detector:

    # Set up the EvT
    evt2 = EvaluationTool()
    evt2.build_EvaluationTool_via_run_detector(det)
    evt2.add_true_CPs(true_cp_index, true_cp_model_index)

    # Check that CPs are still stored correctly if added after the detector has been run
    assert evt2.has_true_CPs is True
    assert evt2.results[evt2.results[0].index("has true CPs")] is True
    assert evt2.results[evt2.results[0].index(
        "true CP locations")] == true_cp_index
    assert evt2.results[evt2.results[0].index(
        "true CP model index")] == true_cp_model_index
    assert evt2.results[evt2.results[0].index("true CP model labels")] is None


# ===== Example 6 =====

    notifications=25,
    save_performance_indicators=True,
    generalized_bayes_rld=rld,  # e.g. "power_divergence" or "kullback_leibler"
    alpha_param_learning=param_learning,  # e.g. "together" or "individual"
    alpha_param=alpha_param,
    alpha_param_opt_t=100,
    alpha_rld=alpha_rld,
    alpha_rld_learning=rld_learning,
    loss_der_rld_learning="absolute_loss")
detector.run()
"""STEP 7: Make graphing tool"""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_run_detector(detector)
"""STEP 8: Inspect convergence of the hyperparameters"""
for lag in range(0, len(model_universe)):
    plt.plot(
        np.linspace(1, len(detector.model_universe[lag].a_list),
                    len(detector.model_universe[lag].a_list)),
        np.array(detector.model_universe[lag].a_list))
    plt.plot(
        np.linspace(1, len(detector.model_universe[lag].b_list),
                    len(detector.model_universe[lag].b_list)),
        np.array(detector.model_universe[lag].b_list))
"""STEP 9+: Inspect convergence of alpha-rld"""
if detector.generalized_bayes_rld == "power_divergence" and mode == "DPD":
    plt.plot(detector.alpha_list)
    for lag in range(0, len(model_universe)):
        pass  # (per-model alpha convergence plots not shown in this fragment)
    model_universe = VAR0_list + VAR4_list + VAR8_list
    """detector"""
    model_universe = np.array(model_universe)
    model_prior = np.array(
        [1.0 / len(model_universe)] * len(model_universe))
    detector = Detector(
        data, model_universe, model_prior, cp_model,
        S1, S2, T, exo_data=None, num_exo_vars=None,
        threshold=200,
        store_rl=True, store_mrl=True)
    detector.run()
                                
    """build evaluation tool"""                           
    EvT = EvaluationTool()
    EvT.build_EvaluationTool_via_run_detector(detector)
else:
    """Directly read results"""
    result_path = baseline_working_directory
    EvT = EvaluationTool()
    EvT.build_EvaluationTool_via_results(result_path + "//" + "results_demo.txt") 


"""Plot in panels: Raw data (offsetting the mean), 
                         CPs + RLD
                         1-step-ahead-prediction + variance
                         model posterior"""
custom_colors_models = ['green', 'purple', 'orange', 'blue', 'darkgray']
custom_colors_series = ['black'] * 4
custom_linestyles = ['solid'] * 5
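
# Hedged sketch of the panel layout described in the docstring above. The
# EvaluationTool calls that actually fill these panels are not shown in this
# fragment, so the axes are only labelled; figure size and height ratios are
# illustrative.
fig, ax_array = plt.subplots(4, figsize=(8, 10), sharex=True,
                             gridspec_kw={'height_ratios': [8, 10, 8, 8]})
panel_titles = ["Raw data (mean-offset series)",
                "MAP CPs + run-length distribution",
                "1-step-ahead prediction + variance",
                "Model posterior"]
for ax, title in zip(ax_array, panel_titles):
    ax.set_title(title)
plt.subplots_adjust(hspace=.35)
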
raw_data_float = []
for entry in raw_data:
    raw_data_float.append(float(entry))
raw_data = raw_data_float
"""STEP 2: put into right form"""
T = int(len(raw_data) / 2)
S1, S2 = 1, 1
data = np.array(raw_data).reshape(T, 2)
dates = data[:, 0]
river_height = data[:, 1]
mean, variance = np.mean(river_height), np.var(river_height)
river_height = (river_height - mean) / np.sqrt(variance)
"""STEP 3: Get dates"""
all_dates = []
for i in range(622 + 2, 1285):
    all_dates.append(datetime.date(i, 1, 1))
"""Read in results"""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_results(results_file)
"""get MAP CPs in range"""
segmentation = np.array(EvT.results[EvT.names.index("MAP CPs")][-2])
models = np.union1d([e[1] for e in segmentation], [e[1] for e in segmentation])
"""Obtain the plot for RLD"""
start, stop = (2007 + 7 / 12), 2009
#start, stop = datetime.date(2007, 8, 1), datetime.date(2008, 12, 31)
height_ratio = [10, 14]
custom_colors = ["blue", "purple"
                 ]  #["green", "darkviolet", "orange", "purple", "turquoise"]
fig, ax_array = plt.subplots(2,
                             figsize=(8, 5),
                             sharex=True,
                             gridspec_kw={'height_ratios': height_ratio})
plt.subplots_adjust(hspace=.35, left=None, bottom=None, right=None, top=None)
with open(well_file) as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        raw_data += row

raw_data_float = []
for entry in raw_data:
    raw_data_float.append(float(entry))
raw_data = raw_data_float

T = int(len(raw_data))
S1, S2 = 1, 1  # S1, S2 give you spatial dimensions
data = np.array(raw_data).reshape(T, 1, 1)

"""STEP 2: Read in the data to create your EvT objects"""
EvTKL = EvaluationTool()
EvTKL.build_EvaluationTool_via_results(result_path + "//" + results_file_KL) 

EvTDPD = EvaluationTool()
EvTDPD.build_EvaluationTool_via_results(result_path + "//" + results_file_DPD) 


if plot1:
    """STEP 3: Set up the figure properties and plot"""
    height_ratio = [8, 10]
    custom_colors = ["blue", "purple"]
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2,
                                                 figsize=(12, 5),
                                                 sharex='col',
                                                 sharey='row',
                                                 gridspec_kw={'height_ratios': height_ratio})
                                        model_universe=model_universe,
                                        model_prior=model_prior,
                                        cp_model=cp_model,
                                        S1=S1,
                                        S2=S2,
                                        T=T,
                                        store_rl=True,
                                        store_mrl=True,
                                        trim_type="keep_K",
                                        threshold=100,
                                        notifications=100,
                                        save_performance_indicators=True,
                                        training_period=test_obs)
                    detector.run()
                    """Store results + real CPs into EvaluationTool obj"""
                    EvT = EvaluationTool()
                    EvT.build_EvaluationTool_via_run_detector(detector)
                    """store that EvT object onto hard drive"""
                    prior_spec_str = ("//time_frame=" + time_frame +
                                      "//transform=" +
                                      str(heavy_tails_transform) + "//a=" +
                                      str(a) + "//b=" + str(b))
                    detector_path = baseline_working_directory + prior_spec_str
                    if not os.path.exists(detector_path):
                        os.makedirs(detector_path)

                    results_path = detector_path + "//results.txt"
                    if not shortened:
                        EvT.store_results_to_HD(results_path)

                    fig = EvT.plot_predictions(indices=[0],
            """get the average for each 2h-interval for each weekday"""
            for _2h in range(0, 12 * 7):
                selection_2h = ([False] * _2h + [True] +
                                [False] * (12 * 7 - 1 - _2h))
                selection = (selection_2h * int(T / (7 * 12)) +
                             selection_2h[:(T - int(T / (7 * 12)) * 7 * 12)])
                mean_2h[_2h, station] = np.mean(data[selection, station])
                data[selection, station] = (data[selection, station] -
                                            mean_2h[_2h, station])
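                # Sanity check of the mask logic above: the weekly pattern has
                # length 12 * 7 = 84, so for e.g. _2h = 5 and T = 180 the
                # selected indices are 5, 89 and 173, i.e. the same weekday /
                # 2h-slot in every (possibly partial) week.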

if normalize:
    data = (data - np.mean(data, axis=0)) / np.sqrt(np.var(data, axis=0))
    intercept_priors = np.mean(data, axis=0)
"""""STEP 2: READ RESULTS""" ""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_results(results_file)

segmentation = EvT.results[EvT.names.index("MAP CPs")][-2]
model_labels = EvT.results[EvT.names.index("model labels")]
num_models = len(np.union1d(model_labels, model_labels))
relevant_models = np.union1d([seg[1] for seg in segmentation],
                             [seg[1] for seg in segmentation])
#mods = [8,11,13,17,18]
all_models = list(range(len(model_labels)))
"""Get dates"""


def perdelta(start, end, delta, date_list):
    curr = start
    while curr < end:
        date_list.append(curr)
        curr += delta
# Set paths to original data and stored results
baseline_working_directory = os.getcwd()
nile_file = os.path.join(baseline_working_directory, "Data", "nile.txt")
results_file = os.path.join(baseline_working_directory, "Output", "results_nile.txt")
if not os.path.isfile(results_file):
    print("\nCould not find results_nile.txt in the Output directory. Have you run nile_ICML18.py?\n")

"""STEP 1: Read in data and convert to appropriate format"""

# Extract data (height and date) and properties (T: num years; S1 and S2: spatial dimensions) from csv file
T, S1, S2, river_height, unstandardised_river_height, dates = load_nile_data(nile_file)
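
# For reference, a hedged sketch of what a load_nile_data helper could look
# like, reconstructed from the raw-data handling shown in other fragments of
# this file (flat csv of alternating date/height entries, reshaped to (T, 2),
# heights standardised). The real helper used by nile_ICML18.py may differ, so
# this sketch is named with a _sketch suffix to avoid shadowing it.
def load_nile_data_sketch(path):
    import csv
    raw = []
    with open(path) as csvfile:
        for row in csv.reader(csvfile):
            raw += row
    raw = [float(entry) for entry in raw]
    T = int(len(raw) / 2)   # one (date, height) pair per year
    S1, S2 = 1, 1           # spatial dimensions of the univariate series
    data = np.array(raw).reshape(T, 2)
    dates = data[:, 0]
    unstandardised = data[:, 1]
    standardised = ((unstandardised - np.mean(unstandardised)) /
                    np.sqrt(np.var(unstandardised)))
    return T, S1, S2, standardised, unstandardised, dates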

"""Step 2: Generate EvaluationTool based on the results generated with nile_ICML18.py"""

# Read in results
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_results(results_file) 

# Get MAP CPs in range
print("Identified", len(EvT.results[EvT.names.index("MAP CPs")][-2]), "MAP CPs at years: ")
[print("\t", int(dates[m[0]])) for m in EvT.results[EvT.names.index("MAP CPs")][-2]]

"""STEP 3: Generate run-length distribution plot for paper"""

# Prepare the axes
fig, ax_array = plt.subplots(2, figsize=(8, 5), sharex=True,
                             gridspec_kw={'height_ratios': [10, 14]})
plt.subplots_adjust(hspace=.35, left=None, bottom=None, right=None, top=None)

# Placement of y-labels
ylabel_coords = [-0.065, 0.5]
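
# Hedged sketch of how these axes might be filled: the raw Nile series on top
# and the run-length distribution below. The plot_run_length_distr keyword
# arguments mirror those used in a later example in this file; passing an axis
# handle (ax=...) is an assumption, not something these fragments document.
ax_array[0].plot(dates, unstandardised_river_height, color='black')
ax_array[0].set_ylabel("Nile height")
ax_array[0].get_yaxis().set_label_coords(ylabel_coords[0], ylabel_coords[1])
EvT.plot_run_length_distr(buffer=0,
                          show_MAP_CPs=True,
                          mark_median=False,
                          mark_max=True,
                          print_colorbar=True,
                          colorbar_location='bottom',
                          log_format=True,
                          ax=ax_array[1])  # `ax` keyword assumed here
ax_array[1].get_yaxis().set_label_coords(ylabel_coords[0], ylabel_coords[1])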


# ===== Example 13 =====

"""STEP 6: Build and run detector"""
detector = Detector(data=data, model_universe=model_universe,
                    model_prior=model_prior,
                    cp_model=cp_model, S1=S1, S2=S2, T=T,
                    store_rl=True, store_mrl=True,
                    trim_type="keep_K", threshold=200,
                    notifications=100,
                    save_performance_indicators=True,
                    training_period=250)
detector.run()


"""STEP 7: give some results/pictures/summaries"""

"""Store results + real CPs into EvaluationTool obj"""
EvT = EvaluationTool()
EvT.add_true_CPs(true_CP_location=true_CP_location,
                 true_CP_model_index=true_CP_location,
                 true_CP_model_label=-1)
EvT.build_EvaluationTool_via_run_detector(detector)

print("convergence diagnostics for on-line hyperparameter opt:")
plt.plot(np.linspace(1,len(detector.model_universe[0].a_list), 
                     len(detector.model_universe[0].a_list)), 
         np.array(detector.model_universe[0].a_list))
plt.plot(np.linspace(1,len(detector.model_universe[0].b_list),
                     len(detector.model_universe[0].b_list)), 
         np.array(detector.model_universe[0].b_list))

fig = EvT.plot_run_length_distr(
    time_range = np.linspace(1,
    model_universe=model_universe,
    model_prior=model_prior,
    cp_model=cp_model,
    S1=S1,
    S2=S2,
    T=T,
    store_rl=True,
    store_mrl=True,
    trim_type="keep_K",
    threshold=200,
    training_period=25,  #i.e., we let 2 years pass before MSE computed
    notifications=1500,
    save_performance_indicators=True)
detector.run()
"""Store results + real CPs into EvaluationTool obj"""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_run_detector(detector)
"""plot transformed data"""
fig = EvT.plot_raw_TS(data.reshape(T, 1))
"""plot prediction error"""
fig = EvT.plot_prediction_error(data,
                                indices=[0],
                                print_plt=True,
                                time_range=np.linspace(2 * 365,
                                                       T - upper_AR - 1,
                                                       T - upper_AR - 1 -
                                                       2 * 365,
                                                       dtype=int))
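# Note: the time_range above is (roughly) the consecutive integer time indices
# from 2 * 365 up to T - upper_AR - 1, i.e. it skips what appears to be the
# first two years of daily observations as well as the last AR lags.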
"""plot predictions themselves"""
fig = EvT.plot_predictions(indices=[0],
                           print_plt=True,


# ===== Example 15 =====

    cp_model=cp_model,
    S1=S1,
    S2=S2,
    T=T,
    store_rl=True,
    store_mrl=True,
    trim_type="keep_K",
    threshold=200,
    notifications=100,
    save_performance_indicators=True,
    generalized_bayes="power_divergence",
    alpha=5,  # values also tried: 0.0005, 0.05, 0.1, 0.25, 0.5, 1.0 and upwards
    generalized_bayes_hyperparameter_learning=True)
detector.run()
"""Store results + real CPs into EvaluationTool obj"""
EvT = EvaluationTool()
#EvT.add_true_CPs(true_CP_location=true_CP_location,
#                 true_CP_model_index=true_CP_location,
#             true_CP_model_label = -1)
EvT.build_EvaluationTool_via_run_detector(detector)

minus = 3
EvT.plot_run_length_distr(
    buffer=0,
    show_MAP_CPs=True,
    mark_median=False,
    mark_max=True,
    upper_limit=T - 2 - minus,
    print_colorbar=True,
    colorbar_location='bottom',
    log_format=True,
            """get the average for each 2h-interval for each weekday"""
            for _2h in range(0, 12 * 7):
                selection_2h = ([False] * _2h + [True] +
                                [False] * (12 * 7 - 1 - _2h))
                selection = (selection_2h * int(T / (7 * 12)) +
                             selection_2h[:(T - int(T / (7 * 12)) * 7 * 12)])
                mean_2h[_2h, station] = np.mean(data[selection, station])
                data[selection, station] = (data[selection, station] -
                                            mean_2h[_2h, station])

if normalize:
    data = (data - np.mean(data, axis=0)) / np.sqrt(np.var(data, axis=0))
    intercept_priors = np.mean(data, axis=0)
"""""STEP 2: READ RESULTS""" ""
EvTKL, EvTDPD = EvaluationTool(), EvaluationTool()
EvTKL.build_EvaluationTool_via_results(results_file_KL)
EvTDPD.build_EvaluationTool_via_results(results_file_DPD)
"""Get dates"""


def perdelta(start, end, delta, date_list):
    curr = start
    while curr < end:
        #yield curr
        date_list.append(curr)
        curr += delta


all_dates = []
#start_year, start_month, start_day, start_hour = 2002, 8, 17, 0
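# Example use of perdelta (illustrative; the start/end/delta actually used by
# the original script are not shown in this fragment): with the commented-out
# start date above and 2-hourly steps, this fills a list with the twelve
# two-hourly timestamps of 17 Aug 2002.
import datetime  # standard library; likely already imported at the top of the original script
example_dates = []
perdelta(datetime.datetime(2002, 8, 17, 0),
         datetime.datetime(2002, 8, 18, 0),
         datetime.timedelta(hours=2),
         example_dates)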


# ===== Example 17 =====

count = 0 
with open(baseline_working_directory + "//" + date_file) as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        #DEBUG: Unclear if this is needed
        if count > 0:
            myl += row
        count += 1
        if count % 2500 == 0:
            print(count)
dates = []
for e in myl:
    dates.append(int(e))
                              
result_path = baseline_working_directory
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_results(result_path + "//" + results_file) 

"""Using the dates, select your range and indices: select 
03/07/1975 -- 31/12/2008, i.e. find indices that correspond"""
start_test = dates.index(19740703)
start_algo = dates.index(19750703)
stop = dates.index(20081231)

"""time period for which we want RLD"""
start_comparison = dates.index(20070801)  # alternative: dates.index(19980102)
stop_comparison = stop

all_dates = []
for d in dates[start_comparison:stop-2]:
    s = str(d)
    # dates are stored as yyyymmdd integers; convert to datetime.date
    all_dates.append(datetime.date(int(s[:4]), int(s[4:6]), int(s[6:8])))
        if cp == 0:
            next_AR1 = next_AR1[burn_in:]

        #add the next AR 1 stream into 'data'
        data[start:fin, i, 0] = next_AR1
"""STEP 3: Set up analysis parameters"""
S1, S2 = K, 1  #S1, S2 give you spatial dimensions
if normalize:
    data = (data - np.mean(data)) / np.sqrt(np.var(data))
"""STEP 4: Offset"""
if offset:
    for i in range(0, min(K, max_num_plotted_series)):
        data[:, i, :] = data[:, i, :] + i * 7
"""#CREATE THE PICTURES USING THE STORED RESULTS#"""

EvTKL = EvaluationTool()
EvTKL.build_EvaluationTool_via_results(result_path + "//" + results_file_KL)

EvTDPD = EvaluationTool()
EvTDPD.build_EvaluationTool_via_results(result_path + "//" + results_file_DPD)
"""STEP 1: Set up the plot configs"""

#mpl.rcParams.update(mpl.rcParamsDefault)
#height_ratio =[5,5,5,5,5]
#custom_colors = ["blue", "purple"]
#fig, ax_array = plt.subplots(5, figsize=(5,5), sharex = True,
#                             gridspec_kw = {'height_ratios':height_ratio})
#plt.subplots_adjust(hspace = .35, left = None, bottom = None,
#                    right = None, top = None)
if singlePlot:
    fig, ax = plt.subplots(1, figsize=(8, 5))
                                axis=0)
            """Fill in and obtain normal and demeaned versions"""
            temperatures_spatial[selection, location] = (
                station_means[location] + month_means[location, month] +
                year_effect_controlling_for_months)

            temperatures_spatial_demeaned[selection2, location] = (
                temperatures_spatial[selection2, location] -
                station_means[location] - month_means[location, month])
"""Normalize"""
temperatures_spatial_demeaned = (temperatures_spatial_demeaned - np.mean(
    temperatures_spatial_demeaned, axis=0)) / np.sqrt(
        np.var(temperatures_spatial_demeaned, axis=0))
"""STEP 3: Read in the results"""
"""Read in results"""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_results(results_file)
"""STEP 4: Get your plots"""
segmentation = EvT.results[EvT.names.index("MAP CPs")][-2]
model_labels = EvT.results[EvT.names.index("model labels")]
num_models = len(np.union1d(model_labels, model_labels))
relevant_models = np.union1d([seg[1] for seg in segmentation],
                             [seg[1] for seg in segmentation])
mods = [8, 11, 13, 17, 18]
all_models = np.linspace(0,
                         len(model_labels) - 1,
                         len(model_labels),
                         dtype=int)
""""STEP 5: Get annotations"""
#http://file.scirp.org/pdf/ACS_2013062615184222.pdf
#https://en.wikipedia.org/wiki/History_of_climate_change_science
        threshold=50,
        notifications=100,
        save_performance_indicators=True,
        generalized_bayes_rld=rld_KL,
        alpha_param_learning=param_learning,
        alpha_param=alpha_param,
        alpha_param_opt_t=100,
        alpha_rld=alpha_rld,
        alpha_rld_learning=rld_learning,
        loss_der_rld_learning="absolute_loss")
detector_KL.run()


"""STEP 7: Make graphing tool"""
EvTDPD = EvaluationTool()
EvTDPD.build_EvaluationTool_via_run_detector(detector_DPD)
EvTKL = EvaluationTool()
EvTKL.build_EvaluationTool_via_run_detector(detector_KL)
        
    
"""STEP 8: Plotting Pictures in paper"""
matplotlib.rcParams.update({'figure.autolayout': False})


"""Get the different CPs"""
CPsDPD = np.array([e[0] for e in EvTDPD.results[EvTDPD.names.index("MAP CPs")][-2]])
CPsKL = np.array([e[0] for e in EvTKL.results[EvTKL.names.index("MAP CPs")][-2]])

k = 25
additional_CPs = []
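
# Hedged guess (the original criterion is not shown in this fragment): one
# natural way to fill additional_CPs is with the KL changepoints that lie more
# than k time steps away from every DPD changepoint.
for cp_kl in CPsKL:
    if np.all(np.abs(CPsDPD - cp_kl) > k):
        additional_CPs.append(cp_kl)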