def test_initialise_from_not_run_detector(example_detector):
    det = example_detector
    evt = EvaluationTool()
    # Todo: throw a more informative error rather than print statement + AttributeError
    with pytest.raises(AttributeError):
        evt.build_EvaluationTool_via_run_detector(det)
def test_constructor():
    # Initialise an EvaluationTool
    evt = EvaluationTool()

    # Check that general properties have been set correctly
    assert evt.type is None
    assert evt.has_true_CPs is False
    assert evt.true_CP_location is None
    assert evt.true_CP_model_index is None
    assert evt.true_CP_model_label is None

    # Check that properties related to plotting functionality are of the correct type
    # (but we aren't concerned about precisely which colours/linestyles are used).
    possible_colours = ('b', 'g', 'r', 'c', 'm', 'y', 'k', 'w')
    possible_linestyles = ('-', '--', '-.', ':')
    assert evt.cushion > 0
    assert evt.CP_color in possible_colours
    assert evt.median_color in possible_colours
    assert evt.max_color in possible_colours
    assert all([c in possible_colours for c in evt.colors])
    assert all([l in possible_linestyles for l in evt.linestyle])

    # Check that the correct list of names has been prepared - results
    # corresponding to this list should be added later
    assert evt.names == [
        "names", "execution time", "S1", "S2", "T", "trimmer threshold",
        "MAP CPs", "model labels", "run length log distribution",
        "model and run length log distribution",
        "one-step-ahead predicted mean", "one-step-ahead predicted variance",
        "all run length log distributions",
        "all model and run length log distributions",
        "all retained run lengths", "has true CPs", "true CP locations",
        "true CP model index", "true CP model labels"
    ]
def test_initialise_from_run_detector(example_detector):
    det = example_detector
    det.run()
    evt = EvaluationTool()
    evt.build_EvaluationTool_via_run_detector(det)

    # If initialised with a detector that has already been run, the evt should be type 4 (of 4)
    assert evt.type == 4

    # Check that the evt created using this detector has stored the expected values
    assert 1.6232197650790918 == pytest.approx(np.mean(evt.MSE), 1e-8)
    assert 1.6059882853108407 == pytest.approx(
        np.mean(evt.negative_log_likelihood), 1e-8)

    # Check the structure of the list of results
    assert evt.names == evt.results[0]  # Names of quantities are duplicated; should be the same
    assert len(evt.results[0]) == len(evt.results)  # Number of names should match the number of results
def test_save_and_initialise_from_results(example_detector, tmpdir):
    det = example_detector
    det.run()
    evt = EvaluationTool()
    evt.build_EvaluationTool_via_run_detector(det)

    # Save to HD
    evt.store_results_to_HD(tmpdir.join("test_save_evt.txt"))
    assert os.path.isfile(tmpdir.join("test_save_evt.txt"))

    # Load into a new evt
    evt_load = EvaluationTool()
    evt_load.build_EvaluationTool_via_results(tmpdir.join("test_save_evt.txt"))

    # Check that key quantities have been restored
    assert evt_load.type == 4  # Detector has already been run
    assert evt_load.results[1] == evt.results[1]  # execution time
    for rlld_l, rlld in zip(evt_load.results[8], evt.results[8]):  # run length log distribution
        assert rlld_l == rlld
def test_add_true_cps(example_detector):
    det = example_detector
    det.run()
    true_cp_index = 50
    true_cp_model_index = 2

    # Add CPs to the EvT before setting it up with the detector:
    # Set up the EvT
    evt = EvaluationTool()
    assert evt.has_true_CPs is False
    evt.add_true_CPs(true_cp_index, true_cp_model_index)
    assert evt.has_true_CPs is True

    # Build the evt using the detector, then check that true CP is stored in results
    evt.build_EvaluationTool_via_run_detector(det)
    assert evt.results[evt.results[0].index("has true CPs")] is True
    assert evt.results[evt.results[0].index("true CP locations")] == true_cp_index
    assert evt.results[evt.results[0].index("true CP model index")] == true_cp_model_index
    assert evt.results[evt.results[0].index("true CP model labels")] is None

    # Add CPs to the EvT after setting it up with the detector:
    # Set up the EvT
    evt2 = EvaluationTool()
    evt2.build_EvaluationTool_via_run_detector(det)
    evt2.add_true_CPs(true_cp_index, true_cp_model_index)

    # Check that CPs are still stored correctly if added after the detector has been run
    assert evt2.has_true_CPs is True
    assert evt2.results[evt.results[0].index("has true CPs")] is True
    assert evt2.results[evt.results[0].index("true CP locations")] == true_cp_index
    assert evt2.results[evt.results[0].index("true CP model index")] == true_cp_model_index
    assert evt2.results[evt.results[0].index("true CP model labels")] is None
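# Quick-reference sketch of the EvaluationTool life cycle exercised by the tests
# above (illustrative only, not part of the test suite). The helper name and the
# results path are hypothetical; every method call used here appears in the tests.
def _example_evaluation_tool_workflow(det, results_file):
    """Build an EvT from a run detector, persist it, and rebuild it from disk.

    Assumes `det` is a Detector that has already been run and `results_file`
    is a writable path.
    """
    evt = EvaluationTool()
    evt.build_EvaluationTool_via_run_detector(det)  # type 4: built from a run detector
    evt.store_results_to_HD(results_file)           # persist results to disk

    evt_loaded = EvaluationTool()
    evt_loaded.build_EvaluationTool_via_results(results_file)
    # Named quantities are looked up via the parallel names/results lists.
    return evt_loaded.results[evt_loaded.names.index("MAP CPs")][-2]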
    notifications=25,
    save_performance_indicators=True,
    generalized_bayes_rld=rld,            # e.g. "power_divergence" or "kullback_leibler"
    alpha_param_learning=param_learning,  # e.g. "together" or "individual"
    alpha_param=alpha_param,
    alpha_param_opt_t=100,
    alpha_rld=alpha_rld,                  # e.g. pow(10, -5) or 0.25
    alpha_rld_learning=rld_learning,
    loss_der_rld_learning="absolute_loss")
detector.run()

"""STEP 7: Make graphing tool"""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_run_detector(detector)

"""STEP 8: Inspect convergence of the hyperparameters"""
for lag in range(0, len(model_universe)):
    plt.plot(
        np.linspace(1, len(detector.model_universe[lag].a_list),
                    len(detector.model_universe[lag].a_list)),
        np.array(detector.model_universe[lag].a_list))
    plt.plot(
        np.linspace(1, len(detector.model_universe[lag].b_list),
                    len(detector.model_universe[lag].b_list)),
        np.array(detector.model_universe[lag].b_list))

"""STEP 9+: Inspect convergence of alpha-rld"""
if detector.generalized_bayes_rld == "power_divergence" and mode == "DPD":
    plt.plot(detector.alpha_list)
    for lag in range(0, len(model_universe)):
        hyperpar_opt)
    model_universe = VAR0_list + VAR4_list + VAR8_list

    """detector"""
    model_universe = np.array(model_universe)
    model_prior = np.array(
        [1.0 / int(len(model_universe))] * int(len(model_universe)))
    detector = Detector(data, model_universe, model_prior, cp_model,
                        S1, S2, T, exo_data=None, num_exo_vars=None,
                        threshold=200, store_rl=True, store_mrl=True)
    detector.run()

    """build evaluation tool"""
    EvT = EvaluationTool()
    EvT.build_EvaluationTool_via_run_detector(detector)

else:
    """Directly read results"""
    result_path = baseline_working_directory
    EvT = EvaluationTool()
    EvT.build_EvaluationTool_via_results(result_path + "//" + "results_demo.txt")

"""Plot in panels: Raw data (offsetting the mean), CPs + RLD,
1-step-ahead prediction + variance, model posterior"""
custom_colors_models = ['green', 'purple', 'orange', 'blue', 'darkgray']
custom_colors_series = ['black'] * 4
custom_linestyles = ['solid'] * 5
    raw_data_float.append(float(entry))
raw_data = raw_data_float

"""STEP 2: put into right form"""
T = int(len(raw_data) / 2)
S1, S2 = 1, 1
data = np.array(raw_data).reshape(T, 2)
dates = data[:, 0]
river_height = data[:, 1]
mean, variance = np.mean(river_height), np.var(river_height)
river_height = (river_height - mean) / np.sqrt(variance)

"""STEP 3: Get dates"""
all_dates = []
for i in range(622 + 2, 1285):
    all_dates.append(datetime.date(i, 1, 1))

"""Read in results"""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_results(results_file)

"""get MAP CPs in range"""
segmentation = np.array(EvT.results[EvT.names.index("MAP CPs")][-2])
models = np.union1d([e[1] for e in segmentation],
                    [e[1] for e in segmentation])

"""Obtain the plot for RLD"""
start, stop = (2007 + 7 / 12), 2009
#start, stop = datetime.date(2007, 8, 1), datetime.date(2008, 12, 31)
height_ratio = [10, 14]
custom_colors = ["blue", "purple"]
fig, ax_array = plt.subplots(2, figsize=(8, 5), sharex=True,
                             gridspec_kw={'height_ratios': height_ratio})
plt.subplots_adjust(hspace=.35, left=None, bottom=None, right=None, top=None)
with open(well_file) as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        raw_data += row

raw_data_float = []
for entry in raw_data:
    raw_data_float.append(float(entry))
raw_data = raw_data_float

T = int(len(raw_data))
S1, S2 = 1, 1  # S1, S2 give you spatial dimensions
data = np.array(raw_data).reshape(T, 1, 1)

"""STEP 2: Read in the data to create your EvT objects"""
EvTKL = EvaluationTool()
EvTKL.build_EvaluationTool_via_results(result_path + "//" + results_file_KL)
EvTDPD = EvaluationTool()
EvTDPD.build_EvaluationTool_via_results(result_path + "//" + results_file_DPD)

if plot1:
    """STEP 3: Set up the figure properties and plot"""
    height_ratio = [8, 10]
    custom_colors = ["blue", "purple"]
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(
        2, 2, figsize=(12, 5), sharex='col', sharey='row',
        gridspec_kw={'height_ratios': height_ratio})
    model_universe=model_universe,
    model_prior=model_prior,
    cp_model=cp_model,
    S1=S1,
    S2=S2,
    T=T,
    store_rl=True,
    store_mrl=True,
    trim_type="keep_K",
    threshold=100,
    notifications=100,
    save_performance_indicators=True,
    training_period=test_obs)
detector.run()

"""Store results + real CPs into EvaluationTool obj"""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_run_detector(detector)

"""store that EvT object onto hard drive"""
prior_spec_str = ("//time_frame=" + time_frame +
                  "//transform=" + str(heavy_tails_transform) +
                  "//a=" + str(a) + "//b=" + str(b))
detector_path = baseline_working_directory + prior_spec_str
if not os.path.exists(detector_path):
    os.makedirs(detector_path)
results_path = detector_path + "//results.txt"
if not shortened:
    EvT.store_results_to_HD(results_path)

fig = EvT.plot_predictions(indices=[0],
"""get the average for each 2h-interval for each weekday""" for _2h in range(0, 12 * 7): selection_2h = [False] * _2h + [ True ] + [False] * (12 * 7 - 1 - _2h) selection = (selection_2h * int(T / (7 * 12)) + selection_2h[:(T - int(T / (7 * 12)) * 7 * 12)]) mean_2h[_2h, station] = np.mean(data[selection, station]) data[selection, station] = (data[selection, station] - mean_2h[_2h, station]) if normalize: data = (data - np.mean(data, axis=0)) / np.sqrt(np.var(data, axis=0)) intercept_priors = np.mean(data, axis=0) """""STEP 2: READ RESULTS""" "" EvT = EvaluationTool() EvT.build_EvaluationTool_via_results(results_file) segmentation = EvT.results[EvT.names.index("MAP CPs")][-2] model_labels = EvT.results[EvT.names.index("model labels")] num_models = len(np.union1d(model_labels, model_labels)) relevant_models = np.union1d([seg[1] for seg in segmentation], [seg[1] for seg in segmentation]) #mods = [8,11,13,17,18] all_models = [ e for e in range(0, len(model_labels)) ] #np.linspace(0, len(model_labels)-1, len(model_labels), dtype = int) """Get dates""" def perdelta(start, end, delta, date_list):
# Set paths to original data and stored results
baseline_working_directory = os.getcwd()
nile_file = os.path.join(baseline_working_directory, "Data", "nile.txt")
results_file = os.path.join(baseline_working_directory, "Output",
                            "results_nile.txt")
if not os.path.isfile(results_file):
    print("\nCould not find results_nile.txt in the Output directory. "
          "Have you run nile_ICML18.py?\n")

"""STEP 1: Read in data and convert to appropriate format"""
# Extract data (height and date) and properties (T: num years; S1 and S2:
# spatial dimensions) from csv file
T, S1, S2, river_height, unstandardised_river_height, dates = load_nile_data(nile_file)

"""STEP 2: Generate EvaluationTool based on the results generated with nile_ICML18.py"""
# Read in results
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_results(results_file)

# Get MAP CPs in range
print("Identified", len(EvT.results[EvT.names.index("MAP CPs")][-2]),
      "MAP CPs at years: ")
[print("\t", int(dates[m[0]]))
 for m in EvT.results[EvT.names.index("MAP CPs")][-2]]

"""STEP 3: Generate run-length distribution plot for paper"""
# Prepare the axes
fig, ax_array = plt.subplots(2, figsize=(8, 5), sharex=True,
                             gridspec_kw={'height_ratios': [10, 14]})
plt.subplots_adjust(hspace=.35, left=None, bottom=None, right=None, top=None)

# Placement of y-labels
ylabel_coords = [-0.065, 0.5]
"""STEP 6: Build and run detector""" detector = Detector(data=data, model_universe=model_universe, model_prior = model_prior, cp_model = cp_model, S1 = S1, S2 = S2, T = T, store_rl=True, store_mrl=True, trim_type="keep_K", threshold = 200, notifications = 100, save_performance_indicators = True, training_period = 250) detector.run() """STEP 7: give some results/pictures/summaries""" """Store results + real CPs into EvaluationTool obj""" EvT = EvaluationTool() EvT.add_true_CPs(true_CP_location=true_CP_location, true_CP_model_index=true_CP_location, true_CP_model_label = -1) EvT.build_EvaluationTool_via_run_detector(detector) print("convergence diagnostics for on-line hyperparameter opt:") plt.plot(np.linspace(1,len(detector.model_universe[0].a_list), len(detector.model_universe[0].a_list)), np.array(detector.model_universe[0].a_list)) plt.plot(np.linspace(1,len(detector.model_universe[0].b_list), len(detector.model_universe[0].b_list)), np.array(detector.model_universe[0].b_list)) fig = EvT.plot_run_length_distr( time_range = np.linspace(1,
    model_universe=model_universe,
    model_prior=model_prior,
    cp_model=cp_model,
    S1=S1,
    S2=S2,
    T=T,
    store_rl=True,
    store_mrl=True,
    trim_type="keep_K",
    threshold=200,
    training_period=25,  # i.e., we let 2 years pass before MSE computed
    notifications=1500,
    save_performance_indicators=True)
detector.run()

"""Store results + real CPs into EvaluationTool obj"""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_run_detector(detector)

"""plot transformed data"""
fig = EvT.plot_raw_TS(data.reshape(T, 1))

"""plot prediction error"""
fig = EvT.plot_prediction_error(
    data,
    indices=[0],
    print_plt=True,
    time_range=np.linspace(2 * 365,
                           T - upper_AR - 1,
                           T - upper_AR - 1 - 2 * 365,
                           dtype=int))

"""plot predictions themselves"""
fig = EvT.plot_predictions(indices=[0],
                           print_plt=True,
    cp_model=cp_model,
    S1=S1,
    S2=S2,
    T=T,
    store_rl=True,
    store_mrl=True,
    trim_type="keep_K",
    threshold=200,
    notifications=100,
    save_performance_indicators=True,
    generalized_bayes="power_divergence",
    alpha=5,  # 0.0005, 0.25, 0.5, 1.0 // 0.05, 0.1 upwards
    generalized_bayes_hyperparameter_learning=True)
detector.run()

"""Store results + real CPs into EvaluationTool obj"""
EvT = EvaluationTool()
#EvT.add_true_CPs(true_CP_location=true_CP_location,
#                 true_CP_model_index=true_CP_location,
#                 true_CP_model_label=-1)
EvT.build_EvaluationTool_via_run_detector(detector)

minus = 3
EvT.plot_run_length_distr(
    buffer=0,
    show_MAP_CPs=True,
    mark_median=False,
    mark_max=True,
    upper_limit=T - 2 - minus,
    print_colorbar=True,
    colorbar_location='bottom',
    log_format=True,
"""get the average for each 2h-interval for each weekday""" for _2h in range(0, 12 * 7): selection_2h = [False] * _2h + [ True ] + [False] * (12 * 7 - 1 - _2h) selection = (selection_2h * int(T / (7 * 12)) + selection_2h[:(T - int(T / (7 * 12)) * 7 * 12)]) mean_2h[_2h, station] = np.mean(data[selection, station]) data[selection, station] = (data[selection, station] - mean_2h[_2h, station]) if normalize: data = (data - np.mean(data, axis=0)) / np.sqrt(np.var(data, axis=0)) intercept_priors = np.mean(data, axis=0) """""STEP 2: READ RESULTS""" "" EvTKL, EvTDPD = EvaluationTool(), EvaluationTool() EvTKL.build_EvaluationTool_via_results(results_file_KL) EvTDPD.build_EvaluationTool_via_results(results_file_DPD) """Get dates""" def perdelta(start, end, delta, date_list): curr = start while curr < end: #yield curr date_list.append(curr) curr += delta all_dates = [] #start_year, start_month, start_day, start_hour = 2002, 8, 17, 0
count = 0
with open(baseline_working_directory + "//" + date_file) as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        # skip the first row (likely a header)
        if count > 0:
            myl += row
        count += 1
        if count % 2500 == 0:
            print(count)

dates = []
for e in myl:
    dates.append(int(e))

result_path = baseline_working_directory
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_results(result_path + "//" + results_file)

"""Using the dates, select your range and indices:
select 03/07/1975 -- 31/12/2008, i.e. find indices that correspond"""
start_test = dates.index(19740703)
start_algo = dates.index(19750703)
stop = dates.index(20081231)

"""time period for which we want RLD"""
start_comparison = dates.index(20070801)
stop_comparison = stop

all_dates = []
for d in dates[start_comparison:stop - 2]:
    s = str(d)
        if cp == 0:
            next_AR1 = next_AR1[burn_in:]
        # add the next AR(1) stream into 'data'
        data[start:fin, i, 0] = next_AR1

"""STEP 3: Set up analysis parameters"""
S1, S2 = K, 1  # S1, S2 give you spatial dimensions
if normalize:
    data = (data - np.mean(data)) / np.sqrt(np.var(data))

"""STEP 4: Offset"""
if offset:
    for i in range(0, min(K, max_num_plotted_series)):
        data[:, i, :] = data[:, i, :] + i * 7

"""CREATE THE PICTURES USING THE STORED RESULTS"""
EvTKL = EvaluationTool()
EvTKL.build_EvaluationTool_via_results(result_path + "//" + results_file_KL)
EvTDPD = EvaluationTool()
EvTDPD.build_EvaluationTool_via_results(result_path + "//" + results_file_DPD)

"""STEP 1: Set up the plot configs"""
if singlePlot:
    fig, ax = plt.subplots(1, figsize=(8, 5))
    axis=0)

"""Fill in and obtain normal and demeaned versions"""
temperatures_spatial[selection, location] = (
    station_means[location] + month_means[location, month] +
    year_effect_controlling_for_months)
temperatures_spatial_demeaned[selection2, location] = (
    temperatures_spatial[selection2, location] -
    station_means[location] - month_means[location, month])

"""Normalize"""
temperatures_spatial_demeaned = (
    (temperatures_spatial_demeaned -
     np.mean(temperatures_spatial_demeaned, axis=0)) /
    np.sqrt(np.var(temperatures_spatial_demeaned, axis=0)))

"""STEP 3: Read in the results"""
EvT = EvaluationTool()
EvT.build_EvaluationTool_via_results(results_file)

"""STEP 4: Get your plots"""
segmentation = EvT.results[EvT.names.index("MAP CPs")][-2]
model_labels = EvT.results[EvT.names.index("model labels")]
num_models = len(np.union1d(model_labels, model_labels))
relevant_models = np.union1d([seg[1] for seg in segmentation],
                             [seg[1] for seg in segmentation])
mods = [8, 11, 13, 17, 18]
all_models = np.linspace(0, len(model_labels) - 1, len(model_labels),
                         dtype=int)

"""STEP 5: Get annotations"""
# Background references used for the annotations:
# http://file.scirp.org/pdf/ACS_2013062615184222.pdf
# https://en.wikipedia.org/wiki/History_of_climate_change_science
    threshold=50,
    notifications=100,
    save_performance_indicators=True,
    generalized_bayes_rld=rld_KL,
    alpha_param_learning=param_learning,
    alpha_param=alpha_param,
    alpha_param_opt_t=100,
    alpha_rld=alpha_rld,
    alpha_rld_learning=rld_learning,
    loss_der_rld_learning="absolute_loss")
detector_KL.run()

"""STEP 7: Make graphing tool"""
EvTDPD = EvaluationTool()
EvTDPD.build_EvaluationTool_via_run_detector(detector_DPD)
EvTKL = EvaluationTool()
EvTKL.build_EvaluationTool_via_run_detector(detector_KL)

"""STEP 8: Plotting Pictures in paper"""
matplotlib.rcParams.update({'figure.autolayout': False})

"""Get the different CPs"""
CPsDPD = np.array([e[0] for e in
                   EvTDPD.results[EvTDPD.names.index("MAP CPs")][-2]])
CPsKL = np.array([e[0] for e in
                  EvTKL.results[EvTKL.names.index("MAP CPs")][-2]])

k = 25
additional_CPs = []