def normalisation_over_curves(self):
    """Plot selected test curves next to their normalised counterparts.

    Builds a ``PreprocessData`` with the normalisation scaler enabled,
    ``enable_ignore_price`` set and a ``[0, 1]`` feature range, loads the
    data through ``get_data()`` and passes the first test set (raw and
    scaled) to ``self.plotting.plot_some_curves`` with
    ``plot_separate=True``.
    """
    data_prep = PreprocessData()
    data_prep.enable_normalisation_scaler = True
    data_prep.enable_ignore_price = True
    data_prep.feature_range = [0, 1]

    # get_data() is unpacked into seven values; only three are needed here.
    (_, raw_test_sets, _, scaled_test_sets, _, _,
     maturities) = data_prep.get_data()

    # Presumably positions of the curves to draw (TODO confirm); a longer
    # selection was used before: [25, 50, 75, 100, 600, 720, 740, 815].
    selected = [25, 50, 75, 815]

    self.plotting.plot_some_curves(
        "normalisation_over_curves",
        raw_test_sets[0],
        scaled_test_sets[0],
        selected,
        maturities,
        plot_separate=True,
    )
def helper(self, preprocess_type):
    """Assert that scaling followed by rescaling reproduces the raw data.

    Args:
        preprocess_type: a ``PreprocessType`` member selecting which
            preprocessing flags to enable, or ``None`` to use normalisation
            over tenors. Any other value leaves the ``PreprocessData``
            instance with the flags it was constructed with.

    Raises:
        AssertionError: raised by ``np.testing.assert_allclose`` when the
            rescaled first test set differs from the raw first test set.
    """
    scaler = PreprocessData()

    if (preprocess_type is None
            or preprocess_type is PreprocessType.NORMALISATION_OVER_TENORS
            or preprocess_type is PreprocessType.NORMALISATION_OVER_CURVES):
        # Both normalisation variants share the scaler and feature range.
        scaler.enable_normalisation_scaler = True
        scaler.feature_range = [0, 1]
        if preprocess_type is PreprocessType.NORMALISATION_OVER_CURVES:
            # The over-curves variant additionally sets the ignore-price flag.
            scaler.enable_ignore_price = True
    elif preprocess_type is PreprocessType.STANDARDISATION_OVER_TENORS:
        scaler.enable_standardisation_scaler = True
    elif preprocess_type is PreprocessType.LOG_RETURNS_OVER_TENORS:
        scaler.enable_log_returns = True

    (_, raw_test_sets, _, scaled_test_sets, _, test_names,
     _) = scaler.get_data()

    round_tripped = scaler.rescale_data(scaled_test_sets[0], test_names[0])

    # To sanity-check the assertion itself, compare against
    # np.random.random_sample(raw_test_sets[0].shape) instead.
    np.testing.assert_allclose(round_tripped, raw_test_sets[0])
def simulate(plot=True):
    """Train an autoencoder on normalised data and measure reconstruction.

    The data is scaled with the normalisation scaler to ``[0, 1]``, an
    ``Autoencoder`` is trained and saved under a name derived from the MD5
    of its parameters, and the first test set is encoded, decoded and
    rescaled.

    Args:
        plot: when true, plot the encoded first test set and three curve
            comparisons.

    Returns:
        The value returned by ``reconstruction_error`` for the raw first
        test set versus its rescaled reconstruction.
    """
    plotting = Plotting()
    scaler = PreprocessData()
    scaler.enable_normalisation_scaler = True
    scaler.feature_range = [0, 1]

    # Load the data; normalisation is configured on the scaler above.
    (_, test_sets, train_scaled, test_scaled, _, test_names,
     maturities) = scaler.get_data()
    print("sets_training_scaled.shape", train_scaled[0].shape)

    ae_params = dict(
        input_dim=train_scaled[0].shape[1],  # column count of first set
        latent_dim=2,
        hidden_layers=(56, 40, 28, 12, 4),
        leaky_relu=0.1,
        loss='mse',
        last_activation='linear',
        batch_size=20,
        epochs=100,
        steps_per_epoch=500,
    )
    # The model file name is keyed on the parameters, so each distinct
    # configuration is saved under its own name.
    serialised = json.dumps(ae_params, sort_keys=True)
    model_name = "ae_" + hashlib.md5(serialised.encode('utf-8')).hexdigest()

    autoencoder = Autoencoder(ae_params)
    autoencoder.train(train_scaled, test_scaled)
    autoencoder.save_model(model_name)
    # To reuse a stored model instead: autoencoder.load_model(model_name)

    # Encode every set. The training encodings are not read afterwards;
    # the calls are kept to preserve the original call sequence.
    encoded_train = [autoencoder.encode(block) for block in train_scaled]
    encoded_test = [autoencoder.encode(block) for block in test_scaled]

    # Decode the first test set and undo the scaling.
    decoded = autoencoder.decode(encoded_test[0])
    simulated = scaler.rescale_data(decoded, dataset_name=test_names[0])

    error = reconstruction_error(np.array(test_sets[0]), simulated)

    if not plot:
        return error

    plotting.plot_2d(encoded_test[0],
                     "test_feature_normalised_encoded_autoencoder_on_",
                     save=True)
    plotting.plot_some_curves("normalised_compare_ae_before_rescale",
                              test_scaled[0], decoded, [25, 50, 75, 815],
                              maturities)
    plotting.plot_some_curves("normalised_compare_ae", test_sets[0],
                              simulated, [25, 50, 75, 815], maturities)
    # NOTE(review): same plot name as the call above -- if the name is used
    # as an output file name, the previous figure is replaced; confirm.
    plotting.plot_some_curves("normalised_compare_ae", test_sets[0],
                              test_scaled[0],
                              [25, 50, 75, 815, 100, 600, 720, 740],
                              maturities, plot_separate=True)
    return error
def simulate():
    """Train a variational autoencoder and plot its reconstructions.

    The data is scaled with the normalisation scaler to ``[-1, 1]``; a
    ``VariationalAutoencoder`` is trained on the vertically stacked training
    sets and saved under a name derived from the MD5 of its parameters. The
    first test set is then encoded, decoded, rescaled, passed to
    ``reconstruction_error`` and plotted.
    """
    plotting = Plotting()
    scaler = PreprocessData()
    scaler.enable_normalisation_scaler = True
    scaler.feature_range = [-1, 1]

    # Load the data; normalisation is configured on the scaler above.
    (_, test_sets, train_scaled, test_scaled, _, test_names,
     maturities) = scaler.get_data()

    stacked_train = np.vstack(train_scaled)

    ae_params = dict(
        input_dim=train_scaled[0].shape[1],  # column count of first set
        latent_dim=3,
        hidden_layers=(56, 40, 28, 12, 4),
        leaky_relu=0.1,
        last_activation='linear',  # alternative noted by author: sigmoid
        # NOTE(review): if this string is forwarded verbatim to a framework
        # that expects 'mean_squared_error' it will not resolve -- confirm
        # how VariationalAutoencoder interprets it. Alternative noted by
        # the author: binary_crossentropy.
        loss='mean_square_error',
        epsilon_std=1.0,
        batch_size=20,
        epochs=100,
        steps_per_epoch=500,
    )
    serialised = json.dumps(ae_params, sort_keys=True)
    model_name = "vae_" + hashlib.md5(serialised.encode('utf-8')).hexdigest()

    # Train and persist the model.
    vae = VariationalAutoencoder(ae_params)
    vae.train(stacked_train, test_scaled)
    vae.save_model(model_name)
    # To reuse a stored model instead: vae.load_model(model_name)

    # Encode every set. The training encodings are not read afterwards;
    # the calls are kept to preserve the original call sequence.
    encoded_train = [vae.encode(block) for block in train_scaled]
    encoded_test = [vae.encode(block) for block in test_scaled]

    # Decode the first test set and undo the scaling.
    decoded = vae.decode(encoded_test[0])
    simulated = scaler.rescale_data(decoded, dataset_name=test_names[0])

    # The return value is not used here.
    reconstruction_error(np.array(test_sets[0]), simulated)

    # Latent-space plots.
    first_encoded = encoded_test[0]
    plotting.plot_2d(first_encoded,
                     "test_feature_normalised_encoded_vae_on_",
                     save=True)
    plotting.plot_space(maturities, vae, "variational_grid",
                        latent_dim=first_encoded.shape[1])

    # Curve comparisons: first in scaled space, then in original units.
    curve_ids = [25, 50, 75, 815]
    plotting.plot_some_curves("test_feature_normalised_compare_vae_scaled",
                              test_scaled[0], decoded, curve_ids, maturities)
    plotting.plot_some_curves("test_feature_normalised_compare_vae",
                              test_sets[0], simulated, [25, 50, 75, 815],
                              maturities)
def simulate(plot=True):
    """Train a windowed autoencoder and report SMAPE of its reconstruction.

    Data is requested from ``PreprocessData.get_data`` with
    ``chunks_of=window_size`` and scaled to ``[0, 1]``. An
    ``AutoencoderWindows`` is trained and saved under a name derived from
    the MD5 of its parameters. The first test set is encoded, decoded and
    rescaled with curve smoothing enabled, and the mean and variance of the
    ``smape`` result are printed.

    Args:
        plot: when true, plot selected raw curves against the smoothed
            reconstruction.
    """
    plotting = Plotting()
    scaler = PreprocessData()
    scaler.enable_normalisation_scaler = True
    scaler.feature_range = [0, 1]
    window_size = 20

    # Load the data in chunks of `window_size`.
    (_, test_sets, train_scaled, test_scaled, _, test_names,
     maturities) = scaler.get_data(chunks_of=window_size)
    print("sets_training_scaled.shape", train_scaled[0].shape)

    ae_params = dict(
        # (window_size, column count of the first training set)
        input_dim=(window_size, train_scaled[0].shape[1]),
        latent_dim=(2, 56),
        hidden_layers=(12 * 56, 4 * 56),
        leaky_relu=0.1,
        loss='mse',
        last_activation='linear',
        batch_size=20,
        epochs=100,
        steps_per_epoch=500,
    )
    serialised = json.dumps(ae_params, sort_keys=True)
    model_name = "ae_" + hashlib.md5(serialised.encode('utf-8')).hexdigest()

    autoencoder = AutoencoderWindows(ae_params)
    print("sets_training_scaled", train_scaled[0].shape)
    autoencoder.train(train_scaled, test_scaled)
    autoencoder.save_model(model_name)
    # To reuse a stored model instead: autoencoder.load_model(model_name)

    # Encode every training and test set.
    encoded_train = [autoencoder.encode(block) for block in train_scaled]
    encoded_test = [autoencoder.encode(block) for block in test_scaled]
    print("sets_encoded_training", len(encoded_train), encoded_train[0].shape)
    print("sets_encoded_test", encoded_test[0].shape)

    # Decode the first test set.
    decoded = autoencoder.decode(encoded_test[0])
    print("decoded_test", decoded.shape)

    # Undo the scaling with the curve-smoothing flag switched on.
    scaler.enable_curve_smoothing = True
    smoothed = scaler.rescale_data(decoded, dataset_name=test_names[0])

    smape_scores = smape(smoothed, np.array(test_sets[0]), over_curves=True)
    print(np.mean(smape_scores), np.var(smape_scores))

    if plot:
        plotting.plot_some_curves("normalised_compare_ae", test_sets[0],
                                  smoothed, [25, 50, 75, 815], maturities)
def simulate():
    """Load a saved autoencoder and compare reconstructions with and
    without curve smoothing.

    The data is scaled with the normalisation scaler to ``[0, 1]``. An
    ``Autoencoder`` is built from ``ae_params`` and its weights are loaded
    from the model saved under ``"ae_" + md5(params)``; no training happens
    here. The first test set is encoded, decoded and rescaled twice: once
    as-is and once with ``enable_curve_smoothing`` switched on. Both results
    are plotted and passed to ``reconstruction_error``.
    """
    plotting = Plotting()
    preprocess_normalisation = PreprocessData()
    preprocess_normalisation.enable_normalisation_scaler = True
    preprocess_normalisation.feature_range = [0, 1]

    # Load the data; only the values used below are kept by name.
    (_, sets_test, sets_training_scaled, sets_test_scaled, _,
     test_dataset_names, maturities) = preprocess_normalisation.get_data()

    ae_params = {
        'input_dim': sets_training_scaled[0].shape[1],  # column count
        'latent_dim': 2,
        'hidden_layers': (56, 40, 28, 12, 4, 2),
        'leaky_relu': 0.1,
        'loss': 'mse',
        'last_activation': 'linear',
        'batch_size': 20,
        'epochs': 100,
        'steps_per_epoch': 500
    }
    # The model name is keyed on the parameters, so changing any value above
    # makes load_model look for a different saved model.
    ae_params_hash = hashlib.md5(
        json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

    autoencoder = Autoencoder(ae_params)
    # To retrain instead of loading:
    #   autoencoder.train(np.vstack(sets_training_scaled), sets_test_scaled)
    #   autoencoder.save_model("ae_" + ae_params_hash)
    autoencoder.load_model("ae_" + ae_params_hash)

    # Encode every set.
    # NOTE(review): the training encodings are never read; the calls are
    # kept only because encode() is project code whose side effects are not
    # visible here -- drop them if encode() is pure.
    for set_training_scaled in sets_training_scaled:
        autoencoder.encode(set_training_scaled)
    sets_encoded_test = [
        autoencoder.encode(set_test_scaled)
        for set_test_scaled in sets_test_scaled
    ]

    plotting.plot_2d(sets_encoded_test[0],
                     "test_feature_normalised_encoded_autoencoder_on_",
                     save=True)

    # Decode the first test set and undo the scaling (no smoothing yet).
    decoded_test = autoencoder.decode(sets_encoded_test[0])
    simulated = preprocess_normalisation.rescale_data(
        decoded_test, dataset_name=test_dataset_names[0])

    # Previously used selection: [25, 50, 75, 100, 600, 720, 740, 815].
    curve_ids = [25, 50, 75, 815]

    plotting.plot_some_curves(
        "test_feature_normalised_compare_autoencoder_before_rescale",
        sets_test_scaled[0], decoded_test, list(curve_ids), maturities)
    plotting.plot_some_curves(
        "test_feature_normalised_compare_autoencoder", sets_test[0],
        simulated, list(curve_ids), maturities)

    print("reconstruction error BEFORE smoothing:")
    reconstruction_error(np.array(sets_test[0]), simulated)

    # Rescale the same decoded data again with smoothing switched on.
    preprocess_normalisation.enable_curve_smoothing = True
    simulated = preprocess_normalisation.rescale_data(
        decoded_test, dataset_name=test_dataset_names[0])

    # NOTE(review): same plot name as the unsmoothed comparison above -- if
    # the name is used as an output file name, the earlier figure is
    # replaced; confirm.
    plotting.plot_some_curves(
        "test_feature_normalised_compare_autoencoder", sets_test[0],
        simulated, list(curve_ids), maturities)

    print("reconstruction error AFTER smoothing:")
    reconstruction_error(np.array(sets_test[0]), simulated)