def train(self, x_train, x_val, name=None, epochs=None, batch_size=None, steps_per_epoch=None):
    if epochs is None:
        epochs = self.params['epochs']
    if batch_size is None:
        batch_size = self.params['batch_size']
    if steps_per_epoch is None:
        steps_per_epoch = self.params['steps_per_epoch']

    # checkpoint = ModelCheckpoint(self.config.get_filepath_ae_model("/checkpoints/deep_ae_encoder-{epoch:02d}-{val_loss:.2f}"),
    #                              monitor='val_loss', verbose=1, save_best_only=True, mode='auto')

    # Train the autoencoder
    history = self.autoencoder.fit_generator(self.generator(x_train, batch_size),
                                             validation_data=(x_val, x_val),
                                             steps_per_epoch=steps_per_epoch,
                                             epochs=epochs,
                                             verbose=2)  # callbacks=[checkpoint],
    # history = self.autoencoder.fit(x_train, x_train, epochs=epochs, batch_size=batch_size, shuffle=True,
    #                                validation_data=(x_test, x_test), verbose=2)

    print(history.history.keys())

    if self.plot:
        plotting = Plotting()
        plotting.plot_loss(history.history['loss'], history.history['val_loss'], "deep_loss")

    if name is not None:
        self.save_model(name)
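# The batch generator passed to fit_generator above (self.generator) is not shown in this
# snippet. A minimal sketch of what such a method might do, assuming `data` is a 2-D numpy
# array and the autoencoder is trained on (input, input) pairs (hypothetical helper):
def generator_sketch(data, batch_size):
    while True:
        idx = np.random.randint(0, data.shape[0], batch_size)
        batch = data[idx]
        yield batch, batch  # reconstruction target equals the input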
def simulate():
    plotting = Plotting()
    preprocess_logreturns = PreprocessData()
    preprocess_logreturns.enable_log_returns = True

    # 1. get data and apply log-returns
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess_logreturns.get_data()

    sets_training_first_last_tenors = []
    for set_training_scaled in sets_training_scaled:
        sets_training_first_last_tenors.append(set_training_scaled[:, [0, -1]])
    # sets_training_first_last_tenors = np.array(sets_training_first_last_tenors)

    sets_test_first_last_tenors = []
    for set_test_scaled in sets_test_scaled:
        sets_test_first_last_tenors.append(set_test_scaled[:, [0, -1]])
    # sets_test_first_last_tenors = np.array(sets_test_first_last_tenors)

    gan_params = {'num_tenors': sets_training_first_last_tenors[0].shape[1],
                  'num_c': 6 * 7,
                  'num_z': 6 * 7,
                  'num_o': 6 * 7,
                  'gen_model_type': 'standard',  # conv
                  'dis_model_type': 'standard',  # conv
                  'gen_layers': (4 * (6 * 7 * 2),),  # 4 * num_o * num_tenors
                  'dis_layers': (4 * (6 * 7),),  # 4 * num_o
                  'gen_last_activation': 'tanh',
                  'dis_last_activation': 'sigmoid',
                  'loss': 'binary_crossentropy',
                  'batch_size': 128,
                  'epochs': 20000}
    gan_params_hash = hashlib.md5(json.dumps(gan_params, sort_keys=True).encode('utf-8')).hexdigest()

    gan = GAN(gan_params)
    # gan.train(np.vstack(sets_training_first_last_tenors))
    # gan.save_model("gan_test_" + gan_params_hash)
    gan.load_model("gan_test_" + gan_params_hash)

    # 4: simulate on encoded log returns, conditioned on test dataset
    num_simulations = 10
    num_repeats = 20
    generated_segments, real_segment = gan.generate(condition=sets_test_first_last_tenors[-1],
                                                    num_simulations=num_simulations,
                                                    remove_condition=False)
    last_generated_segment = generated_segments
    for _ in np.arange(num_repeats - 1):
        generated_temp, real_temp = gan.generate(condition=last_generated_segment, remove_condition=True)
        last_generated_segment = generated_temp
        generated_segments = np.append(generated_segments, generated_temp, axis=1)

    # 5: undo log-returns
    generated_segments = preprocess_logreturns.rescale_data(generated_segments,
                                                            start_value=sets_test_first_last_tenors[-1][-1])

    # plotting.plot_3d_many(file_name, data, save=False)
    plotting.plot_3d_training("3d recursively generated with GAN, test", generated_segments, sets_test[-1],
                              show=True, after_real_data=True)
def all_log_returns(self):
    preprocess_data = PreprocessData()
    plotting = Plotting()

    preprocess_data.enable_log_returns = True
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess_data.get_data()

    for i, set_training_scaled in enumerate(sets_training_scaled):
        print("set_training_scaled.shape", set_training_scaled.shape, i)
        plotting.plot_2d(set_training_scaled, "/time_series/" + training_dataset_names[i], timeseries=True,
                         save=False, title=True)
def __init__(self):
    self.preprocess_data = PreprocessData()
    self.plotting = Plotting()
    self.config = Config()

    # self.preprocess_data.enable_min_max_scaler = True
    self.preprocess_data.enable_log_returns = True
    self.sets_training, self.sets_test, self.sets_training_scaled, self.sets_test_scaled, \
        self.training_dataset_names, self.test_dataset_names, self.maturities = self.preprocess_data.get_data()

    wti_nymex = self.sets_test[0]
    time = wti_nymex.axes[0].tolist()

    self.wti_nymex_short_end = wti_nymex.iloc[:, 0]
    self.data_scaled = self.sets_test_scaled[0][0]
def __init__(self):
    self.preprocess_data = PreprocessData()
    self.plotting = Plotting()
    self.config = Config()

    # self.preprocess_data.enable_min_max_scaler = True
    self.preprocess_data.enable_log_returns = True
    self.sets_training, self.sets_test, self.sets_training_scaled, self.sets_test_scaled, \
        self.training_dataset_names, self.test_dataset_names, self.maturities = self.preprocess_data.get_data()

    self.wti_nymex = self.sets_test[0]
    time = self.wti_nymex.axes[0].tolist()

    self.wti_nymex_short_end = self.wti_nymex.iloc[:, 0]
    self.data_scaled = self.sets_test_scaled[0][0]

    self.train_len = 128
    self.test_len = 42
    self.data_train = self.wti_nymex[:self.train_len]
    self.data_test = self.wti_nymex[self.train_len:self.train_len + self.test_len]
    self.data_train_and_test = self.wti_nymex[:self.train_len + self.test_len]

    print("self.data_train.shape", self.data_train.shape)
    print("self.data_test.shape", self.data_test.shape)
def __init__(self, params, plot=False):
    self.config = Config()
    self.plotting = Plotting()
    self.params = params
    self.plot = plot

    self.build_model(params)
def __init__(self): print("Andersen Markov Model") self.plotting = Plotting() preprocess_logreturns = PreprocessData() preprocess_logreturns.enable_log_returns = True # 1. get data and apply minimax sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess_logreturns.get_data( ) # tenors: rate tenors in year fractions (from 0.083 to 5 over 60 steps) # rates: corresponding zero rates matrix # obs_time: observation dates in year fractions (starting at the first date) # 988 steps from -3.835... to 0 on the WTI NYMEX data num_c = 6 * 7 # add '* 20' to see if a larger training set helps num_o = 6 * 7 train_set = sets_test[-1].iloc[:num_c] test_set = sets_test[-1].iloc[num_c:num_c + num_o + 1] num_of_test_curves = len(test_set) self.test_set = test_set tenors = maturities self.tenors = tenors[:, np.newaxis] self.rates = np.array(train_set) index = pd.Series(train_set.index) end_num = toYearFraction(sets_test[-1].index[-1]) dates_as_decimal = np.array( index.apply(lambda x: toYearFraction(x, end_num))) self.dates_as_decimal = dates_as_decimal[:, np.newaxis] print("test_set.shape", np.array(test_set).shape) smape_results = [] for i in np.arange(100): simulated_rates = self.simulate(num_of_test_curves) smape_result = smape(simulated_rates, test_set) smape_results.append(smape_result) print("simulate rates", i) print("simulated, real", np.array(simulated_rates).shape, np.array(test_set).shape) print("smape:", smape_result) print("=============\n") # self.plotting.plot_3d("real", test_set, show_title=False) # self.plotting.plot_3d("AMM_simulated_" + str(i), simulated_rates, show_title=False) # # cov_log_returns = cov_log_returns_over_features(simulated_rates) # self.plotting.plot_3d_cov("AMM_simulated_" + str(i) + "_cov", cov_log_returns, show_title=False) smape_results = np.array(smape_results) # print("smape_results:", smape_results) print("smape mean and std:", np.mean(smape_results), np.std(smape_results))
def __init__(self, params, plot=True):
    self.k = params['latent_dim']
    self.A_tilde = None
    self.mu = None
    self.plot = plot

    self.config = Config()
    self.plotting = Plotting()

    print("PCA")
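# The attributes above suggest that `A_tilde` holds the k retained principal directions and
# `mu` the per-feature means. A minimal sketch of how encode/decode could work on top of
# them (hypothetical helpers; the actual PCAModel methods are not shown in this snippet):
def pca_encode_sketch(x, mu, A_tilde):
    # project mean-centred data onto the k retained principal components
    return (x - mu) @ A_tilde

def pca_decode_sketch(z, mu, A_tilde):
    # map latent coordinates back to the original feature space
    return z @ A_tilde.T + mu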
def __init__(self, params, plot=True):
    self.config = Config()
    self.plotting = Plotting()
    self.params = params
    self.plot = plot

    self.input_dim = params['input_dim']
    self.latent_dim = params['latent_dim']

    optimizer = Adam(0.0002, 0.5)  # learning rate, beta_1

    # Build and compile the discriminator
    self.discriminator = self.build_discriminator(params)
    self.discriminator.compile(loss=params['loss_discriminator'],
                               optimizer=optimizer,
                               metrics=['accuracy'])

    # Build the encoder / decoder
    self.encoder = self.build_encoder(params)
    self.decoder = self.build_decoder(params)

    img = Input(shape=(self.input_dim, ))

    # The generator takes the image, encodes it and reconstructs it from the encoding
    encoded_repr = self.encoder(img)
    reconstructed_img = self.decoder(encoded_repr)

    # For the adversarial_autoencoder model we will only train the generator
    self.discriminator.trainable = False

    # The discriminator determines validity of the encoding
    validity = self.discriminator(encoded_repr)

    # The adversarial_autoencoder model (stacked generator and discriminator)
    self.adversarial_autoencoder = Model(img, [reconstructed_img, validity])
    self.adversarial_autoencoder.compile(loss=[params['loss_generator'], params['loss_discriminator']],
                                         loss_weights=[0.999, 0.001],
                                         optimizer=optimizer)
def __init__(self, params, plot=True):
    self.config = Config()
    self.plotting = Plotting()
    self.params = params
    self.plot = plot

    # Number of Conditioning, Random and Prediction returns
    self.num_c = params["num_c"]
    self.num_z = params["num_z"]
    self.num_o = params["num_o"]
    self.num_tenors = params["num_tenors"]

    optimizer = Adam(1e-5)

    # Build and compile the discriminator
    self.discriminator = self.build_discriminator()
    self.discriminator.compile(loss=params["loss"], optimizer=optimizer, metrics=['accuracy'])

    # Build the generator
    self.generator = self.build_generator()

    # The generator takes noise as input and generates imgs
    condition = Input(shape=(self.num_c, self.num_tenors))
    noise = Input(shape=(self.num_z, self.num_tenors))
    img = self.generator([condition, noise])

    # For the combined model we will only train the generator
    self.discriminator.trainable = False

    # The discriminator takes generated images as input and determines validity
    validity = self.discriminator(img)

    # The combined model (stacked generator and discriminator)
    # Trains the generator to fool the discriminator
    self.combined = Model([condition, noise], validity)
    self.combined.compile(loss=params["loss"], optimizer=optimizer)
def simulate(self):
    plotting = Plotting()

    old_rates = self.model.rates
    plotting.plot_3d("AMModel_input_data", old_rates)
    plotting.plot_2d(old_rates[-1, :], "AMModel_input_data_first")

    tenors = self.model.tenors
    obs_time = self.model.obs_time
    print("tenors", tenors)
    print("obs_time", obs_time)
    print("old_rates", old_rates)

    self.model.make_data()

    rates = self.model.rates
    print("new rates", rates)
    plotting.plot_3d("AMModel_test", rates)  # , maturities=tenors, time=obs_time

    print("made data")
class Analysis():
    def __init__(self):
        self.preprocess_data = PreprocessData()
        self.plotting = Plotting()
        self.config = Config()

        # self.preprocess_data.enable_min_max_scaler = True
        self.preprocess_data.enable_log_returns = True
        self.sets_training, self.sets_test, self.sets_training_scaled, self.sets_test_scaled, \
            self.training_dataset_names, self.test_dataset_names, self.maturities = self.preprocess_data.get_data()

        wti_nymex = self.sets_test[0]
        time = wti_nymex.axes[0].tolist()

        self.wti_nymex_short_end = wti_nymex.iloc[:, 0]
        self.data_scaled = self.sets_test_scaled[0][0]

    def normalisation_over_tenors(self):
        preprocess = PreprocessData(PreprocessType.NORMALISATION_OVER_TENORS)
        sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data()

        print("sets_test[0].shape", sets_test[0].shape, sets_test_scaled[0].shape)

        self.plotting.plot_some_curves("normalisation_over_tenors", sets_test[0], sets_test_scaled[0],
                                       [25, 50, 75, 815], maturities,
                                       plot_separate=True)  # old: [25, 50, 75, 100, 600, 720, 740, 815]

    def standardisation_over_tenors(self):
        preprocess = PreprocessData(PreprocessType.STANDARDISATION_OVER_TENORS)
        sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data()

        self.plotting.plot_some_curves("standardisation_over_tenors", sets_test[0], sets_test_scaled[0],
                                       [25, 50, 75, 815], maturities,
                                       plot_separate=True)  # old: [25, 50, 75, 100, 600, 720, 740, 815]

    def logreturns_over_tenors(self):
        preprocess = PreprocessData(PreprocessType.LOG_RETURNS_OVER_TENORS)
        sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data()

        self.plotting.plot_some_curves("logreturns_over_curves", sets_test[0], sets_test_scaled[0],
                                       [25, 50, 75, 815], maturities,
                                       plot_separate=True)  # old: [25, 50, 75, 100, 600, 720, 740, 815]

        self.plotting.plot_3d("logreturns_over_curves_3d", sets_test_scaled[0])

    def normalisation_over_curves(self):
        preprocess = PreprocessData()
        preprocess.enable_normalisation_scaler = True
        preprocess.enable_ignore_price = True
        preprocess.feature_range = [0, 1]
        sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data()

        self.plotting.plot_some_curves("normalisation_over_curves", sets_test[0], sets_test_scaled[0],
                                       [25, 50, 75, 815], maturities,
                                       plot_separate=True)  # old: [25, 50, 75, 100, 600, 720, 740, 815]

    def standardisation_over_curves(self):
        print("todo standardisation_over_curves")

    def logreturns_over_curves(self):
        print("todo logreturns_over_curves")

    def all_log_returns(self):
        preprocess_data = PreprocessData()
        plotting = Plotting()

        preprocess_data.enable_log_returns = True
        sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess_data.get_data()

        for i, set_training_scaled in enumerate(sets_training_scaled):
            print("set_training_scaled.shape", set_training_scaled.shape, i)
            plotting.plot_2d(set_training_scaled, "/time_series/" + training_dataset_names[i], timeseries=True,
                             save=False, title=True)

    def all_normalised_data(self):
        preprocess_data = PreprocessData()
        preprocess_data.enable_normalisation_scaler = True
        sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess_data.get_data()

        for i, set_training_scaled in enumerate(sets_training_scaled):
            self.plotting.plot_2d(set_training_scaled, "/time_series/" + training_dataset_names[i],
                                  timeseries=True, save=True, title=True)

        for i, set_test_scaled in enumerate(sets_test_scaled):
            self.plotting.plot_2d(set_test_scaled, "/time_series/" + test_dataset_names[i],
                                  timeseries=True, save=True, title=True)

    def all_data(self, show_title=False):
        preprocess_data = PreprocessData(extend_data=False)
        sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess_data.get_data()

        print("maturities", maturities)

        for i, set_training in enumerate(sets_training):
            print(self.training_dataset_names[i])
            print(set_training.index[0], set_training.index[-1],
                  round(np.min(set_training.min()), 2), round(np.max(set_training.max()), 2))
            # self.plotting.plot_2d(set_training, "/time_series/" + training_dataset_names[i], timeseries=True,
            #                       save=True, title=show_title)
            # self.plotting.plot_3d("/time_series/" + training_dataset_names[i] + "_3d", set_training, show_title=show_title)

            cov_log_returns = cov_log_returns_over_tenors(set_training)
            # self.plotting.plot_3d_cov("/time_series/" + training_dataset_names[i] + "_cov", cov_log_returns,
            #                           maturities=maturities, show_title=show_title)
            print("\n")

        for i, set_test in enumerate(sets_test):
            print(self.test_dataset_names[i])
            print(set_test.index[0], set_test.index[-1],
                  round(np.min(set_test.min()), 2), round(np.max(set_test.max()), 2))
            self.plotting.plot_2d(set_test, "/time_series/" + test_dataset_names[i], timeseries=True,
                                  save=True, title=show_title)
            self.plotting.plot_3d("/time_series/" + test_dataset_names[i] + "_3d", set_test, show_title=show_title)

            cov_log_returns = cov_log_returns_over_tenors(set_test)
            # self.plotting.plot_3d_cov("/time_series/" + test_dataset_names[i] + "_cov", cov_log_returns,
            #                           maturities=maturities, show_title=show_title)
            print("\n")
class GAN:
    def __init__(self, params, plot=True):
        self.config = Config()
        self.plotting = Plotting()
        self.params = params
        self.plot = plot

        # Number of Conditioning, Random and Prediction returns
        self.num_c = params["num_c"]
        self.num_z = params["num_z"]
        self.num_o = params["num_o"]
        self.num_tenors = params["num_tenors"]

        optimizer = Adam(1e-5)

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss=params["loss"], optimizer=optimizer, metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # The generator takes noise as input and generates imgs
        condition = Input(shape=(self.num_c, self.num_tenors))
        noise = Input(shape=(self.num_z, self.num_tenors))
        img = self.generator([condition, noise])

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator takes generated images as input and determines validity
        validity = self.discriminator(img)

        # The combined model (stacked generator and discriminator)
        # Trains the generator to fool the discriminator
        self.combined = Model([condition, noise], validity)
        self.combined.compile(loss=params["loss"], optimizer=optimizer)

    def build_generator(self):
        model = Sequential()

        if self.params['gen_model_type'] == 'standard':
            model.add(Flatten(input_shape=(self.num_c + self.num_z, self.num_tenors)))
            for i in np.arange(len(self.params['gen_layers'])):
                model.add(Dense(self.params['gen_layers'][i], activation='relu'))  # input_dim=(self.num_c + self.num_z, self.num_tenors)
                # model.add(LeakyReLU(alpha=self.params['leaky_relu']))

        elif self.params['gen_model_type'] == 'conv':
            model.add(Conv1D(28, kernel_size=5, padding="same", data_format="channels_last", activation='relu',
                             input_shape=(self.num_c + self.num_z, self.num_tenors)))  # for temporal data we should use padding 'valid'
            model.add(Conv1D(2, kernel_size=3, padding="same", data_format="channels_last", activation='relu',
                             input_shape=(self.num_c + self.num_z, self.num_tenors)))
            model.add(MaxPooling1D(pool_size=2))
            model.add(Flatten())

        # final layers
        model.add(Dense(np.prod((self.num_o, self.num_tenors)), activation=self.params['gen_last_activation']))
        model.add(Reshape((self.num_o, self.num_tenors)))

        print("-" * 20 + "\ngan generator")
        model.summary()

        condition = Input(shape=(self.num_c, self.num_tenors))
        z = Input(shape=(self.num_z, self.num_tenors))
        model_input = concatenate([condition, z], axis=1)
        out = model(model_input)

        return Model([condition, z], concatenate([condition, out], axis=1))

    def build_discriminator(self):
        model = Sequential()

        if self.params['dis_model_type'] == 'standard':
            model.add(Flatten(input_shape=(self.num_c + self.num_o, self.num_tenors)))
            for i in np.arange(len(self.params['dis_layers'])):
                model.add(Dense(self.params['dis_layers'][i], activation='relu'))
                # model.add(LeakyReLU(alpha=self.params['leaky_relu']))

        elif self.params['dis_model_type'] == 'conv':
            model.add(Conv1D(32, kernel_size=4, strides=1, padding='same', activation='relu',
                             input_shape=(self.num_c + self.num_z, self.num_tenors)))
            model.add(MaxPooling1D(pool_size=2))
            model.add(Flatten())

        # final layer
        model.add(Dense(1, activation=self.params['dis_last_activation']))

        print("-" * 20 + "\ngan discriminator")
        model.summary()

        model_input = Input(shape=(self.num_c + self.num_o, self.num_tenors))
        validity = model(model_input)

        return Model(model_input, validity)

    def train(self, data_train, name=None, sample_interval=200, epochs=None, batch_size=None):
        if epochs is None:
            epochs = self.params['epochs']
        if batch_size is None:
            batch_size = self.params['batch_size']

        discriminator_loss = []
        discriminator_acc = []
        generator_loss = []

        for epoch in range(epochs):

            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images
            real = self.collect_samples(data_train, 2 * batch_size, self.num_c + self.num_o)
            real_labels = np.ones((2 * batch_size, 1))
            d_loss_real = self.discriminator.train_on_batch(real, real_labels)

            # Generate a batch of new images
            condition = self.collect_samples(data_train, batch_size, self.num_c)
            noise = np.random.normal(size=(batch_size, self.num_z, self.num_tenors))  # THIS WORKS!
            gen_imgs = self.generator.predict([condition, noise])
            fake_labels = np.zeros((batch_size, 1))
            d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake_labels)

            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            real = self.collect_samples(data_train, batch_size, self.num_c)  # THIS ALSO WORKS
            # noise = self.collect_samples(G, batch_size, num_z)  # THIS WORKS!
            noise = np.random.normal(size=(batch_size, self.num_z, self.num_tenors))
            real_labels = np.ones((batch_size, 1))

            # Train the generator (to have the discriminator label samples as valid)
            g_loss = self.combined.train_on_batch([real, noise], real_labels)

            # If at save interval => save generated image samples
            if epoch % sample_interval == 0:
                # record progress
                print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100 * d_loss[1], g_loss))
                discriminator_loss.append(d_loss[0])
                discriminator_acc.append(d_loss[1])
                generator_loss.append(g_loss)

                if np.isnan(d_loss[0]) or np.isnan(g_loss):
                    # something has gone wrong :(
                    break

                # plot simulation
                if self.plot:
                    generated, real_ = self.generate(condition=data_train, num_simulations=1)
                    self.plotting.plot_3d_training("gan_3d_simple_training/" + "%d" % epoch, generated, real_)

        if self.plot:
            self.plotting.plot_losses(discriminator_loss, discriminator_acc, generator_loss,
                                      "gan 3d simple training",
                                      legend=['discriminator loss', 'discriminator acc', 'generator loss'])

        if name is not None:
            self.save_model(name)

    def generate(self, condition=None, condition_on_end=True, num_simulations=1, remove_condition=True, repeat=None):
        if isinstance(condition, pd.DataFrame):
            _condition = np.array(condition)
        else:
            _condition = condition.copy()

        print("_condition", _condition.shape)

        if condition_on_end:
            if isinstance(condition, list):
                _condition = _condition[0][np.newaxis, -self.num_c:]
            elif len(condition.shape) == 2:
                _condition = _condition[np.newaxis, -self.num_c:]
            else:
                _condition = _condition[:, -self.num_c:]
        else:  # not condition_on_end
            if type(condition) is list:
                _condition = _condition[0][np.newaxis, :self.num_c]
            elif len(condition.shape) == 2:
                _condition = _condition[np.newaxis, :self.num_c]
            else:  # len(condition.shape) == 3
                _condition = _condition[:, :self.num_c]

        print("_condition after", _condition.shape)

        # override num_simulations if _condition already holds several conditions
        _num_simulations = 1
        if num_simulations > 1:
            _condition = np.repeat(_condition, num_simulations, axis=0)
            _num_simulations = num_simulations
        elif len(_condition.shape) > 1 and _condition.shape[0] != 1:
            _num_simulations = _condition.shape[0]

        noise = np.random.normal(size=(_num_simulations, self.num_z, self.num_tenors))
        generated = self.generator.predict([_condition, noise])

        if remove_condition:
            generated = generated[:, self.num_c:, :]

        if isinstance(repeat, int) and repeat > 0:
            for _ in np.arange(repeat - 1):
                generated_temp, _ = self.generate(condition=generated, remove_condition=True)
                generated = np.append(generated, generated_temp, axis=1)

        return generated, _condition

    def collect_samples(self, data, batch_size, pattern_len, ret_indices=False, indices=None):
        if type(data) is list:
            _data = np.array(data[np.random.randint(len(data))])
        else:
            _data = np.array(data)

        n = _data.shape[0] - pattern_len + 1
        if indices is None:
            indices = np.random.randint(n, size=batch_size)

        if ret_indices:
            return np.array([_data[a:a + pattern_len, :] for a in indices]), indices
        else:
            return np.array([_data[a:a + pattern_len, :] for a in indices])

    def save_model(self, name):
        self.generator.save(self.config.get_filepath_gan_model(name + "_3d_simple_generator"))
        self.discriminator.save(self.config.get_filepath_gan_model(name + "_3d_simple_discriminator"))
        self.combined.save(self.config.get_filepath_gan_model(name + "_3d_simple_combined"))

    def load_model(self, name):
        generator_filepath = self.config.get_filepath_gan_model(name + "_3d_simple_generator")
        discriminator_filepath = self.config.get_filepath_gan_model(name + "_3d_simple_discriminator")
        combined_filepath = self.config.get_filepath_gan_model(name + "_3d_simple_combined")

        if self.config.file_exists(generator_filepath) and self.config.file_exists(discriminator_filepath) \
                and self.config.file_exists(combined_filepath):
            self.generator = load_model(generator_filepath)
            self.discriminator = load_model(discriminator_filepath)
            self.combined = load_model(combined_filepath)
            return True
        else:
            print("trained model does not exist yet!")
            print(self.config.file_exists(generator_filepath), self.config.file_exists(discriminator_filepath),
                  self.config.file_exists(combined_filepath))
            print(generator_filepath, discriminator_filepath, combined_filepath)
            return False

    def load_else_train(self, x_train, name):
        did_load = self.load_model(name)
        if not did_load:
            self.train(x_train)
            self.save_model(name)
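# A minimal usage sketch of the GAN class above. Everything below is illustrative: the
# parameter values and the random toy data are assumptions, not the settings used elsewhere
# in this project.
if __name__ == '__main__':
    toy_params = {'num_tenors': 2, 'num_c': 42, 'num_z': 42, 'num_o': 42,
                  'gen_model_type': 'standard', 'dis_model_type': 'standard',
                  'gen_layers': (336,), 'dis_layers': (168,),
                  'gen_last_activation': 'tanh', 'dis_last_activation': 'sigmoid',
                  'loss': 'binary_crossentropy', 'batch_size': 32, 'epochs': 200}
    toy_data = [np.random.normal(size=(500, 2))]  # one toy series: 500 time steps, 2 tenors
    gan = GAN(toy_params, plot=False)
    gan.train(toy_data, sample_interval=100)
    simulated, condition_used = gan.generate(condition=toy_data[0], num_simulations=10)
    print("simulated.shape", simulated.shape)  # (10, num_o, num_tenors)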
def simulate():
    plotting = Plotting()
    preprocess_normalisation = PreprocessData()
    preprocess_normalisation.enable_normalisation_scaler = True
    preprocess_normalisation.feature_range = [-1, 1]
    # preprocess_normalisation.enable_ignore_price = True

    # 1. get data and apply normalisation
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess_normalisation.get_data()
    all_training_scaled = np.vstack(sets_training_scaled)

    ae_params = {'input_dim': sets_training_scaled[0].shape[1],  # 56
                 'latent_dim': 3,
                 'hidden_layers': (56, 40, 28, 12, 4,),
                 'leaky_relu': 0.1,
                 'last_activation': 'linear',  # sigmoid or linear
                 'loss': 'mean_squared_error',  # binary_crossentropy or mean_squared_error
                 'epsilon_std': 1.0,
                 'batch_size': 20,
                 'epochs': 100,
                 'steps_per_epoch': 500}
    ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

    # 2. train/load variational autoencoder
    vae = VariationalAutoencoder(ae_params)
    vae.train(all_training_scaled, sets_test_scaled)
    vae.save_model("vae_" + ae_params_hash)
    # vae.load_model("vae_" + ae_params_hash)

    # 3: encode data using autoencoder
    sets_encoded_training = []
    for set_training_scaled in sets_training_scaled:
        sets_encoded_training.append(vae.encode(set_training_scaled))

    sets_encoded_test = []
    for set_test_scaled in sets_test_scaled:
        sets_encoded_test.append(vae.encode(set_test_scaled))

    # 4: decode using vae
    decoded_data = vae.decode(sets_encoded_test[0])

    # 7: undo minimax, for now only the first simulation
    simulated = preprocess_normalisation.rescale_data(decoded_data, dataset_name=test_dataset_names[0])

    # reconstruction error
    # reconstruction_error(sets_test_scaled[0], decoded_data)
    reconstruction_error(np.array(sets_test[0]), simulated)

    # plot latent space
    plotting.plot_2d(sets_encoded_test[0], "test_feature_normalised_encoded_vae_on_", save=True)
    plotting.plot_space(maturities, vae, "variational_grid", latent_dim=sets_encoded_test[0].shape[1])

    # plot scaled results
    plotting.plot_some_curves("test_feature_normalised_compare_vae_scaled", sets_test_scaled[0], decoded_data,
                              [25, 50, 75, 815], maturities)
    plotting.plot_some_curves("test_feature_normalised_compare_vae", sets_test[0], simulated,
                              [25, 50, 75, 815], maturities)
def simulate(plot=True):
    plotting = Plotting()
    preprocess = PreprocessData()
    preprocess.enable_normalisation_scaler = True
    preprocess.feature_range = [0, 1]

    window_size = 20

    # 1. get data and apply normalisation
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data(chunks_of=window_size)

    print("sets_training_scaled.shape", sets_training_scaled[0].shape)
    # plotting.plot_2d(sets_training_scaled[0][:, 0], "sets_training_scaled[0][:, 0]", save=False)
    # plotting.plot_2d(sets_test_scaled[0][:, 0], "test_feature_normalised_short_end", save=True)

    ae_params = {'input_dim': (window_size, sets_training_scaled[0].shape[1],),  # window_size x 56
                 'latent_dim': (2, 56,),
                 'hidden_layers': (12 * 56, 4 * 56,),
                 'leaky_relu': 0.1,
                 'loss': 'mse',
                 'last_activation': 'linear',
                 'batch_size': 20,
                 'epochs': 100,
                 'steps_per_epoch': 500}
    ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

    autoencoder = AutoencoderWindows(ae_params)
    print("sets_training_scaled", sets_training_scaled[0].shape)
    autoencoder.train(sets_training_scaled, sets_test_scaled)
    autoencoder.save_model("ae_" + ae_params_hash)
    # autoencoder.load_model("ae_" + ae_params_hash)

    # 2: encode data using autoencoder
    sets_encoded_training = []
    for set_training_scaled in sets_training_scaled:
        sets_encoded_training.append(autoencoder.encode(set_training_scaled))

    sets_encoded_test = []
    for set_test_scaled in sets_test_scaled:
        sets_encoded_test.append(autoencoder.encode(set_test_scaled))

    print("sets_encoded_training", len(sets_encoded_training), sets_encoded_training[0].shape)
    print("sets_encoded_test", sets_encoded_test[0].shape)

    # 6: decode using autoencoder
    decoded_test = autoencoder.decode(sets_encoded_test[0])
    print("decoded_test", decoded_test.shape)

    # 7: undo minimax, for now only the first simulation
    # decoded_generated_segments_first_sim = decoded_generated_segments[0]
    preprocess.enable_curve_smoothing = True
    simulated_smooth = preprocess.rescale_data(decoded_test, dataset_name=test_dataset_names[0])

    # reconstruction error
    # reconstruction_error(sets_test_scaled[0], decoded_test)
    # error = reconstruction_error(np.array(sets_test[0]), simulated_smooth)
    # print("error:", error)

    smape_result_smooth = smape(simulated_smooth, np.array(sets_test[0]), over_curves=True)
    print(np.mean(smape_result_smooth), np.var(smape_result_smooth))

    if plot:
        # plotting.plot_2d(sets_encoded_test[0], "test_feature_normalised_encoded_autoencoder_on_", save=True)
        # plotting.plot_some_curves("normalised_compare_ae_before_rescale", sets_test_scaled[0], decoded_test,
        #                           [25, 50, 75, 815], maturities)
        plotting.plot_some_curves("normalised_compare_ae", sets_test[0], simulated_smooth,
                                  [25, 50, 75, 815], maturities)
def simulate():
    plotting = Plotting()
    preprocess_minmax = PreprocessData()
    preprocess_logreturns = PreprocessData()
    preprocess_minmax.enable_min_max_scaler = True
    preprocess_logreturns.enable_log_returns = True

    # 1. get data and apply minimax
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess_minmax.get_data()

    print("sets_training_scaled.shape", sets_training_scaled[0].shape)

    autoencoder = DeepAutoencoder(input_shape=(sets_training_scaled[0].shape[1],), latent_dim=2)
    # autoencoder.train(np.vstack(sets_training_scaled), sets_test_scaled, epochs=100, batch_size=5)
    # autoencoder.save_model("deep_general_minimax")
    autoencoder.load_model("deep_general_minimax")

    # 2: encode data using autoencoder
    sets_encoded_training = []
    for set_training_scaled in sets_training_scaled:
        sets_encoded_training.append(autoencoder.encode(set_training_scaled))

    sets_encoded_test = []
    for set_test_scaled in sets_test_scaled:
        sets_encoded_test.append(autoencoder.encode(set_test_scaled))

    plotting.plot_2d(sets_encoded_test[0], "encoded test data with deep autoencoder", save=False)

    # 3: log returns of encoded data
    sets_encoded_log_training = []
    for index, set_encoded_training in enumerate(sets_encoded_training):
        sets_encoded_log_training.append(preprocess_logreturns.scale_data(set_encoded_training))

    sets_encoded_log_test = []
    for index, set_encoded_test in enumerate(sets_encoded_test):
        sets_encoded_log_test.append(preprocess_logreturns.scale_data(set_encoded_test))

    plotting.plot_2d(sets_encoded_log_test[0], "encoded test data with deep autoencoder, then log returns",
                     save=False)

    num_tenors = sets_encoded_log_training[0].shape[1]
    gan = GAN(num_c=6 * 7, num_z=6 * 7, num_o=6 * 7, num_tenors=num_tenors)  # try training on larger input and output
    # gan.train(sets_encoded_log_training, epochs=20000, batch_size=100, sample_interval=200)
    # gan.save_model("general_ae")
    gan.load_model("general_ae")

    print("sets_encoded_log_test[0].shape", sets_encoded_log_test[0].shape)

    test_arr = np.full([1, 6 * 7 + 6 * 7, num_tenors], 10)
    validity = gan.discriminator.predict(test_arr)  # np.array(sets_encoded_log_test[0])
    print(validity)

    rolled_encoded_log_test = rolling_windows(sets_encoded_log_test[0], 6 * 7 + 6 * 7)
    validity = gan.discriminator.predict(rolled_encoded_log_test)  # np.array(sets_encoded_log_test[0])
    print(validity)
def simulate():
    plotting = Plotting()
    preprocess_type = PreprocessType.STANDARDISATION_OVER_TENORS
    preprocess = PreprocessData(preprocess_type)

    # 1. get data and apply scaling
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data()
    all_training_scaled = np.vstack(sets_training_scaled)

    ae_params = {'preprocess_type': preprocess_type.value,  # only to make preprocess_type part of the hash
                 'input_dim': sets_training_scaled[0].shape[1],  # 56
                 'latent_dim': 2,
                 'hidden_layers': (56, 40, 28, 12, 4,),
                 'leaky_relu': 0.1,
                 'loss': 'mse',
                 'last_activation': 'linear',
                 'batch_size': 20,
                 'epochs': 100,
                 'steps_per_epoch': 500}
    ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

    autoencoder = Autoencoder(ae_params)
    autoencoder.load_else_train(all_training_scaled, sets_test_scaled, "ae_" + ae_params_hash)

    # 2: encode and decode the test data using the autoencoder
    encoded = autoencoder.encode(sets_test_scaled[0])
    decoded = autoencoder.decode(encoded)

    rescaled = preprocess.rescale_data(decoded, dataset_name=test_dataset_names[0])
    smape_result = smape(rescaled, np.array(sets_test[0]), over_curves=True)

    print("smape_result test set", np.mean(smape_result), np.std(smape_result), np.min(smape_result),
          np.max(smape_result))

    plotting.plot_2d(sets_test[0], "evaluation of test curves", timeseries=True, evaluation=smape_result,
                     title=False)

    # for i in np.arange(len(test_eval)):
    #     if test_eval[i] > 4:
    #         plotting.plot_2d(sets_test_scaled[0][i], "Possible unrealistic curve" + str(i), save=False, title=True)

    # 3: see how well the autoencoder reconstructs unrealistic curves
    # todo: generate random curves, THEN apply min-max feature scaling, THEN evaluate
    unrealistic_curves = []
    curve_shape = 56
    unrealistic_curves.append(np.full(curve_shape, 5))
    unrealistic_curves.append(np.full(curve_shape, 10))
    unrealistic_curves.append(np.full(curve_shape, 20))
    unrealistic_curves.append(np.full(curve_shape, 50))
    unrealistic_curves.append(np.full(curve_shape, 70))
    unrealistic_curves.append(np.full(curve_shape, 100))
    unrealistic_curves.append(np.full(curve_shape, 150))
    unrealistic_curves.append(np.full(curve_shape, 200))
    unrealistic_curves.append(np.full(curve_shape, 250))
    unrealistic_curves.append(np.full(curve_shape, 300))
    unrealistic_curves.append(np.hstack((np.full(int(curve_shape / 2), 50), np.full(int(curve_shape / 2), 150))))
    unrealistic_curves.append(np.hstack((np.full(int(curve_shape / 2), 100), np.full(int(curve_shape / 2), 150))))
    unrealistic_curves.append(np.hstack((np.full(int(curve_shape / 2), 100), np.full(int(curve_shape / 2), 200))))
    unrealistic_curves.append(np.random.uniform(0, 10, curve_shape))
    unrealistic_curves.append(np.random.uniform(10, 70, curve_shape))
    unrealistic_curves.append(np.random.uniform(0, 100, curve_shape))
    unrealistic_curves.append(np.random.uniform(100, 200, curve_shape))
    unrealistic_curves.append(np.random.uniform(200, 300, curve_shape))
    unrealistic_curves.append(np.random.uniform(0, 200, curve_shape))
    unrealistic_curves.append(np.random.uniform(0, 250, curve_shape))
    unrealistic_curves.append(np.random.uniform(0, 300, curve_shape))
    unrealistic_curves.append(np.linspace(0, 100, num=curve_shape))
    unrealistic_curves.append(np.linspace(50, 150, num=curve_shape))
    unrealistic_curves.append(np.linspace(100, 200, num=curve_shape))
    unrealistic_curves.append(np.linspace(150, 250, num=curve_shape))
    unrealistic_curves.append(np.linspace(200, 300, num=curve_shape))
    unrealistic_curves.append(np.linspace(0, 200, num=curve_shape))
    unrealistic_curves.append(np.linspace(0, 300, num=curve_shape))
    unrealistic_curves.append(np.linspace(100, 0, num=curve_shape))
    unrealistic_curves.append(np.linspace(150, 50, num=curve_shape))
    unrealistic_curves.append(np.linspace(200, 100, num=curve_shape))
    unrealistic_curves.append(np.linspace(250, 150, num=curve_shape))
    unrealistic_curves.append(np.linspace(300, 200, num=curve_shape))
    unrealistic_curves.append(np.linspace(200, 0, num=curve_shape))
    unrealistic_curves.append(np.linspace(300, 0, num=curve_shape))
    unrealistic_curves = np.array(unrealistic_curves)
    print("unrealistic_curves.shape", unrealistic_curves.shape)

    unrealistic_curves_scaled = preprocess.scale_data(unrealistic_curves, dataset_name=training_dataset_names[0],
                                                      should_fit=True)

    encoded = autoencoder.encode(unrealistic_curves_scaled)
    decoded = autoencoder.decode(encoded)

    rescaled = preprocess.rescale_data(decoded, dataset_name=training_dataset_names[0])
    smape_result = smape(rescaled, unrealistic_curves, over_curves=True)

    round_to_n = lambda x, n: round(x, -int(np.floor(np.log10(x))) + (n - 1))

    print("smape results", smape_result)
    for a_smape_result in smape_result:
        print(round_to_n(a_smape_result, 2))

    plotting.plot_2d(smape_result, "loss of unrealistic curves from autoencoder SMAPE", save=False, title=True)
    # plotting.plot_2d(unrealistic_eval_mse, "loss of unrealistic curves from autoencoder MSE", save=False, title=True)
    plotting.plot_unrealisticness(unrealistic_curves, "loss of unrealistic curves from autoencoder",
                                  timeseries=True, evaluation=smape_result, title=False, eval_label="SMAPE")
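# `smape` (imported from the project's evaluate helpers) is the error metric used throughout
# these scripts. A minimal sketch of the standard symmetric MAPE it presumably computes,
# shown here as a standalone illustration (hypothetical helper, not the project's actual
# implementation, which also supports an `over_curves` mode):
def smape_sketch(simulated, real):
    simulated = np.asarray(simulated, dtype=float)
    real = np.asarray(real, dtype=float)
    # 100% * mean of |sim - real| / ((|real| + |sim|) / 2), element-wise over the curves
    return 100.0 * np.mean(np.abs(simulated - real) / ((np.abs(real) + np.abs(simulated)) / 2.0))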
def simulate(latent_dim=2, plot=False, preprocess_type=None, model_type=None, force_training=True):
    plotting = Plotting()
    preprocess = PreprocessData(preprocess_type)

    window_size = None
    if model_type is AEModel.AE_WINDOWS:
        window_size = 10

    # 1. get data and apply normalisation
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data(chunks_of=window_size)
    all_training_scaled = np.vstack(sets_training_scaled)

    if model_type is AEModel.AAE:
        ae_params = {'preprocess_type': preprocess_type.value,  # only to make preprocess_type part of the hash
                     'input_dim': sets_training_scaled[0].shape[1],  # 56
                     'latent_dim': latent_dim,
                     'hidden_layers': (56, 40, 28, 12, 4,),
                     'hidden_layers_discriminator': (2, 2,),
                     'leaky_relu': 0.1,
                     'last_activation': 'linear',
                     'last_activation_discriminator': 'sigmoid',
                     'loss_generator': 'mean_squared_error',
                     'loss_discriminator': 'binary_crossentropy',
                     'batch_size': 20,
                     'epochs': 20000}
        ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

        # 2. train/load adversarial autoencoder
        autoencoder = AdversarialAutoencoder(ae_params, plot=False)
    elif model_type is AEModel.VAE:
        ae_params = {'preprocess_type': preprocess_type.value,  # only to make preprocess_type part of the hash
                     'input_dim': sets_training_scaled[0].shape[1],  # 56
                     'latent_dim': latent_dim,
                     'hidden_layers': (56, 40, 28, 12, 4,),
                     'leaky_relu': 0.1,
                     'last_activation': 'linear',  # sigmoid or linear
                     'loss': 'mean_squared_error',  # binary_crossentropy or mean_squared_error
                     'epsilon_std': 1.0,
                     'batch_size': 20,
                     'epochs': 100,
                     'steps_per_epoch': 500}
        ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

        # 2. train/load variational autoencoder
        autoencoder = VariationalAutoencoder(ae_params, plot=False)
    elif model_type is AEModel.AE:
        ae_params = {'preprocess_type': preprocess_type.value,  # only to make preprocess_type part of the hash
                     'input_dim': sets_training_scaled[0].shape[1],  # 56
                     'latent_dim': latent_dim,
                     'hidden_layers': (56, 40, 28, 12, 4,),
                     'leaky_relu': 0.1,
                     'loss': 'mse',
                     'last_activation': 'linear',
                     'batch_size': 20,
                     'epochs': 100,
                     'steps_per_epoch': 500}
        ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

        autoencoder = Autoencoder(ae_params, plot=False)
    elif model_type is AEModel.PCA:
        ae_params = {'preprocess_type': preprocess_type.value,  # only to make preprocess_type part of the hash
                     'latent_dim': latent_dim}
        ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

        autoencoder = PCAModel(ae_params, plot=False)
    else:  # model_type is AEModel.AE_WINDOWS
        ae_params = {'input_dim': (window_size, sets_training_scaled[0].shape[1],),  # 10 x 56
                     'latent_dim': (2, 56,),
                     'hidden_layers': (12 * 56, 4 * 56,),
                     'leaky_relu': 0.1,
                     'loss': 'mse',
                     'last_activation': 'linear',
                     'batch_size': 20,
                     'epochs': 10,
                     'steps_per_epoch': 500}
        ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

        autoencoder = AutoencoderWindows(ae_params, plot=False)

    if force_training:
        autoencoder.train(all_training_scaled, sets_test_scaled, "ae_" + ae_params_hash)
    else:
        autoencoder.load_else_train(all_training_scaled, sets_test_scaled, "ae_" + ae_params_hash)

    # 2: encode data using autoencoder
    sets_encoded_training = autoencoder.encode(sets_training_scaled)
    sets_encoded_test = autoencoder.encode(sets_test_scaled)

    # 6: decode using autoencoder
    decoded_test = autoencoder.decode(sets_encoded_test[0])

    # 7: undo scaling
    # decoded_generated_segments_first_sim = decoded_generated_segments[0]
    simulated = preprocess.rescale_data(decoded_test, dataset_name=test_dataset_names[0])

    preprocess.enable_curve_smoothing = True
    simulated_smooth = preprocess.rescale_data(decoded_test, dataset_name=test_dataset_names[0])

    # reconstruction error
    # error = reconstruction_error(np.array(sets_test[0]), simulated)
    # error_smooth = reconstruction_error(np.array(sets_test[0]), simulated_smooth)
    smape_result = smape(simulated, np.array(sets_test[0]), over_curves=True)
    smape_result_smooth = smape(simulated_smooth, np.array(sets_test[0]), over_curves=True)
    print(np.mean(smape_result_smooth))

    if plot and model_type is not AEModel.AE_WINDOWS:
        plotting.plot_2d(sets_encoded_test[0],
                         preprocess_type.name + "_" + model_type.name + "_latent_space",
                         sets_test_scaled[0].index.values, save=True)

        plotting.plot_some_curves(preprocess_type.name + "_" + model_type.name + "_in_vs_out",
                                  sets_test[0], simulated, [25, 50, 75, 815], maturities)

        # plotting.plot_some_curves("normalised_compare_ae", sets_test[0], sets_test_scaled[0],
        #                           [25, 50, 75, 815, 100, 600, 720, 740], maturities, plot_separate=True)

        preprocess.enable_curve_smoothing = False
        if model_type is AEModel.VAE:
            plotting.plot_grid_2dim(maturities, autoencoder.generator_model,
                                    preprocess_type.name + "_" + model_type.name + "_latent_grid",
                                    preprocess, test_dataset_names[0], n=6)
        elif model_type is AEModel.AAE:
            plotting.plot_grid_2dim(maturities, autoencoder.decoder,
                                    preprocess_type.name + "_" + model_type.name + "_latent_grid",
                                    preprocess, test_dataset_names[0], n=6)

    return smape_result_smooth
from helpers.preprocess_data import PreprocessData
from helpers.evaluate import *
from helpers.plotting import Plotting
from imputance.gain_model import gain
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
    plotting = Plotting()
    preprocess = PreprocessData(PreprocessType.STANDARDISATION_OVER_TENORS, short_end=True)
    preprocess2 = PreprocessData(PreprocessType.LOG_RETURNS_OVER_TENORS, short_end=True)

    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data()

    sets_encoded_log_training = preprocess2.scale_data(sets_training_scaled, training_dataset_names, should_fit=True)
    sets_encoded_log_test = preprocess2.scale_data(sets_test_scaled, test_dataset_names, should_fit=True)

    train = sets_encoded_log_training[0].copy()
    test = sets_encoded_log_test[0].copy()

    # print("train.shape[1]", train.shape[1])
    # print("sets_test_scaled[0]", sets_test_scaled[0].shape)
    # print("sets_encoded_log_test[0]", sets_encoded_log_test[0].shape)

    params = {
def simulate(plot=True):
    plotting = Plotting()
    preprocess = PreprocessData()
    preprocess.enable_normalisation_scaler = True
    preprocess.feature_range = [0, 1]

    # 1. get data and apply normalisation
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess.get_data()

    print("sets_training_scaled.shape", sets_training_scaled[0].shape)
    # plotting.plot_2d(sets_training_scaled[0][:, 0], "sets_training_scaled[0][:, 0]", save=False)
    # plotting.plot_2d(sets_test_scaled[0][:, 0], "test_feature_normalised_short_end", save=True)

    ae_params = {'input_dim': sets_training_scaled[0].shape[1],  # 56
                 'latent_dim': 2,
                 'hidden_layers': (56, 40, 28, 12, 4,),
                 'leaky_relu': 0.1,
                 'loss': 'mse',
                 'last_activation': 'linear',
                 'batch_size': 20,
                 'epochs': 100,
                 'steps_per_epoch': 500}
    ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

    autoencoder = Autoencoder(ae_params)
    autoencoder.train(sets_training_scaled, sets_test_scaled)
    autoencoder.save_model("ae_" + ae_params_hash)
    # autoencoder.load_model("ae_" + ae_params_hash)

    # 2: encode data using autoencoder
    sets_encoded_training = []
    for set_training_scaled in sets_training_scaled:
        sets_encoded_training.append(autoencoder.encode(set_training_scaled))

    sets_encoded_test = []
    for set_test_scaled in sets_test_scaled:
        sets_encoded_test.append(autoencoder.encode(set_test_scaled))

    # 6: decode using autoencoder
    decoded_test = autoencoder.decode(sets_encoded_test[0])

    # 7: undo minimax, for now only the first simulation
    # decoded_generated_segments_first_sim = decoded_generated_segments[0]
    simulated = preprocess.rescale_data(decoded_test, dataset_name=test_dataset_names[0])

    # reconstruction error
    # reconstruction_error(sets_test_scaled[0], decoded_test)
    error = reconstruction_error(np.array(sets_test[0]), simulated)

    if plot:
        plotting.plot_2d(sets_encoded_test[0], "test_feature_normalised_encoded_autoencoder_on_", save=True)

        plotting.plot_some_curves("normalised_compare_ae_before_rescale", sets_test_scaled[0], decoded_test,
                                  [25, 50, 75, 815], maturities)

        plotting.plot_some_curves("normalised_compare_ae", sets_test[0], simulated,
                                  [25, 50, 75, 815], maturities)

        plotting.plot_some_curves("normalised_compare_ae", sets_test[0], sets_test_scaled[0],
                                  [25, 50, 75, 815, 100, 600, 720, 740], maturities, plot_separate=True)

    return error
def simulate(latent_dim=2, preprocess_type1=None, preprocess_type2=None, ae_model=None, gan_model=None,
             force_training=True, plot=False):
    preprocess1 = PreprocessData(preprocess_type1)
    preprocess2 = PreprocessData(preprocess_type2)

    # 1. get data and apply scaling
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess1.get_data()

    if ae_model is AEModel.AAE:
        ae_params = {'preprocess_type': preprocess_type1.value,  # only to make preprocess_type part of the hash
                     'input_dim': sets_training_scaled[0].shape[1],  # 56
                     'latent_dim': latent_dim,
                     'hidden_layers': (56, 40, 28, 12, 4,),
                     'hidden_layers_discriminator': (2, 2,),
                     'leaky_relu': 0.1,
                     'last_activation': 'linear',
                     'last_activation_discriminator': 'sigmoid',
                     'loss_generator': 'mean_squared_error',
                     'loss_discriminator': 'binary_crossentropy',
                     'batch_size': 20,
                     'epochs': 20000}
        ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()
        autoencoder = AdversarialAutoencoder(ae_params, plot=False)
    elif ae_model is AEModel.VAE:
        ae_params = {'preprocess_type': preprocess_type1.value,  # only to make preprocess_type part of the hash
                     'input_dim': sets_training_scaled[0].shape[1],  # 56
                     'latent_dim': latent_dim,
                     'hidden_layers': (56, 40, 28, 12, 4,),
                     'leaky_relu': 0.1,
                     'last_activation': 'linear',  # sigmoid or linear
                     'loss': 'mean_squared_error',  # binary_crossentropy or mean_squared_error
                     'epsilon_std': 1.0,
                     'batch_size': 20,
                     'epochs': 100,
                     'steps_per_epoch': 500}
        ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()
        autoencoder = VariationalAutoencoder(ae_params, plot=False)
    elif ae_model is AEModel.AE:
        ae_params = {'preprocess_type': preprocess_type1.value,  # only to make preprocess_type part of the hash
                     'input_dim': sets_training_scaled[0].shape[1],  # 56
                     'latent_dim': latent_dim,
                     'hidden_layers': (56, 40, 28, 12, 4,),
                     'leaky_relu': 0.1,
                     'loss': 'mse',
                     'last_activation': 'linear',
                     'batch_size': 20,
                     'epochs': 100,
                     'steps_per_epoch': 500}
        ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()
        autoencoder = Autoencoder(ae_params, plot=False)
    else:  # ae_model is AEModel.PCA
        ae_params = {'preprocess_type': preprocess_type1.value,  # only to make preprocess_type part of the hash
                     'latent_dim': latent_dim}
        ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()
        autoencoder = PCAModel(ae_params, plot=False)

    # 2. train/load autoencoder
    autoencoder.load_else_train(np.vstack(sets_training_scaled), sets_test_scaled, "ae_" + ae_params_hash)

    # 2: encode data using autoencoder
    sets_encoded_training = autoencoder.encode(sets_training_scaled)
    sets_encoded_test = autoencoder.encode(sets_test_scaled)

    # 3: log returns of encoded data
    sets_encoded_log_training = preprocess2.scale_data(sets_encoded_training, training_dataset_names, should_fit=True)
    sets_encoded_log_test = preprocess2.scale_data(sets_encoded_test, test_dataset_names, should_fit=True)

    num_z = 6 * 7
    num_c = 6 * 7
    num_o = 6 * 7

    if gan_model is GANModel.WGAN:
        gan_params = {'ae_params_hash': ae_params_hash,
                      'num_tenors': sets_encoded_log_training[0].shape[1],
                      'num_c': num_c,
                      'num_z': num_z,
                      'num_o': num_o,
                      'gen_model_type': 'standard',  # conv
                      'dis_model_type': 'standard',  # conv
                      'gen_layers': (4 * (6 * 7 * 2),),  # 4 * num_o * num_tenors
                      'dis_layers': (4 * (6 * 7),),  # 4 * num_o
                      'gen_last_activation': 'tanh',
                      'dis_last_activation': 'sigmoid',
                      'loss': 'binary_crossentropy',
                      'batch_size': 32,
                      'epochs': 10000,
                      'sample_interval': 1000}
        gan_params_hash = hashlib.md5(json.dumps(gan_params, sort_keys=True).encode('utf-8')).hexdigest()
        gan = CWGANGP(gan_params, plot=False)
    else:
        if gan_model is GANModel.GAN_CONV:
            model_type = 'conv'
        else:  # gan_model is GANModel.GAN
            model_type = 'standard'

        gan_params = {'ae_params_hash': ae_params_hash,
                      'num_tenors': sets_encoded_log_training[0].shape[1],
                      'num_c': num_c,
                      'num_z': num_z,
                      'num_o': num_o,
                      'gen_model_type': model_type,  # conv
                      'dis_model_type': model_type,  # conv
                      'gen_layers': (4 * (6 * 7 * 2),),  # 4 * num_o * num_tenors
                      'dis_layers': (4 * (6 * 7),),  # 4 * num_o
                      'gen_last_activation': 'tanh',
                      'dis_last_activation': 'sigmoid',
                      'loss': 'binary_crossentropy',
                      'batch_size': 128,
                      'epochs': 20000}
        gan_params_hash = hashlib.md5(json.dumps(gan_params, sort_keys=True).encode('utf-8')).hexdigest()
        gan = GAN(gan_params, plot=False)  # try training on larger input and output

    if force_training:
        gan.train(sets_encoded_log_training, "gan_" + gan_params_hash)
    else:
        gan.load_else_train(sets_encoded_log_training, "gan_" + gan_params_hash)

    # 4: simulate on encoded log returns, conditioned on test dataset
    num_simulations = 100
    num_repeats = 1
    generated, _ = gan.generate(condition=sets_encoded_log_test[-1], condition_on_end=False,
                                num_simulations=num_simulations, repeat=num_repeats)

    # insert the last real futures curve in order to do rescaling
    if preprocess_type2 is PreprocessType.LOG_RETURNS_OVER_TENORS:
        generated = np.insert(generated, 0, sets_encoded_log_test[-1].iloc[num_c], axis=1)

    # 5: undo scaling
    encoded_generated = preprocess2.rescale_data(generated, start_value=sets_encoded_test[-1][num_c],
                                                 dataset_name=test_dataset_names[-1])
    if preprocess_type2 is PreprocessType.LOG_RETURNS_OVER_TENORS:
        encoded_generated = encoded_generated[:, 1:]  # remove first curve again

    # 6: decode using autoencoder
    decoded_generated_segments = autoencoder.decode(encoded_generated)

    # 7: undo scaling, this can be log-returns
    simulated = preprocess1.rescale_data(decoded_generated_segments, start_value=sets_test[-1].iloc[num_c],
                                         dataset_name=test_dataset_names[-1])

    preprocess1.enable_curve_smoothing = True
    simulated_smooth = preprocess1.rescale_data(decoded_generated_segments, start_value=sets_test[-1].iloc[num_c],
                                                dataset_name=test_dataset_names[-1])

    if preprocess_type2 is PreprocessType.LOG_RETURNS_OVER_TENORS:
        real = sets_test[-1].iloc[num_c:num_c + num_o * num_repeats + 1]  # `+1` because the log-returns also does +1
    else:
        real = sets_test[-1].iloc[num_c:num_c + num_o * num_repeats + 1]

    print("simulated, real", simulated.shape, real.shape)

    smape_result = smape(simulated, real)
    smape_result_smooth = smape(simulated_smooth, real)

    print("smape_result_smooth mean and std:", np.mean(smape_result_smooth), np.std(smape_result_smooth))

    if plot:
        plotting = Plotting()
        plotting.plot_3d("real", real, show_title=False)

        cov_log_returns = cov_log_returns_over_tenors(real)
        plotting.plot_3d_cov("gan_real_cov", cov_log_returns, show_title=False)

        for i in np.arange(1, 11):
            # name = '_' + preprocess_type1.name + '_' + preprocess_type2.name + '_' + str(latent_dim) + '_' + ae_model.name + '_' + gan_model.name
            plotting.plot_3d("gan_simulated_" + str(i), simulated_smooth[i], maturities=maturities,
                             time=real.index.values, show_title=False)
            smape_result = smape(simulated_smooth[i], real)
            print("simulated_smooth[i], real", simulated_smooth[i].shape, real.shape)
            print("simulate rates", i)
            print("smape:", smape_result)
            print("=============\n")

            cov_log_returns = cov_log_returns_over_tenors(simulated_smooth[i])
            plotting.plot_3d_cov("gan_simulated_" + str(i) + "_cov", cov_log_returns, maturities=maturities,
                                 show_title=False)

    return smape_result_smooth
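# Example invocation of the pipeline above (illustrative argument choices; the enum members
# used here appear elsewhere in this project, but these particular settings are assumptions):
if __name__ == '__main__':
    smape_scores = simulate(latent_dim=2,
                            preprocess_type1=PreprocessType.STANDARDISATION_OVER_TENORS,
                            preprocess_type2=PreprocessType.LOG_RETURNS_OVER_TENORS,
                            ae_model=AEModel.AE,
                            gan_model=GANModel.GAN,
                            force_training=False,
                            plot=False)
    print("mean SMAPE:", np.mean(smape_scores))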
def main():
    plotting = Plotting()
    preprocess_normalisation = PreprocessData()
    preprocess_normalisation.enable_normalisation_scaler = True
    # preprocess_normalisation.enable_standardisation_scaler = True

    sets_training, sets_test, sets_training_scaled, sets_test_scaled, training_dataset_names, test_dataset_names, maturities = preprocess_normalisation.get_data()

    # sklearn model (check that it is doing the same (it is))
    # pca_model_sklearn = PCA(n_components=2)
    # pca_model_sklearn.fit(sets_test_scaled[0])
    # test_data_scaled_encoded = pca_model_sklearn.transform(sets_test_scaled[0])
    # test_data_scaled_decoded = pca_model_sklearn.inverse_transform(test_data_scaled_encoded)

    # our own model
    def pca_on_normalised():
        params = {'latent_dim': 2}
        pca_model = PCAModel(params)
        pca_model.train(np.vstack(sets_training_scaled))
        test_data_scaled_encoded = pca_model.encode(sets_test_scaled[0])
        test_data_scaled_decoded = pca_model.decode(test_data_scaled_encoded)

        print("sets_test_scaled[0].shape", sets_test_scaled[0].shape)
        print("test_data_scaled_encoded.shape", test_data_scaled_encoded.shape)
        print("test_data_scaled_decoded.shape", test_data_scaled_decoded.shape)

        # plot results
        plotting.plot_2d(test_data_scaled_encoded, "wti_nymex_encoded_pca")

        simulated = preprocess_normalisation.rescale_data(test_data_scaled_decoded,
                                                          dataset_name=test_dataset_names[0])
        plotting.plot_some_curves("wti_nymex_normalised_compare_pca", sets_test[0], simulated,
                                  [25, 50, 75, 815], maturities)
        # plotting.plot_some_curves("test_feature_normalised_compare_normalisation", sets_test[0], sets_test_scaled[0],
        #                           [25, 50, 75, 815, 100, 600, 720, 740], maturities, plot_separate=True)

        # print("reconstruction_error", reconstruction_error(sets_test_scaled[0], test_data_scaled_decoded))
        # print("reconstruction_error", reconstruction_error(np.array(sets_test[0]), simulated))
        print("smape", smape(np.array(sets_test[0]), simulated))
        # print("smape", np.mean(smape(np.array(sets_test[0]), simulated, over_curves=True)))

    def pca_on_unnormalised():
        pca_model = PCAModel({'latent_dim': 2})  # pass a params dict, matching PCAModel's constructor
        pca_model.train(np.vstack(sets_training))
        test_data_encoded = pca_model.encode(np.array(sets_test[0]))
        test_data_decoded = pca_model.decode(test_data_encoded)

        # plot results
        plotting.plot_2d(test_data_encoded.T, "wti_nymex_pca")

        # simulated = preprocess_normalisation.rescale_data(test_data_decoded, dataset_name=test_dataset_names[0])
        plotting.plot_some_curves("wti_nymex_compare_pca", sets_test[0], test_data_decoded,
                                  [25, 50, 75, 815], maturities)

    # pca_on_unnormalised()
    pca_on_normalised()
def simulate():
    plotting = Plotting()
    preprocess_normalisation = PreprocessData()
    preprocess_normalisation.enable_normalisation_scaler = True
    preprocess_normalisation.feature_range = [0, 1]
    # preprocess_normalisation.enable_scaler = True

    # 1. get data and apply normalisation
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, \
        training_dataset_names, test_dataset_names, maturities = preprocess_normalisation.get_data()

    # plotting.plot_2d(sets_training_scaled[0][:, 0], "sets_training_scaled[0][:, 0]", save=False)
    # plotting.plot_2d(sets_test_scaled[0][:, 0], "test_feature_normalised_short_end", save=True)

    all_stacked = np.vstack((np.vstack(sets_training), np.vstack(sets_test)))
    all_stacked_scaled = np.vstack((np.vstack(sets_training_scaled), np.vstack(sets_test_scaled)))
    all_training_scaled = np.vstack(sets_training_scaled)

    # print("all_stacked_scaled.shape", all_stacked_scaled.shape)
    # plotting.plot_2d(all_stacked[:, 0], "training and test data", save=False)
    # plotting.plot_2d(all_stacked_scaled[:, 0], "training and test data scaled", save=False)

    ae_params = {'input_dim': sets_training_scaled[0].shape[1],  # 56
                 'latent_dim': 2,
                 'hidden_layers': (56, 40, 28, 12, 4, 2),
                 'leaky_relu': 0.1,
                 'loss': 'mse',
                 'last_activation': 'linear',
                 'batch_size': 20,
                 'epochs': 100,
                 'steps_per_epoch': 500}
    ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

    autoencoder = Autoencoder(ae_params)
    # autoencoder.train(all_stacked_scaled, sets_test_scaled)
    # autoencoder.train(sets_test_scaled[0], sets_test_scaled)
    # autoencoder.train(all_training_scaled, sets_test_scaled)
    # autoencoder.save_model("ae_" + ae_params_hash)
    autoencoder.load_model("ae_" + ae_params_hash)

    # 2: encode data using autoencoder
    sets_encoded_training = []
    for set_training_scaled in sets_training_scaled:
        sets_encoded_training.append(autoencoder.encode(set_training_scaled))

    sets_encoded_test = []
    for set_test_scaled in sets_test_scaled:
        sets_encoded_test.append(autoencoder.encode(set_test_scaled))

    plotting.plot_2d(sets_encoded_test[0], "test_feature_normalised_encoded_autoencoder_on_", save=True)

    # 6: decode using autoencoder
    decoded_test = autoencoder.decode(sets_encoded_test[0])

    # 7: undo minimax, for now only the first simulation
    simulated = preprocess_normalisation.rescale_data(decoded_test, dataset_name=test_dataset_names[0])

    plotting.plot_some_curves("test_feature_normalised_compare_autoencoder_before_rescale",
                              sets_test_scaled[0], decoded_test,
                              [25, 50, 75, 815], maturities)  # old: [25, 50, 75, 100, 600, 720, 740, 815]

    plotting.plot_some_curves("test_feature_normalised_compare_autoencoder",
                              sets_test[0], simulated,
                              [25, 50, 75, 815], maturities)  # old: [25, 50, 75, 100, 600, 720, 740, 815]

    # curve_smooth = []
    # for curve in simulated:
    #     print("curve.shape", curve.shape)
    #     curve_smooth.append(savgol_filter(curve, 23, 5))  # window size, polynomial order
    # curve_smooth = np.array(curve_smooth)

    print("reconstruction error BEFORE smoothing:")
    reconstruction_error(np.array(sets_test[0]), simulated)

    preprocess_normalisation.enable_curve_smoothing = True
    simulated = preprocess_normalisation.rescale_data(decoded_test, dataset_name=test_dataset_names[0])
    plotting.plot_some_curves("test_feature_normalised_compare_autoencoder",
                              sets_test[0], simulated,
                              [25, 50, 75, 815], maturities)  # old: [25, 50, 75, 100, 600, 720, 740, 815]

    # plotting.plot_some_curves("test_feature_normalised_compare_normalisation", sets_test[0], sets_test_scaled[0],
    #                           [25, 50, 75, 815, 100, 600, 720, 740], maturities, plot_separate=True)

    # reconstruction error
    # reconstruction_error(sets_test_scaled[0], decoded_test)
    print("reconstruction error AFTER smoothing:")
    reconstruction_error(np.array(sets_test[0]), simulated)
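# For reference, a minimal sketch of what `reconstruction_error` is assumed to report above:
# an RMSE between original and reconstructed curves. The project's helper may print or return
# additional metrics.
def reconstruction_error_sketch(original, reconstructed):
    import numpy as np
    original = np.asarray(original, dtype=float)
    reconstructed = np.asarray(reconstructed, dtype=float)
    rmse = np.sqrt(np.mean((original - reconstructed) ** 2))
    print("rmse:", rmse)
    return rmse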
def __init__(self, params):
    self.config = Config()
    self.plotting = Plotting()
    self.params = params
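# The model classes in this project are keyed by a hash of their hyper-parameter dict, so that
# saved weights can be looked up by configuration. A minimal, self-contained example of that
# pattern (the dict contents and the "pca_" prefix are illustrative only):
import hashlib
import json

example_params = {'latent_dim': 2, 'loss': 'mse'}  # hypothetical values
example_hash = hashlib.md5(json.dumps(example_params, sort_keys=True).encode('utf-8')).hexdigest()
example_model_name = "pca_" + example_hash  # e.g. passed to save_model / load_model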
class AdversarialAutoencoder():
    def __init__(self, params, plot=True):
        self.config = Config()
        self.plotting = Plotting()
        self.params = params
        self.plot = plot

        self.input_dim = params['input_dim']
        self.latent_dim = params['latent_dim']

        optimizer = Adam(0.0002, 0.5)  # learning rate, beta_1

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator(params)
        self.discriminator.compile(loss=params['loss_discriminator'],
                                   optimizer=optimizer,
                                   metrics=['accuracy'])

        # Build the encoder / decoder
        self.encoder = self.build_encoder(params)
        self.decoder = self.build_decoder(params)

        img = Input(shape=(self.input_dim,))

        # The generator takes the image, encodes it and reconstructs it from the encoding
        encoded_repr = self.encoder(img)
        reconstructed_img = self.decoder(encoded_repr)

        # For the adversarial_autoencoder model we will only train the generator
        self.discriminator.trainable = False

        # The discriminator determines validity of the encoding
        validity = self.discriminator(encoded_repr)

        # The adversarial_autoencoder model (stacked generator and discriminator)
        self.adversarial_autoencoder = Model(img, [reconstructed_img, validity])
        self.adversarial_autoencoder.compile(loss=[params['loss_generator'], params['loss_discriminator']],
                                             loss_weights=[0.999, 0.001],
                                             optimizer=optimizer)

    def build_encoder(self, params):
        # Encoder
        img = Input(shape=(self.input_dim,))
        h = img
        for i in np.arange(len(params['hidden_layers'])):
            h = Dense(params['hidden_layers'][i])(h)
            h = LeakyReLU(alpha=params['leaky_relu'])(h)
        mu = Dense(self.latent_dim)(h)
        log_var = Dense(params['latent_dim'])(h)
        # reparameterisation: latent_repr = mu + eps * exp(log_var / 2)
        latent_repr = Lambda(lambda p: p[0] + K.random_normal(K.shape(p[0])) * K.exp(p[1] / 2),
                             lambda p: p[0])([mu, log_var])

        model = Model(img, latent_repr)
        print("-" * 100, "\nencoder:")
        model.summary()
        return model

    def build_decoder(self, params):
        # model = Sequential()
        z = Input(shape=(params['latent_dim'],))
        h = z
        # for i in np.flip(np.arange(1, 2 * (len(params['hidden_layers']) + 1))):
        for i in np.flip(np.arange(len(params['hidden_layers']))):
            h = Dense(params['hidden_layers'][i])(h)
            h = LeakyReLU(alpha=params['leaky_relu'])(h)
        img = Dense(self.input_dim, activation=params['last_activation'])(h)

        model = Model(z, img)
        print("-" * 100, "\ndecoder:")
        model.summary()
        return model

    def build_discriminator(self, params):
        # model = Sequential()
        encoded_repr = Input(shape=(self.latent_dim,))
        h = encoded_repr
        h = Dense(self.latent_dim)(h)
        for i in np.arange(len(params['hidden_layers_discriminator'])):
            h = Dense(params['hidden_layers_discriminator'][i])(h)
            h = LeakyReLU(alpha=params['leaky_relu'])(h)
        validity = Dense(1, activation=self.params['last_activation_discriminator'])(h)

        model = Model(encoded_repr, validity)
        print("-" * 100, "\ndiscriminator:")
        model.summary()
        return model

    def train(self, x_train, x_val, name=None, sample_interval=50, epochs=None, batch_size=None):
        if epochs is None:
            epochs = self.params['epochs']
        if batch_size is None:
            batch_size = self.params['batch_size']

        # Adversarial ground truths
        valid = np.ones((batch_size, 1))
        fake = np.zeros((batch_size, 1))

        discriminator_loss = []
        generator_loss = []
        generator_mse = []

        for epoch in range(epochs):
            # ---------------------
            #  Train Discriminator
            # ---------------------

            # Select a random batch of images
            idx = np.random.randint(0, x_train.shape[0], batch_size)
            imgs = x_train[idx]

            latent_fake = self.encoder.predict(imgs)
            latent_real = np.random.normal(size=(batch_size, self.latent_dim))

            # Train the discriminator
            d_loss_real = self.discriminator.train_on_batch(latent_real, valid)
            d_loss_fake = self.discriminator.train_on_batch(latent_fake, fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # ---------------------
            #  Train Generator
            # ---------------------

            # Train the generator
            g_loss = self.adversarial_autoencoder.train_on_batch(imgs, [imgs, valid])

            # Plot the progress
            if sample_interval is not None and sample_interval != -1:
                if epoch % int(sample_interval) == 0:
                    print("%d [D loss: %f, acc: %.2f%%] [G loss: %f, mse: %f]" %
                          (epoch, d_loss[0], 100 * d_loss[1], g_loss[0], g_loss[1]))
                    # if epoch % sample_interval == 0:
                    #     self.plotting.plot_grid_1dim(self.config.get_filepath_img("/aae_training/" + str(epoch)),
                    #                                  maturities, self.decoder)

            discriminator_loss.append(d_loss[0])
            generator_loss.append(g_loss[0])
            generator_mse.append(g_loss[1])

        # final summary: report the last discriminator accuracy (d_loss[1]) rather than re-printing the loss
        print("[D loss: %f, acc: %.2f%%] [G loss: %f, mse: %f]" %
              (discriminator_loss[-1], 100 * d_loss[1], generator_loss[-1], generator_mse[-1]))

        if self.plot:
            self.plotting.plot_losses(discriminator_loss, generator_loss, generator_mse, "adversarial_losses")

        if name is not None:
            self.save_model(name)

    def save_model(self, name):
        self.encoder.save(self.config.get_filepath_ae_model(name + "_encoder"))
        self.decoder.save(self.config.get_filepath_ae_model(name + "_decoder"))
        self.discriminator.save(self.config.get_filepath_ae_model(name + "_discriminator"))

    def load_model(self, name):
        encoder_filepath = self.config.get_filepath_ae_model(name + "_encoder")
        decoder_filepath = self.config.get_filepath_ae_model(name + "_decoder")
        discriminator_filepath = self.config.get_filepath_ae_model(name + "_discriminator")

        if self.config.file_exists(encoder_filepath) and self.config.file_exists(decoder_filepath) \
                and self.config.file_exists(discriminator_filepath):
            self.encoder = load_model(encoder_filepath, compile=False)
            self.decoder = load_model(decoder_filepath, compile=False)
            self.discriminator = load_model(discriminator_filepath, compile=False)
            return True
        else:
            print("trained model does not exist yet!")
            return False

    def load_else_train(self, x_train, x_val, name):
        did_load = self.load_model(name)
        if not did_load:
            self.train(x_train, x_val)
            self.save_model(name)

    def encode(self, data):
        # if the data is a list then encode each item separately
        if isinstance(data, list):
            temp = []
            for i in np.arange(len(data)):
                temp.append(self.encoder.predict(data[i]))
            return temp
        else:
            return self.encoder.predict(data)

    def decode(self, data):
        # if the data has three dimensions then the first is the number of simulations
        if len(data.shape) == 3:  # was `is 3`: identity comparison on ints is unreliable, use ==
            temp = []
            for i in np.arange(data.shape[0]):
                temp.append(self.decoder.predict(data[i]))
            return np.array(temp)
        else:
            return self.decoder.predict(data)
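# A minimal usage sketch for the class above. The parameter values are illustrative assumptions,
# not the configuration used in the experiments; x_train / x_val are expected to be the scaled
# curve matrices produced by PreprocessData, and "aae_example" is a hypothetical model name.
def example_aae_usage(x_train, x_val):
    aae_params = {'input_dim': x_train.shape[1],
                  'latent_dim': 2,
                  'hidden_layers': (56, 40, 28, 12, 4),
                  'hidden_layers_discriminator': (12, 4),
                  'leaky_relu': 0.1,
                  'loss_generator': 'mse',
                  'loss_discriminator': 'binary_crossentropy',
                  'last_activation': 'linear',
                  'last_activation_discriminator': 'sigmoid',
                  'batch_size': 20,
                  'epochs': 2000}
    aae = AdversarialAutoencoder(aae_params, plot=False)
    aae.load_else_train(x_train, x_val, "aae_example")
    encoded = aae.encode(x_val)
    return encoded, aae.decode(encoded)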
def simulate():
    plotting = Plotting()
    preprocess_normalisation = PreprocessData()
    preprocess_logreturns = PreprocessData()
    preprocess_normalisation.enable_normalisation_scaler = True
    preprocess_logreturns.enable_log_returns = True

    # 1. get data and apply pre-processing
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, \
        training_dataset_names, test_dataset_names, maturities = preprocess_normalisation.get_data()

    ae_params = {'preprocess_type': PreprocessType.NORMALISATION_OVER_TENORS.value,
                 'input_dim': (10, sets_training_scaled[0].shape[1],),  # 56
                 'latent_dim': 2 * 56,
                 'hidden_layers': (12 * 56, 4 * 56,),
                 'leaky_relu': 0.1,
                 'loss': 'mse',
                 'last_activation': 'linear',
                 'batch_size': 5,
                 'epochs': 5,
                 'steps_per_epoch': 500}
    ae_params_hash = hashlib.md5(json.dumps(ae_params, sort_keys=True).encode('utf-8')).hexdigest()

    autoencoder = Autoencoder(ae_params)
    # autoencoder.train(np.vstack(sets_training_scaled), sets_test_scaled)
    # autoencoder.save_model("ae_" + ae_params_hash)
    autoencoder.load_else_train(sets_training_scaled, sets_test_scaled, "ae_" + ae_params_hash)

    # 2: encode data using autoencoder
    sets_encoded_training = autoencoder.encode(sets_training_scaled)
    sets_encoded_test = autoencoder.encode(sets_test_scaled)

    print("sets_encoded_test", sets_encoded_test[0].shape)
    plotting.plot_2d(sets_encoded_test[0], "encoded test data with deep autoencoder", save=False)

    # 3: log returns of encoded data
    sets_encoded_log_training = preprocess_logreturns.scale_data(sets_encoded_training)
    sets_encoded_log_test = preprocess_logreturns.scale_data(sets_encoded_test)

    plotting.plot_2d(sets_encoded_log_test[0], "encoded test data with deep autoencoder, then log returns",
                     save=False)

    num_c = 6 * 7
    num_o = 6 * 7
    gan_params = {'ae_params_hash': ae_params_hash,
                  'num_tenors': sets_encoded_log_training[0].shape[1],
                  'num_c': num_c,
                  'num_z': 6 * 7,
                  'num_o': num_o,
                  'gen_model_type': 'standard',  # conv
                  'dis_model_type': 'standard',  # conv
                  'gen_layers': (4 * (6 * 7 * 2),),  # 4 * num_o * num_tenors
                  'dis_layers': (4 * (6 * 7),),  # 4 * num_o
                  'gen_last_activation': 'tanh',
                  'dis_last_activation': 'sigmoid',
                  'loss': 'binary_crossentropy',
                  'batch_size': 128,
                  'epochs': 20000}
    gan_params_hash = hashlib.md5(json.dumps(gan_params, sort_keys=True).encode('utf-8')).hexdigest()

    gan = GAN(gan_params)
    # try training on larger input and output
    # gan.train(sets_encoded_log_training, sample_interval=200)
    # gan.save_model("gan_" + gan_params_hash)
    gan.load_model("gan_" + gan_params_hash)

    # COV TEST, TEMPORARY
    # for name, set in zip(training_dataset_names, sets_training):
    #     print("name:", name)
    #     set_cov_log_returns_over_features = cov_log_returns_over_features(set)
    #     plotting.plot_3d_cov("covariance_time_series_" + name, set_cov_log_returns_over_features, show_title=False)
    #     plotting.plot_3d("time_series_" + name, set, maturities)
    # END COV TEST

    # 4: simulate on encoded log returns, conditioned on test dataset
    num_simulations = 10
    num_repeats = 0
    generated, _ = gan.generate(condition=sets_encoded_log_test[-1], condition_on_end=False,
                                num_simulations=num_simulations, repeat=num_repeats)

    # insert the last real futures curve in order to do rescaling
    print("sets_encoded_log_test[-1][num_c] shape", sets_encoded_log_test[-1].iloc[num_c].shape)
    print("generated_segments.shape", generated.shape)
    generated = np.insert(generated, 0, sets_encoded_log_test[-1].iloc[num_c], axis=0)

    # 5: undo log-returns
    # todo: this start_value is actually one off! Error still persists... autoencoder causing the difference?
    encoded_generated = preprocess_logreturns.rescale_data(generated, start_value=sets_encoded_test[-1][num_c])
    encoded_generated = encoded_generated[:, 1:]  # remove first curve again

    # 6: decode using autoencoder
    decoded_generated_segments = autoencoder.decode(encoded_generated)

    # 7: undo minimax, for now only the first simulation
    simulated = preprocess_normalisation.rescale_data(decoded_generated_segments,
                                                      dataset_name=test_dataset_names[-1])

    preprocess_normalisation.enable_curve_smoothing = True
    simulated_smooth = preprocess_normalisation.rescale_data(decoded_generated_segments,
                                                             dataset_name=test_dataset_names[-1])

    real = np.array(sets_test[-1])[num_c:num_c + num_o]

    print("simulated, real", simulated.shape, real.shape)

    smape_result = smape(simulated, real)
    smape_result_smooth = smape(simulated_smooth, real)
    print("smape_result and smooth", smape_result, smape_result_smooth)
    print("smape_result_smooth", smape_result_smooth)
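# For intuition, a minimal sketch of how step 5 (undoing log-returns given a start value) is
# assumed to work; the real PreprocessData.rescale_data also handles per-dataset fitting and
# optional curve smoothing, so this is illustrative only.
def undo_log_returns_sketch(log_returns, start_value):
    import numpy as np
    # price_t = start_value * exp(cumulative sum of log-returns up to t)
    return start_value * np.exp(np.cumsum(np.asarray(log_returns, dtype=float), axis=0))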
def simulate(latent_dim=2, preprocess_type1=None, preprocess_type2=None, ae_model=None, gan_model=None,
             force_training=True, plot=False):
    preprocess1 = PreprocessData(preprocess_type1, short_end=True)
    preprocess2 = PreprocessData(preprocess_type2, short_end=True)

    # 1. get data and apply scaling
    sets_training, sets_test, sets_training_scaled, sets_test_scaled, \
        training_dataset_names, test_dataset_names, maturities = preprocess1.get_data()
    print("sets_test_scaled, sets_training_scaled:", sets_test_scaled[0].shape, sets_training_scaled[0].shape)

    # 2: log returns of encoded data
    sets_encoded_log_training = preprocess2.scale_data(sets_training_scaled, training_dataset_names, should_fit=True)
    sets_encoded_log_test = preprocess2.scale_data(sets_test_scaled, test_dataset_names, should_fit=True)

    num_c = 6 * 7
    num_o = 6 * 7

    if gan_model is GANModel.WGAN:
        gan_params = {'short_end_encoding': preprocess_type1.name + "_" + preprocess_type2.name,
                      'num_tenors': sets_encoded_log_training[0].shape[1],
                      'num_c': 6 * 7,
                      'num_z': 6 * 7,
                      'num_o': 6 * 7,
                      'gen_model_type': 'standard',  # conv
                      'dis_model_type': 'standard',  # conv
                      'gen_layers': (4 * (6 * 7 * 2),),  # 4 * num_o * num_tenors
                      'dis_layers': (4 * (6 * 7),),  # 4 * num_o
                      'gen_last_activation': 'tanh',
                      'dis_last_activation': 'sigmoid',
                      'loss': 'binary_crossentropy',
                      'batch_size': 32,
                      'epochs': 10000,
                      'sample_interval': 1000}
        gan_params_hash = hashlib.md5(json.dumps(gan_params, sort_keys=True).encode('utf-8')).hexdigest()
        gan = CWGANGP(gan_params, plot=False)
    else:
        if gan_model is GANModel.GAN_CONV:
            model_type = 'conv'
        else:  # if gan_model is GANModel.GAN:
            model_type = 'standard'
        print("num tenors:", sets_encoded_log_training[0].shape[1])

        gan_params = {'short_end_encoding': preprocess_type1.name + "_" + preprocess_type2.name,
                      'num_tenors': sets_encoded_log_training[0].shape[1],
                      'num_c': num_c,
                      'num_z': 6 * 7,
                      'num_o': num_o,
                      'gen_model_type': model_type,  # conv
                      'dis_model_type': model_type,  # conv
                      'gen_layers': (4 * (6 * 7 * 2),),  # 4 * num_o * num_tenors
                      'dis_layers': (4 * (6 * 7),),  # 4 * num_o
                      'gen_last_activation': 'tanh',
                      'dis_last_activation': 'sigmoid',
                      'loss': 'binary_crossentropy',
                      'batch_size': 128,
                      'epochs': 20000}
        gan_params_hash = hashlib.md5(json.dumps(gan_params, sort_keys=True).encode('utf-8')).hexdigest()
        gan = GAN(gan_params, plot=False)

    # try training on larger input and output
    if force_training:
        gan.train(sets_encoded_log_training, "gan_" + gan_params_hash)
    else:
        gan.load_else_train(sets_encoded_log_training, "gan_" + gan_params_hash)

    # 4: simulate on encoded log returns, conditioned on test dataset
    num_simulations = 100
    num_repeats = 0
    print("sets_encoded_log_test[-1]", sets_encoded_log_test[-1].shape)
    generated, _ = gan.generate(condition=sets_encoded_log_test[-1], condition_on_end=False,
                                num_simulations=num_simulations, repeat=num_repeats)

    # insert the last real futures curve in order to do rescaling
    if preprocess_type2 is PreprocessType.LOG_RETURNS_OVER_TENORS:
        generated = np.insert(generated, 0, sets_encoded_log_test[-1].iloc[num_c], axis=1)

    print("sets_test_scaled[-1]", sets_test_scaled[-1].shape)
    print("sets_test_scaled[-1][num_c]", sets_test_scaled[-1].iloc[num_c])

    # 5: undo scaling
    encoded_generated = preprocess2.rescale_data(generated, start_value=sets_test_scaled[-1].iloc[num_c],
                                                 dataset_name=test_dataset_names[-1])
    if preprocess_type2 is PreprocessType.LOG_RETURNS_OVER_TENORS:
        encoded_generated = encoded_generated[:, 1:]  # remove first curve again

    # 7: undo scaling, this can be log-returns
    simulated = preprocess1.rescale_data(encoded_generated, start_value=sets_test[-1].iloc[num_c],
                                         dataset_name=test_dataset_names[-1])

    if preprocess_type2 is PreprocessType.LOG_RETURNS_OVER_TENORS:
        real = np.array(sets_test[-1])[num_c:num_c + num_o + 1]  # `+1` because the log-returns also does +1
    else:
        real = np.array(sets_test[-1])[num_c:num_c + num_o + 1]

    sim = simulated.reshape(100, 43)

    print("sets_test[-1].iloc[num_c], sim[0][0]", sets_test[-1].iloc[num_c], sim[0][0], sim[1][0], sim[2][0])
    print("real, simulated", real.shape, sim.shape)

    smape_result = smape(sim, real, over_curves=True)

    if plot:
        condition_and_real = sets_test[-1].iloc[0:num_c + num_o + 1]
        plotting = Plotting()
        plotting.plot_training_sample("simulated_simple", sim, condition_and_real, num_c, after_real_data=True)

        # print("smape test:", smape(simulated[0], real), smape_result)

    return smape_result
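# A minimal sketch of how this simulate() could be driven over a couple of configurations and the
# per-curve SMAPE summarised. Only enum members referenced above (GANModel.GAN, GANModel.WGAN,
# PreprocessType.NORMALISATION_OVER_TENORS, PreprocessType.LOG_RETURNS_OVER_TENORS) are used; the
# choice of combinations here is an assumption for illustration.
if __name__ == "__main__":
    import numpy as np

    results = {}
    for chosen_gan_model in (GANModel.GAN, GANModel.WGAN):
        smape_per_curve = simulate(latent_dim=2,
                                   preprocess_type1=PreprocessType.NORMALISATION_OVER_TENORS,
                                   preprocess_type2=PreprocessType.LOG_RETURNS_OVER_TENORS,
                                   gan_model=chosen_gan_model,
                                   force_training=False,
                                   plot=False)
        results[chosen_gan_model.name] = np.mean(smape_per_curve)
    print("mean SMAPE per GAN model:", results)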